(*) Rename EdgeDomain$domain into topDomain
This variable had a very confusing name and was dangerously easy to use in the wrong place, producing code that only works as expected half the time. Ideally this class needs an overhaul; the assumptions it makes about domain names aren't great.
This commit is contained in:
parent
edf9aa2c23
commit
bf44805e69
@ -15,7 +15,7 @@ public class EdgeDomain implements Serializable {
|
|||||||
@Nonnull
|
@Nonnull
|
||||||
public final String subDomain;
|
public final String subDomain;
|
||||||
@Nonnull
|
@Nonnull
|
||||||
public final String domain;
|
public final String topDomain;
|
||||||
|
|
||||||
@SneakyThrows
|
@SneakyThrows
|
||||||
public EdgeDomain(String host) {
|
public EdgeDomain(String host) {
|
||||||
@ -27,13 +27,13 @@ public class EdgeDomain implements Serializable {
|
|||||||
|
|
||||||
if (dot < 0 || looksLikeAnIp(host)) { // IPV6 >.>
|
if (dot < 0 || looksLikeAnIp(host)) { // IPV6 >.>
|
||||||
subDomain = "";
|
subDomain = "";
|
||||||
domain = host;
|
topDomain = host;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
int dot2 = host.substring(0, dot).lastIndexOf('.');
|
int dot2 = host.substring(0, dot).lastIndexOf('.');
|
||||||
if (dot2 < 0) {
|
if (dot2 < 0) {
|
||||||
subDomain = "";
|
subDomain = "";
|
||||||
domain = host;
|
topDomain = host;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
if (looksLikeGovTld(host))
|
if (looksLikeGovTld(host))
|
||||||
@ -42,16 +42,16 @@ public class EdgeDomain implements Serializable {
|
|||||||
if (dot3 >= 0) {
|
if (dot3 >= 0) {
|
||||||
dot2 = dot3;
|
dot2 = dot3;
|
||||||
subDomain = host.substring(0, dot2);
|
subDomain = host.substring(0, dot2);
|
||||||
domain = host.substring(dot2 + 1);
|
topDomain = host.substring(dot2 + 1);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
subDomain = "";
|
subDomain = "";
|
||||||
domain = host;
|
topDomain = host;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
subDomain = host.substring(0, dot2);
|
subDomain = host.substring(0, dot2);
|
||||||
domain = host.substring(dot2 + 1);
|
topDomain = host.substring(dot2 + 1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -97,28 +97,28 @@ public class EdgeDomain implements Serializable {
|
|||||||
|
|
||||||
public String getAddress() {
|
public String getAddress() {
|
||||||
if (!subDomain.isEmpty()) {
|
if (!subDomain.isEmpty()) {
|
||||||
return subDomain + "." + domain;
|
return subDomain + "." + topDomain;
|
||||||
}
|
}
|
||||||
return domain;
|
return topDomain;
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getDomainKey() {
|
public String getDomainKey() {
|
||||||
int cutPoint = domain.indexOf('.');
|
int cutPoint = topDomain.indexOf('.');
|
||||||
if (cutPoint < 0) {
|
if (cutPoint < 0) {
|
||||||
return domain;
|
return topDomain;
|
||||||
}
|
}
|
||||||
return domain.substring(0, cutPoint).toLowerCase();
|
return topDomain.substring(0, cutPoint).toLowerCase();
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getLongDomainKey() {
|
public String getLongDomainKey() {
|
||||||
StringBuilder ret = new StringBuilder();
|
StringBuilder ret = new StringBuilder();
|
||||||
|
|
||||||
int cutPoint = domain.indexOf('.');
|
int cutPoint = topDomain.indexOf('.');
|
||||||
if (cutPoint < 0) {
|
if (cutPoint < 0) {
|
||||||
ret.append(domain);
|
ret.append(topDomain);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
ret.append(domain, 0, cutPoint);
|
ret.append(topDomain, 0, cutPoint);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!"".equals(subDomain) && !"www".equals(subDomain)) {
|
if (!"".equals(subDomain) && !"www".equals(subDomain)) {
|
||||||
@ -133,30 +133,30 @@ public class EdgeDomain implements Serializable {
|
|||||||
public boolean hasSameTopDomain(EdgeDomain other) {
|
public boolean hasSameTopDomain(EdgeDomain other) {
|
||||||
if (other == null) return false;
|
if (other == null) return false;
|
||||||
|
|
||||||
return domain.equalsIgnoreCase(other.domain);
|
return topDomain.equalsIgnoreCase(other.topDomain);
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getTld() {
|
public String getTld() {
|
||||||
int dot = -1;
|
int dot = -1;
|
||||||
int length = domain.length();
|
int length = topDomain.length();
|
||||||
|
|
||||||
if (ipPatternTest.test(domain)) {
|
if (ipPatternTest.test(topDomain)) {
|
||||||
return "IP";
|
return "IP";
|
||||||
}
|
}
|
||||||
|
|
||||||
if (govListTest.test(domain)) {
|
if (govListTest.test(topDomain)) {
|
||||||
dot = domain.indexOf('.', Math.max(0, length - ".edu.uk".length()));
|
dot = topDomain.indexOf('.', Math.max(0, length - ".edu.uk".length()));
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
dot = domain.lastIndexOf('.');
|
dot = topDomain.lastIndexOf('.');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
if (dot < 0 || dot == domain.length() - 1) {
|
if (dot < 0 || dot == topDomain.length() - 1) {
|
||||||
return "-";
|
return "-";
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
return domain.substring(dot + 1);
|
return topDomain.substring(dot + 1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -166,8 +166,8 @@ public class EdgeDomain implements Serializable {
|
|||||||
final String this$subDomain = this.getSubDomain();
|
final String this$subDomain = this.getSubDomain();
|
||||||
final String other$subDomain = other.getSubDomain();
|
final String other$subDomain = other.getSubDomain();
|
||||||
if (!Objects.equals(this$subDomain,other$subDomain)) return false;
|
if (!Objects.equals(this$subDomain,other$subDomain)) return false;
|
||||||
final String this$domain = this.getDomain();
|
final String this$domain = this.getTopDomain();
|
||||||
final String other$domain = other.getDomain();
|
final String other$domain = other.getTopDomain();
|
||||||
if (!Objects.equals(this$domain,other$domain)) return false;
|
if (!Objects.equals(this$domain,other$domain)) return false;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -177,7 +177,7 @@ public class EdgeDomain implements Serializable {
|
|||||||
int result = 1;
|
int result = 1;
|
||||||
final Object $subDomain = this.getSubDomain().toLowerCase();
|
final Object $subDomain = this.getSubDomain().toLowerCase();
|
||||||
result = result * PRIME + $subDomain.hashCode();
|
result = result * PRIME + $subDomain.hashCode();
|
||||||
final Object $domain = this.getDomain().toLowerCase();
|
final Object $domain = this.getTopDomain().toLowerCase();
|
||||||
result = result * PRIME + $domain.hashCode();
|
result = result * PRIME + $domain.hashCode();
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
@ -1,6 +1,5 @@
|
|||||||
package nu.marginalia.model;
|
package nu.marginalia.model;
|
||||||
|
|
||||||
import nu.marginalia.model.EdgeUrl;
|
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
import java.net.URISyntaxException;
|
import java.net.URISyntaxException;
|
||||||
@ -22,7 +21,7 @@ class EdgeDomainTest {
|
|||||||
var domain = new EdgeUrl("http://l7072i3.l7c.net");
|
var domain = new EdgeUrl("http://l7072i3.l7c.net");
|
||||||
assertEquals("http", domain.proto);
|
assertEquals("http", domain.proto);
|
||||||
assertEquals("l7072i3", domain.domain.subDomain);
|
assertEquals("l7072i3", domain.domain.subDomain);
|
||||||
assertEquals("l7c.net", domain.domain.domain);
|
assertEquals("l7c.net", domain.domain.topDomain);
|
||||||
assertEquals("net", domain.domain.getTld());
|
assertEquals("net", domain.domain.getTld());
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -31,7 +30,7 @@ class EdgeDomainTest {
|
|||||||
var domain = new EdgeUrl("http://endless.horse/");
|
var domain = new EdgeUrl("http://endless.horse/");
|
||||||
assertEquals("http", domain.proto);
|
assertEquals("http", domain.proto);
|
||||||
assertEquals("", domain.domain.subDomain);
|
assertEquals("", domain.domain.subDomain);
|
||||||
assertEquals("endless.horse", domain.domain.domain);
|
assertEquals("endless.horse", domain.domain.topDomain);
|
||||||
assertEquals("horse", domain.domain.getTld());
|
assertEquals("horse", domain.domain.getTld());
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -40,7 +39,7 @@ class EdgeDomainTest {
|
|||||||
var domain = new EdgeUrl("http://uj.edu.pl");
|
var domain = new EdgeUrl("http://uj.edu.pl");
|
||||||
assertEquals("http", domain.proto);
|
assertEquals("http", domain.proto);
|
||||||
assertEquals("", domain.domain.subDomain);
|
assertEquals("", domain.domain.subDomain);
|
||||||
assertEquals("uj.edu.pl", domain.domain.domain);
|
assertEquals("uj.edu.pl", domain.domain.topDomain);
|
||||||
assertEquals("edu.pl", domain.domain.getTld());
|
assertEquals("edu.pl", domain.domain.getTld());
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -50,7 +49,7 @@ class EdgeDomainTest {
|
|||||||
var domain = new EdgeUrl("http://www.marginalia.nu");
|
var domain = new EdgeUrl("http://www.marginalia.nu");
|
||||||
assertEquals("http", domain.proto);
|
assertEquals("http", domain.proto);
|
||||||
assertEquals("www", domain.domain.subDomain);
|
assertEquals("www", domain.domain.subDomain);
|
||||||
assertEquals("marginalia.nu", domain.domain.domain);
|
assertEquals("marginalia.nu", domain.domain.topDomain);
|
||||||
assertEquals("http://www.marginalia.nu/", domain.toString());
|
assertEquals("http://www.marginalia.nu/", domain.toString());
|
||||||
assertEquals("nu", domain.domain.getTld());
|
assertEquals("nu", domain.domain.getTld());
|
||||||
}
|
}
|
||||||
@ -58,7 +57,7 @@ class EdgeDomainTest {
|
|||||||
@Test
|
@Test
|
||||||
public void testUkDomain2() throws URISyntaxException {
|
public void testUkDomain2() throws URISyntaxException {
|
||||||
var domain = new EdgeUrl("http://marginalia.co.uk");
|
var domain = new EdgeUrl("http://marginalia.co.uk");
|
||||||
assertEquals("marginalia.co.uk", domain.domain.domain);
|
assertEquals("marginalia.co.uk", domain.domain.topDomain);
|
||||||
assertEquals("http", domain.proto);
|
assertEquals("http", domain.proto);
|
||||||
assertEquals("", domain.domain.subDomain);
|
assertEquals("", domain.domain.subDomain);
|
||||||
assertEquals("http://marginalia.co.uk/", domain.toString());
|
assertEquals("http://marginalia.co.uk/", domain.toString());
|
||||||
@ -68,7 +67,7 @@ class EdgeDomainTest {
|
|||||||
@Test
|
@Test
|
||||||
public void testUkDomain3() throws URISyntaxException {
|
public void testUkDomain3() throws URISyntaxException {
|
||||||
var domain = new EdgeUrl("http://withcandour.co.uk");
|
var domain = new EdgeUrl("http://withcandour.co.uk");
|
||||||
assertEquals("withcandour.co.uk", domain.domain.domain);
|
assertEquals("withcandour.co.uk", domain.domain.topDomain);
|
||||||
assertEquals("http", domain.proto);
|
assertEquals("http", domain.proto);
|
||||||
assertEquals("", domain.domain.subDomain);
|
assertEquals("", domain.domain.subDomain);
|
||||||
assertEquals("http://withcandour.co.uk/", domain.toString());
|
assertEquals("http://withcandour.co.uk/", domain.toString());
|
||||||
@ -80,7 +79,7 @@ class EdgeDomainTest {
|
|||||||
var domain = new EdgeUrl("http://www.marginalia.co.uk");
|
var domain = new EdgeUrl("http://www.marginalia.co.uk");
|
||||||
assertEquals("http", domain.proto);
|
assertEquals("http", domain.proto);
|
||||||
assertEquals("www", domain.domain.subDomain);
|
assertEquals("www", domain.domain.subDomain);
|
||||||
assertEquals("marginalia.co.uk", domain.domain.domain);
|
assertEquals("marginalia.co.uk", domain.domain.topDomain);
|
||||||
assertEquals("http://www.marginalia.co.uk/", domain.toString());
|
assertEquals("http://www.marginalia.co.uk/", domain.toString());
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -88,7 +87,7 @@ class EdgeDomainTest {
|
|||||||
public void testThreeLetterDomain() throws URISyntaxException {
|
public void testThreeLetterDomain() throws URISyntaxException {
|
||||||
var domain = new EdgeUrl("http://www.marginalia.abcf.de");
|
var domain = new EdgeUrl("http://www.marginalia.abcf.de");
|
||||||
assertEquals("http", domain.proto);
|
assertEquals("http", domain.proto);
|
||||||
assertEquals("abcf.de", domain.domain.domain);
|
assertEquals("abcf.de", domain.domain.topDomain);
|
||||||
assertEquals("www.marginalia", domain.domain.subDomain);
|
assertEquals("www.marginalia", domain.domain.subDomain);
|
||||||
assertEquals("de", domain.domain.getTld());
|
assertEquals("de", domain.domain.getTld());
|
||||||
}
|
}
|
||||||
@ -98,7 +97,7 @@ class EdgeDomainTest {
|
|||||||
var domain = new EdgeUrl("http://marginalia.nu");
|
var domain = new EdgeUrl("http://marginalia.nu");
|
||||||
assertEquals("http", domain.proto);
|
assertEquals("http", domain.proto);
|
||||||
assertEquals("", domain.domain.subDomain);
|
assertEquals("", domain.domain.subDomain);
|
||||||
assertEquals("marginalia.nu", domain.domain.domain);
|
assertEquals("marginalia.nu", domain.domain.topDomain);
|
||||||
assertEquals("http://marginalia.nu/", domain.toString());
|
assertEquals("http://marginalia.nu/", domain.toString());
|
||||||
assertEquals("nu", domain.domain.getTld());
|
assertEquals("nu", domain.domain.getTld());
|
||||||
}
|
}
|
||||||
@ -108,7 +107,7 @@ class EdgeDomainTest {
|
|||||||
var domain = new EdgeUrl("https://127.0.0.1:8080");
|
var domain = new EdgeUrl("https://127.0.0.1:8080");
|
||||||
assertEquals("https", domain.proto);
|
assertEquals("https", domain.proto);
|
||||||
assertEquals("", domain.domain.subDomain);
|
assertEquals("", domain.domain.subDomain);
|
||||||
assertEquals("127.0.0.1", domain.domain.domain);
|
assertEquals("127.0.0.1", domain.domain.topDomain);
|
||||||
assertEquals("https://127.0.0.1:8080/", domain.toString());
|
assertEquals("https://127.0.0.1:8080/", domain.toString());
|
||||||
assertEquals("IP", domain.domain.getTld());
|
assertEquals("IP", domain.domain.getTld());
|
||||||
}
|
}
|
||||||
@ -118,7 +117,7 @@ class EdgeDomainTest {
|
|||||||
var domain = new EdgeUrl("https://192.168.1.32");
|
var domain = new EdgeUrl("https://192.168.1.32");
|
||||||
assertEquals("https", domain.proto);
|
assertEquals("https", domain.proto);
|
||||||
assertEquals("", domain.domain.subDomain);
|
assertEquals("", domain.domain.subDomain);
|
||||||
assertEquals("192.168.1.32", domain.domain.domain);
|
assertEquals("192.168.1.32", domain.domain.topDomain);
|
||||||
assertEquals("https://192.168.1.32/", domain.toString());
|
assertEquals("https://192.168.1.32/", domain.toString());
|
||||||
assertEquals("IP", domain.domain.getTld());
|
assertEquals("IP", domain.domain.getTld());
|
||||||
}
|
}
|
||||||
|
@ -62,7 +62,7 @@ public class IpBlockList {
|
|||||||
if (blocklistDisabled)
|
if (blocklistDisabled)
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
if (domain.domain.endsWith(".cn")) {
|
if (domain.topDomain.endsWith(".cn")) {
|
||||||
logger.debug("Blocking {} on .cn-end", domain);
|
logger.debug("Blocking {} on .cn-end", domain);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -67,7 +67,7 @@ public class UrlBlocklist {
|
|||||||
|
|
||||||
public boolean isUrlBlocked(EdgeUrl url) {
|
public boolean isUrlBlocked(EdgeUrl url) {
|
||||||
try {
|
try {
|
||||||
if (badDomains.contains(url.domain.domain)) {
|
if (badDomains.contains(url.domain.topDomain)) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -76,7 +76,7 @@ public class UrlBlocklist {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ("github.com".equals(url.domain.domain)) {
|
if ("github.com".equals(url.domain.topDomain)) {
|
||||||
return url.path.chars().filter(c -> c == '/').count() > 2;
|
return url.path.chars().filter(c -> c == '/').count() > 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -10,7 +10,7 @@ public record BrowseResult (EdgeUrl url,
|
|||||||
public String domainHash() {
|
public String domainHash() {
|
||||||
var domain = url.domain;
|
var domain = url.domain;
|
||||||
if ("www".equals(domain.subDomain)) {
|
if ("www".equals(domain.subDomain)) {
|
||||||
return domain.domain;
|
return domain.topDomain;
|
||||||
}
|
}
|
||||||
return domain.toString();
|
return domain.toString();
|
||||||
}
|
}
|
||||||
@ -19,7 +19,7 @@ public record BrowseResult (EdgeUrl url,
|
|||||||
String ret;
|
String ret;
|
||||||
var domain = url.domain;
|
var domain = url.domain;
|
||||||
if ("www".equals(domain.subDomain)) {
|
if ("www".equals(domain.subDomain)) {
|
||||||
ret = domain.domain;
|
ret = domain.topDomain;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
ret = domain.toString();
|
ret = domain.toString();
|
||||||
|
@ -14,7 +14,6 @@ import nu.marginalia.geoip.GeoIpDictionary;
|
|||||||
import nu.marginalia.model.crawl.DomainIndexingState;
|
import nu.marginalia.model.crawl.DomainIndexingState;
|
||||||
import nu.marginalia.converting.model.ProcessedDomain;
|
import nu.marginalia.converting.model.ProcessedDomain;
|
||||||
import nu.marginalia.model.EdgeDomain;
|
import nu.marginalia.model.EdgeDomain;
|
||||||
import nu.marginalia.model.EdgeUrl;
|
|
||||||
import nu.marginalia.converting.processor.logic.links.TopKeywords;
|
import nu.marginalia.converting.processor.logic.links.TopKeywords;
|
||||||
import nu.marginalia.converting.processor.logic.LshDocumentDeduplicator;
|
import nu.marginalia.converting.processor.logic.LshDocumentDeduplicator;
|
||||||
import nu.marginalia.model.crawl.HtmlFeature;
|
import nu.marginalia.model.crawl.HtmlFeature;
|
||||||
@ -161,10 +160,10 @@ public class DomainProcessor {
|
|||||||
private static final Pattern academicPattern = Pattern.compile(".*\\.(ac|edu)\\.[a-z]{2}$");
|
private static final Pattern academicPattern = Pattern.compile(".*\\.(ac|edu)\\.[a-z]{2}$");
|
||||||
private boolean isAcademicDomain(EdgeDomain domain) {
|
private boolean isAcademicDomain(EdgeDomain domain) {
|
||||||
|
|
||||||
if (domain.domain.endsWith(".edu"))
|
if (domain.topDomain.endsWith(".edu"))
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
if (academicPattern.matcher(domain.domain).matches())
|
if (academicPattern.matcher(domain.topDomain).matches())
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
|
@ -50,7 +50,7 @@ public abstract class AbstractDocumentProcessorPlugin {
|
|||||||
public MetaTagsBuilder addUrl(EdgeUrl url) {
|
public MetaTagsBuilder addUrl(EdgeUrl url) {
|
||||||
add("proto", url.proto);
|
add("proto", url.proto);
|
||||||
add("site", url.domain);
|
add("site", url.domain);
|
||||||
add("site", url.domain.domain);
|
add("site", url.domain.topDomain);
|
||||||
add("tld", url.domain.getTld());
|
add("tld", url.domain.getTld());
|
||||||
|
|
||||||
if (url.path.startsWith("/~")) {
|
if (url.path.startsWith("/~")) {
|
||||||
|
@ -291,7 +291,7 @@ public class HtmlDocumentProcessorPlugin extends AbstractDocumentProcessorPlugin
|
|||||||
|
|
||||||
for (var fd : lp.getForeignDomains()) {
|
for (var fd : lp.getForeignDomains()) {
|
||||||
linkTerms.add("links:"+fd.toString().toLowerCase());
|
linkTerms.add("links:"+fd.toString().toLowerCase());
|
||||||
linkTerms.add("links:"+fd.getDomain().toLowerCase());
|
linkTerms.add("links:"+fd.getTopDomain().toLowerCase());
|
||||||
}
|
}
|
||||||
|
|
||||||
return linkTerms;
|
return linkTerms;
|
||||||
|
@ -54,7 +54,7 @@ public class HtmlProcessorSpecializations {
|
|||||||
return blogSpecialization;
|
return blogSpecialization;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (url.domain.getDomain().equals("mariadb.com")
|
if (url.domain.getTopDomain().equals("mariadb.com")
|
||||||
&& url.path.startsWith("/kb")) {
|
&& url.path.startsWith("/kb")) {
|
||||||
return mariadbKbSpecialization;
|
return mariadbKbSpecialization;
|
||||||
}
|
}
|
||||||
|
@ -33,7 +33,7 @@ class RssCrawlerTest {
|
|||||||
var href = element.attr("href");
|
var href = element.attr("href");
|
||||||
if (href != null && !href.isBlank()) {
|
if (href != null && !href.isBlank()) {
|
||||||
lp.parseLink(base, href)
|
lp.parseLink(base, href)
|
||||||
.filter(u -> Objects.equals(u.domain.domain, base.domain.domain))
|
.filter(u -> Objects.equals(u.domain.topDomain, base.domain.topDomain))
|
||||||
.ifPresent(urls::add);
|
.ifPresent(urls::add);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
@ -42,7 +42,7 @@ class RssCrawlerTest {
|
|||||||
var href = element.text();
|
var href = element.text();
|
||||||
if (href != null && !href.isBlank()) {
|
if (href != null && !href.isBlank()) {
|
||||||
lp.parseLink(base, href)
|
lp.parseLink(base, href)
|
||||||
.filter(u -> Objects.equals(u.domain.domain, base.domain.domain))
|
.filter(u -> Objects.equals(u.domain.topDomain, base.domain.topDomain))
|
||||||
.ifPresent(urls::add);
|
.ifPresent(urls::add);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
@ -51,7 +51,7 @@ class RssCrawlerTest {
|
|||||||
var href = element.text();
|
var href = element.text();
|
||||||
if (href != null && !href.isBlank()) {
|
if (href != null && !href.isBlank()) {
|
||||||
lp.parseLink(base, href)
|
lp.parseLink(base, href)
|
||||||
.filter(u -> Objects.equals(u.domain.domain, base.domain.domain))
|
.filter(u -> Objects.equals(u.domain.topDomain, base.domain.topDomain))
|
||||||
.ifPresent(urls::add);
|
.ifPresent(urls::add);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
@ -147,7 +147,7 @@ public class DomainLoaderService {
|
|||||||
|
|
||||||
public void accept(EdgeDomain domain) throws SQLException {
|
public void accept(EdgeDomain domain) throws SQLException {
|
||||||
statement.setString(1, domain.toString());
|
statement.setString(1, domain.toString());
|
||||||
statement.setString(2, domain.domain);
|
statement.setString(2, domain.topDomain);
|
||||||
statement.setInt(3, nodeAffinity);
|
statement.setInt(3, nodeAffinity);
|
||||||
statement.addBatch();
|
statement.addBatch();
|
||||||
|
|
||||||
|
@ -81,7 +81,7 @@ public class ControlBlacklistService {
|
|||||||
""")) {
|
""")) {
|
||||||
stmt.setString(1, domain.toString());
|
stmt.setString(1, domain.toString());
|
||||||
stmt.addBatch();
|
stmt.addBatch();
|
||||||
stmt.setString(1, domain.domain);
|
stmt.setString(1, domain.topDomain);
|
||||||
stmt.addBatch();
|
stmt.addBatch();
|
||||||
stmt.executeBatch();
|
stmt.executeBatch();
|
||||||
}
|
}
|
||||||
|
@ -157,7 +157,7 @@ public class ExportAtagsActor extends RecordActorPrototype {
|
|||||||
return false;
|
return false;
|
||||||
|
|
||||||
// This is an artifact of the link parser typically
|
// This is an artifact of the link parser typically
|
||||||
if ("example.com".equals(url.domain.domain))
|
if ("example.com".equals(url.domain.topDomain))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
if (linkText.contains(url.domain.toString()))
|
if (linkText.contains(url.domain.toString()))
|
||||||
|
@ -61,7 +61,7 @@ public class DomainListRefreshService {
|
|||||||
for (var domain : domainsAll) {
|
for (var domain : domainsAll) {
|
||||||
var parsed = new EdgeDomain(domain);
|
var parsed = new EdgeDomain(domain);
|
||||||
insert.setString(1, domain.toLowerCase());
|
insert.setString(1, domain.toLowerCase());
|
||||||
insert.setString(2, parsed.domain);
|
insert.setString(2, parsed.topDomain);
|
||||||
insert.setInt(3, nodeId);
|
insert.setInt(3, nodeId);
|
||||||
insert.addBatch();
|
insert.addBatch();
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user