Fixing some compiler warnings + a rotten test

This commit is contained in:
vlofgren 2022-05-19 22:01:51 +02:00
parent ebea6bf81b
commit c4017c23ed
222 changed files with 363 additions and 576 deletions

View File

@ -6,7 +6,7 @@ import java.util.Arrays;
import java.util.stream.LongStream;
public class BinSearchVsLinSearch {
static long[] data = LongStream.generate(() -> (long) (Long.MAX_VALUE * Math.random())).limit(512).sorted().toArray();
static final long[] data = LongStream.generate(() -> (long) (Long.MAX_VALUE * Math.random())).limit(512).sorted().toArray();
@State(Scope.Thread)
public static class Target {

View File

@ -17,7 +17,7 @@ public class BinSearchVsLinSearch2 {
Path tf;
MultimapFileLong file;
MultimapSearcher searcher;
long[] data = new long[512];
final long[] data = new long[512];
{
try {

View File

@ -9,7 +9,7 @@ import java.util.Set;
public class BadBotList {
private final Set<InetAddress> shitlist = new HashSet<>();
public static BadBotList INSTANCE = new BadBotList();
public static final BadBotList INSTANCE = new BadBotList();
private final Logger logger = LoggerFactory.getLogger(getClass().getSimpleName());
private BadBotList() {}

View File

@ -1,11 +1,7 @@
package nu.marginalia.gemini;
import com.google.inject.AbstractModule;
import com.google.inject.Inject;
import com.google.inject.Provider;
import com.google.inject.name.Named;
import com.google.inject.name.Names;
import nu.marginalia.wmsa.memex.system.MemexFileWriter;
import java.nio.file.Path;

View File

@ -3,7 +3,6 @@ package nu.marginalia.gemini.gmi;
import com.google.common.collect.Sets;
import nu.marginalia.gemini.gmi.line.GemtextLineVisitorAdapter;
import nu.marginalia.gemini.gmi.line.GemtextLink;
import nu.marginalia.wmsa.memex.model.MemexNodeHeadingId;
import nu.marginalia.wmsa.memex.model.MemexNodeUrl;
import nu.marginalia.wmsa.memex.model.MemexUrl;
@ -13,7 +12,7 @@ import java.nio.file.Path;
import java.util.*;
public class GemtextDatabase extends Gemtext {
public Map<String, Integer> links;
public final Map<String, Integer> links;
public GemtextDatabase(MemexNodeUrl url, String[] lines) {
super(url, lines);

View File

@ -159,5 +159,5 @@ public class GemtextDocument extends Gemtext {
return null;
}
};
}
}

View File

@ -12,7 +12,7 @@ import javax.annotation.Nullable;
import java.util.regex.Pattern;
public class GemtextLinkParser {
private static Pattern linkPattern = Pattern.compile("^=>\\s?([^\\s]+)\\s*(.+)?$");
private static final Pattern linkPattern = Pattern.compile("^=>\\s?([^\\s]+)\\s*(.+)?$");
@Nullable
public static AbstractGemtextLine parse(String s, MemexNodeHeadingId heading) {

View File

@ -16,7 +16,7 @@ import static nu.marginalia.gemini.GeminiService.DEFAULT_FILENAME;
public class BareStaticPagePlugin implements Plugin {
private final Logger logger = LoggerFactory.getLogger(getClass());
private Path geminiServerRoot;
private final Path geminiServerRoot;
@Inject
public BareStaticPagePlugin(@Named("gemini-server-root") Path geminiServerRoot) {

View File

@ -23,7 +23,7 @@ public class RandomWriteFunnel implements AutoCloseable {
private final static Gauge transfer_rate = Gauge.build("wmsa_rwf_transfer_bytes", "Bytes/s")
.register();
private static final Logger logger = LoggerFactory.getLogger(RandomWriteFunnel.class);
private DataBin[] bins;
private final DataBin[] bins;
private final int binSize;
@ -72,7 +72,7 @@ public class RandomWriteFunnel implements AutoCloseable {
static class DataBin implements AutoCloseable {
private final ByteBuffer buffer;
private int size;
private final int size;
private final FileChannel channel;
private final File file;

View File

@ -11,7 +11,7 @@ public record BTreeHeader(int layers, int numEntries, long indexOffsetLongs, lon
assert (dataOffsetLongs >= indexOffsetLongs);
}
public static int BTreeHeaderSizeLongs = 3;
public static final int BTreeHeaderSizeLongs = 3;
public BTreeHeader(long a, long b, long c) {
this((int)(a >>> 32), (int)(a & 0xFFFF_FFFFL), b, c);

View File

@ -33,7 +33,7 @@ public class DictionaryHashMap {
private final int intsPerBuffer;
private final long maxProbeLength;
private AtomicInteger sz = new AtomicInteger(0);
private final AtomicInteger sz = new AtomicInteger(0);
public DictionaryHashMap(long sizeMemory) {
final int intSize = 4;

View File

@ -2,7 +2,7 @@ package nu.marginalia.util.graphics.dithering;
public class Palettes {
public static int[] MARGINALIA_PALETTE = new int[] {
public static final int[] MARGINALIA_PALETTE = new int[] {
0x000000,
0x000000,
0x808080,

View File

@ -157,8 +157,8 @@ public class LongPairHashMap {
@Getter @EqualsAndHashCode
public static class CellData {
long first;
long second;
final long first;
final long second;
public CellData(long key, long offset) {
first = key | 0x8000_0000_000_000L;

View File

@ -38,7 +38,7 @@ public class MultimapFileLong implements AutoCloseable {
private boolean loadAggressively;
private NativeIO.Advice advice = null;
private final NativeIO.Advice advice = null;
public static MultimapFileLong forReading(Path file) throws IOException {
long fileSize = Files.size(file);
@ -116,7 +116,7 @@ public class MultimapFileLong implements AutoCloseable {
public void advice(NativeIO.Advice advice) {
for (var buffer : mappedByteBuffers) {
NativeIO.madvise(buffer, advice);
};
}
}
@SneakyThrows

View File

@ -8,7 +8,7 @@ import java.io.IOException;
public class BetterReversePageRank extends RankingAlgorithm {
public BetterReversePageRank(HikariDataSource dataSource, String... origins) throws IOException {
public BetterReversePageRank(HikariDataSource dataSource, String... origins) {
super(dataSource, origins);
}

View File

@ -7,7 +7,7 @@ import java.io.IOException;
public class BetterStandardPageRank extends RankingAlgorithm {
public BetterStandardPageRank(HikariDataSource dataSource, String... origins) throws IOException {
public BetterStandardPageRank(HikariDataSource dataSource, String... origins) {
super(dataSource, origins);
}

View File

@ -8,7 +8,7 @@ import java.io.IOException;
public class BuggyReversePageRank extends RankingAlgorithm {
public BuggyReversePageRank(HikariDataSource dataSource, String... origins) throws IOException {
public BuggyReversePageRank(HikariDataSource dataSource, String... origins) {
super(dataSource, origins);
}

View File

@ -7,7 +7,7 @@ import java.io.IOException;
public class BuggyStandardPageRank extends RankingAlgorithm {
public BuggyStandardPageRank(HikariDataSource dataSource, String... origins) throws IOException {
public BuggyStandardPageRank(HikariDataSource dataSource, String... origins) {
super(dataSource, origins);
}

View File

@ -33,12 +33,12 @@ public abstract class RankingAlgorithm {
TIntArrayList[] linkDataSrc2Dest;
TIntArrayList[] linkDataDest2Src;
public Set<String> originDomains = new HashSet<>();
public Set<Integer> originDomainIds = new HashSet<>();
public final Set<String> originDomains = new HashSet<>();
public final Set<Integer> originDomainIds = new HashSet<>();
private int maxKnownUrls = Integer.MAX_VALUE;
private static boolean getNames = true;
private static final boolean getNames = true;
private final Logger logger = LoggerFactory.getLogger(getClass());

View File

@ -25,8 +25,8 @@ public class OldReversePageRankV2 {
private final TIntObjectHashMap<TIntArrayList> reverseLinkData = new TIntObjectHashMap<>();
private final Logger logger = LoggerFactory.getLogger(getClass());
public Set<String> originDomains = new HashSet<>();
public Set<Integer> originDomainIds = new HashSet<>();
public final Set<String> originDomains = new HashSet<>();
public final Set<Integer> originDomainIds = new HashSet<>();
public static void main(String... args) throws IOException {
new OldReversePageRankV2(
@ -62,7 +62,7 @@ public class OldReversePageRankV2 {
System.out.printf("%2.2f", (end - start)/1000.0);
}
public OldReversePageRankV2(HikariDataSource dataSource) throws IOException {
public OldReversePageRankV2(HikariDataSource dataSource) {
originDomains.add("memex.marginalia.nu");
try (var conn = dataSource.getConnection()) {

View File

@ -9,7 +9,6 @@ import gnu.trove.map.hash.TIntObjectHashMap;
import gnu.trove.set.hash.TIntHashSet;
import lombok.AllArgsConstructor;
import lombok.Data;
import nu.marginalia.wmsa.configuration.module.DatabaseModule;
import org.jetbrains.annotations.NotNull;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -28,8 +27,8 @@ public class StandardPageRank {
private final TIntObjectHashMap<TIntArrayList> reverseLinkData = new TIntObjectHashMap<>();
private final Logger logger = LoggerFactory.getLogger(getClass());
public Set<String> originDomains = new HashSet();
public Set<Integer> originDomainIds = new HashSet<>();
public final Set<String> originDomains = new HashSet<>();
public final Set<Integer> originDomainIds = new HashSet<>();
public StandardPageRank(IntToDoubleFunction weight, String... seedDomains) throws IOException {
originDomains.addAll(Arrays.asList(seedDomains));
@ -45,7 +44,7 @@ public class StandardPageRank {
return domains.get(id).name;
}
public StandardPageRank(HikariDataSource dataSource, String... origins) throws IOException {
public StandardPageRank(HikariDataSource dataSource, String... origins) {
originDomains.addAll(Arrays.asList(origins));
try (var conn = dataSource.getConnection()) {

View File

@ -21,7 +21,7 @@ public class DedupTool {
public Set<String> originDomains = new HashSet<>();
public Set<Integer> originDomainIds = new HashSet<>();
public long domainIdMax = -1;
public final long domainIdMax = -1;
public int domainCount;
private volatile static int rankMax;
@ -43,7 +43,7 @@ public class DedupTool {
}
@SneakyThrows
public static void main(String... args) throws IOException {
public static void main(String... args) {
Driver driver = new Driver();
var ds = new DatabaseModule().provideConnection();
@ -51,9 +51,9 @@ public class DedupTool {
try (var conn = ds.getConnection();
var fetchStmt = conn.prepareStatement("SELECT URL_TOP_DOMAIN_ID,DATA_HASH,URL,EC_URL.ID,EC_DOMAIN.URL_PART FROM EC_URL INNER JOIN EC_DOMAIN ON EC_DOMAIN.ID=DOMAIN_ID WHERE DATA_HASH IS NOT NULL");
var updateStmt = conn.prepareStatement("UPDATE EC_URL SET STATE='redirect' WHERE ID=?");
var updateStmt = conn.prepareStatement("UPDATE EC_URL SET STATE='redirect' WHERE ID=?")
) {
) {
fetchStmt.setFetchSize(10_000);
var rsp = fetchStmt.executeQuery();
while (rsp.next()) {

View File

@ -39,11 +39,11 @@ public class PerusePageRankV2 {
TIntArrayList[] linkDataSrc2Dest;
TIntArrayList[] linkDataDest2Src;
private static boolean getNames = true;
private static final boolean getNames = true;
private final Logger logger = LoggerFactory.getLogger(getClass());
static LinkedBlockingQueue<LinkAdjacencies> uploadQueue = new LinkedBlockingQueue<>(10);
static final LinkedBlockingQueue<LinkAdjacencies> uploadQueue = new LinkedBlockingQueue<>(10);
volatile static boolean running = true;
public int indexMax() {
@ -55,7 +55,7 @@ public class PerusePageRankV2 {
}
@SneakyThrows
public static void main(String... args) throws IOException {
public static void main(String... args) {
org.mariadb.jdbc.Driver driver = new Driver();
var conn = new DatabaseModule().provideConnection();
var rank = new PerusePageRankV2(conn);
@ -83,7 +83,7 @@ public class PerusePageRankV2 {
static class LinkAdjacencies {
public final int id;
public final int[] neighbors;
};
}
public static void uploadThread(HikariDataSource dataSource) {
try (var conn = dataSource.getConnection()) {
@ -104,7 +104,7 @@ public class PerusePageRankV2 {
}
}
public PerusePageRankV2(HikariDataSource dataSource) throws IOException {
public PerusePageRankV2(HikariDataSource dataSource) {
var blacklist = new EdgeDomainBlacklistImpl(dataSource);
spamDomains = blacklist.getSpamDomains();
this.dataSource = dataSource;

View File

@ -10,7 +10,7 @@ import java.io.IOException;
public class TestAcademiaRankTool {
@SneakyThrows
public static void main(String... args) throws IOException {
public static void main(String... args) {
Driver driver = new Driver();
var conn = new DatabaseModule().provideConnection();

View File

@ -20,7 +20,7 @@ public class UpdateDomainRanksTool {
public Set<String> originDomains = new HashSet<>();
public Set<Integer> originDomainIds = new HashSet<>();
public long domainIdMax = -1;
public final long domainIdMax = -1;
public int domainCount;
private volatile static int rankMax;
@ -31,11 +31,11 @@ public class UpdateDomainRanksTool {
return domainCount;
}
static LinkedBlockingQueue<Integer> uploadQueue = new LinkedBlockingQueue<>(10);
static final LinkedBlockingQueue<Integer> uploadQueue = new LinkedBlockingQueue<>(10);
volatile static boolean running = true;
@SneakyThrows
public static void main(String... args) throws IOException {
public static void main(String... args) {
org.mariadb.jdbc.Driver driver = new Driver();
var conn = new DatabaseModule().provideConnection();

View File

@ -20,7 +20,7 @@ public class UpdateDomainRanksTool2 {
public Set<String> originDomains = new HashSet<>();
public Set<Integer> originDomainIds = new HashSet<>();
public long domainIdMax = -1;
public final long domainIdMax = -1;
public int domainCount;
private volatile static int rankMax;
@ -31,11 +31,11 @@ public class UpdateDomainRanksTool2 {
return domainCount;
}
static LinkedBlockingQueue<Integer> uploadQueue = new LinkedBlockingQueue<>(10);
static final LinkedBlockingQueue<Integer> uploadQueue = new LinkedBlockingQueue<>(10);
volatile static boolean running = true;
@SneakyThrows
public static void main(String... args) throws IOException {
public static void main(String... args) {
Driver driver = new Driver();
var conn = new DatabaseModule().provideConnection();

View File

@ -13,7 +13,7 @@ import java.io.IOException;
public class AuthMain extends MainClass {
@Inject
public AuthMain(AuthService service) throws IOException {
public AuthMain(AuthService service) {
}
public static void main(String... args) {

View File

@ -60,7 +60,7 @@ public class AuthService extends Service {
});
}
private Object loginForm(Request request, Response response) throws IOException {
private Object loginForm(Request request, Response response) {
String redir = Objects.requireNonNull(request.queryParams("redirect"));
String service = Objects.requireNonNull(request.queryParams("service"));

View File

@ -3,9 +3,6 @@ package nu.marginalia.wmsa.auth.api;
import com.google.inject.Guice;
import com.google.inject.Inject;
import com.google.inject.Injector;
import nu.marginalia.wmsa.auth.AuthConfigurationModule;
import nu.marginalia.wmsa.auth.AuthMain;
import nu.marginalia.wmsa.auth.AuthService;
import nu.marginalia.wmsa.configuration.MainClass;
import nu.marginalia.wmsa.configuration.ServiceDescriptor;
import nu.marginalia.wmsa.configuration.module.ConfigurationModule;
@ -17,7 +14,7 @@ import java.io.IOException;
public class ApiMain extends MainClass {
@Inject
public ApiMain(ApiService service) throws IOException {
public ApiMain(ApiService service) {
}
public static void main(String... args) {

View File

@ -33,9 +33,7 @@ public class ApiService extends Service {
Initialization initialization,
MetricsServer metricsServer,
EdgeSearchClient searchClient,
HikariDataSource dataSource)
throws IOException
{
HikariDataSource dataSource) {
super(ip, port, initialization, metricsServer);
this.searchClient = searchClient;
this.dataSource = dataSource;

View File

@ -87,9 +87,7 @@ public abstract class AbstractClient implements AutoCloseable {
try {
alive = isResponsive();
}
catch (java.util.concurrent.TimeoutException tex) {
//
}
//
catch (Exception ex) {
logger.warn("Oops", ex);
}
@ -118,7 +116,7 @@ public abstract class AbstractClient implements AutoCloseable {
public abstract String name();
public synchronized boolean isResponsive() throws java.util.concurrent.TimeoutException {
public synchronized boolean isResponsive() {
Context ctx = Context.internal("ping");
var req = ctx.paint(new Request.Builder()).url(url + "/internal/ping").get().build();

View File

@ -14,7 +14,7 @@ import java.net.UnknownHostException;
import java.util.Arrays;
public abstract class MainClass {
private Logger logger = LoggerFactory.getLogger(getClass());
private final Logger logger = LoggerFactory.getLogger(getClass());
public MainClass() {

View File

@ -14,8 +14,8 @@ import static com.google.inject.name.Names.named;
public class ConfigurationModule extends AbstractModule {
private static final String SERVICE_NAME = System.getProperty("service-name");
public static int MONITOR_PORT = Integer.getInteger("monitor.port", 5000);
public static String MONITOR_HOST = System.getProperty("monitor.host", "127.0.0.1");
public static final int MONITOR_PORT = Integer.getInteger("monitor.port", 5000);
public static final String MONITOR_HOST = System.getProperty("monitor.host", "127.0.0.1");
public void configure() {
bind(Integer.class).annotatedWith(named("monitor-port")).toInstance(MONITOR_PORT);

View File

@ -12,7 +12,7 @@ public class HostnameProvider implements Provider<String> {
private final int monitorPort;
private final String monitorHost;
private final int timeout;
private Logger logger = LoggerFactory.getLogger(getClass());
private final Logger logger = LoggerFactory.getLogger(getClass());
@Inject
public HostnameProvider(@Named("monitor-port") Integer monitorPort,

View File

@ -17,7 +17,7 @@ public class PortProvider implements Provider<Integer> {
private static final Integer DEFAULT_PORT = 5000;
private final int monitorPort;
private final String monitorHost;
private Logger logger = LoggerFactory.getLogger(getClass());
private final Logger logger = LoggerFactory.getLogger(getClass());
private final int timeout = 10;
@Inject
public PortProvider(@Named("monitor-port") Integer monitorPort,

View File

@ -56,8 +56,8 @@ public class Context {
TimeUnit.MILLISECONDS);
}
private String id;
private String session;
private final String id;
private final String session;
private boolean treatAsPublic;
private Context(String id, String session) {

View File

@ -13,10 +13,10 @@ import nu.marginalia.wmsa.edge.index.EdgeTablesModule;
import java.io.IOException;
public class DataStoreMain extends MainClass {
private DataStoreService service;
private final DataStoreService service;
@Inject
public DataStoreMain(DataStoreService service) throws IOException {
public DataStoreMain(DataStoreService service) {
this.service = service;
}

View File

@ -25,7 +25,7 @@ import static spark.Spark.*;
public class DataStoreService extends Service {
private final HikariDataSource dataSource;
private final EdgeDataStoreService edgeService;
private Logger logger = LoggerFactory.getLogger(getClass());
private final Logger logger = LoggerFactory.getLogger(getClass());
private final Gson gson = new GsonBuilder().create();
@Inject

View File

@ -28,7 +28,7 @@ import java.util.zip.GZIPInputStream;
public class EdgeDataStoreService {
private final EdgeDataStoreDao dataStore;
private Gson gson = new GsonBuilder().create();
private final Gson gson = new GsonBuilder().create();
static final Histogram request_time_metrics
@ -184,7 +184,7 @@ public class EdgeDataStoreService {
return new DomainInformation(domain, blacklisted, pagesKnown, pagesVisited, pagesIndexed, incomingLinks, outboundLinks, nominalQuality, rank, state, linkingDomains);
}
private EdgeId<EdgeDomain> getDomainFromPartial(String site) throws URISyntaxException {
private EdgeId<EdgeDomain> getDomainFromPartial(String site) {
try {
return dataStore.getDomainId(new EdgeDomain(site));
}

View File

@ -39,7 +39,7 @@ public class FileRepository {
private final Logger logger = LoggerFactory.getLogger(getClass());
ReadWriteLock rwl = new ReentrantReadWriteLock();
final ReadWriteLock rwl = new ReentrantReadWriteLock();
@SneakyThrows
public Object uploadFile(Request request, Response response) {
@ -105,7 +105,7 @@ public class FileRepository {
lock.unlock();
}
return "";
};
}
private Path getReleasePath() {
return Path.of(fileStoreDir, distroFileName);

View File

@ -10,7 +10,7 @@ import nu.marginalia.wmsa.configuration.module.DatabaseModule;
import nu.marginalia.wmsa.configuration.server.Initialization;
public class EdgeArchiveMain extends MainClass {
private EdgeArchiveService service;
private final EdgeArchiveService service;
@Inject
public EdgeArchiveMain(EdgeArchiveService service) {

View File

@ -12,7 +12,6 @@ import nu.marginalia.wmsa.configuration.server.Service;
import nu.marginalia.wmsa.edge.archive.archiver.ArchivedFile;
import nu.marginalia.wmsa.edge.archive.archiver.Archiver;
import nu.marginalia.wmsa.edge.archive.request.EdgeArchiveSubmissionReq;
import nu.marginalia.wmsa.edge.model.EdgeUrl;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import spark.Request;
@ -21,7 +20,6 @@ import spark.Spark;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.stream.Collectors;

View File

@ -1,7 +1,5 @@
package nu.marginalia.wmsa.edge.archive.archiver;
import lombok.Data;
public record ArchivedFile(String filename,byte[] data ) {
}

View File

@ -1,7 +1,6 @@
package nu.marginalia.wmsa.edge.archive.archiver;
import com.google.inject.name.Named;
import lombok.SneakyThrows;
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream;
import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream;
@ -12,9 +11,7 @@ import org.slf4j.LoggerFactory;
import javax.inject.Inject;
import javax.inject.Singleton;
import java.io.ByteArrayInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.concurrent.LinkedBlockingDeque;

View File

@ -10,7 +10,7 @@ import nu.marginalia.wmsa.configuration.module.DatabaseModule;
import nu.marginalia.wmsa.configuration.server.Initialization;
public class EdgeAssistantMain extends MainClass {
private EdgeAssistantService service;
private final EdgeAssistantService service;
@Inject
public EdgeAssistantMain(EdgeAssistantService service) {

View File

@ -6,7 +6,6 @@ import com.google.inject.Inject;
import com.google.inject.name.Named;
import io.reactivex.rxjava3.core.Observable;
import lombok.SneakyThrows;
import nu.marginalia.wmsa.client.HttpStatusCode;
import nu.marginalia.wmsa.configuration.server.*;
import nu.marginalia.wmsa.edge.archive.client.ArchiveClient;
import nu.marginalia.wmsa.edge.assistant.dict.DictionaryService;
@ -16,7 +15,6 @@ import nu.marginalia.wmsa.edge.assistant.screenshot.ScreenshotService;
import nu.marginalia.wmsa.edge.assistant.suggest.Suggestions;
import nu.marginalia.wmsa.renderer.mustache.MustacheRenderer;
import nu.marginalia.wmsa.renderer.mustache.RendererFactory;
import org.eclipse.jetty.websocket.api.StatusCode;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import spark.Request;

View File

@ -2,8 +2,6 @@ package nu.marginalia.wmsa.edge.assistant.dict;
import ca.rmen.porterstemmer.PorterStemmer;
import gnu.trove.map.hash.TLongIntHashMap;
import gnu.trove.map.hash.TLongLongHashMap;
import gnu.trove.set.hash.TLongHashSet;
import nu.marginalia.wmsa.edge.crawler.domain.language.conf.LanguageModels;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -12,7 +10,6 @@ import javax.annotation.Nullable;
import javax.inject.Inject;
import javax.inject.Singleton;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Arrays;
@ -26,7 +23,7 @@ public class NGramDict {
private final Logger logger = LoggerFactory.getLogger(getClass());
private static final Pattern separator = Pattern.compile("[_ ]+");
private static PorterStemmer ps = new PorterStemmer();
private static final PorterStemmer ps = new PorterStemmer();
private static long fileSize(Path p) throws IOException {
return Files.size(p);

View File

@ -1,6 +1,5 @@
package nu.marginalia.wmsa.edge.assistant.dict;
import com.google.inject.Inject;
import com.google.inject.Singleton;
import java.util.Comparator;

View File

@ -33,15 +33,15 @@ import java.util.regex.Pattern;
public class SymSpell
{
private int editDistanceMax=2;
private int verbose = 1;
private final int editDistanceMax=2;
private final int verbose = 1;
//0: top suggestion
//1: all suggestions of smallest edit distance
//2: all suggestions <= editDistanceMax (slower, no early termination)
public static class dictionaryItem
{
public List<Integer> suggestions = new ArrayList<Integer>();
public final List<Integer> suggestions = new ArrayList<Integer>();
public int count = 0;
}
@ -67,10 +67,10 @@ public class SymSpell
//Dictionary that contains both the original words and the deletes derived from them. A term might be both word and delete from another word at the same time.
//For space reduction a item might be either of type dictionaryItem or Int.
//A dictionaryItem is used for word, word/delete, and delete with multiple suggestions. Int is used for deletes with a single suggestion (the majority of entries).
private HashMap<String, Object> dictionary = new HashMap<String, Object>(); //initialisierung
private final HashMap<String, Object> dictionary = new HashMap<String, Object>(); //initialisierung
//List of unique words. By using the suggestions (Int) as index for this list they are translated into the original String.
private List<String> wordlist = new ArrayList<String>();
private final List<String> wordlist = new ArrayList<String>();
//create a non-unique wordlist from sample text
//language independent (e.g. works with Chinese characters)
@ -301,7 +301,7 @@ public class SymSpell
//Example: (bank==bnak and bank==bink, but bank!=kanb and bank!=xban and bank!=baxn for editDistanceMaxe=1)
//Two deletes on each side of a pair makes them all equal, but the first two pairs have edit distance=1, the others edit distance=2.
int distance = 0;
if (suggestion != input)
if (!suggestion.equals(input))
{
if (suggestion.length() == candidate.length()) distance = input.length() - candidate.length();
else if (input.length() == candidate.length()) distance = suggestion.length() - candidate.length();

View File

@ -22,7 +22,7 @@ public class MathParser {
private final NumberFormat df;
static final Map<String, Double> constants = Map.of("e", Math.E, "pi", Math.PI, "2pi", 2*Math.PI);
Predicate<String> isTrivial = Pattern.compile("([0-9]+\\.[0-9]*|\\.[0-9]+)").asMatchPredicate();
final Predicate<String> isTrivial = Pattern.compile("([0-9]+\\.[0-9]*|\\.[0-9]+)").asMatchPredicate();
public MathParser() {
df = DecimalFormat.getInstance(Locale.US);
@ -30,7 +30,7 @@ public class MathParser {
df.setMaximumFractionDigits(6);
}
public String evalFormatted(String inputExpression) throws ParseException {
public String evalFormatted(String inputExpression) {
if (isTrivial.test(inputExpression)) {
return "";
}

View File

@ -1,10 +1,7 @@
package nu.marginalia.wmsa.edge.assistant.suggest;
import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
import com.google.inject.Inject;
import com.google.inject.name.Named;
import lombok.SneakyThrows;
import nu.marginalia.wmsa.edge.assistant.dict.NGramDict;
import nu.marginalia.wmsa.edge.assistant.dict.SpellChecker;
import org.apache.commons.collections4.trie.PatriciaTrie;

View File

@ -2,13 +2,10 @@ package nu.marginalia.wmsa.edge.converting;
import com.github.luben.zstd.ZstdInputStream;
import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import com.google.gson.JsonParseException;
import com.google.gson.JsonSyntaxException;
import crawlercommons.utils.Strings;
import nu.marginalia.wmsa.edge.converting.interpreter.Instruction;
import nu.marginalia.wmsa.edge.converting.interpreter.InstructionTag;
import nu.marginalia.wmsa.edge.crawling.model.CrawledDomain;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -18,7 +15,6 @@ import java.io.*;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Stream;
public class ConvertedDomainReader {
private static final Logger logger = LoggerFactory.getLogger(ConvertedDomainReader.class);

View File

@ -31,7 +31,7 @@ public class ConverterMain {
private final WorkLog processLog;
private final CrawledInstructionWriter instructionWriter;
private Gson gson;
private final Gson gson;
private final CrawledDomainReader reader = new CrawledDomainReader();
private final Map<String, String> domainToId = new HashMap<>();
@ -86,7 +86,7 @@ public class ConverterMain {
var pipe = new ParallelPipe<CrawledDomain, ProcessingInstructions>("Crawler", 48, 4, 2) {
@Override
protected ProcessingInstructions onProcess(CrawledDomain domainData) throws Exception {
protected ProcessingInstructions onProcess(CrawledDomain domainData) {
var processed = processor.process(domainData);
return new ProcessingInstructions(domainData.id, compiler.compile(processed));
}

View File

@ -2,9 +2,7 @@ package nu.marginalia.wmsa.edge.converting;
import com.github.luben.zstd.ZstdOutputStream;
import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import nu.marginalia.wmsa.edge.converting.interpreter.Instruction;
import nu.marginalia.wmsa.edge.crawling.model.CrawledDomain;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -18,7 +16,7 @@ import java.util.List;
public class CrawledInstructionWriter {
private final Path outputDir;
private Gson gson;
private final Gson gson;
private static final Logger logger = LoggerFactory.getLogger(CrawledInstructionWriter.class);
public CrawledInstructionWriter(Path outputDir, Gson gson) {

View File

@ -26,16 +26,16 @@ import java.util.concurrent.atomic.AtomicInteger;
public class LoaderMain {
private final Path processDir;
private EdgeCrawlPlan plan;
private final EdgeCrawlPlan plan;
private final ConvertedDomainReader instructionsReader;
private final HikariDataSource dataSource;
private static final Logger logger = LoggerFactory.getLogger(LoaderMain.class);
private final LoaderFactory loaderFactory;
private EdgeIndexClient indexClient;
private volatile boolean running = true;
private final EdgeIndexClient indexClient;
private final boolean running = true;
Thread processorThread = new Thread(this::processor, "Processor Thread");
final Thread processorThread = new Thread(this::processor, "Processor Thread");
public static void main(String... args) throws IOException {
if (args.length != 1) {
@ -87,7 +87,7 @@ public class LoaderMain {
}
private volatile static int loadTotal;
private volatile static int loaded = 0;
private static final int loaded = 0;
private void load(String path, int cnt) {
String first = path.substring(0, 2);
@ -105,7 +105,7 @@ public class LoaderMain {
}
}
static TaskStats taskStats = new TaskStats(100);
static final TaskStats taskStats = new TaskStats(100);
private record LoadJob(String path, Loader loader, List<Instruction> instructionList) {
public void run() {
@ -120,7 +120,8 @@ public class LoaderMain {
logger.info("Loaded {}/{} : {} ({}) {}ms {} l/s", taskStats.getCount(), loadTotal, path, loader.data.sizeHint, loadTime, taskStats.avgTime());
}
};
}
private static final LinkedBlockingQueue<LoadJob> processQueue = new LinkedBlockingQueue<>(2);
private void processor() {

View File

@ -4,13 +4,9 @@ import nu.marginalia.wmsa.edge.converting.interpreter.instruction.DocumentKeywor
import nu.marginalia.wmsa.edge.converting.interpreter.instruction.DomainLink;
import nu.marginalia.wmsa.edge.converting.interpreter.instruction.LoadProcessedDocument;
import nu.marginalia.wmsa.edge.converting.interpreter.instruction.LoadProcessedDocumentWithError;
import nu.marginalia.wmsa.edge.index.model.IndexBlock;
import nu.marginalia.wmsa.edge.model.EdgeDomain;
import nu.marginalia.wmsa.edge.model.EdgeUrl;
import nu.marginalia.wmsa.edge.model.crawl.EdgeDomainIndexingState;
import nu.marginalia.wmsa.edge.model.crawl.EdgeHtmlStandard;
import nu.marginalia.wmsa.edge.model.crawl.EdgePageWordSet;
import nu.marginalia.wmsa.edge.model.crawl.EdgeUrlState;
public interface Interpreter {
void loadUrl(EdgeUrl[] url);

View File

@ -4,8 +4,6 @@ import nu.marginalia.wmsa.edge.converting.interpreter.Instruction;
import nu.marginalia.wmsa.edge.converting.interpreter.InstructionTag;
import nu.marginalia.wmsa.edge.converting.interpreter.Interpreter;
import java.util.Arrays;
public record LoadDomainRedirect(DomainLink links) implements Instruction {
@Override

View File

@ -4,7 +4,6 @@ import nu.marginalia.wmsa.edge.converting.interpreter.Instruction;
import nu.marginalia.wmsa.edge.converting.interpreter.InstructionTag;
import nu.marginalia.wmsa.edge.converting.interpreter.Interpreter;
import nu.marginalia.wmsa.edge.model.EdgeUrl;
import nu.marginalia.wmsa.edge.model.crawl.EdgeDomainIndexingState;
import nu.marginalia.wmsa.edge.model.crawl.EdgeUrlState;

View File

@ -16,11 +16,12 @@ import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeUnit;
public class IndexLoadKeywords implements Runnable {
private EdgeIndexClient client;
private final EdgeIndexClient client;
private static final Logger logger = LoggerFactory.getLogger(IndexLoadKeywords.class);
private final LinkedBlockingQueue<InsertTask> insertQueue = new LinkedBlockingQueue<>(32);
private record InsertTask(int urlId, int domainId, EdgePageWordSet wordSet) {};
private record InsertTask(int urlId, int domainId, EdgePageWordSet wordSet) {}
private final Thread runThread;
private volatile boolean canceled = false;

View File

@ -1,6 +1,5 @@
package nu.marginalia.wmsa.edge.converting.loader;
import io.reactivex.rxjava3.core.Observable;
import nu.marginalia.wmsa.edge.converting.interpreter.Interpreter;
import nu.marginalia.wmsa.edge.converting.interpreter.instruction.DocumentKeywords;
import nu.marginalia.wmsa.edge.converting.interpreter.instruction.DomainLink;

View File

@ -8,7 +8,6 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.sql.SQLException;
import java.util.Collection;
import java.util.List;
import static java.sql.Statement.SUCCESS_NO_INFO;

View File

@ -241,5 +241,5 @@ public class DocumentProcessor {
return doc.text().length();
}
private record DetailsWithWords(ProcessedDocumentDetails details, EdgePageWordSet words) {};
private record DetailsWithWords(ProcessedDocumentDetails details, EdgePageWordSet words) {}
}

View File

@ -12,10 +12,10 @@ import nu.marginalia.wmsa.configuration.server.Initialization;
import java.io.IOException;
public class EdgeCrawlerMain extends MainClass {
private EdgeCrawlerService service;
private final EdgeCrawlerService service;
@Inject
public EdgeCrawlerMain(EdgeCrawlerService service) throws IOException {
public EdgeCrawlerMain(EdgeCrawlerService service) {
this.service = service;
}

View File

@ -12,7 +12,6 @@ import nu.marginalia.wmsa.data_store.client.DataStoreClient;
import nu.marginalia.wmsa.edge.crawler.worker.UploaderWorker;
import nu.marginalia.wmsa.edge.crawler.worker.Worker;
import nu.marginalia.wmsa.edge.crawler.worker.WorkerFactory;
import nu.marginalia.wmsa.edge.crawler.worker.data.CrawlJobsSpecification;
import nu.marginalia.wmsa.edge.crawler.worker.results.WorkerResults;
import nu.marginalia.wmsa.edge.director.client.EdgeDirectorClient;
import nu.marginalia.wmsa.edge.index.client.EdgeIndexClient;

View File

@ -1,19 +1,11 @@
package nu.marginalia.wmsa.edge.crawler;
import nu.marginalia.wmsa.configuration.module.DatabaseModule;
import nu.marginalia.wmsa.edge.crawler.domain.DomainCrawlerRobotsTxt;
import nu.marginalia.wmsa.edge.crawler.domain.RssCrawler;
import nu.marginalia.wmsa.edge.crawler.fetcher.HttpFetcher;
import nu.marginalia.wmsa.edge.data.dao.EdgeDataStoreDaoImpl;
import nu.marginalia.wmsa.edge.index.client.EdgeIndexClient;
import org.mariadb.jdbc.Driver;
import java.io.IOException;
public class RssScraperMain {
public static void main(String... args) throws IOException {
public static void main(String... args) {
// Driver driver = new Driver();
//
// var conn = new DatabaseModule().provideConnection();

View File

@ -13,8 +13,8 @@ public class DomainCrawlerFactory {
private final HttpFetcher fetcher;
private final HtmlProcessor htmlProcessor;
private final ArchiveClient archiveClient;
private DomainCrawlerRobotsTxt domainCrawlerRobotsTxt;
private LanguageFilter languageFilter;
private final DomainCrawlerRobotsTxt domainCrawlerRobotsTxt;
private final LanguageFilter languageFilter;
private final IpBlockList blockList;
private final PlainTextProcessor plainTextProcessor;

View File

@ -1,7 +1,6 @@
package nu.marginalia.wmsa.edge.crawler.domain;
import com.google.common.base.CharMatcher;
import io.reactivex.rxjava3.core.Maybe;
import lombok.SneakyThrows;
import nu.marginalia.wmsa.edge.model.EdgeUrl;
import org.jetbrains.annotations.Contract;
@ -100,7 +99,7 @@ public class LinkParser {
return url;
}
private static Pattern paramRegex = Pattern.compile("\\?.*$");
private static final Pattern paramRegex = Pattern.compile("\\?.*$");
@SneakyThrows
private String resolveUrl(EdgeUrl baseUrl, String s) {
s = paramRegex.matcher(s).replaceAll("");

View File

@ -28,8 +28,8 @@ import java.util.concurrent.LinkedBlockingQueue;
public class RssCrawler {
static LinkedBlockingQueue<EdgeUrl> feedsQueue = new LinkedBlockingQueue<>();
static LinkedBlockingQueue<UploadJob> uploadQueue = new LinkedBlockingQueue<>(2);
static final LinkedBlockingQueue<EdgeUrl> feedsQueue = new LinkedBlockingQueue<>();
static final LinkedBlockingQueue<UploadJob> uploadQueue = new LinkedBlockingQueue<>(2);
@AllArgsConstructor
static class UploadJob {
@ -59,7 +59,7 @@ public class RssCrawler {
}
@SneakyThrows
public void run() throws IOException {
public void run() {
var rank = new BuggyStandardPageRank(dataSource, "memex.marginalia.nu");
var nodes = rank.pageRankWithPeripheralNodes(rank.size(), false);

View File

@ -26,7 +26,7 @@ public class DocumentDebugger {
private final KeywordExtractor ke;
private final NameCounter nc;
Map<String, Path> docsByPath = new TreeMap<>();
final Map<String, Path> docsByPath = new TreeMap<>();
Path tempDir;
public DocumentDebugger(LanguageModels lm) throws IOException {
se = new SentenceExtractor(lm);

View File

@ -1,9 +1,6 @@
package nu.marginalia.wmsa.edge.crawler.domain.language;
import com.google.common.collect.Sets;
import nu.marginalia.wmsa.edge.crawler.domain.language.processing.model.DocumentLanguageData;
import nu.marginalia.wmsa.edge.crawler.domain.language.processing.model.DocumentSentence;
import opennlp.tools.langdetect.LanguageDetector;
import org.jsoup.nodes.Document;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -12,10 +9,7 @@ import javax.inject.Inject;
import javax.inject.Singleton;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.text.BreakIterator;
import java.util.*;
import java.util.function.Predicate;
import java.util.regex.Pattern;
@Singleton
public class LanguageFilter {

View File

@ -37,7 +37,7 @@ public class KeywordCounter {
instances.computeIfAbsent(stemmed, k -> new HashSet<>()).add(sent.constructWordFromSpan(span));
}
};
}
var topWords = counts.entrySet().stream()
.filter(w -> w.getValue() > cutoff)
@ -74,7 +74,7 @@ public class KeywordCounter {
return ret;
}
private static Pattern separator = Pattern.compile("_");
private static final Pattern separator = Pattern.compile("_");
public double getTermValue(Map.Entry<String, Double> e) {
String[] parts = separator.split(e.getKey());

View File

@ -30,7 +30,7 @@ public class LongNameCounter {
counts.merge(stemmed, 1., Double::sum);
instances.computeIfAbsent(stemmed, k -> new HashSet<>()).add(new WordRep(sent, span));
}
};
}
return counts.entrySet().stream().filter(e -> termSize(e.getKey()) > 1)
.sorted(Comparator.comparing(this::getTermValue))
@ -44,7 +44,7 @@ public class LongNameCounter {
}
Pattern separator = Pattern.compile("_");
final Pattern separator = Pattern.compile("_");
public double getTermValue(Map.Entry<String, Double> e) {
String[] parts = separator.split(e.getKey());

View File

@ -27,7 +27,7 @@ public class NameCounter {
counts.merge(stemmed, 1., Double::sum);
instances.computeIfAbsent(stemmed, k -> new HashSet<>()).add(new WordRep(sent, span));
}
};
}
return counts.entrySet().stream()
.filter(e -> e.getValue() >= minCount)

View File

@ -35,7 +35,7 @@ import static nu.marginalia.wmsa.edge.crawler.domain.language.WordPatterns.*;
public class SentenceExtractor {
private SentenceDetectorME sentenceDetector;
private RDRPOSTagger rdrposTagger;
private final RDRPOSTagger rdrposTagger;
private final PorterStemmer porterStemmer = new PorterStemmer();
private boolean legacyMode = false;

View File

@ -1,6 +1,5 @@
package nu.marginalia.wmsa.edge.crawler.domain.language.processing;
import nu.marginalia.wmsa.edge.assistant.dict.NGramDict;
import nu.marginalia.wmsa.edge.crawler.domain.language.processing.model.DocumentLanguageData;
import nu.marginalia.wmsa.edge.crawler.domain.language.processing.model.WordRep;
import nu.marginalia.wmsa.edge.crawler.domain.language.processing.model.WordSpan;

View File

@ -2,10 +2,8 @@ package nu.marginalia.wmsa.edge.crawler.domain.language.processing.model;
import gnu.trove.map.hash.TObjectIntHashMap;
import lombok.AllArgsConstructor;
import nu.marginalia.wmsa.edge.assistant.dict.NGramDict;
import java.util.Arrays;
import java.util.Map;
import java.util.stream.Stream;
@AllArgsConstructor

View File

@ -3,11 +3,8 @@ package nu.marginalia.wmsa.edge.crawler.domain.language.processing.model;
import lombok.AllArgsConstructor;
import lombok.EqualsAndHashCode;
import lombok.Getter;
import nu.marginalia.wmsa.edge.assistant.dict.NGramDict;
import org.jetbrains.annotations.NotNull;
import java.util.Objects;
@AllArgsConstructor @EqualsAndHashCode @Getter
public class WordRep implements Comparable<WordRep> {
public WordRep(DocumentSentence sent, WordSpan span) {

View File

@ -10,7 +10,7 @@ public enum HtmlFeature {
COOKIES(4)
;
public int bit;
public final int bit;
HtmlFeature(int bit) {
this.bit = bit;

View File

@ -130,7 +130,7 @@ public class HtmlProcessor {
rawPageContent.ip);
}
List<String> trackers = List.of("adform.net",
final List<String> trackers = List.of("adform.net",
"connect.facebook",
"googletagmanager.com",
"googlesyndication.com",

View File

@ -10,7 +10,7 @@ import java.util.*;
import java.util.regex.Pattern;
public class HtmlSummarizer {
private static Pattern extendedJunk = Pattern.compile("[^a-zA-Z0-9]{4,}");
private static final Pattern extendedJunk = Pattern.compile("[^a-zA-Z0-9]{4,}");
private static final int MAX_CONSIDERABLE_SENTENCES = 200;
private static final int MIN_SUMMARY_LENGTH = 20;
@ -101,7 +101,7 @@ public class HtmlSummarizer {
if (ret.size() > MAX_CONSIDERABLE_SENTENCES) {
break;
}
};
}
return ret;
}

View File

@ -9,7 +9,7 @@ import java.util.Optional;
public class ContentTypeParser {
static MimeTypeDetector mimeTypeDetector = new MimeTypeDetector();
static final MimeTypeDetector mimeTypeDetector = new MimeTypeDetector();
public static EdgeContentType parse(String contentType, byte[] data) {
return getContentTypeFromContentTypeString(contentType)

View File

@ -5,9 +5,7 @@ import okhttp3.CookieJar;
import okhttp3.HttpUrl;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.WeakHashMap;
import java.util.concurrent.ConcurrentHashMap;
public class Cookies {

View File

@ -34,7 +34,7 @@ public class HttpFetcher {
private final Logger logger = LoggerFactory.getLogger(getClass());
private final String userAgent;
private final int maxFetchSize = 1024*512;
private Cookies cookies = new Cookies();
private final Cookies cookies = new Cookies();
private final LinkParser linkParser = new LinkParser();
@ -49,8 +49,8 @@ public class HttpFetcher {
public enum FetchResultState {
OK,
REDIRECT,
ERROR;
};
ERROR
}
@AllArgsConstructor @ToString
public static class FetchResult {
@ -60,7 +60,7 @@ public class HttpFetcher {
/**
 * Reports whether this result's state is {@link FetchResultState#OK}.
 *
 * @return {@code true} only when the state equals {@code FetchResultState.OK}
 */
public boolean ok() {
    return FetchResultState.OK == state;
}
};
}
@SneakyThrows
private OkHttpClient createClient() {

View File

@ -24,7 +24,7 @@ public class HttpRedirectResolver {
private final Logger logger = LoggerFactory.getLogger(getClass());
private final String userAgent;
private Cookies cookies = new Cookies();
private final Cookies cookies = new Cookies();
private final OkHttpClient client = createClient();
@ -95,7 +95,7 @@ public class HttpRedirectResolver {
throw new NetworkException("Bad status " + response.code());
}
return true;
};
}
public static class BadContentType extends RuntimeException {
public BadContentType(String type) {

View File

@ -13,12 +13,12 @@ public class NoSecuritySSL {
new X509TrustManager() {
@Override
public void checkClientTrusted(java.security.cert.X509Certificate[] chain,
String authType) throws CertificateException {
String authType) {
}
@Override
public void checkServerTrusted(java.security.cert.X509Certificate[] chain,
String authType) throws CertificateException {
String authType) {
}
@Override

View File

@ -19,8 +19,8 @@ import java.util.concurrent.LinkedBlockingQueue;
public class CrawlerDiscoverWorker implements Worker {
private HttpRedirectResolver redirectResolver;
private TaskProvider taskProvider;
private final HttpRedirectResolver redirectResolver;
private final TaskProvider taskProvider;
private final DomainCrawlerFactory domainCrawlerFactory;
private final IpBlockList blockList;
private final LinkedBlockingQueue<WorkerResults> queue;

View File

@ -17,13 +17,12 @@ import java.nio.charset.StandardCharsets;
import java.util.Objects;
import java.util.Set;
import java.util.TreeMap;
import java.util.concurrent.ExecutionException;
@Singleton
public class GeoIpBlocklist {
private final TreeMap<Long, GeoIpBlocklist.IpRange> ranges = new TreeMap<>();
private Set<String> blacklist = Set.of("CN", "HK");
private Set<String> graylist = Set.of("RU", "TW", "IN", "ZA", "SG", "UA");
private final Set<String> blacklist = Set.of("CN", "HK");
private final Set<String> graylist = Set.of("RU", "TW", "IN", "ZA", "SG", "UA");
private final Cache<String, String> countryCache = CacheBuilder.newBuilder().maximumSize(100_000).build();
@ -34,7 +33,7 @@ public class GeoIpBlocklist {
public final long from;
public final long to;
public final String country;
};
}
public GeoIpBlocklist() throws IOException, CsvValidationException {
var resource = Objects.requireNonNull(ClassLoader.getSystemResourceAsStream("IP2LOCATION-LITE-DB1.CSV"),

View File

@ -9,7 +9,7 @@ import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
public class InetAddressCache {
private static Cache<EdgeDomain, InetAddress> cache = CacheBuilder.newBuilder().maximumSize(10_000_000).expireAfterAccess(1, TimeUnit.HOURS).build();
private static final Cache<EdgeDomain, InetAddress> cache = CacheBuilder.newBuilder().maximumSize(10_000_000).expireAfterAccess(1, TimeUnit.HOURS).build();
public static InetAddress getAddress(EdgeDomain domain) throws Throwable {
try {
return cache.get(domain, ()->{

View File

@ -1,7 +1,5 @@
package nu.marginalia.wmsa.edge.crawler.worker;
import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
import com.google.inject.Inject;
import com.google.inject.Singleton;
import nu.marginalia.wmsa.edge.model.EdgeDomain;
@ -12,14 +10,10 @@ import org.slf4j.LoggerFactory;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.function.Predicate;
import java.util.regex.Pattern;
@ -55,7 +49,7 @@ public class IpBlockList {
logger.info("Loaded {} CIDRs", badSubnets.size());
}
Predicate<String> numericPattern = Pattern.compile(".*\\d{4}.*").asMatchPredicate();
final Predicate<String> numericPattern = Pattern.compile(".*\\d{4}.*").asMatchPredicate();
public boolean isAllowed(EdgeDomain domain) {
if (domain.domain.endsWith(".cn")) {

View File

@ -4,10 +4,10 @@ import io.prometheus.client.Counter;
import io.prometheus.client.Histogram;
public interface Worker extends Runnable {
static Histogram wmsa_edge_crawler_thread_run_times =
Histogram wmsa_edge_crawler_thread_run_times =
Histogram.build("wmsa_edge_crawler_thread_run_times", "Run Times")
.register();
static Counter wmsa_edge_crawler_idle_worker =
Counter wmsa_edge_crawler_idle_worker =
Counter.build("wmsa_edge_crawler_idle_worke", "No work, no money")
.register();

View File

@ -97,7 +97,7 @@ public class CrawlJobExtractorMain {
}
}
private record DomainWithId(String domainName, int id) {};
private record DomainWithId(String domainName, int id) {}
private Stream<CrawlingSpecification> extractDomains() {
List<DomainWithId> ids = new ArrayList<>(100_000);

View File

@ -88,7 +88,7 @@ public class CrawlJobExtractorPageRankMain {
}
}
private record DomainWithId(String domainName, int id) {};
private record DomainWithId(String domainName, int id) {}
public CrawlJobExtractorPageRankMain(HikariDataSource ds) throws SQLException {
blacklist = new EdgeDomainBlacklistImpl(ds);

View File

@ -1,7 +1,6 @@
package nu.marginalia.wmsa.edge.crawling;
import com.github.luben.zstd.ZstdInputStream;
import com.github.luben.zstd.ZstdOutputStream;
import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import nu.marginalia.wmsa.edge.crawling.model.CrawledDomain;
@ -9,7 +8,6 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.*;
import java.nio.file.Files;
import java.nio.file.Path;
public class CrawledDomainReader {

View File

@ -42,7 +42,7 @@ public class HttpFetcher {
private final Logger logger = LoggerFactory.getLogger(getClass());
private final String userAgent;
private final int maxFetchSize = 1024*512;
private Cookies cookies = new Cookies();
private final Cookies cookies = new Cookies();
private static final SimpleRobotRulesParser robotsParser = new SimpleRobotRulesParser();
@ -59,8 +59,8 @@ public class HttpFetcher {
public enum FetchResultState {
OK,
REDIRECT,
ERROR;
};
ERROR
}
@AllArgsConstructor @ToString
public static class FetchResult {
@ -70,7 +70,7 @@ public class HttpFetcher {
/**
 * Reports whether this result's state is {@link FetchResultState#OK}.
 *
 * @return {@code true} only when the state equals {@code FetchResultState.OK}
 */
public boolean ok() {
    return FetchResultState.OK == state;
}
};
}
@SneakyThrows
private OkHttpClient createClient(Dispatcher dispatcher) {

View File

@ -5,7 +5,6 @@ import com.google.common.cache.CacheBuilder;
import com.google.common.util.concurrent.UncheckedExecutionException;
import com.google.inject.Inject;
import com.zaxxer.hikari.HikariDataSource;
import edu.stanford.nlp.parser.lexparser.Edge;
import lombok.SneakyThrows;
import nu.marginalia.wmsa.edge.crawler.domain.UrlsCache;
import nu.marginalia.wmsa.edge.data.dao.task.EdgeDomainBlacklist;
@ -21,7 +20,6 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Types;
import java.util.*;
@ -529,7 +527,7 @@ public class EdgeDataStoreDaoImpl implements EdgeDataStoreDao {
return results;
}
static Pattern badChars = Pattern.compile("[';\\\\]");
static final Pattern badChars = Pattern.compile("[';\\\\]");
/**
 * Produces a single-quoted copy of {@code s} in which every character
 * matched by {@code badChars} (single quote, semicolon, backslash) has
 * been replaced with {@code ?}.
 *
 * NOTE(review): presumably the result is spliced into SQL text; if so,
 * bound PreparedStatement parameters would be safer -- confirm at the
 * call sites.
 *
 * @param s the raw value to scrub; must not be null, since it is handed
 *          straight to {@code Pattern.matcher}
 * @return the scrubbed value wrapped in single quotes
 */
private String saneString(String s) {
    final String scrubbed = badChars.matcher(s).replaceAll("?");
    return "\'" + scrubbed + "\'";
}

View File

@ -34,7 +34,7 @@ public class EdgeDataStoreTaskTuner {
private volatile double indexQualityLimit = -2.;
private final HikariDataSource dataSource;
private Logger logger = LoggerFactory.getLogger(getClass());
private final Logger logger = LoggerFactory.getLogger(getClass());
@Inject
public EdgeDataStoreTaskTuner(HikariDataSource dataSource) {

View File

@ -7,8 +7,6 @@ import gnu.trove.set.hash.TIntHashSet;
import io.prometheus.client.Counter;
import io.reactivex.rxjava3.schedulers.Schedulers;
import lombok.SneakyThrows;
import nu.marginalia.wmsa.edge.model.EdgeDomain;
import nu.marginalia.wmsa.edge.model.EdgeId;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

View File

@ -13,10 +13,10 @@ import spark.Spark;
import java.io.IOException;
public class DatingMain extends MainClass {
DatingService service;
final DatingService service;
@Inject
public DatingMain(DatingService service) throws IOException {
public DatingMain(DatingService service) {
this.service = service;
}

View File

@ -12,10 +12,10 @@ import nu.marginalia.wmsa.configuration.server.Initialization;
import java.io.IOException;
public class EdgeDirectorMain extends MainClass {
private EdgeDirectorService service;
private final EdgeDirectorService service;
@Inject
public EdgeDirectorMain(EdgeDirectorService service) throws IOException {
public EdgeDirectorMain(EdgeDirectorService service) {
this.service = service;
}

View File

@ -29,7 +29,7 @@ public class EdgeDirectorService extends Service {
= Histogram.build("wmsa_edge_director_request_time", "DB Request Time")
.labelNames("request")
.register();
private Logger logger = LoggerFactory.getLogger(getClass());
private final Logger logger = LoggerFactory.getLogger(getClass());
@Inject
public EdgeDirectorService(@Named("service-host") String ip,

Some files were not shown because too many files have changed in this diff Show More