diff --git a/build.gradle b/build.gradle index 9eb4ef2f..8b76efda 100644 --- a/build.gradle +++ b/build.gradle @@ -3,6 +3,10 @@ plugins { id("org.jetbrains.gradle.plugin.idea-ext") version "1.0" id "io.freefair.lombok" version "8.3" id "me.champeau.jmh" version "0.6.6" + + // This is a workaround for a bug in the Jib plugin that causes it to stall randomly + // https://github.com/GoogleContainerTools/jib/issues/3347 + id 'com.google.cloud.tools.jib' version '3.4.0' apply(false) } group 'marginalia' @@ -13,6 +17,14 @@ compileTestJava.options.encoding = "UTF-8" subprojects.forEach {it -> // Enable preview features for the entire project + + if (it.path.contains(':code:')) { + sourceSets.main.java.srcDirs += file('java') + sourceSets.main.resources.srcDirs += file('resources') + sourceSets.test.java.srcDirs += file('test') + sourceSets.test.resources.srcDirs += file('test-resources') + } + it.tasks.withType(JavaCompile).configureEach { options.compilerArgs += ['--enable-preview'] } @@ -28,32 +40,12 @@ subprojects.forEach {it -> preserveFileTimestamps = false reproducibleFileOrder = true } + } - -allprojects { - apply plugin: 'java' - apply plugin: 'io.freefair.lombok' - - dependencies { - implementation libs.lombok - testImplementation libs.lombok - annotationProcessor libs.lombok - - lombok libs.lombok // prevent plugin from downgrading the version to something incompatible with '19 - } - - test { - maxHeapSize = "8G" - useJUnitPlatform() - } - - tasks.register('fastTests', Test) { - maxHeapSize = "8G" - useJUnitPlatform { - excludeTags "slow" - } - } - +ext { + dockerImageBase='container-registry.oracle.com/graalvm/jdk:21@sha256:1fd33d4d4eba3a9e1a41a728e39ea217178d257694eea1214fec68d2ed4d3d9b' + dockerImageTag='latest' + dockerImageRegistry='marginalia' } idea { @@ -77,3 +69,4 @@ java { languageVersion.set(JavaLanguageVersion.of(21)) } } + diff --git a/code/api/assistant-api/readme.md b/code/api/assistant-api/readme.md deleted file mode 100644 index 
7a35a592..00000000 --- a/code/api/assistant-api/readme.md +++ /dev/null @@ -1,8 +0,0 @@ -# Assistant API - -Client and models for talking to the [assistant-service](../../services-core/assistant-service), -implemented with the base client from [service-client](../../common/service-client). - -## Central Classes - -* [AssistantClient](src/main/java/nu/marginalia/assistant/client/AssistantClient.java) \ No newline at end of file diff --git a/code/api/assistant-api/src/main/java/nu/marginalia/assistant/client/AssistantClient.java b/code/api/assistant-api/src/main/java/nu/marginalia/assistant/client/AssistantClient.java deleted file mode 100644 index 2e6b209d..00000000 --- a/code/api/assistant-api/src/main/java/nu/marginalia/assistant/client/AssistantClient.java +++ /dev/null @@ -1,95 +0,0 @@ -package nu.marginalia.assistant.client; - -import com.google.gson.reflect.TypeToken; -import com.google.inject.Inject; -import com.google.inject.Singleton; -import io.reactivex.rxjava3.core.Observable; -import nu.marginalia.assistant.client.model.DictionaryResponse; -import nu.marginalia.assistant.client.model.DomainInformation; -import nu.marginalia.assistant.client.model.SimilarDomain; -import nu.marginalia.client.AbstractDynamicClient; -import nu.marginalia.client.exception.RouteNotConfiguredException; -import nu.marginalia.model.gson.GsonFactory; -import nu.marginalia.service.descriptor.ServiceDescriptors; -import nu.marginalia.service.id.ServiceId; -import nu.marginalia.client.Context; - -import java.net.URLEncoder; -import java.nio.charset.StandardCharsets; -import java.util.ArrayList; -import java.util.List; - -@Singleton -public class AssistantClient extends AbstractDynamicClient { - - @Inject - public AssistantClient(ServiceDescriptors descriptors) { - super(descriptors.forId(ServiceId.Assistant), GsonFactory::get); - } - - public Observable dictionaryLookup(Context ctx, String word) { - try { - return super.get(ctx, 0, "/dictionary/" + URLEncoder.encode(word, 
StandardCharsets.UTF_8), DictionaryResponse.class); - } - catch (RouteNotConfiguredException ex) { - return Observable.empty(); - } - } - - @SuppressWarnings("unchecked") - public Observable> spellCheck(Context ctx, String word) { - try { - return (Observable>) (Object) super.get(ctx, 0, "/spell-check/" + URLEncoder.encode(word, StandardCharsets.UTF_8), List.class); - } - catch (RouteNotConfiguredException ex) { - return Observable.empty(); - } - } - public Observable unitConversion(Context ctx, String value, String from, String to) { - try { - return super.get(ctx, 0, "/unit-conversion?value=" + value + "&from=" + from + "&to=" + to); - } - catch (RouteNotConfiguredException ex) { - return Observable.empty(); - } - } - - public Observable evalMath(Context ctx, String expression) { - try { - return super.get(ctx, 0, "/eval-expression?value=" + URLEncoder.encode(expression, StandardCharsets.UTF_8)); - } - catch (RouteNotConfiguredException ex) { - return Observable.empty(); - } - } - - public Observable> similarDomains(Context ctx, int domainId, int count) { - try { - return super.get(ctx, 0, STR."/domain/\{domainId}/similar?count=\{count}", new TypeToken>() {}) - .onErrorResumeWith(Observable.just(new ArrayList<>())); - } - catch (RouteNotConfiguredException ex) { - return Observable.empty(); - } - } - - public Observable> linkedDomains(Context ctx, int domainId, int count) { - try { - return super.get(ctx, 0, STR."/domain/\{domainId}/linking?count=\{count}", new TypeToken>() {}) - .onErrorResumeWith(Observable.just(new ArrayList<>())); - } - catch (RouteNotConfiguredException ex) { - return Observable.empty(); - } - } - - public Observable domainInformation(Context ctx, int domainId) { - try { - return super.get(ctx, 0, STR."/domain/\{domainId}/info", DomainInformation.class) - .onErrorResumeWith(Observable.just(new DomainInformation())); - } - catch (RouteNotConfiguredException ex) { - return Observable.empty(); - } - } -} diff --git 
a/code/api/executor-api/src/main/java/nu/marginalia/executor/client/ExecutorClient.java b/code/api/executor-api/src/main/java/nu/marginalia/executor/client/ExecutorClient.java deleted file mode 100644 index 658f6b37..00000000 --- a/code/api/executor-api/src/main/java/nu/marginalia/executor/client/ExecutorClient.java +++ /dev/null @@ -1,307 +0,0 @@ -package nu.marginalia.executor.client; - -import com.google.inject.Inject; -import com.google.inject.Singleton; -import nu.marginalia.client.AbstractDynamicClient; -import nu.marginalia.client.Context; -import nu.marginalia.client.grpc.GrpcChannelPool; -import nu.marginalia.executor.api.*; -import nu.marginalia.executor.api.ExecutorApiGrpc.ExecutorApiBlockingStub; -import nu.marginalia.executor.model.ActorRunState; -import nu.marginalia.executor.model.ActorRunStates; -import nu.marginalia.executor.model.transfer.TransferItem; -import nu.marginalia.executor.model.transfer.TransferSpec; -import nu.marginalia.executor.storage.FileStorageContent; -import nu.marginalia.executor.storage.FileStorageFile; -import nu.marginalia.executor.upload.UploadDirContents; -import nu.marginalia.executor.upload.UploadDirItem; -import nu.marginalia.model.gson.GsonFactory; -import nu.marginalia.nodecfg.NodeConfigurationService; -import nu.marginalia.nodecfg.model.NodeConfiguration; -import nu.marginalia.service.descriptor.ServiceDescriptors; -import nu.marginalia.service.id.ServiceId; -import nu.marginalia.storage.model.FileStorageId; - -import io.grpc.ManagedChannel; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.OutputStream; -import java.net.URLEncoder; -import java.nio.charset.StandardCharsets; -import java.nio.file.Path; -import java.util.List; -import java.util.concurrent.TimeUnit; - -@Singleton -public class ExecutorClient extends AbstractDynamicClient { - private final GrpcChannelPool channelPool; - private static final Logger logger = LoggerFactory.getLogger(ExecutorClient.class); - - @Inject - public 
ExecutorClient(ServiceDescriptors descriptors, NodeConfigurationService nodeConfigurationService) { - super(descriptors.forId(ServiceId.Executor), GsonFactory::get); - - channelPool = new GrpcChannelPool<>(ServiceId.Executor) { - @Override - public ExecutorApiBlockingStub createStub(ManagedChannel channel) { - return ExecutorApiGrpc.newBlockingStub(channel); - } - - @Override - public List getEligibleNodes() { - return nodeConfigurationService.getAll() - .stream() - .map(NodeConfiguration::node) - .toList(); - } - }; - } - - public void startFsm(int node, String actorName) { - channelPool.apiForNode(node).startFsm( - RpcFsmName.newBuilder() - .setActorName(actorName) - .build() - ); - } - - public void stopFsm(int node, String actorName) { - channelPool.apiForNode(node).stopFsm( - RpcFsmName.newBuilder() - .setActorName(actorName) - .build() - ); - } - - public void stopProcess(int node, String id) { - channelPool.apiForNode(node).stopProcess( - RpcProcessId.newBuilder() - .setProcessId(id) - .build() - ); - } - - public void triggerCrawl(int node, FileStorageId fid) { - channelPool.apiForNode(node).triggerCrawl( - RpcFileStorageId.newBuilder() - .setFileStorageId(fid.id()) - .build() - ); - } - - public void triggerRecrawl(int node, FileStorageId fid) { - channelPool.apiForNode(node).triggerRecrawl( - RpcFileStorageId.newBuilder() - .setFileStorageId(fid.id()) - .build() - ); - } - - public void triggerConvert(int node, FileStorageId fid) { - channelPool.apiForNode(node).triggerConvert( - RpcFileStorageId.newBuilder() - .setFileStorageId(fid.id()) - .build() - ); - } - - public void triggerConvertAndLoad(int node, FileStorageId fid) { - channelPool.apiForNode(node).triggerConvertAndLoad( - RpcFileStorageId.newBuilder() - .setFileStorageId(fid.id()) - .build() - ); - } - - public void loadProcessedData(int node, List ids) { - channelPool.apiForNode(node).loadProcessedData( - RpcFileStorageIds.newBuilder() - 
.addAllFileStorageIds(ids.stream().map(FileStorageId::id).toList()) - .build() - ); - } - - public void calculateAdjacencies(int node) { - channelPool.apiForNode(node).calculateAdjacencies(Empty.getDefaultInstance()); - } - - public void sideloadEncyclopedia(int node, Path sourcePath, String baseUrl) { - channelPool.apiForNode(node).sideloadEncyclopedia( - RpcSideloadEncyclopedia.newBuilder() - .setBaseUrl(baseUrl) - .setSourcePath(sourcePath.toString()) - .build() - ); - } - - public void sideloadDirtree(int node, Path sourcePath) { - channelPool.apiForNode(node).sideloadDirtree( - RpcSideloadDirtree.newBuilder() - .setSourcePath(sourcePath.toString()) - .build() - ); - } - public void sideloadReddit(int node, Path sourcePath) { - channelPool.apiForNode(node).sideloadReddit( - RpcSideloadReddit.newBuilder() - .setSourcePath(sourcePath.toString()) - .build() - ); - } - public void sideloadWarc(int node, Path sourcePath) { - channelPool.apiForNode(node).sideloadWarc( - RpcSideloadWarc.newBuilder() - .setSourcePath(sourcePath.toString()) - .build() - ); - } - - public void sideloadStackexchange(int node, Path sourcePath) { - channelPool.apiForNode(node).sideloadStackexchange( - RpcSideloadStackexchange.newBuilder() - .setSourcePath(sourcePath.toString()) - .build() - ); - } - - public void createCrawlSpecFromDownload(int node, String description, String url) { - channelPool.apiForNode(node).createCrawlSpecFromDownload( - RpcCrawlSpecFromDownload.newBuilder() - .setDescription(description) - .setUrl(url) - .build() - ); - } - - public void exportAtags(int node, FileStorageId fid) { - channelPool.apiForNode(node).exportAtags( - RpcFileStorageId.newBuilder() - .setFileStorageId(fid.id()) - .build() - ); - } - public void exportSampleData(int node, FileStorageId fid, int size, String name) { - channelPool.apiForNode(node).exportSampleData( - RpcExportSampleData.newBuilder() - .setFileStorageId(fid.id()) - .setSize(size) - .setName(name) - .build() - ); - } - - public 
void exportRssFeeds(int node, FileStorageId fid) { - channelPool.apiForNode(node).exportRssFeeds( - RpcFileStorageId.newBuilder() - .setFileStorageId(fid.id()) - .build() - ); - } - public void exportTermFrequencies(int node, FileStorageId fid) { - channelPool.apiForNode(node).exportTermFrequencies( - RpcFileStorageId.newBuilder() - .setFileStorageId(fid.id()) - .build() - ); - } - - public void downloadSampleData(int node, String sampleSet) { - channelPool.apiForNode(node).downloadSampleData( - RpcDownloadSampleData.newBuilder() - .setSampleSet(sampleSet) - .build() - ); - } - - public void exportData(int node) { - channelPool.apiForNode(node).exportData(Empty.getDefaultInstance()); - } - - public void restoreBackup(int node, FileStorageId fid) { - channelPool.apiForNode(node).restoreBackup( - RpcFileStorageId.newBuilder() - .setFileStorageId(fid.id()) - .build() - ); - } - - public ActorRunStates getActorStates(int node) { - try { - var rs = channelPool.apiForNode(node).getActorStates(Empty.getDefaultInstance()); - var states = rs.getActorRunStatesList().stream() - .map(r -> new ActorRunState( - r.getActorName(), - r.getState(), - r.getActorDescription(), - r.getStateDescription(), - r.getTerminal(), - r.getCanStart()) - ) - .toList(); - - return new ActorRunStates(node, states); - } - catch (Exception ex) { - logger.warn("Failed to get actor states", ex); - - // Return an empty list of states to avoid breaking the UI when a node is down - return new ActorRunStates(node, List.of()); - } - } - - public UploadDirContents listSideloadDir(int node) { - try { - var rs = channelPool.apiForNode(node).listSideloadDir(Empty.getDefaultInstance()); - var items = rs.getEntriesList().stream() - .map(i -> new UploadDirItem(i.getName(), i.getLastModifiedTime(), i.getIsDirectory(), i.getSize())) - .toList(); - return new UploadDirContents(rs.getPath(), items); - } - catch (Exception ex) { - logger.warn("Failed to list sideload dir", ex); - - // Return an empty list of items to 
avoid breaking the UI when a node is down - return new UploadDirContents("", List.of()); - } - } - - public FileStorageContent listFileStorage(int node, FileStorageId fileId) { - try { - var rs = channelPool.apiForNode(node).listFileStorage( - RpcFileStorageId.newBuilder() - .setFileStorageId(fileId.id()) - .build() - ); - - return new FileStorageContent(rs.getEntriesList().stream() - .map(e -> new FileStorageFile(e.getName(), e.getSize(), e.getLastModifiedTime())) - .toList()); - } - catch (Exception ex) { - logger.warn("Failed to list file storage", ex); - - // Return an empty list of items to avoid breaking the UI when a node is down - return new FileStorageContent(List.of()); - } - } - - public void transferFile(Context context, int node, FileStorageId fileId, String path, OutputStream destOutputStream) { - String endpoint = "/transfer/file/%d?path=%s".formatted(fileId.id(), URLEncoder.encode(path, StandardCharsets.UTF_8)); - - get(context, node, endpoint, - destOutputStream) - .blockingSubscribe(); - } - - public TransferSpec getTransferSpec(Context context, int node, int count) { - return get(context, node, "/transfer/spec?count="+count, TransferSpec.class) - .timeout(30, TimeUnit.MINUTES) - .blockingFirst(); - } - - public void yieldDomain(Context context, int node, TransferItem item) { - post(context, node, "/transfer/yield", item).blockingSubscribe(); - } - -} diff --git a/code/api/executor-api/src/main/java/nu/marginalia/executor/model/transfer/TransferItem.java b/code/api/executor-api/src/main/java/nu/marginalia/executor/model/transfer/TransferItem.java deleted file mode 100644 index 17be3dd8..00000000 --- a/code/api/executor-api/src/main/java/nu/marginalia/executor/model/transfer/TransferItem.java +++ /dev/null @@ -1,9 +0,0 @@ -package nu.marginalia.executor.model.transfer; - -import nu.marginalia.storage.model.FileStorageId; - -public record TransferItem(String domainName, - int domainId, - FileStorageId fileStorageId, - String path) { -} diff --git 
a/code/api/executor-api/src/main/java/nu/marginalia/executor/model/transfer/TransferSpec.java b/code/api/executor-api/src/main/java/nu/marginalia/executor/model/transfer/TransferSpec.java deleted file mode 100644 index 8048a411..00000000 --- a/code/api/executor-api/src/main/java/nu/marginalia/executor/model/transfer/TransferSpec.java +++ /dev/null @@ -1,13 +0,0 @@ -package nu.marginalia.executor.model.transfer; - -import java.util.List; - -public record TransferSpec(List items) { - public TransferSpec() { - this(List.of()); - } - - public int size() { - return items.size(); - } -} diff --git a/code/api/index-api/readme.md b/code/api/index-api/readme.md deleted file mode 100644 index 796debf5..00000000 --- a/code/api/index-api/readme.md +++ /dev/null @@ -1,8 +0,0 @@ -# Index API - -Client and models for talking to the [index-service](../../services-core/index-service), -implemented with the base client from [service-client](../../common/service-client). - -## Central Classes - -* [IndexClient](src/main/java/nu/marginalia/index/client/IndexClient.java) \ No newline at end of file diff --git a/code/api/index-api/src/main/java/nu/marginalia/index/client/IndexClient.java b/code/api/index-api/src/main/java/nu/marginalia/index/client/IndexClient.java deleted file mode 100644 index 7c334b8e..00000000 --- a/code/api/index-api/src/main/java/nu/marginalia/index/client/IndexClient.java +++ /dev/null @@ -1,95 +0,0 @@ -package nu.marginalia.index.client; - -import com.google.inject.Inject; -import com.google.inject.Singleton; -import com.google.inject.name.Named; -import io.prometheus.client.Summary; -import io.reactivex.rxjava3.core.Observable; -import io.reactivex.rxjava3.schedulers.Schedulers; -import nu.marginalia.client.AbstractDynamicClient; -import nu.marginalia.client.Context; -import nu.marginalia.client.exception.RouteNotConfiguredException; -import nu.marginalia.index.client.model.query.SearchSpecification; -import 
nu.marginalia.index.client.model.results.SearchResultSet; -import nu.marginalia.model.gson.GsonFactory; -import nu.marginalia.mq.MessageQueueFactory; -import nu.marginalia.mq.outbox.MqOutbox; -import nu.marginalia.service.descriptor.ServiceDescriptors; -import nu.marginalia.service.id.ServiceId; - -import java.util.List; - -import javax.annotation.CheckReturnValue; -import java.util.UUID; - -@Singleton -public class IndexClient extends AbstractDynamicClient { - - private static final Summary wmsa_search_index_api_time = Summary.build().name("wmsa_search_index_api_time").help("-").register(); - private final MessageQueueFactory messageQueueFactory; - - MqOutbox outbox; - - @Inject - public IndexClient(ServiceDescriptors descriptors, - MessageQueueFactory messageQueueFactory, - @Named("wmsa-system-node") Integer nodeId) - { - super(descriptors.forId(ServiceId.Index), GsonFactory::get); - this.messageQueueFactory = messageQueueFactory; - - String inboxName = ServiceId.Index.serviceName; - String outboxName = "pp:"+System.getProperty("service-name", UUID.randomUUID().toString()); - outbox = messageQueueFactory.createOutbox(inboxName, nodeId, outboxName, nodeId, UUID.randomUUID()); - setTimeout(30); - } - - public MqOutbox outbox() { - return outbox; - } - - @CheckReturnValue - public SearchResultSet query(Context ctx, int node, SearchSpecification specs) { - return wmsa_search_index_api_time.time( - () -> this.postGet(ctx, node,"/search/", specs, SearchResultSet.class).blockingFirst() - ); - } - - @CheckReturnValue - public SearchResultSet query(Context ctx, List nodes, SearchSpecification specs) { - return Observable.fromIterable(nodes) - .flatMap(node -> { - try { - return this - .postGet(ctx, node, "/search/", specs, SearchResultSet.class).onErrorReturn(t -> new SearchResultSet()) - .observeOn(Schedulers.io()); - } catch (RouteNotConfiguredException ex) { - return Observable.empty(); - } - }) - .reduce(SearchResultSet::combine) - .blockingGet(); - } - - - 
@CheckReturnValue - public Observable isBlocked(Context ctx, int node) { - return super.get(ctx, node, "/is-blocked", Boolean.class); - } - - public long triggerRepartition(int node) throws Exception { - return messageQueueFactory.sendSingleShotRequest( - ServiceId.Index.withNode(node), - IndexMqEndpoints.INDEX_REPARTITION, - null - ); - } - - public long triggerRerank(int node) throws Exception { - return messageQueueFactory.sendSingleShotRequest( - ServiceId.Index.withNode(node), - IndexMqEndpoints.INDEX_RERANK, - null - ); - } -} diff --git a/code/api/query-api/src/main/java/nu/marginalia/query/client/QueryClient.java b/code/api/query-api/src/main/java/nu/marginalia/query/client/QueryClient.java deleted file mode 100644 index 6c6e63a4..00000000 --- a/code/api/query-api/src/main/java/nu/marginalia/query/client/QueryClient.java +++ /dev/null @@ -1,204 +0,0 @@ -package nu.marginalia.query.client; - -import com.google.inject.Inject; -import com.google.inject.Singleton; -import gnu.trove.list.array.TIntArrayList; -import io.grpc.ManagedChannel; -import io.grpc.ManagedChannelBuilder; -import io.prometheus.client.Summary; -import nu.marginalia.client.AbstractDynamicClient; -import nu.marginalia.client.Context; -import nu.marginalia.index.api.Empty; -import nu.marginalia.index.api.IndexDomainLinksApiGrpc; -import nu.marginalia.index.api.QueryApiGrpc; -import nu.marginalia.index.api.RpcDomainId; -import nu.marginalia.index.client.model.query.SearchSpecification; -import nu.marginalia.index.client.model.results.SearchResultSet; -import nu.marginalia.model.gson.GsonFactory; -import nu.marginalia.query.QueryProtobufCodec; -import nu.marginalia.query.model.QueryParams; -import nu.marginalia.query.model.QueryResponse; -import nu.marginalia.service.descriptor.ServiceDescriptor; -import nu.marginalia.service.descriptor.ServiceDescriptors; -import nu.marginalia.service.id.ServiceId; -import org.roaringbitmap.PeekableCharIterator; -import 
org.roaringbitmap.longlong.PeekableLongIterator; -import org.roaringbitmap.longlong.Roaring64Bitmap; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import javax.annotation.CheckReturnValue; -import java.util.List; -import java.util.Map; -import java.util.concurrent.ConcurrentHashMap; - -@Singleton -public class QueryClient extends AbstractDynamicClient { - - private static final Summary wmsa_qs_api_delegate_time = Summary.build() - .name("wmsa_qs_api_delegate_time") - .help("query service delegate time") - .register(); - private static final Summary wmsa_qs_api_search_time = Summary.build() - .name("wmsa_qs_api_search_time") - .help("query service search time") - .register(); - - private final Map channels = new ConcurrentHashMap<>(); - private final Map queryIndexApis = new ConcurrentHashMap<>(); - private final Map domainLinkApis = new ConcurrentHashMap<>(); - - record ServiceAndNode(String service, int node) { - public String getHostName() { - return service; - } - } - - private ManagedChannel getChannel(ServiceAndNode serviceAndNode) { - return channels.computeIfAbsent(serviceAndNode, - san -> ManagedChannelBuilder - .forAddress(serviceAndNode.getHostName(), 81) - .usePlaintext() - .build()); - } - - public QueryApiGrpc.QueryApiBlockingStub queryApi(int node) { - return queryIndexApis.computeIfAbsent(new ServiceAndNode("query-service", node), n -> - QueryApiGrpc.newBlockingStub( - getChannel(n) - ) - ); - } - - public IndexDomainLinksApiGrpc.IndexDomainLinksApiBlockingStub domainApi(int node) { - return domainLinkApis.computeIfAbsent(new ServiceAndNode("query-service", node), n -> - IndexDomainLinksApiGrpc.newBlockingStub( - getChannel(n) - ) - ); - } - - private final Logger logger = LoggerFactory.getLogger(getClass()); - - @Inject - public QueryClient(ServiceDescriptors descriptors) { - - super(descriptors.forId(ServiceId.Query), GsonFactory::get); - } - public QueryClient() { - super(new ServiceDescriptor(ServiceId.Query, "query-service"), 
GsonFactory::get); - } - - /** Delegate an Index API style query directly to the index service */ - @CheckReturnValue - public SearchResultSet delegate(Context ctx, SearchSpecification specs) { - return wmsa_qs_api_delegate_time.time( - () -> this.postGet(ctx, 0, "/delegate/", specs, SearchResultSet.class).blockingFirst() - ); - } - - @CheckReturnValue - public QueryResponse search(Context ctx, QueryParams params) { - return wmsa_qs_api_search_time.time( - () -> QueryProtobufCodec.convertQueryResponse(queryApi(0).query(QueryProtobufCodec.convertQueryParams(params))) - ); - } - - public AllLinks getAllDomainLinks() { - AllLinks links = new AllLinks(); - - domainApi(0).getAllLinks(Empty.newBuilder().build()).forEachRemaining(pairs -> { - for (int i = 0; i < pairs.getDestIdsCount(); i++) { - links.add(pairs.getSourceIds(i), pairs.getDestIds(i)); - } - }); - - return links; - } - - public List getLinksToDomain(int domainId) { - try { - return domainApi(0).getLinksToDomain(RpcDomainId - .newBuilder() - .setDomainId(domainId) - .build()) - .getDomainIdList(); - } - catch (Exception e) { - logger.error("API Exception", e); - return List.of(); - } - } - - public List getLinksFromDomain(int domainId) { - try { - return domainApi(0).getLinksFromDomain(RpcDomainId - .newBuilder() - .setDomainId(domainId) - .build()) - .getDomainIdList(); - } - catch (Exception e) { - logger.error("API Exception", e); - return List.of(); - } - } - - public int countLinksToDomain(int domainId) { - try { - return domainApi(0).countLinksToDomain(RpcDomainId - .newBuilder() - .setDomainId(domainId) - .build()) - .getIdCount(); - } - catch (Exception e) { - logger.error("API Exception", e); - return 0; - } - } - - public int countLinksFromDomain(int domainId) { - try { - return domainApi(0).countLinksFromDomain(RpcDomainId - .newBuilder() - .setDomainId(domainId) - .build()) - .getIdCount(); - } - catch (Exception e) { - logger.error("API Exception", e); - return 0; - } - } - public static class 
AllLinks { - private final Roaring64Bitmap sourceToDest = new Roaring64Bitmap(); - - public void add(int source, int dest) { - sourceToDest.add(Integer.toUnsignedLong(source) << 32 | Integer.toUnsignedLong(dest)); - } - - public Iterator iterator() { - return new Iterator(); - } - - public class Iterator { - private final PeekableLongIterator base = sourceToDest.getLongIterator(); - long val = Long.MIN_VALUE; - - public boolean advance() { - if (base.hasNext()) { - val = base.next(); - return true; - } - return false; - } - public int source() { - return (int) (val >>> 32); - } - public int dest() { - return (int) (val & 0xFFFF_FFFFL); - } - } - } -} diff --git a/code/api/readme.md b/code/api/readme.md deleted file mode 100644 index 31000815..00000000 --- a/code/api/readme.md +++ /dev/null @@ -1,23 +0,0 @@ -# Clients - -## Core Services - -* [assistant-api](assistant-api/) -* [query-api](query-api/) -* [index-api](index-api/) - -These are clients for the [core services](../services-core/), along with what models -are necessary for speaking to them. They each implement the abstract client classes from -[service-client](../common/service-client). - -All that is necessary is to `@Inject` them into the constructor and then -requests can be sent. - -**Note:** If you are looking for the public API, it's handled by the api service in [services-application/api-service](../services-application/api-service). - -## MQ-API Process API - -[process-mqapi](process-mqapi/) defines requests and inboxes for the message queue based API used -for interacting with processes. - -See [libraries/message-queue](../libraries/message-queue) and [services-application/control-service](../services-core/control-service). 
\ No newline at end of file diff --git a/code/common/config/build.gradle b/code/common/config/build.gradle index d8eb085b..66a984d4 100644 --- a/code/common/config/build.gradle +++ b/code/common/config/build.gradle @@ -11,9 +11,10 @@ java { } } +apply from: "$rootProject.projectDir/srcsets.gradle" + dependencies { implementation project(':code:common:service-discovery') - implementation project(':code:common:service-client') implementation project(':code:common:db') implementation project(':code:common:model') diff --git a/code/common/config/src/main/java/nu/marginalia/IndexLocations.java b/code/common/config/java/nu/marginalia/IndexLocations.java similarity index 100% rename from code/common/config/src/main/java/nu/marginalia/IndexLocations.java rename to code/common/config/java/nu/marginalia/IndexLocations.java diff --git a/code/common/config/src/main/java/nu/marginalia/LanguageModels.java b/code/common/config/java/nu/marginalia/LanguageModels.java similarity index 100% rename from code/common/config/src/main/java/nu/marginalia/LanguageModels.java rename to code/common/config/java/nu/marginalia/LanguageModels.java diff --git a/code/common/config/src/main/java/nu/marginalia/UserAgent.java b/code/common/config/java/nu/marginalia/UserAgent.java similarity index 100% rename from code/common/config/src/main/java/nu/marginalia/UserAgent.java rename to code/common/config/java/nu/marginalia/UserAgent.java diff --git a/code/common/config/src/main/java/nu/marginalia/WebsiteUrl.java b/code/common/config/java/nu/marginalia/WebsiteUrl.java similarity index 100% rename from code/common/config/src/main/java/nu/marginalia/WebsiteUrl.java rename to code/common/config/java/nu/marginalia/WebsiteUrl.java diff --git a/code/common/config/src/main/java/nu/marginalia/WmsaHome.java b/code/common/config/java/nu/marginalia/WmsaHome.java similarity index 62% rename from code/common/config/src/main/java/nu/marginalia/WmsaHome.java rename to code/common/config/java/nu/marginalia/WmsaHome.java 
index 00270102..122f6211 100644 --- a/code/common/config/src/main/java/nu/marginalia/WmsaHome.java +++ b/code/common/config/java/nu/marginalia/WmsaHome.java @@ -3,11 +3,10 @@ package nu.marginalia; import nu.marginalia.service.ServiceHomeNotConfiguredException; -import java.io.FileNotFoundException; -import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; +import java.util.Objects; import java.util.Optional; import java.util.stream.Stream; @@ -28,14 +27,37 @@ public class WmsaHome { } public static Path getHomePath() { - var retStr = Optional.ofNullable(System.getenv("WMSA_HOME")).orElseGet(WmsaHome::findDefaultHomePath); + String[] possibleLocations = new String[] { + System.getenv("WMSA_HOME"), + System.getProperty("system.homePath"), + "/var/lib/wmsa", + "/wmsa" + }; - var ret = Path.of(retStr); + Optional retStr = Stream.of(possibleLocations) + .filter(Objects::nonNull) + .map(Path::of) + .filter(Files::isDirectory) + .map(Path::toString) + .findFirst(); - if (!Files.isDirectory(ret)) { - throw new ServiceHomeNotConfiguredException("Could not find $WMSA_HOME, either set environment variable or ensure " + retStr + " exists"); + if (retStr.isEmpty()) { + // Check if we are running in a test environment + + var testRoot = Stream.iterate(Paths.get("").toAbsolutePath(), f -> f != null && Files.exists(f), Path::getParent) + .filter(p -> Files.exists(p.resolve("run/env"))) + .filter(p -> Files.exists(p.resolve("run/setup.sh"))) + .map(p -> p.resolve("run")) + .findAny(); + + return testRoot.orElseThrow(() -> new ServiceHomeNotConfiguredException(""" + Could not find $WMSA_HOME, either set environment + variable, the 'system.homePath' property, + or ensure either /wmsa or /var/lib/wmsa exists + """)); } + var ret = Path.of(retStr.get()); if (!Files.isDirectory(ret.resolve("model"))) { throw new ServiceHomeNotConfiguredException("You need to run 'run/setup.sh' to download models to run/ before this will work!"); @@ 
-44,22 +66,6 @@ public class WmsaHome { return ret; } - private static String findDefaultHomePath() { - - // Assume this is a local developer and not a production system, since it would have WMSA_HOME set. - // Developers probably have a "run/" somewhere upstream from cwd. - // - - return Stream.iterate(Paths.get("").toAbsolutePath(), f -> f != null && Files.exists(f), Path::getParent) - .filter(p -> Files.exists(p.resolve("run/env"))) - .filter(p -> Files.exists(p.resolve("run/setup.sh"))) - .map(p -> p.resolve("run")) - .findAny() - .orElse(Path.of("/var/lib/wmsa")) - .toString(); - } - - public static Path getAdsDefinition() { return getHomePath().resolve("data").resolve("adblock.txt"); } diff --git a/code/common/config/src/main/java/nu/marginalia/nodecfg/NodeConfigurationService.java b/code/common/config/java/nu/marginalia/nodecfg/NodeConfigurationService.java similarity index 100% rename from code/common/config/src/main/java/nu/marginalia/nodecfg/NodeConfigurationService.java rename to code/common/config/java/nu/marginalia/nodecfg/NodeConfigurationService.java diff --git a/code/common/config/src/main/java/nu/marginalia/nodecfg/model/NodeConfiguration.java b/code/common/config/java/nu/marginalia/nodecfg/model/NodeConfiguration.java similarity index 100% rename from code/common/config/src/main/java/nu/marginalia/nodecfg/model/NodeConfiguration.java rename to code/common/config/java/nu/marginalia/nodecfg/model/NodeConfiguration.java diff --git a/code/common/config/src/main/java/nu/marginalia/storage/FileStorageManifest.java b/code/common/config/java/nu/marginalia/storage/FileStorageManifest.java similarity index 100% rename from code/common/config/src/main/java/nu/marginalia/storage/FileStorageManifest.java rename to code/common/config/java/nu/marginalia/storage/FileStorageManifest.java diff --git a/code/common/config/src/main/java/nu/marginalia/storage/FileStorageService.java b/code/common/config/java/nu/marginalia/storage/FileStorageService.java similarity 
index 100% rename from code/common/config/src/main/java/nu/marginalia/storage/FileStorageService.java rename to code/common/config/java/nu/marginalia/storage/FileStorageService.java diff --git a/code/common/config/src/main/java/nu/marginalia/storage/model/FileStorage.java b/code/common/config/java/nu/marginalia/storage/model/FileStorage.java similarity index 100% rename from code/common/config/src/main/java/nu/marginalia/storage/model/FileStorage.java rename to code/common/config/java/nu/marginalia/storage/model/FileStorage.java diff --git a/code/common/config/src/main/java/nu/marginalia/storage/model/FileStorageBase.java b/code/common/config/java/nu/marginalia/storage/model/FileStorageBase.java similarity index 100% rename from code/common/config/src/main/java/nu/marginalia/storage/model/FileStorageBase.java rename to code/common/config/java/nu/marginalia/storage/model/FileStorageBase.java diff --git a/code/common/config/src/main/java/nu/marginalia/storage/model/FileStorageBaseId.java b/code/common/config/java/nu/marginalia/storage/model/FileStorageBaseId.java similarity index 100% rename from code/common/config/src/main/java/nu/marginalia/storage/model/FileStorageBaseId.java rename to code/common/config/java/nu/marginalia/storage/model/FileStorageBaseId.java diff --git a/code/common/config/src/main/java/nu/marginalia/storage/model/FileStorageBaseType.java b/code/common/config/java/nu/marginalia/storage/model/FileStorageBaseType.java similarity index 100% rename from code/common/config/src/main/java/nu/marginalia/storage/model/FileStorageBaseType.java rename to code/common/config/java/nu/marginalia/storage/model/FileStorageBaseType.java diff --git a/code/common/config/src/main/java/nu/marginalia/storage/model/FileStorageId.java b/code/common/config/java/nu/marginalia/storage/model/FileStorageId.java similarity index 100% rename from code/common/config/src/main/java/nu/marginalia/storage/model/FileStorageId.java rename to 
code/common/config/java/nu/marginalia/storage/model/FileStorageId.java diff --git a/code/common/config/src/main/java/nu/marginalia/storage/model/FileStorageState.java b/code/common/config/java/nu/marginalia/storage/model/FileStorageState.java similarity index 100% rename from code/common/config/src/main/java/nu/marginalia/storage/model/FileStorageState.java rename to code/common/config/java/nu/marginalia/storage/model/FileStorageState.java diff --git a/code/common/config/src/main/java/nu/marginalia/storage/model/FileStorageType.java b/code/common/config/java/nu/marginalia/storage/model/FileStorageType.java similarity index 100% rename from code/common/config/src/main/java/nu/marginalia/storage/model/FileStorageType.java rename to code/common/config/java/nu/marginalia/storage/model/FileStorageType.java diff --git a/code/common/config/src/test/java/nu/marginalia/nodecfg/NodeConfigurationServiceTest.java b/code/common/config/test/nu/marginalia/nodecfg/NodeConfigurationServiceTest.java similarity index 92% rename from code/common/config/src/test/java/nu/marginalia/nodecfg/NodeConfigurationServiceTest.java rename to code/common/config/test/nu/marginalia/nodecfg/NodeConfigurationServiceTest.java index 13ea555d..63a18c93 100644 --- a/code/common/config/src/test/java/nu/marginalia/nodecfg/NodeConfigurationServiceTest.java +++ b/code/common/config/test/nu/marginalia/nodecfg/NodeConfigurationServiceTest.java @@ -2,7 +2,6 @@ package nu.marginalia.nodecfg; import com.zaxxer.hikari.HikariConfig; import com.zaxxer.hikari.HikariDataSource; -import nu.marginalia.storage.FileStorageService; import nu.marginalia.test.TestMigrationLoader; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Tag; @@ -13,12 +12,7 @@ import org.testcontainers.containers.MariaDBContainer; import org.testcontainers.junit.jupiter.Container; import org.testcontainers.junit.jupiter.Testcontainers; -import java.io.IOException; -import java.nio.file.Path; import java.sql.SQLException; -import 
java.util.ArrayList; -import java.util.List; -import java.util.Objects; import static org.junit.jupiter.api.Assertions.*; diff --git a/code/common/config/src/test/java/nu/marginalia/storage/FileStorageServiceTest.java b/code/common/config/test/nu/marginalia/storage/FileStorageServiceTest.java similarity index 100% rename from code/common/config/src/test/java/nu/marginalia/storage/FileStorageServiceTest.java rename to code/common/config/test/nu/marginalia/storage/FileStorageServiceTest.java diff --git a/code/common/db/build.gradle b/code/common/db/build.gradle index 320a58af..85ab7dba 100644 --- a/code/common/db/build.gradle +++ b/code/common/db/build.gradle @@ -26,6 +26,8 @@ configurations { flywayMigration.extendsFrom(implementation) } +apply from: "$rootProject.projectDir/srcsets.gradle" + dependencies { implementation project(':code:common:model') @@ -40,7 +42,6 @@ dependencies { implementation libs.trove - implementation libs.rxjava implementation libs.bundles.mariadb flywayMigration 'org.flywaydb:flyway-mysql:10.0.1' diff --git a/code/common/db/src/main/java/nu/marginalia/db/DbDomainQueries.java b/code/common/db/java/nu/marginalia/db/DbDomainQueries.java similarity index 100% rename from code/common/db/src/main/java/nu/marginalia/db/DbDomainQueries.java rename to code/common/db/java/nu/marginalia/db/DbDomainQueries.java diff --git a/code/common/db/src/main/java/nu/marginalia/db/DbDomainStatsExportMultitool.java b/code/common/db/java/nu/marginalia/db/DbDomainStatsExportMultitool.java similarity index 100% rename from code/common/db/src/main/java/nu/marginalia/db/DbDomainStatsExportMultitool.java rename to code/common/db/java/nu/marginalia/db/DbDomainStatsExportMultitool.java diff --git a/code/common/db/src/main/java/nu/marginalia/db/DomainBlacklist.java b/code/common/db/java/nu/marginalia/db/DomainBlacklist.java similarity index 100% rename from code/common/db/src/main/java/nu/marginalia/db/DomainBlacklist.java rename to 
code/common/db/java/nu/marginalia/db/DomainBlacklist.java diff --git a/code/common/db/src/main/java/nu/marginalia/db/DomainBlacklistImpl.java b/code/common/db/java/nu/marginalia/db/DomainBlacklistImpl.java similarity index 100% rename from code/common/db/src/main/java/nu/marginalia/db/DomainBlacklistImpl.java rename to code/common/db/java/nu/marginalia/db/DomainBlacklistImpl.java diff --git a/code/common/db/src/main/java/nu/marginalia/db/DomainRankingSetsService.java b/code/common/db/java/nu/marginalia/db/DomainRankingSetsService.java similarity index 99% rename from code/common/db/src/main/java/nu/marginalia/db/DomainRankingSetsService.java rename to code/common/db/java/nu/marginalia/db/DomainRankingSetsService.java index 37ca2a58..50706b77 100644 --- a/code/common/db/src/main/java/nu/marginalia/db/DomainRankingSetsService.java +++ b/code/common/db/java/nu/marginalia/db/DomainRankingSetsService.java @@ -115,7 +115,6 @@ public class DomainRankingSetsService { } } - /** Defines a domain ranking set, parameters for the ranking algorithms. 
* * @param name Key and name of the set diff --git a/code/common/db/src/main/java/nu/marginalia/db/DomainTypes.java b/code/common/db/java/nu/marginalia/db/DomainTypes.java similarity index 99% rename from code/common/db/src/main/java/nu/marginalia/db/DomainTypes.java rename to code/common/db/java/nu/marginalia/db/DomainTypes.java index c4405aab..9a1fe265 100644 --- a/code/common/db/src/main/java/nu/marginalia/db/DomainTypes.java +++ b/code/common/db/java/nu/marginalia/db/DomainTypes.java @@ -24,7 +24,7 @@ public class DomainTypes { BLOG, CRAWL, TEST - }; + } private final Logger logger = LoggerFactory.getLogger(DomainTypes.class); diff --git a/code/common/db/readme.md b/code/common/db/readme.md index ae683741..07b6191c 100644 --- a/code/common/db/readme.md +++ b/code/common/db/readme.md @@ -17,14 +17,14 @@ It's well documented and these are probably the only four tasks you'll ever need If you are not running the system via docker, you need to provide alternative connection details than the defaults (TODO: how?). -The migration files are in [resources/db/migration](src/main/resources/db/migration). The file name convention +The migration files are in [resources/db/migration](resources/db/migration). The file name convention incorporates the project's cal-ver versioning; and are applied in lexicographical order. 
VYY_MM_v_nnn__description.sql ## Central Paths -* [migrations](src/main/resources/db/migration) - Flyway migrations +* [migrations](resources/db/migration) - Flyway migrations ## See Also diff --git a/code/common/db/src/main/resources/db/migration/V23_06_0_000__base.sql b/code/common/db/resources/db/migration/V23_06_0_000__base.sql similarity index 100% rename from code/common/db/src/main/resources/db/migration/V23_06_0_000__base.sql rename to code/common/db/resources/db/migration/V23_06_0_000__base.sql diff --git a/code/common/db/src/main/resources/db/migration/V23_06_0_001__blacklist.sql b/code/common/db/resources/db/migration/V23_06_0_001__blacklist.sql similarity index 100% rename from code/common/db/src/main/resources/db/migration/V23_06_0_001__blacklist.sql rename to code/common/db/resources/db/migration/V23_06_0_001__blacklist.sql diff --git a/code/common/db/src/main/resources/db/migration/V23_06_0_002__dictionary.sql b/code/common/db/resources/db/migration/V23_06_0_002__dictionary.sql similarity index 100% rename from code/common/db/src/main/resources/db/migration/V23_06_0_002__dictionary.sql rename to code/common/db/resources/db/migration/V23_06_0_002__dictionary.sql diff --git a/code/common/db/src/main/resources/db/migration/V23_06_0_003__crawl-queue.sql b/code/common/db/resources/db/migration/V23_06_0_003__crawl-queue.sql similarity index 100% rename from code/common/db/src/main/resources/db/migration/V23_06_0_003__crawl-queue.sql rename to code/common/db/resources/db/migration/V23_06_0_003__crawl-queue.sql diff --git a/code/common/db/src/main/resources/db/migration/V23_06_0_004__screenshot.sql b/code/common/db/resources/db/migration/V23_06_0_004__screenshot.sql similarity index 100% rename from code/common/db/src/main/resources/db/migration/V23_06_0_004__screenshot.sql rename to code/common/db/resources/db/migration/V23_06_0_004__screenshot.sql diff --git a/code/common/db/src/main/resources/db/migration/V23_06_0_005__domain_complaint.sql 
b/code/common/db/resources/db/migration/V23_06_0_005__domain_complaint.sql similarity index 100% rename from code/common/db/src/main/resources/db/migration/V23_06_0_005__domain_complaint.sql rename to code/common/db/resources/db/migration/V23_06_0_005__domain_complaint.sql diff --git a/code/common/db/src/main/resources/db/migration/V23_06_0_006__api_key.sql b/code/common/db/resources/db/migration/V23_06_0_006__api_key.sql similarity index 100% rename from code/common/db/src/main/resources/db/migration/V23_06_0_006__api_key.sql rename to code/common/db/resources/db/migration/V23_06_0_006__api_key.sql diff --git a/code/common/db/src/main/resources/db/migration/V23_06_0_007__neighbors.sql b/code/common/db/resources/db/migration/V23_06_0_007__neighbors.sql similarity index 100% rename from code/common/db/src/main/resources/db/migration/V23_06_0_007__neighbors.sql rename to code/common/db/resources/db/migration/V23_06_0_007__neighbors.sql diff --git a/code/common/db/src/main/resources/db/migration/V23_06_0_008__random_domains.sql b/code/common/db/resources/db/migration/V23_06_0_008__random_domains.sql similarity index 100% rename from code/common/db/src/main/resources/db/migration/V23_06_0_008__random_domains.sql rename to code/common/db/resources/db/migration/V23_06_0_008__random_domains.sql diff --git a/code/common/db/src/main/resources/db/migration/V23_06_0_009__news_feed.sql b/code/common/db/resources/db/migration/V23_06_0_009__news_feed.sql similarity index 100% rename from code/common/db/src/main/resources/db/migration/V23_06_0_009__news_feed.sql rename to code/common/db/resources/db/migration/V23_06_0_009__news_feed.sql diff --git a/code/common/db/src/main/resources/db/migration/V23_07_0_001__domain_type.sql b/code/common/db/resources/db/migration/V23_07_0_001__domain_type.sql similarity index 100% rename from code/common/db/src/main/resources/db/migration/V23_07_0_001__domain_type.sql rename to code/common/db/resources/db/migration/V23_07_0_001__domain_type.sql 
diff --git a/code/common/db/src/main/resources/db/migration/V23_07_0_002__service_status.sql b/code/common/db/resources/db/migration/V23_07_0_002__service_status.sql similarity index 100% rename from code/common/db/src/main/resources/db/migration/V23_07_0_002__service_status.sql rename to code/common/db/resources/db/migration/V23_07_0_002__service_status.sql diff --git a/code/common/db/src/main/resources/db/migration/V23_07_0_003__message_queue.sql b/code/common/db/resources/db/migration/V23_07_0_003__message_queue.sql similarity index 100% rename from code/common/db/src/main/resources/db/migration/V23_07_0_003__message_queue.sql rename to code/common/db/resources/db/migration/V23_07_0_003__message_queue.sql diff --git a/code/common/db/src/main/resources/db/migration/V23_07_0_004__file_storage.sql b/code/common/db/resources/db/migration/V23_07_0_004__file_storage.sql similarity index 100% rename from code/common/db/src/main/resources/db/migration/V23_07_0_004__file_storage.sql rename to code/common/db/resources/db/migration/V23_07_0_004__file_storage.sql diff --git a/code/common/db/src/main/resources/db/migration/V23_07_0_005__file_storage_default_values.sql b/code/common/db/resources/db/migration/V23_07_0_005__file_storage_default_values.sql similarity index 100% rename from code/common/db/src/main/resources/db/migration/V23_07_0_005__file_storage_default_values.sql rename to code/common/db/resources/db/migration/V23_07_0_005__file_storage_default_values.sql diff --git a/code/common/db/src/main/resources/db/migration/V23_07_0_006__message_queue_default_jobs.sql b/code/common/db/resources/db/migration/V23_07_0_006__message_queue_default_jobs.sql similarity index 100% rename from code/common/db/src/main/resources/db/migration/V23_07_0_006__message_queue_default_jobs.sql rename to code/common/db/resources/db/migration/V23_07_0_006__message_queue_default_jobs.sql diff --git a/code/common/db/src/main/resources/db/migration/V23_07_0_007__task_status.sql 
b/code/common/db/resources/db/migration/V23_07_0_007__task_status.sql similarity index 100% rename from code/common/db/src/main/resources/db/migration/V23_07_0_007__task_status.sql rename to code/common/db/resources/db/migration/V23_07_0_007__task_status.sql diff --git a/code/common/db/src/main/resources/db/migration/V23_07_0_008__events_index.sql b/code/common/db/resources/db/migration/V23_07_0_008__events_index.sql similarity index 100% rename from code/common/db/src/main/resources/db/migration/V23_07_0_008__events_index.sql rename to code/common/db/resources/db/migration/V23_07_0_008__events_index.sql diff --git a/code/common/db/src/main/resources/db/migration/V23_09_0_000__filestorage_livedb.sql b/code/common/db/resources/db/migration/V23_09_0_000__filestorage_livedb.sql similarity index 100% rename from code/common/db/src/main/resources/db/migration/V23_09_0_000__filestorage_livedb.sql rename to code/common/db/resources/db/migration/V23_09_0_000__filestorage_livedb.sql diff --git a/code/common/db/src/main/resources/db/migration/V23_09_1_000__drop_ecurl.sql b/code/common/db/resources/db/migration/V23_09_1_000__drop_ecurl.sql similarity index 100% rename from code/common/db/src/main/resources/db/migration/V23_09_1_000__drop_ecurl.sql rename to code/common/db/resources/db/migration/V23_09_1_000__drop_ecurl.sql diff --git a/code/common/db/src/main/resources/db/migration/V23_09_2_000__filestorage_backup.sql b/code/common/db/resources/db/migration/V23_09_2_000__filestorage_backup.sql similarity index 100% rename from code/common/db/src/main/resources/db/migration/V23_09_2_000__filestorage_backup.sql rename to code/common/db/resources/db/migration/V23_09_2_000__filestorage_backup.sql diff --git a/code/common/db/src/main/resources/db/migration/V23_09_2_001__filestorage_no_lexicon.sql b/code/common/db/resources/db/migration/V23_09_2_001__filestorage_no_lexicon.sql similarity index 100% rename from 
code/common/db/src/main/resources/db/migration/V23_09_2_001__filestorage_no_lexicon.sql rename to code/common/db/resources/db/migration/V23_09_2_001__filestorage_no_lexicon.sql diff --git a/code/common/db/src/main/resources/db/migration/V23_11_0_000__file_storage_node.sql b/code/common/db/resources/db/migration/V23_11_0_000__file_storage_node.sql similarity index 100% rename from code/common/db/src/main/resources/db/migration/V23_11_0_000__file_storage_node.sql rename to code/common/db/resources/db/migration/V23_11_0_000__file_storage_node.sql diff --git a/code/common/db/src/main/resources/db/migration/V23_11_0_001__heartbeat_node.sql b/code/common/db/resources/db/migration/V23_11_0_001__heartbeat_node.sql similarity index 100% rename from code/common/db/src/main/resources/db/migration/V23_11_0_001__heartbeat_node.sql rename to code/common/db/resources/db/migration/V23_11_0_001__heartbeat_node.sql diff --git a/code/common/db/src/main/resources/db/migration/V23_11_0_002__file_storage_state.sql b/code/common/db/resources/db/migration/V23_11_0_002__file_storage_state.sql similarity index 100% rename from code/common/db/src/main/resources/db/migration/V23_11_0_002__file_storage_state.sql rename to code/common/db/resources/db/migration/V23_11_0_002__file_storage_state.sql diff --git a/code/common/db/src/main/resources/db/migration/V23_11_0_003__node_configuration.sql b/code/common/db/resources/db/migration/V23_11_0_003__node_configuration.sql similarity index 100% rename from code/common/db/src/main/resources/db/migration/V23_11_0_003__node_configuration.sql rename to code/common/db/resources/db/migration/V23_11_0_003__node_configuration.sql diff --git a/code/common/db/src/main/resources/db/migration/V23_11_0_004__file_storage_base_type.sql b/code/common/db/resources/db/migration/V23_11_0_004__file_storage_base_type.sql similarity index 100% rename from code/common/db/src/main/resources/db/migration/V23_11_0_004__file_storage_base_type.sql rename to 
code/common/db/resources/db/migration/V23_11_0_004__file_storage_base_type.sql diff --git a/code/common/db/src/main/resources/db/migration/V23_11_0_005__clean_message_queue.sql b/code/common/db/resources/db/migration/V23_11_0_005__clean_message_queue.sql similarity index 100% rename from code/common/db/src/main/resources/db/migration/V23_11_0_005__clean_message_queue.sql rename to code/common/db/resources/db/migration/V23_11_0_005__clean_message_queue.sql diff --git a/code/common/db/src/main/resources/db/migration/V23_11_0_006__clean_stores.sql b/code/common/db/resources/db/migration/V23_11_0_006__clean_stores.sql similarity index 100% rename from code/common/db/src/main/resources/db/migration/V23_11_0_006__clean_stores.sql rename to code/common/db/resources/db/migration/V23_11_0_006__clean_stores.sql diff --git a/code/common/db/src/main/resources/db/migration/V23_11_0_007__domain_node_affinity.sql b/code/common/db/resources/db/migration/V23_11_0_007__domain_node_affinity.sql similarity index 100% rename from code/common/db/src/main/resources/db/migration/V23_11_0_007__domain_node_affinity.sql rename to code/common/db/resources/db/migration/V23_11_0_007__domain_node_affinity.sql diff --git a/code/common/db/src/main/resources/db/migration/V23_11_0_008__purge_procedure.sql b/code/common/db/resources/db/migration/V23_11_0_008__purge_procedure.sql similarity index 100% rename from code/common/db/src/main/resources/db/migration/V23_11_0_008__purge_procedure.sql rename to code/common/db/resources/db/migration/V23_11_0_008__purge_procedure.sql diff --git a/code/common/db/src/main/resources/db/migration/V24_01_0_001__node_config__keep_warc.sql b/code/common/db/resources/db/migration/V24_01_0_001__node_config__keep_warc.sql similarity index 100% rename from code/common/db/src/main/resources/db/migration/V24_01_0_001__node_config__keep_warc.sql rename to code/common/db/resources/db/migration/V24_01_0_001__node_config__keep_warc.sql diff --git 
a/code/common/db/src/main/resources/db/migration/V24_01_0_002__domain_set.sql b/code/common/db/resources/db/migration/V24_01_0_002__domain_set.sql similarity index 100% rename from code/common/db/src/main/resources/db/migration/V24_01_0_002__domain_set.sql rename to code/common/db/resources/db/migration/V24_01_0_002__domain_set.sql diff --git a/code/common/db/src/main/resources/db/migration/V24_01_0_003__mqaudit.sql b/code/common/db/resources/db/migration/V24_01_0_003__mqaudit.sql similarity index 100% rename from code/common/db/src/main/resources/db/migration/V24_01_0_003__mqaudit.sql rename to code/common/db/resources/db/migration/V24_01_0_003__mqaudit.sql diff --git a/code/common/db/src/main/resources/db/migration/V24_02_0_000__drop_domain_links.sql b/code/common/db/resources/db/migration/V24_02_0_000__drop_domain_links.sql similarity index 100% rename from code/common/db/src/main/resources/db/migration/V24_02_0_000__drop_domain_links.sql rename to code/common/db/resources/db/migration/V24_02_0_000__drop_domain_links.sql diff --git a/code/common/db/src/main/resources/db/migration/V24_02_0_001__drop_ranking_set_algo.sql b/code/common/db/resources/db/migration/V24_02_0_001__drop_ranking_set_algo.sql similarity index 100% rename from code/common/db/src/main/resources/db/migration/V24_02_0_001__drop_ranking_set_algo.sql rename to code/common/db/resources/db/migration/V24_02_0_001__drop_ranking_set_algo.sql diff --git a/code/common/db/src/test/java/nu/marginalia/db/DomainRankingSetsServiceTest.java b/code/common/db/test/nu/marginalia/db/DomainRankingSetsServiceTest.java similarity index 100% rename from code/common/db/src/test/java/nu/marginalia/db/DomainRankingSetsServiceTest.java rename to code/common/db/test/nu/marginalia/db/DomainRankingSetsServiceTest.java diff --git a/code/common/db/src/test/java/nu/marginalia/db/DomainTypesTest.java b/code/common/db/test/nu/marginalia/db/DomainTypesTest.java similarity index 100% rename from 
code/common/db/src/test/java/nu/marginalia/db/DomainTypesTest.java rename to code/common/db/test/nu/marginalia/db/DomainTypesTest.java diff --git a/code/common/linkdb/build.gradle b/code/common/linkdb/build.gradle index 41ec618d..811aa577 100644 --- a/code/common/linkdb/build.gradle +++ b/code/common/linkdb/build.gradle @@ -14,6 +14,8 @@ configurations { flywayMigration.extendsFrom(implementation) } +apply from: "$rootProject.projectDir/srcsets.gradle" + dependencies { implementation project(':code:common:model') implementation project(':code:common:service') diff --git a/code/common/linkdb/src/main/java/nu/marginalia/linkdb/LinkdbFileNames.java b/code/common/linkdb/java/nu/marginalia/linkdb/LinkdbFileNames.java similarity index 100% rename from code/common/linkdb/src/main/java/nu/marginalia/linkdb/LinkdbFileNames.java rename to code/common/linkdb/java/nu/marginalia/linkdb/LinkdbFileNames.java diff --git a/code/common/linkdb/src/main/java/nu/marginalia/linkdb/docs/DocumentDbReader.java b/code/common/linkdb/java/nu/marginalia/linkdb/docs/DocumentDbReader.java similarity index 100% rename from code/common/linkdb/src/main/java/nu/marginalia/linkdb/docs/DocumentDbReader.java rename to code/common/linkdb/java/nu/marginalia/linkdb/docs/DocumentDbReader.java diff --git a/code/common/linkdb/src/main/java/nu/marginalia/linkdb/docs/DocumentDbWriter.java b/code/common/linkdb/java/nu/marginalia/linkdb/docs/DocumentDbWriter.java similarity index 100% rename from code/common/linkdb/src/main/java/nu/marginalia/linkdb/docs/DocumentDbWriter.java rename to code/common/linkdb/java/nu/marginalia/linkdb/docs/DocumentDbWriter.java diff --git a/code/common/linkdb/src/main/java/nu/marginalia/linkdb/model/DocdbUrlDetail.java b/code/common/linkdb/java/nu/marginalia/linkdb/model/DocdbUrlDetail.java similarity index 100% rename from code/common/linkdb/src/main/java/nu/marginalia/linkdb/model/DocdbUrlDetail.java rename to code/common/linkdb/java/nu/marginalia/linkdb/model/DocdbUrlDetail.java 
diff --git a/code/common/linkdb/readme.md b/code/common/linkdb/readme.md index ab86b931..9b3a82a0 100644 --- a/code/common/linkdb/readme.md +++ b/code/common/linkdb/readme.md @@ -1,15 +1,3 @@ -## Domain Link Database - -The domain link database contains information about links -between domains. It is a static in-memory database loaded -from a binary file. - -* [DomainLinkDb](src/main/java/nu/marginalia/linkdb/DomainLinkDb.java) -* * [FileDomainLinkDb](src/main/java/nu/marginalia/linkdb/FileDomainLinkDb.java) -* * [SqlDomainLinkDb](src/main/java/nu/marginalia/linkdb/SqlDomainLinkDb.java) -* [DomainLinkDbWriter](src/main/java/nu/marginalia/linkdb/DomainLinkDbWriter.java) -* [DomainLinkDbLoader](src/main/java/nu/marginalia/linkdb/DomainLinkDbLoader.java) - ## Document Database The document database contains information about links, @@ -21,10 +9,10 @@ is not in the MariaDB database is that this would make updates to this information take effect in production immediately, even before the information was searchable. -* [DocumentLinkDbWriter](src/main/java/nu/marginalia/linkdb/DocumentDbWriter.java) -* [DocumentLinkDbLoader](src/main/java/nu/marginalia/linkdb/DocumentDbReader.java) +* [DocumentDbWriter](java/nu/marginalia/linkdb/docs/DocumentDbWriter.java) +* [DocumentDbReader](java/nu/marginalia/linkdb/docs/DocumentDbReader.java) ## See Also -These databases are constructed by the [loading-process](../../processes/loading-process), and consumed by the [index-service](../../services-core/index-service). \ No newline at end of file +The database is constructed by the [loading-process](../../processes/loading-process), and consumed by the [index-service](../../services-core/index-service).
\ No newline at end of file diff --git a/code/common/linkdb/src/main/resources/db/docdb-document.sql b/code/common/linkdb/resources/db/docdb-document.sql similarity index 100% rename from code/common/linkdb/src/main/resources/db/docdb-document.sql rename to code/common/linkdb/resources/db/docdb-document.sql diff --git a/code/common/linkdb/src/test/java/nu/marginalia/linkdb/DocumentDbWriterTest.java b/code/common/linkdb/test/nu/marginalia/linkdb/DocumentDbWriterTest.java similarity index 100% rename from code/common/linkdb/src/test/java/nu/marginalia/linkdb/DocumentDbWriterTest.java rename to code/common/linkdb/test/nu/marginalia/linkdb/DocumentDbWriterTest.java diff --git a/code/common/model/build.gradle b/code/common/model/build.gradle index b737f02d..80d9f247 100644 --- a/code/common/model/build.gradle +++ b/code/common/model/build.gradle @@ -10,9 +10,10 @@ java { } } +apply from: "$rootProject.projectDir/srcsets.gradle" + dependencies { implementation project(':code:common:service-discovery') - implementation project(':code:common:service-client') implementation project(':code:libraries:big-string') implementation project(':code:libraries:braille-block-punch-cards') @@ -28,7 +29,6 @@ dependencies { implementation libs.trove implementation libs.fastutil - implementation libs.rxjava implementation libs.bundles.mariadb testImplementation libs.bundles.slf4j.test diff --git a/code/common/model/src/main/java/nu/marginalia/model/EdgeDomain.java b/code/common/model/java/nu/marginalia/model/EdgeDomain.java similarity index 98% rename from code/common/model/src/main/java/nu/marginalia/model/EdgeDomain.java rename to code/common/model/java/nu/marginalia/model/EdgeDomain.java index 0686a9ce..86c17824 100644 --- a/code/common/model/src/main/java/nu/marginalia/model/EdgeDomain.java +++ b/code/common/model/java/nu/marginalia/model/EdgeDomain.java @@ -127,7 +127,7 @@ public class EdgeDomain implements Serializable { ret.append(topDomain, 0, cutPoint); } - if 
(!"".equals(subDomain) && !"www".equals(subDomain)) { + if (!subDomain.isEmpty() && !"www".equals(subDomain)) { ret.append(":"); ret.append(subDomain); } diff --git a/code/common/model/src/main/java/nu/marginalia/model/EdgeUrl.java b/code/common/model/java/nu/marginalia/model/EdgeUrl.java similarity index 95% rename from code/common/model/src/main/java/nu/marginalia/model/EdgeUrl.java rename to code/common/model/java/nu/marginalia/model/EdgeUrl.java index c09ed550..c86611fb 100644 --- a/code/common/model/src/main/java/nu/marginalia/model/EdgeUrl.java +++ b/code/common/model/java/nu/marginalia/model/EdgeUrl.java @@ -32,7 +32,16 @@ public class EdgeUrl implements Serializable { } public EdgeUrl(String url) throws URISyntaxException { - this(new URI(urlencodeFixer(url))); + this(parseURI(url)); + } + + private static URI parseURI(String url) throws URISyntaxException { + try { + return new URI(urlencodeFixer(url)); + } + catch (URISyntaxException ex) { + throw new URISyntaxException(STR."Failed to parse URI '\{url}'", ex.getMessage()); + } } public static Optional parse(@Nullable String url) { diff --git a/code/common/model/src/main/java/nu/marginalia/model/crawl/DomainIndexingState.java b/code/common/model/java/nu/marginalia/model/crawl/DomainIndexingState.java similarity index 100% rename from code/common/model/src/main/java/nu/marginalia/model/crawl/DomainIndexingState.java rename to code/common/model/java/nu/marginalia/model/crawl/DomainIndexingState.java diff --git a/code/common/model/src/main/java/nu/marginalia/model/crawl/HtmlFeature.java b/code/common/model/java/nu/marginalia/model/crawl/HtmlFeature.java similarity index 100% rename from code/common/model/src/main/java/nu/marginalia/model/crawl/HtmlFeature.java rename to code/common/model/java/nu/marginalia/model/crawl/HtmlFeature.java diff --git a/code/common/model/src/main/java/nu/marginalia/model/crawl/PubDate.java b/code/common/model/java/nu/marginalia/model/crawl/PubDate.java similarity index 100% rename 
from code/common/model/src/main/java/nu/marginalia/model/crawl/PubDate.java rename to code/common/model/java/nu/marginalia/model/crawl/PubDate.java diff --git a/code/common/model/src/main/java/nu/marginalia/model/crawl/UrlIndexingState.java b/code/common/model/java/nu/marginalia/model/crawl/UrlIndexingState.java similarity index 89% rename from code/common/model/src/main/java/nu/marginalia/model/crawl/UrlIndexingState.java rename to code/common/model/java/nu/marginalia/model/crawl/UrlIndexingState.java index f4312480..25cc6265 100644 --- a/code/common/model/src/main/java/nu/marginalia/model/crawl/UrlIndexingState.java +++ b/code/common/model/java/nu/marginalia/model/crawl/UrlIndexingState.java @@ -5,6 +5,6 @@ public enum UrlIndexingState { OK, REDIRECT, DEAD, - DISQUALIFIED; + DISQUALIFIED } diff --git a/code/common/model/src/main/java/nu/marginalia/model/gson/GsonFactory.java b/code/common/model/java/nu/marginalia/model/gson/GsonFactory.java similarity index 100% rename from code/common/model/src/main/java/nu/marginalia/model/gson/GsonFactory.java rename to code/common/model/java/nu/marginalia/model/gson/GsonFactory.java diff --git a/code/common/model/src/main/java/nu/marginalia/model/html/HtmlStandard.java b/code/common/model/java/nu/marginalia/model/html/HtmlStandard.java similarity index 100% rename from code/common/model/src/main/java/nu/marginalia/model/html/HtmlStandard.java rename to code/common/model/java/nu/marginalia/model/html/HtmlStandard.java diff --git a/code/common/model/src/main/java/nu/marginalia/model/id/UrlIdCodec.java b/code/common/model/java/nu/marginalia/model/id/UrlIdCodec.java similarity index 100% rename from code/common/model/src/main/java/nu/marginalia/model/id/UrlIdCodec.java rename to code/common/model/java/nu/marginalia/model/id/UrlIdCodec.java diff --git a/code/common/model/src/main/java/nu/marginalia/model/idx/DocumentFlags.java b/code/common/model/java/nu/marginalia/model/idx/DocumentFlags.java similarity index 100% rename from 
code/common/model/src/main/java/nu/marginalia/model/idx/DocumentFlags.java rename to code/common/model/java/nu/marginalia/model/idx/DocumentFlags.java diff --git a/code/common/model/src/main/java/nu/marginalia/model/idx/DocumentMetadata.java b/code/common/model/java/nu/marginalia/model/idx/DocumentMetadata.java similarity index 100% rename from code/common/model/src/main/java/nu/marginalia/model/idx/DocumentMetadata.java rename to code/common/model/java/nu/marginalia/model/idx/DocumentMetadata.java diff --git a/code/common/model/src/main/java/nu/marginalia/model/idx/WordFlags.java b/code/common/model/java/nu/marginalia/model/idx/WordFlags.java similarity index 100% rename from code/common/model/src/main/java/nu/marginalia/model/idx/WordFlags.java rename to code/common/model/java/nu/marginalia/model/idx/WordFlags.java diff --git a/code/common/model/src/main/java/nu/marginalia/model/idx/WordMetadata.java b/code/common/model/java/nu/marginalia/model/idx/WordMetadata.java similarity index 100% rename from code/common/model/src/main/java/nu/marginalia/model/idx/WordMetadata.java rename to code/common/model/java/nu/marginalia/model/idx/WordMetadata.java diff --git a/code/common/model/src/main/java/nu/marginalia/util/QueryParams.java b/code/common/model/java/nu/marginalia/util/QueryParams.java similarity index 100% rename from code/common/model/src/main/java/nu/marginalia/util/QueryParams.java rename to code/common/model/java/nu/marginalia/util/QueryParams.java diff --git a/code/common/model/src/main/java/nu/marginalia/util/StringPool.java b/code/common/model/java/nu/marginalia/util/StringPool.java similarity index 100% rename from code/common/model/src/main/java/nu/marginalia/util/StringPool.java rename to code/common/model/java/nu/marginalia/util/StringPool.java diff --git a/code/common/model/readme.md b/code/common/model/readme.md index 84337753..d07bb4fa 100644 --- a/code/common/model/readme.md +++ b/code/common/model/readme.md @@ -4,9 +4,9 @@ This package contains 
common models to the search engine ## Central Classes -* [EdgeDomain](src/main/java/nu/marginalia/model/EdgeDomain.java) -* [EdgeUrl](src/main/java/nu/marginalia/model/EdgeUrl.java) -* [DocumentMetadata](src/main/java/nu/marginalia/model/idx/DocumentMetadata.java) -* [DocumentFlags](src/main/java/nu/marginalia/model/idx/DocumentFlags.java) -* [WordMetadata](src/main/java/nu/marginalia/model/idx/WordMetadata.java) -* [WordFlags](src/main/java/nu/marginalia/model/idx/WordFlags.java) \ No newline at end of file +* [EdgeDomain](java/nu/marginalia/model/EdgeDomain.java) +* [EdgeUrl](java/nu/marginalia/model/EdgeUrl.java) +* [DocumentMetadata](java/nu/marginalia/model/idx/DocumentMetadata.java) +* [DocumentFlags](java/nu/marginalia/model/idx/DocumentFlags.java) +* [WordMetadata](java/nu/marginalia/model/idx/WordMetadata.java) +* [WordFlags](java/nu/marginalia/model/idx/WordFlags.java) \ No newline at end of file diff --git a/code/common/model/src/test/java/nu/marginalia/model/DocumentMetadataTest.java b/code/common/model/test/nu/marginalia/model/DocumentMetadataTest.java similarity index 100% rename from code/common/model/src/test/java/nu/marginalia/model/DocumentMetadataTest.java rename to code/common/model/test/nu/marginalia/model/DocumentMetadataTest.java diff --git a/code/common/model/src/test/java/nu/marginalia/model/EdgeDomainTest.java b/code/common/model/test/nu/marginalia/model/EdgeDomainTest.java similarity index 100% rename from code/common/model/src/test/java/nu/marginalia/model/EdgeDomainTest.java rename to code/common/model/test/nu/marginalia/model/EdgeDomainTest.java diff --git a/code/common/model/src/test/java/nu/marginalia/model/EdgeUrlTest.java b/code/common/model/test/nu/marginalia/model/EdgeUrlTest.java similarity index 100% rename from code/common/model/src/test/java/nu/marginalia/model/EdgeUrlTest.java rename to code/common/model/test/nu/marginalia/model/EdgeUrlTest.java diff --git 
a/code/common/model/src/test/java/nu/marginalia/model/WordMetadataTest.java b/code/common/model/test/nu/marginalia/model/WordMetadataTest.java similarity index 100% rename from code/common/model/src/test/java/nu/marginalia/model/WordMetadataTest.java rename to code/common/model/test/nu/marginalia/model/WordMetadataTest.java diff --git a/code/common/model/src/test/java/nu/marginalia/model/id/UrlIdCodecTest.java b/code/common/model/test/nu/marginalia/model/id/UrlIdCodecTest.java similarity index 100% rename from code/common/model/src/test/java/nu/marginalia/model/id/UrlIdCodecTest.java rename to code/common/model/test/nu/marginalia/model/id/UrlIdCodecTest.java diff --git a/code/common/process/build.gradle b/code/common/process/build.gradle index fb68b32f..7fcff1ab 100644 --- a/code/common/process/build.gradle +++ b/code/common/process/build.gradle @@ -10,6 +10,8 @@ java { } } +apply from: "$rootProject.projectDir/srcsets.gradle" + dependencies { implementation libs.notnull diff --git a/code/common/process/src/main/java/nu/marginalia/ProcessConfiguration.java b/code/common/process/java/nu/marginalia/ProcessConfiguration.java similarity index 100% rename from code/common/process/src/main/java/nu/marginalia/ProcessConfiguration.java rename to code/common/process/java/nu/marginalia/ProcessConfiguration.java diff --git a/code/common/process/src/main/java/nu/marginalia/ProcessConfigurationModule.java b/code/common/process/java/nu/marginalia/ProcessConfigurationModule.java similarity index 89% rename from code/common/process/src/main/java/nu/marginalia/ProcessConfigurationModule.java rename to code/common/process/java/nu/marginalia/ProcessConfigurationModule.java index 3d0023c0..cf87cdf8 100644 --- a/code/common/process/src/main/java/nu/marginalia/ProcessConfigurationModule.java +++ b/code/common/process/java/nu/marginalia/ProcessConfigurationModule.java @@ -1,12 +1,8 @@ package nu.marginalia; import com.google.inject.AbstractModule; -import com.google.inject.Provides; 
-import com.google.inject.Singleton; -import com.google.inject.name.Named; import com.google.inject.name.Names; -import java.util.Objects; import java.util.UUID; public class ProcessConfigurationModule extends AbstractModule { diff --git a/code/common/process/src/main/java/nu/marginalia/process/control/FakeProcessHeartbeat.java b/code/common/process/java/nu/marginalia/process/control/FakeProcessHeartbeat.java similarity index 100% rename from code/common/process/src/main/java/nu/marginalia/process/control/FakeProcessHeartbeat.java rename to code/common/process/java/nu/marginalia/process/control/FakeProcessHeartbeat.java diff --git a/code/common/process/src/main/java/nu/marginalia/process/control/ProcessAdHocTaskHeartbeat.java b/code/common/process/java/nu/marginalia/process/control/ProcessAdHocTaskHeartbeat.java similarity index 100% rename from code/common/process/src/main/java/nu/marginalia/process/control/ProcessAdHocTaskHeartbeat.java rename to code/common/process/java/nu/marginalia/process/control/ProcessAdHocTaskHeartbeat.java diff --git a/code/common/process/src/main/java/nu/marginalia/process/control/ProcessAdHocTaskHeartbeatImpl.java b/code/common/process/java/nu/marginalia/process/control/ProcessAdHocTaskHeartbeatImpl.java similarity index 100% rename from code/common/process/src/main/java/nu/marginalia/process/control/ProcessAdHocTaskHeartbeatImpl.java rename to code/common/process/java/nu/marginalia/process/control/ProcessAdHocTaskHeartbeatImpl.java diff --git a/code/common/process/src/main/java/nu/marginalia/process/control/ProcessHeartbeat.java b/code/common/process/java/nu/marginalia/process/control/ProcessHeartbeat.java similarity index 100% rename from code/common/process/src/main/java/nu/marginalia/process/control/ProcessHeartbeat.java rename to code/common/process/java/nu/marginalia/process/control/ProcessHeartbeat.java diff --git a/code/common/process/src/main/java/nu/marginalia/process/control/ProcessHeartbeatImpl.java 
b/code/common/process/java/nu/marginalia/process/control/ProcessHeartbeatImpl.java similarity index 100% rename from code/common/process/src/main/java/nu/marginalia/process/control/ProcessHeartbeatImpl.java rename to code/common/process/java/nu/marginalia/process/control/ProcessHeartbeatImpl.java diff --git a/code/common/process/src/main/java/nu/marginalia/process/control/ProcessTaskHeartbeat.java b/code/common/process/java/nu/marginalia/process/control/ProcessTaskHeartbeat.java similarity index 100% rename from code/common/process/src/main/java/nu/marginalia/process/control/ProcessTaskHeartbeat.java rename to code/common/process/java/nu/marginalia/process/control/ProcessTaskHeartbeat.java diff --git a/code/common/process/src/main/java/nu/marginalia/process/control/ProcessTaskHeartbeatImpl.java b/code/common/process/java/nu/marginalia/process/control/ProcessTaskHeartbeatImpl.java similarity index 100% rename from code/common/process/src/main/java/nu/marginalia/process/control/ProcessTaskHeartbeatImpl.java rename to code/common/process/java/nu/marginalia/process/control/ProcessTaskHeartbeatImpl.java diff --git a/code/common/process/src/main/java/nu/marginalia/process/log/WorkLoadIterable.java b/code/common/process/java/nu/marginalia/process/log/WorkLoadIterable.java similarity index 100% rename from code/common/process/src/main/java/nu/marginalia/process/log/WorkLoadIterable.java rename to code/common/process/java/nu/marginalia/process/log/WorkLoadIterable.java diff --git a/code/common/process/src/main/java/nu/marginalia/process/log/WorkLog.java b/code/common/process/java/nu/marginalia/process/log/WorkLog.java similarity index 100% rename from code/common/process/src/main/java/nu/marginalia/process/log/WorkLog.java rename to code/common/process/java/nu/marginalia/process/log/WorkLog.java diff --git a/code/common/process/src/main/java/nu/marginalia/process/log/WorkLogEntry.java b/code/common/process/java/nu/marginalia/process/log/WorkLogEntry.java similarity index 
100% rename from code/common/process/src/main/java/nu/marginalia/process/log/WorkLogEntry.java rename to code/common/process/java/nu/marginalia/process/log/WorkLogEntry.java diff --git a/code/common/process/src/main/resources/log4j2.properties b/code/common/process/resources/log4j2.properties similarity index 100% rename from code/common/process/src/main/resources/log4j2.properties rename to code/common/process/resources/log4j2.properties diff --git a/code/common/process/src/test/java/nu/marginalia/process/log/WorkLogTest.java b/code/common/process/test/nu/marginalia/process/log/WorkLogTest.java similarity index 100% rename from code/common/process/src/test/java/nu/marginalia/process/log/WorkLogTest.java rename to code/common/process/test/nu/marginalia/process/log/WorkLogTest.java diff --git a/code/common/readme.md b/code/common/readme.md index 120d55f9..b6329457 100644 --- a/code/common/readme.md +++ b/code/common/readme.md @@ -7,6 +7,5 @@ as shared models. * [config](config/) contains some `@Inject`ables. * [renderer](renderer/) contains utility code for rendering website templates. * [service](service/) is the shared base classes for main methods and web services. -* [service-client](service-client/) is the shared base class for RPC. -* [service-discovery](service-discovery) contains tools that lets the services find each other. +* [service-discovery](service-discovery) contains tools that lets the services find each other and communicate. * [process](process/) contains boiler plate for batch processes. 
diff --git a/code/common/renderer/build.gradle b/code/common/renderer/build.gradle index b40aaee2..83957546 100644 --- a/code/common/renderer/build.gradle +++ b/code/common/renderer/build.gradle @@ -11,6 +11,8 @@ java { languageVersion.set(JavaLanguageVersion.of(21)) } } +apply from: "$rootProject.projectDir/srcsets.gradle" + dependencies { implementation libs.bundles.slf4j diff --git a/code/common/renderer/src/main/java/nu/marginalia/renderer/MustacheRenderer.java b/code/common/renderer/java/nu/marginalia/renderer/MustacheRenderer.java similarity index 96% rename from code/common/renderer/src/main/java/nu/marginalia/renderer/MustacheRenderer.java rename to code/common/renderer/java/nu/marginalia/renderer/MustacheRenderer.java index 0dae086c..f55cdecb 100644 --- a/code/common/renderer/src/main/java/nu/marginalia/renderer/MustacheRenderer.java +++ b/code/common/renderer/java/nu/marginalia/renderer/MustacheRenderer.java @@ -8,10 +8,8 @@ import lombok.SneakyThrows; import nu.marginalia.renderer.config.HandlebarsConfigurator; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import spark.Response; import java.io.*; -import java.nio.charset.StandardCharsets; import java.util.List; import java.util.Map; diff --git a/code/common/renderer/src/main/java/nu/marginalia/renderer/RendererFactory.java b/code/common/renderer/java/nu/marginalia/renderer/RendererFactory.java similarity index 100% rename from code/common/renderer/src/main/java/nu/marginalia/renderer/RendererFactory.java rename to code/common/renderer/java/nu/marginalia/renderer/RendererFactory.java diff --git a/code/common/renderer/src/main/java/nu/marginalia/renderer/RenderingException.java b/code/common/renderer/java/nu/marginalia/renderer/RenderingException.java similarity index 100% rename from code/common/renderer/src/main/java/nu/marginalia/renderer/RenderingException.java rename to code/common/renderer/java/nu/marginalia/renderer/RenderingException.java diff --git 
a/code/common/renderer/src/main/java/nu/marginalia/renderer/config/DefaultHandlebarsConfigurator.java b/code/common/renderer/java/nu/marginalia/renderer/config/DefaultHandlebarsConfigurator.java similarity index 100% rename from code/common/renderer/src/main/java/nu/marginalia/renderer/config/DefaultHandlebarsConfigurator.java rename to code/common/renderer/java/nu/marginalia/renderer/config/DefaultHandlebarsConfigurator.java diff --git a/code/common/renderer/src/main/java/nu/marginalia/renderer/config/HandlebarsConfigurator.java b/code/common/renderer/java/nu/marginalia/renderer/config/HandlebarsConfigurator.java similarity index 100% rename from code/common/renderer/src/main/java/nu/marginalia/renderer/config/HandlebarsConfigurator.java rename to code/common/renderer/java/nu/marginalia/renderer/config/HandlebarsConfigurator.java diff --git a/code/common/renderer/readme.md b/code/common/renderer/readme.md index 3c34830e..ff80af06 100644 --- a/code/common/renderer/readme.md +++ b/code/common/renderer/readme.md @@ -4,4 +4,4 @@ Renders handlebar-style templates for the user-facing services. ## Central Classes -* [Mustache Renderer](src/main/java/nu/marginalia/renderer/MustacheRenderer.java) \ No newline at end of file +* [Mustache Renderer](java/nu/marginalia/renderer/MustacheRenderer.java) \ No newline at end of file diff --git a/code/common/service-client/readme.md b/code/common/service-client/readme.md deleted file mode 100644 index 09c535b2..00000000 --- a/code/common/service-client/readme.md +++ /dev/null @@ -1,10 +0,0 @@ -# Service Client - -These are base classes for all the [API](../../api) clients for talking to other [services](../service). 
- -## Central Classes - -* [AbstractDynamicClient](src/main/java/nu/marginalia/client/AbstractDynamicClient.java) base class for API clients -* [AbstractClient](src/main/java/nu/marginalia/client/AbstractClient.java) handles requests at a lower level -* [Context](src/main/java/nu/marginalia/client/Context.java) handles request tracking -* [ContextScrambler](src/main/java/nu/marginalia/client/ContextScrambler.java) handles anonymization of public IPs \ No newline at end of file diff --git a/code/common/service-client/src/main/java/nu/marginalia/client/AbortingScheduler.java b/code/common/service-client/src/main/java/nu/marginalia/client/AbortingScheduler.java deleted file mode 100644 index d603a546..00000000 --- a/code/common/service-client/src/main/java/nu/marginalia/client/AbortingScheduler.java +++ /dev/null @@ -1,49 +0,0 @@ -package nu.marginalia.client; - -import io.reactivex.rxjava3.core.Scheduler; -import io.reactivex.rxjava3.schedulers.Schedulers; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import javax.annotation.Nonnull; -import javax.annotation.Nullable; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; - -public class AbortingScheduler { - private final Logger logger = LoggerFactory.getLogger(getClass()); - - @Nullable - private ExecutorService executorService; - - public AbortingScheduler() { - } - - - public synchronized Scheduler get() { - return Schedulers.from(getExecutorService(), - true, - false); - } - - public synchronized void abort() { - if (null != executorService) { - executorService.shutdownNow(); - executorService = Executors.newVirtualThreadPerTaskExecutor(); - } - } - - @Nonnull - private synchronized ExecutorService getExecutorService() { - if (null == executorService) { - executorService = Executors.newVirtualThreadPerTaskExecutor(); - } - return executorService; - } - - public synchronized void close() { - if (null != executorService) { - executorService.shutdown(); - } - } -} diff 
--git a/code/common/service-client/src/main/java/nu/marginalia/client/AbstractClient.java b/code/common/service-client/src/main/java/nu/marginalia/client/AbstractClient.java deleted file mode 100644 index 697671f0..00000000 --- a/code/common/service-client/src/main/java/nu/marginalia/client/AbstractClient.java +++ /dev/null @@ -1,438 +0,0 @@ -package nu.marginalia.client; - -import com.google.gson.Gson; -import com.google.gson.reflect.TypeToken; -import com.google.protobuf.GeneratedMessageV3; -import io.reactivex.rxjava3.core.Observable; -import io.reactivex.rxjava3.core.ObservableSource; -import io.reactivex.rxjava3.plugins.RxJavaPlugins; -import lombok.SneakyThrows; -import nu.marginalia.client.exception.LocalException; -import nu.marginalia.client.exception.NetworkException; -import nu.marginalia.client.exception.RemoteException; -import nu.marginalia.client.exception.RouteNotConfiguredException; -import nu.marginalia.client.route.RouteProvider; -import nu.marginalia.client.route.ServiceRoutes; -import nu.marginalia.service.descriptor.ServiceDescriptor; -import okhttp3.*; -import org.apache.logging.log4j.ThreadContext; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import spark.utils.IOUtils; - -import java.io.OutputStream; -import java.lang.reflect.Type; -import java.net.ConnectException; -import java.util.concurrent.TimeUnit; -import java.util.function.Supplier; - -public abstract class AbstractClient implements AutoCloseable { - private final Logger logger = LoggerFactory.getLogger(getClass()); - - public static final String CONTEXT_OUTBOUND_REQUEST = "outbound-request"; - private final Gson gson; - private final OkHttpClient client; - - private boolean quiet; - final ServiceRoutes serviceRoutes; - private int timeout; - - private final EndpointLivenessMonitor livenessMonitor; - - public void setTimeout(int timeout) { - this.timeout = timeout; - } - - public AbstractClient(ServiceDescriptor service, int timeout, Supplier gsonProvider) { - 
this(new RouteProvider(service), timeout, gsonProvider); - } - - public AbstractClient(RouteProvider routeProvider, - int timeout, - Supplier gsonProvider) - { - this.gson = gsonProvider.get(); - - this.timeout = timeout; - client = new OkHttpClient.Builder() - .connectTimeout(100, TimeUnit.MILLISECONDS) - .readTimeout(6000, TimeUnit.SECONDS) - .retryOnConnectionFailure(true) - .followRedirects(true) - .build(); - - serviceRoutes = new ServiceRoutes(routeProvider); - - RxJavaPlugins.setErrorHandler(e -> { - if (e.getMessage() == null) { - logger.error("Error", e); - } - else { - logger.error("Error {}: {}", e.getClass().getSimpleName(), e.getMessage()); - } - }); - - logger.info("Finished creating client for {}", getClass().getSimpleName()); - livenessMonitor = new EndpointLivenessMonitor(this); - } - - @Override - public void close() { - livenessMonitor.close(); - scheduler().close(); - } - - public abstract AbortingScheduler scheduler(); - - public void setQuiet(boolean quiet) { - this.quiet = quiet; - } - - public abstract String name(); - - - public synchronized boolean isAccepting() { - Context ctx = Context.internal("ready"); - - var req = ctx.paint(new Request.Builder()).url(serviceRoutes.get(0) + "/internal/ready").get().build(); - - return Observable.just(client.newCall(req)) - .subscribeOn(scheduler().get()) - .map(Call::execute) - .map(this::getResponseStatus) - .flatMap(line -> validateStatus(line, req)) - .timeout(100, TimeUnit.MILLISECONDS) - .onErrorReturn(error -> 500) - .map(HttpStatusCode::new) - .map(HttpStatusCode::isGood) - .blockingFirst(); - } - - public synchronized boolean isResponsive(int node) { - Context ctx = Context.internal("ping"); - var req = ctx.paint(new Request.Builder()).url(serviceRoutes.get(node) + "/internal/ping").get().build(); - return Observable.just(client.newCall(req)) - .subscribeOn(scheduler().get()) - .map(Call::execute) - .map(AbstractClient.this::getResponseStatus) - .flatMap(line -> validateStatus(line, 
req).timeout(5000, TimeUnit.SECONDS).onErrorReturn(e -> 500)) - .onErrorReturn(error -> 500) - .map(HttpStatusCode::new) - .map(HttpStatusCode::isGood) - .blockingFirst(); - } - - @SneakyThrows - protected synchronized Observable post(Context ctx, - int node, - String endpoint, - Object data) { - - ensureAlive(node); - - RequestBody body = RequestBody.create(json(data), MediaType.parse("application/json; charset=utf-8")); - - var req = ctx.paint(new Request.Builder()).url(serviceRoutes.get(node) + endpoint).post(body).build(); - - return Observable - .just(client.newCall(req)) - .subscribeOn(scheduler().get()) - .map(this::logInbound) - .map(Call::execute) - .map(this::logOutbound) - .map(this::getResponseStatus) - .retryWhen(this::retryHandler) - .flatMap(line -> validateStatus(line, req)) - .map(HttpStatusCode::new) - .timeout(timeout, TimeUnit.SECONDS) - .doFinally(() -> ThreadContext.remove("outbound-request")); - } - - @SneakyThrows - protected synchronized Observable post(Context ctx, int node, String endpoint, GeneratedMessageV3 data) { - - ensureAlive(node); - - RequestBody body = RequestBody.create(data.toByteArray(), MediaType.parse("application/protobuf")); - - var req = ctx.paint(new Request.Builder()).url(serviceRoutes.get(node) + endpoint).post(body).build(); - var call = client.newCall(req); - - logInbound(call); - ThreadContext.put("outbound-request", serviceRoutes.get(node) + endpoint); - try (var rsp = call.execute()) { - logOutbound(rsp); - int code = rsp.code(); - - return validateStatus(code, req).map(HttpStatusCode::new); - } - finally { - ThreadContext.remove("outbound-request"); - } - } - - - @SneakyThrows - protected synchronized Observable postGet(Context ctx, int node, String endpoint, Object data, Class returnType) { - - ensureAlive(node); - - RequestBody body = RequestBody.create(json(data), MediaType.parse("application/json")); - var req = ctx.paint(new Request.Builder()).url(serviceRoutes.get(node) + endpoint).post(body).build(); - - 
return Observable.just(client.newCall(req)) - .subscribeOn(scheduler().get()) - .map(this::logInbound) - .map(Call::execute) - .map(this::logOutbound) - .retryWhen(this::retryHandler) - .map(rsp -> validateResponseStatus(rsp, req, 200)) - .map(rsp -> getEntity(rsp, returnType)) - .timeout(timeout, TimeUnit.SECONDS) - .doFinally(() -> ThreadContext.remove("outbound-request")); - } - - protected synchronized Observable post(Context ctx, int node, String endpoint, String data, MediaType mediaType) { - ensureAlive(node); - - var body = RequestBody.create(data, mediaType); - - var req = ctx.paint(new Request.Builder()).url(serviceRoutes.get(node) + endpoint).post(body).build(); - var call = client.newCall(req); - - - return Observable.just(call) - .map((c) -> { - ThreadContext.put(CONTEXT_OUTBOUND_REQUEST, serviceRoutes.get(node) + endpoint); - return c; - }) - .subscribeOn(scheduler().get()) - .map(this::logInbound) - .map(Call::execute) - .map(this::logOutbound) - .map(this::getResponseStatus) - .retryWhen(this::retryHandler) - .flatMap(line -> validateStatus(line, req)) - .map(HttpStatusCode::new) - .timeout(timeout, TimeUnit.SECONDS) - .doFinally(() -> ThreadContext.remove("outbound-request")); - } - - protected synchronized Observable get(Context ctx, int node, String endpoint, Class type) { - ensureAlive(node); - - var req = ctx.paint(new Request.Builder()).url(serviceRoutes.get(node) + endpoint).get().build(); - - return Observable.just(client.newCall(req)) - .subscribeOn(scheduler().get()) - .map(this::logInbound) - .map(Call::execute) - .map(this::logOutbound) - .map(rsp -> validateResponseStatus(rsp, req, 200)) - .map(rsp -> getEntity(rsp, type)) - .retryWhen(this::retryHandler) - .timeout(timeout, TimeUnit.SECONDS) - .doFinally(() -> ThreadContext.remove("outbound-request")); - } - - protected synchronized Observable get(Context ctx, int node, String endpoint, TypeToken type) { - ensureAlive(node); - - var req = ctx.paint(new 
Request.Builder()).url(serviceRoutes.get(node) + endpoint).get().build(); - - return Observable.just(client.newCall(req)) - .subscribeOn(scheduler().get()) - .map(this::logInbound) - .map(Call::execute) - .map(this::logOutbound) - .map(rsp -> validateResponseStatus(rsp, req, 200)) - .map(rsp -> getEntity(rsp, type)) - .retryWhen(this::retryHandler) - .timeout(timeout, TimeUnit.SECONDS) - .doFinally(() -> ThreadContext.remove("outbound-request")); - } - protected synchronized Observable get(Context ctx, int node, String endpoint, OutputStream outputStream) { - ensureAlive(node); - - var req = ctx.paint(new Request.Builder()).url(serviceRoutes.get(node) + endpoint).get().build(); - - return Observable.just(client.newCall(req)) - .subscribeOn(scheduler().get()) - .map(this::logInbound) - .map(Call::execute) - .map(this::logOutbound) - .map(rsp -> validateResponseStatus(rsp, req, 200)) - .map(rsp -> copyToOutputStream(rsp, outputStream)) - .retryWhen(this::retryHandler) - .timeout(timeout, TimeUnit.SECONDS) - .doFinally(() -> ThreadContext.remove("outbound-request")); - } - - @SuppressWarnings("unchecked") - protected synchronized Observable get(Context ctx, int node, String endpoint) { - ensureAlive(node); - - var req = ctx.paint(new Request.Builder()).url(serviceRoutes.get(node) + endpoint).get().build(); - - return Observable.just(client.newCall(req)) - .subscribeOn(scheduler().get()) - .map(this::logInbound) - .map(Call::execute) - .map(this::logOutbound) - .map(rsp -> validateResponseStatus(rsp, req,200)) - .map(this::getText) - .retryWhen(this::retryHandler) - .timeout(timeout, TimeUnit.SECONDS) - .doFinally(() -> ThreadContext.remove("outbound-request")); - } - - protected synchronized Observable delete(Context ctx, int node, String endpoint) { - ensureAlive(node); - - var req = ctx.paint(new Request.Builder()).url(serviceRoutes.get(node) + endpoint).delete().build(); - - return Observable.just(client.newCall(req)) - .subscribeOn(scheduler().get()) - 
.map(this::logInbound) - .map(Call::execute) - .map(this::logOutbound) - .map(this::getResponseStatus) - .flatMap(line -> validateStatus(line, req)) - .map(HttpStatusCode::new) - .retryWhen(this::retryHandler) - .timeout(timeout, TimeUnit.SECONDS) - .doFinally(() -> ThreadContext.remove("outbound-request")); - } - - - @SneakyThrows - private Call logInbound(Call outgoing) { - return outgoing; - } - - @SneakyThrows - private Response logOutbound(Response incoming) { - return incoming; - } - - @SneakyThrows - private void ensureAlive(int node) { - if (!isAlive(node)) { - var route = serviceRoutes.get(node); - - logger.error("Route not configured for {}:{}", name(), node); - - throw new RouteNotConfiguredException("Route not configured for " + name() + ":" + node + " -- tried " + route); - } - } - - - private ObservableSource retryHandler(Observable error) { - return error.flatMap(this::filterRetryableExceptions); - } - - private Observable filterRetryableExceptions(Throwable error) throws Throwable { - - synchronized (livenessMonitor) { - // Signal to the liveness monitor that we may have an outage - livenessMonitor.notifyAll(); - } - - if (error.getClass().equals(RouteNotConfiguredException.class)) { - logger.error("Network error {}", error.getMessage()); - return Observable.empty().delay(50, TimeUnit.MILLISECONDS); - } - else if (error.getClass().equals(NetworkException.class)) { - logger.error("Network error {}", error.getMessage()); - return Observable.empty().delay(1, TimeUnit.SECONDS); - } - else if (error.getClass().equals(ConnectException.class)) { - logger.error("Network error {}", error.getMessage()); - return Observable.empty().delay(1, TimeUnit.SECONDS); - } - - if (!quiet) { - if (error.getMessage() != null) { - logger.error("{} {}", error.getClass().getSimpleName(), error.getMessage()); - } - else { - logger.error("Error ", error); - } - } - throw error; - } - - private Observable validateStatus(int status, Request request) { - if (status == 
org.apache.http.HttpStatus.SC_OK) - return Observable.just(status); - if (status == org.apache.http.HttpStatus.SC_ACCEPTED) - return Observable.just(status); - if (status == org.apache.http.HttpStatus.SC_CREATED) - return Observable.just(status); - - return Observable.error(new RemoteException(name() + " responded status code " + status + " " + request.url())); - } - - private Response validateResponseStatus(Response response, Request req, int expected) { - if (expected != response.code()) { - response.close(); - - throw new RemoteException(name() + " responded status code " + response.code() + ", " + req.method() + " " + req.url().toString()); - } - return response; - } - - private int getResponseStatus(Response response) { - try (response) { - return response.code(); - } - } - - - @SneakyThrows - private Integer copyToOutputStream(Response response, OutputStream outputStream) { - try (response) { - return IOUtils.copy(response.body().byteStream(), outputStream); - } - } - - @SneakyThrows - private T getEntity(Response response, Class clazz) { - try (response) { - return gson.fromJson(response.body().charStream(), clazz); - } - catch (Exception ex) { - throw ex; - } - } - @SneakyThrows - private T getEntity(Response response, TypeToken clazz) { - try (response) { - return gson.fromJson(response.body().charStream(), clazz); - } - catch (Exception ex) { - throw ex; - } - } - @SneakyThrows - private String getText(Response response) { - try (response) { - return response.body().string(); - } - - } - - public boolean isAlive(int node) { - return livenessMonitor.isAlive(node); - } - - private String json(Object o) { - try { - return gson.toJson(o); - } - catch (Exception ex) { - throw new LocalException(ex); - } - } - -} diff --git a/code/common/service-client/src/main/java/nu/marginalia/client/AbstractDynamicClient.java b/code/common/service-client/src/main/java/nu/marginalia/client/AbstractDynamicClient.java deleted file mode 100644 index 5d2e3ef6..00000000 --- 
a/code/common/service-client/src/main/java/nu/marginalia/client/AbstractDynamicClient.java +++ /dev/null @@ -1,38 +0,0 @@ -package nu.marginalia.client; - -import com.google.gson.Gson; -import nu.marginalia.service.descriptor.ServiceDescriptor; - -import javax.annotation.Nonnull; -import java.util.function.Supplier; - -public class AbstractDynamicClient extends AbstractClient { - private final ServiceDescriptor service; - private final AbortingScheduler scheduler; - - public AbstractDynamicClient(@Nonnull ServiceDescriptor service, Supplier gsonProvider) { - super( - service, - 10000, - gsonProvider - ); - - this.service = service; - this.scheduler = new AbortingScheduler(); - } - - @Override - public String name() { - return service.name; - } - - public ServiceDescriptor getService() { - return service; - } - - @Override - public AbortingScheduler scheduler() { - return scheduler; - } - -} diff --git a/code/common/service-client/src/main/java/nu/marginalia/client/Context.java b/code/common/service-client/src/main/java/nu/marginalia/client/Context.java deleted file mode 100644 index 101f4c2e..00000000 --- a/code/common/service-client/src/main/java/nu/marginalia/client/Context.java +++ /dev/null @@ -1,89 +0,0 @@ -package nu.marginalia.client; - -import io.reactivex.rxjava3.schedulers.Schedulers; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import spark.Request; - -import java.util.*; -import java.util.concurrent.TimeUnit; - -public class Context { - public static final String CONTEXT_HEADER = "X-Context"; - public static final String SESSION_HEADER = "Cookie"; - public static final String PUBLIC_HEADER = "X-Public"; - - private final String id; - private final String session; - private boolean treatAsPublic; - - private Context(String id, String session) { - this.id = Objects.requireNonNull(id, "Context missing"); - this.session = session; - } - - public Context treatAsPublic() { - this.treatAsPublic = true; - return this; - } - - public static 
Context internal() { - return new Context(UUID.randomUUID().toString(), null); - } - public static Context internal(String why) { - return new Context(why + ":" + System.nanoTime(), null); - } - - public static Context fromRequest(Request request) { - - if (Boolean.getBoolean("unit-test")) { - return Context.internal(); - } - - final var ctxHeader = anonymizeContext(request); - final var sessHeader = request.headers(SESSION_HEADER); - - return new Context(ctxHeader, sessHeader); - } - - private static String anonymizeContext(Request request) { - String header = request.headers(CONTEXT_HEADER); - if (header != null && header.contains("-") && !header.startsWith("#")) { - // The public X-Context header contains info that traces to the - // external user's IP. Anonymize this by running it through a - // hash code blender with rotating salt - - return ContextScrambler.anonymize(header, request); - } - else if (header != null) { - return header; - } - else { - // When no X-Context is provided, synthesize one from path - return request.pathInfo() + ":" + Thread.currentThread().getId(); - } - } - - public okhttp3.Request.Builder paint(okhttp3.Request.Builder requestBuilder) { - requestBuilder.addHeader(CONTEXT_HEADER, id); - - if (session != null) { - requestBuilder.addHeader(SESSION_HEADER, session); - } - - if (treatAsPublic) { - requestBuilder.header(PUBLIC_HEADER, "1"); - } - - return requestBuilder; - } - - public String getContextId() { - return id; - } - - public boolean isPublic() { - return id.startsWith("#"); - } - -} \ No newline at end of file diff --git a/code/common/service-client/src/main/java/nu/marginalia/client/ContextScrambler.java b/code/common/service-client/src/main/java/nu/marginalia/client/ContextScrambler.java deleted file mode 100644 index c10b5389..00000000 --- a/code/common/service-client/src/main/java/nu/marginalia/client/ContextScrambler.java +++ /dev/null @@ -1,85 +0,0 @@ -package nu.marginalia.client; - -import 
com.google.common.hash.HashFunction; -import com.google.common.hash.Hashing; -import io.reactivex.rxjava3.schedulers.Schedulers; -import spark.Request; - -import java.util.Arrays; -import java.util.Objects; -import java.util.Random; -import java.util.concurrent.TimeUnit; - -public class ContextScrambler { - private static final Random random; - private static final HashFunction hf = Hashing.sha512(); - private static volatile byte[] seed = new byte[12]; - - static { - random = new Random(); - int gr = random.nextInt(10000, 20000); - for (int i = 0; i < gr; i++) { - random.nextLong(); - } - random.nextBytes(seed); - - updateSalt(); - } - - /** Anonymize the string by running it through a hash function - * together with a salt that is rotated at random intervals. - *

- * This is probably not cryptographically secure, but should at least - * be fairly annoying to reverse-engineer. - */ - public static String anonymize(String connectionInfo, Request request) { - byte[] hashData = Arrays.copyOf(seed, seed.length+4); - int hashi = Objects.hash(connectionInfo.split("-", 2)[0]); - - for (int i = 0; i < 4; i++) { - hashData[seed.length] = (byte)(hashi & 0xFF); - hashData[seed.length+1] = (byte)(hashi>>>8 & 0xFF); - hashData[seed.length+2] = (byte)(hashi>>>16 & 0xFF); - hashData[seed.length+3] = (byte)(hashi>>>24 & 0xFF); - } - - final int connHash = hf.hashBytes(hashData).asInt(); - final int requestHash = Objects.hash(request.url(), request.queryString()); - - return String.format("#%08x:%08x", connHash, requestHash); - } - - /** Generate a humongous salt with as many moving parts as possible, - * as creating a rainbow table of all IP-addresses is fairly easy - */ - private static byte[] generateSalt() { - byte[] oldHash = seed; - - int hash1 = random.nextInt(); - int hash2 = hf.hashLong(System.nanoTime()).asInt(); - int hash3 = hf.hashBytes(oldHash).asInt(); - - return new byte[]{ - (byte) (hash1 & 0xFF), - (byte) (hash1 >>> 8 & 0xFF), - (byte) (hash1 >>> 16 & 0xFF), - (byte) (hash1 >>> 24 & 0xFF), - (byte) (hash2 & 0xFF), - (byte) (hash2 >>> 8 & 0xFF), - (byte) (hash2 >>> 16 & 0xFF), - (byte) (hash2 >>> 24 & 0xFF), - (byte) (hash3 & 0xFF), - (byte) (hash3 >>> 8 & 0xFF), - (byte) (hash3 >>> 16 & 0xFF), - (byte) (hash3 >>> 24 & 0xFF) - }; - } - - private static void updateSalt() { - seed = generateSalt(); - - int delay = (int) (1000 * (300 + 600*Math.random())); - Schedulers.computation().scheduleDirect(ContextScrambler::updateSalt, delay, TimeUnit.MILLISECONDS); - } - -} diff --git a/code/common/service-client/src/main/java/nu/marginalia/client/EndpointLivenessMonitor.java b/code/common/service-client/src/main/java/nu/marginalia/client/EndpointLivenessMonitor.java deleted file mode 100644 index 4c98b28c..00000000 --- 
a/code/common/service-client/src/main/java/nu/marginalia/client/EndpointLivenessMonitor.java +++ /dev/null @@ -1,84 +0,0 @@ -package nu.marginalia.client; - -import lombok.SneakyThrows; -import nu.marginalia.client.route.ServiceRoutes; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.util.concurrent.ConcurrentHashMap; - -/** Keep tabs on which endpoints are accessible via polling. This permits us to reduce the chances of - * synchronous requests blocking on timeout. - */ -public class EndpointLivenessMonitor { - - private final ConcurrentHashMap alivenessMap = new ConcurrentHashMap<>(); - private final AbstractClient client; - private final ServiceRoutes serviceRoutes; - - private static final Logger logger = LoggerFactory.getLogger(EndpointLivenessMonitor.class); - private static Thread daemonThread; - - public EndpointLivenessMonitor(AbstractClient client) { - this.client = client; - this.serviceRoutes = client.serviceRoutes; - - daemonThread = new Thread(this::run, client.getClass().getSimpleName()+":Liveness"); - daemonThread.setDaemon(true); - daemonThread.start(); - } - - @SneakyThrows - public void run() { - Thread.sleep(100); // Wait for initialization - - try { - while (!Thread.interrupted()) { - if (updateLivenessMap()) { - synchronized (this) { - wait(1000); - } - } - else Thread.sleep(100); - } - } catch (InterruptedException ex) { - // nothing to see here - } - } - - private boolean updateLivenessMap() { - boolean allAlive = true; - - for (int node : serviceRoutes.getNodes()) { - allAlive &= alivenessMap.compute(node, this::isResponsive); - } - - return allAlive; - } - - private boolean isResponsive(int node, Boolean oldValue) { - try { - boolean wasAlive = Boolean.TRUE.equals(oldValue); - boolean isAlive = client.isResponsive(node); - if (wasAlive != isAlive) { - logger.info("Liveness change {}:{} -- {}", client.name(), node, isAlive ? 
"UP":"DOWN"); - } - return isAlive; - } - catch (Exception ex) { - logger.warn("Oops", ex); - return false; - } - } - - public boolean isAlive(int node) { - // compute-if-absence ensures we do a synchronous status check if this is a cold start, - // that way we don't have to wait for the polling loop to find out if the service is up - return alivenessMap.computeIfAbsent(node, client::isResponsive); - } - - - public void close() { - daemonThread.interrupt(); - } -} diff --git a/code/common/service-client/src/main/java/nu/marginalia/client/HttpStatusCode.java b/code/common/service-client/src/main/java/nu/marginalia/client/HttpStatusCode.java deleted file mode 100644 index aa23e71d..00000000 --- a/code/common/service-client/src/main/java/nu/marginalia/client/HttpStatusCode.java +++ /dev/null @@ -1,19 +0,0 @@ -package nu.marginalia.client; - -public final class HttpStatusCode { - public final int code; - - public HttpStatusCode(int code) { - this.code = code; - } - - public boolean isGood() { - if (code == org.apache.http.HttpStatus.SC_OK) - return true; - if (code == org.apache.http.HttpStatus.SC_ACCEPTED) - return true; - if (code == org.apache.http.HttpStatus.SC_CREATED) - return true; - return false; - } -} diff --git a/code/common/service-client/src/main/java/nu/marginalia/client/exception/LocalException.java b/code/common/service-client/src/main/java/nu/marginalia/client/exception/LocalException.java deleted file mode 100644 index bcaa2982..00000000 --- a/code/common/service-client/src/main/java/nu/marginalia/client/exception/LocalException.java +++ /dev/null @@ -1,15 +0,0 @@ -package nu.marginalia.client.exception; - -public class LocalException extends MessagingException { - public LocalException() { - } - public LocalException(String message) { - super(message); - } - public LocalException(Throwable cause) { - super(cause); - } - public LocalException(String message, Throwable cause) { - super(message, cause); - } -} diff --git 
a/code/common/service-client/src/main/java/nu/marginalia/client/exception/MessagingException.java b/code/common/service-client/src/main/java/nu/marginalia/client/exception/MessagingException.java deleted file mode 100644 index 6151381e..00000000 --- a/code/common/service-client/src/main/java/nu/marginalia/client/exception/MessagingException.java +++ /dev/null @@ -1,20 +0,0 @@ -package nu.marginalia.client.exception; - -public class MessagingException extends RuntimeException { - public MessagingException() { - } - public MessagingException(String message) { - super(message); - } - public MessagingException(Throwable cause) { - super(cause); - } - public MessagingException(String message, Throwable cause) { - super(message, cause); - } - - @Override - public Throwable fillInStackTrace() { - return this; - } -} diff --git a/code/common/service-client/src/main/java/nu/marginalia/client/exception/NetworkException.java b/code/common/service-client/src/main/java/nu/marginalia/client/exception/NetworkException.java deleted file mode 100644 index 593333ad..00000000 --- a/code/common/service-client/src/main/java/nu/marginalia/client/exception/NetworkException.java +++ /dev/null @@ -1,15 +0,0 @@ -package nu.marginalia.client.exception; - -public class NetworkException extends MessagingException { - public NetworkException() { - } - public NetworkException(String message) { - super(message); - } - public NetworkException(Throwable cause) { - super(cause); - } - public NetworkException(String message, Throwable cause) { - super(message, cause); - } -} diff --git a/code/common/service-client/src/main/java/nu/marginalia/client/exception/RemoteException.java b/code/common/service-client/src/main/java/nu/marginalia/client/exception/RemoteException.java deleted file mode 100644 index d26df9b3..00000000 --- a/code/common/service-client/src/main/java/nu/marginalia/client/exception/RemoteException.java +++ /dev/null @@ -1,16 +0,0 @@ -package nu.marginalia.client.exception; - -public 
class RemoteException extends MessagingException { - public RemoteException() { - } - public RemoteException(String message) { - super(message); - } - public RemoteException(Throwable cause) { - super(cause); - } - public RemoteException(String message, Throwable cause) { - super(message, cause); - } - -} diff --git a/code/common/service-client/src/main/java/nu/marginalia/client/exception/RouteNotConfiguredException.java b/code/common/service-client/src/main/java/nu/marginalia/client/exception/RouteNotConfiguredException.java deleted file mode 100644 index c3155dcf..00000000 --- a/code/common/service-client/src/main/java/nu/marginalia/client/exception/RouteNotConfiguredException.java +++ /dev/null @@ -1,15 +0,0 @@ -package nu.marginalia.client.exception; - -public class RouteNotConfiguredException extends MessagingException { - public RouteNotConfiguredException() { - } - public RouteNotConfiguredException(String message) { - super(message); - } - public RouteNotConfiguredException(Throwable cause) { - super(cause); - } - public RouteNotConfiguredException(String message, Throwable cause) { - super(message, cause); - } -} diff --git a/code/common/service-client/src/main/java/nu/marginalia/client/exception/TimeoutException.java b/code/common/service-client/src/main/java/nu/marginalia/client/exception/TimeoutException.java deleted file mode 100644 index 17632758..00000000 --- a/code/common/service-client/src/main/java/nu/marginalia/client/exception/TimeoutException.java +++ /dev/null @@ -1,15 +0,0 @@ -package nu.marginalia.client.exception; - -public class TimeoutException extends MessagingException { - public TimeoutException() { - } - public TimeoutException(String message) { - super(message); - } - public TimeoutException(Throwable cause) { - super(cause); - } - public TimeoutException(String message, Throwable cause) { - super(message, cause); - } -} diff --git a/code/common/service-client/src/main/java/nu/marginalia/client/grpc/GrpcChannelPool.java 
b/code/common/service-client/src/main/java/nu/marginalia/client/grpc/GrpcChannelPool.java deleted file mode 100644 index 6c23a841..00000000 --- a/code/common/service-client/src/main/java/nu/marginalia/client/grpc/GrpcChannelPool.java +++ /dev/null @@ -1,92 +0,0 @@ -package nu.marginalia.client.grpc; - -import io.grpc.ManagedChannel; -import io.grpc.ManagedChannelBuilder; -import nu.marginalia.service.id.ServiceId; - -import java.util.List; -import java.util.Map; -import java.util.concurrent.*; -import java.util.function.Function; -import java.util.stream.Stream; - -import static io.grpc.ConnectivityState.SHUTDOWN; - -/** A pool of gRPC stubs for a service, with a separate stub for each node. - * Manages broadcast-style request. */ -public abstract class GrpcChannelPool { - public GrpcChannelPool(String serviceName) { - this.serviceName = serviceName; - } - - protected record ServiceAndNode(String service, int node) { - public String getHostName() { - return service+"-"+node; - } - } - - private final Map channels = new ConcurrentHashMap<>(); - private final ExecutorService virtualExecutorService = Executors.newVirtualThreadPerTaskExecutor(); - - private final String serviceName; - - public GrpcChannelPool(ServiceId serviceId) { - this.serviceName = serviceId.serviceName; - } - - /** Get an API stub for the given node */ - public STUB apiForNode(int node) { - return createStub( - channels.compute( - new ServiceAndNode(serviceName, node), - this::refreshChannel) - ); - } - - private ManagedChannel refreshChannel(ServiceAndNode serviceAndNode, ManagedChannel old) { - if (old == null) - return createChannel(serviceAndNode); - - // If the channel is in SHUTDOWN state, we need to create a new one - // (shouldn't really happen in practice, but it's a good idea to be safe) - if (old.getState(true) == SHUTDOWN) { - old.shutdown(); - return createChannel(serviceAndNode); - } - - return old; - } - - protected ManagedChannel createChannel(ServiceAndNode serviceAndNode) { - 
return ManagedChannelBuilder.forAddress(serviceAndNode.getHostName(), 81) - .usePlaintext() - .build(); - } - - /** Invoke a function on each node, returning a list of futures in a terminal state, as per - * ExecutorService$invokeAll */ - public List> invokeAll(Function> callF) throws InterruptedException { - List> calls = getEligibleNodes().stream() - .map(id -> callF.apply(apiForNode(id))) - .toList(); - - return virtualExecutorService.invokeAll(calls); - } - - /** Invoke a function on each node, returning a stream of results */ - public Stream callEachSequential(Function call) { - return getEligibleNodes().stream() - .map(id -> call.apply(apiForNode(id))); - } - - - /** Create a stub for the given channel, this is an operation - * that needs to be implemented for the particular API this - * pool is intended for - */ - public abstract STUB createStub(ManagedChannel channel); - - /** Get the list of nodes that are eligible for broadcast-style requests */ - public abstract List getEligibleNodes(); - -} diff --git a/code/common/service-client/src/main/java/nu/marginalia/client/route/RouteProvider.java b/code/common/service-client/src/main/java/nu/marginalia/client/route/RouteProvider.java deleted file mode 100644 index 56e2a564..00000000 --- a/code/common/service-client/src/main/java/nu/marginalia/client/route/RouteProvider.java +++ /dev/null @@ -1,25 +0,0 @@ -package nu.marginalia.client.route; - -import nu.marginalia.service.descriptor.ServiceDescriptor; - -public class RouteProvider { - private static int defaultPort = 80; - - private final ServiceDescriptor descriptor; - - public RouteProvider(ServiceDescriptor descriptor) { - this.descriptor = descriptor; - } - - ServiceRoute findRoute(int node) { - return new ServiceRoute(descriptor.getHostName(node), defaultPort); - } - - // Access exists for testing - public static void setDefaultPort(int port) { - defaultPort = port; - } - public static void resetDefaultPort() { - defaultPort = 80; - } -} diff --git 
a/code/common/service-client/src/main/java/nu/marginalia/client/route/ServiceRoute.java b/code/common/service-client/src/main/java/nu/marginalia/client/route/ServiceRoute.java deleted file mode 100644 index c9c6ffab..00000000 --- a/code/common/service-client/src/main/java/nu/marginalia/client/route/ServiceRoute.java +++ /dev/null @@ -1,12 +0,0 @@ -package nu.marginalia.client.route; - -import org.apache.http.HttpHost; - -public record ServiceRoute(String hostname, int port) { - public String toString() { - if (port == 80) { - return "http://" + hostname; - } - return new HttpHost(hostname(), port()).toURI(); - } -} diff --git a/code/common/service-client/src/main/java/nu/marginalia/client/route/ServiceRoutes.java b/code/common/service-client/src/main/java/nu/marginalia/client/route/ServiceRoutes.java deleted file mode 100644 index 6026bc16..00000000 --- a/code/common/service-client/src/main/java/nu/marginalia/client/route/ServiceRoutes.java +++ /dev/null @@ -1,22 +0,0 @@ -package nu.marginalia.client.route; - -import java.util.ArrayList; -import java.util.List; -import java.util.concurrent.ConcurrentHashMap; - -public class ServiceRoutes { - private final ConcurrentHashMap knownRoutes = new ConcurrentHashMap<>(); - private final RouteProvider provider; - - public ServiceRoutes(RouteProvider provider) { - this.provider = provider; - } - - public ServiceRoute get(int node) { - return knownRoutes.computeIfAbsent(node, provider::findRoute); - } - - public List getNodes() { - return new ArrayList<>(knownRoutes.keySet()); - } -} diff --git a/code/common/service-client/src/test/java/nu/marginalia/client/AbstractClientTest.java b/code/common/service-client/src/test/java/nu/marginalia/client/AbstractClientTest.java deleted file mode 100644 index 2f02a8a7..00000000 --- a/code/common/service-client/src/test/java/nu/marginalia/client/AbstractClientTest.java +++ /dev/null @@ -1,189 +0,0 @@ -package nu.marginalia.client; - -import com.google.gson.Gson; -import 
io.reactivex.rxjava3.core.Observable; -import lombok.AllArgsConstructor; -import lombok.Data; -import lombok.SneakyThrows; -import nu.marginalia.client.route.RouteProvider; -import nu.marginalia.client.route.ServiceRoute; -import nu.marginalia.service.descriptor.ServiceDescriptor; -import nu.marginalia.service.id.ServiceId; -import org.junit.jupiter.api.*; -import spark.Request; -import spark.Response; -import spark.Spark; - -import java.util.ArrayList; -import java.util.List; -import java.util.Random; - -import static org.junit.jupiter.api.Assertions.*; - -public class AbstractClientTest { - - static TestServer testServer; - static AbstractClient client; - Gson gson = new Gson(); - - @Data @AllArgsConstructor - private static class DummyObject { - public int num; - public String str; - } - - @BeforeAll - public static void setUp() { - int port = new Random().nextInt(6000, 10000); - RouteProvider.setDefaultPort(port); - testServer = new TestServer(port); - - - client = new AbstractClient(new RouteProvider(new ServiceDescriptor(ServiceId.Api, "localhost")), 1, Gson::new) { - @Override - public AbortingScheduler scheduler() { - return new AbortingScheduler(); - } - - @Override - public String name() { - return "test"; - } - }; - client.setTimeout(1); - } - - - @AfterAll - public static void tearDown() { - testServer.close(); - client.close(); - RouteProvider.resetDefaultPort(); - } - - private void assertError(Observable observable) { - try { - observable.blockingSubscribe(); - } - catch (RuntimeException ex) { - System.out.println("Got exception " + ex.getClass().getSimpleName() + " -- as expected!" 
); - return; - } - Assertions.fail("Expected exception"); - } - @SneakyThrows - private Object timeout(Request request, Response response) { - Thread.sleep(5000); - return "yawn"; - } - @SneakyThrows - private Object error404(Request request, Response response) { - Spark.halt(404); - return ""; - } - - @Test - public void testGetTimeout() { - testServer.get(this::timeout); - - assertError(client.get(Context.internal(), 0, "/get")); - } - - @Test - public void testPostTimeout() { - testServer.post(this::timeout); - - assertError(client.post(Context.internal(), 0, "/post", "test")); - } - - @Test - public void testDeleteTimeout() { - testServer.delete(this::timeout); - - assertError(client.delete(Context.internal(), 0,"/post")); - } - - @Test - public void testPost404() { - testServer.post(this::error404); - - assertError(client.post(Context.internal(), 0,"/post", "test")); - } - - @Test - public void testGet404() { - testServer.get(this::error404); - - assertError(client.get(Context.internal(), 0,"/get")); - } - - @Test - public void testDelete404() { - testServer.delete(this::error404); - - assertError(client.delete(Context.internal(),0, "/delete")); - } - - @Test - public void testGet() { - testServer.get((req, rsp) -> "Hello World"); - - assertEquals("Hello World", client.get(Context.internal(), 0,"/get").blockingFirst()); - } - - @Test - public void testAcceptingUp() { - testServer.setReady(true); - assertTrue(client.isAccepting()); - } - - @Test - public void testAcceptingDown() { - testServer.setReady(false); - assertFalse(client.isAccepting()); - } - - @Test - public void testGetJson() { - testServer.get((req, rsp) -> new DummyObject(5, "23"), new Gson()::toJson); - - assertEquals(client.get(Context.internal(), 0,"/get", DummyObject.class).blockingFirst(), - new DummyObject(5, "23")); - } - - - @Test - public void testDelete() { - testServer.delete((req, rsp) -> "Hello World"); - - assertTrue(client.delete(Context.internal(), 
0,"/delete").blockingFirst().isGood()); - } - - - @Test - public void testPost() { - List inbox = new ArrayList<>(); - testServer.post((req, rsp) -> { - inbox.add(gson.fromJson(req.body(), DummyObject.class)); - return "ok"; - }); - - client.post(Context.internal(),0, "/post", new DummyObject(5, "23")).blockingSubscribe(); - assertEquals(1, inbox.size()); - assertEquals(new DummyObject(5, "23"), inbox.get(0)); - } - - @Test - public void testPostGet() { - List inbox = new ArrayList<>(); - testServer.post((req, rsp) -> { - inbox.add(gson.fromJson(req.body(), DummyObject.class)); - return new DummyObject(1, "ret"); - }, gson::toJson); - - var ret = client.postGet(Context.internal(), 0,"/post", new DummyObject(5, "23"), DummyObject.class).blockingFirst(); - assertEquals(1, inbox.size()); - assertEquals(new DummyObject(5, "23"), inbox.get(0)); - assertEquals(new DummyObject(1, "ret"), ret); - } -} diff --git a/code/common/service-client/src/test/java/nu/marginalia/client/TestServer.java b/code/common/service-client/src/test/java/nu/marginalia/client/TestServer.java deleted file mode 100644 index cbfe1075..00000000 --- a/code/common/service-client/src/test/java/nu/marginalia/client/TestServer.java +++ /dev/null @@ -1,59 +0,0 @@ -package nu.marginalia.client; - -import spark.Request; -import spark.Response; -import spark.Spark; - -import java.util.function.BiFunction; -import java.util.function.Function; - -public class TestServer { - BiFunction onGet; - BiFunction onPost; - BiFunction onDelete; - - - boolean isReady; - - public TestServer(int port) { - Spark.port(port); - Spark.get("/internal/ping", (r,q) -> "pong"); - Spark.get("/internal/ready", this::ready); - Spark.get("/get", (request, response) -> onGet.apply(request, response)); - Spark.post("/post", (request, response) -> onPost.apply(request, response)); - Spark.delete("/delete", (request, response) -> onDelete.apply(request, response)); - } - - private Object ready(Request request, Response response) { - if 
(isReady) { - return ""; - } - else { - response.status(401); - return "bad"; - } - } - - public void close() { - Spark.stop(); - } - - public boolean isReady() { - return isReady; - } - - public void setReady(boolean ready) { - isReady = ready; - } - - public TestServer get(BiFunction onGet) { this.onGet = onGet; return this; } - public TestServer get(BiFunction onGet, Function transform) { - this.onGet = onGet.andThen(transform); - return this; - } - public TestServer delete(BiFunction onDelete) { this.onDelete = onDelete; return this; } - public TestServer post(BiFunction onPost) { this.onPost = onPost; return this; } - public TestServer post(BiFunction onPost, Function transform) { - this.onPost = onPost.andThen(transform); return this; - } -} diff --git a/code/common/service-discovery/build.gradle b/code/common/service-discovery/build.gradle index d0214416..db87eb95 100644 --- a/code/common/service-discovery/build.gradle +++ b/code/common/service-discovery/build.gradle @@ -6,6 +6,11 @@ plugins { repositories { mavenLocal() mavenCentral() + + repositories { + mavenCentral() + maven { url 'https://jitpack.io' } + } } java { @@ -13,10 +18,24 @@ java { languageVersion.set(JavaLanguageVersion.of(21)) } } +apply from: "$rootProject.projectDir/srcsets.gradle" + dependencies { implementation libs.bundles.slf4j + implementation libs.bundles.curator + implementation libs.guice + implementation libs.bundles.gson + implementation libs.bundles.mariadb + implementation libs.bundles.grpc + implementation libs.notnull + testImplementation libs.bundles.slf4j.test testImplementation libs.bundles.junit testImplementation libs.mockito + + testImplementation platform('org.testcontainers:testcontainers-bom:1.17.4') + testImplementation 'org.testcontainers:mariadb:1.17.4' + testImplementation 'org.testcontainers:junit-jupiter:1.17.4' + testImplementation project(':code:functions:math:api') } diff --git 
a/code/services-core/query-service/src/main/java/nu/marginalia/query/NodeConfigurationWatcher.java b/code/common/service-discovery/java/nu/marginalia/service/NodeConfigurationWatcher.java similarity index 55% rename from code/services-core/query-service/src/main/java/nu/marginalia/query/NodeConfigurationWatcher.java rename to code/common/service-discovery/java/nu/marginalia/service/NodeConfigurationWatcher.java index a7117387..aae5080e 100644 --- a/code/services-core/query-service/src/main/java/nu/marginalia/query/NodeConfigurationWatcher.java +++ b/code/common/service-discovery/java/nu/marginalia/service/NodeConfigurationWatcher.java @@ -1,24 +1,25 @@ -package nu.marginalia.query; +package nu.marginalia.service; import com.google.inject.Inject; +import com.zaxxer.hikari.HikariDataSource; import lombok.SneakyThrows; -import nu.marginalia.nodecfg.NodeConfigurationService; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.sql.SQLException; import java.util.ArrayList; import java.util.List; import java.util.concurrent.TimeUnit; public class NodeConfigurationWatcher { private static final Logger logger = LoggerFactory.getLogger(NodeConfigurationWatcher.class); + private final HikariDataSource dataSource; private volatile List queryNodes = new ArrayList<>(); - private final NodeConfigurationService configurationService; @Inject - public NodeConfigurationWatcher(NodeConfigurationService configurationService) { - this.configurationService = configurationService; + public NodeConfigurationWatcher(HikariDataSource dataSource) { + this.dataSource = dataSource; var watcherThread = new Thread(this::pollConfiguration, "Node Configuration Watcher"); watcherThread.setDaemon(true); @@ -30,11 +31,20 @@ public class NodeConfigurationWatcher { for (;;) { List goodNodes = new ArrayList<>(); - for (var cfg : configurationService.getAll()) { - if (!cfg.disabled() && cfg.acceptQueries()) { - goodNodes.add(cfg.node()); + try (var conn = dataSource.getConnection()) { + 
var stmt = conn.prepareStatement(""" + SELECT ID FROM NODE_CONFIGURATION + WHERE ACCEPT_QUERIES AND NOT DISABLED + """); + var rs = stmt.executeQuery(); + while (rs.next()) { + goodNodes.add(rs.getInt(1)); } } + catch (SQLException ex) { + logger.error("Error polling node configuration", ex); + } + queryNodes = goodNodes; TimeUnit.SECONDS.sleep(10); diff --git a/code/common/service-discovery/java/nu/marginalia/service/ServiceDiscoveryModule.java b/code/common/service-discovery/java/nu/marginalia/service/ServiceDiscoveryModule.java new file mode 100644 index 00000000..6daa084c --- /dev/null +++ b/code/common/service-discovery/java/nu/marginalia/service/ServiceDiscoveryModule.java @@ -0,0 +1,46 @@ +package nu.marginalia.service; + +import com.google.inject.AbstractModule; +import nu.marginalia.service.discovery.ServiceRegistryIf; +import nu.marginalia.service.discovery.ZkServiceRegistry; +import org.apache.curator.framework.CuratorFramework; +import org.apache.curator.framework.CuratorFrameworkFactory; +import org.apache.curator.retry.ExponentialBackoffRetry; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Optional; + +/** Provides a Guice module for service discovery. 
*/ +public class ServiceDiscoveryModule extends AbstractModule { + + private static final Logger logger = LoggerFactory.getLogger(ServiceDiscoveryModule.class); + + public void configure() { + var hosts = getZookeeperHosts(); + logger.info("Using Zookeeper service registry at {}", hosts); + CuratorFramework client = CuratorFrameworkFactory + .newClient(hosts, new ExponentialBackoffRetry(100, 10, 1000)); + + bind(CuratorFramework.class).toInstance(client); + bind(ServiceRegistryIf.class).to(ZkServiceRegistry.class); + } + + private String getZookeeperHosts() { + if (System.getProperty("zookeeper-hosts") != null) { + return System.getProperty("zookeeper-hosts"); + } + String env = System.getenv("ZOOKEEPER_HOSTS"); + if (null == env) { + System.err.println(""" + ZOOKEEPER_HOSTS not set. This probably means that you are running an old installation, + or that the environment is not set up correctly. + + See the 2024-03+ migration notes, https://docs.marginalia.nu/6_notes/6_1__migrate_2024_03_plus + + """); + } + return env; + } + +} diff --git a/code/common/service-discovery/src/main/java/nu/marginalia/service/ServiceHomeNotConfiguredException.java b/code/common/service-discovery/java/nu/marginalia/service/ServiceHomeNotConfiguredException.java similarity index 64% rename from code/common/service-discovery/src/main/java/nu/marginalia/service/ServiceHomeNotConfiguredException.java rename to code/common/service-discovery/java/nu/marginalia/service/ServiceHomeNotConfiguredException.java index 181428e1..504abdc0 100644 --- a/code/common/service-discovery/src/main/java/nu/marginalia/service/ServiceHomeNotConfiguredException.java +++ b/code/common/service-discovery/java/nu/marginalia/service/ServiceHomeNotConfiguredException.java @@ -1,10 +1,6 @@ package nu.marginalia.service; public class ServiceHomeNotConfiguredException extends RuntimeException { - - public ServiceHomeNotConfiguredException() { - super("WMSA_HOME environment variable not set"); - } public 
ServiceHomeNotConfiguredException(String message) { super(message); } diff --git a/code/common/service-client/src/main/java/nu/marginalia/client/ServiceMonitors.java b/code/common/service-discovery/java/nu/marginalia/service/ServiceMonitors.java similarity index 99% rename from code/common/service-client/src/main/java/nu/marginalia/client/ServiceMonitors.java rename to code/common/service-discovery/java/nu/marginalia/service/ServiceMonitors.java index b09ffa0c..b3b497ef 100644 --- a/code/common/service-client/src/main/java/nu/marginalia/client/ServiceMonitors.java +++ b/code/common/service-discovery/java/nu/marginalia/service/ServiceMonitors.java @@ -1,4 +1,4 @@ -package nu.marginalia.client; +package nu.marginalia.service; import com.google.inject.Inject; import com.google.inject.Singleton; diff --git a/code/common/service-discovery/java/nu/marginalia/service/client/GrpcChannelPoolFactory.java b/code/common/service-discovery/java/nu/marginalia/service/client/GrpcChannelPoolFactory.java new file mode 100644 index 00000000..9633b0f7 --- /dev/null +++ b/code/common/service-discovery/java/nu/marginalia/service/client/GrpcChannelPoolFactory.java @@ -0,0 +1,87 @@ +package nu.marginalia.service.client; + +import com.google.inject.Inject; +import com.google.inject.Singleton; +import io.grpc.ManagedChannel; +import io.grpc.ManagedChannelBuilder; +import nu.marginalia.service.NodeConfigurationWatcher; +import nu.marginalia.service.discovery.ServiceRegistryIf; +import nu.marginalia.service.discovery.property.PartitionTraits; +import nu.marginalia.service.discovery.property.ServiceEndpoint.InstanceAddress; +import nu.marginalia.service.discovery.property.ServiceKey; +import nu.marginalia.service.discovery.property.ServicePartition; +import org.jetbrains.annotations.NotNull; + +import java.util.concurrent.Executor; +import java.util.concurrent.Executors; +import java.util.concurrent.ThreadFactory; +import java.util.concurrent.TimeUnit; +import 
java.util.concurrent.atomic.AtomicInteger; +import java.util.function.Function; + +@Singleton +public class GrpcChannelPoolFactory { + + private final NodeConfigurationWatcher nodeConfigurationWatcher; + private final ServiceRegistryIf serviceRegistryIf; + private static final Executor executor = Executors.newFixedThreadPool( + Math.clamp(Runtime.getRuntime().availableProcessors() / 2, 2, 16), new ThreadFactory() { + static final AtomicInteger threadNumber = new AtomicInteger(1); + @Override + public Thread newThread(@NotNull Runnable r) { + var thread = new Thread(r, STR."gRPC-Channel-Pool[\{threadNumber.getAndIncrement()}]"); + thread.setDaemon(true); + return thread; + } + }); + private static final Executor offloadExecutor = Executors.newFixedThreadPool( + Math.clamp(Runtime.getRuntime().availableProcessors() / 2, 2, 16), new ThreadFactory() { + static final AtomicInteger threadNumber = new AtomicInteger(1); + @Override + public Thread newThread(@NotNull Runnable r) { + var thread = new Thread(r, STR."gRPC-Offload-Executor[\{threadNumber.getAndIncrement()}]"); + thread.setDaemon(true); + return thread; + } + }); + + @Inject + public GrpcChannelPoolFactory(NodeConfigurationWatcher nodeConfigurationWatcher, + ServiceRegistryIf serviceRegistryIf) + { + this.nodeConfigurationWatcher = nodeConfigurationWatcher; + this.serviceRegistryIf = serviceRegistryIf; + } + + /** Create a new multi-node channel pool for the given service. */ + public GrpcMultiNodeChannelPool createMulti(ServiceKey key, + Function stubConstructor) + { + return new GrpcMultiNodeChannelPool<>(serviceRegistryIf, + key, + this::createChannel, + stubConstructor, + nodeConfigurationWatcher); + } + + /** Create a new single-node channel pool for the given service. 
*/ + public GrpcSingleNodeChannelPool createSingle(ServiceKey key, + Function stubConstructor) + { + return new GrpcSingleNodeChannelPool<>(serviceRegistryIf, key, this::createChannel, stubConstructor); + } + + private ManagedChannel createChannel(InstanceAddress route) { + + var mc = ManagedChannelBuilder + .forAddress(route.host(), route.port()) + .executor(executor) + .offloadExecutor(offloadExecutor) + .usePlaintext() + .build(); + + mc.getState(true); + + return mc; + } +} diff --git a/code/common/service-discovery/java/nu/marginalia/service/client/GrpcMultiNodeChannelPool.java b/code/common/service-discovery/java/nu/marginalia/service/client/GrpcMultiNodeChannelPool.java new file mode 100644 index 00000000..d4f75e66 --- /dev/null +++ b/code/common/service-discovery/java/nu/marginalia/service/client/GrpcMultiNodeChannelPool.java @@ -0,0 +1,142 @@ +package nu.marginalia.service.client; + +import io.grpc.ManagedChannel; +import lombok.SneakyThrows; +import nu.marginalia.service.NodeConfigurationWatcher; +import nu.marginalia.service.discovery.ServiceRegistryIf; +import nu.marginalia.service.discovery.property.PartitionTraits; +import nu.marginalia.service.discovery.property.ServiceEndpoint; +import nu.marginalia.service.discovery.property.ServiceKey; +import nu.marginalia.service.discovery.property.ServicePartition; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.List; +import java.util.concurrent.*; +import java.util.function.BiFunction; +import java.util.function.Function; + +/** A pool of gRPC channels for a service, with a separate channel for each node. + *

+ * Manages broadcast-style request. */ +public class GrpcMultiNodeChannelPool { + private final ConcurrentHashMap> pools = + new ConcurrentHashMap<>(); + private static final Logger logger = LoggerFactory.getLogger(GrpcMultiNodeChannelPool.class); + private final ServiceRegistryIf serviceRegistryIf; + private final ServiceKey serviceKey; + private final Function channelConstructor; + private final Function stubConstructor; + private final NodeConfigurationWatcher nodeConfigurationWatcher; + + @SneakyThrows + public GrpcMultiNodeChannelPool(ServiceRegistryIf serviceRegistryIf, + ServiceKey serviceKey, + Function channelConstructor, + Function stubConstructor, + NodeConfigurationWatcher nodeConfigurationWatcher) { + this.serviceRegistryIf = serviceRegistryIf; + this.serviceKey = serviceKey; + this.channelConstructor = channelConstructor; + this.stubConstructor = stubConstructor; + this.nodeConfigurationWatcher = nodeConfigurationWatcher; + + // Warm up the pool to reduce latency for the initial request + for (var node : nodeConfigurationWatcher.getQueryNodes()) { + getPoolForNode(node); + } + } + + private GrpcSingleNodeChannelPool getPoolForNode(int node) { + return pools.computeIfAbsent(node, this::newSingleChannelPool); + } + + private GrpcSingleNodeChannelPool newSingleChannelPool(int node) { + return new GrpcSingleNodeChannelPool<>( + serviceRegistryIf, + serviceKey.forPartition(ServicePartition.partition(node)), + channelConstructor, + stubConstructor); + } + + /** Get the list of nodes that are eligible for broadcast-style requests */ + public List getEligibleNodes() { + return nodeConfigurationWatcher.getQueryNodes(); + } + + /** Create a new call builder for the given method. This is a fluent-style + * method, where you can chain calls to specify how to run the method. + *

+ * Example: + *
+     *     var results = channelPool.call(AStub::someMethod)
+     *                   .async(someExecutor)
+     *                   .runAll(argumentToSomeMethod);
+     * 
+ * */ + public CallBuilderBase call(BiFunction method) { + return new CallBuilderBase<>(method); + } + + public class CallBuilderBase { + private final BiFunction method; + + private CallBuilderBase(BiFunction method) { + this.method = method; + } + + /** Create a call for the given method on the given node */ + public GrpcSingleNodeChannelPool.CallBuilderBase forNode(int node) { + return getPoolForNode(node).call(method); + } + + /** Run the given method on each node, returning a list of results. + * This is a blocking method, where each call will be made in sequence */ + public List run(I arg) { + return getEligibleNodes().stream() + .map(node -> getPoolForNode(node).call(method).run(arg)) + .toList(); + } + + /** Generate an async call builder for the given method */ + public CallBuilderAsync async(ExecutorService service) { + return new CallBuilderAsync<>(service, method); + } + } + + public class CallBuilderAsync { + private final Executor executor; + private final BiFunction method; + + public CallBuilderAsync(Executor executor, BiFunction method) { + this.executor = executor; + this.method = method; + } + + /** Run the given method on each node, returning a future of a list of results */ + public CompletableFuture> runAll(I arg) { + var futures = getEligibleNodes().stream() + .map(GrpcMultiNodeChannelPool.this::getPoolForNode) + .map(pool -> + pool.call(method) + .async(executor) + .run(arg) + ).toList(); + + return CompletableFuture.allOf(futures.toArray(new CompletableFuture[0])) + .thenApply(v -> futures.stream().map(CompletableFuture::join).toList()); + } + + /** Run the given method on each node, returning a list of futures. 
*/ + public List> runEach(I arg) { + return getEligibleNodes().stream() + .map(GrpcMultiNodeChannelPool.this::getPoolForNode) + .map(pool -> + pool.call(method) + .async(executor) + .run(arg) + ).toList(); + + } + } +} diff --git a/code/common/service-discovery/java/nu/marginalia/service/client/GrpcSingleNodeChannelPool.java b/code/common/service-discovery/java/nu/marginalia/service/client/GrpcSingleNodeChannelPool.java new file mode 100644 index 00000000..9c777c2b --- /dev/null +++ b/code/common/service-discovery/java/nu/marginalia/service/client/GrpcSingleNodeChannelPool.java @@ -0,0 +1,224 @@ +package nu.marginalia.service.client; + +import com.google.common.collect.Sets; +import io.grpc.ManagedChannel; +import lombok.SneakyThrows; +import nu.marginalia.service.discovery.ServiceRegistryIf; +import nu.marginalia.service.discovery.monitor.ServiceChangeMonitor; +import nu.marginalia.service.discovery.property.PartitionTraits; +import nu.marginalia.service.discovery.property.ServiceEndpoint.InstanceAddress; +import nu.marginalia.service.discovery.property.ServiceKey; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.time.Duration; +import java.util.*; +import java.util.concurrent.*; +import java.util.concurrent.atomic.AtomicReference; +import java.util.function.BiFunction; +import java.util.function.Function; + +/** A pool of gRPC channels for a service, with a separate channel for each node. + *

+ * Manages unicast-style requests */ +public class GrpcSingleNodeChannelPool extends ServiceChangeMonitor { + private final Map channels = new ConcurrentHashMap<>(); + + private static final Logger logger = LoggerFactory.getLogger(GrpcSingleNodeChannelPool.class); + + private final ServiceRegistryIf serviceRegistryIf; + private final Function channelConstructor; + private final Function stubConstructor; + + + @SneakyThrows + public GrpcSingleNodeChannelPool(ServiceRegistryIf serviceRegistryIf, + ServiceKey serviceKey, + Function channelConstructor, + Function stubConstructor) { + super(serviceKey); + + this.serviceRegistryIf = serviceRegistryIf; + this.channelConstructor = channelConstructor; + this.stubConstructor = stubConstructor; + + serviceRegistryIf.registerMonitor(this); + + onChange(); + + awaitChannel(Duration.ofSeconds(5)); + } + + + @Override + public synchronized boolean onChange() { + Set newRoutes = serviceRegistryIf.getEndpoints(serviceKey); + Set oldRoutes = new HashSet<>(channels.keySet()); + + // Find the routes that have been added or removed + for (var route : Sets.symmetricDifference(oldRoutes, newRoutes)) { + ConnectionHolder oldChannel; + if (newRoutes.contains(route)) { + logger.info("Adding route {}", route); + oldChannel = channels.put(route, new ConnectionHolder(route)); + } else { + logger.info("Expelling route {}", route); + oldChannel = channels.remove(route); + } + if (oldChannel != null) { + oldChannel.close(); + } + } + + return true; + } + + private class ConnectionHolder { + private final AtomicReference channel = new AtomicReference<>(); + private final InstanceAddress address; + + ConnectionHolder(InstanceAddress address) { + this.address = address; + } + + public ManagedChannel get() { + var value = channel.get(); + if (value != null) { + return value; + } + + try { + logger.info("Creating channel for {}:{}", serviceKey, address); + value = channelConstructor.apply(address); + if (channel.compareAndSet(null, value)) { + return 
value; + } + else { + value.shutdown(); + return channel.get(); + } + } + catch (Exception e) { + logger.error(STR."Failed to get channel for \{address}", e); + return null; + } + } + + public void close() { + ManagedChannel mc = channel.getAndSet(null); + if (mc != null) { + mc.shutdown(); + } + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + + ConnectionHolder that = (ConnectionHolder) o; + return Objects.equals(address, that.address); + } + + + @Override + public int hashCode() { + return Objects.hash(address); + } + } + + + + public boolean hasChannel() { + return !channels.isEmpty(); + } + + public synchronized boolean awaitChannel(Duration timeout) throws InterruptedException { + if (hasChannel()) return true; + + final long endTime = System.currentTimeMillis() + timeout.toMillis(); + + while (!hasChannel()) { + long timeLeft = endTime - System.currentTimeMillis(); + if (timeLeft <= 0) return false; + this.wait(timeLeft); + } + return hasChannel(); + } + + private T call(BiFunction call, I arg) throws RuntimeException { + final List exceptions = new ArrayList<>(); + final List connectionHolders = new ArrayList<>(channels.values()); + + // Randomize the order of the connection holders to spread out the load + Collections.shuffle(connectionHolders); + + for (var channel : connectionHolders) { + try { + return call.apply(stubConstructor.apply(channel.get()), arg); + } + catch (Exception e) { + exceptions.add(e); + } + } + + for (var e : exceptions) { + logger.error("Failed to call service {}", serviceKey, e); + } + + throw new ServiceNotAvailableException(serviceKey); + } + + /** Create a call for the given method on the given node. 
+ * This is a fluent method, so you can chain it with other + * methods to specify the node and arguments */ + public CallBuilderBase call(BiFunction method) { + return new CallBuilderBase<>(method); + } + + public class CallBuilderBase { + private final BiFunction method; + private CallBuilderBase(BiFunction method) { + this.method = method; + } + + /** Execute the call in a blocking manner */ + public T run(I arg) { + return call(method, arg); + } + + /** Create an asynchronous call using the provided executor */ + public CallBuilderAsync async(Executor executor) { + return new CallBuilderAsync<>(executor, method); + } + } + public class CallBuilderAsync { + private final Executor executor; + private final BiFunction method; + + public CallBuilderAsync(Executor executor, BiFunction method) { + this.executor = executor; + this.method = method; + } + + /** Execute the call in an asynchronous manner */ + public CompletableFuture run(I arg) { + return CompletableFuture.supplyAsync(() -> call(method, arg), executor); + } + + /** Execute the call in an asynchronous manner for each of the given arguments */ + public CompletableFuture> runFor(List args) { + List> results = new ArrayList<>(); + for (var arg : args) { + results.add(CompletableFuture.supplyAsync(() -> call(method, arg), executor)); + } + return CompletableFuture.allOf(results.toArray(new CompletableFuture[0])) + .thenApply(v -> results.stream().map(CompletableFuture::join).toList()); + } + + /** Execute the call in an asynchronous manner for each of the given arguments */ + public CompletableFuture> runFor(I... 
args) { + return runFor(List.of(args)); + } + } +} diff --git a/code/common/service-discovery/java/nu/marginalia/service/client/ServiceNotAvailableException.java b/code/common/service-discovery/java/nu/marginalia/service/client/ServiceNotAvailableException.java new file mode 100644 index 00000000..b905beab --- /dev/null +++ b/code/common/service-discovery/java/nu/marginalia/service/client/ServiceNotAvailableException.java @@ -0,0 +1,9 @@ +package nu.marginalia.service.client; + +import nu.marginalia.service.discovery.property.ServiceKey; + +public class ServiceNotAvailableException extends RuntimeException { + public ServiceNotAvailableException(ServiceKey key) { + super(STR."Service \{key} not available"); + } +} diff --git a/code/common/service-discovery/java/nu/marginalia/service/discovery/ServiceRegistryIf.java b/code/common/service-discovery/java/nu/marginalia/service/discovery/ServiceRegistryIf.java new file mode 100644 index 00000000..04877bd3 --- /dev/null +++ b/code/common/service-discovery/java/nu/marginalia/service/discovery/ServiceRegistryIf.java @@ -0,0 +1,60 @@ +package nu.marginalia.service.discovery; + +import nu.marginalia.service.discovery.monitor.*; +import nu.marginalia.service.discovery.property.ServiceEndpoint; +import static nu.marginalia.service.discovery.property.ServiceEndpoint.*; + +import nu.marginalia.service.discovery.property.ServiceKey; + +import java.util.Set; +import java.util.UUID; + +/** A service registry that allows services to register themselves and + * be discovered by other services on the network. + */ +public interface ServiceRegistryIf { + /** + * Register a service with the registry. + *

+ * Once the instance has announced itself with {@link #announceInstance(UUID instanceUUID) announceInstance(...)}, + * the service will be available for discovery with {@link #getEndpoints(ServiceKey key) getEndpoints(...)}. + * + * @param key the key identifying the service + * @param instanceUUID the unique UUID of the instance + * @param externalAddress the public address of the service + */ + ServiceEndpoint registerService(ServiceKey key, + UUID instanceUUID, + String externalAddress) throws Exception; + + + void declareFirstBoot(); + void waitForFirstBoot() throws InterruptedException; + + /** Let the world know that the service is running + * and ready to accept requests. */ + void announceInstance(UUID instanceUUID); + + /** At the discretion of the implementation, provide a port that is unique + * across (host, api-schema). It may be randomly selected + * or hard-coded or some combination of behaviors. + */ + int requestPort(String externalHost, ServiceKey key); + + /** Get all endpoints for the service on the specified node and schema. */ + Set getEndpoints(ServiceKey schema); + + /** Register a monitor to be notified when the service registry changes. + *

+ * {@link ServiceMonitorIf#onChange()} will be called when the registry changes. + * Spurious calls to {@link ServiceMonitorIf#onChange()} are allowed depending + * on the implementation. + *

+ * Behavior of the monitor depends on the implementation of the registry, and the + * monitor type. + *
    + *
  • {@link ServiceChangeMonitor} is notified when any node for the service changes.
  • + *
+ * */ + void registerMonitor(ServiceMonitorIf monitor) throws Exception; +} diff --git a/code/common/service-discovery/java/nu/marginalia/service/discovery/ZkServiceRegistry.java b/code/common/service-discovery/java/nu/marginalia/service/discovery/ZkServiceRegistry.java new file mode 100644 index 00000000..0e233ced --- /dev/null +++ b/code/common/service-discovery/java/nu/marginalia/service/discovery/ZkServiceRegistry.java @@ -0,0 +1,260 @@ +package nu.marginalia.service.discovery; + +import com.google.inject.Inject; +import com.google.inject.Singleton; +import lombok.SneakyThrows; +import nu.marginalia.service.discovery.monitor.*; +import nu.marginalia.service.discovery.property.ServiceEndpoint; +import static nu.marginalia.service.discovery.property.ServiceEndpoint.*; + +import nu.marginalia.service.discovery.property.ServiceKey; +import org.apache.curator.framework.CuratorFramework; +import org.apache.curator.framework.api.CuratorWatcher; +import org.apache.curator.utils.ZKPaths; +import org.apache.zookeeper.CreateMode; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.nio.charset.StandardCharsets; +import java.util.*; +import java.util.concurrent.TimeUnit; + +/** A versatile service registry that uses ZooKeeper to store service endpoints. + * It is used to register services and to look up the endpoints of other services. + *

+ * It may also be used to assign ports to services, if the system property + * service.random-port is set to true. This is useful + * for running the system in a bare-metal environment, where the ports are not + * managed by Docker, and there are enough services that managing them manually + * will be a serious headache. + * */ +@Singleton +public class ZkServiceRegistry implements ServiceRegistryIf { + private final CuratorFramework curatorFramework; + private static final Logger logger = LoggerFactory.getLogger(ZkServiceRegistry.class); + private volatile boolean stopped = false; + + private final List livenessPaths = new ArrayList<>(); + + @Inject + @SneakyThrows + public ZkServiceRegistry(CuratorFramework curatorFramework) { + this.curatorFramework = curatorFramework; + + curatorFramework.start(); + if (!curatorFramework.blockUntilConnected(30, TimeUnit.SECONDS)) { + throw new IllegalStateException("Failed to connect to zookeeper after 30s"); + } + + Runtime.getRuntime().addShutdownHook( + new Thread(this::shutDown, "ZkServiceRegistry shutdown hook") + ); + } + + @Override + public ServiceEndpoint registerService(ServiceKey key, + UUID instanceUUID, + String externalAddress) + throws Exception + { + var endpoint = new ServiceEndpoint(externalAddress, requestPort(externalAddress, key)); + + String path = STR."\{key.toPath()}/\{instanceUUID.toString()}"; + byte[] payload = STR."\{endpoint.host()}:\{endpoint.port()}".getBytes(StandardCharsets.UTF_8); + + logger.info("Registering {} -> {}", path, endpoint); + + curatorFramework.create() + .creatingParentsIfNeeded() + .withMode(CreateMode.EPHEMERAL) + .forPath(path, payload); + + return endpoint; + } + + @SneakyThrows + @Override + public void declareFirstBoot() { + if (!isFirstBoot()) { + curatorFramework.create() + .creatingParentsIfNeeded() + .withMode(CreateMode.PERSISTENT) + .forPath("/first-boot"); + } + } + + @Override + public void waitForFirstBoot() throws InterruptedException { + if (!isFirstBoot()) + 
logger.info("Waiting for first-boot flag"); + + while (true) { + if (isFirstBoot()) + return; + + Thread.sleep(1000); + } + } + + private boolean isFirstBoot() { + try { + return curatorFramework.checkExists().forPath("/first-boot") != null; + } + catch (Exception ex) { + logger.error("Failed to check first-boot", ex); + return false; + } + } + + @Override + public void announceInstance(UUID instanceUUID) { + try { + String serviceRoot = STR."/running-instances/\{instanceUUID.toString()}"; + + livenessPaths.add(serviceRoot); + + curatorFramework.create() + .creatingParentsIfNeeded() + .withMode(CreateMode.EPHEMERAL) + .forPath(serviceRoot); + } + catch (Exception ex) { + logger.error("Failed to create service root for {}", instanceUUID); + } + } + + /** + * Returns true if the service has announced itself as up and running. + */ + public boolean isInstanceRunning(UUID instanceUUID) { + try { + String serviceRoot = STR."/running-instances/\{instanceUUID.toString()}"; + return null != curatorFramework.checkExists().forPath(serviceRoot); + } + catch (Exception ex) { + logger.error("Failed to check if instance is running {}", instanceUUID); + return false; + } + } + + @Override + public int requestPort(String externalHost, + ServiceKey key) { + if (!Boolean.getBoolean("service.random-port")) { + return switch (key) { + case ServiceKey.Rest rest -> 80; + case ServiceKey.Grpc grpc -> 81; + }; + } + + int portRangeLow = 12_000; + int portRangeHigh = 12_999; + + var random = new Random(); + + String identifier = key.toPath(); + + byte[] payload = identifier.getBytes(); + + for (int iter = 0; iter < 1000; iter++) { + try { + int port = random.nextInt(portRangeLow, portRangeHigh); + + curatorFramework.create() + .creatingParentsIfNeeded() + .withMode(CreateMode.EPHEMERAL) + .forPath(STR."/port-registry/\{externalHost}/\{port}", payload); + return port; + } + catch (Exception ex) { + logger.error(STR."Still negotiating port for \{identifier}"); + } + } + + throw new 
IllegalStateException("Failed to negotiate a port for host " + externalHost); + } + + @Override + public Set getEndpoints(ServiceKey key) { + try { + Set ret = new HashSet<>(); + for (var uuid : curatorFramework + .getChildren() + .forPath(key.toPath())) { + + if (!isInstanceRunning(UUID.fromString(uuid))) { + continue; + } + + var path = ZKPaths.makePath(key.toPath(), uuid); + byte[] data = curatorFramework + .getData() + .forPath(path); + + String hostAndPort = new String(data); + var address = ServiceEndpoint + .parse(hostAndPort) + .asInstance(UUID.fromString(uuid)); + + ret.add(address); + } + + return ret; + } + catch (Exception ex) { + return Set.of(); + } + } + + public void registerMonitor(ServiceMonitorIf monitor) throws Exception { + if (stopped) + logger.info("Not registering monitor for {} because the registry is stopped", monitor.getKey()); + + String path = monitor.getKey().toPath(); + + CuratorWatcher watcher = change -> { + boolean reRegister; + try { + reRegister = monitor.onChange(); + } + catch (Exception ex) { + logger.error("Monitor for path {} failed", path, ex); + reRegister = true; + } + + if (reRegister) { + registerMonitor(monitor); + } + }; + + curatorFramework.watchers().add() + .usingWatcher(watcher) + .forPath(path); + + // Also register for updates to the running-instances list, + // as this will have an effect on the result of getEndpoints() + curatorFramework.watchers().add() + .usingWatcher(watcher) + .forPath("/running-instances"); + } + + /* Exposed for tests */ + public synchronized void shutDown() { + if (stopped) + return; + + stopped = true; + + // Delete all liveness paths + for (var path : livenessPaths) { + logger.info("Cleaning up {}", path); + + try { + curatorFramework.delete().forPath(path); + } + catch (Exception ex) { + logger.error("Failed to delete path {}", path, ex); + } + } + } +} diff --git a/code/common/service-discovery/java/nu/marginalia/service/discovery/monitor/ServiceChangeMonitor.java 
b/code/common/service-discovery/java/nu/marginalia/service/discovery/monitor/ServiceChangeMonitor.java new file mode 100644 index 00000000..23faba58 --- /dev/null +++ b/code/common/service-discovery/java/nu/marginalia/service/discovery/monitor/ServiceChangeMonitor.java @@ -0,0 +1,17 @@ +package nu.marginalia.service.discovery.monitor; + +import nu.marginalia.service.discovery.property.ServiceKey; + +public abstract class ServiceChangeMonitor implements ServiceMonitorIf { + public final ServiceKey serviceKey; + + public ServiceChangeMonitor(ServiceKey key) { + this.serviceKey = key; + } + + public abstract boolean onChange(); + public ServiceKey getKey() { + return serviceKey; + } + +} diff --git a/code/common/service-discovery/java/nu/marginalia/service/discovery/monitor/ServiceMonitorIf.java b/code/common/service-discovery/java/nu/marginalia/service/discovery/monitor/ServiceMonitorIf.java new file mode 100644 index 00000000..8dca094a --- /dev/null +++ b/code/common/service-discovery/java/nu/marginalia/service/discovery/monitor/ServiceMonitorIf.java @@ -0,0 +1,13 @@ +package nu.marginalia.service.discovery.monitor; + + +import nu.marginalia.service.discovery.property.ServiceKey; + +public interface ServiceMonitorIf { + /** Called when the monitored service has changed. 
+ * @return true if the monitor is to be refreshed + */ + boolean onChange(); + ServiceKey getKey(); + +} diff --git a/code/common/service-discovery/java/nu/marginalia/service/discovery/property/PartitionTraits.java b/code/common/service-discovery/java/nu/marginalia/service/discovery/property/PartitionTraits.java new file mode 100644 index 00000000..64436bdd --- /dev/null +++ b/code/common/service-discovery/java/nu/marginalia/service/discovery/property/PartitionTraits.java @@ -0,0 +1,8 @@ +package nu.marginalia.service.discovery.property; + +public interface PartitionTraits { + interface Grpc {}; + interface Unicast {}; + interface Multicast {}; + interface NoGrpc {}; +} diff --git a/code/common/service-discovery/java/nu/marginalia/service/discovery/property/ServiceEndpoint.java b/code/common/service-discovery/java/nu/marginalia/service/discovery/property/ServiceEndpoint.java new file mode 100644 index 00000000..0e25ce19 --- /dev/null +++ b/code/common/service-discovery/java/nu/marginalia/service/discovery/property/ServiceEndpoint.java @@ -0,0 +1,52 @@ +package nu.marginalia.service.discovery.property; + +import java.net.*; +import java.util.UUID; + +public record ServiceEndpoint(String host, int port) { + + public static ServiceEndpoint parse(String hostAndPort) { + var parts = hostAndPort.split(":"); + if (parts.length != 2) { + throw new IllegalArgumentException("Invalid host:port string: " + hostAndPort); + } + return new ServiceEndpoint(parts[0], Integer.parseInt(parts[1])); + } + + public URL toURL(String endpoint, String query) throws URISyntaxException, MalformedURLException { + return new URI("http", null, host, port, endpoint, query, null) + .toURL(); + } + public InetSocketAddress toInetSocketAddress() { + return new InetSocketAddress(host(), port()); + } + + /** Validate the host by checking if it is a valid IP address or a hostname that can be resolved. 
+ * + * @return true if the host is a valid IP address or a resolvable hostname + */ + public boolean validateHost() { + try { + // Throws UnknownHostException if the host is not a valid IP address or hostname + // (this should not be slow since the DNS lookup should be local, and if it isn't, + // it should be cached by the OS or the JVM) + InetAddress.getByName(host()); + return true; + } catch (UnknownHostException e) { + return false; + } + } + + public InstanceAddress asInstance(UUID instance) { + return new InstanceAddress(this, instance); + } + + public record InstanceAddress(ServiceEndpoint endpoint, UUID instance) { + public String host() { + return endpoint.host(); + } + public int port() { + return endpoint.port(); + } + } +} diff --git a/code/common/service-discovery/java/nu/marginalia/service/discovery/property/ServiceKey.java b/code/common/service-discovery/java/nu/marginalia/service/discovery/property/ServiceKey.java new file mode 100644 index 00000000..66ae5ded --- /dev/null +++ b/code/common/service-discovery/java/nu/marginalia/service/discovery/property/ServiceKey.java @@ -0,0 +1,69 @@ +package nu.marginalia.service.discovery.property; + +import io.grpc.ServiceDescriptor; +import nu.marginalia.service.id.ServiceId; + +public sealed interface ServiceKey

{ + String toPath(); + + static ServiceKey forRest(ServiceId id) { + return new Rest(id.serviceName); + } + static ServiceKey forRest(ServiceId id, int node) { + if (node == 0) { + return forRest(id); + } + + return new Rest(id.serviceName + "-" + node); + } + + static Grpc forServiceDescriptor(ServiceDescriptor descriptor, ServicePartition partition) { + return new Grpc<>(descriptor.getName(), partition); + } + + static Grpc forGrpcApi(Class apiClass, P2 partition) { + try { + var name = apiClass.getField("SERVICE_NAME").get(null); + return new Grpc(name.toString(), partition); + } + catch (Exception e) { + throw new IllegalArgumentException("Could not get SERVICE_NAME from " + apiClass.getSimpleName(), e); + } + } + + + + Grpc forPartition(P2 partition); + + + record Rest(String name) implements ServiceKey { + public String toPath() { + return STR."/services/rest/\{name}"; + } + + @Override + public + + Grpc forPartition(P2 partition) + { + throw new UnsupportedOperationException(); + } + } + record Grpc

(String name, P partition) implements ServiceKey

{ + public String baseName() { + return STR."/services/grpc/\{name}"; + } + public String toPath() { + return STR."/services/grpc/\{name}/\{partition.identifier()}"; + } + + @Override + public + + Grpc forPartition(P2 partition) + { + return new Grpc<>(name, partition); + } + } + +} diff --git a/code/common/service-discovery/java/nu/marginalia/service/discovery/property/ServicePartition.java b/code/common/service-discovery/java/nu/marginalia/service/discovery/property/ServicePartition.java new file mode 100644 index 00000000..32aa37fb --- /dev/null +++ b/code/common/service-discovery/java/nu/marginalia/service/discovery/property/ServicePartition.java @@ -0,0 +1,29 @@ +package nu.marginalia.service.discovery.property; + +public sealed interface ServicePartition { + String identifier(); + + static Any any() { return new Any(); } + static Multi multi() { return new Multi(); } + static Partition partition(int node) { return new Partition(node); } + static None none() { return new None(); } + + record Any() implements ServicePartition, PartitionTraits.Grpc, PartitionTraits.Unicast { + public String identifier() { return "*"; } + + } + record Multi() implements ServicePartition, PartitionTraits.Grpc, PartitionTraits.Multicast { + public String identifier() { return "*"; } + + } + record Partition(int node) implements ServicePartition, PartitionTraits.Grpc, PartitionTraits.Unicast { + public String identifier() { + return Integer.toString(node); + } + + } + record None() implements ServicePartition, PartitionTraits.NoGrpc { + public String identifier() { return ""; } + + } +} diff --git a/code/common/service-discovery/src/main/java/nu/marginalia/service/id/ServiceId.java b/code/common/service-discovery/java/nu/marginalia/service/id/ServiceId.java similarity index 100% rename from code/common/service-discovery/src/main/java/nu/marginalia/service/id/ServiceId.java rename to code/common/service-discovery/java/nu/marginalia/service/id/ServiceId.java diff --git 
a/code/common/service-discovery/readme.md b/code/common/service-discovery/readme.md index c1480192..5e9fe24a 100644 --- a/code/common/service-discovery/readme.md +++ b/code/common/service-discovery/readme.md @@ -1,3 +1,149 @@ # Service Discovery -Contains classes for helping services discover each other. \ No newline at end of file +Contains classes for helping services discover each other, +and managing connections between them. + +## Service Registry + +The service registry is a class that keeps track of the services +that are currently running, and their connection information. + +The service registry implementation is based on [Zookeeper](https://zookeeper.apache.org/), +which is a distributed coordination service. This lets services register +themselves and announce their liveness, and then discover each other. + +It supports multiple instances of a service running, and +supports running the system bare-metal, where it will assign +ports to the services from a range. + +* REST services are registered on a per-node basis, and are always non-partitioned. +* gRPC services are registered on a per-api basis, and can be partitioned + or non-partitioned. This means that if a gRPC api is moved between nodes, + the clients will not need to be reconfigured. + +To be discoverable, the caller must first register their +services: + +```java +// Register one or more services +serviceRegistry.registerService( + ServiceKey.forRest(serviceId, nodeId), +instanceUuid, // unique +externalAddress); // bind-address + +// Non-partitioned GRPC service + serviceRegistry.registerService( + ServiceKey.forServiceDescriptor(descriptor, ServicePartition.any()), +instanceUuid, +externalAddress); + +// Partitioned GRPC service + serviceRegistry.registerService( + ServiceKey.forServiceDescriptor(descriptor, ServicePartition.partition(5)), +instanceUuid, +externalAddress); + +// (+ any other services) +``` + +Then, the caller must announce their instance. 
Before this is done, +the service is not discoverable. + +```java +registry.announceInstance(instanceUuid); +``` + +All of this is done automatically by the `Service` base class +in the [service](../service/) module. + +To discover a service, the caller can query the registry: + +```java +Set<InstanceAddress> endpoints = registry.getEndpoints(serviceKey); +``` + +It's also possible to subscribe to changes in the registry, so that +the caller can be notified when a service comes or goes, with `registry.registerMonitor()`. + +However, the `GrpcChannelPoolFactory` is a more convenient way to access the services: +it lets the caller create a pool of channels to the services, which then manages their +lifecycle, listens to lifecycle notifications, and so on. + +## gRPC Channel Pool + +From the [GrpcChannelPoolFactory](java/nu/marginalia/service/client/GrpcChannelPoolFactory.java), two types of channel pools can be created +that are aware of the service registry: + +* [GrpcMultiNodeChannelPool](java/nu/marginalia/service/client/GrpcMultiNodeChannelPool.java) - This pool permits 1-n style communication with partitioned services +* [GrpcSingleNodeChannelPool](java/nu/marginalia/service/client/GrpcSingleNodeChannelPool.java) - This pool permits 1-1 style communication with non-partitioned services. + If multiple instances are running, it will use one of them and fall back + to another if the first is not available. + +The pools can generate calls to the gRPC services, and will manage the lifecycle of the channels. + +The API is designed to be simple to use, and will permit the caller to access the Stub interfaces +for the services through a fluent API. 
+ +### Example Usage of the GrpcSingleNodeChannelPool + +```java +// create a pool for a non-partitioned service +channelPool = factory.createSingle( + ServiceKey.forGrpcApi(MathApiGrpc.class, ServicePartition.any()), + MathApiGrpc::newBlockingStub); + +// blocking call +Response response = channelPool + .call(MathApiGrpc.MathApiBlockingStub::dictionaryLookup) + .run(request); + +// sequential blocking calls +List<Response> response = channelPool + .call(MathApiGrpc.MathApiBlockingStub::dictionaryLookup) + .runFor(request1, request2); + + +// async call +Future<Response> response = channelPool + .call(MathApiGrpc.MathApiBlockingStub::dictionaryLookup) + .async(myExecutor) + .run(request); + +// multiple async calls +Future<List<Response>> response = channelPool + .call(MathApiGrpc.MathApiBlockingStub::dictionaryLookup) + .async(myExecutor) + .runFor(request1, request2); +``` + +### Example Usage of the GrpcMultiNodeChannelPool + +```java +// create a pool for a partitioned service +channelPool = factory.createMulti( + ServiceKey.forGrpcApi(MathApiGrpc.class, ServicePartition.multi()), + MathApiGrpc::newBlockingStub); + +// blocking call +List<Response> response = channelPool + .call(MathApiGrpc.MathApiBlockingStub::dictionaryLookup) + .run(request); + +// async call +Future<List<Response>> response = channelPool + .call(MathApiGrpc.MathApiBlockingStub::dictionaryLookup) + .async(myExecutor) + .runEach(request); + +// async call, will fail or succeed as a group +Future<List<Response>> response = channelPool + .call(MathApiGrpc.MathApiBlockingStub::dictionaryLookup) + .async(myExecutor) + .runAll(request1, request2); +``` + + +### Central Classes + +* [ServiceRegistryIf](java/nu/marginalia/service/discovery/ServiceRegistryIf.java) +* [ZkServiceRegistry](java/nu/marginalia/service/discovery/ZkServiceRegistry.java) \ No newline at end of file diff --git a/code/common/service-discovery/src/main/java/nu/marginalia/service/SearchServiceDescriptors.java 
b/code/common/service-discovery/src/main/java/nu/marginalia/service/SearchServiceDescriptors.java deleted file mode 100644 index d5f7fb8d..00000000 --- a/code/common/service-discovery/src/main/java/nu/marginalia/service/SearchServiceDescriptors.java +++ /dev/null @@ -1,21 +0,0 @@ -package nu.marginalia.service; - -import nu.marginalia.service.descriptor.ServiceDescriptor; -import nu.marginalia.service.descriptor.ServiceDescriptors; -import nu.marginalia.service.id.ServiceId; - -import java.util.List; - -public class SearchServiceDescriptors { - public static ServiceDescriptors descriptors = new ServiceDescriptors( - List.of(new ServiceDescriptor(ServiceId.Api), - new ServiceDescriptor(ServiceId.Index), - new ServiceDescriptor(ServiceId.Query), - new ServiceDescriptor(ServiceId.Search), - new ServiceDescriptor(ServiceId.Executor), - new ServiceDescriptor(ServiceId.Assistant), - new ServiceDescriptor(ServiceId.Dating), - new ServiceDescriptor(ServiceId.Explorer), - new ServiceDescriptor(ServiceId.Control) - )); -} diff --git a/code/common/service-discovery/src/main/java/nu/marginalia/service/descriptor/ServiceDescriptor.java b/code/common/service-discovery/src/main/java/nu/marginalia/service/descriptor/ServiceDescriptor.java deleted file mode 100644 index 68008bb3..00000000 --- a/code/common/service-discovery/src/main/java/nu/marginalia/service/descriptor/ServiceDescriptor.java +++ /dev/null @@ -1,33 +0,0 @@ -package nu.marginalia.service.descriptor; - -import nu.marginalia.service.id.ServiceId; - -public class ServiceDescriptor { - public final ServiceId id; - public final String name; - - public ServiceDescriptor(ServiceId id) { - this.id = id; - this.name = id.serviceName; - } - - public ServiceDescriptor(ServiceId id, String host) { - this.id = id; - this.name = host; - } - - public String getHostName(int node) { - if (node > 0) - return name + "-" + node; - - return name; - } - - public String toString() { - return name; - } - - public String describeService() { 
- return String.format("%s", name); - } -} diff --git a/code/common/service-discovery/src/main/java/nu/marginalia/service/descriptor/ServiceDescriptors.java b/code/common/service-discovery/src/main/java/nu/marginalia/service/descriptor/ServiceDescriptors.java deleted file mode 100644 index 88f2a693..00000000 --- a/code/common/service-discovery/src/main/java/nu/marginalia/service/descriptor/ServiceDescriptors.java +++ /dev/null @@ -1,33 +0,0 @@ -package nu.marginalia.service.descriptor; - -import nu.marginalia.service.SearchServiceDescriptors; -import nu.marginalia.service.id.ServiceId; - -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; -import java.util.Objects; - -/** @see SearchServiceDescriptors */ -public class ServiceDescriptors { - private final Map descriptorsAll = new LinkedHashMap<>(); - - public ServiceDescriptors() { - - } - - public ServiceDescriptors(List descriptors) { - descriptors.forEach(d -> descriptorsAll.put(d.id, d)); - } - - public ServiceDescriptor[] values() { - return descriptorsAll.values().toArray(ServiceDescriptor[]::new); - } - - public ServiceDescriptor forId(ServiceId id) { - return Objects.requireNonNull(descriptorsAll.get(id), - "No service descriptor defined for " + id + " -- did you forget to " - + "bind(ServiceDescriptors.class).toInstance(SearchServiceDescriptors.descriptors); ?"); - } - -} diff --git a/code/common/service-discovery/test/nu/marginalia/service/discovery/ZkServiceRegistryTest.java b/code/common/service-discovery/test/nu/marginalia/service/discovery/ZkServiceRegistryTest.java new file mode 100644 index 00000000..3ea107e1 --- /dev/null +++ b/code/common/service-discovery/test/nu/marginalia/service/discovery/ZkServiceRegistryTest.java @@ -0,0 +1,176 @@ +package nu.marginalia.service.discovery; + +import nu.marginalia.api.math.MathApiGrpc; +import nu.marginalia.service.discovery.property.ServiceKey; +import nu.marginalia.service.discovery.property.ServicePartition; +import 
nu.marginalia.service.id.ServiceId; +import org.apache.curator.framework.CuratorFrameworkFactory; +import org.apache.curator.retry.ExponentialBackoffRetry; +import org.junit.jupiter.api.*; +import org.junit.jupiter.api.parallel.Execution; +import org.junit.jupiter.api.parallel.ExecutionMode; +import org.testcontainers.containers.GenericContainer; +import org.testcontainers.junit.jupiter.Testcontainers; + +import java.util.*; + +import static org.junit.jupiter.api.Assertions.*; + +@Testcontainers +@Execution(ExecutionMode.SAME_THREAD) +@Tag("slow") +class ZkServiceRegistryTest { + private static final int ZOOKEEPER_PORT = 2181; + private static final GenericContainer zookeeper = + new GenericContainer<>("zookeeper:3.8.0") + .withExposedPorts(ZOOKEEPER_PORT); + + List registries = new ArrayList<>(); + String connectString; + @BeforeEach + public void setUp() { + zookeeper.start(); + connectString = STR."\{zookeeper.getHost()}:\{zookeeper.getMappedPort(ZOOKEEPER_PORT)}"; + } + + @AfterEach + public void tearDown() { + for (var registry : registries) { + registry.shutDown(); + } + zookeeper.stop(); + + System.clearProperty("service.random-port"); + + } + + ZkServiceRegistry createRegistry() { + return new ZkServiceRegistry(CuratorFrameworkFactory.newClient( + connectString, + new ExponentialBackoffRetry(10, 10, 1000))); + } + + @Test + @Disabled // flaky on CI + void getPort() { + System.setProperty("service.random-port", "true"); + + var registry1 = createRegistry(); + var registry2 = createRegistry(); + + List ports = new ArrayList<>(); + Set portsSet = new HashSet<>(); + + var key = ServiceKey.forRest(ServiceId.Search, 0); + + for (int i = 0; i < 500; i++) { + int port = registry1.requestPort("127.0.0.1", key); + ports.add(port); + + // Ensure we get unique ports + assertTrue(portsSet.add(port)); + } + for (int i = 0; i < 50; i++) { + int port = registry2.requestPort("127.0.0.1", key); + ports.add(port); + + // Ensure we get unique ports + 
assertTrue(portsSet.add(port)); + } + registry1.shutDown(); + for (int i = 0; i < 500; i++) { + // Verify we can reclaim ports + ports.add(registry2.requestPort("127.0.0.1", key)); + } + assertEquals(1050, ports.size()); + } + + @Test + void getInstancesRestgRPC() throws Exception { + var uuid1 = UUID.randomUUID(); + var uuid2 = UUID.randomUUID(); + + var registry1 = createRegistry(); + var registry2 = createRegistry(); + + var key1 = ServiceKey.forRest(ServiceId.Search, 0); + var key2 = ServiceKey.forGrpcApi(MathApiGrpc.class, ServicePartition.any()); + + var endpoint1 = registry1.registerService(key1, uuid1, "127.0.0.1"); + var endpoint2 = registry2.registerService(key2, uuid2, "127.0.0.2"); + + registry1.announceInstance(uuid1); + registry2.announceInstance(uuid2); + + assertEquals(Set.of(endpoint1.asInstance(uuid1)), + registry1.getEndpoints(key1)); + + assertEquals(Set.of(endpoint2.asInstance(uuid2)), + registry1.getEndpoints(key2)); + + registry1.shutDown(); + Thread.sleep(100); + + assertEquals(Set.of(), registry2.getEndpoints(key1)); + assertEquals(Set.of(endpoint2.asInstance(uuid2)), registry2.getEndpoints(key2)); + } + + @Test + void testInstancesTwoAny() throws Exception { + var uuid1 = UUID.randomUUID(); + var uuid2 = UUID.randomUUID(); + + var registry1 = createRegistry(); + var registry2 = createRegistry(); + + var key = ServiceKey.forGrpcApi(MathApiGrpc.class, ServicePartition.any()); + + var endpoint1 = registry1.registerService(key, uuid1, "127.0.0.1"); + var endpoint2 = registry2.registerService(key, uuid2, "127.0.0.2"); + + registry1.announceInstance(uuid1); + registry2.announceInstance(uuid2); + + assertEquals(Set.of(endpoint1.asInstance(uuid1), + endpoint2.asInstance(uuid2)), + registry1.getEndpoints(key)); + + registry1.shutDown(); + Thread.sleep(100); + + assertEquals(Set.of(endpoint2.asInstance(uuid2)), registry2.getEndpoints(key)); + } + + @Test + void testInstancesTwoPartitions() throws Exception { + var uuid1 = UUID.randomUUID(); + var 
uuid2 = UUID.randomUUID(); + + var registry1 = createRegistry(); + var registry2 = createRegistry(); + + var key1 = ServiceKey.forGrpcApi(MathApiGrpc.class, ServicePartition.partition(1)); + var key2 = ServiceKey.forGrpcApi(MathApiGrpc.class, ServicePartition.partition(2)); + + var endpoint1 = registry1.registerService(key1, uuid1, "127.0.0.1"); + var endpoint2 = registry2.registerService(key2, uuid2, "127.0.0.2"); + + registry1.announceInstance(uuid1); + registry2.announceInstance(uuid2); + + assertEquals(Set.of(endpoint1.asInstance(uuid1)), registry1.getEndpoints(key1)); + assertEquals(Set.of(endpoint2.asInstance(uuid2)), registry1.getEndpoints(key2)); + } + + @Test + public void announceLiveness() throws Exception { + var registry1 = createRegistry(); + var uuid1 = UUID.randomUUID(); + + assertFalse(registry1.isInstanceRunning(uuid1)); + registry1.announceInstance(uuid1); + assertTrue(registry1.isInstanceRunning(uuid1)); + + registry1.shutDown(); + } +} \ No newline at end of file diff --git a/code/common/service/build.gradle b/code/common/service/build.gradle index 64bb7cd5..75fab4ca 100644 --- a/code/common/service/build.gradle +++ b/code/common/service/build.gradle @@ -9,8 +9,9 @@ java { } } +apply from: "$rootProject.projectDir/srcsets.gradle" + dependencies { - implementation project(':code:common:service-client') implementation project(':code:common:service-discovery') implementation project(':code:libraries:message-queue') implementation project(':code:common:db') @@ -18,8 +19,9 @@ dependencies { implementation libs.spark implementation libs.guice - implementation libs.rxjava + implementation libs.bundles.curator + implementation libs.bundles.grpc implementation libs.bundles.prometheus implementation libs.bundles.slf4j implementation libs.bucket4j diff --git a/code/common/service/src/main/java/nu/marginalia/service/ConfigLoader.java b/code/common/service/java/nu/marginalia/service/ConfigLoader.java similarity index 93% rename from 
code/common/service/src/main/java/nu/marginalia/service/ConfigLoader.java rename to code/common/service/java/nu/marginalia/service/ConfigLoader.java index 44512264..a418655f 100644 --- a/code/common/service/src/main/java/nu/marginalia/service/ConfigLoader.java +++ b/code/common/service/java/nu/marginalia/service/ConfigLoader.java @@ -1,8 +1,6 @@ package nu.marginalia.service; import nu.marginalia.WmsaHome; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import java.io.IOException; import java.nio.file.Files; diff --git a/code/common/service/src/main/java/nu/marginalia/service/MainClass.java b/code/common/service/java/nu/marginalia/service/MainClass.java similarity index 60% rename from code/common/service/src/main/java/nu/marginalia/service/MainClass.java rename to code/common/service/java/nu/marginalia/service/MainClass.java index be3080fb..18bedc51 100644 --- a/code/common/service/src/main/java/nu/marginalia/service/MainClass.java +++ b/code/common/service/java/nu/marginalia/service/MainClass.java @@ -1,16 +1,10 @@ package nu.marginalia.service; import io.prometheus.client.hotspot.DefaultExports; -import io.reactivex.rxjava3.exceptions.UndeliverableException; -import io.reactivex.rxjava3.plugins.RxJavaPlugins; import nu.marginalia.service.id.ServiceId; -import nu.marginalia.client.exception.NetworkException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.net.SocketTimeoutException; -import java.net.UnknownHostException; - /** Each main class of a service should extend this class. * They must also invoke init() in their main method. 
*/ @@ -25,29 +19,8 @@ public abstract class MainClass { } public MainClass() { - RxJavaPlugins.setErrorHandler(this::handleError); } - protected void handleError(Throwable ex) { - if (ex instanceof UndeliverableException) { - ex = ex.getCause(); - } - - if (ex instanceof SocketTimeoutException) { - logger.warn("SocketTimeoutException"); - } - else if (ex instanceof UnknownHostException) { - logger.warn("UnknownHostException"); - } - else if (ex instanceof NetworkException) { - logger.warn("NetworkException", ex); - } - else { - logger.error("Uncaught exception", ex); - } - } - - protected static void init(ServiceId id, String... args) { System.setProperty("log4j2.isThreadContextMapInheritable", "true"); System.setProperty("isThreadContextMapInheritable", "true"); diff --git a/code/common/service/src/main/java/nu/marginalia/service/ProcessMainClass.java b/code/common/service/java/nu/marginalia/service/ProcessMainClass.java similarity index 100% rename from code/common/service/src/main/java/nu/marginalia/service/ProcessMainClass.java rename to code/common/service/java/nu/marginalia/service/ProcessMainClass.java diff --git a/code/common/service/src/main/java/nu/marginalia/service/control/FakeServiceHeartbeat.java b/code/common/service/java/nu/marginalia/service/control/FakeServiceHeartbeat.java similarity index 100% rename from code/common/service/src/main/java/nu/marginalia/service/control/FakeServiceHeartbeat.java rename to code/common/service/java/nu/marginalia/service/control/FakeServiceHeartbeat.java diff --git a/code/common/service/src/main/java/nu/marginalia/service/control/ServiceEventLog.java b/code/common/service/java/nu/marginalia/service/control/ServiceEventLog.java similarity index 100% rename from code/common/service/src/main/java/nu/marginalia/service/control/ServiceEventLog.java rename to code/common/service/java/nu/marginalia/service/control/ServiceEventLog.java diff --git 
a/code/common/service/src/main/java/nu/marginalia/service/control/ServiceHeartbeat.java b/code/common/service/java/nu/marginalia/service/control/ServiceHeartbeat.java similarity index 100% rename from code/common/service/src/main/java/nu/marginalia/service/control/ServiceHeartbeat.java rename to code/common/service/java/nu/marginalia/service/control/ServiceHeartbeat.java diff --git a/code/common/service/src/main/java/nu/marginalia/service/control/ServiceHeartbeatImpl.java b/code/common/service/java/nu/marginalia/service/control/ServiceHeartbeatImpl.java similarity index 99% rename from code/common/service/src/main/java/nu/marginalia/service/control/ServiceHeartbeatImpl.java rename to code/common/service/java/nu/marginalia/service/control/ServiceHeartbeatImpl.java index 800d6712..4a6f1c71 100644 --- a/code/common/service/src/main/java/nu/marginalia/service/control/ServiceHeartbeatImpl.java +++ b/code/common/service/java/nu/marginalia/service/control/ServiceHeartbeatImpl.java @@ -156,6 +156,8 @@ public class ServiceHeartbeatImpl implements ServiceHeartbeat { stmt.executeUpdate(); } } + + dataSource.close(); } } diff --git a/code/common/service/src/main/java/nu/marginalia/service/control/ServiceTaskHeartbeat.java b/code/common/service/java/nu/marginalia/service/control/ServiceTaskHeartbeat.java similarity index 100% rename from code/common/service/src/main/java/nu/marginalia/service/control/ServiceTaskHeartbeat.java rename to code/common/service/java/nu/marginalia/service/control/ServiceTaskHeartbeat.java diff --git a/code/common/service/src/main/java/nu/marginalia/service/control/ServiceTaskHeartbeatImpl.java b/code/common/service/java/nu/marginalia/service/control/ServiceTaskHeartbeatImpl.java similarity index 100% rename from code/common/service/src/main/java/nu/marginalia/service/control/ServiceTaskHeartbeatImpl.java rename to code/common/service/java/nu/marginalia/service/control/ServiceTaskHeartbeatImpl.java diff --git 
a/code/common/service/src/main/java/nu/marginalia/service/module/DatabaseModule.java b/code/common/service/java/nu/marginalia/service/module/DatabaseModule.java similarity index 86% rename from code/common/service/src/main/java/nu/marginalia/service/module/DatabaseModule.java rename to code/common/service/java/nu/marginalia/service/module/DatabaseModule.java index aa8b1203..15a70e57 100644 --- a/code/common/service/src/main/java/nu/marginalia/service/module/DatabaseModule.java +++ b/code/common/service/java/nu/marginalia/service/module/DatabaseModule.java @@ -6,7 +6,7 @@ import com.google.inject.Singleton; import com.zaxxer.hikari.HikariConfig; import com.zaxxer.hikari.HikariDataSource; import lombok.SneakyThrows; -import nu.marginalia.service.ServiceHomeNotConfiguredException; +import nu.marginalia.WmsaHome; import org.flywaydb.core.Flyway; import org.mariadb.jdbc.Driver; import org.slf4j.Logger; @@ -17,7 +17,6 @@ import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; import java.time.Duration; -import java.util.Optional; import java.util.Properties; public class DatabaseModule extends AbstractModule { @@ -51,7 +50,7 @@ public class DatabaseModule extends AbstractModule { } private Properties loadDbProperties() { - Path propDir = getHomePath().resolve("conf/db.properties"); + Path propDir = WmsaHome.getHomePath().resolve("conf/db.properties"); if (!Files.isRegularFile(propDir)) { throw new IllegalStateException("Database properties file " + propDir + " does not exist"); } @@ -72,17 +71,6 @@ public class DatabaseModule extends AbstractModule { } - public static Path getHomePath() { - var retStr = Optional.ofNullable(System.getenv("WMSA_HOME")).orElse("/var/lib/wmsa"); - - var ret = Path.of(retStr); - if (!Files.isDirectory(ret)) { - throw new ServiceHomeNotConfiguredException("Could not find WMSA_HOME, either set environment variable or ensure /var/lib/wmsa exists"); - } - return ret; - } - - @SneakyThrows @Singleton @Provides @@ -97,7 
+85,6 @@ public class DatabaseModule extends AbstractModule { try { HikariConfig config = new HikariConfig(); - config.setJdbcUrl(connStr); config.setUsername(dbProperties.getProperty(DB_USER_KEY)); config.setPassword(dbProperties.getProperty(DB_PASS_KEY)); diff --git a/code/common/service/src/main/java/nu/marginalia/service/module/ServiceConfiguration.java b/code/common/service/java/nu/marginalia/service/module/ServiceConfiguration.java similarity index 80% rename from code/common/service/src/main/java/nu/marginalia/service/module/ServiceConfiguration.java rename to code/common/service/java/nu/marginalia/service/module/ServiceConfiguration.java index 6e2b3399..e64c88a8 100644 --- a/code/common/service/src/main/java/nu/marginalia/service/module/ServiceConfiguration.java +++ b/code/common/service/java/nu/marginalia/service/module/ServiceConfiguration.java @@ -10,15 +10,14 @@ import java.util.UUID; * * @param serviceId - service descriptor * @param node - always 0 for now, for future service partitioning - * @param host - the bind address of the service - * @param port - main port of the service + * @param bindAddress - the bind address of the service * @param metricsPort - prometheus metrics server port * @param instanceUuid - unique identifier for this instance of the service */ public record ServiceConfiguration(ServiceId serviceId, int node, - String host, - int port, + String bindAddress, + String externalAddress, int metricsPort, UUID instanceUuid) { public String serviceName() { diff --git a/code/common/service/java/nu/marginalia/service/module/ServiceConfigurationModule.java b/code/common/service/java/nu/marginalia/service/module/ServiceConfigurationModule.java new file mode 100644 index 00000000..8ed7f45c --- /dev/null +++ b/code/common/service/java/nu/marginalia/service/module/ServiceConfigurationModule.java @@ -0,0 +1,88 @@ +package nu.marginalia.service.module; + +import com.google.inject.AbstractModule; +import com.google.inject.name.Names; +import 
nu.marginalia.service.id.ServiceId; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Objects; +import java.util.UUID; + +public class ServiceConfigurationModule extends AbstractModule { + private final ServiceId id; + private static final Logger logger = LoggerFactory.getLogger(ServiceConfigurationModule.class); + + public ServiceConfigurationModule(ServiceId id) { + this.id = id; + } + + public void configure() { + int node = getNode(); + + var configObject = new ServiceConfiguration(id, + node, + getBindAddress(), + getExternalHost(), + getPrometheusPort(), + UUID.randomUUID() + ); + + logger.info("Service configuration: {}", configObject); + + bind(Integer.class).annotatedWith(Names.named("wmsa-system-node")).toInstance(node); + bind(ServiceConfiguration.class).toInstance(configObject); + } + + private int getPrometheusPort() { + String prometheusPortEnv = System.getenv("WMSA_PROMETHEUS_PORT"); + + if (prometheusPortEnv != null) { + return Integer.parseInt(prometheusPortEnv); + } + + return 7000; + } + + private int getNode() { + String nodeEnv = Objects.requireNonNullElse(System.getenv("WMSA_SERVICE_NODE"), "0"); + + return Integer.parseInt(nodeEnv); + } + + /** Get the external host for the service. 
This is announced via the service registry, + * and should be an IP address or hostname that resolves to this machine */ + private String getExternalHost() { + // Check for an environment variable override + String configuredValue; + if (null != (configuredValue = System.getenv("SERVICE_HOST"))) { + return configuredValue; + } + + // Check for a system property override + if (null != (configuredValue = System.getProperty("service.host"))) { + return configuredValue; + } + + // If we're in docker, we'll use the hostname + if (Boolean.getBoolean("service.useDockerHostname")) { + return System.getenv("HOSTNAME"); + } + + // If we've not been told about a host, and we're not in docker, we'll fall back to localhost + // and hope the operator's remembered to enable random port assignment via zookeeper + return "127.0.0.1"; + } + + /** Get the bind address for the service. This is the address that the service will listen on. + */ + private String getBindAddress() { + String configuredValue = System.getProperty("service.bind-address"); + if (configuredValue != null) { + return configuredValue; + } + + return "127.0.0.1"; + } + +} diff --git a/code/common/service/src/main/java/nu/marginalia/service/server/BaseServiceParams.java b/code/common/service/java/nu/marginalia/service/server/BaseServiceParams.java similarity index 86% rename from code/common/service/src/main/java/nu/marginalia/service/server/BaseServiceParams.java rename to code/common/service/java/nu/marginalia/service/server/BaseServiceParams.java index 9db8d82e..b1661143 100644 --- a/code/common/service/src/main/java/nu/marginalia/service/server/BaseServiceParams.java +++ b/code/common/service/java/nu/marginalia/service/server/BaseServiceParams.java @@ -5,6 +5,7 @@ import com.google.inject.Singleton; import nu.marginalia.mq.MessageQueueFactory; import nu.marginalia.service.control.ServiceEventLog; import nu.marginalia.service.control.ServiceHeartbeatImpl; +import nu.marginalia.service.discovery.ServiceRegistryIf; 
import nu.marginalia.service.module.ServiceConfiguration; /** This class exists to reduce Service boilerplate */ @@ -15,19 +16,23 @@ public class BaseServiceParams { public final MetricsServer metricsServer; public final ServiceHeartbeatImpl heartbeat; public final ServiceEventLog eventLog; + public final ServiceRegistryIf serviceRegistry; public final MessageQueueFactory messageQueueInboxFactory; + @Inject public BaseServiceParams(ServiceConfiguration configuration, Initialization initialization, MetricsServer metricsServer, ServiceHeartbeatImpl heartbeat, ServiceEventLog eventLog, + ServiceRegistryIf serviceRegistry, MessageQueueFactory messageQueueInboxFactory) { this.configuration = configuration; this.initialization = initialization; this.metricsServer = metricsServer; this.heartbeat = heartbeat; this.eventLog = eventLog; + this.serviceRegistry = serviceRegistry; this.messageQueueInboxFactory = messageQueueInboxFactory; } } diff --git a/code/common/service/src/main/java/nu/marginalia/service/server/Initialization.java b/code/common/service/java/nu/marginalia/service/server/Initialization.java similarity index 85% rename from code/common/service/src/main/java/nu/marginalia/service/server/Initialization.java rename to code/common/service/java/nu/marginalia/service/server/Initialization.java index e75db6fe..717b49cf 100644 --- a/code/common/service/src/main/java/nu/marginalia/service/server/Initialization.java +++ b/code/common/service/java/nu/marginalia/service/server/Initialization.java @@ -8,9 +8,14 @@ import org.slf4j.LoggerFactory; import java.util.ArrayList; import java.util.List; +/** A class for delayed initialization of services. + *

+ * This is useful for tasks that need to be performed after the service has been + * fully initialized, such as registering with a service registry. + */ @Singleton public class Initialization { - boolean initialized; + private boolean initialized; private final Logger logger = LoggerFactory.getLogger(getClass()); private final List callbacks = new ArrayList<>(); @@ -25,7 +30,6 @@ public class Initialization { logger.info("Initialized"); initialized = true; notifyAll(); - } callbacks.forEach(Runnable::run); diff --git a/code/common/service/src/main/java/nu/marginalia/service/server/MetricsServer.java b/code/common/service/java/nu/marginalia/service/server/MetricsServer.java similarity index 100% rename from code/common/service/src/main/java/nu/marginalia/service/server/MetricsServer.java rename to code/common/service/java/nu/marginalia/service/server/MetricsServer.java diff --git a/code/common/service/src/main/java/nu/marginalia/service/server/NodeStatusWatcher.java b/code/common/service/java/nu/marginalia/service/server/NodeStatusWatcher.java similarity index 100% rename from code/common/service/src/main/java/nu/marginalia/service/server/NodeStatusWatcher.java rename to code/common/service/java/nu/marginalia/service/server/NodeStatusWatcher.java diff --git a/code/common/service/src/main/java/nu/marginalia/service/server/RateLimiter.java b/code/common/service/java/nu/marginalia/service/server/RateLimiter.java similarity index 74% rename from code/common/service/src/main/java/nu/marginalia/service/server/RateLimiter.java rename to code/common/service/java/nu/marginalia/service/server/RateLimiter.java index f9de1cb5..3e6b9c21 100644 --- a/code/common/service/src/main/java/nu/marginalia/service/server/RateLimiter.java +++ b/code/common/service/java/nu/marginalia/service/server/RateLimiter.java @@ -3,12 +3,9 @@ package nu.marginalia.service.server; import io.github.bucket4j.Bandwidth; import io.github.bucket4j.Bucket; import io.github.bucket4j.Refill; -import 
io.reactivex.rxjava3.schedulers.Schedulers; -import nu.marginalia.client.Context; import java.time.Duration; import java.util.Map; -import java.util.Optional; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.TimeUnit; @@ -23,7 +20,18 @@ public class RateLimiter { this.capacity = capacity; this.refillRate = refillRate; - Schedulers.io().schedulePeriodicallyDirect(this::cleanIdleBuckets, 30, 30, TimeUnit.MINUTES); + Thread.ofPlatform() + .name("rate-limiter-cleaner") + .start(() -> { + while (true) { + cleanIdleBuckets(); + try { + TimeUnit.MINUTES.sleep(30); + } catch (InterruptedException e) { + break; + } + } + }); } @@ -48,19 +56,11 @@ public class RateLimiter { bucketMap.clear(); } - public boolean isAllowed(Context ctx) { - if (!ctx.isPublic()) { // Internal server->server request - return true; - } - - return bucketMap.computeIfAbsent(ctx.getContextId(), - (ip) -> createBucket()).tryConsume(1); - } - public boolean isAllowed() { return bucketMap.computeIfAbsent("any", (ip) -> createBucket()).tryConsume(1); } + private Bucket createBucket() { var refill = Refill.greedy(1, Duration.ofSeconds(refillRate)); var bw = Bandwidth.classic(capacity, refill); diff --git a/code/common/service/src/main/java/nu/marginalia/service/server/Service.java b/code/common/service/java/nu/marginalia/service/server/Service.java similarity index 51% rename from code/common/service/src/main/java/nu/marginalia/service/server/Service.java rename to code/common/service/java/nu/marginalia/service/server/Service.java index 63552cf7..dcc514a6 100644 --- a/code/common/service/src/main/java/nu/marginalia/service/server/Service.java +++ b/code/common/service/java/nu/marginalia/service/server/Service.java @@ -1,10 +1,14 @@ package nu.marginalia.service.server; +import io.grpc.*; +import io.grpc.netty.shaded.io.grpc.netty.NettyServerBuilder; +import io.grpc.netty.shaded.io.netty.channel.nio.NioEventLoopGroup; +import 
io.grpc.netty.shaded.io.netty.channel.socket.nio.NioServerSocketChannel; import io.prometheus.client.Counter; -import nu.marginalia.client.Context; -import nu.marginalia.client.exception.MessagingException; +import lombok.SneakyThrows; import nu.marginalia.mq.inbox.*; -import nu.marginalia.service.server.mq.MqRequest; +import nu.marginalia.service.discovery.property.*; +import nu.marginalia.service.id.ServiceId; import nu.marginalia.service.server.mq.ServiceMqSubscription; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -14,7 +18,14 @@ import spark.Request; import spark.Response; import spark.Spark; +import java.net.InetSocketAddress; +import java.util.List; import java.util.Optional; +import java.util.concurrent.Executor; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.ThreadFactory; +import java.util.concurrent.atomic.AtomicInteger; public class Service { private final Logger logger = LoggerFactory.getLogger(getClass()); @@ -42,16 +53,39 @@ public class Service { protected final MqInboxIf messageQueueInbox; private final int node; + @SneakyThrows public Service(BaseServiceParams params, - Runnable configureStaticFiles - ) { + Runnable configureStaticFiles, + ServicePartition partition, + List grpcServices) { + this.initialization = params.initialization; var config = params.configuration; node = config.node(); + if (config.serviceId() == ServiceId.Control) { + // Special case for first boot, since the control service + // owns database migrations and so on, we need other processes + // to wait for this to be done before they start. This is + // only needed once. 
+ params.serviceRegistry.declareFirstBoot(); + } + else { + params.serviceRegistry.waitForFirstBoot(); + } + String inboxName = config.serviceName(); logger.info("Inbox name: {}", inboxName); + var serviceRegistry = params.serviceRegistry; + + var restEndpoint = + serviceRegistry.registerService( + ServiceKey.forRest(config.serviceId(), config.node()), + config.instanceUuid(), + config.externalAddress() + ); + var mqInboxFactory = params.messageQueueInboxFactory; messageQueueInbox = mqInboxFactory.createSynchronousInbox(inboxName, config.node(), config.instanceUuid()); messageQueueInbox.subscribe(new ServiceMqSubscription(this)); @@ -61,50 +95,100 @@ public class Service { initialization.addCallback(params.heartbeat::start); initialization.addCallback(messageQueueInbox::start); initialization.addCallback(() -> params.eventLog.logEvent("SVC-INIT", serviceName + ":" + config.node())); + initialization.addCallback(() -> serviceRegistry.announceInstance(config.instanceUuid())); + + Thread.setDefaultUncaughtExceptionHandler((t, e) -> { + logger.error("Uncaught exception", e); + request_counter_err.labels(serviceName, Integer.toString(node)).inc(); + }); if (!initialization.isReady() && ! 
initialized ) { initialized = true; Spark.threadPool(32, 4, 60_000); - Spark.ipAddress(params.configuration.host()); - Spark.port(params.configuration.port()); - logger.info("{} Listening to {}:{}", getClass().getSimpleName(), - params.configuration.host(), - params.configuration.port()); + Spark.ipAddress(config.bindAddress()); + Spark.port(restEndpoint.port()); + + logger.info("{} Listening to {}:{} ({})", getClass().getSimpleName(), + params.configuration.bindAddress(), + restEndpoint.port(), + params.configuration.externalAddress()); configureStaticFiles.run(); Spark.before(this::auditRequestIn); Spark.before(this::filterPublicRequests); Spark.after(this::auditRequestOut); - Spark.exception(MessagingException.class, this::handleException); + // Live and ready endpoints Spark.get("/internal/ping", (rq,rp) -> "pong"); Spark.get("/internal/started", this::isInitialized); Spark.get("/internal/ready", this::isReady); - Spark.get("/public/who", (rq,rp) -> getClass().getSimpleName()); + + int port = params.serviceRegistry.requestPort(config.externalAddress(), new ServiceKey.Grpc<>("-", partition)); + + + int nThreads = Math.clamp(Runtime.getRuntime().availableProcessors() / 2, 2, 8); + + // Start the gRPC server + var grpcServerBuilder = NettyServerBuilder.forAddress(new InetSocketAddress(config.bindAddress(), port)) + .executor(namedExecutor("nettyExecutor", nThreads)) + .workerEventLoopGroup(new NioEventLoopGroup(nThreads, namedExecutor("Worker-ELG", nThreads))) + .bossEventLoopGroup(new NioEventLoopGroup(nThreads, namedExecutor("Boss-ELG", nThreads))) + .channelType(NioServerSocketChannel.class); + + for (var grpcService : grpcServices) { + var svc = grpcService.bindService(); + + params.serviceRegistry.registerService( + ServiceKey.forServiceDescriptor(svc.getServiceDescriptor(), partition), + config.instanceUuid(), + config.externalAddress() + ); + + grpcServerBuilder.addService(svc); + } + grpcServerBuilder.build().start(); } } + private ExecutorService 
namedExecutor(String name, int limit) { + return Executors.newFixedThreadPool( + limit, + new ThreadFactory() { + static final AtomicInteger threadNumber = new AtomicInteger(1); + @Override + public Thread newThread(Runnable r) { + var thread = new Thread(r, STR."\{name}[\{threadNumber.getAndIncrement()}]"); + thread.setDaemon(true); + return thread; + } + }); + } + + public Service(BaseServiceParams params, + ServicePartition partition, + List grpcServices) { + this(params, + Service::defaultSparkConfig, + partition, + grpcServices); + } + public Service(BaseServiceParams params) { - this(params, () -> { - // configureStaticFiles can't be an overridable method in Service because it may - // need to depend on parameters to the constructor, and super-constructors - // must run first - Spark.staticFiles.expireTime(3600); - Spark.staticFiles.header("Cache-control", "public"); - }); + this(params, + Service::defaultSparkConfig, + ServicePartition.any(), + List.of()); } - @MqRequest(endpoint = "SVC-READY") - public boolean mqIsReady() { - return initialization.isReady(); - } - - @MqRequest(endpoint = "SVC-PING") - public String mqPing() { - return "pong"; + private static void defaultSparkConfig() { + // configureStaticFiles can't be an overridable method in Service because it may + // need to depend on parameters to the constructor, and super-constructors + // must run first + Spark.staticFiles.expireTime(3600); + Spark.staticFiles.header("Cache-control", "public"); } private void filterPublicRequests(Request request, Response response) { @@ -149,16 +233,10 @@ public class Service { } private void auditRequestIn(Request request, Response response) { - // Paint context - paintThreadName(request, "req:"); - request_counter.labels(serviceName, Integer.toString(node)).inc(); } private void auditRequestOut(Request request, Response response) { - - paintThreadName(request, "rsp:"); - if (response.status() < 400) { request_counter_good.labels(serviceName, 
Integer.toString(node)).inc(); } @@ -170,21 +248,6 @@ public class Service { } - private void paintThreadName(Request request, String prefix) { - var ctx = Context.fromRequest(request); - Thread.currentThread().setName(prefix + ctx.getContextId()); - } - - protected void handleException(Exception ex, Request request, Response response) { - request_counter_err.labels(serviceName, Integer.toString(node)).inc(); - if (ex instanceof MessagingException) { - logger.error("{} {}", ex.getClass().getSimpleName(), ex.getMessage()); - } - else { - logger.error("Uncaught exception", ex); - } - } - /** Log the request on the HTTP log */ protected void logRequest(Request request) { String url = request.pathInfo(); @@ -192,7 +255,7 @@ public class Service { url = url + "?" + request.queryString(); } - logger.info(httpMarker, "PUBLIC {}: {} {}", Context.fromRequest(request).getContextId(), request.requestMethod(), url); + logger.info(httpMarker, "PUBLIC: {} {}", request.requestMethod(), url); } /** Log the response on the HTTP log */ diff --git a/code/common/service/src/main/java/nu/marginalia/service/server/StaticResources.java b/code/common/service/java/nu/marginalia/service/server/StaticResources.java similarity index 97% rename from code/common/service/src/main/java/nu/marginalia/service/server/StaticResources.java rename to code/common/service/java/nu/marginalia/service/server/StaticResources.java index ff684525..71460ecc 100644 --- a/code/common/service/src/main/java/nu/marginalia/service/server/StaticResources.java +++ b/code/common/service/java/nu/marginalia/service/server/StaticResources.java @@ -10,10 +10,6 @@ import spark.staticfiles.MimeType; import java.io.FileNotFoundException; import java.time.LocalDateTime; import java.time.ZoneOffset; -import java.util.Map; -import java.util.TreeMap; - -import static java.util.Map.entry; public class StaticResources { private final long startTime = LocalDateTime.now().toEpochSecond(ZoneOffset.UTC); diff --git 
a/code/common/service/src/main/java/nu/marginalia/service/server/mq/MqRequest.java b/code/common/service/java/nu/marginalia/service/server/mq/MqRequest.java similarity index 100% rename from code/common/service/src/main/java/nu/marginalia/service/server/mq/MqRequest.java rename to code/common/service/java/nu/marginalia/service/server/mq/MqRequest.java diff --git a/code/common/service/src/main/java/nu/marginalia/service/server/mq/ServiceMqSubscription.java b/code/common/service/java/nu/marginalia/service/server/mq/ServiceMqSubscription.java similarity index 100% rename from code/common/service/src/main/java/nu/marginalia/service/server/mq/ServiceMqSubscription.java rename to code/common/service/java/nu/marginalia/service/server/mq/ServiceMqSubscription.java diff --git a/code/common/service/readme.md b/code/common/service/readme.md index 91741dc0..14abfb07 100644 --- a/code/common/service/readme.md +++ b/code/common/service/readme.md @@ -21,8 +21,7 @@ public class FoobarMain extends MainClass { Injector injector = Guice.createInjector( new FoobarModule(), /* optional custom bindings go here */ new DatabaseModule(), - new ConfigurationModule(SearchServiceDescriptors.descriptors, - ServiceId.Foobar)); + new ConfigurationModule(ServiceId.Foobar)); injector.getInstance(FoobarMain.class); @@ -40,7 +39,7 @@ public class FoobarService extends Service { @Inject public FoobarService(BaseServiceParams params) { - super(params); + super(params, List.of(/* grpc services */)); // set up Spark endpoints here } @@ -51,5 +50,5 @@ Further the new service needs to be added to the `ServiceId` enum in [service-di ## Central Classes -* [MainClass](src/main/java/nu/marginalia/service/MainClass.java) bootstraps all executables -* [Service](src/main/java/nu/marginalia/service/server/Service.java) base class for all services. 
\ No newline at end of file +* [MainClass](java/nu/marginalia/service/MainClass.java) bootstraps all executables +* [Service](java/nu/marginalia/service/server/Service.java) base class for all services. \ No newline at end of file diff --git a/code/common/service/resources/log4j2-json.xml b/code/common/service/resources/log4j2-json.xml new file mode 100644 index 00000000..dd2fc6d2 --- /dev/null +++ b/code/common/service/resources/log4j2-json.xml @@ -0,0 +1,28 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/code/common/service/src/main/resources/log4j2-prod.xml b/code/common/service/resources/log4j2-prod.xml similarity index 96% rename from code/common/service/src/main/resources/log4j2-prod.xml rename to code/common/service/resources/log4j2-prod.xml index 43150a2e..01c914ac 100644 --- a/code/common/service/src/main/resources/log4j2-prod.xml +++ b/code/common/service/resources/log4j2-prod.xml @@ -21,6 +21,8 @@ + + diff --git a/code/common/service/src/main/resources/log4j2-test.xml b/code/common/service/resources/log4j2-test.xml similarity index 96% rename from code/common/service/src/main/resources/log4j2-test.xml rename to code/common/service/resources/log4j2-test.xml index 3158f632..8fd0b262 100644 --- a/code/common/service/src/main/resources/log4j2-test.xml +++ b/code/common/service/resources/log4j2-test.xml @@ -20,6 +20,8 @@ + + diff --git a/code/common/service/src/main/java/nu/marginalia/service/module/ServiceConfigurationModule.java b/code/common/service/src/main/java/nu/marginalia/service/module/ServiceConfigurationModule.java deleted file mode 100644 index 5ea41d47..00000000 --- a/code/common/service/src/main/java/nu/marginalia/service/module/ServiceConfigurationModule.java +++ /dev/null @@ -1,67 +0,0 @@ -package nu.marginalia.service.module; - -import com.google.inject.AbstractModule; -import com.google.inject.name.Names; -import nu.marginalia.service.descriptor.ServiceDescriptors; -import 
nu.marginalia.service.id.ServiceId; - -import java.util.Objects; -import java.util.UUID; - -public class ServiceConfigurationModule extends AbstractModule { - private final ServiceDescriptors descriptors; - private final ServiceId id; - - public ServiceConfigurationModule(ServiceDescriptors descriptors, ServiceId id) { - this.descriptors = descriptors; - this.id = id; - } - - public void configure() { - bind(ServiceDescriptors.class).toInstance(descriptors); - - int node = getNode(); - - var configObject = new ServiceConfiguration(id, - node, - getHost(), - getBasePort(), - getPrometheusPort(), - UUID.randomUUID() - ); - - bind(Integer.class).annotatedWith(Names.named("wmsa-system-node")).toInstance(node); - bind(ServiceConfiguration.class).toInstance(configObject); - } - - private int getBasePort() { - String port = System.getenv("WMSA_SERVICE_PORT"); - - if (port != null) { - return Integer.parseInt(port); - } - - return 80; - } - - private int getPrometheusPort() { - String prometheusPortEnv = System.getenv("WMSA_PROMETHEUS_PORT"); - - if (prometheusPortEnv != null) { - return Integer.parseInt(prometheusPortEnv); - } - - return 7000; - } - - private int getNode() { - String nodeEnv = Objects.requireNonNullElse(System.getenv("WMSA_SERVICE_NODE"), "0"); - - return Integer.parseInt(nodeEnv); - } - - private String getHost() { - return System.getProperty("service-host", "127.0.0.1"); - } - -} diff --git a/code/api/executor-api/build.gradle b/code/execution/api/build.gradle similarity index 84% rename from code/api/executor-api/build.gradle rename to code/execution/api/build.gradle index 257c0285..b31d87d3 100644 --- a/code/api/executor-api/build.gradle +++ b/code/execution/api/build.gradle @@ -4,39 +4,41 @@ plugins { id "com.google.protobuf" version "0.9.4" } +jar.archiveBaseName = 'execution-api' + java { toolchain { languageVersion.set(JavaLanguageVersion.of(21)) } } + apply from: "$rootProject.projectDir/protobuf.gradle" + sourceSets { main { proto { - srcDir 
'src/main/protobuf' + srcDir 'protobuf' } } } - +apply from: "$rootProject.projectDir/srcsets.gradle" dependencies { implementation project(':code:common:model') - implementation project(':code:api:index-api') + implementation project(':code:index:api') implementation project(':code:common:config') implementation project(':code:common:db') implementation project(':code:libraries:message-queue') implementation project(':code:common:service-discovery') - implementation project(':code:common:service-client') implementation libs.bundles.slf4j implementation libs.prometheus implementation libs.notnull implementation libs.guice - implementation libs.rxjava implementation libs.protobuf implementation libs.bundles.grpc implementation libs.javax.annotation diff --git a/code/execution/api/java/nu/marginalia/executor/client/ExecutorClient.java b/code/execution/api/java/nu/marginalia/executor/client/ExecutorClient.java new file mode 100644 index 00000000..91f890fa --- /dev/null +++ b/code/execution/api/java/nu/marginalia/executor/client/ExecutorClient.java @@ -0,0 +1,179 @@ +package nu.marginalia.executor.client; + +import com.google.inject.Inject; +import com.google.inject.Singleton; +import nu.marginalia.executor.model.ActorRunState; +import nu.marginalia.executor.model.ActorRunStates; +import nu.marginalia.executor.storage.FileStorageContent; +import nu.marginalia.executor.storage.FileStorageFile; +import nu.marginalia.executor.upload.UploadDirContents; +import nu.marginalia.executor.upload.UploadDirItem; +import nu.marginalia.functions.execution.api.*; +import nu.marginalia.service.client.GrpcChannelPoolFactory; +import nu.marginalia.service.client.GrpcMultiNodeChannelPool; +import nu.marginalia.service.discovery.ServiceRegistryIf; +import nu.marginalia.service.discovery.property.ServiceKey; +import nu.marginalia.service.discovery.property.ServicePartition; +import nu.marginalia.service.id.ServiceId; +import nu.marginalia.storage.model.FileStorageId; + +import 
org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.io.OutputStream; +import java.net.URISyntaxException; +import java.net.URLEncoder; +import java.nio.charset.StandardCharsets; +import java.util.List; + +import static nu.marginalia.functions.execution.api.ExecutorApiGrpc.*; + +@Singleton +public class ExecutorClient { + private final GrpcMultiNodeChannelPool channelPool; + private static final Logger logger = LoggerFactory.getLogger(ExecutorClient.class); + private final ServiceRegistryIf registry; + + @Inject + public ExecutorClient(ServiceRegistryIf registry, + GrpcChannelPoolFactory grpcChannelPoolFactory) + { + this.registry = registry; + this.channelPool = grpcChannelPoolFactory + .createMulti( + ServiceKey.forGrpcApi(ExecutorApiGrpc.class, ServicePartition.multi()), + ExecutorApiGrpc::newBlockingStub); + } + + public void startFsm(int node, String actorName) { + channelPool.call(ExecutorApiBlockingStub::startFsm) + .forNode(node) + .run(RpcFsmName.newBuilder() + .setActorName(actorName) + .build()); + + } + + public void stopFsm(int node, String actorName) { + channelPool.call(ExecutorApiBlockingStub::stopFsm) + .forNode(node) + .run(RpcFsmName.newBuilder() + .setActorName(actorName) + .build()); + } + + public void stopProcess(int node, String id) { + channelPool.call(ExecutorApiBlockingStub::stopProcess) + .forNode(node) + .run(RpcProcessId.newBuilder() + .setProcessId(id) + .build()); + + } + + public void calculateAdjacencies(int node) { + channelPool.call(ExecutorApiBlockingStub::calculateAdjacencies) + .forNode(node) + .run(Empty.getDefaultInstance()); + } + + + + public void downloadSampleData(int node, String sampleSet) { + channelPool.call(ExecutorApiBlockingStub::downloadSampleData) + .forNode(node) + .run(RpcDownloadSampleData.newBuilder() + .setSampleSet(sampleSet) + .build()); + } + + public void restoreBackup(int nodeId, FileStorageId toLoad) { + channelPool.call(ExecutorApiBlockingStub::restoreBackup) 
+ .forNode(nodeId) + .run(RpcFileStorageId.newBuilder() + .setFileStorageId(toLoad.id()) + .build()); + } + + public ActorRunStates getActorStates(int node) { + try { + var rs = channelPool.call(ExecutorApiBlockingStub::getActorStates) + .forNode(node) + .run(Empty.getDefaultInstance()); + var states = rs.getActorRunStatesList().stream() + .map(r -> new ActorRunState( + r.getActorName(), + r.getState(), + r.getActorDescription(), + r.getStateDescription(), + r.getTerminal(), + r.getCanStart()) + ) + .toList(); + + return new ActorRunStates(node, states); + } + catch (Exception ex) { + logger.warn("Failed to get actor states", ex); + + // Return an empty list of states to avoid breaking the UI when a node is down + return new ActorRunStates(node, List.of()); + } + } + + public UploadDirContents listSideloadDir(int node) { + try { + var rs = channelPool.call(ExecutorApiBlockingStub::listSideloadDir) + .forNode(node) + .run(Empty.getDefaultInstance()); + var items = rs.getEntriesList().stream() + .map(i -> new UploadDirItem(i.getName(), i.getLastModifiedTime(), i.getIsDirectory(), i.getSize())) + .toList(); + return new UploadDirContents(rs.getPath(), items); + } + catch (Exception ex) { + logger.warn("Failed to list sideload dir", ex); + + // Return an empty list of items to avoid breaking the UI when a node is down + return new UploadDirContents("", List.of()); + } + } + + public FileStorageContent listFileStorage(int node, FileStorageId fileId) { + try { + var rs = channelPool.call(ExecutorApiBlockingStub::listFileStorage) + .forNode(node) + .run(RpcFileStorageId.newBuilder() + .setFileStorageId(fileId.id()) + .build() + ); + + return new FileStorageContent(rs.getEntriesList().stream() + .map(e -> new FileStorageFile(e.getName(), e.getSize(), e.getLastModifiedTime())) + .toList()); + } + catch (Exception ex) { + logger.warn("Failed to list file storage", ex); + + // Return an empty list of items to avoid breaking the UI when a node is down + return new 
FileStorageContent(List.of()); + } + } + + public void transferFile(int node, FileStorageId fileId, String path, OutputStream destOutputStream) { + String uriPath = STR."/transfer/file/\{fileId.id()}"; + String uriQuery = STR."path=\{URLEncoder.encode(path, StandardCharsets.UTF_8)}"; + + var service = registry.getEndpoints(ServiceKey.forRest(ServiceId.Executor, node)) + .stream().findFirst().orElseThrow(); + + try (var urlStream = service.endpoint().toURL(uriPath, uriQuery).openStream()) { + urlStream.transferTo(destOutputStream); + } + catch (IOException | URISyntaxException ex) { + throw new RuntimeException(ex); + } + } + +} diff --git a/code/execution/api/java/nu/marginalia/executor/client/ExecutorCrawlClient.java b/code/execution/api/java/nu/marginalia/executor/client/ExecutorCrawlClient.java new file mode 100644 index 00000000..b037702d --- /dev/null +++ b/code/execution/api/java/nu/marginalia/executor/client/ExecutorCrawlClient.java @@ -0,0 +1,80 @@ +package nu.marginalia.executor.client; + +import com.google.inject.Inject; +import com.google.inject.Singleton; +import nu.marginalia.functions.execution.api.*; +import nu.marginalia.service.client.GrpcChannelPoolFactory; +import nu.marginalia.service.client.GrpcMultiNodeChannelPool; +import nu.marginalia.service.discovery.property.ServiceKey; +import nu.marginalia.service.discovery.property.ServicePartition; +import nu.marginalia.storage.model.FileStorageId; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import java.util.List; + +import static nu.marginalia.functions.execution.api.ExecutorCrawlApiGrpc.*; + +@Singleton +public class ExecutorCrawlClient { + private final GrpcMultiNodeChannelPool channelPool; + private static final Logger logger = LoggerFactory.getLogger(ExecutorCrawlClient.class); + + @Inject + public ExecutorCrawlClient(GrpcChannelPoolFactory grpcChannelPoolFactory) + { + this.channelPool = grpcChannelPoolFactory + .createMulti( + ServiceKey.forGrpcApi(ExecutorCrawlApiGrpc.class, 
ServicePartition.multi()), + ExecutorCrawlApiGrpc::newBlockingStub); + } + + public void triggerCrawl(int node, FileStorageId fid) { + channelPool.call(ExecutorCrawlApiBlockingStub::triggerCrawl) + .forNode(node) + .run(RpcFileStorageId.newBuilder() + .setFileStorageId(fid.id()) + .build()); + } + + public void triggerRecrawl(int node, FileStorageId fid) { + channelPool.call(ExecutorCrawlApiBlockingStub::triggerRecrawl) + .forNode(node) + .run(RpcFileStorageId.newBuilder() + .setFileStorageId(fid.id()) + .build()); + } + + public void triggerConvert(int node, FileStorageId fid) { + channelPool.call(ExecutorCrawlApiBlockingStub::triggerConvert) + .forNode(node) + .run(RpcFileStorageId.newBuilder() + .setFileStorageId(fid.id()) + .build()); + } + + public void triggerConvertAndLoad(int node, FileStorageId fid) { + channelPool.call(ExecutorCrawlApiBlockingStub::triggerConvertAndLoad) + .forNode(node) + .run(RpcFileStorageId.newBuilder() + .setFileStorageId(fid.id()) + .build()); + } + + public void loadProcessedData(int node, List ids) { + channelPool.call(ExecutorCrawlApiBlockingStub::loadProcessedData) + .forNode(node) + .run(RpcFileStorageIds.newBuilder() + .addAllFileStorageIds(ids.stream().map(FileStorageId::id).toList()) + .build()); + } + + public void createCrawlSpecFromDownload(int node, String description, String url) { + channelPool.call(ExecutorCrawlApiBlockingStub::createCrawlSpecFromDownload) + .forNode(node) + .run(RpcCrawlSpecFromDownload.newBuilder() + .setDescription(description) + .setUrl(url) + .build()); + } + +} diff --git a/code/execution/api/java/nu/marginalia/executor/client/ExecutorExportClient.java b/code/execution/api/java/nu/marginalia/executor/client/ExecutorExportClient.java new file mode 100644 index 00000000..a3286a1b --- /dev/null +++ b/code/execution/api/java/nu/marginalia/executor/client/ExecutorExportClient.java @@ -0,0 +1,74 @@ +package nu.marginalia.executor.client; + +import com.google.inject.Inject; +import 
com.google.inject.Singleton; +import nu.marginalia.functions.execution.api.Empty; +import nu.marginalia.functions.execution.api.ExecutorExportApiGrpc; +import nu.marginalia.functions.execution.api.RpcExportSampleData; +import nu.marginalia.functions.execution.api.RpcFileStorageId; +import nu.marginalia.service.client.GrpcChannelPoolFactory; +import nu.marginalia.service.client.GrpcMultiNodeChannelPool; +import nu.marginalia.service.discovery.property.ServiceKey; +import nu.marginalia.service.discovery.property.ServicePartition; +import nu.marginalia.storage.model.FileStorageId; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import static nu.marginalia.functions.execution.api.ExecutorExportApiGrpc.ExecutorExportApiBlockingStub; + +@Singleton +public class ExecutorExportClient { + private final GrpcMultiNodeChannelPool channelPool; + private static final Logger logger = LoggerFactory.getLogger(ExecutorExportClient.class); + + @Inject + public ExecutorExportClient(GrpcChannelPoolFactory grpcChannelPoolFactory) + { + this.channelPool = grpcChannelPoolFactory + .createMulti( + ServiceKey.forGrpcApi(ExecutorExportApiGrpc.class, ServicePartition.multi()), + ExecutorExportApiGrpc::newBlockingStub); + } + + + public void exportAtags(int node, FileStorageId fid) { + channelPool.call(ExecutorExportApiBlockingStub::exportAtags) + .forNode(node) + .run(RpcFileStorageId.newBuilder() + .setFileStorageId(fid.id()) + .build()); + } + public void exportSampleData(int node, FileStorageId fid, int size, String name) { + channelPool.call(ExecutorExportApiBlockingStub::exportSampleData) + .forNode(node) + .run(RpcExportSampleData.newBuilder() + .setFileStorageId(fid.id()) + .setSize(size) + .setName(name) + .build()); + } + + public void exportRssFeeds(int node, FileStorageId fid) { + channelPool.call(ExecutorExportApiBlockingStub::exportRssFeeds) + .forNode(node) + .run(RpcFileStorageId.newBuilder() + .setFileStorageId(fid.id()) + .build()); + } + public void 
exportTermFrequencies(int node, FileStorageId fid) { + channelPool.call(ExecutorExportApiBlockingStub::exportTermFrequencies) + .forNode(node) + .run(RpcFileStorageId.newBuilder() + .setFileStorageId(fid.id()) + .build()); + } + + public void exportData(int node) { + channelPool.call(ExecutorExportApiBlockingStub::exportData) + .forNode(node) + .run(Empty.getDefaultInstance()); + } + + + +} diff --git a/code/api/executor-api/src/main/java/nu/marginalia/executor/client/ExecutorRemoteActorFactory.java b/code/execution/api/java/nu/marginalia/executor/client/ExecutorRemoteActorFactory.java similarity index 100% rename from code/api/executor-api/src/main/java/nu/marginalia/executor/client/ExecutorRemoteActorFactory.java rename to code/execution/api/java/nu/marginalia/executor/client/ExecutorRemoteActorFactory.java diff --git a/code/execution/api/java/nu/marginalia/executor/client/ExecutorSideloadClient.java b/code/execution/api/java/nu/marginalia/executor/client/ExecutorSideloadClient.java new file mode 100644 index 00000000..0848c009 --- /dev/null +++ b/code/execution/api/java/nu/marginalia/executor/client/ExecutorSideloadClient.java @@ -0,0 +1,71 @@ +package nu.marginalia.executor.client; + +import com.google.inject.Inject; +import com.google.inject.Singleton; +import nu.marginalia.functions.execution.api.*; +import nu.marginalia.service.client.GrpcChannelPoolFactory; +import nu.marginalia.service.client.GrpcMultiNodeChannelPool; +import nu.marginalia.service.discovery.property.ServiceKey; +import nu.marginalia.service.discovery.property.ServicePartition; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.nio.file.Path; + +import static nu.marginalia.functions.execution.api.ExecutorSideloadApiGrpc.ExecutorSideloadApiBlockingStub; + +@Singleton +public class ExecutorSideloadClient { + private final GrpcMultiNodeChannelPool channelPool; + private static final Logger logger = LoggerFactory.getLogger(ExecutorSideloadClient.class); + + @Inject + 
public ExecutorSideloadClient(GrpcChannelPoolFactory grpcChannelPoolFactory) + { + this.channelPool = grpcChannelPoolFactory + .createMulti( + ServiceKey.forGrpcApi(ExecutorSideloadApiGrpc.class, ServicePartition.multi()), + ExecutorSideloadApiGrpc::newBlockingStub); + } + + + public void sideloadEncyclopedia(int node, Path sourcePath, String baseUrl) { + channelPool.call(ExecutorSideloadApiBlockingStub::sideloadEncyclopedia) + .forNode(node) + .run(RpcSideloadEncyclopedia.newBuilder() + .setBaseUrl(baseUrl) + .setSourcePath(sourcePath.toString()) + .build()); + } + + public void sideloadDirtree(int node, Path sourcePath) { + channelPool.call(ExecutorSideloadApiBlockingStub::sideloadDirtree) + .forNode(node) + .run(RpcSideloadDirtree.newBuilder() + .setSourcePath(sourcePath.toString()) + .build()); + } + public void sideloadReddit(int node, Path sourcePath) { + channelPool.call(ExecutorSideloadApiBlockingStub::sideloadReddit) + .forNode(node) + .run(RpcSideloadReddit.newBuilder() + .setSourcePath(sourcePath.toString()) + .build()); + } + public void sideloadWarc(int node, Path sourcePath) { + channelPool.call(ExecutorSideloadApiBlockingStub::sideloadWarc) + .forNode(node) + .run(RpcSideloadWarc.newBuilder() + .setSourcePath(sourcePath.toString()) + .build()); + } + + public void sideloadStackexchange(int node, Path sourcePath) { + channelPool.call(ExecutorSideloadApiBlockingStub::sideloadStackexchange) + .forNode(node) + .run(RpcSideloadStackexchange.newBuilder() + .setSourcePath(sourcePath.toString()) + .build()); + } + +} diff --git a/code/api/executor-api/src/main/java/nu/marginalia/executor/model/ActorRunState.java b/code/execution/api/java/nu/marginalia/executor/model/ActorRunState.java similarity index 100% rename from code/api/executor-api/src/main/java/nu/marginalia/executor/model/ActorRunState.java rename to code/execution/api/java/nu/marginalia/executor/model/ActorRunState.java diff --git 
a/code/api/executor-api/src/main/java/nu/marginalia/executor/model/ActorRunStates.java b/code/execution/api/java/nu/marginalia/executor/model/ActorRunStates.java similarity index 100% rename from code/api/executor-api/src/main/java/nu/marginalia/executor/model/ActorRunStates.java rename to code/execution/api/java/nu/marginalia/executor/model/ActorRunStates.java diff --git a/code/api/executor-api/src/main/java/nu/marginalia/executor/storage/FileStorageContent.java b/code/execution/api/java/nu/marginalia/executor/storage/FileStorageContent.java similarity index 100% rename from code/api/executor-api/src/main/java/nu/marginalia/executor/storage/FileStorageContent.java rename to code/execution/api/java/nu/marginalia/executor/storage/FileStorageContent.java diff --git a/code/api/executor-api/src/main/java/nu/marginalia/executor/storage/FileStorageFile.java b/code/execution/api/java/nu/marginalia/executor/storage/FileStorageFile.java similarity index 100% rename from code/api/executor-api/src/main/java/nu/marginalia/executor/storage/FileStorageFile.java rename to code/execution/api/java/nu/marginalia/executor/storage/FileStorageFile.java diff --git a/code/api/executor-api/src/main/java/nu/marginalia/executor/upload/UploadDirContents.java b/code/execution/api/java/nu/marginalia/executor/upload/UploadDirContents.java similarity index 100% rename from code/api/executor-api/src/main/java/nu/marginalia/executor/upload/UploadDirContents.java rename to code/execution/api/java/nu/marginalia/executor/upload/UploadDirContents.java diff --git a/code/api/executor-api/src/main/java/nu/marginalia/executor/upload/UploadDirItem.java b/code/execution/api/java/nu/marginalia/executor/upload/UploadDirItem.java similarity index 100% rename from code/api/executor-api/src/main/java/nu/marginalia/executor/upload/UploadDirItem.java rename to code/execution/api/java/nu/marginalia/executor/upload/UploadDirItem.java diff --git a/code/api/executor-api/src/main/protobuf/executor-api.proto 
b/code/execution/api/src/main/protobuf/executor-api.proto similarity index 93% rename from code/api/executor-api/src/main/protobuf/executor-api.proto rename to code/execution/api/src/main/protobuf/executor-api.proto index 2c2756f2..31cffe9b 100644 --- a/code/api/executor-api/src/main/protobuf/executor-api.proto +++ b/code/execution/api/src/main/protobuf/executor-api.proto @@ -1,39 +1,47 @@ syntax="proto3"; -package actorapi; -option java_package="nu.marginalia.executor.api"; +package nu.marginalia.functions.execution.api; + +option java_package="nu.marginalia.functions.execution.api"; option java_multiple_files=true; service ExecutorApi { rpc startFsm(RpcFsmName) returns (Empty) {} rpc stopFsm(RpcFsmName) returns (Empty) {} - rpc stopProcess(RpcProcessId) returns (Empty) {} + rpc getActorStates(Empty) returns (RpcActorRunStates) {} + rpc listSideloadDir(Empty) returns (RpcUploadDirContents) {} + rpc listFileStorage(RpcFileStorageId) returns (RpcFileStorageContent) {} + + rpc downloadSampleData(RpcDownloadSampleData) returns (Empty) {} + rpc calculateAdjacencies(Empty) returns (Empty) {} + rpc restoreBackup(RpcFileStorageId) returns (Empty) {} +} + +service ExecutorCrawlApi { rpc triggerCrawl(RpcFileStorageId) returns (Empty) {} rpc triggerRecrawl(RpcFileStorageId) returns (Empty) {} rpc triggerConvert(RpcFileStorageId) returns (Empty) {} rpc triggerConvertAndLoad(RpcFileStorageId) returns (Empty) {} rpc loadProcessedData(RpcFileStorageIds) returns (Empty) {} - rpc calculateAdjacencies(Empty) returns (Empty) {} + rpc createCrawlSpecFromDownload(RpcCrawlSpecFromDownload) returns (Empty) {} +} +service ExecutorSideloadApi { rpc sideloadEncyclopedia(RpcSideloadEncyclopedia) returns (Empty) {} rpc sideloadDirtree(RpcSideloadDirtree) returns (Empty) {} rpc sideloadWarc(RpcSideloadWarc) returns (Empty) {} rpc sideloadReddit(RpcSideloadReddit) returns (Empty) {} rpc sideloadStackexchange(RpcSideloadStackexchange) returns (Empty) {} +} - rpc 
createCrawlSpecFromDownload(RpcCrawlSpecFromDownload) returns (Empty) {} +service ExecutorExportApi { rpc exportAtags(RpcFileStorageId) returns (Empty) {} rpc exportSampleData(RpcExportSampleData) returns (Empty) {} rpc exportRssFeeds(RpcFileStorageId) returns (Empty) {} rpc exportTermFrequencies(RpcFileStorageId) returns (Empty) {} - rpc downloadSampleData(RpcDownloadSampleData) returns (Empty) {} rpc exportData(Empty) returns (Empty) {} - rpc restoreBackup(RpcFileStorageId) returns (Empty) {} - rpc getActorStates(Empty) returns (RpcActorRunStates) {} - rpc listSideloadDir(Empty) returns (RpcUploadDirContents) {} - rpc listFileStorage(RpcFileStorageId) returns (RpcFileStorageContent) {} } message Empty {} diff --git a/code/execution/build.gradle b/code/execution/build.gradle new file mode 100644 index 00000000..a3cec39f --- /dev/null +++ b/code/execution/build.gradle @@ -0,0 +1,77 @@ +plugins { + id 'java' + id 'jvm-test-suite' +} + +java { + toolchain { + languageVersion.set(JavaLanguageVersion.of(21)) + } +} + +apply from: "$rootProject.projectDir/srcsets.gradle" + +dependencies { + // These look weird but they're needed to be able to spawn the processes + // from the executor service + + implementation project(':code:processes:website-adjacencies-calculator') + implementation project(':code:processes:crawling-process') + implementation project(':code:processes:loading-process') + implementation project(':code:processes:converting-process') + implementation project(':code:processes:index-constructor-process') + + implementation project(':code:common:config') + implementation project(':code:common:model') + implementation project(':code:common:process') + implementation project(':code:common:db') + implementation project(':code:common:linkdb') + + implementation project(':code:common:service') + implementation project(':code:common:service-discovery') + + implementation project(':third-party:commons-codec') + + implementation 
project(':code:libraries:message-queue') + + implementation project(':code:functions:link-graph:api') + implementation project(':code:execution:api') + + implementation project(':code:process-models:crawl-spec') + implementation project(':code:process-models:crawling-model') + implementation project(':code:features-crawl:link-parser') + implementation project(':code:features-convert:data-extractors') + implementation project(':code:features-convert:stackexchange-xml') + implementation project(':code:features-convert:reddit-json') + implementation project(':code:index:index-journal') + implementation project(':code:index:api') + implementation project(':code:process-mqapi') + implementation project(':third-party:encyclopedia-marginalia-nu') + + implementation libs.bundles.slf4j + + implementation libs.spark + implementation libs.bundles.grpc + implementation libs.gson + implementation libs.prometheus + implementation libs.notnull + implementation libs.guice + implementation libs.trove + implementation libs.protobuf + implementation libs.zstd + implementation libs.jsoup + implementation libs.commons.io + implementation libs.commons.compress + implementation libs.commons.lang3 + implementation libs.bundles.mariadb + + testImplementation libs.bundles.slf4j.test + testImplementation libs.bundles.junit + testImplementation libs.mockito + + testImplementation platform('org.testcontainers:testcontainers-bom:1.17.4') + testImplementation 'org.testcontainers:mariadb:1.17.4' + testImplementation 'org.testcontainers:junit-jupiter:1.17.4' + testImplementation project(':code:libraries:test-helpers') + +} \ No newline at end of file diff --git a/code/services-core/executor-service/src/main/java/nu/marginalia/actor/ActorApi.java b/code/execution/java/nu/marginalia/actor/ActorApi.java similarity index 60% rename from code/services-core/executor-service/src/main/java/nu/marginalia/actor/ActorApi.java rename to code/execution/java/nu/marginalia/actor/ActorApi.java index 
13923302..608e6ff3 100644 --- a/code/services-core/executor-service/src/main/java/nu/marginalia/actor/ActorApi.java +++ b/code/execution/java/nu/marginalia/actor/ActorApi.java @@ -2,11 +2,7 @@ package nu.marginalia.actor; import com.google.inject.Inject; import com.google.inject.Singleton; -import nu.marginalia.actor.state.ActorStateInstance; -import nu.marginalia.executor.api.RpcActorRunState; -import nu.marginalia.executor.api.RpcActorRunStates; -import nu.marginalia.executor.api.RpcFsmName; -import nu.marginalia.executor.api.RpcProcessId; +import nu.marginalia.functions.execution.api.*; import nu.marginalia.mq.MqMessageState; import nu.marginalia.mq.persistence.MqPersistence; import nu.marginalia.process.ProcessService; @@ -15,8 +11,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import spark.Spark; -import java.util.Comparator; - @Singleton public class ActorApi { private final ExecutorActorControlService actors; @@ -75,45 +69,6 @@ public class ActorApi { return "OK"; } - - public RpcActorRunStates getActorStates() { - var items = actors.getActorStates().entrySet().stream().map(e -> { - final var stateGraph = actors.getActorDefinition(e.getKey()); - - final ActorStateInstance state = e.getValue(); - final String actorDescription = stateGraph.describe(); - - final String machineName = e.getKey().name(); - final String stateName = state.name(); - - final String stateDescription = ""; - - final boolean terminal = state.isFinal(); - final boolean canStart = actors.isDirectlyInitializable(e.getKey()) && terminal; - - return RpcActorRunState - .newBuilder() - .setActorName(machineName) - .setState(stateName) - .setActorDescription(actorDescription) - .setStateDescription(stateDescription) - .setTerminal(terminal) - .setCanStart(canStart) - .build(); - - }) - .filter(s -> !s.getTerminal() || s.getCanStart()) - .sorted(Comparator.comparing(RpcActorRunState::getActorName)) - .toList(); - - return RpcActorRunStates.newBuilder() - 
.setNode(serviceConfiguration.node()) - .addAllActorRunStates(items) - .build(); - - } - - public ExecutorActor translateActor(String name) { try { return ExecutorActor.valueOf(name.toUpperCase()); diff --git a/code/services-core/executor-service/src/main/java/nu/marginalia/actor/ExecutorActor.java b/code/execution/java/nu/marginalia/actor/ExecutorActor.java similarity index 100% rename from code/services-core/executor-service/src/main/java/nu/marginalia/actor/ExecutorActor.java rename to code/execution/java/nu/marginalia/actor/ExecutorActor.java diff --git a/code/services-core/executor-service/src/main/java/nu/marginalia/actor/ExecutorActorControlService.java b/code/execution/java/nu/marginalia/actor/ExecutorActorControlService.java similarity index 100% rename from code/services-core/executor-service/src/main/java/nu/marginalia/actor/ExecutorActorControlService.java rename to code/execution/java/nu/marginalia/actor/ExecutorActorControlService.java diff --git a/code/services-core/executor-service/src/main/java/nu/marginalia/actor/ExecutorActorStateMachines.java b/code/execution/java/nu/marginalia/actor/ExecutorActorStateMachines.java similarity index 100% rename from code/services-core/executor-service/src/main/java/nu/marginalia/actor/ExecutorActorStateMachines.java rename to code/execution/java/nu/marginalia/actor/ExecutorActorStateMachines.java diff --git a/code/services-core/executor-service/src/main/java/nu/marginalia/actor/monitor/AbstractProcessSpawnerActor.java b/code/execution/java/nu/marginalia/actor/monitor/AbstractProcessSpawnerActor.java similarity index 100% rename from code/services-core/executor-service/src/main/java/nu/marginalia/actor/monitor/AbstractProcessSpawnerActor.java rename to code/execution/java/nu/marginalia/actor/monitor/AbstractProcessSpawnerActor.java diff --git a/code/services-core/executor-service/src/main/java/nu/marginalia/actor/monitor/FileStorageMonitorActor.java 
b/code/execution/java/nu/marginalia/actor/monitor/FileStorageMonitorActor.java similarity index 100% rename from code/services-core/executor-service/src/main/java/nu/marginalia/actor/monitor/FileStorageMonitorActor.java rename to code/execution/java/nu/marginalia/actor/monitor/FileStorageMonitorActor.java diff --git a/code/services-core/executor-service/src/main/java/nu/marginalia/actor/proc/ConverterMonitorActor.java b/code/execution/java/nu/marginalia/actor/proc/ConverterMonitorActor.java similarity index 100% rename from code/services-core/executor-service/src/main/java/nu/marginalia/actor/proc/ConverterMonitorActor.java rename to code/execution/java/nu/marginalia/actor/proc/ConverterMonitorActor.java diff --git a/code/services-core/executor-service/src/main/java/nu/marginalia/actor/proc/CrawlerMonitorActor.java b/code/execution/java/nu/marginalia/actor/proc/CrawlerMonitorActor.java similarity index 100% rename from code/services-core/executor-service/src/main/java/nu/marginalia/actor/proc/CrawlerMonitorActor.java rename to code/execution/java/nu/marginalia/actor/proc/CrawlerMonitorActor.java diff --git a/code/services-core/executor-service/src/main/java/nu/marginalia/actor/proc/IndexConstructorMonitorActor.java b/code/execution/java/nu/marginalia/actor/proc/IndexConstructorMonitorActor.java similarity index 100% rename from code/services-core/executor-service/src/main/java/nu/marginalia/actor/proc/IndexConstructorMonitorActor.java rename to code/execution/java/nu/marginalia/actor/proc/IndexConstructorMonitorActor.java diff --git a/code/services-core/executor-service/src/main/java/nu/marginalia/actor/proc/LoaderMonitorActor.java b/code/execution/java/nu/marginalia/actor/proc/LoaderMonitorActor.java similarity index 100% rename from code/services-core/executor-service/src/main/java/nu/marginalia/actor/proc/LoaderMonitorActor.java rename to code/execution/java/nu/marginalia/actor/proc/LoaderMonitorActor.java diff --git 
a/code/services-core/executor-service/src/main/java/nu/marginalia/actor/proc/ProcessLivenessMonitorActor.java b/code/execution/java/nu/marginalia/actor/proc/ProcessLivenessMonitorActor.java similarity index 100% rename from code/services-core/executor-service/src/main/java/nu/marginalia/actor/proc/ProcessLivenessMonitorActor.java rename to code/execution/java/nu/marginalia/actor/proc/ProcessLivenessMonitorActor.java diff --git a/code/services-core/executor-service/src/main/java/nu/marginalia/actor/task/ActorProcessWatcher.java b/code/execution/java/nu/marginalia/actor/task/ActorProcessWatcher.java similarity index 95% rename from code/services-core/executor-service/src/main/java/nu/marginalia/actor/task/ActorProcessWatcher.java rename to code/execution/java/nu/marginalia/actor/task/ActorProcessWatcher.java index 4bae8674..65d59f7e 100644 --- a/code/services-core/executor-service/src/main/java/nu/marginalia/actor/task/ActorProcessWatcher.java +++ b/code/execution/java/nu/marginalia/actor/task/ActorProcessWatcher.java @@ -39,6 +39,10 @@ public class ActorProcessWatcher { public MqMessage waitResponse(MqOutbox outbox, ProcessService.ProcessId processId, long msgId) throws ActorControlFlowException, InterruptedException, SQLException { + // enum values only have a single instance, + // so it's safe to synchronize on them + // even though it looks a bit weird to + // synchronize on a parameter like this: synchronized (processId) { // Wake up the process spawning actor processId.notifyAll(); diff --git a/code/services-core/executor-service/src/main/java/nu/marginalia/actor/task/ConvertActor.java b/code/execution/java/nu/marginalia/actor/task/ConvertActor.java similarity index 98% rename from code/services-core/executor-service/src/main/java/nu/marginalia/actor/task/ConvertActor.java rename to code/execution/java/nu/marginalia/actor/task/ConvertActor.java index 7a187f5f..f7d5cf1f 100644 --- 
a/code/services-core/executor-service/src/main/java/nu/marginalia/actor/task/ConvertActor.java +++ b/code/execution/java/nu/marginalia/actor/task/ConvertActor.java @@ -34,16 +34,23 @@ public class ConvertActor extends RecordActorPrototype { private final MqOutbox mqConverterOutbox; private final FileStorageService storageService; - public record Convert(FileStorageId fid) implements ActorStep {}; - public record ConvertEncyclopedia(String source, String baseUrl) implements ActorStep {}; - public record PredigestEncyclopedia(String source, String dest, String baseUrl) implements ActorStep {}; - public record ConvertDirtree(String source) implements ActorStep {}; - public record ConvertWarc(String source) implements ActorStep {}; - public record ConvertReddit(String source) implements ActorStep {}; - public record ConvertStackexchange(String source) implements ActorStep {}; + public record Convert(FileStorageId fid) implements ActorStep {} + + public record ConvertEncyclopedia(String source, String baseUrl) implements ActorStep {} + + public record PredigestEncyclopedia(String source, String dest, String baseUrl) implements ActorStep {} + + public record ConvertDirtree(String source) implements ActorStep {} + + public record ConvertWarc(String source) implements ActorStep {} + + public record ConvertReddit(String source) implements ActorStep {} + + public record ConvertStackexchange(String source) implements ActorStep {} + @Resume(behavior = ActorResumeBehavior.RETRY) public record ConvertWait(FileStorageId destFid, - long msgId) implements ActorStep {}; + long msgId) implements ActorStep {} @Override public ActorStep transition(ActorStep self) throws Exception { diff --git a/code/services-core/executor-service/src/main/java/nu/marginalia/actor/task/ConvertAndLoadActor.java b/code/execution/java/nu/marginalia/actor/task/ConvertAndLoadActor.java similarity index 97% rename from 
code/services-core/executor-service/src/main/java/nu/marginalia/actor/task/ConvertAndLoadActor.java rename to code/execution/java/nu/marginalia/actor/task/ConvertAndLoadActor.java index 62aa8929..085dffed 100644 --- a/code/services-core/executor-service/src/main/java/nu/marginalia/actor/task/ConvertAndLoadActor.java +++ b/code/execution/java/nu/marginalia/actor/task/ConvertAndLoadActor.java @@ -19,8 +19,8 @@ import nu.marginalia.svc.BackupService; import nu.marginalia.storage.FileStorageService; import nu.marginalia.storage.model.FileStorageId; import nu.marginalia.storage.model.FileStorageType; -import nu.marginalia.index.client.IndexClient; -import nu.marginalia.index.client.IndexMqEndpoints; +import nu.marginalia.index.api.IndexMqClient; +import nu.marginalia.index.api.IndexMqEndpoints; import nu.marginalia.mq.MqMessageState; import nu.marginalia.mq.outbox.MqOutbox; import nu.marginalia.mqapi.converting.ConvertRequest; @@ -58,9 +58,9 @@ public class ConvertAndLoadActor extends RecordActorPrototype { public List processedStorageId = null; public long converterMsgId = 0L; public long loaderMsgId = 0L; - }; + } - public record Initial(FileStorageId fid) implements ActorStep {}; + public record Initial(FileStorageId fid) implements ActorStep {} @Resume(behavior = ActorResumeBehavior.RETRY) public record Convert(FileStorageId crawlId, FileStorageId processedId, long msgId) implements ActorStep { @@ -69,7 +69,8 @@ public class ConvertAndLoadActor extends RecordActorPrototype { @Resume(behavior = ActorResumeBehavior.RETRY) public record Load(List processedId, long msgId) implements ActorStep { public Load(List processedId) { this(processedId, -1); } - }; + } + @Resume(behavior = ActorResumeBehavior.RETRY) public record Backup(List processedIds) implements ActorStep { } @Resume(behavior = ActorResumeBehavior.RETRY) @@ -195,7 +196,7 @@ public class ConvertAndLoadActor extends RecordActorPrototype { public ConvertAndLoadActor(ActorProcessWatcher processWatcher, 
ProcessOutboxes processOutboxes, FileStorageService storageService, - IndexClient indexClient, + IndexMqClient indexMqClient, BackupService backupService, Gson gson, NodeConfigurationService nodeConfigurationService, @@ -203,7 +204,7 @@ public class ConvertAndLoadActor extends RecordActorPrototype { { super(gson); this.processWatcher = processWatcher; - this.indexOutbox = indexClient.outbox(); + this.indexOutbox = indexMqClient.outbox(); this.mqConverterOutbox = processOutboxes.getConverterOutbox(); this.mqLoaderOutbox = processOutboxes.getLoaderOutbox(); this.mqIndexConstructorOutbox = processOutboxes.getIndexConstructorOutbox(); diff --git a/code/services-core/executor-service/src/main/java/nu/marginalia/actor/task/CrawlActor.java b/code/execution/java/nu/marginalia/actor/task/CrawlActor.java similarity index 100% rename from code/services-core/executor-service/src/main/java/nu/marginalia/actor/task/CrawlActor.java rename to code/execution/java/nu/marginalia/actor/task/CrawlActor.java diff --git a/code/services-core/executor-service/src/main/java/nu/marginalia/actor/task/CrawlJobExtractorActor.java b/code/execution/java/nu/marginalia/actor/task/CrawlJobExtractorActor.java similarity index 100% rename from code/services-core/executor-service/src/main/java/nu/marginalia/actor/task/CrawlJobExtractorActor.java rename to code/execution/java/nu/marginalia/actor/task/CrawlJobExtractorActor.java diff --git a/code/services-core/executor-service/src/main/java/nu/marginalia/actor/task/DownloadSampleActor.java b/code/execution/java/nu/marginalia/actor/task/DownloadSampleActor.java similarity index 100% rename from code/services-core/executor-service/src/main/java/nu/marginalia/actor/task/DownloadSampleActor.java rename to code/execution/java/nu/marginalia/actor/task/DownloadSampleActor.java diff --git a/code/services-core/executor-service/src/main/java/nu/marginalia/actor/task/ExportAtagsActor.java b/code/execution/java/nu/marginalia/actor/task/ExportAtagsActor.java 
similarity index 100% rename from code/services-core/executor-service/src/main/java/nu/marginalia/actor/task/ExportAtagsActor.java rename to code/execution/java/nu/marginalia/actor/task/ExportAtagsActor.java diff --git a/code/services-core/executor-service/src/main/java/nu/marginalia/actor/task/ExportDataActor.java b/code/execution/java/nu/marginalia/actor/task/ExportDataActor.java similarity index 94% rename from code/services-core/executor-service/src/main/java/nu/marginalia/actor/task/ExportDataActor.java rename to code/execution/java/nu/marginalia/actor/task/ExportDataActor.java index 042a3ec7..0a5d1056 100644 --- a/code/services-core/executor-service/src/main/java/nu/marginalia/actor/task/ExportDataActor.java +++ b/code/execution/java/nu/marginalia/actor/task/ExportDataActor.java @@ -6,7 +6,7 @@ import com.google.inject.Singleton; import com.zaxxer.hikari.HikariDataSource; import nu.marginalia.actor.prototype.RecordActorPrototype; import nu.marginalia.actor.state.ActorStep; -import nu.marginalia.query.client.QueryClient; +import nu.marginalia.api.linkgraph.AggregateLinkGraphClient; import nu.marginalia.storage.FileStorageService; import nu.marginalia.storage.model.FileStorageId; import nu.marginalia.storage.model.FileStorageType; @@ -32,7 +32,7 @@ public class ExportDataActor extends RecordActorPrototype { private final FileStorageService storageService; private final HikariDataSource dataSource; private final Logger logger = LoggerFactory.getLogger(getClass()); - private final QueryClient queryClient; + private final AggregateLinkGraphClient linkGraphClient; public record Export() implements ActorStep {} public record ExportBlacklist(FileStorageId fid) implements ActorStep {} @@ -54,7 +54,7 @@ public class ExportDataActor extends RecordActorPrototype { try (var bw = new BufferedWriter(new OutputStreamWriter(new GZIPOutputStream(Files.newOutputStream(tmpFile, StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING)))); var conn = 
dataSource.getConnection(); - var stmt = conn.prepareStatement("SELECT URL_DOMAIN FROM EC_DOMAIN_BLACKLIST"); + var stmt = conn.prepareStatement("SELECT URL_DOMAIN FROM EC_DOMAIN_BLACKLIST") ) { stmt.setFetchSize(1000); @@ -82,7 +82,7 @@ public class ExportDataActor extends RecordActorPrototype { try (var bw = new BufferedWriter(new OutputStreamWriter(new GZIPOutputStream(Files.newOutputStream(tmpFile, StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING)))); var conn = dataSource.getConnection(); - var stmt = conn.prepareStatement("SELECT DOMAIN_NAME, ID, INDEXED, STATE FROM EC_DOMAIN"); + var stmt = conn.prepareStatement("SELECT DOMAIN_NAME, ID, INDEXED, STATE FROM EC_DOMAIN") ) { stmt.setFetchSize(1000); @@ -114,7 +114,7 @@ public class ExportDataActor extends RecordActorPrototype { var tmpFile = Files.createTempFile(storage.asPath(), "export", ".csv.gz", PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("rw-r--r--"))); - var allLinks = queryClient.getAllDomainLinks(); + var allLinks = linkGraphClient.getAllDomainLinks(); try (var bw = new BufferedWriter(new OutputStreamWriter(new GZIPOutputStream(Files.newOutputStream(tmpFile, StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING))))) { @@ -154,12 +154,13 @@ public class ExportDataActor extends RecordActorPrototype { @Inject public ExportDataActor(Gson gson, FileStorageService storageService, - HikariDataSource dataSource, QueryClient queryClient) + HikariDataSource dataSource, + AggregateLinkGraphClient linkGraphClient) { super(gson); this.storageService = storageService; this.dataSource = dataSource; - this.queryClient = queryClient; + this.linkGraphClient = linkGraphClient; } } diff --git a/code/services-core/executor-service/src/main/java/nu/marginalia/actor/task/ExportFeedsActor.java b/code/execution/java/nu/marginalia/actor/task/ExportFeedsActor.java similarity index 100% rename from 
code/services-core/executor-service/src/main/java/nu/marginalia/actor/task/ExportFeedsActor.java rename to code/execution/java/nu/marginalia/actor/task/ExportFeedsActor.java diff --git a/code/services-core/executor-service/src/main/java/nu/marginalia/actor/task/ExportSampleDataActor.java b/code/execution/java/nu/marginalia/actor/task/ExportSampleDataActor.java similarity index 100% rename from code/services-core/executor-service/src/main/java/nu/marginalia/actor/task/ExportSampleDataActor.java rename to code/execution/java/nu/marginalia/actor/task/ExportSampleDataActor.java diff --git a/code/services-core/executor-service/src/main/java/nu/marginalia/actor/task/ExportTermFreqActor.java b/code/execution/java/nu/marginalia/actor/task/ExportTermFreqActor.java similarity index 100% rename from code/services-core/executor-service/src/main/java/nu/marginalia/actor/task/ExportTermFreqActor.java rename to code/execution/java/nu/marginalia/actor/task/ExportTermFreqActor.java diff --git a/code/services-core/executor-service/src/main/java/nu/marginalia/actor/task/RecrawlActor.java b/code/execution/java/nu/marginalia/actor/task/RecrawlActor.java similarity index 100% rename from code/services-core/executor-service/src/main/java/nu/marginalia/actor/task/RecrawlActor.java rename to code/execution/java/nu/marginalia/actor/task/RecrawlActor.java diff --git a/code/services-core/executor-service/src/main/java/nu/marginalia/actor/task/RestoreBackupActor.java b/code/execution/java/nu/marginalia/actor/task/RestoreBackupActor.java similarity index 100% rename from code/services-core/executor-service/src/main/java/nu/marginalia/actor/task/RestoreBackupActor.java rename to code/execution/java/nu/marginalia/actor/task/RestoreBackupActor.java diff --git a/code/services-core/executor-service/src/main/java/nu/marginalia/actor/task/TriggerAdjacencyCalculationActor.java b/code/execution/java/nu/marginalia/actor/task/TriggerAdjacencyCalculationActor.java similarity index 100% rename from 
code/services-core/executor-service/src/main/java/nu/marginalia/actor/task/TriggerAdjacencyCalculationActor.java rename to code/execution/java/nu/marginalia/actor/task/TriggerAdjacencyCalculationActor.java diff --git a/code/execution/java/nu/marginalia/execution/ExecutionInit.java b/code/execution/java/nu/marginalia/execution/ExecutionInit.java new file mode 100644 index 00000000..faaa16f8 --- /dev/null +++ b/code/execution/java/nu/marginalia/execution/ExecutionInit.java @@ -0,0 +1,26 @@ +package nu.marginalia.execution; + +import com.google.inject.Inject; +import com.google.inject.Singleton; + +import nu.marginalia.actor.ExecutorActorControlService; +import nu.marginalia.actor.ExecutorActor; + +@Singleton +public class ExecutionInit { + private final ExecutorActorControlService actorControlService; + + @Inject + public ExecutionInit(ExecutorActorControlService actorControlService) { + this.actorControlService = actorControlService; + } + + public void initDefaultActors() throws Exception { + actorControlService.start(ExecutorActor.MONITOR_PROCESS_LIVENESS); + actorControlService.start(ExecutorActor.MONITOR_FILE_STORAGE); + actorControlService.start(ExecutorActor.PROC_CONVERTER_SPAWNER); + actorControlService.start(ExecutorActor.PROC_CRAWLER_SPAWNER); + actorControlService.start(ExecutorActor.PROC_INDEX_CONSTRUCTOR_SPAWNER); + actorControlService.start(ExecutorActor.PROC_LOADER_SPAWNER); + } +} diff --git a/code/execution/java/nu/marginalia/execution/ExecutorCrawlGrpcService.java b/code/execution/java/nu/marginalia/execution/ExecutorCrawlGrpcService.java new file mode 100644 index 00000000..b95f64d0 --- /dev/null +++ b/code/execution/java/nu/marginalia/execution/ExecutorCrawlGrpcService.java @@ -0,0 +1,113 @@ +package nu.marginalia.execution; + +import com.google.inject.Inject; +import io.grpc.stub.StreamObserver; +import nu.marginalia.actor.ExecutorActor; +import nu.marginalia.actor.ExecutorActorControlService; +import nu.marginalia.actor.task.*; +import 
nu.marginalia.functions.execution.api.*; +import nu.marginalia.storage.model.FileStorageId; + +import java.util.stream.Collectors; + +public class ExecutorCrawlGrpcService extends ExecutorCrawlApiGrpc.ExecutorCrawlApiImplBase { + private final ExecutorActorControlService actorControlService; + + @Inject + public ExecutorCrawlGrpcService(ExecutorActorControlService actorControlService) + { + this.actorControlService = actorControlService; + } + + @Override + public void triggerCrawl(RpcFileStorageId request, StreamObserver responseObserver) { + try { + actorControlService.startFrom(ExecutorActor.CRAWL, + new CrawlActor.Initial(FileStorageId.of(request.getFileStorageId()))); + + responseObserver.onNext(Empty.getDefaultInstance()); + responseObserver.onCompleted(); + } + catch (Exception e) { + responseObserver.onError(e); + } + } + + @Override + public void triggerRecrawl(RpcFileStorageId request, StreamObserver responseObserver) { + try { + actorControlService.startFrom(ExecutorActor.RECRAWL, + new RecrawlActor.Initial(FileStorageId.of(request.getFileStorageId()), false)); + + responseObserver.onNext(Empty.getDefaultInstance()); + responseObserver.onCompleted(); + } + catch (Exception e) { + responseObserver.onError(e); + } + } + + @Override + public void triggerConvert(RpcFileStorageId request, StreamObserver responseObserver) { + try { + actorControlService.startFrom(ExecutorActor.CONVERT, + new ConvertActor.Convert(FileStorageId.of(request.getFileStorageId()))); + + responseObserver.onNext(Empty.getDefaultInstance()); + responseObserver.onCompleted(); + } + catch (Exception e) { + responseObserver.onError(e); + } + } + + @Override + public void triggerConvertAndLoad(RpcFileStorageId request, StreamObserver responseObserver) { + try { + actorControlService.startFrom(ExecutorActor.CONVERT_AND_LOAD, + new ConvertAndLoadActor.Initial(FileStorageId.of(request.getFileStorageId()))); + + responseObserver.onNext(Empty.getDefaultInstance()); + 
responseObserver.onCompleted(); + } + catch (Exception e) { + responseObserver.onError(e); + } + } + + @Override + public void loadProcessedData(RpcFileStorageIds request, StreamObserver responseObserver) { + try { + actorControlService.startFrom(ExecutorActor.CONVERT_AND_LOAD, + new ConvertAndLoadActor.Load(request.getFileStorageIdsList() + .stream() + .map(FileStorageId::of) + .collect(Collectors.toList())) + ); + + responseObserver.onNext(Empty.getDefaultInstance()); + responseObserver.onCompleted(); + } + catch (Exception e) { + responseObserver.onError(e); + } + } + + @Override + public void createCrawlSpecFromDownload(RpcCrawlSpecFromDownload request, StreamObserver responseObserver) { + try { + actorControlService.startFrom(ExecutorActor.CRAWL_JOB_EXTRACTOR, + new CrawlJobExtractorActor.CreateFromUrl( + request.getDescription(), + request.getUrl()) + ); + + responseObserver.onNext(Empty.getDefaultInstance()); + responseObserver.onCompleted(); + } + catch (Exception e) { + responseObserver.onError(e); + } + } + +} diff --git a/code/execution/java/nu/marginalia/execution/ExecutorExportGrpcService.java b/code/execution/java/nu/marginalia/execution/ExecutorExportGrpcService.java new file mode 100644 index 00000000..41c8bb8b --- /dev/null +++ b/code/execution/java/nu/marginalia/execution/ExecutorExportGrpcService.java @@ -0,0 +1,95 @@ +package nu.marginalia.execution; + +import com.google.inject.Inject; +import com.google.inject.Singleton; +import io.grpc.stub.StreamObserver; +import nu.marginalia.actor.ExecutorActor; +import nu.marginalia.actor.ExecutorActorControlService; +import nu.marginalia.actor.task.*; +import nu.marginalia.functions.execution.api.Empty; +import nu.marginalia.functions.execution.api.ExecutorExportApiGrpc; +import nu.marginalia.functions.execution.api.RpcExportSampleData; +import nu.marginalia.functions.execution.api.RpcFileStorageId; +import nu.marginalia.storage.model.FileStorageId; + +@Singleton +public class ExecutorExportGrpcService 
extends ExecutorExportApiGrpc.ExecutorExportApiImplBase { + private final ExecutorActorControlService actorControlService; + + @Inject + public ExecutorExportGrpcService(ExecutorActorControlService actorControlService) { + this.actorControlService = actorControlService; + } + + @Override + public void exportAtags(RpcFileStorageId request, StreamObserver responseObserver) { + try { + actorControlService.startFrom(ExecutorActor.EXPORT_ATAGS, + new ExportAtagsActor.Export(FileStorageId.of(request.getFileStorageId())) + ); + responseObserver.onNext(Empty.getDefaultInstance()); + responseObserver.onCompleted(); + } + catch (Exception e) { + responseObserver.onError(e); + } + } + + @Override + public void exportSampleData(RpcExportSampleData request, StreamObserver responseObserver) { + try { + actorControlService.startFrom(ExecutorActor.EXPORT_SAMPLE_DATA, + new ExportSampleDataActor.Export( + FileStorageId.of(request.getFileStorageId()), + request.getSize(), + request.getName() + ) + ); + responseObserver.onNext(Empty.getDefaultInstance()); + responseObserver.onCompleted(); + } + catch (Exception e) { + responseObserver.onError(e); + } + } + + @Override + public void exportRssFeeds(RpcFileStorageId request, StreamObserver responseObserver) { + try { + actorControlService.startFrom(ExecutorActor.EXPORT_FEEDS, + new ExportFeedsActor.Export(FileStorageId.of(request.getFileStorageId())) + ); + responseObserver.onNext(Empty.getDefaultInstance()); + responseObserver.onCompleted(); + } + catch (Exception e) { + responseObserver.onError(e); + } + } + + @Override + public void exportTermFrequencies(RpcFileStorageId request, StreamObserver responseObserver) { + try { + actorControlService.startFrom(ExecutorActor.EXPORT_TERM_FREQUENCIES, + new ExportTermFreqActor.Export(FileStorageId.of(request.getFileStorageId())) + ); + responseObserver.onNext(Empty.getDefaultInstance()); + responseObserver.onCompleted(); + } + catch (Exception e) { + responseObserver.onError(e); + } + } + + 
@Override + public void exportData(Empty request, StreamObserver responseObserver) { + try { + actorControlService.startFrom(ExecutorActor.EXPORT_DATA, new ExportDataActor.Export()); + responseObserver.onNext(Empty.getDefaultInstance()); + responseObserver.onCompleted(); + } + catch (Exception e) { + responseObserver.onError(e); + } + } +} diff --git a/code/execution/java/nu/marginalia/execution/ExecutorGrpcService.java b/code/execution/java/nu/marginalia/execution/ExecutorGrpcService.java new file mode 100644 index 00000000..b4182bb0 --- /dev/null +++ b/code/execution/java/nu/marginalia/execution/ExecutorGrpcService.java @@ -0,0 +1,237 @@ +package nu.marginalia.execution; + +import com.google.inject.Inject; +import io.grpc.stub.StreamObserver; +import lombok.SneakyThrows; +import nu.marginalia.WmsaHome; +import nu.marginalia.actor.ActorApi; +import nu.marginalia.actor.ExecutorActor; +import nu.marginalia.actor.ExecutorActorControlService; +import nu.marginalia.actor.state.ActorStateInstance; +import nu.marginalia.actor.task.DownloadSampleActor; +import nu.marginalia.actor.task.RestoreBackupActor; +import nu.marginalia.actor.task.TriggerAdjacencyCalculationActor; +import nu.marginalia.functions.execution.api.*; +import nu.marginalia.service.module.ServiceConfiguration; +import nu.marginalia.storage.FileStorageService; +import nu.marginalia.storage.model.FileStorageId; + +import java.nio.file.Files; +import java.nio.file.Path; +import java.time.LocalDateTime; +import java.time.ZoneId; +import java.time.format.DateTimeFormatter; +import java.util.Comparator; + +public class ExecutorGrpcService extends ExecutorApiGrpc.ExecutorApiImplBase { + private final ActorApi actorApi; + private final FileStorageService fileStorageService; + private final ServiceConfiguration serviceConfiguration; + private final ExecutorActorControlService actorControlService; + + @Inject + public ExecutorGrpcService(ActorApi actorApi, + FileStorageService fileStorageService, + 
ServiceConfiguration serviceConfiguration, + ExecutorActorControlService actorControlService) + { + this.actorApi = actorApi; + this.fileStorageService = fileStorageService; + this.serviceConfiguration = serviceConfiguration; + this.actorControlService = actorControlService; + } + + @Override + public void startFsm(RpcFsmName request, StreamObserver responseObserver) { + try { + actorApi.startActor(request); + responseObserver.onNext(Empty.getDefaultInstance()); + responseObserver.onCompleted(); + } + catch (Exception e) { + responseObserver.onError(e); + } + } + + @Override + public void stopFsm(RpcFsmName request, StreamObserver responseObserver) { + try { + actorApi.stopActor(request); + responseObserver.onNext(Empty.getDefaultInstance()); + responseObserver.onCompleted(); + } + catch (Exception e) { + responseObserver.onError(e); + } + } + + @Override + public void stopProcess(RpcProcessId request, StreamObserver responseObserver) { + try { + actorApi.stopProcess(request); + responseObserver.onNext(Empty.getDefaultInstance()); + responseObserver.onCompleted(); + } + catch (Exception e) { + responseObserver.onError(e); + } + } + + @Override + public void calculateAdjacencies(Empty request, StreamObserver responseObserver) { + try { + actorControlService.startFrom(ExecutorActor.ADJACENCY_CALCULATION, + new TriggerAdjacencyCalculationActor.Run()); + + responseObserver.onNext(Empty.getDefaultInstance()); + responseObserver.onCompleted(); + } + catch (Exception e) { + responseObserver.onError(e); + } + } + + @Override + public void downloadSampleData(RpcDownloadSampleData request, StreamObserver responseObserver) { + try { + String sampleSet = request.getSampleSet(); + + actorControlService.startFrom(ExecutorActor.DOWNLOAD_SAMPLE, + new DownloadSampleActor.Run(sampleSet)); + + responseObserver.onNext(Empty.getDefaultInstance()); + responseObserver.onCompleted(); + } + catch (Exception e) { + responseObserver.onError(e); + } + } + + @Override + public void 
restoreBackup(RpcFileStorageId request, StreamObserver responseObserver) { + try { + var fid = FileStorageId.of(request.getFileStorageId()); + + actorControlService.startFrom(ExecutorActor.RESTORE_BACKUP, + new RestoreBackupActor.Restore(fid)); + + responseObserver.onNext(Empty.getDefaultInstance()); + responseObserver.onCompleted(); + } + catch (Exception e) { + responseObserver.onError(e); + } + } + + @Override + public void getActorStates(Empty request, StreamObserver responseObserver) { + var items = actorControlService.getActorStates().entrySet().stream().map(e -> { + final var stateGraph = actorControlService.getActorDefinition(e.getKey()); + + final ActorStateInstance state = e.getValue(); + final String actorDescription = stateGraph.describe(); + + final String machineName = e.getKey().name(); + final String stateName = state.name(); + + final String stateDescription = ""; + + final boolean terminal = state.isFinal(); + final boolean canStart = actorControlService.isDirectlyInitializable(e.getKey()) && terminal; + + return RpcActorRunState + .newBuilder() + .setActorName(machineName) + .setState(stateName) + .setActorDescription(actorDescription) + .setStateDescription(stateDescription) + .setTerminal(terminal) + .setCanStart(canStart) + .build(); + + }) + .filter(s -> !s.getTerminal() || s.getCanStart()) + .sorted(Comparator.comparing(RpcActorRunState::getActorName)) + .toList(); + + responseObserver.onNext(RpcActorRunStates.newBuilder() + .setNode(serviceConfiguration.node()) + .addAllActorRunStates(items) + .build()); + responseObserver.onCompleted(); + } + + @Override + public void listSideloadDir(Empty request, StreamObserver responseObserver) { + try { + Path uploadDir = WmsaHome.getUploadDir(); + + try (var items = Files.list(uploadDir).sorted( + Comparator.comparing((Path d) -> Files.isDirectory(d)).reversed() + .thenComparing(path -> path.getFileName().toString()) + )) { + var builder = 
RpcUploadDirContents.newBuilder().setPath(uploadDir.toString()); + + var iter = items.iterator(); + while (iter.hasNext()) { + var path = iter.next(); + + boolean isDir = Files.isDirectory(path); + long size = isDir ? 0 : Files.size(path); + var mtime = Files.getLastModifiedTime(path); + + builder.addEntriesBuilder() + .setName(path.toString()) + .setIsDirectory(isDir) + .setLastModifiedTime( + LocalDateTime.ofInstant(mtime.toInstant(), ZoneId.systemDefault()).format(DateTimeFormatter.ISO_DATE_TIME)) + .setSize(size); + } + + responseObserver.onNext(builder.build()); + } + + responseObserver.onCompleted(); + } + catch (Exception e) { + responseObserver.onError(e); + } + } + + @Override + public void listFileStorage(RpcFileStorageId request, StreamObserver responseObserver) { + try { + FileStorageId fileStorageId = FileStorageId.of(request.getFileStorageId()); + + var storage = fileStorageService.getStorage(fileStorageId); + + var builder = RpcFileStorageContent.newBuilder(); + + + try (var fs = Files.list(storage.asPath())) { + fs.filter(Files::isRegularFile) + .map(this::createFileModel) + .sorted(Comparator.comparing(RpcFileStorageEntry::getName)) + .forEach(builder::addEntries); + } + + responseObserver.onNext(builder.build()); + + responseObserver.onCompleted(); + } + catch (Exception e) { + responseObserver.onError(e); + } + } + + @SneakyThrows + private RpcFileStorageEntry createFileModel(Path path) { + return RpcFileStorageEntry.newBuilder() + .setName(path.toFile().getName()) + .setSize(Files.size(path)) + .setLastModifiedTime(Files.getLastModifiedTime(path).toInstant().toString()) + .build(); + } + + +} diff --git a/code/execution/java/nu/marginalia/execution/ExecutorSideloadGrpcService.java b/code/execution/java/nu/marginalia/execution/ExecutorSideloadGrpcService.java new file mode 100644 index 00000000..945824c0 --- /dev/null +++ b/code/execution/java/nu/marginalia/execution/ExecutorSideloadGrpcService.java @@ -0,0 +1,96 @@ +package 
nu.marginalia.execution; + +import com.google.inject.Inject; +import io.grpc.stub.StreamObserver; +import nu.marginalia.actor.ExecutorActor; +import nu.marginalia.actor.ExecutorActorControlService; +import nu.marginalia.actor.task.ConvertActor; +import nu.marginalia.functions.execution.api.*; + +public class ExecutorSideloadGrpcService extends ExecutorSideloadApiGrpc.ExecutorSideloadApiImplBase { + private final ExecutorActorControlService actorControlService; + + @Inject + public ExecutorSideloadGrpcService(ExecutorActorControlService actorControlService) + { + this.actorControlService = actorControlService; + } + + @Override + public void sideloadEncyclopedia(RpcSideloadEncyclopedia request, StreamObserver responseObserver) { + try { + actorControlService.startFrom(ExecutorActor.CONVERT, + new ConvertActor.ConvertEncyclopedia( + request.getSourcePath(), + request.getBaseUrl() + )); + + responseObserver.onNext(Empty.getDefaultInstance()); + responseObserver.onCompleted(); + } + catch (Exception e) { + responseObserver.onError(e); + } + } + + @Override + public void sideloadDirtree(RpcSideloadDirtree request, StreamObserver responseObserver) { + try { + actorControlService.startFrom(ExecutorActor.CONVERT, + new ConvertActor.ConvertDirtree(request.getSourcePath()) + ); + + responseObserver.onNext(Empty.getDefaultInstance()); + responseObserver.onCompleted(); + } + catch (Exception e) { + responseObserver.onError(e); + } + } + + @Override + public void sideloadReddit(RpcSideloadReddit request, StreamObserver responseObserver) { + try { + actorControlService.startFrom(ExecutorActor.CONVERT, + new ConvertActor.ConvertReddit(request.getSourcePath()) + ); + + responseObserver.onNext(Empty.getDefaultInstance()); + responseObserver.onCompleted(); + } + catch (Exception e) { + responseObserver.onError(e); + } + } + + @Override + public void sideloadWarc(RpcSideloadWarc request, StreamObserver responseObserver) { + try { + actorControlService.startFrom(ExecutorActor.CONVERT, 
+ new ConvertActor.ConvertWarc(request.getSourcePath()) + ); + + responseObserver.onNext(Empty.getDefaultInstance()); + responseObserver.onCompleted(); + } + catch (Exception e) { + responseObserver.onError(e); + } + } + + @Override + public void sideloadStackexchange(RpcSideloadStackexchange request, StreamObserver responseObserver) { + try { + actorControlService.startFrom(ExecutorActor.CONVERT, + new ConvertActor.ConvertStackexchange(request.getSourcePath()) + ); + + responseObserver.onNext(Empty.getDefaultInstance()); + responseObserver.onCompleted(); + } + catch (Exception e) { + responseObserver.onError(e); + } + } + +} diff --git a/code/services-core/executor-service/src/main/java/nu/marginalia/process/ProcessOutboxes.java b/code/execution/java/nu/marginalia/process/ProcessOutboxes.java similarity index 100% rename from code/services-core/executor-service/src/main/java/nu/marginalia/process/ProcessOutboxes.java rename to code/execution/java/nu/marginalia/process/ProcessOutboxes.java diff --git a/code/services-core/executor-service/src/main/java/nu/marginalia/process/ProcessService.java b/code/execution/java/nu/marginalia/process/ProcessService.java similarity index 66% rename from code/services-core/executor-service/src/main/java/nu/marginalia/process/ProcessService.java rename to code/execution/java/nu/marginalia/process/ProcessService.java index aadfa625..0d153c8f 100644 --- a/code/services-core/executor-service/src/main/java/nu/marginalia/process/ProcessService.java +++ b/code/execution/java/nu/marginalia/process/ProcessService.java @@ -2,7 +2,13 @@ package nu.marginalia.process; import com.google.inject.Inject; import com.google.inject.Singleton; -import com.google.inject.name.Named; +import nu.marginalia.WmsaHome; +import nu.marginalia.adjacencies.WebsiteAdjacenciesCalculator; +import nu.marginalia.converting.ConverterMain; +import nu.marginalia.crawl.CrawlerMain; +import nu.marginalia.index.IndexConstructorMain; +import 
nu.marginalia.loading.LoaderMain; +import nu.marginalia.service.ProcessMainClass; import nu.marginalia.service.control.ServiceEventLog; import nu.marginalia.service.server.BaseServiceParams; import org.slf4j.Logger; @@ -13,8 +19,6 @@ import org.slf4j.MarkerFactory; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStreamReader; -import java.nio.file.Files; -import java.nio.file.Path; import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -26,7 +30,6 @@ public class ProcessService { private final Marker processMarker = MarkerFactory.getMarker("PROCESS"); private final ServiceEventLog eventLog; - private final Path distPath; private final ConcurrentHashMap processes = new ConcurrentHashMap<>(); @@ -43,47 +46,71 @@ public class ProcessService { } public enum ProcessId { - CRAWLER("crawler-process/bin/crawler-process"), - CONVERTER("converter-process/bin/converter-process"), - LOADER("loader-process/bin/loader-process"), - INDEX_CONSTRUCTOR("index-construction-process/bin/index-construction-process"), - ADJACENCIES_CALCULATOR("website-adjacencies-calculator/bin/website-adjacencies-calculator") + CRAWLER(CrawlerMain.class), + CONVERTER(ConverterMain.class), + LOADER(LoaderMain.class), + INDEX_CONSTRUCTOR(IndexConstructorMain.class), + ADJACENCIES_CALCULATOR(WebsiteAdjacenciesCalculator.class) ; - public final String path; - ProcessId(String path) { - this.path = path; + public final String mainClass; + ProcessId(Class mainClass) { + this.mainClass = mainClass.getName(); } - }; + + List envOpts() { + String variable = switch (this) { + case CRAWLER -> "CRAWLER_PROCESS_OPTS"; + case CONVERTER -> "CONVERTER_PROCESS_OPTS"; + case LOADER -> "LOADER_PROCESS_OPTS"; + case INDEX_CONSTRUCTOR -> "INDEX_CONSTRUCTION_PROCESS_OPTS"; + case ADJACENCIES_CALCULATOR -> "ADJACENCIES_CALCULATOR_PROCESS_OPTS"; + }; + String value = System.getenv(variable); + + if (value == null) + return List.of(); + else + return 
Arrays.asList(value.split("\\s+")); + } + } @Inject - public ProcessService(BaseServiceParams params, - @Named("distPath") Path distPath) { + public ProcessService(BaseServiceParams params) { this.eventLog = params.eventLog; - this.distPath = distPath; } - public boolean trigger(ProcessId processId) throws Exception { - return trigger(processId, new String[0]); - } - public boolean trigger(ProcessId processId, String... parameters) throws Exception { - final String processPath = distPath.resolve(processId.path).toString(); + public boolean trigger(ProcessId processId, String... extraArgs) throws Exception { final String[] env = createEnvironmentVariables(); - final String[] args = createCommandArguments(processPath, parameters); + List args = new ArrayList<>(); + String javaHome = System.getProperty("java.home"); + + args.add(STR."\{javaHome}/bin/java"); + args.add("-cp"); + args.add(System.getProperty("java.class.path")); + + if (getClass().desiredAssertionStatus()) args.add("-ea"); + else args.add("-da"); + + args.add("--enable-preview"); + + String loggingOpts = System.getProperty("log4j2.configurationFile"); + if (loggingOpts != null) { + args.add("-Dlog4j.configurationFile=" + loggingOpts); + } + + args.addAll(processId.envOpts()); + args.add(processId.mainClass); + args.addAll(Arrays.asList(extraArgs)); Process process; - if (!Files.exists(Path.of(processPath))) { - logger.error("Process not found: {}", processPath); - return false; - } - - logger.info("Starting process: {}: {} // {}", processId, Arrays.toString(args), Arrays.toString(env)); + logger.info("Starting process: {} {}", processId, processId.envOpts()); synchronized (processes) { if (processes.containsKey(processId)) return false; - process = Runtime.getRuntime().exec(args, env); + process = Runtime.getRuntime().exec(args.toArray(String[]::new), env); processes.put(processId, process); } @@ -107,13 +134,6 @@ public class ProcessService { } - private String[] createCommandArguments(String 
processPath, String[] parameters) { - final String[] args = new String[parameters.length + 1]; - args[0] = processPath; - System.arraycopy(parameters, 0, args, 1, parameters.length); - return args; - } - public boolean isRunning(ProcessId processId) { return processes.containsKey(processId); } @@ -131,24 +151,14 @@ public class ProcessService { /** These environment variables are propagated from the parent process to the child process, * along with WMSA_HOME, but it has special logic */ private final List propagatedEnvironmentVariables = List.of( - "JAVA_HOME", - "WMSA_SERVICE_NODE", - "CONVERTER_PROCESS_OPTS", - "LOADER_PROCESS_OPTS", - "INDEX_CONSTRUCTION_PROCESS_OPTS", - "CRAWLER_PROCESS_OPTS"); + "ZOOKEEPER_HOSTS", + "WMSA_SERVICE_NODE" + ); private String[] createEnvironmentVariables() { List opts = new ArrayList<>(); - String WMSA_HOME = System.getenv("WMSA_HOME"); - - if (WMSA_HOME == null || WMSA_HOME.isBlank()) { - WMSA_HOME = "/var/lib/wmsa"; - } - - opts.add(env2str("WMSA_HOME", WMSA_HOME)); - opts.add(env2str("JAVA_OPTS", "--enable-preview")); // + opts.add(env2str("WMSA_HOME", WmsaHome.getHomePath().toString())); for (String envKey : propagatedEnvironmentVariables) { String envValue = System.getenv(envKey); diff --git a/code/services-core/executor-service/src/main/java/nu/marginalia/sideload/RedditSideloadHelper.java b/code/execution/java/nu/marginalia/sideload/RedditSideloadHelper.java similarity index 99% rename from code/services-core/executor-service/src/main/java/nu/marginalia/sideload/RedditSideloadHelper.java rename to code/execution/java/nu/marginalia/sideload/RedditSideloadHelper.java index 23d7e544..e5c68120 100644 --- a/code/services-core/executor-service/src/main/java/nu/marginalia/sideload/RedditSideloadHelper.java +++ b/code/execution/java/nu/marginalia/sideload/RedditSideloadHelper.java @@ -10,7 +10,6 @@ import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.StandardCopyOption; import java.util.*; -import 
java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.ForkJoinPool; import java.util.stream.Collectors; diff --git a/code/services-core/executor-service/src/main/java/nu/marginalia/sideload/SideloadHelper.java b/code/execution/java/nu/marginalia/sideload/SideloadHelper.java similarity index 100% rename from code/services-core/executor-service/src/main/java/nu/marginalia/sideload/SideloadHelper.java rename to code/execution/java/nu/marginalia/sideload/SideloadHelper.java diff --git a/code/services-core/executor-service/src/main/java/nu/marginalia/sideload/StackExchangeSideloadHelper.java b/code/execution/java/nu/marginalia/sideload/StackExchangeSideloadHelper.java similarity index 100% rename from code/services-core/executor-service/src/main/java/nu/marginalia/sideload/StackExchangeSideloadHelper.java rename to code/execution/java/nu/marginalia/sideload/StackExchangeSideloadHelper.java diff --git a/code/services-core/executor-service/src/main/java/nu/marginalia/svc/BackupService.java b/code/execution/java/nu/marginalia/svc/BackupService.java similarity index 90% rename from code/services-core/executor-service/src/main/java/nu/marginalia/svc/BackupService.java rename to code/execution/java/nu/marginalia/svc/BackupService.java index 46578348..23b95f6c 100644 --- a/code/services-core/executor-service/src/main/java/nu/marginalia/svc/BackupService.java +++ b/code/execution/java/nu/marginalia/svc/BackupService.java @@ -3,6 +3,7 @@ package nu.marginalia.svc; import com.github.luben.zstd.ZstdInputStream; import com.github.luben.zstd.ZstdOutputStream; import nu.marginalia.IndexLocations; +import nu.marginalia.linkdb.LinkdbFileNames; import nu.marginalia.service.control.ServiceHeartbeat; import nu.marginalia.storage.FileStorageService; import nu.marginalia.storage.model.FileStorageId; @@ -18,9 +19,6 @@ import java.sql.SQLException; import java.time.LocalDateTime; import java.util.List; -import static 
nu.marginalia.linkdb.LinkdbFileNames.DOCDB_FILE_NAME; -import static nu.marginalia.linkdb.LinkdbFileNames.DOMAIN_LINKS_FILE_NAME; - public class BackupService { private final FileStorageService storageService; @@ -59,10 +57,10 @@ public class BackupService { try (var heartbeat = serviceHeartbeat.createServiceTaskHeartbeat(BackupHeartbeatSteps.class, "Backup")) { heartbeat.progress(BackupHeartbeatSteps.DOCS); - backupFileCompressed(DOCDB_FILE_NAME, linkdbStagingStorage, backupStorage.asPath()); + backupFileCompressed(LinkdbFileNames.DOCDB_FILE_NAME, linkdbStagingStorage, backupStorage.asPath()); heartbeat.progress(BackupHeartbeatSteps.LINKS); - backupFileCompressed(DOMAIN_LINKS_FILE_NAME, linkdbStagingStorage, backupStorage.asPath()); + backupFileCompressed(LinkdbFileNames.DOMAIN_LINKS_FILE_NAME, linkdbStagingStorage, backupStorage.asPath()); heartbeat.progress(BackupHeartbeatSteps.JOURNAL); // This file format is already compressed @@ -84,10 +82,10 @@ public class BackupService { try (var heartbeat = serviceHeartbeat.createServiceTaskHeartbeat(BackupHeartbeatSteps.class, "Restore Backup")) { heartbeat.progress(BackupHeartbeatSteps.DOCS); - restoreBackupCompressed(DOCDB_FILE_NAME, linkdbStagingStorage, backupStorage); + restoreBackupCompressed(LinkdbFileNames.DOCDB_FILE_NAME, linkdbStagingStorage, backupStorage); heartbeat.progress(BackupHeartbeatSteps.LINKS); - restoreBackupCompressed(DOMAIN_LINKS_FILE_NAME, linkdbStagingStorage, backupStorage); + restoreBackupCompressed(LinkdbFileNames.DOMAIN_LINKS_FILE_NAME, linkdbStagingStorage, backupStorage); heartbeat.progress(BackupHeartbeatSteps.JOURNAL); restoreJournal(indexStagingStorage, backupStorage); diff --git a/code/services-core/executor-service/src/main/java/nu/marginalia/svc/DomainListRefreshService.java b/code/execution/java/nu/marginalia/svc/DomainListRefreshService.java similarity index 100% rename from code/services-core/executor-service/src/main/java/nu/marginalia/svc/DomainListRefreshService.java rename to 
code/execution/java/nu/marginalia/svc/DomainListRefreshService.java diff --git a/code/execution/readme.md b/code/execution/readme.md new file mode 100644 index 00000000..d819c023 --- /dev/null +++ b/code/execution/readme.md @@ -0,0 +1,12 @@ +The execution subsystem is responsible for the execution of long running tasks on each +index node. It lives in the [executor-service](../services-core/executor-service) module. + +It accomplishes this using the [message queue and actor library](../libraries/message-queue/), +which permits program state to survive crashes and reboots. + +The subsystem exposes four [APIs](api/src/main/protobuf/executor-api.proto): + +* Execution API - for starting and stopping tasks, also contains miscellaneous commands +* Crawl API - for managing the crawl workflow +* Sideload API - for sideloading data +* Export API - for exporting data \ No newline at end of file diff --git a/code/services-core/executor-service/src/test/java/nu/marginalia/svc/DomainListRefreshServiceTest.java b/code/execution/test/nu/marginalia/svc/DomainListRefreshServiceTest.java similarity index 98% rename from code/services-core/executor-service/src/test/java/nu/marginalia/svc/DomainListRefreshServiceTest.java rename to code/execution/test/nu/marginalia/svc/DomainListRefreshServiceTest.java index 95f4801b..e9c085aa 100644 --- a/code/services-core/executor-service/src/test/java/nu/marginalia/svc/DomainListRefreshServiceTest.java +++ b/code/execution/test/nu/marginalia/svc/DomainListRefreshServiceTest.java @@ -15,7 +15,6 @@ import org.testcontainers.containers.MariaDBContainer; import org.testcontainers.junit.jupiter.Container; import org.testcontainers.junit.jupiter.Testcontainers; -import java.io.IOException; import java.sql.SQLException; import java.util.*; @@ -55,7 +54,7 @@ class DomainListRefreshServiceTest { void downloadDomainsList() throws SQLException { DomainTypes domainTypes = new DomainTypes(dataSource); DomainListRefreshService service = new 
DomainListRefreshService(dataSource, - domainTypes, new ServiceConfiguration(null, 1, null, -1, -1, null)); + domainTypes, new ServiceConfiguration(null, 1, null, null, -1, null)); domainTypes.updateUrlForSelection(DomainTypes.Type.CRAWL, "https://downloads.marginalia.nu/domain-list-test.txt"); service.synchronizeDomainList(); diff --git a/code/features-convert/adblock/build.gradle b/code/features-convert/adblock/build.gradle index 90e3e205..f28fe998 100644 --- a/code/features-convert/adblock/build.gradle +++ b/code/features-convert/adblock/build.gradle @@ -13,6 +13,8 @@ java { } } +apply from: "$rootProject.projectDir/srcsets.gradle" + dependencies { implementation project(':code:common:config') diff --git a/code/features-convert/adblock/src/main/java/nu/marginalia/adblock/AdblockSimulator.java b/code/features-convert/adblock/java/nu/marginalia/adblock/AdblockSimulator.java similarity index 100% rename from code/features-convert/adblock/src/main/java/nu/marginalia/adblock/AdblockSimulator.java rename to code/features-convert/adblock/java/nu/marginalia/adblock/AdblockSimulator.java diff --git a/code/features-convert/adblock/src/main/java/nu/marginalia/adblock/GoogleAnwersSpamDetector.java b/code/features-convert/adblock/java/nu/marginalia/adblock/GoogleAnwersSpamDetector.java similarity index 100% rename from code/features-convert/adblock/src/main/java/nu/marginalia/adblock/GoogleAnwersSpamDetector.java rename to code/features-convert/adblock/java/nu/marginalia/adblock/GoogleAnwersSpamDetector.java diff --git a/code/features-convert/adblock/readme.md b/code/features-convert/adblock/readme.md index 1df54936..32919300 100644 --- a/code/features-convert/adblock/readme.md +++ b/code/features-convert/adblock/readme.md @@ -5,4 +5,4 @@ uses it to identify if a document has ads. 
## Central Classes -* [AdblockSimulator](src/main/java/nu/marginalia/adblock/AdblockSimulator.java) \ No newline at end of file +* [AdblockSimulator](java/nu/marginalia/adblock/AdblockSimulator.java) \ No newline at end of file diff --git a/code/features-convert/anchor-keywords/build.gradle b/code/features-convert/anchor-keywords/build.gradle index 122046f8..23e415b9 100644 --- a/code/features-convert/anchor-keywords/build.gradle +++ b/code/features-convert/anchor-keywords/build.gradle @@ -10,6 +10,8 @@ java { } } +apply from: "$rootProject.projectDir/srcsets.gradle" + dependencies { implementation project(':code:common:config') implementation project(':code:common:model') diff --git a/code/features-convert/anchor-keywords/src/main/java/nu/marginalia/atags/AnchorTextKeywords.java b/code/features-convert/anchor-keywords/java/nu/marginalia/atags/AnchorTextKeywords.java similarity index 100% rename from code/features-convert/anchor-keywords/src/main/java/nu/marginalia/atags/AnchorTextKeywords.java rename to code/features-convert/anchor-keywords/java/nu/marginalia/atags/AnchorTextKeywords.java diff --git a/code/features-convert/anchor-keywords/src/main/java/nu/marginalia/atags/model/DomainLinks.java b/code/features-convert/anchor-keywords/java/nu/marginalia/atags/model/DomainLinks.java similarity index 100% rename from code/features-convert/anchor-keywords/src/main/java/nu/marginalia/atags/model/DomainLinks.java rename to code/features-convert/anchor-keywords/java/nu/marginalia/atags/model/DomainLinks.java diff --git a/code/features-convert/anchor-keywords/src/main/java/nu/marginalia/atags/model/Link.java b/code/features-convert/anchor-keywords/java/nu/marginalia/atags/model/Link.java similarity index 100% rename from code/features-convert/anchor-keywords/src/main/java/nu/marginalia/atags/model/Link.java rename to code/features-convert/anchor-keywords/java/nu/marginalia/atags/model/Link.java diff --git 
a/code/features-convert/anchor-keywords/src/main/java/nu/marginalia/atags/model/LinkWithText.java b/code/features-convert/anchor-keywords/java/nu/marginalia/atags/model/LinkWithText.java similarity index 100% rename from code/features-convert/anchor-keywords/src/main/java/nu/marginalia/atags/model/LinkWithText.java rename to code/features-convert/anchor-keywords/java/nu/marginalia/atags/model/LinkWithText.java diff --git a/code/features-convert/anchor-keywords/src/main/java/nu/marginalia/atags/source/AnchorTagsImpl.java b/code/features-convert/anchor-keywords/java/nu/marginalia/atags/source/AnchorTagsImpl.java similarity index 100% rename from code/features-convert/anchor-keywords/src/main/java/nu/marginalia/atags/source/AnchorTagsImpl.java rename to code/features-convert/anchor-keywords/java/nu/marginalia/atags/source/AnchorTagsImpl.java diff --git a/code/features-convert/anchor-keywords/src/main/java/nu/marginalia/atags/source/AnchorTagsSource.java b/code/features-convert/anchor-keywords/java/nu/marginalia/atags/source/AnchorTagsSource.java similarity index 100% rename from code/features-convert/anchor-keywords/src/main/java/nu/marginalia/atags/source/AnchorTagsSource.java rename to code/features-convert/anchor-keywords/java/nu/marginalia/atags/source/AnchorTagsSource.java diff --git a/code/features-convert/anchor-keywords/src/main/java/nu/marginalia/atags/source/AnchorTagsSourceFactory.java b/code/features-convert/anchor-keywords/java/nu/marginalia/atags/source/AnchorTagsSourceFactory.java similarity index 100% rename from code/features-convert/anchor-keywords/src/main/java/nu/marginalia/atags/source/AnchorTagsSourceFactory.java rename to code/features-convert/anchor-keywords/java/nu/marginalia/atags/source/AnchorTagsSourceFactory.java diff --git a/code/features-convert/anchor-keywords/src/main/resources/atags-stop-list b/code/features-convert/anchor-keywords/resources/atags-stop-list similarity index 100% rename from 
code/features-convert/anchor-keywords/src/main/resources/atags-stop-list rename to code/features-convert/anchor-keywords/resources/atags-stop-list diff --git a/code/features-convert/anchor-keywords/src/test/java/nu/marginalia/atags/DomainAnchorTagsImplTest.java b/code/features-convert/anchor-keywords/test/nu/marginalia/atags/DomainAnchorTagsImplTest.java similarity index 100% rename from code/features-convert/anchor-keywords/src/test/java/nu/marginalia/atags/DomainAnchorTagsImplTest.java rename to code/features-convert/anchor-keywords/test/nu/marginalia/atags/DomainAnchorTagsImplTest.java diff --git a/code/features-convert/anchor-keywords/src/test/java/nu/marginalia/util/TestLanguageModels.java b/code/features-convert/anchor-keywords/test/nu/marginalia/util/TestLanguageModels.java similarity index 100% rename from code/features-convert/anchor-keywords/src/test/java/nu/marginalia/util/TestLanguageModels.java rename to code/features-convert/anchor-keywords/test/nu/marginalia/util/TestLanguageModels.java diff --git a/code/features-convert/data-extractors/build.gradle b/code/features-convert/data-extractors/build.gradle index 67c42633..6fd5671d 100644 --- a/code/features-convert/data-extractors/build.gradle +++ b/code/features-convert/data-extractors/build.gradle @@ -13,6 +13,8 @@ java { } } +apply from: "$rootProject.projectDir/srcsets.gradle" + dependencies { implementation project(':code:common:config') implementation project(':code:common:process') diff --git a/code/features-convert/data-extractors/src/main/java/nu/marginalia/extractor/AtagExporter.java b/code/features-convert/data-extractors/java/nu/marginalia/extractor/AtagExporter.java similarity index 99% rename from code/features-convert/data-extractors/src/main/java/nu/marginalia/extractor/AtagExporter.java rename to code/features-convert/data-extractors/java/nu/marginalia/extractor/AtagExporter.java index dc286323..3db0a284 100644 --- 
a/code/features-convert/data-extractors/src/main/java/nu/marginalia/extractor/AtagExporter.java +++ b/code/features-convert/data-extractors/java/nu/marginalia/extractor/AtagExporter.java @@ -48,7 +48,7 @@ public class AtagExporter implements ExporterIf { Path inputDir = storageService.getStorage(crawlId).asPath(); - try (var bw = new BufferedWriter(new OutputStreamWriter(new GZIPOutputStream(Files.newOutputStream(tmpFile, StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING)))); + try (var bw = new BufferedWriter(new OutputStreamWriter(new GZIPOutputStream(Files.newOutputStream(tmpFile, StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING)))) ) { Path crawlerLogFile = inputDir.resolve("crawler.log"); diff --git a/code/features-convert/data-extractors/src/main/java/nu/marginalia/extractor/ExporterIf.java b/code/features-convert/data-extractors/java/nu/marginalia/extractor/ExporterIf.java similarity index 100% rename from code/features-convert/data-extractors/src/main/java/nu/marginalia/extractor/ExporterIf.java rename to code/features-convert/data-extractors/java/nu/marginalia/extractor/ExporterIf.java diff --git a/code/features-convert/data-extractors/src/main/java/nu/marginalia/extractor/FeedExporter.java b/code/features-convert/data-extractors/java/nu/marginalia/extractor/FeedExporter.java similarity index 100% rename from code/features-convert/data-extractors/src/main/java/nu/marginalia/extractor/FeedExporter.java rename to code/features-convert/data-extractors/java/nu/marginalia/extractor/FeedExporter.java diff --git a/code/features-convert/data-extractors/src/main/java/nu/marginalia/extractor/SampleDataExporter.java b/code/features-convert/data-extractors/java/nu/marginalia/extractor/SampleDataExporter.java similarity index 100% rename from code/features-convert/data-extractors/src/main/java/nu/marginalia/extractor/SampleDataExporter.java rename to code/features-convert/data-extractors/java/nu/marginalia/extractor/SampleDataExporter.java 
diff --git a/code/features-convert/data-extractors/src/main/java/nu/marginalia/extractor/TermFrequencyExporter.java b/code/features-convert/data-extractors/java/nu/marginalia/extractor/TermFrequencyExporter.java similarity index 100% rename from code/features-convert/data-extractors/src/main/java/nu/marginalia/extractor/TermFrequencyExporter.java rename to code/features-convert/data-extractors/java/nu/marginalia/extractor/TermFrequencyExporter.java diff --git a/code/features-convert/data-extractors/readme.md b/code/features-convert/data-extractors/readme.md index d8c9fc0d..ea318e9f 100644 --- a/code/features-convert/data-extractors/readme.md +++ b/code/features-convert/data-extractors/readme.md @@ -2,6 +2,6 @@ Contains converter-*like* extraction jobs that operate on crawled data to produc ## Important classes -* [AtagExporter](src/main/java/nu/marginalia/extractor/AtagExporter.java) - extracts anchor texts from the crawled data. -* [FeedExporter](src/main/java/nu/marginalia/extractor/FeedExporter.java) - tries to find RSS/Atom feeds within the crawled data. -* [TermFrequencyExporter](src/main/java/nu/marginalia/extractor/TermFrequencyExporter.java) - exports the 'TF' part of TF-IDF. \ No newline at end of file +* [AtagExporter](java/nu/marginalia/extractor/AtagExporter.java) - extracts anchor texts from the crawled data. +* [FeedExporter](java/nu/marginalia/extractor/FeedExporter.java) - tries to find RSS/Atom feeds within the crawled data. +* [TermFrequencyExporter](java/nu/marginalia/extractor/TermFrequencyExporter.java) - exports the 'TF' part of TF-IDF. 
\ No newline at end of file diff --git a/code/features-convert/keyword-extraction/build.gradle b/code/features-convert/keyword-extraction/build.gradle index 9afcc61c..7912e246 100644 --- a/code/features-convert/keyword-extraction/build.gradle +++ b/code/features-convert/keyword-extraction/build.gradle @@ -10,6 +10,8 @@ java { } } +apply from: "$rootProject.projectDir/srcsets.gradle" + dependencies { implementation project(':third-party:porterstemmer') implementation project(':code:common:model') diff --git a/code/features-convert/keyword-extraction/src/main/java/nu/marginalia/keyword/DocumentKeywordExtractor.java b/code/features-convert/keyword-extraction/java/nu/marginalia/keyword/DocumentKeywordExtractor.java similarity index 100% rename from code/features-convert/keyword-extraction/src/main/java/nu/marginalia/keyword/DocumentKeywordExtractor.java rename to code/features-convert/keyword-extraction/java/nu/marginalia/keyword/DocumentKeywordExtractor.java diff --git a/code/features-convert/keyword-extraction/src/main/java/nu/marginalia/keyword/KeywordExtractor.java b/code/features-convert/keyword-extraction/java/nu/marginalia/keyword/KeywordExtractor.java similarity index 100% rename from code/features-convert/keyword-extraction/src/main/java/nu/marginalia/keyword/KeywordExtractor.java rename to code/features-convert/keyword-extraction/java/nu/marginalia/keyword/KeywordExtractor.java diff --git a/code/features-convert/keyword-extraction/src/main/java/nu/marginalia/keyword/KeywordMetadata.java b/code/features-convert/keyword-extraction/java/nu/marginalia/keyword/KeywordMetadata.java similarity index 100% rename from code/features-convert/keyword-extraction/src/main/java/nu/marginalia/keyword/KeywordMetadata.java rename to code/features-convert/keyword-extraction/java/nu/marginalia/keyword/KeywordMetadata.java diff --git a/code/features-convert/keyword-extraction/src/main/java/nu/marginalia/keyword/WordReps.java 
b/code/features-convert/keyword-extraction/java/nu/marginalia/keyword/WordReps.java similarity index 100% rename from code/features-convert/keyword-extraction/src/main/java/nu/marginalia/keyword/WordReps.java rename to code/features-convert/keyword-extraction/java/nu/marginalia/keyword/WordReps.java diff --git a/code/features-convert/keyword-extraction/src/main/java/nu/marginalia/keyword/extractors/ArtifactKeywords.java b/code/features-convert/keyword-extraction/java/nu/marginalia/keyword/extractors/ArtifactKeywords.java similarity index 100% rename from code/features-convert/keyword-extraction/src/main/java/nu/marginalia/keyword/extractors/ArtifactKeywords.java rename to code/features-convert/keyword-extraction/java/nu/marginalia/keyword/extractors/ArtifactKeywords.java diff --git a/code/features-convert/keyword-extraction/src/main/java/nu/marginalia/keyword/extractors/KeywordPositionBitmask.java b/code/features-convert/keyword-extraction/java/nu/marginalia/keyword/extractors/KeywordPositionBitmask.java similarity index 100% rename from code/features-convert/keyword-extraction/src/main/java/nu/marginalia/keyword/extractors/KeywordPositionBitmask.java rename to code/features-convert/keyword-extraction/java/nu/marginalia/keyword/extractors/KeywordPositionBitmask.java diff --git a/code/features-convert/keyword-extraction/src/main/java/nu/marginalia/keyword/extractors/NameLikeKeywords.java b/code/features-convert/keyword-extraction/java/nu/marginalia/keyword/extractors/NameLikeKeywords.java similarity index 100% rename from code/features-convert/keyword-extraction/src/main/java/nu/marginalia/keyword/extractors/NameLikeKeywords.java rename to code/features-convert/keyword-extraction/java/nu/marginalia/keyword/extractors/NameLikeKeywords.java diff --git a/code/features-convert/keyword-extraction/src/main/java/nu/marginalia/keyword/extractors/SubjectLikeKeywords.java b/code/features-convert/keyword-extraction/java/nu/marginalia/keyword/extractors/SubjectLikeKeywords.java 
similarity index 100% rename from code/features-convert/keyword-extraction/src/main/java/nu/marginalia/keyword/extractors/SubjectLikeKeywords.java rename to code/features-convert/keyword-extraction/java/nu/marginalia/keyword/extractors/SubjectLikeKeywords.java diff --git a/code/features-convert/keyword-extraction/src/main/java/nu/marginalia/keyword/extractors/TitleKeywords.java b/code/features-convert/keyword-extraction/java/nu/marginalia/keyword/extractors/TitleKeywords.java similarity index 100% rename from code/features-convert/keyword-extraction/src/main/java/nu/marginalia/keyword/extractors/TitleKeywords.java rename to code/features-convert/keyword-extraction/java/nu/marginalia/keyword/extractors/TitleKeywords.java diff --git a/code/features-convert/keyword-extraction/src/main/java/nu/marginalia/keyword/extractors/UrlKeywords.java b/code/features-convert/keyword-extraction/java/nu/marginalia/keyword/extractors/UrlKeywords.java similarity index 96% rename from code/features-convert/keyword-extraction/src/main/java/nu/marginalia/keyword/extractors/UrlKeywords.java rename to code/features-convert/keyword-extraction/java/nu/marginalia/keyword/extractors/UrlKeywords.java index 0b91a050..83c17078 100644 --- a/code/features-convert/keyword-extraction/src/main/java/nu/marginalia/keyword/extractors/UrlKeywords.java +++ b/code/features-convert/keyword-extraction/java/nu/marginalia/keyword/extractors/UrlKeywords.java @@ -1,7 +1,6 @@ package nu.marginalia.keyword.extractors; import ca.rmen.porterstemmer.PorterStemmer; -import nu.marginalia.model.EdgeDomain; import nu.marginalia.model.EdgeUrl; import java.util.Arrays; diff --git a/code/features-convert/keyword-extraction/src/main/java/nu/marginalia/keyword/extractors/WordsTfIdfCounts.java b/code/features-convert/keyword-extraction/java/nu/marginalia/keyword/extractors/WordsTfIdfCounts.java similarity index 97% rename from 
code/features-convert/keyword-extraction/src/main/java/nu/marginalia/keyword/extractors/WordsTfIdfCounts.java rename to code/features-convert/keyword-extraction/java/nu/marginalia/keyword/extractors/WordsTfIdfCounts.java index 559f69a6..8904e16e 100644 --- a/code/features-convert/keyword-extraction/src/main/java/nu/marginalia/keyword/extractors/WordsTfIdfCounts.java +++ b/code/features-convert/keyword-extraction/java/nu/marginalia/keyword/extractors/WordsTfIdfCounts.java @@ -3,10 +3,8 @@ package nu.marginalia.keyword.extractors; import it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap; import nu.marginalia.keyword.WordReps; import nu.marginalia.language.model.DocumentLanguageData; -import nu.marginalia.language.model.DocumentSentence; import nu.marginalia.language.model.WordRep; import nu.marginalia.keyword.KeywordExtractor; -import nu.marginalia.language.model.WordSpan; import nu.marginalia.term_frequency_dict.TermFrequencyDict; import org.apache.commons.lang3.StringUtils; diff --git a/code/features-convert/keyword-extraction/src/main/java/nu/marginalia/keyword/model/DocumentKeywords.java b/code/features-convert/keyword-extraction/java/nu/marginalia/keyword/model/DocumentKeywords.java similarity index 100% rename from code/features-convert/keyword-extraction/src/main/java/nu/marginalia/keyword/model/DocumentKeywords.java rename to code/features-convert/keyword-extraction/java/nu/marginalia/keyword/model/DocumentKeywords.java diff --git a/code/features-convert/keyword-extraction/src/main/java/nu/marginalia/keyword/model/DocumentKeywordsBuilder.java b/code/features-convert/keyword-extraction/java/nu/marginalia/keyword/model/DocumentKeywordsBuilder.java similarity index 100% rename from code/features-convert/keyword-extraction/src/main/java/nu/marginalia/keyword/model/DocumentKeywordsBuilder.java rename to code/features-convert/keyword-extraction/java/nu/marginalia/keyword/model/DocumentKeywordsBuilder.java diff --git 
a/code/features-convert/keyword-extraction/src/main/java/nu/marginalia/keyword/model/DocumentKeywordsPointer.java b/code/features-convert/keyword-extraction/java/nu/marginalia/keyword/model/DocumentKeywordsPointer.java similarity index 100% rename from code/features-convert/keyword-extraction/src/main/java/nu/marginalia/keyword/model/DocumentKeywordsPointer.java rename to code/features-convert/keyword-extraction/java/nu/marginalia/keyword/model/DocumentKeywordsPointer.java diff --git a/code/features-convert/keyword-extraction/readme.md b/code/features-convert/keyword-extraction/readme.md index 17ad8600..a9c04962 100644 --- a/code/features-convert/keyword-extraction/readme.md +++ b/code/features-convert/keyword-extraction/readme.md @@ -6,8 +6,8 @@ functions based on [POS tags](https://www.ling.upenn.edu/courses/Fall_2003/ling0 ## Central Classes -* [DocumentKeywordExtractor](src/main/java/nu/marginalia/keyword/DocumentKeywordExtractor.java) -* [KeywordMetadata](src/main/java/nu/marginalia/keyword/KeywordMetadata.java) +* [DocumentKeywordExtractor](java/nu/marginalia/keyword/DocumentKeywordExtractor.java) +* [KeywordMetadata](java/nu/marginalia/keyword/KeywordMetadata.java) ## See Also diff --git a/code/features-convert/keyword-extraction/src/test/resources/test-data/java.html b/code/features-convert/keyword-extraction/test-resources/test-data/java.html similarity index 100% rename from code/features-convert/keyword-extraction/src/test/resources/test-data/java.html rename to code/features-convert/keyword-extraction/test-resources/test-data/java.html diff --git a/code/features-convert/keyword-extraction/src/test/resources/test-data/keyboards.html b/code/features-convert/keyword-extraction/test-resources/test-data/keyboards.html similarity index 100% rename from code/features-convert/keyword-extraction/src/test/resources/test-data/keyboards.html rename to code/features-convert/keyword-extraction/test-resources/test-data/keyboards.html diff --git 
a/code/features-convert/keyword-extraction/src/test/resources/test-data/madonna.html b/code/features-convert/keyword-extraction/test-resources/test-data/madonna.html similarity index 100% rename from code/features-convert/keyword-extraction/src/test/resources/test-data/madonna.html rename to code/features-convert/keyword-extraction/test-resources/test-data/madonna.html diff --git a/code/features-convert/keyword-extraction/src/test/resources/test-data/spam.html b/code/features-convert/keyword-extraction/test-resources/test-data/spam.html similarity index 100% rename from code/features-convert/keyword-extraction/src/test/resources/test-data/spam.html rename to code/features-convert/keyword-extraction/test-resources/test-data/spam.html diff --git a/code/features-convert/keyword-extraction/src/test/java/nu/marginalia/keyword/DocumentKeywordExtractorTest.java b/code/features-convert/keyword-extraction/test/nu/marginalia/keyword/DocumentKeywordExtractorTest.java similarity index 100% rename from code/features-convert/keyword-extraction/src/test/java/nu/marginalia/keyword/DocumentKeywordExtractorTest.java rename to code/features-convert/keyword-extraction/test/nu/marginalia/keyword/DocumentKeywordExtractorTest.java diff --git a/code/features-convert/keyword-extraction/src/test/java/nu/marginalia/keyword/SentenceExtractorTest.java b/code/features-convert/keyword-extraction/test/nu/marginalia/keyword/SentenceExtractorTest.java similarity index 92% rename from code/features-convert/keyword-extraction/src/test/java/nu/marginalia/keyword/SentenceExtractorTest.java rename to code/features-convert/keyword-extraction/test/nu/marginalia/keyword/SentenceExtractorTest.java index c561eae7..dabad6d1 100644 --- a/code/features-convert/keyword-extraction/src/test/java/nu/marginalia/keyword/SentenceExtractorTest.java +++ b/code/features-convert/keyword-extraction/test/nu/marginalia/keyword/SentenceExtractorTest.java @@ -2,23 +2,17 @@ package nu.marginalia.keyword; import 
lombok.SneakyThrows; import nu.marginalia.LanguageModels; -import nu.marginalia.language.model.WordRep; -import nu.marginalia.language.model.WordSpan; import nu.marginalia.language.sentence.SentenceExtractor; import nu.marginalia.term_frequency_dict.TermFrequencyDict; -import nu.marginalia.language.model.WordSeparator; import nu.marginalia.WmsaHome; import nu.marginalia.model.EdgeUrl; import nu.marginalia.test.util.TestLanguageModels; import org.jsoup.Jsoup; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Tag; import org.junit.jupiter.api.Test; import java.io.IOException; import java.nio.file.Files; -import java.nio.file.Path; import java.util.*; import java.util.regex.Pattern; diff --git a/code/features-convert/keyword-extraction/src/test/java/nu/marginalia/keyword/extractors/ArtifactKeywordsTest.java b/code/features-convert/keyword-extraction/test/nu/marginalia/keyword/extractors/ArtifactKeywordsTest.java similarity index 100% rename from code/features-convert/keyword-extraction/src/test/java/nu/marginalia/keyword/extractors/ArtifactKeywordsTest.java rename to code/features-convert/keyword-extraction/test/nu/marginalia/keyword/extractors/ArtifactKeywordsTest.java diff --git a/code/features-convert/keyword-extraction/src/test/java/nu/marginalia/keyword/extractors/NameLikeKeywordsTest.java b/code/features-convert/keyword-extraction/test/nu/marginalia/keyword/extractors/NameLikeKeywordsTest.java similarity index 100% rename from code/features-convert/keyword-extraction/src/test/java/nu/marginalia/keyword/extractors/NameLikeKeywordsTest.java rename to code/features-convert/keyword-extraction/test/nu/marginalia/keyword/extractors/NameLikeKeywordsTest.java diff --git a/code/features-convert/keyword-extraction/src/test/java/nu/marginalia/keyword/extractors/SubjectLikeKeywordsTest.java b/code/features-convert/keyword-extraction/test/nu/marginalia/keyword/extractors/SubjectLikeKeywordsTest.java similarity 
index 100% rename from code/features-convert/keyword-extraction/src/test/java/nu/marginalia/keyword/extractors/SubjectLikeKeywordsTest.java rename to code/features-convert/keyword-extraction/test/nu/marginalia/keyword/extractors/SubjectLikeKeywordsTest.java diff --git a/code/features-convert/keyword-extraction/src/test/java/nu/marginalia/keyword/extractors/TitleKeywordsTest.java b/code/features-convert/keyword-extraction/test/nu/marginalia/keyword/extractors/TitleKeywordsTest.java similarity index 100% rename from code/features-convert/keyword-extraction/src/test/java/nu/marginalia/keyword/extractors/TitleKeywordsTest.java rename to code/features-convert/keyword-extraction/test/nu/marginalia/keyword/extractors/TitleKeywordsTest.java diff --git a/code/features-convert/keyword-extraction/src/test/java/nu/marginalia/keyword/extractors/UrlKeywordsTest.java b/code/features-convert/keyword-extraction/test/nu/marginalia/keyword/extractors/UrlKeywordsTest.java similarity index 100% rename from code/features-convert/keyword-extraction/src/test/java/nu/marginalia/keyword/extractors/UrlKeywordsTest.java rename to code/features-convert/keyword-extraction/test/nu/marginalia/keyword/extractors/UrlKeywordsTest.java diff --git a/code/features-convert/keyword-extraction/src/test/java/nu/marginalia/test/util/TestLanguageModels.java b/code/features-convert/keyword-extraction/test/nu/marginalia/test/util/TestLanguageModels.java similarity index 100% rename from code/features-convert/keyword-extraction/src/test/java/nu/marginalia/test/util/TestLanguageModels.java rename to code/features-convert/keyword-extraction/test/nu/marginalia/test/util/TestLanguageModels.java diff --git a/code/features-convert/pubdate/build.gradle b/code/features-convert/pubdate/build.gradle index 0a08ea74..bebd3d8e 100644 --- a/code/features-convert/pubdate/build.gradle +++ b/code/features-convert/pubdate/build.gradle @@ -13,6 +13,8 @@ java { } } +apply from: "$rootProject.projectDir/srcsets.gradle" + 
dependencies { implementation project(':code:common:model') diff --git a/code/features-convert/pubdate/src/main/java/nu/marginalia/pubdate/PubDateEffortLevel.java b/code/features-convert/pubdate/java/nu/marginalia/pubdate/PubDateEffortLevel.java similarity index 100% rename from code/features-convert/pubdate/src/main/java/nu/marginalia/pubdate/PubDateEffortLevel.java rename to code/features-convert/pubdate/java/nu/marginalia/pubdate/PubDateEffortLevel.java diff --git a/code/features-convert/pubdate/src/main/java/nu/marginalia/pubdate/PubDateFromHtmlStandard.java b/code/features-convert/pubdate/java/nu/marginalia/pubdate/PubDateFromHtmlStandard.java similarity index 100% rename from code/features-convert/pubdate/src/main/java/nu/marginalia/pubdate/PubDateFromHtmlStandard.java rename to code/features-convert/pubdate/java/nu/marginalia/pubdate/PubDateFromHtmlStandard.java diff --git a/code/features-convert/pubdate/src/main/java/nu/marginalia/pubdate/PubDateHeuristic.java b/code/features-convert/pubdate/java/nu/marginalia/pubdate/PubDateHeuristic.java similarity index 100% rename from code/features-convert/pubdate/src/main/java/nu/marginalia/pubdate/PubDateHeuristic.java rename to code/features-convert/pubdate/java/nu/marginalia/pubdate/PubDateHeuristic.java diff --git a/code/features-convert/pubdate/src/main/java/nu/marginalia/pubdate/PubDateParser.java b/code/features-convert/pubdate/java/nu/marginalia/pubdate/PubDateParser.java similarity index 100% rename from code/features-convert/pubdate/src/main/java/nu/marginalia/pubdate/PubDateParser.java rename to code/features-convert/pubdate/java/nu/marginalia/pubdate/PubDateParser.java diff --git a/code/features-convert/pubdate/src/main/java/nu/marginalia/pubdate/PubDateSniffer.java b/code/features-convert/pubdate/java/nu/marginalia/pubdate/PubDateSniffer.java similarity index 100% rename from code/features-convert/pubdate/src/main/java/nu/marginalia/pubdate/PubDateSniffer.java rename to 
code/features-convert/pubdate/java/nu/marginalia/pubdate/PubDateSniffer.java diff --git a/code/features-convert/pubdate/src/main/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicDOMParsingPass1.java b/code/features-convert/pubdate/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicDOMParsingPass1.java similarity index 100% rename from code/features-convert/pubdate/src/main/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicDOMParsingPass1.java rename to code/features-convert/pubdate/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicDOMParsingPass1.java diff --git a/code/features-convert/pubdate/src/main/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicDOMParsingPass2.java b/code/features-convert/pubdate/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicDOMParsingPass2.java similarity index 100% rename from code/features-convert/pubdate/src/main/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicDOMParsingPass2.java rename to code/features-convert/pubdate/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicDOMParsingPass2.java diff --git a/code/features-convert/pubdate/src/main/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicGuessFromHtmlStandard.java b/code/features-convert/pubdate/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicGuessFromHtmlStandard.java similarity index 100% rename from code/features-convert/pubdate/src/main/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicGuessFromHtmlStandard.java rename to code/features-convert/pubdate/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicGuessFromHtmlStandard.java diff --git a/code/features-convert/pubdate/src/main/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicHtml5AnyTimeTag.java b/code/features-convert/pubdate/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicHtml5AnyTimeTag.java similarity index 100% rename from code/features-convert/pubdate/src/main/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicHtml5AnyTimeTag.java rename to 
code/features-convert/pubdate/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicHtml5AnyTimeTag.java diff --git a/code/features-convert/pubdate/src/main/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicHtml5ArticleDateTag.java b/code/features-convert/pubdate/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicHtml5ArticleDateTag.java similarity index 100% rename from code/features-convert/pubdate/src/main/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicHtml5ArticleDateTag.java rename to code/features-convert/pubdate/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicHtml5ArticleDateTag.java diff --git a/code/features-convert/pubdate/src/main/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicHtml5ItempropDateTag.java b/code/features-convert/pubdate/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicHtml5ItempropDateTag.java similarity index 100% rename from code/features-convert/pubdate/src/main/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicHtml5ItempropDateTag.java rename to code/features-convert/pubdate/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicHtml5ItempropDateTag.java diff --git a/code/features-convert/pubdate/src/main/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicJSONLD.java b/code/features-convert/pubdate/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicJSONLD.java similarity index 100% rename from code/features-convert/pubdate/src/main/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicJSONLD.java rename to code/features-convert/pubdate/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicJSONLD.java diff --git a/code/features-convert/pubdate/src/main/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicLastModified.java b/code/features-convert/pubdate/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicLastModified.java similarity index 100% rename from code/features-convert/pubdate/src/main/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicLastModified.java rename to 
code/features-convert/pubdate/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicLastModified.java diff --git a/code/features-convert/pubdate/src/main/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicMicrodata.java b/code/features-convert/pubdate/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicMicrodata.java similarity index 100% rename from code/features-convert/pubdate/src/main/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicMicrodata.java rename to code/features-convert/pubdate/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicMicrodata.java diff --git a/code/features-convert/pubdate/src/main/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicOpenGraph.java b/code/features-convert/pubdate/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicOpenGraph.java similarity index 100% rename from code/features-convert/pubdate/src/main/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicOpenGraph.java rename to code/features-convert/pubdate/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicOpenGraph.java diff --git a/code/features-convert/pubdate/src/main/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicRDFaTag.java b/code/features-convert/pubdate/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicRDFaTag.java similarity index 100% rename from code/features-convert/pubdate/src/main/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicRDFaTag.java rename to code/features-convert/pubdate/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicRDFaTag.java diff --git a/code/features-convert/pubdate/src/main/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicUrlPatternPass1.java b/code/features-convert/pubdate/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicUrlPatternPass1.java similarity index 100% rename from code/features-convert/pubdate/src/main/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicUrlPatternPass1.java rename to code/features-convert/pubdate/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicUrlPatternPass1.java diff 
--git a/code/features-convert/pubdate/src/main/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicUrlPatternPass2.java b/code/features-convert/pubdate/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicUrlPatternPass2.java similarity index 100% rename from code/features-convert/pubdate/src/main/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicUrlPatternPass2.java rename to code/features-convert/pubdate/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicUrlPatternPass2.java diff --git a/code/features-convert/pubdate/readme.md b/code/features-convert/pubdate/readme.md index 40f28710..add657ee 100644 --- a/code/features-convert/pubdate/readme.md +++ b/code/features-convert/pubdate/readme.md @@ -4,4 +4,4 @@ Contains advanced haruspicy for figuring out when a document was published. ## Central Classes -* [PubDateSniffer](src/main/java/nu/marginalia/pubdate/PubDateSniffer.java) \ No newline at end of file +* [PubDateSniffer](java/nu/marginalia/pubdate/PubDateSniffer.java) \ No newline at end of file diff --git a/code/features-convert/pubdate/src/test/java/nu/marginalia/pubdate/PubDateSnifferTest.java b/code/features-convert/pubdate/test/nu/marginalia/pubdate/PubDateSnifferTest.java similarity index 100% rename from code/features-convert/pubdate/src/test/java/nu/marginalia/pubdate/PubDateSnifferTest.java rename to code/features-convert/pubdate/test/nu/marginalia/pubdate/PubDateSnifferTest.java diff --git a/code/features-convert/pubdate/src/test/java/nu/marginalia/pubdate/PubDateTest.java b/code/features-convert/pubdate/test/nu/marginalia/pubdate/PubDateTest.java similarity index 100% rename from code/features-convert/pubdate/src/test/java/nu/marginalia/pubdate/PubDateTest.java rename to code/features-convert/pubdate/test/nu/marginalia/pubdate/PubDateTest.java diff --git a/code/features-convert/reddit-json/build.gradle b/code/features-convert/reddit-json/build.gradle index 08420127..a9aa3bd6 100644 --- a/code/features-convert/reddit-json/build.gradle +++ 
b/code/features-convert/reddit-json/build.gradle @@ -10,6 +10,8 @@ java { } } +apply from: "$rootProject.projectDir/srcsets.gradle" + dependencies { implementation libs.bundles.slf4j diff --git a/code/features-convert/reddit-json/src/main/java/nu/marginalia/integration/reddit/RedditEntryReader.java b/code/features-convert/reddit-json/java/nu/marginalia/integration/reddit/RedditEntryReader.java similarity index 100% rename from code/features-convert/reddit-json/src/main/java/nu/marginalia/integration/reddit/RedditEntryReader.java rename to code/features-convert/reddit-json/java/nu/marginalia/integration/reddit/RedditEntryReader.java diff --git a/code/features-convert/reddit-json/src/main/java/nu/marginalia/integration/reddit/db/RedditDb.java b/code/features-convert/reddit-json/java/nu/marginalia/integration/reddit/db/RedditDb.java similarity index 100% rename from code/features-convert/reddit-json/src/main/java/nu/marginalia/integration/reddit/db/RedditDb.java rename to code/features-convert/reddit-json/java/nu/marginalia/integration/reddit/db/RedditDb.java diff --git a/code/features-convert/reddit-json/src/main/java/nu/marginalia/integration/reddit/model/ProcessableRedditComment.java b/code/features-convert/reddit-json/java/nu/marginalia/integration/reddit/model/ProcessableRedditComment.java similarity index 100% rename from code/features-convert/reddit-json/src/main/java/nu/marginalia/integration/reddit/model/ProcessableRedditComment.java rename to code/features-convert/reddit-json/java/nu/marginalia/integration/reddit/model/ProcessableRedditComment.java diff --git a/code/features-convert/reddit-json/src/main/java/nu/marginalia/integration/reddit/model/ProcessableRedditSubmission.java b/code/features-convert/reddit-json/java/nu/marginalia/integration/reddit/model/ProcessableRedditSubmission.java similarity index 100% rename from code/features-convert/reddit-json/src/main/java/nu/marginalia/integration/reddit/model/ProcessableRedditSubmission.java rename to 
code/features-convert/reddit-json/java/nu/marginalia/integration/reddit/model/ProcessableRedditSubmission.java diff --git a/code/features-convert/reddit-json/src/main/java/nu/marginalia/integration/reddit/model/RawRedditComment.java b/code/features-convert/reddit-json/java/nu/marginalia/integration/reddit/model/RawRedditComment.java similarity index 100% rename from code/features-convert/reddit-json/src/main/java/nu/marginalia/integration/reddit/model/RawRedditComment.java rename to code/features-convert/reddit-json/java/nu/marginalia/integration/reddit/model/RawRedditComment.java diff --git a/code/features-convert/reddit-json/src/main/java/nu/marginalia/integration/reddit/model/RawRedditSubmission.java b/code/features-convert/reddit-json/java/nu/marginalia/integration/reddit/model/RawRedditSubmission.java similarity index 100% rename from code/features-convert/reddit-json/src/main/java/nu/marginalia/integration/reddit/model/RawRedditSubmission.java rename to code/features-convert/reddit-json/java/nu/marginalia/integration/reddit/model/RawRedditSubmission.java diff --git a/code/features-convert/reddit-json/src/main/resources/db/reddit.sql b/code/features-convert/reddit-json/resources/db/reddit.sql similarity index 100% rename from code/features-convert/reddit-json/src/main/resources/db/reddit.sql rename to code/features-convert/reddit-json/resources/db/reddit.sql diff --git a/code/features-convert/reddit-json/src/test/java/nu/marginalia/integration/reddit/RedditEntryReaderTest.java b/code/features-convert/reddit-json/test/nu/marginalia/integration/reddit/RedditEntryReaderTest.java similarity index 95% rename from code/features-convert/reddit-json/src/test/java/nu/marginalia/integration/reddit/RedditEntryReaderTest.java rename to code/features-convert/reddit-json/test/nu/marginalia/integration/reddit/RedditEntryReaderTest.java index 7201eb33..f8f2cff7 100644 --- 
a/code/features-convert/reddit-json/src/test/java/nu/marginalia/integration/reddit/RedditEntryReaderTest.java +++ b/code/features-convert/reddit-json/test/nu/marginalia/integration/reddit/RedditEntryReaderTest.java @@ -5,9 +5,6 @@ import org.junit.jupiter.api.Test; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; -import java.time.Instant; -import java.time.LocalDate; -import java.time.ZoneOffset; class RedditEntryReaderTest { /** This test case exists for debugging, reddit sideloading. It requires local reddit data, diff --git a/code/features-convert/reddit-json/src/test/java/nu/marginalia/integration/reddit/db/RedditDbTest.java b/code/features-convert/reddit-json/test/nu/marginalia/integration/reddit/db/RedditDbTest.java similarity index 97% rename from code/features-convert/reddit-json/src/test/java/nu/marginalia/integration/reddit/db/RedditDbTest.java rename to code/features-convert/reddit-json/test/nu/marginalia/integration/reddit/db/RedditDbTest.java index bff2aab6..3670e7cf 100644 --- a/code/features-convert/reddit-json/src/test/java/nu/marginalia/integration/reddit/db/RedditDbTest.java +++ b/code/features-convert/reddit-json/test/nu/marginalia/integration/reddit/db/RedditDbTest.java @@ -10,8 +10,6 @@ import java.time.Instant; import java.time.LocalDate; import java.time.ZoneOffset; -import static org.junit.jupiter.api.Assertions.*; - class RedditDbTest { @Disabled diff --git a/code/features-convert/stackexchange-xml/build.gradle b/code/features-convert/stackexchange-xml/build.gradle index 8126187d..d693047b 100644 --- a/code/features-convert/stackexchange-xml/build.gradle +++ b/code/features-convert/stackexchange-xml/build.gradle @@ -10,6 +10,8 @@ java { } } +apply from: "$rootProject.projectDir/srcsets.gradle" + dependencies { implementation libs.bundles.slf4j diff --git a/code/features-convert/stackexchange-xml/src/main/java/nu/marginalia/integration/stackexchange/model/StackExchangeComment.java 
b/code/features-convert/stackexchange-xml/java/nu/marginalia/integration/stackexchange/model/StackExchangeComment.java similarity index 100% rename from code/features-convert/stackexchange-xml/src/main/java/nu/marginalia/integration/stackexchange/model/StackExchangeComment.java rename to code/features-convert/stackexchange-xml/java/nu/marginalia/integration/stackexchange/model/StackExchangeComment.java diff --git a/code/features-convert/stackexchange-xml/src/main/java/nu/marginalia/integration/stackexchange/model/StackExchangePost.java b/code/features-convert/stackexchange-xml/java/nu/marginalia/integration/stackexchange/model/StackExchangePost.java similarity index 100% rename from code/features-convert/stackexchange-xml/src/main/java/nu/marginalia/integration/stackexchange/model/StackExchangePost.java rename to code/features-convert/stackexchange-xml/java/nu/marginalia/integration/stackexchange/model/StackExchangePost.java diff --git a/code/features-convert/stackexchange-xml/src/main/java/nu/marginalia/integration/stackexchange/sqlite/StackExchangePostsDb.java b/code/features-convert/stackexchange-xml/java/nu/marginalia/integration/stackexchange/sqlite/StackExchangePostsDb.java similarity index 100% rename from code/features-convert/stackexchange-xml/src/main/java/nu/marginalia/integration/stackexchange/sqlite/StackExchangePostsDb.java rename to code/features-convert/stackexchange-xml/java/nu/marginalia/integration/stackexchange/sqlite/StackExchangePostsDb.java diff --git a/code/features-convert/stackexchange-xml/src/main/java/nu/marginalia/integration/stackexchange/xml/StackExchange7zXmlEventReaderSource.java b/code/features-convert/stackexchange-xml/java/nu/marginalia/integration/stackexchange/xml/StackExchange7zXmlEventReaderSource.java similarity index 100% rename from code/features-convert/stackexchange-xml/src/main/java/nu/marginalia/integration/stackexchange/xml/StackExchange7zXmlEventReaderSource.java rename to 
code/features-convert/stackexchange-xml/java/nu/marginalia/integration/stackexchange/xml/StackExchange7zXmlEventReaderSource.java diff --git a/code/features-convert/stackexchange-xml/src/main/java/nu/marginalia/integration/stackexchange/xml/StackExchangeXmlCommentReader.java b/code/features-convert/stackexchange-xml/java/nu/marginalia/integration/stackexchange/xml/StackExchangeXmlCommentReader.java similarity index 100% rename from code/features-convert/stackexchange-xml/src/main/java/nu/marginalia/integration/stackexchange/xml/StackExchangeXmlCommentReader.java rename to code/features-convert/stackexchange-xml/java/nu/marginalia/integration/stackexchange/xml/StackExchangeXmlCommentReader.java diff --git a/code/features-convert/stackexchange-xml/src/main/java/nu/marginalia/integration/stackexchange/xml/StackExchangeXmlIterator.java b/code/features-convert/stackexchange-xml/java/nu/marginalia/integration/stackexchange/xml/StackExchangeXmlIterator.java similarity index 100% rename from code/features-convert/stackexchange-xml/src/main/java/nu/marginalia/integration/stackexchange/xml/StackExchangeXmlIterator.java rename to code/features-convert/stackexchange-xml/java/nu/marginalia/integration/stackexchange/xml/StackExchangeXmlIterator.java diff --git a/code/features-convert/stackexchange-xml/src/main/java/nu/marginalia/integration/stackexchange/xml/StackExchangeXmlPostReader.java b/code/features-convert/stackexchange-xml/java/nu/marginalia/integration/stackexchange/xml/StackExchangeXmlPostReader.java similarity index 100% rename from code/features-convert/stackexchange-xml/src/main/java/nu/marginalia/integration/stackexchange/xml/StackExchangeXmlPostReader.java rename to code/features-convert/stackexchange-xml/java/nu/marginalia/integration/stackexchange/xml/StackExchangeXmlPostReader.java diff --git a/code/features-convert/stackexchange-xml/src/main/java/nu/marginalia/integration/stackexchange/xml/XmlEventReaderSource.java 
b/code/features-convert/stackexchange-xml/java/nu/marginalia/integration/stackexchange/xml/XmlEventReaderSource.java similarity index 100% rename from code/features-convert/stackexchange-xml/src/main/java/nu/marginalia/integration/stackexchange/xml/XmlEventReaderSource.java rename to code/features-convert/stackexchange-xml/java/nu/marginalia/integration/stackexchange/xml/XmlEventReaderSource.java diff --git a/code/features-convert/stackexchange-xml/readme.md b/code/features-convert/stackexchange-xml/readme.md index 8af6d05a..1701ad7f 100644 --- a/code/features-convert/stackexchange-xml/readme.md +++ b/code/features-convert/stackexchange-xml/readme.md @@ -16,4 +16,3 @@ holistically, not by question or answer, it is necessary to re-arrange the data (which is very large). SQLite does a decent job of enabling this task. -See [tools/stackexchange-converter](../../tools/stackexchange-converter). \ No newline at end of file diff --git a/code/features-convert/stackexchange-xml/src/main/resources/db/stackexchange.sql b/code/features-convert/stackexchange-xml/resources/db/stackexchange.sql similarity index 100% rename from code/features-convert/stackexchange-xml/src/main/resources/db/stackexchange.sql rename to code/features-convert/stackexchange-xml/resources/db/stackexchange.sql diff --git a/code/features-convert/stackexchange-xml/src/test/java/nu/marginalia/integration/stackexchange/xml/StackExchangeXmlCommentReaderTest.java b/code/features-convert/stackexchange-xml/test/nu/marginalia/integration/stackexchange/xml/StackExchangeXmlCommentReaderTest.java similarity index 100% rename from code/features-convert/stackexchange-xml/src/test/java/nu/marginalia/integration/stackexchange/xml/StackExchangeXmlCommentReaderTest.java rename to code/features-convert/stackexchange-xml/test/nu/marginalia/integration/stackexchange/xml/StackExchangeXmlCommentReaderTest.java diff --git 
a/code/features-convert/stackexchange-xml/src/test/java/nu/marginalia/integration/stackexchange/xml/StackExchangeXmlPostReaderTest.java b/code/features-convert/stackexchange-xml/test/nu/marginalia/integration/stackexchange/xml/StackExchangeXmlPostReaderTest.java similarity index 100% rename from code/features-convert/stackexchange-xml/src/test/java/nu/marginalia/integration/stackexchange/xml/StackExchangeXmlPostReaderTest.java rename to code/features-convert/stackexchange-xml/test/nu/marginalia/integration/stackexchange/xml/StackExchangeXmlPostReaderTest.java diff --git a/code/features-convert/stackexchange-xml/src/test/java/nu/marginalia/integration/stackexchange/xml/StringXmlTestEventReader.java b/code/features-convert/stackexchange-xml/test/nu/marginalia/integration/stackexchange/xml/StringXmlTestEventReader.java similarity index 100% rename from code/features-convert/stackexchange-xml/src/test/java/nu/marginalia/integration/stackexchange/xml/StringXmlTestEventReader.java rename to code/features-convert/stackexchange-xml/test/nu/marginalia/integration/stackexchange/xml/StringXmlTestEventReader.java diff --git a/code/features-convert/summary-extraction/build.gradle b/code/features-convert/summary-extraction/build.gradle index 7ff4f23c..bf6a87c5 100644 --- a/code/features-convert/summary-extraction/build.gradle +++ b/code/features-convert/summary-extraction/build.gradle @@ -10,6 +10,8 @@ java { } } +apply from: "$rootProject.projectDir/srcsets.gradle" + dependencies { implementation libs.bundles.slf4j diff --git a/code/features-convert/summary-extraction/src/main/java/nu/marginalia/summary/SummaryExtractor.java b/code/features-convert/summary-extraction/java/nu/marginalia/summary/SummaryExtractor.java similarity index 100% rename from code/features-convert/summary-extraction/src/main/java/nu/marginalia/summary/SummaryExtractor.java rename to code/features-convert/summary-extraction/java/nu/marginalia/summary/SummaryExtractor.java diff --git 
a/code/features-convert/summary-extraction/src/main/java/nu/marginalia/summary/heuristic/DomFilterHeuristic.java b/code/features-convert/summary-extraction/java/nu/marginalia/summary/heuristic/DomFilterHeuristic.java similarity index 100% rename from code/features-convert/summary-extraction/src/main/java/nu/marginalia/summary/heuristic/DomFilterHeuristic.java rename to code/features-convert/summary-extraction/java/nu/marginalia/summary/heuristic/DomFilterHeuristic.java diff --git a/code/features-convert/summary-extraction/src/main/java/nu/marginalia/summary/heuristic/FallbackHeuristic.java b/code/features-convert/summary-extraction/java/nu/marginalia/summary/heuristic/FallbackHeuristic.java similarity index 100% rename from code/features-convert/summary-extraction/src/main/java/nu/marginalia/summary/heuristic/FallbackHeuristic.java rename to code/features-convert/summary-extraction/java/nu/marginalia/summary/heuristic/FallbackHeuristic.java diff --git a/code/features-convert/summary-extraction/src/main/java/nu/marginalia/summary/heuristic/HeuristicTextUtil.java b/code/features-convert/summary-extraction/java/nu/marginalia/summary/heuristic/HeuristicTextUtil.java similarity index 100% rename from code/features-convert/summary-extraction/src/main/java/nu/marginalia/summary/heuristic/HeuristicTextUtil.java rename to code/features-convert/summary-extraction/java/nu/marginalia/summary/heuristic/HeuristicTextUtil.java diff --git a/code/features-convert/summary-extraction/src/main/java/nu/marginalia/summary/heuristic/MetaDescriptionHeuristic.java b/code/features-convert/summary-extraction/java/nu/marginalia/summary/heuristic/MetaDescriptionHeuristic.java similarity index 100% rename from code/features-convert/summary-extraction/src/main/java/nu/marginalia/summary/heuristic/MetaDescriptionHeuristic.java rename to code/features-convert/summary-extraction/java/nu/marginalia/summary/heuristic/MetaDescriptionHeuristic.java diff --git 
a/code/features-convert/summary-extraction/src/main/java/nu/marginalia/summary/heuristic/OpenGraphDescriptionHeuristic.java b/code/features-convert/summary-extraction/java/nu/marginalia/summary/heuristic/OpenGraphDescriptionHeuristic.java similarity index 100% rename from code/features-convert/summary-extraction/src/main/java/nu/marginalia/summary/heuristic/OpenGraphDescriptionHeuristic.java rename to code/features-convert/summary-extraction/java/nu/marginalia/summary/heuristic/OpenGraphDescriptionHeuristic.java diff --git a/code/features-convert/summary-extraction/src/main/java/nu/marginalia/summary/heuristic/SummarizingDOMFilter.java b/code/features-convert/summary-extraction/java/nu/marginalia/summary/heuristic/SummarizingDOMFilter.java similarity index 100% rename from code/features-convert/summary-extraction/src/main/java/nu/marginalia/summary/heuristic/SummarizingDOMFilter.java rename to code/features-convert/summary-extraction/java/nu/marginalia/summary/heuristic/SummarizingDOMFilter.java diff --git a/code/features-convert/summary-extraction/src/main/java/nu/marginalia/summary/heuristic/SummaryHeuristic.java b/code/features-convert/summary-extraction/java/nu/marginalia/summary/heuristic/SummaryHeuristic.java similarity index 100% rename from code/features-convert/summary-extraction/src/main/java/nu/marginalia/summary/heuristic/SummaryHeuristic.java rename to code/features-convert/summary-extraction/java/nu/marginalia/summary/heuristic/SummaryHeuristic.java diff --git a/code/features-convert/summary-extraction/src/main/java/nu/marginalia/summary/heuristic/TagDensityHeuristic.java b/code/features-convert/summary-extraction/java/nu/marginalia/summary/heuristic/TagDensityHeuristic.java similarity index 100% rename from code/features-convert/summary-extraction/src/main/java/nu/marginalia/summary/heuristic/TagDensityHeuristic.java rename to code/features-convert/summary-extraction/java/nu/marginalia/summary/heuristic/TagDensityHeuristic.java diff --git 
a/code/features-convert/summary-extraction/readme.md b/code/features-convert/summary-extraction/readme.md index 1aa38a42..b617d947 100644 --- a/code/features-convert/summary-extraction/readme.md +++ b/code/features-convert/summary-extraction/readme.md @@ -21,5 +21,5 @@ order of a 100,000,000 documents with a time budget of a couple of hours. ## Central Classes -* [SummaryExtractor](src/main/java/nu/marginalia/summary/SummaryExtractor.java) +* [SummaryExtractor](java/nu/marginalia/summary/SummaryExtractor.java) diff --git a/code/features-convert/summary-extraction/src/test/resources/html/monadnock.html b/code/features-convert/summary-extraction/test-resources/html/monadnock.html similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/monadnock.html rename to code/features-convert/summary-extraction/test-resources/html/monadnock.html diff --git a/code/features-convert/summary-extraction/src/test/resources/html/readme.md b/code/features-convert/summary-extraction/test-resources/html/readme.md similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/readme.md rename to code/features-convert/summary-extraction/test-resources/html/readme.md diff --git a/code/features-convert/summary-extraction/src/test/resources/html/summarization/187.shtml b/code/features-convert/summary-extraction/test-resources/html/summarization/187.shtml similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/summarization/187.shtml rename to code/features-convert/summary-extraction/test-resources/html/summarization/187.shtml diff --git a/code/features-convert/summary-extraction/src/test/resources/html/summarization/surrey.html b/code/features-convert/summary-extraction/test-resources/html/summarization/surrey.html similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/summarization/surrey.html rename to 
code/features-convert/summary-extraction/test-resources/html/summarization/surrey.html diff --git a/code/features-convert/summary-extraction/src/test/resources/html/summarization/surrey.html.1 b/code/features-convert/summary-extraction/test-resources/html/summarization/surrey.html.1 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/summarization/surrey.html.1 rename to code/features-convert/summary-extraction/test-resources/html/summarization/surrey.html.1 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/theregister.html b/code/features-convert/summary-extraction/test-resources/html/theregister.html similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/theregister.html rename to code/features-convert/summary-extraction/test-resources/html/theregister.html diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/index b/code/features-convert/summary-extraction/test-resources/html/work-set/index similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/index rename to code/features-convert/summary-extraction/test-resources/html/work-set/index diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1021546012 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--1021546012 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1021546012 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--1021546012 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1028592943 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--1028592943 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1028592943 rename to 
code/features-convert/summary-extraction/test-resources/html/work-set/url--1028592943 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1081293162 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--1081293162 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1081293162 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--1081293162 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1105046394 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--1105046394 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1105046394 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--1105046394 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1146923296 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--1146923296 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1146923296 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--1146923296 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1194694074 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--1194694074 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1194694074 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--1194694074 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1207898281 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--1207898281 similarity index 100% rename from 
code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1207898281 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--1207898281 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1268145073 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--1268145073 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1268145073 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--1268145073 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1294876331 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--1294876331 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1294876331 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--1294876331 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1314767420 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--1314767420 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1314767420 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--1314767420 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1316269786 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--1316269786 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1316269786 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--1316269786 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1316766580 
b/code/features-convert/summary-extraction/test-resources/html/work-set/url--1316766580 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1316766580 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--1316766580 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1319968043 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--1319968043 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1319968043 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--1319968043 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1338576987 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--1338576987 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1338576987 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--1338576987 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1341909571 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--1341909571 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1341909571 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--1341909571 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1369578579 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--1369578579 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1369578579 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--1369578579 diff --git 
a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1437315645 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--1437315645 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1437315645 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--1437315645 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1458954960 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--1458954960 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1458954960 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--1458954960 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1475681345 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--1475681345 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1475681345 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--1475681345 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1498328446 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--1498328446 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1498328446 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--1498328446 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1507779664 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--1507779664 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1507779664 rename to 
code/features-convert/summary-extraction/test-resources/html/work-set/url--1507779664 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1540303379 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--1540303379 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1540303379 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--1540303379 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--154898476 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--154898476 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--154898476 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--154898476 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1552059399 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--1552059399 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1552059399 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--1552059399 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1557688340 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--1557688340 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1557688340 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--1557688340 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1584145751 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--1584145751 similarity index 100% rename from 
code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1584145751 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--1584145751 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1605151204 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--1605151204 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1605151204 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--1605151204 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--162269247 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--162269247 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--162269247 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--162269247 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1624294488 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--1624294488 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1624294488 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--1624294488 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--164108285 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--164108285 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--164108285 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--164108285 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1645688243 
b/code/features-convert/summary-extraction/test-resources/html/work-set/url--1645688243 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1645688243 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--1645688243 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1658004609 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--1658004609 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1658004609 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--1658004609 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1658558834 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--1658558834 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1658558834 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--1658558834 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1698664879 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--1698664879 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1698664879 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--1698664879 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--169975195 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--169975195 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--169975195 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--169975195 diff --git 
a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1701203332 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--1701203332 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1701203332 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--1701203332 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--17281998 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--17281998 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--17281998 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--17281998 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1742070028 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--1742070028 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1742070028 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--1742070028 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1745376814 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--1745376814 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1745376814 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--1745376814 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1749889035 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--1749889035 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1749889035 rename to 
code/features-convert/summary-extraction/test-resources/html/work-set/url--1749889035 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--176177364 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--176177364 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--176177364 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--176177364 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--177014197 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--177014197 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--177014197 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--177014197 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1794527707 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--1794527707 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1794527707 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--1794527707 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1797740201 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--1797740201 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1797740201 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--1797740201 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1799098579 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--1799098579 similarity index 100% rename from 
code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1799098579 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--1799098579 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1959637826 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--1959637826 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1959637826 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--1959637826 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1971916964 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--1971916964 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1971916964 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--1971916964 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1985840368 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--1985840368 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--1985840368 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--1985840368 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--2012610859 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--2012610859 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--2012610859 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--2012610859 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--202178680 
b/code/features-convert/summary-extraction/test-resources/html/work-set/url--202178680 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--202178680 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--202178680 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--2043528727 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--2043528727 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--2043528727 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--2043528727 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--2081757477 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--2081757477 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--2081757477 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--2081757477 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--2103982576 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--2103982576 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--2103982576 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--2103982576 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--2111558769 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--2111558769 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--2111558769 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--2111558769 diff --git 
a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--213168798 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--213168798 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--213168798 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--213168798 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--232544032 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--232544032 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--232544032 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--232544032 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--253010011 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--253010011 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--253010011 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--253010011 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--274250994 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--274250994 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--274250994 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--274250994 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--332442790 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--332442790 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--332442790 rename to 
code/features-convert/summary-extraction/test-resources/html/work-set/url--332442790 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--353437903 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--353437903 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--353437903 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--353437903 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--364546777 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--364546777 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--364546777 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--364546777 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--379129416 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--379129416 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--379129416 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--379129416 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--399428149 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--399428149 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--399428149 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--399428149 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--425233170 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--425233170 similarity index 100% rename from 
code/features-convert/summary-extraction/src/test/resources/html/work-set/url--425233170 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--425233170 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--434612307 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--434612307 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--434612307 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--434612307 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--439772328 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--439772328 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--439772328 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--439772328 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--458002611 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--458002611 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--458002611 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--458002611 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--506010305 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--506010305 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--506010305 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--506010305 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--546773534 
b/code/features-convert/summary-extraction/test-resources/html/work-set/url--546773534 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--546773534 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--546773534 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--551288516 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--551288516 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--551288516 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--551288516 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--602577763 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--602577763 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--602577763 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--602577763 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--611668054 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--611668054 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--611668054 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--611668054 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--634771245 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--634771245 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--634771245 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--634771245 diff --git 
a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--639320493 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--639320493 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--639320493 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--639320493 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--643179018 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--643179018 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--643179018 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--643179018 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--663772351 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--663772351 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--663772351 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--663772351 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--670789152 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--670789152 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--670789152 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--670789152 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--6797317 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--6797317 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--6797317 rename to 
code/features-convert/summary-extraction/test-resources/html/work-set/url--6797317 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--700978490 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--700978490 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--700978490 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--700978490 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--708035332 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--708035332 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--708035332 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--708035332 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--804917062 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--804917062 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--804917062 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--804917062 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--819771302 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--819771302 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--819771302 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--819771302 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--840796372 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--840796372 similarity index 100% rename from 
code/features-convert/summary-extraction/src/test/resources/html/work-set/url--840796372 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--840796372 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--841445362 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--841445362 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--841445362 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--841445362 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--862385354 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--862385354 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--862385354 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--862385354 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--879796466 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--879796466 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--879796466 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--879796466 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--89134993 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--89134993 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--89134993 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--89134993 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--905197876 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--905197876 
similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--905197876 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--905197876 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--920328354 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--920328354 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--920328354 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--920328354 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--952827759 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--952827759 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--952827759 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--952827759 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--964018507 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--964018507 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--964018507 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--964018507 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url--972614909 b/code/features-convert/summary-extraction/test-resources/html/work-set/url--972614909 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url--972614909 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url--972614909 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url-10088520 
b/code/features-convert/summary-extraction/test-resources/html/work-set/url-10088520 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url-10088520 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url-10088520 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url-1013281103 b/code/features-convert/summary-extraction/test-resources/html/work-set/url-1013281103 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url-1013281103 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url-1013281103 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url-1019241851 b/code/features-convert/summary-extraction/test-resources/html/work-set/url-1019241851 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url-1019241851 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url-1019241851 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url-1059944953 b/code/features-convert/summary-extraction/test-resources/html/work-set/url-1059944953 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url-1059944953 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url-1059944953 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url-1118681302 b/code/features-convert/summary-extraction/test-resources/html/work-set/url-1118681302 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url-1118681302 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url-1118681302 diff --git 
a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url-1179298706 b/code/features-convert/summary-extraction/test-resources/html/work-set/url-1179298706 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url-1179298706 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url-1179298706 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url-1191749784 b/code/features-convert/summary-extraction/test-resources/html/work-set/url-1191749784 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url-1191749784 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url-1191749784 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url-1207094790 b/code/features-convert/summary-extraction/test-resources/html/work-set/url-1207094790 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url-1207094790 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url-1207094790 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url-1213989666 b/code/features-convert/summary-extraction/test-resources/html/work-set/url-1213989666 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url-1213989666 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url-1213989666 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url-1222442301 b/code/features-convert/summary-extraction/test-resources/html/work-set/url-1222442301 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url-1222442301 rename to 
code/features-convert/summary-extraction/test-resources/html/work-set/url-1222442301 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url-130332455 b/code/features-convert/summary-extraction/test-resources/html/work-set/url-130332455 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url-130332455 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url-130332455 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url-1311055461 b/code/features-convert/summary-extraction/test-resources/html/work-set/url-1311055461 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url-1311055461 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url-1311055461 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url-1391842722 b/code/features-convert/summary-extraction/test-resources/html/work-set/url-1391842722 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url-1391842722 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url-1391842722 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url-1457388763 b/code/features-convert/summary-extraction/test-resources/html/work-set/url-1457388763 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url-1457388763 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url-1457388763 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url-1506356272 b/code/features-convert/summary-extraction/test-resources/html/work-set/url-1506356272 similarity index 100% rename from 
code/features-convert/summary-extraction/src/test/resources/html/work-set/url-1506356272 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url-1506356272 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url-1511762169 b/code/features-convert/summary-extraction/test-resources/html/work-set/url-1511762169 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url-1511762169 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url-1511762169 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url-1534640058 b/code/features-convert/summary-extraction/test-resources/html/work-set/url-1534640058 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url-1534640058 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url-1534640058 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url-1551513871 b/code/features-convert/summary-extraction/test-resources/html/work-set/url-1551513871 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url-1551513871 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url-1551513871 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url-1567632447 b/code/features-convert/summary-extraction/test-resources/html/work-set/url-1567632447 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url-1567632447 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url-1567632447 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url-1623049502 
b/code/features-convert/summary-extraction/test-resources/html/work-set/url-1623049502 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url-1623049502 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url-1623049502 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url-163919330 b/code/features-convert/summary-extraction/test-resources/html/work-set/url-163919330 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url-163919330 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url-163919330 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url-1661398327 b/code/features-convert/summary-extraction/test-resources/html/work-set/url-1661398327 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url-1661398327 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url-1661398327 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url-1724309925 b/code/features-convert/summary-extraction/test-resources/html/work-set/url-1724309925 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url-1724309925 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url-1724309925 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url-1736807128 b/code/features-convert/summary-extraction/test-resources/html/work-set/url-1736807128 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url-1736807128 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url-1736807128 diff --git 
a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url-1739031345 b/code/features-convert/summary-extraction/test-resources/html/work-set/url-1739031345 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url-1739031345 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url-1739031345 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url-1755745765 b/code/features-convert/summary-extraction/test-resources/html/work-set/url-1755745765 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url-1755745765 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url-1755745765 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url-1802811100 b/code/features-convert/summary-extraction/test-resources/html/work-set/url-1802811100 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url-1802811100 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url-1802811100 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url-1805364707 b/code/features-convert/summary-extraction/test-resources/html/work-set/url-1805364707 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url-1805364707 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url-1805364707 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url-1832702370 b/code/features-convert/summary-extraction/test-resources/html/work-set/url-1832702370 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url-1832702370 rename to 
code/features-convert/summary-extraction/test-resources/html/work-set/url-1832702370 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url-1853114311 b/code/features-convert/summary-extraction/test-resources/html/work-set/url-1853114311 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url-1853114311 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url-1853114311 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url-1924872844 b/code/features-convert/summary-extraction/test-resources/html/work-set/url-1924872844 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url-1924872844 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url-1924872844 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url-197772804 b/code/features-convert/summary-extraction/test-resources/html/work-set/url-197772804 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url-197772804 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url-197772804 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url-1984259912 b/code/features-convert/summary-extraction/test-resources/html/work-set/url-1984259912 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url-1984259912 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url-1984259912 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url-1990903988 b/code/features-convert/summary-extraction/test-resources/html/work-set/url-1990903988 similarity index 100% rename from 
code/features-convert/summary-extraction/src/test/resources/html/work-set/url-1990903988 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url-1990903988 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url-2039310951 b/code/features-convert/summary-extraction/test-resources/html/work-set/url-2039310951 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url-2039310951 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url-2039310951 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url-2040857056 b/code/features-convert/summary-extraction/test-resources/html/work-set/url-2040857056 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url-2040857056 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url-2040857056 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url-2052613093 b/code/features-convert/summary-extraction/test-resources/html/work-set/url-2052613093 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url-2052613093 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url-2052613093 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url-2063899866 b/code/features-convert/summary-extraction/test-resources/html/work-set/url-2063899866 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url-2063899866 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url-2063899866 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url-2115548255 
b/code/features-convert/summary-extraction/test-resources/html/work-set/url-2115548255 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url-2115548255 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url-2115548255 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url-2127148436 b/code/features-convert/summary-extraction/test-resources/html/work-set/url-2127148436 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url-2127148436 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url-2127148436 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url-2133781904 b/code/features-convert/summary-extraction/test-resources/html/work-set/url-2133781904 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url-2133781904 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url-2133781904 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url-225690385 b/code/features-convert/summary-extraction/test-resources/html/work-set/url-225690385 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url-225690385 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url-225690385 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url-226401955 b/code/features-convert/summary-extraction/test-resources/html/work-set/url-226401955 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url-226401955 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url-226401955 diff --git 
a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url-262970770 b/code/features-convert/summary-extraction/test-resources/html/work-set/url-262970770 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url-262970770 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url-262970770 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url-30106798 b/code/features-convert/summary-extraction/test-resources/html/work-set/url-30106798 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url-30106798 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url-30106798 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url-302167335 b/code/features-convert/summary-extraction/test-resources/html/work-set/url-302167335 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url-302167335 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url-302167335 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url-327999153 b/code/features-convert/summary-extraction/test-resources/html/work-set/url-327999153 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url-327999153 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url-327999153 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url-332568225 b/code/features-convert/summary-extraction/test-resources/html/work-set/url-332568225 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url-332568225 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url-332568225 
diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url-343223418 b/code/features-convert/summary-extraction/test-resources/html/work-set/url-343223418 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url-343223418 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url-343223418 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url-383103932 b/code/features-convert/summary-extraction/test-resources/html/work-set/url-383103932 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url-383103932 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url-383103932 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url-412929678 b/code/features-convert/summary-extraction/test-resources/html/work-set/url-412929678 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url-412929678 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url-412929678 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url-475213997 b/code/features-convert/summary-extraction/test-resources/html/work-set/url-475213997 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url-475213997 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url-475213997 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url-483403121 b/code/features-convert/summary-extraction/test-resources/html/work-set/url-483403121 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url-483403121 rename to 
code/features-convert/summary-extraction/test-resources/html/work-set/url-483403121 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url-488667993 b/code/features-convert/summary-extraction/test-resources/html/work-set/url-488667993 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url-488667993 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url-488667993 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url-50815201 b/code/features-convert/summary-extraction/test-resources/html/work-set/url-50815201 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url-50815201 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url-50815201 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url-522685905 b/code/features-convert/summary-extraction/test-resources/html/work-set/url-522685905 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url-522685905 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url-522685905 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url-570714305 b/code/features-convert/summary-extraction/test-resources/html/work-set/url-570714305 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url-570714305 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url-570714305 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url-58733529 b/code/features-convert/summary-extraction/test-resources/html/work-set/url-58733529 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url-58733529 
rename to code/features-convert/summary-extraction/test-resources/html/work-set/url-58733529 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url-616518304 b/code/features-convert/summary-extraction/test-resources/html/work-set/url-616518304 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url-616518304 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url-616518304 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url-662169426 b/code/features-convert/summary-extraction/test-resources/html/work-set/url-662169426 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url-662169426 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url-662169426 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url-677278788 b/code/features-convert/summary-extraction/test-resources/html/work-set/url-677278788 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url-677278788 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url-677278788 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url-690486170 b/code/features-convert/summary-extraction/test-resources/html/work-set/url-690486170 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url-690486170 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url-690486170 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url-709693331 b/code/features-convert/summary-extraction/test-resources/html/work-set/url-709693331 similarity index 100% rename from 
code/features-convert/summary-extraction/src/test/resources/html/work-set/url-709693331 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url-709693331 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url-734531556 b/code/features-convert/summary-extraction/test-resources/html/work-set/url-734531556 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url-734531556 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url-734531556 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url-767530276 b/code/features-convert/summary-extraction/test-resources/html/work-set/url-767530276 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url-767530276 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url-767530276 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url-783154014 b/code/features-convert/summary-extraction/test-resources/html/work-set/url-783154014 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url-783154014 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url-783154014 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url-796905237 b/code/features-convert/summary-extraction/test-resources/html/work-set/url-796905237 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url-796905237 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url-796905237 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url-800099955 b/code/features-convert/summary-extraction/test-resources/html/work-set/url-800099955 similarity index 
100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url-800099955 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url-800099955 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url-804101946 b/code/features-convert/summary-extraction/test-resources/html/work-set/url-804101946 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url-804101946 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url-804101946 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url-830664902 b/code/features-convert/summary-extraction/test-resources/html/work-set/url-830664902 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url-830664902 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url-830664902 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url-876060686 b/code/features-convert/summary-extraction/test-resources/html/work-set/url-876060686 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url-876060686 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url-876060686 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url-892584998 b/code/features-convert/summary-extraction/test-resources/html/work-set/url-892584998 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url-892584998 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url-892584998 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url-942458463 b/code/features-convert/summary-extraction/test-resources/html/work-set/url-942458463 
similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url-942458463 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url-942458463 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url-952036171 b/code/features-convert/summary-extraction/test-resources/html/work-set/url-952036171 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url-952036171 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url-952036171 diff --git a/code/features-convert/summary-extraction/src/test/resources/html/work-set/url-968207276 b/code/features-convert/summary-extraction/test-resources/html/work-set/url-968207276 similarity index 100% rename from code/features-convert/summary-extraction/src/test/resources/html/work-set/url-968207276 rename to code/features-convert/summary-extraction/test-resources/html/work-set/url-968207276 diff --git a/code/features-convert/summary-extraction/src/test/java/nu/marginalia/summary/SummaryExtractorTest.java b/code/features-convert/summary-extraction/test/nu/marginalia/summary/SummaryExtractorTest.java similarity index 100% rename from code/features-convert/summary-extraction/src/test/java/nu/marginalia/summary/SummaryExtractorTest.java rename to code/features-convert/summary-extraction/test/nu/marginalia/summary/SummaryExtractorTest.java diff --git a/code/features-convert/summary-extraction/src/test/java/nu/marginalia/summary/heuristic/HeuristicTextUtilTest.java b/code/features-convert/summary-extraction/test/nu/marginalia/summary/heuristic/HeuristicTextUtilTest.java similarity index 100% rename from code/features-convert/summary-extraction/src/test/java/nu/marginalia/summary/heuristic/HeuristicTextUtilTest.java rename to code/features-convert/summary-extraction/test/nu/marginalia/summary/heuristic/HeuristicTextUtilTest.java diff --git 
a/code/features-convert/topic-detection/build.gradle b/code/features-convert/topic-detection/build.gradle index 5836dc2b..49661c25 100644 --- a/code/features-convert/topic-detection/build.gradle +++ b/code/features-convert/topic-detection/build.gradle @@ -13,6 +13,8 @@ java { } } +apply from: "$rootProject.projectDir/srcsets.gradle" + dependencies { implementation project(':code:common:config') implementation project(':code:libraries:language-processing') diff --git a/code/features-convert/topic-detection/src/main/java/nu/marginalia/topic/RecipeDetector.java b/code/features-convert/topic-detection/java/nu/marginalia/topic/RecipeDetector.java similarity index 100% rename from code/features-convert/topic-detection/src/main/java/nu/marginalia/topic/RecipeDetector.java rename to code/features-convert/topic-detection/java/nu/marginalia/topic/RecipeDetector.java diff --git a/code/features-convert/topic-detection/src/main/java/nu/marginalia/topic/TextileCraftDetector.java b/code/features-convert/topic-detection/java/nu/marginalia/topic/TextileCraftDetector.java similarity index 100% rename from code/features-convert/topic-detection/src/main/java/nu/marginalia/topic/TextileCraftDetector.java rename to code/features-convert/topic-detection/java/nu/marginalia/topic/TextileCraftDetector.java diff --git a/code/features-convert/topic-detection/src/main/java/nu/marginalia/topic/WoodworkingDetector.java b/code/features-convert/topic-detection/java/nu/marginalia/topic/WoodworkingDetector.java similarity index 100% rename from code/features-convert/topic-detection/src/main/java/nu/marginalia/topic/WoodworkingDetector.java rename to code/features-convert/topic-detection/java/nu/marginalia/topic/WoodworkingDetector.java diff --git a/code/features-crawl/content-type/build.gradle b/code/features-crawl/content-type/build.gradle index 73a155cb..c807b86e 100644 --- a/code/features-crawl/content-type/build.gradle +++ b/code/features-crawl/content-type/build.gradle @@ -11,6 +11,8 @@ java { 
} } +apply from: "$rootProject.projectDir/srcsets.gradle" + dependencies { implementation project(':code:common:model') implementation libs.crawlercommons diff --git a/code/features-crawl/content-type/src/main/java/nu/marginalia/contenttype/ContentType.java b/code/features-crawl/content-type/java/nu/marginalia/contenttype/ContentType.java similarity index 100% rename from code/features-crawl/content-type/src/main/java/nu/marginalia/contenttype/ContentType.java rename to code/features-crawl/content-type/java/nu/marginalia/contenttype/ContentType.java diff --git a/code/features-crawl/content-type/src/main/java/nu/marginalia/contenttype/ContentTypeParser.java b/code/features-crawl/content-type/java/nu/marginalia/contenttype/ContentTypeParser.java similarity index 100% rename from code/features-crawl/content-type/src/main/java/nu/marginalia/contenttype/ContentTypeParser.java rename to code/features-crawl/content-type/java/nu/marginalia/contenttype/ContentTypeParser.java diff --git a/code/features-crawl/content-type/src/main/java/nu/marginalia/contenttype/DocumentBodyToString.java b/code/features-crawl/content-type/java/nu/marginalia/contenttype/DocumentBodyToString.java similarity index 100% rename from code/features-crawl/content-type/src/main/java/nu/marginalia/contenttype/DocumentBodyToString.java rename to code/features-crawl/content-type/java/nu/marginalia/contenttype/DocumentBodyToString.java diff --git a/code/features-crawl/content-type/src/test/java/nu/marginalia/contenttype/ContentTypeParserTest.java b/code/features-crawl/content-type/test/nu/marginalia/contenttype/ContentTypeParserTest.java similarity index 100% rename from code/features-crawl/content-type/src/test/java/nu/marginalia/contenttype/ContentTypeParserTest.java rename to code/features-crawl/content-type/test/nu/marginalia/contenttype/ContentTypeParserTest.java diff --git a/code/features-crawl/content-type/src/test/java/nu/marginalia/contenttype/DocumentBodyToStringTest.java 
b/code/features-crawl/content-type/test/nu/marginalia/contenttype/DocumentBodyToStringTest.java similarity index 100% rename from code/features-crawl/content-type/src/test/java/nu/marginalia/contenttype/DocumentBodyToStringTest.java rename to code/features-crawl/content-type/test/nu/marginalia/contenttype/DocumentBodyToStringTest.java diff --git a/code/features-crawl/crawl-blocklist/build.gradle b/code/features-crawl/crawl-blocklist/build.gradle index 8288aa0c..a6fc4f91 100644 --- a/code/features-crawl/crawl-blocklist/build.gradle +++ b/code/features-crawl/crawl-blocklist/build.gradle @@ -11,6 +11,8 @@ java { } } +apply from: "$rootProject.projectDir/srcsets.gradle" + dependencies { implementation project(':code:common:model') implementation project(':code:common:config') @@ -20,7 +22,6 @@ dependencies { implementation libs.notnull implementation libs.bundles.gson - implementation libs.rxjava implementation libs.bundles.slf4j testImplementation libs.bundles.slf4j.test diff --git a/code/features-crawl/crawl-blocklist/src/main/java/nu/marginalia/ip_blocklist/GeoIpBlocklist.java b/code/features-crawl/crawl-blocklist/java/nu/marginalia/ip_blocklist/GeoIpBlocklist.java similarity index 100% rename from code/features-crawl/crawl-blocklist/src/main/java/nu/marginalia/ip_blocklist/GeoIpBlocklist.java rename to code/features-crawl/crawl-blocklist/java/nu/marginalia/ip_blocklist/GeoIpBlocklist.java diff --git a/code/features-crawl/crawl-blocklist/src/main/java/nu/marginalia/ip_blocklist/InetAddressCache.java b/code/features-crawl/crawl-blocklist/java/nu/marginalia/ip_blocklist/InetAddressCache.java similarity index 100% rename from code/features-crawl/crawl-blocklist/src/main/java/nu/marginalia/ip_blocklist/InetAddressCache.java rename to code/features-crawl/crawl-blocklist/java/nu/marginalia/ip_blocklist/InetAddressCache.java diff --git a/code/features-crawl/crawl-blocklist/src/main/java/nu/marginalia/ip_blocklist/IpBlockList.java 
b/code/features-crawl/crawl-blocklist/java/nu/marginalia/ip_blocklist/IpBlockList.java similarity index 100% rename from code/features-crawl/crawl-blocklist/src/main/java/nu/marginalia/ip_blocklist/IpBlockList.java rename to code/features-crawl/crawl-blocklist/java/nu/marginalia/ip_blocklist/IpBlockList.java diff --git a/code/features-crawl/crawl-blocklist/src/main/java/nu/marginalia/ip_blocklist/UrlBlocklist.java b/code/features-crawl/crawl-blocklist/java/nu/marginalia/ip_blocklist/UrlBlocklist.java similarity index 100% rename from code/features-crawl/crawl-blocklist/src/main/java/nu/marginalia/ip_blocklist/UrlBlocklist.java rename to code/features-crawl/crawl-blocklist/java/nu/marginalia/ip_blocklist/UrlBlocklist.java diff --git a/code/features-crawl/crawl-blocklist/readme.md b/code/features-crawl/crawl-blocklist/readme.md index 9db3912b..777f4260 100644 --- a/code/features-crawl/crawl-blocklist/readme.md +++ b/code/features-crawl/crawl-blocklist/readme.md @@ -4,6 +4,6 @@ Contains tools for blocking links from crawling. 
## Central Classes -* [GeoIpBlocklist](src/main/java/nu/marginalia/ip_blocklist/GeoIpBlocklist.java) - country blocking -* [IpBlocklist](src/main/java/nu/marginalia/ip_blocklist/IpBlockList.java) - CIDR-based blocking -* [UrlBlocklist](src/main/java/nu/marginalia/ip_blocklist/UrlBlocklist.java) - URL pattern blocking \ No newline at end of file +* [GeoIpBlocklist](java/nu/marginalia/ip_blocklist/GeoIpBlocklist.java) - country blocking +* [IpBlocklist](java/nu/marginalia/ip_blocklist/IpBlockList.java) - CIDR-based blocking +* [UrlBlocklist](java/nu/marginalia/ip_blocklist/UrlBlocklist.java) - URL pattern blocking \ No newline at end of file diff --git a/code/features-crawl/crawl-blocklist/src/test/java/nu/marginalia/ip_blocklist/UrlBlocklistTest.java b/code/features-crawl/crawl-blocklist/test/nu/marginalia/ip_blocklist/UrlBlocklistTest.java similarity index 100% rename from code/features-crawl/crawl-blocklist/src/test/java/nu/marginalia/ip_blocklist/UrlBlocklistTest.java rename to code/features-crawl/crawl-blocklist/test/nu/marginalia/ip_blocklist/UrlBlocklistTest.java diff --git a/code/features-crawl/link-parser/build.gradle b/code/features-crawl/link-parser/build.gradle index dc551943..2dd04a5c 100644 --- a/code/features-crawl/link-parser/build.gradle +++ b/code/features-crawl/link-parser/build.gradle @@ -11,6 +11,8 @@ java { } } +apply from: "$rootProject.projectDir/srcsets.gradle" + dependencies { implementation project(':code:common:model') diff --git a/code/features-crawl/link-parser/src/main/java/nu/marginalia/link_parser/FeedExtractor.java b/code/features-crawl/link-parser/java/nu/marginalia/link_parser/FeedExtractor.java similarity index 100% rename from code/features-crawl/link-parser/src/main/java/nu/marginalia/link_parser/FeedExtractor.java rename to code/features-crawl/link-parser/java/nu/marginalia/link_parser/FeedExtractor.java diff --git a/code/features-crawl/link-parser/src/main/java/nu/marginalia/link_parser/LinkParser.java 
b/code/features-crawl/link-parser/java/nu/marginalia/link_parser/LinkParser.java similarity index 99% rename from code/features-crawl/link-parser/src/main/java/nu/marginalia/link_parser/LinkParser.java rename to code/features-crawl/link-parser/java/nu/marginalia/link_parser/LinkParser.java index 08ccc221..652eab8d 100644 --- a/code/features-crawl/link-parser/src/main/java/nu/marginalia/link_parser/LinkParser.java +++ b/code/features-crawl/link-parser/java/nu/marginalia/link_parser/LinkParser.java @@ -209,7 +209,7 @@ public class LinkParser { } private boolean isUrlRelevant(String href) { - if (null == href || "".equals(href)) { + if (null == href || href.isEmpty()) { return false; } if (href.length() > 128) { diff --git a/code/features-crawl/link-parser/readme.md b/code/features-crawl/link-parser/readme.md index 55289227..2893ba87 100644 --- a/code/features-crawl/link-parser/readme.md +++ b/code/features-crawl/link-parser/readme.md @@ -5,4 +5,4 @@ pathological links, etc. ## Central Classes -* [LinkParser](src/main/java/nu/marginalia/link_parser/LinkParser.java) \ No newline at end of file +* [LinkParser](java/nu/marginalia/link_parser/LinkParser.java) \ No newline at end of file diff --git a/code/features-index/domain-ranking/readme.md b/code/features-index/domain-ranking/readme.md deleted file mode 100644 index 5dc4ec2c..00000000 --- a/code/features-index/domain-ranking/readme.md +++ /dev/null @@ -1,39 +0,0 @@ -# Domain Ranking - -Contains domain ranking algorithms. The domain ranking algorithms are based on -the JGraphT library. - -Two principal algorithms are available, the standard PageRank algorithm, -and personalized pagerank; each are available for two graphs, the link graph -and a similarity graph where each edge corresponds to the similarity between -the sets of incident links to two domains, their cosine similarity acting as -the weight of the links. 
- -With the standard PageRank algorithm, the similarity graph does not produce -anything useful, but something magical happens when you apply Personalized PageRank -to this graph. It turns into a very good "vibe"-sensitive ranking algorithm. - -It's unclear if this is a well known result, but it's a very interesting one -for creating a ranking algorithm that is focused on a particular segment of the web. - -## Central Classes - -* [PageRankDomainRanker](src/main/java/nu/marginalia/ranking/PageRankDomainRanker.java) - Ranks domains using the - PageRank or Personalized PageRank algorithm depending on whether a list of influence domains is provided. - -### Data sources - -* [LinkGraphSource](src/main/java/nu/marginalia/ranking/data/LinkGraphSource.java) - fetches the link graph -* [InvertedLinkGraphSource](src/main/java/nu/marginalia/ranking/data/InvertedLinkGraphSource.java) - fetches the inverted link graph -* [SimilarityGraphSource](src/main/java/nu/marginalia/ranking/data/SimilarityGraphSource.java) - fetches the similarity graph from the database - -Note that the similarity graph needs to be precomputed and stored in the database for -the similarity graph source to be available. 
- -## See Also - -* [result-ranking](../result-ranking) - Ranks search results - -## Useful Resources - -* [The PageRank Citation Ranking: Bringing Order to the Web](http://ilpubs.stanford.edu:8090/422/1/1999-66.pdf) diff --git a/code/features-index/index-forward/src/main/java/nu/marginalia/index/forward/ParamMatchingQueryFilter.java b/code/features-index/index-forward/src/main/java/nu/marginalia/index/forward/ParamMatchingQueryFilter.java deleted file mode 100644 index d7e6a9b3..00000000 --- a/code/features-index/index-forward/src/main/java/nu/marginalia/index/forward/ParamMatchingQueryFilter.java +++ /dev/null @@ -1,100 +0,0 @@ -package nu.marginalia.index.forward; - -import nu.marginalia.model.id.UrlIdCodec; -import nu.marginalia.model.idx.DocumentMetadata; -import nu.marginalia.index.query.limit.SpecificationLimitType; -import nu.marginalia.index.query.IndexQueryParams; -import nu.marginalia.index.query.filter.QueryFilterStepIf; - -public class ParamMatchingQueryFilter implements QueryFilterStepIf { - private final IndexQueryParams params; - private final ForwardIndexReader forwardIndexReader; - - public ParamMatchingQueryFilter(IndexQueryParams params, ForwardIndexReader forwardIndexReader) { - this.params = params; - this.forwardIndexReader = forwardIndexReader; - } - - @Override - public boolean test(long combinedId) { - long docId = UrlIdCodec.removeRank(combinedId); - int domainId = UrlIdCodec.getDomainId(docId); - - long meta = forwardIndexReader.getDocMeta(docId); - - if (!validateDomain(domainId, meta)) { - return false; - } - - if (!validateQuality(meta)) { - return false; - } - - if (!validateYear(meta)) { - return false; - } - - if (!validateSize(meta)) { - return false; - } - - if (!validateRank(meta)) { - return false; - } - - return true; - } - - private boolean validateDomain(int domainId, long meta) { - return params.searchSet().contains(domainId, meta); - } - - private boolean validateQuality(long meta) { - final var limit = 
params.qualityLimit(); - - if (limit.type() == SpecificationLimitType.NONE) { - return true; - } - - final int quality = DocumentMetadata.decodeQuality(meta); - - return limit.test(quality); - } - - private boolean validateYear(long meta) { - if (params.year().type() == SpecificationLimitType.NONE) - return true; - - int postVal = DocumentMetadata.decodeYear(meta); - - return params.year().test(postVal); - } - - private boolean validateSize(long meta) { - if (params.size().type() == SpecificationLimitType.NONE) - return true; - - int postVal = DocumentMetadata.decodeSize(meta); - - return params.size().test(postVal); - } - - private boolean validateRank(long meta) { - if (params.rank().type() == SpecificationLimitType.NONE) - return true; - - int postVal = DocumentMetadata.decodeRank(meta); - - return params.rank().test(postVal); - } - - @Override - public double cost() { - return 32; - } - - @Override - public String describe() { - return getClass().getSimpleName(); - } -} diff --git a/code/features-index/readme.md b/code/features-index/readme.md deleted file mode 100644 index 28fe56b2..00000000 --- a/code/features-index/readme.md +++ /dev/null @@ -1,24 +0,0 @@ -# Index - -These are components that offer functionality for the [index-service](../services-core/index-service). - -## Indexes - -There are two indexes with accompanying tools for constructing them. - -* [index-reverse](index-reverse/) is code for `word->document` indexes. There are two such indexes, one containing only document-word pairs that are flagged as important, e.g. the word appears in the title or has a high TF-IDF. This allows good results to be discovered quickly without having to sift through ten thousand bad ones first. - -* [index-forward](index-forward/) is the `document->word` index containing metadata about each word, such as its position. It is used after identifying candidate search results via the reverse index to fetch metadata and rank the results. 
- -These indices rely heavily on the [libraries/btree](../libraries/btree) and [libraries/array](../libraries/array) components. - -## Algorithms - -* [domain-ranking](domain-ranking/) contains domain ranking algorithms. -* [result-ranking](result-ranking/) contains logic for ranking search results by relevance. - -# Libraries - -* [index-query](index-query/) contains structures for evaluating search queries. -* [index-journal](index-journal/) contains tools for writing and reading index data. - diff --git a/code/features-index/result-ranking/readme.md b/code/features-index/result-ranking/readme.md deleted file mode 100644 index 99ba8647..00000000 --- a/code/features-index/result-ranking/readme.md +++ /dev/null @@ -1,14 +0,0 @@ -# Result Ranking - -Contains various heuristics for deciding which search results are important -with regard to a query. In broad strokes [BM-25](https://nlp.stanford.edu/IR-book/html/htmledition/okapi-bm25-a-non-binary-model-1.html) -is used, with a number of additional bonuses and penalties to rank the appropriate search -results higher. 
- -## Central Classes - -* [ResultValuator](src/main/java/nu/marginalia/ranking/ResultValuator.java) - -## See Also - -* [features-index/domain-ranking](../domain-ranking) - Ranks domains \ No newline at end of file diff --git a/code/features-index/result-ranking/src/main/java/nu/marginalia/ranking/factors/PriorityTermBonus.java b/code/features-index/result-ranking/src/main/java/nu/marginalia/ranking/factors/PriorityTermBonus.java deleted file mode 100644 index 05579e47..00000000 --- a/code/features-index/result-ranking/src/main/java/nu/marginalia/ranking/factors/PriorityTermBonus.java +++ /dev/null @@ -1,20 +0,0 @@ -package nu.marginalia.ranking.factors; - -import nu.marginalia.index.client.model.results.SearchResultKeywordScore; - -import java.util.List; - -/** Rewards results that have a priority term */ -public class PriorityTermBonus { - public double calculate(List scores) { - - for (var result : scores) { - if (result.hasPriorityTerms()) { - return 2.0; - } - } - - return 0; - } - -} diff --git a/code/features-qs/query-parser/readme.md b/code/features-qs/query-parser/readme.md deleted file mode 100644 index 1296be13..00000000 --- a/code/features-qs/query-parser/readme.md +++ /dev/null @@ -1,10 +0,0 @@ -# Query Parser - -End-user search query parsing tools used by the [query-service](../../services-core/query-service). 
- -## Central Classes - -* [QueryTokenizer](src/main/java/nu/marginalia/query_parser/QueryTokenizer.java) -* [QueryParser](src/main/java/nu/marginalia/query_parser/QueryParser.java) -* [QueryPermutations](src/main/java/nu/marginalia/query_parser/QueryVariants.java) - here be dragons -* [QueryVariants](src/main/java/nu/marginalia/query_parser/QueryVariants.java) - here be dragons \ No newline at end of file diff --git a/code/features-qs/query-parser/src/test/java/nu/marginalia/ngrams/DenseBitMapTest.java b/code/features-qs/query-parser/src/test/java/nu/marginalia/ngrams/DenseBitMapTest.java deleted file mode 100644 index d2db16b6..00000000 --- a/code/features-qs/query-parser/src/test/java/nu/marginalia/ngrams/DenseBitMapTest.java +++ /dev/null @@ -1,57 +0,0 @@ -package nu.marginalia.ngrams; - -import org.junit.jupiter.api.Test; - -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertTrue; - -class DenseBitMapTest { - - @Test - public void testSetAll() { - var dbm = new DenseBitMap(129); - for (int i = 0; i < dbm.cardinality; i++) { - dbm.set(i); - } - - for (int i = 0; i < dbm.cardinality; i++) { - assertTrue(dbm.get(i)); - } - } - - @Test - public void testSetEven() { - var dbm = new DenseBitMap(131); - for (int i = 0; i < dbm.cardinality; i+=2) { - dbm.set(i); - } - - for (int i = 0; i < dbm.cardinality; i+=2) { - assertTrue(dbm.get(i)); - } - - for (int i = 1; i < dbm.cardinality; i+=2) { - assertFalse(dbm.get(i)); - } - } - - @Test - public void testSetAllClearSome() { - var dbm = new DenseBitMap(129); - - for (int i = 0; i < dbm.cardinality; i++) { - dbm.set(i); - } - for (int i = 1; i < dbm.cardinality; i+=2) { - dbm.clear(i); - } - - for (int i = 0; i < dbm.cardinality; i+=2) { - assertTrue(dbm.get(i), "Expected " + i + " to be set"); - } - - for (int i = 1; i < dbm.cardinality; i+=2) { - assertFalse(dbm.get(i), "Expected " + i + " to be clear"); - } - } -} \ No newline at end of file diff --git 
a/code/features-qs/query-parser/src/test/java/nu/marginalia/query_parser/BodyQueryParserTest.java b/code/features-qs/query-parser/src/test/java/nu/marginalia/query_parser/BodyQueryParserTest.java deleted file mode 100644 index 8cc38312..00000000 --- a/code/features-qs/query-parser/src/test/java/nu/marginalia/query_parser/BodyQueryParserTest.java +++ /dev/null @@ -1,116 +0,0 @@ -package nu.marginalia.query_parser; - -import nu.marginalia.LanguageModels; -import nu.marginalia.language.EnglishDictionary; -import nu.marginalia.ngrams.NGramBloomFilter; -import nu.marginalia.term_frequency_dict.TermFrequencyDict; -import nu.marginalia.query_parser.token.TokenType; -import nu.marginalia.util.TestLanguageModels; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -import java.io.IOException; -import java.util.List; - -import static org.junit.jupiter.api.Assertions.assertEquals; - -class BodyQueryParserTest { - private QueryParser parser; - private static TermFrequencyDict dict; - private static EnglishDictionary englishDictionary; - private static NGramBloomFilter nGramBloomFilter; - private static final LanguageModels lm = TestLanguageModels.getLanguageModels(); - private QueryPermutation permutation; - - @BeforeAll - public static void init() throws IOException { - dict = new TermFrequencyDict(lm); - nGramBloomFilter = new NGramBloomFilter(lm); - englishDictionary = new EnglishDictionary(dict); - } - - @BeforeEach - public void setUp() { - parser = new QueryParser(); - permutation = new QueryPermutation(new QueryVariants(lm, dict, nGramBloomFilter, englishDictionary)); - } - - @Test - public void testTitleMatcher() { - List terms = List.of("3d", "realms"); - assertEquals(2, terms.stream().map(String::toLowerCase).filter("3D Realms Site: Forums".toLowerCase()::contains).count()); - } - @Test - void parseSimple() { - var results = parser.parse("hello"); - results.forEach(System.out::println); - 
assertEquals(1, results.size()); - assertEquals(TokenType.LITERAL_TERM, results.get(0).type); - assertEquals("hello", results.get(0).str); - } - - @Test - void parseQuotes() { - var results = parser.parse("\u201Chello world\u201D"); - results.forEach(System.out::println); - assertEquals(TokenType.QUOT_TERM, results.get(0).type); - assertEquals("hello_world", results.get(0).str); - assertEquals("\"hello world\"", results.get(0).displayStr); - } - - @Test - void parseExclude() { - var results = parser.parse("-Hello"); - results.forEach(System.out::println); - assertEquals(TokenType.EXCLUDE_TERM, results.get(0).type); - assertEquals("hello", results.get(0).str); - assertEquals("-hello", results.get(0).displayStr); - } - - @Test - void parseNear() { - var results = parser.parse("near:memex.marginalia.nu"); - results.forEach(System.out::println); - assertEquals(TokenType.NEAR_TERM, results.get(0).type); - assertEquals("memex.marginalia.nu", results.get(0).str); - assertEquals("near:memex.marginalia.nu", results.get(0).displayStr); - } - - @Test - void parseCombined() { - for (var list : permutation.permuteQueries(parser.parse("dune 2 remake"))) { - for (var t: list) { - System.out.printf("%s ", t.str); - } - System.out.println(); - } - } - @Test - void parseCombinedDOS() { - for (var list : permutation.permuteQueries(parser.parse("ab ba baa abba baba ab ba"))) { - for (var t: list) { - System.out.printf("%s ", t.str); - } - System.out.println(); - } - } - - @Test - void parseCombinedSuperman() { - for (var list : permutation.permuteQueries(parser.parse("wizardry proving grounds of the mad overlord"))) { - for (var t: list) { - System.out.printf("%s ", t.str); - } - System.out.println(); - } - } - @Test - void testEdgeCases() { - parser.parse("site:localhost 3D").forEach(System.out::println); - parser.parse("-wolfenstein 3D").forEach(System.out::println); - parser.parse("-wolfenstein 3D \"").forEach(System.out::println); - } - - -} \ No newline at end of file diff --git 
a/code/features-qs/query-parser/src/test/java/nu/marginalia/query_parser/QueryParserTest.java b/code/features-qs/query-parser/src/test/java/nu/marginalia/query_parser/QueryParserTest.java deleted file mode 100644 index 8bc19545..00000000 --- a/code/features-qs/query-parser/src/test/java/nu/marginalia/query_parser/QueryParserTest.java +++ /dev/null @@ -1,93 +0,0 @@ -package nu.marginalia.query_parser; - -import nu.marginalia.query_parser.token.Token; -import nu.marginalia.query_parser.token.TokenType; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; - -import java.util.List; - -import static org.junit.jupiter.api.Assertions.assertArrayEquals; -import static org.junit.jupiter.api.Assertions.assertEquals; - -class QueryParserTest { - private static QueryParser parser; - - @BeforeAll - public static void setUp() { - parser = new QueryParser(); - } - - @Test - public void testTabHandling() { - var query = " lorem\tipsum\ndolor sit"; - var ret = parser.parse(query); - assertEquals(4, ret.size()); - - var lorem = ret.get(0); - assertEquals("lorem", lorem.str); - assertEquals("lorem", lorem.displayStr); - - var ipsum = ret.get(1); - assertEquals("ipsum", ipsum.str); - assertEquals("ipsum", ipsum.displayStr); - - var dolor = ret.get(2); - assertEquals("dolor", dolor.str); - assertEquals("dolor", dolor.displayStr); - - var sit = ret.get(3); - assertEquals("sit", sit.str); - assertEquals("sit", sit.displayStr); - } - - @Test - public void testAdviceString() { - var ret = parser.parse("alcibiades (plato) \"my query\" -cars"); - assertEquals(4, ret.size()); - - var alcibiades = ret.get(0); - assertEquals(TokenType.LITERAL_TERM, alcibiades.type); - assertEquals("alcibiades", alcibiades.str); - assertEquals("alcibiades", alcibiades.displayStr); - - var plato = ret.get(1); - assertEquals(TokenType.ADVICE_TERM, plato.type); - assertEquals("plato", plato.str); - assertEquals("(plato)", plato.displayStr); - - var my_query = ret.get(2); - 
assertEquals(TokenType.QUOT_TERM, my_query.type); - assertEquals("my_query", my_query.str); - assertEquals("\"my query\"", my_query.displayStr); - - var not_cars = ret.get(3); - assertEquals(TokenType.EXCLUDE_TERM, not_cars.type); - assertEquals("cars", not_cars.str); - assertEquals("-cars", not_cars.displayStr); - } - - @Test - public void testParseYear() { - System.out.println(parser.parse("year>2000")); - System.out.println(parser.parse("year=2000")); - System.out.println(parser.parse("year<2000")); - } - - @Test - public void testNonAsciiNames() { - verifyParseResult("André the Giant", "andre", "the", "giant"); - verifyParseResult("Stanisław Lem", "stanislaw", "lem"); - verifyParseResult("Nicolae Ceaușescu", "nicolae", "ceausescu"); - verifyParseResult("Þorrablót", "thorrablot"); - verifyParseResult("Karolis Koncevičius", "karolis", "koncevicius"); - } - - private void verifyParseResult(String query, String... expectedTokens) { - assertArrayEquals(expectedTokens, getTokenStrings(parser.parse(query))); - } - private String[] getTokenStrings(List tokens) { - return tokens.stream().map(t -> t.str).toArray(String[]::new); - } - -} \ No newline at end of file diff --git a/code/features-qs/query-parser/src/test/java/nu/marginalia/query_parser/QueryVariantsTest.java b/code/features-qs/query-parser/src/test/java/nu/marginalia/query_parser/QueryVariantsTest.java deleted file mode 100644 index d82976e9..00000000 --- a/code/features-qs/query-parser/src/test/java/nu/marginalia/query_parser/QueryVariantsTest.java +++ /dev/null @@ -1,75 +0,0 @@ -package nu.marginalia.query_parser; - -import nu.marginalia.LanguageModels; -import nu.marginalia.language.EnglishDictionary; -import nu.marginalia.ngrams.NGramBloomFilter; -import nu.marginalia.term_frequency_dict.TermFrequencyDict; -import nu.marginalia.util.TestLanguageModels; -import nu.marginalia.language.sentence.SentenceExtractor; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; - -import 
java.io.IOException; - -class QueryVariantsTest { - static QueryVariants variants; - static QueryParser parser; - static SentenceExtractor se; - - @BeforeAll - public static void setUp() throws IOException { - LanguageModels lm = TestLanguageModels.getLanguageModels(); - - se = new SentenceExtractor(lm); - - var dict = new TermFrequencyDict(lm); - var ngrams = new NGramBloomFilter(lm); - variants = new QueryVariants(lm, dict, ngrams, new EnglishDictionary(dict)); - parser = new QueryParser(); - } - - @Test - void getQueryFood() { - System.out.println(se.extractSentence("we are alone")); - testCase("Omelet recipe"); - } - - @Test - void queryNegation() { - System.out.println(se.extractSentence("salt lake -city")); - testCase("salt lake -city"); - } - - - @Test - void getQueryVariants() { - System.out.println(se.extractSentence("we are alone")); - testCase("inside job reviews"); - testCase("plato apology"); - testCase("mechanical keyboard"); - testCase("DOS"); - testCase("dos"); - testCase("we are alone"); - testCase("3D Realms"); - testCase("I am alone"); - testCase("plato cave"); - testCase("The internet is dead"); - - testCase("TRS80"); - testCase("TRS-80"); - testCase("TRS-80"); - testCase("Raspberry Pi 2"); - testCase("Duke Nukem 3D"); - testCase("The Man of Tomorrow"); - testCase("Computer Manual"); - testCase("Knitting"); - testCase("capcom"); - testCase("the man of tomorrow"); - - } - - private void testCase(String input) { - var tokens = variants.getQueryVariants(parser.parse(input)); - System.out.println(tokens); - } -} \ No newline at end of file diff --git a/code/features-qs/query-parser/src/test/java/nu/marginalia/transform_list/TransformListTest.java b/code/features-qs/query-parser/src/test/java/nu/marginalia/transform_list/TransformListTest.java deleted file mode 100644 index d34a86ea..00000000 --- a/code/features-qs/query-parser/src/test/java/nu/marginalia/transform_list/TransformListTest.java +++ /dev/null @@ -1,120 +0,0 @@ -package 
nu.marginalia.transform_list; - -import org.junit.jupiter.api.Test; - -import java.util.List; -import java.util.stream.Collectors; -import java.util.stream.Stream; - -import static org.junit.jupiter.api.Assertions.assertEquals; - -class TransformListTest { - - @Test - void transformEach() { - - List values = Stream.of(1,2,3,4).collect(Collectors.toList()); - new TransformList<>(values).transformEach(e -> { - int v = e.value(); - if (v == 1) e.remove(); - if (v == 2) e.replace(5); - if (v == 4) e.remove(); - }); - - assertEquals(List.of(5,3), values); - } - - @Test - void transformEachPairRemoveReplace() { - List values = Stream.of(1,2,3,4,5,6).collect(Collectors.toList()); - new TransformList<>(values).transformEachPair((a,b) -> { - System.out.println(a.value() + ":" + b.value()); - int v = a.value(); - if (v == 1 || v == 3 || v == 5) { - a.remove(); - b.replace(-b.value()); - } - - }); - - assertEquals(List.of(-2, -4, -6), values); - } - - @Test - void transformEachPairRemoveRemove() { - List values = Stream.of(1,2,3,4,5,6).collect(Collectors.toList()); - new TransformList<>(values).transformEachPair((a,b) -> { - System.out.println(a.value() + ":" + b.value()); - int v = a.value(); - if (v == 1 || v == 3 || v == 5) { - a.remove(); - b.remove(); - } - - }); - - assertEquals(List.of(), values); - } - - @Test - void transformEachPairReplaceRemove() { - List values = Stream.of(1,2,3,4,5,6).collect(Collectors.toList()); - new TransformList<>(values).transformEachPair((a,b) -> { - System.out.println(a.value() + ":" + b.value()); - int v = a.value(); - if (v == 1 || v == 3 || v == 5) { - a.replace(-a.value()); - b.remove(); - } - - }); - - assertEquals(List.of(-1, -3, -5), values); - } - - @Test - void transformEachPairReplaceReplace() { - List values = Stream.of(1,2,3,4,5,6).collect(Collectors.toList()); - new TransformList<>(values).transformEachPair((a,b) -> { - System.out.println(a.value() + ":" + b.value()); - int v = a.value(); - if (v == 1 || v == 3 || v == 5) { - 
a.replace(-a.value()); - b.replace(-b.value()); - } - - }); - - assertEquals(List.of(-1, -2, -3, -4, -5, -6), values); - } - - @Test - void scanAndTransform() { - List values = Stream.of(1,2,3,4,5,6,7,8,9,10).collect(Collectors.toList()); - new TransformList<>(values).scanAndTransform(Integer.valueOf(3)::equals, Integer.valueOf(7)::equals, entity -> { - entity.replace(entity.value() * 2); - }); - - assertEquals(List.of(1,2,6,8,10,12,14,8,9,10), values); - } - - @Test - void scanAndTransformEndsAtEnd() { - List values = Stream.of(1,2,3,4,5,6,7,8,9,10).collect(Collectors.toList()); - new TransformList<>(values).scanAndTransform(Integer.valueOf(3)::equals, Integer.valueOf(10)::equals, entity -> { - entity.replace(entity.value() * 2); - }); - - assertEquals(List.of(1,2,6,8,10,12,14,16,18,20), values); - } - - @Test - void scanAndTransformOverlap() { - List values = Stream.of(1,2,3,3,5,7,7,8,9,10).collect(Collectors.toList()); - new TransformList<>(values).scanAndTransform(Integer.valueOf(3)::equals, Integer.valueOf(7)::equals, entity -> { - entity.replace(entity.value() * 2); - }); - - assertEquals(List.of(1, 2, 6, 6, 10, 14, 7, 8, 9, 10), values); - } -} \ No newline at end of file diff --git a/code/features-qs/readme.md b/code/features-qs/readme.md deleted file mode 100644 index c0d77dec..00000000 --- a/code/features-qs/readme.md +++ /dev/null @@ -1,7 +0,0 @@ -**Note**: This package is called `features-qs` rather than `features-query` because the latter, -though more consistent with other packages like features-index, would be very confusing -as there are other packages elsewhere with the 'query' name (e.g. features-index/index-query). - -## Contents - -* [query-parser](query-parser/) contains code for parsing the user-facing query grammar. 
diff --git a/code/features-search/feedlot-client/build.gradle b/code/features-search/feedlot-client/build.gradle index 808c9ca6..c62182fe 100644 --- a/code/features-search/feedlot-client/build.gradle +++ b/code/features-search/feedlot-client/build.gradle @@ -9,6 +9,8 @@ java { } } +apply from: "$rootProject.projectDir/srcsets.gradle" + dependencies { implementation libs.bundles.slf4j diff --git a/code/features-search/feedlot-client/src/main/java/nu/marginalia/feedlot/FeedlotClient.java b/code/features-search/feedlot-client/java/nu/marginalia/feedlot/FeedlotClient.java similarity index 96% rename from code/features-search/feedlot-client/src/main/java/nu/marginalia/feedlot/FeedlotClient.java rename to code/features-search/feedlot-client/java/nu/marginalia/feedlot/FeedlotClient.java index 3392a8d2..d247a8e2 100644 --- a/code/features-search/feedlot-client/src/main/java/nu/marginalia/feedlot/FeedlotClient.java +++ b/code/features-search/feedlot-client/java/nu/marginalia/feedlot/FeedlotClient.java @@ -30,7 +30,7 @@ public class FeedlotClient { this.gson = gson; httpClient = HttpClient.newBuilder() - .executor(Executors.newVirtualThreadPerTaskExecutor()) + .executor(Executors.newCachedThreadPool()) .connectTimeout(connectTimeout) .build(); this.requestTimeout = requestTimeout; diff --git a/code/features-search/feedlot-client/src/main/java/nu/marginalia/feedlot/model/FeedItem.java b/code/features-search/feedlot-client/java/nu/marginalia/feedlot/model/FeedItem.java similarity index 100% rename from code/features-search/feedlot-client/src/main/java/nu/marginalia/feedlot/model/FeedItem.java rename to code/features-search/feedlot-client/java/nu/marginalia/feedlot/model/FeedItem.java diff --git a/code/features-search/feedlot-client/src/main/java/nu/marginalia/feedlot/model/FeedItems.java b/code/features-search/feedlot-client/java/nu/marginalia/feedlot/model/FeedItems.java similarity index 100% rename from 
code/features-search/feedlot-client/src/main/java/nu/marginalia/feedlot/model/FeedItems.java rename to code/features-search/feedlot-client/java/nu/marginalia/feedlot/model/FeedItems.java diff --git a/code/features-search/random-websites/build.gradle b/code/features-search/random-websites/build.gradle index b95e7aa1..ec4201ec 100644 --- a/code/features-search/random-websites/build.gradle +++ b/code/features-search/random-websites/build.gradle @@ -11,6 +11,8 @@ java { } } +apply from: "$rootProject.projectDir/srcsets.gradle" + dependencies { implementation project(':code:common:model') implementation project(':code:common:db') diff --git a/code/features-search/random-websites/src/main/java/nu/marginalia/browse/DbBrowseDomainsRandom.java b/code/features-search/random-websites/java/nu/marginalia/browse/DbBrowseDomainsRandom.java similarity index 98% rename from code/features-search/random-websites/src/main/java/nu/marginalia/browse/DbBrowseDomainsRandom.java rename to code/features-search/random-websites/java/nu/marginalia/browse/DbBrowseDomainsRandom.java index 385d8eb5..138230f9 100644 --- a/code/features-search/random-websites/src/main/java/nu/marginalia/browse/DbBrowseDomainsRandom.java +++ b/code/features-search/random-websites/java/nu/marginalia/browse/DbBrowseDomainsRandom.java @@ -38,7 +38,7 @@ public class DbBrowseDomainsRandom { List domains = new ArrayList<>(count); try (var conn = dataSource.getConnection()) { try (var stmt = conn.prepareStatement(q)) { - stmt.setInt(1, set);; + stmt.setInt(1, set); stmt.setInt(2, count); var rsp = stmt.executeQuery(); while (rsp.next()) { diff --git a/code/features-search/random-websites/src/main/java/nu/marginalia/browse/DbBrowseDomainsSimilarCosine.java b/code/features-search/random-websites/java/nu/marginalia/browse/DbBrowseDomainsSimilarCosine.java similarity index 100% rename from code/features-search/random-websites/src/main/java/nu/marginalia/browse/DbBrowseDomainsSimilarCosine.java rename to 
code/features-search/random-websites/java/nu/marginalia/browse/DbBrowseDomainsSimilarCosine.java diff --git a/code/features-search/random-websites/src/main/java/nu/marginalia/browse/model/BrowseResult.java b/code/features-search/random-websites/java/nu/marginalia/browse/model/BrowseResult.java similarity index 100% rename from code/features-search/random-websites/src/main/java/nu/marginalia/browse/model/BrowseResult.java rename to code/features-search/random-websites/java/nu/marginalia/browse/model/BrowseResult.java diff --git a/code/features-search/random-websites/src/main/java/nu/marginalia/browse/model/BrowseResultSet.java b/code/features-search/random-websites/java/nu/marginalia/browse/model/BrowseResultSet.java similarity index 100% rename from code/features-search/random-websites/src/main/java/nu/marginalia/browse/model/BrowseResultSet.java rename to code/features-search/random-websites/java/nu/marginalia/browse/model/BrowseResultSet.java diff --git a/code/features-search/screenshots/build.gradle b/code/features-search/screenshots/build.gradle index ac2779f6..52572e91 100644 --- a/code/features-search/screenshots/build.gradle +++ b/code/features-search/screenshots/build.gradle @@ -10,6 +10,8 @@ java { } } +apply from: "$rootProject.projectDir/srcsets.gradle" + dependencies { implementation project(':code:common:model') implementation project(':code:common:db') diff --git a/code/features-search/screenshots/src/main/java/nu/marginalia/screenshot/ScreenshotService.java b/code/features-search/screenshots/java/nu/marginalia/screenshot/ScreenshotService.java similarity index 100% rename from code/features-search/screenshots/src/main/java/nu/marginalia/screenshot/ScreenshotService.java rename to code/features-search/screenshots/java/nu/marginalia/screenshot/ScreenshotService.java diff --git a/code/functions/domain-info/api/build.gradle b/code/functions/domain-info/api/build.gradle new file mode 100644 index 00000000..c99c4fbf --- /dev/null +++ 
b/code/functions/domain-info/api/build.gradle @@ -0,0 +1,38 @@ +plugins { + id 'java' + + id "com.google.protobuf" version "0.9.4" + id 'jvm-test-suite' +} + +java { + toolchain { + languageVersion.set(JavaLanguageVersion.of(21)) + } +} + +jar.archiveBaseName = 'domain-info-api' + +apply from: "$rootProject.projectDir/protobuf.gradle" +apply from: "$rootProject.projectDir/srcsets.gradle" + +dependencies { + implementation project(':code:common:model') + implementation project(':code:common:config') + implementation project(':code:common:service-discovery') + + implementation libs.bundles.slf4j + + implementation libs.prometheus + implementation libs.notnull + implementation libs.guice + implementation libs.gson + implementation libs.protobuf + implementation libs.javax.annotation + implementation libs.bundles.grpc + + testImplementation libs.bundles.slf4j.test + testImplementation libs.bundles.junit + testImplementation libs.mockito + +} diff --git a/code/functions/domain-info/api/java/nu/marginalia/api/domains/DomainInfoClient.java b/code/functions/domain-info/api/java/nu/marginalia/api/domains/DomainInfoClient.java new file mode 100644 index 00000000..dbd9b102 --- /dev/null +++ b/code/functions/domain-info/api/java/nu/marginalia/api/domains/DomainInfoClient.java @@ -0,0 +1,55 @@ +package nu.marginalia.api.domains; + +import com.google.inject.Inject; +import com.google.inject.Singleton; +import nu.marginalia.service.client.GrpcChannelPoolFactory; +import nu.marginalia.service.client.GrpcSingleNodeChannelPool; +import nu.marginalia.service.discovery.property.ServiceKey; +import nu.marginalia.service.discovery.property.ServicePartition; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.List; +import java.util.concurrent.*; + +import nu.marginalia.api.domains.model.*; + +@Singleton +public class DomainInfoClient { + private static final Logger logger = LoggerFactory.getLogger(DomainInfoClient.class); + + private final 
GrpcSingleNodeChannelPool channelPool; + private final ExecutorService executor = Executors.newWorkStealingPool(8); + + @Inject + public DomainInfoClient(GrpcChannelPoolFactory factory) { + this.channelPool = factory.createSingle( + ServiceKey.forGrpcApi(DomainInfoAPIGrpc.class, ServicePartition.any()), + DomainInfoAPIGrpc::newBlockingStub); + } + + public Future<List<SimilarDomain>> similarDomains(int domainId, int count) { + return channelPool.call(DomainInfoAPIGrpc.DomainInfoAPIBlockingStub::getSimilarDomains) + .async(executor) + .run(DomainsProtobufCodec.DomainQueries.createRequest(domainId, count)) + .thenApply(DomainsProtobufCodec.DomainQueries::convertResponse); + } + + public Future<List<SimilarDomain>> linkedDomains(int domainId, int count) { + return channelPool.call(DomainInfoAPIGrpc.DomainInfoAPIBlockingStub::getLinkingDomains) + .async(executor) + .run(DomainsProtobufCodec.DomainQueries.createRequest(domainId, count)) + .thenApply(DomainsProtobufCodec.DomainQueries::convertResponse); + } + + public Future<DomainInformation> domainInformation(int domainId) { + return channelPool.call(DomainInfoAPIGrpc.DomainInfoAPIBlockingStub::getDomainInfo) + .async(executor) + .run(DomainsProtobufCodec.DomainInfo.createRequest(domainId)) + .thenApply(DomainsProtobufCodec.DomainInfo::convertResponse); + } + + public boolean isAccepting() { + return channelPool.hasChannel(); + } +} diff --git a/code/functions/domain-info/api/java/nu/marginalia/api/domains/DomainsProtobufCodec.java b/code/functions/domain-info/api/java/nu/marginalia/api/domains/DomainsProtobufCodec.java new file mode 100644 index 00000000..d3c90f9f --- /dev/null +++ b/code/functions/domain-info/api/java/nu/marginalia/api/domains/DomainsProtobufCodec.java @@ -0,0 +1,76 @@ +package nu.marginalia.api.domains; + +import lombok.SneakyThrows; +import nu.marginalia.model.EdgeDomain; +import nu.marginalia.model.EdgeUrl; +import nu.marginalia.api.domains.model.*; + +import java.util.ArrayList; +import java.util.List; + +public class DomainsProtobufCodec { + + public
static class DomainQueries { + public static RpcDomainLinksRequest createRequest(int domainId, int count) { + return RpcDomainLinksRequest.newBuilder() + .setDomainId(domainId) + .setCount(count) + .build(); + } + + public static List convertResponse(RpcSimilarDomains rsp) { + List ret = new ArrayList<>(rsp.getDomainsCount()); + + for (RpcSimilarDomain sd : rsp.getDomainsList()) { + ret.add(convertResponseEntry(sd)); + } + + return ret; + } + + @SneakyThrows + private static SimilarDomain convertResponseEntry(RpcSimilarDomain sd) { + return new SimilarDomain( + new EdgeUrl(sd.getUrl()), + sd.getDomainId(), + sd.getRelatedness(), + sd.getRank(), + sd.getIndexed(), + sd.getActive(), + sd.getScreenshot(), + SimilarDomain.LinkType.valueOf(sd.getLinkType().name()) + ); + } + } + + public static class DomainInfo { + public static RpcDomainId createRequest(int domainId) { + return RpcDomainId.newBuilder() + .setDomainId(domainId) + .build(); + } + + public static DomainInformation convertResponse(RpcDomainInfoResponse rsp) { + return new DomainInformation( + new EdgeDomain(rsp.getDomain()), + rsp.getBlacklisted(), + rsp.getPagesKnown(), + rsp.getPagesFetched(), + rsp.getPagesIndexed(), + rsp.getIncomingLinks(), + rsp.getOutboundLinks(), + rsp.getNodeAffinity(), + rsp.getRanking(), + rsp.getSuggestForCrawling(), + rsp.getInCrawlQueue(), + rsp.getUnknownDomain(), + rsp.getIp(), + rsp.getAsn(), + rsp.getAsnOrg(), + rsp.getAsnCountry(), + rsp.getIpCountry(), + rsp.getState() + ); + } + } +} diff --git a/code/api/assistant-api/src/main/java/nu/marginalia/assistant/client/model/DomainInformation.java b/code/functions/domain-info/api/java/nu/marginalia/api/domains/model/DomainInformation.java similarity index 96% rename from code/api/assistant-api/src/main/java/nu/marginalia/assistant/client/model/DomainInformation.java rename to code/functions/domain-info/api/java/nu/marginalia/api/domains/model/DomainInformation.java index 5be64a97..2e62af22 100644 --- 
a/code/api/assistant-api/src/main/java/nu/marginalia/assistant/client/model/DomainInformation.java +++ b/code/functions/domain-info/api/java/nu/marginalia/api/domains/model/DomainInformation.java @@ -1,4 +1,4 @@ -package nu.marginalia.assistant.client.model; +package nu.marginalia.api.domains.model; import lombok.*; import nu.marginalia.model.EdgeDomain; diff --git a/code/api/assistant-api/src/main/java/nu/marginalia/assistant/client/model/SimilarDomain.java b/code/functions/domain-info/api/java/nu/marginalia/api/domains/model/SimilarDomain.java similarity index 97% rename from code/api/assistant-api/src/main/java/nu/marginalia/assistant/client/model/SimilarDomain.java rename to code/functions/domain-info/api/java/nu/marginalia/api/domains/model/SimilarDomain.java index 1bdae22e..d15d0f3d 100644 --- a/code/api/assistant-api/src/main/java/nu/marginalia/assistant/client/model/SimilarDomain.java +++ b/code/functions/domain-info/api/java/nu/marginalia/api/domains/model/SimilarDomain.java @@ -1,4 +1,4 @@ -package nu.marginalia.assistant.client.model; +package nu.marginalia.api.domains.model; import nu.marginalia.model.EdgeUrl; diff --git a/code/functions/domain-info/api/src/main/protobuf/domain-info.proto b/code/functions/domain-info/api/src/main/protobuf/domain-info.proto new file mode 100644 index 00000000..8be12d11 --- /dev/null +++ b/code/functions/domain-info/api/src/main/protobuf/domain-info.proto @@ -0,0 +1,111 @@ +syntax="proto3"; +package marginalia.api.domain; + +option java_package="nu.marginalia.api.domains"; +option java_multiple_files=true; + +service DomainInfoAPI { + + /** Fetches information about a domain. */ + rpc getDomainInfo(RpcDomainId) returns (RpcDomainInfoResponse) {} + + /** Fetches similar domains to the given domain. */ + rpc getSimilarDomains(RpcDomainLinksRequest) returns (RpcSimilarDomains) {} + + /** Fetches domains with links to or from the given domain. 
*/ + rpc getLinkingDomains(RpcDomainLinksRequest) returns (RpcSimilarDomains) {} +} + +message RpcDomainLinksRequest { + int32 domainId = 1; + int32 count = 2; +} + +message RpcDomainId { + int32 domainId = 1; +} + +message RpcDictionaryLookupRequest { + string word = 1; +} + +message RpcDictionaryLookupResponse { + string word = 1; + repeated RpcDictionaryEntry entries = 2; +} + +message RpcDictionaryEntry { + string type = 1; + string word = 2; + string definition = 3; +} + +message RpcSpellCheckRequest { + string text = 1; +} + +message RpcSpellCheckResponse { + repeated string suggestions = 1; +} + +message RpcUnitConversionRequest { + string unit = 1; + string from = 2; + string to = 3; +} + +message RpcUnitConversionResponse { + string result = 1; +} + +message RpcEvalMathRequest { + string expression = 1; +} + +message RpcEvalMathResponse { + string result = 1; +} + +message RpcDomainInfoResponse { + int32 domainId = 1; + string domain = 2; + bool blacklisted = 3; + int32 pagesKnown = 4; + int32 pagesFetched = 5; + int32 pagesIndexed = 6; + int32 incomingLinks = 7; + int32 outboundLinks = 8; + int32 nodeAffinity = 9; + double ranking = 10; + bool suggestForCrawling = 11; + bool inCrawlQueue = 12; + bool unknownDomain = 13; + string ip = 14; + int32 asn = 15; + string asnOrg = 16; + string asnCountry = 17; + string ipCountry = 18; + string state = 19; +} + +message RpcSimilarDomains { + repeated RpcSimilarDomain domains = 1; +} + +message RpcSimilarDomain { + string url = 1; + int32 domainId = 2; + double relatedness = 3; + double rank = 4; + bool indexed = 5; + bool active = 6; + bool screenshot = 7; + LINK_TYPE linkType = 8; + + enum LINK_TYPE { + BACKWARD = 0; + FOWARD = 1; + BIDIRECTIONAL = 2; + NONE = 3; + }; +} \ No newline at end of file diff --git a/code/features-index/domain-ranking/build.gradle b/code/functions/domain-info/build.gradle similarity index 52% rename from code/features-index/domain-ranking/build.gradle rename to 
code/functions/domain-info/build.gradle index 29b191ef..4858e935 100644 --- a/code/features-index/domain-ranking/build.gradle +++ b/code/functions/domain-info/build.gradle @@ -1,9 +1,7 @@ plugins { id 'java' - - id "de.undercouch.download" version "5.1.0" - + id 'application' id 'jvm-test-suite' } @@ -13,33 +11,37 @@ java { } } -dependencies { - implementation project(':code:common:db') - implementation project(':code:common:model') - implementation project(':code:common:service') - implementation project(':code:common:service-client') - implementation project(':code:api:query-api') +apply from: "$rootProject.projectDir/srcsets.gradle" - implementation 'org.jgrapht:jgrapht-core:1.5.2' +dependencies { + implementation project(':code:functions:domain-info:api') + implementation project(':code:functions:link-graph:api') + + implementation project(':code:common:config') + implementation project(':code:common:service') + implementation project(':code:common:model') + implementation project(':code:common:db') + implementation project(':code:common:service-discovery') + + implementation project(':code:libraries:geo-ip') implementation libs.bundles.slf4j - implementation libs.bundles.mariadb - implementation libs.guice + + implementation libs.prometheus + implementation libs.bundles.grpc implementation libs.notnull - implementation libs.roaringbitmap + implementation libs.guice + implementation libs.spark + implementation libs.opencsv implementation libs.trove + implementation libs.roaringbitmap implementation libs.fastutil - implementation libs.hll + implementation libs.bundles.gson + implementation libs.bundles.mariadb - testImplementation project(':code:libraries:array') - - testImplementation libs.commons.lang3 testImplementation libs.bundles.slf4j.test testImplementation libs.bundles.junit testImplementation libs.mockito - testImplementation platform('org.testcontainers:testcontainers-bom:1.17.4') - testImplementation 'org.testcontainers:mariadb:1.17.4' - 
testImplementation 'org.testcontainers:junit-jupiter:1.17.4' - testImplementation project(':code:libraries:test-helpers') -} \ No newline at end of file + +} diff --git a/code/functions/domain-info/java/nu/marginalia/functions/domains/DomainInfoGrpcService.java b/code/functions/domain-info/java/nu/marginalia/functions/domains/DomainInfoGrpcService.java new file mode 100644 index 00000000..8af30e56 --- /dev/null +++ b/code/functions/domain-info/java/nu/marginalia/functions/domains/DomainInfoGrpcService.java @@ -0,0 +1,52 @@ +package nu.marginalia.functions.domains; + +import com.google.inject.Inject; +import io.grpc.stub.StreamObserver; +import nu.marginalia.api.domains.*; + +public class DomainInfoGrpcService extends DomainInfoAPIGrpc.DomainInfoAPIImplBase { + + private final DomainInformationService domainInformationService; + private final SimilarDomainsService similarDomainsService; + @Inject + public DomainInfoGrpcService(DomainInformationService domainInformationService, SimilarDomainsService similarDomainsService) + { + + this.domainInformationService = domainInformationService; + this.similarDomainsService = similarDomainsService; + } + + @Override + public void getDomainInfo(RpcDomainId request, StreamObserver responseObserver) { + var ret = domainInformationService.domainInfo(request.getDomainId()); + + ret.ifPresent(responseObserver::onNext); + + responseObserver.onCompleted(); + } + + @Override + public void getSimilarDomains(RpcDomainLinksRequest request, + StreamObserver responseObserver) { + var ret = similarDomainsService.getSimilarDomains(request.getDomainId(), request.getCount()); + + var responseBuilder = RpcSimilarDomains + .newBuilder() + .addAllDomains(ret); + + responseObserver.onNext(responseBuilder.build()); + responseObserver.onCompleted(); + } + + @Override + public void getLinkingDomains(RpcDomainLinksRequest request, StreamObserver responseObserver) { + var ret = similarDomainsService.getLinkingDomains(request.getDomainId(), 
request.getCount()); + + var responseBuilder = RpcSimilarDomains + .newBuilder() + .addAllDomains(ret); + + responseObserver.onNext(responseBuilder.build()); + responseObserver.onCompleted(); + } +} diff --git a/code/services-core/assistant-service/src/main/java/nu/marginalia/assistant/domains/DomainInformationService.java b/code/functions/domain-info/java/nu/marginalia/functions/domains/DomainInformationService.java similarity index 61% rename from code/services-core/assistant-service/src/main/java/nu/marginalia/assistant/domains/DomainInformationService.java rename to code/functions/domain-info/java/nu/marginalia/functions/domains/DomainInformationService.java index b99c3abf..1aeffae6 100644 --- a/code/services-core/assistant-service/src/main/java/nu/marginalia/assistant/domains/DomainInformationService.java +++ b/code/functions/domain-info/java/nu/marginalia/functions/domains/DomainInformationService.java @@ -1,11 +1,11 @@ -package nu.marginalia.assistant.domains; +package nu.marginalia.functions.domains; import com.zaxxer.hikari.HikariDataSource; +import nu.marginalia.api.domains.RpcDomainInfoResponse; +import nu.marginalia.api.linkgraph.AggregateLinkGraphClient; import nu.marginalia.geoip.GeoIpDictionary; import nu.marginalia.model.EdgeDomain; import nu.marginalia.db.DbDomainQueries; -import nu.marginalia.assistant.client.model.DomainInformation; -import nu.marginalia.query.client.QueryClient; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -21,7 +21,7 @@ public class DomainInformationService { private final GeoIpDictionary geoIpDictionary; private DbDomainQueries dbDomainQueries; - private final QueryClient queryClient; + private final AggregateLinkGraphClient linkGraphClient; private HikariDataSource dataSource; private final Logger logger = LoggerFactory.getLogger(getClass()); @@ -29,16 +29,16 @@ public class DomainInformationService { public DomainInformationService( DbDomainQueries dbDomainQueries, GeoIpDictionary geoIpDictionary, - 
QueryClient queryClient, + AggregateLinkGraphClient linkGraphClient, HikariDataSource dataSource) { this.dbDomainQueries = dbDomainQueries; this.geoIpDictionary = geoIpDictionary; - this.queryClient = queryClient; + this.linkGraphClient = linkGraphClient; this.dataSource = dataSource; } - public Optional domainInfo(int domainId) { + public Optional domainInfo(int domainId) { Optional domain = dbDomainQueries.getDomain(domainId); if (domain.isEmpty()) { @@ -46,9 +46,9 @@ public class DomainInformationService { } - var builder = DomainInformation.builder(); + var builder = RpcDomainInfoResponse.newBuilder(); try (var connection = dataSource.getConnection(); - var stmt = connection.createStatement(); + var stmt = connection.createStatement() ) { boolean inCrawlQueue; int outboundLinks = 0; @@ -63,18 +63,18 @@ public class DomainInformationService { if (rs.next()) { String ip = rs.getString("IP"); - builder.ip(ip); + builder.setIp(ip); geoIpDictionary.getAsnInfo(ip).ifPresent(asnInfo -> { - builder.asn(asnInfo.asn()); - builder.asnOrg(asnInfo.org()); - builder.asnCountry(asnInfo.country()); + builder.setAsn(asnInfo.asn()); + builder.setAsnOrg(asnInfo.org()); + builder.setAsnCountry(asnInfo.country()); }); - builder.ipCountry(geoIpDictionary.getCountry(ip)); + builder.setIpCountry(geoIpDictionary.getCountry(ip)); - builder.nodeAffinity(rs.getInt("NODE_AFFINITY")); - builder.domain(new EdgeDomain(rs.getString("DOMAIN_NAME"))); - builder.state(rs.getString("STATE")); - builder.ranking(Math.round(100.0*(1.0-rs.getDouble("RANK")))); + builder.setNodeAffinity(rs.getInt("NODE_AFFINITY")); + builder.setDomain(rs.getString("DOMAIN_NAME")); + builder.setState(rs.getString("STATE")); + builder.setRanking(Math.round(100.0*(1.0-rs.getDouble("RANK")))); } rs = stmt.executeQuery(STR.""" SELECT 1 FROM CRAWL_QUEUE @@ -82,10 +82,10 @@ public class DomainInformationService { WHERE EC_DOMAIN.ID=\{domainId} """); inCrawlQueue = rs.next(); - builder.inCrawlQueue(inCrawlQueue); + 
builder.setInCrawlQueue(inCrawlQueue); - builder.incomingLinks(queryClient.countLinksToDomain(domainId)); - builder.outboundLinks(queryClient.countLinksFromDomain(domainId)); + builder.setIncomingLinks(linkGraphClient.countLinksToDomain(domainId)); + builder.setOutboundLinks(linkGraphClient.countLinksFromDomain(domainId)); rs = stmt.executeQuery(STR.""" SELECT KNOWN_URLS, GOOD_URLS, VISITED_URLS FROM DOMAIN_METADATA WHERE ID=\{domainId} @@ -93,12 +93,12 @@ public class DomainInformationService { if (rs.next()) { pagesVisited = rs.getInt("VISITED_URLS"); - builder.pagesKnown(rs.getInt("KNOWN_URLS")); - builder.pagesIndexed(rs.getInt("GOOD_URLS")); - builder.pagesFetched(rs.getInt("VISITED_URLS")); + builder.setPagesKnown(rs.getInt("KNOWN_URLS")); + builder.setPagesIndexed(rs.getInt("GOOD_URLS")); + builder.setPagesFetched(rs.getInt("VISITED_URLS")); } - builder.suggestForCrawling((pagesVisited == 0 && outboundLinks == 0 && !inCrawlQueue)); + builder.setSuggestForCrawling((pagesVisited == 0 && outboundLinks == 0 && !inCrawlQueue)); return Optional.of(builder.build()); } diff --git a/code/services-core/assistant-service/src/main/java/nu/marginalia/assistant/domains/SimilarDomainsService.java b/code/functions/domain-info/java/nu/marginalia/functions/domains/SimilarDomainsService.java similarity index 68% rename from code/services-core/assistant-service/src/main/java/nu/marginalia/assistant/domains/SimilarDomainsService.java rename to code/functions/domain-info/java/nu/marginalia/functions/domains/SimilarDomainsService.java index ddcc2e98..8d6cd70e 100644 --- a/code/services-core/assistant-service/src/main/java/nu/marginalia/assistant/domains/SimilarDomainsService.java +++ b/code/functions/domain-info/java/nu/marginalia/functions/domains/SimilarDomainsService.java @@ -1,4 +1,4 @@ -package nu.marginalia.assistant.domains; +package nu.marginalia.functions.domains; import com.google.inject.Inject; import com.zaxxer.hikari.HikariDataSource; @@ -8,16 +8,18 @@ import 
gnu.trove.map.hash.TIntDoubleHashMap; import gnu.trove.map.hash.TIntIntHashMap; import gnu.trove.set.TIntSet; import gnu.trove.set.hash.TIntHashSet; -import nu.marginalia.assistant.client.model.SimilarDomain; +import it.unimi.dsi.fastutil.ints.Int2DoubleArrayMap; +import nu.marginalia.api.domains.*; +import nu.marginalia.api.domains.model.SimilarDomain; +import nu.marginalia.api.linkgraph.AggregateLinkGraphClient; import nu.marginalia.model.EdgeDomain; -import nu.marginalia.query.client.QueryClient; +import org.roaringbitmap.RoaringBitmap; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.sql.ResultSet; import java.sql.SQLException; import java.util.ArrayList; -import java.util.BitSet; import java.util.List; import java.util.concurrent.Executors; import java.util.stream.IntStream; @@ -26,25 +28,25 @@ public class SimilarDomainsService { private static final Logger logger = LoggerFactory.getLogger(SimilarDomainsService.class); private final HikariDataSource dataSource; - private final QueryClient queryClient; + private final AggregateLinkGraphClient linkGraphClient; private volatile TIntIntHashMap domainIdToIdx = new TIntIntHashMap(100_000); private volatile int[] domainIdxToId; - public volatile TIntDoubleHashMap[] relatedDomains; + public volatile Int2DoubleArrayMap[] relatedDomains; public volatile TIntList[] domainNeighbors = null; - public volatile BitSet screenshotDomains = null; - public volatile BitSet activeDomains = null; - public volatile BitSet indexedDomains = null; - public volatile double[] domainRanks = null; + public volatile RoaringBitmap screenshotDomains = null; + public volatile RoaringBitmap activeDomains = null; + public volatile RoaringBitmap indexedDomains = null; + public volatile TIntDoubleHashMap domainRanks = null; public volatile String[] domainNames = null; volatile boolean isReady = false; @Inject - public SimilarDomainsService(HikariDataSource dataSource, QueryClient queryClient) { + public 
SimilarDomainsService(HikariDataSource dataSource, AggregateLinkGraphClient linkGraphClient) { this.dataSource = dataSource; - this.queryClient = queryClient; + this.linkGraphClient = linkGraphClient; Executors.newSingleThreadExecutor().submit(this::init); } @@ -68,13 +70,13 @@ public class SimilarDomainsService { domainIdxToId[idx] = id; return true; }); - domainRanks = new double[domainIdToIdx.size()]; + domainRanks = new TIntDoubleHashMap(100_000, 0.5f, -1, 0.); domainNames = new String[domainIdToIdx.size()]; domainNeighbors = new TIntList[domainIdToIdx.size()]; - screenshotDomains = new BitSet(domainIdToIdx.size()); - activeDomains = new BitSet(domainIdToIdx.size()); - indexedDomains = new BitSet(domainIdToIdx.size()); - relatedDomains = new TIntDoubleHashMap[domainIdToIdx.size()]; + screenshotDomains = new RoaringBitmap(); + activeDomains = new RoaringBitmap(); + indexedDomains = new RoaringBitmap(); + relatedDomains = new Int2DoubleArrayMap[domainIdToIdx.size()]; logger.info("Loaded {} domain IDs", domainIdToIdx.size()); @@ -93,13 +95,17 @@ public class SimilarDomainsService { int higherIndex = Math.max(didx, nidx); if (relatedDomains[lowerIndex] == null) - relatedDomains[lowerIndex] = new TIntDoubleHashMap(32); - relatedDomains[lowerIndex].put(higherIndex, Math.round(100 * rs.getDouble(3))); + relatedDomains[lowerIndex] = new Int2DoubleArrayMap(4); + + double rank = Math.round(100 * rs.getDouble(3)); + if (rank > 0.1) { + relatedDomains[lowerIndex].put(higherIndex, rank); + } if (domainNeighbors[didx] == null) - domainNeighbors[didx] = new TIntArrayList(32); + domainNeighbors[didx] = new TIntArrayList(4); if (domainNeighbors[nidx] == null) - domainNeighbors[nidx] = new TIntArrayList(32); + domainNeighbors[nidx] = new TIntArrayList(4); domainNeighbors[didx].add(nidx); domainNeighbors[nidx].add(didx); @@ -121,14 +127,14 @@ public class SimilarDomainsService { final int id = rs.getInt("ID"); final int idx = domainIdToIdx.get(id); - domainRanks[idx] = 
Math.round(100 * (1. - rs.getDouble("RANK"))); + domainRanks.put(idx, Math.round(100 * (1. - rs.getDouble("RANK")))); domainNames[idx] = rs.getString("DOMAIN_NAME"); if (rs.getBoolean("INDEXED")) - indexedDomains.set(idx); + indexedDomains.add(idx); if (rs.getBoolean("ACTIVE")) - activeDomains.set(idx); + activeDomains.add(idx); } @@ -141,10 +147,10 @@ public class SimilarDomainsService { final int id = rs.getInt(1); final int idx = domainIdToIdx.get(id); - screenshotDomains.set(idx); + screenshotDomains.add(idx); } - logger.info("Loaded {} domains", domainRanks.length); + logger.info("Loaded {} domains", domainRanks.size()); isReady = true; } } @@ -168,7 +174,7 @@ public class SimilarDomainsService { } - public List getSimilarDomains(int domainId, int count) { + public List getSimilarDomains(int domainId, int count) { int domainIdx = domainIdToIdx.get(domainId); TIntList allIdsList = domainNeighbors[domainIdx]; @@ -204,23 +210,30 @@ public class SimilarDomainsService { .limit(count) .toArray(); - List domains = new ArrayList<>(); + List domains = new ArrayList<>(); + for (int idx : resultIds) { int id = domainIdxToId[idx]; - domains.add(new SimilarDomain( - new EdgeDomain(domainNames[idx]).toRootUrl(), - id, - getRelatedness(domainId, id), - domainRanks[idx], - indexedDomains.get(idx), - activeDomains.get(idx), - screenshotDomains.get(idx), - SimilarDomain.LinkType.find( - linkingIdsStoD.contains(idx), - linkingIdsDtoS.contains(idx) - ) - )); + if (domainNames[idx].length() > 32) + continue; + + var linkType = SimilarDomain.LinkType.find( + linkingIdsStoD.contains(idx), + linkingIdsDtoS.contains(idx) + ); + + domains.add(RpcSimilarDomain.newBuilder() + .setDomainId(id) + .setUrl(new EdgeDomain(domainNames[idx]).toRootUrl().toString()) + .setRelatedness(getRelatedness(domainId, id)) + .setRank(domainRanks.get(idx)) + .setIndexed(indexedDomains.contains(idx)) + .setActive(activeDomains.contains(idx)) + .setScreenshot(screenshotDomains.contains(idx)) + 
.setLinkType(RpcSimilarDomain.LINK_TYPE.valueOf(linkType.name())) + .build()); + } domains.removeIf(this::shouldRemove); @@ -228,21 +241,18 @@ public class SimilarDomainsService { return domains; } - private boolean shouldRemove(SimilarDomain domainResult) { - if (domainResult.url().domain.toString().length() > 32) - return true; - + private boolean shouldRemove(RpcSimilarDomain domainResult) { // Remove domains that have a relatively high likelihood of being dead links // or not very interesting - if (!(domainResult.indexed() && domainResult.active()) - && domainResult.relatedness() <= 50) + if (!(domainResult.getIndexed() && domainResult.getActive()) + && domainResult.getRelatedness() <= 50) { return true; } // Remove domains that are not very similar if there is no mutual link - if (domainResult.linkType() == SimilarDomain.LinkType.NONE - && domainResult.relatedness() <= 25) + if (domainResult.getLinkType() == RpcSimilarDomain.LINK_TYPE.NONE + && domainResult.getRelatedness() <= 25) return true; return false; @@ -251,7 +261,7 @@ public class SimilarDomainsService { private TIntSet getLinkingIdsDToS(int domainIdx) { var items = new TIntHashSet(); - for (int id : queryClient.getLinksFromDomain(domainIdxToId[domainIdx])) { + for (int id : linkGraphClient.getLinksFromDomain(domainIdxToId[domainIdx])) { items.add(domainIdToIdx.get(id)); } @@ -261,14 +271,14 @@ public class SimilarDomainsService { private TIntSet getLinkingIdsSToD(int domainIdx) { var items = new TIntHashSet(); - for (int id : queryClient.getLinksToDomain(domainIdxToId[domainIdx])) { + for (int id : linkGraphClient.getLinksToDomain(domainIdxToId[domainIdx])) { items.add(domainIdToIdx.get(id)); } return items; } - public List getLinkingDomains(int domainId, int count) { + public List getLinkingDomains(int domainId, int count) { int domainIdx = domainIdToIdx.get(domainId); TIntSet linkingIdsDtoS = getLinkingIdsDToS(domainIdx); @@ -286,7 +296,7 @@ public class SimilarDomainsService { double[] ranksArray 
= new double[idsArray.length]; for (int i = 0; i < idxArray.length; i++) { - ranksArray[i] = domainRanks[idxArray[i]]; + ranksArray[i] = this.domainRanks.get(idxArray[i]); } double[] relatednessArray = new double[idsArray.length]; for (int i = 0; i < idsArray.length; i++) { @@ -316,23 +326,29 @@ public class SimilarDomainsService { .limit(count) .toArray(); - List domains = new ArrayList<>(); + List domains = new ArrayList<>(); for (int id : resultIds) { int idx = domainIdToIdx.get(id); - domains.add(new SimilarDomain( - new EdgeDomain(domainNames[idx]).toRootUrl(), - id, - getRelatedness(domainId, id), - domainRanks[idx], - indexedDomains.get(idx), - activeDomains.get(idx), - screenshotDomains.get(idx), - SimilarDomain.LinkType.find( - linkingIdsStoD.contains(idx), - linkingIdsDtoS.contains(idx) - ) - )); + if (domainNames[idx].length() > 32) + continue; + + var linkType = SimilarDomain.LinkType.find( + linkingIdsStoD.contains(idx), + linkingIdsDtoS.contains(idx) + ); + + domains.add(RpcSimilarDomain.newBuilder() + .setDomainId(id) + .setUrl(new EdgeDomain(domainNames[idx]).toRootUrl().toString()) + .setRelatedness(getRelatedness(domainId, id)) + .setRank(domainRanks.get(idx)) // rank is keyed by domain index, as in getSimilarDomains; ranksArray is positional (slot i), so indexing it by the domain id reads the wrong slot or overruns the array + .setIndexed(indexedDomains.contains(idx)) + .setActive(activeDomains.contains(idx)) + .setScreenshot(screenshotDomains.contains(idx)) + .setLinkType(RpcSimilarDomain.LINK_TYPE.valueOf(linkType.name())) + .build()); + } domains.removeIf(this::shouldRemove); diff --git a/code/features-index/result-ranking/build.gradle b/code/functions/link-graph/aggregate/build.gradle similarity index 58% rename from code/features-index/result-ranking/build.gradle rename to code/functions/link-graph/aggregate/build.gradle index a8cb48a4..52be585f 100644 --- a/code/features-index/result-ranking/build.gradle +++ b/code/functions/link-graph/aggregate/build.gradle @@ -1,6 +1,7 @@ plugins { id 'java' - id "de.undercouch.download" version "5.1.0" + + id 'application' id 'jvm-test-suite' } @@ -10,21 +11,28 @@ java { } }
+apply from: "$rootProject.projectDir/srcsets.gradle" + dependencies { - implementation project(':code:common:model') + implementation project(':code:functions:link-graph:api') + + implementation project(':code:common:config') implementation project(':code:common:service') - implementation project(':code:api:index-api') + implementation project(':code:common:model') + implementation project(':code:common:service-discovery') implementation libs.bundles.slf4j - implementation libs.guice + + implementation libs.prometheus + implementation libs.bundles.grpc implementation libs.notnull - implementation libs.trove + implementation libs.guice implementation libs.fastutil + implementation libs.bundles.mariadb testImplementation libs.bundles.slf4j.test testImplementation libs.bundles.junit testImplementation libs.mockito - testImplementation project(':code:libraries:term-frequency-dict') - testImplementation project(':code:libraries:braille-block-punch-cards') -} + +} diff --git a/code/functions/link-graph/aggregate/java/nu/marginalia/linkgraph/AggregateLinkGraphService.java b/code/functions/link-graph/aggregate/java/nu/marginalia/linkgraph/AggregateLinkGraphService.java new file mode 100644 index 00000000..7731b335 --- /dev/null +++ b/code/functions/link-graph/aggregate/java/nu/marginalia/linkgraph/AggregateLinkGraphService.java @@ -0,0 +1,101 @@ +package nu.marginalia.linkgraph; + +import com.google.inject.Inject; +import io.grpc.stub.StreamObserver; +import nu.marginalia.api.linkgraph.*; +import nu.marginalia.api.linkgraph.PartitionLinkGraphClient; +import nu.marginalia.api.linkgraph.LinkGraphApiGrpc; +import nu.marginalia.api.linkgraph.LinkGraphApiGrpc.LinkGraphApiBlockingStub; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.List; + +/** This class is responsible for aggregating the link graph data from the partitioned link graph + * services. 
+ */ +public class AggregateLinkGraphService extends LinkGraphApiGrpc.LinkGraphApiImplBase { + private static final Logger logger = LoggerFactory.getLogger(AggregateLinkGraphService.class); + private final PartitionLinkGraphClient client; + + @Inject + public AggregateLinkGraphService(PartitionLinkGraphClient client) { + this.client = client; + } + + @Override + public void getAllLinks(Empty request, + StreamObserver responseObserver) { + + client.getChannelPool().call(LinkGraphApiBlockingStub::getAllLinks) + .run(Empty.getDefaultInstance()) + .forEach(iter -> iter.forEachRemaining(responseObserver::onNext)); + + responseObserver.onCompleted(); + } + + @Override + public void getLinksFromDomain(RpcDomainId request, + StreamObserver responseObserver) { + var rspBuilder = RpcDomainIdList.newBuilder(); + + client.getChannelPool().call(LinkGraphApiBlockingStub::getLinksFromDomain) + .run(request) + .stream() + .map(RpcDomainIdList::getDomainIdList) + .flatMap(List::stream) + .forEach(rspBuilder::addDomainId); + + responseObserver.onNext(rspBuilder.build()); + responseObserver.onCompleted(); + } + + @Override + public void getLinksToDomain(RpcDomainId request, + StreamObserver responseObserver) { + var rspBuilder = RpcDomainIdList.newBuilder(); + + + client.getChannelPool().call(LinkGraphApiBlockingStub::getLinksToDomain) + .run(request) + .stream() + .map(RpcDomainIdList::getDomainIdList) + .flatMap(List::stream) + .forEach(rspBuilder::addDomainId); + + responseObserver.onNext(rspBuilder.build()); + responseObserver.onCompleted(); + } + + @Override + public void countLinksFromDomain(RpcDomainId request, + StreamObserver responseObserver) { + int sum = client.getChannelPool().call(LinkGraphApiBlockingStub::countLinksFromDomain) + .run(request) + .stream() + .mapToInt(RpcDomainIdCount::getIdCount) + .sum(); + + var rspBuilder = RpcDomainIdCount.newBuilder(); + rspBuilder.setIdCount(sum); + responseObserver.onNext(rspBuilder.build()); + responseObserver.onCompleted(); + } 
+ + @Override + public void countLinksToDomain(RpcDomainId request, + StreamObserver responseObserver) { + + int sum = client.getChannelPool().call(LinkGraphApiBlockingStub::countLinksToDomain) + .run(request) + .stream() + .mapToInt(RpcDomainIdCount::getIdCount) + .sum(); + + var rspBuilder = RpcDomainIdCount.newBuilder(); + rspBuilder.setIdCount(sum); + responseObserver.onNext(rspBuilder.build()); + responseObserver.onCompleted(); + } + +} diff --git a/code/functions/link-graph/aggregate/readme.md b/code/functions/link-graph/aggregate/readme.md new file mode 100644 index 00000000..a9429ca5 --- /dev/null +++ b/code/functions/link-graph/aggregate/readme.md @@ -0,0 +1,3 @@ +This module is responsible for aggregating the link graph from the partitioned services, and exposing a unified +view of the link graph. It does not keep any data or state, but instead delegates to the partitioned +services. \ No newline at end of file diff --git a/code/api/query-api/build.gradle b/code/functions/link-graph/api/build.gradle similarity index 71% rename from code/api/query-api/build.gradle rename to code/functions/link-graph/api/build.gradle index ed893ae1..ffeab7f8 100644 --- a/code/api/query-api/build.gradle +++ b/code/functions/link-graph/api/build.gradle @@ -1,5 +1,7 @@ plugins { id 'java' + + id "com.google.protobuf" version "0.9.4" id 'jvm-test-suite' } @@ -9,26 +11,26 @@ java { } } +jar.archiveBaseName = 'link-graph-api' + +apply from: "$rootProject.projectDir/protobuf.gradle" +apply from: "$rootProject.projectDir/srcsets.gradle" + dependencies { implementation project(':code:common:model') - implementation project(':code:api:index-api') implementation project(':code:common:config') - implementation project(':code:libraries:message-queue') - implementation project(':code:features-index:index-query') implementation project(':code:common:service-discovery') - implementation project(':code:common:service-client') implementation libs.bundles.slf4j - implementation 
libs.roaringbitmap implementation libs.prometheus implementation libs.notnull - implementation libs.trove implementation libs.guice - implementation libs.rxjava implementation libs.gson - implementation libs.bundles.grpc implementation libs.protobuf + implementation libs.roaringbitmap + implementation libs.javax.annotation + implementation libs.bundles.grpc testImplementation libs.bundles.slf4j.test testImplementation libs.bundles.junit diff --git a/code/functions/link-graph/api/java/nu/marginalia/api/linkgraph/AggregateLinkGraphClient.java b/code/functions/link-graph/api/java/nu/marginalia/api/linkgraph/AggregateLinkGraphClient.java new file mode 100644 index 00000000..4e9c9a3d --- /dev/null +++ b/code/functions/link-graph/api/java/nu/marginalia/api/linkgraph/AggregateLinkGraphClient.java @@ -0,0 +1,138 @@ +package nu.marginalia.api.linkgraph; + +import com.google.inject.Inject; +import com.google.inject.Singleton; +import nu.marginalia.api.linkgraph.LinkGraphApiGrpc; +import nu.marginalia.service.client.GrpcChannelPoolFactory; +import nu.marginalia.service.client.GrpcSingleNodeChannelPool; +import nu.marginalia.service.discovery.property.ServiceKey; +import nu.marginalia.service.discovery.property.ServicePartition; +import org.roaringbitmap.longlong.PeekableLongIterator; +import org.roaringbitmap.longlong.Roaring64Bitmap; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.time.Duration; +import java.util.List; + +import static nu.marginalia.api.linkgraph.LinkGraphApiGrpc.*; + +@Singleton +public class AggregateLinkGraphClient { + private static final Logger logger = LoggerFactory.getLogger(AggregateLinkGraphClient.class); + + private final GrpcSingleNodeChannelPool channelPool; + + @Inject + public AggregateLinkGraphClient(GrpcChannelPoolFactory factory) { + this.channelPool = factory.createSingle( + ServiceKey.forGrpcApi(LinkGraphApiGrpc.class, ServicePartition.any()), + LinkGraphApiGrpc::newBlockingStub); + } + + + public AllLinks 
getAllDomainLinks() { + AllLinks links = new AllLinks(); + + channelPool.call(LinkGraphApiBlockingStub::getAllLinks) + .run(Empty.getDefaultInstance()) + .forEachRemaining(pairs -> { + for (int i = 0; i < pairs.getDestIdsCount(); i++) { + links.add(pairs.getSourceIds(i), pairs.getDestIds(i)); + } + }); + + return links; + } + + public List getLinksToDomain(int domainId) { + try { + return channelPool.call(LinkGraphApiBlockingStub::getLinksToDomain) + .run(RpcDomainId.newBuilder().setDomainId(domainId).build()) + .getDomainIdList() + .stream() + .sorted() + .toList(); + } + catch (Exception e) { + logger.error("API Exception", e); + return List.of(); + } + } + + public List getLinksFromDomain(int domainId) { + try { + return channelPool.call(LinkGraphApiBlockingStub::getLinksFromDomain) + .run(RpcDomainId.newBuilder().setDomainId(domainId).build()) + .getDomainIdList() + .stream() + .sorted() + .toList(); + + } + catch (Exception e) { + logger.error("API Exception", e); + return List.of(); + } + } + + public int countLinksToDomain(int domainId) { + try { + return channelPool.call(LinkGraphApiBlockingStub::countLinksToDomain) + .run(RpcDomainId.newBuilder().setDomainId(domainId).build()) + .getIdCount(); + + } + catch (Exception e) { + logger.error("API Exception", e); + return 0; + } + } + + public int countLinksFromDomain(int domainId) { + try { + return channelPool.call(LinkGraphApiBlockingStub::countLinksFromDomain) + .run(RpcDomainId.newBuilder().setDomainId(domainId).build()) + .getIdCount(); + } + catch (Exception e) { + logger.error("API Exception", e); + return 0; + } + } + + public boolean waitReady(Duration duration) throws InterruptedException { + return channelPool.awaitChannel(duration); + } + + public static class AllLinks { + private final Roaring64Bitmap sourceToDest = new Roaring64Bitmap(); + + public void add(int source, int dest) { + sourceToDest.add(Integer.toUnsignedLong(source) << 32 | Integer.toUnsignedLong(dest)); + } + + public Iterator 
iterator() { + return new Iterator(); + } + + public class Iterator { + private final PeekableLongIterator base = sourceToDest.getLongIterator(); + long val = Long.MIN_VALUE; + + public boolean advance() { + if (base.hasNext()) { + val = base.next(); + return true; + } + return false; + } + public int source() { + return (int) (val >>> 32); + } + public int dest() { + return (int) (val & 0xFFFF_FFFFL); + } + } + } +} diff --git a/code/functions/link-graph/api/java/nu/marginalia/api/linkgraph/PartitionLinkGraphClient.java b/code/functions/link-graph/api/java/nu/marginalia/api/linkgraph/PartitionLinkGraphClient.java new file mode 100644 index 00000000..e6fa5ce8 --- /dev/null +++ b/code/functions/link-graph/api/java/nu/marginalia/api/linkgraph/PartitionLinkGraphClient.java @@ -0,0 +1,29 @@ +package nu.marginalia.api.linkgraph; + +import com.google.inject.Inject; +import com.google.inject.Singleton; +import nu.marginalia.service.client.GrpcChannelPoolFactory; +import nu.marginalia.service.client.GrpcMultiNodeChannelPool; +import nu.marginalia.service.discovery.property.ServiceKey; +import nu.marginalia.service.discovery.property.ServicePartition; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +@Singleton +public class PartitionLinkGraphClient { + private static final Logger logger = LoggerFactory.getLogger(PartitionLinkGraphClient.class); + + private final GrpcMultiNodeChannelPool channelPool; + + @Inject + public PartitionLinkGraphClient(GrpcChannelPoolFactory factory) { + this.channelPool = factory.createMulti( + ServiceKey.forGrpcApi(LinkGraphApiGrpc.class, ServicePartition.multi()), + LinkGraphApiGrpc::newBlockingStub); + } + + public GrpcMultiNodeChannelPool getChannelPool() { + return channelPool; + } + +} diff --git a/code/functions/link-graph/api/src/main/protobuf/link-graph.proto b/code/functions/link-graph/api/src/main/protobuf/link-graph.proto new file mode 100644 index 00000000..753c8e1a --- /dev/null +++ 
b/code/functions/link-graph/api/src/main/protobuf/link-graph.proto @@ -0,0 +1,29 @@ +syntax="proto3"; +package nu.marginalia.api.linkgraph; + +option java_package="nu.marginalia.api.linkgraph"; +option java_multiple_files=true; + +service LinkGraphApi { + rpc getAllLinks(Empty) returns (stream RpcDomainIdPairs) {} + rpc getLinksFromDomain(RpcDomainId) returns (RpcDomainIdList) {} + rpc getLinksToDomain(RpcDomainId) returns (RpcDomainIdList) {} + rpc countLinksFromDomain(RpcDomainId) returns (RpcDomainIdCount) {} + rpc countLinksToDomain(RpcDomainId) returns (RpcDomainIdCount) {} +} + +message RpcDomainId { + int32 domainId = 1; +} +message RpcDomainIdList { + repeated int32 domainId = 1 [packed=true]; +} +message RpcDomainIdCount { + int32 idCount = 1; +} +message RpcDomainIdPairs { + repeated int32 sourceIds = 1 [packed=true]; + repeated int32 destIds = 2 [packed=true]; +} + +message Empty {} \ No newline at end of file diff --git a/code/functions/link-graph/partition/build.gradle b/code/functions/link-graph/partition/build.gradle new file mode 100644 index 00000000..43a2e654 --- /dev/null +++ b/code/functions/link-graph/partition/build.gradle @@ -0,0 +1,44 @@ +plugins { + id 'java' + + id 'application' + id 'jvm-test-suite' +} + +java { + toolchain { + languageVersion.set(JavaLanguageVersion.of(21)) + } +} + +apply from: "$rootProject.projectDir/srcsets.gradle" + +dependencies { + implementation project(':code:functions:link-graph:api') + + implementation project(':code:common:config') + implementation project(':code:common:service') + implementation project(':code:common:model') + implementation project(':code:common:linkdb') + implementation project(':code:common:db') + implementation project(':code:common:service-discovery') + + implementation libs.bundles.slf4j + + implementation libs.prometheus + implementation libs.bundles.grpc + implementation libs.notnull + implementation libs.guice + implementation libs.spark + implementation libs.opencsv + 
implementation libs.trove + implementation libs.fastutil + implementation libs.bundles.gson + implementation libs.bundles.mariadb + + testImplementation libs.bundles.slf4j.test + testImplementation libs.bundles.junit + testImplementation libs.mockito + + +} diff --git a/code/common/linkdb/src/main/java/nu/marginalia/linkdb/dlinks/DomainLinkDb.java b/code/functions/link-graph/partition/java/nu/marginalia/linkgraph/DomainLinks.java similarity index 72% rename from code/common/linkdb/src/main/java/nu/marginalia/linkdb/dlinks/DomainLinkDb.java rename to code/functions/link-graph/partition/java/nu/marginalia/linkgraph/DomainLinks.java index bb7c43c1..ed0be39e 100644 --- a/code/common/linkdb/src/main/java/nu/marginalia/linkdb/dlinks/DomainLinkDb.java +++ b/code/functions/link-graph/partition/java/nu/marginalia/linkgraph/DomainLinks.java @@ -1,16 +1,13 @@ -package nu.marginalia.linkdb.dlinks; +package nu.marginalia.linkgraph; import gnu.trove.list.array.TIntArrayList; -import java.io.IOException; import java.nio.file.Path; -import java.sql.SQLException; -import java.util.Arrays; -/** A database of source-destination pairs of domain IDs. The database is loaded into memory from - * a source. The database is then kept in memory, reloading it upon switchInput(). +/** A repository of source-destination pairs of domain IDs. The database is loaded into memory from + * a source. The data is then kept in memory, reloading it upon switchInput(). */ -public interface DomainLinkDb { +public interface DomainLinks { /** Replace the current db file with the provided file. The provided file will be deleted. * The in-memory database MAY be updated to reflect the change. 
* */ diff --git a/code/services-core/index-service/src/main/java/nu/marginalia/index/svc/IndexDomainLinksService.java b/code/functions/link-graph/partition/java/nu/marginalia/linkgraph/PartitionLinkGraphService.java similarity index 70% rename from code/services-core/index-service/src/main/java/nu/marginalia/index/svc/IndexDomainLinksService.java rename to code/functions/link-graph/partition/java/nu/marginalia/linkgraph/PartitionLinkGraphService.java index b368d289..814f7ffb 100644 --- a/code/services-core/index-service/src/main/java/nu/marginalia/index/svc/IndexDomainLinksService.java +++ b/code/functions/link-graph/partition/java/nu/marginalia/linkgraph/PartitionLinkGraphService.java @@ -1,25 +1,28 @@ -package nu.marginalia.index.svc; +package nu.marginalia.linkgraph; import com.google.inject.Inject; import io.grpc.stub.StreamObserver; -import nu.marginalia.index.api.*; -import nu.marginalia.linkdb.dlinks.DomainLinkDb; +import nu.marginalia.api.linkgraph.*; +import nu.marginalia.api.linkgraph.Empty; +import nu.marginalia.api.linkgraph.LinkGraphApiGrpc; -/** GRPC service for interrogating domain links +/** GRPC service for interrogating domain links for a single partition. For accessing the data + * in the application, the AggregateLinkGraphService should be used instead via the + * AggregateLinkGraphClient. 
*/ -public class IndexDomainLinksService extends IndexDomainLinksApiGrpc.IndexDomainLinksApiImplBase { - private final DomainLinkDb domainLinkDb; +public class PartitionLinkGraphService extends LinkGraphApiGrpc.LinkGraphApiImplBase { + private final DomainLinks domainLinks; @Inject - public IndexDomainLinksService(DomainLinkDb domainLinkDb) { - this.domainLinkDb = domainLinkDb; + public PartitionLinkGraphService(DomainLinks domainLinks) { + this.domainLinks = domainLinks; } - public void getAllLinks(nu.marginalia.index.api.Empty request, - io.grpc.stub.StreamObserver responseObserver) { + public void getAllLinks(Empty request, + io.grpc.stub.StreamObserver responseObserver) { try (var idsConverter = new AllIdsResponseConverter(responseObserver)) { - domainLinkDb.forEach(idsConverter::accept); + domainLinks.forEach(idsConverter::accept); } responseObserver.onCompleted(); @@ -58,7 +61,7 @@ public class IndexDomainLinksService extends IndexDomainLinksApiGrpc.IndexDomain public void getLinksFromDomain(RpcDomainId request, StreamObserver responseObserver) { - var links = domainLinkDb.findDestinations(request.getDomainId()); + var links = domainLinks.findDestinations(request.getDomainId()); var rspBuilder = RpcDomainIdList.newBuilder(); for (int i = 0; i < links.size(); i++) { @@ -73,7 +76,7 @@ public class IndexDomainLinksService extends IndexDomainLinksApiGrpc.IndexDomain public void getLinksToDomain(RpcDomainId request, StreamObserver responseObserver) { - var links = domainLinkDb.findSources(request.getDomainId()); + var links = domainLinks.findSources(request.getDomainId()); var rspBuilder = RpcDomainIdList.newBuilder(); for (int i = 0; i < links.size(); i++) { @@ -87,7 +90,7 @@ public class IndexDomainLinksService extends IndexDomainLinksApiGrpc.IndexDomain public void countLinksFromDomain(RpcDomainId request, StreamObserver responseObserver) { responseObserver.onNext(RpcDomainIdCount.newBuilder() - .setIdCount(domainLinkDb.countDestinations(request.getDomainId())) 
+ .setIdCount(domainLinks.countDestinations(request.getDomainId())) .build()); responseObserver.onCompleted(); } @@ -95,7 +98,7 @@ public class IndexDomainLinksService extends IndexDomainLinksApiGrpc.IndexDomain public void countLinksToDomain(RpcDomainId request, StreamObserver responseObserver) { responseObserver.onNext(RpcDomainIdCount.newBuilder() - .setIdCount(domainLinkDb.countSources(request.getDomainId())) + .setIdCount(domainLinks.countSources(request.getDomainId())) .build()); responseObserver.onCompleted(); } diff --git a/code/common/linkdb/src/main/java/nu/marginalia/linkdb/dlinks/DelayingDomainLinkDb.java b/code/functions/link-graph/partition/java/nu/marginalia/linkgraph/impl/DelayingDomainLinks.java similarity index 85% rename from code/common/linkdb/src/main/java/nu/marginalia/linkdb/dlinks/DelayingDomainLinkDb.java rename to code/functions/link-graph/partition/java/nu/marginalia/linkgraph/impl/DelayingDomainLinks.java index 3d2c7270..cf0ad162 100644 --- a/code/common/linkdb/src/main/java/nu/marginalia/linkdb/dlinks/DelayingDomainLinkDb.java +++ b/code/functions/link-graph/partition/java/nu/marginalia/linkgraph/impl/DelayingDomainLinks.java @@ -1,7 +1,8 @@ -package nu.marginalia.linkdb.dlinks; +package nu.marginalia.linkgraph.impl; import com.google.inject.name.Named; import gnu.trove.list.array.TIntArrayList; +import nu.marginalia.linkgraph.DomainLinks; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -14,13 +15,13 @@ import java.nio.file.StandardCopyOption; * is not yet loaded. This speeds up the startup of the index service, as the database is * loaded in a separate thread. 
*/ -public class DelayingDomainLinkDb implements DomainLinkDb { - private final static Logger logger = LoggerFactory.getLogger(DelayingDomainLinkDb.class); +public class DelayingDomainLinks implements DomainLinks { + private final static Logger logger = LoggerFactory.getLogger(DelayingDomainLinks.class); - private volatile DomainLinkDb currentDb; + private volatile DomainLinks currentDb; private final Path filename; - public DelayingDomainLinkDb(@Named("domain-linkdb-file") Path filename) { + public DelayingDomainLinks(@Named("domain-linkdb-file") Path filename) { this.filename = filename; // Load the database in a separate thread, so that the constructor can return @@ -29,7 +30,7 @@ public class DelayingDomainLinkDb implements DomainLinkDb { Thread.ofPlatform().start(() -> { try { - currentDb = new FileDomainLinkDb(filename); + currentDb = new FileDomainLinks(filename); logger.info("Loaded linkdb"); } catch (Exception e) { logger.error("Failed to load linkdb", e); @@ -43,7 +44,7 @@ public class DelayingDomainLinkDb implements DomainLinkDb { Thread.ofPlatform().start(() -> { try { - currentDb = new FileDomainLinkDb(filename); + currentDb = new FileDomainLinks(filename); } catch (IOException e) { logger.error("Failed to load linkdb", e); } diff --git a/code/common/linkdb/src/main/java/nu/marginalia/linkdb/dlinks/FileDomainLinkDb.java b/code/functions/link-graph/partition/java/nu/marginalia/linkgraph/impl/FileDomainLinks.java similarity index 90% rename from code/common/linkdb/src/main/java/nu/marginalia/linkdb/dlinks/FileDomainLinkDb.java rename to code/functions/link-graph/partition/java/nu/marginalia/linkgraph/impl/FileDomainLinks.java index 0fda3467..68281229 100644 --- a/code/common/linkdb/src/main/java/nu/marginalia/linkdb/dlinks/FileDomainLinkDb.java +++ b/code/functions/link-graph/partition/java/nu/marginalia/linkgraph/impl/FileDomainLinks.java @@ -1,7 +1,9 @@ -package nu.marginalia.linkdb.dlinks; +package nu.marginalia.linkgraph.impl; import 
com.google.inject.name.Named; import gnu.trove.list.array.TIntArrayList; +import nu.marginalia.linkgraph.DomainLinks; +import nu.marginalia.linkgraph.io.DomainLinksLoader; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -14,13 +16,13 @@ import java.util.Arrays; /** Canonical DomainLinkDb implementation. The database is loaded into memory from * a file. The database is then kept in memory, reloading it upon switchInput(). */ -public class FileDomainLinkDb implements DomainLinkDb { - private static final Logger logger = LoggerFactory.getLogger(FileDomainLinkDb.class); +public class FileDomainLinks implements DomainLinks { + private static final Logger logger = LoggerFactory.getLogger(FileDomainLinks.class); private final Path filename; private volatile long[] sourceToDest = new long[0]; private volatile long[] destToSource = new long[0]; - public FileDomainLinkDb(@Named("domain-linkdb-file") Path filename) throws IOException { + public FileDomainLinks(@Named("domain-linkdb-file") Path filename) throws IOException { this.filename = filename; if (Files.exists(filename)) { @@ -35,7 +37,7 @@ public class FileDomainLinkDb implements DomainLinkDb { } public void loadInput(Path filename) throws IOException { - try (var loader = new DomainLinkDbLoader(filename)) { + try (var loader = new DomainLinksLoader(filename)) { int size = loader.size(); var newSourceToDest = new long[size]; diff --git a/code/common/linkdb/src/main/java/nu/marginalia/linkdb/dlinks/DomainLinkDbLoader.java b/code/functions/link-graph/partition/java/nu/marginalia/linkgraph/io/DomainLinksLoader.java similarity index 84% rename from code/common/linkdb/src/main/java/nu/marginalia/linkdb/dlinks/DomainLinkDbLoader.java rename to code/functions/link-graph/partition/java/nu/marginalia/linkgraph/io/DomainLinksLoader.java index 83af733d..4ab10414 100644 --- a/code/common/linkdb/src/main/java/nu/marginalia/linkdb/dlinks/DomainLinkDbLoader.java +++ 
b/code/functions/link-graph/partition/java/nu/marginalia/linkgraph/io/DomainLinksLoader.java @@ -1,17 +1,17 @@ -package nu.marginalia.linkdb.dlinks; +package nu.marginalia.linkgraph.io; import java.io.DataInputStream; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; -public class DomainLinkDbLoader implements AutoCloseable { +public class DomainLinksLoader implements AutoCloseable { private final DataInputStream stream; private final Path filename; private long nextVal; - public DomainLinkDbLoader(Path filename) throws IOException { + public DomainLinksLoader(Path filename) throws IOException { this.stream = new DataInputStream(Files.newInputStream(filename)); this.filename = filename; } diff --git a/code/common/linkdb/src/main/java/nu/marginalia/linkdb/dlinks/DomainLinkDbWriter.java b/code/functions/link-graph/partition/java/nu/marginalia/linkgraph/io/DomainLinksWriter.java similarity index 82% rename from code/common/linkdb/src/main/java/nu/marginalia/linkdb/dlinks/DomainLinkDbWriter.java rename to code/functions/link-graph/partition/java/nu/marginalia/linkgraph/io/DomainLinksWriter.java index 99830443..0744bcf6 100644 --- a/code/common/linkdb/src/main/java/nu/marginalia/linkdb/dlinks/DomainLinkDbWriter.java +++ b/code/functions/link-graph/partition/java/nu/marginalia/linkgraph/io/DomainLinksWriter.java @@ -1,4 +1,4 @@ -package nu.marginalia.linkdb.dlinks; +package nu.marginalia.linkgraph.io; import java.io.DataOutputStream; import java.io.IOException; @@ -6,10 +6,10 @@ import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.StandardOpenOption; -public class DomainLinkDbWriter implements AutoCloseable { +public class DomainLinksWriter implements AutoCloseable { private final DataOutputStream stream; - public DomainLinkDbWriter(Path fileName) throws IOException { + public DomainLinksWriter(Path fileName) throws IOException { this.stream = new DataOutputStream(Files.newOutputStream(fileName, 
StandardOpenOption.CREATE, StandardOpenOption.WRITE, diff --git a/code/functions/link-graph/partition/readme.md b/code/functions/link-graph/partition/readme.md new file mode 100644 index 00000000..cb116bc0 --- /dev/null +++ b/code/functions/link-graph/partition/readme.md @@ -0,0 +1,11 @@ +The link graph partition module is responsible for knowledge about the link graph +for a single index node. It's based on in-memory data structures, and is updated +atomically from file. + +## Central Classes + +* [PartitionLinkGraphService](java/nu/marginalia/linkgraph/PartitionLinkGraphService.java) +* [DomainLinks](java/nu/marginalia/linkgraph/DomainLinks.java) +* * [FileDomainLinks](java/nu/marginalia/linkgraph/impl/FileDomainLinks.java) +* [DomainLinksWriter](java/nu/marginalia/linkgraph/io/DomainLinksWriter.java) +* [DomainLinksLoader](java/nu/marginalia/linkgraph/io/DomainLinksLoader.java) \ No newline at end of file diff --git a/code/common/linkdb/src/test/java/nu/marginalia/linkdb/DomainLinkDbTest.java b/code/functions/link-graph/partition/test/nu/marginalia/linkgraph/DomainLinkDbTest.java similarity index 84% rename from code/common/linkdb/src/test/java/nu/marginalia/linkdb/DomainLinkDbTest.java rename to code/functions/link-graph/partition/test/nu/marginalia/linkgraph/DomainLinkDbTest.java index 6db4a8cf..e8b0d3d3 100644 --- a/code/common/linkdb/src/test/java/nu/marginalia/linkdb/DomainLinkDbTest.java +++ b/code/functions/link-graph/partition/test/nu/marginalia/linkgraph/DomainLinkDbTest.java @@ -1,7 +1,7 @@ -package nu.marginalia.linkdb; +package nu.marginalia.linkgraph; -import nu.marginalia.linkdb.dlinks.DomainLinkDbLoader; -import nu.marginalia.linkdb.dlinks.DomainLinkDbWriter; +import nu.marginalia.linkgraph.io.DomainLinksLoader; +import nu.marginalia.linkgraph.io.DomainLinksWriter; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeEach; @@ -24,7 +24,7 @@ public class DomainLinkDbTest { @Test public
void testWriteRead() { - try (var writer = new DomainLinkDbWriter(fileName)) { + try (var writer = new DomainLinksWriter(fileName)) { writer.write(1, 2); writer.write(2, 3); writer.write(3, 4); @@ -33,7 +33,7 @@ public class DomainLinkDbTest { throw new RuntimeException(ex); } - try (var reader = new DomainLinkDbLoader(fileName)) { + try (var reader = new DomainLinksLoader(fileName)) { Assertions.assertTrue(reader.next()); Assertions.assertEquals(1, reader.getSource()); Assertions.assertEquals(2, reader.getDest()); diff --git a/code/functions/link-graph/readme.md b/code/functions/link-graph/readme.md new file mode 100644 index 00000000..b906978d --- /dev/null +++ b/code/functions/link-graph/readme.md @@ -0,0 +1,9 @@ +The link graph subsystem is responsible for knowledge about the link graph. + +A SQL database is not very well suited for this, principally because it is too slow to update; +instead, the link graph is stored in memory and atomically updated from file. The storage +aspect is handled by the [common/linkdb](../../common/linkdb/) component. + +The link graph subsystem has two components: one which injects into the partitioned services, +e.g. index or execution, and one which aggregates the results from the partitioned services +and exposes a unified view of the link graph.
\ No newline at end of file diff --git a/code/api/assistant-api/build.gradle b/code/functions/math/api/build.gradle similarity index 67% rename from code/api/assistant-api/build.gradle rename to code/functions/math/api/build.gradle index 02bc8780..779c4508 100644 --- a/code/api/assistant-api/build.gradle +++ b/code/functions/math/api/build.gradle @@ -1,7 +1,7 @@ plugins { id 'java' - + id "com.google.protobuf" version "0.9.4" id 'jvm-test-suite' } @@ -10,19 +10,26 @@ java { languageVersion.set(JavaLanguageVersion.of(21)) } } + +jar.archiveBaseName = 'math-api' + +apply from: "$rootProject.projectDir/protobuf.gradle" +apply from: "$rootProject.projectDir/srcsets.gradle" + dependencies { implementation project(':code:common:model') implementation project(':code:common:config') implementation project(':code:common:service-discovery') - implementation project(':code:common:service-client') implementation libs.bundles.slf4j implementation libs.prometheus implementation libs.notnull implementation libs.guice - implementation libs.rxjava implementation libs.gson + implementation libs.protobuf + implementation libs.javax.annotation + implementation libs.bundles.grpc testImplementation libs.bundles.slf4j.test testImplementation libs.bundles.junit diff --git a/code/functions/math/api/java/nu/marginalia/api/math/MathClient.java b/code/functions/math/api/java/nu/marginalia/api/math/MathClient.java new file mode 100644 index 00000000..ee0a55cd --- /dev/null +++ b/code/functions/math/api/java/nu/marginalia/api/math/MathClient.java @@ -0,0 +1,89 @@ +package nu.marginalia.api.math; + +import com.google.inject.Inject; +import com.google.inject.Singleton; +import nu.marginalia.service.client.GrpcChannelPoolFactory; +import nu.marginalia.service.client.GrpcSingleNodeChannelPool; +import nu.marginalia.service.discovery.property.ServiceKey; +import nu.marginalia.service.discovery.property.ServicePartition; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import 
java.time.Duration; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.*; + +import nu.marginalia.api.math.model.*; +import nu.marginalia.api.math.MathProtobufCodec.*; + + +@Singleton +public class MathClient { + private static final Logger logger = LoggerFactory.getLogger(MathClient.class); + + private final GrpcSingleNodeChannelPool channelPool; + private final ExecutorService executor = Executors.newWorkStealingPool(8); + + @Inject + public MathClient(GrpcChannelPoolFactory factory) { + this.channelPool = factory.createSingle( + ServiceKey.forGrpcApi(MathApiGrpc.class, ServicePartition.any()), + MathApiGrpc::newBlockingStub); + + } + + public Future dictionaryLookup(String word) { + return channelPool.call(MathApiGrpc.MathApiBlockingStub::dictionaryLookup) + .async(executor) + .run(DictionaryLookup.createRequest(word)) + .thenApply(DictionaryLookup::convertResponse); + } + + @SuppressWarnings("unchecked") + public Future> spellCheck(String word) { + return channelPool.call(MathApiGrpc.MathApiBlockingStub::spellCheck) + .async(executor) + .run(SpellCheck.createRequest(word)) + .thenApply(SpellCheck::convertResponse); + } + + public Map> spellCheck(List words, Duration timeout) throws InterruptedException { + List requests = words.stream().map(SpellCheck::createRequest).toList(); + + var future = channelPool.call(MathApiGrpc.MathApiBlockingStub::spellCheck) + .async(executor) + .runFor(requests); + + try { + var results = future.get(); + Map> map = new HashMap<>(); + for (int i = 0; i < words.size(); i++) { + map.put(words.get(i), SpellCheck.convertResponse(results.get(i))); + } + return map; + } + catch (ExecutionException e) { + throw new RuntimeException(e); + } + } + + public Future unitConversion(String value, String from, String to) { + return channelPool.call(MathApiGrpc.MathApiBlockingStub::unitConversion) + .async(executor) + .run(UnitConversion.createRequest(from, to, value)) + 
.thenApply(UnitConversion::convertResponse); + } + + public Future evalMath(String expression) { + return channelPool.call(MathApiGrpc.MathApiBlockingStub::evalMath) + .async(executor) + .run(EvalMath.createRequest(expression)) + .thenApply(EvalMath::convertResponse); + } + + public boolean isAccepting() { + return channelPool.hasChannel(); + } +} diff --git a/code/functions/math/api/java/nu/marginalia/api/math/MathProtobufCodec.java b/code/functions/math/api/java/nu/marginalia/api/math/MathProtobufCodec.java new file mode 100644 index 00000000..2b865b21 --- /dev/null +++ b/code/functions/math/api/java/nu/marginalia/api/math/MathProtobufCodec.java @@ -0,0 +1,66 @@ +package nu.marginalia.api.math; + +import nu.marginalia.api.math.model.DictionaryEntry; +import nu.marginalia.api.math.model.DictionaryResponse; + +import java.util.List; + +public class MathProtobufCodec { + + public static class DictionaryLookup { + public static RpcDictionaryLookupRequest createRequest(String word) { + return RpcDictionaryLookupRequest.newBuilder() + .setWord(word) + .build(); + } + public static DictionaryResponse convertResponse(RpcDictionaryLookupResponse rsp) { + return new DictionaryResponse( + rsp.getWord(), + rsp.getEntriesList().stream().map(DictionaryLookup::convertResponseEntry).toList() + ); + } + + private static DictionaryEntry convertResponseEntry(RpcDictionaryEntry e) { + return new DictionaryEntry(e.getType(), e.getWord(), e.getDefinition()); + } + } + + public static class SpellCheck { + public static RpcSpellCheckRequest createRequest(String text) { + return RpcSpellCheckRequest.newBuilder() + .setText(text) + .build(); + } + + public static List convertResponse(RpcSpellCheckResponse rsp) { + return rsp.getSuggestionsList(); + } + } + + public static class UnitConversion { + public static RpcUnitConversionRequest createRequest(String from, String to, String unit) { + return RpcUnitConversionRequest.newBuilder() + .setFrom(from) + .setTo(to) + .setUnit(unit) + 
.build(); + } + + public static String convertResponse(RpcUnitConversionResponse rsp) { + return rsp.getResult(); + } + } + + public static class EvalMath { + public static RpcEvalMathRequest createRequest(String expression) { + return RpcEvalMathRequest.newBuilder() + .setExpression(expression) + .build(); + } + + public static String convertResponse(RpcEvalMathResponse rsp) { + return rsp.getResult(); + } + } + +} diff --git a/code/api/assistant-api/src/main/java/nu/marginalia/assistant/client/model/DictionaryEntry.java b/code/functions/math/api/java/nu/marginalia/api/math/model/DictionaryEntry.java similarity index 84% rename from code/api/assistant-api/src/main/java/nu/marginalia/assistant/client/model/DictionaryEntry.java rename to code/functions/math/api/java/nu/marginalia/api/math/model/DictionaryEntry.java index c40ea97f..084e2e47 100644 --- a/code/api/assistant-api/src/main/java/nu/marginalia/assistant/client/model/DictionaryEntry.java +++ b/code/functions/math/api/java/nu/marginalia/api/math/model/DictionaryEntry.java @@ -1,4 +1,4 @@ -package nu.marginalia.assistant.client.model; +package nu.marginalia.api.math.model; import lombok.AllArgsConstructor; import lombok.Getter; diff --git a/code/api/assistant-api/src/main/java/nu/marginalia/assistant/client/model/DictionaryResponse.java b/code/functions/math/api/java/nu/marginalia/api/math/model/DictionaryResponse.java similarity index 86% rename from code/api/assistant-api/src/main/java/nu/marginalia/assistant/client/model/DictionaryResponse.java rename to code/functions/math/api/java/nu/marginalia/api/math/model/DictionaryResponse.java index 03fbd2e6..0668b6b2 100644 --- a/code/api/assistant-api/src/main/java/nu/marginalia/assistant/client/model/DictionaryResponse.java +++ b/code/functions/math/api/java/nu/marginalia/api/math/model/DictionaryResponse.java @@ -1,4 +1,4 @@ -package nu.marginalia.assistant.client.model; +package nu.marginalia.api.math.model; import lombok.AllArgsConstructor; import 
lombok.Getter; diff --git a/code/functions/math/api/src/main/protobuf/math-api.proto b/code/functions/math/api/src/main/protobuf/math-api.proto new file mode 100644 index 00000000..b29732ee --- /dev/null +++ b/code/functions/math/api/src/main/protobuf/math-api.proto @@ -0,0 +1,57 @@ +syntax="proto3"; +package nu.marginalia.api.math; + +option java_package="nu.marginalia.api.math"; +option java_multiple_files=true; + +service MathApi { + /** Looks up a word in the dictionary. */ + rpc dictionaryLookup(RpcDictionaryLookupRequest) returns (RpcDictionaryLookupResponse) {} + /** Checks the spelling of a text. */ + rpc spellCheck(RpcSpellCheckRequest) returns (RpcSpellCheckResponse) {} + /** Converts a unit from one to another. */ + rpc unitConversion(RpcUnitConversionRequest) returns (RpcUnitConversionResponse) {} + /** Evaluates a mathematical expression. */ + rpc evalMath(RpcEvalMathRequest) returns (RpcEvalMathResponse) {} +} + +message RpcDictionaryLookupRequest { + string word = 1; +} + +message RpcDictionaryLookupResponse { + string word = 1; + repeated RpcDictionaryEntry entries = 2; +} + +message RpcDictionaryEntry { + string type = 1; + string word = 2; + string definition = 3; +} + +message RpcSpellCheckRequest { + string text = 1; +} + +message RpcSpellCheckResponse { + repeated string suggestions = 1; +} + +message RpcUnitConversionRequest { + string unit = 1; + string from = 2; + string to = 3; +} + +message RpcUnitConversionResponse { + string result = 1; +} + +message RpcEvalMathRequest { + string expression = 1; +} + +message RpcEvalMathResponse { + string result = 1; +} diff --git a/code/common/service-client/build.gradle b/code/functions/math/build.gradle similarity index 56% rename from code/common/service-client/build.gradle rename to code/functions/math/build.gradle index da1b1a71..c1fe528d 100644 --- a/code/common/service-client/build.gradle +++ b/code/functions/math/build.gradle @@ -1,6 +1,5 @@ plugins { id 'java' - id "de.undercouch.download" 
version "5.1.0" id 'jvm-test-suite' } @@ -10,25 +9,22 @@ java { } } +apply from: "$rootProject.projectDir/srcsets.gradle" + dependencies { - implementation project(':code:common:service-discovery') + implementation project(':third-party:symspell') + implementation project(':code:functions:math:api') implementation libs.bundles.slf4j - implementation libs.commons.lang3 - implementation libs.spark - implementation libs.guice - implementation libs.rxjava - - implementation libs.okhttp3 - implementation libs.bundles.httpcomponents - - implementation libs.bundles.gson + implementation libs.prometheus implementation libs.bundles.grpc - implementation libs.protobuf - - implementation libs.bundles.prometheus - + implementation libs.notnull + implementation libs.guice + implementation libs.opencsv + implementation libs.trove + implementation libs.fastutil + implementation libs.bundles.gson implementation libs.bundles.mariadb testImplementation libs.bundles.slf4j.test diff --git a/code/functions/math/java/nu/marginalia/functions/math/MathGrpcService.java b/code/functions/math/java/nu/marginalia/functions/math/MathGrpcService.java new file mode 100644 index 00000000..66c0ffe5 --- /dev/null +++ b/code/functions/math/java/nu/marginalia/functions/math/MathGrpcService.java @@ -0,0 +1,92 @@ +package nu.marginalia.functions.math; + +import com.google.inject.Inject; +import io.grpc.stub.StreamObserver; +import nu.marginalia.api.math.*; +import nu.marginalia.functions.math.dict.DictionaryService; +import nu.marginalia.functions.math.dict.SpellChecker; +import nu.marginalia.functions.math.eval.MathParser; +import nu.marginalia.functions.math.eval.Units; + +public class MathGrpcService extends MathApiGrpc.MathApiImplBase { + + private final DictionaryService dictionaryService; + private final SpellChecker spellChecker; + private final Units units; + private final MathParser mathParser; + + @Inject + public MathGrpcService(DictionaryService dictionaryService, SpellChecker spellChecker, 
Units units, MathParser mathParser) + { + + this.dictionaryService = dictionaryService; + this.spellChecker = spellChecker; + this.units = units; + this.mathParser = mathParser; + } + + @Override + public void dictionaryLookup(RpcDictionaryLookupRequest request, + StreamObserver<RpcDictionaryLookupResponse> responseObserver) + { + var definition = dictionaryService.define(request.getWord()); + + var responseBuilder = RpcDictionaryLookupResponse + .newBuilder() + .setWord(request.getWord()); + + for (var def : definition.entries) { + responseBuilder.addEntries( + RpcDictionaryEntry.newBuilder() + .setWord(def.word) + .setDefinition(def.definition) + .setType(def.type) + ); + } + + responseObserver.onNext(responseBuilder.build()); + responseObserver.onCompleted(); + } + + @Override + public void spellCheck(RpcSpellCheckRequest request, + StreamObserver<RpcSpellCheckResponse> responseObserver) + { + var result = spellChecker.correct(request.getText()); + var response = RpcSpellCheckResponse.newBuilder() + .addAllSuggestions(result) + .build(); + + responseObserver.onNext(response); + responseObserver.onCompleted(); + } + + @Override + public void unitConversion(RpcUnitConversionRequest request, StreamObserver<RpcUnitConversionResponse> responseObserver) { + var res = units.convert(request.getUnit(), + request.getFrom(), + request.getTo()); + + res.ifPresent(s -> { + var response = RpcUnitConversionResponse.newBuilder() + .setResult(s) + .build(); + responseObserver.onNext(response); + + }); + + responseObserver.onCompleted(); + } + + @Override + public void evalMath(RpcEvalMathRequest request, StreamObserver<RpcEvalMathResponse> responseObserver) { + var ret = mathParser.eval(request.getExpression()); + + responseObserver.onNext(RpcEvalMathResponse.newBuilder() + .setResult(Double.toString(ret)) + .build()); + + responseObserver.onCompleted(); + } + +} diff --git a/code/services-core/assistant-service/src/main/java/nu/marginalia/assistant/dict/DictionaryService.java b/code/functions/math/java/nu/marginalia/functions/math/dict/DictionaryService.java similarity index
89% rename from code/services-core/assistant-service/src/main/java/nu/marginalia/assistant/dict/DictionaryService.java rename to code/functions/math/java/nu/marginalia/functions/math/dict/DictionaryService.java index 40686f74..630e9035 100644 --- a/code/services-core/assistant-service/src/main/java/nu/marginalia/assistant/dict/DictionaryService.java +++ b/code/functions/math/java/nu/marginalia/functions/math/dict/DictionaryService.java @@ -1,10 +1,10 @@ -package nu.marginalia.assistant.dict; +package nu.marginalia.functions.math.dict; import com.google.inject.Inject; import com.google.inject.Singleton; import com.zaxxer.hikari.HikariDataSource; -import nu.marginalia.assistant.client.model.DictionaryEntry; -import nu.marginalia.assistant.client.model.DictionaryResponse; +import nu.marginalia.api.math.model.DictionaryEntry; +import nu.marginalia.api.math.model.DictionaryResponse; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/code/services-core/assistant-service/src/main/java/nu/marginalia/assistant/dict/SpellChecker.java b/code/functions/math/java/nu/marginalia/functions/math/dict/SpellChecker.java similarity index 91% rename from code/services-core/assistant-service/src/main/java/nu/marginalia/assistant/dict/SpellChecker.java rename to code/functions/math/java/nu/marginalia/functions/math/dict/SpellChecker.java index d1710122..988a533d 100644 --- a/code/services-core/assistant-service/src/main/java/nu/marginalia/assistant/dict/SpellChecker.java +++ b/code/functions/math/java/nu/marginalia/functions/math/dict/SpellChecker.java @@ -1,4 +1,4 @@ -package nu.marginalia.assistant.dict; +package nu.marginalia.functions.math.dict; import com.google.inject.Singleton; import symspell.SymSpell; diff --git a/code/services-core/assistant-service/src/main/java/nu/marginalia/assistant/eval/MathParser.java b/code/functions/math/java/nu/marginalia/functions/math/eval/MathParser.java similarity index 99% rename from 
code/services-core/assistant-service/src/main/java/nu/marginalia/assistant/eval/MathParser.java rename to code/functions/math/java/nu/marginalia/functions/math/eval/MathParser.java index 1ec4bb0a..4e37f83d 100644 --- a/code/services-core/assistant-service/src/main/java/nu/marginalia/assistant/eval/MathParser.java +++ b/code/functions/math/java/nu/marginalia/functions/math/eval/MathParser.java @@ -1,4 +1,4 @@ -package nu.marginalia.assistant.eval; +package nu.marginalia.functions.math.eval; import lombok.AllArgsConstructor; import lombok.SneakyThrows; diff --git a/code/services-core/assistant-service/src/main/java/nu/marginalia/assistant/eval/Unit.java b/code/functions/math/java/nu/marginalia/functions/math/eval/Unit.java similarity index 86% rename from code/services-core/assistant-service/src/main/java/nu/marginalia/assistant/eval/Unit.java rename to code/functions/math/java/nu/marginalia/functions/math/eval/Unit.java index cdc352c8..2216623b 100644 --- a/code/services-core/assistant-service/src/main/java/nu/marginalia/assistant/eval/Unit.java +++ b/code/functions/math/java/nu/marginalia/functions/math/eval/Unit.java @@ -1,4 +1,4 @@ -package nu.marginalia.assistant.eval; +package nu.marginalia.functions.math.eval; public class Unit { diff --git a/code/services-core/assistant-service/src/main/java/nu/marginalia/assistant/eval/Units.java b/code/functions/math/java/nu/marginalia/functions/math/eval/Units.java similarity index 98% rename from code/services-core/assistant-service/src/main/java/nu/marginalia/assistant/eval/Units.java rename to code/functions/math/java/nu/marginalia/functions/math/eval/Units.java index 4cbb141a..bc1cba7b 100644 --- a/code/services-core/assistant-service/src/main/java/nu/marginalia/assistant/eval/Units.java +++ b/code/functions/math/java/nu/marginalia/functions/math/eval/Units.java @@ -1,4 +1,4 @@ -package nu.marginalia.assistant.eval; +package nu.marginalia.functions.math.eval; import com.opencsv.CSVReader; import lombok.SneakyThrows; 
diff --git a/code/services-core/assistant-service/src/main/resources/units.csv b/code/functions/math/resources/units.csv similarity index 100% rename from code/services-core/assistant-service/src/main/resources/units.csv rename to code/functions/math/resources/units.csv diff --git a/code/services-core/assistant-service/src/test/java/nu/marginalia/assistant/eval/MathParserTest.java b/code/functions/math/test/nu/marginalia/functions/math/eval/MathParserTest.java similarity index 93% rename from code/services-core/assistant-service/src/test/java/nu/marginalia/assistant/eval/MathParserTest.java rename to code/functions/math/test/nu/marginalia/functions/math/eval/MathParserTest.java index 4fdfdc71..e9db457e 100644 --- a/code/services-core/assistant-service/src/test/java/nu/marginalia/assistant/eval/MathParserTest.java +++ b/code/functions/math/test/nu/marginalia/functions/math/eval/MathParserTest.java @@ -1,5 +1,6 @@ -package nu.marginalia.assistant.eval; +package nu.marginalia.functions.math.eval; +import nu.marginalia.functions.math.eval.MathParser; import org.junit.jupiter.api.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/code/services-core/assistant-service/src/test/java/nu/marginalia/assistant/eval/UnitsTest.java b/code/functions/math/test/nu/marginalia/functions/math/eval/UnitsTest.java similarity index 90% rename from code/services-core/assistant-service/src/test/java/nu/marginalia/assistant/eval/UnitsTest.java rename to code/functions/math/test/nu/marginalia/functions/math/eval/UnitsTest.java index de2b709b..7f608b5b 100644 --- a/code/services-core/assistant-service/src/test/java/nu/marginalia/assistant/eval/UnitsTest.java +++ b/code/functions/math/test/nu/marginalia/functions/math/eval/UnitsTest.java @@ -1,5 +1,7 @@ -package nu.marginalia.assistant.eval; +package nu.marginalia.functions.math.eval; +import nu.marginalia.functions.math.eval.MathParser; +import nu.marginalia.functions.math.eval.Units; import 
org.junit.jupiter.api.Test; class UnitsTest { diff --git a/code/api/index-api/build.gradle b/code/functions/search-query/api/build.gradle similarity index 70% rename from code/api/index-api/build.gradle rename to code/functions/search-query/api/build.gradle index 08e629a4..8da48164 100644 --- a/code/api/index-api/build.gradle +++ b/code/functions/search-query/api/build.gradle @@ -10,40 +10,33 @@ java { languageVersion.set(JavaLanguageVersion.of(21)) } } -sourceSets { - main { - proto { - srcDir 'src/main/protobuf' - } - } -} + +jar.archiveBaseName = 'search-query-api' + apply from: "$rootProject.projectDir/protobuf.gradle" +apply from: "$rootProject.projectDir/srcsets.gradle" + dependencies { implementation project(':code:common:model') implementation project(':code:common:config') + implementation project(':code:index:query') implementation project(':code:common:service-discovery') - implementation project(':code:common:service-client') - implementation project(':code:libraries:message-queue') - implementation project(':code:features-index:index-query') implementation libs.bundles.slf4j implementation libs.prometheus implementation libs.notnull implementation libs.guice - implementation libs.rxjava + implementation libs.gson implementation libs.protobuf - implementation libs.fastutil implementation libs.javax.annotation - implementation libs.bundles.gson implementation libs.bundles.grpc + implementation libs.fastutil testImplementation libs.bundles.slf4j.test testImplementation libs.bundles.junit testImplementation libs.mockito + } - - - diff --git a/code/api/index-api/src/main/java/nu/marginalia/index/client/IndexProtobufCodec.java b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/IndexProtobufCodec.java similarity index 94% rename from code/api/index-api/src/main/java/nu/marginalia/index/client/IndexProtobufCodec.java rename to code/functions/search-query/api/java/nu/marginalia/api/searchquery/IndexProtobufCodec.java index 633da26e..4b2f0032 
100644 --- a/code/api/index-api/src/main/java/nu/marginalia/index/client/IndexProtobufCodec.java +++ b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/IndexProtobufCodec.java @@ -1,9 +1,9 @@ -package nu.marginalia.index.client; +package nu.marginalia.api.searchquery; -import nu.marginalia.index.api.*; -import nu.marginalia.index.client.model.query.SearchSubquery; -import nu.marginalia.index.client.model.results.Bm25Parameters; -import nu.marginalia.index.client.model.results.ResultRankingParameters; +import nu.marginalia.api.searchquery.*; +import nu.marginalia.api.searchquery.model.query.SearchSubquery; +import nu.marginalia.api.searchquery.model.results.Bm25Parameters; +import nu.marginalia.api.searchquery.model.results.ResultRankingParameters; import nu.marginalia.index.query.limit.QueryLimits; import nu.marginalia.index.query.limit.SpecificationLimit; import nu.marginalia.index.query.limit.SpecificationLimitType; @@ -91,7 +91,7 @@ public class IndexProtobufCodec { ResultRankingParameters.TemporalBias.valueOf(params.getTemporalBias().getBias().name()), params.getTemporalBiasWeight() ); - }; + } public static RpcResultRankingParameters convertRankingParameterss(ResultRankingParameters rankingParams, RpcTemporalBias temporalBias) diff --git a/code/functions/search-query/api/java/nu/marginalia/api/searchquery/QueryClient.java b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/QueryClient.java new file mode 100644 index 00000000..76bdae5f --- /dev/null +++ b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/QueryClient.java @@ -0,0 +1,47 @@ +package nu.marginalia.api.searchquery; + +import com.google.inject.Inject; +import com.google.inject.Singleton; +import io.prometheus.client.Summary; +import nu.marginalia.api.searchquery.model.query.QueryParams; +import nu.marginalia.api.searchquery.model.query.QueryResponse; +import nu.marginalia.service.client.GrpcChannelPoolFactory; +import 
nu.marginalia.service.client.GrpcSingleNodeChannelPool; +import nu.marginalia.service.discovery.property.ServiceKey; +import nu.marginalia.service.discovery.property.ServicePartition; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.annotation.CheckReturnValue; + +@Singleton +public class QueryClient { + + private static final Summary wmsa_qs_api_search_time = Summary.build() + .name("wmsa_qs_api_search_time") + .help("query service search time") + .register(); + + private final GrpcSingleNodeChannelPool queryApiPool; + + private final Logger logger = LoggerFactory.getLogger(getClass()); + + @Inject + public QueryClient(GrpcChannelPoolFactory channelPoolFactory) { + this.queryApiPool = channelPoolFactory.createSingle( + ServiceKey.forGrpcApi(QueryApiGrpc.class, ServicePartition.any()), + QueryApiGrpc::newBlockingStub); + } + + @CheckReturnValue + public QueryResponse search(QueryParams params) { + var query = QueryProtobufCodec.convertQueryParams(params); + + return wmsa_qs_api_search_time.time(() -> + QueryProtobufCodec.convertQueryResponse( + queryApiPool.call(QueryApiGrpc.QueryApiBlockingStub::query).run(query) + ) + ); + } + +} diff --git a/code/api/query-api/src/main/java/nu/marginalia/query/QueryProtobufCodec.java b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/QueryProtobufCodec.java similarity index 59% rename from code/api/query-api/src/main/java/nu/marginalia/query/QueryProtobufCodec.java rename to code/functions/search-query/api/java/nu/marginalia/api/searchquery/QueryProtobufCodec.java index d24a727e..28d14c82 100644 --- a/code/api/query-api/src/main/java/nu/marginalia/query/QueryProtobufCodec.java +++ b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/QueryProtobufCodec.java @@ -1,25 +1,21 @@ -package nu.marginalia.query; +package nu.marginalia.api.searchquery; import lombok.SneakyThrows; -import nu.marginalia.index.api.*; -import nu.marginalia.index.client.IndexProtobufCodec; -import 
nu.marginalia.index.client.model.query.SearchSpecification; -import nu.marginalia.index.client.model.query.SearchSubquery; -import nu.marginalia.index.client.model.results.DecoratedSearchResultItem; -import nu.marginalia.index.client.model.results.ResultRankingParameters; -import nu.marginalia.index.client.model.results.SearchResultItem; -import nu.marginalia.index.client.model.results.SearchResultKeywordScore; +import nu.marginalia.api.searchquery.model.query.SearchSpecification; +import nu.marginalia.api.searchquery.model.query.SearchSubquery; +import nu.marginalia.api.searchquery.model.results.DecoratedSearchResultItem; +import nu.marginalia.api.searchquery.model.results.ResultRankingParameters; +import nu.marginalia.api.searchquery.model.results.SearchResultItem; +import nu.marginalia.api.searchquery.model.results.SearchResultKeywordScore; import nu.marginalia.index.query.limit.QueryStrategy; import nu.marginalia.model.EdgeUrl; -import nu.marginalia.query.model.ProcessedQuery; -import nu.marginalia.query.model.QueryParams; -import nu.marginalia.query.model.QueryResponse; +import nu.marginalia.api.searchquery.model.query.ProcessedQuery; +import nu.marginalia.api.searchquery.model.query.QueryParams; +import nu.marginalia.api.searchquery.model.query.QueryResponse; import java.util.ArrayList; import java.util.List; -import static nu.marginalia.index.client.IndexProtobufCodec.*; - public class QueryProtobufCodec { public static RpcIndexQuery convertQuery(RpcQsQuery request, ProcessedQuery query) { @@ -34,11 +30,10 @@ public class QueryProtobufCodec { builder.setSearchSetIdentifier(query.specs.searchSetIdentifier); builder.setHumanQuery(request.getHumanQuery()); - builder.setQuality(convertSpecLimit(query.specs.quality)); - builder.setYear(convertSpecLimit(query.specs.year)); - builder.setSize(convertSpecLimit(query.specs.size)); - builder.setRank(convertSpecLimit(query.specs.rank)); - builder.setDomainCount(convertSpecLimit(query.specs.domainCount)); + 
builder.setQuality(IndexProtobufCodec.convertSpecLimit(query.specs.quality)); + builder.setYear(IndexProtobufCodec.convertSpecLimit(query.specs.year)); + builder.setSize(IndexProtobufCodec.convertSpecLimit(query.specs.size)); + builder.setRank(IndexProtobufCodec.convertSpecLimit(query.specs.rank)); builder.setQueryLimits(IndexProtobufCodec.convertQueryLimits(query.specs.queryLimits)); @@ -53,6 +48,36 @@ public class QueryProtobufCodec { return builder.build(); } + public static RpcIndexQuery convertQuery(String humanQuery, ProcessedQuery query) { + var builder = RpcIndexQuery.newBuilder(); + + for (var subquery : query.specs.subqueries) { + builder.addSubqueries(IndexProtobufCodec.convertSearchSubquery(subquery)); + } + + builder.setSearchSetIdentifier(query.specs.searchSetIdentifier); + builder.setHumanQuery(humanQuery); + + builder.setQuality(IndexProtobufCodec.convertSpecLimit(query.specs.quality)); + builder.setYear(IndexProtobufCodec.convertSpecLimit(query.specs.year)); + builder.setSize(IndexProtobufCodec.convertSpecLimit(query.specs.size)); + builder.setRank(IndexProtobufCodec.convertSpecLimit(query.specs.rank)); + + builder.setQueryLimits(IndexProtobufCodec.convertQueryLimits(query.specs.queryLimits)); + + // This overload has no request to fall back on; the strategy always comes from the processed query + builder.setQueryStrategy(query.specs.queryStrategy.name()); + + builder.setParameters(IndexProtobufCodec.convertRankingParameterss( + query.specs.rankingParams, + RpcTemporalBias.newBuilder().setBias( + RpcTemporalBias.Bias.NONE) + .build()) + ); + + return builder.build(); + } + public static QueryParams convertRequest(RpcQsQuery request) { return new QueryParams( request.getHumanQuery(), @@ -61,11 +86,10 @@ public class QueryProtobufCodec { request.getTacitExcludesList(), request.getTacitPriorityList(), request.getTacitAdviceList(), - convertSpecLimit(request.getQuality()), - convertSpecLimit(request.getYear()), - convertSpecLimit(request.getSize()), -
convertSpecLimit(request.getRank()), - convertSpecLimit(request.getDomainCount()), + IndexProtobufCodec.convertSpecLimit(request.getQuality()), + IndexProtobufCodec.convertSpecLimit(request.getYear()), + IndexProtobufCodec.convertSpecLimit(request.getSize()), + IndexProtobufCodec.convertSpecLimit(request.getRank()), request.getDomainIdsList(), IndexProtobufCodec.convertQueryLimits(request.getQueryLimits()), request.getSearchSetIdentifier(), @@ -117,7 +141,7 @@ public class QueryProtobufCodec { rawItem.getCombinedId(), keywordScores, rawItem.getResultsFromDomain(), - null + Double.NaN // Not set ); } @@ -127,8 +151,7 @@ public class QueryProtobufCodec { keywordScores.getKeyword(), keywordScores.getEncodedWordMetadata(), keywordScores.getEncodedDocMetadata(), - keywordScores.getHtmlFeatures(), - keywordScores.getHasPriorityTerms() + keywordScores.getHtmlFeatures() ); } @@ -136,7 +159,7 @@ public class QueryProtobufCodec { List subqueries = new ArrayList<>(specs.getSubqueriesCount()); for (int i = 0; i < specs.getSubqueriesCount(); i++) { - subqueries.add(convertSearchSubquery(specs.getSubqueries(i))); + subqueries.add(IndexProtobufCodec.convertSearchSubquery(specs.getSubqueries(i))); } return new SearchSpecification( @@ -148,10 +171,9 @@ public class QueryProtobufCodec { IndexProtobufCodec.convertSpecLimit(specs.getYear()), IndexProtobufCodec.convertSpecLimit(specs.getSize()), IndexProtobufCodec.convertSpecLimit(specs.getRank()), - IndexProtobufCodec.convertSpecLimit(specs.getDomainCount()), IndexProtobufCodec.convertQueryLimits(specs.getQueryLimits()), QueryStrategy.valueOf(specs.getQueryStrategy()), - convertRankingParameterss(specs.getParameters()) + IndexProtobufCodec.convertRankingParameterss(specs.getParameters()) ); } @@ -163,11 +185,11 @@ public class QueryProtobufCodec { .addAllTacitIncludes(params.tacitIncludes()) .addAllTacitPriority(params.tacitPriority()) .setHumanQuery(params.humanQuery()) - .setQueryLimits(convertQueryLimits(params.limits())) - 
.setQuality(convertSpecLimit(params.quality())) - .setYear(convertSpecLimit(params.year())) - .setSize(convertSpecLimit(params.size())) - .setRank(convertSpecLimit(params.rank())) + .setQueryLimits(IndexProtobufCodec.convertQueryLimits(params.limits())) + .setQuality(IndexProtobufCodec.convertSpecLimit(params.quality())) + .setYear(IndexProtobufCodec.convertSpecLimit(params.year())) + .setSize(IndexProtobufCodec.convertSpecLimit(params.size())) + .setRank(IndexProtobufCodec.convertSpecLimit(params.rank())) .setSearchSetIdentifier(params.identifier()) .setQueryStrategy(params.queryStrategy().name()) .setTemporalBias(RpcTemporalBias.newBuilder() @@ -179,4 +201,22 @@ public class QueryProtobufCodec { return builder.build(); } + + @SneakyThrows + public static DecoratedSearchResultItem convertQueryResult(RpcDecoratedResultItem rpcDecoratedResultItem) { + return new DecoratedSearchResultItem( + convertRawResult(rpcDecoratedResultItem.getRawItem()), + new EdgeUrl(rpcDecoratedResultItem.getUrl()), + rpcDecoratedResultItem.getTitle(), + rpcDecoratedResultItem.getDescription(), + rpcDecoratedResultItem.getUrlQuality(), + rpcDecoratedResultItem.getFormat(), + rpcDecoratedResultItem.getFeatures(), + rpcDecoratedResultItem.getPubYear(), + rpcDecoratedResultItem.getDataHash(), + rpcDecoratedResultItem.getWordsTotal(), + rpcDecoratedResultItem.getRankingScore() + ); + } + } diff --git a/code/api/query-api/src/main/java/nu/marginalia/query/model/ProcessedQuery.java b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/query/ProcessedQuery.java similarity index 82% rename from code/api/query-api/src/main/java/nu/marginalia/query/model/ProcessedQuery.java rename to code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/query/ProcessedQuery.java index b5f7e323..09ebdcbd 100644 --- a/code/api/query-api/src/main/java/nu/marginalia/query/model/ProcessedQuery.java +++ 
b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/query/ProcessedQuery.java @@ -1,6 +1,4 @@ -package nu.marginalia.query.model; - -import nu.marginalia.index.client.model.query.SearchSpecification; +package nu.marginalia.api.searchquery.model.query; import java.util.*; diff --git a/code/api/query-api/src/main/java/nu/marginalia/query/model/QueryParams.java b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/query/QueryParams.java similarity index 87% rename from code/api/query-api/src/main/java/nu/marginalia/query/model/QueryParams.java rename to code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/query/QueryParams.java index d588ac2f..176b977e 100644 --- a/code/api/query-api/src/main/java/nu/marginalia/query/model/QueryParams.java +++ b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/query/QueryParams.java @@ -1,10 +1,9 @@ -package nu.marginalia.query.model; +package nu.marginalia.api.searchquery.model.query; -import nu.marginalia.index.client.model.results.ResultRankingParameters; +import nu.marginalia.api.searchquery.model.results.ResultRankingParameters; import nu.marginalia.index.query.limit.QueryLimits; import nu.marginalia.index.query.limit.QueryStrategy; import nu.marginalia.index.query.limit.SpecificationLimit; - import javax.annotation.Nullable; import java.util.List; @@ -20,7 +19,6 @@ public record QueryParams( SpecificationLimit year, SpecificationLimit size, SpecificationLimit rank, - SpecificationLimit domainCount, List domainIds, QueryLimits limits, String identifier, @@ -38,7 +36,6 @@ public record QueryParams( SpecificationLimit.none(), SpecificationLimit.none(), SpecificationLimit.none(), - SpecificationLimit.none(), List.of(), limits, identifier, diff --git a/code/api/query-api/src/main/java/nu/marginalia/query/model/QueryResponse.java b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/query/QueryResponse.java similarity index 76% 
rename from code/api/query-api/src/main/java/nu/marginalia/query/model/QueryResponse.java rename to code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/query/QueryResponse.java index 2adadd83..80e5b61a 100644 --- a/code/api/query-api/src/main/java/nu/marginalia/query/model/QueryResponse.java +++ b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/query/QueryResponse.java @@ -1,7 +1,6 @@ -package nu.marginalia.query.model; +package nu.marginalia.api.searchquery.model.query; -import nu.marginalia.index.client.model.query.SearchSpecification; -import nu.marginalia.index.client.model.results.DecoratedSearchResultItem; +import nu.marginalia.api.searchquery.model.results.DecoratedSearchResultItem; import java.util.HashSet; import java.util.List; diff --git a/code/api/index-api/src/main/java/nu/marginalia/index/client/model/query/SearchSetIdentifier.java b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/query/SearchSetIdentifier.java similarity index 81% rename from code/api/index-api/src/main/java/nu/marginalia/index/client/model/query/SearchSetIdentifier.java rename to code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/query/SearchSetIdentifier.java index 4eb0eae0..43cc13ac 100644 --- a/code/api/index-api/src/main/java/nu/marginalia/index/client/model/query/SearchSetIdentifier.java +++ b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/query/SearchSetIdentifier.java @@ -1,4 +1,4 @@ -package nu.marginalia.index.client.model.query; +package nu.marginalia.api.searchquery.model.query; /** Identifies a RankingSearchSet, associated with an EdgeSearchProfile * diff --git a/code/api/index-api/src/main/java/nu/marginalia/index/client/model/query/SearchSpecification.java b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/query/SearchSpecification.java similarity index 83% rename from 
code/api/index-api/src/main/java/nu/marginalia/index/client/model/query/SearchSpecification.java rename to code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/query/SearchSpecification.java index 2ea743cf..be2a6895 100644 --- a/code/api/index-api/src/main/java/nu/marginalia/index/client/model/query/SearchSpecification.java +++ b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/query/SearchSpecification.java @@ -1,7 +1,7 @@ -package nu.marginalia.index.client.model.query; +package nu.marginalia.api.searchquery.model.query; import lombok.*; -import nu.marginalia.index.client.model.results.ResultRankingParameters; +import nu.marginalia.api.searchquery.model.results.ResultRankingParameters; import nu.marginalia.index.query.limit.QueryLimits; import nu.marginalia.index.query.limit.QueryStrategy; import nu.marginalia.index.query.limit.SpecificationLimit; @@ -24,8 +24,6 @@ public class SearchSpecification { public final SpecificationLimit size; public final SpecificationLimit rank; - public final SpecificationLimit domainCount; - public final QueryLimits queryLimits; public final QueryStrategy queryStrategy; diff --git a/code/api/index-api/src/main/java/nu/marginalia/index/client/model/query/SearchSubquery.java b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/query/SearchSubquery.java similarity index 98% rename from code/api/index-api/src/main/java/nu/marginalia/index/client/model/query/SearchSubquery.java rename to code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/query/SearchSubquery.java index f401008e..3798ae89 100644 --- a/code/api/index-api/src/main/java/nu/marginalia/index/client/model/query/SearchSubquery.java +++ b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/query/SearchSubquery.java @@ -1,4 +1,4 @@ -package nu.marginalia.index.client.model.query; +package nu.marginalia.api.searchquery.model.query; import lombok.AllArgsConstructor; import 
lombok.EqualsAndHashCode; diff --git a/code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/Bm25Parameters.java b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/results/Bm25Parameters.java similarity index 67% rename from code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/Bm25Parameters.java rename to code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/results/Bm25Parameters.java index 9c8b9209..8a21f177 100644 --- a/code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/Bm25Parameters.java +++ b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/results/Bm25Parameters.java @@ -1,11 +1,11 @@ -package nu.marginalia.index.client.model.results; +package nu.marginalia.api.searchquery.model.results; /** Tuning parameters for BM25. * * @param k determines the size of the impact of a single term * @param b determines the magnitude of the length normalization * - * @see nu.marginalia.ranking.factors.Bm25Factor + * @see nu.marginalia.ranking.results.factors.Bm25Factor */ public record Bm25Parameters(double k, double b) { } diff --git a/code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/DecoratedSearchResultItem.java b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/results/DecoratedSearchResultItem.java similarity index 85% rename from code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/DecoratedSearchResultItem.java rename to code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/results/DecoratedSearchResultItem.java index 92504672..b099dc01 100644 --- a/code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/DecoratedSearchResultItem.java +++ b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/results/DecoratedSearchResultItem.java @@ -1,4 +1,4 @@ -package nu.marginalia.index.client.model.results; +package 
nu.marginalia.api.searchquery.model.results; import lombok.Getter; import lombok.ToString; @@ -10,7 +10,7 @@ import java.util.List; @Getter @ToString -public class DecoratedSearchResultItem { +public class DecoratedSearchResultItem implements Comparable<DecoratedSearchResultItem> { public final SearchResultItem rawIndexResult; @NotNull @@ -79,4 +79,14 @@ public class DecoratedSearchResultItem { this.wordsTotal = wordsTotal; this.rankingScore = rankingScore; } + + @Override + public int compareTo(@NotNull DecoratedSearchResultItem o) { + int diff = Double.compare(rankingScore, o.rankingScore); + + if (diff == 0) + diff = Long.compare(documentId(), o.documentId()); + + return diff; + } } diff --git a/code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/ResultRankingContext.java b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/results/ResultRankingContext.java similarity index 95% rename from code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/ResultRankingContext.java rename to code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/results/ResultRankingContext.java index 5837a543..f0ad172f 100644 --- a/code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/ResultRankingContext.java +++ b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/results/ResultRankingContext.java @@ -1,4 +1,4 @@ -package nu.marginalia.index.client.model.results; +package nu.marginalia.api.searchquery.model.results; import it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap; import lombok.ToString; diff --git a/code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/ResultRankingParameters.java b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/results/ResultRankingParameters.java similarity index 97% rename from code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/ResultRankingParameters.java rename to
code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/results/ResultRankingParameters.java index b7bb9075..a16ccf8b 100644 --- a/code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/ResultRankingParameters.java +++ b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/results/ResultRankingParameters.java @@ -1,4 +1,4 @@ -package nu.marginalia.index.client.model.results; +package nu.marginalia.api.searchquery.model.results; import lombok.AllArgsConstructor; import lombok.Builder; @@ -58,5 +58,5 @@ public class ResultRankingParameters { public enum TemporalBias { RECENT, OLD, NONE - }; + } } diff --git a/code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/SearchResultItem.java b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/results/SearchResultItem.java similarity index 87% rename from code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/SearchResultItem.java rename to code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/results/SearchResultItem.java index 7393e3b4..cc02ae28 100644 --- a/code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/SearchResultItem.java +++ b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/results/SearchResultItem.java @@ -1,4 +1,4 @@ -package nu.marginalia.index.client.model.results; +package nu.marginalia.api.searchquery.model.results; import lombok.AllArgsConstructor; import lombok.Getter; @@ -36,11 +36,11 @@ public class SearchResultItem implements Comparable { } /* Used for evaluation */ - private transient SearchResultPreliminaryScore scoreValue = null; - public void setScore(SearchResultPreliminaryScore score) { + private transient double scoreValue = Double.MAX_VALUE; + public void setScore(double score) { scoreValue = score; } - public SearchResultPreliminaryScore getScore() { + public double getScore() { return scoreValue; } @@ -70,7 +70,7 
@@ public class SearchResultItem implements Comparable { @Override public int compareTo(@NotNull SearchResultItem o) { // this looks like a bug, but we actually want this in a reversed order - int diff = o.getScore().compareTo(getScore()); + int diff = Double.compare(getScore(), o.getScore()); if (diff != 0) return diff; diff --git a/code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/SearchResultKeywordScore.java b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/results/SearchResultKeywordScore.java similarity index 82% rename from code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/SearchResultKeywordScore.java rename to code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/results/SearchResultKeywordScore.java index 77266112..b84dad0b 100644 --- a/code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/SearchResultKeywordScore.java +++ b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/results/SearchResultKeywordScore.java @@ -1,4 +1,4 @@ -package nu.marginalia.index.client.model.results; +package nu.marginalia.api.searchquery.model.results; import nu.marginalia.model.idx.WordFlags; import nu.marginalia.model.idx.WordMetadata; @@ -11,7 +11,6 @@ public final class SearchResultKeywordScore { public final String keyword; private final long encodedWordMetadata; private final long encodedDocMetadata; - private final boolean hasPriorityTerms; private final int htmlFeatures; @@ -19,14 +18,12 @@ public final class SearchResultKeywordScore { String keyword, long encodedWordMetadata, long encodedDocMetadata, - int htmlFeatures, - boolean hasPriorityTerms) { + int htmlFeatures) { this.subquery = subquery; this.keyword = keyword; this.encodedWordMetadata = encodedWordMetadata; this.encodedDocMetadata = encodedDocMetadata; this.htmlFeatures = htmlFeatures; - this.hasPriorityTerms = hasPriorityTerms; } public boolean hasTermFlag(WordFlags flag) { 
@@ -65,10 +62,6 @@ public final class SearchResultKeywordScore { return htmlFeatures; } - public boolean hasPriorityTerms() { - return hasPriorityTerms; - } - @Override public boolean equals(Object obj) { if (obj == this) return true; @@ -77,13 +70,12 @@ public final class SearchResultKeywordScore { return this.subquery == that.subquery && Objects.equals(this.keyword, that.keyword) && this.encodedWordMetadata == that.encodedWordMetadata && - this.encodedDocMetadata == that.encodedDocMetadata && - this.hasPriorityTerms == that.hasPriorityTerms; + this.encodedDocMetadata == that.encodedDocMetadata; } @Override public int hashCode() { - return Objects.hash(subquery, keyword, encodedWordMetadata, encodedDocMetadata, hasPriorityTerms); + return Objects.hash(subquery, keyword, encodedWordMetadata, encodedDocMetadata); } @Override @@ -92,8 +84,7 @@ public final class SearchResultKeywordScore { "set=" + subquery + ", " + "keyword=" + keyword + ", " + "encodedWordMetadata=" + new WordMetadata(encodedWordMetadata) + ", " + - "encodedDocMetadata=" + new DocumentMetadata(encodedDocMetadata) + ", " + - "hasPriorityTerms=" + hasPriorityTerms + ']'; + "encodedDocMetadata=" + new DocumentMetadata(encodedDocMetadata) + ']'; } } diff --git a/code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/SearchResultPreliminaryScore.java b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/results/SearchResultPreliminaryScore.java similarity index 73% rename from code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/SearchResultPreliminaryScore.java rename to code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/results/SearchResultPreliminaryScore.java index a46e14b1..81e16474 100644 --- a/code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/SearchResultPreliminaryScore.java +++ 
b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/results/SearchResultPreliminaryScore.java @@ -1,4 +1,4 @@ -package nu.marginalia.index.client.model.results; +package nu.marginalia.api.searchquery.model.results; import org.jetbrains.annotations.NotNull; @@ -6,7 +6,6 @@ import static java.lang.Boolean.compare; import static java.lang.Double.compare; public record SearchResultPreliminaryScore( - boolean hasPriorityTerm, double searchRankingScore) implements Comparable { @@ -18,9 +17,6 @@ public record SearchResultPreliminaryScore( public int compareTo(@NotNull SearchResultPreliminaryScore other) { int diff; - diff = PREFER_HIGH * compare(hasPriorityTerm, other.hasPriorityTerm); - if (diff != 0) return diff; - return PREFER_LOW * compare(searchRankingScore, other.searchRankingScore); } diff --git a/code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/SearchResultSet.java b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/results/SearchResultSet.java similarity index 94% rename from code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/SearchResultSet.java rename to code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/results/SearchResultSet.java index f4159f3a..c7e76fd9 100644 --- a/code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/SearchResultSet.java +++ b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/results/SearchResultSet.java @@ -1,4 +1,4 @@ -package nu.marginalia.index.client.model.results; +package nu.marginalia.api.searchquery.model.results; import lombok.AllArgsConstructor; import lombok.Getter; diff --git a/code/api/index-api/src/main/protobuf/index-api.proto b/code/functions/search-query/api/src/main/protobuf/query-api.proto similarity index 82% rename from code/api/index-api/src/main/protobuf/index-api.proto rename to code/functions/search-query/api/src/main/protobuf/query-api.proto index 
ad05152e..f5ec5e8d 100644 --- a/code/api/index-api/src/main/protobuf/index-api.proto +++ b/code/functions/search-query/api/src/main/protobuf/query-api.proto @@ -1,31 +1,9 @@ syntax="proto3"; -package actorapi; +package nu.marginalia.api.searchquery; -option java_package="nu.marginalia.index.api"; +option java_package="nu.marginalia.api.searchquery"; option java_multiple_files=true; -service IndexDomainLinksApi { - rpc getAllLinks(Empty) returns (stream RpcDomainIdPairs) {} - rpc getLinksFromDomain(RpcDomainId) returns (RpcDomainIdList) {} - rpc getLinksToDomain(RpcDomainId) returns (RpcDomainIdList) {} - rpc countLinksFromDomain(RpcDomainId) returns (RpcDomainIdCount) {} - rpc countLinksToDomain(RpcDomainId) returns (RpcDomainIdCount) {} -} - -message RpcDomainId { - int32 domainId = 1; -} -message RpcDomainIdList { - repeated int32 domainId = 1 [packed=true]; -} -message RpcDomainIdCount { - int32 idCount = 1; -} -message RpcDomainIdPairs { - repeated int32 sourceIds = 1 [packed=true]; - repeated int32 destIds = 2 [packed=true]; -} - service QueryApi { rpc query(RpcQsQuery) returns (RpcQsResponse) {} } @@ -47,7 +25,6 @@ message RpcQsQuery { RpcSpecLimit year = 8; RpcSpecLimit size = 9; RpcSpecLimit rank = 10; - RpcSpecLimit domainCount = 11; repeated int32 domainIds = 12; RpcQueryLimits queryLimits = 13; string searchSetIdentifier = 14; @@ -57,11 +34,11 @@ message RpcQsQuery { /* Query service query response */ message RpcQsResponse { - RpcIndexQuery specs = 1; - repeated RpcDecoratedResultItem results = 2; - repeated string searchTermsHuman = 3; - repeated string problems = 4; - string domain = 5; + RpcIndexQuery specs = 1; + repeated RpcDecoratedResultItem results = 2; + repeated string searchTermsHuman = 3; + repeated string problems = 4; + string domain = 5; } message RpcTemporalBias { @@ -83,7 +60,6 @@ message RpcIndexQuery { RpcSpecLimit year = 6; RpcSpecLimit size = 7; RpcSpecLimit rank = 8; - RpcSpecLimit domainCount = 9; RpcQueryLimits queryLimits = 10; 
string queryStrategy = 11; // Named query configuration RpcResultRankingParameters parameters = 12; diff --git a/code/api/index-api/src/test/java/nu/marginalia/index/client/IndexProtobufCodecTest.java b/code/functions/search-query/api/test/nu/marginalia/index/client/IndexProtobufCodecTest.java similarity index 91% rename from code/api/index-api/src/test/java/nu/marginalia/index/client/IndexProtobufCodecTest.java rename to code/functions/search-query/api/test/nu/marginalia/index/client/IndexProtobufCodecTest.java index 2ea6d9ea..1782765d 100644 --- a/code/api/index-api/src/test/java/nu/marginalia/index/client/IndexProtobufCodecTest.java +++ b/code/functions/search-query/api/test/nu/marginalia/index/client/IndexProtobufCodecTest.java @@ -1,8 +1,8 @@ package nu.marginalia.index.client; -import nu.marginalia.index.api.RpcTemporalBias; -import nu.marginalia.index.client.model.query.SearchSubquery; -import nu.marginalia.index.client.model.results.ResultRankingParameters; +import nu.marginalia.api.searchquery.IndexProtobufCodec; +import nu.marginalia.api.searchquery.model.query.SearchSubquery; +import nu.marginalia.api.searchquery.model.results.ResultRankingParameters; import nu.marginalia.index.query.limit.QueryLimits; import nu.marginalia.index.query.limit.SpecificationLimit; import org.junit.jupiter.api.Test; diff --git a/code/features-qs/query-parser/build.gradle b/code/functions/search-query/build.gradle similarity index 52% rename from code/features-qs/query-parser/build.gradle rename to code/functions/search-query/build.gradle index 8fd14810..14b662f9 100644 --- a/code/features-qs/query-parser/build.gradle +++ b/code/functions/search-query/build.gradle @@ -1,34 +1,48 @@ plugins { id 'java' - - id 'jvm-test-suite' } - java { toolchain { languageVersion.set(JavaLanguageVersion.of(21)) } } + +apply from: "$rootProject.projectDir/srcsets.gradle" + dependencies { + implementation project(':code:index:api') + + implementation project(':code:common:db') + implementation 
project(':code:common:config') + implementation project(':code:common:model') + implementation project(':code:common:service-discovery') + + implementation project(':code:functions:search-query:api') + + implementation project(':code:index:query') + + implementation project(':code:libraries:language-processing') + implementation project(':code:libraries:term-frequency-dict') + + implementation project(':third-party:porterstemmer') implementation project(':code:libraries:language-processing') implementation project(':code:libraries:term-frequency-dict') implementation project(':code:features-convert:keyword-extraction') - implementation project(':code:common:config') - implementation project(':code:common:model') - - implementation project(':third-party:porterstemmer') implementation libs.bundles.slf4j - implementation libs.bundles.nlp - implementation libs.bundles.handlebars - implementation libs.trove + implementation libs.prometheus + implementation libs.bundles.grpc + implementation libs.notnull implementation libs.guice + implementation libs.trove + implementation libs.fastutil + implementation libs.bundles.gson + implementation libs.bundles.mariadb testImplementation libs.bundles.slf4j.test testImplementation libs.bundles.junit testImplementation libs.mockito } - diff --git a/code/services-core/query-service/src/main/java/nu/marginalia/query/QueryGRPCService.java b/code/functions/search-query/java/nu/marginalia/functions/searchquery/QueryGRPCService.java similarity index 54% rename from code/services-core/query-service/src/main/java/nu/marginalia/query/QueryGRPCService.java rename to code/functions/search-query/java/nu/marginalia/functions/searchquery/QueryGRPCService.java index bd2c0452..d2cdd27d 100644 --- a/code/services-core/query-service/src/main/java/nu/marginalia/query/QueryGRPCService.java +++ b/code/functions/search-query/java/nu/marginalia/functions/searchquery/QueryGRPCService.java @@ -1,21 +1,23 @@ -package nu.marginalia.query; +package 
nu.marginalia.functions.searchquery; import com.google.inject.Inject; -import io.grpc.ManagedChannel; +import com.google.inject.Singleton; +import io.grpc.stub.StreamObserver; import io.prometheus.client.Histogram; import lombok.SneakyThrows; -import nu.marginalia.client.grpc.GrpcChannelPool; +import nu.marginalia.api.searchquery.*; +import nu.marginalia.api.searchquery.model.query.QueryParams; import nu.marginalia.db.DomainBlacklist; -import nu.marginalia.index.api.*; +import nu.marginalia.index.api.IndexClient; +import nu.marginalia.functions.searchquery.svc.QueryFactory; +import nu.marginalia.api.searchquery.model.results.DecoratedSearchResultItem; import nu.marginalia.model.id.UrlIdCodec; -import nu.marginalia.query.svc.QueryFactory; -import nu.marginalia.service.id.ServiceId; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.util.*; -import java.util.concurrent.*; +@Singleton public class QueryGRPCService extends QueryApiGrpc.QueryApiImplBase { private final Logger logger = LoggerFactory.getLogger(QueryGRPCService.class); @@ -27,34 +29,21 @@ public class QueryGRPCService extends QueryApiGrpc.QueryApiImplBase { .help("QS-side query time (GRPC endpoint)") .register(); - private final GrpcChannelPool channelPool; private final QueryFactory queryFactory; private final DomainBlacklist blacklist; - + private final IndexClient indexClient; @Inject public QueryGRPCService(QueryFactory queryFactory, DomainBlacklist blacklist, - NodeConfigurationWatcher nodeConfigurationWatcher) + IndexClient indexClient) { this.queryFactory = queryFactory; this.blacklist = blacklist; - - channelPool = new GrpcChannelPool<>(ServiceId.Index) { - @Override - public IndexApiGrpc.IndexApiBlockingStub createStub(ManagedChannel channel) { - return IndexApiGrpc.newBlockingStub(channel); - } - - @Override - public List getEligibleNodes() { - return nodeConfigurationWatcher.getQueryNodes(); - } - }; + this.indexClient = indexClient; } - public void 
query(nu.marginalia.index.api.RpcQsQuery request, - io.grpc.stub.StreamObserver responseObserver) + public void query(RpcQsQuery request, StreamObserver responseObserver) { try { wmsa_qs_query_time_grpc @@ -87,44 +76,28 @@ public class QueryGRPCService extends QueryApiGrpc.QueryApiImplBase { private static final Comparator comparator = Comparator.comparing(RpcDecoratedResultItem::getRankingScore); - @SneakyThrows - private List executeQueries(RpcIndexQuery indexRequest, int totalSize) { - return channelPool.invokeAll(stub -> new QueryTask(stub, indexRequest)) - .stream() - .filter(f -> f.state() == Future.State.SUCCESS) - .map(Future::resultNow) - .flatMap(List::stream) - .sorted(comparator) - .limit(totalSize) - .toList(); - } - - private class QueryTask implements Callable> { - private final IndexApiGrpc.IndexApiBlockingStub stub; - private final RpcIndexQuery indexRequest; - - public QueryTask(IndexApiGrpc.IndexApiBlockingStub stub, RpcIndexQuery indexRequest) { - this.stub = stub; - this.indexRequest = indexRequest; - } - - @Override - public List call() { - var rsp = stub.query(indexRequest); - List ret = new ArrayList<>(); - - while (rsp.hasNext()) { - RpcDecoratedResultItem next = rsp.next(); - if (isBlacklisted(next)) - continue; - ret.add(next); - } - - return ret; - } - } private boolean isBlacklisted(RpcDecoratedResultItem item) { return blacklist.isBlacklisted(UrlIdCodec.getDomainId(item.getRawItem().getCombinedId())); } + + public List executeDirect(String originalQuery, QueryParams params, int count) { + var query = queryFactory.createQuery(params); + + return executeQueries( + QueryProtobufCodec.convertQuery(originalQuery, query), + count) + .stream().map(QueryProtobufCodec::convertQueryResult) + .toList(); + } + + @SneakyThrows + List executeQueries(RpcIndexQuery indexRequest, int totalSize) { + var results = indexClient.executeQueries(indexRequest); + + results.sort(comparator); + results.removeIf(this::isBlacklisted); + return results.subList(0, 
Math.min(totalSize, results.size())); + } + } diff --git a/code/features-qs/query-parser/src/main/java/nu/marginalia/query_parser/QueryParser.java b/code/functions/search-query/java/nu/marginalia/functions/searchquery/query_parser/QueryParser.java similarity index 91% rename from code/features-qs/query-parser/src/main/java/nu/marginalia/query_parser/QueryParser.java rename to code/functions/search-query/java/nu/marginalia/functions/searchquery/query_parser/QueryParser.java index 2abda918..bbaf5c87 100644 --- a/code/features-qs/query-parser/src/main/java/nu/marginalia/query_parser/QueryParser.java +++ b/code/functions/search-query/java/nu/marginalia/functions/searchquery/query_parser/QueryParser.java @@ -1,9 +1,9 @@ -package nu.marginalia.query_parser; +package nu.marginalia.functions.searchquery.query_parser; import nu.marginalia.language.WordPatterns; -import nu.marginalia.query_parser.token.Token; -import nu.marginalia.query_parser.token.TokenType; -import nu.marginalia.transform_list.TransformList; +import nu.marginalia.functions.searchquery.query_parser.token.Token; +import nu.marginalia.functions.searchquery.query_parser.token.TokenType; +import nu.marginalia.util.transform_list.TransformList; import java.util.List; @@ -82,8 +82,6 @@ public class QueryParser { entity.replace(new Token(TokenType.SIZE_TERM, t.str.substring(4), t.displayStr)); } else if (t.str.startsWith("rank") && t.str.matches("rank[=><]\\d+")) { entity.replace(new Token(TokenType.RANK_TERM, t.str.substring(4), t.displayStr)); - } else if (t.str.startsWith("count") && t.str.matches("count[=><]\\d+")) { - entity.replace(new Token(TokenType.DOMAIN_COUNT_TERM, t.str.substring(5), t.displayStr)); } else if (t.str.startsWith("qs=")) { entity.replace(new Token(TokenType.QS_TERM, t.str.substring(3), t.displayStr)); } else if (t.str.contains(":")) { diff --git a/code/features-qs/query-parser/src/main/java/nu/marginalia/query_parser/QueryPermutation.java 
b/code/functions/search-query/java/nu/marginalia/functions/searchquery/query_parser/QueryPermutation.java similarity index 97% rename from code/features-qs/query-parser/src/main/java/nu/marginalia/query_parser/QueryPermutation.java rename to code/functions/search-query/java/nu/marginalia/functions/searchquery/query_parser/QueryPermutation.java index 62774449..417ceda3 100644 --- a/code/features-qs/query-parser/src/main/java/nu/marginalia/query_parser/QueryPermutation.java +++ b/code/functions/search-query/java/nu/marginalia/functions/searchquery/query_parser/QueryPermutation.java @@ -1,8 +1,8 @@ -package nu.marginalia.query_parser; +package nu.marginalia.functions.searchquery.query_parser; +import nu.marginalia.functions.searchquery.query_parser.token.Token; +import nu.marginalia.functions.searchquery.query_parser.token.TokenType; import nu.marginalia.language.WordPatterns; -import nu.marginalia.query_parser.token.Token; -import nu.marginalia.query_parser.token.TokenType; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/code/features-qs/query-parser/src/main/java/nu/marginalia/query_parser/QueryTokenizer.java b/code/functions/search-query/java/nu/marginalia/functions/searchquery/query_parser/QueryTokenizer.java similarity index 90% rename from code/features-qs/query-parser/src/main/java/nu/marginalia/query_parser/QueryTokenizer.java rename to code/functions/search-query/java/nu/marginalia/functions/searchquery/query_parser/QueryTokenizer.java index 992e3ee5..b7b0a2b7 100644 --- a/code/features-qs/query-parser/src/main/java/nu/marginalia/query_parser/QueryTokenizer.java +++ b/code/functions/search-query/java/nu/marginalia/functions/searchquery/query_parser/QueryTokenizer.java @@ -1,8 +1,8 @@ -package nu.marginalia.query_parser; +package nu.marginalia.functions.searchquery.query_parser; +import nu.marginalia.functions.searchquery.query_parser.token.Token; +import nu.marginalia.functions.searchquery.query_parser.token.TokenType; import 
nu.marginalia.language.encoding.AsciiFlattener; -import nu.marginalia.query_parser.token.Token; -import nu.marginalia.query_parser.token.TokenType; import java.util.ArrayList; import java.util.List; diff --git a/code/features-qs/query-parser/src/main/java/nu/marginalia/query_parser/QueryVariants.java b/code/functions/search-query/java/nu/marginalia/functions/searchquery/query_parser/QueryVariants.java similarity index 96% rename from code/features-qs/query-parser/src/main/java/nu/marginalia/query_parser/QueryVariants.java rename to code/functions/search-query/java/nu/marginalia/functions/searchquery/query_parser/QueryVariants.java index eb4abd79..9732e53f 100644 --- a/code/features-qs/query-parser/src/main/java/nu/marginalia/query_parser/QueryVariants.java +++ b/code/functions/search-query/java/nu/marginalia/functions/searchquery/query_parser/QueryVariants.java @@ -1,20 +1,20 @@ -package nu.marginalia.query_parser; +package nu.marginalia.functions.searchquery.query_parser; +import ca.rmen.porterstemmer.PorterStemmer; import lombok.AllArgsConstructor; import lombok.EqualsAndHashCode; import lombok.Getter; import lombok.ToString; +import nu.marginalia.functions.searchquery.query_parser.token.Token; +import nu.marginalia.functions.searchquery.query_parser.token.TokenType; +import nu.marginalia.util.language.EnglishDictionary; import nu.marginalia.LanguageModels; import nu.marginalia.keyword.KeywordExtractor; -import nu.marginalia.language.EnglishDictionary; import nu.marginalia.language.sentence.SentenceExtractor; -import nu.marginalia.ngrams.NGramBloomFilter; +import nu.marginalia.util.ngrams.NGramBloomFilter; import nu.marginalia.term_frequency_dict.TermFrequencyDict; import nu.marginalia.language.model.DocumentSentence; import nu.marginalia.language.model.WordSpan; -import nu.marginalia.query_parser.token.Token; -import nu.marginalia.query_parser.token.TokenType; -import opennlp.tools.stemmer.PorterStemmer; import java.util.*; import java.util.regex.Pattern; @@ 
-251,7 +251,7 @@ public class QueryVariants { for (var span : ls) { var matcher = dashBoundary.matcher(span.word); - if (matcher.find() && nGramBloomFilter.isKnownNGram(ps.stem(dashBoundary.matcher(span.word).replaceAll("")))) { + if (matcher.find() && nGramBloomFilter.isKnownNGram(ps.stemWord(dashBoundary.matcher(span.word).replaceAll("")))) { dash = true; String combined = dashBoundary.matcher(span.word).replaceAll(""); asTokens2.add(combined); @@ -332,7 +332,7 @@ public class QueryVariants { var a = span.get(i); var b = span.get(i+1); - var stemmed = ps.stem(a.word + b.word); + var stemmed = ps.stemWord(a.word + b.word); double scoreCombo = dict.getTermFreqStemmed(stemmed); if (scoreCombo > 10000) { diff --git a/code/features-qs/query-parser/src/main/java/nu/marginalia/query_parser/token/Token.java b/code/functions/search-query/java/nu/marginalia/functions/searchquery/query_parser/token/Token.java similarity index 92% rename from code/features-qs/query-parser/src/main/java/nu/marginalia/query_parser/token/Token.java rename to code/functions/search-query/java/nu/marginalia/functions/searchquery/query_parser/token/Token.java index 31aa4e22..06c28972 100644 --- a/code/features-qs/query-parser/src/main/java/nu/marginalia/query_parser/token/Token.java +++ b/code/functions/search-query/java/nu/marginalia/functions/searchquery/query_parser/token/Token.java @@ -1,4 +1,4 @@ -package nu.marginalia.query_parser.token; +package nu.marginalia.functions.searchquery.query_parser.token; import lombok.EqualsAndHashCode; import lombok.ToString; @@ -40,7 +40,6 @@ public class Token { case YEAR_TERM: visitor.onYearTerm(this); break; case RANK_TERM: visitor.onRankTerm(this); break; - case DOMAIN_COUNT_TERM: visitor.onDomainCountTerm(this); break; case SIZE_TERM: visitor.onSizeTerm(this); break; case QS_TERM: visitor.onQsTerm(this); break; diff --git a/code/features-qs/query-parser/src/main/java/nu/marginalia/query_parser/token/TokenType.java 
b/code/functions/search-query/java/nu/marginalia/functions/searchquery/query_parser/token/TokenType.java similarity index 86% rename from code/features-qs/query-parser/src/main/java/nu/marginalia/query_parser/token/TokenType.java rename to code/functions/search-query/java/nu/marginalia/functions/searchquery/query_parser/token/TokenType.java index d929665f..85d55c35 100644 --- a/code/features-qs/query-parser/src/main/java/nu/marginalia/query_parser/token/TokenType.java +++ b/code/functions/search-query/java/nu/marginalia/functions/searchquery/query_parser/token/TokenType.java @@ -1,4 +1,4 @@ -package nu.marginalia.query_parser.token; +package nu.marginalia.functions.searchquery.query_parser.token; import java.util.function.Predicate; @@ -16,7 +16,6 @@ public enum TokenType implements Predicate { YEAR_TERM, SIZE_TERM, RANK_TERM, - DOMAIN_COUNT_TERM, NEAR_TERM, QS_TERM, diff --git a/code/features-qs/query-parser/src/main/java/nu/marginalia/query_parser/token/TokenVisitor.java b/code/functions/search-query/java/nu/marginalia/functions/searchquery/query_parser/token/TokenVisitor.java similarity index 82% rename from code/features-qs/query-parser/src/main/java/nu/marginalia/query_parser/token/TokenVisitor.java rename to code/functions/search-query/java/nu/marginalia/functions/searchquery/query_parser/token/TokenVisitor.java index a5c53e4e..2e14f837 100644 --- a/code/features-qs/query-parser/src/main/java/nu/marginalia/query_parser/token/TokenVisitor.java +++ b/code/functions/search-query/java/nu/marginalia/functions/searchquery/query_parser/token/TokenVisitor.java @@ -1,4 +1,4 @@ -package nu.marginalia.query_parser.token; +package nu.marginalia.functions.searchquery.query_parser.token; public interface TokenVisitor { void onLiteralTerm(Token token); @@ -9,7 +9,6 @@ public interface TokenVisitor { void onYearTerm(Token token); void onSizeTerm(Token token); void onRankTerm(Token token); - void onDomainCountTerm(Token token); void onQualityTerm(Token token); void 
onQsTerm(Token token); } diff --git a/code/services-core/query-service/src/main/java/nu/marginalia/query/svc/QueryFactory.java b/code/functions/search-query/java/nu/marginalia/functions/searchquery/svc/QueryFactory.java similarity index 86% rename from code/services-core/query-service/src/main/java/nu/marginalia/query/svc/QueryFactory.java rename to code/functions/search-query/java/nu/marginalia/functions/searchquery/svc/QueryFactory.java index d0234326..ac7ce2b2 100644 --- a/code/services-core/query-service/src/main/java/nu/marginalia/query/svc/QueryFactory.java +++ b/code/functions/search-query/java/nu/marginalia/functions/searchquery/svc/QueryFactory.java @@ -1,21 +1,21 @@ -package nu.marginalia.query.svc; +package nu.marginalia.functions.searchquery.svc; import com.google.inject.Inject; import com.google.inject.Singleton; import nu.marginalia.LanguageModels; -import nu.marginalia.index.client.model.query.SearchSpecification; -import nu.marginalia.index.client.model.query.SearchSubquery; -import nu.marginalia.index.client.model.results.ResultRankingParameters; -import nu.marginalia.language.EnglishDictionary; +import nu.marginalia.api.searchquery.model.query.SearchSpecification; +import nu.marginalia.api.searchquery.model.query.SearchSubquery; +import nu.marginalia.api.searchquery.model.results.ResultRankingParameters; +import nu.marginalia.util.language.EnglishDictionary; import nu.marginalia.language.WordPatterns; -import nu.marginalia.ngrams.NGramBloomFilter; -import nu.marginalia.query.model.QueryParams; -import nu.marginalia.query.model.ProcessedQuery; -import nu.marginalia.query_parser.QueryParser; -import nu.marginalia.query_parser.QueryPermutation; -import nu.marginalia.query_parser.QueryVariants; -import nu.marginalia.query_parser.token.Token; -import nu.marginalia.query_parser.token.TokenType; +import nu.marginalia.util.ngrams.NGramBloomFilter; +import nu.marginalia.api.searchquery.model.query.QueryParams; +import 
nu.marginalia.api.searchquery.model.query.ProcessedQuery; +import nu.marginalia.functions.searchquery.query_parser.QueryParser; +import nu.marginalia.functions.searchquery.query_parser.QueryPermutation; +import nu.marginalia.functions.searchquery.query_parser.QueryVariants; +import nu.marginalia.functions.searchquery.query_parser.token.Token; +import nu.marginalia.functions.searchquery.query_parser.token.TokenType; import nu.marginalia.term_frequency_dict.TermFrequencyDict; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -81,7 +81,7 @@ public class QueryFactory { var basicQuery = queryParser.parse(query); - if (basicQuery.size() >= 8) { + if (basicQuery.size() >= 12) { problems.add("Your search query is too long"); basicQuery.clear(); } @@ -127,7 +127,6 @@ public class QueryFactory { .subqueries(subqueries) .humanQuery(query) .quality(qualityLimits.qualityLimit) - .domainCount(qualityLimits.domainCount) .year(qualityLimits.year) .size(qualityLimits.size) .rank(qualityLimits.rank) diff --git a/code/services-core/query-service/src/main/java/nu/marginalia/query/svc/QueryLimitsAccumulator.java b/code/functions/search-query/java/nu/marginalia/functions/searchquery/svc/QueryLimitsAccumulator.java similarity index 87% rename from code/services-core/query-service/src/main/java/nu/marginalia/query/svc/QueryLimitsAccumulator.java rename to code/functions/search-query/java/nu/marginalia/functions/searchquery/svc/QueryLimitsAccumulator.java index f1f17bed..1b49bab3 100644 --- a/code/services-core/query-service/src/main/java/nu/marginalia/query/svc/QueryLimitsAccumulator.java +++ b/code/functions/search-query/java/nu/marginalia/functions/searchquery/svc/QueryLimitsAccumulator.java @@ -1,17 +1,16 @@ -package nu.marginalia.query.svc; +package nu.marginalia.functions.searchquery.svc; +import nu.marginalia.api.searchquery.model.query.QueryParams; import nu.marginalia.index.query.limit.QueryStrategy; import nu.marginalia.index.query.limit.SpecificationLimit; -import 
nu.marginalia.query.model.QueryParams; -import nu.marginalia.query_parser.token.Token; -import nu.marginalia.query_parser.token.TokenVisitor; +import nu.marginalia.functions.searchquery.query_parser.token.Token; +import nu.marginalia.functions.searchquery.query_parser.token.TokenVisitor; public class QueryLimitsAccumulator implements TokenVisitor { public SpecificationLimit qualityLimit; public SpecificationLimit year; public SpecificationLimit size; public SpecificationLimit rank; - public SpecificationLimit domainCount; public QueryStrategy queryStrategy = QueryStrategy.AUTO; @@ -20,7 +19,6 @@ public class QueryLimitsAccumulator implements TokenVisitor { year = params.year(); size = params.size(); rank = params.rank(); - domainCount = params.domainCount(); } private SpecificationLimit parseSpecificationLimit(String str) { @@ -67,11 +65,6 @@ public class QueryLimitsAccumulator implements TokenVisitor { rank = parseSpecificationLimit(token.str); } - @Override - public void onDomainCountTerm(Token token) { - domainCount = parseSpecificationLimit(token.str); - } - @Override public void onQualityTerm(Token token) { qualityLimit = parseSpecificationLimit(token.str); diff --git a/code/services-core/query-service/src/main/java/nu/marginalia/query/svc/QuerySearchTermsAccumulator.java b/code/functions/search-query/java/nu/marginalia/functions/searchquery/svc/QuerySearchTermsAccumulator.java similarity index 92% rename from code/services-core/query-service/src/main/java/nu/marginalia/query/svc/QuerySearchTermsAccumulator.java rename to code/functions/search-query/java/nu/marginalia/functions/searchquery/svc/QuerySearchTermsAccumulator.java index 1d5588bc..e4def0d0 100644 --- a/code/services-core/query-service/src/main/java/nu/marginalia/query/svc/QuerySearchTermsAccumulator.java +++ b/code/functions/search-query/java/nu/marginalia/functions/searchquery/svc/QuerySearchTermsAccumulator.java @@ -1,9 +1,9 @@ -package nu.marginalia.query.svc; +package 
nu.marginalia.functions.searchquery.svc; -import nu.marginalia.index.client.model.query.SearchSubquery; +import nu.marginalia.api.searchquery.model.query.SearchSubquery; import nu.marginalia.language.WordPatterns; -import nu.marginalia.query_parser.token.Token; -import nu.marginalia.query_parser.token.TokenVisitor; +import nu.marginalia.functions.searchquery.query_parser.token.Token; +import nu.marginalia.functions.searchquery.query_parser.token.TokenVisitor; import java.util.ArrayList; import java.util.Arrays; @@ -103,8 +103,6 @@ public class QuerySearchTermsAccumulator implements TokenVisitor { @Override public void onRankTerm(Token token) {} @Override - public void onDomainCountTerm(Token token) {} - @Override public void onQualityTerm(Token token) {} @Override public void onQsTerm(Token token) {} diff --git a/code/features-qs/query-parser/src/main/java/nu/marginalia/language/EnglishDictionary.java b/code/functions/search-query/java/nu/marginalia/util/language/EnglishDictionary.java similarity index 99% rename from code/features-qs/query-parser/src/main/java/nu/marginalia/language/EnglishDictionary.java rename to code/functions/search-query/java/nu/marginalia/util/language/EnglishDictionary.java index 0afd3625..56e90701 100644 --- a/code/features-qs/query-parser/src/main/java/nu/marginalia/language/EnglishDictionary.java +++ b/code/functions/search-query/java/nu/marginalia/util/language/EnglishDictionary.java @@ -1,4 +1,4 @@ -package nu.marginalia.language; +package nu.marginalia.util.language; import com.google.inject.Inject; import nu.marginalia.term_frequency_dict.TermFrequencyDict; diff --git a/code/features-qs/query-parser/src/main/java/nu/marginalia/ngrams/DenseBitMap.java b/code/functions/search-query/java/nu/marginalia/util/ngrams/DenseBitMap.java similarity index 92% rename from code/features-qs/query-parser/src/main/java/nu/marginalia/ngrams/DenseBitMap.java rename to code/functions/search-query/java/nu/marginalia/util/ngrams/DenseBitMap.java index 
ca5666b1..008b17b3 100644 --- a/code/features-qs/query-parser/src/main/java/nu/marginalia/ngrams/DenseBitMap.java +++ b/code/functions/search-query/java/nu/marginalia/util/ngrams/DenseBitMap.java @@ -1,11 +1,14 @@ -package nu.marginalia.ngrams; +package nu.marginalia.util.ngrams; import java.io.IOException; import java.nio.ByteBuffer; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.StandardOpenOption; +import java.util.BitSet; +// It's unclear why this exists, we should probably use a BitSet instead? +// Chesterton's fence? public class DenseBitMap { public static final long MAX_CAPACITY_2GB_16BN_ITEMS=(1L<<34)-8; diff --git a/code/features-qs/query-parser/src/main/java/nu/marginalia/ngrams/NGramBloomFilter.java b/code/functions/search-query/java/nu/marginalia/util/ngrams/NGramBloomFilter.java similarity index 98% rename from code/features-qs/query-parser/src/main/java/nu/marginalia/ngrams/NGramBloomFilter.java rename to code/functions/search-query/java/nu/marginalia/util/ngrams/NGramBloomFilter.java index 89a6d9cf..3326956d 100644 --- a/code/features-qs/query-parser/src/main/java/nu/marginalia/ngrams/NGramBloomFilter.java +++ b/code/functions/search-query/java/nu/marginalia/util/ngrams/NGramBloomFilter.java @@ -1,4 +1,4 @@ -package nu.marginalia.ngrams; +package nu.marginalia.util.ngrams; import ca.rmen.porterstemmer.PorterStemmer; import com.google.common.hash.HashFunction; diff --git a/code/features-qs/query-parser/src/main/java/nu/marginalia/transform_list/TransformList.java b/code/functions/search-query/java/nu/marginalia/util/transform_list/TransformList.java similarity index 98% rename from code/features-qs/query-parser/src/main/java/nu/marginalia/transform_list/TransformList.java rename to code/functions/search-query/java/nu/marginalia/util/transform_list/TransformList.java index 20b2b5ea..08bc428e 100644 --- a/code/features-qs/query-parser/src/main/java/nu/marginalia/transform_list/TransformList.java +++ 
b/code/functions/search-query/java/nu/marginalia/util/transform_list/TransformList.java @@ -1,4 +1,4 @@ -package nu.marginalia.transform_list; +package nu.marginalia.util.transform_list; import java.util.List; import java.util.function.BiConsumer; diff --git a/code/functions/search-query/readme.md b/code/functions/search-query/readme.md new file mode 100644 index 00000000..54022655 --- /dev/null +++ b/code/functions/search-query/readme.md @@ -0,0 +1,4 @@ +The search query subsystem is responsible for parsing a query, +translating it to a request, and then dispatching it to the +appropriate index nodes and translating the responses back again. + diff --git a/code/services-core/query-service/src/test/java/nu/marginalia/query/svc/QueryFactoryTest.java b/code/functions/search-query/test/nu/marginalia/query/svc/QueryFactoryTest.java similarity index 93% rename from code/services-core/query-service/src/test/java/nu/marginalia/query/svc/QueryFactoryTest.java rename to code/functions/search-query/test/nu/marginalia/query/svc/QueryFactoryTest.java index 44cddced..fe93a1f6 100644 --- a/code/services-core/query-service/src/test/java/nu/marginalia/query/svc/QueryFactoryTest.java +++ b/code/functions/search-query/test/nu/marginalia/query/svc/QueryFactoryTest.java @@ -1,16 +1,16 @@ package nu.marginalia.query.svc; import nu.marginalia.WmsaHome; -import nu.marginalia.index.client.model.query.SearchSetIdentifier; -import nu.marginalia.index.client.model.query.SearchSpecification; -import nu.marginalia.index.client.model.results.ResultRankingParameters; +import nu.marginalia.api.searchquery.model.query.SearchSpecification; +import nu.marginalia.api.searchquery.model.results.ResultRankingParameters; +import nu.marginalia.functions.searchquery.svc.QueryFactory; import nu.marginalia.index.query.limit.QueryLimits; import nu.marginalia.index.query.limit.QueryStrategy; import nu.marginalia.index.query.limit.SpecificationLimit; import 
nu.marginalia.index.query.limit.SpecificationLimitType; -import nu.marginalia.language.EnglishDictionary; -import nu.marginalia.ngrams.NGramBloomFilter; -import nu.marginalia.query.model.QueryParams; +import nu.marginalia.util.language.EnglishDictionary; +import nu.marginalia.util.ngrams.NGramBloomFilter; +import nu.marginalia.api.searchquery.model.query.QueryParams; import nu.marginalia.term_frequency_dict.TermFrequencyDict; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; @@ -48,7 +48,6 @@ public class QueryFactoryTest { SpecificationLimit.none(), SpecificationLimit.none(), SpecificationLimit.none(), - SpecificationLimit.none(), null, new QueryLimits(100, 100, 100, 100), "NONE", diff --git a/code/index/api/build.gradle b/code/index/api/build.gradle new file mode 100644 index 00000000..895f75b0 --- /dev/null +++ b/code/index/api/build.gradle @@ -0,0 +1,38 @@ +plugins { + id 'java' + id 'jvm-test-suite' +} + +java { + toolchain { + languageVersion.set(JavaLanguageVersion.of(21)) + } +} + +apply from: "$rootProject.projectDir/srcsets.gradle" + +dependencies { + implementation project(':code:common:model') + implementation project(':code:common:config') + implementation project(':code:common:service-discovery') + implementation project(':code:libraries:message-queue') + implementation project(':code:functions:search-query:api') + + implementation libs.bundles.slf4j + + implementation libs.prometheus + implementation libs.notnull + implementation libs.guice + implementation libs.protobuf + implementation libs.fastutil + implementation libs.javax.annotation + implementation libs.bundles.gson + implementation libs.bundles.grpc + + testImplementation libs.bundles.slf4j.test + testImplementation libs.bundles.junit + testImplementation libs.mockito +} + + + diff --git a/code/index/api/java/nu/marginalia/index/api/IndexClient.java b/code/index/api/java/nu/marginalia/index/api/IndexClient.java new file mode 100644 index 00000000..3a83b5de --- 
/dev/null +++ b/code/index/api/java/nu/marginalia/index/api/IndexClient.java @@ -0,0 +1,52 @@ +package nu.marginalia.index.api; + +import com.google.inject.Inject; +import com.google.inject.Singleton; +import lombok.SneakyThrows; +import nu.marginalia.api.searchquery.IndexApiGrpc; +import nu.marginalia.api.searchquery.RpcDecoratedResultItem; +import nu.marginalia.api.searchquery.RpcIndexQuery; +import nu.marginalia.service.client.GrpcChannelPoolFactory; +import nu.marginalia.service.client.GrpcMultiNodeChannelPool; +import nu.marginalia.service.discovery.property.ServiceKey; +import nu.marginalia.service.discovery.property.ServicePartition; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; + +@Singleton +public class IndexClient { + private static final Logger logger = LoggerFactory.getLogger(IndexClient.class); + private final GrpcMultiNodeChannelPool channelPool; + private static final ExecutorService executor = Executors.newFixedThreadPool(32); + @Inject + public IndexClient(GrpcChannelPoolFactory channelPoolFactory) { + this.channelPool = channelPoolFactory.createMulti( + ServiceKey.forGrpcApi(IndexApiGrpc.class, ServicePartition.multi()), + IndexApiGrpc::newBlockingStub); + } + + @SneakyThrows + public List executeQueries(RpcIndexQuery indexRequest) { + var futures = + channelPool.call(IndexApiGrpc.IndexApiBlockingStub::query) + .async(executor) + .runEach(indexRequest); + List results = new ArrayList<>(); + for (var future : futures) { + try { + future.get().forEachRemaining(results::add); + } + catch (Exception e) { + logger.error("Downstream exception", e); + } + } + + return results; + } + +} diff --git a/code/index/api/java/nu/marginalia/index/api/IndexMqClient.java b/code/index/api/java/nu/marginalia/index/api/IndexMqClient.java new file mode 100644 index 00000000..27465f6e --- /dev/null +++ 
b/code/index/api/java/nu/marginalia/index/api/IndexMqClient.java @@ -0,0 +1,49 @@ +package nu.marginalia.index.api; + +import com.google.inject.Inject; +import com.google.inject.Singleton; +import com.google.inject.name.Named; +import nu.marginalia.mq.MessageQueueFactory; +import nu.marginalia.mq.outbox.MqOutbox; +import nu.marginalia.service.id.ServiceId; + +import java.util.UUID; + +@Singleton +public class IndexMqClient { + + private final MessageQueueFactory messageQueueFactory; + + MqOutbox outbox; + + @Inject + public IndexMqClient(MessageQueueFactory messageQueueFactory, + @Named("wmsa-system-node") Integer nodeId) + { + this.messageQueueFactory = messageQueueFactory; + + String inboxName = ServiceId.Index.serviceName; + String outboxName = "pp:"+System.getProperty("service-name", UUID.randomUUID().toString()); + outbox = messageQueueFactory.createOutbox(inboxName, nodeId, outboxName, nodeId, UUID.randomUUID()); + } + + public MqOutbox outbox() { + return outbox; + } + + public long triggerRepartition(int node) throws Exception { + return messageQueueFactory.sendSingleShotRequest( + ServiceId.Index.withNode(node), + IndexMqEndpoints.INDEX_REPARTITION, + null + ); + } + + public long triggerRerank(int node) throws Exception { + return messageQueueFactory.sendSingleShotRequest( + ServiceId.Index.withNode(node), + IndexMqEndpoints.INDEX_RERANK, + null + ); + } +} diff --git a/code/api/index-api/src/main/java/nu/marginalia/index/client/IndexMqEndpoints.java b/code/index/api/java/nu/marginalia/index/api/IndexMqEndpoints.java similarity index 91% rename from code/api/index-api/src/main/java/nu/marginalia/index/client/IndexMqEndpoints.java rename to code/index/api/java/nu/marginalia/index/api/IndexMqEndpoints.java index 0a4635eb..ec618912 100644 --- a/code/api/index-api/src/main/java/nu/marginalia/index/client/IndexMqEndpoints.java +++ b/code/index/api/java/nu/marginalia/index/api/IndexMqEndpoints.java @@ -1,4 +1,4 @@ -package nu.marginalia.index.client; +package 
nu.marginalia.index.api; public class IndexMqEndpoints { public static final String INDEX_IS_BLOCKED = "INDEX-IS-BLOCKED"; diff --git a/code/index/build.gradle b/code/index/build.gradle new file mode 100644 index 00000000..7d52facc --- /dev/null +++ b/code/index/build.gradle @@ -0,0 +1,65 @@ +plugins { + id 'java' + id 'jvm-test-suite' +} + +java { + toolchain { + languageVersion.set(JavaLanguageVersion.of(21)) + } +} + +apply from: "$rootProject.projectDir/srcsets.gradle" + +dependencies { + implementation 'org.jgrapht:jgrapht-core:1.5.2' + + implementation project(':third-party:commons-codec') + + implementation project(':code:index:api') + implementation project(':code:functions:link-graph:api') + + implementation project(':code:libraries:array') + implementation project(':code:libraries:btree') + + implementation project(':code:common:db') + implementation project(':code:common:config') + implementation project(':code:common:model') + implementation project(':code:common:linkdb') + implementation project(':code:common:service') + implementation project(':code:common:service-discovery') + + implementation project(':code:functions:search-query:api') + + implementation project(':code:index:index-forward') + implementation project(':code:index:index-reverse') + implementation project(':code:index:query') + implementation project(':code:index:index-journal') + + + implementation libs.bundles.slf4j + + implementation libs.prometheus + implementation libs.roaringbitmap + implementation libs.bundles.grpc + implementation libs.notnull + implementation libs.guice + implementation libs.trove + implementation libs.fastutil + implementation libs.bundles.gson + implementation libs.bundles.mariadb + + testImplementation libs.bundles.slf4j.test + testImplementation libs.bundles.junit + testImplementation libs.mockito + testImplementation libs.commons.lang3 + testImplementation project(':code:common:process') + testImplementation project(':code:libraries:array') + + 
testImplementation platform('org.testcontainers:testcontainers-bom:1.17.4') + testImplementation 'org.testcontainers:mariadb:1.17.4' + testImplementation 'org.testcontainers:junit-jupiter:1.17.4' + testImplementation project(':code:libraries:test-helpers') + testImplementation project(':code:libraries:term-frequency-dict') + testImplementation project(':code:libraries:braille-block-punch-cards') +} diff --git a/code/features-index/index-forward/build.gradle b/code/index/index-forward/build.gradle similarity index 77% rename from code/features-index/index-forward/build.gradle rename to code/index/index-forward/build.gradle index 8ceb710e..daf9bc0b 100644 --- a/code/features-index/index-forward/build.gradle +++ b/code/index/index-forward/build.gradle @@ -10,12 +10,13 @@ java { } } +apply from: "$rootProject.projectDir/srcsets.gradle" + dependencies { implementation project(':code:libraries:array') implementation project(':code:libraries:btree') - implementation project(':code:features-index:domain-ranking') - implementation project(':code:features-index:index-query') - implementation project(':code:features-index:index-journal') + implementation project(':code:index:query') + implementation project(':code:index:index-journal') implementation project(':code:common:model') implementation project(':code:common:process') diff --git a/code/features-index/index-forward/src/main/java/nu/marginalia/index/forward/ForwardIndexConverter.java b/code/index/index-forward/java/nu/marginalia/index/forward/ForwardIndexConverter.java similarity index 98% rename from code/features-index/index-forward/src/main/java/nu/marginalia/index/forward/ForwardIndexConverter.java rename to code/index/index-forward/java/nu/marginalia/index/forward/ForwardIndexConverter.java index e7e11e9b..80cf502b 100644 --- a/code/features-index/index-forward/src/main/java/nu/marginalia/index/forward/ForwardIndexConverter.java +++ 
b/code/index/index-forward/java/nu/marginalia/index/forward/ForwardIndexConverter.java @@ -2,12 +2,12 @@ package nu.marginalia.index.forward; import it.unimi.dsi.fastutil.longs.Long2IntOpenHashMap; import nu.marginalia.array.LongArrayFactory; +import nu.marginalia.index.domainrankings.DomainRankings; import nu.marginalia.index.journal.reader.IndexJournalReader; import nu.marginalia.array.LongArray; import nu.marginalia.model.id.UrlIdCodec; import nu.marginalia.model.idx.DocumentMetadata; import nu.marginalia.process.control.ProcessHeartbeat; -import nu.marginalia.ranking.DomainRankings; import org.roaringbitmap.longlong.LongConsumer; import org.roaringbitmap.longlong.Roaring64Bitmap; import org.slf4j.Logger; diff --git a/code/features-index/index-forward/src/main/java/nu/marginalia/index/forward/ForwardIndexFileNames.java b/code/index/index-forward/java/nu/marginalia/index/forward/ForwardIndexFileNames.java similarity index 99% rename from code/features-index/index-forward/src/main/java/nu/marginalia/index/forward/ForwardIndexFileNames.java rename to code/index/index-forward/java/nu/marginalia/index/forward/ForwardIndexFileNames.java index 89cd0d6d..e16e8618 100644 --- a/code/features-index/index-forward/src/main/java/nu/marginalia/index/forward/ForwardIndexFileNames.java +++ b/code/index/index-forward/java/nu/marginalia/index/forward/ForwardIndexFileNames.java @@ -19,7 +19,7 @@ public class ForwardIndexFileNames { public enum FileVersion { CURRENT, NEXT - }; + } public enum FileIdentifier { DOC_DATA, diff --git a/code/features-index/index-forward/src/main/java/nu/marginalia/index/forward/ForwardIndexParameters.java b/code/index/index-forward/java/nu/marginalia/index/forward/ForwardIndexParameters.java similarity index 100% rename from code/features-index/index-forward/src/main/java/nu/marginalia/index/forward/ForwardIndexParameters.java rename to code/index/index-forward/java/nu/marginalia/index/forward/ForwardIndexParameters.java diff --git 
a/code/features-index/index-forward/src/main/java/nu/marginalia/index/forward/ForwardIndexReader.java b/code/index/index-forward/java/nu/marginalia/index/forward/ForwardIndexReader.java similarity index 100% rename from code/features-index/index-forward/src/main/java/nu/marginalia/index/forward/ForwardIndexReader.java rename to code/index/index-forward/java/nu/marginalia/index/forward/ForwardIndexReader.java diff --git a/code/features-index/index-forward/readme.md b/code/index/index-forward/readme.md similarity index 63% rename from code/features-index/index-forward/readme.md rename to code/index/index-forward/readme.md index 545fbf1e..39e272e5 100644 --- a/code/features-index/index-forward/readme.md +++ b/code/index/index-forward/readme.md @@ -8,8 +8,8 @@ The `id` file contains a list of sorted document ids, and the `data` file contai metadata for each document id, in the same order as the `id` file, with a fixed size record containing data associated with each document id. -Each record contains a binary encoded [DocumentMetadata](../../common/model/src/main/java/nu/marginalia/model/idx/DocumentMetadata.java) object, -as well as a [HtmlFeatures](../../common/model/src/main/java/nu/marginalia/model/crawl/HtmlFeature.java) bitmask. +Each record contains a binary encoded [DocumentMetadata](../../common/model/java/nu/marginalia/model/idx/DocumentMetadata.java) object, +as well as a [HtmlFeatures](../../common/model/java/nu/marginalia/model/crawl/HtmlFeature.java) bitmask. Unlike the reverse index, the forward index is not split into two tiers, and the data is in the same order as it is in the source data, and the cardinality of the document IDs is assumed to fit in memory, @@ -17,5 +17,5 @@ so it's relatively easy to construct. ## Central Classes -* [ForwardIndexConverter](src/main/java/nu/marginalia/index/forward/ForwardIndexConverter.java) constructs the index. 
-* [ForwardIndexReader](src/main/java/nu/marginalia/index/forward/ForwardIndexReader.java) interrogates the index. \ No newline at end of file +* [ForwardIndexConverter](java/nu/marginalia/index/forward/ForwardIndexConverter.java) constructs the index. +* [ForwardIndexReader](java/nu/marginalia/index/forward/ForwardIndexReader.java) interrogates the index. \ No newline at end of file diff --git a/code/features-index/index-forward/src/test/java/nu/marginalia/index/forward/ForwardIndexConverterTest.java b/code/index/index-forward/test/nu/marginalia/index/forward/ForwardIndexConverterTest.java similarity index 98% rename from code/features-index/index-forward/src/test/java/nu/marginalia/index/forward/ForwardIndexConverterTest.java rename to code/index/index-forward/test/nu/marginalia/index/forward/ForwardIndexConverterTest.java index 062d3716..de571664 100644 --- a/code/features-index/index-forward/src/test/java/nu/marginalia/index/forward/ForwardIndexConverterTest.java +++ b/code/index/index-forward/test/nu/marginalia/index/forward/ForwardIndexConverterTest.java @@ -1,13 +1,13 @@ package nu.marginalia.index.forward; import lombok.SneakyThrows; +import nu.marginalia.index.domainrankings.DomainRankings; import nu.marginalia.index.journal.model.IndexJournalEntry; import nu.marginalia.index.journal.reader.IndexJournalReaderSingleFile; import nu.marginalia.index.journal.writer.IndexJournalWriter; import nu.marginalia.index.journal.writer.IndexJournalWriterSingleFileImpl; import nu.marginalia.model.id.UrlIdCodec; import nu.marginalia.process.control.FakeProcessHeartbeat; -import nu.marginalia.ranking.DomainRankings; import nu.marginalia.test.TestUtil; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; diff --git a/code/features-index/index-forward/src/test/java/nu/marginalia/test/TestUtil.java b/code/index/index-forward/test/nu/marginalia/test/TestUtil.java similarity index 100% rename from 
code/features-index/index-forward/src/test/java/nu/marginalia/test/TestUtil.java rename to code/index/index-forward/test/nu/marginalia/test/TestUtil.java diff --git a/code/features-index/index-journal/build.gradle b/code/index/index-journal/build.gradle similarity index 92% rename from code/features-index/index-journal/build.gradle rename to code/index/index-journal/build.gradle index f9197954..c6186cf7 100644 --- a/code/features-index/index-journal/build.gradle +++ b/code/index/index-journal/build.gradle @@ -10,6 +10,8 @@ java { } } +apply from: "$rootProject.projectDir/srcsets.gradle" + dependencies { implementation project(':code:libraries:array') implementation project(':code:common:model') @@ -18,7 +20,6 @@ dependencies { implementation libs.prometheus implementation libs.notnull - implementation libs.rxjava implementation libs.guava implementation libs.trove implementation libs.zstd diff --git a/code/features-index/index-journal/src/main/java/nu/marginalia/index/journal/IndexJournalFileNames.java b/code/index/index-journal/java/nu/marginalia/index/journal/IndexJournalFileNames.java similarity index 100% rename from code/features-index/index-journal/src/main/java/nu/marginalia/index/journal/IndexJournalFileNames.java rename to code/index/index-journal/java/nu/marginalia/index/journal/IndexJournalFileNames.java diff --git a/code/features-index/index-journal/src/main/java/nu/marginalia/index/journal/model/IndexJournalEntry.java b/code/index/index-journal/java/nu/marginalia/index/journal/model/IndexJournalEntry.java similarity index 100% rename from code/features-index/index-journal/src/main/java/nu/marginalia/index/journal/model/IndexJournalEntry.java rename to code/index/index-journal/java/nu/marginalia/index/journal/model/IndexJournalEntry.java diff --git a/code/features-index/index-journal/src/main/java/nu/marginalia/index/journal/model/IndexJournalEntryBuilder.java 
b/code/index/index-journal/java/nu/marginalia/index/journal/model/IndexJournalEntryBuilder.java similarity index 100% rename from code/features-index/index-journal/src/main/java/nu/marginalia/index/journal/model/IndexJournalEntryBuilder.java rename to code/index/index-journal/java/nu/marginalia/index/journal/model/IndexJournalEntryBuilder.java diff --git a/code/features-index/index-journal/src/main/java/nu/marginalia/index/journal/model/IndexJournalEntryData.java b/code/index/index-journal/java/nu/marginalia/index/journal/model/IndexJournalEntryData.java similarity index 100% rename from code/features-index/index-journal/src/main/java/nu/marginalia/index/journal/model/IndexJournalEntryData.java rename to code/index/index-journal/java/nu/marginalia/index/journal/model/IndexJournalEntryData.java diff --git a/code/features-index/index-journal/src/main/java/nu/marginalia/index/journal/model/IndexJournalEntryHeader.java b/code/index/index-journal/java/nu/marginalia/index/journal/model/IndexJournalEntryHeader.java similarity index 100% rename from code/features-index/index-journal/src/main/java/nu/marginalia/index/journal/model/IndexJournalEntryHeader.java rename to code/index/index-journal/java/nu/marginalia/index/journal/model/IndexJournalEntryHeader.java diff --git a/code/features-index/index-journal/src/main/java/nu/marginalia/index/journal/model/IndexJournalFileHeader.java b/code/index/index-journal/java/nu/marginalia/index/journal/model/IndexJournalFileHeader.java similarity index 100% rename from code/features-index/index-journal/src/main/java/nu/marginalia/index/journal/model/IndexJournalFileHeader.java rename to code/index/index-journal/java/nu/marginalia/index/journal/model/IndexJournalFileHeader.java diff --git a/code/features-index/index-journal/src/main/java/nu/marginalia/index/journal/reader/IndexJournalReadEntry.java b/code/index/index-journal/java/nu/marginalia/index/journal/reader/IndexJournalReadEntry.java similarity index 100% rename from 
code/features-index/index-journal/src/main/java/nu/marginalia/index/journal/reader/IndexJournalReadEntry.java rename to code/index/index-journal/java/nu/marginalia/index/journal/reader/IndexJournalReadEntry.java diff --git a/code/features-index/index-journal/src/main/java/nu/marginalia/index/journal/reader/IndexJournalReader.java b/code/index/index-journal/java/nu/marginalia/index/journal/reader/IndexJournalReader.java similarity index 100% rename from code/features-index/index-journal/src/main/java/nu/marginalia/index/journal/reader/IndexJournalReader.java rename to code/index/index-journal/java/nu/marginalia/index/journal/reader/IndexJournalReader.java diff --git a/code/features-index/index-journal/src/main/java/nu/marginalia/index/journal/reader/IndexJournalReaderPagingImpl.java b/code/index/index-journal/java/nu/marginalia/index/journal/reader/IndexJournalReaderPagingImpl.java similarity index 100% rename from code/features-index/index-journal/src/main/java/nu/marginalia/index/journal/reader/IndexJournalReaderPagingImpl.java rename to code/index/index-journal/java/nu/marginalia/index/journal/reader/IndexJournalReaderPagingImpl.java diff --git a/code/features-index/index-journal/src/main/java/nu/marginalia/index/journal/reader/IndexJournalReaderSingleFile.java b/code/index/index-journal/java/nu/marginalia/index/journal/reader/IndexJournalReaderSingleFile.java similarity index 100% rename from code/features-index/index-journal/src/main/java/nu/marginalia/index/journal/reader/IndexJournalReaderSingleFile.java rename to code/index/index-journal/java/nu/marginalia/index/journal/reader/IndexJournalReaderSingleFile.java diff --git a/code/features-index/index-journal/src/main/java/nu/marginalia/index/journal/reader/pointer/IndexJournalPointer.java b/code/index/index-journal/java/nu/marginalia/index/journal/reader/pointer/IndexJournalPointer.java similarity index 100% rename from 
code/features-index/index-journal/src/main/java/nu/marginalia/index/journal/reader/pointer/IndexJournalPointer.java rename to code/index/index-journal/java/nu/marginalia/index/journal/reader/pointer/IndexJournalPointer.java diff --git a/code/features-index/index-journal/src/main/java/nu/marginalia/index/journal/writer/IndexJournalWriter.java b/code/index/index-journal/java/nu/marginalia/index/journal/writer/IndexJournalWriter.java similarity index 100% rename from code/features-index/index-journal/src/main/java/nu/marginalia/index/journal/writer/IndexJournalWriter.java rename to code/index/index-journal/java/nu/marginalia/index/journal/writer/IndexJournalWriter.java diff --git a/code/features-index/index-journal/src/main/java/nu/marginalia/index/journal/writer/IndexJournalWriterPagingImpl.java b/code/index/index-journal/java/nu/marginalia/index/journal/writer/IndexJournalWriterPagingImpl.java similarity index 100% rename from code/features-index/index-journal/src/main/java/nu/marginalia/index/journal/writer/IndexJournalWriterPagingImpl.java rename to code/index/index-journal/java/nu/marginalia/index/journal/writer/IndexJournalWriterPagingImpl.java diff --git a/code/features-index/index-journal/src/main/java/nu/marginalia/index/journal/writer/IndexJournalWriterSingleFileImpl.java b/code/index/index-journal/java/nu/marginalia/index/journal/writer/IndexJournalWriterSingleFileImpl.java similarity index 100% rename from code/features-index/index-journal/src/main/java/nu/marginalia/index/journal/writer/IndexJournalWriterSingleFileImpl.java rename to code/index/index-journal/java/nu/marginalia/index/journal/writer/IndexJournalWriterSingleFileImpl.java diff --git a/code/features-index/index-journal/readme.md b/code/index/index-journal/readme.md similarity index 58% rename from code/features-index/index-journal/readme.md rename to code/index/index-journal/readme.md index 24ed9c43..af7059b3 100644 --- a/code/features-index/index-journal/readme.md +++ 
b/code/index/index-journal/readme.md @@ -16,9 +16,9 @@ are designed to handle this transparently via their *Paging* implementation. ## Central Classes ### Model -* [IndexJournalEntry](src/main/java/nu/marginalia/index/journal/model/IndexJournalEntry.java) -* [IndexJournalEntryHeader](src/main/java/nu/marginalia/index/journal/model/IndexJournalEntryHeader.java) -* [IndexJournalEntryData](src/main/java/nu/marginalia/index/journal/model/IndexJournalEntryData.java) +* [IndexJournalEntry](java/nu/marginalia/index/journal/model/IndexJournalEntry.java) +* [IndexJournalEntryHeader](java/nu/marginalia/index/journal/model/IndexJournalEntryHeader.java) +* [IndexJournalEntryData](java/nu/marginalia/index/journal/model/IndexJournalEntryData.java) ### I/O -* [IndexJournalReader](src/main/java/nu/marginalia/index/journal/reader/IndexJournalReader.java) -* [IndexJournalWriter](src/main/java/nu/marginalia/index/journal/writer/IndexJournalWriter.java) \ No newline at end of file +* [IndexJournalReader](java/nu/marginalia/index/journal/reader/IndexJournalReader.java) +* [IndexJournalWriter](java/nu/marginalia/index/journal/writer/IndexJournalWriter.java) \ No newline at end of file diff --git a/code/features-index/index-journal/src/test/java/nu/marginalia/index/journal/IndexJournalTest.java b/code/index/index-journal/test/nu/marginalia/index/journal/IndexJournalTest.java similarity index 100% rename from code/features-index/index-journal/src/test/java/nu/marginalia/index/journal/IndexJournalTest.java rename to code/index/index-journal/test/nu/marginalia/index/journal/IndexJournalTest.java diff --git a/code/features-index/index-journal/src/test/java/nu/marginalia/index/journal/reader/pointer/IndexJournalPointerTest.java b/code/index/index-journal/test/nu/marginalia/index/journal/reader/pointer/IndexJournalPointerTest.java similarity index 100% rename from code/features-index/index-journal/src/test/java/nu/marginalia/index/journal/reader/pointer/IndexJournalPointerTest.java rename to 
code/index/index-journal/test/nu/marginalia/index/journal/reader/pointer/IndexJournalPointerTest.java diff --git a/code/features-index/index-reverse/build.gradle b/code/index/index-reverse/build.gradle similarity index 76% rename from code/features-index/index-reverse/build.gradle rename to code/index/index-reverse/build.gradle index afa0136f..a9523656 100644 --- a/code/features-index/index-reverse/build.gradle +++ b/code/index/index-reverse/build.gradle @@ -11,13 +11,14 @@ java { } } +apply from: "$rootProject.projectDir/srcsets.gradle" + dependencies { implementation project(':code:libraries:array') implementation project(':code:libraries:btree') implementation project(':code:libraries:random-write-funnel') - implementation project(':code:features-index:domain-ranking') - implementation project(':code:features-index:index-query') - implementation project(':code:features-index:index-journal') + implementation project(':code:index:query') + implementation project(':code:index:index-journal') implementation project(':code:common:model') implementation project(':code:common:process') diff --git a/code/features-index/index-reverse/index.svg b/code/index/index-reverse/index.svg similarity index 100% rename from code/features-index/index-reverse/index.svg rename to code/index/index-reverse/index.svg diff --git a/code/features-index/index-reverse/src/main/java/nu/marginalia/index/ReverseIndexEntrySource.java b/code/index/index-reverse/java/nu/marginalia/index/ReverseIndexEntrySource.java similarity index 100% rename from code/features-index/index-reverse/src/main/java/nu/marginalia/index/ReverseIndexEntrySource.java rename to code/index/index-reverse/java/nu/marginalia/index/ReverseIndexEntrySource.java diff --git a/code/features-index/index-reverse/src/main/java/nu/marginalia/index/ReverseIndexFullFileNames.java b/code/index/index-reverse/java/nu/marginalia/index/ReverseIndexFullFileNames.java similarity index 99% rename from 
code/features-index/index-reverse/src/main/java/nu/marginalia/index/ReverseIndexFullFileNames.java rename to code/index/index-reverse/java/nu/marginalia/index/ReverseIndexFullFileNames.java index c8005fdb..3d0f2499 100644 --- a/code/features-index/index-reverse/src/main/java/nu/marginalia/index/ReverseIndexFullFileNames.java +++ b/code/index/index-reverse/java/nu/marginalia/index/ReverseIndexFullFileNames.java @@ -19,7 +19,7 @@ public class ReverseIndexFullFileNames { public enum FileVersion { CURRENT, NEXT - }; + } public enum FileIdentifier { WORDS, diff --git a/code/features-index/index-reverse/src/main/java/nu/marginalia/index/ReverseIndexParameters.java b/code/index/index-reverse/java/nu/marginalia/index/ReverseIndexParameters.java similarity index 100% rename from code/features-index/index-reverse/src/main/java/nu/marginalia/index/ReverseIndexParameters.java rename to code/index/index-reverse/java/nu/marginalia/index/ReverseIndexParameters.java diff --git a/code/features-index/index-reverse/src/main/java/nu/marginalia/index/ReverseIndexPrioFileNames.java b/code/index/index-reverse/java/nu/marginalia/index/ReverseIndexPrioFileNames.java similarity index 99% rename from code/features-index/index-reverse/src/main/java/nu/marginalia/index/ReverseIndexPrioFileNames.java rename to code/index/index-reverse/java/nu/marginalia/index/ReverseIndexPrioFileNames.java index 6e73df43..e99841d4 100644 --- a/code/features-index/index-reverse/src/main/java/nu/marginalia/index/ReverseIndexPrioFileNames.java +++ b/code/index/index-reverse/java/nu/marginalia/index/ReverseIndexPrioFileNames.java @@ -19,7 +19,7 @@ public class ReverseIndexPrioFileNames { public enum FileVersion { CURRENT, NEXT - }; + } public enum FileIdentifier { WORDS, diff --git a/code/features-index/index-reverse/src/main/java/nu/marginalia/index/ReverseIndexReader.java b/code/index/index-reverse/java/nu/marginalia/index/ReverseIndexReader.java similarity index 95% rename from 
code/features-index/index-reverse/src/main/java/nu/marginalia/index/ReverseIndexReader.java rename to code/index/index-reverse/java/nu/marginalia/index/ReverseIndexReader.java index 1bcfee5a..f37420dd 100644 --- a/code/features-index/index-reverse/src/main/java/nu/marginalia/index/ReverseIndexReader.java +++ b/code/index/index-reverse/java/nu/marginalia/index/ReverseIndexReader.java @@ -16,6 +16,7 @@ import org.slf4j.LoggerFactory; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; +import java.util.Arrays; import java.util.concurrent.Executors; public class ReverseIndexReader { @@ -123,13 +124,13 @@ public class ReverseIndexReader { return new long[docIds.length]; } - assert isSorted(docIds) : "The input array docIds is assumed to be sorted"; + assert isUniqueAndSorted(docIds) : "The input array docIds is assumed to be unique and sorted, was " + Arrays.toString(docIds); var reader = createReaderNew(offset); return reader.queryData(docIds, 1); } - private boolean isSorted(long[] ids) { + private boolean isUniqueAndSorted(long[] ids) { if (ids.length == 0) return true; long prev = ids[0]; diff --git a/code/features-index/index-reverse/src/main/java/nu/marginalia/index/ReverseIndexSelfTest.java b/code/index/index-reverse/java/nu/marginalia/index/ReverseIndexSelfTest.java similarity index 98% rename from code/features-index/index-reverse/src/main/java/nu/marginalia/index/ReverseIndexSelfTest.java rename to code/index/index-reverse/java/nu/marginalia/index/ReverseIndexSelfTest.java index ffb25782..61dee824 100644 --- a/code/features-index/index-reverse/src/main/java/nu/marginalia/index/ReverseIndexSelfTest.java +++ b/code/index/index-reverse/java/nu/marginalia/index/ReverseIndexSelfTest.java @@ -6,8 +6,6 @@ import nu.marginalia.btree.BTreeReader; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.util.HashMap; -import java.util.HashSet; import java.util.Random; public class ReverseIndexSelfTest { diff --git 
a/code/features-index/index-reverse/src/main/java/nu/marginalia/index/construction/CountToOffsetTransformer.java b/code/index/index-reverse/java/nu/marginalia/index/construction/CountToOffsetTransformer.java similarity index 100% rename from code/features-index/index-reverse/src/main/java/nu/marginalia/index/construction/CountToOffsetTransformer.java rename to code/index/index-reverse/java/nu/marginalia/index/construction/CountToOffsetTransformer.java diff --git a/code/features-index/index-reverse/src/main/java/nu/marginalia/index/construction/DocIdRewriter.java b/code/index/index-reverse/java/nu/marginalia/index/construction/DocIdRewriter.java similarity index 100% rename from code/features-index/index-reverse/src/main/java/nu/marginalia/index/construction/DocIdRewriter.java rename to code/index/index-reverse/java/nu/marginalia/index/construction/DocIdRewriter.java diff --git a/code/features-index/index-reverse/src/main/java/nu/marginalia/index/construction/IndexSizeEstimator.java b/code/index/index-reverse/java/nu/marginalia/index/construction/IndexSizeEstimator.java similarity index 100% rename from code/features-index/index-reverse/src/main/java/nu/marginalia/index/construction/IndexSizeEstimator.java rename to code/index/index-reverse/java/nu/marginalia/index/construction/IndexSizeEstimator.java diff --git a/code/features-index/index-reverse/src/main/java/nu/marginalia/index/construction/JournalReaderSource.java b/code/index/index-reverse/java/nu/marginalia/index/construction/JournalReaderSource.java similarity index 100% rename from code/features-index/index-reverse/src/main/java/nu/marginalia/index/construction/JournalReaderSource.java rename to code/index/index-reverse/java/nu/marginalia/index/construction/JournalReaderSource.java diff --git a/code/features-index/index-reverse/src/main/java/nu/marginalia/index/construction/ReverseIndexBTreeTransformer.java b/code/index/index-reverse/java/nu/marginalia/index/construction/ReverseIndexBTreeTransformer.java 
similarity index 95% rename from code/features-index/index-reverse/src/main/java/nu/marginalia/index/construction/ReverseIndexBTreeTransformer.java rename to code/index/index-reverse/java/nu/marginalia/index/construction/ReverseIndexBTreeTransformer.java index 2f5c05f4..4ace48a9 100644 --- a/code/features-index/index-reverse/src/main/java/nu/marginalia/index/construction/ReverseIndexBTreeTransformer.java +++ b/code/index/index-reverse/java/nu/marginalia/index/construction/ReverseIndexBTreeTransformer.java @@ -30,7 +30,7 @@ public class ReverseIndexBTreeTransformer implements LongIOTransformer { @Override public long transform(long pos, long end) throws IOException { - final int size = (int) (end - start) / entrySize; + final int size = (int) ((end - start) / entrySize); if (size == 0) { return -1; diff --git a/code/features-index/index-reverse/src/main/java/nu/marginalia/index/construction/ReverseIndexConstructor.java b/code/index/index-reverse/java/nu/marginalia/index/construction/ReverseIndexConstructor.java similarity index 100% rename from code/features-index/index-reverse/src/main/java/nu/marginalia/index/construction/ReverseIndexConstructor.java rename to code/index/index-reverse/java/nu/marginalia/index/construction/ReverseIndexConstructor.java diff --git a/code/features-index/index-reverse/src/main/java/nu/marginalia/index/construction/ReversePreindex.java b/code/index/index-reverse/java/nu/marginalia/index/construction/ReversePreindex.java similarity index 92% rename from code/features-index/index-reverse/src/main/java/nu/marginalia/index/construction/ReversePreindex.java rename to code/index/index-reverse/java/nu/marginalia/index/construction/ReversePreindex.java index 22fc1431..ac39e817 100644 --- a/code/features-index/index-reverse/src/main/java/nu/marginalia/index/construction/ReversePreindex.java +++ b/code/index/index-reverse/java/nu/marginalia/index/construction/ReversePreindex.java @@ -92,7 +92,8 @@ public class ReversePreindex { LongArray wordIds 
= segments.wordIds; - assert offsets.size() == wordIds.size() : "Offsets and word-ids of different size"; + if (offsets.size() != wordIds.size()) + throw new IllegalStateException("Offsets and word-ids of different size"); if (offsets.size() > Integer.MAX_VALUE) { throw new IllegalStateException("offsets.size() too big!"); } @@ -137,7 +138,7 @@ public class ReversePreindex { Path docsFile = Files.createTempFile(destDir, "docs", ".dat"); - LongArray mergedDocuments = LongArrayFactory.mmapForWritingConfined(docsFile, 2 * (left.documents.size() + right.documents.size())); + LongArray mergedDocuments = LongArrayFactory.mmapForWritingConfined(docsFile, left.documents.size() + right.documents.size()); leftIter.next(); rightIter.next(); @@ -180,9 +181,15 @@ public class ReversePreindex { } - assert !leftIter.isPositionBeforeEnd() : "Left has more to go"; - assert !rightIter.isPositionBeforeEnd() : "Right has more to go"; - assert !mergingIter.canPutMore() : "Source iters ran dry before merging iter"; + if (leftIter.isPositionBeforeEnd()) + throw new IllegalStateException("Left has more to go"); + if (rightIter.isPositionBeforeEnd()) + throw new IllegalStateException("Right has more to go"); + if (mergingIter.canPutMore()) + throw new IllegalStateException("Source iters ran dry before merging iter"); + + + mergingSegment.force(); // We may have overestimated the size of the merged docs size in the case there were // duplicates in the data, so we need to shrink it to the actual size we wrote. 
@@ -190,8 +197,6 @@ public class ReversePreindex { mergedDocuments = shrinkMergedDocuments(mergedDocuments, docsFile, 2 * mergingSegment.totalSize()); - mergingSegment.force(); - return new ReversePreindex( mergingSegment, new ReversePreindexDocuments(mergedDocuments, docsFile) @@ -233,16 +238,15 @@ public class ReversePreindex { mergedDocuments.force(); long beforeSize = mergedDocuments.size(); - try (var bc = Files.newByteChannel(docsFile, StandardOpenOption.WRITE)) { - bc.truncate(sizeLongs * 8); - } - long afterSize = mergedDocuments.size(); - mergedDocuments.close(); - - mergedDocuments = LongArrayFactory.mmapForWritingConfined(docsFile, sizeLongs); - + long afterSize = sizeLongs * 8; if (beforeSize != afterSize) { + mergedDocuments.close(); + try (var bc = Files.newByteChannel(docsFile, StandardOpenOption.WRITE)) { + bc.truncate(sizeLongs * 8); + } + logger.info("Shrunk {} from {}b to {}b", docsFile, beforeSize, afterSize); + mergedDocuments = LongArrayFactory.mmapForWritingConfined(docsFile, sizeLongs); } return mergedDocuments; @@ -291,7 +295,8 @@ public class ReversePreindex { boolean putNext = mergingIter.putNext(size / 2); boolean iterNext = sourceIter.next(); - assert putNext || !iterNext : "Source iterator ran out before dest iterator?!"; + if (!putNext && iterNext) + throw new IllegalStateException("Source iterator ran out before dest iterator?!"); return iterNext; } diff --git a/code/features-index/index-reverse/src/main/java/nu/marginalia/index/construction/ReversePreindexDocuments.java b/code/index/index-reverse/java/nu/marginalia/index/construction/ReversePreindexDocuments.java similarity index 100% rename from code/features-index/index-reverse/src/main/java/nu/marginalia/index/construction/ReversePreindexDocuments.java rename to code/index/index-reverse/java/nu/marginalia/index/construction/ReversePreindexDocuments.java diff --git a/code/features-index/index-reverse/src/main/java/nu/marginalia/index/construction/ReversePreindexReference.java 
b/code/index/index-reverse/java/nu/marginalia/index/construction/ReversePreindexReference.java similarity index 96% rename from code/features-index/index-reverse/src/main/java/nu/marginalia/index/construction/ReversePreindexReference.java rename to code/index/index-reverse/java/nu/marginalia/index/construction/ReversePreindexReference.java index 1acaa514..16c542d5 100644 --- a/code/features-index/index-reverse/src/main/java/nu/marginalia/index/construction/ReversePreindexReference.java +++ b/code/index/index-reverse/java/nu/marginalia/index/construction/ReversePreindexReference.java @@ -1,6 +1,5 @@ package nu.marginalia.index.construction; -import nu.marginalia.array.LongArray; import nu.marginalia.array.LongArrayFactory; import java.io.IOException; diff --git a/code/features-index/index-reverse/src/main/java/nu/marginalia/index/construction/ReversePreindexWordSegments.java b/code/index/index-reverse/java/nu/marginalia/index/construction/ReversePreindexWordSegments.java similarity index 95% rename from code/features-index/index-reverse/src/main/java/nu/marginalia/index/construction/ReversePreindexWordSegments.java rename to code/index/index-reverse/java/nu/marginalia/index/construction/ReversePreindexWordSegments.java index bcfe486e..04303210 100644 --- a/code/features-index/index-reverse/src/main/java/nu/marginalia/index/construction/ReversePreindexWordSegments.java +++ b/code/index/index-reverse/java/nu/marginalia/index/construction/ReversePreindexWordSegments.java @@ -38,6 +38,9 @@ public class ReversePreindexWordSegments { * and each value is the start offset of the data. 
*/ public Long2LongOpenHashMap asMap(int recordSize) { + if (wordIds.size() > Integer.MAX_VALUE) + throw new IllegalArgumentException("Cannot create a map with more than Integer.MAX_VALUE entries"); + Long2LongOpenHashMap ret = new Long2LongOpenHashMap((int) wordIds.size(), 0.75f); var iter = iterator(recordSize); @@ -62,7 +65,7 @@ public class ReversePreindexWordSegments { // Create the words file by iterating over the map and inserting them into // the words file in whatever bizarro hash table order they appear in - int i = 0; + long i = 0; LongIterator iter = countsMap.keySet().iterator(); while (iter.hasNext()) { words.set(i++, iter.nextLong()); @@ -120,8 +123,8 @@ public class ReversePreindexWordSegments { this.fileSize = wordIds.size(); } - private int i = -1; - public int idx() { + private long i = -1; + public long idx() { return i; } public boolean next() { @@ -166,8 +169,8 @@ public class ReversePreindexWordSegments { this.wordId = wordIds.get(0); } - private int i = 0; - public int idx() { + private long i = 0; + public long idx() { return i; } diff --git a/code/features-index/index-reverse/src/main/java/nu/marginalia/index/query/ReverseIndexRejectFilter.java b/code/index/index-reverse/java/nu/marginalia/index/query/ReverseIndexRejectFilter.java similarity index 100% rename from code/features-index/index-reverse/src/main/java/nu/marginalia/index/query/ReverseIndexRejectFilter.java rename to code/index/index-reverse/java/nu/marginalia/index/query/ReverseIndexRejectFilter.java diff --git a/code/features-index/index-reverse/src/main/java/nu/marginalia/index/query/ReverseIndexRetainFilter.java b/code/index/index-reverse/java/nu/marginalia/index/query/ReverseIndexRetainFilter.java similarity index 100% rename from code/features-index/index-reverse/src/main/java/nu/marginalia/index/query/ReverseIndexRetainFilter.java rename to code/index/index-reverse/java/nu/marginalia/index/query/ReverseIndexRetainFilter.java diff --git 
a/code/features-index/index-reverse/merging.svg b/code/index/index-reverse/merging.svg similarity index 100% rename from code/features-index/index-reverse/merging.svg rename to code/index/index-reverse/merging.svg diff --git a/code/features-index/index-reverse/preindex.svg b/code/index/index-reverse/preindex.svg similarity index 100% rename from code/features-index/index-reverse/preindex.svg rename to code/index/index-reverse/preindex.svg diff --git a/code/features-index/index-reverse/readme.md b/code/index/index-reverse/readme.md similarity index 82% rename from code/features-index/index-reverse/readme.md rename to code/index/index-reverse/readme.md index a27371d6..fcc4fcfc 100644 --- a/code/features-index/index-reverse/readme.md +++ b/code/index/index-reverse/readme.md @@ -34,9 +34,9 @@ to form a finalized reverse index. ![Illustration of the data layout of the finalized index](index.svg) ## Central Classes -* [ReversePreindex](src/main/java/nu/marginalia/index/construction/ReversePreindex.java) intermediate reverse index state. -* [ReverseIndexConstructor](src/main/java/nu/marginalia/index/construction/ReverseIndexConstructor.java) constructs the index. -* [ReverseIndexReader](src/main/java/nu/marginalia/index/ReverseIndexReader.java) interrogates the index. +* [ReversePreindex](java/nu/marginalia/index/construction/ReversePreindex.java) intermediate reverse index state. +* [ReverseIndexConstructor](java/nu/marginalia/index/construction/ReverseIndexConstructor.java) constructs the index. +* [ReverseIndexReader](java/nu/marginalia/index/ReverseIndexReader.java) interrogates the index. 
## See Also diff --git a/code/features-index/index-reverse/src/test/java/nu/marginalia/index/ReverseIndexDebugTest.java b/code/index/index-reverse/test/nu/marginalia/index/ReverseIndexDebugTest.java similarity index 100% rename from code/features-index/index-reverse/src/test/java/nu/marginalia/index/ReverseIndexDebugTest.java rename to code/index/index-reverse/test/nu/marginalia/index/ReverseIndexDebugTest.java diff --git a/code/features-index/index-reverse/src/test/java/nu/marginalia/index/ReverseIndexReaderTest.java b/code/index/index-reverse/test/nu/marginalia/index/ReverseIndexReaderTest.java similarity index 100% rename from code/features-index/index-reverse/src/test/java/nu/marginalia/index/ReverseIndexReaderTest.java rename to code/index/index-reverse/test/nu/marginalia/index/ReverseIndexReaderTest.java diff --git a/code/features-index/index-reverse/src/test/java/nu/marginalia/index/construction/ReversePreindexDocsTest.java b/code/index/index-reverse/test/nu/marginalia/index/construction/ReversePreindexDocsTest.java similarity index 100% rename from code/features-index/index-reverse/src/test/java/nu/marginalia/index/construction/ReversePreindexDocsTest.java rename to code/index/index-reverse/test/nu/marginalia/index/construction/ReversePreindexDocsTest.java diff --git a/code/features-index/index-reverse/src/test/java/nu/marginalia/index/construction/ReversePreindexFinalizeTest.java b/code/index/index-reverse/test/nu/marginalia/index/construction/ReversePreindexFinalizeTest.java similarity index 99% rename from code/features-index/index-reverse/src/test/java/nu/marginalia/index/construction/ReversePreindexFinalizeTest.java rename to code/index/index-reverse/test/nu/marginalia/index/construction/ReversePreindexFinalizeTest.java index 882bc34a..049c9bad 100644 --- a/code/features-index/index-reverse/src/test/java/nu/marginalia/index/construction/ReversePreindexFinalizeTest.java +++ 
b/code/index/index-reverse/test/nu/marginalia/index/construction/ReversePreindexFinalizeTest.java @@ -1,7 +1,6 @@ package nu.marginalia.index.construction; -import nu.marginalia.array.LongArray; import nu.marginalia.array.LongArrayFactory; import nu.marginalia.array.algo.SortingContext; import nu.marginalia.btree.BTreeReader; diff --git a/code/features-index/index-reverse/src/test/java/nu/marginalia/index/construction/ReversePreindexMergeTest.java b/code/index/index-reverse/test/nu/marginalia/index/construction/ReversePreindexMergeTest.java similarity index 100% rename from code/features-index/index-reverse/src/test/java/nu/marginalia/index/construction/ReversePreindexMergeTest.java rename to code/index/index-reverse/test/nu/marginalia/index/construction/ReversePreindexMergeTest.java diff --git a/code/features-index/index-reverse/src/test/java/nu/marginalia/index/construction/ReversePreindexWordSegmentsTest.java b/code/index/index-reverse/test/nu/marginalia/index/construction/ReversePreindexWordSegmentsTest.java similarity index 99% rename from code/features-index/index-reverse/src/test/java/nu/marginalia/index/construction/ReversePreindexWordSegmentsTest.java rename to code/index/index-reverse/test/nu/marginalia/index/construction/ReversePreindexWordSegmentsTest.java index 1f5556ac..0ad3205a 100644 --- a/code/features-index/index-reverse/src/test/java/nu/marginalia/index/construction/ReversePreindexWordSegmentsTest.java +++ b/code/index/index-reverse/test/nu/marginalia/index/construction/ReversePreindexWordSegmentsTest.java @@ -1,7 +1,6 @@ package nu.marginalia.index.construction; import nu.marginalia.array.LongArray; -import nu.marginalia.array.algo.SortingContext; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; diff --git a/code/features-index/index-reverse/src/test/java/nu/marginalia/index/construction/TestJournalFactory.java 
b/code/index/index-reverse/test/nu/marginalia/index/construction/TestJournalFactory.java similarity index 100% rename from code/features-index/index-reverse/src/test/java/nu/marginalia/index/construction/TestJournalFactory.java rename to code/index/index-reverse/test/nu/marginalia/index/construction/TestJournalFactory.java diff --git a/code/features-index/index-reverse/src/test/java/nu/marginalia/index/construction/TestSegmentData.java b/code/index/index-reverse/test/nu/marginalia/index/construction/TestSegmentData.java similarity index 100% rename from code/features-index/index-reverse/src/test/java/nu/marginalia/index/construction/TestSegmentData.java rename to code/index/index-reverse/test/nu/marginalia/index/construction/TestSegmentData.java diff --git a/code/features-index/index-reverse/src/test/java/nu/marginalia/test/TestUtil.java b/code/index/index-reverse/test/nu/marginalia/test/TestUtil.java similarity index 100% rename from code/features-index/index-reverse/src/test/java/nu/marginalia/test/TestUtil.java rename to code/index/index-reverse/test/nu/marginalia/test/TestUtil.java diff --git a/code/services-core/index-service/src/main/java/nu/marginalia/index/IndexServicesFactory.java b/code/index/java/nu/marginalia/index/IndexFactory.java similarity index 87% rename from code/services-core/index-service/src/main/java/nu/marginalia/index/IndexServicesFactory.java rename to code/index/java/nu/marginalia/index/IndexFactory.java index bcf041ca..48911546 100644 --- a/code/services-core/index-service/src/main/java/nu/marginalia/index/IndexServicesFactory.java +++ b/code/index/java/nu/marginalia/index/IndexFactory.java @@ -3,11 +3,10 @@ package nu.marginalia.index; import com.google.inject.Inject; import com.google.inject.Singleton; import nu.marginalia.IndexLocations; +import nu.marginalia.index.index.CombinedIndexReader; import nu.marginalia.storage.FileStorageService; import nu.marginalia.index.forward.ForwardIndexFileNames; import 
nu.marginalia.index.forward.ForwardIndexReader; -import nu.marginalia.index.index.SearchIndexReader; -import nu.marginalia.service.control.ServiceHeartbeat; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -15,24 +14,27 @@ import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.StandardCopyOption; -import java.sql.SQLException; @Singleton -public class IndexServicesFactory { - private final Logger logger = LoggerFactory.getLogger(getClass()); +public class IndexFactory { private final FileStorageService fileStorageService; private final Path liveStorage; @Inject - public IndexServicesFactory( - ServiceHeartbeat heartbeat, - FileStorageService fileStorageService - ) throws SQLException { + public IndexFactory(FileStorageService fileStorageService) { this.fileStorageService = fileStorageService; this.liveStorage = IndexLocations.getCurrentIndex(fileStorageService); } + public CombinedIndexReader getCombinedIndexReader() throws IOException { + return new CombinedIndexReader( + getForwardIndexReader(), + getReverseIndexReader(), + getReverseIndexPrioReader() + ); + } + public Path getSearchSetsBase() { return IndexLocations.getSearchSetsPath(fileStorageService); } @@ -59,6 +61,7 @@ public class IndexServicesFactory { ); } + /** Switches the current index to the next index */ public void switchFiles() throws IOException { for (var file : ReverseIndexFullFileNames.FileIdentifier.values()) { @@ -87,11 +90,5 @@ public class IndexServicesFactory { } } - public SearchIndexReader getSearchIndexReader() throws IOException { - return new SearchIndexReader( - getForwardIndexReader(), - getReverseIndexReader(), - getReverseIndexPrioReader() - ); - } + } diff --git a/code/index/java/nu/marginalia/index/IndexGrpcService.java b/code/index/java/nu/marginalia/index/IndexGrpcService.java new file mode 100644 index 00000000..957dec5c --- /dev/null +++ b/code/index/java/nu/marginalia/index/IndexGrpcService.java @@ -0,0 +1,408 @@ 
+package nu.marginalia.index; + +import com.google.inject.Inject; +import com.google.inject.Singleton; +import io.grpc.stub.StreamObserver; +import io.prometheus.client.Counter; +import io.prometheus.client.Gauge; +import io.prometheus.client.Histogram; +import lombok.SneakyThrows; +import nu.marginalia.api.searchquery.*; +import nu.marginalia.api.searchquery.model.query.SearchSpecification; +import nu.marginalia.api.searchquery.model.query.SearchSubquery; +import nu.marginalia.api.searchquery.model.results.*; +import nu.marginalia.index.index.IndexQueryService; +import nu.marginalia.index.index.StatefulIndex; +import nu.marginalia.index.model.SearchParameters; +import nu.marginalia.index.model.SearchTerms; +import nu.marginalia.index.model.SearchTermsUtil; +import nu.marginalia.index.query.IndexQuery; +import nu.marginalia.index.query.IndexSearchBudget; +import nu.marginalia.index.results.IndexResultValuatorService; +import nu.marginalia.index.results.model.ids.CombinedDocIdList; +import nu.marginalia.index.searchset.SearchSetsService; +import nu.marginalia.index.searchset.SmallSearchSet; +import nu.marginalia.index.searchset.SearchSet; +import nu.marginalia.service.module.ServiceConfiguration; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.slf4j.Marker; +import org.slf4j.MarkerFactory; + +import java.sql.SQLException; +import java.util.*; +import java.util.concurrent.ArrayBlockingQueue; +import java.util.concurrent.Executor; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicLong; + +@Singleton +public class IndexGrpcService extends IndexApiGrpc.IndexApiImplBase { + + private final Logger logger = LoggerFactory.getLogger(getClass()); + + // This marker is used to mark sensitive log messages that are related to queries + // so that they can be filtered out in the production logging configuration + private final Marker 
queryMarker = MarkerFactory.getMarker("QUERY"); + + private static final Counter wmsa_query_timeouts = Counter.build() + .name("wmsa_index_query_timeouts") + .help("Query timeout counter") + .labelNames("node", "api") + .register(); + private static final Gauge wmsa_query_cost = Gauge.build() + .name("wmsa_index_query_cost") + .help("Computational cost of query") + .labelNames("node", "api") + .register(); + private static final Histogram wmsa_query_time = Histogram.build() + .name("wmsa_index_query_time") + .linearBuckets(0.05, 0.05, 15) + .labelNames("node", "api") + .help("Index-side query time") + .register(); + + private static final Gauge wmsa_index_query_exec_stall_time = Gauge.build() + .name("wmsa_index_query_exec_stall_time") + .help("Execution stall time") + .labelNames("node") + .register(); + + private static final Gauge wmsa_index_query_exec_block_time = Gauge.build() + .name("wmsa_index_query_exec_block_time") + .help("Execution stall time") + .labelNames("node") + .register(); + + private final StatefulIndex index; + private final SearchSetsService searchSetsService; + + private final IndexQueryService indexQueryService; + private final IndexResultValuatorService resultValuator; + + private final String nodeName; + + private static final int indexValuationThreads = Integer.getInteger("index.valuationThreads", 8); + + @Inject + public IndexGrpcService(ServiceConfiguration serviceConfiguration, + StatefulIndex index, + SearchSetsService searchSetsService, + IndexQueryService indexQueryService, + IndexResultValuatorService resultValuator) + { + var nodeId = serviceConfiguration.node(); + this.nodeName = Integer.toString(nodeId); + this.index = index; + this.searchSetsService = searchSetsService; + this.resultValuator = resultValuator; + this.indexQueryService = indexQueryService; + } + + // GRPC endpoint + @SneakyThrows + public void query(RpcIndexQuery request, + StreamObserver responseObserver) { + + try { + var params = new SearchParameters(request, 
getSearchSet(request)); + + long endTime = System.currentTimeMillis() + request.getQueryLimits().getTimeoutMs(); + + SearchResultSet results = wmsa_query_time + .labels(nodeName, "GRPC") + .time(() -> { + // Perform the search + return executeSearch(params); + }); + + // Prometheus bookkeeping + wmsa_query_cost + .labels(nodeName, "GRPC") + .set(params.getDataCost()); + + if (System.currentTimeMillis() >= endTime) { + wmsa_query_timeouts + .labels(nodeName, "GRPC") + .inc(); + } + + // Send the results back to the client + for (var result : results.results) { + + var rawResult = result.rawIndexResult; + + var rawItem = RpcRawResultItem.newBuilder(); + rawItem.setCombinedId(rawResult.combinedId); + rawItem.setResultsFromDomain(rawResult.resultsFromDomain); + + for (var score : rawResult.keywordScores) { + rawItem.addKeywordScores( + RpcResultKeywordScore.newBuilder() + .setEncodedDocMetadata(score.encodedDocMetadata()) + .setEncodedWordMetadata(score.encodedWordMetadata()) + .setKeyword(score.keyword) + .setHtmlFeatures(score.htmlFeatures()) + .setSubquery(score.subquery) + ); + } + + var decoratedBuilder = RpcDecoratedResultItem.newBuilder() + .setDataHash(result.dataHash) + .setDescription(result.description) + .setFeatures(result.features) + .setFormat(result.format) + .setRankingScore(result.rankingScore) + .setTitle(result.title) + .setUrl(result.url.toString()) + .setWordsTotal(result.wordsTotal) + .setRawItem(rawItem); + + if (result.pubYear != null) { + decoratedBuilder.setPubYear(result.pubYear); + } + responseObserver.onNext(decoratedBuilder.build()); + } + + responseObserver.onCompleted(); + } + catch (Exception ex) { + logger.error("Error in handling request", ex); + responseObserver.onError(ex); + } + } + + // exists for test access + @SneakyThrows + SearchResultSet justQuery(SearchSpecification specsSet) { + return executeSearch(new SearchParameters(specsSet, getSearchSet(specsSet))); + } + + private SearchSet getSearchSet(SearchSpecification specsSet) 
{ + + if (specsSet.domains != null && !specsSet.domains.isEmpty()) { + return new SmallSearchSet(specsSet.domains); + } + + return searchSetsService.getSearchSetByName(specsSet.searchSetIdentifier); + } + + private SearchSet getSearchSet(RpcIndexQuery request) { + + if (request.getDomainsCount() > 0) { + return new SmallSearchSet(request.getDomainsList()); + } + + return searchSetsService.getSearchSetByName(request.getSearchSetIdentifier()); + } + + private SearchResultSet executeSearch(SearchParameters params) throws SQLException, InterruptedException { + + if (!index.isLoaded()) { + // Short-circuit if the index is not loaded, as we trivially know that there can be no results + return new SearchResultSet(List.of()); + } + + ResultRankingContext rankingContext = createRankingContext(params.rankingParams, params.subqueries); + + var queryExecution = new QueryExecution(rankingContext, params.fetchSize); + + var ret = queryExecution.run(params); + + wmsa_index_query_exec_block_time + .labels(nodeName) + .set(queryExecution.getBlockTime() / 1000.); + wmsa_index_query_exec_stall_time + .labels(nodeName) + .set(queryExecution.getStallTime() / 1000.); + + return ret; + } + + /** This class is responsible for executing a search query. It uses a thread pool to + * execute the subqueries in parallel, and then uses another thread pool to rank the + * results in parallel. The results are then combined into a bounded priority queue, + * and finally the best results are returned. 
+ */ + private class QueryExecution { + private static final Executor workerPool = Executors.newWorkStealingPool(indexValuationThreads*4); + + private final ArrayBlockingQueue resultCandidateQueue + = new ArrayBlockingQueue<>(8); + + private final ResultPriorityQueue resultHeap; + private final ResultRankingContext resultRankingContext; + + private final AtomicInteger remainingIndexTasks = new AtomicInteger(0); + private final AtomicInteger remainingValuationTasks = new AtomicInteger(0); + + private final AtomicLong blockTime = new AtomicLong(0); + private final AtomicLong stallTime = new AtomicLong(0); + + public long getStallTime() { + return stallTime.get(); + } + public long getBlockTime() { + return blockTime.get(); + } + + private QueryExecution(ResultRankingContext resultRankingContext, int maxResults) { + this.resultRankingContext = resultRankingContext; + this.resultHeap = new ResultPriorityQueue(maxResults); + } + + /** Execute a search query */ + public SearchResultSet run(SearchParameters parameters) throws SQLException, InterruptedException { + + for (var subquery : parameters.subqueries) { + var terms = new SearchTerms(subquery); + if (terms.isEmpty()) + continue; + + for (var indexQuery : index.createQueries(terms, parameters.queryParams)) { + workerPool.execute(new IndexLookup(indexQuery, parameters.budget)); + } + } + + for (int i = 0; i < indexValuationThreads; i++) { + workerPool.execute(new ResultRanker(parameters, resultRankingContext)); + } + + // Wait for all tasks to complete + awaitCompletion(); + + // Return the best results + return new SearchResultSet( + resultValuator.selectBestResults(parameters, + resultRankingContext, + resultHeap)); + } + + /** Wait for all tasks to complete */ + private void awaitCompletion() throws InterruptedException { + synchronized (remainingValuationTasks) { + while (remainingValuationTasks.get() > 0) { + remainingValuationTasks.wait(20); + } + } + } + + /** This class is responsible for executing a subquery 
and adding the results to the + * resultCandidateQueue, which depending on the state of the valuator threads may + * or may not block*/ + class IndexLookup implements Runnable { + private final IndexQuery query; + private final IndexSearchBudget budget; + + IndexLookup(IndexQuery query, + IndexSearchBudget budget) { + this.query = query; + this.budget = budget; + + remainingIndexTasks.incrementAndGet(); + } + + public void run() { + try { + indexQueryService.evaluateSubquery( + query, + budget, + this::drain + ); + } + finally { + synchronized (remainingIndexTasks) { + if (remainingIndexTasks.decrementAndGet() == 0) { + remainingIndexTasks.notifyAll(); + } + } + } + } + + private void drain(CombinedDocIdList resultIds) { + long remainingTime = budget.timeLeft(); + + try { + if (!resultCandidateQueue.offer(resultIds)) { + long start = System.currentTimeMillis(); + resultCandidateQueue.offer(resultIds, remainingTime, TimeUnit.MILLISECONDS); + blockTime.addAndGet(System.currentTimeMillis() - start); + } + } + catch (InterruptedException e) { + logger.warn("Interrupted while waiting to offer resultIds to queue", e); + } + } + } + + /** This class is responsible for ranking the results and adding the best results to the + * resultHeap, which depending on the state of the indexLookup threads may or may not block + */ + class ResultRanker implements Runnable { + private final SearchParameters parameters; + private final ResultRankingContext rankingContext; + + ResultRanker(SearchParameters parameters, ResultRankingContext rankingContext) { + this.parameters = parameters; + this.rankingContext = rankingContext; + + remainingValuationTasks.incrementAndGet(); + } + + public void run() { + try { + while (parameters.budget.timeLeft() > 0) { + + long start = System.currentTimeMillis(); + + CombinedDocIdList resultIds = resultCandidateQueue.poll( + Math.clamp(parameters.budget.timeLeft(), 1, 5), + TimeUnit.MILLISECONDS); + + if (resultIds == null) { + if 
(remainingIndexTasks.get() == 0 + && resultCandidateQueue.isEmpty()) + break; + else + continue; + } + + stallTime.addAndGet(System.currentTimeMillis() - start); + + var bestResults = resultValuator.rankResults(parameters, rankingContext, resultIds); + + resultHeap.addAll(bestResults); + } + } + catch (Exception e) { + logger.warn("Interrupted while waiting to poll resultIds from queue", e); + } + finally { + synchronized (remainingValuationTasks) { + if (remainingValuationTasks.decrementAndGet() == 0) + remainingValuationTasks.notifyAll(); + } + } + } + } + + } + + private ResultRankingContext createRankingContext(ResultRankingParameters rankingParams, List subqueries) { + final var termToId = SearchTermsUtil.getAllIncludeTerms(subqueries); + final Map termFrequencies = new HashMap<>(termToId.size()); + final Map prioFrequencies = new HashMap<>(termToId.size()); + + termToId.forEach((key, id) -> termFrequencies.put(key, index.getTermFrequency(id))); + termToId.forEach((key, id) -> prioFrequencies.put(key, index.getTermFrequencyPrio(id))); + + return new ResultRankingContext(index.getTotalDocCount(), + rankingParams, + termFrequencies, + prioFrequencies); + } + +} + diff --git a/code/index/java/nu/marginalia/index/ResultPriorityQueue.java b/code/index/java/nu/marginalia/index/ResultPriorityQueue.java new file mode 100644 index 00000000..d84c43d0 --- /dev/null +++ b/code/index/java/nu/marginalia/index/ResultPriorityQueue.java @@ -0,0 +1,107 @@ +package nu.marginalia.index; + +import it.unimi.dsi.fastutil.longs.LongOpenHashSet; +import nu.marginalia.api.searchquery.model.results.SearchResultItem; +import org.jetbrains.annotations.NotNull; + +import java.util.*; + +public class ResultPriorityQueue implements Iterable, + Collection { + private final int limit; + private final ArrayList backingList = new ArrayList<>(); + private final LongOpenHashSet idsInSet = new LongOpenHashSet(); + + public ResultPriorityQueue(int limit) { + this.limit = limit; + } + + public 
Iterator<SearchResultItem> iterator() {
+        return backingList.iterator();
+    }
+
+    @NotNull
+    @Override
+    public Object[] toArray() {
+        return backingList.toArray();
+    }
+
+    @NotNull
+    @Override
+    public <T> T[] toArray(@NotNull T[] a) {
+        return backingList.toArray(a);
+    }
+
+    /** Not supported: single-item insertion is rejected, use addAll instead.
+     *
+     * @throws UnsupportedOperationException always
+     */
+    @Override
+    public boolean add(SearchResultItem searchResultItem) {
+        throw new UnsupportedOperationException("Use addAll instead ya dingus");
+    }
+
+    /** Removes the item that has the same document id as the argument, both from
+     * the backing list and from the set of known document ids.
+     *
+     * @return true if an item was removed from the backing list
+     * @throws IllegalArgumentException if the argument is not a SearchResultItem
+     */
+    @Override
+    public synchronized boolean remove(Object o) {
+        if (o instanceof SearchResultItem sri) {
+            final long documentId = sri.getDocumentId();
+
+            // Forget the id and drop the item itself; the id may be known without the
+            // item being listed (addAll truncates the list but keeps the ids), so the
+            // return value is taken from the list, not from the id set.
+            idsInSet.remove(documentId);
+            return backingList.removeIf(item -> item.getDocumentId() == documentId);
+        }
+        throw new IllegalArgumentException("Object is not a SearchResultItem");
+    }
+
+    /** Returns true if every element of the argument is present in the backing list. */
+    @Override
+    public boolean containsAll(@NotNull Collection<?> c) {
+        // Answer from the backing list, the same way contains() does: idsInSet holds
+        // document ids (longs), so it cannot answer a membership question about items.
+        return backingList.containsAll(c);
+    }
+
+    /** Adds all items to the queue, and returns true if any items were added.
+     * Items whose document id has been seen before are skipped. The list is then
+     * re-sorted in natural order and truncated to the configured limit; the ids of
+     * truncated items stay in idsInSet, so they are not accepted again later.
+     * This is a thread-safe operation.
+     */
+    @Override
+    public synchronized boolean addAll(@NotNull Collection<? extends SearchResultItem> items) {
+        boolean itemsAdded = false;
+        for (var item: items) {
+            if (idsInSet.add(item.getDocumentId())) {
+                backingList.add(item);
+                itemsAdded = true;
+            }
+        }
+        if (!itemsAdded) {
+            return false;
+        }
+
+        backingList.sort(Comparator.naturalOrder());
+        if (backingList.size() > limit) {
+            backingList.subList(limit, backingList.size()).clear();
+        }
+
+        return true;
+    }
+
+    @Override
+    public boolean removeAll(@NotNull Collection<?> c) {
+        return backingList.removeAll(c);
+    }
+
+    @Override
+    public boolean retainAll(@NotNull Collection<?> c) {
+        return backingList.retainAll(c);
+    }
+
+    /** Removes all items and forgets all previously seen document ids. */
+    @Override
+    public synchronized void clear() {
+        backingList.clear();
+        idsInSet.clear();
+    }
+
+    public int size() {
+        return backingList.size();
+    }
+
+    @Override
+    public boolean isEmpty() {
+        return backingList.isEmpty();
+    }
+
+    @Override
+    public boolean contains(Object o) {
+        return backingList.contains(o);
+    }
+
+}
diff --git a/code/index/java/nu/marginalia/index/index/CombinedIndexReader.java b/code/index/java/nu/marginalia/index/index/CombinedIndexReader.java
new file mode 100644
index
00000000..01a94362 --- /dev/null +++ b/code/index/java/nu/marginalia/index/index/CombinedIndexReader.java @@ -0,0 +1,227 @@ +package nu.marginalia.index.index; + +import nu.marginalia.index.ReverseIndexReader; +import nu.marginalia.index.forward.ForwardIndexReader; +import nu.marginalia.index.model.QueryParams; +import nu.marginalia.index.query.IndexQuery; +import nu.marginalia.index.query.IndexQueryBuilder; +import nu.marginalia.index.query.IndexQueryPriority; +import nu.marginalia.index.query.filter.QueryFilterStepIf; +import nu.marginalia.index.query.limit.SpecificationLimitType; +import nu.marginalia.index.results.model.ids.CombinedDocIdList; +import nu.marginalia.index.results.model.ids.DocMetadataList; +import nu.marginalia.model.id.UrlIdCodec; +import nu.marginalia.model.idx.DocumentMetadata; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.time.Duration; +import java.util.List; +import java.util.concurrent.TimeUnit; + +/** A reader for the combined forward and reverse indexes */ +public class CombinedIndexReader { + + private final Logger logger = LoggerFactory.getLogger(getClass()); + + private final ForwardIndexReader forwardIndexReader; + private final ReverseIndexReader reverseIndexFullReader; + private final ReverseIndexReader reverseIndexPriorityReader; + + public CombinedIndexReader(ForwardIndexReader forwardIndexReader, + ReverseIndexReader reverseIndexFullReader, + ReverseIndexReader reverseIndexPriorityReader) { + this.forwardIndexReader = forwardIndexReader; + this.reverseIndexFullReader = reverseIndexFullReader; + this.reverseIndexPriorityReader = reverseIndexPriorityReader; + } + + public IndexQueryBuilderImpl newQueryBuilder(IndexQuery query) { + return new IndexQueryBuilderImpl(reverseIndexFullReader, reverseIndexPriorityReader, query); + } + + + /** Creates a query builder for terms in the priority index */ + public IndexQueryBuilder findPriorityWord(IndexQueryPriority priority, + long wordId, + int 
fetchSizeMultiplier) { + return newQueryBuilder(new IndexQuery( + List.of(reverseIndexPriorityReader.documents(wordId)), + priority, + fetchSizeMultiplier)) + .withSourceTerms(wordId); + } + + /** Creates a query builder for terms in the full index */ + public IndexQueryBuilder findFullWord(IndexQueryPriority priority, long wordId, int fetchSizeMultiplier) { + return newQueryBuilder( + new IndexQuery(List.of(reverseIndexFullReader.documents(wordId)), + priority, + fetchSizeMultiplier)) + .withSourceTerms(wordId); + } + + /** Creates a parameter matching filter step for the provided parameters */ + public QueryFilterStepIf filterForParams(QueryParams params) { + return new ParamMatchingQueryFilter(params, forwardIndexReader); + } + + /** Returns the number of occurrences of the word in the full index */ + public long numHits(long word) { + return reverseIndexFullReader.numDocuments(word); + } + + /** Returns the number of occurrences of the word in the priority index */ + public long numHitsPrio(long word) { + return reverseIndexPriorityReader.numDocuments(word); + } + + /** Retrieves the term metadata for the specified word for the provided documents */ + public DocMetadataList getMetadata(long wordId, CombinedDocIdList docIds) { + return new DocMetadataList(reverseIndexFullReader.getTermMeta(wordId, docIds.array())); + } + + /** Retrieves the document metadata for the specified document */ + public long getDocumentMetadata(long docId) { + return forwardIndexReader.getDocMeta(docId); + } + + /** Returns the total number of documents in the index */ + public int totalDocCount() { + return forwardIndexReader.totalDocCount(); + } + + /** Retrieves the HTML features for the specified document */ + public int getHtmlFeatures(long docId) { + return forwardIndexReader.getHtmlFeatures(docId); + } + + /** Close the indexes (this is not done immediately) + * */ + public void close() throws InterruptedException { + /* Delay the invocation of close method to allow for a clean 
shutdown of the service. + * + * This is especially important when using Unsafe-based LongArrays, since we have + * concurrent access to the underlying memory-mapped file. If pull the rug from + * under the caller by closing the file, we'll get a SIGSEGV. Even with MemorySegment, + * we'll get ugly stacktraces if we close the file while a thread is still accessing it. + */ + + delayedCall(forwardIndexReader::close, Duration.ofMinutes(1)); + delayedCall(reverseIndexFullReader::close, Duration.ofMinutes(1)); + delayedCall(reverseIndexPriorityReader::close, Duration.ofMinutes(1)); + } + + + private void delayedCall(Runnable call, Duration delay) throws InterruptedException { + Thread.ofPlatform().start(() -> { + try { + TimeUnit.SECONDS.sleep(delay.toSeconds()); + call.run(); + } catch (InterruptedException e) { + logger.error("Interrupted", e); + } + }); + } + + /** Returns true if index data is available */ + public boolean isLoaded() { + // We only need to check one of the readers, as they are either all loaded or none are + return forwardIndexReader.isLoaded(); + } +} + +class ParamMatchingQueryFilter implements QueryFilterStepIf { + private final QueryParams params; + private final ForwardIndexReader forwardIndexReader; + + public ParamMatchingQueryFilter(QueryParams params, + ForwardIndexReader forwardIndexReader) + { + this.params = params; + this.forwardIndexReader = forwardIndexReader; + } + + @Override + public boolean test(long combinedId) { + long docId = UrlIdCodec.removeRank(combinedId); + int domainId = UrlIdCodec.getDomainId(docId); + + long meta = forwardIndexReader.getDocMeta(docId); + + if (!validateDomain(domainId, meta)) { + return false; + } + + if (!validateQuality(meta)) { + return false; + } + + if (!validateYear(meta)) { + return false; + } + + if (!validateSize(meta)) { + return false; + } + + if (!validateRank(meta)) { + return false; + } + + return true; + } + + private boolean validateDomain(int domainId, long meta) { + return 
params.searchSet().contains(domainId, meta); + } + + private boolean validateQuality(long meta) { + final var limit = params.qualityLimit(); + + if (limit.type() == SpecificationLimitType.NONE) { + return true; + } + + final int quality = DocumentMetadata.decodeQuality(meta); + + return limit.test(quality); + } + + private boolean validateYear(long meta) { + if (params.year().type() == SpecificationLimitType.NONE) + return true; + + int postVal = DocumentMetadata.decodeYear(meta); + + return params.year().test(postVal); + } + + private boolean validateSize(long meta) { + if (params.size().type() == SpecificationLimitType.NONE) + return true; + + int postVal = DocumentMetadata.decodeSize(meta); + + return params.size().test(postVal); + } + + private boolean validateRank(long meta) { + if (params.rank().type() == SpecificationLimitType.NONE) + return true; + + int postVal = DocumentMetadata.decodeRank(meta); + + return params.rank().test(postVal); + } + + @Override + public double cost() { + return 32; + } + + @Override + public String describe() { + return getClass().getSimpleName(); + } +} \ No newline at end of file diff --git a/code/services-core/index-service/src/main/java/nu/marginalia/index/index/SearchIndexQueryBuilder.java b/code/index/java/nu/marginalia/index/index/IndexQueryBuilderImpl.java similarity index 84% rename from code/services-core/index-service/src/main/java/nu/marginalia/index/index/SearchIndexQueryBuilder.java rename to code/index/java/nu/marginalia/index/index/IndexQueryBuilderImpl.java index 1d1396f9..825728ae 100644 --- a/code/services-core/index-service/src/main/java/nu/marginalia/index/index/SearchIndexQueryBuilder.java +++ b/code/index/java/nu/marginalia/index/index/IndexQueryBuilderImpl.java @@ -6,7 +6,7 @@ import nu.marginalia.index.query.IndexQuery; import nu.marginalia.index.query.IndexQueryBuilder; import nu.marginalia.index.query.filter.QueryFilterStepIf; -public class SearchIndexQueryBuilder implements IndexQueryBuilder { +public 
class IndexQueryBuilderImpl implements IndexQueryBuilder { private final IndexQuery query; private final ReverseIndexReader reverseIndexFullReader; private final ReverseIndexReader reverseIndexPrioReader; @@ -19,15 +19,19 @@ public class SearchIndexQueryBuilder implements IndexQueryBuilder { * */ private final TLongHashSet alreadyConsideredTerms = new TLongHashSet(); - SearchIndexQueryBuilder(ReverseIndexReader reverseIndexFullReader, - ReverseIndexReader reverseIndexPrioReader, - IndexQuery query, long... sourceTerms) + IndexQueryBuilderImpl(ReverseIndexReader reverseIndexFullReader, + ReverseIndexReader reverseIndexPrioReader, + IndexQuery query) { this.query = query; this.reverseIndexFullReader = reverseIndexFullReader; this.reverseIndexPrioReader = reverseIndexPrioReader; + } + public IndexQueryBuilder withSourceTerms(long... sourceTerms) { alreadyConsideredTerms.addAll(sourceTerms); + + return this; } public IndexQueryBuilder alsoFull(long termId) { @@ -66,4 +70,5 @@ public class SearchIndexQueryBuilder implements IndexQueryBuilder { return query; } + } diff --git a/code/index/java/nu/marginalia/index/index/IndexQueryService.java b/code/index/java/nu/marginalia/index/index/IndexQueryService.java new file mode 100644 index 00000000..72a13910 --- /dev/null +++ b/code/index/java/nu/marginalia/index/index/IndexQueryService.java @@ -0,0 +1,64 @@ +package nu.marginalia.index.index; + +import com.google.inject.Inject; +import com.google.inject.Singleton; +import it.unimi.dsi.fastutil.longs.LongArrayList; +import nu.marginalia.api.searchquery.model.query.SearchSubquery; +import nu.marginalia.array.buffer.LongQueryBuffer; +import nu.marginalia.index.model.QueryParams; +import nu.marginalia.index.model.SearchTerms; +import nu.marginalia.index.query.IndexQuery; +import nu.marginalia.index.query.IndexSearchBudget; +import nu.marginalia.index.results.model.ids.CombinedDocIdList; +import org.roaringbitmap.longlong.Roaring64Bitmap; +import org.slf4j.Logger; +import 
org.slf4j.LoggerFactory;
+import org.slf4j.Marker;
+import org.slf4j.MarkerFactory;
+
+import java.util.function.Consumer;
+
+@Singleton
+public class IndexQueryService {
+    private final Marker queryMarker = MarkerFactory.getMarker("QUERY");
+
+    private static final Logger logger = LoggerFactory.getLogger(IndexQueryService.class);
+    private final StatefulIndex index;
+
+    @Inject
+    public IndexQueryService(StatefulIndex index) {
+        this.index = index;
+    }
+
+    /** Executes a single index query and hands the matching document ids to the
+     * drain in batches. The query is polled until it has no more results or the
+     * time budget is spent. A batch is delivered as soon as at least 512 ids have
+     * accumulated; whatever is left when the loop ends is delivered as a final,
+     * possibly smaller, batch.
+     *
+     * @param query   the index query to pull results from
+     * @param timeout the time budget; no further results are fetched once it is spent
+     * @param drain   receives each batch of document ids
+     * */
+    public void evaluateSubquery(IndexQuery query,
+                                 IndexSearchBudget timeout,
+                                 Consumer<CombinedDocIdList> drain)
+    {
+        final LongArrayList results = new LongArrayList(512);
+
+        // These queries are different indices for one subquery
+        final LongQueryBuffer buffer = new LongQueryBuffer(512);
+
+        while (query.hasMore() && timeout.hasTimeLeft())
+        {
+            buffer.reset();
+            query.getMoreResults(buffer);
+
+            results.addElements(0, buffer.data, 0, buffer.end);
+
+            // Flush once a full batch has accumulated.  Flushing only while the list
+            // is *below* the batch size would hold back every result from the first
+            // full buffer onwards until the loop ends, when the budget may be spent.
+            if (results.size() >= 512) {
+                drain.accept(new CombinedDocIdList(results));
+                results.clear();
+            }
+        }
+
+        // Deliver the remainder that never reached a full batch
+        if (!results.isEmpty()) {
+            drain.accept(new CombinedDocIdList(results));
+        }
+    }
+
+}
diff --git a/code/services-core/index-service/src/main/java/nu/marginalia/index/index/SearchIndex.java b/code/index/java/nu/marginalia/index/index/StatefulIndex.java
similarity index 71%
rename from code/services-core/index-service/src/main/java/nu/marginalia/index/index/SearchIndex.java
rename to code/index/java/nu/marginalia/index/index/StatefulIndex.java
index b21a3e72..ae2a8f6e 100644
--- a/code/services-core/index-service/src/main/java/nu/marginalia/index/index/SearchIndex.java
+++ b/code/index/java/nu/marginalia/index/index/StatefulIndex.java
@@ -2,9 +2,14 @@ package nu.marginalia.index.index;
 import com.google.inject.Inject;
 import
com.google.inject.Singleton; -import nu.marginalia.index.IndexServicesFactory; +import nu.marginalia.index.results.model.ids.CombinedDocIdList; +import nu.marginalia.index.results.model.ids.DocMetadataList; +import nu.marginalia.index.model.QueryParams; +import nu.marginalia.index.IndexFactory; +import nu.marginalia.index.model.SearchTerms; import nu.marginalia.index.query.*; import nu.marginalia.index.query.filter.QueryFilterStepFromPredicate; +import nu.marginalia.index.results.model.ids.TermIdList; import nu.marginalia.service.control.ServiceEventLog; import org.jetbrains.annotations.NotNull; import org.slf4j.Logger; @@ -24,22 +29,20 @@ import java.util.function.LongPredicate; * */ @Singleton -public class SearchIndex { +public class StatefulIndex { private final Logger logger = LoggerFactory.getLogger(getClass()); - private volatile SearchIndexReader indexReader; - private final ReadWriteLock indexReplacementLock = new ReentrantReadWriteLock(); - @NotNull - private final IndexServicesFactory servicesFactory; - + private final IndexFactory servicesFactory; private final ServiceEventLog eventLog; + private volatile CombinedIndexReader combinedIndexReader; + @Inject - public SearchIndex(@NotNull IndexServicesFactory servicesFactory, - ServiceEventLog eventLog) { + public StatefulIndex(@NotNull IndexFactory servicesFactory, + ServiceEventLog eventLog) { this.servicesFactory = servicesFactory; this.eventLog = eventLog; } @@ -51,8 +54,8 @@ public class SearchIndex { lock.lock(); logger.info("Initializing index"); - if (indexReader == null) { - indexReader = servicesFactory.getSearchIndexReader(); + if (combinedIndexReader == null) { + combinedIndexReader = servicesFactory.getCombinedIndexReader(); eventLog.logEvent("INDEX-INIT", "Index loaded"); } else { @@ -73,12 +76,12 @@ public class SearchIndex { try { lock.lock(); - if (indexReader != null) - indexReader.close(); + if (combinedIndexReader != null) + combinedIndexReader.close(); servicesFactory.switchFiles(); - 
indexReader = servicesFactory.getSearchIndexReader(); + combinedIndexReader = servicesFactory.getCombinedIndexReader(); eventLog.logEvent("INDEX-SWITCH-OK", ""); } @@ -97,16 +100,16 @@ public class SearchIndex { /** Returns true if the service has initialized */ public boolean isAvailable() { - return indexReader != null; + return combinedIndexReader != null; } /** Stronger version of isAvailable() that also checks that the index is loaded */ public boolean isLoaded() { - return indexReader != null && indexReader.isLoaded(); + return combinedIndexReader != null && combinedIndexReader.isLoaded(); } - public List createQueries(SearchIndexSearchTerms terms, IndexQueryParams params, LongPredicate includePred) { + public List createQueries(SearchTerms terms, QueryParams params) { if (!isLoaded()) { logger.warn("Index reader not ready"); @@ -130,7 +133,7 @@ public class SearchIndex { if (orderedIncludesPrio.length > 1) { for (int i = 0; i + 1 < orderedIncludesPrio.length; i++) { for (int j = i + 1; j < orderedIncludesPrio.length; j++) { - var entrySource = indexReader + var entrySource = combinedIndexReader .findPriorityWord(IndexQueryPriority.BEST, orderedIncludesPrio[i], fetchSizeMultiplier) .alsoPrio(orderedIncludesPrio[j]); queryHeads.add(entrySource); @@ -140,12 +143,12 @@ public class SearchIndex { // Next consider entries that appear only once in the priority index for (var wordId : orderedIncludesPrio) { - queryHeads.add(indexReader.findPriorityWord(IndexQueryPriority.GOOD, wordId, fetchSizeMultiplier)); + queryHeads.add(combinedIndexReader.findPriorityWord(IndexQueryPriority.GOOD, wordId, fetchSizeMultiplier)); } // Finally consider terms in the full index, but only do this for sufficiently long queries // as short queries tend to be too underspecified to produce anything other than CPU warmth - queryHeads.add(indexReader.findFullWord(IndexQueryPriority.FALLBACK, orderedIncludes[0], fetchSizeMultiplier)); + 
queryHeads.add(combinedIndexReader.findFullWord(IndexQueryPriority.FALLBACK, orderedIncludes[0], fetchSizeMultiplier)); for (var query : queryHeads) { if (query == null) { @@ -160,13 +163,9 @@ public class SearchIndex { query = query.notFull(term); } - // This filtering step needs to happen only on terms that have passed all term-based filtering steps, - // it's essentially a memoization of the params filtering job which is relatively expensive - query = query.addInclusionFilter(new QueryFilterStepFromPredicate(includePred)); - // Run these last, as they'll worst-case cause as many page faults as there are // items in the buffer - queries.add(query.addInclusionFilter(indexReader.filterForParams(params)).build()); + queries.add(query.addInclusionFilter(combinedIndexReader.filterForParams(params)).build()); } return queries; @@ -174,15 +173,15 @@ public class SearchIndex { private int compareKeywords(long a, long b) { return Long.compare( - indexReader.numHits(a), - indexReader.numHits(b) + combinedIndexReader.numHits(a), + combinedIndexReader.numHits(b) ); } private int compareKeywordsPrio(long a, long b) { return Long.compare( - indexReader.numHitsPrio(a), - indexReader.numHitsPrio(b) + combinedIndexReader.numHitsPrio(a), + combinedIndexReader.numHitsPrio(b) ); } @@ -190,25 +189,25 @@ public class SearchIndex { * document identifiers provided; with metadata for termId. The input array * docs[] *must* be sorted. 
*/ - public long[] getTermMetadata(long termId, long[] docs) { - return indexReader.getMetadata(termId, docs); + public DocMetadataList getTermMetadata(long termId, CombinedDocIdList docs) { + return combinedIndexReader.getMetadata(termId, docs); } public long getDocumentMetadata(long docId) { - return indexReader.getDocumentMetadata(docId); + return combinedIndexReader.getDocumentMetadata(docId); } public int getHtmlFeatures(long docId) { - return indexReader.getHtmlFeatures(docId); + return combinedIndexReader.getHtmlFeatures(docId); } public int getTotalDocCount() { - return indexReader.totalDocCount(); + return combinedIndexReader.totalDocCount(); } public int getTermFrequency(long id) { - return (int) indexReader.numHits(id); + return (int) combinedIndexReader.numHits(id); } public int getTermFrequencyPrio(long id) { - return (int) indexReader.numHitsPrio(id); + return (int) combinedIndexReader.numHitsPrio(id); } } diff --git a/code/features-index/index-query/src/main/java/nu/marginalia/index/query/IndexQueryParams.java b/code/index/java/nu/marginalia/index/model/QueryParams.java similarity index 57% rename from code/features-index/index-query/src/main/java/nu/marginalia/index/query/IndexQueryParams.java rename to code/index/java/nu/marginalia/index/model/QueryParams.java index 46224638..56e40551 100644 --- a/code/features-index/index-query/src/main/java/nu/marginalia/index/query/IndexQueryParams.java +++ b/code/index/java/nu/marginalia/index/model/QueryParams.java @@ -1,7 +1,7 @@ -package nu.marginalia.index.query; +package nu.marginalia.index.model; -import nu.marginalia.index.query.limit.QueryStrategy; import nu.marginalia.index.searchset.SearchSet; +import nu.marginalia.index.query.limit.QueryStrategy; import nu.marginalia.index.query.limit.SpecificationLimit; /** IndexQueryParams is a set of parameters for a query. @@ -10,19 +10,16 @@ import nu.marginalia.index.query.limit.SpecificationLimit; * @param year The year limit. * @param size The size limit. 
Eliminates results from domains that do not satisfy the size criteria. * @param rank The rank limit. Eliminates results from domains that do not satisfy the domain rank criteria. - * @param domainCount The domain count limit. Filters out results from domains that do not contain enough - * documents that match the query. * @param searchSet The search set. Limits the search to a set of domains. * @param queryStrategy The query strategy. May impose additional constraints on the query, such as requiring * the keywords to appear in the title, or in the domain. */ -public record IndexQueryParams(SpecificationLimit qualityLimit, - SpecificationLimit year, - SpecificationLimit size, - SpecificationLimit rank, - SpecificationLimit domainCount, - SearchSet searchSet, - QueryStrategy queryStrategy +public record QueryParams(SpecificationLimit qualityLimit, + SpecificationLimit year, + SpecificationLimit size, + SpecificationLimit rank, + SearchSet searchSet, + QueryStrategy queryStrategy ) { diff --git a/code/services-core/index-service/src/main/java/nu/marginalia/index/svc/SearchParameters.java b/code/index/java/nu/marginalia/index/model/SearchParameters.java similarity index 52% rename from code/services-core/index-service/src/main/java/nu/marginalia/index/svc/SearchParameters.java rename to code/index/java/nu/marginalia/index/model/SearchParameters.java index 52195a6b..0594bd68 100644 --- a/code/services-core/index-service/src/main/java/nu/marginalia/index/svc/SearchParameters.java +++ b/code/index/java/nu/marginalia/index/model/SearchParameters.java @@ -1,53 +1,42 @@ -package nu.marginalia.index.svc; +package nu.marginalia.index.model; import gnu.trove.set.hash.TLongHashSet; -import nu.marginalia.index.api.RpcIndexQuery; -import nu.marginalia.index.api.RpcSpecLimit; -import nu.marginalia.index.client.IndexProtobufCodec; -import nu.marginalia.index.client.model.query.SearchSpecification; -import nu.marginalia.index.client.model.query.SearchSubquery; -import 
nu.marginalia.index.client.model.results.Bm25Parameters; -import nu.marginalia.index.client.model.results.ResultRankingParameters; -import nu.marginalia.index.index.SearchIndex; -import nu.marginalia.index.index.SearchIndexSearchTerms; +import nu.marginalia.api.searchquery.RpcIndexQuery; +import nu.marginalia.api.searchquery.model.query.SearchSpecification; +import nu.marginalia.api.searchquery.model.query.SearchSubquery; +import nu.marginalia.api.searchquery.model.results.ResultRankingParameters; +import nu.marginalia.api.searchquery.IndexProtobufCodec; +import nu.marginalia.index.index.StatefulIndex; import nu.marginalia.index.query.IndexQuery; -import nu.marginalia.index.query.IndexQueryParams; import nu.marginalia.index.query.IndexSearchBudget; -import nu.marginalia.index.query.limit.QueryLimits; import nu.marginalia.index.query.limit.QueryStrategy; -import nu.marginalia.index.query.limit.SpecificationLimit; -import nu.marginalia.index.query.limit.SpecificationLimitType; import nu.marginalia.index.searchset.SearchSet; import java.util.ArrayList; import java.util.List; +import static nu.marginalia.api.searchquery.IndexProtobufCodec.convertSpecLimit; + public class SearchParameters { /** * This is how many results matching the keywords we'll try to get * before evaluating them for the best result. 
*/ - final int fetchSize; - final IndexSearchBudget budget; - final List subqueries; - final IndexQueryParams queryParams; - final ResultRankingParameters rankingParams; + public final int fetchSize; + public final IndexSearchBudget budget; + public final List subqueries; + public final QueryParams queryParams; + public final ResultRankingParameters rankingParams; - final int limitByDomain; - final int limitTotal; + public final int limitByDomain; + public final int limitTotal; // mutable: /** * An estimate of how much data has been read */ - long dataCost = 0; - - /** - * A set of id:s considered during each subquery, - * for deduplication - */ - final TLongHashSet consideredUrlIds; + public long dataCost = 0; public SearchParameters(SearchSpecification specsSet, SearchSet searchSet) { var limits = specsSet.queryLimits; @@ -58,14 +47,11 @@ public class SearchParameters { this.limitByDomain = limits.resultsByDomain(); this.limitTotal = limits.resultsTotal(); - this.consideredUrlIds = CachedObjects.getConsideredUrlsMap(); - - queryParams = new IndexQueryParams( + queryParams = new QueryParams( specsSet.quality, specsSet.year, specsSet.size, specsSet.rank, - specsSet.domainCount, searchSet, specsSet.queryStrategy); @@ -76,7 +62,11 @@ public class SearchParameters { var limits = IndexProtobufCodec.convertQueryLimits(request.getQueryLimits()); this.fetchSize = limits.fetchSize(); - this.budget = new IndexSearchBudget(limits.timeoutMs()); + + // The time budget is halved because this is the point when we start to + // wrap up the search and return the results. 
+ this.budget = new IndexSearchBudget(limits.timeoutMs() / 2); + this.subqueries = new ArrayList<>(request.getSubqueriesCount()); for (int i = 0; i < request.getSubqueriesCount(); i++) { this.subqueries.add(IndexProtobufCodec.convertSearchSubquery(request.getSubqueries(i))); @@ -84,29 +74,22 @@ public class SearchParameters { this.limitByDomain = limits.resultsByDomain(); this.limitTotal = limits.resultsTotal(); - this.consideredUrlIds = CachedObjects.getConsideredUrlsMap(); - - queryParams = new IndexQueryParams( - IndexProtobufCodec.convertSpecLimit(request.getQuality()), - IndexProtobufCodec.convertSpecLimit(request.getYear()), - IndexProtobufCodec.convertSpecLimit(request.getSize()), - IndexProtobufCodec.convertSpecLimit(request.getRank()), - IndexProtobufCodec.convertSpecLimit(request.getDomainCount()), + queryParams = new QueryParams( + convertSpecLimit(request.getQuality()), + convertSpecLimit(request.getYear()), + convertSpecLimit(request.getSize()), + convertSpecLimit(request.getRank()), searchSet, QueryStrategy.valueOf(request.getQueryStrategy())); rankingParams = IndexProtobufCodec.convertRankingParameterss(request.getParameters()); } - List createIndexQueries(SearchIndex index, SearchIndexSearchTerms terms) { - return index.createQueries(terms, queryParams, consideredUrlIds::add); - } - - boolean hasTimeLeft() { + public boolean hasTimeLeft() { return budget.hasTimeLeft(); } - long getDataCost() { + public long getDataCost() { return dataCost; } diff --git a/code/index/java/nu/marginalia/index/model/SearchTerms.java b/code/index/java/nu/marginalia/index/model/SearchTerms.java new file mode 100644 index 00000000..dc62ae11 --- /dev/null +++ b/code/index/java/nu/marginalia/index/model/SearchTerms.java @@ -0,0 +1,70 @@ +package nu.marginalia.index.model; + +import it.unimi.dsi.fastutil.longs.LongArrayList; +import it.unimi.dsi.fastutil.longs.LongComparator; +import it.unimi.dsi.fastutil.longs.LongList; +import it.unimi.dsi.fastutil.longs.LongOpenHashSet; 
+import nu.marginalia.api.searchquery.model.query.SearchSubquery; + +import java.util.ArrayList; +import java.util.List; + +import static nu.marginalia.index.model.SearchTermsUtil.getWordId; + +public record SearchTerms( + LongList includes, + LongList excludes, + LongList priority, + List coherences + ) +{ + public SearchTerms(SearchSubquery subquery) { + this(new LongArrayList(), + new LongArrayList(), + new LongArrayList(), + new ArrayList<>()); + + for (var word : subquery.searchTermsInclude) { + includes.add(getWordId(word)); + } + for (var word : subquery.searchTermsAdvice) { + // This looks like a bug, but it's not + includes.add(getWordId(word)); + } + + + for (var coherence : subquery.searchTermCoherences) { + LongList parts = new LongArrayList(coherence.size()); + + for (var word : coherence) { + parts.add(getWordId(word)); + } + + coherences.add(parts); + } + + for (var word : subquery.searchTermsExclude) { + excludes.add(getWordId(word)); + } + for (var word : subquery.searchTermsPriority) { + priority.add(getWordId(word)); + } + } + + public boolean isEmpty() { + return includes.isEmpty(); + } + + public long[] sortedDistinctIncludes(LongComparator comparator) { + if (includes.isEmpty()) + return includes.toLongArray(); + + LongList list = new LongArrayList(new LongOpenHashSet(includes)); + list.sort(comparator); + return list.toLongArray(); + } + + public int size() { + return includes.size() + excludes.size() + priority.size(); + } +} diff --git a/code/index/java/nu/marginalia/index/model/SearchTermsUtil.java b/code/index/java/nu/marginalia/index/model/SearchTermsUtil.java new file mode 100644 index 00000000..9797ca95 --- /dev/null +++ b/code/index/java/nu/marginalia/index/model/SearchTermsUtil.java @@ -0,0 +1,33 @@ +package nu.marginalia.index.model; + +import nu.marginalia.api.searchquery.model.query.SearchSubquery; +import nu.marginalia.hash.MurmurHash3_128; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +public 
class SearchTermsUtil { + + /** Extract all include-terms from the specified subqueries, + * and return a map of the terms and their termIds. + */ + public static Map getAllIncludeTerms(List subqueries) { + Map ret = new HashMap<>(); + + for (var subquery : subqueries) { + for (var include : subquery.searchTermsInclude) { + ret.computeIfAbsent(include, i -> getWordId(include)); + } + } + + return ret; + } + + private static final MurmurHash3_128 hasher = new MurmurHash3_128(); + + /** Translate the word to a unique id. */ + public static long getWordId(String s) { + return hasher.hashKeyword(s); + } +} diff --git a/code/index/java/nu/marginalia/index/results/IndexMetadataService.java b/code/index/java/nu/marginalia/index/results/IndexMetadataService.java new file mode 100644 index 00000000..d5356f53 --- /dev/null +++ b/code/index/java/nu/marginalia/index/results/IndexMetadataService.java @@ -0,0 +1,85 @@ +package nu.marginalia.index.results; + +import com.google.inject.Inject; +import gnu.trove.map.hash.TObjectLongHashMap; +import it.unimi.dsi.fastutil.longs.Long2ObjectArrayMap; +import it.unimi.dsi.fastutil.longs.LongArrayList; +import nu.marginalia.api.searchquery.model.query.SearchSubquery; +import nu.marginalia.index.index.StatefulIndex; +import nu.marginalia.index.model.SearchTermsUtil; +import nu.marginalia.index.results.model.QuerySearchTerms; +import nu.marginalia.index.results.model.TermCoherenceGroupList; +import nu.marginalia.index.results.model.TermMetadataForCombinedDocumentIds; +import nu.marginalia.index.results.model.ids.CombinedDocIdList; +import nu.marginalia.index.results.model.ids.TermIdList; + +import java.util.ArrayList; +import java.util.List; + +import static nu.marginalia.index.results.model.TermCoherenceGroupList.TermCoherenceGroup; +import static nu.marginalia.index.results.model.TermMetadataForCombinedDocumentIds.DocumentsWithMetadata; + +public class IndexMetadataService { + private final StatefulIndex index; + + + @Inject + public
IndexMetadataService(StatefulIndex index) { + this.index = index; + } + + public TermMetadataForCombinedDocumentIds getTermMetadataForDocuments(CombinedDocIdList combinedIdsAll, + TermIdList termIdsList) + { + Long2ObjectArrayMap termdocToMeta = + new Long2ObjectArrayMap<>(termIdsList.size()); + + for (long termId : termIdsList.array()) { + var metadata = index.getTermMetadata(termId, combinedIdsAll); + termdocToMeta.put(termId, + new DocumentsWithMetadata(combinedIdsAll, metadata)); + } + + return new TermMetadataForCombinedDocumentIds(termdocToMeta); + } + + public QuerySearchTerms getSearchTerms(List searchTermVariants) { + + LongArrayList termIdsList = new LongArrayList(); + + TObjectLongHashMap termToId = new TObjectLongHashMap<>(10, 0.75f, -1); + + for (var subquery : searchTermVariants) { + for (var term : subquery.searchTermsInclude) { + if (termToId.containsKey(term)) { + continue; + } + + long id = SearchTermsUtil.getWordId(term); + termIdsList.add(id); + termToId.put(term, id); + } + } + + return new QuerySearchTerms(termToId, + new TermIdList(termIdsList), + getTermCoherences(searchTermVariants)); + } + + + private TermCoherenceGroupList getTermCoherences(List searchTermVariants) { + List coherences = new ArrayList<>(); + + for (var subquery : searchTermVariants) { + for (var coh : subquery.searchTermCoherences) { + coherences.add(new TermCoherenceGroup(coh)); + } + + // It's assumed each subquery has identical coherences + break; + } + + return new TermCoherenceGroupList(coherences); + } + +} diff --git a/code/index/java/nu/marginalia/index/results/IndexResultDomainDeduplicator.java b/code/index/java/nu/marginalia/index/results/IndexResultDomainDeduplicator.java new file mode 100644 index 00000000..d87b39fe --- /dev/null +++ b/code/index/java/nu/marginalia/index/results/IndexResultDomainDeduplicator.java @@ -0,0 +1,27 @@ +package nu.marginalia.index.results; + +import gnu.trove.map.TLongIntMap; +import gnu.trove.map.hash.TLongIntHashMap; +import 
nu.marginalia.api.searchquery.model.results.SearchResultItem; + +public class IndexResultDomainDeduplicator { + final TLongIntMap resultsByDomainId = new TLongIntHashMap(2048, 0.5f, -1, 0); + final int limitByDomain; + + public IndexResultDomainDeduplicator(int limitByDomain) { + this.limitByDomain = limitByDomain; + } + + public boolean test(SearchResultItem item) { + final long key = item.getDomainId(); + + return resultsByDomainId.adjustOrPutValue(key, 1, 1) <= limitByDomain; + } + + public int getCount(SearchResultItem item) { + final long key = item.getDomainId(); + + return resultsByDomainId.get(key); + } +} + diff --git a/code/services-core/index-service/src/main/java/nu/marginalia/index/results/IndexResultValuator.java b/code/index/java/nu/marginalia/index/results/IndexResultValuationContext.java similarity index 68% rename from code/services-core/index-service/src/main/java/nu/marginalia/index/results/IndexResultValuator.java rename to code/index/java/nu/marginalia/index/results/IndexResultValuationContext.java index 1e51fbd6..967a600f 100644 --- a/code/services-core/index-service/src/main/java/nu/marginalia/index/results/IndexResultValuator.java +++ b/code/index/java/nu/marginalia/index/results/IndexResultValuationContext.java @@ -1,55 +1,54 @@ package nu.marginalia.index.results; -import gnu.trove.list.TLongList; -import gnu.trove.set.hash.TLongHashSet; -import nu.marginalia.index.client.model.results.SearchResultPreliminaryScore; -import nu.marginalia.index.client.model.results.ResultRankingContext; +import nu.marginalia.api.searchquery.model.query.SearchSubquery; +import nu.marginalia.api.searchquery.model.results.ResultRankingContext; +import nu.marginalia.api.searchquery.model.results.SearchResultItem; +import nu.marginalia.api.searchquery.model.results.SearchResultKeywordScore; +import nu.marginalia.index.index.StatefulIndex; +import nu.marginalia.index.results.model.ids.CombinedDocIdList; +import nu.marginalia.index.model.QueryParams; +import 
nu.marginalia.index.results.model.QuerySearchTerms; +import nu.marginalia.index.results.model.TermMetadataForCombinedDocumentIds; import nu.marginalia.model.id.UrlIdCodec; import nu.marginalia.model.idx.WordFlags; import nu.marginalia.model.idx.WordMetadata; import nu.marginalia.index.query.limit.QueryStrategy; -import nu.marginalia.index.client.model.results.SearchResultItem; -import nu.marginalia.index.client.model.results.SearchResultKeywordScore; -import nu.marginalia.index.client.model.query.SearchSubquery; -import nu.marginalia.index.query.IndexQueryParams; -import nu.marginalia.ranking.ResultValuator; +import nu.marginalia.ranking.results.ResultValuator; import javax.annotation.Nullable; -import java.util.Arrays; import java.util.List; -public class IndexResultValuator { - private final IndexMetadataService metadataService; +/** This class is responsible for calculating the score of a search result. + * It holds the data required to perform the scoring, as there are strong + * reasons to cache this data, and performs the calculations */ +public class IndexResultValuationContext { + private final StatefulIndex statefulIndex; private final List> searchTermVariants; - private final IndexQueryParams queryParams; - private final TLongHashSet resultsWithPriorityTerms; + private final QueryParams queryParams; - private final IndexMetadataService.TermMetadataForDocuments termMetadataForDocuments; - private final IndexMetadataService.QuerySearchTerms searchTerms; + private final TermMetadataForCombinedDocumentIds termMetadataForCombinedDocumentIds; + private final QuerySearchTerms searchTerms; private final ResultRankingContext rankingContext; private final ResultValuator searchResultValuator; - public IndexResultValuator(IndexMetadataService metadataService, - TLongList results, - ResultRankingContext rankingContext, - List subqueries, - IndexQueryParams queryParams + public IndexResultValuationContext(IndexMetadataService metadataService, + ResultValuator
searchResultValuator, + CombinedDocIdList ids, + StatefulIndex statefulIndex, + ResultRankingContext rankingContext, + List subqueries, + QueryParams queryParams ) { + this.statefulIndex = statefulIndex; this.rankingContext = rankingContext; - this.searchResultValuator = metadataService.getSearchResultValuator(); - - final long[] ids = results.toArray(); - Arrays.sort(ids); + this.searchResultValuator = searchResultValuator; this.searchTermVariants = subqueries.stream().map(sq -> sq.searchTermsInclude).distinct().toList(); this.queryParams = queryParams; - this.metadataService = metadataService; this.searchTerms = metadataService.getSearchTerms(subqueries); - this.termMetadataForDocuments = metadataService.getTermMetadataForDocuments(ids, searchTerms.termIdsAll); - - resultsWithPriorityTerms = metadataService.getResultsWithPriorityTerms(subqueries, ids); + this.termMetadataForCombinedDocumentIds = metadataService.getTermMetadataForDocuments(ids, searchTerms.termIdsAll); } private final long flagsFilterMask = @@ -58,19 +57,19 @@ public class IndexResultValuator { @Nullable public SearchResultItem calculatePreliminaryScore(long combinedId) { - final long docId = UrlIdCodec.removeRank(combinedId); + long docId = UrlIdCodec.removeRank(combinedId); - if (!termMetadataForDocuments.testCoherence(combinedId, searchTerms.coherences)) + if (!searchTerms.coherences.test(termMetadataForCombinedDocumentIds, docId)) return null; - long docMetadata = metadataService.getDocumentMetadata(docId); - int htmlFeatures = metadataService.getHtmlFeatures(docId); + long docMetadata = statefulIndex.getDocumentMetadata(docId); + int htmlFeatures = statefulIndex.getHtmlFeatures(docId); int maxFlagsCount = 0; boolean anyAllSynthetic = false; int maxPositionsSet = 0; - SearchResultItem searchResult = new SearchResultItem(combinedId, + SearchResultItem searchResult = new SearchResultItem(docId, searchTermVariants.stream().mapToInt(List::size).sum()); for (int querySetId = 0; @@ -86,9 +85,9 @@ 
public class IndexResultValuator { for (int termIdx = 0; termIdx < termList.size(); termIdx++) { String searchTerm = termList.get(termIdx); - long termMetadata = termMetadataForDocuments.getTermMetadata( + long termMetadata = termMetadataForCombinedDocumentIds.getTermMetadata( searchTerms.getIdForTerm(searchTerm), - searchResult.combinedId + combinedId ); var score = new SearchResultKeywordScore( @@ -96,8 +95,7 @@ public class IndexResultValuator { searchTerm, termMetadata, docMetadata, - htmlFeatures, - resultsWithPriorityTerms.contains(searchResult.combinedId) + htmlFeatures ); synthetic &= WordFlags.Synthetic.isPresent(termMetadata); @@ -132,10 +130,7 @@ public class IndexResultValuator { 5000, // use a dummy value here as it's not present in the index rankingContext); - searchResult.setScore(new SearchResultPreliminaryScore( - resultsWithPriorityTerms.contains(combinedId), - score - )); + searchResult.setScore(score); return searchResult; } diff --git a/code/index/java/nu/marginalia/index/results/IndexResultValuatorService.java b/code/index/java/nu/marginalia/index/results/IndexResultValuatorService.java new file mode 100644 index 00000000..9251a5d2 --- /dev/null +++ b/code/index/java/nu/marginalia/index/results/IndexResultValuatorService.java @@ -0,0 +1,144 @@ +package nu.marginalia.index.results; + +import com.google.inject.Inject; +import com.google.inject.Singleton; +import gnu.trove.list.TLongList; +import gnu.trove.list.array.TLongArrayList; +import it.unimi.dsi.fastutil.longs.LongArrayList; +import nu.marginalia.api.searchquery.model.results.DecoratedSearchResultItem; +import nu.marginalia.api.searchquery.model.results.ResultRankingContext; +import nu.marginalia.api.searchquery.model.results.SearchResultItem; +import nu.marginalia.index.index.StatefulIndex; +import nu.marginalia.index.model.SearchParameters; +import nu.marginalia.index.results.model.ids.CombinedDocIdList; +import nu.marginalia.linkdb.docs.DocumentDbReader; +import 
nu.marginalia.linkdb.model.DocdbUrlDetail; +import nu.marginalia.ranking.results.ResultValuator; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.sql.SQLException; +import java.util.*; +import java.util.function.Consumer; +import java.util.stream.Collectors; + +@Singleton +public class IndexResultValuatorService { + private static final Logger logger = LoggerFactory.getLogger(IndexResultValuatorService.class); + + private final IndexMetadataService metadataService; + private final DocumentDbReader documentDbReader; + private final ResultValuator resultValuator; + private final StatefulIndex statefulIndex; + + @Inject + public IndexResultValuatorService(IndexMetadataService metadataService, + DocumentDbReader documentDbReader, + ResultValuator resultValuator, + StatefulIndex statefulIndex) + { + this.metadataService = metadataService; + this.documentDbReader = documentDbReader; + this.resultValuator = resultValuator; + this.statefulIndex = statefulIndex; + } + + public List rankResults(SearchParameters params, + ResultRankingContext rankingContext, + CombinedDocIdList resultIds) + { + final var evaluator = new IndexResultValuationContext(metadataService, + resultValuator, + resultIds, + statefulIndex, + rankingContext, + params.subqueries, + params.queryParams); + + List results = new ArrayList<>(resultIds.size()); + + for (long docId : resultIds.array()) { + var score = evaluator.calculatePreliminaryScore(docId); + if (score != null) { + results.add(score); + } + } + + return results; + } + + + public List selectBestResults(SearchParameters params, + ResultRankingContext rankingContext, + Collection results) throws SQLException { + + var domainCountFilter = new IndexResultDomainDeduplicator(params.limitByDomain); + + List resultsList = new ArrayList<>(results.size()); + + for (var item : results) { + if (domainCountFilter.test(item)) { + // It's important that this filter runs across all results, not just the top N + if (resultsList.size() 
< params.limitTotal) { + resultsList.add(item); + } + } + } + + for (var item : resultsList) { + item.resultsFromDomain = domainCountFilter.getCount(item); + } + + return decorateAndRerank(resultsList, rankingContext); + } + + /** Decorate the result items with additional information from the link database + * and calculate an updated ranking with the additional information */ + public List decorateAndRerank(List rawResults, + ResultRankingContext rankingContext) + throws SQLException + { + TLongList idsList = new TLongArrayList(rawResults.size()); + + for (var result : rawResults) + idsList.add(result.getDocumentId()); + + Map urlDetailsById = new HashMap<>(rawResults.size()); + + for (var item : documentDbReader.getUrlDetails(idsList)) + urlDetailsById.put(item.urlId(), item); + + List resultItems = new ArrayList<>(rawResults.size()); + for (var result : rawResults) { + var id = result.getDocumentId(); + var docData = urlDetailsById.get(id); + + if (docData == null) { + logger.warn("No document data for id {}", id); + continue; + } + + resultItems.add(createCombinedItem(result, docData, rankingContext)); + } + return resultItems; + } + + private DecoratedSearchResultItem createCombinedItem(SearchResultItem result, + DocdbUrlDetail docData, + ResultRankingContext rankingContext) { + return new DecoratedSearchResultItem( + result, + docData.url(), + docData.title(), + docData.description(), + docData.urlQuality(), + docData.format(), + docData.features(), + docData.pubYear(), + docData.dataHash(), + docData.wordsTotal(), + resultValuator.calculateSearchResultValue(result.keywordScores, docData.wordsTotal(), rankingContext) + ); + + } +} diff --git a/code/index/java/nu/marginalia/index/results/model/QuerySearchTerms.java b/code/index/java/nu/marginalia/index/results/model/QuerySearchTerms.java new file mode 100644 index 00000000..d72e0ea9 --- /dev/null +++ b/code/index/java/nu/marginalia/index/results/model/QuerySearchTerms.java @@ -0,0 +1,23 @@ +package 
nu.marginalia.index.results.model; + +import gnu.trove.map.hash.TObjectLongHashMap; +import nu.marginalia.index.results.model.ids.TermIdList; + +public class QuerySearchTerms { + private final TObjectLongHashMap termToId; + public final TermIdList termIdsAll; + + public final TermCoherenceGroupList coherences; + + public QuerySearchTerms(TObjectLongHashMap termToId, + TermIdList termIdsAll, + TermCoherenceGroupList coherences) { + this.termToId = termToId; + this.termIdsAll = termIdsAll; + this.coherences = coherences; + } + + public long getIdForTerm(String searchTerm) { + return termToId.get(searchTerm); + } +} diff --git a/code/index/java/nu/marginalia/index/results/model/TermCoherenceGroupList.java b/code/index/java/nu/marginalia/index/results/model/TermCoherenceGroupList.java new file mode 100644 index 00000000..2b6c24f5 --- /dev/null +++ b/code/index/java/nu/marginalia/index/results/model/TermCoherenceGroupList.java @@ -0,0 +1,49 @@ +package nu.marginalia.index.results.model; + +import nu.marginalia.index.model.SearchTermsUtil; +import nu.marginalia.model.idx.WordMetadata; + +import java.util.Collections; +import java.util.List; + +/** + * wordIds that we require to be in the same sentence + */ +public record TermCoherenceGroupList(List words) { + + public TermCoherenceGroupList(List words) { + this.words = Collections.unmodifiableList(words); + } + + public boolean test(TermMetadataForCombinedDocumentIds documents, long docId) { + for (var coherenceSet : words()) { + if (!coherenceSet.test(documents, docId)) { + return false; + } + } + + return true; + } + + public static final class TermCoherenceGroup { + private final long[] words; + + public TermCoherenceGroup(long[] words) { + this.words = words; + } + + public TermCoherenceGroup(List coh) { + this(coh.stream().mapToLong(SearchTermsUtil::getWordId).toArray()); + } + + public boolean test(TermMetadataForCombinedDocumentIds documents, long docId) { + long overlap = 0xFF_FFFF_FFFF_FFFFL; + + for (var word : 
words) { + overlap &= documents.getTermMetadata(word, docId); + } + + return WordMetadata.decodePositions(overlap) != 0L; + } + } +} diff --git a/code/index/java/nu/marginalia/index/results/model/TermMetadataForCombinedDocumentIds.java b/code/index/java/nu/marginalia/index/results/model/TermMetadataForCombinedDocumentIds.java new file mode 100644 index 00000000..9068dd69 --- /dev/null +++ b/code/index/java/nu/marginalia/index/results/model/TermMetadataForCombinedDocumentIds.java @@ -0,0 +1,36 @@ +package nu.marginalia.index.results.model; + +import it.unimi.dsi.fastutil.longs.Long2LongOpenHashMap; +import it.unimi.dsi.fastutil.longs.Long2ObjectArrayMap; +import nu.marginalia.index.results.model.ids.CombinedDocIdList; +import nu.marginalia.index.results.model.ids.DocMetadataList; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class TermMetadataForCombinedDocumentIds { + private static final Logger logger = LoggerFactory.getLogger(TermMetadataForCombinedDocumentIds.class); + private final Long2ObjectArrayMap termdocToMeta; + + public TermMetadataForCombinedDocumentIds(Long2ObjectArrayMap termdocToMeta) { + this.termdocToMeta = termdocToMeta; + } + + public long getTermMetadata(long termId, long combinedId) { + var metaByCombinedId = termdocToMeta.get(termId); + if (metaByCombinedId == null) { + logger.warn("Missing meta for term {}", termId); + return 0; + } + return metaByCombinedId.get(combinedId); + } + + public record DocumentsWithMetadata(Long2LongOpenHashMap data) { + public DocumentsWithMetadata(CombinedDocIdList combinedDocIdsAll, DocMetadataList metadata) { + this(new Long2LongOpenHashMap(combinedDocIdsAll.array(), metadata.array())); + } + + public long get(long combinedId) { + return data.getOrDefault(combinedId, 0); + } + } +} diff --git a/code/index/java/nu/marginalia/index/results/model/ids/CombinedDocIdList.java b/code/index/java/nu/marginalia/index/results/model/ids/CombinedDocIdList.java new file mode 100644 index 
00000000..bec6fb8e --- /dev/null +++ b/code/index/java/nu/marginalia/index/results/model/ids/CombinedDocIdList.java @@ -0,0 +1,52 @@ +package nu.marginalia.index.results.model.ids; + +import it.unimi.dsi.fastutil.longs.LongArrayList; +import org.roaringbitmap.longlong.Roaring64Bitmap; + +import java.util.Arrays; +import java.util.stream.LongStream; + +public final class CombinedDocIdList { + private final long[] data; + + public CombinedDocIdList(LongArrayList data) { + this.data = data.toLongArray(); + } + public CombinedDocIdList(Roaring64Bitmap data) { + this.data = data.toArray(); + } + public CombinedDocIdList() { + this.data = new long[0]; + } + + public int size() { + return data.length; + } + + public LongStream stream() { + return Arrays.stream(data); + } + + @Override + public boolean equals(Object obj) { + if (obj == this) return true; + if (obj == null || obj.getClass() != this.getClass()) return false; + var that = (CombinedDocIdList) obj; + return Arrays.equals(this.data, that.data); + } + + @Override + public int hashCode() { + return Arrays.hashCode(data); + } + + + public long[] array() { + return data; + } + + public void sort() { + Arrays.sort(data); + } +} + diff --git a/code/index/java/nu/marginalia/index/results/model/ids/DocIdList.java b/code/index/java/nu/marginalia/index/results/model/ids/DocIdList.java new file mode 100644 index 00000000..beb07c00 --- /dev/null +++ b/code/index/java/nu/marginalia/index/results/model/ids/DocIdList.java @@ -0,0 +1,49 @@ +package nu.marginalia.index.results.model.ids; + +import it.unimi.dsi.fastutil.longs.LongArrayList; + +import java.util.Arrays; +import java.util.Objects; +import java.util.stream.LongStream; + +public final class DocIdList { + private final long[] array; + + public DocIdList(long[] array) { + this.array = array; + } + + public DocIdList(LongArrayList list) { + this(list.toLongArray()); + } + + public int size() { + return array.length; + } + + public void sort() { + Arrays.sort(array); + } 
+ + public LongStream stream() { + return LongStream.of(array); + } + + public long[] array() { + return array; + } + + @Override + public boolean equals(Object obj) { + if (obj == this) return true; + if (obj == null || obj.getClass() != this.getClass()) return false; + var that = (DocIdList) obj; + return Arrays.equals(this.array, that.array); + } + + @Override + public int hashCode() { + return Arrays.hashCode(array); + } + +} diff --git a/code/index/java/nu/marginalia/index/results/model/ids/DocMetadataList.java b/code/index/java/nu/marginalia/index/results/model/ids/DocMetadataList.java new file mode 100644 index 00000000..0104f89c --- /dev/null +++ b/code/index/java/nu/marginalia/index/results/model/ids/DocMetadataList.java @@ -0,0 +1,45 @@ +package nu.marginalia.index.results.model.ids; + +import it.unimi.dsi.fastutil.longs.LongArrayList; + +import java.util.Arrays; +import java.util.Objects; +import java.util.stream.LongStream; + +public final class DocMetadataList { + private final long[] array; + + public DocMetadataList(long[] array) { + this.array = array; + } + + public DocMetadataList(LongArrayList list) { + this(list.toLongArray()); + } + + public int size() { + return array.length; + } + + public LongStream stream() { + return LongStream.of(array); + } + + public long[] array() { + return array; + } + + @Override + public boolean equals(Object obj) { + if (obj == this) return true; + if (obj == null || obj.getClass() != this.getClass()) return false; + var that = (DocMetadataList) obj; + return Arrays.equals(this.array, that.array); + } + + @Override + public int hashCode() { + return Arrays.hashCode(array); + } + +} diff --git a/code/index/java/nu/marginalia/index/results/model/ids/TermIdList.java b/code/index/java/nu/marginalia/index/results/model/ids/TermIdList.java new file mode 100644 index 00000000..f25ab1b9 --- /dev/null +++ b/code/index/java/nu/marginalia/index/results/model/ids/TermIdList.java @@ -0,0 +1,45 @@ +package 
nu.marginalia.index.results.model.ids; + +import it.unimi.dsi.fastutil.longs.LongArrayList; + +import java.util.Arrays; +import java.util.Objects; +import java.util.stream.LongStream; + +public final class TermIdList { + private final long[] array; + + public TermIdList(long[] array) { + this.array = array; + } + + public TermIdList(LongArrayList list) { + this(list.toLongArray()); + } + + public int size() { + return array.length; + } + + public LongStream stream() { + return LongStream.of(array); + } + + public long[] array() { + return array; + } + + @Override + public boolean equals(Object obj) { + if (obj == this) return true; + if (obj == null || obj.getClass() != this.getClass()) return false; + var that = (TermIdList) obj; + return Arrays.equals(this.array, that.array); + } + + @Override + public int hashCode() { + return Arrays.hashCode(array); + } + +} diff --git a/code/services-core/index-service/src/main/java/nu/marginalia/index/db/DbUpdateRanks.java b/code/index/java/nu/marginalia/index/searchset/DbUpdateRanks.java similarity index 97% rename from code/services-core/index-service/src/main/java/nu/marginalia/index/db/DbUpdateRanks.java rename to code/index/java/nu/marginalia/index/searchset/DbUpdateRanks.java index d6696f59..ce8d8af0 100644 --- a/code/services-core/index-service/src/main/java/nu/marginalia/index/db/DbUpdateRanks.java +++ b/code/index/java/nu/marginalia/index/searchset/DbUpdateRanks.java @@ -1,4 +1,4 @@ -package nu.marginalia.index.db; +package nu.marginalia.index.searchset; import com.zaxxer.hikari.HikariDataSource; import it.unimi.dsi.fastutil.ints.Int2IntOpenHashMap; diff --git a/code/services-core/index-service/src/main/java/nu/marginalia/index/svc/searchset/RankingSearchSet.java b/code/index/java/nu/marginalia/index/searchset/RankingSearchSet.java similarity index 90% rename from code/services-core/index-service/src/main/java/nu/marginalia/index/svc/searchset/RankingSearchSet.java rename to 
code/index/java/nu/marginalia/index/searchset/RankingSearchSet.java index 89018493..1734cf01 100644 --- a/code/services-core/index-service/src/main/java/nu/marginalia/index/svc/searchset/RankingSearchSet.java +++ b/code/index/java/nu/marginalia/index/searchset/RankingSearchSet.java @@ -1,10 +1,6 @@ -package nu.marginalia.index.svc.searchset; +package nu.marginalia.index.searchset; import it.unimi.dsi.fastutil.ints.IntOpenHashSet; -import nu.marginalia.index.client.model.query.SearchSetIdentifier; -import nu.marginalia.index.searchset.SearchSet; -import nu.marginalia.model.idx.DocumentFlags; -import nu.marginalia.model.idx.DocumentMetadata; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/code/features-index/index-query/src/main/java/nu/marginalia/index/searchset/SearchSet.java b/code/index/java/nu/marginalia/index/searchset/SearchSet.java similarity index 100% rename from code/features-index/index-query/src/main/java/nu/marginalia/index/searchset/SearchSet.java rename to code/index/java/nu/marginalia/index/searchset/SearchSet.java diff --git a/code/services-core/index-service/src/main/java/nu/marginalia/index/svc/searchset/SearchSetAny.java b/code/index/java/nu/marginalia/index/searchset/SearchSetAny.java similarity index 72% rename from code/services-core/index-service/src/main/java/nu/marginalia/index/svc/searchset/SearchSetAny.java rename to code/index/java/nu/marginalia/index/searchset/SearchSetAny.java index b0ee4e39..c00bb435 100644 --- a/code/services-core/index-service/src/main/java/nu/marginalia/index/svc/searchset/SearchSetAny.java +++ b/code/index/java/nu/marginalia/index/searchset/SearchSetAny.java @@ -1,6 +1,4 @@ -package nu.marginalia.index.svc.searchset; - -import nu.marginalia.index.searchset.SearchSet; +package nu.marginalia.index.searchset; public class SearchSetAny implements SearchSet { @Override diff --git a/code/services-core/index-service/src/main/java/nu/marginalia/index/svc/IndexSearchSetsService.java 
b/code/index/java/nu/marginalia/index/searchset/SearchSetsService.java similarity index 79% rename from code/services-core/index-service/src/main/java/nu/marginalia/index/svc/IndexSearchSetsService.java rename to code/index/java/nu/marginalia/index/searchset/SearchSetsService.java index df597a4d..4fe974fe 100644 --- a/code/services-core/index-service/src/main/java/nu/marginalia/index/svc/IndexSearchSetsService.java +++ b/code/index/java/nu/marginalia/index/searchset/SearchSetsService.java @@ -1,4 +1,4 @@ -package nu.marginalia.index.svc; +package nu.marginalia.index.searchset; import com.google.inject.Inject; import com.google.inject.Singleton; @@ -6,17 +6,14 @@ import gnu.trove.list.TIntList; import it.unimi.dsi.fastutil.ints.IntOpenHashSet; import nu.marginalia.db.DomainRankingSetsService; import nu.marginalia.db.DomainTypes; -import nu.marginalia.index.IndexServicesFactory; -import nu.marginalia.index.searchset.SearchSet; -import nu.marginalia.ranking.*; -import nu.marginalia.ranking.accumulator.RankingResultHashMapAccumulator; -import nu.marginalia.ranking.accumulator.RankingResultHashSetAccumulator; -import nu.marginalia.index.svc.searchset.RankingSearchSet; -import nu.marginalia.index.svc.searchset.SearchSetAny; -import nu.marginalia.index.db.DbUpdateRanks; -import nu.marginalia.ranking.data.GraphSource; -import nu.marginalia.ranking.data.LinkGraphSource; -import nu.marginalia.ranking.data.SimilarityGraphSource; +import nu.marginalia.index.IndexFactory; +import nu.marginalia.index.domainrankings.DomainRankings; +import nu.marginalia.ranking.domains.PageRankDomainRanker; +import nu.marginalia.ranking.domains.accumulator.RankingResultHashMapAccumulator; +import nu.marginalia.ranking.domains.accumulator.RankingResultHashSetAccumulator; +import nu.marginalia.ranking.domains.data.GraphSource; +import nu.marginalia.ranking.domains.data.LinkGraphSource; +import nu.marginalia.ranking.domains.data.SimilarityGraphSource; import 
nu.marginalia.service.control.ServiceEventLog; import nu.marginalia.service.module.ServiceConfiguration; import org.slf4j.Logger; @@ -29,10 +26,10 @@ import java.util.Objects; import java.util.concurrent.ConcurrentHashMap; @Singleton -public class IndexSearchSetsService { +public class SearchSetsService { private final Logger logger = LoggerFactory.getLogger(getClass()); private final DomainTypes domainTypes; - private final IndexServicesFactory indexServicesFactory; + private final IndexFactory indexFactory; private final ServiceEventLog eventLog; private final DomainRankingSetsService domainRankingSetsService; private final DbUpdateRanks dbUpdateRanks; @@ -50,17 +47,17 @@ public class IndexSearchSetsService { private static final String primaryRankingSet = "RANK"; @Inject - public IndexSearchSetsService(DomainTypes domainTypes, - ServiceConfiguration serviceConfiguration, - LinkGraphSource rankingDomains, - SimilarityGraphSource similarityDomains, - IndexServicesFactory indexServicesFactory, - ServiceEventLog eventLog, - DomainRankingSetsService domainRankingSetsService, - DbUpdateRanks dbUpdateRanks) throws IOException { + public SearchSetsService(DomainTypes domainTypes, + ServiceConfiguration serviceConfiguration, + LinkGraphSource rankingDomains, + SimilarityGraphSource similarityDomains, + IndexFactory indexFactory, + ServiceEventLog eventLog, + DomainRankingSetsService domainRankingSetsService, + DbUpdateRanks dbUpdateRanks) throws IOException { this.nodeId = serviceConfiguration.node(); this.domainTypes = domainTypes; - this.indexServicesFactory = indexServicesFactory; + this.indexFactory = indexFactory; this.eventLog = eventLog; this.domainRankingSetsService = domainRankingSetsService; @@ -80,7 +77,7 @@ public class IndexSearchSetsService { for (var rankingSet : domainRankingSetsService.getAll()) { rankingSets.put(rankingSet.name(), new RankingSearchSet(rankingSet.name(), - rankingSet.fileName(indexServicesFactory.getSearchSetsBase()) + 
rankingSet.fileName(indexFactory.getSearchSetsBase()) ) ); } @@ -96,7 +93,7 @@ public class IndexSearchSetsService { return anySet; } - if ("NONE".equals(searchSetIdentifier) || "".equals(searchSetIdentifier)) { + if ("NONE".equals(searchSetIdentifier) || searchSetIdentifier.isEmpty()) { return anySet; } @@ -150,7 +147,7 @@ public class IndexSearchSetsService { .forDomainNames(source, domains) .calculate(rankingSet.depth(), RankingResultHashSetAccumulator::new); - var set = new RankingSearchSet(rankingSet.name(), rankingSet.fileName(indexServicesFactory.getSearchSetsBase()), data); + var set = new RankingSearchSet(rankingSet.name(), rankingSet.fileName(indexFactory.getSearchSetsBase()), data); rankingSets.put(rankingSet.name(), set); try { @@ -173,7 +170,7 @@ public class IndexSearchSetsService { } synchronized (this) { - var blogSet = new RankingSearchSet(rankingSet.name(), rankingSet.fileName(indexServicesFactory.getSearchSetsBase()), new IntOpenHashSet(knownDomains.toArray())); + var blogSet = new RankingSearchSet(rankingSet.name(), rankingSet.fileName(indexFactory.getSearchSetsBase()), new IntOpenHashSet(knownDomains.toArray())); rankingSets.put(rankingSet.name(), blogSet); blogSet.write(); } @@ -200,7 +197,7 @@ public class IndexSearchSetsService { domainRankings = new DomainRankings(ranks); } - domainRankings.save(indexServicesFactory.getSearchSetsBase()); + domainRankings.save(indexFactory.getSearchSetsBase()); if (nodeId == 1) { // The EC_DOMAIN table has a field that reflects the rank, this needs to be set for search result ordering to diff --git a/code/services-core/index-service/src/main/java/nu/marginalia/index/svc/searchset/SmallSearchSet.java b/code/index/java/nu/marginalia/index/searchset/SmallSearchSet.java similarity index 87% rename from code/services-core/index-service/src/main/java/nu/marginalia/index/svc/searchset/SmallSearchSet.java rename to code/index/java/nu/marginalia/index/searchset/SmallSearchSet.java index 37cc07e0..0c4c0862 100644 --- 
a/code/services-core/index-service/src/main/java/nu/marginalia/index/svc/searchset/SmallSearchSet.java +++ b/code/index/java/nu/marginalia/index/searchset/SmallSearchSet.java @@ -1,7 +1,6 @@ -package nu.marginalia.index.svc.searchset; +package nu.marginalia.index.searchset; import gnu.trove.set.hash.TIntHashSet; -import nu.marginalia.index.searchset.SearchSet; import java.util.Arrays; import java.util.Collection; diff --git a/code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/PageRankDomainRanker.java b/code/index/java/nu/marginalia/ranking/domains/PageRankDomainRanker.java similarity index 88% rename from code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/PageRankDomainRanker.java rename to code/index/java/nu/marginalia/ranking/domains/PageRankDomainRanker.java index e87352b0..354ebb31 100644 --- a/code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/PageRankDomainRanker.java +++ b/code/index/java/nu/marginalia/ranking/domains/PageRankDomainRanker.java @@ -1,10 +1,10 @@ -package nu.marginalia.ranking; +package nu.marginalia.ranking.domains; import gnu.trove.list.TIntList; import gnu.trove.list.array.TIntArrayList; -import nu.marginalia.ranking.accumulator.RankingResultAccumulator; -import nu.marginalia.ranking.data.GraphSource; -import nu.marginalia.ranking.jgrapht.PersonalizedPageRank; +import nu.marginalia.ranking.domains.accumulator.RankingResultAccumulator; +import nu.marginalia.ranking.domains.data.GraphSource; +import nu.marginalia.ranking.domains.jgrapht.PersonalizedPageRank; import org.jgrapht.Graph; import org.jgrapht.alg.interfaces.VertexScoringAlgorithm; import org.jgrapht.alg.scoring.PageRank; diff --git a/code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/RankingAlgorithm.java b/code/index/java/nu/marginalia/ranking/domains/RankingAlgorithm.java similarity index 75% rename from code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/RankingAlgorithm.java 
rename to code/index/java/nu/marginalia/ranking/domains/RankingAlgorithm.java index f67d47be..9df110b5 100644 --- a/code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/RankingAlgorithm.java +++ b/code/index/java/nu/marginalia/ranking/domains/RankingAlgorithm.java @@ -1,6 +1,6 @@ -package nu.marginalia.ranking; +package nu.marginalia.ranking.domains; -import nu.marginalia.ranking.accumulator.RankingResultAccumulator; +import nu.marginalia.ranking.domains.accumulator.RankingResultAccumulator; import java.util.function.Supplier; diff --git a/code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/accumulator/RankingResultAccumulator.java b/code/index/java/nu/marginalia/ranking/domains/accumulator/RankingResultAccumulator.java similarity index 66% rename from code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/accumulator/RankingResultAccumulator.java rename to code/index/java/nu/marginalia/ranking/domains/accumulator/RankingResultAccumulator.java index e9055f6e..8fcbfea2 100644 --- a/code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/accumulator/RankingResultAccumulator.java +++ b/code/index/java/nu/marginalia/ranking/domains/accumulator/RankingResultAccumulator.java @@ -1,4 +1,4 @@ -package nu.marginalia.ranking.accumulator; +package nu.marginalia.ranking.domains.accumulator; public interface RankingResultAccumulator { void add(int domainId, int rank); diff --git a/code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/accumulator/RankingResultBitSetAccumulator.java b/code/index/java/nu/marginalia/ranking/domains/accumulator/RankingResultBitSetAccumulator.java similarity index 88% rename from code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/accumulator/RankingResultBitSetAccumulator.java rename to code/index/java/nu/marginalia/ranking/domains/accumulator/RankingResultBitSetAccumulator.java index 3a806d95..aa12d378 100644 --- 
a/code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/accumulator/RankingResultBitSetAccumulator.java +++ b/code/index/java/nu/marginalia/ranking/domains/accumulator/RankingResultBitSetAccumulator.java @@ -1,4 +1,4 @@ -package nu.marginalia.ranking.accumulator; +package nu.marginalia.ranking.domains.accumulator; import org.roaringbitmap.RoaringBitmap; diff --git a/code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/accumulator/RankingResultHashMapAccumulator.java b/code/index/java/nu/marginalia/ranking/domains/accumulator/RankingResultHashMapAccumulator.java similarity index 90% rename from code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/accumulator/RankingResultHashMapAccumulator.java rename to code/index/java/nu/marginalia/ranking/domains/accumulator/RankingResultHashMapAccumulator.java index 15365466..f5cdd410 100644 --- a/code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/accumulator/RankingResultHashMapAccumulator.java +++ b/code/index/java/nu/marginalia/ranking/domains/accumulator/RankingResultHashMapAccumulator.java @@ -1,4 +1,4 @@ -package nu.marginalia.ranking.accumulator; +package nu.marginalia.ranking.domains.accumulator; import it.unimi.dsi.fastutil.ints.Int2IntOpenHashMap; diff --git a/code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/accumulator/RankingResultHashSetAccumulator.java b/code/index/java/nu/marginalia/ranking/domains/accumulator/RankingResultHashSetAccumulator.java similarity index 88% rename from code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/accumulator/RankingResultHashSetAccumulator.java rename to code/index/java/nu/marginalia/ranking/domains/accumulator/RankingResultHashSetAccumulator.java index 54ee465d..36777478 100644 --- a/code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/accumulator/RankingResultHashSetAccumulator.java +++ 
b/code/index/java/nu/marginalia/ranking/domains/accumulator/RankingResultHashSetAccumulator.java @@ -1,4 +1,4 @@ -package nu.marginalia.ranking.accumulator; +package nu.marginalia.ranking.domains.accumulator; import it.unimi.dsi.fastutil.ints.IntOpenHashSet; diff --git a/code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/accumulator/RankingResultListAccumulator.java b/code/index/java/nu/marginalia/ranking/domains/accumulator/RankingResultListAccumulator.java similarity index 91% rename from code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/accumulator/RankingResultListAccumulator.java rename to code/index/java/nu/marginalia/ranking/domains/accumulator/RankingResultListAccumulator.java index ecfab27c..5722b22e 100644 --- a/code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/accumulator/RankingResultListAccumulator.java +++ b/code/index/java/nu/marginalia/ranking/domains/accumulator/RankingResultListAccumulator.java @@ -1,4 +1,4 @@ -package nu.marginalia.ranking.accumulator; +package nu.marginalia.ranking.domains.accumulator; import gnu.trove.list.array.TIntArrayList; diff --git a/code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/data/AbstractGraphSource.java b/code/index/java/nu/marginalia/ranking/domains/data/AbstractGraphSource.java similarity index 97% rename from code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/data/AbstractGraphSource.java rename to code/index/java/nu/marginalia/ranking/domains/data/AbstractGraphSource.java index 26b4f3d3..57e10323 100644 --- a/code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/data/AbstractGraphSource.java +++ b/code/index/java/nu/marginalia/ranking/domains/data/AbstractGraphSource.java @@ -1,4 +1,4 @@ -package nu.marginalia.ranking.data; +package nu.marginalia.ranking.domains.data; import com.zaxxer.hikari.HikariDataSource; import org.jgrapht.Graph; diff --git 
a/code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/data/GraphSource.java b/code/index/java/nu/marginalia/ranking/domains/data/GraphSource.java similarity index 94% rename from code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/data/GraphSource.java rename to code/index/java/nu/marginalia/ranking/domains/data/GraphSource.java index b6e1daeb..7c137245 100644 --- a/code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/data/GraphSource.java +++ b/code/index/java/nu/marginalia/ranking/domains/data/GraphSource.java @@ -1,4 +1,4 @@ -package nu.marginalia.ranking.data; +package nu.marginalia.ranking.domains.data; import org.jgrapht.Graph; diff --git a/code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/data/InvertedLinkGraphSource.java b/code/index/java/nu/marginalia/ranking/domains/data/InvertedLinkGraphSource.java similarity index 77% rename from code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/data/InvertedLinkGraphSource.java rename to code/index/java/nu/marginalia/ranking/domains/data/InvertedLinkGraphSource.java index 9d5564d0..8562cdce 100644 --- a/code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/data/InvertedLinkGraphSource.java +++ b/code/index/java/nu/marginalia/ranking/domains/data/InvertedLinkGraphSource.java @@ -1,27 +1,24 @@ -package nu.marginalia.ranking.data; +package nu.marginalia.ranking.domains.data; import com.google.inject.Inject; import com.zaxxer.hikari.HikariDataSource; import lombok.SneakyThrows; -import nu.marginalia.query.client.QueryClient; +import nu.marginalia.api.linkgraph.AggregateLinkGraphClient; import org.jgrapht.Graph; import org.jgrapht.graph.DefaultDirectedGraph; import org.jgrapht.graph.DefaultEdge; -import java.sql.SQLException; -import java.util.ArrayList; -import java.util.List; /** A source for the inverted link graph, * which is the same as the regular graph except * the direction of the links have been 
inverted */ public class InvertedLinkGraphSource extends AbstractGraphSource { - private final QueryClient queryClient; + private final AggregateLinkGraphClient graphClient; @Inject - public InvertedLinkGraphSource(HikariDataSource dataSource, QueryClient queryClient) { + public InvertedLinkGraphSource(HikariDataSource dataSource, AggregateLinkGraphClient graphClient) { super(dataSource); - this.queryClient = queryClient; + this.graphClient = graphClient; } @SneakyThrows @Override @@ -30,7 +27,7 @@ public class InvertedLinkGraphSource extends AbstractGraphSource { addVertices(graph); - var allLinks = queryClient.getAllDomainLinks(); + var allLinks = graphClient.getAllDomainLinks(); var iter = allLinks.iterator(); while (iter.advance()) { if (!graph.containsVertex(iter.dest())) { diff --git a/code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/data/LinkGraphSource.java b/code/index/java/nu/marginalia/ranking/domains/data/LinkGraphSource.java similarity index 72% rename from code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/data/LinkGraphSource.java rename to code/index/java/nu/marginalia/ranking/domains/data/LinkGraphSource.java index cc7f2b53..341a77df 100644 --- a/code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/data/LinkGraphSource.java +++ b/code/index/java/nu/marginalia/ranking/domains/data/LinkGraphSource.java @@ -1,21 +1,21 @@ -package nu.marginalia.ranking.data; +package nu.marginalia.ranking.domains.data; import com.google.inject.Inject; import com.zaxxer.hikari.HikariDataSource; import lombok.SneakyThrows; -import nu.marginalia.query.client.QueryClient; +import nu.marginalia.api.linkgraph.AggregateLinkGraphClient; import org.jgrapht.Graph; import org.jgrapht.graph.DefaultDirectedGraph; import org.jgrapht.graph.DefaultEdge; /** A source for the regular link graph. 
*/ public class LinkGraphSource extends AbstractGraphSource { - private final QueryClient queryClient; + private final AggregateLinkGraphClient graphClient; @Inject - public LinkGraphSource(HikariDataSource dataSource, QueryClient queryClient) { + public LinkGraphSource(HikariDataSource dataSource, AggregateLinkGraphClient graphClient) { super(dataSource); - this.queryClient = queryClient; + this.graphClient = graphClient; } @SneakyThrows @@ -25,7 +25,7 @@ public class LinkGraphSource extends AbstractGraphSource { addVertices(graph); - var allLinks = queryClient.getAllDomainLinks(); + var allLinks = graphClient.getAllDomainLinks(); var iter = allLinks.iterator(); while (iter.advance()) { if (!graph.containsVertex(iter.dest())) { diff --git a/code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/data/SimilarityGraphSource.java b/code/index/java/nu/marginalia/ranking/domains/data/SimilarityGraphSource.java similarity index 84% rename from code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/data/SimilarityGraphSource.java rename to code/index/java/nu/marginalia/ranking/domains/data/SimilarityGraphSource.java index 359f76fd..e52091b1 100644 --- a/code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/data/SimilarityGraphSource.java +++ b/code/index/java/nu/marginalia/ranking/domains/data/SimilarityGraphSource.java @@ -1,4 +1,4 @@ -package nu.marginalia.ranking.data; +package nu.marginalia.ranking.domains.data; import com.google.inject.Inject; import com.zaxxer.hikari.HikariDataSource; @@ -52,6 +52,14 @@ public class SimilarityGraphSource extends AbstractGraphSource { while (rs.next()) { int src = rs.getInt(1); int dest = rs.getInt(2); + + // Similarity data may contain domain ids that we don't have indexed, + // omit these from the graph. 
+ if (!graph.containsVertex(src)) + continue; + if (!graph.containsVertex(dest)) + continue; + double weight = rs.getDouble(3); graph.addEdge(src, dest); diff --git a/code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/jgrapht/PersonalizedPageRank.java b/code/index/java/nu/marginalia/ranking/domains/jgrapht/PersonalizedPageRank.java similarity index 99% rename from code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/jgrapht/PersonalizedPageRank.java rename to code/index/java/nu/marginalia/ranking/domains/jgrapht/PersonalizedPageRank.java index 0fd6a194..4eedb684 100644 --- a/code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/jgrapht/PersonalizedPageRank.java +++ b/code/index/java/nu/marginalia/ranking/domains/jgrapht/PersonalizedPageRank.java @@ -1,4 +1,4 @@ -package nu.marginalia.ranking.jgrapht; +package nu.marginalia.ranking.domains.jgrapht; /* * (C) Copyright 2016-2023, by Dimitrios Michail and Contributors. diff --git a/code/features-index/result-ranking/src/main/java/nu/marginalia/ranking/ResultKeywordSet.java b/code/index/java/nu/marginalia/ranking/results/ResultKeywordSet.java similarity index 82% rename from code/features-index/result-ranking/src/main/java/nu/marginalia/ranking/ResultKeywordSet.java rename to code/index/java/nu/marginalia/ranking/results/ResultKeywordSet.java index af33281d..19405dcb 100644 --- a/code/features-index/result-ranking/src/main/java/nu/marginalia/ranking/ResultKeywordSet.java +++ b/code/index/java/nu/marginalia/ranking/results/ResultKeywordSet.java @@ -1,6 +1,7 @@ -package nu.marginalia.ranking; +package nu.marginalia.ranking.results; -import nu.marginalia.index.client.model.results.SearchResultKeywordScore; + +import nu.marginalia.api.searchquery.model.results.SearchResultKeywordScore; import java.util.List; diff --git a/code/features-index/result-ranking/src/main/java/nu/marginalia/ranking/ResultValuator.java 
b/code/index/java/nu/marginalia/ranking/results/ResultValuator.java similarity index 81% rename from code/features-index/result-ranking/src/main/java/nu/marginalia/ranking/ResultValuator.java rename to code/index/java/nu/marginalia/ranking/results/ResultValuator.java index 961a9e81..6c67559d 100644 --- a/code/features-index/result-ranking/src/main/java/nu/marginalia/ranking/ResultValuator.java +++ b/code/index/java/nu/marginalia/ranking/results/ResultValuator.java @@ -1,13 +1,13 @@ -package nu.marginalia.ranking; +package nu.marginalia.ranking.results; -import nu.marginalia.index.client.model.results.ResultRankingContext; -import nu.marginalia.index.client.model.results.ResultRankingParameters; -import nu.marginalia.index.client.model.results.SearchResultKeywordScore; +import nu.marginalia.api.searchquery.model.results.ResultRankingContext; +import nu.marginalia.api.searchquery.model.results.ResultRankingParameters; +import nu.marginalia.api.searchquery.model.results.SearchResultKeywordScore; import nu.marginalia.model.crawl.HtmlFeature; import nu.marginalia.model.crawl.PubDate; import nu.marginalia.model.idx.DocumentFlags; import nu.marginalia.model.idx.DocumentMetadata; -import nu.marginalia.ranking.factors.*; +import nu.marginalia.ranking.results.factors.*; import com.google.inject.Inject; import com.google.inject.Singleton; @@ -17,8 +17,6 @@ import org.slf4j.LoggerFactory; import java.util.ArrayList; import java.util.List; -import static java.lang.Math.min; - @Singleton public class ResultValuator { final static double scalingFactor = 500.; @@ -26,32 +24,21 @@ public class ResultValuator { private final Bm25Factor bm25Factor; private final TermCoherenceFactor termCoherenceFactor; - private final PriorityTermBonus priorityTermBonus; - - private final ThreadLocal> listPool = - ThreadLocal.withInitial(ValuatorListPool::new); - private static final Logger logger = LoggerFactory.getLogger(ResultValuator.class); @Inject public ResultValuator(Bm25Factor bm25Factor, - 
TermCoherenceFactor termCoherenceFactor, - PriorityTermBonus priorityTermBonus) { - + TermCoherenceFactor termCoherenceFactor) { this.bm25Factor = bm25Factor; this.termCoherenceFactor = termCoherenceFactor; - this.priorityTermBonus = priorityTermBonus; - } public double calculateSearchResultValue(List scores, int length, ResultRankingContext ctx) { - var threadListPool = listPool.get(); int sets = numberOfSets(scores); - long documentMetadata = documentMetadata(scores); int features = htmlFeatures(scores); var rankingParams = ctx.params; @@ -86,8 +73,7 @@ public class ResultValuator { + rankingBonus + topologyBonus + temporalBias - + flagsPenalty - + priorityTermBonus.calculate(scores); + + flagsPenalty; double bestTcf = 0; double bestBM25F = 0; @@ -95,7 +81,7 @@ public class ResultValuator { double bestBM25PN = 0; for (int set = 0; set < sets; set++) { - ResultKeywordSet keywordSet = createKeywordSet(threadListPool, scores, set); + ResultKeywordSet keywordSet = createKeywordSet(scores, set); if (keywordSet.isEmpty()) continue; @@ -187,12 +173,10 @@ public class ResultValuator { return 0; } - private ResultKeywordSet createKeywordSet(ValuatorListPool listPool, - List rawScores, + private ResultKeywordSet createKeywordSet(List rawScores, int thisSet) { - List scoresList = listPool.get(thisSet); - scoresList.clear(); + List scoresList = new ArrayList<>(); for (var score : rawScores) { if (score.subquery != thisSet) @@ -227,26 +211,3 @@ public class ResultValuator { return Math.sqrt((1.0 + scalingFactor + 10 * penalty) / (1.0 + value)); } } - -/** Pool of List instances used to reduce memory churn during result ranking in the index - * where potentially tens of thousands of candidate results are ranked. 
- * - * @param - */ -@SuppressWarnings({"unchecked", "rawtypes"}) -class ValuatorListPool { - private final ArrayList[] items = new ArrayList[256]; - - public ValuatorListPool() { - for (int i = 0; i < items.length; i++) { - items[i] = new ArrayList(); - } - } - - public List get(int i) { - var ret = (ArrayList) items[i]; - ret.clear(); - return ret; - } - -} diff --git a/code/features-index/result-ranking/src/main/java/nu/marginalia/ranking/factors/Bm25Factor.java b/code/index/java/nu/marginalia/ranking/results/factors/Bm25Factor.java similarity index 92% rename from code/features-index/result-ranking/src/main/java/nu/marginalia/ranking/factors/Bm25Factor.java rename to code/index/java/nu/marginalia/ranking/results/factors/Bm25Factor.java index d99df66f..335b5fa8 100644 --- a/code/features-index/result-ranking/src/main/java/nu/marginalia/ranking/factors/Bm25Factor.java +++ b/code/index/java/nu/marginalia/ranking/results/factors/Bm25Factor.java @@ -1,10 +1,10 @@ -package nu.marginalia.ranking.factors; +package nu.marginalia.ranking.results.factors; -import nu.marginalia.index.client.model.results.Bm25Parameters; -import nu.marginalia.index.client.model.results.ResultRankingContext; -import nu.marginalia.index.client.model.results.SearchResultKeywordScore; +import nu.marginalia.api.searchquery.model.results.Bm25Parameters; +import nu.marginalia.api.searchquery.model.results.ResultRankingContext; +import nu.marginalia.api.searchquery.model.results.SearchResultKeywordScore; import nu.marginalia.model.idx.WordFlags; -import nu.marginalia.ranking.ResultKeywordSet; +import nu.marginalia.ranking.results.ResultKeywordSet; public class Bm25Factor { private static final int AVG_LENGTH = 5000; diff --git a/code/features-index/result-ranking/src/main/java/nu/marginalia/ranking/factors/TermCoherenceFactor.java b/code/index/java/nu/marginalia/ranking/results/factors/TermCoherenceFactor.java similarity index 87% rename from 
code/features-index/result-ranking/src/main/java/nu/marginalia/ranking/factors/TermCoherenceFactor.java rename to code/index/java/nu/marginalia/ranking/results/factors/TermCoherenceFactor.java index 54964dc1..f956ce88 100644 --- a/code/features-index/result-ranking/src/main/java/nu/marginalia/ranking/factors/TermCoherenceFactor.java +++ b/code/index/java/nu/marginalia/ranking/results/factors/TermCoherenceFactor.java @@ -1,7 +1,7 @@ -package nu.marginalia.ranking.factors; +package nu.marginalia.ranking.results.factors; import nu.marginalia.model.idx.WordMetadata; -import nu.marginalia.ranking.ResultKeywordSet; +import nu.marginalia.ranking.results.ResultKeywordSet; /** Rewards documents where terms appear frequently within the same sentences */ diff --git a/code/features-index/index-query/build.gradle b/code/index/query/build.gradle similarity index 70% rename from code/features-index/index-query/build.gradle rename to code/index/query/build.gradle index 71b925e8..7977ad73 100644 --- a/code/features-index/index-query/build.gradle +++ b/code/index/query/build.gradle @@ -11,12 +11,17 @@ java { } } +apply from: "$rootProject.projectDir/srcsets.gradle" + dependencies { implementation project(':code:libraries:array') + implementation project(':code:common:model') implementation libs.bundles.slf4j implementation libs.prometheus + implementation libs.fastutil + implementation libs.bundles.mariadb testImplementation libs.bundles.slf4j.test testImplementation libs.bundles.junit diff --git a/code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/DomainRankings.java b/code/index/query/java/nu/marginalia/index/domainrankings/DomainRankings.java similarity index 98% rename from code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/DomainRankings.java rename to code/index/query/java/nu/marginalia/index/domainrankings/DomainRankings.java index db5321b1..5d79a0f9 100644 --- 
a/code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/DomainRankings.java +++ b/code/index/query/java/nu/marginalia/index/domainrankings/DomainRankings.java @@ -1,4 +1,4 @@ -package nu.marginalia.ranking; +package nu.marginalia.index.domainrankings; import it.unimi.dsi.fastutil.ints.Int2IntOpenHashMap; import it.unimi.dsi.fastutil.ints.Int2ShortOpenHashMap; diff --git a/code/features-index/index-query/src/main/java/nu/marginalia/index/query/EmptyEntrySource.java b/code/index/query/java/nu/marginalia/index/query/EmptyEntrySource.java similarity index 100% rename from code/features-index/index-query/src/main/java/nu/marginalia/index/query/EmptyEntrySource.java rename to code/index/query/java/nu/marginalia/index/query/EmptyEntrySource.java diff --git a/code/features-index/index-query/src/main/java/nu/marginalia/index/query/EntrySource.java b/code/index/query/java/nu/marginalia/index/query/EntrySource.java similarity index 100% rename from code/features-index/index-query/src/main/java/nu/marginalia/index/query/EntrySource.java rename to code/index/query/java/nu/marginalia/index/query/EntrySource.java diff --git a/code/features-index/index-query/src/main/java/nu/marginalia/index/query/IndexQuery.java b/code/index/query/java/nu/marginalia/index/query/IndexQuery.java similarity index 96% rename from code/features-index/index-query/src/main/java/nu/marginalia/index/query/IndexQuery.java rename to code/index/query/java/nu/marginalia/index/query/IndexQuery.java index b55a8bef..734f08dd 100644 --- a/code/features-index/index-query/src/main/java/nu/marginalia/index/query/IndexQuery.java +++ b/code/index/query/java/nu/marginalia/index/query/IndexQuery.java @@ -22,6 +22,10 @@ public class IndexQuery { public final IndexQueryPriority queryPriority; public final int fetchSizeMultiplier; + public IndexQuery(EntrySource... 
sources) { + this(List.of(sources), IndexQueryPriority.BEST, 1); + } + /** * Creates an IndexQuery object with the given sources, priority, and fetchSizeMultiplier. * diff --git a/code/features-index/index-query/src/main/java/nu/marginalia/index/query/IndexQueryBuilder.java b/code/index/query/java/nu/marginalia/index/query/IndexQueryBuilder.java similarity index 100% rename from code/features-index/index-query/src/main/java/nu/marginalia/index/query/IndexQueryBuilder.java rename to code/index/query/java/nu/marginalia/index/query/IndexQueryBuilder.java diff --git a/code/features-index/index-query/src/main/java/nu/marginalia/index/query/IndexQueryPriority.java b/code/index/query/java/nu/marginalia/index/query/IndexQueryPriority.java similarity index 100% rename from code/features-index/index-query/src/main/java/nu/marginalia/index/query/IndexQueryPriority.java rename to code/index/query/java/nu/marginalia/index/query/IndexQueryPriority.java diff --git a/code/features-index/index-query/src/main/java/nu/marginalia/index/query/IndexSearchBudget.java b/code/index/query/java/nu/marginalia/index/query/IndexSearchBudget.java similarity index 82% rename from code/features-index/index-query/src/main/java/nu/marginalia/index/query/IndexSearchBudget.java rename to code/index/query/java/nu/marginalia/index/query/IndexSearchBudget.java index 1ed211d1..f8d1ffa3 100644 --- a/code/features-index/index-query/src/main/java/nu/marginalia/index/query/IndexSearchBudget.java +++ b/code/index/query/java/nu/marginalia/index/query/IndexSearchBudget.java @@ -10,4 +10,5 @@ public class IndexSearchBudget { } public boolean hasTimeLeft() { return System.currentTimeMillis() < timeout; } + public long timeLeft() { return timeout - System.currentTimeMillis(); } } diff --git a/code/features-index/index-query/src/main/java/nu/marginalia/index/query/filter/QueryFilterAnyOf.java b/code/index/query/java/nu/marginalia/index/query/filter/QueryFilterAnyOf.java similarity index 100% rename from 
code/features-index/index-query/src/main/java/nu/marginalia/index/query/filter/QueryFilterAnyOf.java rename to code/index/query/java/nu/marginalia/index/query/filter/QueryFilterAnyOf.java diff --git a/code/features-index/index-query/src/main/java/nu/marginalia/index/query/filter/QueryFilterLetThrough.java b/code/index/query/java/nu/marginalia/index/query/filter/QueryFilterLetThrough.java similarity index 100% rename from code/features-index/index-query/src/main/java/nu/marginalia/index/query/filter/QueryFilterLetThrough.java rename to code/index/query/java/nu/marginalia/index/query/filter/QueryFilterLetThrough.java diff --git a/code/features-index/index-query/src/main/java/nu/marginalia/index/query/filter/QueryFilterNoPass.java b/code/index/query/java/nu/marginalia/index/query/filter/QueryFilterNoPass.java similarity index 100% rename from code/features-index/index-query/src/main/java/nu/marginalia/index/query/filter/QueryFilterNoPass.java rename to code/index/query/java/nu/marginalia/index/query/filter/QueryFilterNoPass.java diff --git a/code/features-index/index-query/src/main/java/nu/marginalia/index/query/filter/QueryFilterStepExcludeFromPredicate.java b/code/index/query/java/nu/marginalia/index/query/filter/QueryFilterStepExcludeFromPredicate.java similarity index 100% rename from code/features-index/index-query/src/main/java/nu/marginalia/index/query/filter/QueryFilterStepExcludeFromPredicate.java rename to code/index/query/java/nu/marginalia/index/query/filter/QueryFilterStepExcludeFromPredicate.java diff --git a/code/features-index/index-query/src/main/java/nu/marginalia/index/query/filter/QueryFilterStepFromPredicate.java b/code/index/query/java/nu/marginalia/index/query/filter/QueryFilterStepFromPredicate.java similarity index 100% rename from code/features-index/index-query/src/main/java/nu/marginalia/index/query/filter/QueryFilterStepFromPredicate.java rename to code/index/query/java/nu/marginalia/index/query/filter/QueryFilterStepFromPredicate.java 
diff --git a/code/features-index/index-query/src/main/java/nu/marginalia/index/query/filter/QueryFilterStepIf.java b/code/index/query/java/nu/marginalia/index/query/filter/QueryFilterStepIf.java similarity index 97% rename from code/features-index/index-query/src/main/java/nu/marginalia/index/query/filter/QueryFilterStepIf.java rename to code/index/query/java/nu/marginalia/index/query/filter/QueryFilterStepIf.java index a13615ce..748fdde6 100644 --- a/code/features-index/index-query/src/main/java/nu/marginalia/index/query/filter/QueryFilterStepIf.java +++ b/code/index/query/java/nu/marginalia/index/query/filter/QueryFilterStepIf.java @@ -2,8 +2,6 @@ package nu.marginalia.index.query.filter; import nu.marginalia.array.buffer.LongQueryBuffer; -import java.util.List; - public interface QueryFilterStepIf extends Comparable { boolean test(long value); diff --git a/code/features-index/index-query/src/main/java/nu/marginalia/index/query/limit/QueryLimits.java b/code/index/query/java/nu/marginalia/index/query/limit/QueryLimits.java similarity index 100% rename from code/features-index/index-query/src/main/java/nu/marginalia/index/query/limit/QueryLimits.java rename to code/index/query/java/nu/marginalia/index/query/limit/QueryLimits.java diff --git a/code/features-index/index-query/src/main/java/nu/marginalia/index/query/limit/QueryStrategy.java b/code/index/query/java/nu/marginalia/index/query/limit/QueryStrategy.java similarity index 100% rename from code/features-index/index-query/src/main/java/nu/marginalia/index/query/limit/QueryStrategy.java rename to code/index/query/java/nu/marginalia/index/query/limit/QueryStrategy.java diff --git a/code/features-index/index-query/src/main/java/nu/marginalia/index/query/limit/SpecificationLimit.java b/code/index/query/java/nu/marginalia/index/query/limit/SpecificationLimit.java similarity index 100% rename from code/features-index/index-query/src/main/java/nu/marginalia/index/query/limit/SpecificationLimit.java rename to 
code/index/query/java/nu/marginalia/index/query/limit/SpecificationLimit.java diff --git a/code/features-index/index-query/src/main/java/nu/marginalia/index/query/limit/SpecificationLimitType.java b/code/index/query/java/nu/marginalia/index/query/limit/SpecificationLimitType.java similarity index 100% rename from code/features-index/index-query/src/main/java/nu/marginalia/index/query/limit/SpecificationLimitType.java rename to code/index/query/java/nu/marginalia/index/query/limit/SpecificationLimitType.java diff --git a/code/features-index/index-query/readme.md b/code/index/query/readme.md similarity index 70% rename from code/features-index/index-query/readme.md rename to code/index/query/readme.md index 3334cada..7386339c 100644 --- a/code/features-index/index-query/readme.md +++ b/code/index/query/readme.md @@ -12,11 +12,11 @@ interfaces are implemented within the index-service module. ## Central Classes -* [IndexQuery](src/main/java/nu/marginalia/index/query/IndexQuery.java) -* [query/filter](src/main/java/nu/marginalia/index/query/filter/) +* [IndexQuery](java/nu/marginalia/index/query/IndexQuery.java) +* [query/filter](java/nu/marginalia/index/query/filter/) ## See Also * [index/index-reverse](../index-reverse) implements many of these interfaces. 
* [libraries/array](../../libraries/array) -* [libraries/array/.../LongQueryBuffer](../../libraries/array/src/main/java/nu/marginalia/array/buffer/LongQueryBuffer.java) \ No newline at end of file +* [libraries/array/.../LongQueryBuffer](../../libraries/array/java/nu/marginalia/array/buffer/LongQueryBuffer.java) \ No newline at end of file diff --git a/code/features-index/index-query/src/test/java/nu/marginalia/index/query/filter/QueryFilterStepIfTest.java b/code/index/query/test/nu/marginalia/index/query/filter/QueryFilterStepIfTest.java similarity index 100% rename from code/features-index/index-query/src/test/java/nu/marginalia/index/query/filter/QueryFilterStepIfTest.java rename to code/index/query/test/nu/marginalia/index/query/filter/QueryFilterStepIfTest.java diff --git a/code/index/readme.md b/code/index/readme.md new file mode 100644 index 00000000..2254c2a2 --- /dev/null +++ b/code/index/readme.md @@ -0,0 +1,70 @@ +# Index + +This index subsystem contains the components that make up the search index. + +It exposes an API for querying the index, and contains the logic +for ranking search results. It does not parse the query, that is +the responsibility of the [search-query](../functions/search-query) module. + +## Indexes + +There are two indexes with accompanying tools for constructing them. + +* [index-reverse](index-reverse/) is code for `word->document` indexes. There are two such indexes, one containing only document-word pairs that are flagged as important, e.g. the word appears in the title or has a high TF-IDF. This allows good results to be discovered quickly without having to sift through ten thousand bad ones first. + +* [index-forward](index-forward/) is the `document->word` index containing metadata about each word, such as its position. It is used after identifying candidate search results via the reverse index to fetch metadata and rank the results. 
+ +Additionally, the [index-journal](index-journal/) contains code for constructing a journal of the index, which is used to keep the index up to date. + +These indexes rely heavily on the [libraries/btree](../libraries/btree) and [libraries/array](../libraries/array) components. + +--- + +# Result Ranking + +The module is also responsible for ranking search results, and contains various heuristics +for deciding which search results are important with regard to a query. In broad strokes [BM-25](https://nlp.stanford.edu/IR-book/html/htmledition/okapi-bm25-a-non-binary-model-1.html) +is used, with a number of additional bonuses and penalties to rank the appropriate search +results higher. + +## Central Classes + +* [ResultValuator](java/nu/marginalia/ranking/results/ResultValuator.java) + +--- + +# Domain Ranking + +The module contains domain ranking algorithms. The domain ranking algorithms are based on +the JGraphT library. + +Two principal algorithms are available: the standard PageRank algorithm, +and Personalized PageRank; each is available for two graphs, the link graph +and a similarity graph where each edge corresponds to the similarity between +the sets of incident links to two domains, their cosine similarity acting as +the weight of the links. + +With the standard PageRank algorithm, the similarity graph does not produce +anything useful, but something magical happens when you apply Personalized PageRank +to this graph. It turns into a very good "vibe"-sensitive ranking algorithm. + +It's unclear if this is a well-known result, but it's a very interesting one +for creating a ranking algorithm that is focused on a particular segment of the web. + +## Central Classes + +* [PageRankDomainRanker](java/nu/marginalia/ranking/domains/PageRankDomainRanker.java) - Ranks domains using the + PageRank or Personalized PageRank algorithm depending on whether a list of influence domains is provided. 
+ +### Data sources + +* [LinkGraphSource](java/nu/marginalia/ranking/domains/data/LinkGraphSource.java) - fetches the link graph +* [InvertedLinkGraphSource](java/nu/marginalia/ranking/domains/data/InvertedLinkGraphSource.java) - fetches the inverted link graph +* [SimilarityGraphSource](java/nu/marginalia/ranking/domains/data/SimilarityGraphSource.java) - fetches the similarity graph from the database + +Note that the similarity graph needs to be precomputed and stored in the database for +the similarity graph source to be available. + +## Useful Resources + +* [The PageRank Citation Ranking: Bringing Order to the Web](http://ilpubs.stanford.edu:8090/422/1/1999-66.pdf) diff --git a/code/services-core/index-service/src/test/java/nu/marginalia/index/svc/IndexQueryServiceIntegrationSmokeTest.java b/code/index/test/nu/marginalia/index/IndexQueryServiceIntegrationSmokeTest.java similarity index 93% rename from code/services-core/index-service/src/test/java/nu/marginalia/index/svc/IndexQueryServiceIntegrationSmokeTest.java rename to code/index/test/nu/marginalia/index/IndexQueryServiceIntegrationSmokeTest.java index 68e3437f..634481f4 100644 --- a/code/services-core/index-service/src/test/java/nu/marginalia/index/svc/IndexQueryServiceIntegrationSmokeTest.java +++ b/code/index/test/nu/marginalia/index/IndexQueryServiceIntegrationSmokeTest.java @@ -1,21 +1,21 @@ -package nu.marginalia.index.svc; +package nu.marginalia.index; import com.google.inject.Guice; import com.google.inject.Inject; import lombok.SneakyThrows; import nu.marginalia.IndexLocations; +import nu.marginalia.api.searchquery.model.query.SearchSpecification; +import nu.marginalia.api.searchquery.model.query.SearchSubquery; +import nu.marginalia.api.searchquery.model.results.ResultRankingParameters; +import nu.marginalia.index.index.StatefulIndex; +import nu.marginalia.process.control.FakeProcessHeartbeat; +import nu.marginalia.process.control.ProcessHeartbeat; import 
nu.marginalia.storage.FileStorageService; import nu.marginalia.hash.MurmurHash3_128; -import nu.marginalia.index.ReverseIndexFullFileNames; -import nu.marginalia.index.ReverseIndexPrioFileNames; -import nu.marginalia.index.client.model.query.SearchSpecification; -import nu.marginalia.index.client.model.query.SearchSubquery; -import nu.marginalia.index.client.model.results.ResultRankingParameters; import nu.marginalia.index.construction.DocIdRewriter; import nu.marginalia.index.construction.ReverseIndexConstructor; import nu.marginalia.index.forward.ForwardIndexConverter; import nu.marginalia.index.forward.ForwardIndexFileNames; -import nu.marginalia.index.index.SearchIndex; import nu.marginalia.index.journal.model.IndexJournalEntryData; import nu.marginalia.index.journal.model.IndexJournalEntryHeader; import nu.marginalia.index.journal.reader.IndexJournalReader; @@ -31,9 +31,7 @@ import nu.marginalia.model.id.UrlIdCodec; import nu.marginalia.model.idx.WordFlags; import nu.marginalia.model.idx.DocumentMetadata; import nu.marginalia.model.idx.WordMetadata; -import nu.marginalia.process.control.FakeProcessHeartbeat; -import nu.marginalia.process.control.ProcessHeartbeat; -import nu.marginalia.ranking.DomainRankings; +import nu.marginalia.index.domainrankings.DomainRankings; import nu.marginalia.service.control.ServiceHeartbeat; import nu.marginalia.service.server.Initialization; import org.junit.jupiter.api.AfterEach; @@ -41,7 +39,6 @@ import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.parallel.Execution; -import spark.Spark; import java.io.IOException; import java.nio.file.Files; @@ -63,9 +60,9 @@ public class IndexQueryServiceIntegrationSmokeTest { IndexQueryServiceIntegrationTestModule testModule; @Inject - IndexQueryService queryService; + IndexGrpcService queryService; @Inject - SearchIndex searchIndex; + StatefulIndex statefulIndex; @Inject ServiceHeartbeat heartbeat; 
@@ -97,8 +94,6 @@ public class IndexQueryServiceIntegrationSmokeTest { @AfterEach public void tearDown() throws IOException { testModule.cleanUp(); - - Spark.stop(); } @Test @@ -115,7 +110,7 @@ public class IndexQueryServiceIntegrationSmokeTest { indexJournalWriter.close(); constructIndex(); - searchIndex.switchIndex(); + statefulIndex.switchIndex(); var rsp = queryService.justQuery( SearchSpecification.builder() @@ -125,7 +120,6 @@ public class IndexQueryServiceIntegrationSmokeTest { .quality(SpecificationLimit.none()) .size(SpecificationLimit.none()) .rank(SpecificationLimit.none()) - .domainCount(SpecificationLimit.none()) .rankingParams(ResultRankingParameters.sensibleDefaults()) .domains(new ArrayList<>()) .searchSetIdentifier("NONE") @@ -140,6 +134,8 @@ public class IndexQueryServiceIntegrationSmokeTest { .mapToLong(i -> i.rawIndexResult.getDocumentId()) .toArray(); + System.out.println(Arrays.toString(actual)); + System.out.println(Arrays.toString(ids)); Assertions.assertArrayEquals(ids, actual); } @@ -158,7 +154,7 @@ public class IndexQueryServiceIntegrationSmokeTest { indexJournalWriter.close(); constructIndex(); - searchIndex.switchIndex(); + statefulIndex.switchIndex(); var rsp = queryService.justQuery( SearchSpecification.builder() @@ -167,7 +163,6 @@ public class IndexQueryServiceIntegrationSmokeTest { .quality(SpecificationLimit.none()) .size(SpecificationLimit.none()) .rank(SpecificationLimit.none()) - .domainCount(SpecificationLimit.none()) .rankingParams(ResultRankingParameters.sensibleDefaults()) .queryStrategy(QueryStrategy.SENTENCE) .domains(List.of(2)) @@ -195,7 +190,7 @@ public class IndexQueryServiceIntegrationSmokeTest { indexJournalWriter.close(); constructIndex(); - searchIndex.switchIndex(); + statefulIndex.switchIndex(); var rsp = queryService.justQuery( SearchSpecification.builder() @@ -204,7 +199,6 @@ public class IndexQueryServiceIntegrationSmokeTest { .year(SpecificationLimit.equals(1998)) .size(SpecificationLimit.none()) 
.rank(SpecificationLimit.none()) - .domainCount(SpecificationLimit.none()) .queryStrategy(QueryStrategy.SENTENCE) .searchSetIdentifier("NONE") .rankingParams(ResultRankingParameters.sensibleDefaults()) diff --git a/code/services-core/index-service/src/test/java/nu/marginalia/index/svc/IndexQueryServiceIntegrationTest.java b/code/index/test/nu/marginalia/index/IndexQueryServiceIntegrationTest.java similarity index 97% rename from code/services-core/index-service/src/test/java/nu/marginalia/index/svc/IndexQueryServiceIntegrationTest.java rename to code/index/test/nu/marginalia/index/IndexQueryServiceIntegrationTest.java index 845b643b..6def5bbc 100644 --- a/code/services-core/index-service/src/test/java/nu/marginalia/index/svc/IndexQueryServiceIntegrationTest.java +++ b/code/index/test/nu/marginalia/index/IndexQueryServiceIntegrationTest.java @@ -1,20 +1,18 @@ -package nu.marginalia.index.svc; +package nu.marginalia.index; import com.google.inject.Guice; import com.google.inject.Inject; import nu.marginalia.IndexLocations; +import nu.marginalia.api.searchquery.model.query.SearchSpecification; +import nu.marginalia.api.searchquery.model.query.SearchSubquery; +import nu.marginalia.api.searchquery.model.results.ResultRankingParameters; +import nu.marginalia.index.index.StatefulIndex; import nu.marginalia.storage.FileStorageService; import nu.marginalia.hash.MurmurHash3_128; -import nu.marginalia.index.ReverseIndexFullFileNames; -import nu.marginalia.index.ReverseIndexPrioFileNames; -import nu.marginalia.index.client.model.query.SearchSpecification; -import nu.marginalia.index.client.model.query.SearchSubquery; -import nu.marginalia.index.client.model.results.ResultRankingParameters; import nu.marginalia.index.construction.DocIdRewriter; import nu.marginalia.index.construction.ReverseIndexConstructor; import nu.marginalia.index.forward.ForwardIndexConverter; import nu.marginalia.index.forward.ForwardIndexFileNames; -import nu.marginalia.index.index.SearchIndex; import 
nu.marginalia.index.journal.model.IndexJournalEntryData; import nu.marginalia.index.journal.model.IndexJournalEntryHeader; import nu.marginalia.index.journal.reader.IndexJournalReader; @@ -34,14 +32,13 @@ import nu.marginalia.model.idx.WordFlags; import nu.marginalia.model.idx.WordMetadata; import nu.marginalia.process.control.FakeProcessHeartbeat; import nu.marginalia.process.control.ProcessHeartbeat; -import nu.marginalia.ranking.DomainRankings; +import nu.marginalia.index.domainrankings.DomainRankings; import nu.marginalia.service.control.ServiceHeartbeat; import nu.marginalia.service.server.Initialization; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.parallel.Execution; -import spark.Spark; import javax.annotation.CheckReturnValue; import java.io.IOException; @@ -65,9 +62,9 @@ public class IndexQueryServiceIntegrationTest { IndexQueryServiceIntegrationTestModule testModule; @Inject - IndexQueryService queryService; + IndexGrpcService queryService; @Inject - SearchIndex searchIndex; + StatefulIndex statefulIndex; @Inject ServiceHeartbeat heartbeat; @@ -98,8 +95,6 @@ public class IndexQueryServiceIntegrationTest { @AfterEach public void tearDown() throws IOException { testModule.cleanUp(); - - Spark.stop(); } @Test @@ -427,7 +422,6 @@ public class IndexQueryServiceIntegrationTest { .quality(SpecificationLimit.none()) .size(SpecificationLimit.none()) .rank(SpecificationLimit.none()) - .domainCount(SpecificationLimit.none()) .rankingParams(ResultRankingParameters.sensibleDefaults()) .domains(new ArrayList<>()) .searchSetIdentifier("NONE") @@ -588,7 +582,7 @@ public class IndexQueryServiceIntegrationTest { indexJournalWriter.close(); constructIndex(); documentDbReader.reconnect(); - searchIndex.switchIndex(); + statefulIndex.switchIndex(); } } diff --git 
a/code/services-core/index-service/src/test/java/nu/marginalia/index/svc/IndexQueryServiceIntegrationTestModule.java b/code/index/test/nu/marginalia/index/IndexQueryServiceIntegrationTestModule.java similarity index 90% rename from code/services-core/index-service/src/test/java/nu/marginalia/index/svc/IndexQueryServiceIntegrationTestModule.java rename to code/index/test/nu/marginalia/index/IndexQueryServiceIntegrationTestModule.java index a7f67bbe..d04b458c 100644 --- a/code/services-core/index-service/src/test/java/nu/marginalia/index/svc/IndexQueryServiceIntegrationTestModule.java +++ b/code/index/test/nu/marginalia/index/IndexQueryServiceIntegrationTestModule.java @@ -1,7 +1,10 @@ -package nu.marginalia.index.svc; +package nu.marginalia.index; import com.google.inject.AbstractModule; import nu.marginalia.IndexLocations; +import nu.marginalia.index.searchset.SearchSetAny; +import nu.marginalia.index.searchset.SearchSetsService; +import nu.marginalia.index.util.TestUtil; import nu.marginalia.storage.FileStorageService; import nu.marginalia.storage.model.FileStorageBase; import nu.marginalia.storage.model.FileStorageBaseType; @@ -10,9 +13,7 @@ import nu.marginalia.index.journal.writer.IndexJournalWriterPagingImpl; import nu.marginalia.linkdb.docs.DocumentDbReader; import nu.marginalia.process.control.FakeProcessHeartbeat; import nu.marginalia.process.control.ProcessHeartbeat; -import nu.marginalia.ranking.DomainRankings; -import nu.marginalia.index.svc.searchset.SearchSetAny; -import nu.marginalia.index.util.TestUtil; +import nu.marginalia.index.domainrankings.DomainRankings; import nu.marginalia.service.control.*; import nu.marginalia.service.id.ServiceId; import nu.marginalia.service.module.ServiceConfiguration; @@ -67,10 +68,10 @@ public class IndexQueryServiceIntegrationTestModule extends AbstractModule { bind(ServiceHeartbeat.class).toInstance(new FakeServiceHeartbeat()); bind(ProcessHeartbeat.class).toInstance(new FakeProcessHeartbeat()); - 
IndexSearchSetsService setsServiceMock = Mockito.mock(IndexSearchSetsService.class); + SearchSetsService setsServiceMock = Mockito.mock(SearchSetsService.class); when(setsServiceMock.getSearchSetByName("NONE")).thenReturn(new SearchSetAny()); when(setsServiceMock.getDomainRankings()).thenReturn(new DomainRankings()); - bind(IndexSearchSetsService.class).toInstance(setsServiceMock); + bind(SearchSetsService.class).toInstance(setsServiceMock); bind(ServiceEventLog.class).toInstance(Mockito.mock(ServiceEventLog.class)); @@ -82,7 +83,7 @@ public class IndexQueryServiceIntegrationTestModule extends AbstractModule { ServiceId.Index, 0, "127.0.0.1", - randomPort(), + "127.0.0.1", randomPort(), UUID.randomUUID() )); diff --git a/code/services-core/index-service/src/test/java/nu/marginalia/index/results/IndexResultDomainDeduplicatorTest.java b/code/index/test/nu/marginalia/index/results/IndexResultDomainDeduplicatorTest.java similarity index 83% rename from code/services-core/index-service/src/test/java/nu/marginalia/index/results/IndexResultDomainDeduplicatorTest.java rename to code/index/test/nu/marginalia/index/results/IndexResultDomainDeduplicatorTest.java index 891a5ff0..4f5a12cd 100644 --- a/code/services-core/index-service/src/test/java/nu/marginalia/index/results/IndexResultDomainDeduplicatorTest.java +++ b/code/index/test/nu/marginalia/index/results/IndexResultDomainDeduplicatorTest.java @@ -1,6 +1,6 @@ package nu.marginalia.index.results; -import nu.marginalia.index.client.model.results.SearchResultItem; +import nu.marginalia.api.searchquery.model.results.SearchResultItem; import nu.marginalia.model.id.UrlIdCodec; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.Test; @@ -9,11 +9,6 @@ import static org.junit.jupiter.api.Assertions.*; class IndexResultDomainDeduplicatorTest { - @AfterEach - public void clear() { - IndexResultDomainDeduplicator.clearCachedObjects(); - } - @Test public void testDeduplicator() { diff --git 
a/code/services-core/index-service/src/test/java/nu/marginalia/index/svc/searchset/RankingSearchSetTest.java b/code/index/test/nu/marginalia/index/searchset/RankingSearchSetTest.java similarity index 88% rename from code/services-core/index-service/src/test/java/nu/marginalia/index/svc/searchset/RankingSearchSetTest.java rename to code/index/test/nu/marginalia/index/searchset/RankingSearchSetTest.java index 577c1fb3..30f3ded4 100644 --- a/code/services-core/index-service/src/test/java/nu/marginalia/index/svc/searchset/RankingSearchSetTest.java +++ b/code/index/test/nu/marginalia/index/searchset/RankingSearchSetTest.java @@ -1,7 +1,6 @@ -package nu.marginalia.index.svc.searchset; +package nu.marginalia.index.searchset; import it.unimi.dsi.fastutil.ints.IntOpenHashSet; -import nu.marginalia.index.client.model.query.SearchSetIdentifier; import org.junit.jupiter.api.Test; import java.io.IOException; diff --git a/code/services-core/index-service/src/test/java/nu/marginalia/index/util/TestUtil.java b/code/index/test/nu/marginalia/index/util/TestUtil.java similarity index 100% rename from code/services-core/index-service/src/test/java/nu/marginalia/index/util/TestUtil.java rename to code/index/test/nu/marginalia/index/util/TestUtil.java diff --git a/code/features-index/domain-ranking/src/test/java/nu/marginalia/ranking/RankingAlgorithmWithRealDataTest.java b/code/index/test/nu/marginalia/ranking/domains/RankingAlgorithmWithRealDataTest.java similarity index 95% rename from code/features-index/domain-ranking/src/test/java/nu/marginalia/ranking/RankingAlgorithmWithRealDataTest.java rename to code/index/test/nu/marginalia/ranking/domains/RankingAlgorithmWithRealDataTest.java index 88a8b5e8..10195f92 100644 --- a/code/features-index/domain-ranking/src/test/java/nu/marginalia/ranking/RankingAlgorithmWithRealDataTest.java +++ b/code/index/test/nu/marginalia/ranking/domains/RankingAlgorithmWithRealDataTest.java @@ -1,6 +1,6 @@ -package nu.marginalia.ranking; +package 
nu.marginalia.ranking.domains; -import nu.marginalia.ranking.accumulator.RankingResultListAccumulator; +import nu.marginalia.ranking.domains.accumulator.RankingResultListAccumulator; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; diff --git a/code/features-index/domain-ranking/src/test/java/nu/marginalia/ranking/RankingAlgorithmsContainerTest.java b/code/index/test/nu/marginalia/ranking/domains/RankingAlgorithmsContainerTest.java similarity index 85% rename from code/features-index/domain-ranking/src/test/java/nu/marginalia/ranking/RankingAlgorithmsContainerTest.java rename to code/index/test/nu/marginalia/ranking/domains/RankingAlgorithmsContainerTest.java index 7fdd2f82..f748465a 100644 --- a/code/features-index/domain-ranking/src/test/java/nu/marginalia/ranking/RankingAlgorithmsContainerTest.java +++ b/code/index/test/nu/marginalia/ranking/domains/RankingAlgorithmsContainerTest.java @@ -1,12 +1,12 @@ -package nu.marginalia.ranking; +package nu.marginalia.ranking.domains; import com.zaxxer.hikari.HikariConfig; import com.zaxxer.hikari.HikariDataSource; -import nu.marginalia.query.client.QueryClient; -import nu.marginalia.ranking.data.InvertedLinkGraphSource; -import nu.marginalia.ranking.data.LinkGraphSource; -import nu.marginalia.ranking.data.SimilarityGraphSource; +import nu.marginalia.api.linkgraph.AggregateLinkGraphClient; +import nu.marginalia.ranking.domains.data.InvertedLinkGraphSource; +import nu.marginalia.ranking.domains.data.LinkGraphSource; +import nu.marginalia.ranking.domains.data.SimilarityGraphSource; import nu.marginalia.test.TestMigrationLoader; import org.jgrapht.Graph; import org.jgrapht.graph.DefaultWeightedEdge; @@ -20,7 +20,6 @@ import org.testcontainers.junit.jupiter.Testcontainers; import java.sql.SQLException; import java.util.List; -import static org.junit.Assert.assertEquals; import static org.junit.jupiter.api.parallel.ExecutionMode.SAME_THREAD; import static org.mockito.Mockito.when; @@ -37,8 +36,9 @@ 
public class RankingAlgorithmsContainerTest { static HikariDataSource dataSource; - QueryClient queryClient; - QueryClient.AllLinks allLinks; + AggregateLinkGraphClient domainLinksClient; + AggregateLinkGraphClient.AllLinks allLinks; + @BeforeAll public static void setup() { HikariConfig config = new HikariConfig(); @@ -66,9 +66,9 @@ public class RankingAlgorithmsContainerTest { @BeforeEach public void setupQueryClient() { - queryClient = Mockito.mock(QueryClient.class); - allLinks = new QueryClient.AllLinks(); - when(queryClient.getAllDomainLinks()).thenReturn(allLinks); + domainLinksClient = Mockito.mock(AggregateLinkGraphClient.class); + allLinks = new AggregateLinkGraphClient.AllLinks(); + when(domainLinksClient.getAllDomainLinks()).thenReturn(allLinks); try (var conn = dataSource.getConnection(); var stmt = conn.createStatement()) { @@ -97,7 +97,7 @@ public class RankingAlgorithmsContainerTest { @Test public void testGetDomains() { // should all be the same, doesn't matter which one we use - var source = new LinkGraphSource(dataSource, queryClient); + var source = new LinkGraphSource(dataSource, domainLinksClient); Assertions.assertEquals(List.of(1), source.domainIds(List.of("memex.marginalia.nu"))); @@ -111,7 +111,7 @@ public class RankingAlgorithmsContainerTest { public void testLinkGraphSource() { allLinks.add(1, 3); - var graph = new LinkGraphSource(dataSource, queryClient).getGraph(); + var graph = new LinkGraphSource(dataSource, domainLinksClient).getGraph(); Assertions.assertTrue(graph.containsVertex(1)); Assertions.assertTrue(graph.containsVertex(2)); @@ -127,7 +127,7 @@ public class RankingAlgorithmsContainerTest { public void testInvertedLinkGraphSource() { allLinks.add(1, 3); - var graph = new InvertedLinkGraphSource(dataSource, queryClient).getGraph(); + var graph = new InvertedLinkGraphSource(dataSource, domainLinksClient).getGraph(); Assertions.assertTrue(graph.containsVertex(1)); Assertions.assertTrue(graph.containsVertex(2)); diff --git 
a/code/features-index/domain-ranking/src/test/java/nu/marginalia/ranking/TestGraphSourceForInvertedLinkData.java b/code/index/test/nu/marginalia/ranking/domains/TestGraphSourceForInvertedLinkData.java similarity index 97% rename from code/features-index/domain-ranking/src/test/java/nu/marginalia/ranking/TestGraphSourceForInvertedLinkData.java rename to code/index/test/nu/marginalia/ranking/domains/TestGraphSourceForInvertedLinkData.java index e07cd176..a9a51fe4 100644 --- a/code/features-index/domain-ranking/src/test/java/nu/marginalia/ranking/TestGraphSourceForInvertedLinkData.java +++ b/code/index/test/nu/marginalia/ranking/domains/TestGraphSourceForInvertedLinkData.java @@ -1,8 +1,8 @@ -package nu.marginalia.ranking; +package nu.marginalia.ranking.domains; import lombok.SneakyThrows; import nu.marginalia.array.LongArrayFactory; -import nu.marginalia.ranking.data.GraphSource; +import nu.marginalia.ranking.domains.data.GraphSource; import org.apache.commons.lang3.StringUtils; import org.jgrapht.Graph; import org.jgrapht.graph.DefaultDirectedGraph; diff --git a/code/features-index/domain-ranking/src/test/java/nu/marginalia/ranking/TestGraphSourceForLinkData.java b/code/index/test/nu/marginalia/ranking/domains/TestGraphSourceForLinkData.java similarity index 97% rename from code/features-index/domain-ranking/src/test/java/nu/marginalia/ranking/TestGraphSourceForLinkData.java rename to code/index/test/nu/marginalia/ranking/domains/TestGraphSourceForLinkData.java index e009f628..03fcdb14 100644 --- a/code/features-index/domain-ranking/src/test/java/nu/marginalia/ranking/TestGraphSourceForLinkData.java +++ b/code/index/test/nu/marginalia/ranking/domains/TestGraphSourceForLinkData.java @@ -1,8 +1,8 @@ -package nu.marginalia.ranking; +package nu.marginalia.ranking.domains; import lombok.SneakyThrows; import nu.marginalia.array.LongArrayFactory; -import nu.marginalia.ranking.data.GraphSource; +import nu.marginalia.ranking.domains.data.GraphSource; import 
org.apache.commons.lang3.StringUtils; import org.jgrapht.Graph; import org.jgrapht.graph.DefaultDirectedGraph; diff --git a/code/features-index/domain-ranking/src/test/java/nu/marginalia/ranking/TestGraphSourceForSimilarityData.java b/code/index/test/nu/marginalia/ranking/domains/TestGraphSourceForSimilarityData.java similarity index 92% rename from code/features-index/domain-ranking/src/test/java/nu/marginalia/ranking/TestGraphSourceForSimilarityData.java rename to code/index/test/nu/marginalia/ranking/domains/TestGraphSourceForSimilarityData.java index 47fd6e40..4aa48fbc 100644 --- a/code/features-index/domain-ranking/src/test/java/nu/marginalia/ranking/TestGraphSourceForSimilarityData.java +++ b/code/index/test/nu/marginalia/ranking/domains/TestGraphSourceForSimilarityData.java @@ -1,12 +1,9 @@ -package nu.marginalia.ranking; +package nu.marginalia.ranking.domains; import lombok.SneakyThrows; -import nu.marginalia.array.LongArrayFactory; -import nu.marginalia.ranking.data.GraphSource; +import nu.marginalia.ranking.domains.data.GraphSource; import org.apache.commons.lang3.StringUtils; import org.jgrapht.Graph; -import org.jgrapht.graph.DefaultDirectedGraph; -import org.jgrapht.graph.DefaultEdge; import org.jgrapht.graph.DefaultUndirectedWeightedGraph; import org.jgrapht.graph.DefaultWeightedEdge; diff --git a/code/features-index/result-ranking/src/test/java/nu/marginalia/ranking/ResultValuatorTest.java b/code/index/test/nu/marginalia/ranking/results/ResultValuatorTest.java similarity index 87% rename from code/features-index/result-ranking/src/test/java/nu/marginalia/ranking/ResultValuatorTest.java rename to code/index/test/nu/marginalia/ranking/results/ResultValuatorTest.java index a4100e79..8f8f7eaa 100644 --- a/code/features-index/result-ranking/src/test/java/nu/marginalia/ranking/ResultValuatorTest.java +++ b/code/index/test/nu/marginalia/ranking/results/ResultValuatorTest.java @@ -1,14 +1,14 @@ -package nu.marginalia.ranking; +package 
nu.marginalia.ranking.results; -import nu.marginalia.index.client.model.results.ResultRankingContext; -import nu.marginalia.index.client.model.results.ResultRankingParameters; -import nu.marginalia.index.client.model.results.SearchResultKeywordScore; +import nu.marginalia.api.searchquery.model.results.ResultRankingContext; +import nu.marginalia.api.searchquery.model.results.ResultRankingParameters; +import nu.marginalia.api.searchquery.model.results.SearchResultKeywordScore; import nu.marginalia.model.idx.DocumentFlags; import nu.marginalia.model.idx.WordFlags; import nu.marginalia.model.crawl.PubDate; import nu.marginalia.model.idx.DocumentMetadata; import nu.marginalia.model.idx.WordMetadata; -import nu.marginalia.ranking.factors.*; +import nu.marginalia.ranking.results.factors.*; import nu.marginalia.term_frequency_dict.TermFrequencyDict; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -31,8 +31,7 @@ class ResultValuatorTest { valuator = new ResultValuator( new Bm25Factor(), - new TermCoherenceFactor(), - new PriorityTermBonus() + new TermCoherenceFactor() ); } @@ -40,20 +39,20 @@ class ResultValuatorTest { new SearchResultKeywordScore(0, "bob", wordMetadata(Set.of(1), EnumSet.of(WordFlags.Title)), docMetadata(0, 2010, 5, EnumSet.noneOf(DocumentFlags.class)), - 0, false) + 0) ); List highCountNoTitleSet = List.of( new SearchResultKeywordScore(0, "bob", wordMetadata(Set.of(1,3,4,6,7,9,10,11,12,14,15,16), EnumSet.of(WordFlags.TfIdfHigh)), docMetadata(0, 2010, 5, EnumSet.noneOf(DocumentFlags.class)), - 0, false) + 0) ); List highCountSubjectSet = List.of( new SearchResultKeywordScore(0, "bob", wordMetadata(Set.of(1,3,4,6,7,9,10,11,12,14,15,16), EnumSet.of(WordFlags.TfIdfHigh, WordFlags.Subjects)), docMetadata(0, 2010, 5, EnumSet.noneOf(DocumentFlags.class)), - 0, false) + 0) ); diff --git a/code/features-index/result-ranking/src/test/java/nu/marginalia/ranking/factors/TermCoherenceFactorTest.java 
b/code/index/test/nu/marginalia/ranking/results/factors/TermCoherenceFactorTest.java similarity index 93% rename from code/features-index/result-ranking/src/test/java/nu/marginalia/ranking/factors/TermCoherenceFactorTest.java rename to code/index/test/nu/marginalia/ranking/results/factors/TermCoherenceFactorTest.java index b4f455f4..a5bca54e 100644 --- a/code/features-index/result-ranking/src/test/java/nu/marginalia/ranking/factors/TermCoherenceFactorTest.java +++ b/code/index/test/nu/marginalia/ranking/results/factors/TermCoherenceFactorTest.java @@ -1,9 +1,9 @@ -package nu.marginalia.ranking.factors; +package nu.marginalia.ranking.results.factors; +import nu.marginalia.api.searchquery.model.results.SearchResultKeywordScore; import nu.marginalia.bbpc.BrailleBlockPunchCards; -import nu.marginalia.index.client.model.results.SearchResultKeywordScore; import nu.marginalia.model.idx.WordMetadata; -import nu.marginalia.ranking.ResultKeywordSet; +import nu.marginalia.ranking.results.ResultKeywordSet; import org.junit.jupiter.api.Test; import java.util.ArrayList; @@ -89,7 +89,7 @@ class TermCoherenceFactorTest { for (int i = 0; i < positionMasks.length; i++) { keywords.add(new SearchResultKeywordScore(0, "", - new WordMetadata(positionMasks[i], (byte) 0).encode(), 0, 0, false)); + new WordMetadata(positionMasks[i], (byte) 0).encode(), 0, 0)); } return new ResultKeywordSet(keywords); diff --git a/code/libraries/array/build.gradle b/code/libraries/array/build.gradle index 306ab2f9..88e27107 100644 --- a/code/libraries/array/build.gradle +++ b/code/libraries/array/build.gradle @@ -9,6 +9,8 @@ java { } } +apply from: "$rootProject.projectDir/srcsets.gradle" + dependencies { implementation libs.bundles.slf4j diff --git a/code/libraries/array/src/main/java/nu/marginalia/array/ArrayRangeReference.java b/code/libraries/array/java/nu/marginalia/array/ArrayRangeReference.java similarity index 100% rename from 
code/libraries/array/src/main/java/nu/marginalia/array/ArrayRangeReference.java rename to code/libraries/array/java/nu/marginalia/array/ArrayRangeReference.java diff --git a/code/libraries/array/src/main/java/nu/marginalia/array/IntArray.java b/code/libraries/array/java/nu/marginalia/array/IntArray.java similarity index 97% rename from code/libraries/array/src/main/java/nu/marginalia/array/IntArray.java rename to code/libraries/array/java/nu/marginalia/array/IntArray.java index 7f16bb63..355dc2dc 100644 --- a/code/libraries/array/src/main/java/nu/marginalia/array/IntArray.java +++ b/code/libraries/array/java/nu/marginalia/array/IntArray.java @@ -7,7 +7,6 @@ import nu.marginalia.array.algo.IntArrayTransformations; import nu.marginalia.array.delegate.ShiftedIntArray; import nu.marginalia.array.page.SegmentIntArray; -import java.io.IOException; import java.lang.foreign.Arena; public interface IntArray extends IntArrayBase, IntArrayTransformations, IntArraySearch, IntArraySort { diff --git a/code/libraries/array/src/main/java/nu/marginalia/array/LongArray.java b/code/libraries/array/java/nu/marginalia/array/LongArray.java similarity index 95% rename from code/libraries/array/src/main/java/nu/marginalia/array/LongArray.java rename to code/libraries/array/java/nu/marginalia/array/LongArray.java index c58014d0..98059f9f 100644 --- a/code/libraries/array/src/main/java/nu/marginalia/array/LongArray.java +++ b/code/libraries/array/java/nu/marginalia/array/LongArray.java @@ -5,7 +5,6 @@ import nu.marginalia.array.algo.LongArraySearch; import nu.marginalia.array.algo.LongArraySort; import nu.marginalia.array.algo.LongArrayTransformations; import nu.marginalia.array.delegate.ShiftedLongArray; -import nu.marginalia.array.page.SegmentLongArray; import nu.marginalia.array.page.UnsafeLongArray; import java.lang.foreign.Arena; diff --git a/code/libraries/array/src/main/java/nu/marginalia/array/LongArrayFactory.java 
b/code/libraries/array/java/nu/marginalia/array/LongArrayFactory.java similarity index 100% rename from code/libraries/array/src/main/java/nu/marginalia/array/LongArrayFactory.java rename to code/libraries/array/java/nu/marginalia/array/LongArrayFactory.java diff --git a/code/libraries/array/src/main/java/nu/marginalia/array/algo/BulkTransferArray.java b/code/libraries/array/java/nu/marginalia/array/algo/BulkTransferArray.java similarity index 100% rename from code/libraries/array/src/main/java/nu/marginalia/array/algo/BulkTransferArray.java rename to code/libraries/array/java/nu/marginalia/array/algo/BulkTransferArray.java diff --git a/code/libraries/array/src/main/java/nu/marginalia/array/algo/IntArrayBase.java b/code/libraries/array/java/nu/marginalia/array/algo/IntArrayBase.java similarity index 100% rename from code/libraries/array/src/main/java/nu/marginalia/array/algo/IntArrayBase.java rename to code/libraries/array/java/nu/marginalia/array/algo/IntArrayBase.java diff --git a/code/libraries/array/src/main/java/nu/marginalia/array/algo/IntArraySearch.java b/code/libraries/array/java/nu/marginalia/array/algo/IntArraySearch.java similarity index 100% rename from code/libraries/array/src/main/java/nu/marginalia/array/algo/IntArraySearch.java rename to code/libraries/array/java/nu/marginalia/array/algo/IntArraySearch.java diff --git a/code/libraries/array/src/main/java/nu/marginalia/array/algo/IntArraySort.java b/code/libraries/array/java/nu/marginalia/array/algo/IntArraySort.java similarity index 98% rename from code/libraries/array/src/main/java/nu/marginalia/array/algo/IntArraySort.java rename to code/libraries/array/java/nu/marginalia/array/algo/IntArraySort.java index 89905ea6..5eec50ef 100644 --- a/code/libraries/array/src/main/java/nu/marginalia/array/algo/IntArraySort.java +++ b/code/libraries/array/java/nu/marginalia/array/algo/IntArraySort.java @@ -1,7 +1,6 @@ package nu.marginalia.array.algo; import java.io.IOException; -import java.nio.IntBuffer; 
import java.nio.channels.FileChannel; import java.nio.file.Files; import java.nio.file.Path; diff --git a/code/libraries/array/src/main/java/nu/marginalia/array/algo/IntArrayTransformations.java b/code/libraries/array/java/nu/marginalia/array/algo/IntArrayTransformations.java similarity index 100% rename from code/libraries/array/src/main/java/nu/marginalia/array/algo/IntArrayTransformations.java rename to code/libraries/array/java/nu/marginalia/array/algo/IntArrayTransformations.java diff --git a/code/libraries/array/src/main/java/nu/marginalia/array/algo/LongArrayBase.java b/code/libraries/array/java/nu/marginalia/array/algo/LongArrayBase.java similarity index 100% rename from code/libraries/array/src/main/java/nu/marginalia/array/algo/LongArrayBase.java rename to code/libraries/array/java/nu/marginalia/array/algo/LongArrayBase.java diff --git a/code/libraries/array/src/main/java/nu/marginalia/array/algo/LongArraySearch.java b/code/libraries/array/java/nu/marginalia/array/algo/LongArraySearch.java similarity index 100% rename from code/libraries/array/src/main/java/nu/marginalia/array/algo/LongArraySearch.java rename to code/libraries/array/java/nu/marginalia/array/algo/LongArraySearch.java diff --git a/code/libraries/array/src/main/java/nu/marginalia/array/algo/LongArraySort.java b/code/libraries/array/java/nu/marginalia/array/algo/LongArraySort.java similarity index 100% rename from code/libraries/array/src/main/java/nu/marginalia/array/algo/LongArraySort.java rename to code/libraries/array/java/nu/marginalia/array/algo/LongArraySort.java diff --git a/code/libraries/array/src/main/java/nu/marginalia/array/algo/LongArrayTransformations.java b/code/libraries/array/java/nu/marginalia/array/algo/LongArrayTransformations.java similarity index 100% rename from code/libraries/array/src/main/java/nu/marginalia/array/algo/LongArrayTransformations.java rename to code/libraries/array/java/nu/marginalia/array/algo/LongArrayTransformations.java diff --git 
a/code/libraries/array/src/main/java/nu/marginalia/array/algo/SortAlgoInsertionSort.java b/code/libraries/array/java/nu/marginalia/array/algo/SortAlgoInsertionSort.java similarity index 100% rename from code/libraries/array/src/main/java/nu/marginalia/array/algo/SortAlgoInsertionSort.java rename to code/libraries/array/java/nu/marginalia/array/algo/SortAlgoInsertionSort.java diff --git a/code/libraries/array/src/main/java/nu/marginalia/array/algo/SortAlgoMergeSort.java b/code/libraries/array/java/nu/marginalia/array/algo/SortAlgoMergeSort.java similarity index 100% rename from code/libraries/array/src/main/java/nu/marginalia/array/algo/SortAlgoMergeSort.java rename to code/libraries/array/java/nu/marginalia/array/algo/SortAlgoMergeSort.java diff --git a/code/libraries/array/src/main/java/nu/marginalia/array/algo/SortAlgoQuickSort.java b/code/libraries/array/java/nu/marginalia/array/algo/SortAlgoQuickSort.java similarity index 100% rename from code/libraries/array/src/main/java/nu/marginalia/array/algo/SortAlgoQuickSort.java rename to code/libraries/array/java/nu/marginalia/array/algo/SortAlgoQuickSort.java diff --git a/code/libraries/array/src/main/java/nu/marginalia/array/algo/SortingContext.java b/code/libraries/array/java/nu/marginalia/array/algo/SortingContext.java similarity index 100% rename from code/libraries/array/src/main/java/nu/marginalia/array/algo/SortingContext.java rename to code/libraries/array/java/nu/marginalia/array/algo/SortingContext.java diff --git a/code/libraries/array/src/main/java/nu/marginalia/array/algo/TwoArrayOperations.java b/code/libraries/array/java/nu/marginalia/array/algo/TwoArrayOperations.java similarity index 99% rename from code/libraries/array/src/main/java/nu/marginalia/array/algo/TwoArrayOperations.java rename to code/libraries/array/java/nu/marginalia/array/algo/TwoArrayOperations.java index 5eafa361..c4af9090 100644 --- a/code/libraries/array/src/main/java/nu/marginalia/array/algo/TwoArrayOperations.java +++ 
b/code/libraries/array/java/nu/marginalia/array/algo/TwoArrayOperations.java @@ -2,8 +2,6 @@ package nu.marginalia.array.algo; import nu.marginalia.array.LongArray; -import java.util.function.LongBinaryOperator; - /** Functions for operating on pairs of arrays. */ diff --git a/code/libraries/array/src/main/java/nu/marginalia/array/buffer/IntQueryBuffer.java b/code/libraries/array/java/nu/marginalia/array/buffer/IntQueryBuffer.java similarity index 100% rename from code/libraries/array/src/main/java/nu/marginalia/array/buffer/IntQueryBuffer.java rename to code/libraries/array/java/nu/marginalia/array/buffer/IntQueryBuffer.java diff --git a/code/libraries/array/src/main/java/nu/marginalia/array/buffer/LongQueryBuffer.java b/code/libraries/array/java/nu/marginalia/array/buffer/LongQueryBuffer.java similarity index 100% rename from code/libraries/array/src/main/java/nu/marginalia/array/buffer/LongQueryBuffer.java rename to code/libraries/array/java/nu/marginalia/array/buffer/LongQueryBuffer.java diff --git a/code/libraries/array/src/main/java/nu/marginalia/array/delegate/ReferenceImplIntArrayDelegate.java b/code/libraries/array/java/nu/marginalia/array/delegate/ReferenceImplIntArrayDelegate.java similarity index 100% rename from code/libraries/array/src/main/java/nu/marginalia/array/delegate/ReferenceImplIntArrayDelegate.java rename to code/libraries/array/java/nu/marginalia/array/delegate/ReferenceImplIntArrayDelegate.java diff --git a/code/libraries/array/src/main/java/nu/marginalia/array/delegate/ReferenceImplLongArrayDelegate.java b/code/libraries/array/java/nu/marginalia/array/delegate/ReferenceImplLongArrayDelegate.java similarity index 100% rename from code/libraries/array/src/main/java/nu/marginalia/array/delegate/ReferenceImplLongArrayDelegate.java rename to code/libraries/array/java/nu/marginalia/array/delegate/ReferenceImplLongArrayDelegate.java diff --git a/code/libraries/array/src/main/java/nu/marginalia/array/delegate/ShiftedIntArray.java 
b/code/libraries/array/java/nu/marginalia/array/delegate/ShiftedIntArray.java similarity index 100% rename from code/libraries/array/src/main/java/nu/marginalia/array/delegate/ShiftedIntArray.java rename to code/libraries/array/java/nu/marginalia/array/delegate/ShiftedIntArray.java diff --git a/code/libraries/array/src/main/java/nu/marginalia/array/delegate/ShiftedLongArray.java b/code/libraries/array/java/nu/marginalia/array/delegate/ShiftedLongArray.java similarity index 100% rename from code/libraries/array/src/main/java/nu/marginalia/array/delegate/ShiftedLongArray.java rename to code/libraries/array/java/nu/marginalia/array/delegate/ShiftedLongArray.java diff --git a/code/libraries/array/src/main/java/nu/marginalia/array/functional/AddressRangeCall.java b/code/libraries/array/java/nu/marginalia/array/functional/AddressRangeCall.java similarity index 100% rename from code/libraries/array/src/main/java/nu/marginalia/array/functional/AddressRangeCall.java rename to code/libraries/array/java/nu/marginalia/array/functional/AddressRangeCall.java diff --git a/code/libraries/array/src/main/java/nu/marginalia/array/functional/AddressRangeCallIO.java b/code/libraries/array/java/nu/marginalia/array/functional/AddressRangeCallIO.java similarity index 100% rename from code/libraries/array/src/main/java/nu/marginalia/array/functional/AddressRangeCallIO.java rename to code/libraries/array/java/nu/marginalia/array/functional/AddressRangeCallIO.java diff --git a/code/libraries/array/src/main/java/nu/marginalia/array/functional/AddressRangeIntFunction.java b/code/libraries/array/java/nu/marginalia/array/functional/AddressRangeIntFunction.java similarity index 100% rename from code/libraries/array/src/main/java/nu/marginalia/array/functional/AddressRangeIntFunction.java rename to code/libraries/array/java/nu/marginalia/array/functional/AddressRangeIntFunction.java diff --git a/code/libraries/array/src/main/java/nu/marginalia/array/functional/AddressRangeLongFunction.java 
b/code/libraries/array/java/nu/marginalia/array/functional/AddressRangeLongFunction.java similarity index 100% rename from code/libraries/array/src/main/java/nu/marginalia/array/functional/AddressRangeLongFunction.java rename to code/libraries/array/java/nu/marginalia/array/functional/AddressRangeLongFunction.java diff --git a/code/libraries/array/src/main/java/nu/marginalia/array/functional/IntBinaryIOOperation.java b/code/libraries/array/java/nu/marginalia/array/functional/IntBinaryIOOperation.java similarity index 100% rename from code/libraries/array/src/main/java/nu/marginalia/array/functional/IntBinaryIOOperation.java rename to code/libraries/array/java/nu/marginalia/array/functional/IntBinaryIOOperation.java diff --git a/code/libraries/array/src/main/java/nu/marginalia/array/functional/IntBinaryOperation.java b/code/libraries/array/java/nu/marginalia/array/functional/IntBinaryOperation.java similarity index 100% rename from code/libraries/array/src/main/java/nu/marginalia/array/functional/IntBinaryOperation.java rename to code/libraries/array/java/nu/marginalia/array/functional/IntBinaryOperation.java diff --git a/code/libraries/array/src/main/java/nu/marginalia/array/functional/IntIOTransformer.java b/code/libraries/array/java/nu/marginalia/array/functional/IntIOTransformer.java similarity index 100% rename from code/libraries/array/src/main/java/nu/marginalia/array/functional/IntIOTransformer.java rename to code/libraries/array/java/nu/marginalia/array/functional/IntIOTransformer.java diff --git a/code/libraries/array/src/main/java/nu/marginalia/array/functional/IntTransformer.java b/code/libraries/array/java/nu/marginalia/array/functional/IntTransformer.java similarity index 100% rename from code/libraries/array/src/main/java/nu/marginalia/array/functional/IntTransformer.java rename to code/libraries/array/java/nu/marginalia/array/functional/IntTransformer.java diff --git 
a/code/libraries/array/src/main/java/nu/marginalia/array/functional/LongBinaryIOOperation.java b/code/libraries/array/java/nu/marginalia/array/functional/LongBinaryIOOperation.java similarity index 100% rename from code/libraries/array/src/main/java/nu/marginalia/array/functional/LongBinaryIOOperation.java rename to code/libraries/array/java/nu/marginalia/array/functional/LongBinaryIOOperation.java diff --git a/code/libraries/array/src/main/java/nu/marginalia/array/functional/LongBinaryOperation.java b/code/libraries/array/java/nu/marginalia/array/functional/LongBinaryOperation.java similarity index 100% rename from code/libraries/array/src/main/java/nu/marginalia/array/functional/LongBinaryOperation.java rename to code/libraries/array/java/nu/marginalia/array/functional/LongBinaryOperation.java diff --git a/code/libraries/array/src/main/java/nu/marginalia/array/functional/LongIOTransformer.java b/code/libraries/array/java/nu/marginalia/array/functional/LongIOTransformer.java similarity index 100% rename from code/libraries/array/src/main/java/nu/marginalia/array/functional/LongIOTransformer.java rename to code/libraries/array/java/nu/marginalia/array/functional/LongIOTransformer.java diff --git a/code/libraries/array/src/main/java/nu/marginalia/array/functional/LongIntConsumer.java b/code/libraries/array/java/nu/marginalia/array/functional/LongIntConsumer.java similarity index 100% rename from code/libraries/array/src/main/java/nu/marginalia/array/functional/LongIntConsumer.java rename to code/libraries/array/java/nu/marginalia/array/functional/LongIntConsumer.java diff --git a/code/libraries/array/src/main/java/nu/marginalia/array/functional/LongLongConsumer.java b/code/libraries/array/java/nu/marginalia/array/functional/LongLongConsumer.java similarity index 100% rename from code/libraries/array/src/main/java/nu/marginalia/array/functional/LongLongConsumer.java rename to code/libraries/array/java/nu/marginalia/array/functional/LongLongConsumer.java diff --git 
a/code/libraries/array/src/main/java/nu/marginalia/array/functional/LongTransformer.java b/code/libraries/array/java/nu/marginalia/array/functional/LongTransformer.java similarity index 100% rename from code/libraries/array/src/main/java/nu/marginalia/array/functional/LongTransformer.java rename to code/libraries/array/java/nu/marginalia/array/functional/LongTransformer.java diff --git a/code/libraries/array/src/main/java/nu/marginalia/array/page/AbstractPagingArray.java b/code/libraries/array/java/nu/marginalia/array/page/AbstractPagingArray.java similarity index 100% rename from code/libraries/array/src/main/java/nu/marginalia/array/page/AbstractPagingArray.java rename to code/libraries/array/java/nu/marginalia/array/page/AbstractPagingArray.java diff --git a/code/libraries/array/src/main/java/nu/marginalia/array/page/PartitionPage.java b/code/libraries/array/java/nu/marginalia/array/page/PartitionPage.java similarity index 100% rename from code/libraries/array/src/main/java/nu/marginalia/array/page/PartitionPage.java rename to code/libraries/array/java/nu/marginalia/array/page/PartitionPage.java diff --git a/code/libraries/array/src/main/java/nu/marginalia/array/page/SegmentIntArray.java b/code/libraries/array/java/nu/marginalia/array/page/SegmentIntArray.java similarity index 100% rename from code/libraries/array/src/main/java/nu/marginalia/array/page/SegmentIntArray.java rename to code/libraries/array/java/nu/marginalia/array/page/SegmentIntArray.java diff --git a/code/libraries/array/src/main/java/nu/marginalia/array/page/SegmentLongArray.java b/code/libraries/array/java/nu/marginalia/array/page/SegmentLongArray.java similarity index 95% rename from code/libraries/array/src/main/java/nu/marginalia/array/page/SegmentLongArray.java rename to code/libraries/array/java/nu/marginalia/array/page/SegmentLongArray.java index 21044b68..5e3b8a59 100644 --- a/code/libraries/array/src/main/java/nu/marginalia/array/page/SegmentLongArray.java +++ 
b/code/libraries/array/java/nu/marginalia/array/page/SegmentLongArray.java @@ -154,7 +154,11 @@ public class SegmentLongArray implements PartitionPage, LongArray { @Override public void transferFrom(FileChannel source, long sourceStart, long arrayStart, long arrayEnd) throws IOException { - final int stride = 1024*1204*128; // Copy 1 GB at a time 'cause byte buffers are 'a byte buffering + final int stride = 1024*1024*128; // Copy 1 GB at a time 'cause byte buffers are 'a byte buffering + + if (source.size() / 8 < sourceStart + (arrayEnd - arrayStart)) { + throw new IndexOutOfBoundsException(STR."Source channel too small: \{source.size()} < \{sourceStart + (arrayEnd - arrayStart)}"); + } long ss = sourceStart; for (long as = arrayStart; as < arrayEnd; as += stride, ss += stride) { diff --git a/code/libraries/array/java/nu/marginalia/array/page/UnsafeLongArray.java b/code/libraries/array/java/nu/marginalia/array/page/UnsafeLongArray.java new file mode 100644 index 00000000..4ef0da02 --- /dev/null +++ b/code/libraries/array/java/nu/marginalia/array/page/UnsafeLongArray.java @@ -0,0 +1,276 @@ +package nu.marginalia.array.page; + +import nu.marginalia.array.ArrayRangeReference; +import nu.marginalia.array.LongArray; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import sun.misc.Unsafe; + +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import java.io.IOException; +import java.lang.foreign.Arena; +import java.lang.foreign.MemorySegment; +import java.nio.ByteBuffer; +import java.nio.LongBuffer; +import java.nio.channels.FileChannel; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.StandardOpenOption; + +import static java.lang.foreign.ValueLayout.JAVA_LONG; + +/** Variant of SegmentLongArray that uses Unsafe to access the memory. 
+ * */ +public class UnsafeLongArray implements PartitionPage, LongArray { + + private static final Unsafe unsafe = UnsafeProvider.getUnsafe(); + private static final Logger logger = LoggerFactory.getLogger(UnsafeLongArray.class); + + @Nullable + private final Arena arena; + @Nullable + private final FileChannel channel; + + private final MemorySegment segment; + private boolean closed; + + UnsafeLongArray(MemorySegment segment, + @Nullable Arena arena) { + this.segment = segment; + this.arena = arena; + this.channel = null; + } + + UnsafeLongArray(MemorySegment segment, + @Nonnull FileChannel channel, + @Nullable Arena arena) { + this.segment = segment; + this.arena = arena; + this.channel = channel; + } + + public static UnsafeLongArray onHeap(Arena arena, long size) { + return new UnsafeLongArray(arena.allocate(WORD_SIZE*size, 8), arena); + } + + public static UnsafeLongArray fromMmapReadOnly(Arena arena, Path file, long offset, long size) throws IOException { + try (var channel = (FileChannel) Files.newByteChannel(file, StandardOpenOption.READ)) { + return new UnsafeLongArray(channel.map(FileChannel.MapMode.READ_ONLY, + JAVA_LONG.byteSize() * offset, JAVA_LONG.byteSize() * size, + arena), arena); + } + catch (IOException ex) { + throw new IOException("Failed to map file " + file + " (" + offset + ":" + size + ")", ex); + } + } + + public static UnsafeLongArray fromMmapReadWrite(Arena arena, Path file, long offset, long size) throws IOException { + var channel = (FileChannel) Files.newByteChannel(file, + StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE); + var segment = channel.map(FileChannel.MapMode.READ_WRITE, + JAVA_LONG.byteSize() * offset, JAVA_LONG.byteSize() * size, + arena); + + return new UnsafeLongArray(segment, channel, arena); + } + + @Override + public LongArray range(long start, long end) { + return new UnsafeLongArray( + segment.asSlice( + start * JAVA_LONG.byteSize(), + (end-start) * JAVA_LONG.byteSize()), + null); + 
} + + @Override + public LongArray shifted(long start) { + return new UnsafeLongArray( + segment.asSlice(start * JAVA_LONG.byteSize()), + null); + } + + @Override + public long get(long at) { + try { + return unsafe.getLong(segment.address() + at * JAVA_LONG.byteSize()); + } + catch (IndexOutOfBoundsException ex) { + throw new IndexOutOfBoundsException("@" + at + "(" + 0 + ":" + segment.byteSize()/8 + ")"); + } + } + + @Override + public void get(long start, long end, long[] buffer) { + for (int i = 0; i < end - start; i++) { + buffer[i] = unsafe.getLong(segment.address() + (start + i) * JAVA_LONG.byteSize()); + } + } + + @Override + public void set(long at, long val) { + unsafe.putLong(segment.address() + at * JAVA_LONG.byteSize(), val); + } + + @Override + public void set(long start, long end, LongBuffer buffer, int bufferStart) { + for (int i = 0; i < end - start; i++) { + unsafe.putLong(segment.address() + (start + i) * JAVA_LONG.byteSize(), buffer.get(bufferStart + i)); + } + } + + @Override + public synchronized void close() { + if (arena != null && !closed) { + arena.close(); + } + if (channel != null && !closed) { + try { + channel.close(); + } + catch (IOException ex) { + throw new RuntimeException("Failed to close channel", ex); + } + } + + closed = true; + } + + @Override + public long size() { + return segment.byteSize() / JAVA_LONG.byteSize(); + } + + @Override + public ByteBuffer getByteBuffer() { + return segment.asByteBuffer(); + } + + @Override + public void write(Path filename) throws IOException { + try (var arena = Arena.ofConfined()) { + var destSegment = UnsafeLongArray.fromMmapReadWrite(arena, filename, 0, segment.byteSize() / JAVA_LONG.byteSize()); + + destSegment.segment.copyFrom(segment); + destSegment.force(); + } + } + + @Override + public void force() { + if (segment.isMapped()) { + segment.force(); + try { + if (channel != null) { + channel.force(false); + } + } catch (IOException e) { + throw new RuntimeException("Failed to force 
channel", e); + } + } + } + + public ArrayRangeReference directRangeIfPossible(long start, long end) { + return new ArrayRangeReference<>(this, start, end); + } + + public void chanelChannelTransfer(FileChannel source, + long sourceStartL, + long arrayStartL, + long arrayEndL) throws IOException { + + assert channel != null; + + final int B_per_L = (int) JAVA_LONG.byteSize(); + + final int strideB = 128*1024*1024; // Copy in 128 MB chunks + + final long destStartB = arrayStartL * B_per_L; + final long destEndB = arrayEndL * B_per_L; + final long lengthB = destEndB - destStartB; + + final long sourceStartB = sourceStartL * B_per_L; + final long sourceEndB = sourceStartB + lengthB; + + + if (sourceStartB > sourceEndB) + throw new IndexOutOfBoundsException("Source start after end"); + if (sourceStartB > source.size()) + throw new IndexOutOfBoundsException("Source channel too small, start " + sourceStartB + " < input size " + source.size()); + if (sourceEndB > source.size()) + throw new IndexOutOfBoundsException("Source channel too small, end " + sourceEndB + " < input size " + source.size()); + + long destIndexB = destStartB; + + source.position(sourceStartB); + + while (destIndexB < destEndB) + { + long stepSizeB = Math.min(destIndexB + strideB, destEndB); + long copyLengthB = (stepSizeB - destIndexB); + + long transferred = channel.transferFrom(source, destIndexB, copyLengthB); + if (transferred != copyLengthB) { + logger.warn("Less than {} bytes were copied: {}", copyLengthB, transferred); + } + + destIndexB += copyLengthB; + } + } + + @Override + public void transferFrom(FileChannel source, + long sourceStartL, + long arrayStartL, + long arrayEndL) throws IOException { + + + if (channel != null) { + chanelChannelTransfer(source, sourceStartL, arrayStartL, arrayEndL); + return; + } + + final int B_per_L = (int) JAVA_LONG.byteSize(); + + final int strideB = 1024*1024*1024; // Copy 1 GB at a time + + final long arrayStartB = arrayStartL * B_per_L; + final long 
arrayEndB = arrayEndL * B_per_L; + final long arrayLengthB = arrayEndB - arrayStartB; + + final long sourceStartB = sourceStartL * B_per_L; + final long sourceEndB = sourceStartB + arrayLengthB; + + + if (sourceStartB > sourceEndB) + throw new IndexOutOfBoundsException("Source start after end"); + if (sourceStartB > source.size()) + throw new IndexOutOfBoundsException("Source channel too small, start " + sourceStartB + " < input size " + source.size()); + if (sourceEndB > source.size()) + throw new IndexOutOfBoundsException("Source channel too small, end " + sourceEndB + " < input size " + source.size()); + + long channelIndexB = sourceStartB; + long segmentIndexB = arrayStartB; + + while (segmentIndexB < arrayEndB) + { + long segmentEndB = Math.min(segmentIndexB + strideB, arrayEndB); + long lengthB = (segmentEndB - segmentIndexB); + + var bufferSlice = segment.asSlice(segmentIndexB, lengthB).asByteBuffer(); + + while (bufferSlice.position() < bufferSlice.capacity()) { + if (source.position() + bufferSlice.capacity() > sourceEndB) + throw new IndexOutOfBoundsException("Source channel too small"); + + if (source.read(bufferSlice, channelIndexB + bufferSlice.position()) < 0) + throw new IOException("Failed to read from source"); + } + + channelIndexB += lengthB; + segmentIndexB += lengthB; + } + } + +} diff --git a/code/libraries/array/src/main/java/nu/marginalia/array/page/UnsafeProvider.java b/code/libraries/array/java/nu/marginalia/array/page/UnsafeProvider.java similarity index 100% rename from code/libraries/array/src/main/java/nu/marginalia/array/page/UnsafeProvider.java rename to code/libraries/array/java/nu/marginalia/array/page/UnsafeProvider.java diff --git a/code/libraries/array/src/main/java/nu/marginalia/array/scheme/ArrayPartitioningScheme.java b/code/libraries/array/java/nu/marginalia/array/scheme/ArrayPartitioningScheme.java similarity index 100% rename from code/libraries/array/src/main/java/nu/marginalia/array/scheme/ArrayPartitioningScheme.java 
rename to code/libraries/array/java/nu/marginalia/array/scheme/ArrayPartitioningScheme.java diff --git a/code/libraries/array/src/main/java/nu/marginalia/array/scheme/PowerOf2PartitioningScheme.java b/code/libraries/array/java/nu/marginalia/array/scheme/PowerOf2PartitioningScheme.java similarity index 100% rename from code/libraries/array/src/main/java/nu/marginalia/array/scheme/PowerOf2PartitioningScheme.java rename to code/libraries/array/java/nu/marginalia/array/scheme/PowerOf2PartitioningScheme.java diff --git a/code/libraries/array/src/main/java/nu/marginalia/array/scheme/SequentialPartitioningScheme.java b/code/libraries/array/java/nu/marginalia/array/scheme/SequentialPartitioningScheme.java similarity index 100% rename from code/libraries/array/src/main/java/nu/marginalia/array/scheme/SequentialPartitioningScheme.java rename to code/libraries/array/java/nu/marginalia/array/scheme/SequentialPartitioningScheme.java diff --git a/code/libraries/array/readme.md b/code/libraries/array/readme.md index 42417b42..7e44b3c6 100644 --- a/code/libraries/array/readme.md +++ b/code/libraries/array/readme.md @@ -32,8 +32,8 @@ try (var array = LongArrayFactory.mmapForWritingConfined(Path.of("/tmp/test"), 1 ## Query Buffers -The classes [IntQueryBuffer](src/main/java/nu/marginalia/array/buffer/IntQueryBuffer.java) -and [LongQueryBuffer](src/main/java/nu/marginalia/array/buffer/LongQueryBuffer.java) are used +The classes [IntQueryBuffer](java/nu/marginalia/array/buffer/IntQueryBuffer.java) +and [LongQueryBuffer](java/nu/marginalia/array/buffer/LongQueryBuffer.java) are used heavily in the search engine's query processing. They are dual-pointer buffers that offer tools for filtering data. @@ -75,7 +75,7 @@ buffer.finalizeFiltering(); Especially noteworthy are the operations `retain()` and `reject()` in -[IntArraySearch](src/main/java/nu/marginalia/array/algo/IntArraySearch.java) and [LongArraySearch](src/main/java/nu/marginalia/array/algo/LongArraySearch.java). 
+[IntArraySearch](java/nu/marginalia/array/algo/IntArraySearch.java) and [LongArraySearch](java/nu/marginalia/array/algo/LongArraySearch.java). They keep or remove all items in the buffer that exist in the referenced range of the array, which must be sorted. diff --git a/code/libraries/array/src/jmh/java/nu/marginalia/array/page/FoldBenchmark.java b/code/libraries/array/src/jmh/java/nu/marginalia/array/page/FoldBenchmark.java index 31eb1359..50f6e925 100644 --- a/code/libraries/array/src/jmh/java/nu/marginalia/array/page/FoldBenchmark.java +++ b/code/libraries/array/src/jmh/java/nu/marginalia/array/page/FoldBenchmark.java @@ -2,7 +2,6 @@ package nu.marginalia.array.page; import nu.marginalia.array.LongArray; import nu.marginalia.array.SimulatedNaiveArray; -import nu.marginalia.array.scheme.ArrayPartitioningScheme; import org.openjdk.jmh.annotations.*; import java.lang.foreign.Arena; diff --git a/code/libraries/array/src/jmh/java/nu/marginalia/array/page/QuicksortBenchmark.java b/code/libraries/array/src/jmh/java/nu/marginalia/array/page/QuicksortBenchmark.java index 84905577..e90777d8 100644 --- a/code/libraries/array/src/jmh/java/nu/marginalia/array/page/QuicksortBenchmark.java +++ b/code/libraries/array/src/jmh/java/nu/marginalia/array/page/QuicksortBenchmark.java @@ -2,7 +2,6 @@ package nu.marginalia.array.page; import nu.marginalia.array.LongArray; import nu.marginalia.array.SimulatedNaiveArray; -import nu.marginalia.array.scheme.ArrayPartitioningScheme; import org.openjdk.jmh.annotations.*; import java.lang.foreign.Arena; diff --git a/code/libraries/array/src/main/java/nu/marginalia/array/page/UnsafeLongArray.java b/code/libraries/array/src/main/java/nu/marginalia/array/page/UnsafeLongArray.java deleted file mode 100644 index 8ba7182d..00000000 --- a/code/libraries/array/src/main/java/nu/marginalia/array/page/UnsafeLongArray.java +++ /dev/null @@ -1,181 +0,0 @@ -package nu.marginalia.array.page; - -import nu.marginalia.array.ArrayRangeReference; -import 
nu.marginalia.array.LongArray; -import sun.misc.Unsafe; - -import javax.annotation.Nullable; -import java.io.IOException; -import java.lang.foreign.Arena; -import java.lang.foreign.MemorySegment; -import java.nio.ByteBuffer; -import java.nio.LongBuffer; -import java.nio.channels.FileChannel; -import java.nio.file.Files; -import java.nio.file.OpenOption; -import java.nio.file.Path; -import java.nio.file.StandardOpenOption; - -import static java.lang.foreign.ValueLayout.JAVA_LONG; - -/** Variant of SegmentLongArray that uses Unsafe to access the memory. - * */ -public class UnsafeLongArray implements PartitionPage, LongArray { - - private static final Unsafe unsafe = UnsafeProvider.getUnsafe(); - - @Nullable - private final Arena arena; - private final MemorySegment segment; - private boolean closed; - - UnsafeLongArray(MemorySegment segment, - @Nullable Arena arena) { - this.segment = segment; - this.arena = arena; - } - - public static UnsafeLongArray onHeap(Arena arena, long size) { - return new UnsafeLongArray(arena.allocate(WORD_SIZE*size, 8), arena); - } - - public static UnsafeLongArray fromMmapReadOnly(Arena arena, Path file, long offset, long size) throws IOException { - return new UnsafeLongArray( - mmapFile(arena, file, offset, size, FileChannel.MapMode.READ_ONLY, StandardOpenOption.READ), - arena); - } - - public static UnsafeLongArray fromMmapReadWrite(Arena arena, Path file, long offset, long size) throws IOException { - - return new UnsafeLongArray( - mmapFile(arena, file, offset, size, FileChannel.MapMode.READ_WRITE, - StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE), - arena); - } - - private static MemorySegment mmapFile(Arena arena, - Path file, - long offset, - long size, - FileChannel.MapMode mode, - OpenOption... 
openOptions) throws IOException - { - try (var channel = (FileChannel) Files.newByteChannel(file, openOptions)) { - - return channel.map(mode, - JAVA_LONG.byteSize() * offset, - JAVA_LONG.byteSize() * size, - arena); - } - catch (IOException ex) { - throw new IOException("Failed to map file " + file + " (" + offset + ":" + size + ")", ex); - } - } - - @Override - public LongArray range(long start, long end) { - return new UnsafeLongArray( - segment.asSlice( - start * JAVA_LONG.byteSize(), - (end-start) * JAVA_LONG.byteSize()), - null); - } - - @Override - public LongArray shifted(long start) { - return new UnsafeLongArray( - segment.asSlice(start * JAVA_LONG.byteSize()), - null); - } - - @Override - public long get(long at) { - try { - return unsafe.getLong(segment.address() + at * JAVA_LONG.byteSize()); - } - catch (IndexOutOfBoundsException ex) { - throw new IndexOutOfBoundsException("@" + at + "(" + 0 + ":" + segment.byteSize()/8 + ")"); - } - } - - @Override - public void get(long start, long end, long[] buffer) { - for (int i = 0; i < end - start; i++) { - buffer[i] = unsafe.getLong(segment.address() + (start + i) * JAVA_LONG.byteSize()); - } - } - - @Override - public void set(long at, long val) { - unsafe.putLong(segment.address() + at * JAVA_LONG.byteSize(), val); - } - - @Override - public void set(long start, long end, LongBuffer buffer, int bufferStart) { - for (int i = 0; i < end - start; i++) { - unsafe.putLong(segment.address() + (start + i) * JAVA_LONG.byteSize(), buffer.get(bufferStart + i)); - } - } - - @Override - public synchronized void close() { - if (arena != null && !closed) { - arena.close(); - } - closed = true; - } - - @Override - public long size() { - return segment.byteSize() / JAVA_LONG.byteSize(); - } - - @Override - public ByteBuffer getByteBuffer() { - return segment.asByteBuffer(); - } - - @Override - public void write(Path filename) throws IOException { - try (var arena = Arena.ofConfined()) { - var destSegment = 
UnsafeLongArray.fromMmapReadWrite(arena, filename, 0, segment.byteSize() / JAVA_LONG.byteSize()); - - destSegment.segment.copyFrom(segment); - destSegment.force(); - } - } - - @Override - public void force() { - if (segment.isMapped()) { - segment.force(); - } - } - - public ArrayRangeReference directRangeIfPossible(long start, long end) { - return new ArrayRangeReference<>(this, start, end); - } - - @Override - public void transferFrom(FileChannel source, long sourceStart, long arrayStart, long arrayEnd) throws IOException { - - final int stride = 1024*1204*128; // Copy 1 GB at a time 'cause byte buffers are 'a byte buffering - - long ss = sourceStart; - for (long as = arrayStart; as < arrayEnd; as += stride, ss += stride) { - long ae = Math.min(as + stride, arrayEnd); - - long index = as * JAVA_LONG.byteSize(); - long length = (ae - as) * JAVA_LONG.byteSize(); - - var bufferSlice = segment.asSlice(index, length).asByteBuffer(); - - long startPos = ss * JAVA_LONG.byteSize(); - while (bufferSlice.position() < bufferSlice.capacity()) { - source.read(bufferSlice, startPos + bufferSlice.position()); - } - } - - } - -} diff --git a/code/libraries/array/src/test/java/nu/marginalia/array/IntLowBitPartitioningSchemeTest.java b/code/libraries/array/test/nu/marginalia/array/IntLowBitPartitioningSchemeTest.java similarity index 100% rename from code/libraries/array/src/test/java/nu/marginalia/array/IntLowBitPartitioningSchemeTest.java rename to code/libraries/array/test/nu/marginalia/array/IntLowBitPartitioningSchemeTest.java diff --git a/code/libraries/array/src/test/java/nu/marginalia/array/algo/IntArraySearchTest.java b/code/libraries/array/test/nu/marginalia/array/algo/IntArraySearchTest.java similarity index 98% rename from code/libraries/array/src/test/java/nu/marginalia/array/algo/IntArraySearchTest.java rename to code/libraries/array/test/nu/marginalia/array/algo/IntArraySearchTest.java index 85ec4410..9c1680a9 100644 --- 
a/code/libraries/array/src/test/java/nu/marginalia/array/algo/IntArraySearchTest.java +++ b/code/libraries/array/test/nu/marginalia/array/algo/IntArraySearchTest.java @@ -3,7 +3,6 @@ package nu.marginalia.array.algo; import nu.marginalia.array.IntArray; import nu.marginalia.array.buffer.IntQueryBuffer; import nu.marginalia.array.page.SegmentIntArray; -import nu.marginalia.array.scheme.PowerOf2PartitioningScheme; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; diff --git a/code/libraries/array/src/test/java/nu/marginalia/array/algo/IntArraySortTest.java b/code/libraries/array/test/nu/marginalia/array/algo/IntArraySortTest.java similarity index 98% rename from code/libraries/array/src/test/java/nu/marginalia/array/algo/IntArraySortTest.java rename to code/libraries/array/test/nu/marginalia/array/algo/IntArraySortTest.java index dc3985c6..8dc16dab 100644 --- a/code/libraries/array/src/test/java/nu/marginalia/array/algo/IntArraySortTest.java +++ b/code/libraries/array/test/nu/marginalia/array/algo/IntArraySortTest.java @@ -1,7 +1,6 @@ package nu.marginalia.array.algo; import nu.marginalia.array.IntArray; -import nu.marginalia.array.scheme.PowerOf2PartitioningScheme; import nu.marginalia.util.test.TestUtil; import org.apache.commons.lang3.ArrayUtils; import org.junit.jupiter.api.BeforeEach; diff --git a/code/libraries/array/src/test/java/nu/marginalia/array/algo/IntArrayTransformations2Test.java b/code/libraries/array/test/nu/marginalia/array/algo/IntArrayTransformations2Test.java similarity index 97% rename from code/libraries/array/src/test/java/nu/marginalia/array/algo/IntArrayTransformations2Test.java rename to code/libraries/array/test/nu/marginalia/array/algo/IntArrayTransformations2Test.java index b9b374ac..b44ae07e 100644 --- a/code/libraries/array/src/test/java/nu/marginalia/array/algo/IntArrayTransformations2Test.java +++ b/code/libraries/array/test/nu/marginalia/array/algo/IntArrayTransformations2Test.java @@ -1,7 +1,6 @@ package 
nu.marginalia.array.algo; import nu.marginalia.array.IntArray; -import nu.marginalia.array.scheme.PowerOf2PartitioningScheme; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; diff --git a/code/libraries/array/src/test/java/nu/marginalia/array/algo/IntArrayTransformationsTest.java b/code/libraries/array/test/nu/marginalia/array/algo/IntArrayTransformationsTest.java similarity index 95% rename from code/libraries/array/src/test/java/nu/marginalia/array/algo/IntArrayTransformationsTest.java rename to code/libraries/array/test/nu/marginalia/array/algo/IntArrayTransformationsTest.java index 7c2dc120..37954f27 100644 --- a/code/libraries/array/src/test/java/nu/marginalia/array/algo/IntArrayTransformationsTest.java +++ b/code/libraries/array/test/nu/marginalia/array/algo/IntArrayTransformationsTest.java @@ -1,8 +1,6 @@ package nu.marginalia.array.algo; import nu.marginalia.array.IntArray; -import nu.marginalia.array.LongArray; -import nu.marginalia.array.scheme.PowerOf2PartitioningScheme; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; diff --git a/code/libraries/array/src/test/java/nu/marginalia/array/algo/LongArraySearchTest.java b/code/libraries/array/test/nu/marginalia/array/algo/LongArraySearchTest.java similarity index 97% rename from code/libraries/array/src/test/java/nu/marginalia/array/algo/LongArraySearchTest.java rename to code/libraries/array/test/nu/marginalia/array/algo/LongArraySearchTest.java index c09a0a01..a515917b 100644 --- a/code/libraries/array/src/test/java/nu/marginalia/array/algo/LongArraySearchTest.java +++ b/code/libraries/array/test/nu/marginalia/array/algo/LongArraySearchTest.java @@ -3,8 +3,6 @@ package nu.marginalia.array.algo; import nu.marginalia.array.LongArray; import nu.marginalia.array.LongArrayFactory; import nu.marginalia.array.buffer.LongQueryBuffer; -import nu.marginalia.array.page.SegmentLongArray; -import nu.marginalia.array.scheme.PowerOf2PartitioningScheme; import 
org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; diff --git a/code/libraries/array/src/test/java/nu/marginalia/array/algo/LongArraySortNTest.java b/code/libraries/array/test/nu/marginalia/array/algo/LongArraySortNTest.java similarity index 99% rename from code/libraries/array/src/test/java/nu/marginalia/array/algo/LongArraySortNTest.java rename to code/libraries/array/test/nu/marginalia/array/algo/LongArraySortNTest.java index aa963070..8094d956 100644 --- a/code/libraries/array/src/test/java/nu/marginalia/array/algo/LongArraySortNTest.java +++ b/code/libraries/array/test/nu/marginalia/array/algo/LongArraySortNTest.java @@ -4,7 +4,6 @@ import it.unimi.dsi.fastutil.longs.Long2ObjectOpenHashMap; import it.unimi.dsi.fastutil.longs.LongOpenHashSet; import nu.marginalia.array.LongArray; import nu.marginalia.array.LongArrayFactory; -import nu.marginalia.array.scheme.PowerOf2PartitioningScheme; import nu.marginalia.util.test.TestUtil; import org.apache.commons.lang3.ArrayUtils; import org.junit.jupiter.api.BeforeEach; diff --git a/code/libraries/array/src/test/java/nu/marginalia/array/algo/LongArraySortTest.java b/code/libraries/array/test/nu/marginalia/array/algo/LongArraySortTest.java similarity index 99% rename from code/libraries/array/src/test/java/nu/marginalia/array/algo/LongArraySortTest.java rename to code/libraries/array/test/nu/marginalia/array/algo/LongArraySortTest.java index 22f7528c..81916b61 100644 --- a/code/libraries/array/src/test/java/nu/marginalia/array/algo/LongArraySortTest.java +++ b/code/libraries/array/test/nu/marginalia/array/algo/LongArraySortTest.java @@ -3,7 +3,6 @@ package nu.marginalia.array.algo; import it.unimi.dsi.fastutil.longs.LongOpenHashSet; import nu.marginalia.array.LongArray; import nu.marginalia.array.LongArrayFactory; -import nu.marginalia.array.scheme.PowerOf2PartitioningScheme; import nu.marginalia.util.test.TestUtil; import org.apache.commons.lang3.ArrayUtils; import org.junit.jupiter.api.BeforeEach; diff 
--git a/code/libraries/array/src/test/java/nu/marginalia/array/algo/LongArrayTransformations2Test.java b/code/libraries/array/test/nu/marginalia/array/algo/LongArrayTransformations2Test.java similarity index 95% rename from code/libraries/array/src/test/java/nu/marginalia/array/algo/LongArrayTransformations2Test.java rename to code/libraries/array/test/nu/marginalia/array/algo/LongArrayTransformations2Test.java index eb9aafe2..01af58cc 100644 --- a/code/libraries/array/src/test/java/nu/marginalia/array/algo/LongArrayTransformations2Test.java +++ b/code/libraries/array/test/nu/marginalia/array/algo/LongArrayTransformations2Test.java @@ -1,8 +1,6 @@ package nu.marginalia.array.algo; import nu.marginalia.array.LongArray; -import nu.marginalia.array.LongArrayFactory; -import nu.marginalia.array.scheme.PowerOf2PartitioningScheme; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; diff --git a/code/libraries/array/src/test/java/nu/marginalia/array/algo/LongArrayTransformationsTest.java b/code/libraries/array/test/nu/marginalia/array/algo/LongArrayTransformationsTest.java similarity index 95% rename from code/libraries/array/src/test/java/nu/marginalia/array/algo/LongArrayTransformationsTest.java rename to code/libraries/array/test/nu/marginalia/array/algo/LongArrayTransformationsTest.java index 489ae54b..92fb3842 100644 --- a/code/libraries/array/src/test/java/nu/marginalia/array/algo/LongArrayTransformationsTest.java +++ b/code/libraries/array/test/nu/marginalia/array/algo/LongArrayTransformationsTest.java @@ -1,8 +1,6 @@ package nu.marginalia.array.algo; import nu.marginalia.array.LongArray; -import nu.marginalia.array.LongArrayFactory; -import nu.marginalia.array.scheme.PowerOf2PartitioningScheme; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; diff --git a/code/libraries/array/src/test/java/nu/marginalia/array/algo/TwoArrayOperationsTest.java 
b/code/libraries/array/test/nu/marginalia/array/algo/TwoArrayOperationsTest.java similarity index 100% rename from code/libraries/array/src/test/java/nu/marginalia/array/algo/TwoArrayOperationsTest.java rename to code/libraries/array/test/nu/marginalia/array/algo/TwoArrayOperationsTest.java diff --git a/code/libraries/array/src/test/java/nu/marginalia/array/scheme/ArrayPartitioningSchemeTest.java b/code/libraries/array/test/nu/marginalia/array/scheme/ArrayPartitioningSchemeTest.java similarity index 100% rename from code/libraries/array/src/test/java/nu/marginalia/array/scheme/ArrayPartitioningSchemeTest.java rename to code/libraries/array/test/nu/marginalia/array/scheme/ArrayPartitioningSchemeTest.java diff --git a/code/libraries/array/src/test/java/nu/marginalia/util/test/TestUtil.java b/code/libraries/array/test/nu/marginalia/util/test/TestUtil.java similarity index 100% rename from code/libraries/array/src/test/java/nu/marginalia/util/test/TestUtil.java rename to code/libraries/array/test/nu/marginalia/util/test/TestUtil.java diff --git a/code/libraries/big-string/build.gradle b/code/libraries/big-string/build.gradle index c769bc12..2f67aa7f 100644 --- a/code/libraries/big-string/build.gradle +++ b/code/libraries/big-string/build.gradle @@ -8,6 +8,8 @@ java { } } +apply from: "$rootProject.projectDir/srcsets.gradle" + dependencies { implementation libs.bundles.slf4j diff --git a/code/libraries/big-string/src/main/java/nu/marginalia/bigstring/BigString.java b/code/libraries/big-string/java/nu/marginalia/bigstring/BigString.java similarity index 100% rename from code/libraries/big-string/src/main/java/nu/marginalia/bigstring/BigString.java rename to code/libraries/big-string/java/nu/marginalia/bigstring/BigString.java diff --git a/code/libraries/big-string/src/main/java/nu/marginalia/bigstring/CompressedBigString.java b/code/libraries/big-string/java/nu/marginalia/bigstring/CompressedBigString.java similarity index 100% rename from 
code/libraries/big-string/src/main/java/nu/marginalia/bigstring/CompressedBigString.java rename to code/libraries/big-string/java/nu/marginalia/bigstring/CompressedBigString.java diff --git a/code/libraries/big-string/src/main/java/nu/marginalia/bigstring/CompressionBuffer.java b/code/libraries/big-string/java/nu/marginalia/bigstring/CompressionBuffer.java similarity index 100% rename from code/libraries/big-string/src/main/java/nu/marginalia/bigstring/CompressionBuffer.java rename to code/libraries/big-string/java/nu/marginalia/bigstring/CompressionBuffer.java diff --git a/code/libraries/big-string/src/main/java/nu/marginalia/bigstring/CompressionBufferPool.java b/code/libraries/big-string/java/nu/marginalia/bigstring/CompressionBufferPool.java similarity index 100% rename from code/libraries/big-string/src/main/java/nu/marginalia/bigstring/CompressionBufferPool.java rename to code/libraries/big-string/java/nu/marginalia/bigstring/CompressionBufferPool.java diff --git a/code/libraries/big-string/src/main/java/nu/marginalia/bigstring/PlainBigString.java b/code/libraries/big-string/java/nu/marginalia/bigstring/PlainBigString.java similarity index 88% rename from code/libraries/big-string/src/main/java/nu/marginalia/bigstring/PlainBigString.java rename to code/libraries/big-string/java/nu/marginalia/bigstring/PlainBigString.java index 5ff08c04..f5fef370 100644 --- a/code/libraries/big-string/src/main/java/nu/marginalia/bigstring/PlainBigString.java +++ b/code/libraries/big-string/java/nu/marginalia/bigstring/PlainBigString.java @@ -1,7 +1,5 @@ package nu.marginalia.bigstring; -import java.nio.charset.StandardCharsets; - public class PlainBigString implements BigString { private final String value; diff --git a/code/libraries/big-string/src/test/java/nu/marginalia/bigstring/CompressedBigStringTest.java b/code/libraries/big-string/test/nu/marginalia/bigstring/CompressedBigStringTest.java similarity index 100% rename from 
code/libraries/big-string/src/test/java/nu/marginalia/bigstring/CompressedBigStringTest.java rename to code/libraries/big-string/test/nu/marginalia/bigstring/CompressedBigStringTest.java diff --git a/code/libraries/blocking-thread-pool/build.gradle b/code/libraries/blocking-thread-pool/build.gradle index e716f1c8..8c5609a5 100644 --- a/code/libraries/blocking-thread-pool/build.gradle +++ b/code/libraries/blocking-thread-pool/build.gradle @@ -8,6 +8,8 @@ java { } } +apply from: "$rootProject.projectDir/srcsets.gradle" + dependencies { implementation libs.bundles.slf4j diff --git a/code/libraries/blocking-thread-pool/src/main/java/nu/marginalia/util/ProcessingIterator.java b/code/libraries/blocking-thread-pool/java/nu/marginalia/util/ProcessingIterator.java similarity index 100% rename from code/libraries/blocking-thread-pool/src/main/java/nu/marginalia/util/ProcessingIterator.java rename to code/libraries/blocking-thread-pool/java/nu/marginalia/util/ProcessingIterator.java diff --git a/code/libraries/blocking-thread-pool/src/main/java/nu/marginalia/util/SimpleBlockingThreadPool.java b/code/libraries/blocking-thread-pool/java/nu/marginalia/util/SimpleBlockingThreadPool.java similarity index 100% rename from code/libraries/blocking-thread-pool/src/main/java/nu/marginalia/util/SimpleBlockingThreadPool.java rename to code/libraries/blocking-thread-pool/java/nu/marginalia/util/SimpleBlockingThreadPool.java diff --git a/code/libraries/blocking-thread-pool/src/test/java/nu/marginalia/util/ProcessingIteratorTest.java b/code/libraries/blocking-thread-pool/test/nu/marginalia/util/ProcessingIteratorTest.java similarity index 100% rename from code/libraries/blocking-thread-pool/src/test/java/nu/marginalia/util/ProcessingIteratorTest.java rename to code/libraries/blocking-thread-pool/test/nu/marginalia/util/ProcessingIteratorTest.java diff --git a/code/libraries/braille-block-punch-cards/build.gradle b/code/libraries/braille-block-punch-cards/build.gradle index 
4d9c5997..673d944f 100644 --- a/code/libraries/braille-block-punch-cards/build.gradle +++ b/code/libraries/braille-block-punch-cards/build.gradle @@ -8,6 +8,8 @@ java { } } +apply from: "$rootProject.projectDir/srcsets.gradle" + dependencies { implementation libs.bundles.slf4j implementation libs.notnull diff --git a/code/libraries/braille-block-punch-cards/src/main/java/nu/marginalia/bbpc/BrailleBlockPunchCards.java b/code/libraries/braille-block-punch-cards/java/nu/marginalia/bbpc/BrailleBlockPunchCards.java similarity index 100% rename from code/libraries/braille-block-punch-cards/src/main/java/nu/marginalia/bbpc/BrailleBlockPunchCards.java rename to code/libraries/braille-block-punch-cards/java/nu/marginalia/bbpc/BrailleBlockPunchCards.java diff --git a/code/libraries/braille-block-punch-cards/readme.md b/code/libraries/braille-block-punch-cards/readme.md index 1785a2fc..2923ef6b 100644 --- a/code/libraries/braille-block-punch-cards/readme.md +++ b/code/libraries/braille-block-punch-cards/readme.md @@ -6,4 +6,4 @@ This is The Way when it comes to representing bit masks to humans. 
## Central Classes -* [BrailleBlockPunchCards](src/main/java/nu/marginalia/bbpc/BrailleBlockPunchCards.java) \ No newline at end of file +* [BrailleBlockPunchCards](java/nu/marginalia/bbpc/BrailleBlockPunchCards.java) \ No newline at end of file diff --git a/code/libraries/braille-block-punch-cards/src/test/java/nu/marginalia/bbpc/BrailleBlockPunchCardsTest.java b/code/libraries/braille-block-punch-cards/test/nu/marginalia/bbpc/BrailleBlockPunchCardsTest.java similarity index 100% rename from code/libraries/braille-block-punch-cards/src/test/java/nu/marginalia/bbpc/BrailleBlockPunchCardsTest.java rename to code/libraries/braille-block-punch-cards/test/nu/marginalia/bbpc/BrailleBlockPunchCardsTest.java diff --git a/code/libraries/btree/build.gradle b/code/libraries/btree/build.gradle index d39af03c..99917255 100644 --- a/code/libraries/btree/build.gradle +++ b/code/libraries/btree/build.gradle @@ -8,6 +8,8 @@ java { } } +apply from: "$rootProject.projectDir/srcsets.gradle" + dependencies { implementation project(':code:libraries:array') implementation project(':code:libraries:next-prime') diff --git a/code/libraries/btree/src/main/java/nu/marginalia/btree/BTreeDogEar.java b/code/libraries/btree/java/nu/marginalia/btree/BTreeDogEar.java similarity index 100% rename from code/libraries/btree/src/main/java/nu/marginalia/btree/BTreeDogEar.java rename to code/libraries/btree/java/nu/marginalia/btree/BTreeDogEar.java diff --git a/code/libraries/btree/src/main/java/nu/marginalia/btree/BTreeReader.java b/code/libraries/btree/java/nu/marginalia/btree/BTreeReader.java similarity index 99% rename from code/libraries/btree/src/main/java/nu/marginalia/btree/BTreeReader.java rename to code/libraries/btree/java/nu/marginalia/btree/BTreeReader.java index d12b21be..048e0301 100644 --- a/code/libraries/btree/src/main/java/nu/marginalia/btree/BTreeReader.java +++ b/code/libraries/btree/java/nu/marginalia/btree/BTreeReader.java @@ -3,7 +3,6 @@ package nu.marginalia.btree; import 
nu.marginalia.array.LongArray; import nu.marginalia.array.algo.LongArraySearch; import nu.marginalia.array.buffer.LongQueryBuffer; -import nu.marginalia.array.delegate.ShiftedLongArray; import nu.marginalia.btree.model.BTreeContext; import nu.marginalia.btree.model.BTreeHeader; diff --git a/code/libraries/btree/src/main/java/nu/marginalia/btree/BTreeWriteCallback.java b/code/libraries/btree/java/nu/marginalia/btree/BTreeWriteCallback.java similarity index 100% rename from code/libraries/btree/src/main/java/nu/marginalia/btree/BTreeWriteCallback.java rename to code/libraries/btree/java/nu/marginalia/btree/BTreeWriteCallback.java diff --git a/code/libraries/btree/src/main/java/nu/marginalia/btree/BTreeWriter.java b/code/libraries/btree/java/nu/marginalia/btree/BTreeWriter.java similarity index 99% rename from code/libraries/btree/src/main/java/nu/marginalia/btree/BTreeWriter.java rename to code/libraries/btree/java/nu/marginalia/btree/BTreeWriter.java index c8d36c7b..1804ff40 100644 --- a/code/libraries/btree/src/main/java/nu/marginalia/btree/BTreeWriter.java +++ b/code/libraries/btree/java/nu/marginalia/btree/BTreeWriter.java @@ -1,7 +1,6 @@ package nu.marginalia.btree; import nu.marginalia.array.LongArray; -import nu.marginalia.array.delegate.ShiftedLongArray; import nu.marginalia.btree.model.BTreeContext; import nu.marginalia.btree.model.BTreeHeader; diff --git a/code/libraries/btree/src/main/java/nu/marginalia/btree/model/BTreeBlockSize.java b/code/libraries/btree/java/nu/marginalia/btree/model/BTreeBlockSize.java similarity index 100% rename from code/libraries/btree/src/main/java/nu/marginalia/btree/model/BTreeBlockSize.java rename to code/libraries/btree/java/nu/marginalia/btree/model/BTreeBlockSize.java diff --git a/code/libraries/btree/src/main/java/nu/marginalia/btree/model/BTreeContext.java b/code/libraries/btree/java/nu/marginalia/btree/model/BTreeContext.java similarity index 100% rename from 
code/libraries/btree/src/main/java/nu/marginalia/btree/model/BTreeContext.java rename to code/libraries/btree/java/nu/marginalia/btree/model/BTreeContext.java diff --git a/code/libraries/btree/src/main/java/nu/marginalia/btree/model/BTreeHeader.java b/code/libraries/btree/java/nu/marginalia/btree/model/BTreeHeader.java similarity index 100% rename from code/libraries/btree/src/main/java/nu/marginalia/btree/model/BTreeHeader.java rename to code/libraries/btree/java/nu/marginalia/btree/model/BTreeHeader.java diff --git a/code/libraries/btree/readme.md b/code/libraries/btree/readme.md index 446195a8..95a10e7f 100644 --- a/code/libraries/btree/readme.md +++ b/code/libraries/btree/readme.md @@ -4,11 +4,11 @@ This package contains a small library for creating and reading a static b-tree i Both binary indices (i.e. sets) are supported, as well as arbitrary multiple-of-keysize key-value mappings where the data is interlaced with the keys in the leaf nodes. This is a fairly low-level datastructure. -The b-trees are specified through a [BTreeContext](src/main/java/nu/marginalia/btree/model/BTreeContext.java) +The b-trees are specified through a [BTreeContext](java/nu/marginalia/btree/model/BTreeContext.java) which contains information about the data and index layout. -The b-trees are written through a [BTreeWriter](src/main/java/nu/marginalia/btree/BTreeWriter.java) and -read with a [BTreeReader](src/main/java/nu/marginalia/btree/BTreeReader.java). +The b-trees are written through a [BTreeWriter](java/nu/marginalia/btree/BTreeWriter.java) and +read with a [BTreeReader](java/nu/marginalia/btree/BTreeReader.java). 
## Demo diff --git a/code/libraries/btree/src/test/java/nu/marginalia/btree/BTreeReaderQueryDataWithIndexTest.java b/code/libraries/btree/test/nu/marginalia/btree/BTreeReaderQueryDataWithIndexTest.java similarity index 100% rename from code/libraries/btree/src/test/java/nu/marginalia/btree/BTreeReaderQueryDataWithIndexTest.java rename to code/libraries/btree/test/nu/marginalia/btree/BTreeReaderQueryDataWithIndexTest.java diff --git a/code/libraries/btree/src/test/java/nu/marginalia/btree/BTreeReaderQueryDataWithoutIndexTest.java b/code/libraries/btree/test/nu/marginalia/btree/BTreeReaderQueryDataWithoutIndexTest.java similarity index 100% rename from code/libraries/btree/src/test/java/nu/marginalia/btree/BTreeReaderQueryDataWithoutIndexTest.java rename to code/libraries/btree/test/nu/marginalia/btree/BTreeReaderQueryDataWithoutIndexTest.java diff --git a/code/libraries/btree/src/test/java/nu/marginalia/btree/BTreeReaderRejectRetainWithIndexTest.java b/code/libraries/btree/test/nu/marginalia/btree/BTreeReaderRejectRetainWithIndexTest.java similarity index 100% rename from code/libraries/btree/src/test/java/nu/marginalia/btree/BTreeReaderRejectRetainWithIndexTest.java rename to code/libraries/btree/test/nu/marginalia/btree/BTreeReaderRejectRetainWithIndexTest.java diff --git a/code/libraries/btree/src/test/java/nu/marginalia/btree/BTreeReaderRejectRetainWithoutIndexTest.java b/code/libraries/btree/test/nu/marginalia/btree/BTreeReaderRejectRetainWithoutIndexTest.java similarity index 100% rename from code/libraries/btree/src/test/java/nu/marginalia/btree/BTreeReaderRejectRetainWithoutIndexTest.java rename to code/libraries/btree/test/nu/marginalia/btree/BTreeReaderRejectRetainWithoutIndexTest.java diff --git a/code/libraries/btree/src/test/java/nu/marginalia/btree/BTreeWriterTest.java b/code/libraries/btree/test/nu/marginalia/btree/BTreeWriterTest.java similarity index 100% rename from code/libraries/btree/src/test/java/nu/marginalia/btree/BTreeWriterTest.java rename 
to code/libraries/btree/test/nu/marginalia/btree/BTreeWriterTest.java diff --git a/code/libraries/easy-lsh/build.gradle b/code/libraries/easy-lsh/build.gradle index e8b11046..74fd976e 100644 --- a/code/libraries/easy-lsh/build.gradle +++ b/code/libraries/easy-lsh/build.gradle @@ -8,6 +8,8 @@ java { } } +apply from: "$rootProject.projectDir/srcsets.gradle" + dependencies { implementation libs.bundles.slf4j diff --git a/code/libraries/easy-lsh/src/main/java/nu/marginalia/lsh/EasyLSH.java b/code/libraries/easy-lsh/java/nu/marginalia/lsh/EasyLSH.java similarity index 100% rename from code/libraries/easy-lsh/src/main/java/nu/marginalia/lsh/EasyLSH.java rename to code/libraries/easy-lsh/java/nu/marginalia/lsh/EasyLSH.java diff --git a/code/libraries/easy-lsh/readme.md b/code/libraries/easy-lsh/readme.md index 7bae8da9..2b2409e8 100644 --- a/code/libraries/easy-lsh/readme.md +++ b/code/libraries/easy-lsh/readme.md @@ -5,7 +5,7 @@ for document deduplication. Hashes are compared using their hamming distance. 
## Central Classes -* [EasyLSH](src/main/java/nu/marginalia/lsh/EasyLSH.java) +* [EasyLSH](java/nu/marginalia/lsh/EasyLSH.java) ## Demo diff --git a/code/libraries/easy-lsh/src/test/java/nu/marginalia/lsh/EasyLSHTest.java b/code/libraries/easy-lsh/test/nu/marginalia/lsh/EasyLSHTest.java similarity index 100% rename from code/libraries/easy-lsh/src/test/java/nu/marginalia/lsh/EasyLSHTest.java rename to code/libraries/easy-lsh/test/nu/marginalia/lsh/EasyLSHTest.java diff --git a/code/libraries/geo-ip/build.gradle b/code/libraries/geo-ip/build.gradle index b0180ef8..a47f64a5 100644 --- a/code/libraries/geo-ip/build.gradle +++ b/code/libraries/geo-ip/build.gradle @@ -8,6 +8,8 @@ java { } } +apply from: "$rootProject.projectDir/srcsets.gradle" + dependencies { implementation project(':code:common:config') diff --git a/code/libraries/geo-ip/src/main/java/nu/marginalia/geoip/GeoIpDictionary.java b/code/libraries/geo-ip/java/nu/marginalia/geoip/GeoIpDictionary.java similarity index 100% rename from code/libraries/geo-ip/src/main/java/nu/marginalia/geoip/GeoIpDictionary.java rename to code/libraries/geo-ip/java/nu/marginalia/geoip/GeoIpDictionary.java diff --git a/code/libraries/geo-ip/src/main/java/nu/marginalia/geoip/sources/AsnMapping.java b/code/libraries/geo-ip/java/nu/marginalia/geoip/sources/AsnMapping.java similarity index 100% rename from code/libraries/geo-ip/src/main/java/nu/marginalia/geoip/sources/AsnMapping.java rename to code/libraries/geo-ip/java/nu/marginalia/geoip/sources/AsnMapping.java diff --git a/code/libraries/geo-ip/src/main/java/nu/marginalia/geoip/sources/AsnTable.java b/code/libraries/geo-ip/java/nu/marginalia/geoip/sources/AsnTable.java similarity index 100% rename from code/libraries/geo-ip/src/main/java/nu/marginalia/geoip/sources/AsnTable.java rename to code/libraries/geo-ip/java/nu/marginalia/geoip/sources/AsnTable.java diff --git a/code/libraries/geo-ip/src/main/java/nu/marginalia/geoip/sources/IP2LocationMapping.java 
b/code/libraries/geo-ip/java/nu/marginalia/geoip/sources/IP2LocationMapping.java similarity index 100% rename from code/libraries/geo-ip/src/main/java/nu/marginalia/geoip/sources/IP2LocationMapping.java rename to code/libraries/geo-ip/java/nu/marginalia/geoip/sources/IP2LocationMapping.java diff --git a/code/libraries/geo-ip/src/main/java/nu/marginalia/geoip/sources/IpRangeMapping.java b/code/libraries/geo-ip/java/nu/marginalia/geoip/sources/IpRangeMapping.java similarity index 100% rename from code/libraries/geo-ip/src/main/java/nu/marginalia/geoip/sources/IpRangeMapping.java rename to code/libraries/geo-ip/java/nu/marginalia/geoip/sources/IpRangeMapping.java diff --git a/code/libraries/geo-ip/src/test/java/nu/marginalia/geoip/GeoIpDictionaryTest.java b/code/libraries/geo-ip/test/nu/marginalia/geoip/GeoIpDictionaryTest.java similarity index 100% rename from code/libraries/geo-ip/src/test/java/nu/marginalia/geoip/GeoIpDictionaryTest.java rename to code/libraries/geo-ip/test/nu/marginalia/geoip/GeoIpDictionaryTest.java diff --git a/code/libraries/guarded-regex/build.gradle b/code/libraries/guarded-regex/build.gradle index 8640776d..6faa6f3c 100644 --- a/code/libraries/guarded-regex/build.gradle +++ b/code/libraries/guarded-regex/build.gradle @@ -8,6 +8,8 @@ java { } } +apply from: "$rootProject.projectDir/srcsets.gradle" + dependencies { implementation libs.bundles.slf4j diff --git a/code/libraries/guarded-regex/src/main/java/nu/marginalia/gregex/GuardedRegex.java b/code/libraries/guarded-regex/java/nu/marginalia/gregex/GuardedRegex.java similarity index 100% rename from code/libraries/guarded-regex/src/main/java/nu/marginalia/gregex/GuardedRegex.java rename to code/libraries/guarded-regex/java/nu/marginalia/gregex/GuardedRegex.java diff --git a/code/libraries/guarded-regex/src/main/java/nu/marginalia/gregex/GuardedRegexFactory.java b/code/libraries/guarded-regex/java/nu/marginalia/gregex/GuardedRegexFactory.java similarity index 100% rename from 
code/libraries/guarded-regex/src/main/java/nu/marginalia/gregex/GuardedRegexFactory.java rename to code/libraries/guarded-regex/java/nu/marginalia/gregex/GuardedRegexFactory.java diff --git a/code/libraries/guarded-regex/readme.md b/code/libraries/guarded-regex/readme.md index 42d0ca08..ddef661c 100644 --- a/code/libraries/guarded-regex/readme.md +++ b/code/libraries/guarded-regex/readme.md @@ -34,4 +34,4 @@ void ifTheThingDoTheThing(String str) { ## Central Classes -* [GuardedRegexFactory](src/main/java/nu/marginalia/gregex/GuardedRegexFactory.java) \ No newline at end of file +* [GuardedRegexFactory](java/nu/marginalia/gregex/GuardedRegexFactory.java) \ No newline at end of file diff --git a/code/libraries/language-processing/build.gradle b/code/libraries/language-processing/build.gradle index 266bcfb4..56abe731 100644 --- a/code/libraries/language-processing/build.gradle +++ b/code/libraries/language-processing/build.gradle @@ -9,6 +9,8 @@ java { } } +apply from: "$rootProject.projectDir/srcsets.gradle" + dependencies { implementation project(':third-party:rdrpostagger') implementation project(':third-party:porterstemmer') diff --git a/code/libraries/language-processing/src/main/java/nu/marginalia/language/WordDictionary.java b/code/libraries/language-processing/java/nu/marginalia/language/WordDictionary.java similarity index 100% rename from code/libraries/language-processing/src/main/java/nu/marginalia/language/WordDictionary.java rename to code/libraries/language-processing/java/nu/marginalia/language/WordDictionary.java diff --git a/code/libraries/language-processing/src/main/java/nu/marginalia/language/WordPatterns.java b/code/libraries/language-processing/java/nu/marginalia/language/WordPatterns.java similarity index 100% rename from code/libraries/language-processing/src/main/java/nu/marginalia/language/WordPatterns.java rename to code/libraries/language-processing/java/nu/marginalia/language/WordPatterns.java diff --git 
a/code/libraries/language-processing/src/main/java/nu/marginalia/language/encoding/AsciiFlattener.java b/code/libraries/language-processing/java/nu/marginalia/language/encoding/AsciiFlattener.java similarity index 100% rename from code/libraries/language-processing/src/main/java/nu/marginalia/language/encoding/AsciiFlattener.java rename to code/libraries/language-processing/java/nu/marginalia/language/encoding/AsciiFlattener.java diff --git a/code/libraries/language-processing/src/main/java/nu/marginalia/language/encoding/UnicodeRanges.java b/code/libraries/language-processing/java/nu/marginalia/language/encoding/UnicodeRanges.java similarity index 100% rename from code/libraries/language-processing/src/main/java/nu/marginalia/language/encoding/UnicodeRanges.java rename to code/libraries/language-processing/java/nu/marginalia/language/encoding/UnicodeRanges.java diff --git a/code/libraries/language-processing/src/main/java/nu/marginalia/language/filter/FasttextLanguagePredictionModel.java b/code/libraries/language-processing/java/nu/marginalia/language/filter/FasttextLanguagePredictionModel.java similarity index 100% rename from code/libraries/language-processing/src/main/java/nu/marginalia/language/filter/FasttextLanguagePredictionModel.java rename to code/libraries/language-processing/java/nu/marginalia/language/filter/FasttextLanguagePredictionModel.java diff --git a/code/libraries/language-processing/src/main/java/nu/marginalia/language/filter/LanguageFilter.java b/code/libraries/language-processing/java/nu/marginalia/language/filter/LanguageFilter.java similarity index 100% rename from code/libraries/language-processing/src/main/java/nu/marginalia/language/filter/LanguageFilter.java rename to code/libraries/language-processing/java/nu/marginalia/language/filter/LanguageFilter.java diff --git a/code/libraries/language-processing/src/main/java/nu/marginalia/language/filter/LanguagePredictionModel.java 
b/code/libraries/language-processing/java/nu/marginalia/language/filter/LanguagePredictionModel.java similarity index 100% rename from code/libraries/language-processing/src/main/java/nu/marginalia/language/filter/LanguagePredictionModel.java rename to code/libraries/language-processing/java/nu/marginalia/language/filter/LanguagePredictionModel.java diff --git a/code/libraries/language-processing/src/main/java/nu/marginalia/language/filter/UngaBungaLanguagePredictionModel.java b/code/libraries/language-processing/java/nu/marginalia/language/filter/UngaBungaLanguagePredictionModel.java similarity index 100% rename from code/libraries/language-processing/src/main/java/nu/marginalia/language/filter/UngaBungaLanguagePredictionModel.java rename to code/libraries/language-processing/java/nu/marginalia/language/filter/UngaBungaLanguagePredictionModel.java diff --git a/code/libraries/language-processing/src/main/java/nu/marginalia/language/model/DocumentLanguageData.java b/code/libraries/language-processing/java/nu/marginalia/language/model/DocumentLanguageData.java similarity index 100% rename from code/libraries/language-processing/src/main/java/nu/marginalia/language/model/DocumentLanguageData.java rename to code/libraries/language-processing/java/nu/marginalia/language/model/DocumentLanguageData.java diff --git a/code/libraries/language-processing/src/main/java/nu/marginalia/language/model/DocumentSentence.java b/code/libraries/language-processing/java/nu/marginalia/language/model/DocumentSentence.java similarity index 98% rename from code/libraries/language-processing/src/main/java/nu/marginalia/language/model/DocumentSentence.java rename to code/libraries/language-processing/java/nu/marginalia/language/model/DocumentSentence.java index 71b9fab4..ef5bc0a9 100644 --- a/code/libraries/language-processing/src/main/java/nu/marginalia/language/model/DocumentSentence.java +++ b/code/libraries/language-processing/java/nu/marginalia/language/model/DocumentSentence.java @@ 
-8,8 +8,6 @@ import java.lang.ref.SoftReference; import java.util.BitSet; import java.util.Iterator; import java.util.StringJoiner; -import java.util.stream.Collectors; -import java.util.stream.IntStream; public class DocumentSentence implements Iterable{ public final String originalSentence; diff --git a/code/libraries/language-processing/src/main/java/nu/marginalia/language/model/WordFrequencyData.java b/code/libraries/language-processing/java/nu/marginalia/language/model/WordFrequencyData.java similarity index 100% rename from code/libraries/language-processing/src/main/java/nu/marginalia/language/model/WordFrequencyData.java rename to code/libraries/language-processing/java/nu/marginalia/language/model/WordFrequencyData.java diff --git a/code/libraries/language-processing/src/main/java/nu/marginalia/language/model/WordRep.java b/code/libraries/language-processing/java/nu/marginalia/language/model/WordRep.java similarity index 100% rename from code/libraries/language-processing/src/main/java/nu/marginalia/language/model/WordRep.java rename to code/libraries/language-processing/java/nu/marginalia/language/model/WordRep.java diff --git a/code/libraries/language-processing/src/main/java/nu/marginalia/language/model/WordSeparator.java b/code/libraries/language-processing/java/nu/marginalia/language/model/WordSeparator.java similarity index 100% rename from code/libraries/language-processing/src/main/java/nu/marginalia/language/model/WordSeparator.java rename to code/libraries/language-processing/java/nu/marginalia/language/model/WordSeparator.java diff --git a/code/libraries/language-processing/src/main/java/nu/marginalia/language/model/WordSpan.java b/code/libraries/language-processing/java/nu/marginalia/language/model/WordSpan.java similarity index 100% rename from code/libraries/language-processing/src/main/java/nu/marginalia/language/model/WordSpan.java rename to code/libraries/language-processing/java/nu/marginalia/language/model/WordSpan.java diff --git 
a/code/libraries/language-processing/src/main/java/nu/marginalia/language/sentence/SentenceExtractor.java b/code/libraries/language-processing/java/nu/marginalia/language/sentence/SentenceExtractor.java similarity index 100% rename from code/libraries/language-processing/src/main/java/nu/marginalia/language/sentence/SentenceExtractor.java rename to code/libraries/language-processing/java/nu/marginalia/language/sentence/SentenceExtractor.java diff --git a/code/libraries/language-processing/src/main/java/nu/marginalia/language/sentence/SentenceExtractorHtmlTagCleaner.java b/code/libraries/language-processing/java/nu/marginalia/language/sentence/SentenceExtractorHtmlTagCleaner.java similarity index 100% rename from code/libraries/language-processing/src/main/java/nu/marginalia/language/sentence/SentenceExtractorHtmlTagCleaner.java rename to code/libraries/language-processing/java/nu/marginalia/language/sentence/SentenceExtractorHtmlTagCleaner.java diff --git a/code/libraries/language-processing/src/main/java/nu/marginalia/language/sentence/SentenceExtractorStringUtils.java b/code/libraries/language-processing/java/nu/marginalia/language/sentence/SentenceExtractorStringUtils.java similarity index 100% rename from code/libraries/language-processing/src/main/java/nu/marginalia/language/sentence/SentenceExtractorStringUtils.java rename to code/libraries/language-processing/java/nu/marginalia/language/sentence/SentenceExtractorStringUtils.java diff --git a/code/libraries/language-processing/src/main/java/nu/marginalia/language/sentence/SentencePreCleaner.java b/code/libraries/language-processing/java/nu/marginalia/language/sentence/SentencePreCleaner.java similarity index 100% rename from code/libraries/language-processing/src/main/java/nu/marginalia/language/sentence/SentencePreCleaner.java rename to code/libraries/language-processing/java/nu/marginalia/language/sentence/SentencePreCleaner.java diff --git 
a/code/libraries/language-processing/src/main/java/nu/marginalia/language/sentence/SentenceSegmentSplitter.java b/code/libraries/language-processing/java/nu/marginalia/language/sentence/SentenceSegmentSplitter.java similarity index 100% rename from code/libraries/language-processing/src/main/java/nu/marginalia/language/sentence/SentenceSegmentSplitter.java rename to code/libraries/language-processing/java/nu/marginalia/language/sentence/SentenceSegmentSplitter.java diff --git a/code/libraries/language-processing/src/main/java/nu/marginalia/language/sentence/ThreadLocalSentenceExtractorProvider.java b/code/libraries/language-processing/java/nu/marginalia/language/sentence/ThreadLocalSentenceExtractorProvider.java similarity index 100% rename from code/libraries/language-processing/src/main/java/nu/marginalia/language/sentence/ThreadLocalSentenceExtractorProvider.java rename to code/libraries/language-processing/java/nu/marginalia/language/sentence/ThreadLocalSentenceExtractorProvider.java diff --git a/code/libraries/language-processing/readme.md b/code/libraries/language-processing/readme.md index 08965755..7b8ee049 100644 --- a/code/libraries/language-processing/readme.md +++ b/code/libraries/language-processing/readme.md @@ -4,13 +4,11 @@ This library contains various tools used in language processing. ## Central Classes -* [SentenceExtractor](src/main/java/nu/marginalia/language/sentence/SentenceExtractor.java) - -Creates a [DocumentLanguageData](src/main/java/nu/marginalia/language/model/DocumentLanguageData.java) from a text, containing +* [SentenceExtractor](java/nu/marginalia/language/sentence/SentenceExtractor.java) - +Creates a [DocumentLanguageData](java/nu/marginalia/language/model/DocumentLanguageData.java) from a text, containing its words, how they stem, POS tags, and so on. ## See Also [features-convert/keyword-extraction](../../features-convert/keyword-extraction) uses this code to identify which keywords -are important. 
- -[features-qs/query-parser](../../features-qs/query-parser) also does some language processing. \ No newline at end of file +are important. \ No newline at end of file diff --git a/code/libraries/language-processing/src/main/resources/dictionary/en-1000 b/code/libraries/language-processing/resources/dictionary/en-1000 similarity index 100% rename from code/libraries/language-processing/src/main/resources/dictionary/en-1000 rename to code/libraries/language-processing/resources/dictionary/en-1000 diff --git a/code/libraries/language-processing/src/main/resources/dictionary/en-stopwords b/code/libraries/language-processing/resources/dictionary/en-stopwords similarity index 100% rename from code/libraries/language-processing/src/main/resources/dictionary/en-stopwords rename to code/libraries/language-processing/resources/dictionary/en-stopwords diff --git a/code/libraries/language-processing/src/main/resources/dictionary/en-words b/code/libraries/language-processing/resources/dictionary/en-words similarity index 100% rename from code/libraries/language-processing/src/main/resources/dictionary/en-words rename to code/libraries/language-processing/resources/dictionary/en-words diff --git a/code/libraries/language-processing/src/main/resources/dictionary/latin-1000 b/code/libraries/language-processing/resources/dictionary/latin-1000 similarity index 100% rename from code/libraries/language-processing/src/main/resources/dictionary/latin-1000 rename to code/libraries/language-processing/resources/dictionary/latin-1000 diff --git a/code/libraries/language-processing/src/main/resources/dictionary/swe-1000 b/code/libraries/language-processing/resources/dictionary/swe-1000 similarity index 100% rename from code/libraries/language-processing/src/main/resources/dictionary/swe-1000 rename to code/libraries/language-processing/resources/dictionary/swe-1000 diff --git a/code/libraries/language-processing/src/main/resources/dictionary/word-frequency 
b/code/libraries/language-processing/resources/dictionary/word-frequency similarity index 100% rename from code/libraries/language-processing/src/main/resources/dictionary/word-frequency rename to code/libraries/language-processing/resources/dictionary/word-frequency diff --git a/code/libraries/language-processing/src/test/resources/html/jep.html b/code/libraries/language-processing/test-resources/html/jep.html similarity index 100% rename from code/libraries/language-processing/src/test/resources/html/jep.html rename to code/libraries/language-processing/test-resources/html/jep.html diff --git a/code/libraries/language-processing/src/test/resources/html/spam.html b/code/libraries/language-processing/test-resources/html/spam.html similarity index 100% rename from code/libraries/language-processing/src/test/resources/html/spam.html rename to code/libraries/language-processing/test-resources/html/spam.html diff --git a/code/libraries/language-processing/src/test/java/nu/marginalia/language/encoding/AsciiFlattenerTest.java b/code/libraries/language-processing/test/nu/marginalia/language/encoding/AsciiFlattenerTest.java similarity index 100% rename from code/libraries/language-processing/src/test/java/nu/marginalia/language/encoding/AsciiFlattenerTest.java rename to code/libraries/language-processing/test/nu/marginalia/language/encoding/AsciiFlattenerTest.java diff --git a/code/libraries/language-processing/src/test/java/nu/marginalia/language/encoding/SentenceExtractorHtmlTagCleanerTest.java b/code/libraries/language-processing/test/nu/marginalia/language/encoding/SentenceExtractorHtmlTagCleanerTest.java similarity index 100% rename from code/libraries/language-processing/src/test/java/nu/marginalia/language/encoding/SentenceExtractorHtmlTagCleanerTest.java rename to code/libraries/language-processing/test/nu/marginalia/language/encoding/SentenceExtractorHtmlTagCleanerTest.java diff --git 
a/code/libraries/language-processing/src/test/java/nu/marginalia/language/filter/LanguageFilterTest.java b/code/libraries/language-processing/test/nu/marginalia/language/filter/LanguageFilterTest.java similarity index 100% rename from code/libraries/language-processing/src/test/java/nu/marginalia/language/filter/LanguageFilterTest.java rename to code/libraries/language-processing/test/nu/marginalia/language/filter/LanguageFilterTest.java diff --git a/code/libraries/language-processing/src/test/java/nu/marginalia/language/filter/TestLanguageModels.java b/code/libraries/language-processing/test/nu/marginalia/language/filter/TestLanguageModels.java similarity index 100% rename from code/libraries/language-processing/src/test/java/nu/marginalia/language/filter/TestLanguageModels.java rename to code/libraries/language-processing/test/nu/marginalia/language/filter/TestLanguageModels.java diff --git a/code/libraries/language-processing/src/test/java/nu/marginalia/language/sentence/SentenceExtractorTest.java b/code/libraries/language-processing/test/nu/marginalia/language/sentence/SentenceExtractorTest.java similarity index 100% rename from code/libraries/language-processing/src/test/java/nu/marginalia/language/sentence/SentenceExtractorTest.java rename to code/libraries/language-processing/test/nu/marginalia/language/sentence/SentenceExtractorTest.java diff --git a/code/libraries/message-queue/build.gradle b/code/libraries/message-queue/build.gradle index 61f0292f..5931a76e 100644 --- a/code/libraries/message-queue/build.gradle +++ b/code/libraries/message-queue/build.gradle @@ -9,11 +9,12 @@ java { } } +apply from: "$rootProject.projectDir/srcsets.gradle" + dependencies { implementation libs.spark implementation libs.guice implementation libs.gson - implementation libs.rxjava implementation libs.bundles.slf4j implementation libs.bucket4j diff --git a/code/libraries/message-queue/src/main/java/nu/marginalia/actor/ActorStateMachine.java 
b/code/libraries/message-queue/java/nu/marginalia/actor/ActorStateMachine.java similarity index 99% rename from code/libraries/message-queue/src/main/java/nu/marginalia/actor/ActorStateMachine.java rename to code/libraries/message-queue/java/nu/marginalia/actor/ActorStateMachine.java index 27215f50..c97f0641 100644 --- a/code/libraries/message-queue/src/main/java/nu/marginalia/actor/ActorStateMachine.java +++ b/code/libraries/message-queue/java/nu/marginalia/actor/ActorStateMachine.java @@ -8,7 +8,6 @@ import nu.marginalia.mq.inbox.MqInboxResponse; import nu.marginalia.mq.inbox.MqSubscription; import nu.marginalia.mq.inbox.MqSynchronousInbox; import nu.marginalia.mq.outbox.MqOutbox; -import nu.marginalia.actor.state.ActorResumeBehavior; import nu.marginalia.actor.state.*; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/code/libraries/message-queue/src/main/java/nu/marginalia/actor/ExpectedMessage.java b/code/libraries/message-queue/java/nu/marginalia/actor/ExpectedMessage.java similarity index 100% rename from code/libraries/message-queue/src/main/java/nu/marginalia/actor/ExpectedMessage.java rename to code/libraries/message-queue/java/nu/marginalia/actor/ExpectedMessage.java diff --git a/code/libraries/message-queue/src/main/java/nu/marginalia/actor/prototype/ActorPrototype.java b/code/libraries/message-queue/java/nu/marginalia/actor/prototype/ActorPrototype.java similarity index 100% rename from code/libraries/message-queue/src/main/java/nu/marginalia/actor/prototype/ActorPrototype.java rename to code/libraries/message-queue/java/nu/marginalia/actor/prototype/ActorPrototype.java diff --git a/code/libraries/message-queue/src/main/java/nu/marginalia/actor/prototype/RecordActorPrototype.java b/code/libraries/message-queue/java/nu/marginalia/actor/prototype/RecordActorPrototype.java similarity index 100% rename from code/libraries/message-queue/src/main/java/nu/marginalia/actor/prototype/RecordActorPrototype.java rename to 
code/libraries/message-queue/java/nu/marginalia/actor/prototype/RecordActorPrototype.java diff --git a/code/libraries/message-queue/src/main/java/nu/marginalia/actor/state/ActorControlFlowException.java b/code/libraries/message-queue/java/nu/marginalia/actor/state/ActorControlFlowException.java similarity index 100% rename from code/libraries/message-queue/src/main/java/nu/marginalia/actor/state/ActorControlFlowException.java rename to code/libraries/message-queue/java/nu/marginalia/actor/state/ActorControlFlowException.java diff --git a/code/libraries/message-queue/src/main/java/nu/marginalia/actor/state/ActorResumeBehavior.java b/code/libraries/message-queue/java/nu/marginalia/actor/state/ActorResumeBehavior.java similarity index 100% rename from code/libraries/message-queue/src/main/java/nu/marginalia/actor/state/ActorResumeBehavior.java rename to code/libraries/message-queue/java/nu/marginalia/actor/state/ActorResumeBehavior.java diff --git a/code/libraries/message-queue/src/main/java/nu/marginalia/actor/state/ActorStateInstance.java b/code/libraries/message-queue/java/nu/marginalia/actor/state/ActorStateInstance.java similarity index 100% rename from code/libraries/message-queue/src/main/java/nu/marginalia/actor/state/ActorStateInstance.java rename to code/libraries/message-queue/java/nu/marginalia/actor/state/ActorStateInstance.java diff --git a/code/libraries/message-queue/src/main/java/nu/marginalia/actor/state/ActorStateTransition.java b/code/libraries/message-queue/java/nu/marginalia/actor/state/ActorStateTransition.java similarity index 100% rename from code/libraries/message-queue/src/main/java/nu/marginalia/actor/state/ActorStateTransition.java rename to code/libraries/message-queue/java/nu/marginalia/actor/state/ActorStateTransition.java diff --git a/code/libraries/message-queue/src/main/java/nu/marginalia/actor/state/ActorStep.java b/code/libraries/message-queue/java/nu/marginalia/actor/state/ActorStep.java similarity index 100% rename from 
code/libraries/message-queue/src/main/java/nu/marginalia/actor/state/ActorStep.java rename to code/libraries/message-queue/java/nu/marginalia/actor/state/ActorStep.java diff --git a/code/libraries/message-queue/src/main/java/nu/marginalia/actor/state/Resume.java b/code/libraries/message-queue/java/nu/marginalia/actor/state/Resume.java similarity index 100% rename from code/libraries/message-queue/src/main/java/nu/marginalia/actor/state/Resume.java rename to code/libraries/message-queue/java/nu/marginalia/actor/state/Resume.java diff --git a/code/libraries/message-queue/src/main/java/nu/marginalia/actor/state/Terminal.java b/code/libraries/message-queue/java/nu/marginalia/actor/state/Terminal.java similarity index 100% rename from code/libraries/message-queue/src/main/java/nu/marginalia/actor/state/Terminal.java rename to code/libraries/message-queue/java/nu/marginalia/actor/state/Terminal.java diff --git a/code/libraries/message-queue/src/main/java/nu/marginalia/mq/MessageQueueFactory.java b/code/libraries/message-queue/java/nu/marginalia/mq/MessageQueueFactory.java similarity index 100% rename from code/libraries/message-queue/src/main/java/nu/marginalia/mq/MessageQueueFactory.java rename to code/libraries/message-queue/java/nu/marginalia/mq/MessageQueueFactory.java diff --git a/code/libraries/message-queue/src/main/java/nu/marginalia/mq/MqException.java b/code/libraries/message-queue/java/nu/marginalia/mq/MqException.java similarity index 100% rename from code/libraries/message-queue/src/main/java/nu/marginalia/mq/MqException.java rename to code/libraries/message-queue/java/nu/marginalia/mq/MqException.java diff --git a/code/libraries/message-queue/src/main/java/nu/marginalia/mq/MqMessage.java b/code/libraries/message-queue/java/nu/marginalia/mq/MqMessage.java similarity index 100% rename from code/libraries/message-queue/src/main/java/nu/marginalia/mq/MqMessage.java rename to code/libraries/message-queue/java/nu/marginalia/mq/MqMessage.java diff --git 
a/code/libraries/message-queue/src/main/java/nu/marginalia/mq/MqMessageState.java b/code/libraries/message-queue/java/nu/marginalia/mq/MqMessageState.java similarity index 100% rename from code/libraries/message-queue/src/main/java/nu/marginalia/mq/MqMessageState.java rename to code/libraries/message-queue/java/nu/marginalia/mq/MqMessageState.java diff --git a/code/libraries/message-queue/src/main/java/nu/marginalia/mq/inbox/MqAsynchronousInbox.java b/code/libraries/message-queue/java/nu/marginalia/mq/inbox/MqAsynchronousInbox.java similarity index 100% rename from code/libraries/message-queue/src/main/java/nu/marginalia/mq/inbox/MqAsynchronousInbox.java rename to code/libraries/message-queue/java/nu/marginalia/mq/inbox/MqAsynchronousInbox.java diff --git a/code/libraries/message-queue/src/main/java/nu/marginalia/mq/inbox/MqInboxIf.java b/code/libraries/message-queue/java/nu/marginalia/mq/inbox/MqInboxIf.java similarity index 100% rename from code/libraries/message-queue/src/main/java/nu/marginalia/mq/inbox/MqInboxIf.java rename to code/libraries/message-queue/java/nu/marginalia/mq/inbox/MqInboxIf.java diff --git a/code/libraries/message-queue/src/main/java/nu/marginalia/mq/inbox/MqInboxResponse.java b/code/libraries/message-queue/java/nu/marginalia/mq/inbox/MqInboxResponse.java similarity index 100% rename from code/libraries/message-queue/src/main/java/nu/marginalia/mq/inbox/MqInboxResponse.java rename to code/libraries/message-queue/java/nu/marginalia/mq/inbox/MqInboxResponse.java diff --git a/code/libraries/message-queue/src/main/java/nu/marginalia/mq/inbox/MqInboxShredder.java b/code/libraries/message-queue/java/nu/marginalia/mq/inbox/MqInboxShredder.java similarity index 100% rename from code/libraries/message-queue/src/main/java/nu/marginalia/mq/inbox/MqInboxShredder.java rename to code/libraries/message-queue/java/nu/marginalia/mq/inbox/MqInboxShredder.java diff --git a/code/libraries/message-queue/src/main/java/nu/marginalia/mq/inbox/MqSingleShotInbox.java 
b/code/libraries/message-queue/java/nu/marginalia/mq/inbox/MqSingleShotInbox.java similarity index 100% rename from code/libraries/message-queue/src/main/java/nu/marginalia/mq/inbox/MqSingleShotInbox.java rename to code/libraries/message-queue/java/nu/marginalia/mq/inbox/MqSingleShotInbox.java diff --git a/code/libraries/message-queue/src/main/java/nu/marginalia/mq/inbox/MqSubscription.java b/code/libraries/message-queue/java/nu/marginalia/mq/inbox/MqSubscription.java similarity index 100% rename from code/libraries/message-queue/src/main/java/nu/marginalia/mq/inbox/MqSubscription.java rename to code/libraries/message-queue/java/nu/marginalia/mq/inbox/MqSubscription.java diff --git a/code/libraries/message-queue/src/main/java/nu/marginalia/mq/inbox/MqSynchronousInbox.java b/code/libraries/message-queue/java/nu/marginalia/mq/inbox/MqSynchronousInbox.java similarity index 100% rename from code/libraries/message-queue/src/main/java/nu/marginalia/mq/inbox/MqSynchronousInbox.java rename to code/libraries/message-queue/java/nu/marginalia/mq/inbox/MqSynchronousInbox.java diff --git a/code/libraries/message-queue/src/main/java/nu/marginalia/mq/outbox/MqOutbox.java b/code/libraries/message-queue/java/nu/marginalia/mq/outbox/MqOutbox.java similarity index 100% rename from code/libraries/message-queue/src/main/java/nu/marginalia/mq/outbox/MqOutbox.java rename to code/libraries/message-queue/java/nu/marginalia/mq/outbox/MqOutbox.java diff --git a/code/libraries/message-queue/src/main/java/nu/marginalia/mq/persistence/MqMessageHandlerRegistry.java b/code/libraries/message-queue/java/nu/marginalia/mq/persistence/MqMessageHandlerRegistry.java similarity index 100% rename from code/libraries/message-queue/src/main/java/nu/marginalia/mq/persistence/MqMessageHandlerRegistry.java rename to code/libraries/message-queue/java/nu/marginalia/mq/persistence/MqMessageHandlerRegistry.java diff --git a/code/libraries/message-queue/src/main/java/nu/marginalia/mq/persistence/MqPersistence.java 
b/code/libraries/message-queue/java/nu/marginalia/mq/persistence/MqPersistence.java similarity index 98% rename from code/libraries/message-queue/src/main/java/nu/marginalia/mq/persistence/MqPersistence.java rename to code/libraries/message-queue/java/nu/marginalia/mq/persistence/MqPersistence.java index 02740232..a3ddc17b 100644 --- a/code/libraries/message-queue/src/main/java/nu/marginalia/mq/persistence/MqPersistence.java +++ b/code/libraries/message-queue/java/nu/marginalia/mq/persistence/MqPersistence.java @@ -211,7 +211,7 @@ public class MqPersistence { AND OWNER_INSTANCE IS NULL AND STATE='NEW' ORDER BY ID ASC LIMIT ? - """); + """) ) { updateStmt.setString(1, instanceUUID); updateStmt.setLong(2, tick); @@ -314,6 +314,10 @@ public class MqPersistence { */ public Collection pollInbox(String inboxName, String instanceUUID, long tick, int n) throws SQLException { + if (dataSource.isClosed()) { + return Collections.emptyList(); + } + // Mark new messages as claimed int expected = markInboxMessages(inboxName, instanceUUID, tick, n); if (expected == 0) { @@ -366,6 +370,10 @@ public class MqPersistence { */ public Collection pollReplyInbox(String inboxName, String instanceUUID, long tick, int n) throws SQLException { + if (dataSource.isClosed()) { + return Collections.emptyList(); + } + // Mark new messages as claimed int expected = markInboxMessages(inboxName, instanceUUID, tick, n); if (expected == 0) { diff --git a/code/libraries/message-queue/src/test/java/nu/marginalia/actor/ActorRecordMachineTest.java b/code/libraries/message-queue/test/nu/marginalia/actor/ActorRecordMachineTest.java similarity index 98% rename from code/libraries/message-queue/src/test/java/nu/marginalia/actor/ActorRecordMachineTest.java rename to code/libraries/message-queue/test/nu/marginalia/actor/ActorRecordMachineTest.java index cbd1d17c..7c9f7d61 100644 --- a/code/libraries/message-queue/src/test/java/nu/marginalia/actor/ActorRecordMachineTest.java +++ 
b/code/libraries/message-queue/test/nu/marginalia/actor/ActorRecordMachineTest.java @@ -5,9 +5,7 @@ import com.google.gson.GsonBuilder; import com.zaxxer.hikari.HikariConfig; import com.zaxxer.hikari.HikariDataSource; import nu.marginalia.actor.prototype.RecordActorPrototype; -import nu.marginalia.actor.state.ActorResumeBehavior; import nu.marginalia.actor.state.ActorStep; -import nu.marginalia.actor.state.Resume; import nu.marginalia.mq.MessageQueueFactory; import nu.marginalia.mq.MqTestUtil; import nu.marginalia.mq.persistence.MqPersistence; diff --git a/code/libraries/message-queue/src/test/java/nu/marginalia/mq/MqMessageRow.java b/code/libraries/message-queue/test/nu/marginalia/mq/MqMessageRow.java similarity index 100% rename from code/libraries/message-queue/src/test/java/nu/marginalia/mq/MqMessageRow.java rename to code/libraries/message-queue/test/nu/marginalia/mq/MqMessageRow.java diff --git a/code/libraries/message-queue/src/test/java/nu/marginalia/mq/MqTestUtil.java b/code/libraries/message-queue/test/nu/marginalia/mq/MqTestUtil.java similarity index 100% rename from code/libraries/message-queue/src/test/java/nu/marginalia/mq/MqTestUtil.java rename to code/libraries/message-queue/test/nu/marginalia/mq/MqTestUtil.java diff --git a/code/libraries/message-queue/src/test/java/nu/marginalia/mq/outbox/MqOutboxTest.java b/code/libraries/message-queue/test/nu/marginalia/mq/outbox/MqOutboxTest.java similarity index 100% rename from code/libraries/message-queue/src/test/java/nu/marginalia/mq/outbox/MqOutboxTest.java rename to code/libraries/message-queue/test/nu/marginalia/mq/outbox/MqOutboxTest.java diff --git a/code/libraries/message-queue/src/test/java/nu/marginalia/mq/persistence/MqPersistenceTest.java b/code/libraries/message-queue/test/nu/marginalia/mq/persistence/MqPersistenceTest.java similarity index 100% rename from code/libraries/message-queue/src/test/java/nu/marginalia/mq/persistence/MqPersistenceTest.java rename to 
code/libraries/message-queue/test/nu/marginalia/mq/persistence/MqPersistenceTest.java diff --git a/code/libraries/next-prime/build.gradle b/code/libraries/next-prime/build.gradle index e716f1c8..8c5609a5 100644 --- a/code/libraries/next-prime/build.gradle +++ b/code/libraries/next-prime/build.gradle @@ -8,6 +8,8 @@ java { } } +apply from: "$rootProject.projectDir/srcsets.gradle" + dependencies { implementation libs.bundles.slf4j diff --git a/code/libraries/next-prime/src/main/java/nu/marginalia/util/NextPrimeUtil.java b/code/libraries/next-prime/java/nu/marginalia/util/NextPrimeUtil.java similarity index 100% rename from code/libraries/next-prime/src/main/java/nu/marginalia/util/NextPrimeUtil.java rename to code/libraries/next-prime/java/nu/marginalia/util/NextPrimeUtil.java diff --git a/code/libraries/next-prime/src/test/java/nu/marginalia/util/NextPrimeUtilTest.java b/code/libraries/next-prime/test/nu/marginalia/util/NextPrimeUtilTest.java similarity index 100% rename from code/libraries/next-prime/src/test/java/nu/marginalia/util/NextPrimeUtilTest.java rename to code/libraries/next-prime/test/nu/marginalia/util/NextPrimeUtilTest.java diff --git a/code/libraries/random-write-funnel/build.gradle b/code/libraries/random-write-funnel/build.gradle index 997ff739..9d23af16 100644 --- a/code/libraries/random-write-funnel/build.gradle +++ b/code/libraries/random-write-funnel/build.gradle @@ -8,6 +8,8 @@ java { } } +apply from: "$rootProject.projectDir/srcsets.gradle" + dependencies { implementation project(':code:libraries:array') diff --git a/code/libraries/random-write-funnel/src/main/java/nu/marginalia/rwf/RandomFileAssembler.java b/code/libraries/random-write-funnel/java/nu/marginalia/rwf/RandomFileAssembler.java similarity index 100% rename from code/libraries/random-write-funnel/src/main/java/nu/marginalia/rwf/RandomFileAssembler.java rename to code/libraries/random-write-funnel/java/nu/marginalia/rwf/RandomFileAssembler.java diff --git 
a/code/libraries/random-write-funnel/src/main/java/nu/marginalia/rwf/RandomWriteFunnel.java b/code/libraries/random-write-funnel/java/nu/marginalia/rwf/RandomWriteFunnel.java similarity index 100% rename from code/libraries/random-write-funnel/src/main/java/nu/marginalia/rwf/RandomWriteFunnel.java rename to code/libraries/random-write-funnel/java/nu/marginalia/rwf/RandomWriteFunnel.java diff --git a/code/libraries/random-write-funnel/readme.md b/code/libraries/random-write-funnel/readme.md index fc02b955..219e1439 100644 --- a/code/libraries/random-write-funnel/readme.md +++ b/code/libraries/random-write-funnel/readme.md @@ -2,12 +2,12 @@ This micro-library with strategies for solving the problem of [write amplificati writing large files out of order to disk. It offers a simple API to write data to a file in a random order, while localizing the writes. -Several strategies are available from the [RandomFileAssembler](src/main/java/nu/marginalia/rwf/RandomFileAssembler.java) +Several strategies are available from the [RandomFileAssembler](java/nu/marginalia/rwf/RandomFileAssembler.java) interface. * Writing to a memory mapped file (non-solution, for small files) * Writing to a memory buffer (for systems with enough memory) -* [RandomWriteFunnel](src/main/java/nu/marginalia/rwf/RandomWriteFunnel.java) - Not bound by memory. +* [RandomWriteFunnel](java/nu/marginalia/rwf/RandomWriteFunnel.java) - Not bound by memory. The data is written in a native byte order. 
@@ -41,5 +41,5 @@ catch (IOException ex) { ## Central Classes -* [RandomFileAssembler](src/main/java/nu/marginalia/rwf/RandomFileAssembler.java) -* [RandomWriteFunnel](src/main/java/nu/marginalia/rwf/RandomWriteFunnel.java) \ No newline at end of file +* [RandomFileAssembler](java/nu/marginalia/rwf/RandomFileAssembler.java) +* [RandomWriteFunnel](java/nu/marginalia/rwf/RandomWriteFunnel.java) \ No newline at end of file diff --git a/code/libraries/random-write-funnel/src/test/java/nu/marginalia/rwf/RandomFileAssemblerTest.java b/code/libraries/random-write-funnel/test/nu/marginalia/rwf/RandomFileAssemblerTest.java similarity index 98% rename from code/libraries/random-write-funnel/src/test/java/nu/marginalia/rwf/RandomFileAssemblerTest.java rename to code/libraries/random-write-funnel/test/nu/marginalia/rwf/RandomFileAssemblerTest.java index 76f5e822..bcbc7383 100644 --- a/code/libraries/random-write-funnel/src/test/java/nu/marginalia/rwf/RandomFileAssemblerTest.java +++ b/code/libraries/random-write-funnel/test/nu/marginalia/rwf/RandomFileAssemblerTest.java @@ -7,7 +7,6 @@ import org.junit.jupiter.api.Test; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; -import java.util.Arrays; import static org.junit.jupiter.api.Assertions.*; diff --git a/code/libraries/random-write-funnel/src/test/java/nu/marginalia/rwf/RandomWriteFunnelTest.java b/code/libraries/random-write-funnel/test/nu/marginalia/rwf/RandomWriteFunnelTest.java similarity index 100% rename from code/libraries/random-write-funnel/src/test/java/nu/marginalia/rwf/RandomWriteFunnelTest.java rename to code/libraries/random-write-funnel/test/nu/marginalia/rwf/RandomWriteFunnelTest.java diff --git a/code/libraries/term-frequency-dict/build.gradle b/code/libraries/term-frequency-dict/build.gradle index d7cd4029..0fe311b6 100644 --- a/code/libraries/term-frequency-dict/build.gradle +++ b/code/libraries/term-frequency-dict/build.gradle @@ -11,6 +11,8 @@ java { } } +apply from: 
"$rootProject.projectDir/srcsets.gradle" + dependencies { implementation project(':third-party:rdrpostagger') implementation project(':third-party:porterstemmer') diff --git a/code/libraries/term-frequency-dict/src/main/java/nu/marginalia/term_frequency_dict/TermFrequencyDict.java b/code/libraries/term-frequency-dict/java/nu/marginalia/term_frequency_dict/TermFrequencyDict.java similarity index 100% rename from code/libraries/term-frequency-dict/src/main/java/nu/marginalia/term_frequency_dict/TermFrequencyDict.java rename to code/libraries/term-frequency-dict/java/nu/marginalia/term_frequency_dict/TermFrequencyDict.java diff --git a/code/libraries/term-frequency-dict/readme.md b/code/libraries/term-frequency-dict/readme.md index 32912f0d..1c1e9c67 100644 --- a/code/libraries/term-frequency-dict/readme.md +++ b/code/libraries/term-frequency-dict/readme.md @@ -5,8 +5,4 @@ the TF-IDF score of a keyword. ## Central Classes -* [TermFrequencyDict](src/main/java/nu/marginalia/term_frequency_dict/TermFrequencyDict.java) - -## See Also - -* [tools/term-frequency-extractor](../../tools/term-frequency-extractor) constructs this file \ No newline at end of file +* [TermFrequencyDict](java/nu/marginalia/term_frequency_dict/TermFrequencyDict.java) diff --git a/code/libraries/test-helpers/build.gradle b/code/libraries/test-helpers/build.gradle index b50556a1..16244d0a 100644 --- a/code/libraries/test-helpers/build.gradle +++ b/code/libraries/test-helpers/build.gradle @@ -8,6 +8,8 @@ java { } } +apply from: "$rootProject.projectDir/srcsets.gradle" + dependencies { implementation libs.bundles.slf4j implementation libs.bundles.mariadb diff --git a/code/libraries/test-helpers/src/main/java/nu/marginalia/test/TestMigrationLoader.java b/code/libraries/test-helpers/java/nu/marginalia/test/TestMigrationLoader.java similarity index 100% rename from code/libraries/test-helpers/src/main/java/nu/marginalia/test/TestMigrationLoader.java rename to 
code/libraries/test-helpers/java/nu/marginalia/test/TestMigrationLoader.java diff --git a/code/process-models/crawl-spec/build.gradle b/code/process-models/crawl-spec/build.gradle index a0045a22..551f2c6e 100644 --- a/code/process-models/crawl-spec/build.gradle +++ b/code/process-models/crawl-spec/build.gradle @@ -10,6 +10,8 @@ java { languageVersion.set(JavaLanguageVersion.of(21)) } } +apply from: "$rootProject.projectDir/srcsets.gradle" + dependencies { implementation libs.bundles.slf4j diff --git a/code/process-models/crawl-spec/src/main/java/nu/marginalia/crawlspec/CrawlSpecFileNames.java b/code/process-models/crawl-spec/java/nu/marginalia/crawlspec/CrawlSpecFileNames.java similarity index 100% rename from code/process-models/crawl-spec/src/main/java/nu/marginalia/crawlspec/CrawlSpecFileNames.java rename to code/process-models/crawl-spec/java/nu/marginalia/crawlspec/CrawlSpecFileNames.java diff --git a/code/process-models/crawl-spec/src/main/java/nu/marginalia/crawlspec/CrawlSpecGenerator.java b/code/process-models/crawl-spec/java/nu/marginalia/crawlspec/CrawlSpecGenerator.java similarity index 100% rename from code/process-models/crawl-spec/src/main/java/nu/marginalia/crawlspec/CrawlSpecGenerator.java rename to code/process-models/crawl-spec/java/nu/marginalia/crawlspec/CrawlSpecGenerator.java diff --git a/code/process-models/crawl-spec/src/main/java/nu/marginalia/io/crawlspec/CrawlSpecRecordParquetFileReader.java b/code/process-models/crawl-spec/java/nu/marginalia/io/crawlspec/CrawlSpecRecordParquetFileReader.java similarity index 100% rename from code/process-models/crawl-spec/src/main/java/nu/marginalia/io/crawlspec/CrawlSpecRecordParquetFileReader.java rename to code/process-models/crawl-spec/java/nu/marginalia/io/crawlspec/CrawlSpecRecordParquetFileReader.java diff --git a/code/process-models/crawl-spec/src/main/java/nu/marginalia/io/crawlspec/CrawlSpecRecordParquetFileWriter.java 
b/code/process-models/crawl-spec/java/nu/marginalia/io/crawlspec/CrawlSpecRecordParquetFileWriter.java similarity index 100% rename from code/process-models/crawl-spec/src/main/java/nu/marginalia/io/crawlspec/CrawlSpecRecordParquetFileWriter.java rename to code/process-models/crawl-spec/java/nu/marginalia/io/crawlspec/CrawlSpecRecordParquetFileWriter.java diff --git a/code/process-models/crawl-spec/src/main/java/nu/marginalia/model/crawlspec/CrawlSpecRecord.java b/code/process-models/crawl-spec/java/nu/marginalia/model/crawlspec/CrawlSpecRecord.java similarity index 100% rename from code/process-models/crawl-spec/src/main/java/nu/marginalia/model/crawlspec/CrawlSpecRecord.java rename to code/process-models/crawl-spec/java/nu/marginalia/model/crawlspec/CrawlSpecRecord.java diff --git a/code/process-models/crawl-spec/readme.md b/code/process-models/crawl-spec/readme.md index 63bcec96..cd59f23c 100644 --- a/code/process-models/crawl-spec/readme.md +++ b/code/process-models/crawl-spec/readme.md @@ -8,9 +8,9 @@ A crawl spec is a list of domains to be crawled. It is a parquet file with the Crawl specs are used to define the scope of a crawl in the absence of known domains. -The [CrawlSpecRecord](src/main/java/nu/marginalia/model/crawlspec/CrawlSpecRecord.java) class is +The [CrawlSpecRecord](java/nu/marginalia/model/crawlspec/CrawlSpecRecord.java) class is used to represent a record in the crawl spec. -The [CrawlSpecRecordParquetFileReader](src/main/java/nu/marginalia/io/crawlspec/CrawlSpecRecordParquetFileReader.java) -and [CrawlSpecRecordParquetFileWriter](src/main/java/nu/marginalia/io/crawlspec/CrawlSpecRecordParquetFileWriter.java) +The [CrawlSpecRecordParquetFileReader](java/nu/marginalia/io/crawlspec/CrawlSpecRecordParquetFileReader.java) +and [CrawlSpecRecordParquetFileWriter](java/nu/marginalia/io/crawlspec/CrawlSpecRecordParquetFileWriter.java) classes are used to read and write the crawl spec parquet files. 
diff --git a/code/process-models/crawling-model/build.gradle b/code/process-models/crawling-model/build.gradle index ab4e8a8a..505db80f 100644 --- a/code/process-models/crawling-model/build.gradle +++ b/code/process-models/crawling-model/build.gradle @@ -12,15 +12,16 @@ java { } } +apply from: "$rootProject.projectDir/srcsets.gradle" + dependencies { implementation project(':code:common:model') implementation project(':code:common:db') implementation project(':code:common:config') implementation project(':code:common:process') implementation project(':code:libraries:big-string') - implementation project(':code:api:index-api') + implementation project(':code:index:api') implementation project(':code:common:service-discovery') - implementation project(':code:common:service-client') implementation project(':code:features-crawl:content-type') implementation project(':code:libraries:language-processing') implementation project(':third-party:parquet-floor') diff --git a/code/process-models/crawling-model/src/main/java/nu/marginalia/crawling/body/ContentTypeLogic.java b/code/process-models/crawling-model/java/nu/marginalia/crawling/body/ContentTypeLogic.java similarity index 100% rename from code/process-models/crawling-model/src/main/java/nu/marginalia/crawling/body/ContentTypeLogic.java rename to code/process-models/crawling-model/java/nu/marginalia/crawling/body/ContentTypeLogic.java diff --git a/code/process-models/crawling-model/src/main/java/nu/marginalia/crawling/body/DocumentBodyExtractor.java b/code/process-models/crawling-model/java/nu/marginalia/crawling/body/DocumentBodyExtractor.java similarity index 100% rename from code/process-models/crawling-model/src/main/java/nu/marginalia/crawling/body/DocumentBodyExtractor.java rename to code/process-models/crawling-model/java/nu/marginalia/crawling/body/DocumentBodyExtractor.java diff --git a/code/process-models/crawling-model/src/main/java/nu/marginalia/crawling/body/DocumentBodyResult.java 
b/code/process-models/crawling-model/java/nu/marginalia/crawling/body/DocumentBodyResult.java similarity index 100% rename from code/process-models/crawling-model/src/main/java/nu/marginalia/crawling/body/DocumentBodyResult.java rename to code/process-models/crawling-model/java/nu/marginalia/crawling/body/DocumentBodyResult.java diff --git a/code/process-models/crawling-model/src/main/java/nu/marginalia/crawling/body/HttpFetchResult.java b/code/process-models/crawling-model/java/nu/marginalia/crawling/body/HttpFetchResult.java similarity index 99% rename from code/process-models/crawling-model/src/main/java/nu/marginalia/crawling/body/HttpFetchResult.java rename to code/process-models/crawling-model/java/nu/marginalia/crawling/body/HttpFetchResult.java index f0db28e8..6bafaf5c 100644 --- a/code/process-models/crawling-model/src/main/java/nu/marginalia/crawling/body/HttpFetchResult.java +++ b/code/process-models/crawling-model/java/nu/marginalia/crawling/body/HttpFetchResult.java @@ -106,7 +106,7 @@ public sealed interface HttpFetchResult { return headers.get(name); } - }; + } /** This is a special case where the document was not fetched * because it was already in the database. In this case, we @@ -128,14 +128,14 @@ public sealed interface HttpFetchResult { return Optional.empty(); } } - }; + } /** Fetching resulted in an exception */ record ResultException(Exception ex) implements HttpFetchResult { public boolean isOk() { return false; } - }; + } /** Fetching resulted in a HTTP 304, the remote content is identical to * our reference copy. This will be replaced with a Result304ReplacedWithReference @@ -147,7 +147,7 @@ public sealed interface HttpFetchResult { public boolean isOk() { return false; } - }; + } /** No result. This is typically injected at a later stage * of processing, e.g. after filtering out irrelevant responses. 
@@ -156,5 +156,5 @@ public sealed interface HttpFetchResult { public boolean isOk() { return false; } - }; + } } diff --git a/code/process-models/crawling-model/src/main/java/nu/marginalia/crawling/io/CrawledDomainReader.java b/code/process-models/crawling-model/java/nu/marginalia/crawling/io/CrawledDomainReader.java similarity index 100% rename from code/process-models/crawling-model/src/main/java/nu/marginalia/crawling/io/CrawledDomainReader.java rename to code/process-models/crawling-model/java/nu/marginalia/crawling/io/CrawledDomainReader.java diff --git a/code/process-models/crawling-model/src/main/java/nu/marginalia/crawling/io/CrawledDomainWriter.java b/code/process-models/crawling-model/java/nu/marginalia/crawling/io/CrawledDomainWriter.java similarity index 100% rename from code/process-models/crawling-model/src/main/java/nu/marginalia/crawling/io/CrawledDomainWriter.java rename to code/process-models/crawling-model/java/nu/marginalia/crawling/io/CrawledDomainWriter.java diff --git a/code/process-models/crawling-model/src/main/java/nu/marginalia/crawling/io/CrawlerOutputFile.java b/code/process-models/crawling-model/java/nu/marginalia/crawling/io/CrawlerOutputFile.java similarity index 100% rename from code/process-models/crawling-model/src/main/java/nu/marginalia/crawling/io/CrawlerOutputFile.java rename to code/process-models/crawling-model/java/nu/marginalia/crawling/io/CrawlerOutputFile.java diff --git a/code/process-models/crawling-model/src/main/java/nu/marginalia/crawling/io/SerializableCrawlDataStream.java b/code/process-models/crawling-model/java/nu/marginalia/crawling/io/SerializableCrawlDataStream.java similarity index 100% rename from code/process-models/crawling-model/src/main/java/nu/marginalia/crawling/io/SerializableCrawlDataStream.java rename to code/process-models/crawling-model/java/nu/marginalia/crawling/io/SerializableCrawlDataStream.java diff --git 
a/code/process-models/crawling-model/src/main/java/nu/marginalia/crawling/io/format/CompatibleLegacySerializableCrawlDataStream.java b/code/process-models/crawling-model/java/nu/marginalia/crawling/io/format/CompatibleLegacySerializableCrawlDataStream.java similarity index 99% rename from code/process-models/crawling-model/src/main/java/nu/marginalia/crawling/io/format/CompatibleLegacySerializableCrawlDataStream.java rename to code/process-models/crawling-model/java/nu/marginalia/crawling/io/format/CompatibleLegacySerializableCrawlDataStream.java index 0b5b7aa8..76ecf7e7 100644 --- a/code/process-models/crawling-model/src/main/java/nu/marginalia/crawling/io/format/CompatibleLegacySerializableCrawlDataStream.java +++ b/code/process-models/crawling-model/java/nu/marginalia/crawling/io/format/CompatibleLegacySerializableCrawlDataStream.java @@ -10,7 +10,6 @@ import nu.marginalia.crawling.model.SerializableCrawlData; import java.io.*; import java.nio.file.Path; -import java.util.Objects; import static java.util.Objects.*; diff --git a/code/process-models/crawling-model/src/main/java/nu/marginalia/crawling/io/format/FastLegacySerializableCrawlDataStream.java b/code/process-models/crawling-model/java/nu/marginalia/crawling/io/format/FastLegacySerializableCrawlDataStream.java similarity index 100% rename from code/process-models/crawling-model/src/main/java/nu/marginalia/crawling/io/format/FastLegacySerializableCrawlDataStream.java rename to code/process-models/crawling-model/java/nu/marginalia/crawling/io/format/FastLegacySerializableCrawlDataStream.java diff --git a/code/process-models/crawling-model/src/main/java/nu/marginalia/crawling/io/format/ParquetSerializableCrawlDataStream.java b/code/process-models/crawling-model/java/nu/marginalia/crawling/io/format/ParquetSerializableCrawlDataStream.java similarity index 100% rename from code/process-models/crawling-model/src/main/java/nu/marginalia/crawling/io/format/ParquetSerializableCrawlDataStream.java rename to 
code/process-models/crawling-model/java/nu/marginalia/crawling/io/format/ParquetSerializableCrawlDataStream.java diff --git a/code/process-models/crawling-model/src/main/java/nu/marginalia/crawling/model/CrawledDocument.java b/code/process-models/crawling-model/java/nu/marginalia/crawling/model/CrawledDocument.java similarity index 98% rename from code/process-models/crawling-model/src/main/java/nu/marginalia/crawling/model/CrawledDocument.java rename to code/process-models/crawling-model/java/nu/marginalia/crawling/model/CrawledDocument.java index 497f7a00..bb344dfb 100644 --- a/code/process-models/crawling-model/src/main/java/nu/marginalia/crawling/model/CrawledDocument.java +++ b/code/process-models/crawling-model/java/nu/marginalia/crawling/model/CrawledDocument.java @@ -3,7 +3,6 @@ package nu.marginalia.crawling.model; import lombok.AllArgsConstructor; import lombok.Builder; import lombok.ToString; -import nu.marginalia.bigstring.BigString; import nu.marginalia.model.EdgeUrl; import org.apache.commons.lang3.StringUtils; import org.jetbrains.annotations.Nullable; diff --git a/code/process-models/crawling-model/src/main/java/nu/marginalia/crawling/model/CrawledDomain.java b/code/process-models/crawling-model/java/nu/marginalia/crawling/model/CrawledDomain.java similarity index 100% rename from code/process-models/crawling-model/src/main/java/nu/marginalia/crawling/model/CrawledDomain.java rename to code/process-models/crawling-model/java/nu/marginalia/crawling/model/CrawledDomain.java diff --git a/code/process-models/crawling-model/src/main/java/nu/marginalia/crawling/model/CrawlerDocumentStatus.java b/code/process-models/crawling-model/java/nu/marginalia/crawling/model/CrawlerDocumentStatus.java similarity index 100% rename from code/process-models/crawling-model/src/main/java/nu/marginalia/crawling/model/CrawlerDocumentStatus.java rename to code/process-models/crawling-model/java/nu/marginalia/crawling/model/CrawlerDocumentStatus.java diff --git 
a/code/process-models/crawling-model/src/main/java/nu/marginalia/crawling/model/CrawlerDomainStatus.java b/code/process-models/crawling-model/java/nu/marginalia/crawling/model/CrawlerDomainStatus.java similarity index 100% rename from code/process-models/crawling-model/src/main/java/nu/marginalia/crawling/model/CrawlerDomainStatus.java rename to code/process-models/crawling-model/java/nu/marginalia/crawling/model/CrawlerDomainStatus.java diff --git a/code/process-models/crawling-model/src/main/java/nu/marginalia/crawling/model/SerializableCrawlData.java b/code/process-models/crawling-model/java/nu/marginalia/crawling/model/SerializableCrawlData.java similarity index 100% rename from code/process-models/crawling-model/src/main/java/nu/marginalia/crawling/model/SerializableCrawlData.java rename to code/process-models/crawling-model/java/nu/marginalia/crawling/model/SerializableCrawlData.java diff --git a/code/process-models/crawling-model/src/main/java/nu/marginalia/crawling/parquet/CrawledDocumentParquetRecord.java b/code/process-models/crawling-model/java/nu/marginalia/crawling/parquet/CrawledDocumentParquetRecord.java similarity index 100% rename from code/process-models/crawling-model/src/main/java/nu/marginalia/crawling/parquet/CrawledDocumentParquetRecord.java rename to code/process-models/crawling-model/java/nu/marginalia/crawling/parquet/CrawledDocumentParquetRecord.java diff --git a/code/process-models/crawling-model/src/main/java/nu/marginalia/crawling/parquet/CrawledDocumentParquetRecordFileReader.java b/code/process-models/crawling-model/java/nu/marginalia/crawling/parquet/CrawledDocumentParquetRecordFileReader.java similarity index 100% rename from code/process-models/crawling-model/src/main/java/nu/marginalia/crawling/parquet/CrawledDocumentParquetRecordFileReader.java rename to code/process-models/crawling-model/java/nu/marginalia/crawling/parquet/CrawledDocumentParquetRecordFileReader.java diff --git 
a/code/process-models/crawling-model/src/main/java/nu/marginalia/crawling/parquet/CrawledDocumentParquetRecordFileWriter.java b/code/process-models/crawling-model/java/nu/marginalia/crawling/parquet/CrawledDocumentParquetRecordFileWriter.java similarity index 100% rename from code/process-models/crawling-model/src/main/java/nu/marginalia/crawling/parquet/CrawledDocumentParquetRecordFileWriter.java rename to code/process-models/crawling-model/java/nu/marginalia/crawling/parquet/CrawledDocumentParquetRecordFileWriter.java diff --git a/code/process-models/crawling-model/src/main/java/org/netpreserve/jwarc/WarcXCookieInformationHeader.java b/code/process-models/crawling-model/java/org/netpreserve/jwarc/WarcXCookieInformationHeader.java similarity index 100% rename from code/process-models/crawling-model/src/main/java/org/netpreserve/jwarc/WarcXCookieInformationHeader.java rename to code/process-models/crawling-model/java/org/netpreserve/jwarc/WarcXCookieInformationHeader.java diff --git a/code/process-models/crawling-model/src/main/java/org/netpreserve/jwarc/WarcXEntityRefused.java b/code/process-models/crawling-model/java/org/netpreserve/jwarc/WarcXEntityRefused.java similarity index 100% rename from code/process-models/crawling-model/src/main/java/org/netpreserve/jwarc/WarcXEntityRefused.java rename to code/process-models/crawling-model/java/org/netpreserve/jwarc/WarcXEntityRefused.java diff --git a/code/process-models/crawling-model/src/main/java/org/netpreserve/jwarc/WarcXResponseReference.java b/code/process-models/crawling-model/java/org/netpreserve/jwarc/WarcXResponseReference.java similarity index 100% rename from code/process-models/crawling-model/src/main/java/org/netpreserve/jwarc/WarcXResponseReference.java rename to code/process-models/crawling-model/java/org/netpreserve/jwarc/WarcXResponseReference.java diff --git a/code/process-models/crawling-model/src/main/java/plan/CrawlPlan.java b/code/process-models/crawling-model/java/plan/CrawlPlan.java similarity 
index 100% rename from code/process-models/crawling-model/src/main/java/plan/CrawlPlan.java rename to code/process-models/crawling-model/java/plan/CrawlPlan.java diff --git a/code/process-models/crawling-model/src/main/java/plan/CrawlPlanLoader.java b/code/process-models/crawling-model/java/plan/CrawlPlanLoader.java similarity index 100% rename from code/process-models/crawling-model/src/main/java/plan/CrawlPlanLoader.java rename to code/process-models/crawling-model/java/plan/CrawlPlanLoader.java diff --git a/code/process-models/crawling-model/readme.md b/code/process-models/crawling-model/readme.md index ac0d0906..3bb9cb58 100644 --- a/code/process-models/crawling-model/readme.md +++ b/code/process-models/crawling-model/readme.md @@ -15,27 +15,27 @@ removed in the future. ## Central Classes -* [CrawledDocument](src/main/java/nu/marginalia/crawling/model/CrawledDocument.java) -* [CrawledDomain](src/main/java/nu/marginalia/crawling/model/CrawledDomain.java) +* [CrawledDocument](java/nu/marginalia/crawling/model/CrawledDocument.java) +* [CrawledDomain](java/nu/marginalia/crawling/model/CrawledDomain.java) ### Serialization These serialization classes automatically negotiate the serialization format based on the file extension. -Data is accessed through a [SerializableCrawlDataStream](src/main/java/nu/marginalia/crawling/io/SerializableCrawlDataStream.java), +Data is accessed through a [SerializableCrawlDataStream](java/nu/marginalia/crawling/io/SerializableCrawlDataStream.java), which is a somewhat enhanced Iterator that can be used to read data. 
-* [CrawledDomainReader](src/main/java/nu/marginalia/crawling/io/CrawledDomainReader.java) -* [CrawledDomainWriter](src/main/java/nu/marginalia/crawling/io/CrawledDomainWriter.java) +* [CrawledDomainReader](java/nu/marginalia/crawling/io/CrawledDomainReader.java) +* [CrawledDomainWriter](java/nu/marginalia/crawling/io/CrawledDomainWriter.java) ### Parquet Serialization -The parquet serialization is done using the [CrawledDocumentParquetRecordFileReader](src/main/java/nu/marginalia/crawling/parquet/CrawledDocumentParquetRecordFileReader.java) -and [CrawledDocumentParquetRecordFileWriter](src/main/java/nu/marginalia/crawling/parquet/CrawledDocumentParquetRecordFileWriter.java) classes, +The parquet serialization is done using the [CrawledDocumentParquetRecordFileReader](java/nu/marginalia/crawling/parquet/CrawledDocumentParquetRecordFileReader.java) +and [CrawledDocumentParquetRecordFileWriter](java/nu/marginalia/crawling/parquet/CrawledDocumentParquetRecordFileWriter.java) classes, which read and write parquet files respectively. 
-The model classes are serialized to parquet using the [CrawledDocumentParquetRecord](src/main/java/nu/marginalia/crawling/parquet/CrawledDocumentParquetRecord.java) +The model classes are serialized to parquet using the [CrawledDocumentParquetRecord](java/nu/marginalia/crawling/parquet/CrawledDocumentParquetRecord.java) The record has the following fields: diff --git a/code/process-models/crawling-model/src/test/java/nu/marginalia/crawling/model/CrawledDocumentTest.java b/code/process-models/crawling-model/test/nu/marginalia/crawling/model/CrawledDocumentTest.java similarity index 100% rename from code/process-models/crawling-model/src/test/java/nu/marginalia/crawling/model/CrawledDocumentTest.java rename to code/process-models/crawling-model/test/nu/marginalia/crawling/model/CrawledDocumentTest.java diff --git a/code/process-models/crawling-model/src/test/java/nu/marginalia/crawling/parquet/CrawledDocumentParquetRecordFileWriterTest.java b/code/process-models/crawling-model/test/nu/marginalia/crawling/parquet/CrawledDocumentParquetRecordFileWriterTest.java similarity index 100% rename from code/process-models/crawling-model/src/test/java/nu/marginalia/crawling/parquet/CrawledDocumentParquetRecordFileWriterTest.java rename to code/process-models/crawling-model/test/nu/marginalia/crawling/parquet/CrawledDocumentParquetRecordFileWriterTest.java diff --git a/code/process-models/processed-data/build.gradle b/code/process-models/processed-data/build.gradle index d77ebab5..910cecc6 100644 --- a/code/process-models/processed-data/build.gradle +++ b/code/process-models/processed-data/build.gradle @@ -10,6 +10,8 @@ java { languageVersion.set(JavaLanguageVersion.of(21)) } } +apply from: "$rootProject.projectDir/srcsets.gradle" + dependencies { implementation libs.bundles.slf4j diff --git a/code/process-models/processed-data/src/main/java/nu/marginalia/io/processed/DocumentRecordParquetFileReader.java 
b/code/process-models/processed-data/java/nu/marginalia/io/processed/DocumentRecordParquetFileReader.java similarity index 100% rename from code/process-models/processed-data/src/main/java/nu/marginalia/io/processed/DocumentRecordParquetFileReader.java rename to code/process-models/processed-data/java/nu/marginalia/io/processed/DocumentRecordParquetFileReader.java diff --git a/code/process-models/processed-data/src/main/java/nu/marginalia/io/processed/DocumentRecordParquetFileWriter.java b/code/process-models/processed-data/java/nu/marginalia/io/processed/DocumentRecordParquetFileWriter.java similarity index 100% rename from code/process-models/processed-data/src/main/java/nu/marginalia/io/processed/DocumentRecordParquetFileWriter.java rename to code/process-models/processed-data/java/nu/marginalia/io/processed/DocumentRecordParquetFileWriter.java diff --git a/code/process-models/processed-data/src/main/java/nu/marginalia/io/processed/DomainLinkRecordParquetFileReader.java b/code/process-models/processed-data/java/nu/marginalia/io/processed/DomainLinkRecordParquetFileReader.java similarity index 95% rename from code/process-models/processed-data/src/main/java/nu/marginalia/io/processed/DomainLinkRecordParquetFileReader.java rename to code/process-models/processed-data/java/nu/marginalia/io/processed/DomainLinkRecordParquetFileReader.java index e778169e..efa109cc 100644 --- a/code/process-models/processed-data/src/main/java/nu/marginalia/io/processed/DomainLinkRecordParquetFileReader.java +++ b/code/process-models/processed-data/java/nu/marginalia/io/processed/DomainLinkRecordParquetFileReader.java @@ -3,7 +3,6 @@ package nu.marginalia.io.processed; import blue.strategic.parquet.HydratorSupplier; import blue.strategic.parquet.ParquetReader; import nu.marginalia.model.processed.DomainLinkRecord; -import nu.marginalia.model.processed.DomainRecord; import org.jetbrains.annotations.NotNull; import java.io.IOException; diff --git 
a/code/process-models/processed-data/src/main/java/nu/marginalia/io/processed/DomainLinkRecordParquetFileWriter.java b/code/process-models/processed-data/java/nu/marginalia/io/processed/DomainLinkRecordParquetFileWriter.java similarity index 100% rename from code/process-models/processed-data/src/main/java/nu/marginalia/io/processed/DomainLinkRecordParquetFileWriter.java rename to code/process-models/processed-data/java/nu/marginalia/io/processed/DomainLinkRecordParquetFileWriter.java diff --git a/code/process-models/processed-data/src/main/java/nu/marginalia/io/processed/DomainRecordParquetFileReader.java b/code/process-models/processed-data/java/nu/marginalia/io/processed/DomainRecordParquetFileReader.java similarity index 100% rename from code/process-models/processed-data/src/main/java/nu/marginalia/io/processed/DomainRecordParquetFileReader.java rename to code/process-models/processed-data/java/nu/marginalia/io/processed/DomainRecordParquetFileReader.java diff --git a/code/process-models/processed-data/src/main/java/nu/marginalia/io/processed/DomainRecordParquetFileWriter.java b/code/process-models/processed-data/java/nu/marginalia/io/processed/DomainRecordParquetFileWriter.java similarity index 100% rename from code/process-models/processed-data/src/main/java/nu/marginalia/io/processed/DomainRecordParquetFileWriter.java rename to code/process-models/processed-data/java/nu/marginalia/io/processed/DomainRecordParquetFileWriter.java diff --git a/code/process-models/processed-data/src/main/java/nu/marginalia/io/processed/ProcessedDataFileNames.java b/code/process-models/processed-data/java/nu/marginalia/io/processed/ProcessedDataFileNames.java similarity index 100% rename from code/process-models/processed-data/src/main/java/nu/marginalia/io/processed/ProcessedDataFileNames.java rename to code/process-models/processed-data/java/nu/marginalia/io/processed/ProcessedDataFileNames.java diff --git 
a/code/process-models/processed-data/src/main/java/nu/marginalia/model/processed/DocumentRecord.java b/code/process-models/processed-data/java/nu/marginalia/model/processed/DocumentRecord.java similarity index 100% rename from code/process-models/processed-data/src/main/java/nu/marginalia/model/processed/DocumentRecord.java rename to code/process-models/processed-data/java/nu/marginalia/model/processed/DocumentRecord.java diff --git a/code/process-models/processed-data/src/main/java/nu/marginalia/model/processed/DocumentRecordKeywordsProjection.java b/code/process-models/processed-data/java/nu/marginalia/model/processed/DocumentRecordKeywordsProjection.java similarity index 100% rename from code/process-models/processed-data/src/main/java/nu/marginalia/model/processed/DocumentRecordKeywordsProjection.java rename to code/process-models/processed-data/java/nu/marginalia/model/processed/DocumentRecordKeywordsProjection.java diff --git a/code/process-models/processed-data/src/main/java/nu/marginalia/model/processed/DocumentRecordMetadataProjection.java b/code/process-models/processed-data/java/nu/marginalia/model/processed/DocumentRecordMetadataProjection.java similarity index 100% rename from code/process-models/processed-data/src/main/java/nu/marginalia/model/processed/DocumentRecordMetadataProjection.java rename to code/process-models/processed-data/java/nu/marginalia/model/processed/DocumentRecordMetadataProjection.java diff --git a/code/process-models/processed-data/src/main/java/nu/marginalia/model/processed/DomainLinkRecord.java b/code/process-models/processed-data/java/nu/marginalia/model/processed/DomainLinkRecord.java similarity index 100% rename from code/process-models/processed-data/src/main/java/nu/marginalia/model/processed/DomainLinkRecord.java rename to code/process-models/processed-data/java/nu/marginalia/model/processed/DomainLinkRecord.java diff --git a/code/process-models/processed-data/src/main/java/nu/marginalia/model/processed/DomainRecord.java 
b/code/process-models/processed-data/java/nu/marginalia/model/processed/DomainRecord.java similarity index 100% rename from code/process-models/processed-data/src/main/java/nu/marginalia/model/processed/DomainRecord.java rename to code/process-models/processed-data/java/nu/marginalia/model/processed/DomainRecord.java diff --git a/code/process-models/processed-data/src/main/java/nu/marginalia/model/processed/DomainWithIp.java b/code/process-models/processed-data/java/nu/marginalia/model/processed/DomainWithIp.java similarity index 100% rename from code/process-models/processed-data/src/main/java/nu/marginalia/model/processed/DomainWithIp.java rename to code/process-models/processed-data/java/nu/marginalia/model/processed/DomainWithIp.java diff --git a/code/process-models/processed-data/readme.md b/code/process-models/processed-data/readme.md index 4bc8c857..e7f5cebb 100644 --- a/code/process-models/processed-data/readme.md +++ b/code/process-models/processed-data/readme.md @@ -4,11 +4,11 @@ reading and writing parquet files with the output from the Main models: -* [DocumentRecord](src/main/java/nu/marginalia/model/processed/DocumentRecord.java) -* * [DocumentRecordKeywordsProjection](src/main/java/nu/marginalia/model/processed/DocumentRecordKeywordsProjection.java) -* * [DocumentRecordMetadataProjection](src/main/java/nu/marginalia/model/processed/DocumentRecordMetadataProjection.java) -* [DomainLinkRecord](src/main/java/nu/marginalia/model/processed/DomainLinkRecord.java) -* [DomainRecord](src/main/java/nu/marginalia/model/processed/DomainRecord.java) +* [DocumentRecord](java/nu/marginalia/model/processed/DocumentRecord.java) +* * [DocumentRecordKeywordsProjection](java/nu/marginalia/model/processed/DocumentRecordKeywordsProjection.java) +* * [DocumentRecordMetadataProjection](java/nu/marginalia/model/processed/DocumentRecordMetadataProjection.java) +* [DomainLinkRecord](java/nu/marginalia/model/processed/DomainLinkRecord.java) +* 
[DomainRecord](java/nu/marginalia/model/processed/DomainRecord.java) Since parquet is a column based format, some of the readable models are projections that only read parts of the input file. diff --git a/code/process-models/processed-data/src/test/java/nu/marginalia/io/processed/DocumentRecordParquetFileReaderTest.java b/code/process-models/processed-data/test/nu/marginalia/io/processed/DocumentRecordParquetFileReaderTest.java similarity index 100% rename from code/process-models/processed-data/src/test/java/nu/marginalia/io/processed/DocumentRecordParquetFileReaderTest.java rename to code/process-models/processed-data/test/nu/marginalia/io/processed/DocumentRecordParquetFileReaderTest.java diff --git a/code/process-models/processed-data/src/test/java/nu/marginalia/io/processed/DomainLinkRecordParquetFileReaderTest.java b/code/process-models/processed-data/test/nu/marginalia/io/processed/DomainLinkRecordParquetFileReaderTest.java similarity index 100% rename from code/process-models/processed-data/src/test/java/nu/marginalia/io/processed/DomainLinkRecordParquetFileReaderTest.java rename to code/process-models/processed-data/test/nu/marginalia/io/processed/DomainLinkRecordParquetFileReaderTest.java diff --git a/code/process-models/processed-data/src/test/java/nu/marginalia/io/processed/DomainRecordParquetFileReaderTest.java b/code/process-models/processed-data/test/nu/marginalia/io/processed/DomainRecordParquetFileReaderTest.java similarity index 100% rename from code/process-models/processed-data/src/test/java/nu/marginalia/io/processed/DomainRecordParquetFileReaderTest.java rename to code/process-models/processed-data/test/nu/marginalia/io/processed/DomainRecordParquetFileReaderTest.java diff --git a/code/process-models/work-log/build.gradle b/code/process-models/work-log/build.gradle index 25fbf22e..fbade272 100644 --- a/code/process-models/work-log/build.gradle +++ b/code/process-models/work-log/build.gradle @@ -10,6 +10,8 @@ java { 
languageVersion.set(JavaLanguageVersion.of(21)) } } +apply from: "$rootProject.projectDir/srcsets.gradle" + dependencies { implementation libs.bundles.slf4j diff --git a/code/process-models/work-log/src/main/java/nu/marginalia/worklog/BatchingWorkLog.java b/code/process-models/work-log/java/nu/marginalia/worklog/BatchingWorkLog.java similarity index 100% rename from code/process-models/work-log/src/main/java/nu/marginalia/worklog/BatchingWorkLog.java rename to code/process-models/work-log/java/nu/marginalia/worklog/BatchingWorkLog.java diff --git a/code/process-models/work-log/src/main/java/nu/marginalia/worklog/BatchingWorkLogImpl.java b/code/process-models/work-log/java/nu/marginalia/worklog/BatchingWorkLogImpl.java similarity index 100% rename from code/process-models/work-log/src/main/java/nu/marginalia/worklog/BatchingWorkLogImpl.java rename to code/process-models/work-log/java/nu/marginalia/worklog/BatchingWorkLogImpl.java diff --git a/code/process-models/work-log/src/main/java/nu/marginalia/worklog/BatchingWorkLogInspector.java b/code/process-models/work-log/java/nu/marginalia/worklog/BatchingWorkLogInspector.java similarity index 100% rename from code/process-models/work-log/src/main/java/nu/marginalia/worklog/BatchingWorkLogInspector.java rename to code/process-models/work-log/java/nu/marginalia/worklog/BatchingWorkLogInspector.java diff --git a/code/process-models/work-log/src/test/java/nu/marginalia/worklog/BatchingWorkLogImplTest.java b/code/process-models/work-log/test/nu/marginalia/worklog/BatchingWorkLogImplTest.java similarity index 100% rename from code/process-models/work-log/src/test/java/nu/marginalia/worklog/BatchingWorkLogImplTest.java rename to code/process-models/work-log/test/nu/marginalia/worklog/BatchingWorkLogImplTest.java diff --git a/code/api/process-mqapi/build.gradle b/code/process-mqapi/build.gradle similarity index 86% rename from code/api/process-mqapi/build.gradle rename to code/process-mqapi/build.gradle index b99fdf75..b1313c62 
100644 --- a/code/api/process-mqapi/build.gradle +++ b/code/process-mqapi/build.gradle @@ -11,6 +11,8 @@ java { } } +apply from: "$rootProject.projectDir/srcsets.gradle" + dependencies { implementation project(':code:common:config') diff --git a/code/api/process-mqapi/src/main/java/nu/marginalia/mqapi/ProcessInboxNames.java b/code/process-mqapi/java/nu/marginalia/mqapi/ProcessInboxNames.java similarity index 100% rename from code/api/process-mqapi/src/main/java/nu/marginalia/mqapi/ProcessInboxNames.java rename to code/process-mqapi/java/nu/marginalia/mqapi/ProcessInboxNames.java diff --git a/code/api/process-mqapi/src/main/java/nu/marginalia/mqapi/converting/ConvertAction.java b/code/process-mqapi/java/nu/marginalia/mqapi/converting/ConvertAction.java similarity index 100% rename from code/api/process-mqapi/src/main/java/nu/marginalia/mqapi/converting/ConvertAction.java rename to code/process-mqapi/java/nu/marginalia/mqapi/converting/ConvertAction.java diff --git a/code/api/process-mqapi/src/main/java/nu/marginalia/mqapi/converting/ConvertRequest.java b/code/process-mqapi/java/nu/marginalia/mqapi/converting/ConvertRequest.java similarity index 98% rename from code/api/process-mqapi/src/main/java/nu/marginalia/mqapi/converting/ConvertRequest.java rename to code/process-mqapi/java/nu/marginalia/mqapi/converting/ConvertRequest.java index 51d678e3..3064ceca 100644 --- a/code/api/process-mqapi/src/main/java/nu/marginalia/mqapi/converting/ConvertRequest.java +++ b/code/process-mqapi/java/nu/marginalia/mqapi/converting/ConvertRequest.java @@ -1,7 +1,6 @@ package nu.marginalia.mqapi.converting; import lombok.AllArgsConstructor; -import lombok.NonNull; import nu.marginalia.storage.model.FileStorageId; import java.nio.file.Path; diff --git a/code/api/process-mqapi/src/main/java/nu/marginalia/mqapi/crawling/CrawlRequest.java b/code/process-mqapi/java/nu/marginalia/mqapi/crawling/CrawlRequest.java similarity index 100% rename from 
code/api/process-mqapi/src/main/java/nu/marginalia/mqapi/crawling/CrawlRequest.java rename to code/process-mqapi/java/nu/marginalia/mqapi/crawling/CrawlRequest.java diff --git a/code/api/process-mqapi/src/main/java/nu/marginalia/mqapi/index/CreateIndexRequest.java b/code/process-mqapi/java/nu/marginalia/mqapi/index/CreateIndexRequest.java similarity index 100% rename from code/api/process-mqapi/src/main/java/nu/marginalia/mqapi/index/CreateIndexRequest.java rename to code/process-mqapi/java/nu/marginalia/mqapi/index/CreateIndexRequest.java diff --git a/code/api/process-mqapi/src/main/java/nu/marginalia/mqapi/index/IndexName.java b/code/process-mqapi/java/nu/marginalia/mqapi/index/IndexName.java similarity index 100% rename from code/api/process-mqapi/src/main/java/nu/marginalia/mqapi/index/IndexName.java rename to code/process-mqapi/java/nu/marginalia/mqapi/index/IndexName.java diff --git a/code/api/process-mqapi/src/main/java/nu/marginalia/mqapi/loading/LoadRequest.java b/code/process-mqapi/java/nu/marginalia/mqapi/loading/LoadRequest.java similarity index 100% rename from code/api/process-mqapi/src/main/java/nu/marginalia/mqapi/loading/LoadRequest.java rename to code/process-mqapi/java/nu/marginalia/mqapi/loading/LoadRequest.java diff --git a/code/processes/converting-process/build.gradle b/code/processes/converting-process/build.gradle index 5b1dbde5..8c6b616e 100644 --- a/code/processes/converting-process/build.gradle +++ b/code/processes/converting-process/build.gradle @@ -18,14 +18,16 @@ application { tasks.distZip.enabled = false +apply from: "$rootProject.projectDir/srcsets.gradle" + dependencies { implementation project(':code:common:process') implementation project(':third-party:porterstemmer') implementation project(':third-party:count-min-sketch') - implementation project(':code:api:index-api') - implementation project(':code:api:process-mqapi') + implementation project(':code:index:api') + implementation project(':code:process-mqapi') implementation 
project(':code:common:model') implementation project(':code:common:db') @@ -34,7 +36,6 @@ dependencies { implementation project(':code:libraries:message-queue') implementation project(':code:libraries:blocking-thread-pool') implementation project(':code:common:service-discovery') - implementation project(':code:common:service-client') implementation project(':code:libraries:guarded-regex') implementation project(':code:libraries:easy-lsh') diff --git a/code/processes/converting-process/src/main/java/nu/marginalia/converting/ConverterMain.java b/code/processes/converting-process/java/nu/marginalia/converting/ConverterMain.java similarity index 99% rename from code/processes/converting-process/src/main/java/nu/marginalia/converting/ConverterMain.java rename to code/processes/converting-process/java/nu/marginalia/converting/ConverterMain.java index c72e284a..a570e72d 100644 --- a/code/processes/converting-process/src/main/java/nu/marginalia/converting/ConverterMain.java +++ b/code/processes/converting-process/java/nu/marginalia/converting/ConverterMain.java @@ -28,7 +28,6 @@ import nu.marginalia.converting.processor.DomainProcessor; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import javax.annotation.Nullable; import java.nio.file.Path; import java.sql.SQLException; import java.util.Collection; diff --git a/code/processes/converting-process/src/main/java/nu/marginalia/converting/ConverterModule.java b/code/processes/converting-process/java/nu/marginalia/converting/ConverterModule.java similarity index 95% rename from code/processes/converting-process/src/main/java/nu/marginalia/converting/ConverterModule.java rename to code/processes/converting-process/java/nu/marginalia/converting/ConverterModule.java index 903834f4..5f04c86f 100644 --- a/code/processes/converting-process/src/main/java/nu/marginalia/converting/ConverterModule.java +++ b/code/processes/converting-process/java/nu/marginalia/converting/ConverterModule.java @@ -4,7 +4,6 @@ import 
com.google.gson.Gson; import com.google.inject.AbstractModule; import com.google.inject.name.Names; import nu.marginalia.LanguageModels; -import nu.marginalia.ProcessConfiguration; import nu.marginalia.WmsaHome; import nu.marginalia.model.gson.GsonFactory; diff --git a/code/processes/converting-process/src/main/java/nu/marginalia/converting/model/DisqualifiedException.java b/code/processes/converting-process/java/nu/marginalia/converting/model/DisqualifiedException.java similarity index 100% rename from code/processes/converting-process/src/main/java/nu/marginalia/converting/model/DisqualifiedException.java rename to code/processes/converting-process/java/nu/marginalia/converting/model/DisqualifiedException.java diff --git a/code/processes/converting-process/src/main/java/nu/marginalia/converting/model/GeneratorType.java b/code/processes/converting-process/java/nu/marginalia/converting/model/GeneratorType.java similarity index 100% rename from code/processes/converting-process/src/main/java/nu/marginalia/converting/model/GeneratorType.java rename to code/processes/converting-process/java/nu/marginalia/converting/model/GeneratorType.java diff --git a/code/processes/converting-process/src/main/java/nu/marginalia/converting/model/ProcessedDocument.java b/code/processes/converting-process/java/nu/marginalia/converting/model/ProcessedDocument.java similarity index 100% rename from code/processes/converting-process/src/main/java/nu/marginalia/converting/model/ProcessedDocument.java rename to code/processes/converting-process/java/nu/marginalia/converting/model/ProcessedDocument.java diff --git a/code/processes/converting-process/src/main/java/nu/marginalia/converting/model/ProcessedDocumentDetails.java b/code/processes/converting-process/java/nu/marginalia/converting/model/ProcessedDocumentDetails.java similarity index 100% rename from code/processes/converting-process/src/main/java/nu/marginalia/converting/model/ProcessedDocumentDetails.java rename to 
code/processes/converting-process/java/nu/marginalia/converting/model/ProcessedDocumentDetails.java diff --git a/code/processes/converting-process/src/main/java/nu/marginalia/converting/model/ProcessedDomain.java b/code/processes/converting-process/java/nu/marginalia/converting/model/ProcessedDomain.java similarity index 100% rename from code/processes/converting-process/src/main/java/nu/marginalia/converting/model/ProcessedDomain.java rename to code/processes/converting-process/java/nu/marginalia/converting/model/ProcessedDomain.java diff --git a/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/AcceptableAds.java b/code/processes/converting-process/java/nu/marginalia/converting/processor/AcceptableAds.java similarity index 100% rename from code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/AcceptableAds.java rename to code/processes/converting-process/java/nu/marginalia/converting/processor/AcceptableAds.java diff --git a/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/ConverterDomainTypes.java b/code/processes/converting-process/java/nu/marginalia/converting/processor/ConverterDomainTypes.java similarity index 100% rename from code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/ConverterDomainTypes.java rename to code/processes/converting-process/java/nu/marginalia/converting/processor/ConverterDomainTypes.java diff --git a/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/DocumentClass.java b/code/processes/converting-process/java/nu/marginalia/converting/processor/DocumentClass.java similarity index 100% rename from code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/DocumentClass.java rename to code/processes/converting-process/java/nu/marginalia/converting/processor/DocumentClass.java diff --git 
a/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/DocumentDecorator.java b/code/processes/converting-process/java/nu/marginalia/converting/processor/DocumentDecorator.java similarity index 100% rename from code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/DocumentDecorator.java rename to code/processes/converting-process/java/nu/marginalia/converting/processor/DocumentDecorator.java diff --git a/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/DocumentProcessor.java b/code/processes/converting-process/java/nu/marginalia/converting/processor/DocumentProcessor.java similarity index 100% rename from code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/DocumentProcessor.java rename to code/processes/converting-process/java/nu/marginalia/converting/processor/DocumentProcessor.java diff --git a/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/DomainProcessor.java b/code/processes/converting-process/java/nu/marginalia/converting/processor/DomainProcessor.java similarity index 100% rename from code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/DomainProcessor.java rename to code/processes/converting-process/java/nu/marginalia/converting/processor/DomainProcessor.java diff --git a/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/MetaRobotsTag.java b/code/processes/converting-process/java/nu/marginalia/converting/processor/MetaRobotsTag.java similarity index 100% rename from code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/MetaRobotsTag.java rename to code/processes/converting-process/java/nu/marginalia/converting/processor/MetaRobotsTag.java diff --git a/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/SiteWords.java 
b/code/processes/converting-process/java/nu/marginalia/converting/processor/SiteWords.java similarity index 100% rename from code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/SiteWords.java rename to code/processes/converting-process/java/nu/marginalia/converting/processor/SiteWords.java diff --git a/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/logic/DocumentGeneratorExtractor.java b/code/processes/converting-process/java/nu/marginalia/converting/processor/logic/DocumentGeneratorExtractor.java similarity index 100% rename from code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/logic/DocumentGeneratorExtractor.java rename to code/processes/converting-process/java/nu/marginalia/converting/processor/logic/DocumentGeneratorExtractor.java diff --git a/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/logic/DocumentLengthLogic.java b/code/processes/converting-process/java/nu/marginalia/converting/processor/logic/DocumentLengthLogic.java similarity index 100% rename from code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/logic/DocumentLengthLogic.java rename to code/processes/converting-process/java/nu/marginalia/converting/processor/logic/DocumentLengthLogic.java diff --git a/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/logic/DocumentValuator.java b/code/processes/converting-process/java/nu/marginalia/converting/processor/logic/DocumentValuator.java similarity index 100% rename from code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/logic/DocumentValuator.java rename to code/processes/converting-process/java/nu/marginalia/converting/processor/logic/DocumentValuator.java diff --git a/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/logic/FeatureExtractor.java 
b/code/processes/converting-process/java/nu/marginalia/converting/processor/logic/FeatureExtractor.java similarity index 100% rename from code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/logic/FeatureExtractor.java rename to code/processes/converting-process/java/nu/marginalia/converting/processor/logic/FeatureExtractor.java diff --git a/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/logic/HtmlStandardExtractor.java b/code/processes/converting-process/java/nu/marginalia/converting/processor/logic/HtmlStandardExtractor.java similarity index 100% rename from code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/logic/HtmlStandardExtractor.java rename to code/processes/converting-process/java/nu/marginalia/converting/processor/logic/HtmlStandardExtractor.java diff --git a/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/logic/LshDocumentDeduplicator.java b/code/processes/converting-process/java/nu/marginalia/converting/processor/logic/LshDocumentDeduplicator.java similarity index 100% rename from code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/logic/LshDocumentDeduplicator.java rename to code/processes/converting-process/java/nu/marginalia/converting/processor/logic/LshDocumentDeduplicator.java diff --git a/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/logic/PlainTextLogic.java b/code/processes/converting-process/java/nu/marginalia/converting/processor/logic/PlainTextLogic.java similarity index 99% rename from code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/logic/PlainTextLogic.java rename to code/processes/converting-process/java/nu/marginalia/converting/processor/logic/PlainTextLogic.java index 27c493db..2fc9068f 100644 --- 
a/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/logic/PlainTextLogic.java +++ b/code/processes/converting-process/java/nu/marginalia/converting/processor/logic/PlainTextLogic.java @@ -5,7 +5,6 @@ import org.apache.commons.lang3.StringUtils; import java.util.ArrayList; import java.util.List; -import java.util.regex.Pattern; import java.util.stream.Collectors; public class PlainTextLogic { diff --git a/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/logic/TitleExtractor.java b/code/processes/converting-process/java/nu/marginalia/converting/processor/logic/TitleExtractor.java similarity index 100% rename from code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/logic/TitleExtractor.java rename to code/processes/converting-process/java/nu/marginalia/converting/processor/logic/TitleExtractor.java diff --git a/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/logic/dom/DomPruningFilter.java b/code/processes/converting-process/java/nu/marginalia/converting/processor/logic/dom/DomPruningFilter.java similarity index 100% rename from code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/logic/dom/DomPruningFilter.java rename to code/processes/converting-process/java/nu/marginalia/converting/processor/logic/dom/DomPruningFilter.java diff --git a/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/logic/dom/MeasureLengthVisitor.java b/code/processes/converting-process/java/nu/marginalia/converting/processor/logic/dom/MeasureLengthVisitor.java similarity index 100% rename from code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/logic/dom/MeasureLengthVisitor.java rename to code/processes/converting-process/java/nu/marginalia/converting/processor/logic/dom/MeasureLengthVisitor.java diff --git 
a/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/logic/links/CommonKeywordExtractor.java b/code/processes/converting-process/java/nu/marginalia/converting/processor/logic/links/CommonKeywordExtractor.java similarity index 100% rename from code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/logic/links/CommonKeywordExtractor.java rename to code/processes/converting-process/java/nu/marginalia/converting/processor/logic/links/CommonKeywordExtractor.java diff --git a/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/logic/links/FileLinks.java b/code/processes/converting-process/java/nu/marginalia/converting/processor/logic/links/FileLinks.java similarity index 100% rename from code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/logic/links/FileLinks.java rename to code/processes/converting-process/java/nu/marginalia/converting/processor/logic/links/FileLinks.java diff --git a/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/logic/links/LinkGraph.java b/code/processes/converting-process/java/nu/marginalia/converting/processor/logic/links/LinkGraph.java similarity index 100% rename from code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/logic/links/LinkGraph.java rename to code/processes/converting-process/java/nu/marginalia/converting/processor/logic/links/LinkGraph.java diff --git a/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/logic/links/LinkProcessor.java b/code/processes/converting-process/java/nu/marginalia/converting/processor/logic/links/LinkProcessor.java similarity index 100% rename from code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/logic/links/LinkProcessor.java rename to code/processes/converting-process/java/nu/marginalia/converting/processor/logic/links/LinkProcessor.java diff --git 
a/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/logic/links/TopKeywords.java b/code/processes/converting-process/java/nu/marginalia/converting/processor/logic/links/TopKeywords.java similarity index 100% rename from code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/logic/links/TopKeywords.java rename to code/processes/converting-process/java/nu/marginalia/converting/processor/logic/links/TopKeywords.java diff --git a/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/plugin/AbstractDocumentProcessorPlugin.java b/code/processes/converting-process/java/nu/marginalia/converting/processor/plugin/AbstractDocumentProcessorPlugin.java similarity index 100% rename from code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/plugin/AbstractDocumentProcessorPlugin.java rename to code/processes/converting-process/java/nu/marginalia/converting/processor/plugin/AbstractDocumentProcessorPlugin.java diff --git a/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/plugin/HtmlDocumentProcessorPlugin.java b/code/processes/converting-process/java/nu/marginalia/converting/processor/plugin/HtmlDocumentProcessorPlugin.java similarity index 100% rename from code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/plugin/HtmlDocumentProcessorPlugin.java rename to code/processes/converting-process/java/nu/marginalia/converting/processor/plugin/HtmlDocumentProcessorPlugin.java diff --git a/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/plugin/PlainTextDocumentProcessorPlugin.java b/code/processes/converting-process/java/nu/marginalia/converting/processor/plugin/PlainTextDocumentProcessorPlugin.java similarity index 100% rename from code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/plugin/PlainTextDocumentProcessorPlugin.java rename to 
code/processes/converting-process/java/nu/marginalia/converting/processor/plugin/PlainTextDocumentProcessorPlugin.java diff --git a/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/plugin/specialization/BlogSpecialization.java b/code/processes/converting-process/java/nu/marginalia/converting/processor/plugin/specialization/BlogSpecialization.java similarity index 100% rename from code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/plugin/specialization/BlogSpecialization.java rename to code/processes/converting-process/java/nu/marginalia/converting/processor/plugin/specialization/BlogSpecialization.java diff --git a/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/plugin/specialization/DefaultSpecialization.java b/code/processes/converting-process/java/nu/marginalia/converting/processor/plugin/specialization/DefaultSpecialization.java similarity index 100% rename from code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/plugin/specialization/DefaultSpecialization.java rename to code/processes/converting-process/java/nu/marginalia/converting/processor/plugin/specialization/DefaultSpecialization.java diff --git a/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/plugin/specialization/HtmlProcessorSpecializations.java b/code/processes/converting-process/java/nu/marginalia/converting/processor/plugin/specialization/HtmlProcessorSpecializations.java similarity index 100% rename from code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/plugin/specialization/HtmlProcessorSpecializations.java rename to code/processes/converting-process/java/nu/marginalia/converting/processor/plugin/specialization/HtmlProcessorSpecializations.java diff --git a/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/plugin/specialization/JavadocSpecialization.java 
b/code/processes/converting-process/java/nu/marginalia/converting/processor/plugin/specialization/JavadocSpecialization.java similarity index 100% rename from code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/plugin/specialization/JavadocSpecialization.java rename to code/processes/converting-process/java/nu/marginalia/converting/processor/plugin/specialization/JavadocSpecialization.java diff --git a/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/plugin/specialization/LemmySpecialization.java b/code/processes/converting-process/java/nu/marginalia/converting/processor/plugin/specialization/LemmySpecialization.java similarity index 100% rename from code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/plugin/specialization/LemmySpecialization.java rename to code/processes/converting-process/java/nu/marginalia/converting/processor/plugin/specialization/LemmySpecialization.java diff --git a/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/plugin/specialization/MariadbKbSpecialization.java b/code/processes/converting-process/java/nu/marginalia/converting/processor/plugin/specialization/MariadbKbSpecialization.java similarity index 97% rename from code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/plugin/specialization/MariadbKbSpecialization.java rename to code/processes/converting-process/java/nu/marginalia/converting/processor/plugin/specialization/MariadbKbSpecialization.java index b26288e9..3aa35973 100644 --- a/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/plugin/specialization/MariadbKbSpecialization.java +++ b/code/processes/converting-process/java/nu/marginalia/converting/processor/plugin/specialization/MariadbKbSpecialization.java @@ -5,7 +5,6 @@ import com.google.inject.Singleton; import nu.marginalia.keyword.model.DocumentKeywordsBuilder; import 
nu.marginalia.model.idx.WordFlags; import nu.marginalia.summary.SummaryExtractor; -import org.apache.commons.lang3.StringUtils; import org.jsoup.nodes.Document; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/plugin/specialization/PhpBBSpecialization.java b/code/processes/converting-process/java/nu/marginalia/converting/processor/plugin/specialization/PhpBBSpecialization.java similarity index 92% rename from code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/plugin/specialization/PhpBBSpecialization.java rename to code/processes/converting-process/java/nu/marginalia/converting/processor/plugin/specialization/PhpBBSpecialization.java index b92ebcbc..947cc4c0 100644 --- a/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/plugin/specialization/PhpBBSpecialization.java +++ b/code/processes/converting-process/java/nu/marginalia/converting/processor/plugin/specialization/PhpBBSpecialization.java @@ -4,12 +4,9 @@ import com.google.inject.Inject; import com.google.inject.Singleton; import nu.marginalia.model.EdgeUrl; import nu.marginalia.summary.SummaryExtractor; -import org.jsoup.nodes.Document; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.util.Set; - @Singleton public class PhpBBSpecialization extends DefaultSpecialization { private static final Logger logger = LoggerFactory.getLogger(PhpBBSpecialization.class); diff --git a/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/plugin/specialization/WikiSpecialization.java b/code/processes/converting-process/java/nu/marginalia/converting/processor/plugin/specialization/WikiSpecialization.java similarity index 100% rename from code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/plugin/specialization/WikiSpecialization.java rename to 
code/processes/converting-process/java/nu/marginalia/converting/processor/plugin/specialization/WikiSpecialization.java diff --git a/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/plugin/specialization/XenForoSpecialization.java b/code/processes/converting-process/java/nu/marginalia/converting/processor/plugin/specialization/XenForoSpecialization.java similarity index 100% rename from code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/plugin/specialization/XenForoSpecialization.java rename to code/processes/converting-process/java/nu/marginalia/converting/processor/plugin/specialization/XenForoSpecialization.java diff --git a/code/processes/converting-process/src/main/java/nu/marginalia/converting/sideload/SideloadSource.java b/code/processes/converting-process/java/nu/marginalia/converting/sideload/SideloadSource.java similarity index 91% rename from code/processes/converting-process/src/main/java/nu/marginalia/converting/sideload/SideloadSource.java rename to code/processes/converting-process/java/nu/marginalia/converting/sideload/SideloadSource.java index a74cf456..5b64b798 100644 --- a/code/processes/converting-process/src/main/java/nu/marginalia/converting/sideload/SideloadSource.java +++ b/code/processes/converting-process/java/nu/marginalia/converting/sideload/SideloadSource.java @@ -2,7 +2,6 @@ package nu.marginalia.converting.sideload; import nu.marginalia.converting.model.ProcessedDocument; import nu.marginalia.converting.model.ProcessedDomain; -import nu.marginalia.model.EdgeUrl; import java.util.Iterator; diff --git a/code/processes/converting-process/src/main/java/nu/marginalia/converting/sideload/SideloadSourceFactory.java b/code/processes/converting-process/java/nu/marginalia/converting/sideload/SideloadSourceFactory.java similarity index 100% rename from code/processes/converting-process/src/main/java/nu/marginalia/converting/sideload/SideloadSourceFactory.java rename to 
code/processes/converting-process/java/nu/marginalia/converting/sideload/SideloadSourceFactory.java diff --git a/code/processes/converting-process/src/main/java/nu/marginalia/converting/sideload/SideloaderProcessing.java b/code/processes/converting-process/java/nu/marginalia/converting/sideload/SideloaderProcessing.java similarity index 100% rename from code/processes/converting-process/src/main/java/nu/marginalia/converting/sideload/SideloaderProcessing.java rename to code/processes/converting-process/java/nu/marginalia/converting/sideload/SideloaderProcessing.java diff --git a/code/processes/converting-process/src/main/java/nu/marginalia/converting/sideload/dirtree/DirtreeSideloadSpec.java b/code/processes/converting-process/java/nu/marginalia/converting/sideload/dirtree/DirtreeSideloadSpec.java similarity index 100% rename from code/processes/converting-process/src/main/java/nu/marginalia/converting/sideload/dirtree/DirtreeSideloadSpec.java rename to code/processes/converting-process/java/nu/marginalia/converting/sideload/dirtree/DirtreeSideloadSpec.java diff --git a/code/processes/converting-process/src/main/java/nu/marginalia/converting/sideload/dirtree/DirtreeSideloadSpecList.java b/code/processes/converting-process/java/nu/marginalia/converting/sideload/dirtree/DirtreeSideloadSpecList.java similarity index 100% rename from code/processes/converting-process/src/main/java/nu/marginalia/converting/sideload/dirtree/DirtreeSideloadSpecList.java rename to code/processes/converting-process/java/nu/marginalia/converting/sideload/dirtree/DirtreeSideloadSpecList.java diff --git a/code/processes/converting-process/src/main/java/nu/marginalia/converting/sideload/dirtree/DirtreeSideloader.java b/code/processes/converting-process/java/nu/marginalia/converting/sideload/dirtree/DirtreeSideloader.java similarity index 100% rename from code/processes/converting-process/src/main/java/nu/marginalia/converting/sideload/dirtree/DirtreeSideloader.java rename to 
code/processes/converting-process/java/nu/marginalia/converting/sideload/dirtree/DirtreeSideloader.java diff --git a/code/processes/converting-process/src/main/java/nu/marginalia/converting/sideload/dirtree/DirtreeSideloaderFactory.java b/code/processes/converting-process/java/nu/marginalia/converting/sideload/dirtree/DirtreeSideloaderFactory.java similarity index 100% rename from code/processes/converting-process/src/main/java/nu/marginalia/converting/sideload/dirtree/DirtreeSideloaderFactory.java rename to code/processes/converting-process/java/nu/marginalia/converting/sideload/dirtree/DirtreeSideloaderFactory.java diff --git a/code/processes/converting-process/src/main/java/nu/marginalia/converting/sideload/encyclopedia/EncyclopediaMarginaliaNuSideloader.java b/code/processes/converting-process/java/nu/marginalia/converting/sideload/encyclopedia/EncyclopediaMarginaliaNuSideloader.java similarity index 100% rename from code/processes/converting-process/src/main/java/nu/marginalia/converting/sideload/encyclopedia/EncyclopediaMarginaliaNuSideloader.java rename to code/processes/converting-process/java/nu/marginalia/converting/sideload/encyclopedia/EncyclopediaMarginaliaNuSideloader.java diff --git a/code/processes/converting-process/src/main/java/nu/marginalia/converting/sideload/reddit/RedditSideloader.java b/code/processes/converting-process/java/nu/marginalia/converting/sideload/reddit/RedditSideloader.java similarity index 99% rename from code/processes/converting-process/src/main/java/nu/marginalia/converting/sideload/reddit/RedditSideloader.java rename to code/processes/converting-process/java/nu/marginalia/converting/sideload/reddit/RedditSideloader.java index 24706317..d564b308 100644 --- a/code/processes/converting-process/src/main/java/nu/marginalia/converting/sideload/reddit/RedditSideloader.java +++ b/code/processes/converting-process/java/nu/marginalia/converting/sideload/reddit/RedditSideloader.java @@ -175,5 +175,5 @@ public class RedditSideloader 
implements SideloadSource { return doc; - }; + } } diff --git a/code/processes/converting-process/src/main/java/nu/marginalia/converting/sideload/stackexchange/StackexchangeSideloader.java b/code/processes/converting-process/java/nu/marginalia/converting/sideload/stackexchange/StackexchangeSideloader.java similarity index 100% rename from code/processes/converting-process/src/main/java/nu/marginalia/converting/sideload/stackexchange/StackexchangeSideloader.java rename to code/processes/converting-process/java/nu/marginalia/converting/sideload/stackexchange/StackexchangeSideloader.java diff --git a/code/processes/converting-process/src/main/java/nu/marginalia/converting/sideload/warc/WarcSideloader.java b/code/processes/converting-process/java/nu/marginalia/converting/sideload/warc/WarcSideloader.java similarity index 100% rename from code/processes/converting-process/src/main/java/nu/marginalia/converting/sideload/warc/WarcSideloader.java rename to code/processes/converting-process/java/nu/marginalia/converting/sideload/warc/WarcSideloader.java diff --git a/code/processes/converting-process/src/main/java/nu/marginalia/converting/util/LineUtils.java b/code/processes/converting-process/java/nu/marginalia/converting/util/LineUtils.java similarity index 100% rename from code/processes/converting-process/src/main/java/nu/marginalia/converting/util/LineUtils.java rename to code/processes/converting-process/java/nu/marginalia/converting/util/LineUtils.java diff --git a/code/processes/converting-process/src/main/java/nu/marginalia/converting/writer/ConverterBatchWritableIf.java b/code/processes/converting-process/java/nu/marginalia/converting/writer/ConverterBatchWritableIf.java similarity index 100% rename from code/processes/converting-process/src/main/java/nu/marginalia/converting/writer/ConverterBatchWritableIf.java rename to code/processes/converting-process/java/nu/marginalia/converting/writer/ConverterBatchWritableIf.java diff --git 
a/code/processes/converting-process/src/main/java/nu/marginalia/converting/writer/ConverterBatchWriter.java b/code/processes/converting-process/java/nu/marginalia/converting/writer/ConverterBatchWriter.java similarity index 100% rename from code/processes/converting-process/src/main/java/nu/marginalia/converting/writer/ConverterBatchWriter.java rename to code/processes/converting-process/java/nu/marginalia/converting/writer/ConverterBatchWriter.java diff --git a/code/processes/converting-process/src/main/java/nu/marginalia/converting/writer/ConverterBatchWriterIf.java b/code/processes/converting-process/java/nu/marginalia/converting/writer/ConverterBatchWriterIf.java similarity index 100% rename from code/processes/converting-process/src/main/java/nu/marginalia/converting/writer/ConverterBatchWriterIf.java rename to code/processes/converting-process/java/nu/marginalia/converting/writer/ConverterBatchWriterIf.java diff --git a/code/processes/converting-process/src/main/java/nu/marginalia/converting/writer/ConverterWriter.java b/code/processes/converting-process/java/nu/marginalia/converting/writer/ConverterWriter.java similarity index 98% rename from code/processes/converting-process/src/main/java/nu/marginalia/converting/writer/ConverterWriter.java rename to code/processes/converting-process/java/nu/marginalia/converting/writer/ConverterWriter.java index 6bac2804..bc86e369 100644 --- a/code/processes/converting-process/src/main/java/nu/marginalia/converting/writer/ConverterWriter.java +++ b/code/processes/converting-process/java/nu/marginalia/converting/writer/ConverterWriter.java @@ -1,7 +1,6 @@ package nu.marginalia.converting.writer; import lombok.SneakyThrows; -import nu.marginalia.converting.model.ProcessedDomain; import nu.marginalia.worklog.BatchingWorkLog; import org.jetbrains.annotations.Nullable; import org.slf4j.Logger; diff --git a/code/processes/converting-process/readme.md b/code/processes/converting-process/readme.md index 3a79c481..936ca7fe 100644 
--- a/code/processes/converting-process/readme.md +++ b/code/processes/converting-process/readme.md @@ -38,16 +38,16 @@ https://www.marginalia.nu/log/93_atags/ ## Central Classes -* [ConverterMain](src/main/java/nu/marginalia/converting/ConverterMain.java) orchestrates the conversion process. -* [DocumentProcessor](src/main/java/nu/marginalia/converting/processor/DocumentProcessor.java) converts a single document. -* - [HtmlDocumentProcessorPlugin](src/main/java/nu/marginalia/converting/processor/plugin/HtmlDocumentProcessorPlugin.java) +* [ConverterMain](java/nu/marginalia/converting/ConverterMain.java) orchestrates the conversion process. +* [DocumentProcessor](java/nu/marginalia/converting/processor/DocumentProcessor.java) converts a single document. +* - [HtmlDocumentProcessorPlugin](java/nu/marginalia/converting/processor/plugin/HtmlDocumentProcessorPlugin.java) has HTML-specific logic related to a document, keywords and identifies features such as whether it has javascript. -* * - [HtmlProcessorSpecializations](src/main/java/nu/marginalia/converting/processor/plugin/specialization/HtmlProcessorSpecializations.java) -* * - [XenForoSpecialization](src/main/java/nu/marginalia/converting/processor/plugin/specialization/XenForoSpecialization.java) ... -* - [PlainTextDocumentProcessorPlugin](src/main/java/nu/marginalia/converting/processor/plugin/PlainTextDocumentProcessorPlugin.java) +* * - [HtmlProcessorSpecializations](java/nu/marginalia/converting/processor/plugin/specialization/HtmlProcessorSpecializations.java) +* * - [XenForoSpecialization](java/nu/marginalia/converting/processor/plugin/specialization/XenForoSpecialization.java) ... +* - [PlainTextDocumentProcessorPlugin](java/nu/marginalia/converting/processor/plugin/PlainTextDocumentProcessorPlugin.java) has plain text-specific logic related to a document... 
-* [DomainProcessor](src/main/java/nu/marginalia/converting/processor/DomainProcessor.java) converts each document and +* [DomainProcessor](java/nu/marginalia/converting/processor/DomainProcessor.java) converts each document and generates domain-wide metadata such as link graphs. ## See Also diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/05-test.gmi b/code/processes/converting-process/test-resources/memex-marginalia/05-test.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/05-test.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/05-test.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/code/index.gmi b/code/processes/converting-process/test-resources/memex-marginalia/code/index.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/code/index.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/code/index.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/commons/dialogue.gmi b/code/processes/converting-process/test-resources/memex-marginalia/commons/dialogue.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/commons/dialogue.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/commons/dialogue.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/commons/search-failure-modes.gmi b/code/processes/converting-process/test-resources/memex-marginalia/commons/search-failure-modes.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/commons/search-failure-modes.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/commons/search-failure-modes.gmi diff --git 
a/code/processes/converting-process/src/test/resources/memex-marginalia/commons/self-interest.gmi b/code/processes/converting-process/test-resources/memex-marginalia/commons/self-interest.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/commons/self-interest.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/commons/self-interest.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/dead.gmi b/code/processes/converting-process/test-resources/memex-marginalia/dead.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/dead.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/dead.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/debian-laptop-install-log.gmi b/code/processes/converting-process/test-resources/memex-marginalia/debian-laptop-install-log.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/debian-laptop-install-log.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/debian-laptop-install-log.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/debian-laptop.gmi b/code/processes/converting-process/test-resources/memex-marginalia/debian-laptop.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/debian-laptop.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/debian-laptop.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/feed.gmi b/code/processes/converting-process/test-resources/memex-marginalia/feed.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/feed.gmi rename to 
code/processes/converting-process/test-resources/memex-marginalia/feed.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/fragments-old-web.gmi b/code/processes/converting-process/test-resources/memex-marginalia/fragments-old-web.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/fragments-old-web.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/fragments-old-web.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/good-video-games.gmi b/code/processes/converting-process/test-resources/memex-marginalia/good-video-games.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/good-video-games.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/good-video-games.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/guestbook/footer.gmi b/code/processes/converting-process/test-resources/memex-marginalia/guestbook/footer.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/guestbook/footer.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/guestbook/footer.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/guestbook/header.gmi b/code/processes/converting-process/test-resources/memex-marginalia/guestbook/header.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/guestbook/header.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/guestbook/header.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/guestbook/index.gmi b/code/processes/converting-process/test-resources/memex-marginalia/guestbook/index.gmi similarity index 100% rename from 
code/processes/converting-process/src/test/resources/memex-marginalia/guestbook/index.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/guestbook/index.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/index b/code/processes/converting-process/test-resources/memex-marginalia/index similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/index rename to code/processes/converting-process/test-resources/memex-marginalia/index diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/index.gmi b/code/processes/converting-process/test-resources/memex-marginalia/index.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/index.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/index.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/junk/DedupTest.gmi b/code/processes/converting-process/test-resources/memex-marginalia/junk/DedupTest.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/junk/DedupTest.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/junk/DedupTest.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/junk/bssl.gmi b/code/processes/converting-process/test-resources/memex-marginalia/junk/bssl.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/junk/bssl.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/junk/bssl.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/junk/eldritch-oneliner.gmi b/code/processes/converting-process/test-resources/memex-marginalia/junk/eldritch-oneliner.gmi similarity index 100% rename from 
code/processes/converting-process/src/test/resources/memex-marginalia/junk/eldritch-oneliner.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/junk/eldritch-oneliner.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/junk/server.gmi b/code/processes/converting-process/test-resources/memex-marginalia/junk/server.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/junk/server.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/junk/server.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/junk/very-old-code.cc.gmi b/code/processes/converting-process/test-resources/memex-marginalia/junk/very-old-code.cc.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/junk/very-old-code.cc.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/junk/very-old-code.cc.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/links/aggregators.gmi b/code/processes/converting-process/test-resources/memex-marginalia/links/aggregators.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/links/aggregators.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/links/aggregators.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/links/articles.gmi b/code/processes/converting-process/test-resources/memex-marginalia/links/articles.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/links/articles.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/links/articles.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/links/bookmarks.gmi 
b/code/processes/converting-process/test-resources/memex-marginalia/links/bookmarks.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/links/bookmarks.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/links/bookmarks.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/links/fragments-old-web.gmi b/code/processes/converting-process/test-resources/memex-marginalia/links/fragments-old-web.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/links/fragments-old-web.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/links/fragments-old-web.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/links/index.gmi b/code/processes/converting-process/test-resources/memex-marginalia/links/index.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/links/index.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/links/index.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/links/linkpocalypse.gmi b/code/processes/converting-process/test-resources/memex-marginalia/links/linkpocalypse.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/links/linkpocalypse.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/links/linkpocalypse.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/log/00-linkpocalypse.gmi b/code/processes/converting-process/test-resources/memex-marginalia/log/00-linkpocalypse.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/log/00-linkpocalypse.gmi rename to 
code/processes/converting-process/test-resources/memex-marginalia/log/00-linkpocalypse.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/log/01-astrolabe.gmi b/code/processes/converting-process/test-resources/memex-marginalia/log/01-astrolabe.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/log/01-astrolabe.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/log/01-astrolabe.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/log/02-re-tests.gmi b/code/processes/converting-process/test-resources/memex-marginalia/log/02-re-tests.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/log/02-re-tests.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/log/02-re-tests.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/log/03-writing-for-reading.gmi b/code/processes/converting-process/test-resources/memex-marginalia/log/03-writing-for-reading.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/log/03-writing-for-reading.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/log/03-writing-for-reading.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/log/04-link-farms.gmi b/code/processes/converting-process/test-resources/memex-marginalia/log/04-link-farms.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/log/04-link-farms.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/log/04-link-farms.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/log/05-minds-field.gmi b/code/processes/converting-process/test-resources/memex-marginalia/log/05-minds-field.gmi similarity 
index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/log/05-minds-field.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/log/05-minds-field.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/log/06-optimization.gmi b/code/processes/converting-process/test-resources/memex-marginalia/log/06-optimization.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/log/06-optimization.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/log/06-optimization.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/log/07-local-backlinks.gmi b/code/processes/converting-process/test-resources/memex-marginalia/log/07-local-backlinks.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/log/07-local-backlinks.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/log/07-local-backlinks.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/log/08-whatever-happened-to-the-memex.gmi b/code/processes/converting-process/test-resources/memex-marginalia/log/08-whatever-happened-to-the-memex.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/log/08-whatever-happened-to-the-memex.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/log/08-whatever-happened-to-the-memex.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/log/09-system-upgrade.gmi b/code/processes/converting-process/test-resources/memex-marginalia/log/09-system-upgrade.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/log/09-system-upgrade.gmi rename to 
code/processes/converting-process/test-resources/memex-marginalia/log/09-system-upgrade.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/log/10-astrolabe-2-sampling-bias.gmi b/code/processes/converting-process/test-resources/memex-marginalia/log/10-astrolabe-2-sampling-bias.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/log/10-astrolabe-2-sampling-bias.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/log/10-astrolabe-2-sampling-bias.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/log/11-dying-every-day.gmi b/code/processes/converting-process/test-resources/memex-marginalia/log/11-dying-every-day.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/log/11-dying-every-day.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/log/11-dying-every-day.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/log/12-bye-bye-gmail.gmi b/code/processes/converting-process/test-resources/memex-marginalia/log/12-bye-bye-gmail.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/log/12-bye-bye-gmail.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/log/12-bye-bye-gmail.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/log/13-static-html.gmi b/code/processes/converting-process/test-resources/memex-marginalia/log/13-static-html.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/log/13-static-html.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/log/13-static-html.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/log/13-test.gmi 
b/code/processes/converting-process/test-resources/memex-marginalia/log/13-test.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/log/13-test.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/log/13-test.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/log/14-enter-the-circle-of-blame.gmi b/code/processes/converting-process/test-resources/memex-marginalia/log/14-enter-the-circle-of-blame.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/log/14-enter-the-circle-of-blame.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/log/14-enter-the-circle-of-blame.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/log/15-stages-of-being.gmi b/code/processes/converting-process/test-resources/memex-marginalia/log/15-stages-of-being.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/log/15-stages-of-being.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/log/15-stages-of-being.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/log/16-cursed-motivation.gmi b/code/processes/converting-process/test-resources/memex-marginalia/log/16-cursed-motivation.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/log/16-cursed-motivation.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/log/16-cursed-motivation.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/log/17-git-isnt-a-web-service.gmi b/code/processes/converting-process/test-resources/memex-marginalia/log/17-git-isnt-a-web-service.gmi similarity index 100% rename from 
code/processes/converting-process/src/test/resources/memex-marginalia/log/17-git-isnt-a-web-service.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/log/17-git-isnt-a-web-service.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/log/18-soaring-high.gmi b/code/processes/converting-process/test-resources/memex-marginalia/log/18-soaring-high.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/log/18-soaring-high.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/log/18-soaring-high.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/log/19-website-discoverability-crisis.gmi b/code/processes/converting-process/test-resources/memex-marginalia/log/19-website-discoverability-crisis.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/log/19-website-discoverability-crisis.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/log/19-website-discoverability-crisis.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/log/20-dot-com-link-farms.gmi b/code/processes/converting-process/test-resources/memex-marginalia/log/20-dot-com-link-farms.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/log/20-dot-com-link-farms.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/log/20-dot-com-link-farms.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/log/21-new-solutions-old-problems.gmi b/code/processes/converting-process/test-resources/memex-marginalia/log/21-new-solutions-old-problems.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/log/21-new-solutions-old-problems.gmi rename to 
code/processes/converting-process/test-resources/memex-marginalia/log/21-new-solutions-old-problems.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/log/22-against-the-flood.gmi b/code/processes/converting-process/test-resources/memex-marginalia/log/22-against-the-flood.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/log/22-against-the-flood.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/log/22-against-the-flood.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/log/23-re-software-and-branding.gmi b/code/processes/converting-process/test-resources/memex-marginalia/log/23-re-software-and-branding.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/log/23-re-software-and-branding.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/log/23-re-software-and-branding.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/log/24-silly-hats.gmi b/code/processes/converting-process/test-resources/memex-marginalia/log/24-silly-hats.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/log/24-silly-hats.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/log/24-silly-hats.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/log/25-october-update.gmi b/code/processes/converting-process/test-resources/memex-marginalia/log/25-october-update.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/log/25-october-update.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/log/25-october-update.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/log/26-personalized-pagerank.gmi 
b/code/processes/converting-process/test-resources/memex-marginalia/log/26-personalized-pagerank.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/log/26-personalized-pagerank.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/log/26-personalized-pagerank.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/log/27-getting-with-the-times.gmi b/code/processes/converting-process/test-resources/memex-marginalia/log/27-getting-with-the-times.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/log/27-getting-with-the-times.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/log/27-getting-with-the-times.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/log/28-web-browsing.gmi b/code/processes/converting-process/test-resources/memex-marginalia/log/28-web-browsing.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/log/28-web-browsing.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/log/28-web-browsing.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/log/29-botnet-ddos.gmi b/code/processes/converting-process/test-resources/memex-marginalia/log/29-botnet-ddos.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/log/29-botnet-ddos.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/log/29-botnet-ddos.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/log/30-unintuitive-optimization.gmi b/code/processes/converting-process/test-resources/memex-marginalia/log/30-unintuitive-optimization.gmi similarity index 100% rename from 
code/processes/converting-process/src/test/resources/memex-marginalia/log/30-unintuitive-optimization.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/log/30-unintuitive-optimization.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/log/31-ngram-needles.gmi b/code/processes/converting-process/test-resources/memex-marginalia/log/31-ngram-needles.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/log/31-ngram-needles.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/log/31-ngram-needles.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/log/32-bot-apologetics.gmi b/code/processes/converting-process/test-resources/memex-marginalia/log/32-bot-apologetics.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/log/32-bot-apologetics.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/log/32-bot-apologetics.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/log/33-rude-guests.gmi b/code/processes/converting-process/test-resources/memex-marginalia/log/33-rude-guests.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/log/33-rude-guests.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/log/33-rude-guests.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/log/34-internet-arguments.gmi b/code/processes/converting-process/test-resources/memex-marginalia/log/34-internet-arguments.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/log/34-internet-arguments.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/log/34-internet-arguments.gmi diff --git 
a/code/processes/converting-process/src/test/resources/memex-marginalia/log/35-keeping-gemini-difficult.gmi b/code/processes/converting-process/test-resources/memex-marginalia/log/35-keeping-gemini-difficult.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/log/35-keeping-gemini-difficult.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/log/35-keeping-gemini-difficult.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/log/36-localized-programming-languages.gmi b/code/processes/converting-process/test-resources/memex-marginalia/log/36-localized-programming-languages.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/log/36-localized-programming-languages.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/log/36-localized-programming-languages.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/log/37-keyword-extraction.gmi b/code/processes/converting-process/test-resources/memex-marginalia/log/37-keyword-extraction.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/log/37-keyword-extraction.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/log/37-keyword-extraction.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/log/38-old-and-new.gmi b/code/processes/converting-process/test-resources/memex-marginalia/log/38-old-and-new.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/log/38-old-and-new.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/log/38-old-and-new.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/log/39-normie-hypothesis.gmi 
b/code/processes/converting-process/test-resources/memex-marginalia/log/39-normie-hypothesis.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/log/39-normie-hypothesis.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/log/39-normie-hypothesis.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/log/40-wasted-resources.gmi b/code/processes/converting-process/test-resources/memex-marginalia/log/40-wasted-resources.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/log/40-wasted-resources.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/log/40-wasted-resources.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/log/41-search-result-relevance.gmi b/code/processes/converting-process/test-resources/memex-marginalia/log/41-search-result-relevance.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/log/41-search-result-relevance.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/log/41-search-result-relevance.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/log/42-dark.gmi b/code/processes/converting-process/test-resources/memex-marginalia/log/42-dark.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/log/42-dark.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/log/42-dark.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/log/43-pseodonymous.gmi b/code/processes/converting-process/test-resources/memex-marginalia/log/43-pseodonymous.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/log/43-pseodonymous.gmi rename to 
code/processes/converting-process/test-resources/memex-marginalia/log/43-pseodonymous.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/log/44-discovery-and-design.gmi b/code/processes/converting-process/test-resources/memex-marginalia/log/44-discovery-and-design.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/log/44-discovery-and-design.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/log/44-discovery-and-design.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/log/45-unfuck-internet-discoverability.gmi b/code/processes/converting-process/test-resources/memex-marginalia/log/45-unfuck-internet-discoverability.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/log/45-unfuck-internet-discoverability.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/log/45-unfuck-internet-discoverability.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/log/46-anatomy-of-search-engine-spam.gmi b/code/processes/converting-process/test-resources/memex-marginalia/log/46-anatomy-of-search-engine-spam.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/log/46-anatomy-of-search-engine-spam.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/log/46-anatomy-of-search-engine-spam.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/log/47-drive-failure.gmi b/code/processes/converting-process/test-resources/memex-marginalia/log/47-drive-failure.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/log/47-drive-failure.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/log/47-drive-failure.gmi diff --git 
a/code/processes/converting-process/src/test/resources/memex-marginalia/log/48-i-have-no-capslock.gmi b/code/processes/converting-process/test-resources/memex-marginalia/log/48-i-have-no-capslock.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/log/48-i-have-no-capslock.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/log/48-i-have-no-capslock.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/log/49-marginalia-1-year.gmi b/code/processes/converting-process/test-resources/memex-marginalia/log/49-marginalia-1-year.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/log/49-marginalia-1-year.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/log/49-marginalia-1-year.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/log/50-meditation-on-software-correctness.gmi b/code/processes/converting-process/test-resources/memex-marginalia/log/50-meditation-on-software-correctness.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/log/50-meditation-on-software-correctness.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/log/50-meditation-on-software-correctness.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/log/51-the-file-startup.gmi b/code/processes/converting-process/test-resources/memex-marginalia/log/51-the-file-startup.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/log/51-the-file-startup.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/log/51-the-file-startup.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/log/52-growing-pains.gmi 
b/code/processes/converting-process/test-resources/memex-marginalia/log/52-growing-pains.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/log/52-growing-pains.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/log/52-growing-pains.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/log/53-better-hard-drive-metaphor.gmi b/code/processes/converting-process/test-resources/memex-marginalia/log/53-better-hard-drive-metaphor.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/log/53-better-hard-drive-metaphor.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/log/53-better-hard-drive-metaphor.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/log/54-bargain-bin-btree.gmi b/code/processes/converting-process/test-resources/memex-marginalia/log/54-bargain-bin-btree.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/log/54-bargain-bin-btree.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/log/54-bargain-bin-btree.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/log/55-lexicon-rubberduck.gmi b/code/processes/converting-process/test-resources/memex-marginalia/log/55-lexicon-rubberduck.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/log/55-lexicon-rubberduck.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/log/55-lexicon-rubberduck.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/log/56-uncertain-future.gmi b/code/processes/converting-process/test-resources/memex-marginalia/log/56-uncertain-future.gmi similarity index 100% rename from 
code/processes/converting-process/src/test/resources/memex-marginalia/log/56-uncertain-future.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/log/56-uncertain-future.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/log/57-dont-know-how-to-build-software.gmi b/code/processes/converting-process/test-resources/memex-marginalia/log/57-dont-know-how-to-build-software.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/log/57-dont-know-how-to-build-software.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/log/57-dont-know-how-to-build-software.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/log/58-marginalia-open-source.gmi b/code/processes/converting-process/test-resources/memex-marginalia/log/58-marginalia-open-source.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/log/58-marginalia-open-source.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/log/58-marginalia-open-source.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/log/59-anchor-text.gmi b/code/processes/converting-process/test-resources/memex-marginalia/log/59-anchor-text.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/log/59-anchor-text.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/log/59-anchor-text.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/log/60-prescriptive-descriptions.gmi b/code/processes/converting-process/test-resources/memex-marginalia/log/60-prescriptive-descriptions.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/log/60-prescriptive-descriptions.gmi rename to 
code/processes/converting-process/test-resources/memex-marginalia/log/60-prescriptive-descriptions.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/log/61-botspam-apocalypse.gmi b/code/processes/converting-process/test-resources/memex-marginalia/log/61-botspam-apocalypse.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/log/61-botspam-apocalypse.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/log/61-botspam-apocalypse.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/log/62-marginaliacoin.gmi b/code/processes/converting-process/test-resources/memex-marginalia/log/62-marginaliacoin.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/log/62-marginaliacoin.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/log/62-marginaliacoin.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/log/63-marginalia-crawler.gmi b/code/processes/converting-process/test-resources/memex-marginalia/log/63-marginalia-crawler.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/log/63-marginalia-crawler.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/log/63-marginalia-crawler.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/log/64-hundred-million.gmi b/code/processes/converting-process/test-resources/memex-marginalia/log/64-hundred-million.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/log/64-hundred-million.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/log/64-hundred-million.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/log/65-scaling-doesnt-scale.gmi 
b/code/processes/converting-process/test-resources/memex-marginalia/log/65-scaling-doesnt-scale.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/log/65-scaling-doesnt-scale.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/log/65-scaling-doesnt-scale.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/log/66-carbon-dating.gmi b/code/processes/converting-process/test-resources/memex-marginalia/log/66-carbon-dating.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/log/66-carbon-dating.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/log/66-carbon-dating.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/log/67-best-ideas-afk.gmi b/code/processes/converting-process/test-resources/memex-marginalia/log/67-best-ideas-afk.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/log/67-best-ideas-afk.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/log/67-best-ideas-afk.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/log/68-wizards-vs-sorcerers.gmi b/code/processes/converting-process/test-resources/memex-marginalia/log/68-wizards-vs-sorcerers.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/log/68-wizards-vs-sorcerers.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/log/68-wizards-vs-sorcerers.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/log/69-creepy-website-similarity.gmi b/code/processes/converting-process/test-resources/memex-marginalia/log/69-creepy-website-similarity.gmi similarity index 100% rename from 
code/processes/converting-process/src/test/resources/memex-marginalia/log/69-creepy-website-similarity.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/log/69-creepy-website-similarity.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/log/70-faster-index-joins.gmi b/code/processes/converting-process/test-resources/memex-marginalia/log/70-faster-index-joins.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/log/70-faster-index-joins.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/log/70-faster-index-joins.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/log/71-memex-design.gmi b/code/processes/converting-process/test-resources/memex-marginalia/log/71-memex-design.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/log/71-memex-design.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/log/71-memex-design.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/log/72-are-you-ok.gmi b/code/processes/converting-process/test-resources/memex-marginalia/log/72-are-you-ok.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/log/72-are-you-ok.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/log/72-are-you-ok.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/log/72-new-approach-to-ranking.gmi b/code/processes/converting-process/test-resources/memex-marginalia/log/72-new-approach-to-ranking.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/log/72-new-approach-to-ranking.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/log/72-new-approach-to-ranking.gmi diff --git 
a/code/processes/converting-process/src/test/resources/memex-marginalia/log/73-new-approach-to-ranking.gmi b/code/processes/converting-process/test-resources/memex-marginalia/log/73-new-approach-to-ranking.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/log/73-new-approach-to-ranking.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/log/73-new-approach-to-ranking.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/log/74-marginalia-2-years.gmi b/code/processes/converting-process/test-resources/memex-marginalia/log/74-marginalia-2-years.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/log/74-marginalia-2-years.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/log/74-marginalia-2-years.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/log/bargain-bin-btree.gmi b/code/processes/converting-process/test-resources/memex-marginalia/log/bargain-bin-btree.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/log/bargain-bin-btree.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/log/bargain-bin-btree.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/log/index.gmi b/code/processes/converting-process/test-resources/memex-marginalia/log/index.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/log/index.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/log/index.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/log/soaring-high.gmi b/code/processes/converting-process/test-resources/memex-marginalia/log/soaring-high.gmi similarity index 100% rename from 
code/processes/converting-process/src/test/resources/memex-marginalia/log/soaring-high.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/log/soaring-high.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/log/todo.gmi b/code/processes/converting-process/test-resources/memex-marginalia/log/todo.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/log/todo.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/log/todo.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/one-weird-trick.gmi b/code/processes/converting-process/test-resources/memex-marginalia/one-weird-trick.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/one-weird-trick.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/one-weird-trick.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/pics/index.gmi b/code/processes/converting-process/test-resources/memex-marginalia/pics/index.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/pics/index.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/pics/index.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/pics/links/index.gmi b/code/processes/converting-process/test-resources/memex-marginalia/pics/links/index.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/pics/links/index.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/pics/links/index.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/pics/raster-test/index.gmi b/code/processes/converting-process/test-resources/memex-marginalia/pics/raster-test/index.gmi 
similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/pics/raster-test/index.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/pics/raster-test/index.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/projects/edge.gmi b/code/processes/converting-process/test-resources/memex-marginalia/projects/edge.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/projects/edge.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/projects/edge.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/projects/edge/about.gmi b/code/processes/converting-process/test-resources/memex-marginalia/projects/edge/about.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/projects/edge/about.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/projects/edge/about.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/projects/edge/api.gmi b/code/processes/converting-process/test-resources/memex-marginalia/projects/edge/api.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/projects/edge/api.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/projects/edge/api.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/projects/edge/changelog.gmi b/code/processes/converting-process/test-resources/memex-marginalia/projects/edge/changelog.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/projects/edge/changelog.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/projects/edge/changelog.gmi diff --git 
a/code/processes/converting-process/src/test/resources/memex-marginalia/projects/edge/design-notes.gmi b/code/processes/converting-process/test-resources/memex-marginalia/projects/edge/design-notes.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/projects/edge/design-notes.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/projects/edge/design-notes.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/projects/edge/faq.gmi b/code/processes/converting-process/test-resources/memex-marginalia/projects/edge/faq.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/projects/edge/faq.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/projects/edge/faq.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/projects/edge/for-webmasters.gmi b/code/processes/converting-process/test-resources/memex-marginalia/projects/edge/for-webmasters.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/projects/edge/for-webmasters.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/projects/edge/for-webmasters.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/projects/edge/index.gmi b/code/processes/converting-process/test-resources/memex-marginalia/projects/edge/index.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/projects/edge/index.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/projects/edge/index.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/projects/edge/privacy.gmi b/code/processes/converting-process/test-resources/memex-marginalia/projects/edge/privacy.gmi similarity index 100% rename from 
code/processes/converting-process/src/test/resources/memex-marginalia/projects/edge/privacy.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/projects/edge/privacy.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/projects/edge/search-tips.gmi b/code/processes/converting-process/test-resources/memex-marginalia/projects/edge/search-tips.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/projects/edge/search-tips.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/projects/edge/search-tips.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/projects/edge/supporting.gmi b/code/processes/converting-process/test-resources/memex-marginalia/projects/edge/supporting.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/projects/edge/supporting.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/projects/edge/supporting.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/projects/edge/top-20.gmi b/code/processes/converting-process/test-resources/memex-marginalia/projects/edge/top-20.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/projects/edge/top-20.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/projects/edge/top-20.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/projects/encyclopedia/index.gmi b/code/processes/converting-process/test-resources/memex-marginalia/projects/encyclopedia/index.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/projects/encyclopedia/index.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/projects/encyclopedia/index.gmi diff --git 
a/code/processes/converting-process/src/test/resources/memex-marginalia/projects/gemini-server.gmi b/code/processes/converting-process/test-resources/memex-marginalia/projects/gemini-server.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/projects/gemini-server.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/projects/gemini-server.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/projects/index.gmi b/code/processes/converting-process/test-resources/memex-marginalia/projects/index.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/projects/index.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/projects/index.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/projects/memex.gmi b/code/processes/converting-process/test-resources/memex-marginalia/projects/memex.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/projects/memex.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/projects/memex.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/projects/wmsa.gmi b/code/processes/converting-process/test-resources/memex-marginalia/projects/wmsa.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/projects/wmsa.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/projects/wmsa.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/readme.md b/code/processes/converting-process/test-resources/memex-marginalia/readme.md similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/readme.md rename to 
code/processes/converting-process/test-resources/memex-marginalia/readme.md diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/recipes/chicken-soup.gmi b/code/processes/converting-process/test-resources/memex-marginalia/recipes/chicken-soup.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/recipes/chicken-soup.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/recipes/chicken-soup.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/recipes/french-borscht.gmi b/code/processes/converting-process/test-resources/memex-marginalia/recipes/french-borscht.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/recipes/french-borscht.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/recipes/french-borscht.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/recipes/index.gmi b/code/processes/converting-process/test-resources/memex-marginalia/recipes/index.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/recipes/index.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/recipes/index.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/recipes/omelette-bacon.gmi b/code/processes/converting-process/test-resources/memex-marginalia/recipes/omelette-bacon.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/recipes/omelette-bacon.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/recipes/omelette-bacon.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/search-about.gmi b/code/processes/converting-process/test-resources/memex-marginalia/search-about.gmi similarity index 100% 
rename from code/processes/converting-process/src/test/resources/memex-marginalia/search-about.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/search-about.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/server.gmi b/code/processes/converting-process/test-resources/memex-marginalia/server.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/server.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/server.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/special/index.gmi b/code/processes/converting-process/test-resources/memex-marginalia/special/index.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/special/index.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/special/index.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/special/redirect.gmi b/code/processes/converting-process/test-resources/memex-marginalia/special/redirect.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/special/redirect.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/special/redirect.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/special/tombstone.gmi b/code/processes/converting-process/test-resources/memex-marginalia/special/tombstone.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/special/tombstone.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/special/tombstone.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/test.gmi b/code/processes/converting-process/test-resources/memex-marginalia/test.gmi similarity index 100% 
rename from code/processes/converting-process/src/test/resources/memex-marginalia/test.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/test.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/todo.gmi b/code/processes/converting-process/test-resources/memex-marginalia/todo.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/todo.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/todo.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/todo/done.gmi b/code/processes/converting-process/test-resources/memex-marginalia/todo/done.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/todo/done.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/todo/done.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/todo/index.gmi b/code/processes/converting-process/test-resources/memex-marginalia/todo/index.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/todo/index.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/todo/index.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/todo/todo.gmi b/code/processes/converting-process/test-resources/memex-marginalia/todo/todo.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/todo/todo.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/todo/todo.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/topic/astrolabe.gmi b/code/processes/converting-process/test-resources/memex-marginalia/topic/astrolabe.gmi similarity index 100% rename from 
code/processes/converting-process/src/test/resources/memex-marginalia/topic/astrolabe.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/topic/astrolabe.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/topic/cooking.gmi b/code/processes/converting-process/test-resources/memex-marginalia/topic/cooking.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/topic/cooking.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/topic/cooking.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/topic/index.gmi b/code/processes/converting-process/test-resources/memex-marginalia/topic/index.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/topic/index.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/topic/index.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/topic/moral-philosophy.gmi b/code/processes/converting-process/test-resources/memex-marginalia/topic/moral-philosophy.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/topic/moral-philosophy.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/topic/moral-philosophy.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/topic/nlnet.gmi b/code/processes/converting-process/test-resources/memex-marginalia/topic/nlnet.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/topic/nlnet.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/topic/nlnet.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/topic/platforms.gmi 
b/code/processes/converting-process/test-resources/memex-marginalia/topic/platforms.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/topic/platforms.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/topic/platforms.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/topic/programming.gmi b/code/processes/converting-process/test-resources/memex-marginalia/topic/programming.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/topic/programming.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/topic/programming.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/topic/satire.gmi b/code/processes/converting-process/test-resources/memex-marginalia/topic/satire.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/topic/satire.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/topic/satire.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/topic/server.gmi b/code/processes/converting-process/test-resources/memex-marginalia/topic/server.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/topic/server.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/topic/server.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/topic/web-design.gmi b/code/processes/converting-process/test-resources/memex-marginalia/topic/web-design.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/topic/web-design.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/topic/web-design.gmi diff --git 
a/code/processes/converting-process/src/test/resources/memex-marginalia/topics.gmi b/code/processes/converting-process/test-resources/memex-marginalia/topics.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/topics.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/topics.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/worklog.gmi b/code/processes/converting-process/test-resources/memex-marginalia/worklog.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/worklog.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/worklog.gmi diff --git a/code/processes/converting-process/src/test/resources/memex-marginalia/writing-topics.gmi b/code/processes/converting-process/test-resources/memex-marginalia/writing-topics.gmi similarity index 100% rename from code/processes/converting-process/src/test/resources/memex-marginalia/writing-topics.gmi rename to code/processes/converting-process/test-resources/memex-marginalia/writing-topics.gmi diff --git a/code/processes/converting-process/src/test/java/nu/marginalia/converting/ConvertingIntegrationTest.java b/code/processes/converting-process/test/nu/marginalia/converting/ConvertingIntegrationTest.java similarity index 100% rename from code/processes/converting-process/src/test/java/nu/marginalia/converting/ConvertingIntegrationTest.java rename to code/processes/converting-process/test/nu/marginalia/converting/ConvertingIntegrationTest.java diff --git a/code/processes/converting-process/src/test/java/nu/marginalia/converting/ConvertingIntegrationTestModule.java b/code/processes/converting-process/test/nu/marginalia/converting/ConvertingIntegrationTestModule.java similarity index 95% rename from code/processes/converting-process/src/test/java/nu/marginalia/converting/ConvertingIntegrationTestModule.java rename to 
code/processes/converting-process/test/nu/marginalia/converting/ConvertingIntegrationTestModule.java index 3d171a53..83f28882 100644 --- a/code/processes/converting-process/src/test/java/nu/marginalia/converting/ConvertingIntegrationTestModule.java +++ b/code/processes/converting-process/test/nu/marginalia/converting/ConvertingIntegrationTestModule.java @@ -16,7 +16,7 @@ public class ConvertingIntegrationTestModule extends AbstractModule { bind(Integer.class).annotatedWith(Names.named("max-title-length")).toInstance(128); bind(Integer.class).annotatedWith(Names.named("max-summary-length")).toInstance(255); bind(ServiceConfiguration.class).toInstance(new ServiceConfiguration( - null, 1, "localhost", 0, 0, null + null, 1, "localhost", "localhost", 0, null )); bind(ProcessConfiguration.class).toInstance(new ProcessConfiguration( "converting-process", 1, null diff --git a/code/processes/converting-process/src/test/java/nu/marginalia/converting/CrawlingThenConvertingIntegrationTest.java b/code/processes/converting-process/test/nu/marginalia/converting/CrawlingThenConvertingIntegrationTest.java similarity index 100% rename from code/processes/converting-process/src/test/java/nu/marginalia/converting/CrawlingThenConvertingIntegrationTest.java rename to code/processes/converting-process/test/nu/marginalia/converting/CrawlingThenConvertingIntegrationTest.java diff --git a/code/processes/converting-process/src/test/java/nu/marginalia/converting/logic/PlainTextLogicTest.java b/code/processes/converting-process/test/nu/marginalia/converting/logic/PlainTextLogicTest.java similarity index 100% rename from code/processes/converting-process/src/test/java/nu/marginalia/converting/logic/PlainTextLogicTest.java rename to code/processes/converting-process/test/nu/marginalia/converting/logic/PlainTextLogicTest.java diff --git a/code/processes/converting-process/src/test/java/nu/marginalia/converting/processor/logic/MetaRobotsTagTest.java 
b/code/processes/converting-process/test/nu/marginalia/converting/processor/logic/MetaRobotsTagTest.java similarity index 100% rename from code/processes/converting-process/src/test/java/nu/marginalia/converting/processor/logic/MetaRobotsTagTest.java rename to code/processes/converting-process/test/nu/marginalia/converting/processor/logic/MetaRobotsTagTest.java diff --git a/code/processes/converting-process/src/test/java/nu/marginalia/converting/processor/logic/dom/MeasureLengthVisitorTest.java b/code/processes/converting-process/test/nu/marginalia/converting/processor/logic/dom/MeasureLengthVisitorTest.java similarity index 100% rename from code/processes/converting-process/src/test/java/nu/marginalia/converting/processor/logic/dom/MeasureLengthVisitorTest.java rename to code/processes/converting-process/test/nu/marginalia/converting/processor/logic/dom/MeasureLengthVisitorTest.java diff --git a/code/processes/converting-process/src/test/java/nu/marginalia/converting/processor/plugin/specialization/BlogSpecializationTest.java b/code/processes/converting-process/test/nu/marginalia/converting/processor/plugin/specialization/BlogSpecializationTest.java similarity index 100% rename from code/processes/converting-process/src/test/java/nu/marginalia/converting/processor/plugin/specialization/BlogSpecializationTest.java rename to code/processes/converting-process/test/nu/marginalia/converting/processor/plugin/specialization/BlogSpecializationTest.java diff --git a/code/processes/converting-process/src/test/java/nu/marginalia/converting/processor/plugin/specialization/JavadocSpecializationTest.java b/code/processes/converting-process/test/nu/marginalia/converting/processor/plugin/specialization/JavadocSpecializationTest.java similarity index 100% rename from code/processes/converting-process/src/test/java/nu/marginalia/converting/processor/plugin/specialization/JavadocSpecializationTest.java rename to 
code/processes/converting-process/test/nu/marginalia/converting/processor/plugin/specialization/JavadocSpecializationTest.java diff --git a/code/processes/converting-process/src/test/java/nu/marginalia/converting/processor/plugin/specialization/LemmySpecializationTest.java b/code/processes/converting-process/test/nu/marginalia/converting/processor/plugin/specialization/LemmySpecializationTest.java similarity index 100% rename from code/processes/converting-process/src/test/java/nu/marginalia/converting/processor/plugin/specialization/LemmySpecializationTest.java rename to code/processes/converting-process/test/nu/marginalia/converting/processor/plugin/specialization/LemmySpecializationTest.java diff --git a/code/processes/converting-process/src/test/java/nu/marginalia/converting/processor/plugin/specialization/WikiSpecializationTest.java b/code/processes/converting-process/test/nu/marginalia/converting/processor/plugin/specialization/WikiSpecializationTest.java similarity index 94% rename from code/processes/converting-process/src/test/java/nu/marginalia/converting/processor/plugin/specialization/WikiSpecializationTest.java rename to code/processes/converting-process/test/nu/marginalia/converting/processor/plugin/specialization/WikiSpecializationTest.java index d48ceeb2..1fc23148 100644 --- a/code/processes/converting-process/src/test/java/nu/marginalia/converting/processor/plugin/specialization/WikiSpecializationTest.java +++ b/code/processes/converting-process/test/nu/marginalia/converting/processor/plugin/specialization/WikiSpecializationTest.java @@ -1,6 +1,5 @@ package nu.marginalia.converting.processor.plugin.specialization; -import nu.marginalia.converting.processor.logic.DocumentGeneratorExtractor; import nu.marginalia.summary.SummaryExtractor; import nu.marginalia.test.CommonTestData; import org.jsoup.Jsoup; diff --git a/code/processes/converting-process/src/test/java/nu/marginalia/converting/processor/plugin/specialization/XenForoSpecializationTest.java 
b/code/processes/converting-process/test/nu/marginalia/converting/processor/plugin/specialization/XenForoSpecializationTest.java similarity index 100% rename from code/processes/converting-process/src/test/java/nu/marginalia/converting/processor/plugin/specialization/XenForoSpecializationTest.java rename to code/processes/converting-process/test/nu/marginalia/converting/processor/plugin/specialization/XenForoSpecializationTest.java diff --git a/code/processes/converting-process/src/test/java/nu/marginalia/converting/sideload/encyclopedia/EncyclopediaMarginaliaNuSideloaderTest.java b/code/processes/converting-process/test/nu/marginalia/converting/sideload/encyclopedia/EncyclopediaMarginaliaNuSideloaderTest.java similarity index 100% rename from code/processes/converting-process/src/test/java/nu/marginalia/converting/sideload/encyclopedia/EncyclopediaMarginaliaNuSideloaderTest.java rename to code/processes/converting-process/test/nu/marginalia/converting/sideload/encyclopedia/EncyclopediaMarginaliaNuSideloaderTest.java diff --git a/code/processes/converting-process/src/test/java/nu/marginalia/converting/sideload/reddit/RedditSideloaderTest.java b/code/processes/converting-process/test/nu/marginalia/converting/sideload/reddit/RedditSideloaderTest.java similarity index 100% rename from code/processes/converting-process/src/test/java/nu/marginalia/converting/sideload/reddit/RedditSideloaderTest.java rename to code/processes/converting-process/test/nu/marginalia/converting/sideload/reddit/RedditSideloaderTest.java diff --git a/code/processes/converting-process/src/test/java/nu/marginalia/converting/sideload/warc/WarcSideloaderTest.java b/code/processes/converting-process/test/nu/marginalia/converting/sideload/warc/WarcSideloaderTest.java similarity index 100% rename from code/processes/converting-process/src/test/java/nu/marginalia/converting/sideload/warc/WarcSideloaderTest.java rename to 
code/processes/converting-process/test/nu/marginalia/converting/sideload/warc/WarcSideloaderTest.java diff --git a/code/processes/converting-process/src/test/java/nu/marginalia/converting/util/LineUtilsTest.java b/code/processes/converting-process/test/nu/marginalia/converting/util/LineUtilsTest.java similarity index 100% rename from code/processes/converting-process/src/test/java/nu/marginalia/converting/util/LineUtilsTest.java rename to code/processes/converting-process/test/nu/marginalia/converting/util/LineUtilsTest.java diff --git a/code/processes/converting-process/src/test/java/nu/marginalia/converting/util/TestLanguageModels.java b/code/processes/converting-process/test/nu/marginalia/converting/util/TestLanguageModels.java similarity index 100% rename from code/processes/converting-process/src/test/java/nu/marginalia/converting/util/TestLanguageModels.java rename to code/processes/converting-process/test/nu/marginalia/converting/util/TestLanguageModels.java diff --git a/code/processes/crawling-process/build.gradle b/code/processes/crawling-process/build.gradle index 2e99b8a7..94a60b11 100644 --- a/code/processes/crawling-process/build.gradle +++ b/code/processes/crawling-process/build.gradle @@ -18,6 +18,8 @@ application { tasks.distZip.enabled = false +apply from: "$rootProject.projectDir/srcsets.gradle" + dependencies { implementation project(':code:common:process') @@ -27,10 +29,9 @@ dependencies { implementation project(':code:common:service') implementation project(':code:libraries:big-string') implementation project(':code:libraries:blocking-thread-pool') - implementation project(':code:api:index-api') - implementation project(':code:api:process-mqapi') + implementation project(':code:index:api') + implementation project(':code:process-mqapi') implementation project(':code:common:service-discovery') - implementation project(':code:common:service-client') implementation project(':code:libraries:message-queue') implementation 
project(':code:libraries:language-processing') implementation project(':code:libraries:easy-lsh') @@ -55,7 +56,6 @@ dependencies { implementation libs.okhttp3 implementation libs.jsoup implementation libs.opencsv - implementation libs.rxjava implementation libs.fastutil implementation libs.bundles.mariadb diff --git a/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/AbortMonitor.java b/code/processes/crawling-process/java/nu/marginalia/crawl/AbortMonitor.java similarity index 100% rename from code/processes/crawling-process/src/main/java/nu/marginalia/crawl/AbortMonitor.java rename to code/processes/crawling-process/java/nu/marginalia/crawl/AbortMonitor.java diff --git a/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/CrawlerMain.java b/code/processes/crawling-process/java/nu/marginalia/crawl/CrawlerMain.java similarity index 99% rename from code/processes/crawling-process/src/main/java/nu/marginalia/crawl/CrawlerMain.java rename to code/processes/crawling-process/java/nu/marginalia/crawl/CrawlerMain.java index 580ac3c7..4461a85a 100644 --- a/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/CrawlerMain.java +++ b/code/processes/crawling-process/java/nu/marginalia/crawl/CrawlerMain.java @@ -332,7 +332,7 @@ public class CrawlerMain extends ProcessMainClass { if (request.specStorage != null) { var specData = fileStorageService.getStorage(request.specStorage); - var parquetProvider = new ParquetCrawlSpecProvider(CrawlSpecFileNames.resolve(specData));; + var parquetProvider = new ParquetCrawlSpecProvider(CrawlSpecFileNames.resolve(specData)); // Ensure the parquet domains are loaded into the database to avoid // rare data-loss scenarios diff --git a/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/CrawlerModule.java b/code/processes/crawling-process/java/nu/marginalia/crawl/CrawlerModule.java similarity index 83% rename from 
code/processes/crawling-process/src/main/java/nu/marginalia/crawl/CrawlerModule.java rename to code/processes/crawling-process/java/nu/marginalia/crawl/CrawlerModule.java index 91251694..1d27f08f 100644 --- a/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/CrawlerModule.java +++ b/code/processes/crawling-process/java/nu/marginalia/crawl/CrawlerModule.java @@ -2,15 +2,11 @@ package nu.marginalia.crawl; import com.google.gson.Gson; import com.google.inject.AbstractModule; -import com.google.inject.name.Names; import lombok.SneakyThrows; -import nu.marginalia.ProcessConfiguration; import nu.marginalia.UserAgent; import nu.marginalia.WmsaHome; import nu.marginalia.model.gson.GsonFactory; -import java.util.UUID; - public class CrawlerModule extends AbstractModule { @SneakyThrows public void configure() { diff --git a/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/Cookies.java b/code/processes/crawling-process/java/nu/marginalia/crawl/retreival/Cookies.java similarity index 100% rename from code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/Cookies.java rename to code/processes/crawling-process/java/nu/marginalia/crawl/retreival/Cookies.java diff --git a/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/CrawlDataReference.java b/code/processes/crawling-process/java/nu/marginalia/crawl/retreival/CrawlDataReference.java similarity index 100% rename from code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/CrawlDataReference.java rename to code/processes/crawling-process/java/nu/marginalia/crawl/retreival/CrawlDataReference.java diff --git a/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/CrawlDelayTimer.java b/code/processes/crawling-process/java/nu/marginalia/crawl/retreival/CrawlDelayTimer.java similarity index 100% rename from code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/CrawlDelayTimer.java 
rename to code/processes/crawling-process/java/nu/marginalia/crawl/retreival/CrawlDelayTimer.java diff --git a/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/CrawledDocumentFactory.java b/code/processes/crawling-process/java/nu/marginalia/crawl/retreival/CrawledDocumentFactory.java similarity index 100% rename from code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/CrawledDocumentFactory.java rename to code/processes/crawling-process/java/nu/marginalia/crawl/retreival/CrawledDocumentFactory.java diff --git a/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/CrawlerRetreiver.java b/code/processes/crawling-process/java/nu/marginalia/crawl/retreival/CrawlerRetreiver.java similarity index 100% rename from code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/CrawlerRetreiver.java rename to code/processes/crawling-process/java/nu/marginalia/crawl/retreival/CrawlerRetreiver.java diff --git a/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/CrawlerWarcResynchronizer.java b/code/processes/crawling-process/java/nu/marginalia/crawl/retreival/CrawlerWarcResynchronizer.java similarity index 96% rename from code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/CrawlerWarcResynchronizer.java rename to code/processes/crawling-process/java/nu/marginalia/crawl/retreival/CrawlerWarcResynchronizer.java index bfbcab14..ab1ce5ef 100644 --- a/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/CrawlerWarcResynchronizer.java +++ b/code/processes/crawling-process/java/nu/marginalia/crawl/retreival/CrawlerWarcResynchronizer.java @@ -1,7 +1,6 @@ package nu.marginalia.crawl.retreival; import nu.marginalia.crawling.body.DocumentBodyExtractor; -import nu.marginalia.crawling.body.DocumentBodyResult; import nu.marginalia.crawling.body.HttpFetchResult; import nu.marginalia.crawl.retreival.fetcher.warc.WarcRecorder; import 
nu.marginalia.model.EdgeUrl; @@ -10,8 +9,6 @@ import org.netpreserve.jwarc.*; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.IOException; -import java.io.UncheckedIOException; import java.nio.file.Path; /** diff --git a/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/DomainCrawlFrontier.java b/code/processes/crawling-process/java/nu/marginalia/crawl/retreival/DomainCrawlFrontier.java similarity index 100% rename from code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/DomainCrawlFrontier.java rename to code/processes/crawling-process/java/nu/marginalia/crawl/retreival/DomainCrawlFrontier.java diff --git a/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/DomainProber.java b/code/processes/crawling-process/java/nu/marginalia/crawl/retreival/DomainProber.java similarity index 97% rename from code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/DomainProber.java rename to code/processes/crawling-process/java/nu/marginalia/crawl/retreival/DomainProber.java index fcc005a8..57147aec 100644 --- a/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/DomainProber.java +++ b/code/processes/crawling-process/java/nu/marginalia/crawl/retreival/DomainProber.java @@ -5,7 +5,6 @@ import com.google.inject.Singleton; import nu.marginalia.crawl.retreival.fetcher.FetchResultState; import nu.marginalia.crawl.retreival.fetcher.HttpFetcher; import nu.marginalia.crawling.model.CrawlerDomainStatus; -import nu.marginalia.ip_blocklist.GeoIpBlocklist; import nu.marginalia.ip_blocklist.IpBlockList; import nu.marginalia.model.EdgeDomain; import nu.marginalia.model.EdgeUrl; @@ -57,7 +56,7 @@ public class DomainProber { return new ProbeResultError(CrawlerDomainStatus.ERROR, "Bad status"); } - public sealed interface ProbeResult permits ProbeResultError, ProbeResultRedirect, ProbeResultOk {}; + public sealed interface ProbeResult permits 
ProbeResultError, ProbeResultRedirect, ProbeResultOk {} /** The probing failed for one reason or another * @param status Machine readable status diff --git a/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/LinkFilterSelector.java b/code/processes/crawling-process/java/nu/marginalia/crawl/retreival/LinkFilterSelector.java similarity index 100% rename from code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/LinkFilterSelector.java rename to code/processes/crawling-process/java/nu/marginalia/crawl/retreival/LinkFilterSelector.java diff --git a/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/RateLimitException.java b/code/processes/crawling-process/java/nu/marginalia/crawl/retreival/RateLimitException.java similarity index 100% rename from code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/RateLimitException.java rename to code/processes/crawling-process/java/nu/marginalia/crawl/retreival/RateLimitException.java diff --git a/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/fetcher/ContentTags.java b/code/processes/crawling-process/java/nu/marginalia/crawl/retreival/fetcher/ContentTags.java similarity index 100% rename from code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/fetcher/ContentTags.java rename to code/processes/crawling-process/java/nu/marginalia/crawl/retreival/fetcher/ContentTags.java diff --git a/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/fetcher/ContentTypeProber.java b/code/processes/crawling-process/java/nu/marginalia/crawl/retreival/fetcher/ContentTypeProber.java similarity index 100% rename from code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/fetcher/ContentTypeProber.java rename to code/processes/crawling-process/java/nu/marginalia/crawl/retreival/fetcher/ContentTypeProber.java diff --git 
a/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/fetcher/FetchResult.java b/code/processes/crawling-process/java/nu/marginalia/crawl/retreival/fetcher/FetchResult.java similarity index 100% rename from code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/fetcher/FetchResult.java rename to code/processes/crawling-process/java/nu/marginalia/crawl/retreival/fetcher/FetchResult.java diff --git a/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/fetcher/FetchResultState.java b/code/processes/crawling-process/java/nu/marginalia/crawl/retreival/fetcher/FetchResultState.java similarity index 100% rename from code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/fetcher/FetchResultState.java rename to code/processes/crawling-process/java/nu/marginalia/crawl/retreival/fetcher/FetchResultState.java diff --git a/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/fetcher/HttpFetcher.java b/code/processes/crawling-process/java/nu/marginalia/crawl/retreival/fetcher/HttpFetcher.java similarity index 100% rename from code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/fetcher/HttpFetcher.java rename to code/processes/crawling-process/java/nu/marginalia/crawl/retreival/fetcher/HttpFetcher.java diff --git a/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/fetcher/HttpFetcherImpl.java b/code/processes/crawling-process/java/nu/marginalia/crawl/retreival/fetcher/HttpFetcherImpl.java similarity index 99% rename from code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/fetcher/HttpFetcherImpl.java rename to code/processes/crawling-process/java/nu/marginalia/crawl/retreival/fetcher/HttpFetcherImpl.java index 2bc8482b..94494402 100644 --- a/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/fetcher/HttpFetcherImpl.java +++ 
b/code/processes/crawling-process/java/nu/marginalia/crawl/retreival/fetcher/HttpFetcherImpl.java @@ -1,7 +1,6 @@ package nu.marginalia.crawl.retreival.fetcher; import com.google.inject.Inject; -import com.google.inject.name.Named; import crawlercommons.robots.SimpleRobotRules; import crawlercommons.robots.SimpleRobotRulesParser; import lombok.SneakyThrows; @@ -22,7 +21,6 @@ import okhttp3.ConnectionPool; import okhttp3.Dispatcher; import okhttp3.OkHttpClient; import okhttp3.Request; -import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/fetcher/SitemapRetriever.java b/code/processes/crawling-process/java/nu/marginalia/crawl/retreival/fetcher/SitemapRetriever.java similarity index 100% rename from code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/fetcher/SitemapRetriever.java rename to code/processes/crawling-process/java/nu/marginalia/crawl/retreival/fetcher/SitemapRetriever.java diff --git a/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/fetcher/socket/FastTerminatingSocketFactory.java b/code/processes/crawling-process/java/nu/marginalia/crawl/retreival/fetcher/socket/FastTerminatingSocketFactory.java similarity index 100% rename from code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/fetcher/socket/FastTerminatingSocketFactory.java rename to code/processes/crawling-process/java/nu/marginalia/crawl/retreival/fetcher/socket/FastTerminatingSocketFactory.java diff --git a/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/fetcher/socket/IpInterceptingNetworkInterceptor.java b/code/processes/crawling-process/java/nu/marginalia/crawl/retreival/fetcher/socket/IpInterceptingNetworkInterceptor.java similarity index 100% rename from 
code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/fetcher/socket/IpInterceptingNetworkInterceptor.java rename to code/processes/crawling-process/java/nu/marginalia/crawl/retreival/fetcher/socket/IpInterceptingNetworkInterceptor.java diff --git a/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/fetcher/socket/NoSecuritySSL.java b/code/processes/crawling-process/java/nu/marginalia/crawl/retreival/fetcher/socket/NoSecuritySSL.java similarity index 100% rename from code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/fetcher/socket/NoSecuritySSL.java rename to code/processes/crawling-process/java/nu/marginalia/crawl/retreival/fetcher/socket/NoSecuritySSL.java diff --git a/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/fetcher/warc/WarcDigestBuilder.java b/code/processes/crawling-process/java/nu/marginalia/crawl/retreival/fetcher/warc/WarcDigestBuilder.java similarity index 100% rename from code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/fetcher/warc/WarcDigestBuilder.java rename to code/processes/crawling-process/java/nu/marginalia/crawl/retreival/fetcher/warc/WarcDigestBuilder.java diff --git a/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/fetcher/warc/WarcInputBuffer.java b/code/processes/crawling-process/java/nu/marginalia/crawl/retreival/fetcher/warc/WarcInputBuffer.java similarity index 100% rename from code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/fetcher/warc/WarcInputBuffer.java rename to code/processes/crawling-process/java/nu/marginalia/crawl/retreival/fetcher/warc/WarcInputBuffer.java diff --git a/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/fetcher/warc/WarcProtocolReconstructor.java b/code/processes/crawling-process/java/nu/marginalia/crawl/retreival/fetcher/warc/WarcProtocolReconstructor.java similarity index 100% rename from 
code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/fetcher/warc/WarcProtocolReconstructor.java rename to code/processes/crawling-process/java/nu/marginalia/crawl/retreival/fetcher/warc/WarcProtocolReconstructor.java diff --git a/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/fetcher/warc/WarcRecorder.java b/code/processes/crawling-process/java/nu/marginalia/crawl/retreival/fetcher/warc/WarcRecorder.java similarity index 100% rename from code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/fetcher/warc/WarcRecorder.java rename to code/processes/crawling-process/java/nu/marginalia/crawl/retreival/fetcher/warc/WarcRecorder.java diff --git a/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/revisit/CrawlerRevisitor.java b/code/processes/crawling-process/java/nu/marginalia/crawl/retreival/revisit/CrawlerRevisitor.java similarity index 100% rename from code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/revisit/CrawlerRevisitor.java rename to code/processes/crawling-process/java/nu/marginalia/crawl/retreival/revisit/CrawlerRevisitor.java diff --git a/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/revisit/DocumentWithReference.java b/code/processes/crawling-process/java/nu/marginalia/crawl/retreival/revisit/DocumentWithReference.java similarity index 100% rename from code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/revisit/DocumentWithReference.java rename to code/processes/crawling-process/java/nu/marginalia/crawl/retreival/revisit/DocumentWithReference.java diff --git a/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/sitemap/SitemapFetcher.java b/code/processes/crawling-process/java/nu/marginalia/crawl/retreival/sitemap/SitemapFetcher.java similarity index 100% rename from 
code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/sitemap/SitemapFetcher.java rename to code/processes/crawling-process/java/nu/marginalia/crawl/retreival/sitemap/SitemapFetcher.java diff --git a/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/spec/CrawlSpecProvider.java b/code/processes/crawling-process/java/nu/marginalia/crawl/spec/CrawlSpecProvider.java similarity index 100% rename from code/processes/crawling-process/src/main/java/nu/marginalia/crawl/spec/CrawlSpecProvider.java rename to code/processes/crawling-process/java/nu/marginalia/crawl/spec/CrawlSpecProvider.java diff --git a/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/spec/DbCrawlSpecProvider.java b/code/processes/crawling-process/java/nu/marginalia/crawl/spec/DbCrawlSpecProvider.java similarity index 100% rename from code/processes/crawling-process/src/main/java/nu/marginalia/crawl/spec/DbCrawlSpecProvider.java rename to code/processes/crawling-process/java/nu/marginalia/crawl/spec/DbCrawlSpecProvider.java diff --git a/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/spec/ParquetCrawlSpecProvider.java b/code/processes/crawling-process/java/nu/marginalia/crawl/spec/ParquetCrawlSpecProvider.java similarity index 100% rename from code/processes/crawling-process/src/main/java/nu/marginalia/crawl/spec/ParquetCrawlSpecProvider.java rename to code/processes/crawling-process/java/nu/marginalia/crawl/spec/ParquetCrawlSpecProvider.java diff --git a/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/warc/WarcArchiverFactory.java b/code/processes/crawling-process/java/nu/marginalia/crawl/warc/WarcArchiverFactory.java similarity index 100% rename from code/processes/crawling-process/src/main/java/nu/marginalia/crawl/warc/WarcArchiverFactory.java rename to code/processes/crawling-process/java/nu/marginalia/crawl/warc/WarcArchiverFactory.java diff --git 
a/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/warc/WarcArchiverIf.java b/code/processes/crawling-process/java/nu/marginalia/crawl/warc/WarcArchiverIf.java similarity index 100% rename from code/processes/crawling-process/src/main/java/nu/marginalia/crawl/warc/WarcArchiverIf.java rename to code/processes/crawling-process/java/nu/marginalia/crawl/warc/WarcArchiverIf.java diff --git a/code/processes/crawling-process/readme.md b/code/processes/crawling-process/readme.md index a595bf1d..0f72cb87 100644 --- a/code/processes/crawling-process/readme.md +++ b/code/processes/crawling-process/readme.md @@ -31,10 +31,10 @@ On top of organic links, the crawler can use sitemaps and rss-feeds to discover ## Central Classes -* [CrawlerMain](src/main/java/nu/marginalia/crawl/CrawlerMain.java) orchestrates the crawling. -* [CrawlerRetreiver](src/main/java/nu/marginalia/crawl/retreival/CrawlerRetreiver.java) +* [CrawlerMain](java/nu/marginalia/crawl/CrawlerMain.java) orchestrates the crawling. +* [CrawlerRetreiver](java/nu/marginalia/crawl/retreival/CrawlerRetreiver.java) visits known addresses from a domain and downloads each document. -* [HttpFetcher](src/main/java/nu/marginalia/crawl/retreival/fetcher/HttpFetcherImpl.java) +* [HttpFetcher](java/nu/marginalia/crawl/retreival/fetcher/HttpFetcherImpl.java) fetches URLs. 
## See Also diff --git a/code/processes/crawling-process/src/main/resources/ip-banned-cidr.txt b/code/processes/crawling-process/resources/ip-banned-cidr.txt similarity index 100% rename from code/processes/crawling-process/src/main/resources/ip-banned-cidr.txt rename to code/processes/crawling-process/resources/ip-banned-cidr.txt diff --git a/code/processes/crawling-process/src/test/java/nu/marginalia/crawl/retreival/CrawlerWarcResynchronizerTest.java b/code/processes/crawling-process/test/nu/marginalia/crawl/retreival/CrawlerWarcResynchronizerTest.java similarity index 98% rename from code/processes/crawling-process/src/test/java/nu/marginalia/crawl/retreival/CrawlerWarcResynchronizerTest.java rename to code/processes/crawling-process/test/nu/marginalia/crawl/retreival/CrawlerWarcResynchronizerTest.java index ae3d9be4..e3720941 100644 --- a/code/processes/crawling-process/src/test/java/nu/marginalia/crawl/retreival/CrawlerWarcResynchronizerTest.java +++ b/code/processes/crawling-process/test/nu/marginalia/crawl/retreival/CrawlerWarcResynchronizerTest.java @@ -15,7 +15,6 @@ import org.netpreserve.jwarc.WarcResponse; import java.io.IOException; import java.net.URISyntaxException; -import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; import java.security.NoSuchAlgorithmException; diff --git a/code/processes/crawling-process/src/test/java/nu/marginalia/crawl/retreival/DomainCrawlFrontierTest.java b/code/processes/crawling-process/test/nu/marginalia/crawl/retreival/DomainCrawlFrontierTest.java similarity index 100% rename from code/processes/crawling-process/src/test/java/nu/marginalia/crawl/retreival/DomainCrawlFrontierTest.java rename to code/processes/crawling-process/test/nu/marginalia/crawl/retreival/DomainCrawlFrontierTest.java diff --git a/code/processes/crawling-process/src/test/java/nu/marginalia/crawl/retreival/fetcher/ContentTypeProberTest.java 
b/code/processes/crawling-process/test/nu/marginalia/crawl/retreival/fetcher/ContentTypeProberTest.java similarity index 100% rename from code/processes/crawling-process/src/test/java/nu/marginalia/crawl/retreival/fetcher/ContentTypeProberTest.java rename to code/processes/crawling-process/test/nu/marginalia/crawl/retreival/fetcher/ContentTypeProberTest.java diff --git a/code/processes/crawling-process/src/test/java/nu/marginalia/crawl/retreival/fetcher/CrawledDocumentParquetRecordFileWriterTest.java b/code/processes/crawling-process/test/nu/marginalia/crawl/retreival/fetcher/CrawledDocumentParquetRecordFileWriterTest.java similarity index 100% rename from code/processes/crawling-process/src/test/java/nu/marginalia/crawl/retreival/fetcher/CrawledDocumentParquetRecordFileWriterTest.java rename to code/processes/crawling-process/test/nu/marginalia/crawl/retreival/fetcher/CrawledDocumentParquetRecordFileWriterTest.java diff --git a/code/processes/crawling-process/src/test/java/nu/marginalia/crawl/retreival/fetcher/WarcRecorderTest.java b/code/processes/crawling-process/test/nu/marginalia/crawl/retreival/fetcher/WarcRecorderTest.java similarity index 100% rename from code/processes/crawling-process/src/test/java/nu/marginalia/crawl/retreival/fetcher/WarcRecorderTest.java rename to code/processes/crawling-process/test/nu/marginalia/crawl/retreival/fetcher/WarcRecorderTest.java diff --git a/code/processes/crawling-process/src/test/java/nu/marginalia/crawl/retreival/revisit/DocumentWithReferenceTest.java b/code/processes/crawling-process/test/nu/marginalia/crawl/retreival/revisit/DocumentWithReferenceTest.java similarity index 100% rename from code/processes/crawling-process/src/test/java/nu/marginalia/crawl/retreival/revisit/DocumentWithReferenceTest.java rename to code/processes/crawling-process/test/nu/marginalia/crawl/retreival/revisit/DocumentWithReferenceTest.java diff --git 
a/code/processes/crawling-process/src/test/java/nu/marginalia/crawling/CrawlPlanLoaderTest.java b/code/processes/crawling-process/test/nu/marginalia/crawling/CrawlPlanLoaderTest.java similarity index 100% rename from code/processes/crawling-process/src/test/java/nu/marginalia/crawling/CrawlPlanLoaderTest.java rename to code/processes/crawling-process/test/nu/marginalia/crawling/CrawlPlanLoaderTest.java diff --git a/code/processes/crawling-process/src/test/java/nu/marginalia/crawling/DomainCrawlerRobotsTxtTest.java b/code/processes/crawling-process/test/nu/marginalia/crawling/DomainCrawlerRobotsTxtTest.java similarity index 100% rename from code/processes/crawling-process/src/test/java/nu/marginalia/crawling/DomainCrawlerRobotsTxtTest.java rename to code/processes/crawling-process/test/nu/marginalia/crawling/DomainCrawlerRobotsTxtTest.java diff --git a/code/processes/crawling-process/src/test/java/nu/marginalia/crawling/HttpFetcherTest.java b/code/processes/crawling-process/test/nu/marginalia/crawling/HttpFetcherTest.java similarity index 100% rename from code/processes/crawling-process/src/test/java/nu/marginalia/crawling/HttpFetcherTest.java rename to code/processes/crawling-process/test/nu/marginalia/crawling/HttpFetcherTest.java diff --git a/code/processes/crawling-process/src/test/java/nu/marginalia/crawling/LinkParserTest.java b/code/processes/crawling-process/test/nu/marginalia/crawling/LinkParserTest.java similarity index 100% rename from code/processes/crawling-process/src/test/java/nu/marginalia/crawling/LinkParserTest.java rename to code/processes/crawling-process/test/nu/marginalia/crawling/LinkParserTest.java diff --git a/code/processes/crawling-process/src/test/java/nu/marginalia/crawling/RssCrawlerTest.java b/code/processes/crawling-process/test/nu/marginalia/crawling/RssCrawlerTest.java similarity index 100% rename from code/processes/crawling-process/src/test/java/nu/marginalia/crawling/RssCrawlerTest.java rename to 
code/processes/crawling-process/test/nu/marginalia/crawling/RssCrawlerTest.java diff --git a/code/processes/crawling-process/src/test/java/nu/marginalia/crawling/retreival/CrawlerMockFetcherTest.java b/code/processes/crawling-process/test/nu/marginalia/crawling/retreival/CrawlerMockFetcherTest.java similarity index 100% rename from code/processes/crawling-process/src/test/java/nu/marginalia/crawling/retreival/CrawlerMockFetcherTest.java rename to code/processes/crawling-process/test/nu/marginalia/crawling/retreival/CrawlerMockFetcherTest.java diff --git a/code/processes/crawling-process/src/test/java/nu/marginalia/crawling/retreival/CrawlerRetreiverTest.java b/code/processes/crawling-process/test/nu/marginalia/crawling/retreival/CrawlerRetreiverTest.java similarity index 100% rename from code/processes/crawling-process/src/test/java/nu/marginalia/crawling/retreival/CrawlerRetreiverTest.java rename to code/processes/crawling-process/test/nu/marginalia/crawling/retreival/CrawlerRetreiverTest.java diff --git a/code/processes/index-constructor-process/build.gradle b/code/processes/index-constructor-process/build.gradle index e92db1b6..5e48deea 100644 --- a/code/processes/index-constructor-process/build.gradle +++ b/code/processes/index-constructor-process/build.gradle @@ -18,8 +18,10 @@ application { tasks.distZip.enabled = false +apply from: "$rootProject.projectDir/srcsets.gradle" + dependencies { - implementation project(':code:api:process-mqapi') + implementation project(':code:process-mqapi') implementation project(':code:common:process') implementation project(':code:common:service') implementation project(':code:common:db') @@ -27,10 +29,10 @@ dependencies { implementation project(':code:common:model') implementation project(':code:libraries:message-queue') - implementation project(':code:features-index:index-forward') - implementation project(':code:features-index:index-reverse') - implementation project(':code:features-index:index-journal') - implementation 
project(':code:features-index:domain-ranking') + implementation project(':code:index:query') + implementation project(':code:index:index-forward') + implementation project(':code:index:index-reverse') + implementation project(':code:index:index-journal') implementation libs.bundles.slf4j implementation libs.guice diff --git a/code/processes/index-constructor-process/src/main/java/nu/marginalia/index/IndexConstructorMain.java b/code/processes/index-constructor-process/java/nu/marginalia/index/IndexConstructorMain.java similarity index 99% rename from code/processes/index-constructor-process/src/main/java/nu/marginalia/index/IndexConstructorMain.java rename to code/processes/index-constructor-process/java/nu/marginalia/index/IndexConstructorMain.java index 779e2573..47d3fba2 100644 --- a/code/processes/index-constructor-process/src/main/java/nu/marginalia/index/IndexConstructorMain.java +++ b/code/processes/index-constructor-process/java/nu/marginalia/index/IndexConstructorMain.java @@ -6,6 +6,7 @@ import com.google.inject.Inject; import nu.marginalia.IndexLocations; import nu.marginalia.ProcessConfiguration; import nu.marginalia.ProcessConfigurationModule; +import nu.marginalia.index.domainrankings.DomainRankings; import nu.marginalia.service.ProcessMainClass; import nu.marginalia.storage.FileStorageService; import nu.marginalia.index.construction.ReverseIndexConstructor; @@ -23,7 +24,6 @@ import nu.marginalia.mq.inbox.MqSingleShotInbox; import nu.marginalia.mqapi.index.CreateIndexRequest; import nu.marginalia.mqapi.index.IndexName; import nu.marginalia.process.control.ProcessHeartbeatImpl; -import nu.marginalia.ranking.DomainRankings; import nu.marginalia.service.module.DatabaseModule; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -76,6 +76,7 @@ public class IndexConstructorMain extends ProcessMainClass { // Grace period so we don't rug pull the logger or jdbc TimeUnit.SECONDS.sleep(5); + System.exit(0); } diff --git 
a/code/processes/index-constructor-process/src/main/java/nu/marginalia/index/IndexConstructorModule.java b/code/processes/index-constructor-process/java/nu/marginalia/index/IndexConstructorModule.java similarity index 91% rename from code/processes/index-constructor-process/src/main/java/nu/marginalia/index/IndexConstructorModule.java rename to code/processes/index-constructor-process/java/nu/marginalia/index/IndexConstructorModule.java index 025e90eb..0fa3ac6d 100644 --- a/code/processes/index-constructor-process/src/main/java/nu/marginalia/index/IndexConstructorModule.java +++ b/code/processes/index-constructor-process/java/nu/marginalia/index/IndexConstructorModule.java @@ -4,7 +4,7 @@ import com.google.inject.AbstractModule; import com.google.inject.Provides; import com.google.inject.Singleton; import nu.marginalia.IndexLocations; -import nu.marginalia.ranking.DomainRankings; +import nu.marginalia.index.domainrankings.DomainRankings; import nu.marginalia.storage.FileStorageService; public class IndexConstructorModule extends AbstractModule { diff --git a/code/processes/index-constructor-process/readme.md b/code/processes/index-constructor-process/readme.md index 9457551b..6e7a46ee 100644 --- a/code/processes/index-constructor-process/readme.md +++ b/code/processes/index-constructor-process/readme.md @@ -10,11 +10,11 @@ There are three types of indexes: This is a very light-weight module that delegates the actual work to the modules: -* [features-index/index-reverse](../../features-index/index-reverse) -* [features-index/index-forward](../../features-index/index-forward) +* [features-index/index-reverse](../../index/index-reverse) +* [features-index/index-forward](../../index/index-forward) Their respective readme files contain more information about the indexes themselves and how they are constructed. 
-The process is glued together within [IndexConstructorMain](src/main/java/nu/marginalia/index/IndexConstructorMain.java), +The process is glued together within [IndexConstructorMain](java/nu/marginalia/index/IndexConstructorMain.java), which is the only class of interest in this module. diff --git a/code/processes/loading-process/build.gradle b/code/processes/loading-process/build.gradle index 116b1d59..4a2afc68 100644 --- a/code/processes/loading-process/build.gradle +++ b/code/processes/loading-process/build.gradle @@ -17,18 +17,19 @@ application { tasks.distZip.enabled = false +apply from: "$rootProject.projectDir/srcsets.gradle" + dependencies { implementation project(':code:common:process') - implementation project(':code:api:process-mqapi') - implementation project(':code:api:index-api') + implementation project(':code:process-mqapi') + implementation project(':code:index:api') implementation project(':code:common:model') implementation project(':code:common:db') implementation project(':code:common:config') implementation project(':code:common:service') implementation project(':code:common:service-discovery') - implementation project(':code:common:service-client') implementation project(':code:common:linkdb') - implementation project(':code:features-index:index-journal') + implementation project(':code:index:index-journal') implementation project(':code:libraries:message-queue') implementation project(':code:libraries:language-processing') implementation project(':third-party:commons-codec') @@ -39,6 +40,8 @@ dependencies { implementation project(':code:process-models:work-log') implementation project(':code:features-convert:keyword-extraction') + implementation project(':code:functions:link-graph:partition') + implementation libs.bundles.slf4j implementation libs.guice diff --git a/code/processes/loading-process/src/main/java/nu/marginalia/loading/LoaderIndexJournalWriter.java 
b/code/processes/loading-process/java/nu/marginalia/loading/LoaderIndexJournalWriter.java similarity index 100% rename from code/processes/loading-process/src/main/java/nu/marginalia/loading/LoaderIndexJournalWriter.java rename to code/processes/loading-process/java/nu/marginalia/loading/LoaderIndexJournalWriter.java diff --git a/code/processes/loading-process/src/main/java/nu/marginalia/loading/LoaderInputData.java b/code/processes/loading-process/java/nu/marginalia/loading/LoaderInputData.java similarity index 100% rename from code/processes/loading-process/src/main/java/nu/marginalia/loading/LoaderInputData.java rename to code/processes/loading-process/java/nu/marginalia/loading/LoaderInputData.java diff --git a/code/processes/loading-process/src/main/java/nu/marginalia/loading/LoaderMain.java b/code/processes/loading-process/java/nu/marginalia/loading/LoaderMain.java similarity index 100% rename from code/processes/loading-process/src/main/java/nu/marginalia/loading/LoaderMain.java rename to code/processes/loading-process/java/nu/marginalia/loading/LoaderMain.java diff --git a/code/processes/loading-process/src/main/java/nu/marginalia/loading/LoaderModule.java b/code/processes/loading-process/java/nu/marginalia/loading/LoaderModule.java similarity index 81% rename from code/processes/loading-process/src/main/java/nu/marginalia/loading/LoaderModule.java rename to code/processes/loading-process/java/nu/marginalia/loading/LoaderModule.java index 35c98fc6..8b4719e6 100644 --- a/code/processes/loading-process/src/main/java/nu/marginalia/loading/LoaderModule.java +++ b/code/processes/loading-process/java/nu/marginalia/loading/LoaderModule.java @@ -9,12 +9,10 @@ import com.google.inject.name.Names; import nu.marginalia.LanguageModels; import nu.marginalia.WmsaHome; import nu.marginalia.IndexLocations; -import nu.marginalia.linkdb.dlinks.DomainLinkDbWriter; +import nu.marginalia.linkgraph.io.DomainLinksWriter; import nu.marginalia.storage.FileStorageService; import 
nu.marginalia.linkdb.docs.DocumentDbWriter; import nu.marginalia.model.gson.GsonFactory; -import nu.marginalia.service.SearchServiceDescriptors; -import nu.marginalia.service.descriptor.ServiceDescriptors; import java.io.IOException; import java.nio.file.Files; @@ -30,8 +28,6 @@ public class LoaderModule extends AbstractModule { } public void configure() { - bind(ServiceDescriptors.class).toInstance(SearchServiceDescriptors.descriptors); - bind(Gson.class).toProvider(this::createGson); bind(Path.class).annotatedWith(Names.named("local-index-path")).toInstance(Path.of(System.getProperty("local-index-path", "/vol"))); bind(LanguageModels.class).toInstance(WmsaHome.getLanguageModels()); @@ -49,7 +45,7 @@ public class LoaderModule extends AbstractModule { } @Inject @Provides @Singleton - private DomainLinkDbWriter createDomainLinkdbWriter(FileStorageService service) throws SQLException, IOException { + private DomainLinksWriter createDomainLinkdbWriter(FileStorageService service) throws SQLException, IOException { Path dbPath = IndexLocations.getLinkdbWritePath(service).resolve(DOMAIN_LINKS_FILE_NAME); @@ -57,7 +53,7 @@ public class LoaderModule extends AbstractModule { Files.delete(dbPath); } - return new DomainLinkDbWriter(dbPath); + return new DomainLinksWriter(dbPath); } private Gson createGson() { diff --git a/code/processes/loading-process/src/main/java/nu/marginalia/loading/documents/DocumentLoaderService.java b/code/processes/loading-process/java/nu/marginalia/loading/documents/DocumentLoaderService.java similarity index 100% rename from code/processes/loading-process/src/main/java/nu/marginalia/loading/documents/DocumentLoaderService.java rename to code/processes/loading-process/java/nu/marginalia/loading/documents/DocumentLoaderService.java diff --git a/code/processes/loading-process/src/main/java/nu/marginalia/loading/documents/KeywordLoaderService.java b/code/processes/loading-process/java/nu/marginalia/loading/documents/KeywordLoaderService.java similarity 
index 100% rename from code/processes/loading-process/src/main/java/nu/marginalia/loading/documents/KeywordLoaderService.java rename to code/processes/loading-process/java/nu/marginalia/loading/documents/KeywordLoaderService.java diff --git a/code/processes/loading-process/src/main/java/nu/marginalia/loading/domains/DomainIdRegistry.java b/code/processes/loading-process/java/nu/marginalia/loading/domains/DomainIdRegistry.java similarity index 100% rename from code/processes/loading-process/src/main/java/nu/marginalia/loading/domains/DomainIdRegistry.java rename to code/processes/loading-process/java/nu/marginalia/loading/domains/DomainIdRegistry.java diff --git a/code/processes/loading-process/src/main/java/nu/marginalia/loading/domains/DomainLoaderService.java b/code/processes/loading-process/java/nu/marginalia/loading/domains/DomainLoaderService.java similarity index 100% rename from code/processes/loading-process/src/main/java/nu/marginalia/loading/domains/DomainLoaderService.java rename to code/processes/loading-process/java/nu/marginalia/loading/domains/DomainLoaderService.java diff --git a/code/processes/loading-process/src/main/java/nu/marginalia/loading/links/DomainLinksLoaderService.java b/code/processes/loading-process/java/nu/marginalia/loading/links/DomainLinksLoaderService.java similarity index 92% rename from code/processes/loading-process/src/main/java/nu/marginalia/loading/links/DomainLinksLoaderService.java rename to code/processes/loading-process/java/nu/marginalia/loading/links/DomainLinksLoaderService.java index 8cf42218..06bf4c95 100644 --- a/code/processes/loading-process/src/main/java/nu/marginalia/loading/links/DomainLinksLoaderService.java +++ b/code/processes/loading-process/java/nu/marginalia/loading/links/DomainLinksLoaderService.java @@ -4,7 +4,7 @@ import com.google.inject.Inject; import com.google.inject.Singleton; import lombok.SneakyThrows; import nu.marginalia.io.processed.DomainLinkRecordParquetFileReader; -import 
nu.marginalia.linkdb.dlinks.DomainLinkDbWriter; +import nu.marginalia.linkgraph.io.DomainLinksWriter; import nu.marginalia.loading.LoaderInputData; import nu.marginalia.loading.domains.DomainIdRegistry; import nu.marginalia.model.processed.DomainLinkRecord; @@ -20,10 +20,10 @@ public class DomainLinksLoaderService { private static final Logger logger = LoggerFactory.getLogger(DomainLinksLoaderService.class); - private final DomainLinkDbWriter domainLinkDbWriter; + private final DomainLinksWriter domainLinkDbWriter; @Inject - public DomainLinksLoaderService(DomainLinkDbWriter domainLinkDbWriter) { + public DomainLinksLoaderService(DomainLinksWriter domainLinkDbWriter) { this.domainLinkDbWriter = domainLinkDbWriter; } diff --git a/code/processes/loading-process/readme.md b/code/processes/loading-process/readme.md index ec0c12fd..4a5cf735 100644 --- a/code/processes/loading-process/readme.md +++ b/code/processes/loading-process/readme.md @@ -6,4 +6,4 @@ the index-service. ## Central Classes -* [LoaderMain](src/main/java/nu/marginalia/loading/LoaderMain.java) main class. \ No newline at end of file +* [LoaderMain](java/nu/marginalia/loading/LoaderMain.java) main class. 
\ No newline at end of file diff --git a/code/processes/loading-process/src/test/java/nu/marginalia/loader/DbTestUtil.java b/code/processes/loading-process/test/nu/marginalia/loader/DbTestUtil.java similarity index 100% rename from code/processes/loading-process/src/test/java/nu/marginalia/loader/DbTestUtil.java rename to code/processes/loading-process/test/nu/marginalia/loader/DbTestUtil.java diff --git a/code/processes/loading-process/src/test/java/nu/marginalia/loading/domains/DomainLoaderServiceTest.java b/code/processes/loading-process/test/nu/marginalia/loading/domains/DomainLoaderServiceTest.java similarity index 96% rename from code/processes/loading-process/src/test/java/nu/marginalia/loading/domains/DomainLoaderServiceTest.java rename to code/processes/loading-process/test/nu/marginalia/loading/domains/DomainLoaderServiceTest.java index a751ecdf..fda0e9b6 100644 --- a/code/processes/loading-process/src/test/java/nu/marginalia/loading/domains/DomainLoaderServiceTest.java +++ b/code/processes/loading-process/test/nu/marginalia/loading/domains/DomainLoaderServiceTest.java @@ -11,11 +11,8 @@ import nu.marginalia.model.processed.DomainLinkRecord; import nu.marginalia.model.processed.DomainRecord; import nu.marginalia.process.control.ProcessAdHocTaskHeartbeat; import nu.marginalia.process.control.ProcessHeartbeat; -import nu.marginalia.test.TestMigrationLoader; import org.junit.jupiter.api.*; import org.mockito.Mockito; -import org.testcontainers.containers.MariaDBContainer; -import org.testcontainers.junit.jupiter.Container; import org.testcontainers.junit.jupiter.Testcontainers; import java.io.IOException; diff --git a/code/processes/loading-process/src/test/java/nu/marginalia/loading/loader/LoaderIndexJournalWriterTest.java b/code/processes/loading-process/test/nu/marginalia/loading/loader/LoaderIndexJournalWriterTest.java similarity index 100% rename from 
code/processes/loading-process/src/test/java/nu/marginalia/loading/loader/LoaderIndexJournalWriterTest.java rename to code/processes/loading-process/test/nu/marginalia/loading/loader/LoaderIndexJournalWriterTest.java diff --git a/code/processes/readme.md b/code/processes/readme.md index acfe5a39..3bdc0970 100644 --- a/code/processes/readme.md +++ b/code/processes/readme.md @@ -17,7 +17,7 @@ described in [processed-data](../process-models/processed-data/). The [loading-process](loading-process/) reads the processed data. -It has creates an [index journal](../features-index/index-journal), +It has creates an [index journal](../index/index-journal), a [link database](../common/linkdb), and loads domains and domain-links into the [MariaDB database](../common/db). diff --git a/code/processes/test-data/build.gradle b/code/processes/test-data/build.gradle index 462be8c3..4f184c80 100644 --- a/code/processes/test-data/build.gradle +++ b/code/processes/test-data/build.gradle @@ -10,5 +10,7 @@ java { } +apply from: "$rootProject.projectDir/srcsets.gradle" + dependencies { } diff --git a/code/processes/test-data/src/main/java/nu/marginalia/test/CommonTestData.java b/code/processes/test-data/java/nu/marginalia/test/CommonTestData.java similarity index 100% rename from code/processes/test-data/src/main/java/nu/marginalia/test/CommonTestData.java rename to code/processes/test-data/java/nu/marginalia/test/CommonTestData.java diff --git a/code/processes/test-data/src/main/resources/mock-crawl-data/discourse/grid.html b/code/processes/test-data/resources/mock-crawl-data/discourse/grid.html similarity index 100% rename from code/processes/test-data/src/main/resources/mock-crawl-data/discourse/grid.html rename to code/processes/test-data/resources/mock-crawl-data/discourse/grid.html diff --git a/code/processes/test-data/src/main/resources/mock-crawl-data/discourse/index.html b/code/processes/test-data/resources/mock-crawl-data/discourse/index.html similarity index 100% rename from 
code/processes/test-data/src/main/resources/mock-crawl-data/discourse/index.html rename to code/processes/test-data/resources/mock-crawl-data/discourse/index.html diff --git a/code/processes/test-data/src/main/resources/mock-crawl-data/discourse/telegram.html b/code/processes/test-data/resources/mock-crawl-data/discourse/telegram.html similarity index 100% rename from code/processes/test-data/src/main/resources/mock-crawl-data/discourse/telegram.html rename to code/processes/test-data/resources/mock-crawl-data/discourse/telegram.html diff --git a/code/processes/test-data/src/main/resources/mock-crawl-data/javadoc/stream.html b/code/processes/test-data/resources/mock-crawl-data/javadoc/stream.html similarity index 100% rename from code/processes/test-data/src/main/resources/mock-crawl-data/javadoc/stream.html rename to code/processes/test-data/resources/mock-crawl-data/javadoc/stream.html diff --git a/code/processes/test-data/src/main/resources/mock-crawl-data/lemmy/108995.html b/code/processes/test-data/resources/mock-crawl-data/lemmy/108995.html similarity index 100% rename from code/processes/test-data/src/main/resources/mock-crawl-data/lemmy/108995.html rename to code/processes/test-data/resources/mock-crawl-data/lemmy/108995.html diff --git a/code/processes/test-data/src/main/resources/mock-crawl-data/lemmy/c_startrek.html b/code/processes/test-data/resources/mock-crawl-data/lemmy/c_startrek.html similarity index 100% rename from code/processes/test-data/src/main/resources/mock-crawl-data/lemmy/c_startrek.html rename to code/processes/test-data/resources/mock-crawl-data/lemmy/c_startrek.html diff --git a/code/processes/test-data/src/main/resources/mock-crawl-data/lemmy/index.html b/code/processes/test-data/resources/mock-crawl-data/lemmy/index.html similarity index 100% rename from code/processes/test-data/src/main/resources/mock-crawl-data/lemmy/index.html rename to code/processes/test-data/resources/mock-crawl-data/lemmy/index.html diff --git 
a/code/processes/test-data/src/main/resources/mock-crawl-data/mediawiki/doom1.html b/code/processes/test-data/resources/mock-crawl-data/mediawiki/doom1.html similarity index 100% rename from code/processes/test-data/src/main/resources/mock-crawl-data/mediawiki/doom1.html rename to code/processes/test-data/resources/mock-crawl-data/mediawiki/doom1.html diff --git a/code/processes/test-data/src/main/resources/mock-crawl-data/mediawiki/doom2.html b/code/processes/test-data/resources/mock-crawl-data/mediawiki/doom2.html similarity index 100% rename from code/processes/test-data/src/main/resources/mock-crawl-data/mediawiki/doom2.html rename to code/processes/test-data/resources/mock-crawl-data/mediawiki/doom2.html diff --git a/code/processes/test-data/src/main/resources/mock-crawl-data/mediawiki/index.html b/code/processes/test-data/resources/mock-crawl-data/mediawiki/index.html similarity index 100% rename from code/processes/test-data/src/main/resources/mock-crawl-data/mediawiki/index.html rename to code/processes/test-data/resources/mock-crawl-data/mediawiki/index.html diff --git a/code/processes/test-data/src/main/resources/mock-crawl-data/xenforo/thread.html b/code/processes/test-data/resources/mock-crawl-data/xenforo/thread.html similarity index 100% rename from code/processes/test-data/src/main/resources/mock-crawl-data/xenforo/thread.html rename to code/processes/test-data/resources/mock-crawl-data/xenforo/thread.html diff --git a/code/processes/website-adjacencies-calculator/build.gradle b/code/processes/website-adjacencies-calculator/build.gradle index 113e784a..6019d1dd 100644 --- a/code/processes/website-adjacencies-calculator/build.gradle +++ b/code/processes/website-adjacencies-calculator/build.gradle @@ -17,13 +17,15 @@ application { tasks.distZip.enabled = false +apply from: "$rootProject.projectDir/srcsets.gradle" + dependencies { implementation project(':code:common:model') implementation project(':code:common:db') implementation 
project(':code:common:process') - implementation project(':code:common:service-client') + implementation project(':code:common:service-discovery') implementation project(':code:common:service') - implementation project(':code:api:query-api') + implementation project(':code:functions:link-graph:api') implementation libs.bundles.slf4j diff --git a/code/processes/website-adjacencies-calculator/src/main/java/nu/marginalia/adjacencies/AdjacenciesData.java b/code/processes/website-adjacencies-calculator/java/nu/marginalia/adjacencies/AdjacenciesData.java similarity index 95% rename from code/processes/website-adjacencies-calculator/src/main/java/nu/marginalia/adjacencies/AdjacenciesData.java rename to code/processes/website-adjacencies-calculator/java/nu/marginalia/adjacencies/AdjacenciesData.java index 61c2ceee..f52a3161 100644 --- a/code/processes/website-adjacencies-calculator/src/main/java/nu/marginalia/adjacencies/AdjacenciesData.java +++ b/code/processes/website-adjacencies-calculator/java/nu/marginalia/adjacencies/AdjacenciesData.java @@ -4,7 +4,7 @@ import gnu.trove.list.TIntList; import gnu.trove.list.array.TIntArrayList; import gnu.trove.map.hash.TIntObjectHashMap; import gnu.trove.set.hash.TIntHashSet; -import nu.marginalia.query.client.QueryClient; +import nu.marginalia.api.linkgraph.AggregateLinkGraphClient; import org.roaringbitmap.RoaringBitmap; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -35,14 +35,15 @@ public class AdjacenciesData { return ret; } - public AdjacenciesData(QueryClient queryClient, + public AdjacenciesData(AggregateLinkGraphClient linksClient, DomainAliases aliases) { logger.info("Loading adjacency data"); Map tmpMapDtoS = new HashMap<>(100_000); int count = 0; - var allLinks = queryClient.getAllDomainLinks(); + var allLinks = linksClient.getAllDomainLinks(); + for (var iter = allLinks.iterator();;count++) { if (!iter.advance()) { break; diff --git 
a/code/processes/website-adjacencies-calculator/src/main/java/nu/marginalia/adjacencies/AdjacenciesLoader.java b/code/processes/website-adjacencies-calculator/java/nu/marginalia/adjacencies/AdjacenciesLoader.java similarity index 100% rename from code/processes/website-adjacencies-calculator/src/main/java/nu/marginalia/adjacencies/AdjacenciesLoader.java rename to code/processes/website-adjacencies-calculator/java/nu/marginalia/adjacencies/AdjacenciesLoader.java diff --git a/code/processes/website-adjacencies-calculator/src/main/java/nu/marginalia/adjacencies/DomainAliases.java b/code/processes/website-adjacencies-calculator/java/nu/marginalia/adjacencies/DomainAliases.java similarity index 100% rename from code/processes/website-adjacencies-calculator/src/main/java/nu/marginalia/adjacencies/DomainAliases.java rename to code/processes/website-adjacencies-calculator/java/nu/marginalia/adjacencies/DomainAliases.java diff --git a/code/processes/website-adjacencies-calculator/src/main/java/nu/marginalia/adjacencies/SparseBitVector.java b/code/processes/website-adjacencies-calculator/java/nu/marginalia/adjacencies/SparseBitVector.java similarity index 100% rename from code/processes/website-adjacencies-calculator/src/main/java/nu/marginalia/adjacencies/SparseBitVector.java rename to code/processes/website-adjacencies-calculator/java/nu/marginalia/adjacencies/SparseBitVector.java diff --git a/code/processes/website-adjacencies-calculator/src/main/java/nu/marginalia/adjacencies/WebsiteAdjacenciesCalculator.java b/code/processes/website-adjacencies-calculator/java/nu/marginalia/adjacencies/WebsiteAdjacenciesCalculator.java similarity index 74% rename from code/processes/website-adjacencies-calculator/src/main/java/nu/marginalia/adjacencies/WebsiteAdjacenciesCalculator.java rename to code/processes/website-adjacencies-calculator/java/nu/marginalia/adjacencies/WebsiteAdjacenciesCalculator.java index df87d03b..92aae06a 100644 --- 
a/code/processes/website-adjacencies-calculator/src/main/java/nu/marginalia/adjacencies/WebsiteAdjacenciesCalculator.java +++ b/code/processes/website-adjacencies-calculator/java/nu/marginalia/adjacencies/WebsiteAdjacenciesCalculator.java @@ -1,40 +1,42 @@ package nu.marginalia.adjacencies; +import com.google.inject.Guice; import com.zaxxer.hikari.HikariDataSource; import lombok.SneakyThrows; import nu.marginalia.ProcessConfiguration; +import nu.marginalia.api.linkgraph.AggregateLinkGraphClient; import nu.marginalia.db.DbDomainQueries; import nu.marginalia.model.EdgeDomain; import nu.marginalia.process.control.ProcessHeartbeat; import nu.marginalia.process.control.ProcessHeartbeatImpl; -import nu.marginalia.query.client.QueryClient; -import nu.marginalia.service.MainClass; +import nu.marginalia.service.ProcessMainClass; +import nu.marginalia.service.ServiceDiscoveryModule; import nu.marginalia.service.module.DatabaseModule; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.sql.SQLException; +import java.time.Duration; import java.util.*; import java.util.concurrent.Executors; -import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; import java.util.function.Consumer; import java.util.stream.IntStream; import static nu.marginalia.adjacencies.SparseBitVector.*; -public class WebsiteAdjacenciesCalculator extends MainClass { +public class WebsiteAdjacenciesCalculator extends ProcessMainClass { private final HikariDataSource dataSource; public AdjacenciesData adjacenciesData; public DomainAliases domainAliases; private static final Logger logger = LoggerFactory.getLogger(WebsiteAdjacenciesCalculator.class); float[] weights; - public WebsiteAdjacenciesCalculator(QueryClient queryClient, HikariDataSource dataSource) throws SQLException { + public WebsiteAdjacenciesCalculator(AggregateLinkGraphClient domainLinksClient, HikariDataSource dataSource) throws SQLException { this.dataSource = dataSource; domainAliases = new 
DomainAliases(dataSource); - adjacenciesData = new AdjacenciesData(queryClient, domainAliases); + adjacenciesData = new AdjacenciesData(domainLinksClient, domainAliases); weights = adjacenciesData.getWeights(); } @@ -81,48 +83,6 @@ public class WebsiteAdjacenciesCalculator extends MainClass { loader.stop(); } - private static class ProgressPrinter { - - private final AtomicInteger progress; - private final int total; - volatile boolean running = false; - private Thread printerThread; - - private ProgressPrinter(int total) { - this.total = total; - this.progress = new AtomicInteger(0); - } - - public void advance() { - progress.incrementAndGet(); - } - - private void run() { - while (running) { - double value = 100 * progress.get() / (double) total; - System.out.printf("\u001b[2K\r%3.2f%%", value); - try { - TimeUnit.MILLISECONDS.sleep(100); - } catch (InterruptedException e) { - return; - } - } - } - public void start() { - running = true; - printerThread = new Thread(this::run); - printerThread.setDaemon(true); - printerThread.start(); - } - - public void stop() throws InterruptedException { - running = false; - printerThread.join(); - System.out.println(); - } - } - - public void findAdjacent(int domainId, Consumer andThen) { findAdjacentDtoS(domainId, andThen); } @@ -138,8 +98,9 @@ public class WebsiteAdjacenciesCalculator extends MainClass { return weightedProduct(weights, a, b) / Math.sqrt(a.mulAndSum(weights) * b.mulAndSum(weights)); } - public record DomainSimilarities(int domainId, List similarities) {}; - public record DomainSimilarity(int domainId, double value) {}; + public record DomainSimilarities(int domainId, List similarities) {} + + public record DomainSimilarity(int domainId, double value) {} @SneakyThrows private void findAdjacentDtoS(int domainId, Consumer andThen) { @@ -186,13 +147,20 @@ public class WebsiteAdjacenciesCalculator extends MainClass { } - public static void main(String[] args) throws SQLException { - DatabaseModule dm = new 
DatabaseModule(false); + public static void main(String[] args) throws SQLException, InterruptedException { + var injector = Guice.createInjector( + new DatabaseModule(false), + new ServiceDiscoveryModule()); - var dataSource = dm.provideConnection(); - var qc = new QueryClient(); - var main = new WebsiteAdjacenciesCalculator(qc, dataSource); + var dataSource = injector.getInstance(HikariDataSource.class); + var lc = injector.getInstance(AggregateLinkGraphClient.class); + + if (!lc.waitReady(Duration.ofSeconds(30))) { + throw new IllegalStateException("Failed to connect to domain-links"); + } + + var main = new WebsiteAdjacenciesCalculator(lc, dataSource); if (args.length == 1 && "load".equals(args[0])) { var processHeartbeat = new ProcessHeartbeatImpl( @@ -228,13 +196,6 @@ public class WebsiteAdjacenciesCalculator extends MainClass { } } -// -// if (args.length == 0) { -// main.loadAll(); -// } -// else { -// main.tryDomains(args); -// } } } diff --git a/code/processes/website-adjacencies-calculator/src/test/java/nu/marginalia/adjacencies/AdjacenciesLoaderTest.java b/code/processes/website-adjacencies-calculator/test/nu/marginalia/adjacencies/AdjacenciesLoaderTest.java similarity index 98% rename from code/processes/website-adjacencies-calculator/src/test/java/nu/marginalia/adjacencies/AdjacenciesLoaderTest.java rename to code/processes/website-adjacencies-calculator/test/nu/marginalia/adjacencies/AdjacenciesLoaderTest.java index 34fa618e..4ce29080 100644 --- a/code/processes/website-adjacencies-calculator/src/test/java/nu/marginalia/adjacencies/AdjacenciesLoaderTest.java +++ b/code/processes/website-adjacencies-calculator/test/nu/marginalia/adjacencies/AdjacenciesLoaderTest.java @@ -70,7 +70,7 @@ public class AdjacenciesLoaderTest { } try (var conn = dataSource.getConnection(); - var stmt = conn.prepareStatement("SELECT * FROM EC_DOMAIN_NEIGHBORS_2 WHERE DOMAIN_ID=1"); + var stmt = conn.prepareStatement("SELECT * FROM EC_DOMAIN_NEIGHBORS_2 WHERE DOMAIN_ID=1") ) { 
var rs = stmt.executeQuery(); Assertions.assertTrue(rs.next()); diff --git a/code/processes/website-adjacencies-calculator/src/test/java/nu/marginalia/adjacencies/SparseBitVectorTest.java b/code/processes/website-adjacencies-calculator/test/nu/marginalia/adjacencies/SparseBitVectorTest.java similarity index 100% rename from code/processes/website-adjacencies-calculator/src/test/java/nu/marginalia/adjacencies/SparseBitVectorTest.java rename to code/processes/website-adjacencies-calculator/test/nu/marginalia/adjacencies/SparseBitVectorTest.java diff --git a/code/readme.md b/code/readme.md index 1792ec5f..d75912b2 100644 --- a/code/readme.md +++ b/code/readme.md @@ -23,18 +23,38 @@ eligible index services. The control service is responsible for distributing co service, and for monitoring the health of the system. It also offers a web interface for operating the system. ### Services + * [core services](services-core/) Most of these services are stateful, memory hungry, and doing heavy lifting. * * [control](services-core/control-service) * * [query](services-core/query-service) +* * * Exposes the [functions/link-graph](functions/link-graph) subsystem +* * * Exposes the [functions/search-query](functions/search-query) subsystem * * [index](services-core/index-service) +* * * Exposes the [index](index) subsystem +* * * Exposes the [functions/link-graph](functions/link-graph) subsystem * * [executor](services-core/executor-service) +* * * Exposes the [execution](execution) subsystem * * [assistant](services-core/assistant-service) +* * * Exposes the [functions/math](functions/math) subsystem +* * * Exposes the [functions/domain-info](functions/domain-info) subsystem * [application services](services-application/) Mostly stateless gateways providing access to the core services. 
-* * [api](services-application/api-service) - public API +* * [api](services-application/api-service) - public API gateway * * [search](services-application/search-service) - marginalia search application -* * [dating](services-application/dating-service) - [https://explore.marginalia.nu/](https://explore.marginalia.nu/) -* * [explorer](services-application/explorer-service) - [https://explore2.marginalia.nu/](https://explore2.marginalia.nu/) -* an [internal API](api/) +* * [dating](services-application/dating-service) - [https://explore.marginalia.nu/](https://explore.marginalia.nu/) +* * [explorer](services-application/explorer-service) - [https://explore2.marginalia.nu/](https://explore2.marginalia.nu/) + +The system uses a service registry to find the services. The service registry is based on ZooKeeper, +and is a separate service. The registry keeps track of APIs rather than processes. This means that +the system is easy to reconfigure. The same code can in principle be run as a micro-service +mesh or as a monolith. + +This is an unusual architecture, but it has the benefit that you don't need to think too much about +the layout of the system. You can just request an API and talk to it. Because of this, several of the +services have almost no code of their own. They merely import a library and expose it as a service. + +These skeleton services are marked with (S). + +Services that expose HTTP endpoints tend to have more code. They are marked with (G). ### Processes @@ -55,7 +75,6 @@ but isolated. 
* [features-search](features-search) * [features-crawl](features-crawl) * [features-convert](features-convert) -* [features-index](features-index) ### Libraries and primitives diff --git a/code/services-application/api-service/build.gradle b/code/services-application/api-service/build.gradle index 53fb91a3..ff27b83e 100644 --- a/code/services-application/api-service/build.gradle +++ b/code/services-application/api-service/build.gradle @@ -2,8 +2,8 @@ plugins { id 'java' id 'application' - id 'com.palantir.docker' version '0.35.0' id 'jvm-test-suite' + id 'com.google.cloud.tools.jib' version '3.4.0' } java { @@ -19,7 +19,8 @@ application { tasks.distZip.enabled = false -apply from: "$rootProject.projectDir/docker-service.gradle" +apply from: "$rootProject.projectDir/srcsets.gradle" +apply from: "$rootProject.projectDir/docker.gradle" dependencies { implementation project(':code:common:db') @@ -27,17 +28,14 @@ dependencies { implementation project(':code:common:service') implementation project(':code:common:config') implementation project(':code:common:service-discovery') - implementation project(':code:common:service-client') - implementation project(':code:api:query-api') - implementation project(':code:api:index-api') - implementation project(':code:features-index:index-query') + implementation project(':code:functions:search-query:api') + implementation project(':code:index:query') implementation libs.bundles.slf4j implementation libs.prometheus implementation libs.notnull implementation libs.guice - implementation libs.rxjava implementation libs.spark implementation libs.opencsv implementation libs.trove diff --git a/code/services-application/api-service/src/main/java/nu/marginalia/api/ApiMain.java b/code/services-application/api-service/java/nu/marginalia/api/ApiMain.java similarity index 82% rename from code/services-application/api-service/src/main/java/nu/marginalia/api/ApiMain.java rename to 
code/services-application/api-service/java/nu/marginalia/api/ApiMain.java index aea51959..cca846d4 100644 --- a/code/services-application/api-service/src/main/java/nu/marginalia/api/ApiMain.java +++ b/code/services-application/api-service/java/nu/marginalia/api/ApiMain.java @@ -4,7 +4,7 @@ import com.google.inject.Guice; import com.google.inject.Inject; import com.google.inject.Injector; import nu.marginalia.service.MainClass; -import nu.marginalia.service.SearchServiceDescriptors; +import nu.marginalia.service.ServiceDiscoveryModule; import nu.marginalia.service.id.ServiceId; import nu.marginalia.service.module.ServiceConfigurationModule; import nu.marginalia.service.module.DatabaseModule; @@ -21,7 +21,8 @@ public class ApiMain extends MainClass { Injector injector = Guice.createInjector( new DatabaseModule(false), - new ServiceConfigurationModule(SearchServiceDescriptors.descriptors, ServiceId.Api)); + new ServiceDiscoveryModule(), + new ServiceConfigurationModule(ServiceId.Api)); injector.getInstance(ApiMain.class); injector.getInstance(Initialization.class).setReady(); } diff --git a/code/services-application/api-service/src/main/java/nu/marginalia/api/ApiSearchOperator.java b/code/services-application/api-service/java/nu/marginalia/api/ApiSearchOperator.java similarity index 86% rename from code/services-application/api-service/src/main/java/nu/marginalia/api/ApiSearchOperator.java rename to code/services-application/api-service/java/nu/marginalia/api/ApiSearchOperator.java index a1180f4c..25ba4945 100644 --- a/code/services-application/api-service/src/main/java/nu/marginalia/api/ApiSearchOperator.java +++ b/code/services-application/api-service/java/nu/marginalia/api/ApiSearchOperator.java @@ -5,14 +5,12 @@ import com.google.inject.Singleton; import nu.marginalia.api.model.ApiSearchResult; import nu.marginalia.api.model.ApiSearchResultQueryDetails; import nu.marginalia.api.model.ApiSearchResults; -import nu.marginalia.client.Context; -import 
nu.marginalia.index.client.model.query.SearchSetIdentifier; -import nu.marginalia.index.client.model.results.DecoratedSearchResultItem; -import nu.marginalia.index.client.model.results.SearchResultKeywordScore; +import nu.marginalia.api.searchquery.QueryClient; +import nu.marginalia.api.searchquery.model.query.QueryParams; +import nu.marginalia.api.searchquery.model.query.SearchSetIdentifier; +import nu.marginalia.api.searchquery.model.results.*; import nu.marginalia.index.query.limit.QueryLimits; import nu.marginalia.model.idx.WordMetadata; -import nu.marginalia.query.client.QueryClient; -import nu.marginalia.query.model.QueryParams; import java.util.ArrayList; import java.util.Comparator; @@ -29,12 +27,11 @@ public class ApiSearchOperator { this.queryClient = queryClient; } - public ApiSearchResults query(Context context, - String query, + public ApiSearchResults query(String query, int count, int index) { - var rsp = queryClient.search(context, createParams(query, count, index)); + var rsp = queryClient.search(createParams(query, count, index)); return new ApiSearchResults("RESTRICTED", query, rsp.results() diff --git a/code/services-application/api-service/src/main/java/nu/marginalia/api/ApiService.java b/code/services-application/api-service/java/nu/marginalia/api/ApiService.java similarity index 96% rename from code/services-application/api-service/src/main/java/nu/marginalia/api/ApiService.java rename to code/services-application/api-service/java/nu/marginalia/api/ApiService.java index 35d9a7a7..85a19473 100644 --- a/code/services-application/api-service/src/main/java/nu/marginalia/api/ApiService.java +++ b/code/services-application/api-service/java/nu/marginalia/api/ApiService.java @@ -9,9 +9,7 @@ import nu.marginalia.api.model.ApiSearchResults; import nu.marginalia.api.svc.LicenseService; import nu.marginalia.api.svc.RateLimiterService; import nu.marginalia.api.svc.ResponseCache; -import nu.marginalia.client.Context; import 
nu.marginalia.model.gson.GsonFactory; -import nu.marginalia.query.client.QueryClient; import nu.marginalia.service.server.*; import nu.marginalia.service.server.mq.MqRequest; import org.slf4j.Logger; @@ -126,7 +124,7 @@ public class ApiService extends Service { .labels(license.key) .time(() -> searchOperator - .query(Context.fromRequest(request), query, count, index) + .query(query, count, index) .withLicense(license.getLicense()) ); } diff --git a/code/services-application/api-service/src/main/java/nu/marginalia/api/model/ApiLicense.java b/code/services-application/api-service/java/nu/marginalia/api/model/ApiLicense.java similarity index 100% rename from code/services-application/api-service/src/main/java/nu/marginalia/api/model/ApiLicense.java rename to code/services-application/api-service/java/nu/marginalia/api/model/ApiLicense.java diff --git a/code/services-application/api-service/src/main/java/nu/marginalia/api/model/ApiSearchResult.java b/code/services-application/api-service/java/nu/marginalia/api/model/ApiSearchResult.java similarity index 100% rename from code/services-application/api-service/src/main/java/nu/marginalia/api/model/ApiSearchResult.java rename to code/services-application/api-service/java/nu/marginalia/api/model/ApiSearchResult.java diff --git a/code/services-application/api-service/src/main/java/nu/marginalia/api/model/ApiSearchResultQueryDetails.java b/code/services-application/api-service/java/nu/marginalia/api/model/ApiSearchResultQueryDetails.java similarity index 100% rename from code/services-application/api-service/src/main/java/nu/marginalia/api/model/ApiSearchResultQueryDetails.java rename to code/services-application/api-service/java/nu/marginalia/api/model/ApiSearchResultQueryDetails.java diff --git a/code/services-application/api-service/src/main/java/nu/marginalia/api/model/ApiSearchResults.java b/code/services-application/api-service/java/nu/marginalia/api/model/ApiSearchResults.java similarity index 100% rename from 
code/services-application/api-service/src/main/java/nu/marginalia/api/model/ApiSearchResults.java rename to code/services-application/api-service/java/nu/marginalia/api/model/ApiSearchResults.java diff --git a/code/services-application/api-service/src/main/java/nu/marginalia/api/svc/LicenseService.java b/code/services-application/api-service/java/nu/marginalia/api/svc/LicenseService.java similarity index 100% rename from code/services-application/api-service/src/main/java/nu/marginalia/api/svc/LicenseService.java rename to code/services-application/api-service/java/nu/marginalia/api/svc/LicenseService.java diff --git a/code/services-application/api-service/src/main/java/nu/marginalia/api/svc/RateLimiterService.java b/code/services-application/api-service/java/nu/marginalia/api/svc/RateLimiterService.java similarity index 100% rename from code/services-application/api-service/src/main/java/nu/marginalia/api/svc/RateLimiterService.java rename to code/services-application/api-service/java/nu/marginalia/api/svc/RateLimiterService.java diff --git a/code/services-application/api-service/src/main/java/nu/marginalia/api/svc/ResponseCache.java b/code/services-application/api-service/java/nu/marginalia/api/svc/ResponseCache.java similarity index 100% rename from code/services-application/api-service/src/main/java/nu/marginalia/api/svc/ResponseCache.java rename to code/services-application/api-service/java/nu/marginalia/api/svc/ResponseCache.java diff --git a/code/services-application/api-service/readme.md b/code/services-application/api-service/readme.md index 33b36b08..8e48c9bb 100644 --- a/code/services-application/api-service/readme.md +++ b/code/services-application/api-service/readme.md @@ -4,4 +4,4 @@ The API service acts as a gateway for public API requests, it deals with API key ## Central Classes -* [ApiService](src/main/java/nu/marginalia/api/ApiService.java) handles REST requests and delegates to the appropriate handling classes. 
\ No newline at end of file +* [ApiService](java/nu/marginalia/api/ApiService.java) handles REST requests and delegates to the appropriate handling classes. \ No newline at end of file diff --git a/code/services-application/api-service/src/test/java/nu/marginalia/api/svc/LicenseServiceTest.java b/code/services-application/api-service/test/nu/marginalia/api/svc/LicenseServiceTest.java similarity index 100% rename from code/services-application/api-service/src/test/java/nu/marginalia/api/svc/LicenseServiceTest.java rename to code/services-application/api-service/test/nu/marginalia/api/svc/LicenseServiceTest.java diff --git a/code/services-application/api-service/src/test/java/nu/marginalia/api/svc/RateLimiterServiceTest.java b/code/services-application/api-service/test/nu/marginalia/api/svc/RateLimiterServiceTest.java similarity index 100% rename from code/services-application/api-service/src/test/java/nu/marginalia/api/svc/RateLimiterServiceTest.java rename to code/services-application/api-service/test/nu/marginalia/api/svc/RateLimiterServiceTest.java diff --git a/code/services-application/api-service/src/test/java/nu/marginalia/api/svc/ResponseCacheTest.java b/code/services-application/api-service/test/nu/marginalia/api/svc/ResponseCacheTest.java similarity index 100% rename from code/services-application/api-service/src/test/java/nu/marginalia/api/svc/ResponseCacheTest.java rename to code/services-application/api-service/test/nu/marginalia/api/svc/ResponseCacheTest.java diff --git a/code/services-application/dating-service/build.gradle b/code/services-application/dating-service/build.gradle index 6ad45833..72a3057a 100644 --- a/code/services-application/dating-service/build.gradle +++ b/code/services-application/dating-service/build.gradle @@ -2,8 +2,8 @@ plugins { id 'java' id 'application' - id 'com.palantir.docker' version '0.35.0' id 'jvm-test-suite' + id 'com.google.cloud.tools.jib' version '3.4.0' } application { @@ -13,19 +13,21 @@ application { 
tasks.distZip.enabled = false -apply from: "$rootProject.projectDir/docker-service.gradle" +apply from: "$rootProject.projectDir/docker.gradle" + java { toolchain { languageVersion.set(JavaLanguageVersion.of(21)) } } +apply from: "$rootProject.projectDir/srcsets.gradle" + dependencies { implementation project(':code:common:db') implementation project(':code:common:model') implementation project(':code:common:service') implementation project(':code:common:service-discovery') - implementation project(':code:common:service-client') implementation project(':code:common:renderer') implementation project(':code:features-search:screenshots') implementation project(':code:features-search:random-websites') @@ -36,7 +38,6 @@ dependencies { implementation libs.prometheus implementation libs.notnull implementation libs.guice - implementation libs.rxjava implementation libs.spark implementation libs.opencsv implementation libs.trove diff --git a/code/services-application/dating-service/src/main/java/nu/marginalia/dating/DatingMain.java b/code/services-application/dating-service/java/nu/marginalia/dating/DatingMain.java similarity index 85% rename from code/services-application/dating-service/src/main/java/nu/marginalia/dating/DatingMain.java rename to code/services-application/dating-service/java/nu/marginalia/dating/DatingMain.java index 00f31cb5..d5cf6f4b 100644 --- a/code/services-application/dating-service/src/main/java/nu/marginalia/dating/DatingMain.java +++ b/code/services-application/dating-service/java/nu/marginalia/dating/DatingMain.java @@ -4,7 +4,7 @@ import com.google.inject.Guice; import com.google.inject.Inject; import com.google.inject.Injector; import nu.marginalia.service.MainClass; -import nu.marginalia.service.SearchServiceDescriptors; +import nu.marginalia.service.ServiceDiscoveryModule; import nu.marginalia.service.id.ServiceId; import nu.marginalia.service.module.ServiceConfigurationModule; import nu.marginalia.service.module.DatabaseModule; @@ -26,7 
+26,8 @@ public class DatingMain extends MainClass { Injector injector = Guice.createInjector( new DatingModule(), - new ServiceConfigurationModule(SearchServiceDescriptors.descriptors, ServiceId.Dating), + new ServiceDiscoveryModule(), + new ServiceConfigurationModule(ServiceId.Dating), new DatabaseModule(false) ); diff --git a/code/services-application/dating-service/src/main/java/nu/marginalia/dating/DatingModule.java b/code/services-application/dating-service/java/nu/marginalia/dating/DatingModule.java similarity index 100% rename from code/services-application/dating-service/src/main/java/nu/marginalia/dating/DatingModule.java rename to code/services-application/dating-service/java/nu/marginalia/dating/DatingModule.java diff --git a/code/services-application/dating-service/src/main/java/nu/marginalia/dating/DatingService.java b/code/services-application/dating-service/java/nu/marginalia/dating/DatingService.java similarity index 100% rename from code/services-application/dating-service/src/main/java/nu/marginalia/dating/DatingService.java rename to code/services-application/dating-service/java/nu/marginalia/dating/DatingService.java diff --git a/code/services-application/dating-service/src/main/java/nu/marginalia/dating/DatingSessionObject.java b/code/services-application/dating-service/java/nu/marginalia/dating/DatingSessionObject.java similarity index 100% rename from code/services-application/dating-service/src/main/java/nu/marginalia/dating/DatingSessionObject.java rename to code/services-application/dating-service/java/nu/marginalia/dating/DatingSessionObject.java diff --git a/code/services-application/dating-service/src/main/resources/static/dating/robots.txt b/code/services-application/dating-service/resources/static/dating/robots.txt similarity index 100% rename from code/services-application/dating-service/src/main/resources/static/dating/robots.txt rename to code/services-application/dating-service/resources/static/dating/robots.txt diff --git 
a/code/services-application/dating-service/src/main/resources/templates/dating/dating-view.hdb b/code/services-application/dating-service/resources/templates/dating/dating-view.hdb similarity index 100% rename from code/services-application/dating-service/src/main/resources/templates/dating/dating-view.hdb rename to code/services-application/dating-service/resources/templates/dating/dating-view.hdb diff --git a/code/services-application/explorer-service/build.gradle b/code/services-application/explorer-service/build.gradle index 807ca8a1..6d2e56ac 100644 --- a/code/services-application/explorer-service/build.gradle +++ b/code/services-application/explorer-service/build.gradle @@ -2,8 +2,8 @@ plugins { id 'java' id 'application' - id 'com.palantir.docker' version '0.35.0' id 'jvm-test-suite' + id 'com.google.cloud.tools.jib' version '3.4.0' } application { @@ -13,18 +13,20 @@ application { tasks.distZip.enabled = false -apply from: "$rootProject.projectDir/docker-service.gradle" +apply from: "$rootProject.projectDir/docker.gradle" + java { toolchain { languageVersion.set(JavaLanguageVersion.of(21)) } } +apply from: "$rootProject.projectDir/srcsets.gradle" + dependencies { implementation project(':code:common:model') implementation project(':code:common:service') implementation project(':code:common:service-discovery') - implementation project(':code:common:service-client') implementation project(':code:common:renderer') implementation project(':code:features-search:random-websites') @@ -34,7 +36,6 @@ dependencies { implementation libs.prometheus implementation libs.notnull implementation libs.guice - implementation libs.rxjava implementation libs.spark implementation libs.opencsv implementation libs.trove diff --git a/code/services-application/explorer-service/src/main/java/nu/marginalia/explorer/ExplorerMain.java b/code/services-application/explorer-service/java/nu/marginalia/explorer/ExplorerMain.java similarity index 85% rename from 
code/services-application/explorer-service/src/main/java/nu/marginalia/explorer/ExplorerMain.java rename to code/services-application/explorer-service/java/nu/marginalia/explorer/ExplorerMain.java index 2b47657b..e48320f7 100644 --- a/code/services-application/explorer-service/src/main/java/nu/marginalia/explorer/ExplorerMain.java +++ b/code/services-application/explorer-service/java/nu/marginalia/explorer/ExplorerMain.java @@ -4,7 +4,7 @@ import com.google.inject.Guice; import com.google.inject.Inject; import com.google.inject.Injector; import nu.marginalia.service.MainClass; -import nu.marginalia.service.SearchServiceDescriptors; +import nu.marginalia.service.ServiceDiscoveryModule; import nu.marginalia.service.id.ServiceId; import nu.marginalia.service.module.ServiceConfigurationModule; import nu.marginalia.service.module.DatabaseModule; @@ -25,7 +25,8 @@ public class ExplorerMain extends MainClass { Spark.staticFileLocation("/static/explore/"); Injector injector = Guice.createInjector( - new ServiceConfigurationModule(SearchServiceDescriptors.descriptors, ServiceId.Explorer), + new ServiceConfigurationModule(ServiceId.Explorer), + new ServiceDiscoveryModule(), new ExplorerModule(), new DatabaseModule(false) ); diff --git a/code/services-application/explorer-service/src/main/java/nu/marginalia/explorer/ExplorerModule.java b/code/services-application/explorer-service/java/nu/marginalia/explorer/ExplorerModule.java similarity index 100% rename from code/services-application/explorer-service/src/main/java/nu/marginalia/explorer/ExplorerModule.java rename to code/services-application/explorer-service/java/nu/marginalia/explorer/ExplorerModule.java diff --git a/code/services-application/explorer-service/src/main/java/nu/marginalia/explorer/ExplorerService.java b/code/services-application/explorer-service/java/nu/marginalia/explorer/ExplorerService.java similarity index 99% rename from 
code/services-application/explorer-service/src/main/java/nu/marginalia/explorer/ExplorerService.java rename to code/services-application/explorer-service/java/nu/marginalia/explorer/ExplorerService.java index a753eed9..0b6ae0de 100644 --- a/code/services-application/explorer-service/src/main/java/nu/marginalia/explorer/ExplorerService.java +++ b/code/services-application/explorer-service/java/nu/marginalia/explorer/ExplorerService.java @@ -136,9 +136,9 @@ public class ExplorerService extends Service { GROUP BY NV.DOMAIN_ID ORDER BY NV.RELATEDNESS DESC """ - ); + ) - ) { + ) { stmt.setInt(1, domainIdInformation.domainId); stmt.setInt(2, domainIdInformation.aliasId); diff --git a/code/services-application/explorer-service/src/main/resources/static/explore/robots.txt b/code/services-application/explorer-service/resources/static/explore/robots.txt similarity index 100% rename from code/services-application/explorer-service/src/main/resources/static/explore/robots.txt rename to code/services-application/explorer-service/resources/static/explore/robots.txt diff --git a/code/services-application/explorer-service/src/main/resources/static/explore/style.css b/code/services-application/explorer-service/resources/static/explore/style.css similarity index 100% rename from code/services-application/explorer-service/src/main/resources/static/explore/style.css rename to code/services-application/explorer-service/resources/static/explore/style.css diff --git a/code/services-application/explorer-service/src/main/resources/templates/explorer/explorer-about.hdb b/code/services-application/explorer-service/resources/templates/explorer/explorer-about.hdb similarity index 100% rename from code/services-application/explorer-service/src/main/resources/templates/explorer/explorer-about.hdb rename to code/services-application/explorer-service/resources/templates/explorer/explorer-about.hdb diff --git 
a/code/services-application/explorer-service/src/main/resources/templates/explorer/explorer-messages.hdb b/code/services-application/explorer-service/resources/templates/explorer/explorer-messages.hdb similarity index 100% rename from code/services-application/explorer-service/src/main/resources/templates/explorer/explorer-messages.hdb rename to code/services-application/explorer-service/resources/templates/explorer/explorer-messages.hdb diff --git a/code/services-application/explorer-service/src/main/resources/templates/explorer/explorer-results.hdb b/code/services-application/explorer-service/resources/templates/explorer/explorer-results.hdb similarity index 100% rename from code/services-application/explorer-service/src/main/resources/templates/explorer/explorer-results.hdb rename to code/services-application/explorer-service/resources/templates/explorer/explorer-results.hdb diff --git a/code/services-application/explorer-service/src/main/resources/templates/explorer/explorer-search.hdb b/code/services-application/explorer-service/resources/templates/explorer/explorer-search.hdb similarity index 100% rename from code/services-application/explorer-service/src/main/resources/templates/explorer/explorer-search.hdb rename to code/services-application/explorer-service/resources/templates/explorer/explorer-search.hdb diff --git a/code/services-application/explorer-service/src/main/resources/templates/explorer/explorer.hdb b/code/services-application/explorer-service/resources/templates/explorer/explorer.hdb similarity index 100% rename from code/services-application/explorer-service/src/main/resources/templates/explorer/explorer.hdb rename to code/services-application/explorer-service/resources/templates/explorer/explorer.hdb diff --git a/code/services-application/search-service/build.gradle b/code/services-application/search-service/build.gradle index ee504bcb..d5285334 100644 --- a/code/services-application/search-service/build.gradle +++ 
b/code/services-application/search-service/build.gradle @@ -2,9 +2,10 @@ plugins { id 'java' id 'io.freefair.sass-base' version '8.4' id 'io.freefair.sass-java' version '8.4' - id 'com.palantir.docker' version '0.35.0' id 'application' id 'jvm-test-suite' + + id 'com.google.cloud.tools.jib' version '3.4.0' } application { @@ -14,7 +15,6 @@ application { tasks.distZip.enabled = false -apply from: "$rootProject.projectDir/docker-service.gradle" java { toolchain { @@ -26,23 +26,29 @@ sass { sourceMapEmbed = true outputStyle = EXPANDED } + +apply from: "$rootProject.projectDir/srcsets.gradle" +apply from: "$rootProject.projectDir/docker.gradle" + dependencies { implementation project(':code:common:db') implementation project(':code:common:model') implementation project(':code:common:service') implementation project(':code:common:config') - implementation project(':code:features-index:index-query') + implementation project(':code:index:query') implementation project(':code:libraries:easy-lsh') implementation project(':code:libraries:language-processing') implementation project(':code:libraries:braille-block-punch-cards') implementation project(':code:libraries:term-frequency-dict') - implementation project(':code:api:assistant-api') - implementation project(':code:api:query-api') - implementation project(':code:api:index-api') + implementation project(':code:functions:math:api') + implementation project(':code:functions:domain-info:api') + implementation project(':code:functions:search-query:api') + + + implementation project(':code:index:api') implementation project(':code:common:service-discovery') - implementation project(':code:common:service-client') implementation project(':code:common:renderer') implementation project(':code:features-search:screenshots') @@ -55,7 +61,6 @@ dependencies { implementation libs.prometheus implementation libs.notnull implementation libs.guice - implementation libs.rxjava implementation libs.handlebars implementation libs.spark 
implementation libs.opencsv diff --git a/code/services-application/search-service/src/main/java/nu/marginalia/search/SearchHandlebarsConfigurator.java b/code/services-application/search-service/java/nu/marginalia/search/SearchHandlebarsConfigurator.java similarity index 100% rename from code/services-application/search-service/src/main/java/nu/marginalia/search/SearchHandlebarsConfigurator.java rename to code/services-application/search-service/java/nu/marginalia/search/SearchHandlebarsConfigurator.java diff --git a/code/services-application/search-service/src/main/java/nu/marginalia/search/SearchMain.java b/code/services-application/search-service/java/nu/marginalia/search/SearchMain.java similarity index 85% rename from code/services-application/search-service/src/main/java/nu/marginalia/search/SearchMain.java rename to code/services-application/search-service/java/nu/marginalia/search/SearchMain.java index 30dc1723..f799759c 100644 --- a/code/services-application/search-service/src/main/java/nu/marginalia/search/SearchMain.java +++ b/code/services-application/search-service/java/nu/marginalia/search/SearchMain.java @@ -4,7 +4,7 @@ import com.google.inject.Guice; import com.google.inject.Inject; import com.google.inject.Injector; import nu.marginalia.service.MainClass; -import nu.marginalia.service.SearchServiceDescriptors; +import nu.marginalia.service.ServiceDiscoveryModule; import nu.marginalia.service.id.ServiceId; import nu.marginalia.service.module.ServiceConfigurationModule; import nu.marginalia.service.module.DatabaseModule; @@ -27,7 +27,8 @@ public class SearchMain extends MainClass { Injector injector = Guice.createInjector( new SearchModule(), - new ServiceConfigurationModule(SearchServiceDescriptors.descriptors, ServiceId.Search), + new ServiceConfigurationModule(ServiceId.Search), + new ServiceDiscoveryModule(), new DatabaseModule(false) ); diff --git a/code/services-application/search-service/src/main/java/nu/marginalia/search/SearchModule.java 
b/code/services-application/search-service/java/nu/marginalia/search/SearchModule.java similarity index 93% rename from code/services-application/search-service/src/main/java/nu/marginalia/search/SearchModule.java rename to code/services-application/search-service/java/nu/marginalia/search/SearchModule.java index 4a32f9e1..52d1cbea 100644 --- a/code/services-application/search-service/src/main/java/nu/marginalia/search/SearchModule.java +++ b/code/services-application/search-service/java/nu/marginalia/search/SearchModule.java @@ -25,7 +25,7 @@ public class SearchModule extends AbstractModule { @Provides public FeedlotClient provideFeedlotClient() { return new FeedlotClient( - System.getProperty("ext-svc-feedlot-host", "feedlot"), + System.getProperty("ext-svc-feedlot-bindAddress", "feedlot"), Integer.getInteger("ext-svc-feedlot-port", 80), GsonFactory.get(), Duration.ofMillis(250), diff --git a/code/services-application/search-service/src/main/java/nu/marginalia/search/SearchOperator.java b/code/services-application/search-service/java/nu/marginalia/search/SearchOperator.java similarity index 56% rename from code/services-application/search-service/src/main/java/nu/marginalia/search/SearchOperator.java rename to code/services-application/search-service/java/nu/marginalia/search/SearchOperator.java index b3597950..e07186b6 100644 --- a/code/services-application/search-service/src/main/java/nu/marginalia/search/SearchOperator.java +++ b/code/services-application/search-service/java/nu/marginalia/search/SearchOperator.java @@ -2,17 +2,15 @@ package nu.marginalia.search; import com.google.inject.Inject; import com.google.inject.Singleton; -import io.reactivex.rxjava3.core.Observable; -import io.reactivex.rxjava3.schedulers.Schedulers; +import lombok.SneakyThrows; import nu.marginalia.WebsiteUrl; -import nu.marginalia.assistant.client.AssistantClient; +import nu.marginalia.api.math.MathClient; +import nu.marginalia.api.searchquery.QueryClient; import 
nu.marginalia.model.EdgeDomain; import nu.marginalia.db.DbDomainQueries; -import nu.marginalia.query.client.QueryClient; -import nu.marginalia.query.model.QueryResponse; +import nu.marginalia.api.searchquery.model.query.QueryResponse; import nu.marginalia.search.command.SearchParameters; import nu.marginalia.search.model.*; -import nu.marginalia.client.Context; import nu.marginalia.search.svc.SearchQueryIndexService; import nu.marginalia.search.svc.SearchUnitConversionService; import org.apache.logging.log4j.util.Strings; @@ -22,6 +20,8 @@ import org.slf4j.Marker; import org.slf4j.MarkerFactory; import javax.annotation.Nullable; +import java.lang.ref.WeakReference; +import java.time.Duration; import java.util.*; import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; @@ -35,7 +35,7 @@ public class SearchOperator { // Marker for filtering out sensitive content from the persistent logs private final Marker queryMarker = MarkerFactory.getMarker("QUERY"); - private final AssistantClient assistantClient; + private final MathClient mathClient; private final DbDomainQueries domainQueries; private final QueryClient queryClient; private final SearchQueryIndexService searchQueryService; @@ -45,7 +45,7 @@ public class SearchOperator { @Inject - public SearchOperator(AssistantClient assistantClient, + public SearchOperator(MathClient mathClient, DbDomainQueries domainQueries, QueryClient queryClient, SearchQueryIndexService searchQueryService, @@ -54,7 +54,7 @@ public class SearchOperator { SearchUnitConversionService searchUnitConversionService) { - this.assistantClient = assistantClient; + this.mathClient = mathClient; this.domainQueries = domainQueries; this.queryClient = queryClient; @@ -64,57 +64,86 @@ public class SearchOperator { this.searchUnitConversionService = searchUnitConversionService; } - public List doSiteSearch(Context ctx, - String domain, + public List doSiteSearch(String domain, int count) { var queryParams = 
paramFactory.forSiteSearch(domain, count); - var queryResponse = queryClient.search(ctx, queryParams); + var queryResponse = queryClient.search(queryParams); return searchQueryService.getResultsFromQuery(queryResponse); } - public List doBacklinkSearch(Context ctx, - String domain) { + public List doBacklinkSearch(String domain) { var queryParams = paramFactory.forBacklinkSearch(domain); - var queryResponse = queryClient.search(ctx, queryParams); + var queryResponse = queryClient.search(queryParams); return searchQueryService.getResultsFromQuery(queryResponse); } - public List doLinkSearch(Context context, String source, String dest) { + public List doLinkSearch(String source, String dest) { var queryParams = paramFactory.forLinkSearch(source, dest); - var queryResponse = queryClient.search(context, queryParams); + var queryResponse = queryClient.search(queryParams); return searchQueryService.getResultsFromQuery(queryResponse); } - public DecoratedSearchResults doSearch(Context ctx, SearchParameters userParams) { + private volatile WeakReference> oldResults = new WeakReference<>(Collections.emptyList()); - Future eval = searchUnitConversionService.tryEval(ctx, userParams.query()); - var queryParams = paramFactory.forRegularSearch(userParams); - var queryResponse = queryClient.search(ctx, queryParams); + public DecoratedSearchResults doSearch(SearchParameters userParams) { - List queryResults = searchQueryService.getResultsFromQuery(queryResponse); + Future eval = searchUnitConversionService.tryEval(userParams.query()); - logger.info(queryMarker, "Human terms: {}", Strings.join(queryResponse.searchTermsHuman(), ',')); - logger.info(queryMarker, "Search Result Count: {}", queryResults.size()); + List clusteredResults; + QueryResponse queryResponse; + List problems; + String evalResult; + String focusDomain; - String evalResult = getFutureOrDefault(eval, ""); + if (userParams.poisonResults() && Math.random() > 0.1) { - List clusteredResults = SearchResultClusterer - 
.selectStrategy(queryResponse) - .clusterResults(queryResults, 25); + // For botnet users, we return random old query results. This is to make + // it harder for them to figure out if they are being rate limited. + + clusteredResults = new ArrayList<>(Objects.requireNonNullElse(oldResults.get(), List.of())); + + // Shuffle the results to make it harder to distinguish + Collections.shuffle(clusteredResults); + + problems = List.of(); + evalResult = ""; + focusDomain = ""; + } else { + var queryParams = paramFactory.forRegularSearch(userParams); + queryResponse = queryClient.search(queryParams); + var queryResults = searchQueryService.getResultsFromQuery(queryResponse); + + logger.info(queryMarker, "Human terms: {}", Strings.join(queryResponse.searchTermsHuman(), ',')); + logger.info(queryMarker, "Search Result Count: {}", queryResults.size()); + + evalResult = getFutureOrDefault(eval, ""); + + clusteredResults = SearchResultClusterer + .selectStrategy(queryResponse) + .clusterResults(queryResults, 25); + + focusDomain = queryResponse.domain(); + problems = getProblems(evalResult, queryResults, queryResponse); + + if (userParams.poisonResults()) { + // Save the results to feed to the botnet + oldResults = new WeakReference<>(clusteredResults); + } + } return DecoratedSearchResults.builder() .params(userParams) - .problems(getProblems(ctx, evalResult, queryResults, queryResponse)) + .problems(problems) .evalResult(evalResult) .results(clusteredResults) .filters(new SearchFilters(websiteUrl, userParams)) - .focusDomain(queryResponse.domain()) - .focusDomainId(getDomainId(queryResponse.domain())) + .focusDomain(focusDomain) + .focusDomainId(getDomainId(focusDomain)) .build(); } @@ -139,13 +168,13 @@ public class SearchOperator { return domainQueries.tryGetDomainId(new EdgeDomain(domain)).orElse(-1); } - private List getProblems(Context ctx, String evalResult, List queryResults, QueryResponse response) { + private List getProblems(String evalResult, List queryResults, 
QueryResponse response) { final List problems = new ArrayList<>(response.problems()); boolean siteSearch = response.domain() != null; if (!siteSearch) { if (queryResults.size() <= 5 && null == evalResult) { - spellCheckTerms(ctx, response).forEach(problems::add); + spellCheckTerms(response); } if (queryResults.size() <= 5) { @@ -163,30 +192,22 @@ public class SearchOperator { } - private Iterable spellCheckTerms(Context ctx, QueryResponse response) { - return Observable.fromIterable(response.searchTermsHuman()) - .subscribeOn(Schedulers.io()) - .flatMap(term -> assistantClient.spellCheck(ctx, term) - .onErrorReturn(e -> Collections.emptyList()) - .filter(results -> hasSpellSuggestions(term, results)) - .map(suggestions -> searchTermToProblemDescription(term, suggestions)) - ) - .blockingIterable(); - } + @SneakyThrows + private void spellCheckTerms(QueryResponse response) { + var suggestions = mathClient + .spellCheck(response.searchTermsHuman(), Duration.ofMillis(20)); - private boolean hasSpellSuggestions(String term, List results) { - if (results.size() > 1) { - return true; - } - else if (results.size() == 1) { - return !term.equalsIgnoreCase(results.get(0)); - } - return false; + suggestions.entrySet() + .stream() + .filter(e -> e.getValue().size() > 1) + .map(e -> searchTermToProblemDescription(e.getKey(), e.getValue())) + .forEach(response.problems()::add); } private String searchTermToProblemDescription(String term, List suggestions) { - return "\"" + term + "\" could be spelled " + - suggestions.stream().map(s -> "\""+s+"\"").collect(Collectors.joining(", ")); + String suggestionsStr = suggestions.stream().map(s -> STR."\"\{s}\"").collect(Collectors.joining(", ")); + + return STR."\"\{term}\" could be spelled \{suggestionsStr}"; } diff --git a/code/services-application/search-service/src/main/java/nu/marginalia/search/SearchQueryParamFactory.java b/code/services-application/search-service/java/nu/marginalia/search/SearchQueryParamFactory.java similarity 
index 88% rename from code/services-application/search-service/src/main/java/nu/marginalia/search/SearchQueryParamFactory.java rename to code/services-application/search-service/java/nu/marginalia/search/SearchQueryParamFactory.java index 59cd1088..15c8567e 100644 --- a/code/services-application/search-service/src/main/java/nu/marginalia/search/SearchQueryParamFactory.java +++ b/code/services-application/search-service/java/nu/marginalia/search/SearchQueryParamFactory.java @@ -1,12 +1,12 @@ package nu.marginalia.search; -import nu.marginalia.index.client.model.query.SearchSetIdentifier; -import nu.marginalia.index.client.model.query.SearchSubquery; -import nu.marginalia.index.client.model.results.ResultRankingParameters; +import nu.marginalia.api.searchquery.model.query.SearchSetIdentifier; +import nu.marginalia.api.searchquery.model.query.SearchSubquery; +import nu.marginalia.api.searchquery.model.results.ResultRankingParameters; import nu.marginalia.index.query.limit.QueryLimits; import nu.marginalia.index.query.limit.QueryStrategy; import nu.marginalia.index.query.limit.SpecificationLimit; -import nu.marginalia.query.model.QueryParams; +import nu.marginalia.api.searchquery.model.query.QueryParams; import nu.marginalia.search.command.SearchParameters; import java.util.List; @@ -33,7 +33,6 @@ public class SearchQueryParamFactory { profile.getYearLimit(), profile.getSizeLimit(), SpecificationLimit.none(), - SpecificationLimit.none(), List.of(), new QueryLimits(5, 100, 200, 8192), profile.searchSetIdentifier.name(), @@ -54,7 +53,6 @@ public class SearchQueryParamFactory { SpecificationLimit.none(), SpecificationLimit.none(), SpecificationLimit.none(), - SpecificationLimit.none(), List.of(), new QueryLimits(count, count, 100, 512), SearchSetIdentifier.NONE.name(), @@ -74,7 +72,6 @@ public class SearchQueryParamFactory { SpecificationLimit.none(), SpecificationLimit.none(), SpecificationLimit.none(), - SpecificationLimit.none(), List.of(), new QueryLimits(100, 100, 
100, 512), SearchSetIdentifier.NONE.name(), @@ -94,7 +91,6 @@ public class SearchQueryParamFactory { SpecificationLimit.none(), SpecificationLimit.none(), SpecificationLimit.none(), - SpecificationLimit.none(), List.of(), new QueryLimits(100, 100, 100, 512), SearchSetIdentifier.NONE.name(), diff --git a/code/services-application/search-service/src/main/java/nu/marginalia/search/SearchResultClusterer.java b/code/services-application/search-service/java/nu/marginalia/search/SearchResultClusterer.java similarity index 96% rename from code/services-application/search-service/src/main/java/nu/marginalia/search/SearchResultClusterer.java rename to code/services-application/search-service/java/nu/marginalia/search/SearchResultClusterer.java index e5b61788..4e4cd086 100644 --- a/code/services-application/search-service/src/main/java/nu/marginalia/search/SearchResultClusterer.java +++ b/code/services-application/search-service/java/nu/marginalia/search/SearchResultClusterer.java @@ -1,6 +1,6 @@ package nu.marginalia.search; -import nu.marginalia.query.model.QueryResponse; +import nu.marginalia.api.searchquery.model.query.QueryResponse; import nu.marginalia.search.model.ClusteredUrlDetails; import nu.marginalia.search.model.UrlDetails; diff --git a/code/services-application/search-service/src/main/java/nu/marginalia/search/SearchService.java b/code/services-application/search-service/java/nu/marginalia/search/SearchService.java similarity index 96% rename from code/services-application/search-service/src/main/java/nu/marginalia/search/SearchService.java rename to code/services-application/search-service/java/nu/marginalia/search/SearchService.java index 509f015d..8e8594be 100644 --- a/code/services-application/search-service/src/main/java/nu/marginalia/search/SearchService.java +++ b/code/services-application/search-service/java/nu/marginalia/search/SearchService.java @@ -5,8 +5,6 @@ import io.prometheus.client.Counter; import io.prometheus.client.Histogram; import 
lombok.SneakyThrows; import nu.marginalia.WebsiteUrl; -import nu.marginalia.client.Context; -import nu.marginalia.search.svc.SearchFrontPageService; import nu.marginalia.search.svc.*; import nu.marginalia.service.server.*; import org.slf4j.Logger; @@ -75,7 +73,7 @@ public class SearchService extends Service { Spark.exception(Exception.class, (e,p,q) -> { logger.error("Error during processing", e); wmsa_search_service_error_count.labels(p.pathInfo(), p.requestMethod()).inc(); - errorPageService.serveError(Context.fromRequest(p), p, q); + errorPageService.serveError(p, q); }); Spark.awaitInitialization(); diff --git a/code/services-application/search-service/src/main/java/nu/marginalia/search/command/CommandEvaluator.java b/code/services-application/search-service/java/nu/marginalia/search/command/CommandEvaluator.java similarity index 78% rename from code/services-application/search-service/src/main/java/nu/marginalia/search/command/CommandEvaluator.java rename to code/services-application/search-service/java/nu/marginalia/search/command/CommandEvaluator.java index 53f8a56e..eb352a93 100644 --- a/code/services-application/search-service/src/main/java/nu/marginalia/search/command/CommandEvaluator.java +++ b/code/services-application/search-service/java/nu/marginalia/search/command/CommandEvaluator.java @@ -2,7 +2,6 @@ package nu.marginalia.search.command; import com.google.inject.Inject; import nu.marginalia.search.command.commands.*; -import nu.marginalia.client.Context; import spark.Response; import java.util.ArrayList; @@ -31,14 +30,14 @@ public class CommandEvaluator { defaultCommand = search; } - public Object eval(Context ctx, Response response, SearchParameters parameters) { + public Object eval(Response response, SearchParameters parameters) { for (var cmd : specialCommands) { - var maybe = cmd.process(ctx, response, parameters); + var maybe = cmd.process(response, parameters); if (maybe.isPresent()) return maybe.get(); } - return defaultCommand.process(ctx, 
response, parameters).orElse(""); + return defaultCommand.process(response, parameters).orElse(""); } } diff --git a/code/services-application/search-service/src/main/java/nu/marginalia/search/command/SearchAdtechParameter.java b/code/services-application/search-service/java/nu/marginalia/search/command/SearchAdtechParameter.java similarity index 92% rename from code/services-application/search-service/src/main/java/nu/marginalia/search/command/SearchAdtechParameter.java rename to code/services-application/search-service/java/nu/marginalia/search/command/SearchAdtechParameter.java index 0551533e..9e8383f3 100644 --- a/code/services-application/search-service/src/main/java/nu/marginalia/search/command/SearchAdtechParameter.java +++ b/code/services-application/search-service/java/nu/marginalia/search/command/SearchAdtechParameter.java @@ -1,6 +1,6 @@ package nu.marginalia.search.command; -import nu.marginalia.index.client.model.query.SearchSubquery; +import nu.marginalia.api.searchquery.model.query.SearchSubquery; import javax.annotation.Nullable; import java.util.Arrays; diff --git a/code/services-application/search-service/src/main/java/nu/marginalia/search/command/SearchCommandInterface.java b/code/services-application/search-service/java/nu/marginalia/search/command/SearchCommandInterface.java similarity index 51% rename from code/services-application/search-service/src/main/java/nu/marginalia/search/command/SearchCommandInterface.java rename to code/services-application/search-service/java/nu/marginalia/search/command/SearchCommandInterface.java index c2693dd3..d69bacbd 100644 --- a/code/services-application/search-service/src/main/java/nu/marginalia/search/command/SearchCommandInterface.java +++ b/code/services-application/search-service/java/nu/marginalia/search/command/SearchCommandInterface.java @@ -1,11 +1,10 @@ package nu.marginalia.search.command; -import nu.marginalia.client.Context; import spark.Response; import java.util.Optional; public interface 
SearchCommandInterface { - Optional process(Context ctx, Response response, SearchParameters parameters); + Optional process(Response response, SearchParameters parameters); } diff --git a/code/services-application/search-service/src/main/java/nu/marginalia/search/command/SearchJsParameter.java b/code/services-application/search-service/java/nu/marginalia/search/command/SearchJsParameter.java similarity index 92% rename from code/services-application/search-service/src/main/java/nu/marginalia/search/command/SearchJsParameter.java rename to code/services-application/search-service/java/nu/marginalia/search/command/SearchJsParameter.java index ab67486c..6c8634ac 100644 --- a/code/services-application/search-service/src/main/java/nu/marginalia/search/command/SearchJsParameter.java +++ b/code/services-application/search-service/java/nu/marginalia/search/command/SearchJsParameter.java @@ -1,6 +1,6 @@ package nu.marginalia.search.command; -import nu.marginalia.index.client.model.query.SearchSubquery; +import nu.marginalia.api.searchquery.model.query.SearchSubquery; import javax.annotation.Nullable; import java.util.Arrays; diff --git a/code/services-application/search-service/src/main/java/nu/marginalia/search/command/SearchParameters.java b/code/services-application/search-service/java/nu/marginalia/search/command/SearchParameters.java similarity index 88% rename from code/services-application/search-service/src/main/java/nu/marginalia/search/command/SearchParameters.java rename to code/services-application/search-service/java/nu/marginalia/search/command/SearchParameters.java index eed5b451..9c47419d 100644 --- a/code/services-application/search-service/src/main/java/nu/marginalia/search/command/SearchParameters.java +++ b/code/services-application/search-service/java/nu/marginalia/search/command/SearchParameters.java @@ -1,7 +1,7 @@ package nu.marginalia.search.command; import nu.marginalia.WebsiteUrl; -import 
nu.marginalia.index.client.model.results.ResultRankingParameters; +import nu.marginalia.api.searchquery.model.results.ResultRankingParameters; import nu.marginalia.index.query.limit.QueryStrategy; import nu.marginalia.index.query.limit.SpecificationLimit; import nu.marginalia.search.model.SearchProfile; @@ -16,29 +16,30 @@ public record SearchParameters(String query, SearchJsParameter js, SearchRecentParameter recent, SearchTitleParameter searchTitle, - SearchAdtechParameter adtech + SearchAdtechParameter adtech, + boolean poisonResults ) { public String profileStr() { return profile.filterId; } public SearchParameters withProfile(SearchProfile profile) { - return new SearchParameters(query, profile, js, recent, searchTitle, adtech); + return new SearchParameters(query, profile, js, recent, searchTitle, adtech, poisonResults); } public SearchParameters withJs(SearchJsParameter js) { - return new SearchParameters(query, profile, js, recent, searchTitle, adtech); + return new SearchParameters(query, profile, js, recent, searchTitle, adtech, poisonResults); } public SearchParameters withAdtech(SearchAdtechParameter adtech) { - return new SearchParameters(query, profile, js, recent, searchTitle, adtech); + return new SearchParameters(query, profile, js, recent, searchTitle, adtech, poisonResults); } public SearchParameters withRecent(SearchRecentParameter recent) { - return new SearchParameters(query, profile, js, recent, searchTitle, adtech); + return new SearchParameters(query, profile, js, recent, searchTitle, adtech, poisonResults); } public SearchParameters withTitle(SearchTitleParameter title) { - return new SearchParameters(query, profile, js, recent, title, adtech); + return new SearchParameters(query, profile, js, recent, title, adtech, poisonResults); } public String renderUrl(WebsiteUrl baseUrl) { diff --git a/code/services-application/search-service/src/main/java/nu/marginalia/search/command/SearchRecentParameter.java 
b/code/services-application/search-service/java/nu/marginalia/search/command/SearchRecentParameter.java similarity index 100% rename from code/services-application/search-service/src/main/java/nu/marginalia/search/command/SearchRecentParameter.java rename to code/services-application/search-service/java/nu/marginalia/search/command/SearchRecentParameter.java diff --git a/code/services-application/search-service/src/main/java/nu/marginalia/search/command/SearchTitleParameter.java b/code/services-application/search-service/java/nu/marginalia/search/command/SearchTitleParameter.java similarity index 100% rename from code/services-application/search-service/src/main/java/nu/marginalia/search/command/SearchTitleParameter.java rename to code/services-application/search-service/java/nu/marginalia/search/command/SearchTitleParameter.java diff --git a/code/services-application/search-service/src/main/java/nu/marginalia/search/command/commands/BangCommand.java b/code/services-application/search-service/java/nu/marginalia/search/command/commands/BangCommand.java similarity index 95% rename from code/services-application/search-service/src/main/java/nu/marginalia/search/command/commands/BangCommand.java rename to code/services-application/search-service/java/nu/marginalia/search/command/commands/BangCommand.java index 5dbbec1c..92f493c5 100644 --- a/code/services-application/search-service/src/main/java/nu/marginalia/search/command/commands/BangCommand.java +++ b/code/services-application/search-service/java/nu/marginalia/search/command/commands/BangCommand.java @@ -3,7 +3,6 @@ package nu.marginalia.search.command.commands; import com.google.inject.Inject; import nu.marginalia.search.command.SearchCommandInterface; import nu.marginalia.search.command.SearchParameters; -import nu.marginalia.client.Context; import nu.marginalia.search.exceptions.RedirectException; import spark.Response; @@ -25,7 +24,7 @@ public class BangCommand implements SearchCommandInterface { } @Override - 
public Optional process(Context ctx, Response response, SearchParameters parameters) { + public Optional process(Response response, SearchParameters parameters) { for (var entry : bangsToPattern.entrySet()) { String bangPattern = entry.getKey(); diff --git a/code/services-application/search-service/src/main/java/nu/marginalia/search/command/commands/BrowseCommand.java b/code/services-application/search-service/java/nu/marginalia/search/command/commands/BrowseCommand.java similarity index 87% rename from code/services-application/search-service/src/main/java/nu/marginalia/search/command/commands/BrowseCommand.java rename to code/services-application/search-service/java/nu/marginalia/search/command/commands/BrowseCommand.java index aa6b19ea..a889ec3d 100644 --- a/code/services-application/search-service/src/main/java/nu/marginalia/search/command/commands/BrowseCommand.java +++ b/code/services-application/search-service/java/nu/marginalia/search/command/commands/BrowseCommand.java @@ -2,7 +2,6 @@ package nu.marginalia.search.command.commands; import com.google.inject.Inject; import nu.marginalia.browse.model.BrowseResultSet; -import nu.marginalia.client.Context; import nu.marginalia.renderer.MustacheRenderer; import nu.marginalia.renderer.RendererFactory; import nu.marginalia.search.command.SearchCommandInterface; @@ -35,12 +34,12 @@ public class BrowseCommand implements SearchCommandInterface { } @Override - public Optional process(Context ctx, Response response, SearchParameters parameters) { + public Optional process(Response response, SearchParameters parameters) { if (!queryPatternPredicate.test(parameters.query())) { return Optional.empty(); } - var model = browseSite(ctx, parameters.query()); + var model = browseSite(parameters.query()); if (null == model) return Optional.empty(); @@ -53,7 +52,7 @@ public class BrowseCommand implements SearchCommandInterface { } - private BrowseResultSet browseSite(Context ctx, String humanQuery) { + private BrowseResultSet 
browseSite(String humanQuery) { String definePrefix = "browse:"; String word = humanQuery.substring(definePrefix.length()).toLowerCase(); @@ -66,7 +65,7 @@ public class BrowseCommand implements SearchCommandInterface { return browseService.getRandomEntries(set); } else { - return browseService.getRelatedEntries(ctx, word); + return browseService.getRelatedEntries(word); } } catch (Exception ex) { diff --git a/code/services-application/search-service/src/main/java/nu/marginalia/search/command/commands/ConvertCommand.java b/code/services-application/search-service/java/nu/marginalia/search/command/commands/ConvertCommand.java similarity index 88% rename from code/services-application/search-service/src/main/java/nu/marginalia/search/command/commands/ConvertCommand.java rename to code/services-application/search-service/java/nu/marginalia/search/command/commands/ConvertCommand.java index 14119ed7..38f98af2 100644 --- a/code/services-application/search-service/src/main/java/nu/marginalia/search/command/commands/ConvertCommand.java +++ b/code/services-application/search-service/java/nu/marginalia/search/command/commands/ConvertCommand.java @@ -5,7 +5,6 @@ import lombok.SneakyThrows; import nu.marginalia.search.command.SearchCommandInterface; import nu.marginalia.search.command.SearchParameters; import nu.marginalia.search.svc.SearchUnitConversionService; -import nu.marginalia.client.Context; import nu.marginalia.renderer.MustacheRenderer; import nu.marginalia.renderer.RendererFactory; import spark.Response; @@ -27,8 +26,8 @@ public class ConvertCommand implements SearchCommandInterface { @Override @SneakyThrows - public Optional process(Context ctx, Response response, SearchParameters parameters) { - var conversion = searchUnitConversionService.tryConversion(ctx, parameters.query()); + public Optional process(Response response, SearchParameters parameters) { + var conversion = searchUnitConversionService.tryConversion(parameters.query()); return conversion.map(s -> 
conversionRenderer.render(Map.of( "query", parameters.query(), "result", s, diff --git a/code/services-application/search-service/src/main/java/nu/marginalia/search/command/commands/DefinitionCommand.java b/code/services-application/search-service/java/nu/marginalia/search/command/commands/DefinitionCommand.java similarity index 62% rename from code/services-application/search-service/src/main/java/nu/marginalia/search/command/commands/DefinitionCommand.java rename to code/services-application/search-service/java/nu/marginalia/search/command/commands/DefinitionCommand.java index d0c65ed0..3025497f 100644 --- a/code/services-application/search-service/src/main/java/nu/marginalia/search/command/commands/DefinitionCommand.java +++ b/code/services-application/search-service/java/nu/marginalia/search/command/commands/DefinitionCommand.java @@ -2,13 +2,11 @@ package nu.marginalia.search.command.commands; import com.google.inject.Inject; -import lombok.SneakyThrows; -import nu.marginalia.assistant.client.AssistantClient; -import nu.marginalia.assistant.client.model.DictionaryResponse; -import nu.marginalia.client.Context; +import nu.marginalia.api.math.MathClient; +import nu.marginalia.api.math.model.DictionaryResponse; +import nu.marginalia.renderer.MustacheRenderer; import nu.marginalia.search.command.SearchCommandInterface; import nu.marginalia.search.command.SearchParameters; -import nu.marginalia.renderer.MustacheRenderer; import nu.marginalia.renderer.RendererFactory; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -17,6 +15,7 @@ import spark.Response; import java.io.IOException; import java.util.Map; import java.util.Optional; +import java.util.concurrent.TimeUnit; import java.util.function.Predicate; import java.util.regex.Pattern; @@ -24,27 +23,27 @@ public class DefinitionCommand implements SearchCommandInterface { private final Logger logger = LoggerFactory.getLogger(getClass()); private final MustacheRenderer dictionaryRenderer; - private final 
AssistantClient assistantClient; + private final MathClient mathClient; private final Predicate queryPatternPredicate = Pattern.compile("^define:[A-Za-z\\s-0-9]+$").asPredicate(); @Inject - public DefinitionCommand(RendererFactory rendererFactory, AssistantClient assistantClient) + public DefinitionCommand(RendererFactory rendererFactory, MathClient mathClient) throws IOException { dictionaryRenderer = rendererFactory.renderer("search/dictionary-results"); - this.assistantClient = assistantClient; + this.mathClient = mathClient; } @Override - public Optional process(Context ctx, Response response, SearchParameters parameters) { + public Optional process(Response response, SearchParameters parameters) { if (!queryPatternPredicate.test(parameters.query())) { return Optional.empty(); } - var results = lookupDefinition(ctx, parameters.query()); + var results = lookupDefinition(parameters.query()); return Optional.of(dictionaryRenderer.render(results, Map.of("query", parameters.query(), @@ -53,17 +52,19 @@ public class DefinitionCommand implements SearchCommandInterface { } - @SneakyThrows - private DictionaryResponse lookupDefinition(Context ctx, String humanQuery) { + private DictionaryResponse lookupDefinition(String humanQuery) { String definePrefix = "define:"; String word = humanQuery.substring(definePrefix.length()).toLowerCase(); - logger.info("Defining: {}", word); - var results = assistantClient - .dictionaryLookup(ctx, word) - .blockingFirst(); - logger.debug("Results = {}", results); + try { + return mathClient + .dictionaryLookup(word) + .get(250, TimeUnit.MILLISECONDS); + } + catch (Exception e) { + logger.error("Failed to lookup definition for word: " + word, e); - return results; + throw new RuntimeException(e); + } } } diff --git a/code/services-application/search-service/src/main/java/nu/marginalia/search/command/commands/SearchCommand.java b/code/services-application/search-service/java/nu/marginalia/search/command/commands/SearchCommand.java 
similarity index 64% rename from code/services-application/search-service/src/main/java/nu/marginalia/search/command/commands/SearchCommand.java rename to code/services-application/search-service/java/nu/marginalia/search/command/commands/SearchCommand.java index aeede238..ea054a7b 100644 --- a/code/services-application/search-service/src/main/java/nu/marginalia/search/command/commands/SearchCommand.java +++ b/code/services-application/search-service/java/nu/marginalia/search/command/commands/SearchCommand.java @@ -1,13 +1,10 @@ package nu.marginalia.search.command.commands; import com.google.inject.Inject; -import nu.marginalia.client.Context; -import nu.marginalia.db.DomainBlacklist; import nu.marginalia.search.SearchOperator; import nu.marginalia.search.command.SearchCommandInterface; import nu.marginalia.search.command.SearchParameters; import nu.marginalia.search.model.DecoratedSearchResults; -import nu.marginalia.search.model.UrlDetails; import nu.marginalia.renderer.MustacheRenderer; import nu.marginalia.renderer.RendererFactory; import spark.Response; @@ -16,25 +13,21 @@ import java.io.IOException; import java.util.Optional; public class SearchCommand implements SearchCommandInterface { - private final DomainBlacklist blacklist; private final SearchOperator searchOperator; private final MustacheRenderer searchResultsRenderer; @Inject - public SearchCommand(DomainBlacklist blacklist, - SearchOperator searchOperator, - RendererFactory rendererFactory - ) throws IOException { - this.blacklist = blacklist; + public SearchCommand(SearchOperator searchOperator, + RendererFactory rendererFactory) throws IOException { this.searchOperator = searchOperator; searchResultsRenderer = rendererFactory.renderer("search/search-results"); } @Override - public Optional process(Context ctx, Response response, SearchParameters parameters) { - DecoratedSearchResults results = searchOperator.doSearch(ctx, parameters); + public Optional process(Response response, SearchParameters 
parameters) { + DecoratedSearchResults results = searchOperator.doSearch(parameters); return Optional.of(searchResultsRenderer.render(results)); } diff --git a/code/services-application/search-service/src/main/java/nu/marginalia/search/command/commands/SiteRedirectCommand.java b/code/services-application/search-service/java/nu/marginalia/search/command/commands/SiteRedirectCommand.java similarity index 92% rename from code/services-application/search-service/src/main/java/nu/marginalia/search/command/commands/SiteRedirectCommand.java rename to code/services-application/search-service/java/nu/marginalia/search/command/commands/SiteRedirectCommand.java index cea76c36..dc6855ee 100644 --- a/code/services-application/search-service/src/main/java/nu/marginalia/search/command/commands/SiteRedirectCommand.java +++ b/code/services-application/search-service/java/nu/marginalia/search/command/commands/SiteRedirectCommand.java @@ -2,7 +2,6 @@ package nu.marginalia.search.command.commands; import com.google.inject.Inject; import lombok.SneakyThrows; -import nu.marginalia.client.Context; import nu.marginalia.search.command.SearchCommandInterface; import nu.marginalia.search.command.SearchParameters; import org.slf4j.Logger; @@ -25,7 +24,7 @@ public class SiteRedirectCommand implements SearchCommandInterface { @SneakyThrows @Override - public Optional process(Context ctx, Response response, SearchParameters parameters) { + public Optional process(Response response, SearchParameters parameters) { if (!queryPatternPredicate.test(parameters.query())) { return Optional.empty(); } diff --git a/code/services-application/search-service/src/main/java/nu/marginalia/search/db/DbNearDomainsQuery.java b/code/services-application/search-service/java/nu/marginalia/search/db/DbNearDomainsQuery.java similarity index 100% rename from code/services-application/search-service/src/main/java/nu/marginalia/search/db/DbNearDomainsQuery.java rename to 
code/services-application/search-service/java/nu/marginalia/search/db/DbNearDomainsQuery.java diff --git a/code/services-application/search-service/src/main/java/nu/marginalia/search/exceptions/RedirectException.java b/code/services-application/search-service/java/nu/marginalia/search/exceptions/RedirectException.java similarity index 100% rename from code/services-application/search-service/src/main/java/nu/marginalia/search/exceptions/RedirectException.java rename to code/services-application/search-service/java/nu/marginalia/search/exceptions/RedirectException.java diff --git a/code/services-application/search-service/src/main/java/nu/marginalia/search/model/ClusteredUrlDetails.java b/code/services-application/search-service/java/nu/marginalia/search/model/ClusteredUrlDetails.java similarity index 100% rename from code/services-application/search-service/src/main/java/nu/marginalia/search/model/ClusteredUrlDetails.java rename to code/services-application/search-service/java/nu/marginalia/search/model/ClusteredUrlDetails.java diff --git a/code/services-application/search-service/src/main/java/nu/marginalia/search/model/DecoratedSearchResults.java b/code/services-application/search-service/java/nu/marginalia/search/model/DecoratedSearchResults.java similarity index 100% rename from code/services-application/search-service/src/main/java/nu/marginalia/search/model/DecoratedSearchResults.java rename to code/services-application/search-service/java/nu/marginalia/search/model/DecoratedSearchResults.java diff --git a/code/services-application/search-service/src/main/java/nu/marginalia/search/model/SearchFilters.java b/code/services-application/search-service/java/nu/marginalia/search/model/SearchFilters.java similarity index 100% rename from code/services-application/search-service/src/main/java/nu/marginalia/search/model/SearchFilters.java rename to code/services-application/search-service/java/nu/marginalia/search/model/SearchFilters.java diff --git 
a/code/services-application/search-service/src/main/java/nu/marginalia/search/model/SearchProfile.java b/code/services-application/search-service/java/nu/marginalia/search/model/SearchProfile.java similarity index 95% rename from code/services-application/search-service/src/main/java/nu/marginalia/search/model/SearchProfile.java rename to code/services-application/search-service/java/nu/marginalia/search/model/SearchProfile.java index 85fafa8f..27d9f4aa 100644 --- a/code/services-application/search-service/src/main/java/nu/marginalia/search/model/SearchProfile.java +++ b/code/services-application/search-service/java/nu/marginalia/search/model/SearchProfile.java @@ -2,8 +2,8 @@ package nu.marginalia.search.model; import nu.marginalia.index.query.limit.SpecificationLimit; import nu.marginalia.model.crawl.HtmlFeature; -import nu.marginalia.index.client.model.query.SearchSubquery; -import nu.marginalia.index.client.model.query.SearchSetIdentifier; +import nu.marginalia.api.searchquery.model.query.SearchSubquery; +import nu.marginalia.api.searchquery.model.query.SearchSetIdentifier; import java.util.Objects; diff --git a/code/services-application/search-service/src/main/java/nu/marginalia/search/model/UrlDetails.java b/code/services-application/search-service/java/nu/marginalia/search/model/UrlDetails.java similarity index 96% rename from code/services-application/search-service/src/main/java/nu/marginalia/search/model/UrlDetails.java rename to code/services-application/search-service/java/nu/marginalia/search/model/UrlDetails.java index 4968a876..cc2afed6 100644 --- a/code/services-application/search-service/src/main/java/nu/marginalia/search/model/UrlDetails.java +++ b/code/services-application/search-service/java/nu/marginalia/search/model/UrlDetails.java @@ -1,8 +1,8 @@ package nu.marginalia.search.model; import lombok.*; -import nu.marginalia.index.client.model.results.SearchResultItem; -import nu.marginalia.index.client.model.results.SearchResultKeywordScore; 
+import nu.marginalia.api.searchquery.model.results.SearchResultItem; +import nu.marginalia.api.searchquery.model.results.SearchResultKeywordScore; import nu.marginalia.model.EdgeUrl; import nu.marginalia.model.crawl.DomainIndexingState; import nu.marginalia.model.crawl.HtmlFeature; diff --git a/code/services-application/search-service/src/main/java/nu/marginalia/search/results/BrowseResultCleaner.java b/code/services-application/search-service/java/nu/marginalia/search/results/BrowseResultCleaner.java similarity index 93% rename from code/services-application/search-service/src/main/java/nu/marginalia/search/results/BrowseResultCleaner.java rename to code/services-application/search-service/java/nu/marginalia/search/results/BrowseResultCleaner.java index 7ebefb55..8f4e5644 100644 --- a/code/services-application/search-service/src/main/java/nu/marginalia/search/results/BrowseResultCleaner.java +++ b/code/services-application/search-service/java/nu/marginalia/search/results/BrowseResultCleaner.java @@ -2,7 +2,6 @@ package nu.marginalia.search.results; import com.google.inject.Inject; import com.google.inject.Singleton; -import nu.marginalia.assistant.client.model.SimilarDomain; import nu.marginalia.browse.model.BrowseResult; import nu.marginalia.screenshot.ScreenshotService; diff --git a/code/services-application/search-service/src/main/java/nu/marginalia/search/results/UrlDeduplicator.java b/code/services-application/search-service/java/nu/marginalia/search/results/UrlDeduplicator.java similarity index 94% rename from code/services-application/search-service/src/main/java/nu/marginalia/search/results/UrlDeduplicator.java rename to code/services-application/search-service/java/nu/marginalia/search/results/UrlDeduplicator.java index e64f8517..ccddb8d9 100644 --- a/code/services-application/search-service/src/main/java/nu/marginalia/search/results/UrlDeduplicator.java +++ b/code/services-application/search-service/java/nu/marginalia/search/results/UrlDeduplicator.java 
@@ -4,8 +4,7 @@ import gnu.trove.list.TLongList; import gnu.trove.list.array.TLongArrayList; import gnu.trove.map.hash.TObjectIntHashMap; import gnu.trove.set.hash.TIntHashSet; -import nu.marginalia.index.client.model.results.DecoratedSearchResultItem; -import nu.marginalia.search.model.UrlDetails; +import nu.marginalia.api.searchquery.model.results.DecoratedSearchResultItem; import nu.marginalia.lsh.EasyLSH; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/code/services-application/search-service/src/main/java/nu/marginalia/search/svc/SearchAddToCrawlQueueService.java b/code/services-application/search-service/java/nu/marginalia/search/svc/SearchAddToCrawlQueueService.java similarity index 100% rename from code/services-application/search-service/src/main/java/nu/marginalia/search/svc/SearchAddToCrawlQueueService.java rename to code/services-application/search-service/java/nu/marginalia/search/svc/SearchAddToCrawlQueueService.java diff --git a/code/services-application/search-service/src/main/java/nu/marginalia/search/svc/SearchBrowseService.java b/code/services-application/search-service/java/nu/marginalia/search/svc/SearchBrowseService.java similarity index 75% rename from code/services-application/search-service/src/main/java/nu/marginalia/search/svc/SearchBrowseService.java rename to code/services-application/search-service/java/nu/marginalia/search/svc/SearchBrowseService.java index 187a9081..6ba3fa49 100644 --- a/code/services-application/search-service/src/main/java/nu/marginalia/search/svc/SearchBrowseService.java +++ b/code/services-application/search-service/java/nu/marginalia/search/svc/SearchBrowseService.java @@ -1,12 +1,11 @@ package nu.marginalia.search.svc; import com.google.inject.Inject; -import nu.marginalia.assistant.client.AssistantClient; -import nu.marginalia.assistant.client.model.SimilarDomain; +import nu.marginalia.api.domains.DomainInfoClient; +import nu.marginalia.api.domains.model.SimilarDomain; import 
nu.marginalia.browse.DbBrowseDomainsRandom; import nu.marginalia.browse.model.BrowseResult; import nu.marginalia.browse.model.BrowseResultSet; -import nu.marginalia.client.Context; import nu.marginalia.db.DbDomainQueries; import nu.marginalia.db.DomainBlacklist; import nu.marginalia.model.EdgeDomain; @@ -16,6 +15,9 @@ import java.util.ArrayList; import java.util.HashSet; import java.util.List; import java.util.Set; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; import static java.util.Collections.shuffle; @@ -23,20 +25,20 @@ public class SearchBrowseService { private final DbBrowseDomainsRandom randomDomains; private final DbDomainQueries domainQueries; private final DomainBlacklist blacklist; - private final AssistantClient assistantClient; + private final DomainInfoClient domainInfoClient; private final BrowseResultCleaner browseResultCleaner; @Inject public SearchBrowseService(DbBrowseDomainsRandom randomDomains, DbDomainQueries domainQueries, DomainBlacklist blacklist, - AssistantClient assistantClient, + DomainInfoClient domainInfoClient, BrowseResultCleaner browseResultCleaner) { this.randomDomains = randomDomains; this.domainQueries = domainQueries; this.blacklist = blacklist; - this.assistantClient = assistantClient; + this.domainInfoClient = domainInfoClient; this.browseResultCleaner = browseResultCleaner; } @@ -48,16 +50,21 @@ public class SearchBrowseService { return new BrowseResultSet(results); } - public BrowseResultSet getRelatedEntries(Context ctx, String domainName) { + public BrowseResultSet getRelatedEntries(String domainName) throws ExecutionException, InterruptedException, TimeoutException { var domain = domainQueries.getDomainId(new EdgeDomain(domainName)); - var neighbors = assistantClient.similarDomains(ctx, domain, 50).blockingFirst(); + var neighbors = domainInfoClient.similarDomains(domain, 50) + .get(100, TimeUnit.MILLISECONDS); + 
neighbors.removeIf(sd -> !sd.screenshot()); // If the results are very few, supplement with the alternative shitty algorithm if (neighbors.size() < 25) { Set allNeighbors = new HashSet<>(neighbors); - allNeighbors.addAll(assistantClient.linkedDomains(ctx, domain, 50).blockingFirst()); + allNeighbors.addAll(domainInfoClient + .linkedDomains(domain, 50) + .get(100, TimeUnit.MILLISECONDS) + ); neighbors.clear(); neighbors.addAll(allNeighbors); diff --git a/code/services-application/search-service/src/main/java/nu/marginalia/search/svc/SearchCrosstalkService.java b/code/services-application/search-service/java/nu/marginalia/search/svc/SearchCrosstalkService.java similarity index 89% rename from code/services-application/search-service/src/main/java/nu/marginalia/search/svc/SearchCrosstalkService.java rename to code/services-application/search-service/java/nu/marginalia/search/svc/SearchCrosstalkService.java index 910348e0..ddce56ac 100644 --- a/code/services-application/search-service/src/main/java/nu/marginalia/search/svc/SearchCrosstalkService.java +++ b/code/services-application/search-service/java/nu/marginalia/search/svc/SearchCrosstalkService.java @@ -1,7 +1,6 @@ package nu.marginalia.search.svc; import com.google.inject.Inject; -import nu.marginalia.client.Context; import nu.marginalia.renderer.MustacheRenderer; import nu.marginalia.renderer.RendererFactory; import nu.marginalia.search.SearchOperator; @@ -43,8 +42,8 @@ public class SearchCrosstalkService { parts[i] = parts[i].trim(); } - var resAtoB = searchOperator.doLinkSearch(Context.fromRequest(request), parts[0], parts[1]); - var resBtoA = searchOperator.doLinkSearch(Context.fromRequest(request), parts[1], parts[0]); + var resAtoB = searchOperator.doLinkSearch(parts[0], parts[1]); + var resBtoA = searchOperator.doLinkSearch(parts[1], parts[0]); var model = new CrosstalkResult(parts[0], parts[1], resAtoB, resBtoA); diff --git 
a/code/services-application/search-service/src/main/java/nu/marginalia/search/svc/SearchErrorPageService.java b/code/services-application/search-service/java/nu/marginalia/search/svc/SearchErrorPageService.java similarity index 83% rename from code/services-application/search-service/src/main/java/nu/marginalia/search/svc/SearchErrorPageService.java rename to code/services-application/search-service/java/nu/marginalia/search/svc/SearchErrorPageService.java index 68f55b4f..346506e7 100644 --- a/code/services-application/search-service/src/main/java/nu/marginalia/search/svc/SearchErrorPageService.java +++ b/code/services-application/search-service/java/nu/marginalia/search/svc/SearchErrorPageService.java @@ -1,8 +1,7 @@ package nu.marginalia.search.svc; import com.google.inject.Inject; -import nu.marginalia.client.Context; -import nu.marginalia.index.client.IndexClient; +import nu.marginalia.index.api.IndexMqClient; import nu.marginalia.renderer.MustacheRenderer; import nu.marginalia.renderer.RendererFactory; import org.slf4j.Logger; @@ -14,20 +13,20 @@ import java.io.IOException; import java.util.Map; public class SearchErrorPageService { - private final IndexClient indexClient; + private final IndexMqClient indexMqClient; private final Logger logger = LoggerFactory.getLogger(getClass()); private final MustacheRenderer renderer; @Inject - public SearchErrorPageService(IndexClient indexClient, + public SearchErrorPageService(IndexMqClient indexMqClient, RendererFactory rendererFactory) throws IOException { renderer = rendererFactory.renderer("search/error-page-search"); - this.indexClient = indexClient; + this.indexMqClient = indexMqClient; } - public void serveError(Context ctx, Request request, Response rsp) { + public void serveError(Request request, Response rsp) { rsp.body(renderError(request, "Internal error", """ An error occurred when communicating with the search engine index. 
diff --git a/code/services-application/search-service/src/main/java/nu/marginalia/search/svc/SearchFlagSiteService.java b/code/services-application/search-service/java/nu/marginalia/search/svc/SearchFlagSiteService.java similarity index 98% rename from code/services-application/search-service/src/main/java/nu/marginalia/search/svc/SearchFlagSiteService.java rename to code/services-application/search-service/java/nu/marginalia/search/svc/SearchFlagSiteService.java index e4ffce78..c7ccfa34 100644 --- a/code/services-application/search-service/src/main/java/nu/marginalia/search/svc/SearchFlagSiteService.java +++ b/code/services-application/search-service/java/nu/marginalia/search/svc/SearchFlagSiteService.java @@ -81,5 +81,5 @@ public class SearchFlagSiteService { public record CategoryItem(String categoryName, String categoryDesc) {} public record FlagSiteComplaintModel(String category, String submitTime, boolean isReviewed, String decision) {} - public record FlagSiteFormData(int domainId, String category, String description, String sampleQuery) {}; + public record FlagSiteFormData(int domainId, String category, String description, String sampleQuery) {} } diff --git a/code/services-application/search-service/src/main/java/nu/marginalia/search/svc/SearchFrontPageService.java b/code/services-application/search-service/java/nu/marginalia/search/svc/SearchFrontPageService.java similarity index 100% rename from code/services-application/search-service/src/main/java/nu/marginalia/search/svc/SearchFrontPageService.java rename to code/services-application/search-service/java/nu/marginalia/search/svc/SearchFrontPageService.java diff --git a/code/services-application/search-service/src/main/java/nu/marginalia/search/svc/SearchQueryCountService.java b/code/services-application/search-service/java/nu/marginalia/search/svc/SearchQueryCountService.java similarity index 100% rename from 
code/services-application/search-service/src/main/java/nu/marginalia/search/svc/SearchQueryCountService.java rename to code/services-application/search-service/java/nu/marginalia/search/svc/SearchQueryCountService.java diff --git a/code/services-application/search-service/src/main/java/nu/marginalia/search/svc/SearchQueryIndexService.java b/code/services-application/search-service/java/nu/marginalia/search/svc/SearchQueryIndexService.java similarity index 93% rename from code/services-application/search-service/src/main/java/nu/marginalia/search/svc/SearchQueryIndexService.java rename to code/services-application/search-service/java/nu/marginalia/search/svc/SearchQueryIndexService.java index 87194275..785c8952 100644 --- a/code/services-application/search-service/src/main/java/nu/marginalia/search/svc/SearchQueryIndexService.java +++ b/code/services-application/search-service/java/nu/marginalia/search/svc/SearchQueryIndexService.java @@ -5,11 +5,11 @@ import com.google.inject.Singleton; import it.unimi.dsi.fastutil.ints.Int2LongArrayMap; import lombok.SneakyThrows; import nu.marginalia.bbpc.BrailleBlockPunchCards; -import nu.marginalia.index.client.model.query.SearchSpecification; -import nu.marginalia.index.client.model.results.DecoratedSearchResultItem; -import nu.marginalia.index.client.model.results.SearchResultItem; +import nu.marginalia.api.searchquery.model.query.SearchSpecification; +import nu.marginalia.api.searchquery.model.results.DecoratedSearchResultItem; +import nu.marginalia.api.searchquery.model.results.SearchResultItem; import nu.marginalia.model.crawl.DomainIndexingState; -import nu.marginalia.query.model.QueryResponse; +import nu.marginalia.api.searchquery.model.query.QueryResponse; import nu.marginalia.search.model.UrlDetails; import nu.marginalia.search.results.UrlDeduplicator; import org.slf4j.Logger; diff --git a/code/services-application/search-service/src/main/java/nu/marginalia/search/svc/SearchQueryService.java 
b/code/services-application/search-service/java/nu/marginalia/search/svc/SearchQueryService.java similarity index 89% rename from code/services-application/search-service/src/main/java/nu/marginalia/search/svc/SearchQueryService.java rename to code/services-application/search-service/java/nu/marginalia/search/svc/SearchQueryService.java index 906c4caf..3cc9166d 100644 --- a/code/services-application/search-service/src/main/java/nu/marginalia/search/svc/SearchQueryService.java +++ b/code/services-application/search-service/java/nu/marginalia/search/svc/SearchQueryService.java @@ -5,7 +5,6 @@ import lombok.SneakyThrows; import nu.marginalia.WebsiteUrl; import nu.marginalia.search.command.*; import nu.marginalia.search.model.SearchProfile; -import nu.marginalia.client.Context; import nu.marginalia.search.exceptions.RedirectException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -31,18 +30,15 @@ public class SearchQueryService { @SneakyThrows public Object pathSearch(Request request, Response response) { - - final var ctx = Context.fromRequest(request); - try { - return searchCommandEvaulator.eval(ctx, response, parseParameters(request)); + return searchCommandEvaulator.eval(response, parseParameters(request)); } catch (RedirectException ex) { response.redirect(ex.newUrl); } catch (Exception ex) { logger.error("Error", ex); - errorPageService.serveError(ctx, request, response); + errorPageService.serveError(request, response); } return ""; @@ -61,7 +57,8 @@ public class SearchQueryService { SearchJsParameter.parse(request.queryParams("js")), SearchRecentParameter.parse(request.queryParams("recent")), SearchTitleParameter.parse(request.queryParams("searchTitle")), - SearchAdtechParameter.parse(request.queryParams("adtech")) + SearchAdtechParameter.parse(request.queryParams("adtech")), + "1".equals(request.headers("X-Poison-Results")) ); } catch (Exception ex) { diff --git 
a/code/services-application/search-service/src/main/java/nu/marginalia/search/svc/SearchSiteInfoService.java b/code/services-application/search-service/java/nu/marginalia/search/svc/SearchSiteInfoService.java similarity index 76% rename from code/services-application/search-service/src/main/java/nu/marginalia/search/svc/SearchSiteInfoService.java rename to code/services-application/search-service/java/nu/marginalia/search/svc/SearchSiteInfoService.java index 23ac2843..7cb5c809 100644 --- a/code/services-application/search-service/src/main/java/nu/marginalia/search/svc/SearchSiteInfoService.java +++ b/code/services-application/search-service/java/nu/marginalia/search/svc/SearchSiteInfoService.java @@ -1,9 +1,9 @@ package nu.marginalia.search.svc; import com.google.inject.Inject; -import nu.marginalia.assistant.client.AssistantClient; -import nu.marginalia.assistant.client.model.SimilarDomain; -import nu.marginalia.client.Context; +import nu.marginalia.api.domains.DomainInfoClient; +import nu.marginalia.api.domains.model.DomainInformation; +import nu.marginalia.api.domains.model.SimilarDomain; import nu.marginalia.db.DbDomainQueries; import nu.marginalia.feedlot.model.FeedItems; import nu.marginalia.model.EdgeDomain; @@ -11,7 +11,6 @@ import nu.marginalia.renderer.MustacheRenderer; import nu.marginalia.renderer.RendererFactory; import nu.marginalia.screenshot.ScreenshotService; import nu.marginalia.search.SearchOperator; -import nu.marginalia.assistant.client.model.DomainInformation; import nu.marginalia.feedlot.FeedlotClient; import nu.marginalia.search.model.UrlDetails; import nu.marginalia.search.svc.SearchFlagSiteService.FlagSiteFormData; @@ -24,12 +23,16 @@ import java.io.IOException; import java.sql.SQLException; import java.util.List; import java.util.Map; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; +import java.util.function.Supplier; public class SearchSiteInfoService { private 
static final Logger logger = LoggerFactory.getLogger(SearchSiteInfoService.class); private final SearchOperator searchOperator; - private final AssistantClient assistantClient; + private final DomainInfoClient domainInfoClient; private final SearchFlagSiteService flagSiteService; private final DbDomainQueries domainQueries; private final MustacheRenderer renderer; @@ -38,7 +41,7 @@ public class SearchSiteInfoService { @Inject public SearchSiteInfoService(SearchOperator searchOperator, - AssistantClient assistantClient, + DomainInfoClient domainInfoClient, RendererFactory rendererFactory, SearchFlagSiteService flagSiteService, DbDomainQueries domainQueries, @@ -46,7 +49,7 @@ public class SearchSiteInfoService { ScreenshotService screenshotService) throws IOException { this.searchOperator = searchOperator; - this.assistantClient = assistantClient; + this.domainInfoClient = domainInfoClient; this.flagSiteService = flagSiteService; this.domainQueries = domainQueries; @@ -64,14 +67,12 @@ public class SearchSiteInfoService { return null; } - var ctx = Context.fromRequest(request); - var model = switch (view) { - case "links" -> listLinks(ctx, domainName); - case "docs" -> listDocs(ctx, domainName); - case "info" -> listInfo(ctx, domainName); - case "report" -> reportSite(ctx, domainName); - default -> listInfo(ctx, domainName); + case "links" -> listLinks(domainName); + case "docs" -> listDocs(domainName); + case "info" -> listInfo(domainName); + case "report" -> reportSite(domainName); + default -> listInfo(domainName); }; return renderer.render(model); @@ -105,7 +106,7 @@ public class SearchSiteInfoService { return renderer.render(model); } - private Object reportSite(Context ctx, String domainName) throws SQLException { + private Object reportSite(String domainName) throws SQLException { int domainId = domainQueries.getDomainId(new EdgeDomain(domainName)); var existingComplaints = flagSiteService.getExistingComplaints(domainId); @@ -117,40 +118,42 @@ public class 
SearchSiteInfoService { } - private Backlinks listLinks(Context ctx, String domainName) { + private Backlinks listLinks(String domainName) { return new Backlinks(domainName, domainQueries.tryGetDomainId(new EdgeDomain(domainName)).orElse(-1), - searchOperator.doBacklinkSearch(ctx, domainName)); + searchOperator.doBacklinkSearch(domainName)); } - private SiteInfoWithContext listInfo(Context ctx, String domainName) { + private SiteInfoWithContext listInfo(String domainName) { final int domainId = domainQueries.tryGetDomainId(new EdgeDomain(domainName)).orElse(-1); - final DomainInformation domainInfo; - final List similarSet; - final List linkingDomains; - String url = "https://" + domainName + "/";; + final Future domainInfoFuture; + final Future> similarSetFuture; + final Future> linkingDomainsFuture; + + String url = "https://" + domainName + "/"; boolean hasScreenshot = screenshotService.hasScreenshot(domainId); var feedItemsFuture = feedlotClient.getFeedItems(domainName); - if (domainId < 0 || !assistantClient.isAccepting()) { - domainInfo = createDummySiteInfo(domainName); - similarSet = List.of(); - linkingDomains = List.of(); + if (domainId < 0) { + domainInfoFuture = CompletableFuture.failedFuture(new Exception("Unknown Domain ID")); + similarSetFuture = CompletableFuture.failedFuture(new Exception("Unknown Domain ID")); + linkingDomainsFuture = CompletableFuture.failedFuture(new Exception("Unknown Domain ID")); + } + else if (!domainInfoClient.isAccepting()) { + domainInfoFuture = CompletableFuture.failedFuture(new Exception("Assistant Service Unavailable")); + similarSetFuture = CompletableFuture.failedFuture(new Exception("Assistant Service Unavailable")); + linkingDomainsFuture = CompletableFuture.failedFuture(new Exception("Assistant Service Unavailable")); } else { - domainInfo = assistantClient.domainInformation(ctx, domainId).blockingFirst(); - similarSet = assistantClient - .similarDomains(ctx, domainId, 25) - .blockingFirst(); - linkingDomains = 
assistantClient - .linkedDomains(ctx, domainId, 25) - .blockingFirst(); + domainInfoFuture = domainInfoClient.domainInformation(domainId); + similarSetFuture = domainInfoClient.similarDomains(domainId, 25); + linkingDomainsFuture = domainInfoClient.linkedDomains(domainId, 25); } - List sampleResults = searchOperator.doSiteSearch(ctx, domainName, 5); + List sampleResults = searchOperator.doSiteSearch(domainName, 5); if (!sampleResults.isEmpty()) { url = sampleResults.getFirst().url.withPathAndParam("/", null).toString(); } @@ -166,26 +169,35 @@ public class SearchSiteInfoService { domainId, url, hasScreenshot, - domainInfo, - similarSet, - linkingDomains, + waitForFuture(domainInfoFuture, () -> createDummySiteInfo(domainName)), + waitForFuture(similarSetFuture, List::of), + waitForFuture(linkingDomainsFuture, List::of), feedItems, sampleResults ); } - private DomainInformation createDummySiteInfo(String domainName) { - return DomainInformation.builder() - .domain(new EdgeDomain(domainName)) - .suggestForCrawling(true) - .unknownDomain(true) - .build(); + private T waitForFuture(Future future, Supplier fallback) { + try { + return future.get(250, TimeUnit.MILLISECONDS); + } catch (Exception e) { + logger.info("Failed to get domain data: {}", e.getMessage()); + return fallback.get(); + } } - private Docs listDocs(Context ctx, String domainName) { + private DomainInformation createDummySiteInfo(String domainName) { + return DomainInformation.builder() + .domain(new EdgeDomain(domainName)) + .suggestForCrawling(true) + .unknownDomain(true) + .build(); + } + + private Docs listDocs(String domainName) { return new Docs(domainName, domainQueries.tryGetDomainId(new EdgeDomain(domainName)).orElse(-1), - searchOperator.doSiteSearch(ctx, domainName, 100)); + searchOperator.doSiteSearch(domainName, 100)); } public record Docs(Map view, diff --git a/code/services-application/search-service/src/main/java/nu/marginalia/search/svc/SearchUnitConversionService.java 
b/code/services-application/search-service/java/nu/marginalia/search/svc/SearchUnitConversionService.java similarity index 57% rename from code/services-application/search-service/src/main/java/nu/marginalia/search/svc/SearchUnitConversionService.java rename to code/services-application/search-service/java/nu/marginalia/search/svc/SearchUnitConversionService.java index 110ece21..1727878e 100644 --- a/code/services-application/search-service/src/main/java/nu/marginalia/search/svc/SearchUnitConversionService.java +++ b/code/services-application/search-service/java/nu/marginalia/search/svc/SearchUnitConversionService.java @@ -1,9 +1,6 @@ package nu.marginalia.search.svc; -import io.reactivex.rxjava3.schedulers.Schedulers; -import nu.marginalia.assistant.client.AssistantClient; -import nu.marginalia.client.exception.RemoteException; -import nu.marginalia.client.Context; +import nu.marginalia.api.math.MathClient; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -11,7 +8,10 @@ import javax.annotation.CheckForNull; import com.google.inject.Inject; import com.google.inject.Singleton; import java.util.Optional; +import java.util.concurrent.ExecutionException; import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; import java.util.function.Predicate; import java.util.regex.Pattern; @@ -21,17 +21,17 @@ public class SearchUnitConversionService { private final Pattern conversionPattern = Pattern.compile("((\\d+|\\s+|[.()\\-^+%*/]|log[^a-z]|log2[^a-z]|sqrt[^a-z]|log10|cos[^a-z]|sin[^a-z]|tan[^a-z]|log2|pi[^a-z]|e[^a-z]|2pi[^a-z])+)\\s*([a-zA-Z][a-zA-Z^.0-9]*\\s?[a-zA-Z^.0-9]*)\\s+in\\s+([a-zA-Z^.0-9]+\\s?[a-zA-Z^.0-9]*)"); private final Predicate evalPredicate = Pattern.compile("(\\d+|\\s+|[.()\\-^+%*/]|log|log2|sqrt|log10|cos|sin|tan|pi|e|2pi)+").asMatchPredicate(); - private final AssistantClient assistantClient; + private final MathClient mathClient; @Inject - public 
SearchUnitConversionService(AssistantClient assistantClient) { - this.assistantClient = assistantClient; + public SearchUnitConversionService(MathClient mathClient) { + this.mathClient = mathClient; } - public Optional tryConversion(Context context, String query) { + public Optional tryConversion(String query) { var matcher = conversionPattern.matcher(query); if (!matcher.matches()) - return Optional.empty(); + return Optional.empty(); String value = matcher.group(1); String from = matcher.group(3); @@ -40,24 +40,22 @@ public class SearchUnitConversionService { logger.info("{} -> '{}' '{}' '{}'", query, value, from, to); try { - return Optional.of(assistantClient.unitConversion(context, value, from, to).blockingFirst()); - } - catch (RemoteException ex) { - return Optional.empty(); + var resultFuture = mathClient.unitConversion(value, from, to); + return Optional.of( + resultFuture.get(250, TimeUnit.MILLISECONDS) + ); + } catch (ExecutionException e) { + logger.error("Error in unit conversion", e); + } catch (InterruptedException e) { + logger.error("Interrupted while waiting for unit conversion", e); + } catch (TimeoutException e) { + // Ignore } + return Optional.empty(); } - public boolean isNumeric(String str) { - try { - Double.parseDouble(str); - return true; - } - catch (NumberFormatException ex) { - return false; - } - } - public @CheckForNull Future tryEval(Context context, String query) { + public @CheckForNull Future tryEval(String query) { if (!evalPredicate.test(query)) { return null; } @@ -70,11 +68,6 @@ public class SearchUnitConversionService { logger.info("eval({})", expr); - try { - return assistantClient.evalMath(context, expr).subscribeOn(Schedulers.io()).toFuture(); - } - catch (RemoteException ex) { - return null; - } + return mathClient.evalMath(expr); } } diff --git a/code/services-application/search-service/readme.md b/code/services-application/search-service/readme.md index 02362a55..d7afe4b5 100644 --- 
a/code/services-application/search-service/readme.md +++ b/code/services-application/search-service/readme.md @@ -14,13 +14,13 @@ to the user. ## Central classes -* [SearchService](src/main/java/nu/marginalia/search/SearchService.java) receives requests and delegates to the +* [SearchService](java/nu/marginalia/search/SearchService.java) receives requests and delegates to the appropriate services. -* [CommandEvaluator](src/main/java/nu/marginalia/search/command/CommandEvaluator.java) interprets a user query and acts +* [CommandEvaluator](java/nu/marginalia/search/command/CommandEvaluator.java) interprets a user query and acts upon it, dealing with special operations like `browse:` or `site:`. -* [SearchQueryIndexService](src/main/java/nu/marginalia/search/svc/SearchQueryIndexService.java) passes a parsed search query to the index service, and +* [SearchQueryIndexService](java/nu/marginalia/search/svc/SearchQueryIndexService.java) passes a parsed search query to the index service, and then decorates the search results so that they can be rendered. 
## See Also diff --git a/code/services-application/search-service/src/main/resources/static/search/crawler-ips.txt b/code/services-application/search-service/resources/static/search/crawler-ips.txt similarity index 100% rename from code/services-application/search-service/src/main/resources/static/search/crawler-ips.txt rename to code/services-application/search-service/resources/static/search/crawler-ips.txt diff --git a/code/services-application/search-service/src/main/resources/static/search/favicon.ico b/code/services-application/search-service/resources/static/search/favicon.ico similarity index 100% rename from code/services-application/search-service/src/main/resources/static/search/favicon.ico rename to code/services-application/search-service/resources/static/search/favicon.ico diff --git a/code/services-application/search-service/src/main/resources/static/search/main.js b/code/services-application/search-service/resources/static/search/main.js similarity index 100% rename from code/services-application/search-service/src/main/resources/static/search/main.js rename to code/services-application/search-service/resources/static/search/main.js diff --git a/code/services-application/search-service/src/main/resources/static/search/menu.js b/code/services-application/search-service/resources/static/search/menu.js similarity index 100% rename from code/services-application/search-service/src/main/resources/static/search/menu.js rename to code/services-application/search-service/resources/static/search/menu.js diff --git a/code/services-application/search-service/src/main/resources/static/search/opensearch.xml b/code/services-application/search-service/resources/static/search/opensearch.xml similarity index 100% rename from code/services-application/search-service/src/main/resources/static/search/opensearch.xml rename to code/services-application/search-service/resources/static/search/opensearch.xml diff --git 
a/code/services-application/search-service/src/main/resources/static/search/robots.txt b/code/services-application/search-service/resources/static/search/robots.txt similarity index 100% rename from code/services-application/search-service/src/main/resources/static/search/robots.txt rename to code/services-application/search-service/resources/static/search/robots.txt diff --git a/code/services-application/search-service/src/main/resources/static/search/rss.svg b/code/services-application/search-service/resources/static/search/rss.svg similarity index 100% rename from code/services-application/search-service/src/main/resources/static/search/rss.svg rename to code/services-application/search-service/resources/static/search/rss.svg diff --git a/code/services-application/search-service/src/main/resources/static/search/serp.scss b/code/services-application/search-service/resources/static/search/serp.scss similarity index 100% rename from code/services-application/search-service/src/main/resources/static/search/serp.scss rename to code/services-application/search-service/resources/static/search/serp.scss diff --git a/code/services-application/search-service/src/main/resources/static/search/style-new.css b/code/services-application/search-service/resources/static/search/style-new.css similarity index 100% rename from code/services-application/search-service/src/main/resources/static/search/style-new.css rename to code/services-application/search-service/resources/static/search/style-new.css diff --git a/code/services-application/search-service/src/main/resources/static/search/tts.js b/code/services-application/search-service/resources/static/search/tts.js similarity index 100% rename from code/services-application/search-service/src/main/resources/static/search/tts.js rename to code/services-application/search-service/resources/static/search/tts.js diff --git a/code/services-application/search-service/src/main/resources/templates/search/browse-result.hdb 
b/code/services-application/search-service/resources/templates/search/browse-result.hdb similarity index 100% rename from code/services-application/search-service/src/main/resources/templates/search/browse-result.hdb rename to code/services-application/search-service/resources/templates/search/browse-result.hdb diff --git a/code/services-application/search-service/src/main/resources/templates/search/browse-results.hdb b/code/services-application/search-service/resources/templates/search/browse-results.hdb similarity index 100% rename from code/services-application/search-service/src/main/resources/templates/search/browse-results.hdb rename to code/services-application/search-service/resources/templates/search/browse-results.hdb diff --git a/code/services-application/search-service/src/main/resources/templates/search/conversion-results.hdb b/code/services-application/search-service/resources/templates/search/conversion-results.hdb similarity index 100% rename from code/services-application/search-service/src/main/resources/templates/search/conversion-results.hdb rename to code/services-application/search-service/resources/templates/search/conversion-results.hdb diff --git a/code/services-application/search-service/src/main/resources/templates/search/dictionary-results.hdb b/code/services-application/search-service/resources/templates/search/dictionary-results.hdb similarity index 100% rename from code/services-application/search-service/src/main/resources/templates/search/dictionary-results.hdb rename to code/services-application/search-service/resources/templates/search/dictionary-results.hdb diff --git a/code/services-application/search-service/src/main/resources/templates/search/error-page-search.hdb b/code/services-application/search-service/resources/templates/search/error-page-search.hdb similarity index 100% rename from code/services-application/search-service/src/main/resources/templates/search/error-page-search.hdb rename to 
code/services-application/search-service/resources/templates/search/error-page-search.hdb diff --git a/code/services-application/search-service/src/main/resources/templates/search/error-page.hdb b/code/services-application/search-service/resources/templates/search/error-page.hdb similarity index 100% rename from code/services-application/search-service/src/main/resources/templates/search/error-page.hdb rename to code/services-application/search-service/resources/templates/search/error-page.hdb diff --git a/code/services-application/search-service/src/main/resources/templates/search/index/index-about.hdb b/code/services-application/search-service/resources/templates/search/index/index-about.hdb similarity index 100% rename from code/services-application/search-service/src/main/resources/templates/search/index/index-about.hdb rename to code/services-application/search-service/resources/templates/search/index/index-about.hdb diff --git a/code/services-application/search-service/src/main/resources/templates/search/index/index-news.hdb b/code/services-application/search-service/resources/templates/search/index/index-news.hdb similarity index 100% rename from code/services-application/search-service/src/main/resources/templates/search/index/index-news.hdb rename to code/services-application/search-service/resources/templates/search/index/index-news.hdb diff --git a/code/services-application/search-service/src/main/resources/templates/search/index/index-tips.hdb b/code/services-application/search-service/resources/templates/search/index/index-tips.hdb similarity index 100% rename from code/services-application/search-service/src/main/resources/templates/search/index/index-tips.hdb rename to code/services-application/search-service/resources/templates/search/index/index-tips.hdb diff --git a/code/services-application/search-service/src/main/resources/templates/search/index/index.hdb b/code/services-application/search-service/resources/templates/search/index/index.hdb 
similarity index 100% rename from code/services-application/search-service/src/main/resources/templates/search/index/index.hdb rename to code/services-application/search-service/resources/templates/search/index/index.hdb diff --git a/code/services-application/search-service/src/main/resources/templates/search/parts/search-filters.hdb b/code/services-application/search-service/resources/templates/search/parts/search-filters.hdb similarity index 100% rename from code/services-application/search-service/src/main/resources/templates/search/parts/search-filters.hdb rename to code/services-application/search-service/resources/templates/search/parts/search-filters.hdb diff --git a/code/services-application/search-service/src/main/resources/templates/search/parts/search-footer.hdb b/code/services-application/search-service/resources/templates/search/parts/search-footer.hdb similarity index 100% rename from code/services-application/search-service/src/main/resources/templates/search/parts/search-footer.hdb rename to code/services-application/search-service/resources/templates/search/parts/search-footer.hdb diff --git a/code/services-application/search-service/src/main/resources/templates/search/parts/search-form.hdb b/code/services-application/search-service/resources/templates/search/parts/search-form.hdb similarity index 100% rename from code/services-application/search-service/src/main/resources/templates/search/parts/search-form.hdb rename to code/services-application/search-service/resources/templates/search/parts/search-form.hdb diff --git a/code/services-application/search-service/src/main/resources/templates/search/parts/search-header.hdb b/code/services-application/search-service/resources/templates/search/parts/search-header.hdb similarity index 100% rename from code/services-application/search-service/src/main/resources/templates/search/parts/search-header.hdb rename to code/services-application/search-service/resources/templates/search/parts/search-header.hdb 
diff --git a/code/services-application/search-service/src/main/resources/templates/search/parts/search-result-rest.hdb b/code/services-application/search-service/resources/templates/search/parts/search-result-rest.hdb similarity index 100% rename from code/services-application/search-service/src/main/resources/templates/search/parts/search-result-rest.hdb rename to code/services-application/search-service/resources/templates/search/parts/search-result-rest.hdb diff --git a/code/services-application/search-service/src/main/resources/templates/search/parts/search-result.hdb b/code/services-application/search-service/resources/templates/search/parts/search-result.hdb similarity index 100% rename from code/services-application/search-service/src/main/resources/templates/search/parts/search-result.hdb rename to code/services-application/search-service/resources/templates/search/parts/search-result.hdb diff --git a/code/services-application/search-service/src/main/resources/templates/search/search-results.hdb b/code/services-application/search-service/resources/templates/search/search-results.hdb similarity index 100% rename from code/services-application/search-service/src/main/resources/templates/search/search-results.hdb rename to code/services-application/search-service/resources/templates/search/search-results.hdb diff --git a/code/services-application/search-service/src/main/resources/templates/search/site-info/site-crosstalk.hdb b/code/services-application/search-service/resources/templates/search/site-info/site-crosstalk.hdb similarity index 100% rename from code/services-application/search-service/src/main/resources/templates/search/site-info/site-crosstalk.hdb rename to code/services-application/search-service/resources/templates/search/site-info/site-crosstalk.hdb diff --git a/code/services-application/search-service/src/main/resources/templates/search/site-info/site-info-feed.hdb 
b/code/services-application/search-service/resources/templates/search/site-info/site-info-feed.hdb similarity index 100% rename from code/services-application/search-service/src/main/resources/templates/search/site-info/site-info-feed.hdb rename to code/services-application/search-service/resources/templates/search/site-info/site-info-feed.hdb diff --git a/code/services-application/search-service/src/main/resources/templates/search/site-info/site-info-index-blacklisted.hdb b/code/services-application/search-service/resources/templates/search/site-info/site-info-index-blacklisted.hdb similarity index 100% rename from code/services-application/search-service/src/main/resources/templates/search/site-info/site-info-index-blacklisted.hdb rename to code/services-application/search-service/resources/templates/search/site-info/site-info-index-blacklisted.hdb diff --git a/code/services-application/search-service/src/main/resources/templates/search/site-info/site-info-index-indexed.hdb b/code/services-application/search-service/resources/templates/search/site-info/site-info-index-indexed.hdb similarity index 100% rename from code/services-application/search-service/src/main/resources/templates/search/site-info/site-info-index-indexed.hdb rename to code/services-application/search-service/resources/templates/search/site-info/site-info-index-indexed.hdb diff --git a/code/services-application/search-service/src/main/resources/templates/search/site-info/site-info-index-suggest.hdb b/code/services-application/search-service/resources/templates/search/site-info/site-info-index-suggest.hdb similarity index 100% rename from code/services-application/search-service/src/main/resources/templates/search/site-info/site-info-index-suggest.hdb rename to code/services-application/search-service/resources/templates/search/site-info/site-info-index-suggest.hdb diff --git a/code/services-application/search-service/src/main/resources/templates/search/site-info/site-info-index-unknown.hdb 
b/code/services-application/search-service/resources/templates/search/site-info/site-info-index-unknown.hdb similarity index 100% rename from code/services-application/search-service/src/main/resources/templates/search/site-info/site-info-index-unknown.hdb rename to code/services-application/search-service/resources/templates/search/site-info/site-info-index-unknown.hdb diff --git a/code/services-application/search-service/src/main/resources/templates/search/site-info/site-info-index.hdb b/code/services-application/search-service/resources/templates/search/site-info/site-info-index.hdb similarity index 100% rename from code/services-application/search-service/src/main/resources/templates/search/site-info/site-info-index.hdb rename to code/services-application/search-service/resources/templates/search/site-info/site-info-index.hdb diff --git a/code/services-application/search-service/src/main/resources/templates/search/site-info/site-info-links.hdb b/code/services-application/search-service/resources/templates/search/site-info/site-info-links.hdb similarity index 100% rename from code/services-application/search-service/src/main/resources/templates/search/site-info/site-info-links.hdb rename to code/services-application/search-service/resources/templates/search/site-info/site-info-links.hdb diff --git a/code/services-application/search-service/src/main/resources/templates/search/site-info/site-info-report.hdb b/code/services-application/search-service/resources/templates/search/site-info/site-info-report.hdb similarity index 100% rename from code/services-application/search-service/src/main/resources/templates/search/site-info/site-info-report.hdb rename to code/services-application/search-service/resources/templates/search/site-info/site-info-report.hdb diff --git a/code/services-application/search-service/src/main/resources/templates/search/site-info/site-info-summary.hdb 
b/code/services-application/search-service/resources/templates/search/site-info/site-info-summary.hdb similarity index 100% rename from code/services-application/search-service/src/main/resources/templates/search/site-info/site-info-summary.hdb rename to code/services-application/search-service/resources/templates/search/site-info/site-info-summary.hdb diff --git a/code/services-application/search-service/src/main/resources/templates/search/site-info/site-info.hdb b/code/services-application/search-service/resources/templates/search/site-info/site-info.hdb similarity index 100% rename from code/services-application/search-service/src/main/resources/templates/search/site-info/site-info.hdb rename to code/services-application/search-service/resources/templates/search/site-info/site-info.hdb diff --git a/code/services-application/search-service/src/test/java/nu/marginalia/util/TestLanguageModels.java b/code/services-application/search-service/src/test/java/nu/marginalia/util/TestLanguageModels.java deleted file mode 100644 index 5efd2025..00000000 --- a/code/services-application/search-service/src/test/java/nu/marginalia/util/TestLanguageModels.java +++ /dev/null @@ -1,38 +0,0 @@ -package nu.marginalia.util; - -import nu.marginalia.LanguageModels; -import nu.marginalia.WmsaHome; - -import java.nio.file.Files; -import java.nio.file.Path; -import java.util.Optional; - -public class TestLanguageModels { - private static final Path LANGUAGE_MODELS_DEFAULT = WmsaHome.getHomePath().resolve("model"); - - public static Path getLanguageModelsPath() { - final Path languageModelsHome = Optional.ofNullable(System.getenv("LANGUAGE_MODELS_HOME")) - .map(Path::of) - .orElse(LANGUAGE_MODELS_DEFAULT); - - if (!Files.isDirectory(languageModelsHome)) { - throw new IllegalStateException("Could not find $LANGUAGE_MODELS_HOME, see doc/language-models.md"); - } - return languageModelsHome; - } - - public static LanguageModels getLanguageModels() { - - var languageModelsHome = 
getLanguageModelsPath(); - - return new LanguageModels( - languageModelsHome.resolve("ngrams.bin"), - languageModelsHome.resolve("tfreq-new-algo3.bin"), - languageModelsHome.resolve("opennlp-sentence.bin"), - languageModelsHome.resolve("English.RDR"), - languageModelsHome.resolve("English.DICT"), - languageModelsHome.resolve("opennlp-tokens.bin"), - languageModelsHome.resolve("lid.176.ftz") - ); - } -} diff --git a/code/services-application/search-service/src/test/java/nu/marginalia/search/command/commands/BangCommandTest.java b/code/services-application/search-service/test/nu/marginalia/search/command/commands/BangCommandTest.java similarity index 88% rename from code/services-application/search-service/src/test/java/nu/marginalia/search/command/commands/BangCommandTest.java rename to code/services-application/search-service/test/nu/marginalia/search/command/commands/BangCommandTest.java index 3394eb8e..f5dd9816 100644 --- a/code/services-application/search-service/src/test/java/nu/marginalia/search/command/commands/BangCommandTest.java +++ b/code/services-application/search-service/test/nu/marginalia/search/command/commands/BangCommandTest.java @@ -1,6 +1,5 @@ package nu.marginalia.search.command.commands; -import nu.marginalia.client.Context; import nu.marginalia.search.command.SearchParameters; import nu.marginalia.search.exceptions.RedirectException; import org.junit.jupiter.api.Assertions; @@ -14,10 +13,9 @@ class BangCommandTest { @Test public void testG() { try { - bangCommand.process(Context.internal(), - null, + bangCommand.process(null, new SearchParameters(" !g test", - null, null, null, null, null) + null, null, null, null, null, false) ); Assertions.fail("Should have thrown RedirectException"); } diff --git a/code/features-qs/query-parser/src/test/java/nu/marginalia/util/TestLanguageModels.java b/code/services-application/search-service/test/nu/marginalia/util/TestLanguageModels.java similarity index 100% rename from 
code/features-qs/query-parser/src/test/java/nu/marginalia/util/TestLanguageModels.java rename to code/services-application/search-service/test/nu/marginalia/util/TestLanguageModels.java diff --git a/code/services-core/assistant-service/build.gradle b/code/services-core/assistant-service/build.gradle index 950dc359..278bbcdf 100644 --- a/code/services-core/assistant-service/build.gradle +++ b/code/services-core/assistant-service/build.gradle @@ -3,7 +3,7 @@ plugins { id 'application' id 'jvm-test-suite' - id 'com.palantir.docker' version '0.35.0' + id 'com.google.cloud.tools.jib' version '3.4.0' } application { @@ -13,24 +13,28 @@ application { tasks.distZip.enabled = false -apply from: "$rootProject.projectDir/docker-service.gradle" - java { toolchain { languageVersion.set(JavaLanguageVersion.of(21)) } } +apply from: "$rootProject.projectDir/srcsets.gradle" +apply from: "$rootProject.projectDir/docker.gradle" + dependencies { implementation project(':third-party:symspell') - implementation project(':code:api:assistant-api') - implementation project(':code:api:query-api') + + implementation project(':code:functions:math') + implementation project(':code:functions:math:api') + implementation project(':code:functions:domain-info') + implementation project(':code:functions:domain-info:api') + implementation project(':code:common:config') implementation project(':code:common:service') implementation project(':code:common:model') implementation project(':code:common:db') implementation project(':code:common:service-discovery') - implementation project(':code:common:service-client') implementation project(':code:features-search:screenshots') @@ -41,9 +45,9 @@ dependencies { implementation libs.bundles.slf4j implementation libs.prometheus + implementation libs.bundles.grpc implementation libs.notnull implementation libs.guice - implementation libs.rxjava implementation libs.spark implementation libs.opencsv implementation libs.trove diff --git 
a/code/services-core/assistant-service/src/main/java/nu/marginalia/assistant/AssistantMain.java b/code/services-core/assistant-service/java/nu/marginalia/assistant/AssistantMain.java similarity index 84% rename from code/services-core/assistant-service/src/main/java/nu/marginalia/assistant/AssistantMain.java rename to code/services-core/assistant-service/java/nu/marginalia/assistant/AssistantMain.java index 24f94fa8..f8c2cc3b 100644 --- a/code/services-core/assistant-service/src/main/java/nu/marginalia/assistant/AssistantMain.java +++ b/code/services-core/assistant-service/java/nu/marginalia/assistant/AssistantMain.java @@ -4,7 +4,7 @@ import com.google.inject.Guice; import com.google.inject.Inject; import com.google.inject.Injector; import nu.marginalia.service.MainClass; -import nu.marginalia.service.SearchServiceDescriptors; +import nu.marginalia.service.ServiceDiscoveryModule; import nu.marginalia.service.id.ServiceId; import nu.marginalia.service.module.ServiceConfigurationModule; import nu.marginalia.service.module.DatabaseModule; @@ -23,7 +23,8 @@ public class AssistantMain extends MainClass { Injector injector = Guice.createInjector( new AssistantModule(), - new ServiceConfigurationModule(SearchServiceDescriptors.descriptors, ServiceId.Assistant), + new ServiceConfigurationModule(ServiceId.Assistant), + new ServiceDiscoveryModule(), new DatabaseModule(false) ); diff --git a/code/services-core/assistant-service/src/main/java/nu/marginalia/assistant/AssistantModule.java b/code/services-core/assistant-service/java/nu/marginalia/assistant/AssistantModule.java similarity index 100% rename from code/services-core/assistant-service/src/main/java/nu/marginalia/assistant/AssistantModule.java rename to code/services-core/assistant-service/java/nu/marginalia/assistant/AssistantModule.java diff --git a/code/services-core/assistant-service/java/nu/marginalia/assistant/AssistantService.java 
b/code/services-core/assistant-service/java/nu/marginalia/assistant/AssistantService.java new file mode 100644 index 00000000..741261b8 --- /dev/null +++ b/code/services-core/assistant-service/java/nu/marginalia/assistant/AssistantService.java @@ -0,0 +1,63 @@ +package nu.marginalia.assistant; + +import com.google.gson.Gson; +import com.google.inject.Inject; +import lombok.SneakyThrows; +import nu.marginalia.assistant.suggest.Suggestions; +import nu.marginalia.functions.domains.DomainInfoGrpcService; +import nu.marginalia.functions.math.MathGrpcService; +import nu.marginalia.model.gson.GsonFactory; +import nu.marginalia.screenshot.ScreenshotService; +import nu.marginalia.service.discovery.property.ServicePartition; +import nu.marginalia.service.server.*; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import spark.Request; +import spark.Response; +import spark.Spark; + +import java.util.List; + +public class AssistantService extends Service { + private final Logger logger = LoggerFactory.getLogger(getClass()); + private final Gson gson = GsonFactory.get(); + private final Suggestions suggestions; + + @SneakyThrows + @Inject + public AssistantService(BaseServiceParams params, + ScreenshotService screenshotService, + DomainInfoGrpcService domainInfoGrpcService, + MathGrpcService mathGrpcService, + Suggestions suggestions) + { + super(params, + ServicePartition.any(), + List.of(domainInfoGrpcService, mathGrpcService)); + + this.suggestions = suggestions; + + Spark.staticFiles.expireTime(600); + + Spark.get("/public/screenshot/:id", screenshotService::serveScreenshotRequest); + Spark.get("/screenshot/:id", screenshotService::serveScreenshotRequest); + Spark.get("/public/suggest/", this::getSuggestions, this::convertToJson); + + Spark.awaitInitialization(); + } + + private Object getSuggestions(Request request, Response response) { + response.type("application/json"); + var param = request.queryParams("partial"); + if (param == null) { + logger.warn("Bad 
parameter, partial is null"); + Spark.halt(500); + } + return suggestions.getSuggestions(10, param); + } + + private String convertToJson(Object o) { + return gson.toJson(o); + } + +} diff --git a/code/services-core/assistant-service/src/main/java/nu/marginalia/assistant/suggest/Suggestions.java b/code/services-core/assistant-service/java/nu/marginalia/assistant/suggest/Suggestions.java similarity index 99% rename from code/services-core/assistant-service/src/main/java/nu/marginalia/assistant/suggest/Suggestions.java rename to code/services-core/assistant-service/java/nu/marginalia/assistant/suggest/Suggestions.java index 7adf7921..2e7b28fb 100644 --- a/code/services-core/assistant-service/src/main/java/nu/marginalia/assistant/suggest/Suggestions.java +++ b/code/services-core/assistant-service/java/nu/marginalia/assistant/suggest/Suggestions.java @@ -2,9 +2,9 @@ package nu.marginalia.assistant.suggest; import com.google.inject.Inject; import com.google.inject.name.Named; +import nu.marginalia.functions.math.dict.SpellChecker; import nu.marginalia.term_frequency_dict.TermFrequencyDict; import nu.marginalia.model.crawl.HtmlFeature; -import nu.marginalia.assistant.dict.SpellChecker; import org.apache.commons.collections4.trie.PatriciaTrie; import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; diff --git a/code/services-core/assistant-service/readme.md b/code/services-core/assistant-service/readme.md index 899ac8fc..1c387c83 100644 --- a/code/services-core/assistant-service/readme.md +++ b/code/services-core/assistant-service/readme.md @@ -4,4 +4,4 @@ The assistant service helps the search service by offering various peripheral fu ## Central Classes -* [AssistantService](src/main/java/nu/marginalia/assistant/AssistantService.java) handles REST requests and delegates to the appropriate handling classes. 
\ No newline at end of file +* [AssistantService](java/nu/marginalia/assistant/AssistantService.java) handles REST requests and delegates to the appropriate handling classes. \ No newline at end of file diff --git a/code/services-core/assistant-service/src/main/java/nu/marginalia/assistant/AssistantService.java b/code/services-core/assistant-service/src/main/java/nu/marginalia/assistant/AssistantService.java deleted file mode 100644 index 592e6308..00000000 --- a/code/services-core/assistant-service/src/main/java/nu/marginalia/assistant/AssistantService.java +++ /dev/null @@ -1,154 +0,0 @@ -package nu.marginalia.assistant; - -import com.google.gson.Gson; -import com.google.inject.Inject; -import lombok.SneakyThrows; -import nu.marginalia.assistant.domains.DomainInformationService; -import nu.marginalia.assistant.domains.SimilarDomainsService; -import nu.marginalia.assistant.eval.Units; -import nu.marginalia.assistant.suggest.Suggestions; -import nu.marginalia.assistant.eval.MathParser; -import nu.marginalia.model.gson.GsonFactory; -import nu.marginalia.screenshot.ScreenshotService; -import nu.marginalia.assistant.dict.DictionaryService; -import nu.marginalia.service.server.*; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import spark.Request; -import spark.Response; -import spark.Spark; - -import java.util.ArrayList; -import java.util.Objects; - -public class AssistantService extends Service { - private final Logger logger = LoggerFactory.getLogger(getClass()); - private final Gson gson = GsonFactory.get(); - private final Units units; - private final MathParser mathParser; - private final SimilarDomainsService similarDomainsService; - private final DomainInformationService domainInformationService; - private final Suggestions suggestions; - - @SneakyThrows - @Inject - public AssistantService(BaseServiceParams params, - DictionaryService dictionaryService, - MathParser mathParser, - Units units, - ScreenshotService screenshotService, - 
SimilarDomainsService similarDomainsService, - DomainInformationService domainInformationService, - Suggestions suggestions) - { - super(params); - - this.mathParser = mathParser; - this.units = units; - this.similarDomainsService = similarDomainsService; - this.domainInformationService = domainInformationService; - this.suggestions = suggestions; - - Spark.staticFiles.expireTime(600); - - Spark.get("/public/screenshot/:id", screenshotService::serveScreenshotRequest); - Spark.get("/screenshot/:id", screenshotService::serveScreenshotRequest); - - Spark.get("/dictionary/:word", (req, rsp) -> dictionaryService.define(req.params("word")), this::convertToJson); - Spark.get("/spell-check/:term", (req, rsp) -> dictionaryService.spellCheck(req.params("term").toLowerCase()), this::convertToJson); - Spark.get("/unit-conversion", (req, rsp) -> unitConversion( - rsp, - req.queryParams("value"), - req.queryParams("from"), - req.queryParams("to") - - )); - Spark.get("/eval-expression", (req, rsp) -> evalExpression( - rsp, - req.queryParams("value") - )); - Spark.get("/domain/:id/similar", this::getSimilarDomains, this::convertToJson); - Spark.get("/domain/:id/linking", this::getLinkingDomains, this::convertToJson); - Spark.get("/domain/:id/info", this::getDomainInformation, this::convertToJson); - Spark.get("/public/suggest/", this::getSuggestions, this::convertToJson); - - Spark.awaitInitialization(); - } - - private Object getSimilarDomains(Request request, Response response) { - int domainId = Integer.parseInt(request.params("id")); - int count = Integer.parseInt(Objects.requireNonNullElse(request.queryParams("count"), "25")); - - response.type("application/json"); - - if (!similarDomainsService.isReady()) { - return new ArrayList<>(); - } - - return similarDomainsService.getSimilarDomains(domainId, count); - } - - private Object getLinkingDomains(Request request, Response response) { - int domainId = Integer.parseInt(request.params("id")); - int count = 
Integer.parseInt(Objects.requireNonNullElse(request.queryParams("count"), "25")); - - response.type("application/json"); - if (!similarDomainsService.isReady()) { - return new ArrayList<>(); - } - return similarDomainsService.getLinkingDomains(domainId, count); - } - - private Object getDomainInformation(Request request, Response response) { - int domainId = Integer.parseInt(request.params("id")); - - response.type("application/json"); - - var maybeDomainInfo = domainInformationService.domainInfo(domainId); - if (maybeDomainInfo.isEmpty()) { - Spark.halt(404); - } - return maybeDomainInfo.get(); - } - - private Object getSuggestions(Request request, Response response) { - response.type("application/json"); - var param = request.queryParams("partial"); - if (param == null) { - logger.warn("Bad parameter, partial is null"); - Spark.halt(500); - } - return suggestions.getSuggestions(10, param); - } - - private Object evalExpression(Response rsp, String value) { - try { - var val = mathParser.evalFormatted(value); - if (val.isBlank()) { - Spark.halt(400); - return null; - } - return val; - } - catch (Exception ex) { - Spark.halt(400); - return null; - } - } - - private Object unitConversion(Response rsp, String value, String fromUnit, String toUnit) { - var result = units.convert(value, fromUnit, toUnit); - if (result.isPresent()) { - return result.get(); - } - { - Spark.halt(400); - return null; - } - } - - private String convertToJson(Object o) { - return gson.toJson(o); - } - -} diff --git a/code/services-core/control-service/build.gradle b/code/services-core/control-service/build.gradle index 4b98d378..a14aec08 100644 --- a/code/services-core/control-service/build.gradle +++ b/code/services-core/control-service/build.gradle @@ -1,9 +1,8 @@ plugins { id 'java' - id 'application' - id 'com.palantir.docker' version '0.35.0' id 'jvm-test-suite' + id 'com.google.cloud.tools.jib' version '3.4.0' } java { @@ -19,7 +18,10 @@ application { tasks.distZip.enabled = false 
-apply from: "$rootProject.projectDir/docker-service.gradle" +apply from: "$rootProject.projectDir/docker.gradle" + + +apply from: "$rootProject.projectDir/srcsets.gradle" dependencies { implementation libs.bundles.gson @@ -31,14 +33,13 @@ dependencies { implementation project(':code:common:renderer') implementation project(':code:libraries:message-queue') implementation project(':code:common:service-discovery') - implementation project(':code:common:service-client') - implementation project(':code:api:index-api') - implementation project(':code:api:query-api') - implementation project(':code:api:executor-api') - implementation project(':code:api:process-mqapi') + implementation project(':code:functions:search-query:api') + implementation project(':code:execution:api') + implementation project(':code:index:api') + implementation project(':code:process-mqapi') implementation project(':code:features-search:screenshots') - implementation project(':code:features-index:index-journal') - implementation project(':code:features-index:index-query') + implementation project(':code:index:index-journal') + implementation project(':code:index:query') implementation project(':code:process-models:crawl-spec') @@ -48,7 +49,6 @@ dependencies { implementation libs.notnull implementation libs.guice implementation libs.zstd - implementation libs.rxjava implementation libs.handlebars implementation libs.trove diff --git a/code/services-core/control-service/src/main/java/nu/marginalia/control/ControlHandlebarsConfigurator.java b/code/services-core/control-service/java/nu/marginalia/control/ControlHandlebarsConfigurator.java similarity index 100% rename from code/services-core/control-service/src/main/java/nu/marginalia/control/ControlHandlebarsConfigurator.java rename to code/services-core/control-service/java/nu/marginalia/control/ControlHandlebarsConfigurator.java diff --git a/code/services-core/control-service/src/main/java/nu/marginalia/control/ControlMain.java 
b/code/services-core/control-service/java/nu/marginalia/control/ControlMain.java similarity index 83% rename from code/services-core/control-service/src/main/java/nu/marginalia/control/ControlMain.java rename to code/services-core/control-service/java/nu/marginalia/control/ControlMain.java index a37cb9c4..a8bbb4f4 100644 --- a/code/services-core/control-service/src/main/java/nu/marginalia/control/ControlMain.java +++ b/code/services-core/control-service/java/nu/marginalia/control/ControlMain.java @@ -4,7 +4,7 @@ import com.google.inject.Guice; import com.google.inject.Inject; import com.google.inject.Injector; import nu.marginalia.service.MainClass; -import nu.marginalia.service.SearchServiceDescriptors; +import nu.marginalia.service.ServiceDiscoveryModule; import nu.marginalia.service.id.ServiceId; import nu.marginalia.service.module.ServiceConfigurationModule; import nu.marginalia.service.module.DatabaseModule; @@ -22,7 +22,8 @@ public class ControlMain extends MainClass { Injector injector = Guice.createInjector( new DatabaseModule(true), new ControlProcessModule(), - new ServiceConfigurationModule(SearchServiceDescriptors.descriptors, ServiceId.Control)); + new ServiceDiscoveryModule(), + new ServiceConfigurationModule(ServiceId.Control)); injector.getInstance(ControlMain.class); injector.getInstance(Initialization.class).setReady(); diff --git a/code/services-core/control-service/src/main/java/nu/marginalia/control/ControlProcessModule.java b/code/services-core/control-service/java/nu/marginalia/control/ControlProcessModule.java similarity index 77% rename from code/services-core/control-service/src/main/java/nu/marginalia/control/ControlProcessModule.java rename to code/services-core/control-service/java/nu/marginalia/control/ControlProcessModule.java index 76c773b8..8c7d6c40 100644 --- a/code/services-core/control-service/src/main/java/nu/marginalia/control/ControlProcessModule.java +++ 
b/code/services-core/control-service/java/nu/marginalia/control/ControlProcessModule.java @@ -1,12 +1,8 @@ package nu.marginalia.control; import com.google.inject.AbstractModule; -import com.google.inject.Module; -import com.google.inject.name.Names; import nu.marginalia.renderer.config.HandlebarsConfigurator; -import java.nio.file.Path; - public class ControlProcessModule extends AbstractModule { @Override protected void configure() { diff --git a/code/services-core/control-service/src/main/java/nu/marginalia/control/ControlRendererFactory.java b/code/services-core/control-service/java/nu/marginalia/control/ControlRendererFactory.java similarity index 100% rename from code/services-core/control-service/src/main/java/nu/marginalia/control/ControlRendererFactory.java rename to code/services-core/control-service/java/nu/marginalia/control/ControlRendererFactory.java diff --git a/code/services-core/control-service/src/main/java/nu/marginalia/control/ControlService.java b/code/services-core/control-service/java/nu/marginalia/control/ControlService.java similarity index 99% rename from code/services-core/control-service/src/main/java/nu/marginalia/control/ControlService.java rename to code/services-core/control-service/java/nu/marginalia/control/ControlService.java index 91e5d3d5..8eb44279 100644 --- a/code/services-core/control-service/src/main/java/nu/marginalia/control/ControlService.java +++ b/code/services-core/control-service/java/nu/marginalia/control/ControlService.java @@ -2,7 +2,7 @@ package nu.marginalia.control; import com.google.gson.Gson; import com.google.inject.Inject; -import nu.marginalia.client.ServiceMonitors; +import nu.marginalia.service.ServiceMonitors; import nu.marginalia.control.actor.ControlActorService; import nu.marginalia.control.app.svc.*; import nu.marginalia.control.node.svc.ControlNodeActionsService; @@ -60,6 +60,7 @@ public class ControlService extends Service { ) throws IOException { super(params); + this.monitors = monitors; 
this.heartbeatService = heartbeatService; this.eventLogService = eventLogService; diff --git a/code/services-core/control-service/src/main/java/nu/marginalia/control/ControlValidationError.java b/code/services-core/control-service/java/nu/marginalia/control/ControlValidationError.java similarity index 100% rename from code/services-core/control-service/src/main/java/nu/marginalia/control/ControlValidationError.java rename to code/services-core/control-service/java/nu/marginalia/control/ControlValidationError.java diff --git a/code/services-core/control-service/src/main/java/nu/marginalia/control/RedirectControl.java b/code/services-core/control-service/java/nu/marginalia/control/RedirectControl.java similarity index 100% rename from code/services-core/control-service/src/main/java/nu/marginalia/control/RedirectControl.java rename to code/services-core/control-service/java/nu/marginalia/control/RedirectControl.java diff --git a/code/services-core/control-service/src/main/java/nu/marginalia/control/Redirects.java b/code/services-core/control-service/java/nu/marginalia/control/Redirects.java similarity index 100% rename from code/services-core/control-service/src/main/java/nu/marginalia/control/Redirects.java rename to code/services-core/control-service/java/nu/marginalia/control/Redirects.java diff --git a/code/services-core/control-service/src/main/java/nu/marginalia/control/actor/ControlActor.java b/code/services-core/control-service/java/nu/marginalia/control/actor/ControlActor.java similarity index 100% rename from code/services-core/control-service/src/main/java/nu/marginalia/control/actor/ControlActor.java rename to code/services-core/control-service/java/nu/marginalia/control/actor/ControlActor.java diff --git a/code/services-core/control-service/src/main/java/nu/marginalia/control/actor/ControlActorService.java b/code/services-core/control-service/java/nu/marginalia/control/actor/ControlActorService.java similarity index 100% rename from 
code/services-core/control-service/src/main/java/nu/marginalia/control/actor/ControlActorService.java rename to code/services-core/control-service/java/nu/marginalia/control/actor/ControlActorService.java diff --git a/code/services-core/control-service/src/main/java/nu/marginalia/control/actor/PrecessionNodes.java b/code/services-core/control-service/java/nu/marginalia/control/actor/PrecessionNodes.java similarity index 100% rename from code/services-core/control-service/src/main/java/nu/marginalia/control/actor/PrecessionNodes.java rename to code/services-core/control-service/java/nu/marginalia/control/actor/PrecessionNodes.java diff --git a/code/services-core/control-service/src/main/java/nu/marginalia/control/actor/monitor/MessageQueueMonitorActor.java b/code/services-core/control-service/java/nu/marginalia/control/actor/monitor/MessageQueueMonitorActor.java similarity index 100% rename from code/services-core/control-service/src/main/java/nu/marginalia/control/actor/monitor/MessageQueueMonitorActor.java rename to code/services-core/control-service/java/nu/marginalia/control/actor/monitor/MessageQueueMonitorActor.java diff --git a/code/services-core/control-service/src/main/java/nu/marginalia/control/actor/monitor/ServiceHeartbeatMonitorActor.java b/code/services-core/control-service/java/nu/marginalia/control/actor/monitor/ServiceHeartbeatMonitorActor.java similarity index 100% rename from code/services-core/control-service/src/main/java/nu/marginalia/control/actor/monitor/ServiceHeartbeatMonitorActor.java rename to code/services-core/control-service/java/nu/marginalia/control/actor/monitor/ServiceHeartbeatMonitorActor.java diff --git a/code/services-core/control-service/src/main/java/nu/marginalia/control/actor/precession/RecrawlAllActor.java b/code/services-core/control-service/java/nu/marginalia/control/actor/precession/RecrawlAllActor.java similarity index 98% rename from 
code/services-core/control-service/src/main/java/nu/marginalia/control/actor/precession/RecrawlAllActor.java rename to code/services-core/control-service/java/nu/marginalia/control/actor/precession/RecrawlAllActor.java index e0268901..b6e857dc 100644 --- a/code/services-core/control-service/src/main/java/nu/marginalia/control/actor/precession/RecrawlAllActor.java +++ b/code/services-core/control-service/java/nu/marginalia/control/actor/precession/RecrawlAllActor.java @@ -9,7 +9,6 @@ import nu.marginalia.actor.state.ActorStep; import nu.marginalia.actor.state.Resume; import nu.marginalia.control.actor.PrecessionNodes; import nu.marginalia.executor.client.ExecutorRemoteActorFactory; -import nu.marginalia.nodecfg.NodeConfigurationService; import nu.marginalia.storage.FileStorageService; import nu.marginalia.storage.model.FileStorageType; diff --git a/code/services-core/control-service/src/main/java/nu/marginalia/control/actor/precession/ReindexAllActor.java b/code/services-core/control-service/java/nu/marginalia/control/actor/precession/ReindexAllActor.java similarity index 90% rename from code/services-core/control-service/src/main/java/nu/marginalia/control/actor/precession/ReindexAllActor.java rename to code/services-core/control-service/java/nu/marginalia/control/actor/precession/ReindexAllActor.java index b07df2c0..48eaf851 100644 --- a/code/services-core/control-service/src/main/java/nu/marginalia/control/actor/precession/ReindexAllActor.java +++ b/code/services-core/control-service/java/nu/marginalia/control/actor/precession/ReindexAllActor.java @@ -8,7 +8,7 @@ import nu.marginalia.actor.state.ActorResumeBehavior; import nu.marginalia.actor.state.ActorStep; import nu.marginalia.actor.state.Resume; import nu.marginalia.control.actor.PrecessionNodes; -import nu.marginalia.index.client.IndexClient; +import nu.marginalia.index.api.IndexMqClient; import nu.marginalia.mq.persistence.MqPersistence; import java.sql.SQLException; @@ -18,7 +18,7 @@ import 
java.util.concurrent.TimeUnit; public class ReindexAllActor extends RecordActorPrototype { private final MqPersistence persistence; - private final IndexClient indexClient; + private final IndexMqClient indexMqClient; private final PrecessionNodes precessionNodes; @@ -42,7 +42,7 @@ public class ReindexAllActor extends RecordActorPrototype { if (first.isEmpty()) yield new End(); else yield new ReindexNode(first.getAsInt()); } - case ReindexNode(int node, long msgId) when msgId < 0 -> new ReindexNode(node, indexClient.triggerRepartition(node)); + case ReindexNode(int node, long msgId) when msgId < 0 -> new ReindexNode(node, indexMqClient.triggerRepartition(node)); case ReindexNode(int node, long msgId) -> { while (!isMessageTerminal(msgId)) { TimeUnit.SECONDS.sleep(10); @@ -66,12 +66,12 @@ public class ReindexAllActor extends RecordActorPrototype { @Inject public ReindexAllActor(Gson gson, MqPersistence persistence, - IndexClient indexClient, + IndexMqClient indexMqClient, PrecessionNodes precessionNodes) { super(gson); this.persistence = persistence; - this.indexClient = indexClient; + this.indexMqClient = indexMqClient; this.precessionNodes = precessionNodes; } diff --git a/code/services-core/control-service/src/main/java/nu/marginalia/control/actor/precession/ReprocessAllActor.java b/code/services-core/control-service/java/nu/marginalia/control/actor/precession/ReprocessAllActor.java similarity index 100% rename from code/services-core/control-service/src/main/java/nu/marginalia/control/actor/precession/ReprocessAllActor.java rename to code/services-core/control-service/java/nu/marginalia/control/actor/precession/ReprocessAllActor.java diff --git a/code/services-core/control-service/src/main/java/nu/marginalia/control/actor/rebalance/RebalanceActor.java b/code/services-core/control-service/java/nu/marginalia/control/actor/rebalance/RebalanceActor.java similarity index 100% rename from 
code/services-core/control-service/src/main/java/nu/marginalia/control/actor/rebalance/RebalanceActor.java rename to code/services-core/control-service/java/nu/marginalia/control/actor/rebalance/RebalanceActor.java diff --git a/code/services-core/control-service/src/main/java/nu/marginalia/control/app/model/ApiKeyModel.java b/code/services-core/control-service/java/nu/marginalia/control/app/model/ApiKeyModel.java similarity index 100% rename from code/services-core/control-service/src/main/java/nu/marginalia/control/app/model/ApiKeyModel.java rename to code/services-core/control-service/java/nu/marginalia/control/app/model/ApiKeyModel.java diff --git a/code/services-core/control-service/src/main/java/nu/marginalia/control/app/model/BlacklistedDomainModel.java b/code/services-core/control-service/java/nu/marginalia/control/app/model/BlacklistedDomainModel.java similarity index 100% rename from code/services-core/control-service/src/main/java/nu/marginalia/control/app/model/BlacklistedDomainModel.java rename to code/services-core/control-service/java/nu/marginalia/control/app/model/BlacklistedDomainModel.java diff --git a/code/services-core/control-service/src/main/java/nu/marginalia/control/app/model/DomainComplaintCategory.java b/code/services-core/control-service/java/nu/marginalia/control/app/model/DomainComplaintCategory.java similarity index 100% rename from code/services-core/control-service/src/main/java/nu/marginalia/control/app/model/DomainComplaintCategory.java rename to code/services-core/control-service/java/nu/marginalia/control/app/model/DomainComplaintCategory.java diff --git a/code/services-core/control-service/src/main/java/nu/marginalia/control/app/model/DomainComplaintModel.java b/code/services-core/control-service/java/nu/marginalia/control/app/model/DomainComplaintModel.java similarity index 100% rename from code/services-core/control-service/src/main/java/nu/marginalia/control/app/model/DomainComplaintModel.java rename to 
code/services-core/control-service/java/nu/marginalia/control/app/model/DomainComplaintModel.java diff --git a/code/services-core/control-service/src/main/java/nu/marginalia/control/app/svc/ApiKeyService.java b/code/services-core/control-service/java/nu/marginalia/control/app/svc/ApiKeyService.java similarity index 100% rename from code/services-core/control-service/src/main/java/nu/marginalia/control/app/svc/ApiKeyService.java rename to code/services-core/control-service/java/nu/marginalia/control/app/svc/ApiKeyService.java diff --git a/code/services-core/control-service/src/main/java/nu/marginalia/control/app/svc/ControlBlacklistService.java b/code/services-core/control-service/java/nu/marginalia/control/app/svc/ControlBlacklistService.java similarity index 100% rename from code/services-core/control-service/src/main/java/nu/marginalia/control/app/svc/ControlBlacklistService.java rename to code/services-core/control-service/java/nu/marginalia/control/app/svc/ControlBlacklistService.java diff --git a/code/services-core/control-service/src/main/java/nu/marginalia/control/app/svc/DomainComplaintService.java b/code/services-core/control-service/java/nu/marginalia/control/app/svc/DomainComplaintService.java similarity index 100% rename from code/services-core/control-service/src/main/java/nu/marginalia/control/app/svc/DomainComplaintService.java rename to code/services-core/control-service/java/nu/marginalia/control/app/svc/DomainComplaintService.java diff --git a/code/services-core/control-service/src/main/java/nu/marginalia/control/app/svc/RandomExplorationService.java b/code/services-core/control-service/java/nu/marginalia/control/app/svc/RandomExplorationService.java similarity index 100% rename from code/services-core/control-service/src/main/java/nu/marginalia/control/app/svc/RandomExplorationService.java rename to code/services-core/control-service/java/nu/marginalia/control/app/svc/RandomExplorationService.java diff --git 
a/code/services-core/control-service/src/main/java/nu/marginalia/control/app/svc/SearchToBanService.java b/code/services-core/control-service/java/nu/marginalia/control/app/svc/SearchToBanService.java similarity index 80% rename from code/services-core/control-service/src/main/java/nu/marginalia/control/app/svc/SearchToBanService.java rename to code/services-core/control-service/java/nu/marginalia/control/app/svc/SearchToBanService.java index 6ff198da..f9487e22 100644 --- a/code/services-core/control-service/src/main/java/nu/marginalia/control/app/svc/SearchToBanService.java +++ b/code/services-core/control-service/java/nu/marginalia/control/app/svc/SearchToBanService.java @@ -1,14 +1,12 @@ package nu.marginalia.control.app.svc; import com.google.inject.Inject; -import nu.marginalia.client.Context; +import nu.marginalia.api.searchquery.QueryClient; +import nu.marginalia.api.searchquery.model.query.QueryParams; import nu.marginalia.control.ControlRendererFactory; -import nu.marginalia.index.client.model.query.SearchSetIdentifier; import nu.marginalia.index.query.limit.QueryLimits; import nu.marginalia.model.EdgeUrl; import nu.marginalia.nodecfg.NodeConfigurationService; -import nu.marginalia.query.client.QueryClient; -import nu.marginalia.query.model.QueryParams; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import spark.Request; @@ -49,17 +47,17 @@ public class SearchToBanService { if (Objects.equals(request.requestMethod(), "POST")) { executeBlacklisting(request); - return findResults(Context.fromRequest(request), request.queryParams("query")); + return findResults(request.queryParams("query")); } - return findResults(Context.fromRequest(request), request.queryParams("q")); + return findResults(request.queryParams("q")); } - private Object findResults(Context ctx, String q) { + private Object findResults(String q) { if (q == null || q.isBlank()) { return Map.of(); } else { - return executeQuery(ctx, q); + return executeQuery(q); } } @@ -76,8 +74,8 @@ 
public class SearchToBanService { } } - private Object executeQuery(Context ctx, String query) { - return queryClient.search(ctx, new QueryParams( + private Object executeQuery(String query) { + return queryClient.search(new QueryParams( query, new QueryLimits(2, 200, 250, 8192), "NONE" )); diff --git a/code/services-core/control-service/src/main/java/nu/marginalia/control/node/model/FileStorageBaseWithStorage.java b/code/services-core/control-service/java/nu/marginalia/control/node/model/FileStorageBaseWithStorage.java similarity index 100% rename from code/services-core/control-service/src/main/java/nu/marginalia/control/node/model/FileStorageBaseWithStorage.java rename to code/services-core/control-service/java/nu/marginalia/control/node/model/FileStorageBaseWithStorage.java diff --git a/code/services-core/control-service/src/main/java/nu/marginalia/control/node/model/FileStorageFileModel.java b/code/services-core/control-service/java/nu/marginalia/control/node/model/FileStorageFileModel.java similarity index 100% rename from code/services-core/control-service/src/main/java/nu/marginalia/control/node/model/FileStorageFileModel.java rename to code/services-core/control-service/java/nu/marginalia/control/node/model/FileStorageFileModel.java diff --git a/code/services-core/control-service/src/main/java/nu/marginalia/control/node/model/FileStorageWithActions.java b/code/services-core/control-service/java/nu/marginalia/control/node/model/FileStorageWithActions.java similarity index 100% rename from code/services-core/control-service/src/main/java/nu/marginalia/control/node/model/FileStorageWithActions.java rename to code/services-core/control-service/java/nu/marginalia/control/node/model/FileStorageWithActions.java diff --git a/code/services-core/control-service/src/main/java/nu/marginalia/control/node/model/FileStorageWithRelatedEntries.java b/code/services-core/control-service/java/nu/marginalia/control/node/model/FileStorageWithRelatedEntries.java similarity index 
100% rename from code/services-core/control-service/src/main/java/nu/marginalia/control/node/model/FileStorageWithRelatedEntries.java rename to code/services-core/control-service/java/nu/marginalia/control/node/model/FileStorageWithRelatedEntries.java diff --git a/code/services-core/control-service/src/main/java/nu/marginalia/control/node/model/IndexNodeStatus.java b/code/services-core/control-service/java/nu/marginalia/control/node/model/IndexNodeStatus.java similarity index 100% rename from code/services-core/control-service/src/main/java/nu/marginalia/control/node/model/IndexNodeStatus.java rename to code/services-core/control-service/java/nu/marginalia/control/node/model/IndexNodeStatus.java diff --git a/code/services-core/control-service/src/main/java/nu/marginalia/control/node/svc/ControlFileStorageService.java b/code/services-core/control-service/java/nu/marginalia/control/node/svc/ControlFileStorageService.java similarity index 93% rename from code/services-core/control-service/src/main/java/nu/marginalia/control/node/svc/ControlFileStorageService.java rename to code/services-core/control-service/java/nu/marginalia/control/node/svc/ControlFileStorageService.java index 7de00d0a..b385e32e 100644 --- a/code/services-core/control-service/src/main/java/nu/marginalia/control/node/svc/ControlFileStorageService.java +++ b/code/services-core/control-service/java/nu/marginalia/control/node/svc/ControlFileStorageService.java @@ -3,7 +3,6 @@ package nu.marginalia.control.node.svc; import com.google.inject.Inject; import com.google.inject.Singleton; import lombok.SneakyThrows; -import nu.marginalia.client.Context; import nu.marginalia.control.Redirects; import nu.marginalia.executor.client.ExecutorClient; import nu.marginalia.storage.FileStorageService; @@ -16,8 +15,6 @@ import spark.Response; import spark.Spark; import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; import java.sql.SQLException; @Singleton @@ -27,7 +24,8 @@ public class 
ControlFileStorageService { private final Logger logger = LoggerFactory.getLogger(getClass()); @Inject - public ControlFileStorageService(FileStorageService fileStorageService, ExecutorClient executorClient) + public ControlFileStorageService(FileStorageService fileStorageService, + ExecutorClient executorClient) { this.fileStorageService = fileStorageService; this.executorClient = executorClient; @@ -75,7 +73,7 @@ public class ControlFileStorageService { else response.type("application/octet-stream"); - executorClient.transferFile(Context.fromRequest(request), nodeId, fileStorageId, path, response.raw().getOutputStream()); + executorClient.transferFile(nodeId, fileStorageId, path, response.raw().getOutputStream()); return ""; } diff --git a/code/services-core/control-service/src/main/java/nu/marginalia/control/node/svc/ControlNodeActionsService.java b/code/services-core/control-service/java/nu/marginalia/control/node/svc/ControlNodeActionsService.java similarity index 85% rename from code/services-core/control-service/src/main/java/nu/marginalia/control/node/svc/ControlNodeActionsService.java rename to code/services-core/control-service/java/nu/marginalia/control/node/svc/ControlNodeActionsService.java index 05de9b07..2ae09234 100644 --- a/code/services-core/control-service/src/main/java/nu/marginalia/control/node/svc/ControlNodeActionsService.java +++ b/code/services-core/control-service/java/nu/marginalia/control/node/svc/ControlNodeActionsService.java @@ -5,7 +5,10 @@ import com.google.inject.Singleton; import nu.marginalia.control.ControlValidationError; import nu.marginalia.control.RedirectControl; import nu.marginalia.executor.client.ExecutorClient; -import nu.marginalia.index.client.IndexClient; +import nu.marginalia.executor.client.ExecutorCrawlClient; +import nu.marginalia.executor.client.ExecutorExportClient; +import nu.marginalia.executor.client.ExecutorSideloadClient; +import nu.marginalia.index.api.IndexMqClient; import 
nu.marginalia.service.control.ServiceEventLog; import nu.marginalia.storage.FileStorageService; import nu.marginalia.storage.model.FileStorageId; @@ -26,26 +29,32 @@ import java.util.Set; @Singleton public class ControlNodeActionsService { private static final Logger logger = LoggerFactory.getLogger(ControlNodeActionsService.class); - private final IndexClient indexClient; + private final IndexMqClient indexMqClient; private final RedirectControl redirectControl; private final FileStorageService fileStorageService; private final ServiceEventLog eventLog; private final ExecutorClient executorClient; + private final ExecutorCrawlClient crawlClient; + private final ExecutorSideloadClient sideloadClient; + private final ExecutorExportClient exportClient; @Inject public ControlNodeActionsService(ExecutorClient executorClient, - IndexClient indexClient, + IndexMqClient indexMqClient, RedirectControl redirectControl, FileStorageService fileStorageService, - ServiceEventLog eventLog) + ServiceEventLog eventLog, ExecutorCrawlClient crawlClient, ExecutorSideloadClient sideloadClient, ExecutorExportClient exportClient) { this.executorClient = executorClient; - this.indexClient = indexClient; + this.indexMqClient = indexMqClient; this.redirectControl = redirectControl; this.fileStorageService = fileStorageService; this.eventLog = eventLog; + this.crawlClient = crawlClient; + this.sideloadClient = sideloadClient; + this.exportClient = exportClient; } public void register() { @@ -127,7 +136,7 @@ public class ControlNodeActionsService { eventLog.logEvent("USER-ACTION", "SIDELOAD ENCYCLOPEDIA " + nodeId); - executorClient.sideloadEncyclopedia(nodeId, sourcePath, baseUrl); + sideloadClient.sideloadEncyclopedia(nodeId, sourcePath, baseUrl); return ""; } @@ -140,7 +149,7 @@ public class ControlNodeActionsService { eventLog.logEvent("USER-ACTION", "SIDELOAD DIRTREE " + nodeId); - executorClient.sideloadDirtree(nodeId, sourcePath); + sideloadClient.sideloadDirtree(nodeId, sourcePath); 
return ""; } @@ -152,7 +161,7 @@ public class ControlNodeActionsService { eventLog.logEvent("USER-ACTION", "SIDELOAD REDDIT " + nodeId); - executorClient.sideloadReddit(nodeId, sourcePath); + sideloadClient.sideloadReddit(nodeId, sourcePath); return ""; } @@ -163,7 +172,7 @@ public class ControlNodeActionsService { eventLog.logEvent("USER-ACTION", "SIDELOAD WARC " + nodeId); - executorClient.sideloadWarc(nodeId, sourcePath); + sideloadClient.sideloadWarc(nodeId, sourcePath); return ""; } @@ -178,13 +187,13 @@ public class ControlNodeActionsService { eventLog.logEvent("USER-ACTION", "SIDELOAD STACKEXCHANGE " + nodeId); - executorClient.sideloadStackexchange(nodeId, sourcePath); + sideloadClient.sideloadStackexchange(nodeId, sourcePath); return ""; } public Object triggerRepartition(Request request, Response response) throws Exception { - indexClient.triggerRepartition(Integer.parseInt(request.params("node"))); + indexMqClient.triggerRepartition(Integer.parseInt(request.params("node"))); return ""; } @@ -196,7 +205,7 @@ public class ControlNodeActionsService { changeActiveStorage(nodeId, FileStorageType.CRAWL_DATA, toCrawl); - executorClient.triggerRecrawl( + crawlClient.triggerRecrawl( nodeId, toCrawl ); @@ -211,7 +220,7 @@ public class ControlNodeActionsService { changeActiveStorage(nodeId, FileStorageType.CRAWL_SPEC, toCrawl); - executorClient.triggerCrawl(nodeId, toCrawl); + crawlClient.triggerCrawl(nodeId, toCrawl); return ""; } @@ -224,10 +233,10 @@ public class ControlNodeActionsService { changeActiveStorage(nodeId, FileStorageType.PROCESSED_DATA, toProcess); if (isAutoload) { - executorClient.triggerConvertAndLoad(nodeId, toProcess); + crawlClient.triggerConvertAndLoad(nodeId, toProcess); } else { - executorClient.triggerConvert(nodeId, toProcess); + crawlClient.triggerConvert(nodeId, toProcess); } return ""; @@ -245,7 +254,7 @@ public class ControlNodeActionsService { changeActiveStorage(nodeId, FileStorageType.PROCESSED_DATA, ids.toArray(new 
FileStorageId[0])); - executorClient.loadProcessedData(nodeId, ids); + crawlClient.loadProcessedData(nodeId, ids); return ""; } @@ -287,13 +296,13 @@ public class ControlNodeActionsService { throw new ControlValidationError("No url specified", "A url must be specified", ".."); } - executorClient.createCrawlSpecFromDownload(nodeId, description, url); + crawlClient.createCrawlSpecFromDownload(nodeId, description, url); return ""; } private Object exportDbData(Request req, Response rsp) { - executorClient.exportData(Integer.parseInt(req.params("id"))); + exportClient.exportData(Integer.parseInt(req.params("id"))); return ""; } @@ -303,9 +312,9 @@ public class ControlNodeActionsService { FileStorageId source = parseSourceFileStorageId(req.queryParams("source")); switch (exportType) { - case "atags" -> executorClient.exportAtags(Integer.parseInt(req.params("id")), source); - case "rss" -> executorClient.exportRssFeeds(Integer.parseInt(req.params("id")), source); - case "termFreq" -> executorClient.exportTermFrequencies(Integer.parseInt(req.params("id")), source); + case "atags" -> exportClient.exportAtags(Integer.parseInt(req.params("id")), source); + case "rss" -> exportClient.exportRssFeeds(Integer.parseInt(req.params("id")), source); + case "termFreq" -> exportClient.exportTermFrequencies(Integer.parseInt(req.params("id")), source); default -> throw new ControlValidationError("No export type specified", "An export type must be specified", ".."); } @@ -317,7 +326,7 @@ public class ControlNodeActionsService { int size = Integer.parseInt(req.queryParams("size")); String name = req.queryParams("name"); - executorClient.exportSampleData(Integer.parseInt(req.params("id")), source, size, name); + exportClient.exportSampleData(Integer.parseInt(req.params("id")), source, size, name); return ""; } diff --git a/code/services-core/control-service/src/main/java/nu/marginalia/control/node/svc/ControlNodeService.java 
b/code/services-core/control-service/java/nu/marginalia/control/node/svc/ControlNodeService.java similarity index 98% rename from code/services-core/control-service/src/main/java/nu/marginalia/control/node/svc/ControlNodeService.java rename to code/services-core/control-service/java/nu/marginalia/control/node/svc/ControlNodeService.java index e4859eb3..95a0ec40 100644 --- a/code/services-core/control-service/src/main/java/nu/marginalia/control/node/svc/ControlNodeService.java +++ b/code/services-core/control-service/java/nu/marginalia/control/node/svc/ControlNodeService.java @@ -3,8 +3,7 @@ package nu.marginalia.control.node.svc; import com.google.inject.Inject; import com.zaxxer.hikari.HikariDataSource; import lombok.SneakyThrows; -import nu.marginalia.client.Context; -import nu.marginalia.client.ServiceMonitors; +import nu.marginalia.service.ServiceMonitors; import nu.marginalia.control.ControlRendererFactory; import nu.marginalia.control.RedirectControl; import nu.marginalia.control.Redirects; @@ -211,7 +210,7 @@ public class ControlNodeService { private Object nodeStorageDetailsModel(Request request, Response response) throws SQLException { int nodeId = Integer.parseInt(request.params("id")); - var storage = getFileStorageWithRelatedEntries(Context.fromRequest(request), nodeId, FileStorageId.parse(request.queryParams("fid"))); + var storage = getFileStorageWithRelatedEntries(nodeId, FileStorageId.parse(request.queryParams("fid"))); String view = switch(storage.type()) { case BACKUP -> "backup"; @@ -390,7 +389,6 @@ public class ControlNodeService { public FileStorageWithRelatedEntries getFileStorageWithRelatedEntries( - Context context, int node, FileStorageId fileId ) throws SQLException { diff --git a/code/services-core/control-service/src/main/java/nu/marginalia/control/sys/model/AbortedProcess.java b/code/services-core/control-service/java/nu/marginalia/control/sys/model/AbortedProcess.java similarity index 100% rename from 
code/services-core/control-service/src/main/java/nu/marginalia/control/sys/model/AbortedProcess.java rename to code/services-core/control-service/java/nu/marginalia/control/sys/model/AbortedProcess.java diff --git a/code/services-core/control-service/src/main/java/nu/marginalia/control/sys/model/EventLogEntry.java b/code/services-core/control-service/java/nu/marginalia/control/sys/model/EventLogEntry.java similarity index 96% rename from code/services-core/control-service/src/main/java/nu/marginalia/control/sys/model/EventLogEntry.java rename to code/services-core/control-service/java/nu/marginalia/control/sys/model/EventLogEntry.java index 48bf2a07..1de52852 100644 --- a/code/services-core/control-service/src/main/java/nu/marginalia/control/sys/model/EventLogEntry.java +++ b/code/services-core/control-service/java/nu/marginalia/control/sys/model/EventLogEntry.java @@ -1,6 +1,5 @@ package nu.marginalia.control.sys.model; -import java.sql.Timestamp; import java.time.LocalDate; public record EventLogEntry( diff --git a/code/services-core/control-service/src/main/java/nu/marginalia/control/sys/model/EventLogServiceFilter.java b/code/services-core/control-service/java/nu/marginalia/control/sys/model/EventLogServiceFilter.java similarity index 100% rename from code/services-core/control-service/src/main/java/nu/marginalia/control/sys/model/EventLogServiceFilter.java rename to code/services-core/control-service/java/nu/marginalia/control/sys/model/EventLogServiceFilter.java diff --git a/code/services-core/control-service/src/main/java/nu/marginalia/control/sys/model/EventLogTypeFilter.java b/code/services-core/control-service/java/nu/marginalia/control/sys/model/EventLogTypeFilter.java similarity index 100% rename from code/services-core/control-service/src/main/java/nu/marginalia/control/sys/model/EventLogTypeFilter.java rename to code/services-core/control-service/java/nu/marginalia/control/sys/model/EventLogTypeFilter.java diff --git 
a/code/services-core/control-service/src/main/java/nu/marginalia/control/sys/model/MessageQueueEntry.java b/code/services-core/control-service/java/nu/marginalia/control/sys/model/MessageQueueEntry.java similarity index 97% rename from code/services-core/control-service/src/main/java/nu/marginalia/control/sys/model/MessageQueueEntry.java rename to code/services-core/control-service/java/nu/marginalia/control/sys/model/MessageQueueEntry.java index bea285a1..1705a994 100644 --- a/code/services-core/control-service/src/main/java/nu/marginalia/control/sys/model/MessageQueueEntry.java +++ b/code/services-core/control-service/java/nu/marginalia/control/sys/model/MessageQueueEntry.java @@ -1,7 +1,5 @@ package nu.marginalia.control.sys.model; -import java.time.LocalDate; - public record MessageQueueEntry ( long id, long relatedId, diff --git a/code/services-core/control-service/src/main/java/nu/marginalia/control/sys/model/ProcessHeartbeat.java b/code/services-core/control-service/java/nu/marginalia/control/sys/model/ProcessHeartbeat.java similarity index 79% rename from code/services-core/control-service/src/main/java/nu/marginalia/control/sys/model/ProcessHeartbeat.java rename to code/services-core/control-service/java/nu/marginalia/control/sys/model/ProcessHeartbeat.java index 7cf5205d..f2911ad2 100644 --- a/code/services-core/control-service/src/main/java/nu/marginalia/control/sys/model/ProcessHeartbeat.java +++ b/code/services-core/control-service/java/nu/marginalia/control/sys/model/ProcessHeartbeat.java @@ -20,9 +20,7 @@ public record ProcessHeartbeat( } public String progressStyle() { if ("RUNNING".equals(status) && progress != null) { - return """ - background: linear-gradient(90deg, #ccc 0%%, #ccc %d%%, #fff %d%%) - """.formatted(progress, progress, progress); + return STR."background: linear-gradient(90deg, #ccc 0%, #ccc \{progress}%, #fff \{progress}%)"; } return ""; } diff --git 
a/code/services-core/control-service/src/main/java/nu/marginalia/control/sys/model/ServiceHeartbeat.java b/code/services-core/control-service/java/nu/marginalia/control/sys/model/ServiceHeartbeat.java similarity index 100% rename from code/services-core/control-service/src/main/java/nu/marginalia/control/sys/model/ServiceHeartbeat.java rename to code/services-core/control-service/java/nu/marginalia/control/sys/model/ServiceHeartbeat.java diff --git a/code/services-core/control-service/src/main/java/nu/marginalia/control/sys/model/TaskHeartbeat.java b/code/services-core/control-service/java/nu/marginalia/control/sys/model/TaskHeartbeat.java similarity index 77% rename from code/services-core/control-service/src/main/java/nu/marginalia/control/sys/model/TaskHeartbeat.java rename to code/services-core/control-service/java/nu/marginalia/control/sys/model/TaskHeartbeat.java index 467ae493..203d36be 100644 --- a/code/services-core/control-service/src/main/java/nu/marginalia/control/sys/model/TaskHeartbeat.java +++ b/code/services-core/control-service/java/nu/marginalia/control/sys/model/TaskHeartbeat.java @@ -21,9 +21,7 @@ public record TaskHeartbeat( public String progressStyle() { if ("RUNNING".equals(status) && progress != null) { - return """ - background: linear-gradient(90deg, #ccc 0%%, #ccc %d%%, #fff %d%%) - """.formatted(progress, progress, progress); + return STR."background: linear-gradient(90deg, #ccc 0%, #ccc \{progress}%, #fff \{progress}%)"; } return ""; } diff --git a/code/services-core/control-service/src/main/java/nu/marginalia/control/sys/svc/AbortedProcessService.java b/code/services-core/control-service/java/nu/marginalia/control/sys/svc/AbortedProcessService.java similarity index 100% rename from code/services-core/control-service/src/main/java/nu/marginalia/control/sys/svc/AbortedProcessService.java rename to code/services-core/control-service/java/nu/marginalia/control/sys/svc/AbortedProcessService.java diff --git 
a/code/services-core/control-service/src/main/java/nu/marginalia/control/sys/svc/ControlDomainRankingSetsService.java b/code/services-core/control-service/java/nu/marginalia/control/sys/svc/ControlDomainRankingSetsService.java similarity index 98% rename from code/services-core/control-service/src/main/java/nu/marginalia/control/sys/svc/ControlDomainRankingSetsService.java rename to code/services-core/control-service/java/nu/marginalia/control/sys/svc/ControlDomainRankingSetsService.java index ca2c3bc7..e2adf14b 100644 --- a/code/services-core/control-service/src/main/java/nu/marginalia/control/sys/svc/ControlDomainRankingSetsService.java +++ b/code/services-core/control-service/java/nu/marginalia/control/sys/svc/ControlDomainRankingSetsService.java @@ -1,7 +1,6 @@ package nu.marginalia.control.sys.svc; import com.google.inject.Inject; -import com.zaxxer.hikari.HikariDataSource; import nu.marginalia.control.ControlRendererFactory; import nu.marginalia.control.ControlValidationError; import nu.marginalia.control.Redirects; diff --git a/code/services-core/control-service/src/main/java/nu/marginalia/control/sys/svc/ControlErrorHandler.java b/code/services-core/control-service/java/nu/marginalia/control/sys/svc/ControlErrorHandler.java similarity index 100% rename from code/services-core/control-service/src/main/java/nu/marginalia/control/sys/svc/ControlErrorHandler.java rename to code/services-core/control-service/java/nu/marginalia/control/sys/svc/ControlErrorHandler.java diff --git a/code/services-core/control-service/src/main/java/nu/marginalia/control/sys/svc/ControlSysActionsService.java b/code/services-core/control-service/java/nu/marginalia/control/sys/svc/ControlSysActionsService.java similarity index 100% rename from code/services-core/control-service/src/main/java/nu/marginalia/control/sys/svc/ControlSysActionsService.java rename to code/services-core/control-service/java/nu/marginalia/control/sys/svc/ControlSysActionsService.java diff --git 
a/code/services-core/control-service/src/main/java/nu/marginalia/control/sys/svc/DataSetsService.java b/code/services-core/control-service/java/nu/marginalia/control/sys/svc/DataSetsService.java similarity index 97% rename from code/services-core/control-service/src/main/java/nu/marginalia/control/sys/svc/DataSetsService.java rename to code/services-core/control-service/java/nu/marginalia/control/sys/svc/DataSetsService.java index 491f1b62..d1636623 100644 --- a/code/services-core/control-service/src/main/java/nu/marginalia/control/sys/svc/DataSetsService.java +++ b/code/services-core/control-service/java/nu/marginalia/control/sys/svc/DataSetsService.java @@ -2,7 +2,6 @@ package nu.marginalia.control.sys.svc; import com.google.inject.Inject; import com.google.inject.Singleton; -import com.zaxxer.hikari.HikariDataSource; import nu.marginalia.control.ControlRendererFactory; import nu.marginalia.db.DomainTypes; import spark.Request; diff --git a/code/services-core/control-service/src/main/java/nu/marginalia/control/sys/svc/EventLogService.java b/code/services-core/control-service/java/nu/marginalia/control/sys/svc/EventLogService.java similarity index 99% rename from code/services-core/control-service/src/main/java/nu/marginalia/control/sys/svc/EventLogService.java rename to code/services-core/control-service/java/nu/marginalia/control/sys/svc/EventLogService.java index 446b1c32..7f6a58ea 100644 --- a/code/services-core/control-service/src/main/java/nu/marginalia/control/sys/svc/EventLogService.java +++ b/code/services-core/control-service/java/nu/marginalia/control/sys/svc/EventLogService.java @@ -11,7 +11,6 @@ import spark.Request; import spark.Response; import java.sql.SQLException; -import java.sql.Timestamp; import java.util.*; @Singleton diff --git a/code/services-core/control-service/src/main/java/nu/marginalia/control/sys/svc/HeartbeatService.java b/code/services-core/control-service/java/nu/marginalia/control/sys/svc/HeartbeatService.java similarity index 99% 
rename from code/services-core/control-service/src/main/java/nu/marginalia/control/sys/svc/HeartbeatService.java rename to code/services-core/control-service/java/nu/marginalia/control/sys/svc/HeartbeatService.java index 6fb9931e..875c4bd1 100644 --- a/code/services-core/control-service/src/main/java/nu/marginalia/control/sys/svc/HeartbeatService.java +++ b/code/services-core/control-service/java/nu/marginalia/control/sys/svc/HeartbeatService.java @@ -6,7 +6,6 @@ import com.zaxxer.hikari.HikariDataSource; import nu.marginalia.control.sys.model.ProcessHeartbeat; import nu.marginalia.control.sys.model.ServiceHeartbeat; import nu.marginalia.control.sys.model.TaskHeartbeat; -import nu.marginalia.service.control.ServiceEventLog; import java.sql.SQLException; import java.util.ArrayList; diff --git a/code/services-core/control-service/src/main/java/nu/marginalia/control/sys/svc/MessageQueueService.java b/code/services-core/control-service/java/nu/marginalia/control/sys/svc/MessageQueueService.java similarity index 100% rename from code/services-core/control-service/src/main/java/nu/marginalia/control/sys/svc/MessageQueueService.java rename to code/services-core/control-service/java/nu/marginalia/control/sys/svc/MessageQueueService.java diff --git a/code/services-core/control-service/readme.md b/code/services-core/control-service/readme.md index df5e2c5a..24958d95 100644 --- a/code/services-core/control-service/readme.md +++ b/code/services-core/control-service/readme.md @@ -1,11 +1,10 @@ # Control Service -The control service provides an operator's user interface. By default this interface is +The control service provides an operator's user interface. By default, this interface is exposed on port 8081. It does not offer any sort of access control or authentication. 
The control service will itself execute tasks that affect the entire system, but delegate -node-specific tasks to the corresponding [executor-service](../executor-service) via the -[executor-api](../../api/executor-api). +node-specific tasks to the corresponding [execution subsystem](../../execution). Conceptually the application is broken into three parts: @@ -15,7 +14,7 @@ Conceptually the application is broken into three parts: ## Central Classes -* [ControlService](src/main/java/nu/marginalia/control/ControlService.java) +* [ControlService](java/nu/marginalia/control/ControlService.java) ## See Also diff --git a/code/services-core/control-service/src/main/resources/static/control/noanimation.css b/code/services-core/control-service/resources/static/control/noanimation.css similarity index 100% rename from code/services-core/control-service/src/main/resources/static/control/noanimation.css rename to code/services-core/control-service/resources/static/control/noanimation.css diff --git a/code/services-core/control-service/src/main/resources/static/control/refresh.js b/code/services-core/control-service/resources/static/control/refresh.js similarity index 100% rename from code/services-core/control-service/src/main/resources/static/control/refresh.js rename to code/services-core/control-service/resources/static/control/refresh.js diff --git a/code/services-core/control-service/src/main/resources/static/control/tables.css b/code/services-core/control-service/resources/static/control/tables.css similarity index 100% rename from code/services-core/control-service/src/main/resources/static/control/tables.css rename to code/services-core/control-service/resources/static/control/tables.css diff --git a/code/services-core/control-service/src/main/resources/templates/control/actions.hdb b/code/services-core/control-service/resources/templates/control/actions.hdb similarity index 100% rename from
code/services-core/control-service/src/main/resources/templates/control/actions.hdb rename to code/services-core/control-service/resources/templates/control/actions.hdb diff --git a/code/services-core/control-service/src/main/resources/templates/control/actor-details.hdb b/code/services-core/control-service/resources/templates/control/actor-details.hdb similarity index 100% rename from code/services-core/control-service/src/main/resources/templates/control/actor-details.hdb rename to code/services-core/control-service/resources/templates/control/actor-details.hdb diff --git a/code/services-core/control-service/src/main/resources/templates/control/app/api-keys.hdb b/code/services-core/control-service/resources/templates/control/app/api-keys.hdb similarity index 100% rename from code/services-core/control-service/src/main/resources/templates/control/app/api-keys.hdb rename to code/services-core/control-service/resources/templates/control/app/api-keys.hdb diff --git a/code/services-core/control-service/src/main/resources/templates/control/app/blacklist.hdb b/code/services-core/control-service/resources/templates/control/app/blacklist.hdb similarity index 100% rename from code/services-core/control-service/src/main/resources/templates/control/app/blacklist.hdb rename to code/services-core/control-service/resources/templates/control/app/blacklist.hdb diff --git a/code/services-core/control-service/src/main/resources/templates/control/app/domain-complaints.hdb b/code/services-core/control-service/resources/templates/control/app/domain-complaints.hdb similarity index 100% rename from code/services-core/control-service/src/main/resources/templates/control/app/domain-complaints.hdb rename to code/services-core/control-service/resources/templates/control/app/domain-complaints.hdb diff --git a/code/services-core/control-service/src/main/resources/templates/control/app/review-random-domains.hdb 
b/code/services-core/control-service/resources/templates/control/app/review-random-domains.hdb similarity index 100% rename from code/services-core/control-service/src/main/resources/templates/control/app/review-random-domains.hdb rename to code/services-core/control-service/resources/templates/control/app/review-random-domains.hdb diff --git a/code/services-core/control-service/src/main/resources/templates/control/app/search-to-ban.hdb b/code/services-core/control-service/resources/templates/control/app/search-to-ban.hdb similarity index 100% rename from code/services-core/control-service/src/main/resources/templates/control/app/search-to-ban.hdb rename to code/services-core/control-service/resources/templates/control/app/search-to-ban.hdb diff --git a/code/services-core/control-service/src/main/resources/templates/control/error.hdb b/code/services-core/control-service/resources/templates/control/error.hdb similarity index 100% rename from code/services-core/control-service/src/main/resources/templates/control/error.hdb rename to code/services-core/control-service/resources/templates/control/error.hdb diff --git a/code/services-core/control-service/src/main/resources/templates/control/index.hdb b/code/services-core/control-service/resources/templates/control/index.hdb similarity index 100% rename from code/services-core/control-service/src/main/resources/templates/control/index.hdb rename to code/services-core/control-service/resources/templates/control/index.hdb diff --git a/code/services-core/control-service/src/main/resources/templates/control/node/actions/partial-download-sample-data.hdb b/code/services-core/control-service/resources/templates/control/node/actions/partial-download-sample-data.hdb similarity index 100% rename from code/services-core/control-service/src/main/resources/templates/control/node/actions/partial-download-sample-data.hdb rename to 
code/services-core/control-service/resources/templates/control/node/actions/partial-download-sample-data.hdb diff --git a/code/services-core/control-service/src/main/resources/templates/control/node/actions/partial-export-db-data.hdb b/code/services-core/control-service/resources/templates/control/node/actions/partial-export-db-data.hdb similarity index 100% rename from code/services-core/control-service/src/main/resources/templates/control/node/actions/partial-export-db-data.hdb rename to code/services-core/control-service/resources/templates/control/node/actions/partial-export-db-data.hdb diff --git a/code/services-core/control-service/src/main/resources/templates/control/node/actions/partial-export-from-crawl-data.hdb b/code/services-core/control-service/resources/templates/control/node/actions/partial-export-from-crawl-data.hdb similarity index 100% rename from code/services-core/control-service/src/main/resources/templates/control/node/actions/partial-export-from-crawl-data.hdb rename to code/services-core/control-service/resources/templates/control/node/actions/partial-export-from-crawl-data.hdb diff --git a/code/services-core/control-service/src/main/resources/templates/control/node/actions/partial-export-sample-data.hdb b/code/services-core/control-service/resources/templates/control/node/actions/partial-export-sample-data.hdb similarity index 100% rename from code/services-core/control-service/src/main/resources/templates/control/node/actions/partial-export-sample-data.hdb rename to code/services-core/control-service/resources/templates/control/node/actions/partial-export-sample-data.hdb diff --git a/code/services-core/control-service/src/main/resources/templates/control/node/actions/partial-load.hdb b/code/services-core/control-service/resources/templates/control/node/actions/partial-load.hdb similarity index 100% rename from code/services-core/control-service/src/main/resources/templates/control/node/actions/partial-load.hdb rename to 
code/services-core/control-service/resources/templates/control/node/actions/partial-load.hdb diff --git a/code/services-core/control-service/src/main/resources/templates/control/node/actions/partial-new-crawl-specs.hdb b/code/services-core/control-service/resources/templates/control/node/actions/partial-new-crawl-specs.hdb similarity index 100% rename from code/services-core/control-service/src/main/resources/templates/control/node/actions/partial-new-crawl-specs.hdb rename to code/services-core/control-service/resources/templates/control/node/actions/partial-new-crawl-specs.hdb diff --git a/code/services-core/control-service/src/main/resources/templates/control/node/actions/partial-new-crawl.hdb b/code/services-core/control-service/resources/templates/control/node/actions/partial-new-crawl.hdb similarity index 100% rename from code/services-core/control-service/src/main/resources/templates/control/node/actions/partial-new-crawl.hdb rename to code/services-core/control-service/resources/templates/control/node/actions/partial-new-crawl.hdb diff --git a/code/services-core/control-service/src/main/resources/templates/control/node/actions/partial-process.hdb b/code/services-core/control-service/resources/templates/control/node/actions/partial-process.hdb similarity index 100% rename from code/services-core/control-service/src/main/resources/templates/control/node/actions/partial-process.hdb rename to code/services-core/control-service/resources/templates/control/node/actions/partial-process.hdb diff --git a/code/services-core/control-service/src/main/resources/templates/control/node/actions/partial-recrawl.hdb b/code/services-core/control-service/resources/templates/control/node/actions/partial-recrawl.hdb similarity index 100% rename from code/services-core/control-service/src/main/resources/templates/control/node/actions/partial-recrawl.hdb rename to code/services-core/control-service/resources/templates/control/node/actions/partial-recrawl.hdb diff --git 
a/code/services-core/control-service/src/main/resources/templates/control/node/actions/partial-repartition.hdb b/code/services-core/control-service/resources/templates/control/node/actions/partial-repartition.hdb similarity index 100% rename from code/services-core/control-service/src/main/resources/templates/control/node/actions/partial-repartition.hdb rename to code/services-core/control-service/resources/templates/control/node/actions/partial-repartition.hdb diff --git a/code/services-core/control-service/src/main/resources/templates/control/node/actions/partial-restore-backup.hdb b/code/services-core/control-service/resources/templates/control/node/actions/partial-restore-backup.hdb similarity index 100% rename from code/services-core/control-service/src/main/resources/templates/control/node/actions/partial-restore-backup.hdb rename to code/services-core/control-service/resources/templates/control/node/actions/partial-restore-backup.hdb diff --git a/code/services-core/control-service/src/main/resources/templates/control/node/actions/partial-sideload-dirtree.hdb b/code/services-core/control-service/resources/templates/control/node/actions/partial-sideload-dirtree.hdb similarity index 100% rename from code/services-core/control-service/src/main/resources/templates/control/node/actions/partial-sideload-dirtree.hdb rename to code/services-core/control-service/resources/templates/control/node/actions/partial-sideload-dirtree.hdb diff --git a/code/services-core/control-service/src/main/resources/templates/control/node/actions/partial-sideload-encyclopedia.hdb b/code/services-core/control-service/resources/templates/control/node/actions/partial-sideload-encyclopedia.hdb similarity index 100% rename from code/services-core/control-service/src/main/resources/templates/control/node/actions/partial-sideload-encyclopedia.hdb rename to code/services-core/control-service/resources/templates/control/node/actions/partial-sideload-encyclopedia.hdb diff --git 
a/code/services-core/control-service/src/main/resources/templates/control/node/actions/partial-sideload-reddit.hdb b/code/services-core/control-service/resources/templates/control/node/actions/partial-sideload-reddit.hdb similarity index 100% rename from code/services-core/control-service/src/main/resources/templates/control/node/actions/partial-sideload-reddit.hdb rename to code/services-core/control-service/resources/templates/control/node/actions/partial-sideload-reddit.hdb diff --git a/code/services-core/control-service/src/main/resources/templates/control/node/actions/partial-sideload-stackexchange.hdb b/code/services-core/control-service/resources/templates/control/node/actions/partial-sideload-stackexchange.hdb similarity index 100% rename from code/services-core/control-service/src/main/resources/templates/control/node/actions/partial-sideload-stackexchange.hdb rename to code/services-core/control-service/resources/templates/control/node/actions/partial-sideload-stackexchange.hdb diff --git a/code/services-core/control-service/src/main/resources/templates/control/node/actions/partial-sideload-warc.hdb b/code/services-core/control-service/resources/templates/control/node/actions/partial-sideload-warc.hdb similarity index 100% rename from code/services-core/control-service/src/main/resources/templates/control/node/actions/partial-sideload-warc.hdb rename to code/services-core/control-service/resources/templates/control/node/actions/partial-sideload-warc.hdb diff --git a/code/services-core/control-service/src/main/resources/templates/control/node/node-actions.hdb b/code/services-core/control-service/resources/templates/control/node/node-actions.hdb similarity index 100% rename from code/services-core/control-service/src/main/resources/templates/control/node/node-actions.hdb rename to code/services-core/control-service/resources/templates/control/node/node-actions.hdb diff --git 
a/code/services-core/control-service/src/main/resources/templates/control/node/node-actors.hdb b/code/services-core/control-service/resources/templates/control/node/node-actors.hdb similarity index 100% rename from code/services-core/control-service/src/main/resources/templates/control/node/node-actors.hdb rename to code/services-core/control-service/resources/templates/control/node/node-actors.hdb diff --git a/code/services-core/control-service/src/main/resources/templates/control/node/node-config.hdb b/code/services-core/control-service/resources/templates/control/node/node-config.hdb similarity index 100% rename from code/services-core/control-service/src/main/resources/templates/control/node/node-config.hdb rename to code/services-core/control-service/resources/templates/control/node/node-config.hdb diff --git a/code/services-core/control-service/src/main/resources/templates/control/node/node-overview.hdb b/code/services-core/control-service/resources/templates/control/node/node-overview.hdb similarity index 100% rename from code/services-core/control-service/src/main/resources/templates/control/node/node-overview.hdb rename to code/services-core/control-service/resources/templates/control/node/node-overview.hdb diff --git a/code/services-core/control-service/src/main/resources/templates/control/node/node-storage-conf.hdb b/code/services-core/control-service/resources/templates/control/node/node-storage-conf.hdb similarity index 100% rename from code/services-core/control-service/src/main/resources/templates/control/node/node-storage-conf.hdb rename to code/services-core/control-service/resources/templates/control/node/node-storage-conf.hdb diff --git a/code/services-core/control-service/src/main/resources/templates/control/node/node-storage-details.hdb b/code/services-core/control-service/resources/templates/control/node/node-storage-details.hdb similarity index 100% rename from 
code/services-core/control-service/src/main/resources/templates/control/node/node-storage-details.hdb rename to code/services-core/control-service/resources/templates/control/node/node-storage-details.hdb diff --git a/code/services-core/control-service/src/main/resources/templates/control/node/node-storage-list.hdb b/code/services-core/control-service/resources/templates/control/node/node-storage-list.hdb similarity index 100% rename from code/services-core/control-service/src/main/resources/templates/control/node/node-storage-list.hdb rename to code/services-core/control-service/resources/templates/control/node/node-storage-list.hdb diff --git a/code/services-core/control-service/src/main/resources/templates/control/node/nodes-list.hdb b/code/services-core/control-service/resources/templates/control/node/nodes-list.hdb similarity index 100% rename from code/services-core/control-service/src/main/resources/templates/control/node/nodes-list.hdb rename to code/services-core/control-service/resources/templates/control/node/nodes-list.hdb diff --git a/code/services-core/control-service/src/main/resources/templates/control/node/partial-node-nav.hdb b/code/services-core/control-service/resources/templates/control/node/partial-node-nav.hdb similarity index 100% rename from code/services-core/control-service/src/main/resources/templates/control/node/partial-node-nav.hdb rename to code/services-core/control-service/resources/templates/control/node/partial-node-nav.hdb diff --git a/code/services-core/control-service/src/main/resources/templates/control/partials/actor-summary-table.hdb b/code/services-core/control-service/resources/templates/control/partials/actor-summary-table.hdb similarity index 100% rename from code/services-core/control-service/src/main/resources/templates/control/partials/actor-summary-table.hdb rename to code/services-core/control-service/resources/templates/control/partials/actor-summary-table.hdb diff --git 
a/code/services-core/control-service/src/main/resources/templates/control/partials/actors-table.hdb b/code/services-core/control-service/resources/templates/control/partials/actors-table.hdb similarity index 100% rename from code/services-core/control-service/src/main/resources/templates/control/partials/actors-table.hdb rename to code/services-core/control-service/resources/templates/control/partials/actors-table.hdb diff --git a/code/services-core/control-service/src/main/resources/templates/control/partials/events-table-summary.hdb b/code/services-core/control-service/resources/templates/control/partials/events-table-summary.hdb similarity index 100% rename from code/services-core/control-service/src/main/resources/templates/control/partials/events-table-summary.hdb rename to code/services-core/control-service/resources/templates/control/partials/events-table-summary.hdb diff --git a/code/services-core/control-service/src/main/resources/templates/control/partials/events-table.hdb b/code/services-core/control-service/resources/templates/control/partials/events-table.hdb similarity index 100% rename from code/services-core/control-service/src/main/resources/templates/control/partials/events-table.hdb rename to code/services-core/control-service/resources/templates/control/partials/events-table.hdb diff --git a/code/services-core/control-service/src/main/resources/templates/control/partials/foot-includes.hdb b/code/services-core/control-service/resources/templates/control/partials/foot-includes.hdb similarity index 100% rename from code/services-core/control-service/src/main/resources/templates/control/partials/foot-includes.hdb rename to code/services-core/control-service/resources/templates/control/partials/foot-includes.hdb diff --git a/code/services-core/control-service/src/main/resources/templates/control/partials/head-includes.hdb b/code/services-core/control-service/resources/templates/control/partials/head-includes.hdb similarity index 100% rename from 
code/services-core/control-service/src/main/resources/templates/control/partials/head-includes.hdb rename to code/services-core/control-service/resources/templates/control/partials/head-includes.hdb diff --git a/code/services-core/control-service/src/main/resources/templates/control/partials/message-queue-table.hdb b/code/services-core/control-service/resources/templates/control/partials/message-queue-table.hdb similarity index 100% rename from code/services-core/control-service/src/main/resources/templates/control/partials/message-queue-table.hdb rename to code/services-core/control-service/resources/templates/control/partials/message-queue-table.hdb diff --git a/code/services-core/control-service/src/main/resources/templates/control/partials/nav.hdb b/code/services-core/control-service/resources/templates/control/partials/nav.hdb similarity index 100% rename from code/services-core/control-service/src/main/resources/templates/control/partials/nav.hdb rename to code/services-core/control-service/resources/templates/control/partials/nav.hdb diff --git a/code/services-core/control-service/src/main/resources/templates/control/partials/nodes-table.hdb b/code/services-core/control-service/resources/templates/control/partials/nodes-table.hdb similarity index 100% rename from code/services-core/control-service/src/main/resources/templates/control/partials/nodes-table.hdb rename to code/services-core/control-service/resources/templates/control/partials/nodes-table.hdb diff --git a/code/services-core/control-service/src/main/resources/templates/control/partials/processes-table.hdb b/code/services-core/control-service/resources/templates/control/partials/processes-table.hdb similarity index 100% rename from code/services-core/control-service/src/main/resources/templates/control/partials/processes-table.hdb rename to code/services-core/control-service/resources/templates/control/partials/processes-table.hdb diff --git 
a/code/services-core/control-service/src/main/resources/templates/control/partials/services-table.hdb b/code/services-core/control-service/resources/templates/control/partials/services-table.hdb similarity index 100% rename from code/services-core/control-service/src/main/resources/templates/control/partials/services-table.hdb rename to code/services-core/control-service/resources/templates/control/partials/services-table.hdb diff --git a/code/services-core/control-service/src/main/resources/templates/control/partials/storage-details/files.hdb b/code/services-core/control-service/resources/templates/control/partials/storage-details/files.hdb similarity index 100% rename from code/services-core/control-service/src/main/resources/templates/control/partials/storage-details/files.hdb rename to code/services-core/control-service/resources/templates/control/partials/storage-details/files.hdb diff --git a/code/services-core/control-service/src/main/resources/templates/control/partials/storage-details/related.hdb b/code/services-core/control-service/resources/templates/control/partials/storage-details/related.hdb similarity index 100% rename from code/services-core/control-service/src/main/resources/templates/control/partials/storage-details/related.hdb rename to code/services-core/control-service/resources/templates/control/partials/storage-details/related.hdb diff --git a/code/services-core/control-service/src/main/resources/templates/control/partials/storage-types.hdb b/code/services-core/control-service/resources/templates/control/partials/storage-types.hdb similarity index 100% rename from code/services-core/control-service/src/main/resources/templates/control/partials/storage-types.hdb rename to code/services-core/control-service/resources/templates/control/partials/storage-types.hdb diff --git a/code/services-core/control-service/src/main/resources/templates/control/redirect-ok.hdb b/code/services-core/control-service/resources/templates/control/redirect-ok.hdb 
similarity index 100% rename from code/services-core/control-service/src/main/resources/templates/control/redirect-ok.hdb rename to code/services-core/control-service/resources/templates/control/redirect-ok.hdb diff --git a/code/services-core/control-service/src/main/resources/templates/control/sys/aborted-processes.hdb b/code/services-core/control-service/resources/templates/control/sys/aborted-processes.hdb similarity index 100% rename from code/services-core/control-service/src/main/resources/templates/control/sys/aborted-processes.hdb rename to code/services-core/control-service/resources/templates/control/sys/aborted-processes.hdb diff --git a/code/services-core/control-service/src/main/resources/templates/control/sys/data-sets.hdb b/code/services-core/control-service/resources/templates/control/sys/data-sets.hdb similarity index 100% rename from code/services-core/control-service/src/main/resources/templates/control/sys/data-sets.hdb rename to code/services-core/control-service/resources/templates/control/sys/data-sets.hdb diff --git a/code/services-core/control-service/src/main/resources/templates/control/sys/domain-ranking-sets.hdb b/code/services-core/control-service/resources/templates/control/sys/domain-ranking-sets.hdb similarity index 100% rename from code/services-core/control-service/src/main/resources/templates/control/sys/domain-ranking-sets.hdb rename to code/services-core/control-service/resources/templates/control/sys/domain-ranking-sets.hdb diff --git a/code/services-core/control-service/src/main/resources/templates/control/sys/events.hdb b/code/services-core/control-service/resources/templates/control/sys/events.hdb similarity index 100% rename from code/services-core/control-service/src/main/resources/templates/control/sys/events.hdb rename to code/services-core/control-service/resources/templates/control/sys/events.hdb diff --git a/code/services-core/control-service/src/main/resources/templates/control/sys/message-queue.hdb 
b/code/services-core/control-service/resources/templates/control/sys/message-queue.hdb similarity index 100% rename from code/services-core/control-service/src/main/resources/templates/control/sys/message-queue.hdb rename to code/services-core/control-service/resources/templates/control/sys/message-queue.hdb diff --git a/code/services-core/control-service/src/main/resources/templates/control/sys/new-domain-ranking-set.hdb b/code/services-core/control-service/resources/templates/control/sys/new-domain-ranking-set.hdb similarity index 100% rename from code/services-core/control-service/src/main/resources/templates/control/sys/new-domain-ranking-set.hdb rename to code/services-core/control-service/resources/templates/control/sys/new-domain-ranking-set.hdb diff --git a/code/services-core/control-service/src/main/resources/templates/control/sys/new-message.hdb b/code/services-core/control-service/resources/templates/control/sys/new-message.hdb similarity index 100% rename from code/services-core/control-service/src/main/resources/templates/control/sys/new-message.hdb rename to code/services-core/control-service/resources/templates/control/sys/new-message.hdb diff --git a/code/services-core/control-service/src/main/resources/templates/control/sys/service-by-id.hdb b/code/services-core/control-service/resources/templates/control/sys/service-by-id.hdb similarity index 100% rename from code/services-core/control-service/src/main/resources/templates/control/sys/service-by-id.hdb rename to code/services-core/control-service/resources/templates/control/sys/service-by-id.hdb diff --git a/code/services-core/control-service/src/main/resources/templates/control/sys/sys-actions.hdb b/code/services-core/control-service/resources/templates/control/sys/sys-actions.hdb similarity index 100% rename from code/services-core/control-service/src/main/resources/templates/control/sys/sys-actions.hdb rename to code/services-core/control-service/resources/templates/control/sys/sys-actions.hdb 
diff --git a/code/services-core/control-service/src/main/resources/templates/control/sys/update-domain-ranking-set.hdb b/code/services-core/control-service/resources/templates/control/sys/update-domain-ranking-set.hdb similarity index 100% rename from code/services-core/control-service/src/main/resources/templates/control/sys/update-domain-ranking-set.hdb rename to code/services-core/control-service/resources/templates/control/sys/update-domain-ranking-set.hdb diff --git a/code/services-core/control-service/src/main/resources/templates/control/sys/update-message-state.hdb b/code/services-core/control-service/resources/templates/control/sys/update-message-state.hdb similarity index 100% rename from code/services-core/control-service/src/main/resources/templates/control/sys/update-message-state.hdb rename to code/services-core/control-service/resources/templates/control/sys/update-message-state.hdb diff --git a/code/services-core/control-service/src/main/resources/templates/control/sys/view-message.hdb b/code/services-core/control-service/resources/templates/control/sys/view-message.hdb similarity index 100% rename from code/services-core/control-service/src/main/resources/templates/control/sys/view-message.hdb rename to code/services-core/control-service/resources/templates/control/sys/view-message.hdb diff --git a/code/services-core/control-service/src/test/java/nu/marginalia/control/svc/ApiKeyServiceTest.java b/code/services-core/control-service/test/nu/marginalia/control/svc/ApiKeyServiceTest.java similarity index 100% rename from code/services-core/control-service/src/test/java/nu/marginalia/control/svc/ApiKeyServiceTest.java rename to code/services-core/control-service/test/nu/marginalia/control/svc/ApiKeyServiceTest.java diff --git a/code/services-core/control-service/src/test/java/nu/marginalia/control/svc/HeartbeatServiceTest.java b/code/services-core/control-service/test/nu/marginalia/control/svc/HeartbeatServiceTest.java similarity index 93% rename from 
code/services-core/control-service/src/test/java/nu/marginalia/control/svc/HeartbeatServiceTest.java rename to code/services-core/control-service/test/nu/marginalia/control/svc/HeartbeatServiceTest.java index 0d6c52ab..8593d20f 100644 --- a/code/services-core/control-service/src/test/java/nu/marginalia/control/svc/HeartbeatServiceTest.java +++ b/code/services-core/control-service/test/nu/marginalia/control/svc/HeartbeatServiceTest.java @@ -4,22 +4,17 @@ import com.zaxxer.hikari.HikariConfig; import com.zaxxer.hikari.HikariDataSource; import nu.marginalia.control.sys.model.TaskHeartbeat; import nu.marginalia.control.sys.svc.HeartbeatService; -import nu.marginalia.service.control.ServiceEventLog; import nu.marginalia.test.TestMigrationLoader; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Tag; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.parallel.Execution; -import org.mockito.Mockito; import org.testcontainers.containers.MariaDBContainer; import org.testcontainers.junit.jupiter.Container; import org.testcontainers.junit.jupiter.Testcontainers; -import java.io.IOException; import java.sql.SQLException; -import java.util.List; -import java.util.Objects; import static org.junit.jupiter.api.Assertions.*; import static org.junit.jupiter.api.parallel.ExecutionMode.SAME_THREAD; diff --git a/code/services-core/executor-service/build.gradle b/code/services-core/executor-service/build.gradle index f2d9678d..69ad7d1f 100644 --- a/code/services-core/executor-service/build.gradle +++ b/code/services-core/executor-service/build.gradle @@ -1,9 +1,9 @@ plugins { id 'java' - id 'com.palantir.docker' version '0.35.0' id 'application' id 'jvm-test-suite' + id 'com.google.cloud.tools.jib' version '3.4.0' } application { @@ -13,7 +13,7 @@ application { tasks.distZip.enabled = false -apply from: "$rootProject.projectDir/docker-service-with-dist.gradle" + java { toolchain { @@ -21,7 +21,18 @@ java { } } +apply 
from: "$rootProject.projectDir/srcsets.gradle" +apply from: "$rootProject.projectDir/docker.gradle" dependencies { + // These look weird but they're needed to be able to spawn the processes + // from the executor service + + implementation project(':code:processes:website-adjacencies-calculator') + implementation project(':code:processes:crawling-process') + implementation project(':code:processes:loading-process') + implementation project(':code:processes:converting-process') + implementation project(':code:processes:index-constructor-process') + implementation project(':code:common:config') implementation project(':code:common:model') implementation project(':code:common:process') @@ -29,24 +40,27 @@ dependencies { implementation project(':code:common:linkdb') implementation project(':code:common:service') - implementation project(':code:common:service-client') implementation project(':code:common:service-discovery') implementation project(':third-party:commons-codec') implementation project(':code:libraries:message-queue') + implementation project(':code:functions:link-graph:api') + implementation project(':code:process-models:crawl-spec') implementation project(':code:process-models:crawling-model') implementation project(':code:features-crawl:link-parser') implementation project(':code:features-convert:data-extractors') implementation project(':code:features-convert:stackexchange-xml') implementation project(':code:features-convert:reddit-json') - implementation project(':code:features-index:index-journal') - implementation project(':code:api:index-api') - implementation project(':code:api:query-api') - implementation project(':code:api:process-mqapi') - implementation project(':code:api:executor-api') + implementation project(':code:index:index-journal') + implementation project(':code:index:api') + implementation project(':code:process-mqapi') + + implementation project(':code:execution') + implementation project(':code:execution:api') + implementation 
project(':third-party:encyclopedia-marginalia-nu') implementation libs.bundles.slf4j @@ -59,7 +73,6 @@ dependencies { implementation libs.guice implementation libs.trove implementation libs.protobuf - implementation libs.rxjava implementation libs.zstd implementation libs.jsoup implementation libs.commons.io diff --git a/code/services-core/executor-service/src/main/java/nu/marginalia/executor/ExecutorMain.java b/code/services-core/executor-service/java/nu/marginalia/executor/ExecutorMain.java similarity index 86% rename from code/services-core/executor-service/src/main/java/nu/marginalia/executor/ExecutorMain.java rename to code/services-core/executor-service/java/nu/marginalia/executor/ExecutorMain.java index 9d9d7d2b..86798d86 100644 --- a/code/services-core/executor-service/src/main/java/nu/marginalia/executor/ExecutorMain.java +++ b/code/services-core/executor-service/java/nu/marginalia/executor/ExecutorMain.java @@ -4,7 +4,7 @@ import com.google.inject.Guice; import com.google.inject.Inject; import com.google.inject.Injector; import nu.marginalia.service.MainClass; -import nu.marginalia.service.SearchServiceDescriptors; +import nu.marginalia.service.ServiceDiscoveryModule; import nu.marginalia.service.id.ServiceId; import nu.marginalia.service.module.DatabaseModule; import nu.marginalia.service.module.ServiceConfigurationModule; @@ -25,7 +25,8 @@ public class ExecutorMain extends MainClass { Injector injector = Guice.createInjector( new ExecutorModule(), new DatabaseModule(false), - new ServiceConfigurationModule(SearchServiceDescriptors.descriptors, ServiceId.Executor) + new ServiceDiscoveryModule(), + new ServiceConfigurationModule(ServiceId.Executor) ); injector.getInstance(NodeStatusWatcher.class); diff --git a/code/services-core/executor-service/java/nu/marginalia/executor/ExecutorModule.java b/code/services-core/executor-service/java/nu/marginalia/executor/ExecutorModule.java new file mode 100644 index 00000000..d49e2a56 --- /dev/null +++ 
b/code/services-core/executor-service/java/nu/marginalia/executor/ExecutorModule.java @@ -0,0 +1,8 @@ +package nu.marginalia.executor; + +import com.google.inject.AbstractModule; + +public class ExecutorModule extends AbstractModule { + public void configure() { + } +} diff --git a/code/services-core/executor-service/java/nu/marginalia/executor/ExecutorSvc.java b/code/services-core/executor-service/java/nu/marginalia/executor/ExecutorSvc.java new file mode 100644 index 00000000..a84eebd3 --- /dev/null +++ b/code/services-core/executor-service/java/nu/marginalia/executor/ExecutorSvc.java @@ -0,0 +1,75 @@ +package nu.marginalia.executor; + +import com.google.inject.Inject; +import nu.marginalia.execution.*; +import nu.marginalia.service.discovery.property.ServicePartition; +import nu.marginalia.service.server.BaseServiceParams; +import nu.marginalia.service.server.Service; +import nu.marginalia.service.server.mq.MqRequest; +import nu.marginalia.storage.FileStorageService; +import nu.marginalia.storage.model.FileStorageId; +import org.apache.commons.io.FileUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import spark.Request; +import spark.Response; +import spark.Spark; + +import java.io.IOException; +import java.nio.file.Path; +import java.sql.SQLException; +import java.util.List; + +// Weird name for this one to not have clashes with java.util.concurrent.ExecutorService +public class ExecutorSvc extends Service { + + private static final Logger logger = LoggerFactory.getLogger(ExecutorSvc.class); + private final ExecutionInit executionInit; + private final FileStorageService fileStorageService; + + @Inject + public ExecutorSvc(BaseServiceParams params, + ExecutorGrpcService executorGrpcService, + ExecutorCrawlGrpcService executorCrawlGrpcService, + ExecutorSideloadGrpcService executorSideloadGrpcService, + ExecutorExportGrpcService executorExportGrpcService, + ExecutionInit executionInit, + FileStorageService fileStorageService) + { + super(params, 
+ ServicePartition.partition(params.configuration.node()), + List.of(executorGrpcService, + executorCrawlGrpcService, + executorSideloadGrpcService, + executorExportGrpcService) + ); + + this.executionInit = executionInit; + this.fileStorageService = fileStorageService; + + Spark.get("/transfer/file/:fid", this::transferFile); + } + + @MqRequest(endpoint="FIRST-BOOT") + public void setUpDefaultActors(String message) throws Exception { + logger.info("Initializing default actors"); + + executionInit.initDefaultActors(); + } + + /** Allows transfer of files from each partition */ + private Object transferFile(Request request, Response response) throws SQLException, IOException { + FileStorageId fileStorageId = FileStorageId.parse(request.params("fid")); + + var fileStorage = fileStorageService.getStorage(fileStorageId); + + Path basePath = fileStorage.asPath(); + // This is not a public API so injection isn't a concern + Path filePath = basePath.resolve(request.queryParams("path")); + + response.type("application/octet-stream"); + FileUtils.copyFile(filePath.toFile(), response.raw().getOutputStream()); + return ""; + } + +} diff --git a/code/services-core/executor-service/readme.md b/code/services-core/executor-service/readme.md index 33e612df..280defd1 100644 --- a/code/services-core/executor-service/readme.md +++ b/code/services-core/executor-service/readme.md @@ -1,16 +1,10 @@ The executor service is a partitioned service responsible for executing and keeping -track of long running maintenance and operational tasks, such as crawling or data -processing. +track of long-running maintenance and operational tasks, such as crawling or data +processing. -It accomplishes this using the [message queue and actor library](../../libraries/message-queue/), -which permits program state to survive crashes and reboots. The executor service is closely -linked to the [control-service](../control-service), which provides a user interface for -much of the executor's functionality. 
+The executor service is closely linked to the [control-service](../control-service), +which provides a user interface for much of the executor's functionality. -## Central Classes +The service is itself relatively bare of code, but imports and exposes the [execution subsystem](../../execution), +which is responsible for the actual execution of tasks. -* [ExecutorActorControlService](src/main/java/nu/marginalia/actor/ExecutorActorControlService.java) - -## See Also - -* [api/executor-api](../../api/executor-api) \ No newline at end of file diff --git a/code/services-core/executor-service/src/main/java/nu/marginalia/executor/ExecutorGrpcService.java b/code/services-core/executor-service/src/main/java/nu/marginalia/executor/ExecutorGrpcService.java deleted file mode 100644 index e0969196..00000000 --- a/code/services-core/executor-service/src/main/java/nu/marginalia/executor/ExecutorGrpcService.java +++ /dev/null @@ -1,325 +0,0 @@ -package nu.marginalia.executor; - -import com.google.inject.Inject; -import io.grpc.stub.StreamObserver; -import nu.marginalia.actor.ActorApi; -import nu.marginalia.executor.api.*; -import nu.marginalia.executor.svc.*; - -public class ExecutorGrpcService extends ExecutorApiGrpc.ExecutorApiImplBase { - private final ActorApi actorApi; - private final ExportService exportService; - private final SideloadService sideloadService; - private final BackupService backupService; - private final TransferService transferService; - private final ProcessingService processingService; - - @Inject - public ExecutorGrpcService(ActorApi actorApi, - ExportService exportService, - SideloadService sideloadService, - BackupService backupService, - TransferService transferService, - ProcessingService processingService) - { - this.actorApi = actorApi; - this.exportService = exportService; - this.sideloadService = sideloadService; - this.backupService = backupService; - this.transferService = transferService; - this.processingService = processingService; - } - -
@Override - public void startFsm(RpcFsmName request, StreamObserver responseObserver) { - try { - actorApi.startActor(request); - responseObserver.onNext(Empty.getDefaultInstance()); - responseObserver.onCompleted(); - } - catch (Exception e) { - responseObserver.onError(e); - } - } - - @Override - public void stopFsm(RpcFsmName request, StreamObserver responseObserver) { - try { - actorApi.stopActor(request); - responseObserver.onNext(Empty.getDefaultInstance()); - responseObserver.onCompleted(); - } - catch (Exception e) { - responseObserver.onError(e); - } - } - - @Override - public void stopProcess(RpcProcessId request, StreamObserver responseObserver) { - try { - actorApi.stopProcess(request); - responseObserver.onNext(Empty.getDefaultInstance()); - responseObserver.onCompleted(); - } - catch (Exception e) { - responseObserver.onError(e); - } - } - - @Override - public void triggerCrawl(RpcFileStorageId request, StreamObserver responseObserver) { - try { - processingService.startCrawl(request); - responseObserver.onNext(Empty.getDefaultInstance()); - responseObserver.onCompleted(); - } - catch (Exception e) { - responseObserver.onError(e); - } - } - - @Override - public void triggerRecrawl(RpcFileStorageId request, StreamObserver responseObserver) { - try { - processingService.startRecrawl(request); - responseObserver.onNext(Empty.getDefaultInstance()); - responseObserver.onCompleted(); - } - catch (Exception e) { - responseObserver.onError(e); - } - } - - @Override - public void triggerConvert(RpcFileStorageId request, StreamObserver responseObserver) { - try { - processingService.startConversion(request); - responseObserver.onNext(Empty.getDefaultInstance()); - responseObserver.onCompleted(); - } - catch (Exception e) { - responseObserver.onError(e); - } - } - - @Override - public void triggerConvertAndLoad(RpcFileStorageId request, StreamObserver responseObserver) { - try { - processingService.startConvertLoad(request); - 
responseObserver.onNext(Empty.getDefaultInstance()); - responseObserver.onCompleted(); - } - catch (Exception e) { - responseObserver.onError(e); - } - } - - @Override - public void loadProcessedData(RpcFileStorageIds request, StreamObserver responseObserver) { - try { - processingService.startLoad(request); - responseObserver.onNext(Empty.getDefaultInstance()); - responseObserver.onCompleted(); - } - catch (Exception e) { - responseObserver.onError(e); - } - } - - @Override - public void calculateAdjacencies(Empty request, StreamObserver responseObserver) { - try { - processingService.startAdjacencyCalculation(); - responseObserver.onNext(Empty.getDefaultInstance()); - responseObserver.onCompleted(); - } - catch (Exception e) { - responseObserver.onError(e); - } - } - - @Override - public void sideloadEncyclopedia(RpcSideloadEncyclopedia request, StreamObserver responseObserver) { - try { - sideloadService.sideloadEncyclopedia(request); - responseObserver.onNext(Empty.getDefaultInstance()); - responseObserver.onCompleted(); - } - catch (Exception e) { - responseObserver.onError(e); - } - } - - @Override - public void sideloadDirtree(RpcSideloadDirtree request, StreamObserver responseObserver) { - try { - sideloadService.sideloadDirtree(request); - responseObserver.onNext(Empty.getDefaultInstance()); - responseObserver.onCompleted(); - } - catch (Exception e) { - responseObserver.onError(e); - } - } - - @Override - public void sideloadReddit(RpcSideloadReddit request, StreamObserver responseObserver) { - try { - sideloadService.sideloadReddit(request); - responseObserver.onNext(Empty.getDefaultInstance()); - responseObserver.onCompleted(); - } - catch (Exception e) { - responseObserver.onError(e); - } - } - - @Override - public void sideloadWarc(RpcSideloadWarc request, StreamObserver responseObserver) { - try { - sideloadService.sideloadWarc(request); - responseObserver.onNext(Empty.getDefaultInstance()); - responseObserver.onCompleted(); - } - catch (Exception e) 
{ - responseObserver.onError(e); - } - } - - @Override - public void sideloadStackexchange(RpcSideloadStackexchange request, StreamObserver responseObserver) { - try { - sideloadService.sideloadStackexchange(request); - responseObserver.onNext(Empty.getDefaultInstance()); - responseObserver.onCompleted(); - } - catch (Exception e) { - responseObserver.onError(e); - } - } - - @Override - public void createCrawlSpecFromDownload(RpcCrawlSpecFromDownload request, StreamObserver responseObserver) { - try { - processingService.createCrawlSpecFromDownload(request); - responseObserver.onNext(Empty.getDefaultInstance()); - responseObserver.onCompleted(); - } - catch (Exception e) { - responseObserver.onError(e); - } - } - - @Override - public void exportAtags(RpcFileStorageId request, StreamObserver responseObserver) { - try { - exportService.exportAtags(request); - responseObserver.onNext(Empty.getDefaultInstance()); - responseObserver.onCompleted(); - } - catch (Exception e) { - responseObserver.onError(e); - } - } - - @Override - public void exportSampleData(RpcExportSampleData request, StreamObserver responseObserver) { - try { - exportService.exportSampleData(request); - responseObserver.onNext(Empty.getDefaultInstance()); - responseObserver.onCompleted(); - } - catch (Exception e) { - responseObserver.onError(e); - } - } - - @Override - public void exportRssFeeds(RpcFileStorageId request, StreamObserver responseObserver) { - try { - exportService.exportFeeds(request); - responseObserver.onNext(Empty.getDefaultInstance()); - responseObserver.onCompleted(); - } - catch (Exception e) { - responseObserver.onError(e); - } - } - - @Override - public void exportTermFrequencies(RpcFileStorageId request, StreamObserver responseObserver) { - try { - exportService.exportTermFrequencies(request); - responseObserver.onNext(Empty.getDefaultInstance()); - responseObserver.onCompleted(); - } - catch (Exception e) { - responseObserver.onError(e); - } - } - - @Override - public void 
downloadSampleData(RpcDownloadSampleData request, StreamObserver responseObserver) { - try { - sideloadService.downloadSampleData(request); - responseObserver.onNext(Empty.getDefaultInstance()); - responseObserver.onCompleted(); - } - catch (Exception e) { - responseObserver.onError(e); - } - } - - @Override - public void exportData(Empty request, StreamObserver responseObserver) { - try { - exportService.exportData(); - responseObserver.onNext(Empty.getDefaultInstance()); - responseObserver.onCompleted(); - } - catch (Exception e) { - responseObserver.onError(e); - } - - } - - @Override - public void restoreBackup(RpcFileStorageId request, StreamObserver responseObserver) { - try { - backupService.restore(request); - responseObserver.onNext(Empty.getDefaultInstance()); - responseObserver.onCompleted(); - } - catch (Exception e) { - responseObserver.onError(e); - } - } - - @Override - public void getActorStates(Empty request, StreamObserver responseObserver) { - responseObserver.onNext(actorApi.getActorStates()); - responseObserver.onCompleted(); - } - - @Override - public void listSideloadDir(Empty request, StreamObserver responseObserver) { - try { - responseObserver.onNext(sideloadService.listUploadDir()); - responseObserver.onCompleted(); - } - catch (Exception e) { - responseObserver.onError(e); - } - } - - @Override - public void listFileStorage(RpcFileStorageId request, StreamObserver responseObserver) { - try { - responseObserver.onNext(transferService.listFiles(request)); - responseObserver.onCompleted(); - } - catch (Exception e) { - responseObserver.onError(e); - } - } -} diff --git a/code/services-core/executor-service/src/main/java/nu/marginalia/executor/ExecutorModule.java b/code/services-core/executor-service/src/main/java/nu/marginalia/executor/ExecutorModule.java deleted file mode 100644 index 61619823..00000000 --- a/code/services-core/executor-service/src/main/java/nu/marginalia/executor/ExecutorModule.java +++ /dev/null @@ -1,13 +0,0 @@ -package 
nu.marginalia.executor; - -import com.google.inject.AbstractModule; -import com.google.inject.name.Names; - -import java.nio.file.Path; - -public class ExecutorModule extends AbstractModule { - public void configure() { - String dist = System.getProperty("distPath", System.getProperty("WMSA_HOME", "/var/lib/wmsa") + "/dist/current"); - bind(Path.class).annotatedWith(Names.named("distPath")).toInstance(Path.of(dist)); - } -} diff --git a/code/services-core/executor-service/src/main/java/nu/marginalia/executor/ExecutorSvc.java b/code/services-core/executor-service/src/main/java/nu/marginalia/executor/ExecutorSvc.java deleted file mode 100644 index ac567467..00000000 --- a/code/services-core/executor-service/src/main/java/nu/marginalia/executor/ExecutorSvc.java +++ /dev/null @@ -1,86 +0,0 @@ -package nu.marginalia.executor; - -import com.google.gson.Gson; -import com.google.inject.Inject; -import io.grpc.ServerBuilder; -import nu.marginalia.actor.ExecutorActor; -import nu.marginalia.actor.ExecutorActorControlService; -import nu.marginalia.executor.svc.TransferService; -import nu.marginalia.service.server.BaseServiceParams; -import nu.marginalia.service.server.Service; -import nu.marginalia.service.server.mq.MqRequest; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import spark.Spark; - -import java.io.IOException; -import java.sql.SQLException; - -// Weird name for this one to not have clashes with java.util.concurrent.ExecutorService -public class ExecutorSvc extends Service { - private final BaseServiceParams params; - private final Gson gson; - private final ExecutorActorControlService actorControlService; - private final TransferService transferService; - - private static final Logger logger = LoggerFactory.getLogger(ExecutorSvc.class); - - @Inject - public ExecutorSvc(BaseServiceParams params, - ExecutorActorControlService actorControlService, - ExecutorGrpcService executorGrpcService, - Gson gson, - TransferService transferService) throws 
IOException { - super(params); - this.params = params; - this.gson = gson; - this.actorControlService = actorControlService; - this.transferService = transferService; - - var grpcServer = ServerBuilder.forPort(params.configuration.port() + 1) - .addService(executorGrpcService) - .build(); - grpcServer.start(); - - Spark.get("/transfer/file/:fid", transferService::transferFile); - - Spark.get("/transfer/spec", transferService::getTransferSpec, gson::toJson); - Spark.post("/transfer/yield", transferService::yieldDomain); - } - - @MqRequest(endpoint="FIRST-BOOT") - public void setUpDefaultActors(String message) throws Exception { - logger.info("Initializing default actors"); - - actorControlService.start(ExecutorActor.MONITOR_PROCESS_LIVENESS); - actorControlService.start(ExecutorActor.MONITOR_FILE_STORAGE); - actorControlService.start(ExecutorActor.PROC_CONVERTER_SPAWNER); - actorControlService.start(ExecutorActor.PROC_CRAWLER_SPAWNER); - actorControlService.start(ExecutorActor.PROC_INDEX_CONSTRUCTOR_SPAWNER); - actorControlService.start(ExecutorActor.PROC_LOADER_SPAWNER); - } - - @MqRequest(endpoint="TRANSFER-DOMAINS") - public String transferDomains(String message) throws Exception { - - var spec = gson.fromJson(message, TransferService.TransferReq.class); - - synchronized (this) { - transferService.transferMqEndpoint(spec.sourceNode(), spec.count()); - } - - return "OK"; - } - - - @MqRequest(endpoint="PRUNE-CRAWL-DATA") - public String pruneCrawlData(String message) throws SQLException, IOException { - - synchronized (this) { // would not be great if this ran in parallel with itself - transferService.pruneCrawlDataMqEndpoint(); - } - - return "OK"; - } - -} diff --git a/code/services-core/executor-service/src/main/java/nu/marginalia/executor/svc/BackupService.java b/code/services-core/executor-service/src/main/java/nu/marginalia/executor/svc/BackupService.java deleted file mode 100644 index 45f8c622..00000000 --- 
a/code/services-core/executor-service/src/main/java/nu/marginalia/executor/svc/BackupService.java +++ /dev/null @@ -1,22 +0,0 @@ -package nu.marginalia.executor.svc; - -import com.google.inject.Inject; -import nu.marginalia.actor.ExecutorActor; -import nu.marginalia.actor.ExecutorActorControlService; -import nu.marginalia.actor.task.RestoreBackupActor; -import nu.marginalia.executor.api.RpcFileStorageId; -import nu.marginalia.storage.model.FileStorageId; - -public class BackupService { - private final ExecutorActorControlService actorControlService; - - @Inject - public BackupService(ExecutorActorControlService actorControlService) { - this.actorControlService = actorControlService; - } - - public void restore(RpcFileStorageId request) throws Exception { - var fid = FileStorageId.of(request.getFileStorageId()); - actorControlService.startFrom(ExecutorActor.RESTORE_BACKUP, new RestoreBackupActor.Restore(fid)); - } -} diff --git a/code/services-core/executor-service/src/main/java/nu/marginalia/executor/svc/ExportService.java b/code/services-core/executor-service/src/main/java/nu/marginalia/executor/svc/ExportService.java deleted file mode 100644 index 9f941ab9..00000000 --- a/code/services-core/executor-service/src/main/java/nu/marginalia/executor/svc/ExportService.java +++ /dev/null @@ -1,51 +0,0 @@ -package nu.marginalia.executor.svc; - -import com.google.inject.Inject; -import nu.marginalia.actor.ExecutorActor; -import nu.marginalia.actor.ExecutorActorControlService; -import nu.marginalia.actor.task.*; -import nu.marginalia.executor.api.RpcExportSampleData; -import nu.marginalia.executor.api.RpcFileStorageId; -import nu.marginalia.storage.model.FileStorageId; - -public class ExportService { - private final ExecutorActorControlService actorControlService; - - @Inject - public ExportService(ExecutorActorControlService actorControlService) { - this.actorControlService = actorControlService; - } - - public void exportData() throws Exception { - 
actorControlService.startFrom(ExecutorActor.EXPORT_DATA, new ExportDataActor.Export()); - } - - public void exportSampleData(RpcExportSampleData request) throws Exception { - actorControlService.startFrom(ExecutorActor.EXPORT_SAMPLE_DATA, - new ExportSampleDataActor.Export( - FileStorageId.of(request.getFileStorageId()), - request.getSize(), - request.getName() - ) - ); - } - - public void exportAtags(RpcFileStorageId request) throws Exception { - actorControlService.startFrom(ExecutorActor.EXPORT_ATAGS, - new ExportAtagsActor.Export(FileStorageId.of(request.getFileStorageId())) - ); - } - - public void exportFeeds(RpcFileStorageId request) throws Exception { - actorControlService.startFrom(ExecutorActor.EXPORT_FEEDS, - new ExportFeedsActor.Export(FileStorageId.of(request.getFileStorageId())) - ); - } - - public void exportTermFrequencies(RpcFileStorageId request) throws Exception { - actorControlService.startFrom(ExecutorActor.EXPORT_TERM_FREQUENCIES, - new ExportTermFreqActor.Export(FileStorageId.of(request.getFileStorageId())) - ); - } - -} diff --git a/code/services-core/executor-service/src/main/java/nu/marginalia/executor/svc/ProcessingService.java b/code/services-core/executor-service/src/main/java/nu/marginalia/executor/svc/ProcessingService.java deleted file mode 100644 index bee5c2c1..00000000 --- a/code/services-core/executor-service/src/main/java/nu/marginalia/executor/svc/ProcessingService.java +++ /dev/null @@ -1,62 +0,0 @@ -package nu.marginalia.executor.svc; - -import com.google.inject.Inject; -import nu.marginalia.actor.ExecutorActor; -import nu.marginalia.actor.ExecutorActorControlService; -import nu.marginalia.actor.task.*; -import nu.marginalia.executor.api.RpcCrawlSpecFromDownload; -import nu.marginalia.executor.api.RpcFileStorageId; -import nu.marginalia.executor.api.RpcFileStorageIds; -import nu.marginalia.storage.model.FileStorageId; - -import java.util.stream.Collectors; - -public class ProcessingService { - private final 
ExecutorActorControlService actorControlService; - - @Inject - public ProcessingService(ExecutorActorControlService actorControlService) { - this.actorControlService = actorControlService; - } - - public void startRecrawl(RpcFileStorageId request) throws Exception { - actorControlService.startFrom(ExecutorActor.RECRAWL, - new RecrawlActor.Initial(FileStorageId.of(request.getFileStorageId()), false)); - } - - public void startCrawl(RpcFileStorageId request) throws Exception { - actorControlService.startFrom(ExecutorActor.CRAWL, - new CrawlActor.Initial(FileStorageId.of(request.getFileStorageId()))); - } - - public void startConversion(RpcFileStorageId request) throws Exception { - actorControlService.startFrom(ExecutorActor.CONVERT, - new ConvertActor.Convert(FileStorageId.of(request.getFileStorageId()))); - } - - public void startConvertLoad(RpcFileStorageId request) throws Exception { - actorControlService.startFrom(ExecutorActor.CONVERT_AND_LOAD, - new ConvertAndLoadActor.Initial(FileStorageId.of(request.getFileStorageId()))); - } - - public void startLoad(RpcFileStorageIds request) throws Exception { - actorControlService.startFrom(ExecutorActor.CONVERT_AND_LOAD, - new ConvertAndLoadActor.Load(request.getFileStorageIdsList() - .stream() - .map(FileStorageId::of) - .collect(Collectors.toList())) - ); - } - - public void startAdjacencyCalculation() throws Exception { - actorControlService.startFrom(ExecutorActor.ADJACENCY_CALCULATION, new TriggerAdjacencyCalculationActor.Run()); - } - - public void createCrawlSpecFromDownload(RpcCrawlSpecFromDownload request) throws Exception { - actorControlService.startFrom(ExecutorActor.CRAWL_JOB_EXTRACTOR, - new CrawlJobExtractorActor.CreateFromUrl( - request.getDescription(), - request.getUrl()) - ); - } -} diff --git a/code/services-core/executor-service/src/main/java/nu/marginalia/executor/svc/SideloadService.java b/code/services-core/executor-service/src/main/java/nu/marginalia/executor/svc/SideloadService.java deleted 
file mode 100644 index 136b6f1d..00000000 --- a/code/services-core/executor-service/src/main/java/nu/marginalia/executor/svc/SideloadService.java +++ /dev/null @@ -1,96 +0,0 @@ -package nu.marginalia.executor.svc; - -import com.google.inject.Inject; -import nu.marginalia.WmsaHome; -import nu.marginalia.actor.ExecutorActor; -import nu.marginalia.actor.ExecutorActorControlService; -import nu.marginalia.actor.task.ConvertActor; -import nu.marginalia.actor.task.DownloadSampleActor; -import nu.marginalia.executor.api.*; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.time.LocalDateTime; -import java.time.ZoneId; -import java.time.format.DateTimeFormatter; -import java.util.Comparator; - -public class SideloadService { - private final ExecutorActorControlService actorControlService; - private static final Logger logger = LoggerFactory.getLogger(SideloadService.class); - - @Inject - public SideloadService(ExecutorActorControlService actorControlService) { - this.actorControlService = actorControlService; - } - - public void sideloadDirtree(RpcSideloadDirtree request) throws Exception { - actorControlService.startFrom(ExecutorActor.CONVERT, - new ConvertActor.ConvertDirtree(request.getSourcePath()) - ); - } - - public void sideloadReddit(RpcSideloadReddit request) throws Exception { - actorControlService.startFrom(ExecutorActor.CONVERT, - new ConvertActor.ConvertReddit(request.getSourcePath()) - ); - } - - public void sideloadWarc(RpcSideloadWarc request) throws Exception { - actorControlService.startFrom(ExecutorActor.CONVERT, - new ConvertActor.ConvertWarc(request.getSourcePath()) - ); - } - - public void sideloadEncyclopedia(RpcSideloadEncyclopedia request) throws Exception { - actorControlService.startFrom(ExecutorActor.CONVERT, - new ConvertActor.ConvertEncyclopedia( - request.getSourcePath(), - request.getBaseUrl() - )); - } - - public void 
sideloadStackexchange(RpcSideloadStackexchange request) throws Exception { - actorControlService.startFrom(ExecutorActor.CONVERT, - new ConvertActor.ConvertStackexchange(request.getSourcePath()) - ); - } - - public RpcUploadDirContents listUploadDir() throws IOException { - Path uploadDir = WmsaHome.getUploadDir(); - - try (var items = Files.list(uploadDir).sorted( - Comparator.comparing((Path d) -> Files.isDirectory(d)).reversed() - .thenComparing(path -> path.getFileName().toString()) - )) { - var builder = RpcUploadDirContents.newBuilder().setPath(uploadDir.toString()); - - var iter = items.iterator(); - while (iter.hasNext()) { - var path = iter.next(); - - boolean isDir = Files.isDirectory(path); - long size = isDir ? 0 : Files.size(path); - var mtime = Files.getLastModifiedTime(path); - - builder.addEntriesBuilder() - .setName(path.toString()) - .setIsDirectory(isDir) - .setLastModifiedTime( - LocalDateTime.ofInstant(mtime.toInstant(), ZoneId.systemDefault()).format(DateTimeFormatter.ISO_DATE_TIME)) - .setSize(size); - } - - return builder.build(); - } - } - - public void downloadSampleData(RpcDownloadSampleData request) throws Exception { - String sampleSet = request.getSampleSet(); - - actorControlService.startFrom(ExecutorActor.DOWNLOAD_SAMPLE, new DownloadSampleActor.Run(sampleSet)); - } -} diff --git a/code/services-core/executor-service/src/main/java/nu/marginalia/executor/svc/TransferService.java b/code/services-core/executor-service/src/main/java/nu/marginalia/executor/svc/TransferService.java deleted file mode 100644 index 377bd354..00000000 --- a/code/services-core/executor-service/src/main/java/nu/marginalia/executor/svc/TransferService.java +++ /dev/null @@ -1,258 +0,0 @@ -package nu.marginalia.executor.svc; - -import com.google.gson.Gson; -import com.google.inject.Inject; -import com.zaxxer.hikari.HikariDataSource; -import lombok.SneakyThrows; -import nu.marginalia.client.Context; -import nu.marginalia.executor.api.RpcFileStorageContent; -import 
nu.marginalia.executor.api.RpcFileStorageEntry; -import nu.marginalia.executor.api.RpcFileStorageId; -import nu.marginalia.executor.client.ExecutorClient; -import nu.marginalia.executor.model.transfer.TransferItem; -import nu.marginalia.executor.model.transfer.TransferSpec; -import nu.marginalia.mq.outbox.MqOutbox; -import nu.marginalia.mq.persistence.MqPersistence; -import nu.marginalia.process.log.WorkLog; -import nu.marginalia.service.module.ServiceConfiguration; -import nu.marginalia.storage.FileStorageService; -import nu.marginalia.storage.model.FileStorageId; -import nu.marginalia.storage.model.FileStorageType; -import org.apache.commons.io.FileUtils; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import spark.Request; -import spark.Response; - -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.StandardCopyOption; -import java.sql.SQLException; -import java.util.ArrayList; -import java.util.Comparator; -import java.util.List; -import java.util.UUID; - -public class TransferService { - private final Gson gson; - private final FileStorageService fileStorageService; - private final HikariDataSource dataSource; - private final ExecutorClient executorClient; - private final MqPersistence persistence; - private final String executorServiceName; - private final int nodeId; - - private static final Logger logger = LoggerFactory.getLogger(TransferService.class); - @Inject - public TransferService( - Gson gson, - FileStorageService fileStorageService, - HikariDataSource dataSource, - ExecutorClient executorClient, MqPersistence persistence, ServiceConfiguration config) - { - this.gson = gson; - this.fileStorageService = fileStorageService; - this.dataSource = dataSource; - this.executorClient = executorClient; - this.persistence = persistence; - this.nodeId = config.node(); - this.executorServiceName = config.serviceName(); - } - - public Object transferFile(Request request, Response response) throws 
SQLException, IOException { - FileStorageId fileStorageId = FileStorageId.parse(request.params("fid")); - - var fileStorage = fileStorageService.getStorage(fileStorageId); - - Path basePath = fileStorage.asPath(); - // This is not a public API so injection isn't a concern - Path filePath = basePath.resolve(request.queryParams("path")); - - response.type("application/octet-stream"); - FileUtils.copyFile(filePath.toFile(), response.raw().getOutputStream()); - return ""; - } - - - public RpcFileStorageContent listFiles(RpcFileStorageId request) throws SQLException, IOException { - FileStorageId fileStorageId = FileStorageId.of(request.getFileStorageId()); - - var storage = fileStorageService.getStorage(fileStorageId); - - var builder = RpcFileStorageContent.newBuilder(); - - - try (var fs = Files.list(storage.asPath())) { - fs.filter(Files::isRegularFile) - .map(this::createFileModel) - .sorted(Comparator.comparing(RpcFileStorageEntry::getName)) - .forEach(builder::addEntries); - } - - return builder.build(); - } - - @SneakyThrows - private RpcFileStorageEntry createFileModel(Path path) { - return RpcFileStorageEntry.newBuilder() - .setName(path.toFile().getName()) - .setSize(Files.size(path)) - .setLastModifiedTime(Files.getLastModifiedTime(path).toInstant().toString()) - .build(); - } - - public TransferSpec getTransferSpec(Request request, Response response) throws SQLException { - List fileStorageIds = fileStorageService.getActiveFileStorages(nodeId, FileStorageType.CRAWL_DATA); - if (fileStorageIds.isEmpty()) { - logger.warn("No ACTIVE crawl data"); - return new TransferSpec(); - } - int count = Integer.parseInt(request.queryParams("count")); - - logger.info("Preparing a transfer of {} domains", count); - - List items = new ArrayList<>(); - var storage = fileStorageService.getStorage(fileStorageIds.get(0)); - - try (var conn = dataSource.getConnection(); - var query = conn.prepareStatement("SELECT ID FROM EC_DOMAIN WHERE DOMAIN_NAME=? 
AND NODE_AFFINITY=" + nodeId) - ) { - for (var item : WorkLog.iterable(storage.asPath().resolve("crawler.log"))) { - if (items.size() >= count) - break; - - if (!Files.isRegularFile(storage.asPath().resolve(item.relPath()))) { - logger.info("Ignoring absent item {}", item); - continue; - } - - query.setString(1, item.id()); - var rs = query.executeQuery(); - if (rs.next()) { - items.add(new TransferItem( - item.id(), - rs.getInt(1), - fileStorageIds.get(0), - item.relPath() - )); - } - else { - logger.info("Rejected {}", item); - } - } - } - - logger.info("Found {} eligible domains", items.size()); - - return new TransferSpec(items); - } - - public Object yieldDomain(Request request, Response response) throws SQLException, IOException { - var item = gson.fromJson(request.body(), TransferItem.class); - var storage = fileStorageService.getStorage(item.fileStorageId()); - Files.delete(storage.asPath().resolve(storage.asPath().resolve(item.path()))); - return ""; - } - - public void pruneCrawlDataMqEndpoint() throws SQLException, IOException { - List fileStorageIds = fileStorageService.getActiveFileStorages(nodeId, FileStorageType.CRAWL_DATA); - if (fileStorageIds.isEmpty()) { - return; - } - var storage = fileStorageService.getStorage(fileStorageIds.get(0)); - - Path newCrawlLogPath = storage.asPath().resolve("crawler.log-new"); - Path oldCrawlLogPath = storage.asPath().resolve("crawler.log"); - - int pruned = 0; - try (var newWorkLog = new WorkLog(newCrawlLogPath)) { - for (var item : WorkLog.iterable(oldCrawlLogPath)) { - if (Files.exists(storage.asPath().resolve(item.relPath()))) { - newWorkLog.setJobToFinished(item.id(), item.path(), item.cnt()); - } - else { - pruned++; - } - } - } - if (pruned > 0) { - logger.info("Pruned {} items from the crawl log!", pruned); - } - - Files.move(newCrawlLogPath, oldCrawlLogPath, StandardCopyOption.REPLACE_EXISTING); - } - - public void transferMqEndpoint(int sourceNode, int count) throws Exception { - var storages = 
fileStorageService.getOnlyActiveFileStorage(FileStorageType.CRAWL_DATA); - - // Ensure crawl data exists to receive into - if (storages.isEmpty()) { - var storage = fileStorageService.allocateStorage( - FileStorageType.CRAWL_DATA, - "crawl-data", - "Crawl Data" - ); - fileStorageService.enableFileStorage(storage.id()); - } - - var storageId = fileStorageService - .getOnlyActiveFileStorage(FileStorageType.CRAWL_DATA) - .orElseThrow(AssertionError::new); // This Shouldn't Happen (tm) - - var storage = fileStorageService.getStorage(storageId); - - var spec = executorClient.getTransferSpec(Context.internal(), sourceNode, count); - if (spec.size() == 0) { - return; - } - - Path basePath = storage.asPath(); - try (var workLog = new WorkLog(basePath.resolve("crawler.log")); - var conn = dataSource.getConnection(); - var stmt = conn.prepareStatement("UPDATE EC_DOMAIN SET NODE_AFFINITY=? WHERE ID=?"); - ) { - for (var item : spec.items()) { - logger.info("{}", item); - logger.info("Transferring {}", item.domainName()); - - Path dest = basePath.resolve(item.path()); - Files.createDirectories(dest.getParent()); - try (var fileStream = Files.newOutputStream(dest)) { - executorClient.transferFile(Context.internal(), - sourceNode, - item.fileStorageId(), - item.path(), - fileStream); - - stmt.setInt(1, nodeId); - stmt.setInt(2, item.domainId()); - stmt.executeUpdate(); - - executorClient.yieldDomain(Context.internal(), sourceNode, item); - workLog.setJobToFinished(item.domainName(), item.path(), 1); - } - catch (IOException ex) { - Files.deleteIfExists(dest); - throw new RuntimeException(ex); - } - catch (Exception ex) { - throw new RuntimeException(ex); - } - } - } - - var outbox = new MqOutbox(persistence, executorServiceName, sourceNode, - getClass().getSimpleName(), nodeId, UUID.randomUUID()); - - try { - outbox.send("PRUNE-CRAWL-DATA", ":-)"); - } catch (Exception e) { - throw new RuntimeException(e); - } finally { - outbox.stop(); - } - } - - public record TransferReq(int 
sourceNode, int count) { } -} diff --git a/code/services-core/index-service/build.gradle b/code/services-core/index-service/build.gradle index 4523dc27..f29a1494 100644 --- a/code/services-core/index-service/build.gradle +++ b/code/services-core/index-service/build.gradle @@ -1,9 +1,9 @@ plugins { id 'java' - id 'com.palantir.docker' version '0.35.0' id 'application' id 'jvm-test-suite' + id 'com.google.cloud.tools.jib' version '3.4.0' } application { @@ -13,32 +13,32 @@ application { tasks.distZip.enabled = false -apply from: "$rootProject.projectDir/docker-service.gradle" +apply from: "$rootProject.projectDir/docker.gradle" + java { toolchain { languageVersion.set(JavaLanguageVersion.of(21)) } } +apply from: "$rootProject.projectDir/srcsets.gradle" + dependencies { implementation project(':code:common:config') + implementation project(':code:common:service') + implementation project(':code:common:service-discovery') implementation project(':code:common:model') implementation project(':code:common:db') implementation project(':code:common:linkdb') - implementation project(':code:common:service') - implementation project(':code:api:index-api') + + implementation project(':code:index') + implementation project(':code:functions:link-graph:partition') + implementation project(':code:functions:link-graph:api') + implementation project(':code:functions:search-query:api') + implementation project(':code:index:api') + implementation project(':code:common:service-discovery') - implementation project(':code:libraries:array') - implementation project(':code:libraries:btree') - - implementation project(':code:features-index:index-journal') - implementation project(':code:features-index:index-query') - implementation project(':code:features-index:index-forward') - implementation project(':code:features-index:index-reverse') - implementation project(':code:features-index:domain-ranking') - implementation project(':code:features-index:result-ranking') - implementation 
project(':third-party:commons-codec') testImplementation project(path: ':code:services-core:control-service') testImplementation project(':code:common:process') @@ -51,7 +51,6 @@ dependencies { implementation libs.bundles.httpcomponents implementation libs.roaringbitmap implementation libs.snakeyaml - implementation libs.rxjava implementation libs.spark implementation libs.opencsv implementation libs.trove diff --git a/code/services-core/index-service/src/main/java/nu/marginalia/index/IndexMain.java b/code/services-core/index-service/java/nu/marginalia/index/IndexMain.java similarity index 86% rename from code/services-core/index-service/src/main/java/nu/marginalia/index/IndexMain.java rename to code/services-core/index-service/java/nu/marginalia/index/IndexMain.java index 9e4042ea..006f313e 100644 --- a/code/services-core/index-service/src/main/java/nu/marginalia/index/IndexMain.java +++ b/code/services-core/index-service/java/nu/marginalia/index/IndexMain.java @@ -4,7 +4,7 @@ import com.google.inject.Guice; import com.google.inject.Inject; import com.google.inject.Injector; import nu.marginalia.service.MainClass; -import nu.marginalia.service.SearchServiceDescriptors; +import nu.marginalia.service.ServiceDiscoveryModule; import nu.marginalia.service.id.ServiceId; import nu.marginalia.service.module.ServiceConfigurationModule; import nu.marginalia.service.module.DatabaseModule; @@ -25,7 +25,8 @@ public class IndexMain extends MainClass { Injector injector = Guice.createInjector( new IndexModule(), new DatabaseModule(false), - new ServiceConfigurationModule(SearchServiceDescriptors.descriptors, ServiceId.Index) + new ServiceDiscoveryModule(), + new ServiceConfigurationModule(ServiceId.Index) ); injector.getInstance(NodeStatusWatcher.class); diff --git a/code/services-core/index-service/src/main/java/nu/marginalia/index/IndexModule.java b/code/services-core/index-service/java/nu/marginalia/index/IndexModule.java similarity index 90% rename from 
code/services-core/index-service/src/main/java/nu/marginalia/index/IndexModule.java rename to code/services-core/index-service/java/nu/marginalia/index/IndexModule.java index 526e34bd..f998dfcf 100644 --- a/code/services-core/index-service/src/main/java/nu/marginalia/index/IndexModule.java +++ b/code/services-core/index-service/java/nu/marginalia/index/IndexModule.java @@ -4,8 +4,8 @@ import com.google.inject.AbstractModule; import com.google.inject.Provides; import com.google.inject.Singleton; import com.google.inject.name.Named; -import nu.marginalia.linkdb.dlinks.DomainLinkDb; -import nu.marginalia.linkdb.dlinks.DelayingDomainLinkDb; +import nu.marginalia.linkgraph.DomainLinks; +import nu.marginalia.linkgraph.impl.DelayingDomainLinks; import nu.marginalia.storage.FileStorageService; import nu.marginalia.IndexLocations; import org.slf4j.Logger; @@ -26,13 +26,13 @@ public class IndexModule extends AbstractModule { @Provides @Singleton - public DomainLinkDb domainLinkDb ( + public DomainLinks domainLinkDb ( FileStorageService storageService ) { Path path = IndexLocations.getLinkdbLivePath(storageService).resolve(DOMAIN_LINKS_FILE_NAME); - return new DelayingDomainLinkDb(path); + return new DelayingDomainLinks(path); } @Provides diff --git a/code/services-core/index-service/src/main/java/nu/marginalia/index/svc/IndexOpsService.java b/code/services-core/index-service/java/nu/marginalia/index/IndexOpsService.java similarity index 62% rename from code/services-core/index-service/src/main/java/nu/marginalia/index/svc/IndexOpsService.java rename to code/services-core/index-service/java/nu/marginalia/index/IndexOpsService.java index 632621b0..e7ed68b2 100644 --- a/code/services-core/index-service/src/main/java/nu/marginalia/index/svc/IndexOpsService.java +++ b/code/services-core/index-service/java/nu/marginalia/index/IndexOpsService.java @@ -1,11 +1,9 @@ -package nu.marginalia.index.svc; +package nu.marginalia.index; import com.google.inject.Inject; import 
com.google.inject.Singleton; -import nu.marginalia.index.index.SearchIndex; -import spark.Request; -import spark.Response; -import spark.Spark; +import nu.marginalia.index.index.StatefulIndex; +import nu.marginalia.index.searchset.SearchSetsService; import javax.annotation.CheckReturnValue; import java.util.Optional; @@ -16,12 +14,12 @@ import java.util.concurrent.locks.ReentrantLock; public class IndexOpsService { private final ReentrantLock opsLock = new ReentrantLock(); - private final SearchIndex index; - private final IndexSearchSetsService searchSetService; + private final StatefulIndex index; + private final SearchSetsService searchSetService; @Inject - public IndexOpsService(SearchIndex index, - IndexSearchSetsService searchSetService) { + public IndexOpsService(StatefulIndex index, + SearchSetsService searchSetService) { this.index = index; this.searchSetService = searchSetService; } @@ -43,26 +41,6 @@ public class IndexOpsService { } - public Object repartitionEndpoint(Request request, Response response) throws Exception { - - if (!run(searchSetService::recalculateSecondary)) { - Spark.halt(503, "Operations busy"); - } - - return "OK"; - } - - public Object reindexEndpoint(Request request, Response response) throws Exception { - - if (!run(index::switchIndex).isPresent()) { - Spark.halt(503, "Operations busy"); - } - - return "OK"; - } - - - @CheckReturnValue public Optional run(Callable c) throws Exception { if (!opsLock.tryLock()) diff --git a/code/services-core/index-service/src/main/java/nu/marginalia/index/IndexService.java b/code/services-core/index-service/java/nu/marginalia/index/IndexService.java similarity index 59% rename from code/services-core/index-service/src/main/java/nu/marginalia/index/IndexService.java rename to code/services-core/index-service/java/nu/marginalia/index/IndexService.java index b5f5a7f8..cf9187d0 100644 --- a/code/services-core/index-service/src/main/java/nu/marginalia/index/IndexService.java +++ 
b/code/services-core/index-service/java/nu/marginalia/index/IndexService.java @@ -1,38 +1,28 @@ package nu.marginalia.index; -import com.google.gson.Gson; import com.google.inject.Inject; -import io.grpc.ServerBuilder; -import io.reactivex.rxjava3.schedulers.Schedulers; import lombok.SneakyThrows; import nu.marginalia.IndexLocations; -import nu.marginalia.index.svc.IndexDomainLinksService; -import nu.marginalia.linkdb.dlinks.DomainLinkDb; +import nu.marginalia.linkgraph.PartitionLinkGraphService; +import nu.marginalia.index.index.StatefulIndex; +import nu.marginalia.linkgraph.DomainLinks; +import nu.marginalia.service.discovery.property.ServicePartition; import nu.marginalia.storage.FileStorageService; -import nu.marginalia.index.client.IndexMqEndpoints; -import nu.marginalia.index.index.SearchIndex; -import nu.marginalia.index.svc.IndexOpsService; -import nu.marginalia.index.svc.IndexQueryService; +import nu.marginalia.index.api.IndexMqEndpoints; import nu.marginalia.linkdb.docs.DocumentDbReader; -import nu.marginalia.model.gson.GsonFactory; import nu.marginalia.service.control.ServiceEventLog; import nu.marginalia.service.server.*; import nu.marginalia.service.server.mq.MqRequest; import org.jetbrains.annotations.NotNull; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import spark.Request; -import spark.Response; -import spark.Spark; -import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; -import java.util.concurrent.TimeUnit; +import java.util.List; import static nu.marginalia.linkdb.LinkdbFileNames.DOCDB_FILE_NAME; import static nu.marginalia.linkdb.LinkdbFileNames.DOMAIN_LINKS_FILE_NAME; -import static spark.Spark.get; public class IndexService extends Service { private final Logger logger = LoggerFactory.getLogger(getClass()); @@ -40,59 +30,42 @@ public class IndexService extends Service { @NotNull private final Initialization init; private final IndexOpsService opsService; - private final SearchIndex searchIndex; + 
private final StatefulIndex statefulIndex; private final FileStorageService fileStorageService; private final DocumentDbReader documentDbReader; - private final DomainLinkDb domainLinkDb; + private final DomainLinks domainLinks; private final ServiceEventLog eventLog; + @SneakyThrows @Inject public IndexService(BaseServiceParams params, IndexOpsService opsService, - IndexQueryService indexQueryService, - SearchIndex searchIndex, + IndexGrpcService indexQueryService, + StatefulIndex statefulIndex, FileStorageService fileStorageService, DocumentDbReader documentDbReader, - DomainLinkDb domainLinkDb, - IndexDomainLinksService indexDomainLinksService, - ServiceEventLog eventLog) throws IOException { - super(params); + DomainLinks domainLinks, + PartitionLinkGraphService partitionLinkGraphService, + ServiceEventLog eventLog) + { + super(params, + ServicePartition.partition(params.configuration.node()), + List.of(indexQueryService, + partitionLinkGraphService) + ); this.opsService = opsService; - this.searchIndex = searchIndex; + this.statefulIndex = statefulIndex; this.fileStorageService = fileStorageService; this.documentDbReader = documentDbReader; - this.domainLinkDb = domainLinkDb; + this.domainLinks = domainLinks; this.eventLog = eventLog; - final Gson gson = GsonFactory.get(); - this.init = params.initialization; - var grpcServer = ServerBuilder.forPort(params.configuration.port() + 1) - .addService(indexQueryService) - .addService(indexDomainLinksService) - .build(); - grpcServer.start(); - - Spark.post("/search/", indexQueryService::search, gson::toJson); - - Spark.get("/public/debug/docmeta", indexQueryService::debugEndpointDocMetadata, gson::toJson); - Spark.get("/public/debug/wordmeta", indexQueryService::debugEndpointWordMetadata, gson::toJson); - Spark.get("/public/debug/word", indexQueryService::debugEndpointWordEncoding, gson::toJson); - - Spark.post("/ops/repartition", opsService::repartitionEndpoint); - Spark.post("/ops/reindex", 
opsService::reindexEndpoint); - - get("/is-blocked", this::isBlocked, gson::toJson); - - Schedulers.newThread().scheduleDirect(this::initialize, 1, TimeUnit.MICROSECONDS); - } - - private Object isBlocked(Request request, Response response) { - return !initialized || opsService.isBusy(); + Thread.ofPlatform().name("initialize-index").start(this::initialize); } volatile boolean initialized = false; @@ -133,7 +106,7 @@ public class IndexService extends Service { if (Files.exists(newPathDomains)) { eventLog.logEvent("INDEX-SWITCH-DOMAIN-LINKDB", ""); - domainLinkDb.switchInput(newPathDomains); + domainLinks.switchInput(newPathDomains); } } @@ -154,7 +127,7 @@ public class IndexService extends Service { public void initialize() { if (!initialized) { init.waitReady(); - searchIndex.init(); + statefulIndex.init(); initialized = true; } } diff --git a/code/services-core/index-service/readme.md b/code/services-core/index-service/readme.md index 4ce81002..b7bbcfb0 100644 --- a/code/services-core/index-service/readme.md +++ b/code/services-core/index-service/readme.md @@ -6,17 +6,6 @@ It is the service that most directly executes a search query. It does this by evaluating a low-level query, and then using the index to find the documents that match the query, finally ranking the results and picking the best matches. -## Central Classes +This module only contains service boilerplate. The guts of this service are +in the [index](../../index) module. -* [IndexService](src/main/java/nu/marginalia/index/IndexService.java) is the REST entry point that the internal API talks to. -* [IndexQueryService](src/main/java/nu/marginalia/index/svc/IndexQueryService.java) executes queries. -* [SearchIndex](src/main/java/nu/marginalia/index/index/SearchIndex.java) owns the state of the index and helps with building a query strategy from parameters. -* [IndexResultValuator](src/main/java/nu/marginalia/index/results/IndexResultValuator.java) determines the best results. 
- -## See Also - -The index service relies heavily on the primitives in [features-index](../../features-index): - -* [features-index/index-forward](../../features-index/index-forward/) -* [features-index/index-reverse](../../features-index/index-reverse/) -* [features-index/index-query](../../features-index/index-query) diff --git a/code/services-core/index-service/src/main/java/nu/marginalia/index/index/SearchIndexReader.java b/code/services-core/index-service/src/main/java/nu/marginalia/index/index/SearchIndexReader.java deleted file mode 100644 index bef389b0..00000000 --- a/code/services-core/index-service/src/main/java/nu/marginalia/index/index/SearchIndexReader.java +++ /dev/null @@ -1,104 +0,0 @@ -package nu.marginalia.index.index; - -import nu.marginalia.index.ReverseIndexReader; -import nu.marginalia.index.forward.ForwardIndexReader; -import nu.marginalia.index.forward.ParamMatchingQueryFilter; -import nu.marginalia.index.query.*; -import nu.marginalia.index.query.filter.QueryFilterStepIf; -import nu.marginalia.model.EdgeUrl; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.time.Duration; -import java.util.List; -import java.util.concurrent.TimeUnit; - -public class SearchIndexReader { - - private final Logger logger = LoggerFactory.getLogger(getClass()); - - private final ForwardIndexReader forwardIndexReader; - private final ReverseIndexReader reverseIndexFullReader; - private final ReverseIndexReader reverseIndexPriorityReader; - - public SearchIndexReader(ForwardIndexReader forwardIndexReader, - ReverseIndexReader reverseIndexFullReader, - ReverseIndexReader reverseIndexPriorityReader) { - this.forwardIndexReader = forwardIndexReader; - this.reverseIndexFullReader = reverseIndexFullReader; - this.reverseIndexPriorityReader = reverseIndexPriorityReader; - } - - public IndexQueryBuilder findPriorityWord(IndexQueryPriority priority, long wordId, int fetchSizeMultiplier) { - var sources = 
List.of(reverseIndexPriorityReader.documents(wordId)); - - return new SearchIndexQueryBuilder(reverseIndexFullReader, reverseIndexPriorityReader, - new IndexQuery(sources, priority, fetchSizeMultiplier), wordId); - } - - public IndexQueryBuilder findFullWord(IndexQueryPriority priority, long wordId, int fetchSizeMultiplier) { - var sources = List.of(reverseIndexFullReader.documents(wordId)); - - return new SearchIndexQueryBuilder(reverseIndexFullReader, reverseIndexPriorityReader, - new IndexQuery(sources, priority, fetchSizeMultiplier), wordId); - } - - public QueryFilterStepIf filterForParams(IndexQueryParams params) { - return new ParamMatchingQueryFilter(params, forwardIndexReader); - } - - public long numHits(long word) { - return reverseIndexFullReader.numDocuments(word); - } - public long numHitsPrio(long word) { - return reverseIndexPriorityReader.numDocuments(word); - } - - public long[] getMetadata(long wordId, long[] docIds) { - return reverseIndexFullReader.getTermMeta(wordId, docIds); - } - - public long getDocumentMetadata(long docId) { - return forwardIndexReader.getDocMeta(docId); - } - - public int totalDocCount() { - return forwardIndexReader.totalDocCount(); - } - - public int getHtmlFeatures(long docId) { - return forwardIndexReader.getHtmlFeatures(docId); - } - - public void close() throws InterruptedException { - /* Delay the invocation of close method to allow for a clean shutdown of the service. - * - * This is especially important when using Unsafe-based LongArrays, since we have - * concurrent access to the underlying memory-mapped file. If pull the rug from - * under the caller by closing the file, we'll get a SIGSEGV. Even with MemorySegment, - * we'll get ugly stacktraces if we close the file while a thread is still accessing it. 
- */ - - delayedCall(forwardIndexReader::close, Duration.ofMinutes(1)); - delayedCall(reverseIndexFullReader::close, Duration.ofMinutes(1)); - delayedCall(reverseIndexPriorityReader::close, Duration.ofMinutes(1)); - } - - - private void delayedCall(Runnable call, Duration delay) throws InterruptedException { - Thread.ofPlatform().start(() -> { - try { - TimeUnit.SECONDS.sleep(delay.toSeconds()); - call.run(); - } catch (InterruptedException e) { - logger.error("Interrupted", e); - } - }); - } - - /** Returns true if index data is available */ - public boolean isLoaded() { - // We only need to check one of the readers, as they are either all loaded or none are - return forwardIndexReader.isLoaded(); - } -} diff --git a/code/services-core/index-service/src/main/java/nu/marginalia/index/index/SearchIndexSearchTerms.java b/code/services-core/index-service/src/main/java/nu/marginalia/index/index/SearchIndexSearchTerms.java deleted file mode 100644 index 833778df..00000000 --- a/code/services-core/index-service/src/main/java/nu/marginalia/index/index/SearchIndexSearchTerms.java +++ /dev/null @@ -1,38 +0,0 @@ -package nu.marginalia.index.index; - -import it.unimi.dsi.fastutil.longs.LongArrayList; -import it.unimi.dsi.fastutil.longs.LongComparator; -import it.unimi.dsi.fastutil.longs.LongList; -import it.unimi.dsi.fastutil.longs.LongOpenHashSet; - -import java.util.Collections; -import java.util.List; - -public record SearchIndexSearchTerms( - LongList includes, - LongList excludes, - LongList priority, - List coherences - ) -{ - public SearchIndexSearchTerms() { - this(LongList.of(), LongList.of(), LongList.of(), Collections.emptyList()); - } - - public boolean isEmpty() { - return includes.isEmpty(); - } - - public long[] sortedDistinctIncludes(LongComparator comparator) { - if (includes.isEmpty()) - return includes.toLongArray(); - - LongList list = new LongArrayList(new LongOpenHashSet(includes)); - list.sort(comparator); - return list.toLongArray(); - } - - public int 
size() { - return includes.size() + excludes.size() + priority.size(); - } -} diff --git a/code/services-core/index-service/src/main/java/nu/marginalia/index/results/IndexMetadataService.java b/code/services-core/index-service/src/main/java/nu/marginalia/index/results/IndexMetadataService.java deleted file mode 100644 index b9380883..00000000 --- a/code/services-core/index-service/src/main/java/nu/marginalia/index/results/IndexMetadataService.java +++ /dev/null @@ -1,174 +0,0 @@ -package nu.marginalia.index.results; - -import com.google.inject.Inject; -import gnu.trove.map.hash.TObjectIntHashMap; -import gnu.trove.map.hash.TObjectLongHashMap; -import gnu.trove.set.hash.TLongHashSet; -import it.unimi.dsi.fastutil.ints.Int2ObjectArrayMap; -import it.unimi.dsi.fastutil.longs.Long2LongOpenHashMap; -import it.unimi.dsi.fastutil.longs.Long2ObjectArrayMap; -import it.unimi.dsi.fastutil.longs.LongArrayList; -import nu.marginalia.index.client.model.query.SearchSubquery; -import nu.marginalia.index.index.SearchIndex; -import nu.marginalia.index.svc.SearchTermsService; -import nu.marginalia.model.idx.WordMetadata; -import nu.marginalia.ranking.ResultValuator; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.util.ArrayList; -import java.util.List; - -public class IndexMetadataService { - private final SearchIndex index; - private final SearchTermsService searchTermsService; - private final ResultValuator searchResultValuator; - private static final Logger logger = LoggerFactory.getLogger(IndexMetadataService.class); - - @Inject - public IndexMetadataService(SearchIndex index, - SearchTermsService searchTermsService, - ResultValuator searchResultValuator) { - this.index = index; - this.searchTermsService = searchTermsService; - this.searchResultValuator = searchResultValuator; - } - - public long getDocumentMetadata(long docId) { - return index.getDocumentMetadata(docId); - } - - public int getHtmlFeatures(long urlId) { - return 
index.getHtmlFeatures(urlId); - } - - public TermMetadataForDocuments getTermMetadataForDocuments(long[] docIdsAll, long[] termIdsList) { - return new TermMetadataForDocuments(docIdsAll, termIdsList); - } - - public QuerySearchTerms getSearchTerms(List searchTermVariants) { - - LongArrayList termIdsList = new LongArrayList(); - - TObjectLongHashMap termToId = new TObjectLongHashMap<>(10, 0.75f, -1); - - for (var subquery : searchTermVariants) { - for (var term : subquery.searchTermsInclude) { - if (termToId.containsKey(term)) { - continue; - } - - long id = searchTermsService.getWordId(term); - termIdsList.add(id); - termToId.put(term, id); - } - } - - return new QuerySearchTerms(termToId, - termIdsList.toLongArray(), - getTermCoherences(searchTermVariants)); - } - - - private TermCoherences getTermCoherences(List searchTermVariants) { - List coherences = new ArrayList<>(); - - for (var subquery : searchTermVariants) { - for (var coh : subquery.searchTermCoherences) { - long[] ids = coh.stream().mapToLong(searchTermsService::getWordId).toArray(); - coherences.add(ids); - } - - // It's assumed each subquery has identical coherences - break; - } - - return new TermCoherences(coherences); - } - - public TLongHashSet getResultsWithPriorityTerms(List subqueries, long[] resultsArray) { - long[] priorityTermIds = - subqueries.stream() - .flatMap(sq -> sq.searchTermsPriority.stream()) - .distinct() - .mapToLong(searchTermsService::getWordId) - .toArray(); - - var ret = new TLongHashSet(resultsArray.length); - - for (long priorityTerm : priorityTermIds) { - long[] metadata = index.getTermMetadata(priorityTerm, resultsArray); - for (int i = 0; i < metadata.length; i++) { - if (metadata[i] != 0) ret.add(resultsArray[i]); - } - } - - return ret; - } - - public ResultValuator getSearchResultValuator() { - return searchResultValuator; - } - - public class TermMetadataForDocuments { - private final Long2ObjectArrayMap termdocToMeta; - - public TermMetadataForDocuments(long[] 
docIdsAll, long[] termIdsList) { - termdocToMeta = new Long2ObjectArrayMap<>(termIdsList.length); - - for (long termId : termIdsList) { - var metadata = index.getTermMetadata(termId, docIdsAll); - termdocToMeta.put(termId, new Long2LongOpenHashMap(docIdsAll, metadata)); - } - } - - public long getTermMetadata(long termId, long docId) { - var docsForTerm = termdocToMeta.get(termId); - if (docsForTerm == null) { - logger.warn("Missing meta for term {}", termId); - return 0; - } - - return docsForTerm.getOrDefault(docId, 0); - } - - public boolean testCoherence(long docId, TermCoherences coherences) { - - for (var coherenceSet : coherences.words()) { - long overlap = 0xFF_FFFF_FFFF_FFFFL; - - for (var word : coherenceSet) { - long positions = WordMetadata.decodePositions(getTermMetadata(word, docId)); - overlap &= positions; - } - if (overlap == 0L) { - return false; - } - } - - return true; - } - } - - public static class QuerySearchTerms { - private final TObjectLongHashMap termToId; - public final long[] termIdsAll; - - public final TermCoherences coherences; - - public QuerySearchTerms(TObjectLongHashMap termToId, - long[] termIdsAll, - TermCoherences coherences) { - this.termToId = termToId; - this.termIdsAll = termIdsAll; - this.coherences = coherences; - } - - public long getIdForTerm(String searchTerm) { - return termToId.get(searchTerm); - } - } - - /** wordIds that we require to be in the same sentence */ - public record TermCoherences(List words) {} -} diff --git a/code/services-core/index-service/src/main/java/nu/marginalia/index/results/IndexResultDecorator.java b/code/services-core/index-service/src/main/java/nu/marginalia/index/results/IndexResultDecorator.java deleted file mode 100644 index 6b4d062b..00000000 --- a/code/services-core/index-service/src/main/java/nu/marginalia/index/results/IndexResultDecorator.java +++ /dev/null @@ -1,90 +0,0 @@ -package nu.marginalia.index.results; - -import com.google.inject.Inject; -import 
com.google.inject.Singleton; -import gnu.trove.list.TLongList; -import gnu.trove.list.array.TLongArrayList; -import nu.marginalia.index.client.model.results.DecoratedSearchResultItem; -import nu.marginalia.index.client.model.results.ResultRankingContext; -import nu.marginalia.index.client.model.results.SearchResultItem; -import nu.marginalia.linkdb.docs.DocumentDbReader; -import nu.marginalia.linkdb.model.DocdbUrlDetail; -import nu.marginalia.ranking.ResultValuator; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.sql.SQLException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -/** Joins the index view of a set of search results with data from the linkdb */ -@Singleton -public class IndexResultDecorator { - - private static final Logger logger = LoggerFactory.getLogger(IndexResultDecorator.class); - - private final DocumentDbReader documentDbReader; - private final ResultValuator valuator; - - @Inject - public IndexResultDecorator(DocumentDbReader documentDbReader, - ResultValuator valuator) { - this.documentDbReader = documentDbReader; - this.valuator = valuator; - } - - /** Decorate the result items with additional information from the link database - * and calculate an updated ranking with the additional information */ - public List decorateAndRerank(List rawResults, - ResultRankingContext rankingContext) - throws SQLException - { - TLongList idsList = new TLongArrayList(rawResults.size()); - - for (var result : rawResults) - idsList.add(result.getDocumentId()); - - Map urlDetailsById = new HashMap<>(rawResults.size()); - - for (var item : documentDbReader.getUrlDetails(idsList)) - urlDetailsById.put(item.urlId(), item); - - List decoratedItems = new ArrayList<>(); - for (var result : rawResults) { - var docData = urlDetailsById.get(result.getDocumentId()); - - if (null == docData) { - logger.warn("No data for document id {}", result.getDocumentId()); - continue; - } - - 
decoratedItems.add(createCombinedItem(result, docData, rankingContext)); - } - - if (decoratedItems.size() != rawResults.size()) - logger.warn("Result list shrunk during decoration?"); - - return decoratedItems; - } - - private DecoratedSearchResultItem createCombinedItem(SearchResultItem result, - DocdbUrlDetail docData, - ResultRankingContext rankingContext) { - return new DecoratedSearchResultItem( - result, - docData.url(), - docData.title(), - docData.description(), - docData.urlQuality(), - docData.format(), - docData.features(), - docData.pubYear(), - docData.dataHash(), - docData.wordsTotal(), - valuator.calculateSearchResultValue(result.keywordScores, docData.wordsTotal(), rankingContext) - ); - - } -} diff --git a/code/services-core/index-service/src/main/java/nu/marginalia/index/results/IndexResultDomainDeduplicator.java b/code/services-core/index-service/src/main/java/nu/marginalia/index/results/IndexResultDomainDeduplicator.java deleted file mode 100644 index 4c29886a..00000000 --- a/code/services-core/index-service/src/main/java/nu/marginalia/index/results/IndexResultDomainDeduplicator.java +++ /dev/null @@ -1,47 +0,0 @@ -package nu.marginalia.index.results; - -import gnu.trove.map.TLongIntMap; -import gnu.trove.map.hash.TLongIntHashMap; -import nu.marginalia.index.client.model.results.SearchResultItem; - -public class IndexResultDomainDeduplicator { - final TLongIntMap resultsByDomainId = CachedObjects.getMap(); - final int limitByDomain; - - public IndexResultDomainDeduplicator(int limitByDomain) { - this.limitByDomain = limitByDomain; - } - - public boolean test(SearchResultItem item) { - final long key = item.getDomainId(); - - return resultsByDomainId.adjustOrPutValue(key, 1, 1) <= limitByDomain; - } - - public int getCount(SearchResultItem item) { - final long key = item.getDomainId(); - - return resultsByDomainId.get(key); - } - - private static class CachedObjects { - private static final ThreadLocal mapCache = ThreadLocal.withInitial(() -> - 
new TLongIntHashMap(2048, 0.5f, -1, 0) - ); - - private static TLongIntHashMap getMap() { - var ret = mapCache.get(); - ret.clear(); - return ret; - } - - public static void clear() { - mapCache.remove(); - } - } - - static void clearCachedObjects() { - CachedObjects.clear(); - } -} - diff --git a/code/services-core/index-service/src/main/java/nu/marginalia/index/svc/IndexQueryExecutor.java b/code/services-core/index-service/src/main/java/nu/marginalia/index/svc/IndexQueryExecutor.java deleted file mode 100644 index 11268ae4..00000000 --- a/code/services-core/index-service/src/main/java/nu/marginalia/index/svc/IndexQueryExecutor.java +++ /dev/null @@ -1,40 +0,0 @@ -package nu.marginalia.index.svc; - -import com.google.inject.Singleton; -import gnu.trove.list.array.TLongArrayList; -import nu.marginalia.array.buffer.LongQueryBuffer; -import nu.marginalia.index.query.IndexQuery; - -@Singleton -public class IndexQueryExecutor { - - /* Re-use these buffers as they contribute to a large amount of memory churn */ - private static final ThreadLocal bufferTL = ThreadLocal.withInitial(() -> new LongQueryBuffer(4096)); - - public int executeQuery(IndexQuery query, TLongArrayList results, SearchParameters params) - { - final int fetchSize = params.fetchSize * query.fetchSizeMultiplier; - - final LongQueryBuffer buffer = bufferTL.get(); - - int cnt = 0; - - while (query.hasMore() - && results.size() < fetchSize - && params.budget.hasTimeLeft()) - { - buffer.reset(); - query.getMoreResults(buffer); - - for (int i = 0; i < buffer.size() && results.size() < fetchSize; i++) { - results.add(buffer.data[i]); - cnt++; - } - } - - params.dataCost += query.dataCost(); - - return cnt; - } - -} diff --git a/code/services-core/index-service/src/main/java/nu/marginalia/index/svc/IndexQueryService.java b/code/services-core/index-service/src/main/java/nu/marginalia/index/svc/IndexQueryService.java deleted file mode 100644 index 08c7c1c3..00000000 --- 
a/code/services-core/index-service/src/main/java/nu/marginalia/index/svc/IndexQueryService.java +++ /dev/null @@ -1,459 +0,0 @@ -package nu.marginalia.index.svc; - -import com.google.gson.Gson; -import com.google.inject.Inject; -import com.google.inject.Singleton; -import gnu.trove.list.TLongList; -import gnu.trove.list.array.TLongArrayList; -import io.prometheus.client.Counter; -import io.prometheus.client.Gauge; -import io.prometheus.client.Histogram; -import lombok.SneakyThrows; -import nu.marginalia.index.api.*; -import nu.marginalia.index.api.IndexApiGrpc.IndexApiImplBase; -import nu.marginalia.index.client.model.query.SearchSetIdentifier; -import nu.marginalia.index.client.model.query.SearchSubquery; -import nu.marginalia.index.client.model.results.ResultRankingParameters; -import nu.marginalia.index.client.model.results.SearchResultItem; -import nu.marginalia.index.client.model.results.ResultRankingContext; -import nu.marginalia.index.client.model.results.SearchResultSet; -import nu.marginalia.index.client.model.query.SearchSpecification; -import nu.marginalia.index.index.SearchIndex; -import nu.marginalia.index.index.SearchIndexSearchTerms; -import nu.marginalia.index.query.IndexQueryPriority; -import nu.marginalia.index.results.IndexMetadataService; -import nu.marginalia.index.results.IndexResultDecorator; -import nu.marginalia.index.searchset.SearchSet; -import nu.marginalia.index.results.IndexResultValuator; -import nu.marginalia.index.query.IndexQuery; -import nu.marginalia.index.results.IndexResultDomainDeduplicator; -import nu.marginalia.index.svc.searchset.SmallSearchSet; -import nu.marginalia.model.gson.GsonFactory; -import nu.marginalia.model.idx.DocumentMetadata; -import nu.marginalia.model.idx.WordMetadata; -import nu.marginalia.service.module.ServiceConfiguration; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.slf4j.Marker; -import org.slf4j.MarkerFactory; -import spark.HaltException; -import spark.Request; -import 
spark.Response; -import spark.Spark; - -import java.sql.SQLException; -import java.util.*; -import java.util.stream.Collectors; - -@Singleton -public class IndexQueryService extends IndexApiImplBase { - - private final Logger logger = LoggerFactory.getLogger(getClass()); - - // This marker is used to mark sensitive log messages that are related to queries - // so that they can be filtered out in the production logging configuration - private final Marker queryMarker = MarkerFactory.getMarker("QUERY"); - - private static final Counter wmsa_query_timeouts = Counter.build() - .name("wmsa_index_query_timeouts") - .help("Query timeout counter") - .labelNames("node", "api") - .register(); - private static final Gauge wmsa_query_cost = Gauge.build() - .name("wmsa_index_query_cost") - .help("Computational cost of query") - .labelNames("node", "api") - .register(); - private static final Histogram wmsa_query_time = Histogram.build() - .name("wmsa_index_query_time") - .linearBuckets(0.05, 0.05, 15) - .labelNames("node", "api") - .help("Index-side query time") - .register(); - - private final IndexQueryExecutor queryExecutor; - private final Gson gson = GsonFactory.get(); - - private final SearchIndex index; - private final IndexResultDecorator resultDecorator; - private final IndexSearchSetsService searchSetsService; - - private final IndexMetadataService metadataService; - private final SearchTermsService searchTermsSvc; - private final int nodeId; - - - @Inject - public IndexQueryService(IndexQueryExecutor queryExecutor, - ServiceConfiguration serviceConfiguration, - SearchIndex index, - IndexResultDecorator resultDecorator, - IndexSearchSetsService searchSetsService, - IndexMetadataService metadataService, - SearchTermsService searchTerms) - { - this.nodeId = serviceConfiguration.node(); - this.queryExecutor = queryExecutor; - this.index = index; - this.resultDecorator = resultDecorator; - this.searchSetsService = searchSetsService; - this.metadataService = 
metadataService; - this.searchTermsSvc = searchTerms; - } - - public DocumentMetadata debugEndpointDocMetadata(Request request, Response response) { - String docId = request.queryParams("docId"); - response.type("application/json"); - - return new DocumentMetadata(index.getDocumentMetadata(Long.parseLong(docId))); - } - - public WordMetadata debugEndpointWordMetadata(Request request, Response response) { - String word = request.queryParams("word"); - String docId = request.queryParams("docId"); - response.type("application/json"); - - return new WordMetadata(index.getTermMetadata( - searchTermsSvc.getWordId(word), - new long[] { Long.parseLong(docId) } - )[0]); - } - - public String debugEndpointWordEncoding(Request request, Response response) { - String word = request.queryParams("word"); - response.type("application/json"); - - return Long.toHexString(searchTermsSvc.getWordId(word)); - } - - public Object search(Request request, Response response) { - final String json = request.body(); - final SearchSpecification specsSet = gson.fromJson(json, SearchSpecification.class); - - if (!index.isAvailable()) { - Spark.halt(503, "Index is not loaded"); - } - - final String nodeName = Integer.toString(nodeId); - - try { - return wmsa_query_time - .labels(nodeName, "REST") - .time(() -> { - var params = new SearchParameters(specsSet, getSearchSet(specsSet)); - - SearchResultSet results = executeSearch(params); - - logger.info(queryMarker, "Index Result Count: {}", results.size()); - - wmsa_query_cost - .labels(nodeName, "REST") - .set(params.getDataCost()); - - if (!params.hasTimeLeft()) { - wmsa_query_timeouts - .labels(nodeName, "REST") - .inc(); - } - - return results; - }); - } - catch (HaltException ex) { - logger.warn("Halt", ex); - throw ex; - } - catch (Exception ex) { - logger.info("Error during search {}({}) (query: {})", ex.getClass().getSimpleName(), ex.getMessage(), json); - logger.info("Error", ex); - Spark.halt(500, "Error"); - return null; - } - } - - - // 
GRPC endpoint - @SneakyThrows - public void query(nu.marginalia.index.api.RpcIndexQuery request, - io.grpc.stub.StreamObserver responseObserver) { - - try { - var params = new SearchParameters(request, getSearchSet(request)); - - final String nodeName = Integer.toString(nodeId); - - SearchResultSet results = wmsa_query_time - .labels(nodeName, "GRPC") - .time(() -> executeSearch(params)); - - wmsa_query_cost - .labels(nodeName, "GRPC") - .set(params.getDataCost()); - - if (!params.hasTimeLeft()) { - wmsa_query_timeouts - .labels(nodeName, "GRPC") - .inc(); - } - - for (var result : results.results) { - - var rawResult = result.rawIndexResult; - - var rawItem = RpcRawResultItem.newBuilder(); - rawItem.setCombinedId(rawResult.combinedId); - rawItem.setResultsFromDomain(rawResult.resultsFromDomain); - - for (var score : rawResult.keywordScores) { - rawItem.addKeywordScores( - RpcResultKeywordScore.newBuilder() - .setEncodedDocMetadata(score.encodedDocMetadata()) - .setEncodedWordMetadata(score.encodedWordMetadata()) - .setKeyword(score.keyword) - .setHtmlFeatures(score.htmlFeatures()) - .setHasPriorityTerms(score.hasPriorityTerms()) - .setSubquery(score.subquery) - ); - } - - var decoratedBuilder = RpcDecoratedResultItem.newBuilder() - .setDataHash(result.dataHash) - .setDescription(result.description) - .setFeatures(result.features) - .setFormat(result.format) - .setRankingScore(result.rankingScore) - .setTitle(result.title) - .setUrl(result.url.toString()) - .setWordsTotal(result.wordsTotal) - .setRawItem(rawItem); - - if (result.pubYear != null) { - decoratedBuilder.setPubYear(result.pubYear); - } - responseObserver.onNext(decoratedBuilder.build()); - } - - responseObserver.onCompleted(); - } - catch (Exception ex) { - logger.error("Error in handling request", ex); - responseObserver.onError(ex); - } - } - - // exists for test access - @SneakyThrows - SearchResultSet justQuery(SearchSpecification specsSet) { - return executeSearch(new SearchParameters(specsSet, 
getSearchSet(specsSet))); - } - - private SearchSet getSearchSet(SearchSpecification specsSet) { - - if (specsSet.domains != null && !specsSet.domains.isEmpty()) { - return new SmallSearchSet(specsSet.domains); - } - - return searchSetsService.getSearchSetByName(specsSet.searchSetIdentifier); - } - private SearchSet getSearchSet(RpcIndexQuery request) { - - if (request.getDomainsCount() > 0) { - return new SmallSearchSet(request.getDomainsList()); - } - - return searchSetsService.getSearchSetByName(request.getSearchSetIdentifier()); - } - private SearchResultSet executeSearch(SearchParameters params) throws SQLException { - - if (!index.isLoaded()) { - // Short-circuit if the index is not loaded, as we trivially know that there can be no results - return new SearchResultSet(List.of()); - } - - var rankingContext = createRankingContext(params.rankingParams, params.subqueries); - - logger.info(queryMarker, "{}", params.queryParams); - - var resultIds = evaluateSubqueries(params); - var resultItems = calculateResultScores(params, rankingContext, resultIds); - - logger.info(queryMarker, "After filtering: {} -> {}", resultIds.size(), resultItems.size()); - - var bestResults = selectBestResults(params, resultItems); - - return new SearchResultSet(resultDecorator.decorateAndRerank(bestResults, rankingContext)); - } - - private ResultRankingContext createRankingContext(ResultRankingParameters rankingParams, List subqueries) { - final var termToId = searchTermsSvc.getAllIncludeTerms(subqueries); - final Map termFrequencies = new HashMap<>(termToId.size()); - final Map prioFrequencies = new HashMap<>(termToId.size()); - - termToId.forEach((key, id) -> termFrequencies.put(key, index.getTermFrequency(id))); - termToId.forEach((key, id) -> prioFrequencies.put(key, index.getTermFrequencyPrio(id))); - - return new ResultRankingContext(index.getTotalDocCount(), - rankingParams, - termFrequencies, - prioFrequencies); - } - - /** Execute subqueries and return a list of document ids. 
The index is queried for each subquery, - * at different priorty depths until timeout is reached or the results are all visited. - *
- * Then the results are combined. - * */ - private final ThreadLocal resultsArrayListPool = ThreadLocal.withInitial(TLongArrayList::new); - - private TLongList evaluateSubqueries(SearchParameters params) { - final TLongArrayList results = resultsArrayListPool.get(); - results.resetQuick(); - results.ensureCapacity(params.fetchSize); - - // These queries are various term combinations - for (var subquery : params.subqueries) { - - if (!params.hasTimeLeft()) { - logger.info("Query timed out {}, ({}), -{}", - subquery.searchTermsInclude, subquery.searchTermsAdvice, subquery.searchTermsExclude); - break; - } - - logger.info(queryMarker, "{}", subquery); - - final SearchIndexSearchTerms searchTerms = searchTermsSvc.getSearchTerms(subquery); - - if (searchTerms.isEmpty()) { - logger.info(queryMarker, "empty"); - continue; - } - - logSearchTerms(subquery, searchTerms); - - // These queries are different indices for one subquery - List queries = params.createIndexQueries(index, searchTerms); - for (var query : queries) { - - if (!params.hasTimeLeft()) - break; - - if (shouldOmitQuery(params, query, results.size())) { - logger.info(queryMarker, "Omitting {}", query); - continue; - } - - int cnt = queryExecutor.executeQuery(query, results, params); - - logger.info(queryMarker, "{} from {}", cnt, query); - } - } - - return results; - } - - /** @see IndexQueryPriority */ - private boolean shouldOmitQuery(SearchParameters params, IndexQuery query, int resultCount) { - - var priority = query.queryPriority; - - return switch (priority) { - case BEST -> false; - case GOOD -> resultCount > params.fetchSize / 4; - case FALLBACK -> resultCount > params.fetchSize / 8; - }; - } - - private void logSearchTerms(SearchSubquery subquery, SearchIndexSearchTerms searchTerms) { - - // This logging should only be enabled in testing, as it is very verbose - // and contains sensitive information - - if (!logger.isInfoEnabled(queryMarker)) { - return; - } - - var includes = 
subquery.searchTermsInclude; - var advice = subquery.searchTermsAdvice; - var excludes = subquery.searchTermsExclude; - var priority = subquery.searchTermsPriority; - - for (int i = 0; i < includes.size(); i++) { - logger.info(queryMarker, "{} -> {} I", includes.get(i), - Long.toHexString(searchTerms.includes().getLong(i)) - ); - } - for (int i = 0; i < advice.size(); i++) { - logger.info(queryMarker, "{} -> {} A", advice.get(i), - Long.toHexString(searchTerms.includes().getLong(includes.size() + i)) - ); - } - for (int i = 0; i < excludes.size(); i++) { - logger.info(queryMarker, "{} -> {} E", excludes.get(i), - Long.toHexString(searchTerms.excludes().getLong(i)) - ); - } - for (int i = 0; i < priority.size(); i++) { - logger.info(queryMarker, "{} -> {} P", priority.get(i), - Long.toHexString(searchTerms.priority().getLong(i)) - ); - } - } - - private List calculateResultScores(SearchParameters params, ResultRankingContext rankingContext, TLongList resultIds) { - - final var evaluator = new IndexResultValuator(metadataService, - resultIds, - rankingContext, - params.subqueries, - params.queryParams); - - // Sort the ids for more favorable access patterns on disk - resultIds.sort(); - - // Parallel stream to calculate scores is a minor performance boost - return Arrays.stream(resultIds.toArray()) - .parallel() - .mapToObj(evaluator::calculatePreliminaryScore) - .filter(Objects::nonNull) - .collect(Collectors.toList()); - } - - private List selectBestResults(SearchParameters params, List results) { - - var domainCountFilter = new IndexResultDomainDeduplicator(params.limitByDomain); - - results.sort(Comparator.naturalOrder()); - - List resultsList = new ArrayList<>(results.size()); - - for (var item : results) { - if (domainCountFilter.test(item)) { - resultsList.add(item); - } - } - - if (!params.queryParams.domainCount().isNone()) { - // Remove items that don't meet the domain count requirement - // This isn't perfect because the domain count is calculated - // 
after the results are sorted - resultsList.removeIf(item -> !params.queryParams.domainCount().test(domainCountFilter.getCount(item))); - } - - if (resultsList.size() > params.limitTotal) { - // This can't be made a stream limit() operation because we need domainCountFilter - // to run over the entire list to provide accurate statistics - - resultsList.subList(params.limitTotal, resultsList.size()).clear(); - } - - // populate results with the total number of results encountered from - // the same domain so this information can be presented to the user - for (var result : resultsList) { - result.resultsFromDomain = domainCountFilter.getCount(result); - } - - return resultsList; - } - -} - diff --git a/code/services-core/index-service/src/main/java/nu/marginalia/index/svc/SearchTermsService.java b/code/services-core/index-service/src/main/java/nu/marginalia/index/svc/SearchTermsService.java deleted file mode 100644 index 668731b0..00000000 --- a/code/services-core/index-service/src/main/java/nu/marginalia/index/svc/SearchTermsService.java +++ /dev/null @@ -1,87 +0,0 @@ -package nu.marginalia.index.svc; - -import com.google.inject.Singleton; -import it.unimi.dsi.fastutil.longs.LongArrayList; -import it.unimi.dsi.fastutil.longs.LongList; -import nu.marginalia.hash.MurmurHash3_128; -import nu.marginalia.index.client.model.query.SearchSubquery; -import nu.marginalia.index.index.SearchIndexSearchTerms; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -@Singleton -public class SearchTermsService { - private final Logger logger = LoggerFactory.getLogger(getClass()); - - public SearchIndexSearchTerms getSearchTerms(SearchSubquery request) { - final LongList excludes = new LongArrayList(); - final LongList includes = new LongArrayList(); - final LongList priority = new LongArrayList(); - final List coherences = new ArrayList<>(); - - if (!addEachTerm(includes, 
request.searchTermsInclude)) { - return new SearchIndexSearchTerms(); - } - - // This looks like a bug, but it's not - // v--- ----v - if (!addEachTerm(includes, request.searchTermsAdvice)) { - return new SearchIndexSearchTerms(); - } - - for (var coherence : request.searchTermCoherences) { - LongList parts = new LongArrayList(coherence.size()); - - if (!addEachTerm(parts, coherence)) { - return new SearchIndexSearchTerms(); - } - - coherences.add(parts); - } - - // we don't care if we can't find these: - addEachNonMandatoryTerm(excludes, request.searchTermsExclude); - addEachNonMandatoryTerm(priority, request.searchTermsPriority); - - return new SearchIndexSearchTerms(includes, excludes, priority, coherences); - } - - private boolean addEachTerm(LongList ret, List words) { - boolean success = true; - - for (var word : words) { - ret.add(getWordId(word)); - } - - return success; - } - - private void addEachNonMandatoryTerm(LongList ret, List words) { - for (var word : words) { - ret.add(getWordId(word)); - } - } - - - public Map getAllIncludeTerms(List subqueries) { - Map ret = new HashMap<>(); - - for (var subquery : subqueries) { - for (var include : subquery.searchTermsInclude) { - ret.computeIfAbsent(include, i -> getWordId(include)); - } - } - - return ret; - } - - static MurmurHash3_128 hasher = new MurmurHash3_128(); - public long getWordId(String s) { - return hasher.hashKeyword(s); - } -} diff --git a/code/services-core/index-service/src/test/java/nu/marginalia/index/service/util/DictionaryDataTest.java b/code/services-core/index-service/src/test/java/nu/marginalia/index/service/util/DictionaryDataTest.java deleted file mode 100644 index 92f134cf..00000000 --- a/code/services-core/index-service/src/test/java/nu/marginalia/index/service/util/DictionaryDataTest.java +++ /dev/null @@ -1,17 +0,0 @@ -package nu.marginalia.index.service.util; - -class DictionaryDataTest { - -// @Test -// public void testDataBankGrow2() { -// var dataBank = new 
DictionaryData(65535); -// for (int i = 0; i < 64; i++) { -// String s = "" + i; -// int offset = dataBank.add(s.getBytes(), i); -// System.out.println(s + " " + offset + " " + new String(dataBank.getKey(i)) + " " + dataBank.getValue(i)); -// -// Assertions.assertEquals(s, new String(dataBank.getKey(i))); -// Assertions.assertEquals(i, dataBank.getValue(i)); -// } -// } -} \ No newline at end of file diff --git a/code/services-core/index-service/src/test/java/nu/marginalia/index/service/util/DictionaryHashMapTest.java b/code/services-core/index-service/src/test/java/nu/marginalia/index/service/util/DictionaryHashMapTest.java deleted file mode 100644 index a290f33f..00000000 --- a/code/services-core/index-service/src/test/java/nu/marginalia/index/service/util/DictionaryHashMapTest.java +++ /dev/null @@ -1,58 +0,0 @@ -package nu.marginalia.index.service.util; - -class DictionaryHashMapTest { -// -// @Test -// public void testDictionaryHashMap() { -// var dhm = new DictionaryHashMap(1<<6); -// System.out.println(dhm.put("hello".getBytes(), 23)); -// System.out.println(dhm.put("hello".getBytes(), 23)); -// System.out.println(dhm.put("world".getBytes(), 54)); -// assertEquals(23, dhm.get("hello".getBytes())); -// assertEquals(54, dhm.get("world".getBytes())); -// -// } -// -// @Test -// public void testDictionaryHashMapMissing() { -// var dhm = new DictionaryHashMap(1<<8); -// assertEquals(DictionaryHashMap.NO_VALUE, dhm.get(new byte[] { 1,2,3})); -// -// } -// -// @Test -// public void randomTest() { -// Set strings = new HashSet<>(); -// var dhm = new DictionaryHashMap(1<<14); -// -// for (int i = 0; i < 10000; i++) { -// strings.add(Double.toString(Math.random())); -// } -// -// for (String s : strings) { -// dhm.put(s.getBytes(), s.hashCode()); -// } -// -// for (String s : strings) { -// assertEquals(s.hashCode(), dhm.get(s.getBytes())); -// } -// -// assertEquals(strings.size(), dhm.size()); -// } -// -// @Test -// public void fillHerUp2() { -// var dhm = new 
DictionaryHashMap(1<<13); -// -// try { -// for (int i = 0; i < 10000; i++) { -// dhm.put(Double.toString(Math.random()).getBytes(), i); -// } -// Assertions.fail("Expected exception"); -// } -// catch (IllegalStateException ex) { -// ex.printStackTrace(); -// } -// } - -} \ No newline at end of file diff --git a/code/services-core/query-service/build.gradle b/code/services-core/query-service/build.gradle index 8f7cea9a..b4e2a179 100644 --- a/code/services-core/query-service/build.gradle +++ b/code/services-core/query-service/build.gradle @@ -1,9 +1,9 @@ plugins { id 'java' - id 'com.palantir.docker' version '0.35.0' id 'application' id 'jvm-test-suite' + id 'com.google.cloud.tools.jib' version '3.4.0' } application { @@ -13,7 +13,8 @@ application { tasks.distZip.enabled = false -apply from: "$rootProject.projectDir/docker-service.gradle" +apply from: "$rootProject.projectDir/docker.gradle" + java { toolchain { @@ -21,20 +22,22 @@ java { } } +apply from: "$rootProject.projectDir/srcsets.gradle" + dependencies { implementation project(':code:common:config') implementation project(':code:common:model') implementation project(':code:common:db') implementation project(':code:common:service') implementation project(':code:common:renderer') - implementation project(':code:common:service-client') - implementation project(':code:api:index-api') - implementation project(':code:api:query-api') + implementation project(':code:index:api') implementation project(':code:common:service-discovery') - implementation project(':code:features-qs:query-parser') - implementation project(':code:features-index:index-query') - implementation project(':code:libraries:language-processing') - implementation project(':code:libraries:term-frequency-dict') + implementation project(':code:index:query') + + implementation project(':code:functions:search-query') + implementation project(':code:functions:search-query:api') + implementation project(':code:functions:link-graph:api') + implementation 
project(':code:functions:link-graph:aggregate') implementation libs.bundles.slf4j @@ -44,7 +47,6 @@ dependencies { implementation libs.notnull implementation libs.guice implementation libs.protobuf - implementation libs.rxjava implementation libs.bundles.mariadb implementation libs.bundles.grpc diff --git a/code/services-core/query-service/src/main/java/nu/marginalia/query/QueryBasicInterface.java b/code/services-core/query-service/java/nu/marginalia/query/QueryBasicInterface.java similarity index 57% rename from code/services-core/query-service/src/main/java/nu/marginalia/query/QueryBasicInterface.java rename to code/services-core/query-service/java/nu/marginalia/query/QueryBasicInterface.java index 6558f1b8..dc0ae2a0 100644 --- a/code/services-core/query-service/src/main/java/nu/marginalia/query/QueryBasicInterface.java +++ b/code/services-core/query-service/java/nu/marginalia/query/QueryBasicInterface.java @@ -2,14 +2,12 @@ package nu.marginalia.query; import com.google.gson.Gson; import com.google.inject.Inject; -import nu.marginalia.client.Context; -import nu.marginalia.index.client.IndexClient; +import nu.marginalia.functions.searchquery.QueryGRPCService; import nu.marginalia.index.query.limit.QueryLimits; import nu.marginalia.model.gson.GsonFactory; -import nu.marginalia.query.model.QueryParams; +import nu.marginalia.api.searchquery.model.query.QueryParams; import nu.marginalia.renderer.MustacheRenderer; import nu.marginalia.renderer.RendererFactory; -import nu.marginalia.query.svc.QueryFactory; import spark.Request; import spark.Response; @@ -18,23 +16,17 @@ import java.util.Map; public class QueryBasicInterface { private final MustacheRenderer renderer; - private final NodeConfigurationWatcher nodeConfigurationWatcher; - private final IndexClient indexClient; - private final QueryFactory queryFactory; private final Gson gson = GsonFactory.get(); + private final QueryGRPCService queryGRPCService; + @Inject public QueryBasicInterface(RendererFactory 
rendererFactory, - NodeConfigurationWatcher nodeConfigurationWatcher, - IndexClient indexClient, - QueryFactory queryFactory - ) throws IOException + QueryGRPCService queryGRPCService + ) throws IOException { this.renderer = rendererFactory.renderer("search"); - - this.nodeConfigurationWatcher = nodeConfigurationWatcher; - this.indexClient = indexClient; - this.queryFactory = queryFactory; + this.queryGRPCService = queryGRPCService; } public Object handle(Request request, Response response) { @@ -47,24 +39,20 @@ public class QueryBasicInterface { int domainCount = request.queryParams("domainCount") == null ? 5 : Integer.parseInt(request.queryParams("domainCount")); String set = request.queryParams("set") == null ? "" : request.queryParams("set"); - var query = queryFactory.createQuery(new QueryParams(queryParam, new QueryLimits( + var params = new QueryParams(queryParam, new QueryLimits( domainCount, count, 250, 8192 - ), set)); + ), set); - var rsp = indexClient.query( - Context.fromRequest(request), - nodeConfigurationWatcher.getQueryNodes(), - query.specs - ); + var results = queryGRPCService.executeDirect(queryParam, params, count); if (request.headers("Accept").contains("application/json")) { response.type("application/json"); - return gson.toJson(rsp); + return gson.toJson(results); } else { return renderer.render( Map.of("query", queryParam, - "results", rsp.results) + "results", results) ); } } diff --git a/code/services-core/query-service/src/main/java/nu/marginalia/query/QueryMain.java b/code/services-core/query-service/java/nu/marginalia/query/QueryMain.java similarity index 84% rename from code/services-core/query-service/src/main/java/nu/marginalia/query/QueryMain.java rename to code/services-core/query-service/java/nu/marginalia/query/QueryMain.java index f3f3d561..4034efde 100644 --- a/code/services-core/query-service/src/main/java/nu/marginalia/query/QueryMain.java +++ b/code/services-core/query-service/java/nu/marginalia/query/QueryMain.java @@ 
-4,7 +4,7 @@ import com.google.inject.Guice; import com.google.inject.Inject; import com.google.inject.Injector; import nu.marginalia.service.MainClass; -import nu.marginalia.service.SearchServiceDescriptors; +import nu.marginalia.service.ServiceDiscoveryModule; import nu.marginalia.service.id.ServiceId; import nu.marginalia.service.module.ServiceConfigurationModule; import nu.marginalia.service.module.DatabaseModule; @@ -24,7 +24,8 @@ public class QueryMain extends MainClass { Injector injector = Guice.createInjector( new QueryModule(), new DatabaseModule(false), - new ServiceConfigurationModule(SearchServiceDescriptors.descriptors, ServiceId.Query) + new ServiceDiscoveryModule(), + new ServiceConfigurationModule(ServiceId.Query) ); injector.getInstance(QueryMain.class); diff --git a/code/services-core/query-service/src/main/java/nu/marginalia/query/QueryModule.java b/code/services-core/query-service/java/nu/marginalia/query/QueryModule.java similarity index 100% rename from code/services-core/query-service/src/main/java/nu/marginalia/query/QueryModule.java rename to code/services-core/query-service/java/nu/marginalia/query/QueryModule.java diff --git a/code/services-core/query-service/java/nu/marginalia/query/QueryService.java b/code/services-core/query-service/java/nu/marginalia/query/QueryService.java new file mode 100644 index 00000000..b7dcc04c --- /dev/null +++ b/code/services-core/query-service/java/nu/marginalia/query/QueryService.java @@ -0,0 +1,42 @@ +package nu.marginalia.query; + +import com.google.inject.Inject; +import lombok.SneakyThrows; +import nu.marginalia.linkgraph.AggregateLinkGraphService; +import nu.marginalia.functions.searchquery.QueryGRPCService; +import nu.marginalia.service.discovery.property.ServicePartition; +import nu.marginalia.service.server.BaseServiceParams; +import nu.marginalia.service.server.Service; +import spark.Spark; + +import java.io.IOException; +import java.util.List; + +public class QueryService extends Service { + + 
@SneakyThrows + @Inject + public QueryService(BaseServiceParams params, + AggregateLinkGraphService domainLinksService, + QueryGRPCService queryGRPCService, + QueryBasicInterface queryBasicInterface) + { + super(params, + () -> Spark.staticFileLocation("/static/"), + ServicePartition.any(), + List.of(queryGRPCService, domainLinksService)); + + + Spark.get("/public/search", queryBasicInterface::handle); + + Spark.exception(Exception.class, (e, request, response) -> { + response.status(500); + try { + e.printStackTrace(response.raw().getWriter()); + } catch (IOException ex) { + throw new RuntimeException(ex); + } + }); + } + +} diff --git a/code/services-core/query-service/readme.md b/code/services-core/query-service/readme.md index 1c9d2b66..0aa07c05 100644 --- a/code/services-core/query-service/readme.md +++ b/code/services-core/query-service/readme.md @@ -13,13 +13,10 @@ specific stuff. This mode of operations is available through a `barebones` inst The web interface also offers a JSON API for machine-based queries. -## Main Classes +## Central Classes -* [QueryService](src/main/java/nu/marginalia/query/QueryService.java) - The REST service implementation -* [QueryGRPCService](src/main/java/nu/marginalia/query/QueryGRPCService.java) - The GRPC service implementation +This module is almost entirely boilerplate, except the [QueryBasicInterface](java/nu/marginalia/query/QueryBasicInterface.java) +class, which offers a REST API for querying the index. -## See Also - -* [api/query-api](../../api/query-api) -* [features-qs/query-parser](../../features-qs/query-parser) -* [features-index/index-query](../../features-index/index-query) \ No newline at end of file +Much of the guts of the query service are in the [query-service](../../functions/search-query) +module; which offers query parsing and an interface to the index service partitions. 
diff --git a/code/services-core/query-service/src/main/resources/static/public/index.html b/code/services-core/query-service/resources/static/public/index.html similarity index 100% rename from code/services-core/query-service/src/main/resources/static/public/index.html rename to code/services-core/query-service/resources/static/public/index.html diff --git a/code/services-core/query-service/src/main/resources/templates/search.hdb b/code/services-core/query-service/resources/templates/search.hdb similarity index 100% rename from code/services-core/query-service/src/main/resources/templates/search.hdb rename to code/services-core/query-service/resources/templates/search.hdb diff --git a/code/services-core/query-service/src/main/java/nu/marginalia/query/QueryGRPCDomainLinksService.java b/code/services-core/query-service/src/main/java/nu/marginalia/query/QueryGRPCDomainLinksService.java deleted file mode 100644 index a2b6b780..00000000 --- a/code/services-core/query-service/src/main/java/nu/marginalia/query/QueryGRPCDomainLinksService.java +++ /dev/null @@ -1,102 +0,0 @@ -package nu.marginalia.query; - -import com.google.inject.Inject; -import io.grpc.ManagedChannel; -import io.grpc.stub.StreamObserver; -import nu.marginalia.client.grpc.GrpcChannelPool; -import nu.marginalia.index.api.IndexDomainLinksApiGrpc; -import nu.marginalia.index.api.RpcDomainIdCount; -import nu.marginalia.index.api.RpcDomainIdList; -import nu.marginalia.index.api.RpcDomainIdPairs; -import nu.marginalia.service.id.ServiceId; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.util.List; - - -public class QueryGRPCDomainLinksService extends IndexDomainLinksApiGrpc.IndexDomainLinksApiImplBase { - private static final Logger logger = LoggerFactory.getLogger(QueryGRPCDomainLinksService.class); - private final GrpcChannelPool channelPool; - - @Inject - public QueryGRPCDomainLinksService(NodeConfigurationWatcher nodeConfigurationWatcher) { - channelPool = new 
GrpcChannelPool<>(ServiceId.Index) { - @Override - public IndexDomainLinksApiGrpc.IndexDomainLinksApiBlockingStub createStub(ManagedChannel channel) { - return IndexDomainLinksApiGrpc.newBlockingStub(channel); - } - - @Override - public List getEligibleNodes() { - return nodeConfigurationWatcher.getQueryNodes(); - } - }; - } - - @Override - public void getAllLinks(nu.marginalia.index.api.Empty request, - StreamObserver responseObserver) { - channelPool.callEachSequential(stub -> stub.getAllLinks(request)) - .forEach( - iter -> iter.forEachRemaining(responseObserver::onNext) - ); - - responseObserver.onCompleted(); - } - - @Override - public void getLinksFromDomain(nu.marginalia.index.api.RpcDomainId request, - StreamObserver responseObserver) { - var rspBuilder = RpcDomainIdList.newBuilder(); - - channelPool.callEachSequential(stub -> stub.getLinksFromDomain(request)) - .map(RpcDomainIdList::getDomainIdList) - .forEach(rspBuilder::addAllDomainId); - - responseObserver.onNext(rspBuilder.build()); - responseObserver.onCompleted(); - } - - @Override - public void getLinksToDomain(nu.marginalia.index.api.RpcDomainId request, - StreamObserver responseObserver) { - var rspBuilder = RpcDomainIdList.newBuilder(); - - channelPool.callEachSequential(stub -> stub.getLinksToDomain(request)) - .map(RpcDomainIdList::getDomainIdList) - .forEach(rspBuilder::addAllDomainId); - - responseObserver.onNext(rspBuilder.build()); - responseObserver.onCompleted(); - } - - @Override - public void countLinksFromDomain(nu.marginalia.index.api.RpcDomainId request, - StreamObserver responseObserver) { - - int sum = channelPool.callEachSequential(stub -> stub.countLinksFromDomain(request)) - .mapToInt(RpcDomainIdCount::getIdCount) - .sum(); - - var rspBuilder = RpcDomainIdCount.newBuilder(); - rspBuilder.setIdCount(sum); - responseObserver.onNext(rspBuilder.build()); - responseObserver.onCompleted(); - } - - @Override - public void countLinksToDomain(nu.marginalia.index.api.RpcDomainId request, 
- io.grpc.stub.StreamObserver responseObserver) { - - int sum = channelPool.callEachSequential(stub -> stub.countLinksToDomain(request)) - .mapToInt(RpcDomainIdCount::getIdCount) - .sum(); - - var rspBuilder = RpcDomainIdCount.newBuilder(); - rspBuilder.setIdCount(sum); - responseObserver.onNext(rspBuilder.build()); - responseObserver.onCompleted(); - } - -} diff --git a/code/services-core/query-service/src/main/java/nu/marginalia/query/QueryService.java b/code/services-core/query-service/src/main/java/nu/marginalia/query/QueryService.java deleted file mode 100644 index bd5eaebd..00000000 --- a/code/services-core/query-service/src/main/java/nu/marginalia/query/QueryService.java +++ /dev/null @@ -1,122 +0,0 @@ -package nu.marginalia.query; - -import com.google.gson.Gson; -import com.google.inject.Inject; -import io.grpc.ServerBuilder; -import io.prometheus.client.Histogram; -import nu.marginalia.client.Context; -import nu.marginalia.db.DomainBlacklist; -import nu.marginalia.index.client.IndexClient; -import nu.marginalia.index.client.model.query.SearchSpecification; -import nu.marginalia.index.client.model.results.DecoratedSearchResultItem; -import nu.marginalia.index.client.model.results.SearchResultSet; -import nu.marginalia.query.model.QueryParams; -import nu.marginalia.query.model.QueryResponse; -import nu.marginalia.query.svc.QueryFactory; -import nu.marginalia.service.server.BaseServiceParams; -import nu.marginalia.service.server.Service; -import spark.Request; -import spark.Response; -import spark.Spark; - -import java.io.IOException; -import java.util.List; - -public class QueryService extends Service { - - private final IndexClient indexClient; - private final NodeConfigurationWatcher nodeWatcher; - private final Gson gson; - private final DomainBlacklist blacklist; - private final QueryFactory queryFactory; - - private static final Histogram wmsa_qs_query_time_rest = Histogram.build() - .name("wmsa_qs_query_time_rest") - .linearBuckets(0.05, 0.05, 15) - 
.help("QS-side query time (REST endpoint)") - .register(); - - - @Inject - public QueryService(BaseServiceParams params, - IndexClient indexClient, - NodeConfigurationWatcher nodeWatcher, - QueryGRPCDomainLinksService domainLinksService, - QueryGRPCService queryGRPCService, - Gson gson, - DomainBlacklist blacklist, - QueryBasicInterface queryBasicInterface, - QueryFactory queryFactory) throws IOException - { - super(params, () -> { - Spark.staticFileLocation("/static/"); - }); - - this.indexClient = indexClient; - this.nodeWatcher = nodeWatcher; - this.gson = gson; - this.blacklist = blacklist; - this.queryFactory = queryFactory; - - var grpcServer = ServerBuilder.forPort(params.configuration.port() + 1) - .addService(queryGRPCService) - .addService(domainLinksService) - .build(); - grpcServer.start(); - - Spark.post("/delegate/", this::delegateToIndex, gson::toJson); - Spark.post("/search/", this::search, gson::toJson); - - Spark.get("/public/search", queryBasicInterface::handle); - - Spark.exception(Exception.class, (e, request, response) -> { - response.status(500); - try { - e.printStackTrace(response.raw().getWriter()); - } catch (IOException ex) { - throw new RuntimeException(ex); - } - }); - } - - private Object search(Request request, Response response) { - return wmsa_qs_query_time_rest.time(() -> { - String json = request.body(); - QueryParams params = gson.fromJson(json, QueryParams.class); - - var query = queryFactory.createQuery(params); - var rsp = executeQuery(Context.fromRequest(request), query.specs); - - rsp.results.removeIf(this::isBlacklisted); - - response.type("application/json"); - - return new QueryResponse( - query.specs, - rsp.results, - query.searchTermsHuman, - List.of(), // no problems - query.domain - ); - }); - } - - private SearchResultSet delegateToIndex(Request request, Response response) { - String json = request.body(); - SearchSpecification specsSet = gson.fromJson(json, SearchSpecification.class); - - 
response.type("application/json"); - - return executeQuery(Context.fromRequest(request), specsSet); - } - - private SearchResultSet executeQuery(Context ctx, SearchSpecification query) { - var nodes = nodeWatcher.getQueryNodes(); - - return indexClient.query(ctx, nodes, query); - } - - private boolean isBlacklisted(DecoratedSearchResultItem item) { - return blacklist.isBlacklisted(item.domainId()); - } -} diff --git a/code/tools/crawl-data-unfcker/build.gradle b/code/tools/crawl-data-unfcker/build.gradle index 6673eab6..40ec3bcb 100644 --- a/code/tools/crawl-data-unfcker/build.gradle +++ b/code/tools/crawl-data-unfcker/build.gradle @@ -19,6 +19,8 @@ application { tasks.distZip.enabled = false +apply from: "$rootProject.projectDir/srcsets.gradle" + dependencies { implementation project(':third-party:rdrpostagger') implementation project(':third-party:porterstemmer') diff --git a/code/tools/crawl-data-unfcker/src/main/java/nu/marginalia/tools/CrawlDataUnfcker.java b/code/tools/crawl-data-unfcker/java/nu/marginalia/tools/CrawlDataUnfcker.java similarity index 100% rename from code/tools/crawl-data-unfcker/src/main/java/nu/marginalia/tools/CrawlDataUnfcker.java rename to code/tools/crawl-data-unfcker/java/nu/marginalia/tools/CrawlDataUnfcker.java diff --git a/code/tools/experiment-runner/build.gradle b/code/tools/experiment-runner/build.gradle index 44494949..36001e0b 100644 --- a/code/tools/experiment-runner/build.gradle +++ b/code/tools/experiment-runner/build.gradle @@ -19,6 +19,8 @@ application { tasks.distZip.enabled = false +apply from: "$rootProject.projectDir/srcsets.gradle" + dependencies { implementation project(':third-party:rdrpostagger') implementation project(':third-party:porterstemmer') diff --git a/code/tools/experiment-runner/src/main/java/nu/marginalia/tools/Experiment.java b/code/tools/experiment-runner/java/nu/marginalia/tools/Experiment.java similarity index 90% rename from 
code/tools/experiment-runner/src/main/java/nu/marginalia/tools/Experiment.java rename to code/tools/experiment-runner/java/nu/marginalia/tools/Experiment.java index f08ccf53..b5f9ff40 100644 --- a/code/tools/experiment-runner/src/main/java/nu/marginalia/tools/Experiment.java +++ b/code/tools/experiment-runner/java/nu/marginalia/tools/Experiment.java @@ -1,7 +1,6 @@ package nu.marginalia.tools; import nu.marginalia.crawling.io.SerializableCrawlDataStream; -import nu.marginalia.crawling.model.CrawledDomain; import java.io.IOException; import java.util.HashSet; @@ -14,7 +13,7 @@ public abstract class Experiment { for (String domain : args) { domains.add(domain.toLowerCase()); } - }; + } public abstract boolean process(SerializableCrawlDataStream dataStream) throws IOException; diff --git a/code/tools/experiment-runner/src/main/java/nu/marginalia/tools/ExperimentRunnerMain.java b/code/tools/experiment-runner/java/nu/marginalia/tools/ExperimentRunnerMain.java similarity index 100% rename from code/tools/experiment-runner/src/main/java/nu/marginalia/tools/ExperimentRunnerMain.java rename to code/tools/experiment-runner/java/nu/marginalia/tools/ExperimentRunnerMain.java diff --git a/code/tools/experiment-runner/src/main/java/nu/marginalia/tools/LegacyExperiment.java b/code/tools/experiment-runner/java/nu/marginalia/tools/LegacyExperiment.java similarity index 100% rename from code/tools/experiment-runner/src/main/java/nu/marginalia/tools/LegacyExperiment.java rename to code/tools/experiment-runner/java/nu/marginalia/tools/LegacyExperiment.java diff --git a/code/tools/experiment-runner/src/main/java/nu/marginalia/tools/experiments/AdblockExperiment.java b/code/tools/experiment-runner/java/nu/marginalia/tools/experiments/AdblockExperiment.java similarity index 96% rename from code/tools/experiment-runner/src/main/java/nu/marginalia/tools/experiments/AdblockExperiment.java rename to code/tools/experiment-runner/java/nu/marginalia/tools/experiments/AdblockExperiment.java 
index c5cb8dda..70856439 100644 --- a/code/tools/experiment-runner/src/main/java/nu/marginalia/tools/experiments/AdblockExperiment.java +++ b/code/tools/experiment-runner/java/nu/marginalia/tools/experiments/AdblockExperiment.java @@ -5,7 +5,6 @@ import nu.marginalia.adblock.AdblockSimulator; import nu.marginalia.converting.processor.DocumentProcessor; import nu.marginalia.crawling.model.CrawledDocument; import nu.marginalia.crawling.model.CrawledDomain; -import nu.marginalia.tools.Experiment; import nu.marginalia.tools.LegacyExperiment; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; diff --git a/code/tools/experiment-runner/src/main/java/nu/marginalia/tools/experiments/AtagsExperiment.java b/code/tools/experiment-runner/java/nu/marginalia/tools/experiments/AtagsExperiment.java similarity index 100% rename from code/tools/experiment-runner/src/main/java/nu/marginalia/tools/experiments/AtagsExperiment.java rename to code/tools/experiment-runner/java/nu/marginalia/tools/experiments/AtagsExperiment.java diff --git a/code/tools/experiment-runner/src/main/java/nu/marginalia/tools/experiments/DebugConverterExperiment.java b/code/tools/experiment-runner/java/nu/marginalia/tools/experiments/DebugConverterExperiment.java similarity index 96% rename from code/tools/experiment-runner/src/main/java/nu/marginalia/tools/experiments/DebugConverterExperiment.java rename to code/tools/experiment-runner/java/nu/marginalia/tools/experiments/DebugConverterExperiment.java index a7866193..8290a658 100644 --- a/code/tools/experiment-runner/src/main/java/nu/marginalia/tools/experiments/DebugConverterExperiment.java +++ b/code/tools/experiment-runner/java/nu/marginalia/tools/experiments/DebugConverterExperiment.java @@ -4,7 +4,6 @@ import com.google.inject.Inject; import nu.marginalia.converting.processor.DomainProcessor; import nu.marginalia.converting.processor.plugin.specialization.BlogSpecialization; import nu.marginalia.crawling.model.CrawledDomain; -import 
nu.marginalia.tools.Experiment; import nu.marginalia.tools.LegacyExperiment; import org.jsoup.Jsoup; diff --git a/code/tools/experiment-runner/src/main/java/nu/marginalia/tools/experiments/ExportExternalLinksExperiment.java b/code/tools/experiment-runner/java/nu/marginalia/tools/experiments/ExportExternalLinksExperiment.java similarity index 100% rename from code/tools/experiment-runner/src/main/java/nu/marginalia/tools/experiments/ExportExternalLinksExperiment.java rename to code/tools/experiment-runner/java/nu/marginalia/tools/experiments/ExportExternalLinksExperiment.java diff --git a/code/tools/experiment-runner/src/main/java/nu/marginalia/tools/experiments/SentenceStatisticsExperiment.java b/code/tools/experiment-runner/java/nu/marginalia/tools/experiments/SentenceStatisticsExperiment.java similarity index 98% rename from code/tools/experiment-runner/src/main/java/nu/marginalia/tools/experiments/SentenceStatisticsExperiment.java rename to code/tools/experiment-runner/java/nu/marginalia/tools/experiments/SentenceStatisticsExperiment.java index f804ac2d..8614d1e6 100644 --- a/code/tools/experiment-runner/src/main/java/nu/marginalia/tools/experiments/SentenceStatisticsExperiment.java +++ b/code/tools/experiment-runner/java/nu/marginalia/tools/experiments/SentenceStatisticsExperiment.java @@ -9,7 +9,6 @@ import nu.marginalia.keyword.DocumentKeywordExtractor; import nu.marginalia.language.sentence.SentenceExtractor; import nu.marginalia.model.EdgeUrl; import nu.marginalia.term_frequency_dict.TermFrequencyDict; -import nu.marginalia.tools.Experiment; import nu.marginalia.tools.LegacyExperiment; import org.jsoup.Jsoup; diff --git a/code/tools/experiment-runner/src/main/java/nu/marginalia/tools/experiments/SiteStatisticsExperiment.java b/code/tools/experiment-runner/java/nu/marginalia/tools/experiments/SiteStatisticsExperiment.java similarity index 100% rename from code/tools/experiment-runner/src/main/java/nu/marginalia/tools/experiments/SiteStatisticsExperiment.java 
rename to code/tools/experiment-runner/java/nu/marginalia/tools/experiments/SiteStatisticsExperiment.java diff --git a/code/tools/experiment-runner/src/main/java/nu/marginalia/tools/experiments/TestExperiment.java b/code/tools/experiment-runner/java/nu/marginalia/tools/experiments/TestExperiment.java similarity index 90% rename from code/tools/experiment-runner/src/main/java/nu/marginalia/tools/experiments/TestExperiment.java rename to code/tools/experiment-runner/java/nu/marginalia/tools/experiments/TestExperiment.java index 65991ea4..521b36e8 100644 --- a/code/tools/experiment-runner/src/main/java/nu/marginalia/tools/experiments/TestExperiment.java +++ b/code/tools/experiment-runner/java/nu/marginalia/tools/experiments/TestExperiment.java @@ -1,7 +1,6 @@ package nu.marginalia.tools.experiments; import nu.marginalia.crawling.model.CrawledDomain; -import nu.marginalia.tools.Experiment; import nu.marginalia.tools.LegacyExperiment; public class TestExperiment extends LegacyExperiment { diff --git a/code/tools/experiment-runner/src/main/java/nu/marginalia/tools/experiments/TopicExperiment.java b/code/tools/experiment-runner/java/nu/marginalia/tools/experiments/TopicExperiment.java similarity index 100% rename from code/tools/experiment-runner/src/main/java/nu/marginalia/tools/experiments/TopicExperiment.java rename to code/tools/experiment-runner/java/nu/marginalia/tools/experiments/TopicExperiment.java diff --git a/code/tools/load-test/build.gradle b/code/tools/load-test/build.gradle index e8b363c8..744333c8 100644 --- a/code/tools/load-test/build.gradle +++ b/code/tools/load-test/build.gradle @@ -11,6 +11,8 @@ java { } } +apply from: "$rootProject.projectDir/srcsets.gradle" + dependencies { implementation project(':code:common:config') implementation project(':code:common:model') diff --git a/code/tools/load-test/src/main/java/nu/marginalia/load_test/LoadTestMain.java b/code/tools/load-test/java/nu/marginalia/load_test/LoadTestMain.java similarity index 100% rename 
from code/tools/load-test/src/main/java/nu/marginalia/load_test/LoadTestMain.java rename to code/tools/load-test/java/nu/marginalia/load_test/LoadTestMain.java diff --git a/code/tools/screenshot-capture-tool/build.gradle b/code/tools/screenshot-capture-tool/build.gradle index 6c1f7c67..75aef932 100644 --- a/code/tools/screenshot-capture-tool/build.gradle +++ b/code/tools/screenshot-capture-tool/build.gradle @@ -3,7 +3,7 @@ plugins { id 'application' id 'jvm-test-suite' - id 'com.palantir.docker' version '0.35.0' + id 'com.google.cloud.tools.jib' version '3.4.0' } java { @@ -17,10 +17,12 @@ application { applicationName = 'screenshot-capture-tool' } -apply from: "$rootProject.projectDir/docker-service.gradle" tasks.distZip.enabled = false +apply from: "$rootProject.projectDir/srcsets.gradle" +apply from: "$rootProject.projectDir/docker.gradle" + dependencies { implementation project(':code:common:model') implementation project(':code:features-search:screenshots') diff --git a/code/tools/screenshot-capture-tool/src/main/java/nu/marginalia/screenshot/ScreenshotCaptureToolMain.java b/code/tools/screenshot-capture-tool/java/nu/marginalia/screenshot/ScreenshotCaptureToolMain.java similarity index 100% rename from code/tools/screenshot-capture-tool/src/main/java/nu/marginalia/screenshot/ScreenshotCaptureToolMain.java rename to code/tools/screenshot-capture-tool/java/nu/marginalia/screenshot/ScreenshotCaptureToolMain.java diff --git a/code/tools/screenshot-capture-tool/src/main/java/nu/marginalia/screenshot/ScreenshotLoaderMain.java b/code/tools/screenshot-capture-tool/java/nu/marginalia/screenshot/ScreenshotLoaderMain.java similarity index 100% rename from code/tools/screenshot-capture-tool/src/main/java/nu/marginalia/screenshot/ScreenshotLoaderMain.java rename to code/tools/screenshot-capture-tool/java/nu/marginalia/screenshot/ScreenshotLoaderMain.java diff --git a/docker-service-with-dist.gradle b/docker-service-with-dist.gradle deleted file mode 100644 index 
4beb9ac2..00000000 --- a/docker-service-with-dist.gradle +++ /dev/null @@ -1,69 +0,0 @@ -ext { - dockerImage='openjdk:21-slim' -} - -tasks.register('dockerFile') { - buildDir.mkdir() - - var df = new File(buildDir, "Dockerfile") - doLast { - df.text = """# -# I'm auto-generated, please don't make changes to me or commit me to git -# -# The template exists in docker-service.gradle -# -FROM ${dockerImage} - -RUN apt-get update && apt-get install -y curl - -ADD ${application.applicationName}.tar / -ADD crawler-process.tar /dist -ADD loader-process.tar /dist -ADD converter-process.tar /dist -ADD website-adjacencies-calculator.tar /dist -ADD index-construction-process.tar /dist - -RUN mkdir /wmsa - -ENTRYPOINT WMSA_HOME=/wmsa /${application.applicationName}/bin/${application.applicationName} \${arg0} \${arg1} -""" - } - it.outputs.file(df) -} - -dockerPrepare { - dependsOn tasks.dockerFile - - dependsOn project(':code:processes:website-adjacencies-calculator').distTar - dependsOn project(':code:processes:crawling-process').distTar - dependsOn project(':code:processes:loading-process').distTar - dependsOn project(':code:processes:converting-process').distTar - dependsOn project(':code:processes:index-constructor-process').distTar -} - -dockerfileZip { - dependsOn tasks.dockerFile -} - -docker { - dockerfile = tasks.dockerFile.outputs.files.singleFile - - var registry = project.hasProperty('docker-registry') ? project.property('docker-registry') : 'marginalia' - var tagName = project.hasProperty('docker-tag') ? 
project.property('docker-tag') : 'latest' - - name = registry+'/'+application.applicationName+':'+tagName - tag 'test', (registry+'/'+application.applicationName+':'+tagName) - - files tasks.distTar.outputs, \ - project(':code:processes:crawling-process').distTar.outputs, \ - project(':code:processes:loading-process').distTar.outputs, \ - project(':code:processes:converting-process').distTar.outputs, \ - project(':code:processes:index-constructor-process').distTar.outputs, \ - project(':code:processes:website-adjacencies-calculator').distTar.outputs - - dependsOn project(':code:processes:crawling-process').distTar - dependsOn project(':code:processes:loading-process').distTar - dependsOn project(':code:processes:converting-process').distTar - dependsOn project(':code:processes:index-constructor-process').distTar - dependsOn project(':code:processes:website-adjacencies-calculator').distTar -} diff --git a/docker-service.gradle b/docker-service.gradle deleted file mode 100644 index 9ac559c6..00000000 --- a/docker-service.gradle +++ /dev/null @@ -1,47 +0,0 @@ -ext { - dockerImage='openjdk:21-slim' -} - -tasks.register('dockerFile') { - buildDir.mkdir() - - var df = new File(buildDir, "Dockerfile") - doLast { - df.text = """# -# I'm auto-generated, please don't make changes to me or commit me to git -# -# The template exists in docker-service.gradle -# -FROM ${dockerImage} - -RUN apt-get update && apt-get install -y curl -ADD ${application.applicationName}.tar / -RUN mkdir /wmsa - -ENTRYPOINT WMSA_HOME=/wmsa /${application.applicationName}/bin/${application.applicationName} \${arg0} \${arg1} -""" - } - it.outputs.file(df) -} - -dockerPrepare { - dependsOn tasks.dockerFile -} - -dockerfileZip { - dependsOn tasks.dockerFile -} - - -docker { - dockerfile = tasks.dockerFile.outputs.files.singleFile - - var registry = project.hasProperty('docker-registry') ? project.property('docker-registry') : 'marginalia' - var tagName = project.hasProperty('docker-tag') ? 
project.property('docker-tag') : 'latest' - - name = registry+'/'+application.applicationName+':'+tagName - tag 'test', (registry+'/'+application.applicationName+':'+tagName) - - files tasks.distTar.outputs - dependsOn tasks.distTar -} diff --git a/docker.gradle b/docker.gradle new file mode 100644 index 00000000..b3feccb5 --- /dev/null +++ b/docker.gradle @@ -0,0 +1,18 @@ +jib { + from { + image = image = rootProject.ext.dockerImageBase + } + to { + image = rootProject.ext.dockerImageRegistry+'/'+project.name + tags = ['latest'] + } + container { + mainClass = application.mainClass + jvmFlags = ['-Dservice.bind-address=0.0.0.0', '-Dservice.useDockerHostname=TRUE', '-Dsystem.homePath=/wmsa'] + volumes = ['/wmsa/conf', '/wmsa/model', '/wmsa/data', '/var/log/wmsa'] + } +} + +tasks.register('docker', { + dependsOn('jibDockerBuild') +}) diff --git a/protobuf.gradle b/protobuf.gradle index af2f5317..d8eec78e 100644 --- a/protobuf.gradle +++ b/protobuf.gradle @@ -26,4 +26,14 @@ protobuf { } } } -} \ No newline at end of file +} + +tasks.compileJava.dependsOn('generateProto') + +sourceSets { + main { + proto { + srcDirs += 'src/main/protobuf' + } + } +} diff --git a/run/env/service.env b/run/env/service.env index 8a1e311f..f14a93cc 100644 --- a/run/env/service.env +++ b/run/env/service.env @@ -1,8 +1,7 @@ -WMSA_HOME=run/ -JAVA_OPTS="--enable-preview -da -Dservice-host=0.0.0.0 -Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.port=4000 -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false" +JDK_JAVA_OPTIONS="--enable-preview -da -Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.port=4000 -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false" JAVA_TOOL_OPTIONS="--enable-preview -agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=*:5000" -EXECUTOR_SERVICE_OPTS="-DdistPath=/dist" -CONVERTER_PROCESS_OPTS="-Dservice-name=converter -Dservice-host=0.0.0.0" 
-CRAWLER_PROCESS_OPTS="-Dservice-name=crawler -Dservice-host=0.0.0.0" -LOADER_PROCESS_OPTS="-Dservice-name=loader -Dservice-host=0.0.0.0" -INDEX_CONSTRUCTION_PROCESS_OPTS="-Dservice-name=index-constructor -Djava.util.concurrent.ForkJoinPool.common.parallelism=4" \ No newline at end of file +CONVERTER_PROCESS_OPTS="-Dservice-name=converter" +CRAWLER_PROCESS_OPTS="-Dservice-name=crawler" +LOADER_PROCESS_OPTS="-Dservice-name=loader" +INDEX_CONSTRUCTION_PROCESS_OPTS="-Dservice-name=index-constructor -Djava.util.concurrent.ForkJoinPool.common.parallelism=4" +ZOOKEEPER_HOSTS="zookeeper:2181" diff --git a/run/install/docker-compose-barebones-1.yml.template b/run/install/docker-compose-barebones-1.yml.template index 1a08cc31..8b36d64e 100644 --- a/run/install/docker-compose-barebones-1.yml.template +++ b/run/install/docker-compose-barebones-1.yml.template @@ -8,21 +8,12 @@ x-svc: &service - logs:/var/log/wmsa networks: - wmsa - healthcheck: - test: curl -f http://localhost:80/internal/ping || exit 1 - start_period: 1s - interval: 5s - timeout: 5s - retries: 60 + depends_on: + - mariadb + - zookeeper x-p1: &partition-1 env_file: - "${INSTALL_DIR}/env/service.env" - healthcheck: - test: curl -f http://localhost:80/internal/ping || exit 1 - start_period: 1s - interval: 5s - timeout: 5s - retries: 60 volumes: - conf:/wmsa/conf:ro - model:/wmsa/model @@ -37,28 +28,22 @@ x-p1: &partition-1 - wmsa environment: - "WMSA_SERVICE_NODE=1" + depends_on: + - mariadb + - zookeeper services: index-service-1: <<: *partition-1 image: "marginalia/index-service" container_name: "index-service-1" - depends_on: - control-service: - condition: service_healthy executor-service-1: <<: *partition-1 image: "marginalia/executor-service" container_name: "executor-service-1" - depends_on: - control-service: - condition: service_healthy query-service: <<: *service image: "marginalia/query-service" container_name: "query-service" - depends_on: - control-service: - condition: service_healthy expose: - 80 
labels: @@ -103,6 +88,12 @@ services: - db:/var/lib/mysql networks: - wmsa + zookeeper: + image: zookeeper + container_name: "zookeeper" + restart: always + ports: + - "127.0.0.1:2181:2181" traefik: image: "traefik:v2.10" container_name: "traefik" diff --git a/run/install/docker-compose-barebones-2.yml.template b/run/install/docker-compose-barebones-2.yml.template index 94e35d34..6232cf01 100644 --- a/run/install/docker-compose-barebones-2.yml.template +++ b/run/install/docker-compose-barebones-2.yml.template @@ -8,21 +8,12 @@ x-svc: &service - logs:/var/log/wmsa networks: - wmsa - healthcheck: - test: curl -f http://localhost:80/internal/ping || exit 1 - start_period: 1s - interval: 5s - timeout: 5s - retries: 60 + depends_on: + - mariadb + - zookeeper x-p1: &partition-1 env_file: - "${INSTALL_DIR}/env/service.env" - healthcheck: - test: curl -f http://localhost:80/internal/ping || exit 1 - start_period: 1s - interval: 5s - timeout: 5s - retries: 60 volumes: - conf:/wmsa/conf:ro - model:/wmsa/model @@ -35,17 +26,14 @@ x-p1: &partition-1 - uploads-1:/uploads networks: - wmsa + depends_on: + - mariadb + - zookeeper environment: - "WMSA_SERVICE_NODE=1" x-p2: &partition-2 env_file: - "${INSTALL_DIR}/env/service.env" - healthcheck: - test: curl -f http://localhost:80/internal/ping || exit 1 - start_period: 1s - interval: 5s - timeout: 5s - retries: 60 volumes: - conf:/wmsa/conf:ro - model:/wmsa/model @@ -58,6 +46,9 @@ x-p2: &partition-2 - uploads-2:/uploads networks: - wmsa + depends_on: + - mariadb + - zookeeper environment: - "WMSA_SERVICE_NODE=2" services: @@ -65,37 +56,22 @@ services: <<: *partition-1 image: "marginalia/index-service" container_name: "index-service-1" - depends_on: - control-service: - condition: service_healthy executor-service-1: <<: *partition-1 image: "marginalia/executor-service" container_name: "executor-service-1" - depends_on: - control-service: - condition: service_healthy index-service-2: <<: *partition-2 image: "marginalia/index-service" 
container_name: "index-service-2" - depends_on: - control-service: - condition: service_healthy executor-service-2: <<: *partition-2 image: "marginalia/executor-service" container_name: "executor-service-2" - depends_on: - control-service: - condition: service_healthy query-service: <<: *service image: "marginalia/query-service" container_name: "query-service" - depends_on: - control-service: - condition: service_healthy expose: - 80 labels: @@ -140,6 +116,12 @@ services: - db:/var/lib/mysql networks: - wmsa + zookeeper: + image: zookeeper + container_name: "zookeeper" + restart: always + ports: + - "127.0.0.1:2181:2181" traefik: image: "traefik:v2.10" container_name: "traefik" diff --git a/run/install/docker-compose-marginalia.yml.template b/run/install/docker-compose-marginalia.yml.template index 2adef02d..b92543e7 100644 --- a/run/install/docker-compose-marginalia.yml.template +++ b/run/install/docker-compose-marginalia.yml.template @@ -8,21 +8,12 @@ x-svc: &service - logs:/var/log/wmsa networks: - wmsa - healthcheck: - test: curl -f http://localhost:80/internal/ping || exit 1 - start_period: 1s - interval: 5s - timeout: 5s - retries: 60 + depends_on: + - mariadb + - zookeeper x-p1: &partition-1 env_file: - "${INSTALL_DIR}/env/service.env" - healthcheck: - test: curl -f http://localhost:80/internal/ping || exit 1 - start_period: 1s - interval: 5s - timeout: 5s - retries: 60 volumes: - conf:/wmsa/conf:ro - model:/wmsa/model @@ -35,17 +26,14 @@ x-p1: &partition-1 - uploads-1:/uploads networks: - wmsa + depends_on: + - mariadb + - zookeeper environment: - "WMSA_SERVICE_NODE=1" x-p2: &partition-2 env_file: - "${INSTALL_DIR}/env/service.env" - healthcheck: - test: curl -f http://localhost:80/internal/ping || exit 1 - start_period: 1s - interval: 5s - timeout: 5s - retries: 60 volumes: - conf:/wmsa/conf:ro - model:/wmsa/model @@ -58,6 +46,9 @@ x-p2: &partition-2 - uploads-2:/uploads networks: - wmsa + depends_on: + - mariadb + - zookeeper environment: - 
"WMSA_SERVICE_NODE=2" services: @@ -65,44 +56,26 @@ services: <<: *partition-1 image: "marginalia/index-service" container_name: "index-service-1" - depends_on: - control-service: - condition: service_healthy executor-service-1: <<: *partition-1 image: "marginalia/executor-service" container_name: "executor-service-1" - depends_on: - control-service: - condition: service_healthy index-service-2: <<: *partition-2 image: "marginalia/index-service" container_name: "index-service-2" - depends_on: - control-service: - condition: service_healthy executor-service-2: <<: *partition-2 image: "marginalia/executor-service" container_name: "executor-service-2" - depends_on: - control-service: - condition: service_healthy query-service: <<: *service image: "marginalia/query-service" container_name: "query-service" - depends_on: - control-service: - condition: service_healthy search-service: <<: *service image: "marginalia/search-service" container_name: "search-service" - depends_on: - control-service: - condition: service_healthy expose: - 80 labels: @@ -117,9 +90,6 @@ services: <<: *service image: "marginalia/assistant-service" container_name: "assistant-service" - depends_on: - control-service: - condition: service_healthy expose: - 80 labels: @@ -138,9 +108,6 @@ services: <<: *service image: "marginalia/api-service" container_name: "api-service" - depends_on: - control-service: - condition: service_healthy expose: - "80" labels: diff --git a/settings.gradle b/settings.gradle index af0cad4f..fca2091c 100644 --- a/settings.gradle +++ b/settings.gradle @@ -1,4 +1,4 @@ -rootProject.name = 'marginalia.nu' +rootProject.name = 'marginalia' include 'code:services-core:index-service' include 'code:services-core:assistant-service' @@ -11,6 +11,29 @@ include 'code:services-application:api-service' include 'code:services-application:dating-service' include 'code:services-application:explorer-service' +include 'code:functions:math' +include 'code:functions:math:api' + +include 
'code:functions:domain-info' +include 'code:functions:domain-info:api' + +include 'code:functions:link-graph:partition' +include 'code:functions:link-graph:aggregate' +include 'code:functions:link-graph:api' + +include 'code:functions:search-query' +include 'code:functions:search-query:api' + +include 'code:execution' +include 'code:execution:api' + +include 'code:index' +include 'code:index:api' +include 'code:index:index-journal' +include 'code:index:query' +include 'code:index:index-forward' +include 'code:index:index-reverse' + include 'code:libraries:array' include 'code:libraries:geo-ip' include 'code:libraries:btree' @@ -30,8 +53,6 @@ include 'code:libraries:message-queue' include 'code:features-search:screenshots' include 'code:features-search:random-websites' include 'code:features-search:feedlot-client' -include 'code:features-qs:query-parser' -include 'code:features-index:result-ranking' include 'code:features-convert:adblock' include 'code:features-convert:anchor-keywords' @@ -47,21 +68,9 @@ include 'code:features-crawl:crawl-blocklist' include 'code:features-crawl:link-parser' include 'code:features-crawl:content-type' -include 'code:features-index:index-journal' -include 'code:features-index:index-query' -include 'code:features-index:index-forward' -include 'code:features-index:index-reverse' -include 'code:features-index:domain-ranking' - -include 'code:api:actor-api' -include 'code:api:query-api' -include 'code:api:index-api' -include 'code:api:assistant-api' -include 'code:api:process-mqapi' -include 'code:api:executor-api' +include 'code:process-mqapi' include 'code:common:service-discovery' -include 'code:common:service-client' include 'code:common:db' include 'code:common:linkdb' include 'code:common:service' @@ -131,7 +140,6 @@ dependencyResolutionManagement { library('grpc-protobuf', 'io.grpc', 'grpc-protobuf').version('1.49.2') library('grpc-stub', 'io.grpc', 'grpc-stub').version('1.49.2') library('grpc-netty', 'io.grpc', 
'grpc-netty-shaded').version('1.49.2') - library('rxjava', 'io.reactivex.rxjava3', 'rxjava').version('3.1.6') library('prometheus', 'io.prometheus', 'simpleclient').version('0.16.0') library('prometheus-servlet', 'io.prometheus', 'simpleclient_servlet').version('0.16.0') @@ -177,7 +185,7 @@ dependencyResolutionManagement { library('bucket4j','com.github.vladimir-bukhtoyarov','bucket4j-core').version('7.5.0') library('gson','com.google.code.gson','gson').version('2.10.1') - library('gson-type-adapter','com.github.Marcono1234','gson-record-type-adapter-factory').version('0.2.0') + library('gson-type-adapter','com.github.Marcono1234','gson-record-type-adapter-factory').version('0.3.0') library('zstd','com.github.luben','zstd-jni').version('1.5.2-2') library('lz4','org.lz4','lz4-java').version('1.8.0') @@ -201,9 +209,13 @@ dependencyResolutionManagement { library('sqlite','org.xerial','sqlite-jdbc').version('3.41.2.2') library('javax.annotation','javax.annotation','javax.annotation-api').version('1.3.2') + library('parquet-column', 'org.apache.parquet','parquet-column').version('1.13.1') library('parquet-hadoop', 'org.apache.parquet','parquet-hadoop').version('1.13.1') + library('curator-framework', 'org.apache.curator','curator-framework').version('5.6.0') + library('curator-x-discovery', 'org.apache.curator','curator-x-discovery').version('5.6.0') + bundle('slf4j', ['slf4j.api', 'log4j.api', 'log4j.core', 'log4j.slf4j']) bundle('slf4j.test', ['slf4j.jdk14']) bundle('prometheus', ['prometheus', 'prometheus-servlet', 'prometheus-server', 'prometheus-hotspot']) @@ -217,9 +229,8 @@ dependencyResolutionManagement { bundle('parquet', ['parquet-column', 'parquet-hadoop']) bundle('junit', ['junit.jupiter', 'junit.jupiter.engine']) bundle('flyway', ['flyway.core', 'flyway.mysql']) + + bundle('curator', ['curator-framework', 'curator-x-discovery']) } - - } - } diff --git a/srcsets.gradle b/srcsets.gradle new file mode 100644 index 00000000..ae1e0729 --- /dev/null +++ 
b/srcsets.gradle @@ -0,0 +1,51 @@ +/** Configures the source sets for the code/ subprojects. + * + * Ideally this would have been done in the root build.gradle file, but due to disagreements + * between Gradle and IntelliJ's gradle plugin about how to interpret the sourceSets block + * when applied to subprojects from the root project, this has to be done in each subproject. + * */ +apply plugin: 'java' +apply plugin: 'io.freefair.lombok' + +dependencies { + implementation libs.lombok + testImplementation libs.lombok + annotationProcessor libs.lombok + + lombok libs.lombok // prevent plugin from downgrading the version to something incompatible with '19 +} + +test { + maxHeapSize = "8G" + useJUnitPlatform() +} + +tasks.register('fastTests', Test) { + maxHeapSize = "8G" + useJUnitPlatform { + excludeTags "slow" + } +} + +sourceSets { + main { + java { + srcDirs = [ + 'java', + 'build/generated/source/proto/main/grpc', + 'build/generated/source/proto/main/java' + ] + } + resources { + srcDirs = [ 'resources' ] + } + } + test { + java { + srcDirs = [ 'test' ] + } + resources { + srcDirs = [ 'test-resources' ] + } + } +} diff --git a/third-party/encyclopedia-marginalia-nu/build.gradle b/third-party/encyclopedia-marginalia-nu/build.gradle index 992f70e2..faf794ec 100644 --- a/third-party/encyclopedia-marginalia-nu/build.gradle +++ b/third-party/encyclopedia-marginalia-nu/build.gradle @@ -1,5 +1,6 @@ plugins { id 'java' + id 'io.freefair.lombok' } java { @@ -15,6 +16,12 @@ dependencies { implementation libs.zstd implementation libs.bundles.slf4j + implementation libs.lombok + testImplementation libs.lombok + annotationProcessor libs.lombok + + lombok libs.lombok // prevent plugin from downgrading the version to something incompatible with '19 + implementation project(':code:libraries:blocking-thread-pool') implementation project(':third-party:openzim') diff --git a/third-party/encyclopedia-marginalia-nu/src/main/java/nu/marginalia/encyclopedia/EncyclopediaConverter.java 
b/third-party/encyclopedia-marginalia-nu/src/main/java/nu/marginalia/encyclopedia/EncyclopediaConverter.java index 1920f536..e5274652 100644 --- a/third-party/encyclopedia-marginalia-nu/src/main/java/nu/marginalia/encyclopedia/EncyclopediaConverter.java +++ b/third-party/encyclopedia-marginalia-nu/src/main/java/nu/marginalia/encyclopedia/EncyclopediaConverter.java @@ -3,7 +3,6 @@ package nu.marginalia.encyclopedia; import nu.marginalia.encyclopedia.cleaner.WikiCleaner; import nu.marginalia.encyclopedia.store.ArticleDbProvider; import nu.marginalia.encyclopedia.store.ArticleStoreWriter; -import nu.marginalia.util.SimpleBlockingThreadPool; import org.openzim.ZIMTypes.ZIMFile; import org.openzim.ZIMTypes.ZIMReader; import org.slf4j.LoggerFactory; @@ -13,7 +12,7 @@ import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; import java.sql.SQLException; -import java.util.concurrent.TimeUnit; +import java.util.concurrent.Executors; import java.util.concurrent.atomic.AtomicInteger; import java.util.function.BiConsumer; import java.util.function.Predicate; @@ -26,41 +25,35 @@ public class EncyclopediaConverter { public static void convert(Path inputFile, Path outputFile) throws IOException, SQLException, InterruptedException { var wc = new WikiCleaner(); - var pool = new SimpleBlockingThreadPool("Convert ZIM", - Math.clamp(Runtime.getRuntime().availableProcessors() - 2, 1, 32), - 2); - var size = new AtomicInteger(); - if (!Files.exists(inputFile)) { - throw new IllegalStateException("ZIM file not found: " + inputFile); - } - Files.deleteIfExists(outputFile); + try (var executor = Executors.newWorkStealingPool(Math.clamp(Runtime.getRuntime().availableProcessors() - 2, 1, 32))) { - try (var asw = new ArticleStoreWriter(new ArticleDbProvider(outputFile))) { - Predicate keepGoing = (s) -> true; + var size = new AtomicInteger(); - BiConsumer handleArticle = (url, html) -> { - if (pool.isTerminated()) - return; + if (!Files.exists(inputFile)) { + throw 
new IllegalStateException("ZIM file not found: " + inputFile); + } + Files.deleteIfExists(outputFile); - pool.submitQuietly(() -> { - int sz = size.incrementAndGet(); - if (sz % 1000 == 0) { - System.out.printf("\u001b[2K\r%d", sz); - } - asw.add(wc.cleanWikiJunk(url, html)); - }); + try (var asw = new ArticleStoreWriter(new ArticleDbProvider(outputFile))) { + Predicate keepGoing = (s) -> true; - size.incrementAndGet(); - }; + BiConsumer handleArticle = (url, html) -> { + if (executor.isTerminated()) + return; - new ZIMReader(new ZIMFile(inputFile.toString())).forEachArticles(handleArticle, keepGoing); + executor.submit(() -> { + int sz = size.incrementAndGet(); + if (sz % 1000 == 0) { + System.out.printf("\u001b[2K\r%d", sz); + } + asw.add(wc.cleanWikiJunk(url, html)); + }); - pool.shutDown(); - logger.info("Waiting for pool to finish"); + size.incrementAndGet(); + }; - while (!pool.awaitTermination(1, TimeUnit.SECONDS)) { - // ... + new ZIMReader(new ZIMFile(inputFile.toString())).forEachArticles(handleArticle, keepGoing); } } } diff --git a/third-party/parquet-floor/src/main/java/blue/strategic/parquet/ParquetReader.java b/third-party/parquet-floor/src/main/java/blue/strategic/parquet/ParquetReader.java index 45718fe8..0a5e8375 100644 --- a/third-party/parquet-floor/src/main/java/blue/strategic/parquet/ParquetReader.java +++ b/third-party/parquet-floor/src/main/java/blue/strategic/parquet/ParquetReader.java @@ -13,7 +13,6 @@ import org.apache.parquet.io.DelegatingSeekableInputStream; import org.apache.parquet.io.InputFile; import org.apache.parquet.io.SeekableInputStream; import org.apache.parquet.io.api.GroupConverter; -import org.apache.parquet.schema.LogicalTypeAnnotation; import org.apache.parquet.schema.MessageType; import org.apache.parquet.schema.PrimitiveType;