Restructuring the git repo
This commit is contained in:
parent
9c665bbc74
commit
4fdaaa16ba
3
.gitignore
vendored
3
.gitignore
vendored
@ -4,3 +4,6 @@ build/
|
||||
*~
|
||||
.gradle/
|
||||
.idea/
|
||||
lombok.config
|
||||
Dockerfile
|
||||
run
|
||||
|
33
README.md
33
README.md
@ -1,31 +1,38 @@
|
||||
# marginalia.nu
|
||||
|
||||
This is the source code for marginalia.nu, including the [search engine](https://search.marginalia.nu),
|
||||
the [MEMEX/gemini server](https://memex.marginalia.nu), the and the [encyclopedia service](https://encyclopedia.marginalia.nu).
|
||||
This is the source code for [Marginalia Search](https://search.marginalia.nu).
|
||||
|
||||
The aim of the project is to develop new and alternative discovery methods for the Internet.
|
||||
It's an experimental workshop as much as it is a public service, the overarching goal is to
|
||||
elevate the more human, non-commercial sides of the Internet. A side-goal is to do this without
|
||||
requiring datacenters and expensive enterprise hardware, to run this operation on affordable hardware.
|
||||
|
||||
The canonical git server for this project is [https://git.marginalia.nu](https://git.marginalia.nu).
|
||||
It is fine to mirror it on other hosts, but if you have issues or questions
|
||||
git.marginalia.nu is where you want to go.
|
||||
## Set up instructions
|
||||
|
||||
## Important note about wmsa.local
|
||||
For local development, you're strongly encouraged to use docker or podman.
|
||||
From a fresh to running system, you'll need to do this:
|
||||
|
||||
This project has a [sister repository called wmsa.local](https://git.marginalia.nu/marginalia/wmsa.local)
|
||||
that contains scripts and configuration files for running and developing the code.
|
||||
```
|
||||
$ ./gradlew assemble
|
||||
|
||||
Without it, development is very unpleasant.
|
||||
$ ./gradlew docker
|
||||
|
||||
While developing the code, you will want an environment variable WMSA_HOME pointing to
|
||||
the directory in which wmsa.local is checked out, otherwise the code will not run and
|
||||
several tests will fail.
|
||||
$ vim run/settings.profile
|
||||
|
||||
(follow instructions in file)
|
||||
|
||||
$ run/setup.sh
|
||||
|
||||
$ run/reconvert.sh
|
||||
|
||||
$ docker-compose up
|
||||
```
|
||||
|
||||
Wait a moment and check out [https://localhost:8080](https://localhost:8080).
|
||||
|
||||
## Documentation
|
||||
|
||||
Documentation is a work in progress. See the [wiki](https://git.marginalia.nu/marginalia/marginalia.nu/wiki).
|
||||
Documentation is a work in progress.
|
||||
|
||||
## Contributing
|
||||
|
||||
|
45
api/assistant-api/build.gradle
Normal file
45
api/assistant-api/build.gradle
Normal file
@ -0,0 +1,45 @@
|
||||
plugins {
|
||||
id 'java'
|
||||
id "io.freefair.lombok" version "5.3.3.3"
|
||||
|
||||
id 'jvm-test-suite'
|
||||
}
|
||||
|
||||
java {
|
||||
toolchain {
|
||||
languageVersion.set(JavaLanguageVersion.of(17))
|
||||
}
|
||||
}
|
||||
dependencies {
|
||||
implementation project(':third-party')
|
||||
implementation project(':protocol')
|
||||
implementation project(':common:model')
|
||||
implementation project(':common:config')
|
||||
implementation project(':common:service-discovery')
|
||||
implementation project(':common:service-client')
|
||||
|
||||
implementation libs.lombok
|
||||
annotationProcessor libs.lombok
|
||||
implementation libs.bundles.slf4j
|
||||
|
||||
implementation libs.prometheus
|
||||
implementation libs.notnull
|
||||
implementation libs.guice
|
||||
implementation libs.rxjava
|
||||
implementation libs.gson
|
||||
|
||||
testImplementation libs.bundles.slf4j.test
|
||||
testImplementation libs.bundles.junit
|
||||
testImplementation libs.mockito
|
||||
|
||||
}
|
||||
|
||||
test {
|
||||
useJUnitPlatform()
|
||||
}
|
||||
|
||||
task fastTests(type: Test) {
|
||||
useJUnitPlatform {
|
||||
excludeTags "slow"
|
||||
}
|
||||
}
|
@ -1,28 +1,32 @@
|
||||
package nu.marginalia.wmsa.edge.assistant.client;
|
||||
package nu.marginalia.assistant.client;
|
||||
|
||||
import com.google.inject.Inject;
|
||||
import com.google.inject.Singleton;
|
||||
import io.reactivex.rxjava3.core.Observable;
|
||||
import nu.marginalia.wmsa.client.AbstractDynamicClient;
|
||||
import nu.marginalia.wmsa.client.exception.RouteNotConfiguredException;
|
||||
import nu.marginalia.wmsa.configuration.ServiceDescriptor;
|
||||
import nu.marginalia.wmsa.configuration.server.Context;
|
||||
import nu.marginalia.wmsa.edge.assistant.dict.DictionaryResponse;
|
||||
import org.eclipse.jetty.util.UrlEncoded;
|
||||
import nu.marginalia.assistant.client.model.DictionaryResponse;
|
||||
import nu.marginalia.client.AbstractDynamicClient;
|
||||
import nu.marginalia.client.exception.RouteNotConfiguredException;
|
||||
import nu.marginalia.WmsaHome;
|
||||
import nu.marginalia.model.gson.GsonFactory;
|
||||
import nu.marginalia.service.descriptor.ServiceDescriptors;
|
||||
import nu.marginalia.service.id.ServiceId;
|
||||
import nu.marginalia.client.Context;
|
||||
|
||||
import java.net.URLEncoder;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.List;
|
||||
|
||||
@Singleton
|
||||
public class AssistantClient extends AbstractDynamicClient {
|
||||
|
||||
@Inject
|
||||
public AssistantClient() {
|
||||
super(ServiceDescriptor.EDGE_ASSISTANT);
|
||||
public AssistantClient(ServiceDescriptors descriptors) {
|
||||
super(descriptors.forId(ServiceId.Assistant), WmsaHome.getHostsFile(), GsonFactory::get);
|
||||
}
|
||||
|
||||
public Observable<DictionaryResponse> dictionaryLookup(Context ctx, String word) {
|
||||
try {
|
||||
return super.get(ctx, "/dictionary/" + UrlEncoded.encodeString(word), DictionaryResponse.class);
|
||||
return super.get(ctx, "/dictionary/" + URLEncoder.encode(word, StandardCharsets.UTF_8), DictionaryResponse.class);
|
||||
}
|
||||
catch (RouteNotConfiguredException ex) {
|
||||
return Observable.empty();
|
||||
@ -32,7 +36,7 @@ public class AssistantClient extends AbstractDynamicClient {
|
||||
@SuppressWarnings("unchecked")
|
||||
public Observable<List<String>> spellCheck(Context ctx, String word) {
|
||||
try {
|
||||
return (Observable<List<String>>) (Object) super.get(ctx, "/spell-check/" + UrlEncoded.encodeString(word), List.class);
|
||||
return (Observable<List<String>>) (Object) super.get(ctx, "/spell-check/" + URLEncoder.encode(word, StandardCharsets.UTF_8), List.class);
|
||||
}
|
||||
catch (RouteNotConfiguredException ex) {
|
||||
return Observable.empty();
|
||||
@ -49,7 +53,7 @@ public class AssistantClient extends AbstractDynamicClient {
|
||||
|
||||
public Observable<String> evalMath(Context ctx, String expression) {
|
||||
try {
|
||||
return super.get(ctx, "/eval-expression?value=" + UrlEncoded.encodeString(expression));
|
||||
return super.get(ctx, "/eval-expression?value=" + URLEncoder.encode(expression, StandardCharsets.UTF_8));
|
||||
}
|
||||
catch (RouteNotConfiguredException ex) {
|
||||
return Observable.empty();
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.wmsa.edge.assistant.dict;
|
||||
package nu.marginalia.assistant.client.model;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Getter;
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.wmsa.edge.assistant.dict;
|
||||
package nu.marginalia.assistant.client.model;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Getter;
|
48
api/index-api/build.gradle
Normal file
48
api/index-api/build.gradle
Normal file
@ -0,0 +1,48 @@
|
||||
plugins {
|
||||
id 'java'
|
||||
id "io.freefair.lombok" version "5.3.3.3"
|
||||
|
||||
id 'jvm-test-suite'
|
||||
}
|
||||
|
||||
java {
|
||||
toolchain {
|
||||
languageVersion.set(JavaLanguageVersion.of(17))
|
||||
}
|
||||
}
|
||||
|
||||
dependencies {
|
||||
implementation project(':third-party')
|
||||
implementation project(':protocol')
|
||||
implementation project(':common:model')
|
||||
implementation project(':common:config')
|
||||
implementation project(':common:service-discovery')
|
||||
implementation project(':common:service-client')
|
||||
|
||||
implementation project(':index:index-query')
|
||||
|
||||
implementation libs.lombok
|
||||
annotationProcessor libs.lombok
|
||||
implementation libs.bundles.slf4j
|
||||
|
||||
implementation libs.prometheus
|
||||
implementation libs.notnull
|
||||
implementation libs.guice
|
||||
implementation libs.rxjava
|
||||
implementation libs.protobuf
|
||||
implementation libs.gson
|
||||
|
||||
testImplementation libs.bundles.slf4j.test
|
||||
testImplementation libs.bundles.junit
|
||||
testImplementation libs.mockito
|
||||
}
|
||||
|
||||
test {
|
||||
useJUnitPlatform()
|
||||
}
|
||||
|
||||
task fastTests(type: Test) {
|
||||
useJUnitPlatform {
|
||||
excludeTags "slow"
|
||||
}
|
||||
}
|
@ -0,0 +1,45 @@
|
||||
package nu.marginalia.index.client;
|
||||
|
||||
import com.google.inject.Inject;
|
||||
import com.google.inject.Singleton;
|
||||
import io.prometheus.client.Summary;
|
||||
import io.reactivex.rxjava3.core.Observable;
|
||||
import nu.marginalia.WmsaHome;
|
||||
import nu.marginalia.client.AbstractDynamicClient;
|
||||
import nu.marginalia.client.Context;
|
||||
import nu.marginalia.index.client.model.query.EdgeSearchSpecification;
|
||||
import nu.marginalia.index.client.model.results.EdgeSearchResultItem;
|
||||
import nu.marginalia.index.client.model.results.EdgeSearchResultSet;
|
||||
import nu.marginalia.model.gson.GsonFactory;
|
||||
import nu.marginalia.service.descriptor.ServiceDescriptors;
|
||||
import nu.marginalia.service.id.ServiceId;
|
||||
|
||||
import javax.annotation.CheckReturnValue;
|
||||
import java.util.List;
|
||||
|
||||
@Singleton
|
||||
public class EdgeIndexClient extends AbstractDynamicClient {
|
||||
|
||||
private static final Summary wmsa_search_index_api_time = Summary.build().name("wmsa_search_index_api_time").help("-").register();
|
||||
|
||||
@Inject
|
||||
public EdgeIndexClient(ServiceDescriptors descriptors) {
|
||||
super(descriptors.forId(ServiceId.Index), WmsaHome.getHostsFile(), GsonFactory::get);
|
||||
|
||||
setTimeout(30);
|
||||
}
|
||||
|
||||
@CheckReturnValue
|
||||
public List<EdgeSearchResultItem> query(Context ctx, EdgeSearchSpecification specs) {
|
||||
return wmsa_search_index_api_time.time(
|
||||
() -> this.postGet(ctx, "/search/", specs, EdgeSearchResultSet.class).blockingFirst().getResults()
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
@CheckReturnValue
|
||||
public Observable<Boolean> isBlocked(Context ctx) {
|
||||
return super.get(ctx, "/is-blocked", Boolean.class);
|
||||
}
|
||||
|
||||
}
|
@ -1,10 +1,10 @@
|
||||
package nu.marginalia.wmsa.edge.model.search.domain;
|
||||
package nu.marginalia.index.client.model.domain;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Getter;
|
||||
import lombok.ToString;
|
||||
import nu.marginalia.wmsa.edge.model.EdgeUrl;
|
||||
import nu.marginalia.wmsa.edge.model.id.EdgeIdList;
|
||||
import nu.marginalia.model.EdgeUrl;
|
||||
import nu.marginalia.model.id.EdgeIdList;
|
||||
|
||||
@AllArgsConstructor @Getter @ToString
|
||||
public class EdgeDomainSearchResults {
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.wmsa.edge.model.search.domain;
|
||||
package nu.marginalia.index.client.model.domain;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.ToString;
|
@ -1,10 +1,9 @@
|
||||
package nu.marginalia.wmsa.edge.model.search;
|
||||
package nu.marginalia.index.client.model.query;
|
||||
|
||||
import lombok.*;
|
||||
import nu.marginalia.wmsa.edge.index.model.QueryLimits;
|
||||
import nu.marginalia.wmsa.edge.index.model.QueryStrategy;
|
||||
import nu.marginalia.wmsa.edge.index.svc.searchset.SearchSetIdentifier;
|
||||
import nu.marginalia.wmsa.edge.model.search.domain.SpecificationLimit;
|
||||
import nu.marginalia.index.query.limit.QueryLimits;
|
||||
import nu.marginalia.index.query.limit.QueryStrategy;
|
||||
import nu.marginalia.index.query.limit.SpecificationLimit;
|
||||
|
||||
import java.util.List;
|
||||
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.wmsa.edge.model.search;
|
||||
package nu.marginalia.index.client.model.query;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Getter;
|
@ -1,6 +1,4 @@
|
||||
package nu.marginalia.wmsa.edge.index.svc.searchset;
|
||||
|
||||
import nu.marginalia.wmsa.edge.search.model.EdgeSearchProfile;
|
||||
package nu.marginalia.index.client.model.query;
|
||||
|
||||
/** Identifies a RankingSearchSet, associated with an EdgeSearchProfile
|
||||
*
|
@ -1,9 +1,9 @@
|
||||
package nu.marginalia.wmsa.edge.model.search;
|
||||
package nu.marginalia.index.client.model.results;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Getter;
|
||||
import nu.marginalia.wmsa.edge.model.EdgeUrl;
|
||||
import nu.marginalia.wmsa.edge.model.id.EdgeId;
|
||||
import nu.marginalia.model.EdgeUrl;
|
||||
import nu.marginalia.model.id.EdgeId;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
@ -41,7 +41,7 @@ public class EdgeSearchResultItem {
|
||||
return scoreValue;
|
||||
}
|
||||
|
||||
private transient int domainId = 0;
|
||||
private transient int domainId = Integer.MIN_VALUE;
|
||||
public void setDomainId(int domainId) {
|
||||
this.domainId = domainId;
|
||||
}
|
||||
@ -69,12 +69,12 @@ public class EdgeSearchResultItem {
|
||||
}
|
||||
|
||||
public long deduplicationKey() {
|
||||
final int ranking = getDomainId();
|
||||
final int domainId = getDomainId();
|
||||
|
||||
if (ranking == Integer.MAX_VALUE || ranking == Integer.MIN_VALUE) {
|
||||
if (domainId == Integer.MAX_VALUE || domainId == Integer.MIN_VALUE) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
return ranking;
|
||||
return domainId;
|
||||
}
|
||||
}
|
@ -1,9 +1,9 @@
|
||||
package nu.marginalia.wmsa.edge.model.search;
|
||||
package nu.marginalia.index.client.model.results;
|
||||
|
||||
import nu.marginalia.wmsa.edge.index.model.EdgePageDocumentFlags;
|
||||
import nu.marginalia.wmsa.edge.index.model.EdgePageDocumentsMetadata;
|
||||
import nu.marginalia.wmsa.edge.index.model.EdgePageWordFlags;
|
||||
import nu.marginalia.wmsa.edge.index.model.EdgePageWordMetadata;
|
||||
import nu.marginalia.model.crawl.EdgePageWordFlags;
|
||||
import nu.marginalia.model.idx.EdgePageWordMetadata;
|
||||
import nu.marginalia.model.crawl.EdgePageDocumentFlags;
|
||||
import nu.marginalia.model.idx.EdgePageDocumentsMetadata;
|
||||
|
||||
import static java.lang.Integer.lowestOneBit;
|
||||
import static java.lang.Integer.numberOfTrailingZeros;
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.wmsa.edge.model.search;
|
||||
package nu.marginalia.index.client.model.results;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Getter;
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.wmsa.edge.model.search;
|
||||
package nu.marginalia.index.client.model.results;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Getter;
|
7
api/readme.md
Normal file
7
api/readme.md
Normal file
@ -0,0 +1,7 @@
|
||||
# Core Service Clients
|
||||
|
||||
These are clients for the [core services](../services-core/), along with what models
|
||||
are necessary for speaking to them.
|
||||
|
||||
All that is necessary is to `@Inject` them into the constructor and then
|
||||
requests can be sent.
|
46
api/search-api/build.gradle
Normal file
46
api/search-api/build.gradle
Normal file
@ -0,0 +1,46 @@
|
||||
plugins {
|
||||
id 'java'
|
||||
id "io.freefair.lombok" version "5.3.3.3"
|
||||
|
||||
id 'jvm-test-suite'
|
||||
}
|
||||
|
||||
java {
|
||||
toolchain {
|
||||
languageVersion.set(JavaLanguageVersion.of(17))
|
||||
}
|
||||
}
|
||||
|
||||
dependencies {
|
||||
implementation project(':third-party')
|
||||
implementation project(':protocol')
|
||||
implementation project(':common:model')
|
||||
implementation project(':common:config')
|
||||
implementation project(':common:service-discovery')
|
||||
implementation project(':common:service-client')
|
||||
|
||||
implementation libs.lombok
|
||||
annotationProcessor libs.lombok
|
||||
implementation libs.bundles.slf4j
|
||||
|
||||
implementation libs.prometheus
|
||||
implementation libs.notnull
|
||||
implementation libs.guice
|
||||
implementation libs.rxjava
|
||||
implementation libs.gson
|
||||
|
||||
testImplementation libs.bundles.slf4j.test
|
||||
testImplementation libs.bundles.junit
|
||||
testImplementation libs.mockito
|
||||
|
||||
}
|
||||
|
||||
test {
|
||||
useJUnitPlatform()
|
||||
}
|
||||
|
||||
task fastTests(type: Test) {
|
||||
useJUnitPlatform {
|
||||
excludeTags "slow"
|
||||
}
|
||||
}
|
@ -1,11 +1,15 @@
|
||||
package nu.marginalia.wmsa.edge.search.client;
|
||||
package nu.marginalia.search.client;
|
||||
|
||||
import com.google.inject.Inject;
|
||||
import com.google.inject.Singleton;
|
||||
import io.reactivex.rxjava3.core.Observable;
|
||||
import nu.marginalia.wmsa.api.model.ApiSearchResults;
|
||||
import nu.marginalia.wmsa.client.AbstractDynamicClient;
|
||||
import nu.marginalia.wmsa.configuration.ServiceDescriptor;
|
||||
import nu.marginalia.wmsa.configuration.server.Context;
|
||||
import nu.marginalia.client.AbstractDynamicClient;
|
||||
import nu.marginalia.model.gson.GsonFactory;
|
||||
import nu.marginalia.search.client.model.ApiSearchResults;
|
||||
import nu.marginalia.service.descriptor.ServiceDescriptors;
|
||||
import nu.marginalia.service.id.ServiceId;
|
||||
import nu.marginalia.WmsaHome;
|
||||
import nu.marginalia.client.Context;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
@ -17,8 +21,9 @@ import java.nio.charset.StandardCharsets;
|
||||
public class EdgeSearchClient extends AbstractDynamicClient {
|
||||
private final Logger logger = LoggerFactory.getLogger(getClass());
|
||||
|
||||
public EdgeSearchClient() {
|
||||
super(ServiceDescriptor.EDGE_SEARCH);
|
||||
@Inject
|
||||
public EdgeSearchClient(ServiceDescriptors descriptors) {
|
||||
super(descriptors.forId(ServiceId.Search), WmsaHome.getHostsFile(), GsonFactory::get);
|
||||
}
|
||||
|
||||
@CheckReturnValue
|
@ -0,0 +1,18 @@
|
||||
package nu.marginalia.search.client.model;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Getter;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
@AllArgsConstructor @Getter
|
||||
public class ApiSearchResult {
|
||||
public String url;
|
||||
public String title;
|
||||
public String description;
|
||||
public double quality;
|
||||
|
||||
public List<List<ApiSearchResultQueryDetails>> details = new ArrayList<>();
|
||||
|
||||
}
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.wmsa.api.model;
|
||||
package nu.marginalia.search.client.model;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Getter;
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.wmsa.api.model;
|
||||
package nu.marginalia.search.client.model;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Getter;
|
52
build.gradle
52
build.gradle
@ -1,42 +1,16 @@
|
||||
plugins {
|
||||
id 'java'
|
||||
|
||||
id 'com.github.johnrengelman.shadow' version '6.0.0'
|
||||
}
|
||||
|
||||
group 'nu.marginalia'
|
||||
group 'marginalia'
|
||||
version 'SNAPSHOT'
|
||||
|
||||
compileJava.options.encoding = "UTF-8"
|
||||
compileTestJava.options.encoding = "UTF-8"
|
||||
repositories {
|
||||
mavenLocal()
|
||||
maven { url "https://artifactory.cronapp.io/public-release/" }
|
||||
maven { url "https://repo1.maven.org/maven2/" }
|
||||
maven { url "https://www2.ph.ed.ac.uk/maven2/" }
|
||||
maven { url "https://jitpack.io/" }
|
||||
exclusiveContent {
|
||||
forRepository {
|
||||
maven {
|
||||
url = uri("https://jitpack.io")
|
||||
}
|
||||
}
|
||||
filter {
|
||||
// Only use JitPack for the `gson-record-type-adapter-factory` library
|
||||
includeModule("com.github.Marcono1234", "gson-record-type-adapter-factory")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
shadowJar {
|
||||
zip64 true
|
||||
}
|
||||
jar {
|
||||
manifest {
|
||||
attributes 'Main-Class': "nu.marginalia.wmsa.configuration.ServiceDescriptor"
|
||||
}
|
||||
from {
|
||||
configurations.shadow.collect { it.isDirectory() ? it : zipTree(it) }
|
||||
}
|
||||
task dist(type: Copy) {
|
||||
from subprojects.collect { it.tasks.withType(Tar) }
|
||||
into "$buildDir/dist"
|
||||
}
|
||||
|
||||
java {
|
||||
@ -44,19 +18,3 @@ java {
|
||||
languageVersion.set(JavaLanguageVersion.of(17))
|
||||
}
|
||||
}
|
||||
|
||||
dependencies {
|
||||
implementation project(':marginalia_nu')
|
||||
}
|
||||
task version() { //
|
||||
}
|
||||
|
||||
test {
|
||||
maxParallelForks = 16
|
||||
forkEvery = 1
|
||||
maxHeapSize = "8G"
|
||||
useJUnitPlatform {
|
||||
excludeTags "nobuild"
|
||||
}
|
||||
}
|
||||
|
||||
|
32
common/config/build.gradle
Normal file
32
common/config/build.gradle
Normal file
@ -0,0 +1,32 @@
|
||||
plugins {
|
||||
id 'java'
|
||||
id "io.freefair.lombok" version "5.3.3.3"
|
||||
|
||||
id 'jvm-test-suite'
|
||||
}
|
||||
|
||||
java {
|
||||
toolchain {
|
||||
languageVersion.set(JavaLanguageVersion.of(17))
|
||||
}
|
||||
}
|
||||
|
||||
dependencies {
|
||||
implementation project(':common:service-discovery')
|
||||
implementation project(':common:service-client')
|
||||
implementation project(':libraries:misc')
|
||||
}
|
||||
|
||||
test {
|
||||
maxParallelForks = Runtime.runtime.availableProcessors().intdiv(2) ?: 1
|
||||
maxHeapSize = "8G"
|
||||
useJUnitPlatform()
|
||||
}
|
||||
|
||||
task fastTests(type: Test) {
|
||||
maxParallelForks = Runtime.runtime.availableProcessors().intdiv(2) ?: 1
|
||||
maxHeapSize = "8G"
|
||||
useJUnitPlatform {
|
||||
excludeTags "slow"
|
||||
}
|
||||
}
|
@ -0,0 +1,22 @@
|
||||
package nu.marginalia;
|
||||
|
||||
import java.nio.file.Path;
|
||||
|
||||
public class LanguageModels {
|
||||
public final Path ngramBloomFilter;
|
||||
public final Path termFrequencies;
|
||||
|
||||
public final Path openNLPSentenceDetectionData;
|
||||
public final Path posRules;
|
||||
public final Path posDict;
|
||||
public final Path openNLPTokenData;
|
||||
|
||||
public LanguageModels(Path ngramBloomFilter, Path termFrequencies, Path openNLPSentenceDetectionData, Path posRules, Path posDict, Path openNLPTokenData) {
|
||||
this.ngramBloomFilter = ngramBloomFilter;
|
||||
this.termFrequencies = termFrequencies;
|
||||
this.openNLPSentenceDetectionData = openNLPSentenceDetectionData;
|
||||
this.posRules = posRules;
|
||||
this.posDict = posDict;
|
||||
this.openNLPTokenData = openNLPTokenData;
|
||||
}
|
||||
}
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.wmsa.configuration;
|
||||
package nu.marginalia;
|
||||
|
||||
public record UserAgent(String uaString) {
|
||||
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.wmsa.configuration;
|
||||
package nu.marginalia;
|
||||
|
||||
public record WebsiteUrl(String url) {
|
||||
public String withPath(String path) {
|
@ -1,6 +1,7 @@
|
||||
package nu.marginalia.wmsa.configuration;
|
||||
package nu.marginalia;
|
||||
|
||||
import nu.marginalia.util.language.conf.LanguageModels;
|
||||
|
||||
import nu.marginalia.service.descriptor.HostsFile;
|
||||
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
53
common/model/build.gradle
Normal file
53
common/model/build.gradle
Normal file
@ -0,0 +1,53 @@
|
||||
plugins {
|
||||
id 'java'
|
||||
id "io.freefair.lombok" version "5.3.3.3"
|
||||
id 'jvm-test-suite'
|
||||
}
|
||||
|
||||
java {
|
||||
toolchain {
|
||||
languageVersion.set(JavaLanguageVersion.of(17))
|
||||
}
|
||||
}
|
||||
|
||||
dependencies {
|
||||
implementation project(':common:service-discovery')
|
||||
implementation project(':common:service-client')
|
||||
implementation project(':libraries:misc')
|
||||
|
||||
implementation libs.lombok
|
||||
annotationProcessor libs.lombok
|
||||
implementation libs.bundles.slf4j
|
||||
|
||||
implementation libs.guice
|
||||
implementation libs.bundles.gson
|
||||
|
||||
implementation libs.notnull
|
||||
|
||||
implementation libs.commons.lang3
|
||||
|
||||
implementation libs.trove
|
||||
implementation libs.fastutil
|
||||
|
||||
implementation libs.rxjava
|
||||
implementation libs.bundles.mariadb
|
||||
|
||||
testImplementation libs.bundles.slf4j.test
|
||||
testImplementation libs.bundles.junit
|
||||
testImplementation libs.mockito
|
||||
}
|
||||
|
||||
test {
|
||||
maxParallelForks = Runtime.runtime.availableProcessors().intdiv(2) ?: 1
|
||||
maxHeapSize = "8G"
|
||||
useJUnitPlatform()
|
||||
}
|
||||
|
||||
task fastTests(type: Test) {
|
||||
maxParallelForks = Runtime.runtime.availableProcessors().intdiv(2) ?: 1
|
||||
maxHeapSize = "8G"
|
||||
useJUnitPlatform {
|
||||
excludeTags "slow"
|
||||
}
|
||||
}
|
||||
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.wmsa.edge.model;
|
||||
package nu.marginalia.model;
|
||||
|
||||
import lombok.*;
|
||||
|
||||
@ -10,8 +10,6 @@ import java.util.regex.Pattern;
|
||||
@AllArgsConstructor
|
||||
@Getter @Setter @Builder
|
||||
public class EdgeDomain {
|
||||
|
||||
|
||||
@Nonnull
|
||||
public final String subDomain;
|
||||
@Nonnull
|
||||
@ -109,6 +107,7 @@ public class EdgeDomain {
|
||||
}
|
||||
return domain.substring(0, cutPoint).toLowerCase();
|
||||
}
|
||||
|
||||
public String getLongDomainKey() {
|
||||
StringBuilder ret = new StringBuilder();
|
||||
|
@ -1,9 +1,9 @@
|
||||
package nu.marginalia.wmsa.edge.model;
|
||||
package nu.marginalia.model;
|
||||
|
||||
import lombok.Builder;
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
import nu.marginalia.wmsa.edge.converting.processor.logic.QueryParams;
|
||||
import nu.marginalia.util.QueryParams;
|
||||
|
||||
import java.net.URI;
|
||||
import java.net.URISyntaxException;
|
||||
@ -12,7 +12,7 @@ import java.util.Optional;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
@Getter @Setter @Builder
|
||||
public class EdgeUrl implements WideHashable {
|
||||
public class EdgeUrl {
|
||||
public final String proto;
|
||||
public final EdgeDomain domain;
|
||||
public final Integer port;
|
||||
@ -158,12 +158,6 @@ public class EdgeUrl implements WideHashable {
|
||||
return path.replaceAll(".*/", "");
|
||||
}
|
||||
|
||||
public long wideHash() {
|
||||
long domainHash = domain.hashCode();
|
||||
long thisHash = hashCode();
|
||||
return (domainHash << 32) | thisHash;
|
||||
}
|
||||
|
||||
public int depth() {
|
||||
return (int) path.chars().filter(c -> c=='/').count();
|
||||
}
|
@ -1,7 +1,7 @@
|
||||
package nu.marginalia.wmsa.edge.converting.interpreter.instruction;
|
||||
package nu.marginalia.model.crawl;
|
||||
|
||||
import nu.marginalia.wmsa.edge.index.model.EdgePageWordMetadata;
|
||||
import nu.marginalia.wmsa.edge.model.crawl.EdgePageWords;
|
||||
|
||||
import nu.marginalia.model.idx.EdgePageWordMetadata;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.wmsa.edge.model.crawl;
|
||||
package nu.marginalia.model.crawl;
|
||||
|
||||
|
||||
import lombok.*;
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.wmsa.edge.model.crawl;
|
||||
package nu.marginalia.model.crawl;
|
||||
|
||||
public enum EdgeDomainIndexingState {
|
||||
ACTIVE("Active"),
|
@ -1,7 +1,7 @@
|
||||
package nu.marginalia.wmsa.edge.model.crawl;
|
||||
package nu.marginalia.model.crawl;
|
||||
|
||||
import lombok.*;
|
||||
import nu.marginalia.wmsa.edge.model.EdgeDomain;
|
||||
import nu.marginalia.model.EdgeDomain;
|
||||
|
||||
@AllArgsConstructor @EqualsAndHashCode @Getter @Setter @Builder @ToString
|
||||
public class EdgeDomainLink {
|
@ -1,4 +1,5 @@
|
||||
package nu.marginalia.wmsa.edge.model.crawl;
|
||||
package nu.marginalia.model.crawl;
|
||||
|
||||
|
||||
public enum EdgeHtmlStandard {
|
||||
PLAIN(0, 1, 1993),
|
||||
@ -8,9 +9,13 @@ public enum EdgeHtmlStandard {
|
||||
XHTML(-0.1, 1.05, 2006),
|
||||
HTML5(0.5, 1.1, 2018);
|
||||
|
||||
/** Used to tune quality score */
|
||||
public final double offset;
|
||||
/** Used to tune quality score */
|
||||
public final double scale;
|
||||
|
||||
/** This parameter is used to bias publish date heuristics
|
||||
* */
|
||||
public final int yearGuess;
|
||||
|
||||
EdgeHtmlStandard(double offset, double scale, int yearGuess) {
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.wmsa.edge.index.model;
|
||||
package nu.marginalia.model.crawl;
|
||||
|
||||
import java.util.EnumSet;
|
||||
|
||||
@ -6,7 +6,7 @@ public enum EdgePageDocumentFlags {
|
||||
/** Simple processing was done, this document should be de-prioritized as a search result */
|
||||
Simple,
|
||||
|
||||
UnusedBit1,
|
||||
PlainText,
|
||||
UnusedBit2,
|
||||
UnusedBit3,
|
||||
UnusedBit4,
|
@ -1,9 +1,5 @@
|
||||
package nu.marginalia.wmsa.edge.index.model;
|
||||
package nu.marginalia.model.crawl;
|
||||
|
||||
import nu.marginalia.util.language.processing.KeywordCounter;
|
||||
import nu.marginalia.util.language.processing.NameCounter;
|
||||
import nu.marginalia.util.language.processing.SubjectCounter;
|
||||
import nu.marginalia.wmsa.edge.converting.processor.SiteWords;
|
||||
|
||||
import java.util.EnumSet;
|
||||
|
@ -1,10 +1,8 @@
|
||||
package nu.marginalia.wmsa.edge.model.crawl;
|
||||
package nu.marginalia.model.crawl;
|
||||
|
||||
import gnu.trove.list.array.TLongArrayList;
|
||||
import lombok.Getter;
|
||||
import lombok.ToString;
|
||||
import nu.marginalia.wmsa.edge.index.model.EdgePageWordFlags;
|
||||
import nu.marginalia.wmsa.edge.index.model.EdgePageWordMetadata;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
@ -39,7 +37,7 @@ public class EdgePageWords {
|
||||
List<Long> emptyMeta = new ArrayList<>(entries.size());
|
||||
|
||||
for (int i = 0; i < entries.size(); i++) {
|
||||
emptyMeta.add(EdgePageWordMetadata.emptyValue());
|
||||
emptyMeta.add(0L);
|
||||
}
|
||||
|
||||
return new EdgePageWords(entries, emptyMeta);
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.wmsa.edge.model.crawl;
|
||||
package nu.marginalia.model.crawl;
|
||||
|
||||
/** This should correspond to EC_URL.STATE */
|
||||
public enum EdgeUrlState {
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.wmsa.edge.converting.processor.logic;
|
||||
package nu.marginalia.model.crawl;
|
||||
|
||||
import java.util.Collection;
|
||||
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.wmsa.edge.converting.processor.logic.pubdate;
|
||||
package nu.marginalia.model.crawl;
|
||||
|
||||
import java.time.LocalDate;
|
||||
import java.time.format.DateTimeFormatter;
|
@ -0,0 +1,64 @@
|
||||
package nu.marginalia.model.dbcommon;
|
||||
|
||||
|
||||
import com.google.common.cache.Cache;
|
||||
import com.google.common.cache.CacheBuilder;
|
||||
import com.google.common.util.concurrent.UncheckedExecutionException;
|
||||
import com.google.inject.Inject;
|
||||
import com.google.inject.Singleton;
|
||||
import com.zaxxer.hikari.HikariDataSource;
|
||||
import lombok.SneakyThrows;
|
||||
import nu.marginalia.model.EdgeDomain;
|
||||
import nu.marginalia.model.id.EdgeId;
|
||||
|
||||
import java.util.NoSuchElementException;
|
||||
import java.util.Optional;
|
||||
|
||||
@Singleton
|
||||
public class DbDomainQueries {
|
||||
private final HikariDataSource dataSource;
|
||||
|
||||
private final Cache<EdgeDomain, EdgeId<EdgeDomain>> domainIdCache = CacheBuilder.newBuilder().maximumSize(10_000).build();
|
||||
|
||||
@Inject
|
||||
public DbDomainQueries(HikariDataSource dataSource)
|
||||
{
|
||||
this.dataSource = dataSource;
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
public EdgeId<EdgeDomain> getDomainId(EdgeDomain domain) {
|
||||
try (var connection = dataSource.getConnection()) {
|
||||
|
||||
return domainIdCache.get(domain, () -> {
|
||||
try (var stmt = connection.prepareStatement("SELECT ID FROM EC_DOMAIN WHERE DOMAIN_NAME=?")) {
|
||||
stmt.setString(1, domain.toString());
|
||||
var rsp = stmt.executeQuery();
|
||||
if (rsp.next()) {
|
||||
return new EdgeId<>(rsp.getInt(1));
|
||||
}
|
||||
}
|
||||
throw new NoSuchElementException();
|
||||
});
|
||||
}
|
||||
catch (UncheckedExecutionException ex) {
|
||||
throw ex.getCause();
|
||||
}
|
||||
}
|
||||
|
||||
@SneakyThrows
|
||||
public Optional<EdgeDomain> getDomain(EdgeId<EdgeDomain> id) {
|
||||
try (var connection = dataSource.getConnection()) {
|
||||
|
||||
try (var stmt = connection.prepareStatement("SELECT DOMAIN_NAME FROM EC_DOMAIN WHERE ID=?")) {
|
||||
stmt.setInt(1, id.id());
|
||||
var rsp = stmt.executeQuery();
|
||||
if (rsp.next()) {
|
||||
return Optional.of(new EdgeDomain(rsp.getString(1)));
|
||||
}
|
||||
return Optional.empty();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@ -1,9 +1,9 @@
|
||||
package nu.marginalia.wmsa.edge.dbcommon;
|
||||
package nu.marginalia.model.dbcommon;
|
||||
|
||||
import com.google.inject.ImplementedBy;
|
||||
import gnu.trove.set.hash.TIntHashSet;
|
||||
import nu.marginalia.wmsa.edge.model.EdgeDomain;
|
||||
import nu.marginalia.wmsa.edge.model.id.EdgeId;
|
||||
import nu.marginalia.model.EdgeDomain;
|
||||
import nu.marginalia.model.id.EdgeId;
|
||||
|
||||
@ImplementedBy(EdgeDomainBlacklistImpl.class)
|
||||
public interface EdgeDomainBlacklist {
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.wmsa.edge.dbcommon;
|
||||
package nu.marginalia.model.dbcommon;
|
||||
|
||||
import com.google.inject.Inject;
|
||||
import com.google.inject.Singleton;
|
||||
@ -6,6 +6,7 @@ import com.zaxxer.hikari.HikariDataSource;
|
||||
import gnu.trove.set.hash.TIntHashSet;
|
||||
import io.reactivex.rxjava3.schedulers.Schedulers;
|
||||
import lombok.SneakyThrows;
|
||||
import nu.marginalia.model.EdgeDomain;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
@ -67,5 +68,4 @@ public class EdgeDomainBlacklistImpl implements EdgeDomainBlacklist {
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
}
|
@ -1,11 +1,11 @@
|
||||
package nu.marginalia.wmsa.client;
|
||||
package nu.marginalia.model.gson;
|
||||
|
||||
import com.google.gson.*;
|
||||
import marcono1234.gson.recordadapter.RecordTypeAdapterFactory;
|
||||
import nu.marginalia.util.bigstring.BigString;
|
||||
import nu.marginalia.wmsa.edge.model.EdgeDomain;
|
||||
import nu.marginalia.wmsa.edge.model.EdgeUrl;
|
||||
import nu.marginalia.wmsa.edge.model.id.EdgeId;
|
||||
import nu.marginalia.bigstring.BigString;
|
||||
import nu.marginalia.model.EdgeDomain;
|
||||
import nu.marginalia.model.EdgeUrl;
|
||||
import nu.marginalia.model.id.EdgeId;
|
||||
|
||||
import java.net.URISyntaxException;
|
||||
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.wmsa.edge.model.id;
|
||||
package nu.marginalia.model.id;
|
||||
|
||||
|
||||
/**
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.wmsa.edge.model.id;
|
||||
package nu.marginalia.model.id;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.stream.IntStream;
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.wmsa.edge.model.id;
|
||||
package nu.marginalia.model.id;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.stream.IntStream;
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.wmsa.edge.model.id;
|
||||
package nu.marginalia.model.id;
|
||||
|
||||
import gnu.trove.TIntCollection;
|
||||
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.wmsa.edge.model.id;
|
||||
package nu.marginalia.model.id;
|
||||
|
||||
import gnu.trove.TIntCollection;
|
||||
import gnu.trove.list.array.TIntArrayList;
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.wmsa.edge.model.id;
|
||||
package nu.marginalia.model.id;
|
||||
|
||||
import gnu.trove.TIntCollection;
|
||||
import gnu.trove.set.hash.TIntHashSet;
|
@ -1,6 +1,7 @@
|
||||
package nu.marginalia.wmsa.edge.index.model;
|
||||
package nu.marginalia.model.idx;
|
||||
|
||||
import nu.marginalia.wmsa.edge.converting.processor.logic.pubdate.PubDate;
|
||||
import nu.marginalia.model.crawl.EdgePageDocumentFlags;
|
||||
import nu.marginalia.model.crawl.PubDate;
|
||||
|
||||
import java.util.EnumSet;
|
||||
import java.util.Set;
|
||||
@ -107,7 +108,6 @@ public record EdgePageDocumentsMetadata(int rank,
|
||||
return (int) ((encoded >>> TOPOLOGY_SHIFT) & TOPOLOGY_MASK);
|
||||
}
|
||||
public static int decodeYear(long encoded) {
|
||||
|
||||
return PubDate.fromYearByte((int) ((encoded >>> YEAR_SHIFT) & YEAR_MASK));
|
||||
}
|
||||
|
@ -1,5 +1,6 @@
|
||||
package nu.marginalia.wmsa.edge.index.model;
|
||||
package nu.marginalia.model.idx;
|
||||
|
||||
import nu.marginalia.model.crawl.EdgePageWordFlags;
|
||||
import nu.marginalia.util.BrailleBlockPunchCards;
|
||||
|
||||
import java.util.EnumSet;
|
@ -1,4 +1,6 @@
|
||||
package nu.marginalia.wmsa.edge.converting.interpreter.instruction;
|
||||
package nu.marginalia.util;
|
||||
|
||||
import nu.marginalia.model.crawl.DocumentKeywords;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
47
common/model/src/main/java/nu/marginalia/util/LineUtils.java
Normal file
47
common/model/src/main/java/nu/marginalia/util/LineUtils.java
Normal file
@ -0,0 +1,47 @@
|
||||
package nu.marginalia.util;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
public class LineUtils {
|
||||
|
||||
/** LF, CR, CRLF, LFCR-agnostic string-line splitter that preserves empty lines
|
||||
* that does not create a huge blob of a last item like String$split(regex, n)
|
||||
*
|
||||
*/
|
||||
public static List<String> firstNLines(String documentBody, int numLines) {
|
||||
List<String> lines = new ArrayList<>(numLines);
|
||||
|
||||
boolean eatCr = false;
|
||||
boolean eatLf = false;
|
||||
int startPos = 0;
|
||||
|
||||
for (int pos = 0; pos < documentBody.length() && lines.size() < numLines; pos++) {
|
||||
int cp = documentBody.charAt(pos);
|
||||
if (cp == '\r') {
|
||||
if (eatCr) {
|
||||
eatCr = false;
|
||||
}
|
||||
else {
|
||||
eatLf = true;
|
||||
lines.add(documentBody.substring(startPos, pos));
|
||||
}
|
||||
startPos = pos + 1;
|
||||
} else if (cp == '\n') {
|
||||
if (eatLf) {
|
||||
eatLf = false;
|
||||
}
|
||||
else {
|
||||
eatCr = true;
|
||||
lines.add(documentBody.substring(startPos, pos));
|
||||
}
|
||||
startPos = pos + 1;
|
||||
} else {
|
||||
eatCr = eatLf = false;
|
||||
}
|
||||
}
|
||||
|
||||
return lines;
|
||||
}
|
||||
|
||||
}
|
@ -9,6 +9,12 @@ import java.util.List;
|
||||
import java.util.concurrent.LinkedBlockingQueue;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
/** Generalization of the workflow <br>
|
||||
* -- single provider thread reading sequentially from disk <br>
|
||||
* -> multiple independent CPU-bound processing tasks <br>
|
||||
* -> single consumer thread writing to network/disk <br>
|
||||
* <p>
|
||||
*/
|
||||
public abstract class ParallelPipe<INPUT,INTERMEDIATE> {
|
||||
private final LinkedBlockingQueue<INPUT> inputs;
|
||||
private final LinkedBlockingQueue<INTERMEDIATE> intermediates;
|
||||
@ -61,8 +67,9 @@ public abstract class ParallelPipe<INPUT,INTERMEDIATE> {
|
||||
}
|
||||
}
|
||||
|
||||
logger.debug("Terminating {}", Thread.currentThread().getName());
|
||||
logger.info("Terminating {}", Thread.currentThread().getName());
|
||||
}
|
||||
|
||||
@SneakyThrows
|
||||
private void runReceiverThread() {
|
||||
while (expectingOutput || !inputs.isEmpty() || !intermediates.isEmpty()) {
|
||||
@ -80,12 +87,16 @@ public abstract class ParallelPipe<INPUT,INTERMEDIATE> {
|
||||
logger.info("Terminating {}", Thread.currentThread().getName());
|
||||
}
|
||||
|
||||
/** Begin processing an item */
|
||||
@SneakyThrows
|
||||
public void accept(INPUT input) {
|
||||
inputs.put(input);
|
||||
}
|
||||
|
||||
/** The meat of the processor thread runtime */
|
||||
protected abstract INTERMEDIATE onProcess(INPUT input) throws Exception;
|
||||
|
||||
/** The meat of the consumer thread runtime */
|
||||
protected abstract void onReceive(INTERMEDIATE intermediate) throws Exception;
|
||||
|
||||
public void join() throws InterruptedException {
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.wmsa.edge.converting.processor.logic;
|
||||
package nu.marginalia.util;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
@ -74,7 +74,7 @@ CREATE TABLE IF NOT EXISTS EC_PAGE_DATA (
|
||||
FORMAT ENUM('PLAIN', 'UNKNOWN', 'HTML123', 'HTML4', 'XHTML', 'HTML5', 'MARKDOWN') NOT NULL,
|
||||
FEATURES INT COMMENT "Bit-encoded feature set of document, @see HtmlFeature" NOT NULL,
|
||||
|
||||
DATA_HASH INTEGER NOT NULL,
|
||||
DATA_HASH BIGINT NOT NULL,
|
||||
QUALITY DOUBLE NOT NULL,
|
||||
|
||||
PUB_YEAR SMALLINT,
|
||||
@ -212,6 +212,13 @@ CREATE INDEX IF NOT EXISTS EC_DOMAIN_TOP_DOMAIN ON EC_DOMAIN (DOMAIN_TOP);
|
||||
|
||||
---;
|
||||
|
||||
CREATE TABLE IF NOT EXISTS EC_RANDOM_DOMAINS (
|
||||
DOMAIN_ID INT PRIMARY KEY,
|
||||
DOMAIN_SET INT NOT NULL
|
||||
);
|
||||
|
||||
---;
|
||||
|
||||
DROP TABLE IF EXISTS REF_DICTIONARY;
|
||||
|
||||
CREATE TABLE IF NOT EXISTS REF_DICTIONARY (
|
||||
@ -287,3 +294,4 @@ CREATE TABLE WMSA_PROCESS(
|
||||
MUTEX VARCHAR(255),
|
||||
TIMEOUT INT NOT NULL DEFAULT 60
|
||||
);
|
||||
|
@ -1,5 +1,6 @@
|
||||
package nu.marginalia.wmsa.edge.model;
|
||||
package nu.marginalia.model;
|
||||
|
||||
import nu.marginalia.model.EdgeUrl;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.net.URISyntaxException;
|
@ -1,7 +1,7 @@
|
||||
package nu.marginalia.wmsa.edge.model.crawl;
|
||||
package nu.marginalia.model;
|
||||
|
||||
import nu.marginalia.wmsa.edge.index.model.EdgePageWordFlags;
|
||||
import nu.marginalia.wmsa.edge.index.model.EdgePageWordMetadata;
|
||||
import nu.marginalia.model.crawl.EdgePageWordFlags;
|
||||
import nu.marginalia.model.idx.EdgePageWordMetadata;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.util.EnumSet;
|
@ -1,5 +1,6 @@
|
||||
package nu.marginalia.wmsa.edge.model;
|
||||
package nu.marginalia.model;
|
||||
|
||||
import nu.marginalia.model.EdgeUrl;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.net.URISyntaxException;
|
@ -1,5 +1,6 @@
|
||||
package nu.marginalia.util;
|
||||
|
||||
import org.junit.jupiter.api.Assertions;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
@ -0,0 +1,16 @@
|
||||
package nu.marginalia.util;
|
||||
|
||||
import org.junit.jupiter.api.Assertions;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
class LineUtilsTest {
|
||||
|
||||
@Test
|
||||
void firstNLines() {
|
||||
String text = "a\nb\r\ncd\r\re\n\rffgg\n\n";
|
||||
List<String> expected = List.of("a", "b", "cd", "", "e", "ffgg", "");
|
||||
Assertions.assertEquals(expected, LineUtils.firstNLines(text, 10));
|
||||
}
|
||||
}
|
9
common/readme.md
Normal file
9
common/readme.md
Normal file
@ -0,0 +1,9 @@
|
||||
# Common
|
||||
|
||||
These are packages containing the basic building blocks for running a service as well
|
||||
as shared models.
|
||||
|
||||
* [config](config/) contains some `@Inject`ables.
|
||||
* [service](service/) is the shared base classes for main methods and web services.
|
||||
* [service-client](service-client/) is the shared base class for RPC.
|
||||
* [service-discovery](service-discovery) contains tools that lets the services find each other.
|
56
common/service-client/build.gradle
Normal file
56
common/service-client/build.gradle
Normal file
@ -0,0 +1,56 @@
|
||||
plugins {
|
||||
id 'java'
|
||||
id "io.freefair.lombok" version "5.3.3.3"
|
||||
id "de.undercouch.download" version "5.1.0"
|
||||
id 'jvm-test-suite'
|
||||
}
|
||||
|
||||
java {
|
||||
toolchain {
|
||||
languageVersion.set(JavaLanguageVersion.of(17))
|
||||
}
|
||||
}
|
||||
|
||||
dependencies {
|
||||
implementation project(':third-party')
|
||||
implementation project(':protocol')
|
||||
implementation project(':common:service-discovery')
|
||||
|
||||
|
||||
implementation libs.lombok
|
||||
annotationProcessor libs.lombok
|
||||
implementation libs.bundles.slf4j
|
||||
|
||||
implementation libs.commons.lang3
|
||||
implementation libs.spark
|
||||
implementation libs.guice
|
||||
implementation libs.rxjava
|
||||
|
||||
implementation libs.okhttp3
|
||||
implementation libs.bundles.httpcomponents
|
||||
|
||||
implementation libs.bundles.gson
|
||||
implementation libs.protobuf
|
||||
|
||||
implementation libs.bundles.prometheus
|
||||
|
||||
implementation libs.bundles.mariadb
|
||||
|
||||
testImplementation libs.bundles.slf4j.test
|
||||
testImplementation libs.bundles.junit
|
||||
testImplementation libs.mockito
|
||||
}
|
||||
|
||||
test {
|
||||
maxParallelForks = Runtime.runtime.availableProcessors().intdiv(2) ?: 1
|
||||
maxHeapSize = "8G"
|
||||
useJUnitPlatform()
|
||||
}
|
||||
|
||||
task fastTests(type: Test) {
|
||||
maxParallelForks = Runtime.runtime.availableProcessors().intdiv(2) ?: 1
|
||||
maxHeapSize = "8G"
|
||||
useJUnitPlatform {
|
||||
excludeTags "slow"
|
||||
}
|
||||
}
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.wmsa.client;
|
||||
package nu.marginalia.client;
|
||||
|
||||
import com.google.common.util.concurrent.ThreadFactoryBuilder;
|
||||
import io.reactivex.rxjava3.core.Scheduler;
|
||||
@ -12,7 +12,7 @@ import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
import java.util.concurrent.ThreadFactory;
|
||||
|
||||
public class AbortingScheduler implements AutoCloseable {
|
||||
public class AbortingScheduler {
|
||||
private final String name;
|
||||
private final ThreadFactory threadFactory;
|
||||
|
||||
@ -54,7 +54,6 @@ public class AbortingScheduler implements AutoCloseable {
|
||||
return executorService;
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized void close() {
|
||||
if (null != executorService) {
|
||||
executorService.shutdown();
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.wmsa.client;
|
||||
package nu.marginalia.client;
|
||||
|
||||
import com.google.gson.Gson;
|
||||
import com.google.protobuf.GeneratedMessageV3;
|
||||
@ -6,11 +6,10 @@ import io.reactivex.rxjava3.core.Observable;
|
||||
import io.reactivex.rxjava3.core.ObservableSource;
|
||||
import io.reactivex.rxjava3.plugins.RxJavaPlugins;
|
||||
import lombok.SneakyThrows;
|
||||
import nu.marginalia.wmsa.client.exception.LocalException;
|
||||
import nu.marginalia.wmsa.client.exception.NetworkException;
|
||||
import nu.marginalia.wmsa.client.exception.RemoteException;
|
||||
import nu.marginalia.wmsa.client.exception.RouteNotConfiguredException;
|
||||
import nu.marginalia.wmsa.configuration.server.Context;
|
||||
import nu.marginalia.client.exception.LocalException;
|
||||
import nu.marginalia.client.exception.NetworkException;
|
||||
import nu.marginalia.client.exception.RemoteException;
|
||||
import nu.marginalia.client.exception.RouteNotConfiguredException;
|
||||
import okhttp3.*;
|
||||
import org.apache.http.HttpHost;
|
||||
import org.apache.logging.log4j.ThreadContext;
|
||||
@ -24,32 +23,33 @@ import java.net.ConnectException;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.function.Supplier;
|
||||
import java.util.zip.GZIPOutputStream;
|
||||
|
||||
public abstract class AbstractClient implements AutoCloseable {
|
||||
public static final String CONTEXT_OUTBOUND_REQUEST = "outbound-request";
|
||||
|
||||
private final Gson gson = GsonFactory.get();
|
||||
|
||||
private final Logger logger = LoggerFactory.getLogger(getClass());
|
||||
|
||||
public static final String CONTEXT_OUTBOUND_REQUEST = "outbound-request";
|
||||
private final Gson gson;
|
||||
private final OkHttpClient client;
|
||||
|
||||
private boolean quiet;
|
||||
private String url;
|
||||
private String serviceRoute;
|
||||
private int timeout;
|
||||
|
||||
|
||||
private volatile boolean alive;
|
||||
private final Thread livenessMonitor;
|
||||
|
||||
public void setTimeout(int timeout) {
|
||||
this.timeout = timeout;
|
||||
}
|
||||
|
||||
private int timeout;
|
||||
private volatile boolean alive;
|
||||
|
||||
private final Thread livenessMonitor;
|
||||
|
||||
public AbstractClient(String host, int port, int timeout) {
|
||||
public AbstractClient(String host, int port, int timeout, Supplier<Gson> gsonProvider) {
|
||||
logger.info("Creating client for {}[{}:{}]", getClass().getSimpleName(), host, port);
|
||||
|
||||
this.gson = gsonProvider.get();
|
||||
|
||||
this.timeout = timeout;
|
||||
client = new OkHttpClient.Builder()
|
||||
.connectTimeout(100, TimeUnit.MILLISECONDS)
|
||||
@ -57,7 +57,7 @@ public abstract class AbstractClient implements AutoCloseable {
|
||||
.retryOnConnectionFailure(true)
|
||||
.followRedirects(true)
|
||||
.build();
|
||||
url = new HttpHost(host, port).toURI();
|
||||
serviceRoute = new HttpHost(host, port).toURI();
|
||||
|
||||
RxJavaPlugins.setErrorHandler(e -> {
|
||||
if (e.getMessage() == null) {
|
||||
@ -76,28 +76,37 @@ public abstract class AbstractClient implements AutoCloseable {
|
||||
|
||||
public void setServiceRoute(String hostname, int port) {
|
||||
scheduler().abort();
|
||||
url = new HttpHost(hostname, port).toURI();
|
||||
serviceRoute = new HttpHost(hostname, port).toURI();
|
||||
}
|
||||
|
||||
protected String getServiceRoute() {
|
||||
return serviceRoute;
|
||||
}
|
||||
|
||||
@SneakyThrows
|
||||
private void monitorLiveness() {
|
||||
Thread.sleep(100); // Wait for initialization
|
||||
for (;;) {
|
||||
try {
|
||||
alive = isResponsive();
|
||||
}
|
||||
//
|
||||
catch (Exception ex) {
|
||||
logger.warn("Oops", ex);
|
||||
}
|
||||
synchronized (livenessMonitor) {
|
||||
if (alive) {
|
||||
livenessMonitor.wait(1000);
|
||||
try {
|
||||
for (; ; ) {
|
||||
try {
|
||||
alive = isResponsive();
|
||||
}
|
||||
//
|
||||
catch (Exception ex) {
|
||||
logger.warn("Oops", ex);
|
||||
}
|
||||
synchronized (livenessMonitor) {
|
||||
if (alive) {
|
||||
livenessMonitor.wait(1000);
|
||||
}
|
||||
}
|
||||
if (!alive) {
|
||||
Thread.sleep(100);
|
||||
}
|
||||
}
|
||||
if (!alive) {
|
||||
Thread.sleep(100);
|
||||
}
|
||||
}
|
||||
catch (InterruptedException ex) {
|
||||
// nothing to see here
|
||||
}
|
||||
}
|
||||
|
||||
@ -117,11 +126,9 @@ public abstract class AbstractClient implements AutoCloseable {
|
||||
|
||||
public synchronized boolean isResponsive() {
|
||||
Context ctx = Context.internal("ping");
|
||||
var req = ctx.paint(new Request.Builder()).url(url + "/internal/ping").get().build();
|
||||
var req = ctx.paint(new Request.Builder()).url(serviceRoute + "/internal/ping").get().build();
|
||||
|
||||
var call = client.newCall(req);
|
||||
|
||||
return Observable.just(call)
|
||||
return Observable.just(client.newCall(req))
|
||||
.subscribeOn(scheduler().get())
|
||||
.map(Call::execute)
|
||||
.map(this::getResponseStatus)
|
||||
@ -135,11 +142,9 @@ public abstract class AbstractClient implements AutoCloseable {
|
||||
public synchronized boolean isAccepting() {
|
||||
Context ctx = Context.internal("ready");
|
||||
|
||||
var req = ctx.paint(new Request.Builder()).url(url + "/internal/ready").get().build();
|
||||
var req = ctx.paint(new Request.Builder()).url(serviceRoute + "/internal/ready").get().build();
|
||||
|
||||
var call = client.newCall(req);
|
||||
|
||||
return Observable.just(call)
|
||||
return Observable.just(client.newCall(req))
|
||||
.subscribeOn(scheduler().get())
|
||||
.map(Call::execute)
|
||||
.map(this::getResponseStatus)
|
||||
@ -156,19 +161,12 @@ public abstract class AbstractClient implements AutoCloseable {
|
||||
|
||||
ensureAlive();
|
||||
|
||||
RequestBody body = RequestBody.create(
|
||||
MediaType.parse("application/json; charset=utf-8"),
|
||||
json(data));
|
||||
RequestBody body = RequestBody.create(json(data), MediaType.parse("application/json; charset=utf-8"));
|
||||
|
||||
var req = ctx.paint(new Request.Builder()).url(url + endpoint).post(body).build();
|
||||
var call = client.newCall(req);
|
||||
var req = ctx.paint(new Request.Builder()).url(serviceRoute + endpoint).post(body).build();
|
||||
|
||||
return Observable
|
||||
.just(call)
|
||||
.map((c) -> {
|
||||
ThreadContext.put("outbound-request", url + endpoint);
|
||||
return c;
|
||||
})
|
||||
.just(client.newCall(req))
|
||||
.subscribeOn(scheduler().get())
|
||||
.map(this::logInbound)
|
||||
.map(Call::execute)
|
||||
@ -186,15 +184,13 @@ public abstract class AbstractClient implements AutoCloseable {
|
||||
|
||||
ensureAlive();
|
||||
|
||||
RequestBody body = RequestBody.create(
|
||||
MediaType.parse("application/protobuf"),
|
||||
data.toByteArray());
|
||||
RequestBody body = RequestBody.create(data.toByteArray(), MediaType.parse("application/protobuf"));
|
||||
|
||||
var req = ctx.paint(new Request.Builder()).url(url + endpoint).post(body).build();
|
||||
var req = ctx.paint(new Request.Builder()).url(serviceRoute + endpoint).post(body).build();
|
||||
var call = client.newCall(req);
|
||||
|
||||
logInbound(call);
|
||||
ThreadContext.put("outbound-request", url + endpoint);
|
||||
ThreadContext.put("outbound-request", serviceRoute + endpoint);
|
||||
try (var rsp = call.execute()) {
|
||||
logOutbound(rsp);
|
||||
int code = rsp.code();
|
||||
@ -212,18 +208,10 @@ public abstract class AbstractClient implements AutoCloseable {
|
||||
|
||||
ensureAlive();
|
||||
|
||||
RequestBody body = RequestBody.create(
|
||||
MediaType.parse("application/json"),
|
||||
json(data));
|
||||
RequestBody body = RequestBody.create(json(data), MediaType.parse("application/json"));
|
||||
var req = ctx.paint(new Request.Builder()).url(serviceRoute + endpoint).post(body).build();
|
||||
|
||||
var req = ctx.paint(new Request.Builder()).url(url + endpoint).post(body).build();
|
||||
var call = client.newCall(req);
|
||||
|
||||
return Observable.just(call)
|
||||
.map((c) -> {
|
||||
ThreadContext.put("outbound-request", url + endpoint);
|
||||
return c;
|
||||
})
|
||||
return Observable.just(client.newCall(req))
|
||||
.subscribeOn(scheduler().get())
|
||||
.map(this::logInbound)
|
||||
.map(Call::execute)
|
||||
@ -238,15 +226,15 @@ public abstract class AbstractClient implements AutoCloseable {
|
||||
protected synchronized Observable<HttpStatusCode> post(Context ctx, String endpoint, String data, MediaType mediaType) {
|
||||
ensureAlive();
|
||||
|
||||
var body = RequestBody.create(mediaType, data);
|
||||
var body = RequestBody.create(data, mediaType);
|
||||
|
||||
var req = ctx.paint(new Request.Builder()).url(url + endpoint).post(body).build();
|
||||
var req = ctx.paint(new Request.Builder()).url(serviceRoute + endpoint).post(body).build();
|
||||
var call = client.newCall(req);
|
||||
|
||||
|
||||
return Observable.just(call)
|
||||
.map((c) -> {
|
||||
ThreadContext.put(CONTEXT_OUTBOUND_REQUEST, url + endpoint);
|
||||
ThreadContext.put(CONTEXT_OUTBOUND_REQUEST, serviceRoute + endpoint);
|
||||
return c;
|
||||
})
|
||||
.subscribeOn(scheduler().get())
|
||||
@ -264,14 +252,9 @@ public abstract class AbstractClient implements AutoCloseable {
|
||||
protected synchronized <T> Observable<T> get(Context ctx, String endpoint, Class<T> type) {
|
||||
ensureAlive();
|
||||
|
||||
var req = ctx.paint(new Request.Builder()).url(url + endpoint).get().build();
|
||||
var call = client.newCall(req);
|
||||
var req = ctx.paint(new Request.Builder()).url(serviceRoute + endpoint).get().build();
|
||||
|
||||
return Observable.just(call)
|
||||
.map((c) -> {
|
||||
ThreadContext.put("outbound-request", url + endpoint);
|
||||
return c;
|
||||
})
|
||||
return Observable.just(client.newCall(req))
|
||||
.subscribeOn(scheduler().get())
|
||||
.map(this::logInbound)
|
||||
.map(Call::execute)
|
||||
@ -284,62 +267,12 @@ public abstract class AbstractClient implements AutoCloseable {
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
protected synchronized <T> Observable<List<T>> getList(Context ctx, String endpoint, Class<T> type) {
|
||||
ensureAlive();
|
||||
|
||||
var req = ctx.paint(new Request.Builder()).url(url + endpoint).get().build();
|
||||
var call = client.newCall(req);
|
||||
|
||||
return Observable.just(call)
|
||||
.map((c) -> {
|
||||
ThreadContext.put("outbound-request", url + endpoint);
|
||||
return c;
|
||||
})
|
||||
.subscribeOn(scheduler().get())
|
||||
.map(this::logInbound)
|
||||
.map(Call::execute)
|
||||
.map(this::logOutbound)
|
||||
.map(rsp -> validateResponseStatus(rsp, req, 200))
|
||||
.map(rsp -> Arrays.asList((T[])getEntity(rsp, type.arrayType())))
|
||||
.retryWhen(this::retryHandler)
|
||||
.timeout(timeout, TimeUnit.SECONDS)
|
||||
.doFinally(() -> ThreadContext.remove("outbound-request"));
|
||||
}
|
||||
|
||||
|
||||
protected synchronized Observable<byte[]> getBinary(Context ctx, String endpoint) {
|
||||
ensureAlive();
|
||||
|
||||
var req = ctx.paint(new Request.Builder()).url(url + endpoint).get().build();
|
||||
var call = client.newCall(req);
|
||||
|
||||
return Observable.just(call)
|
||||
.map((c) -> {
|
||||
ThreadContext.put("outbound-request", url + endpoint);
|
||||
return c;
|
||||
})
|
||||
.subscribeOn(scheduler().get())
|
||||
.map(this::logInbound)
|
||||
.map(Call::execute)
|
||||
.map(this::logOutbound)
|
||||
.map(rsp -> validateResponseStatus(rsp, req, 200))
|
||||
.map(this::getBinaryEntity)
|
||||
.retryWhen(this::retryHandler)
|
||||
.timeout(timeout, TimeUnit.SECONDS)
|
||||
.doFinally(() -> ThreadContext.remove("outbound-request"));
|
||||
}
|
||||
|
||||
protected synchronized Observable<String> get(Context ctx, String endpoint) {
|
||||
ensureAlive();
|
||||
|
||||
var req = ctx.paint(new Request.Builder()).url(url + endpoint).get().build();
|
||||
var call = client.newCall(req);
|
||||
var req = ctx.paint(new Request.Builder()).url(serviceRoute + endpoint).get().build();
|
||||
|
||||
return Observable.just(call)
|
||||
.map((c) -> {
|
||||
ThreadContext.put("outbound-request", url + endpoint);
|
||||
return c;
|
||||
})
|
||||
return Observable.just(client.newCall(req))
|
||||
.subscribeOn(scheduler().get())
|
||||
.map(this::logInbound)
|
||||
.map(Call::execute)
|
||||
@ -354,14 +287,9 @@ public abstract class AbstractClient implements AutoCloseable {
|
||||
protected synchronized Observable<HttpStatusCode> delete(Context ctx, String endpoint) {
|
||||
ensureAlive();
|
||||
|
||||
var req = ctx.paint(new Request.Builder()).url(url + endpoint).delete().build();
|
||||
var call = client.newCall(req);
|
||||
var req = ctx.paint(new Request.Builder()).url(serviceRoute + endpoint).delete().build();
|
||||
|
||||
return Observable.just(call)
|
||||
.map((c) -> {
|
||||
ThreadContext.put("outbound-request", url + endpoint);
|
||||
return c;
|
||||
})
|
||||
return Observable.just(client.newCall(req))
|
||||
.subscribeOn(scheduler().get())
|
||||
.map(this::logInbound)
|
||||
.map(Call::execute)
|
||||
@ -390,26 +318,12 @@ public abstract class AbstractClient implements AutoCloseable {
|
||||
if (!isAlive()) {
|
||||
wait(2000);
|
||||
if (!isAlive()) {
|
||||
throw new RouteNotConfiguredException("Route not configured for " + name());
|
||||
throw new RouteNotConfiguredException("Route not configured for " + name() + " -- tried " + serviceRoute);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
public void waitReady() {
|
||||
boolean accepting = isAccepting();
|
||||
if (accepting) {
|
||||
return;
|
||||
}
|
||||
|
||||
logger.info("Waiting for " + name());
|
||||
do {
|
||||
Thread.sleep(1000);
|
||||
} while (!isAccepting());
|
||||
}
|
||||
|
||||
|
||||
private ObservableSource<?> retryHandler(Observable<Throwable> error) {
|
||||
return error.flatMap(this::filterRetryableExceptions);
|
||||
}
|
||||
@ -488,12 +402,6 @@ public abstract class AbstractClient implements AutoCloseable {
|
||||
|
||||
}
|
||||
|
||||
@SneakyThrows
|
||||
private byte[] getBinaryEntity(Response response) {
|
||||
try (response) {
|
||||
return response.body().bytes();
|
||||
}
|
||||
}
|
||||
public boolean isAlive() {
|
||||
return alive;
|
||||
}
|
||||
@ -507,17 +415,4 @@ public abstract class AbstractClient implements AutoCloseable {
|
||||
}
|
||||
}
|
||||
|
||||
private byte[] compressedJson(Object o) throws IOException {
|
||||
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
||||
GZIPOutputStream gos = new GZIPOutputStream(baos);
|
||||
try {
|
||||
gson.toJson(o, new OutputStreamWriter(gos));
|
||||
gos.finish();
|
||||
return baos.toByteArray();
|
||||
}
|
||||
catch (Exception ex) {
|
||||
throw new LocalException(ex);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -1,21 +1,23 @@
|
||||
package nu.marginalia.wmsa.client;
|
||||
package nu.marginalia.client;
|
||||
|
||||
import com.google.gson.Gson;
|
||||
import io.reactivex.rxjava3.core.Observable;
|
||||
import lombok.SneakyThrows;
|
||||
import nu.marginalia.wmsa.configuration.ServiceDescriptor;
|
||||
import nu.marginalia.wmsa.configuration.server.Context;
|
||||
import nu.marginalia.service.descriptor.ServiceDescriptor;
|
||||
import nu.marginalia.service.descriptor.HostsFile;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import javax.annotation.Nonnull;
|
||||
import java.util.function.Supplier;
|
||||
|
||||
public class AbstractDynamicClient extends AbstractClient {
|
||||
private final ServiceDescriptor service;
|
||||
private final Logger logger = LoggerFactory.getLogger(getClass());
|
||||
private final AbortingScheduler scheduler;
|
||||
|
||||
public AbstractDynamicClient(@Nonnull ServiceDescriptor service) {
|
||||
super(service.getHost(), service.port, 10);
|
||||
public AbstractDynamicClient(@Nonnull ServiceDescriptor service, HostsFile hosts, Supplier<Gson> gsonProvider) {
|
||||
super(hosts.getHost(service), service.port, 10, gsonProvider);
|
||||
|
||||
this.service = service;
|
||||
this.scheduler = new AbortingScheduler(name());
|
||||
@ -32,7 +34,7 @@ public class AbstractDynamicClient extends AbstractClient {
|
||||
|
||||
@SneakyThrows
|
||||
public void blockingWait() {
|
||||
logger.info("Waiting for route to {}", service);
|
||||
logger.info("Waiting for route to {} ({})", service, getServiceRoute());
|
||||
while (!isAlive()) {
|
||||
Thread.sleep(1000);
|
||||
}
|
||||
@ -43,10 +45,4 @@ public class AbstractDynamicClient extends AbstractClient {
|
||||
return scheduler;
|
||||
}
|
||||
|
||||
public Observable<String> who(Context ctx) {
|
||||
return get(ctx, "/public/who");
|
||||
}
|
||||
public Observable<String> ping(Context ctx) {
|
||||
return get(ctx, "/internal/ping");
|
||||
}
|
||||
}
|
@ -0,0 +1,89 @@
|
||||
package nu.marginalia.client;
|
||||
|
||||
import io.reactivex.rxjava3.schedulers.Schedulers;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import spark.Request;
|
||||
|
||||
import java.util.*;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
public class Context {
|
||||
public static final String CONTEXT_HEADER = "X-Context";
|
||||
public static final String SESSION_HEADER = "Cookie";
|
||||
public static final String PUBLIC_HEADER = "X-Public";
|
||||
|
||||
private final String id;
|
||||
private final String session;
|
||||
private boolean treatAsPublic;
|
||||
|
||||
private Context(String id, String session) {
|
||||
this.id = Objects.requireNonNull(id, "Context missing");
|
||||
this.session = session;
|
||||
}
|
||||
|
||||
public Context treatAsPublic() {
|
||||
this.treatAsPublic = true;
|
||||
return this;
|
||||
}
|
||||
|
||||
public static Context internal() {
|
||||
return new Context(UUID.randomUUID().toString(), null);
|
||||
}
|
||||
public static Context internal(String why) {
|
||||
return new Context(why + ":" + System.nanoTime(), null);
|
||||
}
|
||||
|
||||
public static Context fromRequest(Request request) {
|
||||
|
||||
if (Boolean.getBoolean("unit-test")) {
|
||||
return Context.internal();
|
||||
}
|
||||
|
||||
final var ctxHeader = anonymizeContext(request);
|
||||
final var sessHeader = request.headers(SESSION_HEADER);
|
||||
|
||||
return new Context(ctxHeader, sessHeader);
|
||||
}
|
||||
|
||||
private static String anonymizeContext(Request request) {
|
||||
String header = request.headers(CONTEXT_HEADER);
|
||||
if (header != null && header.contains("-")) {
|
||||
// The public X-Context header contains info that traces to the
|
||||
// external user's IP. Anonymize this by running it through a
|
||||
// hash code blender with rotating salt
|
||||
|
||||
return ContextScrambler.anonymize(header);
|
||||
}
|
||||
else if (header != null) {
|
||||
return header;
|
||||
}
|
||||
else {
|
||||
// When no X-Context is provided, synthesize one from path
|
||||
return request.pathInfo() + ":" + Thread.currentThread().getId();
|
||||
}
|
||||
}
|
||||
|
||||
public okhttp3.Request.Builder paint(okhttp3.Request.Builder requestBuilder) {
|
||||
requestBuilder.addHeader(CONTEXT_HEADER, id);
|
||||
|
||||
if (session != null) {
|
||||
requestBuilder.addHeader(SESSION_HEADER, session);
|
||||
}
|
||||
|
||||
if (treatAsPublic) {
|
||||
requestBuilder.header(PUBLIC_HEADER, "1");
|
||||
}
|
||||
|
||||
return requestBuilder;
|
||||
}
|
||||
|
||||
public String getContextId() {
|
||||
return id;
|
||||
}
|
||||
|
||||
public boolean isPublic() {
|
||||
return id.startsWith("#");
|
||||
}
|
||||
|
||||
}
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.wmsa.client;
|
||||
package nu.marginalia.client;
|
||||
|
||||
public final class HttpStatusCode {
|
||||
public final int code;
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.wmsa.client.exception;
|
||||
package nu.marginalia.client.exception;
|
||||
|
||||
public class LocalException extends MessagingException {
|
||||
public LocalException() {
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.wmsa.client.exception;
|
||||
package nu.marginalia.client.exception;
|
||||
|
||||
public class MessagingException extends RuntimeException {
|
||||
public MessagingException() {
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.wmsa.client.exception;
|
||||
package nu.marginalia.client.exception;
|
||||
|
||||
public class NetworkException extends MessagingException {
|
||||
public NetworkException() {
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.wmsa.client.exception;
|
||||
package nu.marginalia.client.exception;
|
||||
|
||||
public class RemoteException extends MessagingException {
|
||||
public RemoteException() {
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.wmsa.client.exception;
|
||||
package nu.marginalia.client.exception;
|
||||
|
||||
public class RouteNotConfiguredException extends MessagingException {
|
||||
public RouteNotConfiguredException() {
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.wmsa.client.exception;
|
||||
package nu.marginalia.client.exception;
|
||||
|
||||
public class TimeoutException extends MessagingException {
|
||||
public TimeoutException() {
|
@ -0,0 +1,180 @@
|
||||
package nu.marginalia.client;
|
||||
|
||||
import com.google.gson.Gson;
|
||||
import io.reactivex.rxjava3.core.Observable;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Data;
|
||||
import lombok.SneakyThrows;
|
||||
import org.junit.jupiter.api.*;
|
||||
import spark.Request;
|
||||
import spark.Response;
|
||||
import spark.Spark;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Random;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
|
||||
public class AbstractClientTest {
|
||||
|
||||
static TestServer testServer;
|
||||
static AbstractClient client;
|
||||
Gson gson = new Gson();
|
||||
|
||||
@Data @AllArgsConstructor
|
||||
private static class DummyObject {
|
||||
public int num;
|
||||
public String str;
|
||||
}
|
||||
|
||||
@BeforeAll
|
||||
public static void setUp() {
|
||||
int port = new Random().nextInt(6000, 10000);
|
||||
testServer = new TestServer(port);
|
||||
|
||||
client = new AbstractClient("localhost", port, 1, Gson::new) {
|
||||
@Override
|
||||
public AbortingScheduler scheduler() {
|
||||
return new AbortingScheduler(name());
|
||||
}
|
||||
|
||||
@Override
|
||||
public String name() {
|
||||
return "test";
|
||||
}
|
||||
};
|
||||
client.setTimeout(1);
|
||||
}
|
||||
|
||||
|
||||
@AfterAll
|
||||
public static void tearDown() {
|
||||
testServer.close();
|
||||
client.close();
|
||||
}
|
||||
|
||||
private void assertError(Observable<?> observable) {
|
||||
try {
|
||||
observable.blockingSubscribe();
|
||||
}
|
||||
catch (RuntimeException ex) {
|
||||
System.out.println("Got exception " + ex.getClass().getSimpleName() + " -- as expected!" );
|
||||
return;
|
||||
}
|
||||
Assertions.fail("Expected exception");
|
||||
}
|
||||
@SneakyThrows
|
||||
private Object timeout(Request request, Response response) {
|
||||
Thread.sleep(5000);
|
||||
return "yawn";
|
||||
}
|
||||
@SneakyThrows
|
||||
private Object error404(Request request, Response response) {
|
||||
Spark.halt(404);
|
||||
return "";
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetTimeout() {
|
||||
testServer.get(this::timeout);
|
||||
|
||||
assertError(client.get(Context.internal(), "/get"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testPostTimeout() {
|
||||
testServer.post(this::timeout);
|
||||
|
||||
assertError(client.post(Context.internal(), "/post", "test"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDeleteTimeout() {
|
||||
testServer.delete(this::timeout);
|
||||
|
||||
assertError(client.delete(Context.internal(), "/post"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testPost404() {
|
||||
testServer.post(this::error404);
|
||||
|
||||
assertError(client.post(Context.internal(), "/post", "test"));
|
||||
}
|
||||
@Test
|
||||
public void testGet404() {
|
||||
testServer.get(this::error404);
|
||||
|
||||
assertError(client.get(Context.internal(), "/get"));
|
||||
}
|
||||
@Test
|
||||
public void testDelete404() {
|
||||
testServer.delete(this::error404);
|
||||
|
||||
assertError(client.delete(Context.internal(), "/delete"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGet() {
|
||||
testServer.get((req, rsp) -> "Hello World");
|
||||
|
||||
assertEquals("Hello World", client.get(Context.internal(), "/get").blockingFirst());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAcceptingUp() {
|
||||
testServer.setReady(true);
|
||||
assertTrue(client.isAccepting());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAcceptingDown() {
|
||||
testServer.setReady(false);
|
||||
assertFalse(client.isAccepting());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetJson() {
|
||||
testServer.get((req, rsp) -> new DummyObject(5, "23"), new Gson()::toJson);
|
||||
|
||||
assertEquals(client.get(Context.internal(), "/get", DummyObject.class).blockingFirst(),
|
||||
new DummyObject(5, "23"));
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testDelete() {
|
||||
testServer.delete((req, rsp) -> "Hello World");
|
||||
|
||||
assertTrue(client.delete(Context.internal(), "/delete").blockingFirst().isGood());
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testPost() {
|
||||
List<DummyObject> inbox = new ArrayList<>();
|
||||
testServer.post((req, rsp) -> {
|
||||
inbox.add(gson.fromJson(req.body(), DummyObject.class));
|
||||
return "ok";
|
||||
});
|
||||
|
||||
client.post(Context.internal(), "/post", new DummyObject(5, "23")).blockingSubscribe();
|
||||
assertEquals(1, inbox.size());
|
||||
assertEquals(new DummyObject(5, "23"), inbox.get(0));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testPostGet() {
|
||||
List<DummyObject> inbox = new ArrayList<>();
|
||||
testServer.post((req, rsp) -> {
|
||||
inbox.add(gson.fromJson(req.body(), DummyObject.class));
|
||||
return new DummyObject(1, "ret");
|
||||
}, gson::toJson);
|
||||
|
||||
var ret = client.postGet(Context.internal(), "/post", new DummyObject(5, "23"), DummyObject.class).blockingFirst();
|
||||
assertEquals(1, inbox.size());
|
||||
assertEquals(new DummyObject(5, "23"), inbox.get(0));
|
||||
assertEquals(new DummyObject(1, "ret"), ret);
|
||||
}
|
||||
}
|
@ -0,0 +1,59 @@
|
||||
package nu.marginalia.client;
|
||||
|
||||
import spark.Request;
|
||||
import spark.Response;
|
||||
import spark.Spark;
|
||||
|
||||
import java.util.function.BiFunction;
|
||||
import java.util.function.Function;
|
||||
|
||||
public class TestServer {
|
||||
BiFunction<Request, Response, Object> onGet;
|
||||
BiFunction<Request, Response, Object> onPost;
|
||||
BiFunction<Request, Response, Object> onDelete;
|
||||
|
||||
|
||||
boolean isReady;
|
||||
|
||||
public TestServer(int port) {
|
||||
Spark.port(port);
|
||||
Spark.get("/internal/ping", (r,q) -> "pong");
|
||||
Spark.get("/internal/ready", this::ready);
|
||||
Spark.get("/get", (request, response) -> onGet.apply(request, response));
|
||||
Spark.post("/post", (request, response) -> onPost.apply(request, response));
|
||||
Spark.delete("/delete", (request, response) -> onDelete.apply(request, response));
|
||||
}
|
||||
|
||||
private Object ready(Request request, Response response) {
|
||||
if (isReady) {
|
||||
return "";
|
||||
}
|
||||
else {
|
||||
response.status(401);
|
||||
return "bad";
|
||||
}
|
||||
}
|
||||
|
||||
public void close() {
|
||||
Spark.stop();
|
||||
}
|
||||
|
||||
public boolean isReady() {
|
||||
return isReady;
|
||||
}
|
||||
|
||||
public void setReady(boolean ready) {
|
||||
isReady = ready;
|
||||
}
|
||||
|
||||
public TestServer get(BiFunction<Request, Response, Object> onGet) { this.onGet = onGet; return this; }
|
||||
public TestServer get(BiFunction<Request, Response, Object> onGet, Function<Object, Object> transform) {
|
||||
this.onGet = onGet.andThen(transform);
|
||||
return this;
|
||||
}
|
||||
public TestServer delete(BiFunction<Request, Response, Object> onDelete) { this.onDelete = onDelete; return this; }
|
||||
public TestServer post(BiFunction<Request, Response, Object> onPost) { this.onPost = onPost; return this; }
|
||||
public TestServer post(BiFunction<Request, Response, Object> onPost, Function<Object, Object> transform) {
|
||||
this.onPost = onPost.andThen(transform); return this;
|
||||
}
|
||||
}
|
22
common/service-discovery/build.gradle
Normal file
22
common/service-discovery/build.gradle
Normal file
@ -0,0 +1,22 @@
|
||||
plugins {
|
||||
id 'java'
|
||||
|
||||
}
|
||||
|
||||
repositories {
|
||||
mavenLocal()
|
||||
mavenCentral()
|
||||
}
|
||||
|
||||
java {
|
||||
toolchain {
|
||||
languageVersion.set(JavaLanguageVersion.of(17))
|
||||
}
|
||||
}
|
||||
dependencies {
|
||||
implementation libs.bundles.slf4j
|
||||
|
||||
testImplementation libs.bundles.slf4j.test
|
||||
testImplementation libs.bundles.junit
|
||||
testImplementation libs.mockito
|
||||
}
|
@ -0,0 +1,17 @@
|
||||
package nu.marginalia.service;
|
||||
|
||||
import nu.marginalia.service.descriptor.ServiceDescriptor;
|
||||
import nu.marginalia.service.descriptor.ServiceDescriptors;
|
||||
import nu.marginalia.service.id.ServiceId;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
public class SearchServiceDescriptors {
|
||||
public static ServiceDescriptors descriptors = new ServiceDescriptors(
|
||||
List.of(new ServiceDescriptor(ServiceId.Api, 5004),
|
||||
new ServiceDescriptor(ServiceId.Index, 5021),
|
||||
new ServiceDescriptor(ServiceId.Search, 5023),
|
||||
new ServiceDescriptor(ServiceId.Assistant, 5025),
|
||||
new ServiceDescriptor(ServiceId.Dating, 5070),
|
||||
new ServiceDescriptor(ServiceId.Explorer, 5071)));
|
||||
}
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.wmsa.configuration;
|
||||
package nu.marginalia.service.descriptor;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
@ -13,7 +13,7 @@ import java.util.Map;
|
||||
*
|
||||
* */
|
||||
public class HostsFile {
|
||||
private final Map<ServiceDescriptor, String> hostsMap = new HashMap<>(ServiceDescriptor.values().length);
|
||||
private final Map<String, String> hostsMap = new HashMap<>();
|
||||
private static final Logger logger = LoggerFactory.getLogger(HostsFile.class);
|
||||
public HostsFile(Path fileName) throws IOException {
|
||||
var lines = Files.readAllLines(fileName);
|
||||
@ -27,7 +27,7 @@ public class HostsFile {
|
||||
String hostName = parts[1];
|
||||
|
||||
try {
|
||||
hostsMap.put(ServiceDescriptor.byName(descriptorName), hostName);
|
||||
hostsMap.put(descriptorName, hostName);
|
||||
}
|
||||
catch (IllegalArgumentException ex) {
|
||||
logger.warn("Hosts file contains entry for unknown service {}", descriptorName);
|
||||
@ -36,13 +36,10 @@ public class HostsFile {
|
||||
}
|
||||
|
||||
public HostsFile() {
|
||||
for (var sd : ServiceDescriptor.values()) {
|
||||
hostsMap.put(sd, "localhost");
|
||||
}
|
||||
}
|
||||
|
||||
public String getHost(ServiceDescriptor sd) {
|
||||
return hostsMap.get(sd);
|
||||
return hostsMap.getOrDefault(sd.name, sd.name);
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,23 @@
|
||||
package nu.marginalia.service.descriptor;
|
||||
|
||||
import nu.marginalia.service.id.ServiceId;
|
||||
|
||||
public class ServiceDescriptor {
|
||||
public final ServiceId id;
|
||||
public final String name;
|
||||
public final int port;
|
||||
|
||||
public ServiceDescriptor(ServiceId id, int port) {
|
||||
this.id = id;
|
||||
this.name = id.name;
|
||||
this.port = port;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return name;
|
||||
}
|
||||
|
||||
public String describeService() {
|
||||
return String.format("%s", name);
|
||||
}
|
||||
}
|
@ -0,0 +1,31 @@
|
||||
package nu.marginalia.service.descriptor;
|
||||
|
||||
import nu.marginalia.service.id.ServiceId;
|
||||
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
|
||||
public class ServiceDescriptors {
|
||||
private final Map<ServiceId, ServiceDescriptor> descriptorsAll = new LinkedHashMap<>();
|
||||
|
||||
public ServiceDescriptors() {
|
||||
|
||||
}
|
||||
|
||||
public ServiceDescriptors(List<ServiceDescriptor> descriptors) {
|
||||
descriptors.forEach(d -> descriptorsAll.put(d.id, d));
|
||||
}
|
||||
|
||||
public ServiceDescriptor[] values() {
|
||||
return descriptorsAll.values().toArray(ServiceDescriptor[]::new);
|
||||
}
|
||||
|
||||
public ServiceDescriptor forId(ServiceId id) {
|
||||
return Objects.requireNonNull(descriptorsAll.get(id),
|
||||
"No service descriptor defined for " + id + " -- did you forget to "
|
||||
+ "bind(ServiceDescriptors.class).toInstance(SearchServiceDescriptors.descriptors); ?");
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,25 @@
|
||||
package nu.marginalia.service.id;
|
||||
|
||||
public enum ServiceId {
|
||||
|
||||
Assistant("assistant-service"),
|
||||
Api("api-service"),
|
||||
Search("search-service"),
|
||||
Index("index-service"),
|
||||
|
||||
Dating("dating-service"),
|
||||
Explorer("explorer-service"),
|
||||
|
||||
Other_Auth("auth"),
|
||||
Other_Memex("memex"),
|
||||
|
||||
|
||||
Other_ResourceStore("resource-store"),
|
||||
Other_Renderer("renderer"),
|
||||
Other_PodcastScraper("podcast-scraper");
|
||||
|
||||
public final String name;
|
||||
ServiceId(String name) {
|
||||
this.name = name;
|
||||
}
|
||||
}
|
35
common/service/build.gradle
Normal file
35
common/service/build.gradle
Normal file
@ -0,0 +1,35 @@
|
||||
plugins {
|
||||
id 'java'
|
||||
}
|
||||
|
||||
|
||||
java {
|
||||
toolchain {
|
||||
languageVersion.set(JavaLanguageVersion.of(17))
|
||||
}
|
||||
}
|
||||
|
||||
dependencies {
|
||||
implementation project(':common:service-client')
|
||||
implementation project(':common:service-discovery')
|
||||
implementation project(':libraries:misc')
|
||||
|
||||
implementation libs.lombok
|
||||
annotationProcessor libs.lombok
|
||||
|
||||
implementation libs.spark
|
||||
implementation libs.guice
|
||||
implementation libs.rxjava
|
||||
|
||||
implementation libs.bundles.prometheus
|
||||
implementation libs.bundles.slf4j
|
||||
implementation libs.bucket4j
|
||||
|
||||
testImplementation libs.bundles.slf4j.test
|
||||
implementation libs.bundles.mariadb
|
||||
|
||||
testImplementation libs.bundles.slf4j.test
|
||||
testImplementation libs.bundles.junit
|
||||
testImplementation libs.mockito
|
||||
|
||||
}
|
@ -0,0 +1,62 @@
|
||||
package nu.marginalia.service;
|
||||
|
||||
import io.prometheus.client.hotspot.DefaultExports;
|
||||
import io.reactivex.rxjava3.exceptions.UndeliverableException;
|
||||
import io.reactivex.rxjava3.plugins.RxJavaPlugins;
|
||||
import nu.marginalia.service.id.ServiceId;
|
||||
import nu.marginalia.client.exception.NetworkException;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.net.SocketTimeoutException;
|
||||
import java.net.UnknownHostException;
|
||||
|
||||
public abstract class MainClass {
|
||||
private final Logger logger = LoggerFactory.getLogger(getClass());
|
||||
|
||||
public MainClass() {
|
||||
RxJavaPlugins.setErrorHandler(this::handleError);
|
||||
}
|
||||
|
||||
protected void handleError(Throwable ex) {
|
||||
if (ex instanceof UndeliverableException) {
|
||||
ex = ex.getCause();
|
||||
}
|
||||
|
||||
if (ex instanceof SocketTimeoutException) {
|
||||
logger.warn("SocketTimeoutException");
|
||||
}
|
||||
else if (ex instanceof UnknownHostException) {
|
||||
logger.warn("UnknownHostException");
|
||||
}
|
||||
else if (ex instanceof NetworkException) {
|
||||
logger.warn("NetworkException", ex);
|
||||
}
|
||||
else {
|
||||
logger.error("Uncaught exception", ex);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
protected static void init(ServiceId id, String... args) {
|
||||
|
||||
System.setProperty("log4j2.isThreadContextMapInheritable", "true");
|
||||
System.setProperty("isThreadContextMapInheritable", "true");
|
||||
System.setProperty("service-name", id.name);
|
||||
|
||||
initJdbc();
|
||||
initPrometheus();
|
||||
}
|
||||
|
||||
private static void initJdbc() {
|
||||
// This looks weird, but it's just for running the static block
|
||||
// in the driver class so that it registers itself
|
||||
|
||||
new org.mariadb.jdbc.Driver();
|
||||
}
|
||||
|
||||
private static void initPrometheus() {
|
||||
DefaultExports.initialize();
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,35 @@
|
||||
package nu.marginalia.service.module;
|
||||
|
||||
import com.google.inject.AbstractModule;
|
||||
import com.google.inject.Provides;
|
||||
import com.google.inject.name.Named;
|
||||
import com.google.inject.name.Names;
|
||||
import nu.marginalia.service.descriptor.ServiceDescriptors;
|
||||
import nu.marginalia.service.id.ServiceId;
|
||||
|
||||
import java.util.Objects;
|
||||
|
||||
public class ConfigurationModule extends AbstractModule {
|
||||
private static final String SERVICE_NAME = System.getProperty("service-name");
|
||||
private final ServiceDescriptors descriptors;
|
||||
private final ServiceId id;
|
||||
|
||||
public ConfigurationModule(ServiceDescriptors descriptors, ServiceId id) {
|
||||
this.descriptors = descriptors;
|
||||
this.id = id;
|
||||
}
|
||||
|
||||
public void configure() {
|
||||
bind(ServiceDescriptors.class).toInstance(descriptors);
|
||||
bind(String.class).annotatedWith(Names.named("service-name")).toInstance(Objects.requireNonNull(SERVICE_NAME));
|
||||
bind(String.class).annotatedWith(Names.named("service-host")).toInstance(System.getProperty("service-host", "127.0.0.1"));
|
||||
bind(Integer.class).annotatedWith(Names.named("service-port")).toInstance(descriptors.forId(id).port);
|
||||
}
|
||||
|
||||
@Provides
|
||||
@Named("metrics-server-port")
|
||||
public Integer provideMetricsServerPort(@Named("service-port") Integer servicePort) {
|
||||
return servicePort + 1000;
|
||||
}
|
||||
|
||||
}
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.wmsa.configuration.module;
|
||||
package nu.marginalia.service.module;
|
||||
|
||||
import com.google.inject.AbstractModule;
|
||||
import com.google.inject.Provides;
|
||||
@ -6,17 +6,15 @@ import com.google.inject.Singleton;
|
||||
import com.zaxxer.hikari.HikariConfig;
|
||||
import com.zaxxer.hikari.HikariDataSource;
|
||||
import lombok.SneakyThrows;
|
||||
import nu.marginalia.wmsa.configuration.WmsaHome;
|
||||
import org.h2.tools.RunScript;
|
||||
import org.mariadb.jdbc.Driver;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStreamReader;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.Optional;
|
||||
import java.util.Properties;
|
||||
|
||||
public class DatabaseModule extends AbstractModule {
|
||||
@ -35,7 +33,7 @@ public class DatabaseModule extends AbstractModule {
|
||||
}
|
||||
|
||||
private Properties loadDbProperties() {
|
||||
Path propDir = WmsaHome.getHomePath().resolve("conf/db.properties");
|
||||
Path propDir = getHomePath().resolve("conf/db.properties");
|
||||
if (!Files.isRegularFile(propDir)) {
|
||||
throw new IllegalStateException("Database properties file " + propDir + " does not exist");
|
||||
}
|
||||
@ -56,22 +54,27 @@ public class DatabaseModule extends AbstractModule {
|
||||
|
||||
}
|
||||
|
||||
public static Path getHomePath() {
|
||||
var retStr = Optional.ofNullable(System.getenv("WMSA_HOME")).orElse("/var/lib/wmsa");
|
||||
|
||||
var ret = Path.of(retStr);
|
||||
if (!Files.isDirectory(ret)) {
|
||||
throw new IllegalStateException("Could not find WMSA_HOME, either set environment variable or ensure /var/lib/wmsa exists");
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
@Singleton
|
||||
@Provides
|
||||
public HikariDataSource provideConnection() {
|
||||
if (Boolean.getBoolean("data-store-h2")) {
|
||||
return getH2();
|
||||
}
|
||||
else {
|
||||
return getMariaDB();
|
||||
}
|
||||
|
||||
return getMariaDB();
|
||||
}
|
||||
|
||||
@SneakyThrows
|
||||
private HikariDataSource getMariaDB() {
|
||||
var connStr = dbProperties.getProperty(DB_CONN_KEY);
|
||||
var connStr = System.getProperty("db.overrideJdbc", dbProperties.getProperty(DB_CONN_KEY));
|
||||
|
||||
try {
|
||||
HikariConfig config = new HikariConfig();
|
||||
@ -94,22 +97,4 @@ public class DatabaseModule extends AbstractModule {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
private HikariDataSource getH2() {
|
||||
HikariConfig config = new HikariConfig();
|
||||
config.setJdbcUrl("jdbc:h2:~/wmsa-db");
|
||||
config.setUsername("wmsa");
|
||||
config.setPassword("");
|
||||
|
||||
var ds = new HikariDataSource(config);
|
||||
|
||||
try (var stream = ClassLoader.getSystemResourceAsStream("sql/data-store-init.sql")) {
|
||||
RunScript.execute(ds.getConnection(), new InputStreamReader(stream));
|
||||
}
|
||||
try (var stream = ClassLoader.getSystemResourceAsStream("sql/edge-crawler-cache.sql")) {
|
||||
RunScript.execute(ds.getConnection(), new InputStreamReader(stream));
|
||||
}
|
||||
return ds;
|
||||
}
|
||||
}
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.wmsa.configuration.module;
|
||||
package nu.marginalia.service.module;
|
||||
|
||||
import com.google.inject.name.Named;
|
||||
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.wmsa.configuration.module;
|
||||
package nu.marginalia.service.module;
|
||||
|
||||
import com.google.inject.name.Named;
|
||||
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.wmsa.configuration.server;
|
||||
package nu.marginalia.service.server;
|
||||
|
||||
import com.google.inject.Singleton;
|
||||
import lombok.SneakyThrows;
|
||||
@ -22,11 +22,6 @@ public class Initialization {
|
||||
initialized = true;
|
||||
notifyAll();
|
||||
}
|
||||
|
||||
if (Boolean.getBoolean("go-no-go")) {
|
||||
logger.info("Self-test OK");
|
||||
System.exit(0);
|
||||
}
|
||||
}
|
||||
|
||||
public boolean isReady() {
|
||||
@ -41,7 +36,7 @@ public class Initialization {
|
||||
while (!initialized) {
|
||||
wait();
|
||||
}
|
||||
return initialized;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.wmsa.configuration.server;
|
||||
package nu.marginalia.service.server;
|
||||
|
||||
import com.google.inject.Inject;
|
||||
import com.google.inject.name.Named;
|
@ -1,10 +1,10 @@
|
||||
package nu.marginalia.wmsa.configuration.server;
|
||||
package nu.marginalia.service.server;
|
||||
|
||||
import io.github.bucket4j.Bandwidth;
|
||||
import io.github.bucket4j.Bucket;
|
||||
import io.github.bucket4j.Bucket4j;
|
||||
import io.github.bucket4j.Refill;
|
||||
import io.reactivex.rxjava3.schedulers.Schedulers;
|
||||
import nu.marginalia.client.Context;
|
||||
|
||||
import java.time.Duration;
|
||||
import java.util.Map;
|
||||
@ -49,13 +49,11 @@ public class RateLimiter {
|
||||
}
|
||||
|
||||
public boolean isAllowed(Context ctx) {
|
||||
final Optional<String> maybeIp = ctx.getIpHash();
|
||||
|
||||
if (maybeIp.isEmpty()) { // Internal server->server request
|
||||
if (!ctx.isPublic()) { // Internal server->server request
|
||||
return true;
|
||||
}
|
||||
|
||||
return bucketMap.computeIfAbsent(maybeIp.get(),
|
||||
return bucketMap.computeIfAbsent(ctx.getContextId(),
|
||||
(ip) -> createBucket()).tryConsume(1);
|
||||
}
|
||||
|
@ -1,10 +1,9 @@
|
||||
package nu.marginalia.wmsa.configuration.server;
|
||||
package nu.marginalia.service.server;
|
||||
|
||||
import com.google.common.base.Strings;
|
||||
import io.prometheus.client.Counter;
|
||||
import nu.marginalia.wmsa.client.exception.MessagingException;
|
||||
import org.apache.http.HttpStatus;
|
||||
import org.apache.logging.log4j.ThreadContext;
|
||||
import nu.marginalia.client.Context;
|
||||
import nu.marginalia.client.exception.MessagingException;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.slf4j.Marker;
|
||||
@ -17,6 +16,8 @@ import java.util.Optional;
|
||||
|
||||
public class Service {
|
||||
private final Logger logger = LoggerFactory.getLogger(getClass());
|
||||
|
||||
// Marker for filtering out sensitive content from the persistent logs
|
||||
private final Marker httpMarker = MarkerFactory.getMarker("HTTP");
|
||||
|
||||
private final Initialization initialization;
|
||||
@ -53,8 +54,8 @@ public class Service {
|
||||
|
||||
configureStaticFiles.run();
|
||||
|
||||
Spark.before(this::filterPublicRequests);
|
||||
Spark.before(this::auditRequestIn);
|
||||
Spark.before(this::filterPublicRequests);
|
||||
Spark.after(this::auditRequestOut);
|
||||
Spark.exception(MessagingException.class, this::handleException);
|
||||
|
||||
@ -86,14 +87,14 @@ public class Service {
|
||||
|
||||
if (!request.pathInfo().startsWith("/public/")) {
|
||||
logger.warn(httpMarker, "External connection to internal API: {} -> {} {}", context, request.requestMethod(), request.pathInfo());
|
||||
Spark.halt(HttpStatus.SC_FORBIDDEN);
|
||||
Spark.halt(403);
|
||||
}
|
||||
|
||||
String url = request.pathInfo();
|
||||
if (request.queryString() != null) {
|
||||
url = url + "?" + request.queryString();
|
||||
}
|
||||
logger.info(httpMarker, "PUBLIC {}: {} {}", Context.fromRequest(request).getIpHash().orElse("?"), request.requestMethod(), url);
|
||||
logger.info(httpMarker, "PUBLIC {}: {} {}", Context.fromRequest(request).getContextId(), request.requestMethod(), url);
|
||||
}
|
||||
|
||||
private Object isInitialized(Request request, Response response) {
|
||||
@ -101,7 +102,7 @@ public class Service {
|
||||
return "ok";
|
||||
}
|
||||
else {
|
||||
response.status(HttpStatus.SC_FAILED_DEPENDENCY);
|
||||
response.status(424);
|
||||
return "bad";
|
||||
}
|
||||
}
|
||||
@ -115,21 +116,21 @@ public class Service {
|
||||
return "ok";
|
||||
}
|
||||
else {
|
||||
response.status(HttpStatus.SC_FAILED_DEPENDENCY);
|
||||
response.status(424);
|
||||
return "bad";
|
||||
}
|
||||
}
|
||||
|
||||
private void auditRequestIn(Request request, Response response) {
|
||||
request_counter.labels(serviceName).inc();
|
||||
|
||||
// Paint context
|
||||
if (!Strings.isNullOrEmpty(request.headers(Context.CONTEXT_HEADER))) {
|
||||
Context.fromRequest(request);
|
||||
}
|
||||
paintThreadName(request, "req:");
|
||||
|
||||
request_counter.labels(serviceName).inc();
|
||||
}
|
||||
|
||||
private void auditRequestOut(Request request, Response response) {
|
||||
ThreadContext.clearMap();
|
||||
|
||||
paintThreadName(request, "rsp:");
|
||||
|
||||
if (response.status() < 400) {
|
||||
request_counter_good.labels(serviceName).inc();
|
||||
@ -143,6 +144,11 @@ public class Service {
|
||||
}
|
||||
}
|
||||
|
||||
private void paintThreadName(Request request, String prefix) {
|
||||
var ctx = Context.fromRequest(request);
|
||||
Thread.currentThread().setName(prefix + ctx.getContextId());
|
||||
}
|
||||
|
||||
private void handleException(Exception ex, Request request, Response response) {
|
||||
request_counter_err.labels(serviceName).inc();
|
||||
if (ex instanceof MessagingException) {
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.wmsa.resource_store;
|
||||
package nu.marginalia.service.server;
|
||||
|
||||
import lombok.SneakyThrows;
|
||||
import spark.Request;
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user