(*) install script for deploying Marginalia outside the codebase

The changeset also makes the control service responsible for flyway migrations. This helps reduce the number of places where the database configuration needs to be kept. These automatic migrations can be disabled with -DdisableFlyway=true.

The commit also adds curl to the docker container, to enable docker health checks and interdependencies.
This commit is contained in:
Viktor Lofgren 2024-01-11 12:40:03 +01:00
parent 205e5016e8
commit 734996002c
29 changed files with 379 additions and 26 deletions

View File

@ -67,7 +67,6 @@ idea {
excludeDirs.add(file("$projectDir/run/dist"))
excludeDirs.add(file("$projectDir/run/db"))
excludeDirs.add(file("$projectDir/run/logs"))
excludeDirs.add(file("$projectDir/run/install"))
excludeDirs.add(file("$projectDir/run/data"))
excludeDirs.add(file("$projectDir/run/conf"))
excludeDirs.add(file("$projectDir/run/test-data"))

View File

@ -20,7 +20,7 @@ public class NodeConfigurationService {
public NodeConfiguration create(int id, String description, boolean acceptQueries) throws SQLException {
try (var conn = dataSource.getConnection();
var is = conn.prepareStatement("""
INSERT INTO NODE_CONFIGURATION(ID, DESCRIPTION, ACCEPT_QUERIES) VALUES(?, ?, ?)
INSERT IGNORE INTO NODE_CONFIGURATION(ID, DESCRIPTION, ACCEPT_QUERIES) VALUES(?, ?, ?)
""")
)
{

View File

@ -23,6 +23,7 @@ dependencies {
implementation libs.bundles.prometheus
implementation libs.bundles.slf4j
implementation libs.bucket4j
implementation libs.bundles.flyway
testImplementation libs.bundles.slf4j.test
implementation libs.bundles.mariadb

View File

@ -7,6 +7,7 @@ import com.zaxxer.hikari.HikariConfig;
import com.zaxxer.hikari.HikariDataSource;
import lombok.SneakyThrows;
import nu.marginalia.service.ServiceHomeNotConfiguredException;
import org.flywaydb.core.Flyway;
import org.mariadb.jdbc.Driver;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -28,10 +29,25 @@ public class DatabaseModule extends AbstractModule {
private final Properties dbProperties;
public DatabaseModule() {
/**
 * Creates the module, loads the database properties and optionally runs
 * the Flyway schema migrations.
 *
 * @param migrate when {@code true}, the migrations found under
 *                {@code classpath:db/migration} are applied against the
 *                data source returned by {@code getMariaDB()}, unless the
 *                {@code disableFlyway} system property is set to
 *                {@code true} (e.g. {@code -DdisableFlyway=true});
 *                when {@code false}, no migration is attempted
 */
public DatabaseModule(boolean migrate) {
    // NOTE(review): presumably instantiated to make sure the MariaDB driver
    // class is loaded before any connection is opened -- confirm.
    new Driver();

    dbProperties = loadDbProperties();

    if (!migrate) {
        return;
    }

    if (Boolean.getBoolean("disableFlyway")) {
        logger.info("Flyway disabled");
        return;
    }

    // load() already yields a fully configured Flyway instance, so migrate
    // on it directly instead of wrapping its configuration in a second one.
    Flyway.configure()
            .dataSource(getMariaDB())
            .locations("classpath:db/migration")
            .load()
            .migrate();
}
private Properties loadDbProperties() {

View File

@ -54,7 +54,7 @@ public class ConverterMain {
Injector injector = Guice.createInjector(
new ConverterModule(),
new ProcessConfigurationModule("converter"),
new DatabaseModule()
new DatabaseModule(false)
);
var converter = injector.getInstance(ConverterMain.class);

View File

@ -124,7 +124,7 @@ public class CrawlerMain {
Injector injector = Guice.createInjector(
new CrawlerModule(),
new ProcessConfigurationModule("crawler"),
new DatabaseModule()
new DatabaseModule(false)
);
var crawler = injector.getInstance(CrawlerMain.class);

View File

@ -56,7 +56,7 @@ public class IndexConstructorMain {
var main = Guice.createInjector(
new IndexConstructorModule(),
new ProcessConfigurationModule("index-constructor"),
new DatabaseModule())
new DatabaseModule(false))
.getInstance(IndexConstructorMain.class);
instructions = main.fetchInstructions();

View File

@ -59,7 +59,7 @@ public class LoaderMain {
Injector injector = Guice.createInjector(
new ProcessConfigurationModule("loader"),
new LoaderModule(),
new DatabaseModule()
new DatabaseModule(false)
);
var instance = injector.getInstance(LoaderMain.class);

View File

@ -186,7 +186,7 @@ public class WebsiteAdjacenciesCalculator {
public static void main(String[] args) throws SQLException {
DatabaseModule dm = new DatabaseModule();
DatabaseModule dm = new DatabaseModule(false);
var dataSource = dm.provideConnection();
var qc = new QueryClient();

View File

@ -20,7 +20,7 @@ public class ApiMain extends MainClass {
init(ServiceId.Api, args);
Injector injector = Guice.createInjector(
new DatabaseModule(),
new DatabaseModule(false),
new ServiceConfigurationModule(SearchServiceDescriptors.descriptors, ServiceId.Api));
injector.getInstance(ApiMain.class);
injector.getInstance(Initialization.class).setReady();

View File

@ -27,7 +27,7 @@ public class DatingMain extends MainClass {
Injector injector = Guice.createInjector(
new DatingModule(),
new ServiceConfigurationModule(SearchServiceDescriptors.descriptors, ServiceId.Dating),
new DatabaseModule()
new DatabaseModule(false)
);
injector.getInstance(DatingMain.class);

View File

@ -27,7 +27,7 @@ public class ExplorerMain extends MainClass {
Injector injector = Guice.createInjector(
new ServiceConfigurationModule(SearchServiceDescriptors.descriptors, ServiceId.Explorer),
new ExplorerModule(),
new DatabaseModule()
new DatabaseModule(false)
);
injector.getInstance(ExplorerMain.class);

View File

@ -28,7 +28,7 @@ public class SearchMain extends MainClass {
Injector injector = Guice.createInjector(
new SearchModule(),
new ServiceConfigurationModule(SearchServiceDescriptors.descriptors, ServiceId.Search),
new DatabaseModule()
new DatabaseModule(false)
);
injector.getInstance(SearchMain.class);

View File

@ -24,7 +24,7 @@ public class AssistantMain extends MainClass {
Injector injector = Guice.createInjector(
new AssistantModule(),
new ServiceConfigurationModule(SearchServiceDescriptors.descriptors, ServiceId.Assistant),
new DatabaseModule()
new DatabaseModule(false)
);
injector.getInstance(AssistantMain.class);

View File

@ -20,7 +20,7 @@ public class ControlMain extends MainClass {
init(ServiceId.Control, args);
Injector injector = Guice.createInjector(
new DatabaseModule(),
new DatabaseModule(true),
new ControlProcessModule(),
new ServiceConfigurationModule(SearchServiceDescriptors.descriptors, ServiceId.Control));

View File

@ -24,7 +24,7 @@ public class ExecutorMain extends MainClass {
Injector injector = Guice.createInjector(
new ExecutorModule(),
new DatabaseModule(),
new DatabaseModule(false),
new ServiceConfigurationModule(SearchServiceDescriptors.descriptors, ServiceId.Executor)
);
injector.getInstance(NodeStatusWatcher.class);

View File

@ -24,7 +24,7 @@ public class IndexMain extends MainClass {
Injector injector = Guice.createInjector(
new IndexModule(),
new DatabaseModule(),
new DatabaseModule(false),
new ServiceConfigurationModule(SearchServiceDescriptors.descriptors, ServiceId.Index)
);

View File

@ -23,7 +23,7 @@ public class QueryMain extends MainClass {
Injector injector = Guice.createInjector(
new QueryModule(),
new DatabaseModule(),
new DatabaseModule(false),
new ServiceConfigurationModule(SearchServiceDescriptors.descriptors, ServiceId.Query)
);

View File

@ -37,7 +37,7 @@ public class ExperimentRunnerMain {
}
Injector injector = Guice.createInjector(
new DatabaseModule(),
new DatabaseModule(false),
new ConverterModule()
);

View File

@ -29,7 +29,7 @@ public class ScreenshotCaptureToolMain {
private static final Logger logger = LoggerFactory.getLogger(ScreenshotCaptureToolMain.class);
public static void main(String[] args) {
DatabaseModule databaseModule = new DatabaseModule();
DatabaseModule databaseModule = new DatabaseModule(false);
var ds = databaseModule.provideConnection();
System.setProperty(ChromeDriverService.CHROME_DRIVER_SILENT_OUTPUT_PROPERTY, "true");

View File

@ -15,7 +15,7 @@ public class ScreenshotLoaderMain {
public static void main(String... args) throws IOException {
org.mariadb.jdbc.Driver driver = new Driver();
var ds = new DatabaseModule().provideConnection();
var ds = new DatabaseModule(false).provideConnection();
try (var tis = new TarArchiveInputStream(new GZIPInputStream(new FileInputStream(args[0])));
var conn = ds.getConnection();

View File

@ -14,6 +14,8 @@ tasks.register('dockerFile') {
#
FROM ${dockerImage}
RUN apt-get update && apt-get install -y curl
ADD ${application.applicationName}.tar /
ADD crawler-process.tar /dist
ADD loader-process.tar /dist

View File

@ -14,6 +14,7 @@ tasks.register('dockerFile') {
#
FROM ${dockerImage}
RUN apt-get update && apt-get install -y curl
ADD ${application.applicationName}.tar /
RUN mkdir /wmsa

98
run/install.sh Executable file
View File

@ -0,0 +1,98 @@
#!/bin/bash
#
# This script will set up a Marginalia instance in a given directory.
# It will create a docker-compose.yml file, and a directory structure
# with the necessary files.  The generated configuration runs a MariaDB
# database in docker; the flyway migrations that set up the database
# are applied by the control service.
#
# After the script is run, the instance can be started with
# $ docker-compose up -d
#
# The instance can be stopped with
# $ docker-compose down -v
#
# It is likely that you will want to edit the docker-compose.yml file
# to change the ports that the services are exposed on, and to change
# the volumes that are mounted.  The default configuration is provided
# as a starting point.

set -e

if ! command -v envsubst &> /dev/null; then
    echo "The envsubst command could not be found, please install it. It is usually part of GNU gettext."
    # A missing prerequisite is a failure; report it through the exit status
    exit 1
fi

if [ -z "${1}" ]; then
    echo "Usage: $0 <install directory>"
    exit 1
fi

if [ -e "${1}" ]; then
    echo "ERROR: Destination ${1} already exists, refusing to overwrite"
    exit 1
fi

# Resolve the destination against the caller's working directory *before*
# we pushd into the script directory below.  Otherwise a relative path would
# be created next to this script rather than where the existence check above
# looked, and docker-compose.yml would get relative bind-mount devices.
case "${1}" in
    /*) INSTALL_DIR="${1}" ;;
    *)  INSTALL_DIR="${PWD}/${1}" ;;
esac

echo "We're going to set up a Mariadb database in docker, please enter some details"

# -r keeps backslashes in the entered values intact
read -r -p "MariaDB user (e.g. marginalia): " MARIADB_USER
read -r -s -p "MariaDB password (e.g. hunter2 ;-): " MARIADB_PASSWORD
echo
read -r -s -p "MariaDB password (again): " MARIADB_PASSWORD2
echo

if [ "${MARIADB_PASSWORD}" != "${MARIADB_PASSWORD2}" ]; then
    echo "ERROR: Passwords do not match"
    exit 1
fi

# envsubst reads its values from the environment
export MARIADB_USER
export MARIADB_PASSWORD

echo "Will install to ${INSTALL_DIR}"
read -r -p "Press enter to continue, or Ctrl-C to abort"

pushd "$(dirname "$0")"

./setup.sh ## Ensure that the setup script has been run

mkdir -p "${INSTALL_DIR}"

echo "** Copying files to ${INSTALL_DIR}"

for dir in model data conf env; do
    if [ ! -d "${dir}" ]; then
        echo "ERROR: ${dir} does not exist"
        exit 1
    fi

    echo "Copying ${dir}/"
    mkdir -p "${INSTALL_DIR}/${dir}"
    # Only the regular files directly inside ${dir} are copied, not subdirectories
    find "${dir}" -maxdepth 1 -type f -exec cp -v {} "${INSTALL_DIR}/{}" \;
done

echo "** Copying settings files"
cp prometheus.yml "${INSTALL_DIR}/"

echo "** Creating directories"
mkdir -p "${INSTALL_DIR}/logs"
mkdir -p "${INSTALL_DIR}/db"
mkdir -p "${INSTALL_DIR}"/index-1/{work,index,backup,storage,uploads}

echo "** Updating settings files"
envsubst < install/mariadb.env.template > "${INSTALL_DIR}/env/mariadb.env"
envsubst < install/db.properties.template > "${INSTALL_DIR}/conf/db.properties"

echo "** Creating docker-compose.yml"

## Hack to get around envsubst substituting these values, which we want to be verbatim
export uval="\$\$MARIADB_USER"
export pval="\$\$MARIADB_PASSWORD"

export INSTALL_DIR
envsubst < install/docker-compose.yml.template > "${INSTALL_DIR}/docker-compose.yml"

popd

View File

@ -0,0 +1,3 @@
db.user=${MARIADB_USER}
db.pass=${MARIADB_PASSWORD}
db.conn=jdbc:mariadb://mariadb:3306/WMSA_prod?rewriteBatchedStatements=true

View File

@ -0,0 +1,226 @@
x-svc: &service
env_file:
- "${INSTALL_DIR}/env/service.env"
volumes:
- conf:/wmsa/conf:ro
- model:/wmsa/model
- data:/wmsa/data
- logs:/var/log/wmsa
networks:
- wmsa
healthcheck:
test: curl -f http://localhost:80/internal/ping || exit 1
start_period: 1s
interval: 5s
timeout: 5s
retries: 60
x-p1: &partition-1
env_file:
- "${INSTALL_DIR}/env/service.env"
healthcheck:
test: curl -f http://localhost:80/internal/ping || exit 1
start_period: 1s
interval: 5s
timeout: 5s
retries: 60
volumes:
- conf:/wmsa/conf:ro
- model:/wmsa/model
- data:/wmsa/data
- logs:/var/log/wmsa
- index-1:/idx
- work-1:/work
- backup-1:/backup
- samples-1:/storage
networks:
- wmsa
environment:
- "WMSA_SERVICE_NODE=1"
services:
index-service-1:
<<: *partition-1
image: "marginalia/index-service"
container_name: "index-service-1"
depends_on:
control-service:
condition: service_healthy
executor-service-1:
<<: *partition-1
image: "marginalia/executor-service"
container_name: "executor-service-1"
depends_on:
control-service:
condition: service_healthy
query-service:
<<: *service
image: "marginalia/query-service"
container_name: "query-service"
depends_on:
control-service:
condition: service_healthy
search-service:
  <<: *service
  image: "marginalia/search-service"
  container_name: "search-service"
  depends_on:
    control-service:
      condition: service_healthy
  expose:
    - 80
  labels:
    - "traefik.enable=true"
    - "traefik.http.routers.search-service.rule=PathPrefix(`/`)"
    - "traefik.http.routers.search-service.entrypoints=search"
    # Labels are a key/value map, so repeating the middlewares key keeps only
    # the last value; both middlewares must be listed in a single label.
    - "traefik.http.routers.search-service.middlewares=add-xpublic,add-public"
    - "traefik.http.middlewares.add-xpublic.headers.customrequestheaders.X-Public=1"
    - "traefik.http.middlewares.add-public.addprefix.prefix=/public"
assistant-service:
  <<: *service
  image: "marginalia/assistant-service"
  container_name: "assistant-service"
  depends_on:
    control-service:
      condition: service_healthy
  expose:
    - 80
  labels:
    - "traefik.enable=true"
    # Labels are a key/value map, so repeating a router's middlewares key keeps
    # only the last value; both middlewares must be listed in a single label.
    - "traefik.http.routers.assistant-service-screenshot.rule=PathPrefix(`/screenshot`)"
    - "traefik.http.routers.assistant-service-screenshot.entrypoints=search"
    - "traefik.http.routers.assistant-service-screenshot.middlewares=add-xpublic,add-public"
    - "traefik.http.routers.assistant-service-suggest.rule=PathPrefix(`/suggest`)"
    - "traefik.http.routers.assistant-service-suggest.entrypoints=search"
    - "traefik.http.routers.assistant-service-suggest.middlewares=add-xpublic,add-public"
    - "traefik.http.middlewares.add-xpublic.headers.customrequestheaders.X-Public=1"
    - "traefik.http.middlewares.add-public.addprefix.prefix=/public"
api-service:
  <<: *service
  image: "marginalia/api-service"
  container_name: "api-service"
  depends_on:
    control-service:
      condition: service_healthy
  expose:
    - "80"
  labels:
    - "traefik.enable=true"
    - "traefik.http.routers.api-service.rule=PathPrefix(`/`)"
    - "traefik.http.routers.api-service.entrypoints=api"
    # Labels are a key/value map, so repeating the middlewares key keeps only
    # the last value; both middlewares must be listed in a single label.
    - "traefik.http.routers.api-service.middlewares=add-xpublic,add-public"
    - "traefik.http.middlewares.add-xpublic.headers.customrequestheaders.X-Public=1"
    - "traefik.http.middlewares.add-public.addprefix.prefix=/public"
control-service:
  <<: *service
  image: "marginalia/control-service"
  container_name: "control-service"
  # Every other service waits for this one to become healthy, and this one
  # in turn waits for the database.
  depends_on:
    mariadb:
      condition: service_healthy
  expose:
    - 80
  labels:
    - "traefik.enable=true"
    - "traefik.http.routers.control-service.rule=PathPrefix(`/`)"
    - "traefik.http.routers.control-service.entrypoints=control"
    # Labels are a key/value map, so repeating the middlewares key keeps only
    # the last value; both middlewares must be listed in a single label.
    - "traefik.http.routers.control-service.middlewares=add-xpublic,add-public"
    - "traefik.http.middlewares.add-xpublic.headers.customrequestheaders.X-Public=1"
    - "traefik.http.middlewares.add-public.addprefix.prefix=/public"
mariadb:
image: "mariadb:lts"
container_name: "mariadb"
env_file: "${INSTALL_DIR}/env/mariadb.env"
command: ['mysqld', '--character-set-server=utf8mb4', '--collation-server=utf8mb4_unicode_ci']
ports:
- "127.0.0.1:3306:3306/tcp"
healthcheck:
test: mysqladmin ping -h 127.0.0.1 -u ${uval} --password=${pval}
start_period: 5s
interval: 5s
timeout: 5s
retries: 60
volumes:
- db:/var/lib/mysql
networks:
- wmsa
traefik:
image: "traefik:v2.10"
container_name: "traefik"
command:
#- "--log.level=DEBUG"
- "--api.insecure=true"
- "--providers.docker=true"
- "--providers.docker.exposedbydefault=false"
- "--entrypoints.search.address=:80"
- "--entrypoints.control.address=:81"
- "--entrypoints.api.address=:82"
ports:
- "127.0.0.1:8080:80"
- "127.0.0.1:8081:81"
- "127.0.0.1:8082:82"
- "127.0.0.1:8090:8080"
volumes:
- "/var/run/docker.sock:/var/run/docker.sock:ro"
networks:
- wmsa
networks:
wmsa:
volumes:
db:
driver: local
driver_opts:
type: none
o: bind
device: ${INSTALL_DIR}/db
logs:
driver: local
driver_opts:
type: none
o: bind
device: ${INSTALL_DIR}/logs
model:
driver: local
driver_opts:
type: none
o: bind
device: ${INSTALL_DIR}/model
conf:
driver: local
driver_opts:
type: none
o: bind
device: ${INSTALL_DIR}/conf
data:
driver: local
driver_opts:
type: none
o: bind
device: ${INSTALL_DIR}/data
samples-1:
driver: local
driver_opts:
type: none
o: bind
device: ${INSTALL_DIR}/index-1/storage
index-1:
driver: local
driver_opts:
type: none
o: bind
device: ${INSTALL_DIR}/index-1/index
work-1:
driver: local
driver_opts:
type: none
o: bind
device: ${INSTALL_DIR}/index-1/work
backup-1:
driver: local
driver_opts:
type: none
o: bind
device: ${INSTALL_DIR}/index-1/backup

View File

@ -0,0 +1,4 @@
MARIADB_RANDOM_ROOT_PASSWORD=1
MARIADB_DATABASE=WMSA_prod
MARIADB_USER=${MARIADB_USER}
MARIADB_PASSWORD=${MARIADB_PASSWORD}

View File

@ -6,13 +6,12 @@ bootstrap this directory structure.
## Requirements
While the system is designed to run bare metal in production,
for local development, you're strongly encouraged to use docker
or podman. These are a bit of a pain to install, but if you follow
[this guide](https://docs.docker.com/engine/install/ubuntu/#install-using-the-repository) you're on the right track.
**Docker** - It is a bit of a pain to install, but if you follow
[this guide](https://docs.docker.com/engine/install/ubuntu/#install-using-the-repository) you're on the right track for Ubuntu-like systems.
The system requires JDK21+, and uses Java 21 preview features. Gradle complains
a bit about this since it's not currently supported, but it works anyway.
**JDK 21** - The code uses Java 21 preview features.
The civilized way of installing this is to use [SDKMAN](https://sdkman.io/);
graalce is a good distribution choice but it doesn't matter too much.
## Set up

View File

@ -184,6 +184,9 @@ dependencyResolutionManagement {
library('zstd','com.github.luben','zstd-jni').version('1.5.2-2')
library('lz4','org.lz4','lz4-java').version('1.8.0')
library('flyway.core','org.flywaydb','flyway-core').version('10.4.1')
library('flyway.mysql','org.flywaydb','flyway-mysql').version('10.4.1')
library('jsoup','org.jsoup','jsoup').version('1.15.3')
library('snakeyaml','org.yaml','snakeyaml').version('1.33') // Known CVE, but this library only parses internal config files so it's fine
@ -214,6 +217,7 @@ dependencyResolutionManagement {
bundle('httpcomponents', ['httpcomponents.core', 'httpcomponents.client'])
bundle('parquet', ['parquet-column', 'parquet-hadoop'])
bundle('junit', ['junit.jupiter', 'junit.jupiter.engine'])
bundle('flyway', ['flyway.core', 'flyway.mysql'])
}