Move database to a separate module

* Move the database code into a separate project and break the SQL schema file apart into separate per-entity files.
* Fix the front page news listing.
Viktor authored 2023-03-25 15:26:17 +01:00, committed by GitHub
parent 0b505939ed
commit ac1ac3ea57
57 changed files with 326 additions and 215 deletions

View File

@ -0,0 +1,50 @@
plugins {
id 'java'
id "io.freefair.lombok" version "5.3.3.3"
id 'jvm-test-suite'
}
java {
toolchain {
languageVersion.set(JavaLanguageVersion.of(17))
}
}
dependencies {
implementation project(':code:common:model')
implementation libs.lombok
annotationProcessor libs.lombok
implementation libs.bundles.slf4j
implementation libs.guice
implementation libs.bundles.gson
implementation libs.notnull
implementation libs.commons.lang3
implementation libs.trove
implementation libs.rxjava
implementation libs.bundles.mariadb
testImplementation libs.bundles.slf4j.test
testImplementation libs.bundles.junit
testImplementation libs.mockito
}
test {
maxParallelForks = Runtime.runtime.availableProcessors().intdiv(2) ?: 1
maxHeapSize = "8G"
useJUnitPlatform()
}
task fastTests(type: Test) {
maxParallelForks = Runtime.runtime.availableProcessors().intdiv(2) ?: 1
maxHeapSize = "8G"
useJUnitPlatform {
excludeTags "slow"
}
}
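As wired up here, the plain test task runs the whole suite on the JUnit platform, while the new fastTests task runs the same configuration but excludes anything tagged "slow"; both fork up to half the available processors with an 8G heap. It is invoked the usual way, e.g. ./gradlew fastTests.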

View File

@ -1,4 +1,4 @@
package nu.marginalia.model.dbcommon;
package nu.marginalia.db;
import com.google.common.cache.Cache;

View File

@ -1,4 +1,4 @@
package nu.marginalia.model.dbcommon;
package nu.marginalia.db;
import com.google.inject.ImplementedBy;
import gnu.trove.set.hash.TIntHashSet;

View File

@ -1,4 +1,4 @@
package nu.marginalia.model.dbcommon;
package nu.marginalia.db;
import com.google.inject.Inject;
import com.google.inject.Singleton;

View File

@ -1,19 +1,3 @@
DROP TABLE IF EXISTS DOMAIN_METADATA;
DROP TABLE IF EXISTS EC_FEED_URL;
DROP TABLE IF EXISTS EC_DOMAIN_LINK;
DROP TABLE IF EXISTS EC_PAGE_DATA;
DROP TABLE IF EXISTS EC_URL;
DROP TABLE IF EXISTS EC_DOMAIN_NEIGHBORS;
DROP TABLE IF EXISTS EC_DOMAIN;
CREATE TABLE IF NOT EXISTS DOMAIN_METADATA (
ID INT PRIMARY KEY,
KNOWN_URLS INT DEFAULT 0,
VISITED_URLS INT DEFAULT 0,
GOOD_URLS INT DEFAULT 0
);
CREATE TABLE IF NOT EXISTS EC_DOMAIN (
ID INT PRIMARY KEY AUTO_INCREMENT,
@ -36,12 +20,6 @@ CREATE TABLE IF NOT EXISTS EC_DOMAIN (
CHARACTER SET utf8mb4
COLLATE utf8mb4_unicode_ci;
CREATE TABLE IF NOT EXISTS EC_DOMAIN_BLACKLIST (
ID INT PRIMARY KEY AUTO_INCREMENT,
URL_DOMAIN VARCHAR(255) UNIQUE NOT NULL
)
CHARACTER SET utf8mb4
COLLATE utf8mb4_unicode_ci;
CREATE TABLE IF NOT EXISTS EC_URL (
ID INT PRIMARY KEY AUTO_INCREMENT,
@ -84,37 +62,6 @@ CREATE TABLE IF NOT EXISTS EC_PAGE_DATA (
CHARACTER SET utf8mb4
COLLATE utf8mb4_unicode_ci;
CREATE TABLE EC_FEED_URL (
URL VARCHAR(255) PRIMARY KEY,
DOMAIN_ID INT,
FOREIGN KEY (DOMAIN_ID) REFERENCES EC_DOMAIN(ID) ON DELETE CASCADE
)
CHARACTER SET utf8mb4
COLLATE utf8mb4_unicode_ci;
CREATE TABLE EC_DOMAIN_NEIGHBORS (
ID INT PRIMARY KEY AUTO_INCREMENT,
DOMAIN_ID INT NOT NULL,
NEIGHBOR_ID INT NOT NULL,
ADJ_IDX INT NOT NULL,
CONSTRAINT CONS UNIQUE (DOMAIN_ID, ADJ_IDX),
FOREIGN KEY (DOMAIN_ID) REFERENCES EC_DOMAIN(ID) ON DELETE CASCADE
)
CHARACTER SET utf8mb4
COLLATE utf8mb4_unicode_ci;
CREATE TABLE EC_DOMAIN_NEIGHBORS_2 (
DOMAIN_ID INT NOT NULL,
NEIGHBOR_ID INT NOT NULL,
RELATEDNESS DOUBLE NOT NULL,
PRIMARY KEY (DOMAIN_ID, NEIGHBOR_ID),
FOREIGN KEY (DOMAIN_ID) REFERENCES EC_DOMAIN(ID) ON DELETE CASCADE,
FOREIGN KEY (NEIGHBOR_ID) REFERENCES EC_DOMAIN(ID) ON DELETE CASCADE
);
CREATE TABLE IF NOT EXISTS EC_DOMAIN_LINK (
ID INT PRIMARY KEY AUTO_INCREMENT,
SOURCE_DOMAIN_ID INT NOT NULL,
@ -126,6 +73,24 @@ CREATE TABLE IF NOT EXISTS EC_DOMAIN_LINK (
FOREIGN KEY (DEST_DOMAIN_ID) REFERENCES EC_DOMAIN(ID) ON DELETE CASCADE
);
CREATE TABLE IF NOT EXISTS DOMAIN_METADATA (
ID INT PRIMARY KEY,
KNOWN_URLS INT DEFAULT 0,
VISITED_URLS INT DEFAULT 0,
GOOD_URLS INT DEFAULT 0,
FOREIGN KEY (ID) REFERENCES EC_DOMAIN(ID) ON DELETE CASCADE
);
CREATE TABLE EC_FEED_URL (
URL VARCHAR(255) PRIMARY KEY,
DOMAIN_ID INT,
FOREIGN KEY (DOMAIN_ID) REFERENCES EC_DOMAIN(ID) ON DELETE CASCADE
)
CHARACTER SET utf8mb4
COLLATE utf8mb4_unicode_ci;
CREATE OR REPLACE VIEW EC_URL_VIEW AS
SELECT
CONCAT(EC_URL.PROTO,
@ -141,9 +106,7 @@ CREATE OR REPLACE VIEW EC_URL_VIEW AS
EC_DOMAIN.DOMAIN_TOP AS DOMAIN_TOP,
EC_URL.ID AS ID,
EC_DOMAIN.ID AS DOMAIN_ID,
EC_URL.VISITED AS VISITED,
EC_PAGE_DATA.QUALITY AS QUALITY,
EC_PAGE_DATA.DATA_HASH AS DATA_HASH,
EC_PAGE_DATA.TITLE AS TITLE,
@ -151,7 +114,6 @@ CREATE OR REPLACE VIEW EC_URL_VIEW AS
EC_PAGE_DATA.WORDS_TOTAL AS WORDS_TOTAL,
EC_PAGE_DATA.FORMAT AS FORMAT,
EC_PAGE_DATA.FEATURES AS FEATURES,
EC_DOMAIN.IP AS IP,
EC_URL.STATE AS STATE,
EC_DOMAIN.RANK AS RANK,
@ -162,17 +124,6 @@ CREATE OR REPLACE VIEW EC_URL_VIEW AS
INNER JOIN EC_DOMAIN
ON EC_URL.DOMAIN_ID = EC_DOMAIN.ID;
CREATE OR REPLACE VIEW EC_NEIGHBORS_VIEW AS
SELECT
DOM.DOMAIN_NAME AS DOMAIN_NAME,
DOM.ID AS DOMAIN_ID,
NEIGHBOR.DOMAIN_NAME AS NEIGHBOR_NAME,
NEIGHBOR.ID AS NEIGHBOR_ID,
ROUND(100 * RELATEDNESS) AS RELATEDNESS
FROM EC_DOMAIN_NEIGHBORS_2
INNER JOIN EC_DOMAIN DOM ON DOMAIN_ID=DOM.ID
INNER JOIN EC_DOMAIN NEIGHBOR ON NEIGHBOR_ID=NEIGHBOR.ID;
CREATE OR REPLACE VIEW EC_RELATED_LINKS_VIEW AS
SELECT
@ -189,117 +140,5 @@ CREATE OR REPLACE VIEW EC_RELATED_LINKS_VIEW AS
ON DEST_DOMAIN.ID=DEST_DOMAIN_ID
;
CREATE OR REPLACE VIEW EC_RELATED_LINKS_IN AS
SELECT
IN_URL.ID AS SRC_URL_ID,
OUT_URL.ID AS DEST_URL_ID
FROM EC_DOMAIN_LINK
INNER JOIN EC_URL AS IN_URL ON IN_URL.DOMAIN_ID=EC_DOMAIN_LINK.SOURCE_DOMAIN_ID
INNER JOIN EC_URL AS OUT_URL ON OUT_URL.DOMAIN_ID=EC_DOMAIN_LINK.DEST_DOMAIN_ID
WHERE IN_URL.VISITED AND IN_URL.STATE = 'ok'
AND OUT_URL.VISITED AND OUT_URL.STATE = 'ok';
CREATE TABLE IF NOT EXISTS EC_API_KEY (
LICENSE_KEY VARCHAR(255) UNIQUE,
LICENSE VARCHAR(255) NOT NULL,
NAME VARCHAR(255) NOT NULL,
EMAIL VARCHAR(255) NOT NULL,
RATE INT DEFAULT 10
);
CREATE INDEX IF NOT EXISTS EC_DOMAIN_INDEXED_INDEX ON EC_DOMAIN (INDEXED);
CREATE INDEX IF NOT EXISTS EC_DOMAIN_TOP_DOMAIN ON EC_DOMAIN (DOMAIN_TOP);
---;
CREATE TABLE IF NOT EXISTS EC_RANDOM_DOMAINS (
DOMAIN_ID INT PRIMARY KEY,
DOMAIN_SET INT NOT NULL
);
---;
DROP TABLE IF EXISTS REF_DICTIONARY;
CREATE TABLE IF NOT EXISTS REF_DICTIONARY (
TYPE VARCHAR(16),
WORD VARCHAR(255),
DEFINITION VARCHAR(255)
)
CHARACTER SET utf8mb4
COLLATE utf8mb4_unicode_ci;
---;
CREATE INDEX IF NOT EXISTS REF_DICTIONARY_WORD ON REF_DICTIONARY (WORD);
CREATE TABLE IF NOT EXISTS REF_WIKI_ARTICLE (
NAME VARCHAR(255) PRIMARY KEY,
REF_NAME VARCHAR(255) COMMENT "If this is a redirect, it redirects to this REF_WIKI_ARTICLE.NAME",
ENTRY LONGBLOB
)
ROW_FORMAT=DYNAMIC
CHARACTER SET utf8mb4
COLLATE utf8mb4_unicode_ci;
---;
CREATE TABLE IF NOT EXISTS DATA_DOMAIN_SCREENSHOT (
DOMAIN_NAME VARCHAR(255) PRIMARY KEY,
CONTENT_TYPE ENUM ('image/png', 'image/webp', 'image/svg+xml') NOT NULL,
DATA LONGBLOB NOT NULL
)
ROW_FORMAT=DYNAMIC
CHARACTER SET utf8mb4
COLLATE utf8mb4_unicode_ci;
CREATE TABLE DATA_DOMAIN_HISTORY (
DOMAIN_NAME VARCHAR(255) PRIMARY KEY,
SCREENSHOT_DATE DATE DEFAULT NOW()
) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;
CREATE TABLE CRAWL_QUEUE(
DOMAIN_NAME VARCHAR(255) UNIQUE,
SOURCE VARCHAR(255)
) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;
CREATE TABLE DOMAIN_COMPLAINT(
ID INT PRIMARY KEY AUTO_INCREMENT,
DOMAIN_ID INT NOT NULL,
CATEGORY VARCHAR(255) NOT NULL,
DESCRIPTION TEXT,
SAMPLE VARCHAR(255),
FILE_DATE TIMESTAMP NOT NULL DEFAULT NOW(),
REVIEWED BOOLEAN AS (REVIEW_DATE > 0) VIRTUAL,
DECISION VARCHAR(255),
REVIEW_DATE TIMESTAMP,
FOREIGN KEY (DOMAIN_ID) REFERENCES EC_DOMAIN(ID) ON DELETE CASCADE
);
---
CREATE TABLE WMSA_PROCESS(
ID BIGINT PRIMARY KEY,
NAME VARCHAR(255) NOT NULL,
TYPE ENUM('SERVICE', 'TASK') NOT NULL,
START DATETIME NOT NULL DEFAULT NOW(),
UPDATED DATETIME,
FINISHED DATETIME,
PROGRESS DOUBLE DEFAULT 0,
PROCESS_STATUS ENUM('RUNNING', 'FINISHED', 'DEAD') NOT NULL DEFAULT 'RUNNING',
PROCESS_SUBSTATUS ENUM('NA', 'OK', 'FAIL') NOT NULL DEFAULT 'NA',
MUTEX VARCHAR(255),
TIMEOUT INT NOT NULL DEFAULT 60
);
---
CREATE TABLE SEARCH_NEWS_FEED(
ID INT PRIMARY KEY AUTO_INCREMENT,
TITLE VARCHAR(255),
LINK VARCHAR(255),
LIST_DATE DATE
) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin;

View File

@ -0,0 +1,7 @@
CREATE TABLE IF NOT EXISTS EC_DOMAIN_BLACKLIST (
ID INT PRIMARY KEY AUTO_INCREMENT,
URL_DOMAIN VARCHAR(255) UNIQUE NOT NULL
)
CHARACTER SET utf8mb4
COLLATE utf8mb4_unicode_ci;

View File

@ -0,0 +1,19 @@
CREATE TABLE IF NOT EXISTS REF_DICTIONARY (
TYPE VARCHAR(16),
WORD VARCHAR(255),
DEFINITION VARCHAR(255)
)
CHARACTER SET utf8mb4
COLLATE utf8mb4_unicode_ci;
CREATE TABLE IF NOT EXISTS REF_WIKI_ARTICLE (
NAME VARCHAR(255) PRIMARY KEY,
REF_NAME VARCHAR(255) COMMENT "If this is a redirect, it redirects to this REF_WIKI_ARTICLE.NAME",
ENTRY LONGBLOB
)
ROW_FORMAT=DYNAMIC
CHARACTER SET utf8mb4
COLLATE utf8mb4_unicode_ci;
CREATE INDEX IF NOT EXISTS REF_DICTIONARY_WORD ON REF_DICTIONARY (WORD);

View File

@ -0,0 +1,5 @@
CREATE TABLE CRAWL_QUEUE(
DOMAIN_NAME VARCHAR(255) UNIQUE,
SOURCE VARCHAR(255)
) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;

View File

@ -0,0 +1,13 @@
CREATE TABLE IF NOT EXISTS DATA_DOMAIN_SCREENSHOT (
DOMAIN_NAME VARCHAR(255) PRIMARY KEY,
CONTENT_TYPE ENUM ('image/png', 'image/webp', 'image/svg+xml') NOT NULL,
DATA LONGBLOB NOT NULL
)
ROW_FORMAT=DYNAMIC
CHARACTER SET utf8mb4
COLLATE utf8mb4_unicode_ci;
CREATE TABLE DATA_DOMAIN_HISTORY (
DOMAIN_NAME VARCHAR(255) PRIMARY KEY,
SCREENSHOT_DATE DATE DEFAULT NOW()
) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;

View File

@ -0,0 +1,15 @@
CREATE TABLE DOMAIN_COMPLAINT(
ID INT PRIMARY KEY AUTO_INCREMENT,
DOMAIN_ID INT NOT NULL,
CATEGORY VARCHAR(255) NOT NULL,
DESCRIPTION TEXT,
SAMPLE VARCHAR(255),
FILE_DATE TIMESTAMP NOT NULL DEFAULT NOW(),
REVIEWED BOOLEAN AS (REVIEW_DATE > 0) VIRTUAL,
DECISION VARCHAR(255),
REVIEW_DATE TIMESTAMP,
FOREIGN KEY (DOMAIN_ID) REFERENCES EC_DOMAIN(ID) ON DELETE CASCADE
) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;
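Note that REVIEWED is a virtual column derived from REVIEW_DATE, so a complaint is marked as handled simply by recording a review date. A minimal, hypothetical illustration (the domain id, category and decision values are made up, and the referenced EC_DOMAIN row is assumed to exist):

-- filing a complaint leaves REVIEW_DATE unset, so REVIEWED is not yet true
INSERT INTO DOMAIN_COMPLAINT(DOMAIN_ID, CATEGORY, DESCRIPTION)
VALUES (1, 'spam', 'link farm');

-- recording a decision sets REVIEW_DATE, which makes REVIEWED evaluate to true
UPDATE DOMAIN_COMPLAINT
SET DECISION = 'rejected', REVIEW_DATE = NOW()
WHERE DOMAIN_ID = 1;

SELECT ID, CATEGORY, DECISION, REVIEWED FROM DOMAIN_COMPLAINT WHERE DOMAIN_ID = 1;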

View File

@ -0,0 +1,7 @@
CREATE TABLE IF NOT EXISTS EC_API_KEY (
LICENSE_KEY VARCHAR(255) UNIQUE,
LICENSE VARCHAR(255) NOT NULL,
NAME VARCHAR(255) NOT NULL,
EMAIL VARCHAR(255) NOT NULL,
RATE INT DEFAULT 10
);

View File

@ -0,0 +1,34 @@
CREATE TABLE EC_DOMAIN_NEIGHBORS (
ID INT PRIMARY KEY AUTO_INCREMENT,
DOMAIN_ID INT NOT NULL,
NEIGHBOR_ID INT NOT NULL,
ADJ_IDX INT NOT NULL,
CONSTRAINT CONS UNIQUE (DOMAIN_ID, ADJ_IDX),
FOREIGN KEY (DOMAIN_ID) REFERENCES EC_DOMAIN(ID) ON DELETE CASCADE
)
CHARACTER SET utf8mb4
COLLATE utf8mb4_unicode_ci;
CREATE TABLE EC_DOMAIN_NEIGHBORS_2 (
DOMAIN_ID INT NOT NULL,
NEIGHBOR_ID INT NOT NULL,
RELATEDNESS DOUBLE NOT NULL,
PRIMARY KEY (DOMAIN_ID, NEIGHBOR_ID),
FOREIGN KEY (DOMAIN_ID) REFERENCES EC_DOMAIN(ID) ON DELETE CASCADE,
FOREIGN KEY (NEIGHBOR_ID) REFERENCES EC_DOMAIN(ID) ON DELETE CASCADE
);
CREATE OR REPLACE VIEW EC_NEIGHBORS_VIEW AS
SELECT
DOM.DOMAIN_NAME AS DOMAIN_NAME,
DOM.ID AS DOMAIN_ID,
NEIGHBOR.DOMAIN_NAME AS NEIGHBOR_NAME,
NEIGHBOR.ID AS NEIGHBOR_ID,
ROUND(100 * RELATEDNESS) AS RELATEDNESS
FROM EC_DOMAIN_NEIGHBORS_2
INNER JOIN EC_DOMAIN DOM ON DOMAIN_ID=DOM.ID
INNER JOIN EC_DOMAIN NEIGHBOR ON NEIGHBOR_ID=NEIGHBOR.ID;

View File

@ -0,0 +1,5 @@
CREATE TABLE IF NOT EXISTS EC_RANDOM_DOMAINS (
DOMAIN_ID INT PRIMARY KEY,
DOMAIN_SET INT NOT NULL
);

View File

@ -0,0 +1,8 @@
CREATE TABLE SEARCH_NEWS_FEED (
ID INT PRIMARY KEY AUTO_INCREMENT,
TITLE VARCHAR(255) NOT NULL,
LINK VARCHAR(255) UNIQUE NOT NULL,
SOURCE VARCHAR(255),
LIST_DATE DATE NOT NULL
) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin;
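For context, the front page service further down in this diff reads this table newest-first with a query along these lines:

SELECT TITLE, LINK, SOURCE, LIST_DATE
FROM SEARCH_NEWS_FEED
ORDER BY LIST_DATE DESC;

SOURCE may be null (several of the seeded entries below leave it out), in which case the template simply renders the date on its own.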

View File

@ -0,0 +1,76 @@
INSERT IGNORE INTO SEARCH_NEWS_FEED(TITLE, LINK, SOURCE, LIST_DATE) VALUES (
'A search engine that favors text-heavy sites and punishes modern web design',
'https://news.ycombinator.com/item?id=28550764',
'Hacker News',
'2021-09-16'
);
INSERT IGNORE INTO SEARCH_NEWS_FEED(TITLE, LINK, SOURCE, LIST_DATE) VALUES (
'A Search Engine Designed To Surprise You',
'https://onezero.medium.com/a-search-engine-designed-to-surprise-you-b81944ed5c06',
'Clive Thompson OneZero',
'2021-09-16'
);
INSERT IGNORE INTO SEARCH_NEWS_FEED(TITLE, LINK, SOURCE, LIST_DATE) VALUES (
'🎂 First anniversary! 🎊',
'https://memex.marginalia.nu/log/49-marginalia-1-year.gmi',
null,
'2022-02-26');
INSERT IGNORE INTO SEARCH_NEWS_FEED(TITLE, LINK, SOURCE, LIST_DATE) VALUES (
'Marginalia Search - Serendipity Engineering',
'https://www.metafilter.com/194653/Marginalia-Search-Serendipity-Engineering',
'MetaFilter',
'2022-03-09');
INSERT IGNORE INTO SEARCH_NEWS_FEED(TITLE, LINK, SOURCE, LIST_DATE) VALUES (
'What Google Search Isn\'t Showing You',
'https://www.newyorker.com/culture/infinite-scroll/what-google-search-isnt-showing-you',
'The New Yorker 🎩',
'2022-03-10'
);
INSERT IGNORE INTO SEARCH_NEWS_FEED(TITLE, LINK, SOURCE, LIST_DATE) VALUES (
'You Should Check Out the Indie Web 🎞️',
'https://www.youtube.com/watch?v=rTSEr0cRJY8',
'YouTube, You\'ve Got Kat',
'2022-03-15'
);
INSERT IGNORE INTO SEARCH_NEWS_FEED(TITLE, LINK, SOURCE, LIST_DATE) VALUES (
'Marginalia Goes Open Source',
'https://news.ycombinator.com/item?id=31536626',
'Hacker News',
'2022-05-28'
);
INSERT IGNORE INTO SEARCH_NEWS_FEED(TITLE, LINK, SOURCE, LIST_DATE) VALUES (
'Kritik an Googles Suche - Platzhirsch auf dem Nebenschauplatz',
'https://www.deutschlandfunkkultur.de/google-suche-100.html',
'Deutschlandfunk Kultur 🇩🇪',
'2022-08-18'
);
INSERT IGNORE INTO SEARCH_NEWS_FEED(TITLE, LINK, SOURCE, LIST_DATE) VALUES (
'Google ei enää tiedä',
'https://www.hs.fi/visio/art-2000009139237.html',
'Helsingin Sanomat 🇫🇮',
'2022-10-19'
);
INSERT IGNORE INTO SEARCH_NEWS_FEED(TITLE, LINK, SOURCE, LIST_DATE) VALUES (
'Marginalia\'s Index Reaches 100,000,000 Documents 🎊',
'https://memex.marginalia.nu/log/64-hundred-million.gmi',
null,
'2022-10-21'
);
INSERT IGNORE INTO SEARCH_NEWS_FEED(TITLE, LINK, SOURCE, LIST_DATE) VALUES (
'Marginalia Receives NLnet grant',
'https://memex.marginalia.nu/log/74-marginalia-2-years.gmi',
null,
'2023-02-26'
);

View File

@ -14,6 +14,7 @@ java {
}
dependencies {
implementation project(':code:common:db')
implementation project(':code:common:model')
implementation project(':code:common:service')

View File

@ -3,7 +3,7 @@ package nu.marginalia.ranking.data;
import com.google.inject.Inject;
import com.google.inject.Singleton;
import com.zaxxer.hikari.HikariDataSource;
import nu.marginalia.model.dbcommon.DomainBlacklistImpl;
import nu.marginalia.db.DomainBlacklistImpl;
import nu.marginalia.model.crawl.DomainIndexingState;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

View File

@ -3,7 +3,7 @@ package nu.marginalia.ranking.data;
import com.google.inject.Inject;
import com.google.inject.Singleton;
import com.zaxxer.hikari.HikariDataSource;
import nu.marginalia.model.dbcommon.DomainBlacklistImpl;
import nu.marginalia.db.DomainBlacklistImpl;
import org.slf4j.LoggerFactory;
import java.sql.SQLException;

View File

@ -2,7 +2,7 @@ package nu.marginalia.ranking.tool;
import com.zaxxer.hikari.HikariDataSource;
import lombok.SneakyThrows;
import nu.marginalia.model.dbcommon.DomainBlacklistImpl;
import nu.marginalia.db.DomainBlacklistImpl;
import nu.marginalia.ranking.StandardPageRank;
import nu.marginalia.ranking.accumulator.RankingResultListAccumulator;
import nu.marginalia.ranking.data.RankingDomainFetcherForSimilarityData;

View File

@ -13,7 +13,7 @@ import lombok.SneakyThrows;
import nu.marginalia.ranking.RankingAlgorithm;
import nu.marginalia.ranking.data.RankingDomainData;
import nu.marginalia.ranking.data.RankingDomainFetcher;
import nu.marginalia.model.dbcommon.DomainBlacklistImpl;
import nu.marginalia.db.DomainBlacklistImpl;
import nu.marginalia.service.module.DatabaseModule;
import org.jetbrains.annotations.NotNull;
import org.slf4j.Logger;

View File

@ -3,7 +3,7 @@ package nu.marginalia.ranking.tool;
import lombok.SneakyThrows;
import nu.marginalia.ranking.accumulator.RankingResultListAccumulator;
import nu.marginalia.ranking.data.RankingDomainFetcher;
import nu.marginalia.model.dbcommon.DomainBlacklistImpl;
import nu.marginalia.db.DomainBlacklistImpl;
import nu.marginalia.ranking.StandardPageRank;
import nu.marginalia.ranking.data.RankingDomainFetcherForSimilarityData;
import nu.marginalia.service.module.DatabaseModule;

View File

@ -6,7 +6,7 @@ import nu.marginalia.ranking.StandardPageRank;
import nu.marginalia.ranking.accumulator.RankingResultListAccumulator;
import nu.marginalia.ranking.data.RankingDomainFetcherForSimilarityData;
import nu.marginalia.model.dbcommon.DomainBlacklistImpl;
import nu.marginalia.db.DomainBlacklistImpl;
import nu.marginalia.service.module.DatabaseModule;
import org.mariadb.jdbc.Driver;
import org.slf4j.Logger;

View File

@ -13,6 +13,7 @@ java {
dependencies {
implementation project(':code:common:model')
implementation project(':code:common:db')
implementation project(':code:common:service')
implementation libs.lombok

View File

@ -5,7 +5,7 @@ import com.google.inject.Singleton;
import com.zaxxer.hikari.HikariDataSource;
import nu.marginalia.browse.model.BrowseResult;
import nu.marginalia.model.EdgeDomain;
import nu.marginalia.model.dbcommon.DomainBlacklist;
import nu.marginalia.db.DomainBlacklist;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

View File

@ -5,7 +5,7 @@ import com.google.inject.Singleton;
import com.zaxxer.hikari.HikariDataSource;
import nu.marginalia.browse.model.BrowseResult;
import nu.marginalia.model.EdgeDomain;
import nu.marginalia.model.dbcommon.DomainBlacklist;
import nu.marginalia.db.DomainBlacklist;
import nu.marginalia.model.id.EdgeId;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

View File

@ -6,7 +6,7 @@ import com.zaxxer.hikari.HikariDataSource;
import nu.marginalia.browse.model.BrowseResult;
import nu.marginalia.model.EdgeDomain;
import nu.marginalia.model.EdgeUrl;
import nu.marginalia.model.dbcommon.DomainBlacklist;
import nu.marginalia.db.DomainBlacklist;
import nu.marginalia.model.id.EdgeId;
import nu.marginalia.model.id.EdgeIdCollection;
import org.slf4j.Logger;

View File

@ -5,7 +5,7 @@ import gnu.trove.map.hash.TIntIntHashMap;
import gnu.trove.map.hash.TIntObjectHashMap;
import gnu.trove.set.hash.TIntHashSet;
import lombok.SneakyThrows;
import nu.marginalia.model.dbcommon.DbDomainQueries;
import nu.marginalia.db.DbDomainQueries;
import nu.marginalia.model.EdgeDomain;
import nu.marginalia.model.id.EdgeId;
import nu.marginalia.service.module.DatabaseModule;

View File

@ -12,6 +12,7 @@ java {
dependencies {
implementation project(':code:common:model')
implementation project(':code:common:db')
implementation project(':code:common:service')
implementation libs.lombok

View File

@ -5,14 +5,13 @@ import com.google.inject.Inject;
import com.zaxxer.hikari.HikariDataSource;
import lombok.SneakyThrows;
import nu.marginalia.model.EdgeDomain;
import nu.marginalia.model.dbcommon.DbDomainQueries;
import nu.marginalia.db.DbDomainQueries;
import nu.marginalia.model.id.EdgeId;
import org.apache.commons.io.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import spark.Request;
import spark.Response;
import spark.Spark;
import java.sql.SQLException;

View File

@ -75,6 +75,9 @@ public class SqlLoadDomainLinks {
}
catch (SQLException ex) {
logger.warn("SQL error inserting domain links", ex);
if (getClass().desiredAssertionStatus())
throw new RuntimeException(ex);
}
}
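This guard recurs in the loaders below: when assertions are enabled for the JVM (as test runs typically are, via -ea), a failed insert is escalated to a RuntimeException instead of only being logged, while production keeps the old warn-and-continue behaviour.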

View File

@ -33,6 +33,9 @@ public class SqlLoadDomainMetadata {
stmt.executeUpdate();
} catch (SQLException ex) {
logger.warn("SQL error inserting domains", ex);
if (getClass().desiredAssertionStatus())
throw new RuntimeException(ex);
}
}
}

View File

@ -57,6 +57,8 @@ public class SqlLoadDomains {
}
catch (SQLException ex) {
logger.warn("SQL error inserting domain", ex);
if (getClass().desiredAssertionStatus())
throw new RuntimeException(ex);
}

View File

@ -120,6 +120,9 @@ public class SqlLoadProcessedDocument {
} catch (SQLException ex) {
logger.warn("SQL error inserting document", ex);
if (getClass().desiredAssertionStatus())
throw new RuntimeException(ex);
}
}
@ -169,6 +172,9 @@ public class SqlLoadProcessedDocument {
conn.setAutoCommit(true);
} catch (SQLException ex) {
logger.warn("SQL error inserting failed document", ex);
if (getClass().desiredAssertionStatus())
throw new RuntimeException(ex);
}
}

View File

@ -62,6 +62,9 @@ public class SqlLoadProcessedDomain {
}
catch (SQLException ex) {
logger.warn("SQL error initializing domain", ex);
if (getClass().desiredAssertionStatus())
throw new RuntimeException(ex);
}
}
@ -84,6 +87,9 @@ public class SqlLoadProcessedDomain {
}
catch (SQLException ex) {
logger.warn("SQL error inserting domain alias", ex);
if (getClass().desiredAssertionStatus())
throw new RuntimeException(ex);
}
}
}

View File

@ -104,6 +104,9 @@ public class SqlLoadUrls {
}
catch (SQLException ex) {
logger.warn("SQL error inserting URLs", ex);
if (getClass().desiredAssertionStatus())
throw new RuntimeException(ex);
}
}

View File

@ -22,7 +22,7 @@ class SqlLoadDomainLinksTest {
.withDatabaseName("WMSA_prod")
.withUsername("wmsa")
.withPassword("wmsa")
.withInitScript("sql/edge-crawler-cache.sql")
.withInitScript("sql/current/00-base.sql")
.withNetworkAliases("mariadb");
HikariDataSource dataSource;

View File

@ -19,7 +19,7 @@ class SqlLoadDomainsTest {
.withDatabaseName("WMSA_prod")
.withUsername("wmsa")
.withPassword("wmsa")
.withInitScript("sql/edge-crawler-cache.sql")
.withInitScript("sql/current/00-base.sql")
.withNetworkAliases("mariadb");
@Test

View File

@ -33,7 +33,7 @@ class SqlLoadProcessedDocumentTest {
.withDatabaseName("WMSA_prod")
.withUsername("wmsa")
.withPassword("wmsa")
.withInitScript("sql/edge-crawler-cache.sql")
.withInitScript("sql/current/00-base.sql")
.withNetworkAliases("mariadb");
HikariDataSource dataSource;

View File

@ -23,7 +23,7 @@ class SqlLoadProcessedDomainTest {
.withDatabaseName("WMSA_prod")
.withUsername("wmsa")
.withPassword("wmsa")
.withInitScript("sql/edge-crawler-cache.sql")
.withInitScript("sql/current/00-base.sql")
.withNetworkAliases("mariadb");
HikariDataSource dataSource;

View File

@ -24,7 +24,7 @@ class SqlLoadUrlsTest {
.withDatabaseName("WMSA_prod")
.withUsername("wmsa")
.withPassword("wmsa")
.withInitScript("sql/edge-crawler-cache.sql")
.withInitScript("sql/current/00-base.sql")
.withNetworkAliases("mariadb");
HikariDataSource dataSource;

View File

@ -21,6 +21,7 @@ java {
}
}
dependencies {
implementation project(':code:common:db')
implementation project(':code:common:model')
implementation project(':code:common:service')
implementation project(':code:common:config')

View File

@ -6,7 +6,7 @@ import io.reactivex.rxjava3.core.Observable;
import io.reactivex.rxjava3.schedulers.Schedulers;
import nu.marginalia.assistant.client.AssistantClient;
import nu.marginalia.model.EdgeDomain;
import nu.marginalia.model.dbcommon.DbDomainQueries;
import nu.marginalia.db.DbDomainQueries;
import nu.marginalia.search.model.UrlDetails;
import nu.marginalia.client.Context;
import nu.marginalia.search.model.DecoratedSearchResults;

View File

@ -7,8 +7,8 @@ import nu.marginalia.browse.DbBrowseDomainsSimilarOldAlgo;
import nu.marginalia.browse.model.BrowseResult;
import nu.marginalia.browse.model.BrowseResultSet;
import nu.marginalia.model.EdgeDomain;
import nu.marginalia.model.dbcommon.DbDomainQueries;
import nu.marginalia.model.dbcommon.DomainBlacklist;
import nu.marginalia.db.DbDomainQueries;
import nu.marginalia.db.DomainBlacklist;
import nu.marginalia.search.command.SearchCommandInterface;
import nu.marginalia.search.command.SearchParameters;
import nu.marginalia.search.results.BrowseResultCleaner;

View File

@ -2,7 +2,7 @@ package nu.marginalia.search.command.commands;
import com.google.inject.Inject;
import nu.marginalia.client.Context;
import nu.marginalia.model.dbcommon.DomainBlacklist;
import nu.marginalia.db.DomainBlacklist;
import nu.marginalia.search.SearchOperator;
import nu.marginalia.search.command.SearchCommandInterface;
import nu.marginalia.search.command.SearchParameters;

View File

@ -1,7 +1,7 @@
package nu.marginalia.search.command.commands;
import com.google.inject.Inject;
import nu.marginalia.model.dbcommon.DbDomainQueries;
import nu.marginalia.db.DbDomainQueries;
import nu.marginalia.search.model.UrlDetails;
import nu.marginalia.search.command.SearchCommandInterface;
import nu.marginalia.search.command.SearchParameters;

View File

@ -4,7 +4,7 @@ import com.zaxxer.hikari.HikariDataSource;
import lombok.SneakyThrows;
import nu.marginalia.model.EdgeDomain;
import nu.marginalia.model.crawl.DomainIndexingState;
import nu.marginalia.model.dbcommon.DbDomainQueries;
import nu.marginalia.db.DbDomainQueries;
import nu.marginalia.model.id.EdgeId;
import nu.marginalia.search.model.DomainInformation;
import org.slf4j.Logger;

View File

@ -3,7 +3,7 @@ package nu.marginalia.search.svc;
import com.google.inject.Inject;
import com.zaxxer.hikari.HikariDataSource;
import nu.marginalia.WebsiteUrl;
import nu.marginalia.model.dbcommon.DbDomainQueries;
import nu.marginalia.db.DbDomainQueries;
import nu.marginalia.model.id.EdgeId;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

View File

@ -52,13 +52,17 @@ public class SearchFrontPageService {
try (var conn = dataSource.getConnection();
var stmt = conn.prepareStatement("""
SELECT TITLE, LINK, LIST_DATE FROM SEARCH_NEWS_FEED
SELECT TITLE, LINK, SOURCE, LIST_DATE FROM SEARCH_NEWS_FEED ORDER BY LIST_DATE DESC
""")) {
var rep = stmt.executeQuery();
while (rep.next()) {
items.add(new NewsItem(rep.getString(1), rep.getString(2), rep.getDate(3).toLocalDate()));
items.add(new NewsItem(
rep.getString(1),
rep.getString(2),
rep.getString(3),
rep.getDate(4).toLocalDate()));
}
}
catch (SQLException ex) {
@ -69,5 +73,5 @@ public class SearchFrontPageService {
}
private record IndexModel(List<NewsItem> news, int searchPerMinute) { }
private record NewsItem(String title, String url, LocalDate date) {}
private record NewsItem(String title, String url, String source, LocalDate date) {}
}

View File

@ -6,7 +6,7 @@
<dl>
{{#each news}}
<dt><a href="{{link}}" rel="nofollow">{{title}}</a></dt>
<dd>{{date}}</dd>
<dd>{{source}} {{date}}</dd>
{{/each}}
</dl>
</div>

View File

@ -21,6 +21,7 @@ java {
}
}
dependencies {
implementation project(':code:common:db')
implementation project(':code:common:model')
implementation project(':code:common:service')
implementation project(':code:common:service-discovery')

View File

@ -6,7 +6,7 @@ import lombok.SneakyThrows;
import nu.marginalia.browse.DbBrowseDomainsRandom;
import nu.marginalia.browse.DbBrowseDomainsSimilarCosine;
import nu.marginalia.browse.model.BrowseResult;
import nu.marginalia.model.dbcommon.DomainBlacklist;
import nu.marginalia.db.DomainBlacklist;
import nu.marginalia.renderer.MustacheRenderer;
import nu.marginalia.renderer.RendererFactory;
import nu.marginalia.screenshot.ScreenshotService;

View File

@ -4,7 +4,7 @@ import nu.marginalia.browse.DbBrowseDomainsRandom;
import nu.marginalia.browse.DbBrowseDomainsSimilarCosine;
import nu.marginalia.browse.model.BrowseResult;
import nu.marginalia.model.EdgeDomain;
import nu.marginalia.model.dbcommon.DomainBlacklist;
import nu.marginalia.db.DomainBlacklist;
import nu.marginalia.model.id.EdgeId;
import java.util.LinkedList;

View File

@ -21,6 +21,7 @@ tasks.distZip.enabled = false
dependencies {
implementation project(':code:common:process')
implementation project(':code:common:db')
implementation project(':code:common:model')
implementation project(':code:common:service')
implementation project(':code:process-models:crawling-model')

View File

@ -5,7 +5,7 @@ import com.google.common.hash.Hashing;
import com.zaxxer.hikari.HikariDataSource;
import nu.marginalia.crawling.model.spec.CrawlingSpecification;
import nu.marginalia.model.EdgeDomain;
import nu.marginalia.model.dbcommon.DomainBlacklistImpl;
import nu.marginalia.db.DomainBlacklistImpl;
import java.sql.ResultSet;
import java.sql.SQLException;

View File

@ -2,7 +2,7 @@ package nu.marginalia.crawl;
import nu.marginalia.crawling.model.spec.CrawlingSpecification;
import nu.marginalia.model.EdgeDomain;
import nu.marginalia.model.dbcommon.DomainBlacklistImpl;
import nu.marginalia.db.DomainBlacklistImpl;
import nu.marginalia.service.module.DatabaseModule;
import java.io.IOException;

View File

@ -80,7 +80,7 @@ services:
- "127.0.0.1:3306:3306/tcp"
volumes:
- db:/var/lib/mysql
- "./code/common/model/src/main/resources/sql/edge-crawler-cache.sql:/docker-entrypoint-initdb.d/init.sql"
- "./code/common/db/src/main/resources/sql/current/:/docker-entrypoint-initdb.d/"
networks:
- wmsa
nginx-gw:
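Mounting the whole sql/current/ directory, rather than the single edge-crawler-cache.sql file, relies on the MariaDB image's entrypoint executing every *.sql file in /docker-entrypoint-initdb.d in lexical order on first start-up, which is why the base schema sorts first as 00-base.sql with the remaining schema files after it.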

View File

@ -46,6 +46,7 @@ include 'code:api:assistant-api'
include 'code:common:service-discovery'
include 'code:common:service-client'
include 'code:common:db'
include 'code:common:service'
include 'code:common:config'
include 'code:common:model'