Move database to a separate module
* Move the database code to a separate project and break the SQL file apart into separate entities. * Fix the front-page news listing.
This commit is contained in:
parent
0b505939ed
commit
ac1ac3ea57
50
code/common/db/build.gradle
Normal file
50
code/common/db/build.gradle
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
plugins {
|
||||||
|
id 'java'
|
||||||
|
id "io.freefair.lombok" version "5.3.3.3"
|
||||||
|
id 'jvm-test-suite'
|
||||||
|
}
|
||||||
|
|
||||||
|
java {
|
||||||
|
toolchain {
|
||||||
|
languageVersion.set(JavaLanguageVersion.of(17))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
dependencies {
|
||||||
|
implementation project(':code:common:model')
|
||||||
|
|
||||||
|
implementation libs.lombok
|
||||||
|
annotationProcessor libs.lombok
|
||||||
|
implementation libs.bundles.slf4j
|
||||||
|
|
||||||
|
implementation libs.guice
|
||||||
|
implementation libs.bundles.gson
|
||||||
|
|
||||||
|
implementation libs.notnull
|
||||||
|
|
||||||
|
implementation libs.commons.lang3
|
||||||
|
|
||||||
|
implementation libs.trove
|
||||||
|
|
||||||
|
implementation libs.rxjava
|
||||||
|
implementation libs.bundles.mariadb
|
||||||
|
|
||||||
|
testImplementation libs.bundles.slf4j.test
|
||||||
|
testImplementation libs.bundles.junit
|
||||||
|
testImplementation libs.mockito
|
||||||
|
}
|
||||||
|
|
||||||
|
test {
|
||||||
|
maxParallelForks = Runtime.runtime.availableProcessors().intdiv(2) ?: 1
|
||||||
|
maxHeapSize = "8G"
|
||||||
|
useJUnitPlatform()
|
||||||
|
}
|
||||||
|
|
||||||
|
task fastTests(type: Test) {
|
||||||
|
maxParallelForks = Runtime.runtime.availableProcessors().intdiv(2) ?: 1
|
||||||
|
maxHeapSize = "8G"
|
||||||
|
useJUnitPlatform {
|
||||||
|
excludeTags "slow"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -1,4 +1,4 @@
|
|||||||
package nu.marginalia.model.dbcommon;
|
package nu.marginalia.db;
|
||||||
|
|
||||||
|
|
||||||
import com.google.common.cache.Cache;
|
import com.google.common.cache.Cache;
|
@ -1,4 +1,4 @@
|
|||||||
package nu.marginalia.model.dbcommon;
|
package nu.marginalia.db;
|
||||||
|
|
||||||
import com.google.inject.ImplementedBy;
|
import com.google.inject.ImplementedBy;
|
||||||
import gnu.trove.set.hash.TIntHashSet;
|
import gnu.trove.set.hash.TIntHashSet;
|
@ -1,4 +1,4 @@
|
|||||||
package nu.marginalia.model.dbcommon;
|
package nu.marginalia.db;
|
||||||
|
|
||||||
import com.google.inject.Inject;
|
import com.google.inject.Inject;
|
||||||
import com.google.inject.Singleton;
|
import com.google.inject.Singleton;
|
@ -1,19 +1,3 @@
|
|||||||
DROP TABLE IF EXISTS DOMAIN_METADATA;
|
|
||||||
DROP TABLE IF EXISTS EC_FEED_URL;
|
|
||||||
DROP TABLE IF EXISTS EC_DOMAIN_LINK;
|
|
||||||
DROP TABLE IF EXISTS EC_PAGE_DATA;
|
|
||||||
DROP TABLE IF EXISTS EC_URL;
|
|
||||||
DROP TABLE IF EXISTS EC_DOMAIN_NEIGHBORS;
|
|
||||||
DROP TABLE IF EXISTS EC_DOMAIN;
|
|
||||||
|
|
||||||
|
|
||||||
CREATE TABLE IF NOT EXISTS DOMAIN_METADATA (
|
|
||||||
ID INT PRIMARY KEY,
|
|
||||||
KNOWN_URLS INT DEFAULT 0,
|
|
||||||
VISITED_URLS INT DEFAULT 0,
|
|
||||||
GOOD_URLS INT DEFAULT 0
|
|
||||||
);
|
|
||||||
|
|
||||||
|
|
||||||
CREATE TABLE IF NOT EXISTS EC_DOMAIN (
|
CREATE TABLE IF NOT EXISTS EC_DOMAIN (
|
||||||
ID INT PRIMARY KEY AUTO_INCREMENT,
|
ID INT PRIMARY KEY AUTO_INCREMENT,
|
||||||
@ -36,12 +20,6 @@ CREATE TABLE IF NOT EXISTS EC_DOMAIN (
|
|||||||
CHARACTER SET utf8mb4
|
CHARACTER SET utf8mb4
|
||||||
COLLATE utf8mb4_unicode_ci;
|
COLLATE utf8mb4_unicode_ci;
|
||||||
|
|
||||||
CREATE TABLE IF NOT EXISTS EC_DOMAIN_BLACKLIST (
|
|
||||||
ID INT PRIMARY KEY AUTO_INCREMENT,
|
|
||||||
URL_DOMAIN VARCHAR(255) UNIQUE NOT NULL
|
|
||||||
)
|
|
||||||
CHARACTER SET utf8mb4
|
|
||||||
COLLATE utf8mb4_unicode_ci;
|
|
||||||
|
|
||||||
CREATE TABLE IF NOT EXISTS EC_URL (
|
CREATE TABLE IF NOT EXISTS EC_URL (
|
||||||
ID INT PRIMARY KEY AUTO_INCREMENT,
|
ID INT PRIMARY KEY AUTO_INCREMENT,
|
||||||
@ -84,37 +62,6 @@ CREATE TABLE IF NOT EXISTS EC_PAGE_DATA (
|
|||||||
CHARACTER SET utf8mb4
|
CHARACTER SET utf8mb4
|
||||||
COLLATE utf8mb4_unicode_ci;
|
COLLATE utf8mb4_unicode_ci;
|
||||||
|
|
||||||
CREATE TABLE EC_FEED_URL (
|
|
||||||
URL VARCHAR(255) PRIMARY KEY,
|
|
||||||
DOMAIN_ID INT,
|
|
||||||
|
|
||||||
FOREIGN KEY (DOMAIN_ID) REFERENCES EC_DOMAIN(ID) ON DELETE CASCADE
|
|
||||||
)
|
|
||||||
CHARACTER SET utf8mb4
|
|
||||||
COLLATE utf8mb4_unicode_ci;
|
|
||||||
|
|
||||||
CREATE TABLE EC_DOMAIN_NEIGHBORS (
|
|
||||||
ID INT PRIMARY KEY AUTO_INCREMENT,
|
|
||||||
DOMAIN_ID INT NOT NULL,
|
|
||||||
NEIGHBOR_ID INT NOT NULL,
|
|
||||||
ADJ_IDX INT NOT NULL,
|
|
||||||
|
|
||||||
CONSTRAINT CONS UNIQUE (DOMAIN_ID, ADJ_IDX),
|
|
||||||
FOREIGN KEY (DOMAIN_ID) REFERENCES EC_DOMAIN(ID) ON DELETE CASCADE
|
|
||||||
)
|
|
||||||
CHARACTER SET utf8mb4
|
|
||||||
COLLATE utf8mb4_unicode_ci;
|
|
||||||
|
|
||||||
CREATE TABLE EC_DOMAIN_NEIGHBORS_2 (
|
|
||||||
DOMAIN_ID INT NOT NULL,
|
|
||||||
NEIGHBOR_ID INT NOT NULL,
|
|
||||||
RELATEDNESS DOUBLE NOT NULL,
|
|
||||||
|
|
||||||
PRIMARY KEY (DOMAIN_ID, NEIGHBOR_ID),
|
|
||||||
FOREIGN KEY (DOMAIN_ID) REFERENCES EC_DOMAIN(ID) ON DELETE CASCADE,
|
|
||||||
FOREIGN KEY (NEIGHBOR_ID) REFERENCES EC_DOMAIN(ID) ON DELETE CASCADE
|
|
||||||
);
|
|
||||||
|
|
||||||
CREATE TABLE IF NOT EXISTS EC_DOMAIN_LINK (
|
CREATE TABLE IF NOT EXISTS EC_DOMAIN_LINK (
|
||||||
ID INT PRIMARY KEY AUTO_INCREMENT,
|
ID INT PRIMARY KEY AUTO_INCREMENT,
|
||||||
SOURCE_DOMAIN_ID INT NOT NULL,
|
SOURCE_DOMAIN_ID INT NOT NULL,
|
||||||
@ -126,6 +73,24 @@ CREATE TABLE IF NOT EXISTS EC_DOMAIN_LINK (
|
|||||||
FOREIGN KEY (DEST_DOMAIN_ID) REFERENCES EC_DOMAIN(ID) ON DELETE CASCADE
|
FOREIGN KEY (DEST_DOMAIN_ID) REFERENCES EC_DOMAIN(ID) ON DELETE CASCADE
|
||||||
);
|
);
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS DOMAIN_METADATA (
|
||||||
|
ID INT PRIMARY KEY,
|
||||||
|
KNOWN_URLS INT DEFAULT 0,
|
||||||
|
VISITED_URLS INT DEFAULT 0,
|
||||||
|
GOOD_URLS INT DEFAULT 0,
|
||||||
|
|
||||||
|
FOREIGN KEY (ID) REFERENCES EC_DOMAIN(ID) ON DELETE CASCADE
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE EC_FEED_URL (
|
||||||
|
URL VARCHAR(255) PRIMARY KEY,
|
||||||
|
DOMAIN_ID INT,
|
||||||
|
|
||||||
|
FOREIGN KEY (DOMAIN_ID) REFERENCES EC_DOMAIN(ID) ON DELETE CASCADE
|
||||||
|
)
|
||||||
|
CHARACTER SET utf8mb4
|
||||||
|
COLLATE utf8mb4_unicode_ci;
|
||||||
|
|
||||||
CREATE OR REPLACE VIEW EC_URL_VIEW AS
|
CREATE OR REPLACE VIEW EC_URL_VIEW AS
|
||||||
SELECT
|
SELECT
|
||||||
CONCAT(EC_URL.PROTO,
|
CONCAT(EC_URL.PROTO,
|
||||||
@ -141,9 +106,7 @@ CREATE OR REPLACE VIEW EC_URL_VIEW AS
|
|||||||
EC_DOMAIN.DOMAIN_TOP AS DOMAIN_TOP,
|
EC_DOMAIN.DOMAIN_TOP AS DOMAIN_TOP,
|
||||||
EC_URL.ID AS ID,
|
EC_URL.ID AS ID,
|
||||||
EC_DOMAIN.ID AS DOMAIN_ID,
|
EC_DOMAIN.ID AS DOMAIN_ID,
|
||||||
|
|
||||||
EC_URL.VISITED AS VISITED,
|
EC_URL.VISITED AS VISITED,
|
||||||
|
|
||||||
EC_PAGE_DATA.QUALITY AS QUALITY,
|
EC_PAGE_DATA.QUALITY AS QUALITY,
|
||||||
EC_PAGE_DATA.DATA_HASH AS DATA_HASH,
|
EC_PAGE_DATA.DATA_HASH AS DATA_HASH,
|
||||||
EC_PAGE_DATA.TITLE AS TITLE,
|
EC_PAGE_DATA.TITLE AS TITLE,
|
||||||
@ -151,7 +114,6 @@ CREATE OR REPLACE VIEW EC_URL_VIEW AS
|
|||||||
EC_PAGE_DATA.WORDS_TOTAL AS WORDS_TOTAL,
|
EC_PAGE_DATA.WORDS_TOTAL AS WORDS_TOTAL,
|
||||||
EC_PAGE_DATA.FORMAT AS FORMAT,
|
EC_PAGE_DATA.FORMAT AS FORMAT,
|
||||||
EC_PAGE_DATA.FEATURES AS FEATURES,
|
EC_PAGE_DATA.FEATURES AS FEATURES,
|
||||||
|
|
||||||
EC_DOMAIN.IP AS IP,
|
EC_DOMAIN.IP AS IP,
|
||||||
EC_URL.STATE AS STATE,
|
EC_URL.STATE AS STATE,
|
||||||
EC_DOMAIN.RANK AS RANK,
|
EC_DOMAIN.RANK AS RANK,
|
||||||
@ -162,17 +124,6 @@ CREATE OR REPLACE VIEW EC_URL_VIEW AS
|
|||||||
INNER JOIN EC_DOMAIN
|
INNER JOIN EC_DOMAIN
|
||||||
ON EC_URL.DOMAIN_ID = EC_DOMAIN.ID;
|
ON EC_URL.DOMAIN_ID = EC_DOMAIN.ID;
|
||||||
|
|
||||||
CREATE OR REPLACE VIEW EC_NEIGHBORS_VIEW AS
|
|
||||||
SELECT
|
|
||||||
DOM.DOMAIN_NAME AS DOMAIN_NAME,
|
|
||||||
DOM.ID AS DOMAIN_ID,
|
|
||||||
NEIGHBOR.DOMAIN_NAME AS NEIGHBOR_NAME,
|
|
||||||
NEIGHBOR.ID AS NEIGHBOR_ID,
|
|
||||||
ROUND(100 * RELATEDNESS) AS RELATEDNESS
|
|
||||||
FROM EC_DOMAIN_NEIGHBORS_2
|
|
||||||
INNER JOIN EC_DOMAIN DOM ON DOMAIN_ID=DOM.ID
|
|
||||||
INNER JOIN EC_DOMAIN NEIGHBOR ON NEIGHBOR_ID=NEIGHBOR.ID;
|
|
||||||
|
|
||||||
|
|
||||||
CREATE OR REPLACE VIEW EC_RELATED_LINKS_VIEW AS
|
CREATE OR REPLACE VIEW EC_RELATED_LINKS_VIEW AS
|
||||||
SELECT
|
SELECT
|
||||||
@ -189,117 +140,5 @@ CREATE OR REPLACE VIEW EC_RELATED_LINKS_VIEW AS
|
|||||||
ON DEST_DOMAIN.ID=DEST_DOMAIN_ID
|
ON DEST_DOMAIN.ID=DEST_DOMAIN_ID
|
||||||
;
|
;
|
||||||
|
|
||||||
CREATE OR REPLACE VIEW EC_RELATED_LINKS_IN AS
|
|
||||||
SELECT
|
|
||||||
IN_URL.ID AS SRC_URL_ID,
|
|
||||||
OUT_URL.ID AS DEST_URL_ID
|
|
||||||
FROM EC_DOMAIN_LINK
|
|
||||||
INNER JOIN EC_URL AS IN_URL ON IN_URL.DOMAIN_ID=EC_DOMAIN_LINK.SOURCE_DOMAIN_ID
|
|
||||||
INNER JOIN EC_URL AS OUT_URL ON OUT_URL.DOMAIN_ID=EC_DOMAIN_LINK.DEST_DOMAIN_ID
|
|
||||||
WHERE IN_URL.VISITED AND IN_URL.STATE = 'ok'
|
|
||||||
AND OUT_URL.VISITED AND OUT_URL.STATE = 'ok';
|
|
||||||
|
|
||||||
CREATE TABLE IF NOT EXISTS EC_API_KEY (
|
|
||||||
LICENSE_KEY VARCHAR(255) UNIQUE,
|
|
||||||
LICENSE VARCHAR(255) NOT NULL,
|
|
||||||
NAME VARCHAR(255) NOT NULL,
|
|
||||||
EMAIL VARCHAR(255) NOT NULL,
|
|
||||||
RATE INT DEFAULT 10
|
|
||||||
);
|
|
||||||
|
|
||||||
CREATE INDEX IF NOT EXISTS EC_DOMAIN_INDEXED_INDEX ON EC_DOMAIN (INDEXED);
|
CREATE INDEX IF NOT EXISTS EC_DOMAIN_INDEXED_INDEX ON EC_DOMAIN (INDEXED);
|
||||||
CREATE INDEX IF NOT EXISTS EC_DOMAIN_TOP_DOMAIN ON EC_DOMAIN (DOMAIN_TOP);
|
CREATE INDEX IF NOT EXISTS EC_DOMAIN_TOP_DOMAIN ON EC_DOMAIN (DOMAIN_TOP);
|
||||||
|
|
||||||
---;
|
|
||||||
|
|
||||||
CREATE TABLE IF NOT EXISTS EC_RANDOM_DOMAINS (
|
|
||||||
DOMAIN_ID INT PRIMARY KEY,
|
|
||||||
DOMAIN_SET INT NOT NULL
|
|
||||||
);
|
|
||||||
|
|
||||||
---;
|
|
||||||
|
|
||||||
DROP TABLE IF EXISTS REF_DICTIONARY;
|
|
||||||
|
|
||||||
CREATE TABLE IF NOT EXISTS REF_DICTIONARY (
|
|
||||||
TYPE VARCHAR(16),
|
|
||||||
WORD VARCHAR(255),
|
|
||||||
DEFINITION VARCHAR(255)
|
|
||||||
)
|
|
||||||
CHARACTER SET utf8mb4
|
|
||||||
COLLATE utf8mb4_unicode_ci;
|
|
||||||
|
|
||||||
---;
|
|
||||||
|
|
||||||
CREATE INDEX IF NOT EXISTS REF_DICTIONARY_WORD ON REF_DICTIONARY (WORD);
|
|
||||||
|
|
||||||
CREATE TABLE IF NOT EXISTS REF_WIKI_ARTICLE (
|
|
||||||
NAME VARCHAR(255) PRIMARY KEY,
|
|
||||||
REF_NAME VARCHAR(255) COMMENT "If this is a redirect, it redirects to this REF_WIKI_ARTICLE.NAME",
|
|
||||||
ENTRY LONGBLOB
|
|
||||||
)
|
|
||||||
ROW_FORMAT=DYNAMIC
|
|
||||||
CHARACTER SET utf8mb4
|
|
||||||
COLLATE utf8mb4_unicode_ci;
|
|
||||||
|
|
||||||
---;
|
|
||||||
|
|
||||||
CREATE TABLE IF NOT EXISTS DATA_DOMAIN_SCREENSHOT (
|
|
||||||
DOMAIN_NAME VARCHAR(255) PRIMARY KEY,
|
|
||||||
CONTENT_TYPE ENUM ('image/png', 'image/webp', 'image/svg+xml') NOT NULL,
|
|
||||||
DATA LONGBLOB NOT NULL
|
|
||||||
)
|
|
||||||
ROW_FORMAT=DYNAMIC
|
|
||||||
CHARACTER SET utf8mb4
|
|
||||||
COLLATE utf8mb4_unicode_ci;
|
|
||||||
|
|
||||||
CREATE TABLE DATA_DOMAIN_HISTORY (
|
|
||||||
DOMAIN_NAME VARCHAR(255) PRIMARY KEY,
|
|
||||||
SCREENSHOT_DATE DATE DEFAULT NOW()
|
|
||||||
) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;
|
|
||||||
|
|
||||||
CREATE TABLE CRAWL_QUEUE(
|
|
||||||
DOMAIN_NAME VARCHAR(255) UNIQUE,
|
|
||||||
SOURCE VARCHAR(255)
|
|
||||||
) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;
|
|
||||||
|
|
||||||
CREATE TABLE DOMAIN_COMPLAINT(
|
|
||||||
ID INT PRIMARY KEY AUTO_INCREMENT,
|
|
||||||
DOMAIN_ID INT NOT NULL,
|
|
||||||
|
|
||||||
CATEGORY VARCHAR(255) NOT NULL,
|
|
||||||
DESCRIPTION TEXT,
|
|
||||||
SAMPLE VARCHAR(255),
|
|
||||||
FILE_DATE TIMESTAMP NOT NULL DEFAULT NOW(),
|
|
||||||
|
|
||||||
REVIEWED BOOLEAN AS (REVIEW_DATE > 0) VIRTUAL,
|
|
||||||
DECISION VARCHAR(255),
|
|
||||||
REVIEW_DATE TIMESTAMP,
|
|
||||||
|
|
||||||
FOREIGN KEY (DOMAIN_ID) REFERENCES EC_DOMAIN(ID) ON DELETE CASCADE
|
|
||||||
);
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
CREATE TABLE WMSA_PROCESS(
|
|
||||||
ID BIGINT PRIMARY KEY,
|
|
||||||
NAME VARCHAR(255) NOT NULL,
|
|
||||||
TYPE ENUM('SERVICE', 'TASK') NOT NULL,
|
|
||||||
START DATETIME NOT NULL DEFAULT NOW(),
|
|
||||||
UPDATED DATETIME,
|
|
||||||
FINISHED DATETIME,
|
|
||||||
PROGRESS DOUBLE DEFAULT 0,
|
|
||||||
PROCESS_STATUS ENUM('RUNNING', 'FINISHED', 'DEAD') NOT NULL DEFAULT 'RUNNING',
|
|
||||||
PROCESS_SUBSTATUS ENUM('NA', 'OK', 'FAIL') NOT NULL DEFAULT 'NA',
|
|
||||||
MUTEX VARCHAR(255),
|
|
||||||
TIMEOUT INT NOT NULL DEFAULT 60
|
|
||||||
);
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
CREATE TABLE SEARCH_NEWS_FEED(
|
|
||||||
ID INT PRIMARY KEY AUTO_INCREMENT,
|
|
||||||
TITLE VARCHAR(255),
|
|
||||||
LINK VARCHAR(255),
|
|
||||||
LIST_DATE DATE
|
|
||||||
) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin;
|
|
@ -0,0 +1,7 @@
|
|||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS EC_DOMAIN_BLACKLIST (
|
||||||
|
ID INT PRIMARY KEY AUTO_INCREMENT,
|
||||||
|
URL_DOMAIN VARCHAR(255) UNIQUE NOT NULL
|
||||||
|
)
|
||||||
|
CHARACTER SET utf8mb4
|
||||||
|
COLLATE utf8mb4_unicode_ci;
|
@ -0,0 +1,19 @@
|
|||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS REF_DICTIONARY (
|
||||||
|
TYPE VARCHAR(16),
|
||||||
|
WORD VARCHAR(255),
|
||||||
|
DEFINITION VARCHAR(255)
|
||||||
|
)
|
||||||
|
CHARACTER SET utf8mb4
|
||||||
|
COLLATE utf8mb4_unicode_ci;
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS REF_WIKI_ARTICLE (
|
||||||
|
NAME VARCHAR(255) PRIMARY KEY,
|
||||||
|
REF_NAME VARCHAR(255) COMMENT "If this is a redirect, it redirects to this REF_WIKI_ARTICLE.NAME",
|
||||||
|
ENTRY LONGBLOB
|
||||||
|
)
|
||||||
|
ROW_FORMAT=DYNAMIC
|
||||||
|
CHARACTER SET utf8mb4
|
||||||
|
COLLATE utf8mb4_unicode_ci;
|
||||||
|
|
||||||
|
CREATE INDEX IF NOT EXISTS REF_DICTIONARY_WORD ON REF_DICTIONARY (WORD);
|
@ -0,0 +1,5 @@
|
|||||||
|
|
||||||
|
CREATE TABLE CRAWL_QUEUE(
|
||||||
|
DOMAIN_NAME VARCHAR(255) UNIQUE,
|
||||||
|
SOURCE VARCHAR(255)
|
||||||
|
) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;
|
@ -0,0 +1,13 @@
|
|||||||
|
CREATE TABLE IF NOT EXISTS DATA_DOMAIN_SCREENSHOT (
|
||||||
|
DOMAIN_NAME VARCHAR(255) PRIMARY KEY,
|
||||||
|
CONTENT_TYPE ENUM ('image/png', 'image/webp', 'image/svg+xml') NOT NULL,
|
||||||
|
DATA LONGBLOB NOT NULL
|
||||||
|
)
|
||||||
|
ROW_FORMAT=DYNAMIC
|
||||||
|
CHARACTER SET utf8mb4
|
||||||
|
COLLATE utf8mb4_unicode_ci;
|
||||||
|
|
||||||
|
CREATE TABLE DATA_DOMAIN_HISTORY (
|
||||||
|
DOMAIN_NAME VARCHAR(255) PRIMARY KEY,
|
||||||
|
SCREENSHOT_DATE DATE DEFAULT NOW()
|
||||||
|
) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;
|
@ -0,0 +1,15 @@
|
|||||||
|
CREATE TABLE DOMAIN_COMPLAINT(
|
||||||
|
ID INT PRIMARY KEY AUTO_INCREMENT,
|
||||||
|
DOMAIN_ID INT NOT NULL,
|
||||||
|
|
||||||
|
CATEGORY VARCHAR(255) NOT NULL,
|
||||||
|
DESCRIPTION TEXT,
|
||||||
|
SAMPLE VARCHAR(255),
|
||||||
|
FILE_DATE TIMESTAMP NOT NULL DEFAULT NOW(),
|
||||||
|
|
||||||
|
REVIEWED BOOLEAN AS (REVIEW_DATE > 0) VIRTUAL,
|
||||||
|
DECISION VARCHAR(255),
|
||||||
|
REVIEW_DATE TIMESTAMP,
|
||||||
|
|
||||||
|
FOREIGN KEY (DOMAIN_ID) REFERENCES EC_DOMAIN(ID) ON DELETE CASCADE
|
||||||
|
) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;
|
@ -0,0 +1,7 @@
|
|||||||
|
CREATE TABLE IF NOT EXISTS EC_API_KEY (
|
||||||
|
LICENSE_KEY VARCHAR(255) UNIQUE,
|
||||||
|
LICENSE VARCHAR(255) NOT NULL,
|
||||||
|
NAME VARCHAR(255) NOT NULL,
|
||||||
|
EMAIL VARCHAR(255) NOT NULL,
|
||||||
|
RATE INT DEFAULT 10
|
||||||
|
);
|
@ -0,0 +1,34 @@
|
|||||||
|
|
||||||
|
CREATE TABLE EC_DOMAIN_NEIGHBORS (
|
||||||
|
ID INT PRIMARY KEY AUTO_INCREMENT,
|
||||||
|
DOMAIN_ID INT NOT NULL,
|
||||||
|
NEIGHBOR_ID INT NOT NULL,
|
||||||
|
ADJ_IDX INT NOT NULL,
|
||||||
|
|
||||||
|
CONSTRAINT CONS UNIQUE (DOMAIN_ID, ADJ_IDX),
|
||||||
|
FOREIGN KEY (DOMAIN_ID) REFERENCES EC_DOMAIN(ID) ON DELETE CASCADE
|
||||||
|
)
|
||||||
|
CHARACTER SET utf8mb4
|
||||||
|
COLLATE utf8mb4_unicode_ci;
|
||||||
|
|
||||||
|
CREATE TABLE EC_DOMAIN_NEIGHBORS_2 (
|
||||||
|
DOMAIN_ID INT NOT NULL,
|
||||||
|
NEIGHBOR_ID INT NOT NULL,
|
||||||
|
RELATEDNESS DOUBLE NOT NULL,
|
||||||
|
|
||||||
|
PRIMARY KEY (DOMAIN_ID, NEIGHBOR_ID),
|
||||||
|
FOREIGN KEY (DOMAIN_ID) REFERENCES EC_DOMAIN(ID) ON DELETE CASCADE,
|
||||||
|
FOREIGN KEY (NEIGHBOR_ID) REFERENCES EC_DOMAIN(ID) ON DELETE CASCADE
|
||||||
|
);
|
||||||
|
|
||||||
|
|
||||||
|
CREATE OR REPLACE VIEW EC_NEIGHBORS_VIEW AS
|
||||||
|
SELECT
|
||||||
|
DOM.DOMAIN_NAME AS DOMAIN_NAME,
|
||||||
|
DOM.ID AS DOMAIN_ID,
|
||||||
|
NEIGHBOR.DOMAIN_NAME AS NEIGHBOR_NAME,
|
||||||
|
NEIGHBOR.ID AS NEIGHBOR_ID,
|
||||||
|
ROUND(100 * RELATEDNESS) AS RELATEDNESS
|
||||||
|
FROM EC_DOMAIN_NEIGHBORS_2
|
||||||
|
INNER JOIN EC_DOMAIN DOM ON DOMAIN_ID=DOM.ID
|
||||||
|
INNER JOIN EC_DOMAIN NEIGHBOR ON NEIGHBOR_ID=NEIGHBOR.ID;
|
@ -0,0 +1,5 @@
|
|||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS EC_RANDOM_DOMAINS (
|
||||||
|
DOMAIN_ID INT PRIMARY KEY,
|
||||||
|
DOMAIN_SET INT NOT NULL
|
||||||
|
);
|
@ -0,0 +1,8 @@
|
|||||||
|
|
||||||
|
CREATE TABLE SEARCH_NEWS_FEED (
|
||||||
|
ID INT PRIMARY KEY AUTO_INCREMENT,
|
||||||
|
TITLE VARCHAR(255) NOT NULL,
|
||||||
|
LINK VARCHAR(255) UNIQUE NOT NULL,
|
||||||
|
SOURCE VARCHAR(255),
|
||||||
|
LIST_DATE DATE NOT NULL
|
||||||
|
) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin;
|
@ -0,0 +1,76 @@
|
|||||||
|
|
||||||
|
INSERT IGNORE INTO SEARCH_NEWS_FEED(TITLE, LINK, SOURCE, LIST_DATE) VALUES (
|
||||||
|
'A search engine that favors text-heavy sites and punishes modern web design',
|
||||||
|
'https://news.ycombinator.com/item?id=28550764',
|
||||||
|
'Hacker News',
|
||||||
|
'2021-09-16'
|
||||||
|
);
|
||||||
|
|
||||||
|
INSERT IGNORE INTO SEARCH_NEWS_FEED(TITLE, LINK, SOURCE, LIST_DATE) VALUES (
|
||||||
|
'A Search Engine Designed To Surprise You',
|
||||||
|
'https://onezero.medium.com/a-search-engine-designed-to-surprise-you-b81944ed5c06',
|
||||||
|
'Clive Thompson OneZero',
|
||||||
|
'2021-09-16'
|
||||||
|
);
|
||||||
|
|
||||||
|
INSERT IGNORE INTO SEARCH_NEWS_FEED(TITLE, LINK, SOURCE, LIST_DATE) VALUES (
|
||||||
|
'🎂 First anniversary! 🎊',
|
||||||
|
'https://memex.marginalia.nu/log/49-marginalia-1-year.gmi',
|
||||||
|
null,
|
||||||
|
'2022-02-26');
|
||||||
|
|
||||||
|
INSERT IGNORE INTO SEARCH_NEWS_FEED(TITLE, LINK, SOURCE, LIST_DATE) VALUES (
|
||||||
|
'Marginalia Search - Serendipity Engineering',
|
||||||
|
'https://www.metafilter.com/194653/Marginalia-Search-Serendipity-Engineering',
|
||||||
|
'MetaFilter',
|
||||||
|
'2022-03-09');
|
||||||
|
|
||||||
|
INSERT IGNORE INTO SEARCH_NEWS_FEED(TITLE, LINK, SOURCE, LIST_DATE) VALUES (
|
||||||
|
'What Google Search Isn\'t Showing You',
|
||||||
|
'https://www.newyorker.com/culture/infinite-scroll/what-google-search-isnt-showing-you',
|
||||||
|
'The New Yorker 🎩',
|
||||||
|
'2022-03-10'
|
||||||
|
);
|
||||||
|
|
||||||
|
INSERT IGNORE INTO SEARCH_NEWS_FEED(TITLE, LINK, SOURCE, LIST_DATE) VALUES (
|
||||||
|
'You Should Check Out the Indie Web 🎞️',
|
||||||
|
'https://www.youtube.com/watch?v=rTSEr0cRJY8',
|
||||||
|
'YouTube, You\'ve Got Kat',
|
||||||
|
'2022-03-15'
|
||||||
|
);
|
||||||
|
|
||||||
|
INSERT IGNORE INTO SEARCH_NEWS_FEED(TITLE, LINK, SOURCE, LIST_DATE) VALUES (
|
||||||
|
'Marginalia Goes Open Source',
|
||||||
|
'https://news.ycombinator.com/item?id=31536626',
|
||||||
|
'Hacker News',
|
||||||
|
'2022-05-28'
|
||||||
|
);
|
||||||
|
|
||||||
|
INSERT IGNORE INTO SEARCH_NEWS_FEED(TITLE, LINK, SOURCE, LIST_DATE) VALUES (
|
||||||
|
'Kritik an Googles Suche - Platzhirsch auf dem Nebenschauplatz',
|
||||||
|
'https://www.deutschlandfunkkultur.de/google-suche-100.html',
|
||||||
|
'Deutschlandfunk Kultur 🇩🇪',
|
||||||
|
'2022-08-18'
|
||||||
|
);
|
||||||
|
|
||||||
|
INSERT IGNORE INTO SEARCH_NEWS_FEED(TITLE, LINK, SOURCE, LIST_DATE) VALUES (
|
||||||
|
'Google ei enää tideä',
|
||||||
|
'https://www.hs.fi/visio/art-2000009139237.html',
|
||||||
|
'Helsing Sanomat 🇫🇮',
|
||||||
|
'2022-10-19'
|
||||||
|
);
|
||||||
|
|
||||||
|
INSERT IGNORE INTO SEARCH_NEWS_FEED(TITLE, LINK, SOURCE, LIST_DATE) VALUES (
|
||||||
|
'Marginalia\'s Index Reaches 100,000,000 Documents 🎊',
|
||||||
|
'https://memex.marginalia.nu/log/64-hundred-million.gmi',
|
||||||
|
null,
|
||||||
|
'2022-10-21'
|
||||||
|
);
|
||||||
|
|
||||||
|
INSERT IGNORE INTO SEARCH_NEWS_FEED(TITLE, LINK, SOURCE, LIST_DATE) VALUES (
|
||||||
|
'Marginalia Receives NLnet grant',
|
||||||
|
'https://memex.marginalia.nu/log/74-marginalia-2-years.gmi',
|
||||||
|
null,
|
||||||
|
'2023-02-26'
|
||||||
|
);
|
||||||
|
|
@ -14,6 +14,7 @@ java {
|
|||||||
}
|
}
|
||||||
|
|
||||||
dependencies {
|
dependencies {
|
||||||
|
implementation project(':code:common:db')
|
||||||
implementation project(':code:common:model')
|
implementation project(':code:common:model')
|
||||||
implementation project(':code:common:service')
|
implementation project(':code:common:service')
|
||||||
|
|
||||||
|
@ -3,7 +3,7 @@ package nu.marginalia.ranking.data;
|
|||||||
import com.google.inject.Inject;
|
import com.google.inject.Inject;
|
||||||
import com.google.inject.Singleton;
|
import com.google.inject.Singleton;
|
||||||
import com.zaxxer.hikari.HikariDataSource;
|
import com.zaxxer.hikari.HikariDataSource;
|
||||||
import nu.marginalia.model.dbcommon.DomainBlacklistImpl;
|
import nu.marginalia.db.DomainBlacklistImpl;
|
||||||
import nu.marginalia.model.crawl.DomainIndexingState;
|
import nu.marginalia.model.crawl.DomainIndexingState;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
@ -3,7 +3,7 @@ package nu.marginalia.ranking.data;
|
|||||||
import com.google.inject.Inject;
|
import com.google.inject.Inject;
|
||||||
import com.google.inject.Singleton;
|
import com.google.inject.Singleton;
|
||||||
import com.zaxxer.hikari.HikariDataSource;
|
import com.zaxxer.hikari.HikariDataSource;
|
||||||
import nu.marginalia.model.dbcommon.DomainBlacklistImpl;
|
import nu.marginalia.db.DomainBlacklistImpl;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import java.sql.SQLException;
|
import java.sql.SQLException;
|
||||||
|
@ -2,7 +2,7 @@ package nu.marginalia.ranking.tool;
|
|||||||
|
|
||||||
import com.zaxxer.hikari.HikariDataSource;
|
import com.zaxxer.hikari.HikariDataSource;
|
||||||
import lombok.SneakyThrows;
|
import lombok.SneakyThrows;
|
||||||
import nu.marginalia.model.dbcommon.DomainBlacklistImpl;
|
import nu.marginalia.db.DomainBlacklistImpl;
|
||||||
import nu.marginalia.ranking.StandardPageRank;
|
import nu.marginalia.ranking.StandardPageRank;
|
||||||
import nu.marginalia.ranking.accumulator.RankingResultListAccumulator;
|
import nu.marginalia.ranking.accumulator.RankingResultListAccumulator;
|
||||||
import nu.marginalia.ranking.data.RankingDomainFetcherForSimilarityData;
|
import nu.marginalia.ranking.data.RankingDomainFetcherForSimilarityData;
|
||||||
|
@ -13,7 +13,7 @@ import lombok.SneakyThrows;
|
|||||||
import nu.marginalia.ranking.RankingAlgorithm;
|
import nu.marginalia.ranking.RankingAlgorithm;
|
||||||
import nu.marginalia.ranking.data.RankingDomainData;
|
import nu.marginalia.ranking.data.RankingDomainData;
|
||||||
import nu.marginalia.ranking.data.RankingDomainFetcher;
|
import nu.marginalia.ranking.data.RankingDomainFetcher;
|
||||||
import nu.marginalia.model.dbcommon.DomainBlacklistImpl;
|
import nu.marginalia.db.DomainBlacklistImpl;
|
||||||
import nu.marginalia.service.module.DatabaseModule;
|
import nu.marginalia.service.module.DatabaseModule;
|
||||||
import org.jetbrains.annotations.NotNull;
|
import org.jetbrains.annotations.NotNull;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
|
@ -3,7 +3,7 @@ package nu.marginalia.ranking.tool;
|
|||||||
import lombok.SneakyThrows;
|
import lombok.SneakyThrows;
|
||||||
import nu.marginalia.ranking.accumulator.RankingResultListAccumulator;
|
import nu.marginalia.ranking.accumulator.RankingResultListAccumulator;
|
||||||
import nu.marginalia.ranking.data.RankingDomainFetcher;
|
import nu.marginalia.ranking.data.RankingDomainFetcher;
|
||||||
import nu.marginalia.model.dbcommon.DomainBlacklistImpl;
|
import nu.marginalia.db.DomainBlacklistImpl;
|
||||||
import nu.marginalia.ranking.StandardPageRank;
|
import nu.marginalia.ranking.StandardPageRank;
|
||||||
import nu.marginalia.ranking.data.RankingDomainFetcherForSimilarityData;
|
import nu.marginalia.ranking.data.RankingDomainFetcherForSimilarityData;
|
||||||
import nu.marginalia.service.module.DatabaseModule;
|
import nu.marginalia.service.module.DatabaseModule;
|
||||||
|
@ -6,7 +6,7 @@ import nu.marginalia.ranking.StandardPageRank;
|
|||||||
import nu.marginalia.ranking.accumulator.RankingResultListAccumulator;
|
import nu.marginalia.ranking.accumulator.RankingResultListAccumulator;
|
||||||
import nu.marginalia.ranking.data.RankingDomainFetcherForSimilarityData;
|
import nu.marginalia.ranking.data.RankingDomainFetcherForSimilarityData;
|
||||||
|
|
||||||
import nu.marginalia.model.dbcommon.DomainBlacklistImpl;
|
import nu.marginalia.db.DomainBlacklistImpl;
|
||||||
import nu.marginalia.service.module.DatabaseModule;
|
import nu.marginalia.service.module.DatabaseModule;
|
||||||
import org.mariadb.jdbc.Driver;
|
import org.mariadb.jdbc.Driver;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
|
@ -13,6 +13,7 @@ java {
|
|||||||
|
|
||||||
dependencies {
|
dependencies {
|
||||||
implementation project(':code:common:model')
|
implementation project(':code:common:model')
|
||||||
|
implementation project(':code:common:db')
|
||||||
implementation project(':code:common:service')
|
implementation project(':code:common:service')
|
||||||
|
|
||||||
implementation libs.lombok
|
implementation libs.lombok
|
||||||
|
@ -5,7 +5,7 @@ import com.google.inject.Singleton;
|
|||||||
import com.zaxxer.hikari.HikariDataSource;
|
import com.zaxxer.hikari.HikariDataSource;
|
||||||
import nu.marginalia.browse.model.BrowseResult;
|
import nu.marginalia.browse.model.BrowseResult;
|
||||||
import nu.marginalia.model.EdgeDomain;
|
import nu.marginalia.model.EdgeDomain;
|
||||||
import nu.marginalia.model.dbcommon.DomainBlacklist;
|
import nu.marginalia.db.DomainBlacklist;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
@ -5,7 +5,7 @@ import com.google.inject.Singleton;
|
|||||||
import com.zaxxer.hikari.HikariDataSource;
|
import com.zaxxer.hikari.HikariDataSource;
|
||||||
import nu.marginalia.browse.model.BrowseResult;
|
import nu.marginalia.browse.model.BrowseResult;
|
||||||
import nu.marginalia.model.EdgeDomain;
|
import nu.marginalia.model.EdgeDomain;
|
||||||
import nu.marginalia.model.dbcommon.DomainBlacklist;
|
import nu.marginalia.db.DomainBlacklist;
|
||||||
import nu.marginalia.model.id.EdgeId;
|
import nu.marginalia.model.id.EdgeId;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
@ -6,7 +6,7 @@ import com.zaxxer.hikari.HikariDataSource;
|
|||||||
import nu.marginalia.browse.model.BrowseResult;
|
import nu.marginalia.browse.model.BrowseResult;
|
||||||
import nu.marginalia.model.EdgeDomain;
|
import nu.marginalia.model.EdgeDomain;
|
||||||
import nu.marginalia.model.EdgeUrl;
|
import nu.marginalia.model.EdgeUrl;
|
||||||
import nu.marginalia.model.dbcommon.DomainBlacklist;
|
import nu.marginalia.db.DomainBlacklist;
|
||||||
import nu.marginalia.model.id.EdgeId;
|
import nu.marginalia.model.id.EdgeId;
|
||||||
import nu.marginalia.model.id.EdgeIdCollection;
|
import nu.marginalia.model.id.EdgeIdCollection;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
|
@ -5,7 +5,7 @@ import gnu.trove.map.hash.TIntIntHashMap;
|
|||||||
import gnu.trove.map.hash.TIntObjectHashMap;
|
import gnu.trove.map.hash.TIntObjectHashMap;
|
||||||
import gnu.trove.set.hash.TIntHashSet;
|
import gnu.trove.set.hash.TIntHashSet;
|
||||||
import lombok.SneakyThrows;
|
import lombok.SneakyThrows;
|
||||||
import nu.marginalia.model.dbcommon.DbDomainQueries;
|
import nu.marginalia.db.DbDomainQueries;
|
||||||
import nu.marginalia.model.EdgeDomain;
|
import nu.marginalia.model.EdgeDomain;
|
||||||
import nu.marginalia.model.id.EdgeId;
|
import nu.marginalia.model.id.EdgeId;
|
||||||
import nu.marginalia.service.module.DatabaseModule;
|
import nu.marginalia.service.module.DatabaseModule;
|
||||||
|
@ -12,6 +12,7 @@ java {
|
|||||||
|
|
||||||
dependencies {
|
dependencies {
|
||||||
implementation project(':code:common:model')
|
implementation project(':code:common:model')
|
||||||
|
implementation project(':code:common:db')
|
||||||
implementation project(':code:common:service')
|
implementation project(':code:common:service')
|
||||||
|
|
||||||
implementation libs.lombok
|
implementation libs.lombok
|
||||||
|
@ -5,14 +5,13 @@ import com.google.inject.Inject;
|
|||||||
import com.zaxxer.hikari.HikariDataSource;
|
import com.zaxxer.hikari.HikariDataSource;
|
||||||
import lombok.SneakyThrows;
|
import lombok.SneakyThrows;
|
||||||
import nu.marginalia.model.EdgeDomain;
|
import nu.marginalia.model.EdgeDomain;
|
||||||
import nu.marginalia.model.dbcommon.DbDomainQueries;
|
import nu.marginalia.db.DbDomainQueries;
|
||||||
import nu.marginalia.model.id.EdgeId;
|
import nu.marginalia.model.id.EdgeId;
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
import spark.Request;
|
import spark.Request;
|
||||||
import spark.Response;
|
import spark.Response;
|
||||||
import spark.Spark;
|
|
||||||
|
|
||||||
import java.sql.SQLException;
|
import java.sql.SQLException;
|
||||||
|
|
||||||
|
@ -75,6 +75,9 @@ public class SqlLoadDomainLinks {
|
|||||||
}
|
}
|
||||||
catch (SQLException ex) {
|
catch (SQLException ex) {
|
||||||
logger.warn("SQL error inserting domain links", ex);
|
logger.warn("SQL error inserting domain links", ex);
|
||||||
|
|
||||||
|
if (getClass().desiredAssertionStatus())
|
||||||
|
throw new RuntimeException(ex);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -33,6 +33,9 @@ public class SqlLoadDomainMetadata {
|
|||||||
stmt.executeUpdate();
|
stmt.executeUpdate();
|
||||||
} catch (SQLException ex) {
|
} catch (SQLException ex) {
|
||||||
logger.warn("SQL error inserting domains", ex);
|
logger.warn("SQL error inserting domains", ex);
|
||||||
|
|
||||||
|
if (getClass().desiredAssertionStatus())
|
||||||
|
throw new RuntimeException(ex);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -57,6 +57,8 @@ public class SqlLoadDomains {
|
|||||||
}
|
}
|
||||||
catch (SQLException ex) {
|
catch (SQLException ex) {
|
||||||
logger.warn("SQL error inserting domain", ex);
|
logger.warn("SQL error inserting domain", ex);
|
||||||
|
if (getClass().desiredAssertionStatus())
|
||||||
|
throw new RuntimeException(ex);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -120,6 +120,9 @@ public class SqlLoadProcessedDocument {
|
|||||||
|
|
||||||
} catch (SQLException ex) {
|
} catch (SQLException ex) {
|
||||||
logger.warn("SQL error inserting document", ex);
|
logger.warn("SQL error inserting document", ex);
|
||||||
|
|
||||||
|
if (getClass().desiredAssertionStatus())
|
||||||
|
throw new RuntimeException(ex);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -169,6 +172,9 @@ public class SqlLoadProcessedDocument {
|
|||||||
conn.setAutoCommit(true);
|
conn.setAutoCommit(true);
|
||||||
} catch (SQLException ex) {
|
} catch (SQLException ex) {
|
||||||
logger.warn("SQL error inserting failed document", ex);
|
logger.warn("SQL error inserting failed document", ex);
|
||||||
|
|
||||||
|
if (getClass().desiredAssertionStatus())
|
||||||
|
throw new RuntimeException(ex);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -62,6 +62,9 @@ public class SqlLoadProcessedDomain {
|
|||||||
}
|
}
|
||||||
catch (SQLException ex) {
|
catch (SQLException ex) {
|
||||||
logger.warn("SQL error initializing domain", ex);
|
logger.warn("SQL error initializing domain", ex);
|
||||||
|
|
||||||
|
if (getClass().desiredAssertionStatus())
|
||||||
|
throw new RuntimeException(ex);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
@ -84,6 +87,9 @@ public class SqlLoadProcessedDomain {
|
|||||||
}
|
}
|
||||||
catch (SQLException ex) {
|
catch (SQLException ex) {
|
||||||
logger.warn("SQL error inserting domain alias", ex);
|
logger.warn("SQL error inserting domain alias", ex);
|
||||||
|
|
||||||
|
if (getClass().desiredAssertionStatus())
|
||||||
|
throw new RuntimeException(ex);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -104,6 +104,9 @@ public class SqlLoadUrls {
|
|||||||
}
|
}
|
||||||
catch (SQLException ex) {
|
catch (SQLException ex) {
|
||||||
logger.warn("SQL error inserting URLs", ex);
|
logger.warn("SQL error inserting URLs", ex);
|
||||||
|
|
||||||
|
if (getClass().desiredAssertionStatus())
|
||||||
|
throw new RuntimeException(ex);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -22,7 +22,7 @@ class SqlLoadDomainLinksTest {
|
|||||||
.withDatabaseName("WMSA_prod")
|
.withDatabaseName("WMSA_prod")
|
||||||
.withUsername("wmsa")
|
.withUsername("wmsa")
|
||||||
.withPassword("wmsa")
|
.withPassword("wmsa")
|
||||||
.withInitScript("sql/edge-crawler-cache.sql")
|
.withInitScript("sql/current/00-base.sql")
|
||||||
.withNetworkAliases("mariadb");
|
.withNetworkAliases("mariadb");
|
||||||
|
|
||||||
HikariDataSource dataSource;
|
HikariDataSource dataSource;
|
||||||
|
@ -19,7 +19,7 @@ class SqlLoadDomainsTest {
|
|||||||
.withDatabaseName("WMSA_prod")
|
.withDatabaseName("WMSA_prod")
|
||||||
.withUsername("wmsa")
|
.withUsername("wmsa")
|
||||||
.withPassword("wmsa")
|
.withPassword("wmsa")
|
||||||
.withInitScript("sql/edge-crawler-cache.sql")
|
.withInitScript("sql/current/00-base.sql")
|
||||||
.withNetworkAliases("mariadb");
|
.withNetworkAliases("mariadb");
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -33,7 +33,7 @@ class SqlLoadProcessedDocumentTest {
|
|||||||
.withDatabaseName("WMSA_prod")
|
.withDatabaseName("WMSA_prod")
|
||||||
.withUsername("wmsa")
|
.withUsername("wmsa")
|
||||||
.withPassword("wmsa")
|
.withPassword("wmsa")
|
||||||
.withInitScript("sql/edge-crawler-cache.sql")
|
.withInitScript("sql/current/00-base.sql")
|
||||||
.withNetworkAliases("mariadb");
|
.withNetworkAliases("mariadb");
|
||||||
|
|
||||||
HikariDataSource dataSource;
|
HikariDataSource dataSource;
|
||||||
|
@ -23,7 +23,7 @@ class SqlLoadProcessedDomainTest {
|
|||||||
.withDatabaseName("WMSA_prod")
|
.withDatabaseName("WMSA_prod")
|
||||||
.withUsername("wmsa")
|
.withUsername("wmsa")
|
||||||
.withPassword("wmsa")
|
.withPassword("wmsa")
|
||||||
.withInitScript("sql/edge-crawler-cache.sql")
|
.withInitScript("sql/current/00-base.sql")
|
||||||
.withNetworkAliases("mariadb");
|
.withNetworkAliases("mariadb");
|
||||||
|
|
||||||
HikariDataSource dataSource;
|
HikariDataSource dataSource;
|
||||||
|
@ -24,7 +24,7 @@ class SqlLoadUrlsTest {
|
|||||||
.withDatabaseName("WMSA_prod")
|
.withDatabaseName("WMSA_prod")
|
||||||
.withUsername("wmsa")
|
.withUsername("wmsa")
|
||||||
.withPassword("wmsa")
|
.withPassword("wmsa")
|
||||||
.withInitScript("sql/edge-crawler-cache.sql")
|
.withInitScript("sql/current/00-base.sql")
|
||||||
.withNetworkAliases("mariadb");
|
.withNetworkAliases("mariadb");
|
||||||
|
|
||||||
HikariDataSource dataSource;
|
HikariDataSource dataSource;
|
||||||
|
@ -21,6 +21,7 @@ java {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
dependencies {
|
dependencies {
|
||||||
|
implementation project(':code:common:db')
|
||||||
implementation project(':code:common:model')
|
implementation project(':code:common:model')
|
||||||
implementation project(':code:common:service')
|
implementation project(':code:common:service')
|
||||||
implementation project(':code:common:config')
|
implementation project(':code:common:config')
|
||||||
|
@ -6,7 +6,7 @@ import io.reactivex.rxjava3.core.Observable;
|
|||||||
import io.reactivex.rxjava3.schedulers.Schedulers;
|
import io.reactivex.rxjava3.schedulers.Schedulers;
|
||||||
import nu.marginalia.assistant.client.AssistantClient;
|
import nu.marginalia.assistant.client.AssistantClient;
|
||||||
import nu.marginalia.model.EdgeDomain;
|
import nu.marginalia.model.EdgeDomain;
|
||||||
import nu.marginalia.model.dbcommon.DbDomainQueries;
|
import nu.marginalia.db.DbDomainQueries;
|
||||||
import nu.marginalia.search.model.UrlDetails;
|
import nu.marginalia.search.model.UrlDetails;
|
||||||
import nu.marginalia.client.Context;
|
import nu.marginalia.client.Context;
|
||||||
import nu.marginalia.search.model.DecoratedSearchResults;
|
import nu.marginalia.search.model.DecoratedSearchResults;
|
||||||
|
@ -7,8 +7,8 @@ import nu.marginalia.browse.DbBrowseDomainsSimilarOldAlgo;
|
|||||||
import nu.marginalia.browse.model.BrowseResult;
|
import nu.marginalia.browse.model.BrowseResult;
|
||||||
import nu.marginalia.browse.model.BrowseResultSet;
|
import nu.marginalia.browse.model.BrowseResultSet;
|
||||||
import nu.marginalia.model.EdgeDomain;
|
import nu.marginalia.model.EdgeDomain;
|
||||||
import nu.marginalia.model.dbcommon.DbDomainQueries;
|
import nu.marginalia.db.DbDomainQueries;
|
||||||
import nu.marginalia.model.dbcommon.DomainBlacklist;
|
import nu.marginalia.db.DomainBlacklist;
|
||||||
import nu.marginalia.search.command.SearchCommandInterface;
|
import nu.marginalia.search.command.SearchCommandInterface;
|
||||||
import nu.marginalia.search.command.SearchParameters;
|
import nu.marginalia.search.command.SearchParameters;
|
||||||
import nu.marginalia.search.results.BrowseResultCleaner;
|
import nu.marginalia.search.results.BrowseResultCleaner;
|
||||||
|
@ -2,7 +2,7 @@ package nu.marginalia.search.command.commands;
|
|||||||
|
|
||||||
import com.google.inject.Inject;
|
import com.google.inject.Inject;
|
||||||
import nu.marginalia.client.Context;
|
import nu.marginalia.client.Context;
|
||||||
import nu.marginalia.model.dbcommon.DomainBlacklist;
|
import nu.marginalia.db.DomainBlacklist;
|
||||||
import nu.marginalia.search.SearchOperator;
|
import nu.marginalia.search.SearchOperator;
|
||||||
import nu.marginalia.search.command.SearchCommandInterface;
|
import nu.marginalia.search.command.SearchCommandInterface;
|
||||||
import nu.marginalia.search.command.SearchParameters;
|
import nu.marginalia.search.command.SearchParameters;
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
package nu.marginalia.search.command.commands;
|
package nu.marginalia.search.command.commands;
|
||||||
|
|
||||||
import com.google.inject.Inject;
|
import com.google.inject.Inject;
|
||||||
import nu.marginalia.model.dbcommon.DbDomainQueries;
|
import nu.marginalia.db.DbDomainQueries;
|
||||||
import nu.marginalia.search.model.UrlDetails;
|
import nu.marginalia.search.model.UrlDetails;
|
||||||
import nu.marginalia.search.command.SearchCommandInterface;
|
import nu.marginalia.search.command.SearchCommandInterface;
|
||||||
import nu.marginalia.search.command.SearchParameters;
|
import nu.marginalia.search.command.SearchParameters;
|
||||||
|
@ -4,7 +4,7 @@ import com.zaxxer.hikari.HikariDataSource;
|
|||||||
import lombok.SneakyThrows;
|
import lombok.SneakyThrows;
|
||||||
import nu.marginalia.model.EdgeDomain;
|
import nu.marginalia.model.EdgeDomain;
|
||||||
import nu.marginalia.model.crawl.DomainIndexingState;
|
import nu.marginalia.model.crawl.DomainIndexingState;
|
||||||
import nu.marginalia.model.dbcommon.DbDomainQueries;
|
import nu.marginalia.db.DbDomainQueries;
|
||||||
import nu.marginalia.model.id.EdgeId;
|
import nu.marginalia.model.id.EdgeId;
|
||||||
import nu.marginalia.search.model.DomainInformation;
|
import nu.marginalia.search.model.DomainInformation;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
|
@ -3,7 +3,7 @@ package nu.marginalia.search.svc;
|
|||||||
import com.google.inject.Inject;
|
import com.google.inject.Inject;
|
||||||
import com.zaxxer.hikari.HikariDataSource;
|
import com.zaxxer.hikari.HikariDataSource;
|
||||||
import nu.marginalia.WebsiteUrl;
|
import nu.marginalia.WebsiteUrl;
|
||||||
import nu.marginalia.model.dbcommon.DbDomainQueries;
|
import nu.marginalia.db.DbDomainQueries;
|
||||||
import nu.marginalia.model.id.EdgeId;
|
import nu.marginalia.model.id.EdgeId;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
@ -52,13 +52,17 @@ public class SearchFrontPageService {
|
|||||||
|
|
||||||
try (var conn = dataSource.getConnection();
|
try (var conn = dataSource.getConnection();
|
||||||
var stmt = conn.prepareStatement("""
|
var stmt = conn.prepareStatement("""
|
||||||
SELECT TITLE, LINK, LIST_DATE FROM SEARCH_NEWS_FEED
|
SELECT TITLE, LINK, SOURCE, LIST_DATE FROM SEARCH_NEWS_FEED ORDER BY LIST_DATE DESC
|
||||||
""")) {
|
""")) {
|
||||||
|
|
||||||
var rep = stmt.executeQuery();
|
var rep = stmt.executeQuery();
|
||||||
|
|
||||||
while (rep.next()) {
|
while (rep.next()) {
|
||||||
items.add(new NewsItem(rep.getString(1), rep.getString(2), rep.getDate(3).toLocalDate()));
|
items.add(new NewsItem(
|
||||||
|
rep.getString(1),
|
||||||
|
rep.getString(2),
|
||||||
|
rep.getString(3),
|
||||||
|
rep.getDate(4).toLocalDate()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
catch (SQLException ex) {
|
catch (SQLException ex) {
|
||||||
@ -69,5 +73,5 @@ public class SearchFrontPageService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private record IndexModel(List<NewsItem> news, int searchPerMinute) { }
|
private record IndexModel(List<NewsItem> news, int searchPerMinute) { }
|
||||||
private record NewsItem(String title, String url, LocalDate date) {}
|
private record NewsItem(String title, String url, String source, LocalDate date) {}
|
||||||
}
|
}
|
||||||
|
@ -6,7 +6,7 @@
|
|||||||
<dl>
|
<dl>
|
||||||
{{#each news}}
|
{{#each news}}
|
||||||
<dt><a href="{{link}}" rel="nofollow">{{title}}</a></dt>
|
<dt><a href="{{link}}" rel="nofollow">{{title}}</a></dt>
|
||||||
<dd>{{date}}</dd>
|
<dd>{{source}} {{date}}</dd>
|
||||||
{{/each}}
|
{{/each}}
|
||||||
</dl>
|
</dl>
|
||||||
</div>
|
</div>
|
||||||
|
@ -21,6 +21,7 @@ java {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
dependencies {
|
dependencies {
|
||||||
|
implementation project(':code:common:db')
|
||||||
implementation project(':code:common:model')
|
implementation project(':code:common:model')
|
||||||
implementation project(':code:common:service')
|
implementation project(':code:common:service')
|
||||||
implementation project(':code:common:service-discovery')
|
implementation project(':code:common:service-discovery')
|
||||||
|
@ -6,7 +6,7 @@ import lombok.SneakyThrows;
|
|||||||
import nu.marginalia.browse.DbBrowseDomainsRandom;
|
import nu.marginalia.browse.DbBrowseDomainsRandom;
|
||||||
import nu.marginalia.browse.DbBrowseDomainsSimilarCosine;
|
import nu.marginalia.browse.DbBrowseDomainsSimilarCosine;
|
||||||
import nu.marginalia.browse.model.BrowseResult;
|
import nu.marginalia.browse.model.BrowseResult;
|
||||||
import nu.marginalia.model.dbcommon.DomainBlacklist;
|
import nu.marginalia.db.DomainBlacklist;
|
||||||
import nu.marginalia.renderer.MustacheRenderer;
|
import nu.marginalia.renderer.MustacheRenderer;
|
||||||
import nu.marginalia.renderer.RendererFactory;
|
import nu.marginalia.renderer.RendererFactory;
|
||||||
import nu.marginalia.screenshot.ScreenshotService;
|
import nu.marginalia.screenshot.ScreenshotService;
|
||||||
|
@ -4,7 +4,7 @@ import nu.marginalia.browse.DbBrowseDomainsRandom;
|
|||||||
import nu.marginalia.browse.DbBrowseDomainsSimilarCosine;
|
import nu.marginalia.browse.DbBrowseDomainsSimilarCosine;
|
||||||
import nu.marginalia.browse.model.BrowseResult;
|
import nu.marginalia.browse.model.BrowseResult;
|
||||||
import nu.marginalia.model.EdgeDomain;
|
import nu.marginalia.model.EdgeDomain;
|
||||||
import nu.marginalia.model.dbcommon.DomainBlacklist;
|
import nu.marginalia.db.DomainBlacklist;
|
||||||
import nu.marginalia.model.id.EdgeId;
|
import nu.marginalia.model.id.EdgeId;
|
||||||
|
|
||||||
import java.util.LinkedList;
|
import java.util.LinkedList;
|
||||||
|
@ -21,6 +21,7 @@ tasks.distZip.enabled = false
|
|||||||
dependencies {
|
dependencies {
|
||||||
implementation project(':code:common:process')
|
implementation project(':code:common:process')
|
||||||
|
|
||||||
|
implementation project(':code:common:db')
|
||||||
implementation project(':code:common:model')
|
implementation project(':code:common:model')
|
||||||
implementation project(':code:common:service')
|
implementation project(':code:common:service')
|
||||||
implementation project(':code:process-models:crawling-model')
|
implementation project(':code:process-models:crawling-model')
|
||||||
|
@ -5,7 +5,7 @@ import com.google.common.hash.Hashing;
|
|||||||
import com.zaxxer.hikari.HikariDataSource;
|
import com.zaxxer.hikari.HikariDataSource;
|
||||||
import nu.marginalia.crawling.model.spec.CrawlingSpecification;
|
import nu.marginalia.crawling.model.spec.CrawlingSpecification;
|
||||||
import nu.marginalia.model.EdgeDomain;
|
import nu.marginalia.model.EdgeDomain;
|
||||||
import nu.marginalia.model.dbcommon.DomainBlacklistImpl;
|
import nu.marginalia.db.DomainBlacklistImpl;
|
||||||
|
|
||||||
import java.sql.ResultSet;
|
import java.sql.ResultSet;
|
||||||
import java.sql.SQLException;
|
import java.sql.SQLException;
|
||||||
|
@ -2,7 +2,7 @@ package nu.marginalia.crawl;
|
|||||||
|
|
||||||
import nu.marginalia.crawling.model.spec.CrawlingSpecification;
|
import nu.marginalia.crawling.model.spec.CrawlingSpecification;
|
||||||
import nu.marginalia.model.EdgeDomain;
|
import nu.marginalia.model.EdgeDomain;
|
||||||
import nu.marginalia.model.dbcommon.DomainBlacklistImpl;
|
import nu.marginalia.db.DomainBlacklistImpl;
|
||||||
import nu.marginalia.service.module.DatabaseModule;
|
import nu.marginalia.service.module.DatabaseModule;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
@ -80,7 +80,7 @@ services:
|
|||||||
- "127.0.0.1:3306:3306/tcp"
|
- "127.0.0.1:3306:3306/tcp"
|
||||||
volumes:
|
volumes:
|
||||||
- db:/var/lib/mysql
|
- db:/var/lib/mysql
|
||||||
- "./code/common/model/src/main/resources/sql/edge-crawler-cache.sql:/docker-entrypoint-initdb.d/init.sql"
|
- "./code/common/db/src/main/resources/sql/current/:/docker-entrypoint-initdb.d/"
|
||||||
networks:
|
networks:
|
||||||
- wmsa
|
- wmsa
|
||||||
nginx-gw:
|
nginx-gw:
|
||||||
|
@ -46,6 +46,7 @@ include 'code:api:assistant-api'
|
|||||||
|
|
||||||
include 'code:common:service-discovery'
|
include 'code:common:service-discovery'
|
||||||
include 'code:common:service-client'
|
include 'code:common:service-client'
|
||||||
|
include 'code:common:db'
|
||||||
include 'code:common:service'
|
include 'code:common:service'
|
||||||
include 'code:common:config'
|
include 'code:common:config'
|
||||||
include 'code:common:model'
|
include 'code:common:model'
|
||||||
|
Loading…
Reference in New Issue
Block a user