From ac1ac3ea5760fece10b8c7b1f4dce2c7667c3887 Mon Sep 17 00:00:00 2001 From: Viktor Date: Sat, 25 Mar 2023 15:26:17 +0100 Subject: [PATCH] Move database to a separate module * Move database to a separate project, break apart sql file into separate entities. * Fix front page news listing. --- code/common/db/build.gradle | 50 +++++ .../nu/marginalia/db}/DbDomainQueries.java | 2 +- .../nu/marginalia/db}/DomainBlacklist.java | 2 +- .../marginalia/db}/DomainBlacklistImpl.java | 2 +- .../main/resources/sql/current/00-base.sql} | 197 ++---------------- .../resources/sql/current/01-blacklist.sql | 7 + .../resources/sql/current/02-dictionary.sql | 19 ++ .../resources/sql/current/03-crawl-queue.sql | 5 + .../resources/sql/current/04-screenshot.sql | 13 ++ .../sql/current/05-domain-complaint.sql | 15 ++ .../main/resources/sql/current/06-api-key.sql | 7 + .../resources/sql/current/07-neighbors.sql | 34 +++ .../sql/current/08-random-domains.sql | 5 + .../resources/sql/current/09-news-feed.sql | 8 + .../sql/migrations/00-news-items.sql | 76 +++++++ .../domain-ranking/build.gradle | 1 + .../ranking/data/RankingDomainFetcher.java | 2 +- ...RankingDomainFetcherForSimilarityData.java | 2 +- .../tool/CreateBrowseDomainRanksTool.java | 2 +- .../ranking/tool/PerusePageRankV2.java | 2 +- .../ranking/tool/PrintDomainRanksTool.java | 2 +- .../ranking/tool/UpdateDomainRanksTool.java | 2 +- .../random-websites/build.gradle | 1 + .../browse/DbBrowseDomainsRandom.java | 2 +- .../browse/DbBrowseDomainsSimilarCosine.java | 2 +- .../browse/DbBrowseDomainsSimilarOldAlgo.java | 2 +- .../EdgeDomainLinkConsineSimilarityMain.java | 2 +- code/features-search/screenshots/build.gradle | 1 + .../screenshot/ScreenshotService.java | 3 +- .../loading/loader/SqlLoadDomainLinks.java | 3 + .../loading/loader/SqlLoadDomainMetadata.java | 3 + .../loading/loader/SqlLoadDomains.java | 2 + .../loader/SqlLoadProcessedDocument.java | 6 + .../loader/SqlLoadProcessedDomain.java | 6 + .../loading/loader/SqlLoadUrls.java 
| 3 + .../loader/SqlLoadDomainLinksTest.java | 2 +- .../marginalia/loader/SqlLoadDomainsTest.java | 2 +- .../loader/SqlLoadProcessedDocumentTest.java | 2 +- .../loader/SqlLoadProcessedDomainTest.java | 2 +- .../nu/marginalia/loader/SqlLoadUrlsTest.java | 2 +- .../services-core/search-service/build.gradle | 1 + .../nu/marginalia/search/SearchOperator.java | 2 +- .../command/commands/BrowseCommand.java | 4 +- .../command/commands/SearchCommand.java | 2 +- .../command/commands/SiteListCommand.java | 2 +- .../siteinfo/DomainInformationService.java | 2 +- .../svc/SearchAddToCrawlQueueService.java | 2 +- .../search/svc/SearchFrontPageService.java | 10 +- .../templates/search/index/index-news.hdb | 2 +- .../dating-service/build.gradle | 1 + .../nu/marginalia/dating/DatingService.java | 2 +- .../dating/DatingSessionObject.java | 2 +- code/tools/crawl-job-extractor/build.gradle | 1 + .../crawl/CrawlJobDomainExtractor.java | 2 +- .../crawl/CrawlJobExtractorMain.java | 2 +- docker-compose.yml | 2 +- settings.gradle | 1 + 57 files changed, 326 insertions(+), 215 deletions(-) create mode 100644 code/common/db/build.gradle rename code/common/{model/src/main/java/nu/marginalia/model/dbcommon => db/src/main/java/nu/marginalia/db}/DbDomainQueries.java (98%) rename code/common/{model/src/main/java/nu/marginalia/model/dbcommon => db/src/main/java/nu/marginalia/db}/DomainBlacklist.java (92%) rename code/common/{model/src/main/java/nu/marginalia/model/dbcommon => db/src/main/java/nu/marginalia/db}/DomainBlacklistImpl.java (98%) rename code/common/{model/src/main/resources/sql/edge-crawler-cache.sql => db/src/main/resources/sql/current/00-base.sql} (50%) create mode 100644 code/common/db/src/main/resources/sql/current/01-blacklist.sql create mode 100644 code/common/db/src/main/resources/sql/current/02-dictionary.sql create mode 100644 code/common/db/src/main/resources/sql/current/03-crawl-queue.sql create mode 100644 code/common/db/src/main/resources/sql/current/04-screenshot.sql create 
mode 100644 code/common/db/src/main/resources/sql/current/05-domain-complaint.sql create mode 100644 code/common/db/src/main/resources/sql/current/06-api-key.sql create mode 100644 code/common/db/src/main/resources/sql/current/07-neighbors.sql create mode 100644 code/common/db/src/main/resources/sql/current/08-random-domains.sql create mode 100644 code/common/db/src/main/resources/sql/current/09-news-feed.sql create mode 100644 code/common/db/src/main/resources/sql/migrations/00-news-items.sql diff --git a/code/common/db/build.gradle b/code/common/db/build.gradle new file mode 100644 index 00000000..a06d8c3e --- /dev/null +++ b/code/common/db/build.gradle @@ -0,0 +1,50 @@ +plugins { + id 'java' + id "io.freefair.lombok" version "5.3.3.3" + id 'jvm-test-suite' +} + +java { + toolchain { + languageVersion.set(JavaLanguageVersion.of(17)) + } +} + +dependencies { + implementation project(':code:common:model') + + implementation libs.lombok + annotationProcessor libs.lombok + implementation libs.bundles.slf4j + + implementation libs.guice + implementation libs.bundles.gson + + implementation libs.notnull + + implementation libs.commons.lang3 + + implementation libs.trove + + implementation libs.rxjava + implementation libs.bundles.mariadb + + testImplementation libs.bundles.slf4j.test + testImplementation libs.bundles.junit + testImplementation libs.mockito +} + +test { + maxParallelForks = Runtime.runtime.availableProcessors().intdiv(2) ?: 1 + maxHeapSize = "8G" + useJUnitPlatform() +} + +task fastTests(type: Test) { + maxParallelForks = Runtime.runtime.availableProcessors().intdiv(2) ?: 1 + maxHeapSize = "8G" + useJUnitPlatform { + excludeTags "slow" + } +} + diff --git a/code/common/model/src/main/java/nu/marginalia/model/dbcommon/DbDomainQueries.java b/code/common/db/src/main/java/nu/marginalia/db/DbDomainQueries.java similarity index 98% rename from code/common/model/src/main/java/nu/marginalia/model/dbcommon/DbDomainQueries.java rename to 
code/common/db/src/main/java/nu/marginalia/db/DbDomainQueries.java index dc1f015c..ae52cac9 100644 --- a/code/common/model/src/main/java/nu/marginalia/model/dbcommon/DbDomainQueries.java +++ b/code/common/db/src/main/java/nu/marginalia/db/DbDomainQueries.java @@ -1,4 +1,4 @@ -package nu.marginalia.model.dbcommon; +package nu.marginalia.db; import com.google.common.cache.Cache; diff --git a/code/common/model/src/main/java/nu/marginalia/model/dbcommon/DomainBlacklist.java b/code/common/db/src/main/java/nu/marginalia/db/DomainBlacklist.java similarity index 92% rename from code/common/model/src/main/java/nu/marginalia/model/dbcommon/DomainBlacklist.java rename to code/common/db/src/main/java/nu/marginalia/db/DomainBlacklist.java index 07fe1399..2e8a7b4c 100644 --- a/code/common/model/src/main/java/nu/marginalia/model/dbcommon/DomainBlacklist.java +++ b/code/common/db/src/main/java/nu/marginalia/db/DomainBlacklist.java @@ -1,4 +1,4 @@ -package nu.marginalia.model.dbcommon; +package nu.marginalia.db; import com.google.inject.ImplementedBy; import gnu.trove.set.hash.TIntHashSet; diff --git a/code/common/model/src/main/java/nu/marginalia/model/dbcommon/DomainBlacklistImpl.java b/code/common/db/src/main/java/nu/marginalia/db/DomainBlacklistImpl.java similarity index 98% rename from code/common/model/src/main/java/nu/marginalia/model/dbcommon/DomainBlacklistImpl.java rename to code/common/db/src/main/java/nu/marginalia/db/DomainBlacklistImpl.java index 1afe30fc..f8aa8ee6 100644 --- a/code/common/model/src/main/java/nu/marginalia/model/dbcommon/DomainBlacklistImpl.java +++ b/code/common/db/src/main/java/nu/marginalia/db/DomainBlacklistImpl.java @@ -1,4 +1,4 @@ -package nu.marginalia.model.dbcommon; +package nu.marginalia.db; import com.google.inject.Inject; import com.google.inject.Singleton; diff --git a/code/common/model/src/main/resources/sql/edge-crawler-cache.sql b/code/common/db/src/main/resources/sql/current/00-base.sql similarity index 50% rename from 
code/common/model/src/main/resources/sql/edge-crawler-cache.sql rename to code/common/db/src/main/resources/sql/current/00-base.sql index e7abc5e8..935ae991 100644 --- a/code/common/model/src/main/resources/sql/edge-crawler-cache.sql +++ b/code/common/db/src/main/resources/sql/current/00-base.sql @@ -1,19 +1,3 @@ -DROP TABLE IF EXISTS DOMAIN_METADATA; -DROP TABLE IF EXISTS EC_FEED_URL; -DROP TABLE IF EXISTS EC_DOMAIN_LINK; -DROP TABLE IF EXISTS EC_PAGE_DATA; -DROP TABLE IF EXISTS EC_URL; -DROP TABLE IF EXISTS EC_DOMAIN_NEIGHBORS; -DROP TABLE IF EXISTS EC_DOMAIN; - - -CREATE TABLE IF NOT EXISTS DOMAIN_METADATA ( - ID INT PRIMARY KEY, - KNOWN_URLS INT DEFAULT 0, - VISITED_URLS INT DEFAULT 0, - GOOD_URLS INT DEFAULT 0 -); - CREATE TABLE IF NOT EXISTS EC_DOMAIN ( ID INT PRIMARY KEY AUTO_INCREMENT, @@ -36,12 +20,6 @@ CREATE TABLE IF NOT EXISTS EC_DOMAIN ( CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci; -CREATE TABLE IF NOT EXISTS EC_DOMAIN_BLACKLIST ( - ID INT PRIMARY KEY AUTO_INCREMENT, - URL_DOMAIN VARCHAR(255) UNIQUE NOT NULL -) -CHARACTER SET utf8mb4 -COLLATE utf8mb4_unicode_ci; CREATE TABLE IF NOT EXISTS EC_URL ( ID INT PRIMARY KEY AUTO_INCREMENT, @@ -84,37 +62,6 @@ CREATE TABLE IF NOT EXISTS EC_PAGE_DATA ( CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci; -CREATE TABLE EC_FEED_URL ( - URL VARCHAR(255) PRIMARY KEY, - DOMAIN_ID INT, - - FOREIGN KEY (DOMAIN_ID) REFERENCES EC_DOMAIN(ID) ON DELETE CASCADE -) -CHARACTER SET utf8mb4 -COLLATE utf8mb4_unicode_ci; - -CREATE TABLE EC_DOMAIN_NEIGHBORS ( - ID INT PRIMARY KEY AUTO_INCREMENT, - DOMAIN_ID INT NOT NULL, - NEIGHBOR_ID INT NOT NULL, - ADJ_IDX INT NOT NULL, - - CONSTRAINT CONS UNIQUE (DOMAIN_ID, ADJ_IDX), - FOREIGN KEY (DOMAIN_ID) REFERENCES EC_DOMAIN(ID) ON DELETE CASCADE -) -CHARACTER SET utf8mb4 -COLLATE utf8mb4_unicode_ci; - -CREATE TABLE EC_DOMAIN_NEIGHBORS_2 ( - DOMAIN_ID INT NOT NULL, - NEIGHBOR_ID INT NOT NULL, - RELATEDNESS DOUBLE NOT NULL, - - PRIMARY KEY (DOMAIN_ID, NEIGHBOR_ID), - FOREIGN KEY 
(DOMAIN_ID) REFERENCES EC_DOMAIN(ID) ON DELETE CASCADE, - FOREIGN KEY (NEIGHBOR_ID) REFERENCES EC_DOMAIN(ID) ON DELETE CASCADE -); - CREATE TABLE IF NOT EXISTS EC_DOMAIN_LINK ( ID INT PRIMARY KEY AUTO_INCREMENT, SOURCE_DOMAIN_ID INT NOT NULL, @@ -126,6 +73,24 @@ CREATE TABLE IF NOT EXISTS EC_DOMAIN_LINK ( FOREIGN KEY (DEST_DOMAIN_ID) REFERENCES EC_DOMAIN(ID) ON DELETE CASCADE ); +CREATE TABLE IF NOT EXISTS DOMAIN_METADATA ( + ID INT PRIMARY KEY, + KNOWN_URLS INT DEFAULT 0, + VISITED_URLS INT DEFAULT 0, + GOOD_URLS INT DEFAULT 0, + + FOREIGN KEY (ID) REFERENCES EC_DOMAIN(ID) ON DELETE CASCADE +); + +CREATE TABLE EC_FEED_URL ( + URL VARCHAR(255) PRIMARY KEY, + DOMAIN_ID INT, + + FOREIGN KEY (DOMAIN_ID) REFERENCES EC_DOMAIN(ID) ON DELETE CASCADE +) +CHARACTER SET utf8mb4 +COLLATE utf8mb4_unicode_ci; + CREATE OR REPLACE VIEW EC_URL_VIEW AS SELECT CONCAT(EC_URL.PROTO, @@ -141,9 +106,7 @@ CREATE OR REPLACE VIEW EC_URL_VIEW AS EC_DOMAIN.DOMAIN_TOP AS DOMAIN_TOP, EC_URL.ID AS ID, EC_DOMAIN.ID AS DOMAIN_ID, - EC_URL.VISITED AS VISITED, - EC_PAGE_DATA.QUALITY AS QUALITY, EC_PAGE_DATA.DATA_HASH AS DATA_HASH, EC_PAGE_DATA.TITLE AS TITLE, @@ -151,7 +114,6 @@ CREATE OR REPLACE VIEW EC_URL_VIEW AS EC_PAGE_DATA.WORDS_TOTAL AS WORDS_TOTAL, EC_PAGE_DATA.FORMAT AS FORMAT, EC_PAGE_DATA.FEATURES AS FEATURES, - EC_DOMAIN.IP AS IP, EC_URL.STATE AS STATE, EC_DOMAIN.RANK AS RANK, @@ -162,17 +124,6 @@ CREATE OR REPLACE VIEW EC_URL_VIEW AS INNER JOIN EC_DOMAIN ON EC_URL.DOMAIN_ID = EC_DOMAIN.ID; -CREATE OR REPLACE VIEW EC_NEIGHBORS_VIEW AS - SELECT - DOM.DOMAIN_NAME AS DOMAIN_NAME, - DOM.ID AS DOMAIN_ID, - NEIGHBOR.DOMAIN_NAME AS NEIGHBOR_NAME, - NEIGHBOR.ID AS NEIGHBOR_ID, - ROUND(100 * RELATEDNESS) AS RELATEDNESS - FROM EC_DOMAIN_NEIGHBORS_2 - INNER JOIN EC_DOMAIN DOM ON DOMAIN_ID=DOM.ID - INNER JOIN EC_DOMAIN NEIGHBOR ON NEIGHBOR_ID=NEIGHBOR.ID; - CREATE OR REPLACE VIEW EC_RELATED_LINKS_VIEW AS SELECT @@ -189,117 +140,5 @@ CREATE OR REPLACE VIEW EC_RELATED_LINKS_VIEW AS ON 
DEST_DOMAIN.ID=DEST_DOMAIN_ID ; -CREATE OR REPLACE VIEW EC_RELATED_LINKS_IN AS - SELECT - IN_URL.ID AS SRC_URL_ID, - OUT_URL.ID AS DEST_URL_ID - FROM EC_DOMAIN_LINK - INNER JOIN EC_URL AS IN_URL ON IN_URL.DOMAIN_ID=EC_DOMAIN_LINK.SOURCE_DOMAIN_ID - INNER JOIN EC_URL AS OUT_URL ON OUT_URL.DOMAIN_ID=EC_DOMAIN_LINK.DEST_DOMAIN_ID - WHERE IN_URL.VISITED AND IN_URL.STATE = 'ok' - AND OUT_URL.VISITED AND OUT_URL.STATE = 'ok'; - -CREATE TABLE IF NOT EXISTS EC_API_KEY ( - LICENSE_KEY VARCHAR(255) UNIQUE, - LICENSE VARCHAR(255) NOT NULL, - NAME VARCHAR(255) NOT NULL, - EMAIL VARCHAR(255) NOT NULL, - RATE INT DEFAULT 10 -); - CREATE INDEX IF NOT EXISTS EC_DOMAIN_INDEXED_INDEX ON EC_DOMAIN (INDEXED); CREATE INDEX IF NOT EXISTS EC_DOMAIN_TOP_DOMAIN ON EC_DOMAIN (DOMAIN_TOP); - ----; - -CREATE TABLE IF NOT EXISTS EC_RANDOM_DOMAINS ( - DOMAIN_ID INT PRIMARY KEY, - DOMAIN_SET INT NOT NULL -); - ----; - -DROP TABLE IF EXISTS REF_DICTIONARY; - -CREATE TABLE IF NOT EXISTS REF_DICTIONARY ( - TYPE VARCHAR(16), - WORD VARCHAR(255), - DEFINITION VARCHAR(255) -) -CHARACTER SET utf8mb4 -COLLATE utf8mb4_unicode_ci; - ----; - -CREATE INDEX IF NOT EXISTS REF_DICTIONARY_WORD ON REF_DICTIONARY (WORD); - -CREATE TABLE IF NOT EXISTS REF_WIKI_ARTICLE ( - NAME VARCHAR(255) PRIMARY KEY, - REF_NAME VARCHAR(255) COMMENT "If this is a redirect, it redirects to this REF_WIKI_ARTICLE.NAME", - ENTRY LONGBLOB -) -ROW_FORMAT=DYNAMIC -CHARACTER SET utf8mb4 -COLLATE utf8mb4_unicode_ci; - ----; - -CREATE TABLE IF NOT EXISTS DATA_DOMAIN_SCREENSHOT ( - DOMAIN_NAME VARCHAR(255) PRIMARY KEY, - CONTENT_TYPE ENUM ('image/png', 'image/webp', 'image/svg+xml') NOT NULL, - DATA LONGBLOB NOT NULL -) -ROW_FORMAT=DYNAMIC -CHARACTER SET utf8mb4 -COLLATE utf8mb4_unicode_ci; - -CREATE TABLE DATA_DOMAIN_HISTORY ( - DOMAIN_NAME VARCHAR(255) PRIMARY KEY, - SCREENSHOT_DATE DATE DEFAULT NOW() -) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci; - -CREATE TABLE CRAWL_QUEUE( - DOMAIN_NAME VARCHAR(255) UNIQUE, - SOURCE VARCHAR(255) 
-) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci; - -CREATE TABLE DOMAIN_COMPLAINT( - ID INT PRIMARY KEY AUTO_INCREMENT, - DOMAIN_ID INT NOT NULL, - - CATEGORY VARCHAR(255) NOT NULL, - DESCRIPTION TEXT, - SAMPLE VARCHAR(255), - FILE_DATE TIMESTAMP NOT NULL DEFAULT NOW(), - - REVIEWED BOOLEAN AS (REVIEW_DATE > 0) VIRTUAL, - DECISION VARCHAR(255), - REVIEW_DATE TIMESTAMP, - - FOREIGN KEY (DOMAIN_ID) REFERENCES EC_DOMAIN(ID) ON DELETE CASCADE -); - ---- - -CREATE TABLE WMSA_PROCESS( - ID BIGINT PRIMARY KEY, - NAME VARCHAR(255) NOT NULL, - TYPE ENUM('SERVICE', 'TASK') NOT NULL, - START DATETIME NOT NULL DEFAULT NOW(), - UPDATED DATETIME, - FINISHED DATETIME, - PROGRESS DOUBLE DEFAULT 0, - PROCESS_STATUS ENUM('RUNNING', 'FINISHED', 'DEAD') NOT NULL DEFAULT 'RUNNING', - PROCESS_SUBSTATUS ENUM('NA', 'OK', 'FAIL') NOT NULL DEFAULT 'NA', - MUTEX VARCHAR(255), - TIMEOUT INT NOT NULL DEFAULT 60 -); - ---- - -CREATE TABLE SEARCH_NEWS_FEED( - ID INT PRIMARY KEY AUTO_INCREMENT, - TITLE VARCHAR(255), - LINK VARCHAR(255), - LIST_DATE DATE -) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin; \ No newline at end of file diff --git a/code/common/db/src/main/resources/sql/current/01-blacklist.sql b/code/common/db/src/main/resources/sql/current/01-blacklist.sql new file mode 100644 index 00000000..e46161bc --- /dev/null +++ b/code/common/db/src/main/resources/sql/current/01-blacklist.sql @@ -0,0 +1,7 @@ + +CREATE TABLE IF NOT EXISTS EC_DOMAIN_BLACKLIST ( + ID INT PRIMARY KEY AUTO_INCREMENT, + URL_DOMAIN VARCHAR(255) UNIQUE NOT NULL +) +CHARACTER SET utf8mb4 +COLLATE utf8mb4_unicode_ci; diff --git a/code/common/db/src/main/resources/sql/current/02-dictionary.sql b/code/common/db/src/main/resources/sql/current/02-dictionary.sql new file mode 100644 index 00000000..3fc3eb6e --- /dev/null +++ b/code/common/db/src/main/resources/sql/current/02-dictionary.sql @@ -0,0 +1,19 @@ + +CREATE TABLE IF NOT EXISTS REF_DICTIONARY ( + TYPE VARCHAR(16), + WORD VARCHAR(255), + DEFINITION VARCHAR(255) +) 
+CHARACTER SET utf8mb4 +COLLATE utf8mb4_unicode_ci; + +CREATE TABLE IF NOT EXISTS REF_WIKI_ARTICLE ( + NAME VARCHAR(255) PRIMARY KEY, + REF_NAME VARCHAR(255) COMMENT "If this is a redirect, it redirects to this REF_WIKI_ARTICLE.NAME", + ENTRY LONGBLOB +) +ROW_FORMAT=DYNAMIC +CHARACTER SET utf8mb4 +COLLATE utf8mb4_unicode_ci; + +CREATE INDEX IF NOT EXISTS REF_DICTIONARY_WORD ON REF_DICTIONARY (WORD); diff --git a/code/common/db/src/main/resources/sql/current/03-crawl-queue.sql b/code/common/db/src/main/resources/sql/current/03-crawl-queue.sql new file mode 100644 index 00000000..f327ad6c --- /dev/null +++ b/code/common/db/src/main/resources/sql/current/03-crawl-queue.sql @@ -0,0 +1,5 @@ + +CREATE TABLE CRAWL_QUEUE( + DOMAIN_NAME VARCHAR(255) UNIQUE, + SOURCE VARCHAR(255) +) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci; diff --git a/code/common/db/src/main/resources/sql/current/04-screenshot.sql b/code/common/db/src/main/resources/sql/current/04-screenshot.sql new file mode 100644 index 00000000..b61a0312 --- /dev/null +++ b/code/common/db/src/main/resources/sql/current/04-screenshot.sql @@ -0,0 +1,13 @@ +CREATE TABLE IF NOT EXISTS DATA_DOMAIN_SCREENSHOT ( + DOMAIN_NAME VARCHAR(255) PRIMARY KEY, + CONTENT_TYPE ENUM ('image/png', 'image/webp', 'image/svg+xml') NOT NULL, + DATA LONGBLOB NOT NULL +) +ROW_FORMAT=DYNAMIC +CHARACTER SET utf8mb4 +COLLATE utf8mb4_unicode_ci; + +CREATE TABLE DATA_DOMAIN_HISTORY ( + DOMAIN_NAME VARCHAR(255) PRIMARY KEY, + SCREENSHOT_DATE DATE DEFAULT NOW() +) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci; diff --git a/code/common/db/src/main/resources/sql/current/05-domain-complaint.sql b/code/common/db/src/main/resources/sql/current/05-domain-complaint.sql new file mode 100644 index 00000000..87c956ea --- /dev/null +++ b/code/common/db/src/main/resources/sql/current/05-domain-complaint.sql @@ -0,0 +1,15 @@ +CREATE TABLE DOMAIN_COMPLAINT( + ID INT PRIMARY KEY AUTO_INCREMENT, + DOMAIN_ID INT NOT NULL, + + CATEGORY VARCHAR(255) NOT NULL, 
+ DESCRIPTION TEXT, + SAMPLE VARCHAR(255), + FILE_DATE TIMESTAMP NOT NULL DEFAULT NOW(), + + REVIEWED BOOLEAN AS (REVIEW_DATE > 0) VIRTUAL, + DECISION VARCHAR(255), + REVIEW_DATE TIMESTAMP, + + FOREIGN KEY (DOMAIN_ID) REFERENCES EC_DOMAIN(ID) ON DELETE CASCADE +) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci; \ No newline at end of file diff --git a/code/common/db/src/main/resources/sql/current/06-api-key.sql b/code/common/db/src/main/resources/sql/current/06-api-key.sql new file mode 100644 index 00000000..33033ad6 --- /dev/null +++ b/code/common/db/src/main/resources/sql/current/06-api-key.sql @@ -0,0 +1,7 @@ +CREATE TABLE IF NOT EXISTS EC_API_KEY ( + LICENSE_KEY VARCHAR(255) UNIQUE, + LICENSE VARCHAR(255) NOT NULL, + NAME VARCHAR(255) NOT NULL, + EMAIL VARCHAR(255) NOT NULL, + RATE INT DEFAULT 10 +); \ No newline at end of file diff --git a/code/common/db/src/main/resources/sql/current/07-neighbors.sql b/code/common/db/src/main/resources/sql/current/07-neighbors.sql new file mode 100644 index 00000000..affc570d --- /dev/null +++ b/code/common/db/src/main/resources/sql/current/07-neighbors.sql @@ -0,0 +1,34 @@ + +CREATE TABLE EC_DOMAIN_NEIGHBORS ( + ID INT PRIMARY KEY AUTO_INCREMENT, + DOMAIN_ID INT NOT NULL, + NEIGHBOR_ID INT NOT NULL, + ADJ_IDX INT NOT NULL, + + CONSTRAINT CONS UNIQUE (DOMAIN_ID, ADJ_IDX), + FOREIGN KEY (DOMAIN_ID) REFERENCES EC_DOMAIN(ID) ON DELETE CASCADE +) +CHARACTER SET utf8mb4 +COLLATE utf8mb4_unicode_ci; + +CREATE TABLE EC_DOMAIN_NEIGHBORS_2 ( + DOMAIN_ID INT NOT NULL, + NEIGHBOR_ID INT NOT NULL, + RELATEDNESS DOUBLE NOT NULL, + + PRIMARY KEY (DOMAIN_ID, NEIGHBOR_ID), + FOREIGN KEY (DOMAIN_ID) REFERENCES EC_DOMAIN(ID) ON DELETE CASCADE, + FOREIGN KEY (NEIGHBOR_ID) REFERENCES EC_DOMAIN(ID) ON DELETE CASCADE +); + + +CREATE OR REPLACE VIEW EC_NEIGHBORS_VIEW AS + SELECT + DOM.DOMAIN_NAME AS DOMAIN_NAME, + DOM.ID AS DOMAIN_ID, + NEIGHBOR.DOMAIN_NAME AS NEIGHBOR_NAME, + NEIGHBOR.ID AS NEIGHBOR_ID, + ROUND(100 * RELATEDNESS) AS RELATEDNESS 
+ FROM EC_DOMAIN_NEIGHBORS_2 + INNER JOIN EC_DOMAIN DOM ON DOMAIN_ID=DOM.ID + INNER JOIN EC_DOMAIN NEIGHBOR ON NEIGHBOR_ID=NEIGHBOR.ID; diff --git a/code/common/db/src/main/resources/sql/current/08-random-domains.sql b/code/common/db/src/main/resources/sql/current/08-random-domains.sql new file mode 100644 index 00000000..724fc009 --- /dev/null +++ b/code/common/db/src/main/resources/sql/current/08-random-domains.sql @@ -0,0 +1,5 @@ + +CREATE TABLE IF NOT EXISTS EC_RANDOM_DOMAINS ( + DOMAIN_ID INT PRIMARY KEY, + DOMAIN_SET INT NOT NULL +); diff --git a/code/common/db/src/main/resources/sql/current/09-news-feed.sql b/code/common/db/src/main/resources/sql/current/09-news-feed.sql new file mode 100644 index 00000000..de4d46ed --- /dev/null +++ b/code/common/db/src/main/resources/sql/current/09-news-feed.sql @@ -0,0 +1,8 @@ + +CREATE TABLE SEARCH_NEWS_FEED ( + ID INT PRIMARY KEY AUTO_INCREMENT, + TITLE VARCHAR(255) NOT NULL, + LINK VARCHAR(255) UNIQUE NOT NULL, + SOURCE VARCHAR(255), + LIST_DATE DATE NOT NULL +) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin; diff --git a/code/common/db/src/main/resources/sql/migrations/00-news-items.sql b/code/common/db/src/main/resources/sql/migrations/00-news-items.sql new file mode 100644 index 00000000..4f237b67 --- /dev/null +++ b/code/common/db/src/main/resources/sql/migrations/00-news-items.sql @@ -0,0 +1,76 @@ + +INSERT IGNORE INTO SEARCH_NEWS_FEED(TITLE, LINK, SOURCE, LIST_DATE) VALUES ( +'A search engine that favors text-heavy sites and punishes modern web design', +'https://news.ycombinator.com/item?id=28550764', +'Hacker News', +'2021-09-16' +); + +INSERT IGNORE INTO SEARCH_NEWS_FEED(TITLE, LINK, SOURCE, LIST_DATE) VALUES ( +'A Search Engine Designed To Surprise You', +'https://onezero.medium.com/a-search-engine-designed-to-surprise-you-b81944ed5c06', +'Clive Thompson OneZero', +'2021-09-16' +); + +INSERT IGNORE INTO SEARCH_NEWS_FEED(TITLE, LINK, SOURCE, LIST_DATE) VALUES ( +'🎂 First anniversary! 
🎊', +'https://memex.marginalia.nu/log/49-marginalia-1-year.gmi', +null, +'2022-02-26'); + +INSERT IGNORE INTO SEARCH_NEWS_FEED(TITLE, LINK, SOURCE, LIST_DATE) VALUES ( +'Marginalia Search - Serendipity Engineering', +'https://www.metafilter.com/194653/Marginalia-Search-Serendipity-Engineering', +'MetaFilter', +'2022-03-09'); + +INSERT IGNORE INTO SEARCH_NEWS_FEED(TITLE, LINK, SOURCE, LIST_DATE) VALUES ( +'What Google Search Isn\'t Showing You', +'https://www.newyorker.com/culture/infinite-scroll/what-google-search-isnt-showing-you', +'The New Yorker 🎩', +'2022-03-10' +); + +INSERT IGNORE INTO SEARCH_NEWS_FEED(TITLE, LINK, SOURCE, LIST_DATE) VALUES ( +'You Should Check Out the Indie Web 🎞️', +'https://www.youtube.com/watch?v=rTSEr0cRJY8', +'YouTube, You\'ve Got Kat', +'2022-03-15' +); + +INSERT IGNORE INTO SEARCH_NEWS_FEED(TITLE, LINK, SOURCE, LIST_DATE) VALUES ( +'Marginalia Goes Open Source', +'https://news.ycombinator.com/item?id=31536626', +'Hacker News', +'2022-05-28' +); + +INSERT IGNORE INTO SEARCH_NEWS_FEED(TITLE, LINK, SOURCE, LIST_DATE) VALUES ( +'Kritik an Googles Suche - Platzhirsch auf dem Nebenschauplatz', +'https://www.deutschlandfunkkultur.de/google-suche-100.html', +'Deutschlandfunk Kultur 🇩🇪', +'2022-08-18' +); + +INSERT IGNORE INTO SEARCH_NEWS_FEED(TITLE, LINK, SOURCE, LIST_DATE) VALUES ( +'Google ei enää tideä', +'https://www.hs.fi/visio/art-2000009139237.html', +'Helsing Sanomat 🇫🇮', +'2022-10-19' +); + +INSERT IGNORE INTO SEARCH_NEWS_FEED(TITLE, LINK, SOURCE, LIST_DATE) VALUES ( +'Marginalia\'s Index Reaches 100,000,000 Documents 🎊', +'https://memex.marginalia.nu/log/64-hundred-million.gmi', +null, +'2022-10-21' +); + +INSERT IGNORE INTO SEARCH_NEWS_FEED(TITLE, LINK, SOURCE, LIST_DATE) VALUES ( +'Marginalia Receives NLnet grant', +'https://memex.marginalia.nu/log/74-marginalia-2-years.gmi', +null, +'2023-02-26' +); + diff --git a/code/features-index/domain-ranking/build.gradle 
b/code/features-index/domain-ranking/build.gradle index 1c6dfe91..5ac1b1e6 100644 --- a/code/features-index/domain-ranking/build.gradle +++ b/code/features-index/domain-ranking/build.gradle @@ -14,6 +14,7 @@ java { } dependencies { + implementation project(':code:common:db') implementation project(':code:common:model') implementation project(':code:common:service') diff --git a/code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/data/RankingDomainFetcher.java b/code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/data/RankingDomainFetcher.java index 7f577f3b..2499d51f 100644 --- a/code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/data/RankingDomainFetcher.java +++ b/code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/data/RankingDomainFetcher.java @@ -3,7 +3,7 @@ package nu.marginalia.ranking.data; import com.google.inject.Inject; import com.google.inject.Singleton; import com.zaxxer.hikari.HikariDataSource; -import nu.marginalia.model.dbcommon.DomainBlacklistImpl; +import nu.marginalia.db.DomainBlacklistImpl; import nu.marginalia.model.crawl.DomainIndexingState; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/data/RankingDomainFetcherForSimilarityData.java b/code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/data/RankingDomainFetcherForSimilarityData.java index 0bfff828..eccb87ad 100644 --- a/code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/data/RankingDomainFetcherForSimilarityData.java +++ b/code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/data/RankingDomainFetcherForSimilarityData.java @@ -3,7 +3,7 @@ package nu.marginalia.ranking.data; import com.google.inject.Inject; import com.google.inject.Singleton; import com.zaxxer.hikari.HikariDataSource; -import nu.marginalia.model.dbcommon.DomainBlacklistImpl; +import 
nu.marginalia.db.DomainBlacklistImpl; import org.slf4j.LoggerFactory; import java.sql.SQLException; diff --git a/code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/tool/CreateBrowseDomainRanksTool.java b/code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/tool/CreateBrowseDomainRanksTool.java index 058cf32b..17b2e195 100644 --- a/code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/tool/CreateBrowseDomainRanksTool.java +++ b/code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/tool/CreateBrowseDomainRanksTool.java @@ -2,7 +2,7 @@ package nu.marginalia.ranking.tool; import com.zaxxer.hikari.HikariDataSource; import lombok.SneakyThrows; -import nu.marginalia.model.dbcommon.DomainBlacklistImpl; +import nu.marginalia.db.DomainBlacklistImpl; import nu.marginalia.ranking.StandardPageRank; import nu.marginalia.ranking.accumulator.RankingResultListAccumulator; import nu.marginalia.ranking.data.RankingDomainFetcherForSimilarityData; diff --git a/code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/tool/PerusePageRankV2.java b/code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/tool/PerusePageRankV2.java index 2a3c15ae..be64a4e2 100644 --- a/code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/tool/PerusePageRankV2.java +++ b/code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/tool/PerusePageRankV2.java @@ -13,7 +13,7 @@ import lombok.SneakyThrows; import nu.marginalia.ranking.RankingAlgorithm; import nu.marginalia.ranking.data.RankingDomainData; import nu.marginalia.ranking.data.RankingDomainFetcher; -import nu.marginalia.model.dbcommon.DomainBlacklistImpl; +import nu.marginalia.db.DomainBlacklistImpl; import nu.marginalia.service.module.DatabaseModule; import org.jetbrains.annotations.NotNull; import org.slf4j.Logger; diff --git 
a/code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/tool/PrintDomainRanksTool.java b/code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/tool/PrintDomainRanksTool.java index 11d71ddf..9877f393 100644 --- a/code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/tool/PrintDomainRanksTool.java +++ b/code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/tool/PrintDomainRanksTool.java @@ -3,7 +3,7 @@ package nu.marginalia.ranking.tool; import lombok.SneakyThrows; import nu.marginalia.ranking.accumulator.RankingResultListAccumulator; import nu.marginalia.ranking.data.RankingDomainFetcher; -import nu.marginalia.model.dbcommon.DomainBlacklistImpl; +import nu.marginalia.db.DomainBlacklistImpl; import nu.marginalia.ranking.StandardPageRank; import nu.marginalia.ranking.data.RankingDomainFetcherForSimilarityData; import nu.marginalia.service.module.DatabaseModule; diff --git a/code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/tool/UpdateDomainRanksTool.java b/code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/tool/UpdateDomainRanksTool.java index abd00f89..7e57bc8a 100644 --- a/code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/tool/UpdateDomainRanksTool.java +++ b/code/features-index/domain-ranking/src/main/java/nu/marginalia/ranking/tool/UpdateDomainRanksTool.java @@ -6,7 +6,7 @@ import nu.marginalia.ranking.StandardPageRank; import nu.marginalia.ranking.accumulator.RankingResultListAccumulator; import nu.marginalia.ranking.data.RankingDomainFetcherForSimilarityData; -import nu.marginalia.model.dbcommon.DomainBlacklistImpl; +import nu.marginalia.db.DomainBlacklistImpl; import nu.marginalia.service.module.DatabaseModule; import org.mariadb.jdbc.Driver; import org.slf4j.Logger; diff --git a/code/features-search/random-websites/build.gradle b/code/features-search/random-websites/build.gradle index 1bca63e3..0d4795eb 100644 --- 
a/code/features-search/random-websites/build.gradle +++ b/code/features-search/random-websites/build.gradle @@ -13,6 +13,7 @@ java { dependencies { implementation project(':code:common:model') + implementation project(':code:common:db') implementation project(':code:common:service') implementation libs.lombok diff --git a/code/features-search/random-websites/src/main/java/nu/marginalia/browse/DbBrowseDomainsRandom.java b/code/features-search/random-websites/src/main/java/nu/marginalia/browse/DbBrowseDomainsRandom.java index 2dd503b4..44dbe744 100644 --- a/code/features-search/random-websites/src/main/java/nu/marginalia/browse/DbBrowseDomainsRandom.java +++ b/code/features-search/random-websites/src/main/java/nu/marginalia/browse/DbBrowseDomainsRandom.java @@ -5,7 +5,7 @@ import com.google.inject.Singleton; import com.zaxxer.hikari.HikariDataSource; import nu.marginalia.browse.model.BrowseResult; import nu.marginalia.model.EdgeDomain; -import nu.marginalia.model.dbcommon.DomainBlacklist; +import nu.marginalia.db.DomainBlacklist; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/code/features-search/random-websites/src/main/java/nu/marginalia/browse/DbBrowseDomainsSimilarCosine.java b/code/features-search/random-websites/src/main/java/nu/marginalia/browse/DbBrowseDomainsSimilarCosine.java index cdeac7fd..6928e329 100644 --- a/code/features-search/random-websites/src/main/java/nu/marginalia/browse/DbBrowseDomainsSimilarCosine.java +++ b/code/features-search/random-websites/src/main/java/nu/marginalia/browse/DbBrowseDomainsSimilarCosine.java @@ -5,7 +5,7 @@ import com.google.inject.Singleton; import com.zaxxer.hikari.HikariDataSource; import nu.marginalia.browse.model.BrowseResult; import nu.marginalia.model.EdgeDomain; -import nu.marginalia.model.dbcommon.DomainBlacklist; +import nu.marginalia.db.DomainBlacklist; import nu.marginalia.model.id.EdgeId; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git 
a/code/features-search/random-websites/src/main/java/nu/marginalia/browse/DbBrowseDomainsSimilarOldAlgo.java b/code/features-search/random-websites/src/main/java/nu/marginalia/browse/DbBrowseDomainsSimilarOldAlgo.java index 0ab6ade6..a9fb6e54 100644 --- a/code/features-search/random-websites/src/main/java/nu/marginalia/browse/DbBrowseDomainsSimilarOldAlgo.java +++ b/code/features-search/random-websites/src/main/java/nu/marginalia/browse/DbBrowseDomainsSimilarOldAlgo.java @@ -6,7 +6,7 @@ import com.zaxxer.hikari.HikariDataSource; import nu.marginalia.browse.model.BrowseResult; import nu.marginalia.model.EdgeDomain; import nu.marginalia.model.EdgeUrl; -import nu.marginalia.model.dbcommon.DomainBlacklist; +import nu.marginalia.db.DomainBlacklist; import nu.marginalia.model.id.EdgeId; import nu.marginalia.model.id.EdgeIdCollection; import org.slf4j.Logger; diff --git a/code/features-search/random-websites/src/main/java/nu/marginalia/browse/experimental/EdgeDomainLinkConsineSimilarityMain.java b/code/features-search/random-websites/src/main/java/nu/marginalia/browse/experimental/EdgeDomainLinkConsineSimilarityMain.java index c4d4e0b2..77b52e8c 100644 --- a/code/features-search/random-websites/src/main/java/nu/marginalia/browse/experimental/EdgeDomainLinkConsineSimilarityMain.java +++ b/code/features-search/random-websites/src/main/java/nu/marginalia/browse/experimental/EdgeDomainLinkConsineSimilarityMain.java @@ -5,7 +5,7 @@ import gnu.trove.map.hash.TIntIntHashMap; import gnu.trove.map.hash.TIntObjectHashMap; import gnu.trove.set.hash.TIntHashSet; import lombok.SneakyThrows; -import nu.marginalia.model.dbcommon.DbDomainQueries; +import nu.marginalia.db.DbDomainQueries; import nu.marginalia.model.EdgeDomain; import nu.marginalia.model.id.EdgeId; import nu.marginalia.service.module.DatabaseModule; diff --git a/code/features-search/screenshots/build.gradle b/code/features-search/screenshots/build.gradle index 0e014011..c2dcd51e 100644 --- 
a/code/features-search/screenshots/build.gradle +++ b/code/features-search/screenshots/build.gradle @@ -12,6 +12,7 @@ java { dependencies { implementation project(':code:common:model') + implementation project(':code:common:db') implementation project(':code:common:service') implementation libs.lombok diff --git a/code/features-search/screenshots/src/main/java/nu/marginalia/screenshot/ScreenshotService.java b/code/features-search/screenshots/src/main/java/nu/marginalia/screenshot/ScreenshotService.java index 69d00ba7..0f839ba1 100644 --- a/code/features-search/screenshots/src/main/java/nu/marginalia/screenshot/ScreenshotService.java +++ b/code/features-search/screenshots/src/main/java/nu/marginalia/screenshot/ScreenshotService.java @@ -5,14 +5,13 @@ import com.google.inject.Inject; import com.zaxxer.hikari.HikariDataSource; import lombok.SneakyThrows; import nu.marginalia.model.EdgeDomain; -import nu.marginalia.model.dbcommon.DbDomainQueries; +import nu.marginalia.db.DbDomainQueries; import nu.marginalia.model.id.EdgeId; import org.apache.commons.io.IOUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import spark.Request; import spark.Response; -import spark.Spark; import java.sql.SQLException; diff --git a/code/processes/loading-process/src/main/java/nu/marginalia/loading/loader/SqlLoadDomainLinks.java b/code/processes/loading-process/src/main/java/nu/marginalia/loading/loader/SqlLoadDomainLinks.java index 256b2712..79028d4c 100644 --- a/code/processes/loading-process/src/main/java/nu/marginalia/loading/loader/SqlLoadDomainLinks.java +++ b/code/processes/loading-process/src/main/java/nu/marginalia/loading/loader/SqlLoadDomainLinks.java @@ -75,6 +75,9 @@ public class SqlLoadDomainLinks { } catch (SQLException ex) { logger.warn("SQL error inserting domain links", ex); + + if (getClass().desiredAssertionStatus()) + throw new RuntimeException(ex); } } diff --git 
a/code/processes/loading-process/src/main/java/nu/marginalia/loading/loader/SqlLoadDomainMetadata.java b/code/processes/loading-process/src/main/java/nu/marginalia/loading/loader/SqlLoadDomainMetadata.java index e276e40f..3c435f87 100644 --- a/code/processes/loading-process/src/main/java/nu/marginalia/loading/loader/SqlLoadDomainMetadata.java +++ b/code/processes/loading-process/src/main/java/nu/marginalia/loading/loader/SqlLoadDomainMetadata.java @@ -33,6 +33,9 @@ public class SqlLoadDomainMetadata { stmt.executeUpdate(); } catch (SQLException ex) { logger.warn("SQL error inserting domains", ex); + + if (getClass().desiredAssertionStatus()) + throw new RuntimeException(ex); } } } diff --git a/code/processes/loading-process/src/main/java/nu/marginalia/loading/loader/SqlLoadDomains.java b/code/processes/loading-process/src/main/java/nu/marginalia/loading/loader/SqlLoadDomains.java index 5c441c2f..eb4713ce 100644 --- a/code/processes/loading-process/src/main/java/nu/marginalia/loading/loader/SqlLoadDomains.java +++ b/code/processes/loading-process/src/main/java/nu/marginalia/loading/loader/SqlLoadDomains.java @@ -57,6 +57,8 @@ public class SqlLoadDomains { } catch (SQLException ex) { logger.warn("SQL error inserting domain", ex); + if (getClass().desiredAssertionStatus()) + throw new RuntimeException(ex); } diff --git a/code/processes/loading-process/src/main/java/nu/marginalia/loading/loader/SqlLoadProcessedDocument.java b/code/processes/loading-process/src/main/java/nu/marginalia/loading/loader/SqlLoadProcessedDocument.java index cb53c4ca..2a875b58 100644 --- a/code/processes/loading-process/src/main/java/nu/marginalia/loading/loader/SqlLoadProcessedDocument.java +++ b/code/processes/loading-process/src/main/java/nu/marginalia/loading/loader/SqlLoadProcessedDocument.java @@ -120,6 +120,9 @@ public class SqlLoadProcessedDocument { } catch (SQLException ex) { logger.warn("SQL error inserting document", ex); + + if (getClass().desiredAssertionStatus()) + throw new 
RuntimeException(ex); } } @@ -169,6 +172,9 @@ public class SqlLoadProcessedDocument { conn.setAutoCommit(true); } catch (SQLException ex) { logger.warn("SQL error inserting failed document", ex); + + if (getClass().desiredAssertionStatus()) + throw new RuntimeException(ex); } } diff --git a/code/processes/loading-process/src/main/java/nu/marginalia/loading/loader/SqlLoadProcessedDomain.java b/code/processes/loading-process/src/main/java/nu/marginalia/loading/loader/SqlLoadProcessedDomain.java index dd1f5e4c..4c8be708 100644 --- a/code/processes/loading-process/src/main/java/nu/marginalia/loading/loader/SqlLoadProcessedDomain.java +++ b/code/processes/loading-process/src/main/java/nu/marginalia/loading/loader/SqlLoadProcessedDomain.java @@ -62,6 +62,9 @@ public class SqlLoadProcessedDomain { } catch (SQLException ex) { logger.warn("SQL error initializing domain", ex); + + if (getClass().desiredAssertionStatus()) + throw new RuntimeException(ex); } } @@ -84,6 +87,9 @@ public class SqlLoadProcessedDomain { } catch (SQLException ex) { logger.warn("SQL error inserting domain alias", ex); + + if (getClass().desiredAssertionStatus()) + throw new RuntimeException(ex); } } } diff --git a/code/processes/loading-process/src/main/java/nu/marginalia/loading/loader/SqlLoadUrls.java b/code/processes/loading-process/src/main/java/nu/marginalia/loading/loader/SqlLoadUrls.java index 1cd191f6..0e97ece1 100644 --- a/code/processes/loading-process/src/main/java/nu/marginalia/loading/loader/SqlLoadUrls.java +++ b/code/processes/loading-process/src/main/java/nu/marginalia/loading/loader/SqlLoadUrls.java @@ -104,6 +104,9 @@ public class SqlLoadUrls { } catch (SQLException ex) { logger.warn("SQL error inserting URLs", ex); + + if (getClass().desiredAssertionStatus()) + throw new RuntimeException(ex); } } diff --git a/code/processes/loading-process/src/test/java/nu/marginalia/loader/SqlLoadDomainLinksTest.java 
b/code/processes/loading-process/src/test/java/nu/marginalia/loader/SqlLoadDomainLinksTest.java index 8c2bbe95..f80a54dc 100644 --- a/code/processes/loading-process/src/test/java/nu/marginalia/loader/SqlLoadDomainLinksTest.java +++ b/code/processes/loading-process/src/test/java/nu/marginalia/loader/SqlLoadDomainLinksTest.java @@ -22,7 +22,7 @@ class SqlLoadDomainLinksTest { .withDatabaseName("WMSA_prod") .withUsername("wmsa") .withPassword("wmsa") - .withInitScript("sql/edge-crawler-cache.sql") + .withInitScript("sql/current/00-base.sql") .withNetworkAliases("mariadb"); HikariDataSource dataSource; diff --git a/code/processes/loading-process/src/test/java/nu/marginalia/loader/SqlLoadDomainsTest.java b/code/processes/loading-process/src/test/java/nu/marginalia/loader/SqlLoadDomainsTest.java index 90c534ad..d5aee323 100644 --- a/code/processes/loading-process/src/test/java/nu/marginalia/loader/SqlLoadDomainsTest.java +++ b/code/processes/loading-process/src/test/java/nu/marginalia/loader/SqlLoadDomainsTest.java @@ -19,7 +19,7 @@ class SqlLoadDomainsTest { .withDatabaseName("WMSA_prod") .withUsername("wmsa") .withPassword("wmsa") - .withInitScript("sql/edge-crawler-cache.sql") + .withInitScript("sql/current/00-base.sql") .withNetworkAliases("mariadb"); @Test diff --git a/code/processes/loading-process/src/test/java/nu/marginalia/loader/SqlLoadProcessedDocumentTest.java b/code/processes/loading-process/src/test/java/nu/marginalia/loader/SqlLoadProcessedDocumentTest.java index 51a4d1fc..0abea35c 100644 --- a/code/processes/loading-process/src/test/java/nu/marginalia/loader/SqlLoadProcessedDocumentTest.java +++ b/code/processes/loading-process/src/test/java/nu/marginalia/loader/SqlLoadProcessedDocumentTest.java @@ -33,7 +33,7 @@ class SqlLoadProcessedDocumentTest { .withDatabaseName("WMSA_prod") .withUsername("wmsa") .withPassword("wmsa") - .withInitScript("sql/edge-crawler-cache.sql") + .withInitScript("sql/current/00-base.sql") .withNetworkAliases("mariadb"); 
HikariDataSource dataSource; diff --git a/code/processes/loading-process/src/test/java/nu/marginalia/loader/SqlLoadProcessedDomainTest.java b/code/processes/loading-process/src/test/java/nu/marginalia/loader/SqlLoadProcessedDomainTest.java index e4051790..2b28f6a2 100644 --- a/code/processes/loading-process/src/test/java/nu/marginalia/loader/SqlLoadProcessedDomainTest.java +++ b/code/processes/loading-process/src/test/java/nu/marginalia/loader/SqlLoadProcessedDomainTest.java @@ -23,7 +23,7 @@ class SqlLoadProcessedDomainTest { .withDatabaseName("WMSA_prod") .withUsername("wmsa") .withPassword("wmsa") - .withInitScript("sql/edge-crawler-cache.sql") + .withInitScript("sql/current/00-base.sql") .withNetworkAliases("mariadb"); HikariDataSource dataSource; diff --git a/code/processes/loading-process/src/test/java/nu/marginalia/loader/SqlLoadUrlsTest.java b/code/processes/loading-process/src/test/java/nu/marginalia/loader/SqlLoadUrlsTest.java index fe8c7847..cc5c1381 100644 --- a/code/processes/loading-process/src/test/java/nu/marginalia/loader/SqlLoadUrlsTest.java +++ b/code/processes/loading-process/src/test/java/nu/marginalia/loader/SqlLoadUrlsTest.java @@ -24,7 +24,7 @@ class SqlLoadUrlsTest { .withDatabaseName("WMSA_prod") .withUsername("wmsa") .withPassword("wmsa") - .withInitScript("sql/edge-crawler-cache.sql") + .withInitScript("sql/current/00-base.sql") .withNetworkAliases("mariadb"); HikariDataSource dataSource; diff --git a/code/services-core/search-service/build.gradle b/code/services-core/search-service/build.gradle index 60717341..04ab77cc 100644 --- a/code/services-core/search-service/build.gradle +++ b/code/services-core/search-service/build.gradle @@ -21,6 +21,7 @@ java { } } dependencies { + implementation project(':code:common:db') implementation project(':code:common:model') implementation project(':code:common:service') implementation project(':code:common:config') diff --git 
a/code/services-core/search-service/src/main/java/nu/marginalia/search/SearchOperator.java b/code/services-core/search-service/src/main/java/nu/marginalia/search/SearchOperator.java index 2187514f..5657eb40 100644 --- a/code/services-core/search-service/src/main/java/nu/marginalia/search/SearchOperator.java +++ b/code/services-core/search-service/src/main/java/nu/marginalia/search/SearchOperator.java @@ -6,7 +6,7 @@ import io.reactivex.rxjava3.core.Observable; import io.reactivex.rxjava3.schedulers.Schedulers; import nu.marginalia.assistant.client.AssistantClient; import nu.marginalia.model.EdgeDomain; -import nu.marginalia.model.dbcommon.DbDomainQueries; +import nu.marginalia.db.DbDomainQueries; import nu.marginalia.search.model.UrlDetails; import nu.marginalia.client.Context; import nu.marginalia.search.model.DecoratedSearchResults; diff --git a/code/services-core/search-service/src/main/java/nu/marginalia/search/command/commands/BrowseCommand.java b/code/services-core/search-service/src/main/java/nu/marginalia/search/command/commands/BrowseCommand.java index aae4cd99..32398bc8 100644 --- a/code/services-core/search-service/src/main/java/nu/marginalia/search/command/commands/BrowseCommand.java +++ b/code/services-core/search-service/src/main/java/nu/marginalia/search/command/commands/BrowseCommand.java @@ -7,8 +7,8 @@ import nu.marginalia.browse.DbBrowseDomainsSimilarOldAlgo; import nu.marginalia.browse.model.BrowseResult; import nu.marginalia.browse.model.BrowseResultSet; import nu.marginalia.model.EdgeDomain; -import nu.marginalia.model.dbcommon.DbDomainQueries; -import nu.marginalia.model.dbcommon.DomainBlacklist; +import nu.marginalia.db.DbDomainQueries; +import nu.marginalia.db.DomainBlacklist; import nu.marginalia.search.command.SearchCommandInterface; import nu.marginalia.search.command.SearchParameters; import nu.marginalia.search.results.BrowseResultCleaner; diff --git 
a/code/services-core/search-service/src/main/java/nu/marginalia/search/command/commands/SearchCommand.java b/code/services-core/search-service/src/main/java/nu/marginalia/search/command/commands/SearchCommand.java index f7255e8a..f09bfcb9 100644 --- a/code/services-core/search-service/src/main/java/nu/marginalia/search/command/commands/SearchCommand.java +++ b/code/services-core/search-service/src/main/java/nu/marginalia/search/command/commands/SearchCommand.java @@ -2,7 +2,7 @@ package nu.marginalia.search.command.commands; import com.google.inject.Inject; import nu.marginalia.client.Context; -import nu.marginalia.model.dbcommon.DomainBlacklist; +import nu.marginalia.db.DomainBlacklist; import nu.marginalia.search.SearchOperator; import nu.marginalia.search.command.SearchCommandInterface; import nu.marginalia.search.command.SearchParameters; diff --git a/code/services-core/search-service/src/main/java/nu/marginalia/search/command/commands/SiteListCommand.java b/code/services-core/search-service/src/main/java/nu/marginalia/search/command/commands/SiteListCommand.java index 959a7762..0f24126b 100644 --- a/code/services-core/search-service/src/main/java/nu/marginalia/search/command/commands/SiteListCommand.java +++ b/code/services-core/search-service/src/main/java/nu/marginalia/search/command/commands/SiteListCommand.java @@ -1,7 +1,7 @@ package nu.marginalia.search.command.commands; import com.google.inject.Inject; -import nu.marginalia.model.dbcommon.DbDomainQueries; +import nu.marginalia.db.DbDomainQueries; import nu.marginalia.search.model.UrlDetails; import nu.marginalia.search.command.SearchCommandInterface; import nu.marginalia.search.command.SearchParameters; diff --git a/code/services-core/search-service/src/main/java/nu/marginalia/search/siteinfo/DomainInformationService.java b/code/services-core/search-service/src/main/java/nu/marginalia/search/siteinfo/DomainInformationService.java index e52f2a48..35ce81b7 100644 --- 
a/code/services-core/search-service/src/main/java/nu/marginalia/search/siteinfo/DomainInformationService.java +++ b/code/services-core/search-service/src/main/java/nu/marginalia/search/siteinfo/DomainInformationService.java @@ -4,7 +4,7 @@ import com.zaxxer.hikari.HikariDataSource; import lombok.SneakyThrows; import nu.marginalia.model.EdgeDomain; import nu.marginalia.model.crawl.DomainIndexingState; -import nu.marginalia.model.dbcommon.DbDomainQueries; +import nu.marginalia.db.DbDomainQueries; import nu.marginalia.model.id.EdgeId; import nu.marginalia.search.model.DomainInformation; import org.slf4j.Logger; diff --git a/code/services-core/search-service/src/main/java/nu/marginalia/search/svc/SearchAddToCrawlQueueService.java b/code/services-core/search-service/src/main/java/nu/marginalia/search/svc/SearchAddToCrawlQueueService.java index 8e4faff5..ba104340 100644 --- a/code/services-core/search-service/src/main/java/nu/marginalia/search/svc/SearchAddToCrawlQueueService.java +++ b/code/services-core/search-service/src/main/java/nu/marginalia/search/svc/SearchAddToCrawlQueueService.java @@ -3,7 +3,7 @@ package nu.marginalia.search.svc; import com.google.inject.Inject; import com.zaxxer.hikari.HikariDataSource; import nu.marginalia.WebsiteUrl; -import nu.marginalia.model.dbcommon.DbDomainQueries; +import nu.marginalia.db.DbDomainQueries; import nu.marginalia.model.id.EdgeId; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/code/services-core/search-service/src/main/java/nu/marginalia/search/svc/SearchFrontPageService.java b/code/services-core/search-service/src/main/java/nu/marginalia/search/svc/SearchFrontPageService.java index c0fdde4e..8d409530 100644 --- a/code/services-core/search-service/src/main/java/nu/marginalia/search/svc/SearchFrontPageService.java +++ b/code/services-core/search-service/src/main/java/nu/marginalia/search/svc/SearchFrontPageService.java @@ -52,13 +52,17 @@ public class SearchFrontPageService { try (var conn = 
dataSource.getConnection(); var stmt = conn.prepareStatement(""" - SELECT TITLE, LINK, LIST_DATE FROM SEARCH_NEWS_FEED + SELECT TITLE, LINK, SOURCE, LIST_DATE FROM SEARCH_NEWS_FEED ORDER BY LIST_DATE DESC """)) { var rep = stmt.executeQuery(); while (rep.next()) { - items.add(new NewsItem(rep.getString(1), rep.getString(2), rep.getDate(3).toLocalDate())); + items.add(new NewsItem( + rep.getString(1), + rep.getString(2), + rep.getString(3), + rep.getDate(4).toLocalDate())); } } catch (SQLException ex) { @@ -69,5 +73,5 @@ public class SearchFrontPageService { } private record IndexModel(List news, int searchPerMinute) { } - private record NewsItem(String title, String url, LocalDate date) {} + private record NewsItem(String title, String url, String source, LocalDate date) {} } diff --git a/code/services-core/search-service/src/main/resources/templates/search/index/index-news.hdb b/code/services-core/search-service/src/main/resources/templates/search/index/index-news.hdb index 53bbf81b..2b1ecbcd 100644 --- a/code/services-core/search-service/src/main/resources/templates/search/index/index-news.hdb +++ b/code/services-core/search-service/src/main/resources/templates/search/index/index-news.hdb @@ -6,7 +6,7 @@
{{#each news}}
{{title}}
-
{{date}}
+
{{source}} {{date}}
{{/each}}
diff --git a/code/services-satellite/dating-service/build.gradle b/code/services-satellite/dating-service/build.gradle index 85bdea4d..c2a39c0b 100644 --- a/code/services-satellite/dating-service/build.gradle +++ b/code/services-satellite/dating-service/build.gradle @@ -21,6 +21,7 @@ java { } } dependencies { + implementation project(':code:common:db') implementation project(':code:common:model') implementation project(':code:common:service') implementation project(':code:common:service-discovery') diff --git a/code/services-satellite/dating-service/src/main/java/nu/marginalia/dating/DatingService.java b/code/services-satellite/dating-service/src/main/java/nu/marginalia/dating/DatingService.java index 3f9ca32a..d39f5a0c 100644 --- a/code/services-satellite/dating-service/src/main/java/nu/marginalia/dating/DatingService.java +++ b/code/services-satellite/dating-service/src/main/java/nu/marginalia/dating/DatingService.java @@ -6,7 +6,7 @@ import lombok.SneakyThrows; import nu.marginalia.browse.DbBrowseDomainsRandom; import nu.marginalia.browse.DbBrowseDomainsSimilarCosine; import nu.marginalia.browse.model.BrowseResult; -import nu.marginalia.model.dbcommon.DomainBlacklist; +import nu.marginalia.db.DomainBlacklist; import nu.marginalia.renderer.MustacheRenderer; import nu.marginalia.renderer.RendererFactory; import nu.marginalia.screenshot.ScreenshotService; diff --git a/code/services-satellite/dating-service/src/main/java/nu/marginalia/dating/DatingSessionObject.java b/code/services-satellite/dating-service/src/main/java/nu/marginalia/dating/DatingSessionObject.java index 695de6fb..60ec6e3e 100644 --- a/code/services-satellite/dating-service/src/main/java/nu/marginalia/dating/DatingSessionObject.java +++ b/code/services-satellite/dating-service/src/main/java/nu/marginalia/dating/DatingSessionObject.java @@ -4,7 +4,7 @@ import nu.marginalia.browse.DbBrowseDomainsRandom; import nu.marginalia.browse.DbBrowseDomainsSimilarCosine; import 
nu.marginalia.browse.model.BrowseResult; import nu.marginalia.model.EdgeDomain; -import nu.marginalia.model.dbcommon.DomainBlacklist; +import nu.marginalia.db.DomainBlacklist; import nu.marginalia.model.id.EdgeId; import java.util.LinkedList; diff --git a/code/tools/crawl-job-extractor/build.gradle b/code/tools/crawl-job-extractor/build.gradle index 07ba17f5..d79d7cb8 100644 --- a/code/tools/crawl-job-extractor/build.gradle +++ b/code/tools/crawl-job-extractor/build.gradle @@ -21,6 +21,7 @@ tasks.distZip.enabled = false dependencies { implementation project(':code:common:process') + implementation project(':code:common:db') implementation project(':code:common:model') implementation project(':code:common:service') implementation project(':code:process-models:crawling-model') diff --git a/code/tools/crawl-job-extractor/src/main/java/nu/marginalia/crawl/CrawlJobDomainExtractor.java b/code/tools/crawl-job-extractor/src/main/java/nu/marginalia/crawl/CrawlJobDomainExtractor.java index 82213c45..9e1ad2de 100644 --- a/code/tools/crawl-job-extractor/src/main/java/nu/marginalia/crawl/CrawlJobDomainExtractor.java +++ b/code/tools/crawl-job-extractor/src/main/java/nu/marginalia/crawl/CrawlJobDomainExtractor.java @@ -5,7 +5,7 @@ import com.google.common.hash.Hashing; import com.zaxxer.hikari.HikariDataSource; import nu.marginalia.crawling.model.spec.CrawlingSpecification; import nu.marginalia.model.EdgeDomain; -import nu.marginalia.model.dbcommon.DomainBlacklistImpl; +import nu.marginalia.db.DomainBlacklistImpl; import java.sql.ResultSet; import java.sql.SQLException; diff --git a/code/tools/crawl-job-extractor/src/main/java/nu/marginalia/crawl/CrawlJobExtractorMain.java b/code/tools/crawl-job-extractor/src/main/java/nu/marginalia/crawl/CrawlJobExtractorMain.java index b3df8c94..90bf9326 100644 --- a/code/tools/crawl-job-extractor/src/main/java/nu/marginalia/crawl/CrawlJobExtractorMain.java +++ 
b/code/tools/crawl-job-extractor/src/main/java/nu/marginalia/crawl/CrawlJobExtractorMain.java @@ -2,7 +2,7 @@ package nu.marginalia.crawl; import nu.marginalia.crawling.model.spec.CrawlingSpecification; import nu.marginalia.model.EdgeDomain; -import nu.marginalia.model.dbcommon.DomainBlacklistImpl; +import nu.marginalia.db.DomainBlacklistImpl; import nu.marginalia.service.module.DatabaseModule; import java.io.IOException; diff --git a/docker-compose.yml b/docker-compose.yml index 66094aa4..12dd0e79 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -80,7 +80,7 @@ services: - "127.0.0.1:3306:3306/tcp" volumes: - db:/var/lib/mysql - - "./code/common/model/src/main/resources/sql/edge-crawler-cache.sql:/docker-entrypoint-initdb.d/init.sql" + - "./code/common/db/src/main/resources/sql/current/:/docker-entrypoint-initdb.d/" networks: - wmsa nginx-gw: diff --git a/settings.gradle b/settings.gradle index 3f216b4a..1fa43e13 100644 --- a/settings.gradle +++ b/settings.gradle @@ -46,6 +46,7 @@ include 'code:api:assistant-api' include 'code:common:service-discovery' include 'code:common:service-client' +include 'code:common:db' include 'code:common:service' include 'code:common:config' include 'code:common:model'