The refactoring will continue until morale improves.

2023-03-12 10:04:48 +01:00 · 2023-03-12 10:04:48 +01:00 · 616effdb3c
commit 616effdb3c
parent 4cec89da91
118 changed files with 214 additions and 2287 deletions
--- a/code/api/assistant-api/build.gradle
+++ b/code/api/assistant-api/build.gradle
@ -11,7 +11,6 @@ java {
    }
 }
 dependencies {
-    implementation project(':third-party')
    implementation project(':code:common:model')
    implementation project(':code:common:config')
    implementation project(':code:common:service-discovery')
--- a/code/api/index-api/build.gradle
+++ b/code/api/index-api/build.gradle
@ -12,7 +12,6 @@ java {
 }

 dependencies {
-    implementation project(':third-party')
    implementation project(':code:common:model')
    implementation project(':code:common:config')
    implementation project(':code:common:service-discovery')
--- a/code/api/search-api/build.gradle
+++ b/code/api/search-api/build.gradle
@ -12,7 +12,6 @@ java {
 }

 dependencies {
-    implementation project(':third-party')
    implementation project(':code:common:model')
    implementation project(':code:common:config')
    implementation project(':code:common:service-discovery')
--- a/code/common/service-client/build.gradle
+++ b/code/common/service-client/build.gradle
@ -12,7 +12,6 @@ java {
 }

 dependencies {
-    implementation project(':third-party')
    implementation project(':code:common:service-discovery')


--- a/code/crawl/common/build.gradle
+++ b/code/crawl/common/build.gradle
@ -12,7 +12,6 @@ java {
 }

 dependencies {
-    implementation project(':third-party')
    implementation project(':code:common:model')
    implementation project(':code:common:config')
    implementation project(':code:libraries:guarded-regex')
--- a/code/crawl/converting-model/build.gradle
+++ b/code/crawl/converting-model/build.gradle
@ -11,7 +11,6 @@ java {
    }
 }
 dependencies {
-    implementation project(':third-party')
    implementation project(':code:common:model')
    implementation project(':code:api:index-api')
    implementation project(':code:common:service-discovery')
--- a/code/crawl/converting-process/build.gradle
+++ b/code/crawl/converting-process/build.gradle
@ -19,7 +19,7 @@ application {
 tasks.distZip.enabled = false

 dependencies {
-    implementation project(':third-party')
+    implementation project(':third-party:porterstemmer')
    implementation project(':code:api:index-api')

    implementation project(':code:common:model')
--- a/code/crawl/crawling-model/build.gradle
+++ b/code/crawl/crawling-model/build.gradle
@ -13,7 +13,6 @@ java {
 }

 dependencies {
-    implementation project(':third-party')
    implementation project(':code:common:model')
    implementation project(':code:libraries:big-string')
    implementation project(':code:api:index-api')
--- a/code/crawl/crawling-model/src/main/java/nu/marginalia/crawling/io/CrawledDomainReader.java
+++ b/code/crawl/crawling-model/src/main/java/nu/marginalia/crawling/io/CrawledDomainReader.java
@ -2,13 +2,13 @@ package nu.marginalia.crawling.io;

 import com.github.luben.zstd.ZstdInputStream;
 import com.google.gson.Gson;
-import jdkoverride.LargeLineBufferedReader;
 import nu.marginalia.crawling.model.CrawledDocument;
 import nu.marginalia.crawling.model.CrawledDomain;
 import nu.marginalia.model.gson.GsonFactory;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

+import java.io.BufferedReader;
 import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.InputStreamReader;
@ -29,7 +29,7 @@ public class CrawledDomainReader {
    public CrawledDomain read(Path path) throws IOException {
        DomainDataAssembler domainData = new DomainDataAssembler();

-        try (var br = new LargeLineBufferedReader(new InputStreamReader(new ZstdInputStream(new FileInputStream(path.toFile()))))) {
+        try (var br = new BufferedReader(new InputStreamReader(new ZstdInputStream(new FileInputStream(path.toFile()))))) {
            String line;
            while ((line = br.readLine()) != null) {
                if (line.startsWith("//")) {
--- a/code/crawl/crawling-process/build.gradle
+++ b/code/crawl/crawling-process/build.gradle
@ -19,7 +19,6 @@ application {
 tasks.distZip.enabled = false

 dependencies {
-    implementation project(':third-party')
    implementation project(':code:common:model')
    implementation project(':code:common:config')
    implementation project(':code:common:service')
--- a/code/crawl/experimental/build.gradle
+++ b/code/crawl/experimental/build.gradle
@ -12,7 +12,6 @@ java {
 }

 dependencies {
-    implementation project(':third-party')
    implementation project(':code:common:model')
    implementation project(':code:common:config')
    implementation project(':code:common:service')
--- a/code/crawl/loading-process/build.gradle
+++ b/code/crawl/loading-process/build.gradle
@ -18,7 +18,6 @@ application {
 tasks.distZip.enabled = false

 dependencies {
-    implementation project(':third-party')
    implementation project(':code:api:index-api')
    implementation project(':code:common:model')
    implementation project(':code:common:config')
--- a/code/features/random-websites/build.gradle
+++ b/code/features/random-websites/build.gradle
@ -15,7 +15,6 @@ java {
 }

 dependencies {
-    implementation project(':third-party')
    implementation project(':code:common:model')
    implementation project(':code:common:service')

--- a/code/features/screenshots/build.gradle
+++ b/code/features/screenshots/build.gradle
@ -11,7 +11,6 @@ java {
 }

 dependencies {
-    implementation project(':third-party')
    implementation project(':code:common:model')
    implementation project(':code:common:service')

--- a/code/features/topic-detection/build.gradle
+++ b/code/features/topic-detection/build.gradle
@ -16,7 +16,7 @@ java {
 dependencies {
    implementation project(':code:common:config')
    implementation project(':code:libraries:language-processing')
-    implementation project(':third-party')
+    implementation project(':third-party:porterstemmer')

    implementation libs.lombok
    annotationProcessor libs.lombok
--- a/code/index/index-forward/build.gradle
+++ b/code/index/index-forward/build.gradle
@ -18,7 +18,8 @@ dependencies {
    implementation project(':code:index:index-journal')
    implementation project(':code:index:lexicon')
    implementation project(':code:common:model')
-    implementation project(':third-party')
+
+    implementation project(':third-party:uppend')

    implementation libs.lombok
    annotationProcessor libs.lombok
--- a/code/index/index-journal/build.gradle
+++ b/code/index/index-journal/build.gradle
@ -14,7 +14,6 @@ dependencies {
    implementation project(':code:libraries:array')
    implementation project(':code:common:model')
    implementation project(':code:index:lexicon')
-    implementation project(':third-party')

    implementation libs.lombok
    annotationProcessor libs.lombok
--- a/code/libraries/array/build.gradle
+++ b/code/libraries/array/build.gradle
@ -9,7 +9,7 @@ java {
 }

 dependencies {
-    implementation project(':third-party')
+    implementation project(':third-party:uppend')

    implementation libs.lombok
    annotationProcessor libs.lombok
--- a/code/libraries/btree/build.gradle
+++ b/code/libraries/btree/build.gradle
@ -9,7 +9,6 @@ java {
 }

 dependencies {
-    implementation project(':third-party')
    implementation project(':code:libraries:array')
    implementation project(':code:libraries:next-prime')

--- a/code/libraries/language-processing/build.gradle
+++ b/code/libraries/language-processing/build.gradle
@ -15,7 +15,9 @@ java {
 }

 dependencies {
-    implementation project(':third-party')
+    implementation project(':third-party:rdrpostagger')
+    implementation project(':third-party:porterstemmer')
+    implementation project(':third-party:monkey-patch-opennlp')
    implementation project(':code:common:model')
    implementation project(':code:common:config')
    implementation project(':code:libraries:easy-lsh')
--- a/code/services-core/assistant-service/build.gradle
+++ b/code/services-core/assistant-service/build.gradle
@ -22,7 +22,7 @@ java {
 }

 dependencies {
-    implementation project(':third-party')
+    implementation project(':third-party:symspell')
    implementation project(':code:api:assistant-api')
    implementation project(':code:common:config')
    implementation project(':code:common:service')
--- a/code/services-core/assistant-service/src/test/java/nu/marginalia/assistant/dict/WikiCleanerTest.java
+++ b/code/services-core/assistant-service/src/test/java/nu/marginalia/assistant/dict/WikiCleanerTest.java
@ -1,45 +0,0 @@
-package nu.marginalia.assistant.dict;
-
-import org.junit.jupiter.api.Disabled;
-import org.junit.jupiter.api.Test;
-import org.openzim.ZIMTypes.ZIMFile;
-import org.openzim.ZIMTypes.ZIMReader;
-
-import java.io.IOException;
-
-class WikiCleanerTest {
-
-    @Test
-    void cleanWikiJunk() throws IOException {
-//        String str = new WikiCleaner().cleanWikiJunk("https://en.wikipedia.org/wiki/Scamander", new String(Files.readAllBytes(Path.of("/home/vlofgren/Work/wiki-cleaner/Scamander.wiki.html"))));
-//        String str2 = new WikiCleaner().cleanWikiJunk("https://en.wikipedia.org/wiki/Plato", new String(Files.readAllBytes(Path.of("/home/vlofgren/Work/wiki-cleaner/Plato.wiki.html"))));
-//        String str3 = new WikiCleaner().cleanWikiJunk("https://en.wikipedia.org/wiki/C++", new String(Files.readAllBytes(Path.of("/home/vlofgren/Work/wiki-cleaner/Cpp.wiki.html"))));
-//        String str4 = new WikiCleaner().cleanWikiJunk("https://en.wikipedia.org/wiki/Memex", new String(Files.readAllBytes(Path.of("/home/vlofgren/Work/wiki-cleaner/Memex.wiki.html"))));
-//        Files.writeString(Path.of("/home/vlofgren/Work/wiki-cleaner/Scamander.out.html"), str);
-//        Files.writeString(Path.of("/home/vlofgren/Work/wiki-cleaner/Plato.out.html"), str2);
-//        Files.writeString(Path.of("/home/vlofgren/Work/wiki-cleaner/Cpp.out.html"), str3);
-//        Files.writeString(Path.of("/home/vlofgren/Work/wiki-cleaner/Memex.out.html"), str4);
-    }
-
-    @Test @Disabled
-    public void readZim() throws IOException {
-        var zr = new ZIMReader(new ZIMFile("/home/vlofgren/Work/wikipedia_en_all_nopic_2021-01.zim"));
-//        try (var pw = new PrintWriter(new File("/home/vlofgren/Work/article-clusters.tsv"))) {
-//            zr.enumerateArticles(pw);
-//        }
-        zr.forEachArticles((url, art) -> {
-            if (art != null) {
-                System.out.println(url);
-            }
-//            if (art != null && art.length() > 5) {
-//                System.out.println(url + " -> " + art.substring(0, 5));
-//            }
-        }, (p) -> true);
-
-        /*try (var baos = zr.getArticleData("Giraffe", 'A')) {
-            String str  = baos.toString();
-            Files.writeString(Path.of("/home/vlofgren/Work/wiki-cleaner/Giraffe.wiki.html"), str);
-            Files.writeString(Path.of("/home/vlofgren/Work/wiki-cleaner/Giraffe.out.html"), new WikiCleaner().cleanWikiJunk("https://en.wikipedia.org/wiki/Giraffe", str));
-        }*/
-    }
-}
--- a/code/services-core/index-service/build.gradle
+++ b/code/services-core/index-service/build.gradle
@ -21,7 +21,6 @@ java {
    }
 }
 dependencies {
-    implementation project(':third-party')
    implementation project(':code:common:config')
    implementation project(':code:common:model')
    implementation project(':code:common:service')
--- a/code/services-core/search-service/build.gradle
+++ b/code/services-core/search-service/build.gradle
@ -21,7 +21,6 @@ java {
    }
 }
 dependencies {
-    implementation project(':third-party')
    implementation project(':code:common:model')
    implementation project(':code:common:service')
    implementation project(':code:common:config')
--- a/code/services-satellite/api-service/build.gradle
+++ b/code/services-satellite/api-service/build.gradle
@ -22,7 +22,6 @@ tasks.distZip.enabled = false
 apply from: "$rootProject.projectDir/docker-service.gradle"

 dependencies {
-    implementation project(':third-party')
    implementation project(':code:common:model')
    implementation project(':code:common:service')
    implementation project(':code:common:config')
--- a/code/services-satellite/dating-service/build.gradle
+++ b/code/services-satellite/dating-service/build.gradle
@ -21,7 +21,6 @@ java {
    }
 }
 dependencies {
-    implementation project(':third-party')
    implementation project(':code:common:model')
    implementation project(':code:common:service')
    implementation project(':code:common:service-discovery')
--- a/code/services-satellite/explorer-service/build.gradle
+++ b/code/services-satellite/explorer-service/build.gradle
@ -21,7 +21,6 @@ java {
    }
 }
 dependencies {
-    implementation project(':third-party')
    implementation project(':code:common:model')
    implementation project(':code:common:service')
    implementation project(':code:common:service-discovery')
--- a/docker-service.gradle
+++ b/docker-service.gradle
@ -4,9 +4,11 @@ ext {
    serviceToolOpts='-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=*:5000'
 }

-docker {
-    var df = new File(buildDir, "Dockerfile")
+tasks.register('dockerFile') {
+    buildDir.mkdir()

+    var df = new File(buildDir, "Dockerfile")
+    doLast {
        df.text = """#
 # I'm auto-generated, please don't make changes to me or commit me to git
 #
@ -22,11 +24,23 @@ ENV JAVA_OPTS="${serviceJvmOpts} "

 ENTRYPOINT WMSA_HOME=/wmsa /${application.applicationName}/bin/${application.applicationName} \${arg0} \${arg1}
 """
+    }
+    it.outputs.file(df)
+}

-    dockerfile = new File(buildDir, "Dockerfile")
+dockerPrepare {
+    dependsOn tasks.dockerFile
+}
+
+dockerfileZip {
+    dependsOn tasks.dockerFile
+}
+
+
+docker {
+    dockerfile = tasks.dockerFile.outputs.files.singleFile
    name = 'marginalia.nu/'+application.applicationName+':latest'
    files tasks.distTar.outputs
    tags 'latest'
-
    dependsOn tasks.distTar
 }
--- a/other/memex/build.gradle
+++ b/other/memex/build.gradle
@ -59,7 +59,6 @@ jmhJar {
    zip64 true
 }
 dependencies {
-    implementation project(':third-party')
    implementation project(':code:common:service')
    implementation project(':code:common:config')
    implementation project(':code:common:service-discovery')
--- a/other/wmsa_old/build.gradle
+++ b/other/wmsa_old/build.gradle
@ -30,7 +30,6 @@ java {
    }
 }
 dependencies {
-    implementation project(':third-party')
    implementation project(':code:common:service')
    implementation project(':code:common:service-discovery')
    implementation project(':code:common:service-client')
--- a/settings.gradle
+++ b/settings.gradle
@ -52,7 +52,14 @@ include 'code:crawl:loading-process'
 include 'code:crawl:common'
 include 'code:crawl:experimental'

-include 'third-party'
+include 'third-party:porterstemmer'
+include 'third-party:xz'
+include 'third-party:symspell'
+include 'third-party:rdrpostagger'
+include 'third-party:uppend'
+include 'third-party:openzim'
+include 'third-party:monkey-patch-opennlp'
+
 include 'other:memex'
 include 'other:wmsa_old'

--- a/third-party/README.md
+++ b/third-party/README.md
@ -6,14 +6,11 @@ or lack an artifact, or to override some default that is inappropriate for the t
 ## Sources and Licenses

 ### Modified
-* [RDRPosTagger](https://github.com/datquocnguyen/RDRPOSTagger) - GPL3
-* [PorterStemmer](https://github.com/caarmen/porter-stemmer) - LGPL3
-* [Uppend](https://github.com/upserve/uppend) - MIT
-* [OpenZIM](https://github.com/openzim/libzim) - GPL-2.0
-* [XZ for Java](https://tukaani.org/xz/) - Public Domain
-* [SymSpell](https://github.com/wolfgarbe/symspell) - LGPL-3.0
+* [RDRPosTagger](rdrpostagger/) - GPL3
+* [PorterStemmer](porterstemmer/) - LGPL3
+* [Uppend](uppend/) - MIT
+* [OpenZIM](openzim/) - GPL-2.0
+* [SymSpell](symspell/) - LGPL-3.0

 ### Monkey Patched
-* [GSON](https://github.com/google/gson) - Apache-2.0
-* OpenJDK - GPL-2.0 (packaged under jdkoverride)
-* Stanford OpenNLP - Apache-2.0
+* [Stanford OpenNLP](monkey-patch-opennlp/) - Apache-2.0
--- a/third-party/monkey-patch-opennlp/build.gradle
+++ b/third-party/monkey-patch-opennlp/build.gradle
@ -27,5 +27,5 @@ dependencies {
 }

 test {
-    useJUnitPlatform()
+    useJUnitPlatform()\
 }
--- a/third-party/monkey-patch-opennlp/readme.md
+++ b/third-party/monkey-patch-opennlp/readme.md
@ -0,0 +1,11 @@
+# Monkey Patched OpenNLP
+
+Stanford OpenNLP - Apache-2.0
+
+## Rationale
+
+OpenNLP's sentence detector uses a slow StringBuffer instead of a StringBuilder where it makes
+no sense to do so. This makes it much slower than it needs to be. I've found no way to file issues with the
+project to get it fixed. Instead we're doing this monkey patch, where the class is overridden with something
+better.
+
--- a/third-party/monkey-patch-opennlp/src/main/java/opennlp/tools/sentdetect/DefaultSDContextGenerator.java
+++ b/third-party/monkey-patch-opennlp/src/main/java/opennlp/tools/sentdetect/DefaultSDContextGenerator.java
--- a/third-party/monkey-patch-opennlp/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java
+++ b/third-party/monkey-patch-opennlp/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java
--- a/third-party/openzim/build.gradle
+++ b/third-party/openzim/build.gradle
@ -0,0 +1,24 @@
+plugins {
+    id 'java'
+}
+
+java {
+    toolchain {
+        languageVersion.set(JavaLanguageVersion.of(17))
+    }
+}
+
+dependencies {
+    implementation libs.bundles.nlp
+    implementation libs.zstd
+    implementation libs.commons.compress
+    implementation libs.ffi
+    implementation libs.databind
+    implementation libs.bundles.gson
+
+    implementation project(':third-party:xz')
+}
+
+test {
+    useJUnitPlatform()
+}
--- a/third-party/openzim/readme.md
+++ b/third-party/openzim/readme.md
@ -0,0 +1,11 @@
+# OpenZIM
+
+[OpenZIM](https://github.com/openzim/libzim) - GPL-2.0
+
+OpenZIM is a ZIM file reader. This code has been modified in a fairly crude manner 
+to be much faster than the original code base which seems quite antique. It also 
+supports XZ compression.
+
+**Important note:** The license is incompatible with AGPL 3, so we can't link Marginalia
+directly to this. It's still very useful for building stand-alone tools that deal with
+Wikipedia data.
--- a/third-party/openzim/src/main/java/org/openzim/ZIMTypes/ArticleEntry.java
+++ b/third-party/openzim/src/main/java/org/openzim/ZIMTypes/ArticleEntry.java
--- a/third-party/openzim/src/main/java/org/openzim/ZIMTypes/DirectoryEntry.java
+++ b/third-party/openzim/src/main/java/org/openzim/ZIMTypes/DirectoryEntry.java
--- a/third-party/openzim/src/main/java/org/openzim/ZIMTypes/RedirectEntry.java
+++ b/third-party/openzim/src/main/java/org/openzim/ZIMTypes/RedirectEntry.java
--- a/third-party/openzim/src/main/java/org/openzim/ZIMTypes/ZIMFile.java
+++ b/third-party/openzim/src/main/java/org/openzim/ZIMTypes/ZIMFile.java
--- a/third-party/openzim/src/main/java/org/openzim/ZIMTypes/ZIMReader.java
+++ b/third-party/openzim/src/main/java/org/openzim/ZIMTypes/ZIMReader.java
--- a/third-party/openzim/src/main/java/org/openzim/util/RandomAcessFileZIMInputStream.java
+++ b/third-party/openzim/src/main/java/org/openzim/util/RandomAcessFileZIMInputStream.java
--- a/third-party/openzim/src/main/java/org/openzim/util/Utilities.java
+++ b/third-party/openzim/src/main/java/org/openzim/util/Utilities.java
--- a/third-party/porterstemmer/build.gradle
+++ b/third-party/porterstemmer/build.gradle
@ -0,0 +1,16 @@
+plugins {
+    id 'java'
+}
+
+java {
+    toolchain {
+        languageVersion.set(JavaLanguageVersion.of(17))
+    }
+}
+
+dependencies {
+}
+
+test {
+    useJUnitPlatform()
+}
--- a/third-party/porterstemmer/readme.md
+++ b/third-party/porterstemmer/readme.md
@ -0,0 +1,6 @@
+# Porterstemmer
+
+[PorterStemmer](https://github.com/caarmen/porter-stemmer) - LGPL3
+
+It's a [Porter stemmer](https://tartarus.org/martin/PorterStemmer/) library, although one comes with OpenNLP
+too. Which one to use is TBD; they're fairly equivalent.
--- a/third-party/porterstemmer/src/main/java/ca/rmen/porterstemmer/PorterStemmer.java
+++ b/third-party/porterstemmer/src/main/java/ca/rmen/porterstemmer/PorterStemmer.java
--- a/third-party/rdrpostagger/build.gradle
+++ b/third-party/rdrpostagger/build.gradle
@ -0,0 +1,16 @@
+plugins {
+    id 'java'
+}
+
+java {
+    toolchain {
+        languageVersion.set(JavaLanguageVersion.of(17))
+    }
+}
+
+dependencies {
+}
+
+test {
+    useJUnitPlatform()
+}
--- a/third-party/rdrpostagger/readme.md
+++ b/third-party/rdrpostagger/readme.md
@ -0,0 +1,12 @@
+# RDRPosTagger
+
+[RDRPosTagger](https://github.com/datquocnguyen/RDRPOSTagger) - GPL3
+
+datquocnguyen's excellent fast POS tagger. It's been crudely modified to be faster. 
+Unlike the original, it only does English.
+
+## Citations
+
+- Dat Quoc Nguyen, Dai Quoc Nguyen, Dang Duc Pham and Son Bao Pham. [RDRPOSTagger: A Ripple Down Rules-based Part-Of-Speech Tagger](http://www.aclweb.org/anthology/E14-2005). In *Proceedings of the Demonstrations at the 14th Conference of the European Chapter of the Association for Computational Linguistics*, EACL 2014, pp. 17-20, 2014. [[.PDF]](http://www.aclweb.org/anthology/E14-2005) [[.bib]](http://www.aclweb.org/anthology/E14-2005.bib)
+
+- Dat Quoc Nguyen, Dai Quoc Nguyen, Dang Duc Pham and Son Bao Pham. [A Robust Transformation-Based Learning Approach Using Ripple Down Rules for Part-Of-Speech Tagging](http://content.iospress.com/articles/ai-communications/aic698). *AI Communications* (AICom), vol. 29, no. 3, pp. 409-422, 2016. [[.PDF]](http://arxiv.org/pdf/1412.4021.pdf) [[.bib]](http://rdrpostagger.sourceforge.net/AICom.bib)
--- a/third-party/rdrpostagger/src/main/java/com/github/datquocnguyen/FWObject.java
+++ b/third-party/rdrpostagger/src/main/java/com/github/datquocnguyen/FWObject.java
--- a/third-party/rdrpostagger/src/main/java/com/github/datquocnguyen/InitialTagger.java
+++ b/third-party/rdrpostagger/src/main/java/com/github/datquocnguyen/InitialTagger.java
--- a/third-party/rdrpostagger/src/main/java/com/github/datquocnguyen/Node.java
+++ b/third-party/rdrpostagger/src/main/java/com/github/datquocnguyen/Node.java
--- a/third-party/rdrpostagger/src/main/java/com/github/datquocnguyen/RDRPOSTagger.java
+++ b/third-party/rdrpostagger/src/main/java/com/github/datquocnguyen/RDRPOSTagger.java
--- a/third-party/rdrpostagger/src/main/java/com/github/datquocnguyen/Utils.java
+++ b/third-party/rdrpostagger/src/main/java/com/github/datquocnguyen/Utils.java
--- a/third-party/rdrpostagger/src/main/java/com/github/datquocnguyen/WordTag.java
+++ b/third-party/rdrpostagger/src/main/java/com/github/datquocnguyen/WordTag.java
--- a/third-party/src/main/java/com/google/gson/stream/JsonReader.java
+++ b/third-party/src/main/java/com/google/gson/stream/JsonReader.java
--- a/third-party/src/main/java/jdkoverride/LargeLineBufferedReader.java
+++ b/third-party/src/main/java/jdkoverride/LargeLineBufferedReader.java
@ -1,559 +0,0 @@
-package jdkoverride;/*
- * Copyright (c) 1996, 2021, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.  Oracle designates this
- * particular file as subject to the "Classpath" exception as provided
- * by Oracle in the LICENSE file that accompanied this code.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-
-import java.io.IOException;
-import java.io.Reader;
-import java.io.UncheckedIOException;
-import java.util.Iterator;
-import java.util.NoSuchElementException;
-import java.util.Objects;
-import java.util.Spliterator;
-import java.util.Spliterators;
-import java.util.stream.Stream;
-import java.util.stream.StreamSupport;
-
-/** This is java.io.BufferedReader with a different value for defaultExpectedLineLength
- */
-
-public class LargeLineBufferedReader extends Reader {
-
-    private Reader in;
-
-    private char[] cb;
-    private int nChars, nextChar;
-
-    private static final int INVALIDATED = -2;
-    private static final int UNMARKED = -1;
-    private int markedChar = UNMARKED;
-    private int readAheadLimit = 0; /* Valid only when markedChar > 0 */
-
-    /** If the next character is a line feed, skip it */
-    private boolean skipLF = false;
-
-    /** The skipLF flag when the mark was set */
-    private boolean markedSkipLF = false;
-
-    private static int defaultCharBufferSize = 8192;
-    private static int defaultExpectedLineLength = 65536;
-
-    /**
-     * Creates a buffering character-input stream that uses an input buffer of
-     * the specified size.
-     *
-     * @param  in   A Reader
-     * @param  sz   Input-buffer size
-     *
-     * @throws IllegalArgumentException  If {@code sz <= 0}
-     */
-    public LargeLineBufferedReader(Reader in, int sz) {
-        super(in);
-        if (sz <= 0)
-            throw new IllegalArgumentException("Buffer size <= 0");
-        this.in = in;
-        cb = new char[sz];
-        nextChar = nChars = 0;
-    }
-
-    /**
-     * Creates a buffering character-input stream that uses a default-sized
-     * input buffer.
-     *
-     * @param  in   A Reader
-     */
-    public LargeLineBufferedReader(Reader in) {
-        this(in, defaultCharBufferSize);
-    }
-
-    /** Checks to make sure that the stream has not been closed */
-    private void ensureOpen() throws IOException {
-        if (in == null)
-            throw new IOException("Stream closed");
-    }
-
-    /**
-     * Fills the input buffer, taking the mark into account if it is valid.
-     */
-    private void fill() throws IOException {
-        int dst;
-        if (markedChar <= UNMARKED) {
-            /* No mark */
-            dst = 0;
-        } else {
-            /* Marked */
-            int delta = nextChar - markedChar;
-            if (delta >= readAheadLimit) {
-                /* Gone past read-ahead limit: Invalidate mark */
-                markedChar = INVALIDATED;
-                readAheadLimit = 0;
-                dst = 0;
-            } else {
-                if (readAheadLimit <= cb.length) {
-                    /* Shuffle in the current buffer */
-                    System.arraycopy(cb, markedChar, cb, 0, delta);
-                    markedChar = 0;
-                    dst = delta;
-                } else {
-                    /* Reallocate buffer to accommodate read-ahead limit */
-                    char[] ncb = new char[readAheadLimit];
-                    System.arraycopy(cb, markedChar, ncb, 0, delta);
-                    cb = ncb;
-                    markedChar = 0;
-                    dst = delta;
-                }
-                nextChar = nChars = delta;
-            }
-        }
-
-        int n;
-        do {
-            n = in.read(cb, dst, cb.length - dst);
-        } while (n == 0);
-        if (n > 0) {
-            nChars = dst + n;
-            nextChar = dst;
-        }
-    }
-
-    /**
-     * Reads a single character.
-     *
-     * @return The character read, as an integer in the range
-     *         0 to 65535 ({@code 0x00-0xffff}), or -1 if the
-     *         end of the stream has been reached
-     * @throws     IOException  If an I/O error occurs
-     */
-    public int read() throws IOException {
-        synchronized (lock) {
-            ensureOpen();
-            for (;;) {
-                if (nextChar >= nChars) {
-                    fill();
-                    if (nextChar >= nChars)
-                        return -1;
-                }
-                if (skipLF) {
-                    skipLF = false;
-                    if (cb[nextChar] == '\n') {
-                        nextChar++;
-                        continue;
-                    }
-                }
-                return cb[nextChar++];
-            }
-        }
-    }
-
-    /**
-     * Reads characters into a portion of an array, reading from the underlying
-     * stream if necessary.
-     */
-    private int read1(char[] cbuf, int off, int len) throws IOException {
-        if (nextChar >= nChars) {
-            /* If the requested length is at least as large as the buffer, and
-               if there is no mark/reset activity, and if line feeds are not
-               being skipped, do not bother to copy the characters into the
-               local buffer.  In this way buffered streams will cascade
-               harmlessly. */
-            if (len >= cb.length && markedChar <= UNMARKED && !skipLF) {
-                return in.read(cbuf, off, len);
-            }
-            fill();
-        }
-        if (nextChar >= nChars) return -1;
-        if (skipLF) {
-            skipLF = false;
-            if (cb[nextChar] == '\n') {
-                nextChar++;
-                if (nextChar >= nChars)
-                    fill();
-                if (nextChar >= nChars)
-                    return -1;
-            }
-        }
-        int n = Math.min(len, nChars - nextChar);
-        System.arraycopy(cb, nextChar, cbuf, off, n);
-        nextChar += n;
-        return n;
-    }
-
-    /**
-     * Reads characters into a portion of an array.
-     *
-     * <p> This method implements the general contract of the corresponding
-     * {@link Reader#read(char[], int, int) read} method of the
-     * {@link Reader} class.  As an additional convenience, it
-     * attempts to read as many characters as possible by repeatedly invoking
-     * the {@code read} method of the underlying stream.  This iterated
-     * {@code read} continues until one of the following conditions becomes
-     * true:
-     * <ul>
-     *
-     *   <li> The specified number of characters have been read,
-     *
-     *   <li> The {@code read} method of the underlying stream returns
-     *   {@code -1}, indicating end-of-file, or
-     *
-     *   <li> The {@code ready} method of the underlying stream
-     *   returns {@code false}, indicating that further input requests
-     *   would block.
-     *
-     * </ul>
-     * If the first {@code read} on the underlying stream returns
-     * {@code -1} to indicate end-of-file then this method returns
-     * {@code -1}.  Otherwise this method returns the number of characters
-     * actually read.
-     *
-     * <p> Subclasses of this class are encouraged, but not required, to
-     * attempt to read as many characters as possible in the same fashion.
-     *
-     * <p> Ordinarily this method takes characters from this stream's character
-     * buffer, filling it from the underlying stream as necessary.  If,
-     * however, the buffer is empty, the mark is not valid, and the requested
-     * length is at least as large as the buffer, then this method will read
-     * characters directly from the underlying stream into the given array.
-     * Thus redundant {@code BufferedReader}s will not copy data
-     * unnecessarily.
-     *
-     * @param      cbuf  {@inheritDoc}
-     * @param      off   {@inheritDoc}
-     * @param      len   {@inheritDoc}
-     *
-     * @return     {@inheritDoc}
-     *
-     * @throws     IndexOutOfBoundsException {@inheritDoc}
-     * @throws     IOException  {@inheritDoc}
-     */
-    public int read(char[] cbuf, int off, int len) throws IOException {
-        synchronized (lock) {
-            ensureOpen();
-            Objects.checkFromIndexSize(off, len, cbuf.length);
-            if (len == 0) {
-                return 0;
-            }
-
-            int n = read1(cbuf, off, len);
-            if (n <= 0) return n;
-            while ((n < len) && in.ready()) {
-                int n1 = read1(cbuf, off + n, len - n);
-                if (n1 <= 0) break;
-                n += n1;
-            }
-            return n;
-        }
-    }
-
-    /**
-     * Reads a line of text.  A line is considered to be terminated by any one
-     * of a line feed ('\n'), a carriage return ('\r'), a carriage return
-     * followed immediately by a line feed, or by reaching the end-of-file
-     * (EOF).
-     *
-     * @param      ignoreLF  If true, the next '\n' will be skipped
-     * @param      term      Output: Whether a line terminator was encountered
-     *                       while reading the line; may be {@code null}.
-     *
-     * @return     A String containing the contents of the line, not including
-     *             any line-termination characters, or null if the end of the
-     *             stream has been reached without reading any characters
-     *
-     * @see        java.io.LineNumberReader#readLine()
-     *
-     * @throws     IOException  If an I/O error occurs
-     */
-
-    StringBuilder s = new StringBuilder(10000);
-    String readLine(boolean ignoreLF, boolean[] term) throws IOException {
-
-        int startChar;
-
-        synchronized (lock) {
-
-            s.setLength(0);
-
-            ensureOpen();
-            boolean omitLF = ignoreLF || skipLF;
-            if (term != null) term[0] = false;
-
-            for (;;) {
-
-                if (nextChar >= nChars)
-                    fill();
-                if (nextChar >= nChars) { /* EOF */
-                    if (s != null && s.length() > 0)
-                        return s.toString();
-                    else
-                        return null;
-                }
-                boolean eol = false;
-                char c = 0;
-                int i;
-
-                /* Skip a leftover '\n', if necessary */
-                if (omitLF && (cb[nextChar] == '\n'))
-                    nextChar++;
-                skipLF = false;
-                omitLF = false;
-
-                for (i = nextChar; i < nChars; i++) {
-                    c = cb[i];
-                    if ((c == '\n') || (c == '\r')) {
-                        if (term != null) term[0] = true;
-                        eol = true;
-                        break;
-                    }
-                }
-
-                startChar = nextChar;
-                nextChar = i;
-
-                if (eol) {
-                    s.append(cb, startChar, i - startChar);
-                    String str = s.toString();
-
-                    nextChar++;
-                    if (c == '\r') {
-                        skipLF = true;
-                    }
-                    return str;
-                }
-
-                s.append(cb, startChar, i - startChar);
-            }
-        }
-    }
-
-    /**
-     * Reads a line of text.  A line is considered to be terminated by any one
-     * of a line feed ('\n'), a carriage return ('\r'), a carriage return
-     * followed immediately by a line feed, or by reaching the end-of-file
-     * (EOF).
-     *
-     * @return     A String containing the contents of the line, not including
-     *             any line-termination characters, or null if the end of the
-     *             stream has been reached without reading any characters
-     *
-     * @throws     IOException  If an I/O error occurs
-     *
-     * @see java.nio.file.Files#readAllLines
-     */
-    public String readLine() throws IOException {
-        return readLine(false, null);
-    }
-
-    /**
-     * {@inheritDoc}
-     */
-    public long skip(long n) throws IOException {
-        if (n < 0L) {
-            throw new IllegalArgumentException("skip value is negative");
-        }
-        synchronized (lock) {
-            ensureOpen();
-            long r = n;
-            while (r > 0) {
-                if (nextChar >= nChars)
-                    fill();
-                if (nextChar >= nChars) /* EOF */
-                    break;
-                if (skipLF) {
-                    skipLF = false;
-                    if (cb[nextChar] == '\n') {
-                        nextChar++;
-                    }
-                }
-                long d = nChars - nextChar;
-                if (r <= d) {
-                    nextChar += r;
-                    r = 0;
-                    break;
-                }
-                else {
-                    r -= d;
-                    nextChar = nChars;
-                }
-            }
-            return n - r;
-        }
-    }
-
-    /**
-     * Tells whether this stream is ready to be read.  A buffered character
-     * stream is ready if the buffer is not empty, or if the underlying
-     * character stream is ready.
-     *
-     * @throws     IOException  If an I/O error occurs
-     */
-    public boolean ready() throws IOException {
-        synchronized (lock) {
-            ensureOpen();
-
-            /*
-             * If newline needs to be skipped and the next char to be read
-             * is a newline character, then just skip it right away.
-             */
-            if (skipLF) {
-                /* Note that in.ready() will return true if and only if the next
-                 * read on the stream will not block.
-                 */
-                if (nextChar >= nChars && in.ready()) {
-                    fill();
-                }
-                if (nextChar < nChars) {
-                    if (cb[nextChar] == '\n')
-                        nextChar++;
-                    skipLF = false;
-                }
-            }
-            return (nextChar < nChars) || in.ready();
-        }
-    }
-
-    /**
-     * Tells whether this stream supports the mark() operation, which it does.
-     */
-    public boolean markSupported() {
-        return true;
-    }
-
-    /**
-     * Marks the present position in the stream.  Subsequent calls to reset()
-     * will attempt to reposition the stream to this point.
-     *
-     * @param readAheadLimit   Limit on the number of characters that may be
-     *                         read while still preserving the mark. An attempt
-     *                         to reset the stream after reading characters
-     *                         up to this limit or beyond may fail.
-     *                         A limit value larger than the size of the input
-     *                         buffer will cause a new buffer to be allocated
-     *                         whose size is no smaller than limit.
-     *                         Therefore large values should be used with care.
-     *
-     * @throws     IllegalArgumentException  If {@code readAheadLimit < 0}
-     * @throws     IOException  If an I/O error occurs
-     */
-    public void mark(int readAheadLimit) throws IOException {
-        if (readAheadLimit < 0) {
-            throw new IllegalArgumentException("Read-ahead limit < 0");
-        }
-        synchronized (lock) {
-            ensureOpen();
-            this.readAheadLimit = readAheadLimit;
-            markedChar = nextChar;
-            markedSkipLF = skipLF;
-        }
-    }
-
-    /**
-     * Resets the stream to the most recent mark.
-     *
-     * @throws     IOException  If the stream has never been marked,
-     *                          or if the mark has been invalidated
-     */
-    public void reset() throws IOException {
-        synchronized (lock) {
-            ensureOpen();
-            if (markedChar < 0)
-                throw new IOException((markedChar == INVALIDATED)
-                        ? "Mark invalid"
-                        : "Stream not marked");
-            nextChar = markedChar;
-            skipLF = markedSkipLF;
-        }
-    }
-
-    public void close() throws IOException {
-        synchronized (lock) {
-            if (in == null)
-                return;
-            try {
-                in.close();
-            } finally {
-                in = null;
-                cb = null;
-            }
-        }
-    }
-
-    /**
-     * Returns a {@code Stream}, the elements of which are lines read from
-     * this {@code BufferedReader}.  The {@link Stream} is lazily populated,
-     * i.e., read only occurs during the
-     * <a href="../util/stream/package-summary.html#StreamOps">terminal
-     * stream operation</a>.
-     *
-     * <p> The reader must not be operated on during the execution of the
-     * terminal stream operation. Otherwise, the result of the terminal stream
-     * operation is undefined.
-     *
-     * <p> After execution of the terminal stream operation there are no
-     * guarantees that the reader will be at a specific position from which to
-     * read the next character or line.
-     *
-     * <p> If an {@link IOException} is thrown when accessing the underlying
-     * {@code BufferedReader}, it is wrapped in an {@link
-     * UncheckedIOException} which will be thrown from the {@code Stream}
-     * method that caused the read to take place. This method will return a
-     * Stream if invoked on a BufferedReader that is closed. Any operation on
-     * that stream that requires reading from the BufferedReader after it is
-     * closed, will cause an UncheckedIOException to be thrown.
-     *
-     * @return a {@code Stream<String>} providing the lines of text
-     *         described by this {@code BufferedReader}
-     *
-     * @since 1.8
-     */
-    public Stream<String> lines() {
-        Iterator<String> iter = new Iterator<>() {
-            String nextLine = null;
-
-            @Override
-            public boolean hasNext() {
-                if (nextLine != null) {
-                    return true;
-                } else {
-                    try {
-                        nextLine = readLine();
-                        return (nextLine != null);
-                    } catch (IOException e) {
-                        throw new UncheckedIOException(e);
-                    }
-                }
-            }
-
-            @Override
-            public String next() {
-                if (nextLine != null || hasNext()) {
-                    String line = nextLine;
-                    nextLine = null;
-                    return line;
-                } else {
-                    throw new NoSuchElementException();
-                }
-            }
-        };
-        return StreamSupport.stream(Spliterators.spliteratorUnknownSize(
-                iter, Spliterator.ORDERED | Spliterator.NONNULL), false);
-    }
-}
--- a/third-party/symspell/build.gradle
+++ b/third-party/symspell/build.gradle
@ -0,0 +1,16 @@
+plugins {
+    id 'java'
+}
+
+java {
+    toolchain {
+        languageVersion.set(JavaLanguageVersion.of(17))
+    }
+}
+
+dependencies {
+}
+
+test {
+    useJUnitPlatform()
+}
--- a/third-party/symspell/readme.md
+++ b/third-party/symspell/readme.md
@ -0,0 +1,9 @@
+# SymSpell
+
+[SymSpell](https://github.com/wolfgarbe/symspell) - LGPL-3.0
+
+Fast spell checking library. Ostensibly lacks an artifact, so we're packaging it ourselves.
+
+## Further Reading
+
+Wolf Garbe, [1000x Faster Spelling Correction algorithm (2012)](https://wolfgarbe.medium.com/1000x-faster-spelling-correction-algorithm-2012-8701fcd87a5f)
--- a/third-party/symspell/src/main/java/symspell/SymSpell.java
+++ b/third-party/symspell/src/main/java/symspell/SymSpell.java
--- a/third-party/uppend/build.gradle
+++ b/third-party/uppend/build.gradle
@ -0,0 +1,17 @@
+plugins {
+    id 'java'
+}
+
+java {
+    toolchain {
+        languageVersion.set(JavaLanguageVersion.of(17))
+    }
+}
+
+dependencies {
+    implementation libs.ffi
+}
+
+test {
+    useJUnitPlatform()
+}
--- a/third-party/uppend/readme.md
+++ b/third-party/uppend/readme.md
@ -0,0 +1,6 @@
+# Uppend
+
+[Uppend](https://github.com/upserve/uppend) - MIT
+
+It's "an append-only, key-multivalue store". Cool project, but we're unceremoniously pillaging just a small piece of
+code they wrote for calling [madvise()](https://man7.org/linux/man-pages/man2/madvise.2.html) on off-heap byte buffers.
--- a/third-party/uppend/src/main/java/com/upserve/uppend/blobs/NativeIO.java
+++ b/third-party/uppend/src/main/java/com/upserve/uppend/blobs/NativeIO.java
--- a/third-party/xz/build.gradle
+++ b/third-party/xz/build.gradle
@ -0,0 +1,16 @@
+plugins {
+    id 'java'
+}
+
+java {
+    toolchain {
+        languageVersion.set(JavaLanguageVersion.of(17))
+    }
+}
+
+dependencies {
+}
+
+test {
+    useJUnitPlatform()
+}
--- a/third-party/xz/readme.md
+++ b/third-party/xz/readme.md
@ -0,0 +1,9 @@
+# XZ
+
+[XZ for Java](https://tukaani.org/xz/) - Public Domain
+
+"XZ Utils is free general-purpose data compression software with a high compression ratio. 
+XZ Utils were written for POSIX-like systems, but also work on some not-so-POSIX systems. 
+XZ Utils are the successor to LZMA Utils."
+
+Needed for [openzim](../openzim) to deal with modern zim files.
--- a/third-party/xz/src/main/java/org/tukaani/xz/BlockInputStream.java
+++ b/third-party/xz/src/main/java/org/tukaani/xz/BlockInputStream.java
--- a/third-party/xz/src/main/java/org/tukaani/xz/BlockOutputStream.java
+++ b/third-party/xz/src/main/java/org/tukaani/xz/BlockOutputStream.java
--- a/third-party/xz/src/main/java/org/tukaani/xz/CorruptedInputException.java
+++ b/third-party/xz/src/main/java/org/tukaani/xz/CorruptedInputException.java
--- a/third-party/xz/src/main/java/org/tukaani/xz/CountingInputStream.java
+++ b/third-party/xz/src/main/java/org/tukaani/xz/CountingInputStream.java
--- a/third-party/xz/src/main/java/org/tukaani/xz/CountingOutputStream.java
+++ b/third-party/xz/src/main/java/org/tukaani/xz/CountingOutputStream.java
--- a/third-party/xz/src/main/java/org/tukaani/xz/DeltaCoder.java
+++ b/third-party/xz/src/main/java/org/tukaani/xz/DeltaCoder.java
--- a/third-party/xz/src/main/java/org/tukaani/xz/DeltaDecoder.java
+++ b/third-party/xz/src/main/java/org/tukaani/xz/DeltaDecoder.java
--- a/third-party/xz/src/main/java/org/tukaani/xz/DeltaInputStream.java
+++ b/third-party/xz/src/main/java/org/tukaani/xz/DeltaInputStream.java
--- a/third-party/xz/src/main/java/org/tukaani/xz/FilterCoder.java
+++ b/third-party/xz/src/main/java/org/tukaani/xz/FilterCoder.java
--- a/third-party/xz/src/main/java/org/tukaani/xz/FilterDecoder.java
+++ b/third-party/xz/src/main/java/org/tukaani/xz/FilterDecoder.java
--- a/third-party/xz/src/main/java/org/tukaani/xz/FilterEncoder.java
+++ b/third-party/xz/src/main/java/org/tukaani/xz/FilterEncoder.java
--- a/third-party/xz/src/main/java/org/tukaani/xz/FilterOptions.java
+++ b/third-party/xz/src/main/java/org/tukaani/xz/FilterOptions.java
--- a/third-party/xz/src/main/java/org/tukaani/xz/FinishableOutputStream.java
+++ b/third-party/xz/src/main/java/org/tukaani/xz/FinishableOutputStream.java
--- a/third-party/xz/src/main/java/org/tukaani/xz/IndexIndicatorException.java
+++ b/third-party/xz/src/main/java/org/tukaani/xz/IndexIndicatorException.java
--- a/third-party/xz/src/main/java/org/tukaani/xz/LZMA2Coder.java
+++ b/third-party/xz/src/main/java/org/tukaani/xz/LZMA2Coder.java
--- a/third-party/xz/src/main/java/org/tukaani/xz/LZMA2Decoder.java
+++ b/third-party/xz/src/main/java/org/tukaani/xz/LZMA2Decoder.java
--- a/third-party/xz/src/main/java/org/tukaani/xz/LZMA2Encoder.java
+++ b/third-party/xz/src/main/java/org/tukaani/xz/LZMA2Encoder.java
--- a/third-party/xz/src/main/java/org/tukaani/xz/LZMA2InputStream.java
+++ b/third-party/xz/src/main/java/org/tukaani/xz/LZMA2InputStream.java
--- a/third-party/xz/src/main/java/org/tukaani/xz/LZMA2Options.java
+++ b/third-party/xz/src/main/java/org/tukaani/xz/LZMA2Options.java
--- a/third-party/xz/src/main/java/org/tukaani/xz/LZMA2OutputStream.java
+++ b/third-party/xz/src/main/java/org/tukaani/xz/LZMA2OutputStream.java
--- a/third-party/xz/src/main/java/org/tukaani/xz/MemoryLimitException.java
+++ b/third-party/xz/src/main/java/org/tukaani/xz/MemoryLimitException.java
--- a/third-party/xz/src/main/java/org/tukaani/xz/RawCoder.java
+++ b/third-party/xz/src/main/java/org/tukaani/xz/RawCoder.java
--- a/third-party/xz/src/main/java/org/tukaani/xz/SingleXZInputStream.java
+++ b/third-party/xz/src/main/java/org/tukaani/xz/SingleXZInputStream.java
--- a/third-party/xz/src/main/java/org/tukaani/xz/UnsupportedOptionsException.java
+++ b/third-party/xz/src/main/java/org/tukaani/xz/UnsupportedOptionsException.java
--- a/third-party/xz/src/main/java/org/tukaani/xz/XZ.java
+++ b/third-party/xz/src/main/java/org/tukaani/xz/XZ.java
--- a/third-party/xz/src/main/java/org/tukaani/xz/XZFormatException.java
+++ b/third-party/xz/src/main/java/org/tukaani/xz/XZFormatException.java
--- a/third-party/xz/src/main/java/org/tukaani/xz/XZIOException.java
+++ b/third-party/xz/src/main/java/org/tukaani/xz/XZIOException.java
--- a/third-party/xz/src/main/java/org/tukaani/xz/XZInputStream.java
+++ b/third-party/xz/src/main/java/org/tukaani/xz/XZInputStream.java
--- a/third-party/xz/src/main/java/org/tukaani/xz/XZOutputStream.java
+++ b/third-party/xz/src/main/java/org/tukaani/xz/XZOutputStream.java
--- a/third-party/xz/src/main/java/org/tukaani/xz/check/CRC32.java
+++ b/third-party/xz/src/main/java/org/tukaani/xz/check/CRC32.java
--- a/third-party/xz/src/main/java/org/tukaani/xz/check/CRC64.java
+++ b/third-party/xz/src/main/java/org/tukaani/xz/check/CRC64.java
--- a/third-party/xz/src/main/java/org/tukaani/xz/check/Check.java
+++ b/third-party/xz/src/main/java/org/tukaani/xz/check/Check.java
--- a/third-party/xz/src/main/java/org/tukaani/xz/check/None.java
+++ b/third-party/xz/src/main/java/org/tukaani/xz/check/None.java
--- a/third-party/xz/src/main/java/org/tukaani/xz/check/SHA256.java
+++ b/third-party/xz/src/main/java/org/tukaani/xz/check/SHA256.java
--- a/Show More
+++ b/Show More