From 616effdb3c5d4b3a03e20dc86ef9cb8b04ac561f Mon Sep 17 00:00:00 2001 From: Viktor Lofgren Date: Sun, 12 Mar 2023 10:04:48 +0100 Subject: [PATCH] The refactoring will continue until morale improves. --- code/api/assistant-api/build.gradle | 1 - code/api/index-api/build.gradle | 1 - code/api/search-api/build.gradle | 1 - code/common/service-client/build.gradle | 1 - code/crawl/common/build.gradle | 1 - code/crawl/converting-model/build.gradle | 1 - code/crawl/converting-process/build.gradle | 2 +- code/crawl/crawling-model/build.gradle | 1 - .../crawling/io/CrawledDomainReader.java | 4 +- code/crawl/crawling-process/build.gradle | 1 - code/crawl/experimental/build.gradle | 1 - code/crawl/loading-process/build.gradle | 1 - code/features/random-websites/build.gradle | 1 - code/features/screenshots/build.gradle | 1 - code/features/topic-detection/build.gradle | 2 +- code/index/index-forward/build.gradle | 3 +- code/index/index-journal/build.gradle | 1 - code/libraries/array/build.gradle | 2 +- code/libraries/btree/build.gradle | 1 - .../language-processing/build.gradle | 4 +- .../assistant-service/build.gradle | 2 +- .../assistant/dict/WikiCleanerTest.java | 45 - code/services-core/index-service/build.gradle | 1 - .../services-core/search-service/build.gradle | 1 - .../api-service/build.gradle | 1 - .../dating-service/build.gradle | 1 - .../explorer-service/build.gradle | 1 - docker-service.gradle | 24 +- other/memex/build.gradle | 1 - other/wmsa_old/build.gradle | 1 - settings.gradle | 9 +- third-party/README.md | 15 +- .../{ => monkey-patch-opennlp}/build.gradle | 2 +- third-party/monkey-patch-opennlp/readme.md | 11 + .../sentdetect/DefaultSDContextGenerator.java | 0 .../tools/sentdetect/SentenceDetectorME.java | 0 third-party/openzim/build.gradle | 24 + third-party/openzim/readme.md | 11 + .../org/openzim/ZIMTypes/ArticleEntry.java | 0 .../org/openzim/ZIMTypes/DirectoryEntry.java | 0 .../org/openzim/ZIMTypes/RedirectEntry.java | 0 
.../java/org/openzim/ZIMTypes/ZIMFile.java | 0 .../java/org/openzim/ZIMTypes/ZIMReader.java | 0 .../util/RandomAcessFileZIMInputStream.java | 0 .../main/java/org/openzim/util/Utilities.java | 0 third-party/porterstemmer/build.gradle | 16 + third-party/porterstemmer/readme.md | 6 + .../ca/rmen/porterstemmer/PorterStemmer.java | 0 third-party/rdrpostagger/build.gradle | 16 + third-party/rdrpostagger/readme.md | 12 + .../com/github/datquocnguyen/FWObject.java | 0 .../github/datquocnguyen/InitialTagger.java | 0 .../java/com/github/datquocnguyen/Node.java | 0 .../github/datquocnguyen/RDRPOSTagger.java | 0 .../java/com/github/datquocnguyen/Utils.java | 0 .../com/github/datquocnguyen/WordTag.java | 0 .../com/google/gson/stream/JsonReader.java | 1637 ----------------- .../jdkoverride/LargeLineBufferedReader.java | 559 ------ third-party/symspell/build.gradle | 16 + third-party/symspell/readme.md | 9 + .../src/main/java/symspell/SymSpell.java | 0 third-party/uppend/build.gradle | 17 + third-party/uppend/readme.md | 6 + .../com/upserve/uppend/blobs/NativeIO.java | 0 third-party/xz/build.gradle | 16 + third-party/xz/readme.md | 9 + .../java/org/tukaani/xz/BlockInputStream.java | 0 .../org/tukaani/xz/BlockOutputStream.java | 0 .../tukaani/xz/CorruptedInputException.java | 0 .../org/tukaani/xz/CountingInputStream.java | 0 .../org/tukaani/xz/CountingOutputStream.java | 0 .../main/java/org/tukaani/xz/DeltaCoder.java | 0 .../java/org/tukaani/xz/DeltaDecoder.java | 0 .../java/org/tukaani/xz/DeltaInputStream.java | 0 .../main/java/org/tukaani/xz/FilterCoder.java | 0 .../java/org/tukaani/xz/FilterDecoder.java | 0 .../java/org/tukaani/xz/FilterEncoder.java | 0 .../java/org/tukaani/xz/FilterOptions.java | 0 .../tukaani/xz/FinishableOutputStream.java | 0 .../tukaani/xz/IndexIndicatorException.java | 0 .../main/java/org/tukaani/xz/LZMA2Coder.java | 0 .../java/org/tukaani/xz/LZMA2Decoder.java | 0 .../java/org/tukaani/xz/LZMA2Encoder.java | 0 .../java/org/tukaani/xz/LZMA2InputStream.java | 
0 .../java/org/tukaani/xz/LZMA2Options.java | 0 .../org/tukaani/xz/LZMA2OutputStream.java | 0 .../org/tukaani/xz/MemoryLimitException.java | 0 .../main/java/org/tukaani/xz/RawCoder.java | 0 .../org/tukaani/xz/SingleXZInputStream.java | 0 .../xz/UnsupportedOptionsException.java | 0 .../src/main/java/org/tukaani/xz/XZ.java | 0 .../org/tukaani/xz/XZFormatException.java | 0 .../java/org/tukaani/xz/XZIOException.java | 0 .../java/org/tukaani/xz/XZInputStream.java | 0 .../java/org/tukaani/xz/XZOutputStream.java | 0 .../main/java/org/tukaani/xz/check/CRC32.java | 0 .../main/java/org/tukaani/xz/check/CRC64.java | 0 .../main/java/org/tukaani/xz/check/Check.java | 0 .../main/java/org/tukaani/xz/check/None.java | 0 .../java/org/tukaani/xz/check/SHA256.java | 0 .../org/tukaani/xz/common/DecoderUtil.java | 0 .../org/tukaani/xz/common/EncoderUtil.java | 0 .../org/tukaani/xz/common/StreamFlags.java | 0 .../main/java/org/tukaani/xz/common/Util.java | 0 .../java/org/tukaani/xz/delta/DeltaCoder.java | 0 .../org/tukaani/xz/delta/DeltaDecoder.java | 0 .../java/org/tukaani/xz/index/IndexBase.java | 0 .../org/tukaani/xz/index/IndexEncoder.java | 0 .../java/org/tukaani/xz/index/IndexHash.java | 0 .../org/tukaani/xz/index/IndexRecord.java | 0 .../java/org/tukaani/xz/lz/LZDecoder.java | 0 .../java/org/tukaani/xz/lzma/LZMACoder.java | 0 .../java/org/tukaani/xz/lzma/LZMADecoder.java | 0 .../main/java/org/tukaani/xz/lzma/State.java | 0 .../java/org/tukaani/xz/package-info.java | 0 .../org/tukaani/xz/rangecoder/RangeCoder.java | 0 .../tukaani/xz/rangecoder/RangeDecoder.java | 0 tools/screenshot/build.gradle | 1 - 118 files changed, 214 insertions(+), 2287 deletions(-) delete mode 100644 code/services-core/assistant-service/src/test/java/nu/marginalia/assistant/dict/WikiCleanerTest.java rename third-party/{ => monkey-patch-opennlp}/build.gradle (96%) create mode 100644 third-party/monkey-patch-opennlp/readme.md rename third-party/{ => 
monkey-patch-opennlp}/src/main/java/opennlp/tools/sentdetect/DefaultSDContextGenerator.java (100%) rename third-party/{ => monkey-patch-opennlp}/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java (100%) create mode 100644 third-party/openzim/build.gradle create mode 100644 third-party/openzim/readme.md rename third-party/{ => openzim}/src/main/java/org/openzim/ZIMTypes/ArticleEntry.java (100%) rename third-party/{ => openzim}/src/main/java/org/openzim/ZIMTypes/DirectoryEntry.java (100%) rename third-party/{ => openzim}/src/main/java/org/openzim/ZIMTypes/RedirectEntry.java (100%) rename third-party/{ => openzim}/src/main/java/org/openzim/ZIMTypes/ZIMFile.java (100%) rename third-party/{ => openzim}/src/main/java/org/openzim/ZIMTypes/ZIMReader.java (100%) rename third-party/{ => openzim}/src/main/java/org/openzim/util/RandomAcessFileZIMInputStream.java (100%) rename third-party/{ => openzim}/src/main/java/org/openzim/util/Utilities.java (100%) create mode 100644 third-party/porterstemmer/build.gradle create mode 100644 third-party/porterstemmer/readme.md rename third-party/{ => porterstemmer}/src/main/java/ca/rmen/porterstemmer/PorterStemmer.java (100%) create mode 100644 third-party/rdrpostagger/build.gradle create mode 100644 third-party/rdrpostagger/readme.md rename third-party/{ => rdrpostagger}/src/main/java/com/github/datquocnguyen/FWObject.java (100%) rename third-party/{ => rdrpostagger}/src/main/java/com/github/datquocnguyen/InitialTagger.java (100%) rename third-party/{ => rdrpostagger}/src/main/java/com/github/datquocnguyen/Node.java (100%) rename third-party/{ => rdrpostagger}/src/main/java/com/github/datquocnguyen/RDRPOSTagger.java (100%) rename third-party/{ => rdrpostagger}/src/main/java/com/github/datquocnguyen/Utils.java (100%) rename third-party/{ => rdrpostagger}/src/main/java/com/github/datquocnguyen/WordTag.java (100%) delete mode 100644 third-party/src/main/java/com/google/gson/stream/JsonReader.java delete mode 100644 
third-party/src/main/java/jdkoverride/LargeLineBufferedReader.java create mode 100644 third-party/symspell/build.gradle create mode 100644 third-party/symspell/readme.md rename third-party/{ => symspell}/src/main/java/symspell/SymSpell.java (100%) create mode 100644 third-party/uppend/build.gradle create mode 100644 third-party/uppend/readme.md rename third-party/{ => uppend}/src/main/java/com/upserve/uppend/blobs/NativeIO.java (100%) create mode 100644 third-party/xz/build.gradle create mode 100644 third-party/xz/readme.md rename third-party/{ => xz}/src/main/java/org/tukaani/xz/BlockInputStream.java (100%) rename third-party/{ => xz}/src/main/java/org/tukaani/xz/BlockOutputStream.java (100%) rename third-party/{ => xz}/src/main/java/org/tukaani/xz/CorruptedInputException.java (100%) rename third-party/{ => xz}/src/main/java/org/tukaani/xz/CountingInputStream.java (100%) rename third-party/{ => xz}/src/main/java/org/tukaani/xz/CountingOutputStream.java (100%) rename third-party/{ => xz}/src/main/java/org/tukaani/xz/DeltaCoder.java (100%) rename third-party/{ => xz}/src/main/java/org/tukaani/xz/DeltaDecoder.java (100%) rename third-party/{ => xz}/src/main/java/org/tukaani/xz/DeltaInputStream.java (100%) rename third-party/{ => xz}/src/main/java/org/tukaani/xz/FilterCoder.java (100%) rename third-party/{ => xz}/src/main/java/org/tukaani/xz/FilterDecoder.java (100%) rename third-party/{ => xz}/src/main/java/org/tukaani/xz/FilterEncoder.java (100%) rename third-party/{ => xz}/src/main/java/org/tukaani/xz/FilterOptions.java (100%) rename third-party/{ => xz}/src/main/java/org/tukaani/xz/FinishableOutputStream.java (100%) rename third-party/{ => xz}/src/main/java/org/tukaani/xz/IndexIndicatorException.java (100%) rename third-party/{ => xz}/src/main/java/org/tukaani/xz/LZMA2Coder.java (100%) rename third-party/{ => xz}/src/main/java/org/tukaani/xz/LZMA2Decoder.java (100%) rename third-party/{ => xz}/src/main/java/org/tukaani/xz/LZMA2Encoder.java (100%) rename 
third-party/{ => xz}/src/main/java/org/tukaani/xz/LZMA2InputStream.java (100%) rename third-party/{ => xz}/src/main/java/org/tukaani/xz/LZMA2Options.java (100%) rename third-party/{ => xz}/src/main/java/org/tukaani/xz/LZMA2OutputStream.java (100%) rename third-party/{ => xz}/src/main/java/org/tukaani/xz/MemoryLimitException.java (100%) rename third-party/{ => xz}/src/main/java/org/tukaani/xz/RawCoder.java (100%) rename third-party/{ => xz}/src/main/java/org/tukaani/xz/SingleXZInputStream.java (100%) rename third-party/{ => xz}/src/main/java/org/tukaani/xz/UnsupportedOptionsException.java (100%) rename third-party/{ => xz}/src/main/java/org/tukaani/xz/XZ.java (100%) rename third-party/{ => xz}/src/main/java/org/tukaani/xz/XZFormatException.java (100%) rename third-party/{ => xz}/src/main/java/org/tukaani/xz/XZIOException.java (100%) rename third-party/{ => xz}/src/main/java/org/tukaani/xz/XZInputStream.java (100%) rename third-party/{ => xz}/src/main/java/org/tukaani/xz/XZOutputStream.java (100%) rename third-party/{ => xz}/src/main/java/org/tukaani/xz/check/CRC32.java (100%) rename third-party/{ => xz}/src/main/java/org/tukaani/xz/check/CRC64.java (100%) rename third-party/{ => xz}/src/main/java/org/tukaani/xz/check/Check.java (100%) rename third-party/{ => xz}/src/main/java/org/tukaani/xz/check/None.java (100%) rename third-party/{ => xz}/src/main/java/org/tukaani/xz/check/SHA256.java (100%) rename third-party/{ => xz}/src/main/java/org/tukaani/xz/common/DecoderUtil.java (100%) rename third-party/{ => xz}/src/main/java/org/tukaani/xz/common/EncoderUtil.java (100%) rename third-party/{ => xz}/src/main/java/org/tukaani/xz/common/StreamFlags.java (100%) rename third-party/{ => xz}/src/main/java/org/tukaani/xz/common/Util.java (100%) rename third-party/{ => xz}/src/main/java/org/tukaani/xz/delta/DeltaCoder.java (100%) rename third-party/{ => xz}/src/main/java/org/tukaani/xz/delta/DeltaDecoder.java (100%) rename third-party/{ => 
xz}/src/main/java/org/tukaani/xz/index/IndexBase.java (100%) rename third-party/{ => xz}/src/main/java/org/tukaani/xz/index/IndexEncoder.java (100%) rename third-party/{ => xz}/src/main/java/org/tukaani/xz/index/IndexHash.java (100%) rename third-party/{ => xz}/src/main/java/org/tukaani/xz/index/IndexRecord.java (100%) rename third-party/{ => xz}/src/main/java/org/tukaani/xz/lz/LZDecoder.java (100%) rename third-party/{ => xz}/src/main/java/org/tukaani/xz/lzma/LZMACoder.java (100%) rename third-party/{ => xz}/src/main/java/org/tukaani/xz/lzma/LZMADecoder.java (100%) rename third-party/{ => xz}/src/main/java/org/tukaani/xz/lzma/State.java (100%) rename third-party/{ => xz}/src/main/java/org/tukaani/xz/package-info.java (100%) rename third-party/{ => xz}/src/main/java/org/tukaani/xz/rangecoder/RangeCoder.java (100%) rename third-party/{ => xz}/src/main/java/org/tukaani/xz/rangecoder/RangeDecoder.java (100%) diff --git a/code/api/assistant-api/build.gradle b/code/api/assistant-api/build.gradle index 6a46e5ef..5dc33d5c 100644 --- a/code/api/assistant-api/build.gradle +++ b/code/api/assistant-api/build.gradle @@ -11,7 +11,6 @@ java { } } dependencies { - implementation project(':third-party') implementation project(':code:common:model') implementation project(':code:common:config') implementation project(':code:common:service-discovery') diff --git a/code/api/index-api/build.gradle b/code/api/index-api/build.gradle index 93672230..73520b36 100644 --- a/code/api/index-api/build.gradle +++ b/code/api/index-api/build.gradle @@ -12,7 +12,6 @@ java { } dependencies { - implementation project(':third-party') implementation project(':code:common:model') implementation project(':code:common:config') implementation project(':code:common:service-discovery') diff --git a/code/api/search-api/build.gradle b/code/api/search-api/build.gradle index a314ea6c..8c38b5f3 100644 --- a/code/api/search-api/build.gradle +++ b/code/api/search-api/build.gradle @@ -12,7 +12,6 @@ java { } 
dependencies { - implementation project(':third-party') implementation project(':code:common:model') implementation project(':code:common:config') implementation project(':code:common:service-discovery') diff --git a/code/common/service-client/build.gradle b/code/common/service-client/build.gradle index 834fc45f..6ba7f806 100644 --- a/code/common/service-client/build.gradle +++ b/code/common/service-client/build.gradle @@ -12,7 +12,6 @@ java { } dependencies { - implementation project(':third-party') implementation project(':code:common:service-discovery') diff --git a/code/crawl/common/build.gradle b/code/crawl/common/build.gradle index 67174793..483dbee8 100644 --- a/code/crawl/common/build.gradle +++ b/code/crawl/common/build.gradle @@ -12,7 +12,6 @@ java { } dependencies { - implementation project(':third-party') implementation project(':code:common:model') implementation project(':code:common:config') implementation project(':code:libraries:guarded-regex') diff --git a/code/crawl/converting-model/build.gradle b/code/crawl/converting-model/build.gradle index 15382ff2..29bbd025 100644 --- a/code/crawl/converting-model/build.gradle +++ b/code/crawl/converting-model/build.gradle @@ -11,7 +11,6 @@ java { } } dependencies { - implementation project(':third-party') implementation project(':code:common:model') implementation project(':code:api:index-api') implementation project(':code:common:service-discovery') diff --git a/code/crawl/converting-process/build.gradle b/code/crawl/converting-process/build.gradle index 805a5bc8..e33f0b44 100644 --- a/code/crawl/converting-process/build.gradle +++ b/code/crawl/converting-process/build.gradle @@ -19,7 +19,7 @@ application { tasks.distZip.enabled = false dependencies { - implementation project(':third-party') + implementation project(':third-party:porterstemmer') implementation project(':code:api:index-api') implementation project(':code:common:model') diff --git a/code/crawl/crawling-model/build.gradle 
b/code/crawl/crawling-model/build.gradle index e9cdbc01..d2803c21 100644 --- a/code/crawl/crawling-model/build.gradle +++ b/code/crawl/crawling-model/build.gradle @@ -13,7 +13,6 @@ java { } dependencies { - implementation project(':third-party') implementation project(':code:common:model') implementation project(':code:libraries:big-string') implementation project(':code:api:index-api') diff --git a/code/crawl/crawling-model/src/main/java/nu/marginalia/crawling/io/CrawledDomainReader.java b/code/crawl/crawling-model/src/main/java/nu/marginalia/crawling/io/CrawledDomainReader.java index 49dee5b3..744236c0 100644 --- a/code/crawl/crawling-model/src/main/java/nu/marginalia/crawling/io/CrawledDomainReader.java +++ b/code/crawl/crawling-model/src/main/java/nu/marginalia/crawling/io/CrawledDomainReader.java @@ -2,13 +2,13 @@ package nu.marginalia.crawling.io; import com.github.luben.zstd.ZstdInputStream; import com.google.gson.Gson; -import jdkoverride.LargeLineBufferedReader; import nu.marginalia.crawling.model.CrawledDocument; import nu.marginalia.crawling.model.CrawledDomain; import nu.marginalia.model.gson.GsonFactory; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.io.BufferedReader; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStreamReader; @@ -29,7 +29,7 @@ public class CrawledDomainReader { public CrawledDomain read(Path path) throws IOException { DomainDataAssembler domainData = new DomainDataAssembler(); - try (var br = new LargeLineBufferedReader(new InputStreamReader(new ZstdInputStream(new FileInputStream(path.toFile()))))) { + try (var br = new BufferedReader(new InputStreamReader(new ZstdInputStream(new FileInputStream(path.toFile()))))) { String line; while ((line = br.readLine()) != null) { if (line.startsWith("//")) { diff --git a/code/crawl/crawling-process/build.gradle b/code/crawl/crawling-process/build.gradle index 18e841b3..29630437 100644 --- a/code/crawl/crawling-process/build.gradle +++ 
b/code/crawl/crawling-process/build.gradle @@ -19,7 +19,6 @@ application { tasks.distZip.enabled = false dependencies { - implementation project(':third-party') implementation project(':code:common:model') implementation project(':code:common:config') implementation project(':code:common:service') diff --git a/code/crawl/experimental/build.gradle b/code/crawl/experimental/build.gradle index cfa239b2..85063928 100644 --- a/code/crawl/experimental/build.gradle +++ b/code/crawl/experimental/build.gradle @@ -12,7 +12,6 @@ java { } dependencies { - implementation project(':third-party') implementation project(':code:common:model') implementation project(':code:common:config') implementation project(':code:common:service') diff --git a/code/crawl/loading-process/build.gradle b/code/crawl/loading-process/build.gradle index bf93444a..ea233dda 100644 --- a/code/crawl/loading-process/build.gradle +++ b/code/crawl/loading-process/build.gradle @@ -18,7 +18,6 @@ application { tasks.distZip.enabled = false dependencies { - implementation project(':third-party') implementation project(':code:api:index-api') implementation project(':code:common:model') implementation project(':code:common:config') diff --git a/code/features/random-websites/build.gradle b/code/features/random-websites/build.gradle index 4e1ff560..1e018f03 100644 --- a/code/features/random-websites/build.gradle +++ b/code/features/random-websites/build.gradle @@ -15,7 +15,6 @@ java { } dependencies { - implementation project(':third-party') implementation project(':code:common:model') implementation project(':code:common:service') diff --git a/code/features/screenshots/build.gradle b/code/features/screenshots/build.gradle index db62f15c..0e014011 100644 --- a/code/features/screenshots/build.gradle +++ b/code/features/screenshots/build.gradle @@ -11,7 +11,6 @@ java { } dependencies { - implementation project(':third-party') implementation project(':code:common:model') implementation project(':code:common:service') 
diff --git a/code/features/topic-detection/build.gradle b/code/features/topic-detection/build.gradle index 035e9974..0f764b67 100644 --- a/code/features/topic-detection/build.gradle +++ b/code/features/topic-detection/build.gradle @@ -16,7 +16,7 @@ java { dependencies { implementation project(':code:common:config') implementation project(':code:libraries:language-processing') - implementation project(':third-party') + implementation project(':third-party:porterstemmer') implementation libs.lombok annotationProcessor libs.lombok diff --git a/code/index/index-forward/build.gradle b/code/index/index-forward/build.gradle index 8a789e59..011a39ac 100644 --- a/code/index/index-forward/build.gradle +++ b/code/index/index-forward/build.gradle @@ -18,7 +18,8 @@ dependencies { implementation project(':code:index:index-journal') implementation project(':code:index:lexicon') implementation project(':code:common:model') - implementation project(':third-party') + + implementation project(':third-party:uppend') implementation libs.lombok annotationProcessor libs.lombok diff --git a/code/index/index-journal/build.gradle b/code/index/index-journal/build.gradle index 65910447..9aeaa209 100644 --- a/code/index/index-journal/build.gradle +++ b/code/index/index-journal/build.gradle @@ -14,7 +14,6 @@ dependencies { implementation project(':code:libraries:array') implementation project(':code:common:model') implementation project(':code:index:lexicon') - implementation project(':third-party') implementation libs.lombok annotationProcessor libs.lombok diff --git a/code/libraries/array/build.gradle b/code/libraries/array/build.gradle index 65574dfd..295a4dc5 100644 --- a/code/libraries/array/build.gradle +++ b/code/libraries/array/build.gradle @@ -9,7 +9,7 @@ java { } dependencies { - implementation project(':third-party') + implementation project(':third-party:uppend') implementation libs.lombok annotationProcessor libs.lombok diff --git a/code/libraries/btree/build.gradle 
b/code/libraries/btree/build.gradle index c5a9950e..83b5fc7b 100644 --- a/code/libraries/btree/build.gradle +++ b/code/libraries/btree/build.gradle @@ -9,7 +9,6 @@ java { } dependencies { - implementation project(':third-party') implementation project(':code:libraries:array') implementation project(':code:libraries:next-prime') diff --git a/code/libraries/language-processing/build.gradle b/code/libraries/language-processing/build.gradle index dfcec644..f0d52d1f 100644 --- a/code/libraries/language-processing/build.gradle +++ b/code/libraries/language-processing/build.gradle @@ -15,7 +15,9 @@ java { } dependencies { - implementation project(':third-party') + implementation project(':third-party:rdrpostagger') + implementation project(':third-party:porterstemmer') + implementation project(':third-party:monkey-patch-opennlp') implementation project(':code:common:model') implementation project(':code:common:config') implementation project(':code:libraries:easy-lsh') diff --git a/code/services-core/assistant-service/build.gradle b/code/services-core/assistant-service/build.gradle index ad6e0167..d9b04f0e 100644 --- a/code/services-core/assistant-service/build.gradle +++ b/code/services-core/assistant-service/build.gradle @@ -22,7 +22,7 @@ java { } dependencies { - implementation project(':third-party') + implementation project(':third-party:symspell') implementation project(':code:api:assistant-api') implementation project(':code:common:config') implementation project(':code:common:service') diff --git a/code/services-core/assistant-service/src/test/java/nu/marginalia/assistant/dict/WikiCleanerTest.java b/code/services-core/assistant-service/src/test/java/nu/marginalia/assistant/dict/WikiCleanerTest.java deleted file mode 100644 index 3c2bda7a..00000000 --- a/code/services-core/assistant-service/src/test/java/nu/marginalia/assistant/dict/WikiCleanerTest.java +++ /dev/null @@ -1,45 +0,0 @@ -package nu.marginalia.assistant.dict; - -import org.junit.jupiter.api.Disabled; 
-import org.junit.jupiter.api.Test; -import org.openzim.ZIMTypes.ZIMFile; -import org.openzim.ZIMTypes.ZIMReader; - -import java.io.IOException; - -class WikiCleanerTest { - - @Test - void cleanWikiJunk() throws IOException { -// String str = new WikiCleaner().cleanWikiJunk("https://en.wikipedia.org/wiki/Scamander", new String(Files.readAllBytes(Path.of("/home/vlofgren/Work/wiki-cleaner/Scamander.wiki.html")))); -// String str2 = new WikiCleaner().cleanWikiJunk("https://en.wikipedia.org/wiki/Plato", new String(Files.readAllBytes(Path.of("/home/vlofgren/Work/wiki-cleaner/Plato.wiki.html")))); -// String str3 = new WikiCleaner().cleanWikiJunk("https://en.wikipedia.org/wiki/C++", new String(Files.readAllBytes(Path.of("/home/vlofgren/Work/wiki-cleaner/Cpp.wiki.html")))); -// String str4 = new WikiCleaner().cleanWikiJunk("https://en.wikipedia.org/wiki/Memex", new String(Files.readAllBytes(Path.of("/home/vlofgren/Work/wiki-cleaner/Memex.wiki.html")))); -// Files.writeString(Path.of("/home/vlofgren/Work/wiki-cleaner/Scamander.out.html"), str); -// Files.writeString(Path.of("/home/vlofgren/Work/wiki-cleaner/Plato.out.html"), str2); -// Files.writeString(Path.of("/home/vlofgren/Work/wiki-cleaner/Cpp.out.html"), str3); -// Files.writeString(Path.of("/home/vlofgren/Work/wiki-cleaner/Memex.out.html"), str4); - } - - @Test @Disabled - public void readZim() throws IOException { - var zr = new ZIMReader(new ZIMFile("/home/vlofgren/Work/wikipedia_en_all_nopic_2021-01.zim")); -// try (var pw = new PrintWriter(new File("/home/vlofgren/Work/article-clusters.tsv"))) { -// zr.enumerateArticles(pw); -// } - zr.forEachArticles((url, art) -> { - if (art != null) { - System.out.println(url); - } -// if (art != null && art.length() > 5) { -// System.out.println(url + " -> " + art.substring(0, 5)); -// } - }, (p) -> true); - - /*try (var baos = zr.getArticleData("Giraffe", 'A')) { - String str = baos.toString(); - 
Files.writeString(Path.of("/home/vlofgren/Work/wiki-cleaner/Giraffe.wiki.html"), str); - Files.writeString(Path.of("/home/vlofgren/Work/wiki-cleaner/Giraffe.out.html"), new WikiCleaner().cleanWikiJunk("https://en.wikipedia.org/wiki/Giraffe", str)); - }*/ - } -} \ No newline at end of file diff --git a/code/services-core/index-service/build.gradle b/code/services-core/index-service/build.gradle index 42b6d8c7..1ed30ea5 100644 --- a/code/services-core/index-service/build.gradle +++ b/code/services-core/index-service/build.gradle @@ -21,7 +21,6 @@ java { } } dependencies { - implementation project(':third-party') implementation project(':code:common:config') implementation project(':code:common:model') implementation project(':code:common:service') diff --git a/code/services-core/search-service/build.gradle b/code/services-core/search-service/build.gradle index 58a205e3..47e9f1d7 100644 --- a/code/services-core/search-service/build.gradle +++ b/code/services-core/search-service/build.gradle @@ -21,7 +21,6 @@ java { } } dependencies { - implementation project(':third-party') implementation project(':code:common:model') implementation project(':code:common:service') implementation project(':code:common:config') diff --git a/code/services-satellite/api-service/build.gradle b/code/services-satellite/api-service/build.gradle index acf0dbfc..6a40dd89 100644 --- a/code/services-satellite/api-service/build.gradle +++ b/code/services-satellite/api-service/build.gradle @@ -22,7 +22,6 @@ tasks.distZip.enabled = false apply from: "$rootProject.projectDir/docker-service.gradle" dependencies { - implementation project(':third-party') implementation project(':code:common:model') implementation project(':code:common:service') implementation project(':code:common:config') diff --git a/code/services-satellite/dating-service/build.gradle b/code/services-satellite/dating-service/build.gradle index 0e72441b..3208e477 100644 --- a/code/services-satellite/dating-service/build.gradle +++ 
b/code/services-satellite/dating-service/build.gradle @@ -21,7 +21,6 @@ java { } } dependencies { - implementation project(':third-party') implementation project(':code:common:model') implementation project(':code:common:service') implementation project(':code:common:service-discovery') diff --git a/code/services-satellite/explorer-service/build.gradle b/code/services-satellite/explorer-service/build.gradle index 8034a42d..fee0e6dd 100644 --- a/code/services-satellite/explorer-service/build.gradle +++ b/code/services-satellite/explorer-service/build.gradle @@ -21,7 +21,6 @@ java { } } dependencies { - implementation project(':third-party') implementation project(':code:common:model') implementation project(':code:common:service') implementation project(':code:common:service-discovery') diff --git a/docker-service.gradle b/docker-service.gradle index 9f8f1bbe..15086ce7 100644 --- a/docker-service.gradle +++ b/docker-service.gradle @@ -4,10 +4,12 @@ ext { serviceToolOpts='-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=*:5000' } -docker { - var df = new File(buildDir, "Dockerfile") +tasks.register('dockerFile') { + buildDir.mkdir() - df.text = """# + var df = new File(buildDir, "Dockerfile") + doLast { + df.text = """# # I'm auto-generated, please don't make changes to me or commit me to git # # The template exists in docker-service.gradle @@ -22,11 +24,23 @@ ENV JAVA_OPTS="${serviceJvmOpts} " ENTRYPOINT WMSA_HOME=/wmsa /${application.applicationName}/bin/${application.applicationName} \${arg0} \${arg1} """ + } + it.outputs.file(df) +} - dockerfile = new File(buildDir, "Dockerfile") +dockerPrepare { + dependsOn tasks.dockerFile +} + +dockerfileZip { + dependsOn tasks.dockerFile +} + + +docker { + dockerfile = tasks.dockerFile.outputs.files.singleFile name = 'marginalia.nu/'+application.applicationName+':latest' files tasks.distTar.outputs tags 'latest' - dependsOn tasks.distTar } diff --git a/other/memex/build.gradle b/other/memex/build.gradle index 
78b13789..5e422b57 100644 --- a/other/memex/build.gradle +++ b/other/memex/build.gradle @@ -59,7 +59,6 @@ jmhJar { zip64 true } dependencies { - implementation project(':third-party') implementation project(':code:common:service') implementation project(':code:common:config') implementation project(':code:common:service-discovery') diff --git a/other/wmsa_old/build.gradle b/other/wmsa_old/build.gradle index 8b41cbc9..4f38b40c 100644 --- a/other/wmsa_old/build.gradle +++ b/other/wmsa_old/build.gradle @@ -30,7 +30,6 @@ java { } } dependencies { - implementation project(':third-party') implementation project(':code:common:service') implementation project(':code:common:service-discovery') implementation project(':code:common:service-client') diff --git a/settings.gradle b/settings.gradle index b871494c..db1822aa 100644 --- a/settings.gradle +++ b/settings.gradle @@ -52,7 +52,14 @@ include 'code:crawl:loading-process' include 'code:crawl:common' include 'code:crawl:experimental' -include 'third-party' +include 'third-party:porterstemmer' +include 'third-party:xz' +include 'third-party:symspell' +include 'third-party:rdrpostagger' +include 'third-party:uppend' +include 'third-party:openzim' +include 'third-party:monkey-patch-opennlp' + include 'other:memex' include 'other:wmsa_old' diff --git a/third-party/README.md b/third-party/README.md index 70b6340c..577566bd 100644 --- a/third-party/README.md +++ b/third-party/README.md @@ -6,14 +6,11 @@ or lack an artifact, or to override some default that is inappropriate for the t ## Sources and Licenses ### Modified -* [RDRPosTagger](https://github.com/datquocnguyen/RDRPOSTagger) - GPL3 -* [PorterStemmer](https://github.com/caarmen/porter-stemmer) - LGPL3 -* [Uppend](https://github.com/upserve/uppend) - MIT -* [OpenZIM](https://github.com/openzim/libzim) - GPL-2.0 -* [XZ for Java](https://tukaani.org/xz/) - Public Domain -* [SymSpell](https://github.com/wolfgarbe/symspell) - LGPL-3.0 +* [RDRPosTagger](rdrpostagger/) - GPL3 +* 
[PorterStemmer](porterstemmer/) - LGPL3 +* [Uppend](uppend/) - MIT +* [OpenZIM](openzim/) - GPL-2.0 +* [SymSpell](symspell/) - LGPL-3.0 ### Monkey Patched -* [GSON](https://github.com/google/gson) - Apache-2.0 -* OpenJDK - GPL-2.0 (packaged under jdkoverride) -* Stanford OpenNLP - Apache-2.0 +* [Apache OpenNLP](monkey-patch-opennlp/) - Apache-2.0 diff --git a/third-party/build.gradle b/third-party/monkey-patch-opennlp/build.gradle similarity index 96% rename from third-party/build.gradle rename to third-party/monkey-patch-opennlp/build.gradle index 4128ac2b..1d6a4bc3 100644 --- a/third-party/build.gradle +++ b/third-party/monkey-patch-opennlp/build.gradle @@ -27,5 +27,5 @@ dependencies { } test { - useJUnitPlatform() + useJUnitPlatform()\ } diff --git a/third-party/monkey-patch-opennlp/readme.md b/third-party/monkey-patch-opennlp/readme.md new file mode 100644 index 00000000..be7d7490 --- /dev/null +++ b/third-party/monkey-patch-opennlp/readme.md @@ -0,0 +1,11 @@ +# Monkey Patched OpenNLP + +Apache OpenNLP - Apache-2.0 + +## Rationale + +OpenNLP's sentence detector uses a slow StringBuffer instead of a StringBuilder where it makes +no sense to do so. This makes it much slower than it needs to be. I've found no way to file issues with the +project to get it fixed. Instead we're doing this monkey patch where the class is overridden with something +better. 
+ diff --git a/third-party/src/main/java/opennlp/tools/sentdetect/DefaultSDContextGenerator.java b/third-party/monkey-patch-opennlp/src/main/java/opennlp/tools/sentdetect/DefaultSDContextGenerator.java similarity index 100% rename from third-party/src/main/java/opennlp/tools/sentdetect/DefaultSDContextGenerator.java rename to third-party/monkey-patch-opennlp/src/main/java/opennlp/tools/sentdetect/DefaultSDContextGenerator.java diff --git a/third-party/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java b/third-party/monkey-patch-opennlp/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java similarity index 100% rename from third-party/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java rename to third-party/monkey-patch-opennlp/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java diff --git a/third-party/openzim/build.gradle b/third-party/openzim/build.gradle new file mode 100644 index 00000000..e56a9df5 --- /dev/null +++ b/third-party/openzim/build.gradle @@ -0,0 +1,24 @@ +plugins { + id 'java' +} + +java { + toolchain { + languageVersion.set(JavaLanguageVersion.of(17)) + } +} + +dependencies { + implementation libs.bundles.nlp + implementation libs.zstd + implementation libs.commons.compress + implementation libs.ffi + implementation libs.databind + implementation libs.bundles.gson + + implementation project(':third-party:xz') +} + +test { + useJUnitPlatform() +} diff --git a/third-party/openzim/readme.md b/third-party/openzim/readme.md new file mode 100644 index 00000000..ee47e601 --- /dev/null +++ b/third-party/openzim/readme.md @@ -0,0 +1,11 @@ +# OpenZIM + +[OpenZIM](https://github.com/openzim/libzim) - GPL-2.0 + +OpenZIM is a ZIM file reader. This code has been modified in a fairly crude manner +to be much faster than the original code base which seems quite antique. It also +supports XZ compression. + +**Important Note** the license is incompatible with AGPL 3, so we can't link Marginalia +directly to this. 
It's still very useful for building tools that deal with +wikipedia data which would be stand-alone. \ No newline at end of file diff --git a/third-party/src/main/java/org/openzim/ZIMTypes/ArticleEntry.java b/third-party/openzim/src/main/java/org/openzim/ZIMTypes/ArticleEntry.java similarity index 100% rename from third-party/src/main/java/org/openzim/ZIMTypes/ArticleEntry.java rename to third-party/openzim/src/main/java/org/openzim/ZIMTypes/ArticleEntry.java diff --git a/third-party/src/main/java/org/openzim/ZIMTypes/DirectoryEntry.java b/third-party/openzim/src/main/java/org/openzim/ZIMTypes/DirectoryEntry.java similarity index 100% rename from third-party/src/main/java/org/openzim/ZIMTypes/DirectoryEntry.java rename to third-party/openzim/src/main/java/org/openzim/ZIMTypes/DirectoryEntry.java diff --git a/third-party/src/main/java/org/openzim/ZIMTypes/RedirectEntry.java b/third-party/openzim/src/main/java/org/openzim/ZIMTypes/RedirectEntry.java similarity index 100% rename from third-party/src/main/java/org/openzim/ZIMTypes/RedirectEntry.java rename to third-party/openzim/src/main/java/org/openzim/ZIMTypes/RedirectEntry.java diff --git a/third-party/src/main/java/org/openzim/ZIMTypes/ZIMFile.java b/third-party/openzim/src/main/java/org/openzim/ZIMTypes/ZIMFile.java similarity index 100% rename from third-party/src/main/java/org/openzim/ZIMTypes/ZIMFile.java rename to third-party/openzim/src/main/java/org/openzim/ZIMTypes/ZIMFile.java diff --git a/third-party/src/main/java/org/openzim/ZIMTypes/ZIMReader.java b/third-party/openzim/src/main/java/org/openzim/ZIMTypes/ZIMReader.java similarity index 100% rename from third-party/src/main/java/org/openzim/ZIMTypes/ZIMReader.java rename to third-party/openzim/src/main/java/org/openzim/ZIMTypes/ZIMReader.java diff --git a/third-party/src/main/java/org/openzim/util/RandomAcessFileZIMInputStream.java b/third-party/openzim/src/main/java/org/openzim/util/RandomAcessFileZIMInputStream.java similarity index 100% rename from 
third-party/src/main/java/org/openzim/util/RandomAcessFileZIMInputStream.java rename to third-party/openzim/src/main/java/org/openzim/util/RandomAcessFileZIMInputStream.java diff --git a/third-party/src/main/java/org/openzim/util/Utilities.java b/third-party/openzim/src/main/java/org/openzim/util/Utilities.java similarity index 100% rename from third-party/src/main/java/org/openzim/util/Utilities.java rename to third-party/openzim/src/main/java/org/openzim/util/Utilities.java diff --git a/third-party/porterstemmer/build.gradle b/third-party/porterstemmer/build.gradle new file mode 100644 index 00000000..de627417 --- /dev/null +++ b/third-party/porterstemmer/build.gradle @@ -0,0 +1,16 @@ +plugins { + id 'java' +} + +java { + toolchain { + languageVersion.set(JavaLanguageVersion.of(17)) + } +} + +dependencies { +} + +test { + useJUnitPlatform() +} diff --git a/third-party/porterstemmer/readme.md b/third-party/porterstemmer/readme.md new file mode 100644 index 00000000..c5e17793 --- /dev/null +++ b/third-party/porterstemmer/readme.md @@ -0,0 +1,6 @@ +# Porterstemmer + +[PorterStemmer](https://github.com/caarmen/porter-stemmer) - LGPL3 + +It's a [porter stemmer](https://tartarus.org/martin/PorterStemmer/) library, although one comes with OpenNLP +too. TBD which one to use, they're fairly equivalent. 
\ No newline at end of file diff --git a/third-party/src/main/java/ca/rmen/porterstemmer/PorterStemmer.java b/third-party/porterstemmer/src/main/java/ca/rmen/porterstemmer/PorterStemmer.java similarity index 100% rename from third-party/src/main/java/ca/rmen/porterstemmer/PorterStemmer.java rename to third-party/porterstemmer/src/main/java/ca/rmen/porterstemmer/PorterStemmer.java diff --git a/third-party/rdrpostagger/build.gradle b/third-party/rdrpostagger/build.gradle new file mode 100644 index 00000000..de627417 --- /dev/null +++ b/third-party/rdrpostagger/build.gradle @@ -0,0 +1,16 @@ +plugins { + id 'java' +} + +java { + toolchain { + languageVersion.set(JavaLanguageVersion.of(17)) + } +} + +dependencies { +} + +test { + useJUnitPlatform() +} diff --git a/third-party/rdrpostagger/readme.md b/third-party/rdrpostagger/readme.md new file mode 100644 index 00000000..1627eb55 --- /dev/null +++ b/third-party/rdrpostagger/readme.md @@ -0,0 +1,12 @@ +# RDRPosTagger + +[RDRPosTagger](https://github.com/datquocnguyen/RDRPOSTagger) - GPL3 + +datquocnguyen's excellent fast POS tagger. It's been crudely modified to be faster. +Unlike the original, it only does English. + +## Citations + +- Dat Quoc Nguyen, Dai Quoc Nguyen, Dang Duc Pham and Son Bao Pham. [RDRPOSTagger: A Ripple Down Rules-based Part-Of-Speech Tagger](http://www.aclweb.org/anthology/E14-2005). In *Proceedings of the Demonstrations at the 14th Conference of the European Chapter of the Association for Computational Linguistics*, EACL 2014, pp. 17-20, 2014. [[.PDF]](http://www.aclweb.org/anthology/E14-2005) [[.bib]](http://www.aclweb.org/anthology/E14-2005.bib) + +- Dat Quoc Nguyen, Dai Quoc Nguyen, Dang Duc Pham and Son Bao Pham. [A Robust Transformation-Based Learning Approach Using Ripple Down Rules for Part-Of-Speech Tagging](http://content.iospress.com/articles/ai-communications/aic698). *AI Communications* (AICom), vol. 29, no. 3, pp. 409-422, 2016. 
[[.PDF]](http://arxiv.org/pdf/1412.4021.pdf) [[.bib]](http://rdrpostagger.sourceforge.net/AICom.bib) diff --git a/third-party/src/main/java/com/github/datquocnguyen/FWObject.java b/third-party/rdrpostagger/src/main/java/com/github/datquocnguyen/FWObject.java similarity index 100% rename from third-party/src/main/java/com/github/datquocnguyen/FWObject.java rename to third-party/rdrpostagger/src/main/java/com/github/datquocnguyen/FWObject.java diff --git a/third-party/src/main/java/com/github/datquocnguyen/InitialTagger.java b/third-party/rdrpostagger/src/main/java/com/github/datquocnguyen/InitialTagger.java similarity index 100% rename from third-party/src/main/java/com/github/datquocnguyen/InitialTagger.java rename to third-party/rdrpostagger/src/main/java/com/github/datquocnguyen/InitialTagger.java diff --git a/third-party/src/main/java/com/github/datquocnguyen/Node.java b/third-party/rdrpostagger/src/main/java/com/github/datquocnguyen/Node.java similarity index 100% rename from third-party/src/main/java/com/github/datquocnguyen/Node.java rename to third-party/rdrpostagger/src/main/java/com/github/datquocnguyen/Node.java diff --git a/third-party/src/main/java/com/github/datquocnguyen/RDRPOSTagger.java b/third-party/rdrpostagger/src/main/java/com/github/datquocnguyen/RDRPOSTagger.java similarity index 100% rename from third-party/src/main/java/com/github/datquocnguyen/RDRPOSTagger.java rename to third-party/rdrpostagger/src/main/java/com/github/datquocnguyen/RDRPOSTagger.java diff --git a/third-party/src/main/java/com/github/datquocnguyen/Utils.java b/third-party/rdrpostagger/src/main/java/com/github/datquocnguyen/Utils.java similarity index 100% rename from third-party/src/main/java/com/github/datquocnguyen/Utils.java rename to third-party/rdrpostagger/src/main/java/com/github/datquocnguyen/Utils.java diff --git a/third-party/src/main/java/com/github/datquocnguyen/WordTag.java b/third-party/rdrpostagger/src/main/java/com/github/datquocnguyen/WordTag.java 
similarity index 100% rename from third-party/src/main/java/com/github/datquocnguyen/WordTag.java rename to third-party/rdrpostagger/src/main/java/com/github/datquocnguyen/WordTag.java diff --git a/third-party/src/main/java/com/google/gson/stream/JsonReader.java b/third-party/src/main/java/com/google/gson/stream/JsonReader.java deleted file mode 100644 index 213feffa..00000000 --- a/third-party/src/main/java/com/google/gson/stream/JsonReader.java +++ /dev/null @@ -1,1637 +0,0 @@ -/* - * Copyright (C) 2010 Google Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package com.google.gson.stream; - -import com.google.gson.internal.JsonReaderInternalAccess; -import com.google.gson.internal.bind.JsonTreeReader; -import java.io.Closeable; -import java.io.EOFException; -import java.io.IOException; -import java.io.Reader; -import java.util.Arrays; - -/** - * Reads a JSON (RFC 7159) - * encoded value as a stream of tokens. This stream includes both literal - * values (strings, numbers, booleans, and nulls) as well as the begin and - * end delimiters of objects and arrays. The tokens are traversed in - * depth-first order, the same order that they appear in the JSON document. - * Within JSON objects, name/value pairs are represented by a single token. - * - *

Parsing JSON

- * To create a recursive descent parser for your own JSON streams, first create - * an entry point method that creates a {@code JsonReader}. - * - *

Next, create handler methods for each structure in your JSON text. You'll - * need a method for each object type and for each array type. - *

- *

When a nested object or array is encountered, delegate to the - * corresponding handler method. - * - *

When an unknown name is encountered, strict parsers should fail with an - * exception. Lenient parsers should call {@link #skipValue()} to recursively - * skip the value's nested tokens, which may otherwise conflict. - * - *

If a value may be null, you should first check using {@link #peek()}. - * Null literals can be consumed using either {@link #nextNull()} or {@link - * #skipValue()}. - * - *

Example

- * Suppose we'd like to parse a stream of messages such as the following:
 {@code
- * [
- *   {
- *     "id": 912345678901,
- *     "text": "How do I read a JSON stream in Java?",
- *     "geo": null,
- *     "user": {
- *       "name": "json_newb",
- *       "followers_count": 41
- *      }
- *   },
- *   {
- *     "id": 912345678902,
- *     "text": "@json_newb just use JsonReader!",
- *     "geo": [50.454722, -104.606667],
- *     "user": {
- *       "name": "jesse",
- *       "followers_count": 2
- *     }
- *   }
- * ]}
- * This code implements the parser for the above structure:
   {@code
- *
- *   public List readJsonStream(InputStream in) throws IOException {
- *     JsonReader reader = new JsonReader(new InputStreamReader(in, "UTF-8"));
- *     try {
- *       return readMessagesArray(reader);
- *     } finally {
- *       reader.close();
- *     }
- *   }
- *
- *   public List readMessagesArray(JsonReader reader) throws IOException {
- *     List messages = new ArrayList();
- *
- *     reader.beginArray();
- *     while (reader.hasNext()) {
- *       messages.add(readMessage(reader));
- *     }
- *     reader.endArray();
- *     return messages;
- *   }
- *
- *   public Message readMessage(JsonReader reader) throws IOException {
- *     long id = -1;
- *     String text = null;
- *     User user = null;
- *     List geo = null;
- *
- *     reader.beginObject();
- *     while (reader.hasNext()) {
- *       String name = reader.nextName();
- *       if (name.equals("id")) {
- *         id = reader.nextLong();
- *       } else if (name.equals("text")) {
- *         text = reader.nextString();
- *       } else if (name.equals("geo") && reader.peek() != JsonToken.NULL) {
- *         geo = readDoublesArray(reader);
- *       } else if (name.equals("user")) {
- *         user = readUser(reader);
- *       } else {
- *         reader.skipValue();
- *       }
- *     }
- *     reader.endObject();
- *     return new Message(id, text, user, geo);
- *   }
- *
- *   public List readDoublesArray(JsonReader reader) throws IOException {
- *     List doubles = new ArrayList();
- *
- *     reader.beginArray();
- *     while (reader.hasNext()) {
- *       doubles.add(reader.nextDouble());
- *     }
- *     reader.endArray();
- *     return doubles;
- *   }
- *
- *   public User readUser(JsonReader reader) throws IOException {
- *     String username = null;
- *     int followersCount = -1;
- *
- *     reader.beginObject();
- *     while (reader.hasNext()) {
- *       String name = reader.nextName();
- *       if (name.equals("name")) {
- *         username = reader.nextString();
- *       } else if (name.equals("followers_count")) {
- *         followersCount = reader.nextInt();
- *       } else {
- *         reader.skipValue();
- *       }
- *     }
- *     reader.endObject();
- *     return new User(username, followersCount);
- *   }}
- * - *

Number Handling

- * This reader permits numeric values to be read as strings and string values to - * be read as numbers. For example, both elements of the JSON array {@code - * [1, "1"]} may be read using either {@link #nextInt} or {@link #nextString}. - * This behavior is intended to prevent lossy numeric conversions: double is - * JavaScript's only numeric type and very large values like {@code - * 9007199254740993} cannot be represented exactly on that platform. To minimize - * precision loss, extremely large values should be written and read as strings - * in JSON. - * - *

Non-Execute Prefix

- * Web servers that serve private data using JSON may be vulnerable to Cross-site - * request forgery attacks. In such an attack, a malicious site gains access - * to a private JSON file by executing it with an HTML {@code