From 7286596fb420b6a0ee83e86b7b6d91f0bc91ba3a Mon Sep 17 00:00:00 2001 From: Viktor Lofgren Date: Tue, 6 Feb 2024 12:11:39 +0100 Subject: [PATCH] (deps) Remove monkey patched GSON The codebase used to have a monkey patched version of gson that made special optimizations for the unusually large JSON files that used to store e.g. crawl data. Since JSON is no longer used in this fashion, the GSON fork is not needed anymore. --- .../processes/converting-process/build.gradle | 3 - settings.gradle | 2 - third-party/README.md | 3 +- third-party/monkey-patch-gson/build.gradle | 17 - third-party/monkey-patch-gson/readme.md | 13 - .../com/google/gson/stream/JsonReader.java | 1653 ----------------- 6 files changed, 1 insertion(+), 1690 deletions(-) delete mode 100644 third-party/monkey-patch-gson/build.gradle delete mode 100644 third-party/monkey-patch-gson/readme.md delete mode 100644 third-party/monkey-patch-gson/src/main/java/com/google/gson/stream/JsonReader.java diff --git a/code/processes/converting-process/build.gradle b/code/processes/converting-process/build.gradle index 52b093fc..011994ea 100644 --- a/code/processes/converting-process/build.gradle +++ b/code/processes/converting-process/build.gradle @@ -19,9 +19,6 @@ application { tasks.distZip.enabled = false dependencies { - - implementation project(':third-party:monkey-patch-gson') - implementation project(':code:common:process') implementation project(':third-party:porterstemmer') diff --git a/settings.gradle b/settings.gradle index 190df710..8a32a74a 100644 --- a/settings.gradle +++ b/settings.gradle @@ -93,12 +93,10 @@ include 'third-party:rdrpostagger' include 'third-party:openzim' include 'third-party:count-min-sketch' include 'third-party:monkey-patch-opennlp' -include 'third-party:monkey-patch-gson' include 'third-party:commons-codec' include 'third-party:parquet-floor' include 'third-party:encyclopedia-marginalia-nu' - dependencyResolutionManagement { repositories { diff --git a/third-party/README.md 
b/third-party/README.md index 9913e658..68ccc4cf 100644 --- a/third-party/README.md +++ b/third-party/README.md @@ -17,5 +17,4 @@ or lack an artifact, or to override some default that is inappropriate for the t * [Count-Min-Sketch](count-min-sketch/) - Apache 2.0 ### Monkey Patched -* [Stanford OpenNLP](monkey-patch-opennlp/) - Apache-2.0 -* [GSON](monkey-patch-gson/) - Apache-2.0 \ No newline at end of file +* [Stanford OpenNLP](monkey-patch-opennlp/) - Apache-2.0 \ No newline at end of file diff --git a/third-party/monkey-patch-gson/build.gradle b/third-party/monkey-patch-gson/build.gradle deleted file mode 100644 index 6ae59c6b..00000000 --- a/third-party/monkey-patch-gson/build.gradle +++ /dev/null @@ -1,17 +0,0 @@ -plugins { - id 'java' -} - -java { - toolchain { - languageVersion.set(JavaLanguageVersion.of(21)) - } -} - -dependencies { - implementation libs.bundles.gson -} - -test { - useJUnitPlatform() -} diff --git a/third-party/monkey-patch-gson/readme.md b/third-party/monkey-patch-gson/readme.md deleted file mode 100644 index 73b478ad..00000000 --- a/third-party/monkey-patch-gson/readme.md +++ /dev/null @@ -1,13 +0,0 @@ -# Monkey Patched GSON - -Stanford OpenNLP - Apache-2.0 - -## Rationale - -GSON makes some assumptions that make it not work very well -for deserializing extremely large JSON objects. This patch -makes the code technically leak memory, but the way it's used -makes this not much of a problem. - -It should only be applied to the converter or possibly -loader processes, not the services. \ No newline at end of file diff --git a/third-party/monkey-patch-gson/src/main/java/com/google/gson/stream/JsonReader.java b/third-party/monkey-patch-gson/src/main/java/com/google/gson/stream/JsonReader.java deleted file mode 100644 index b319bcbd..00000000 --- a/third-party/monkey-patch-gson/src/main/java/com/google/gson/stream/JsonReader.java +++ /dev/null @@ -1,1653 +0,0 @@ -/* - * Copyright (C) 2010 Google Inc. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package com.google.gson.stream; - -import com.google.gson.internal.JsonReaderInternalAccess; -import com.google.gson.internal.bind.JsonTreeReader; -import java.io.Closeable; -import java.io.EOFException; -import java.io.IOException; -import java.io.Reader; -import java.util.Arrays; - -/** - * Reads a JSON (RFC 7159) - * encoded value as a stream of tokens. This stream includes both literal - * values (strings, numbers, booleans, and nulls) as well as the begin and - * end delimiters of objects and arrays. The tokens are traversed in - * depth-first order, the same order that they appear in the JSON document. - * Within JSON objects, name/value pairs are represented by a single token. - * - *

Parsing JSON

- * To create a recursive descent parser for your own JSON streams, first create - * an entry point method that creates a {@code JsonReader}. - * - *

Next, create handler methods for each structure in your JSON text. You'll - * need a method for each object type and for each array type. - *

- *

When a nested object or array is encountered, delegate to the - * corresponding handler method. - * - *

When an unknown name is encountered, strict parsers should fail with an - * exception. Lenient parsers should call {@link #skipValue()} to recursively - * skip the value's nested tokens, which may otherwise conflict. - * - *

If a value may be null, you should first check using {@link #peek()}. - * Null literals can be consumed using either {@link #nextNull()} or {@link - * #skipValue()}. - * - *

Example

- * Suppose we'd like to parse a stream of messages such as the following:
 {@code
- * [
- *   {
- *     "id": 912345678901,
- *     "text": "How do I read a JSON stream in Java?",
- *     "geo": null,
- *     "user": {
- *       "name": "json_newb",
- *       "followers_count": 41
- *      }
- *   },
- *   {
- *     "id": 912345678902,
- *     "text": "@json_newb just use JsonReader!",
- *     "geo": [50.454722, -104.606667],
- *     "user": {
- *       "name": "jesse",
- *       "followers_count": 2
- *     }
- *   }
- * ]}
- * This code implements the parser for the above structure:
   {@code
- *
- *   public List<Message> readJsonStream(InputStream in) throws IOException {
- *     JsonReader reader = new JsonReader(new InputStreamReader(in, "UTF-8"));
- *     try {
- *       return readMessagesArray(reader);
- *     } finally {
- *       reader.close();
- *     }
- *   }
- *
- *   public List<Message> readMessagesArray(JsonReader reader) throws IOException {
- *     List<Message> messages = new ArrayList<Message>();
- *
- *     reader.beginArray();
- *     while (reader.hasNext()) {
- *       messages.add(readMessage(reader));
- *     }
- *     reader.endArray();
- *     return messages;
- *   }
- *
- *   public Message readMessage(JsonReader reader) throws IOException {
- *     long id = -1;
- *     String text = null;
- *     User user = null;
- *     List<Double> geo = null;
- *
- *     reader.beginObject();
- *     while (reader.hasNext()) {
- *       String name = reader.nextName();
- *       if (name.equals("id")) {
- *         id = reader.nextLong();
- *       } else if (name.equals("text")) {
- *         text = reader.nextString();
- *       } else if (name.equals("geo") && reader.peek() != JsonToken.NULL) {
- *         geo = readDoublesArray(reader);
- *       } else if (name.equals("user")) {
- *         user = readUser(reader);
- *       } else {
- *         reader.skipValue();
- *       }
- *     }
- *     reader.endObject();
- *     return new Message(id, text, user, geo);
- *   }
- *
- *   public List<Double> readDoublesArray(JsonReader reader) throws IOException {
- *     List<Double> doubles = new ArrayList<Double>();
- *
- *     reader.beginArray();
- *     while (reader.hasNext()) {
- *       doubles.add(reader.nextDouble());
- *     }
- *     reader.endArray();
- *     return doubles;
- *   }
- *
- *   public User readUser(JsonReader reader) throws IOException {
- *     String username = null;
- *     int followersCount = -1;
- *
- *     reader.beginObject();
- *     while (reader.hasNext()) {
- *       String name = reader.nextName();
- *       if (name.equals("name")) {
- *         username = reader.nextString();
- *       } else if (name.equals("followers_count")) {
- *         followersCount = reader.nextInt();
- *       } else {
- *         reader.skipValue();
- *       }
- *     }
- *     reader.endObject();
- *     return new User(username, followersCount);
- *   }}
- * - *

Number Handling

- * This reader permits numeric values to be read as strings and string values to - * be read as numbers. For example, both elements of the JSON array {@code - * [1, "1"]} may be read using either {@link #nextInt} or {@link #nextString}. - * This behavior is intended to prevent lossy numeric conversions: double is - * JavaScript's only numeric type and very large values like {@code - * 9007199254740993} cannot be represented exactly on that platform. To minimize - * precision loss, extremely large values should be written and read as strings - * in JSON. - * - *

Non-Execute Prefix

- * Web servers that serve private data using JSON may be vulnerable to Cross-site - * request forgery attacks. In such an attack, a malicious site gains access - * to a private JSON file by executing it with an HTML {@code