From dbe974f5103db9bdd270e31c64c1f6321991e64b Mon Sep 17 00:00:00 2001 From: Viktor Lofgren Date: Mon, 11 Sep 2023 09:02:58 +0200 Subject: [PATCH] (parquet) Use ZSTD compression by default. --- settings.gradle | 7 +++++-- third-party/parquet-floor/build.gradle | 5 ++++- .../main/java/blue/strategic/parquet/ParquetWriter.java | 2 +- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/settings.gradle b/settings.gradle index 18c86ddd..884160c9 100644 --- a/settings.gradle +++ b/settings.gradle @@ -116,6 +116,7 @@ dependencyResolutionManagement { library('spark', 'com.sparkjava', 'spark-core').version('2.9.4') library('guice', 'com.google.inject', 'guice').version('7.0.0') library('guava', 'com.google.guava', 'guava').version('32.0.1-jre') + library('protobuf', 'com.google.protobuf', 'protobuf-java').version('3.0.0') library('rxjava', 'io.reactivex.rxjava3', 'rxjava').version('3.1.6') @@ -160,7 +161,6 @@ dependencyResolutionManagement { library('opencsv','com.opencsv','opencsv').version('5.6') library('bucket4j','com.github.vladimir-bukhtoyarov','bucket4j-core').version('7.5.0') - library('protobuf','com.google.protobuf','protobuf-java').version('3.0.0') library('gson','com.google.code.gson','gson').version('2.10.1') library('gson-type-adapter','com.github.Marcono1234','gson-record-type-adapter-factory').version('0.2.0') @@ -182,6 +182,9 @@ dependencyResolutionManagement { library('sqlite','org.xerial','sqlite-jdbc').version('3.41.2.1') + library('parquet-column', 'org.apache.parquet','parquet-column').version('1.13.1') + library('parquet-hadoop', 'org.apache.parquet','parquet-hadoop').version('1.13.1') + bundle('slf4j', ['slf4j.api', 'log4j.api', 'log4j.core', 'log4j.slf4j']) bundle('slf4j.test', ['slf4j.jdk14']) bundle('prometheus', ['prometheus', 'prometheus-servlet', 'prometheus-server', 'prometheus-hotspot']) @@ -192,7 +195,7 @@ dependencyResolutionManagement { bundle('gson', ['gson', 'gson-type-adapter']) bundle('httpcomponents', ['httpcomponents.core', 'httpcomponents.client']) - + bundle('parquet', ['parquet-column', 'parquet-hadoop']) bundle('junit', ['junit.jupiter', 'junit.jupiter.engine']) } diff --git a/third-party/parquet-floor/build.gradle b/third-party/parquet-floor/build.gradle index d286c43d..f8393044 100644 --- a/third-party/parquet-floor/build.gradle +++ b/third-party/parquet-floor/build.gradle @@ -9,9 +9,12 @@ java { } dependencies { - implementation 'org.apache.parquet:parquet-column:1.13.1' + implementation ('org.apache.parquet:parquet-column:1.13.1') { + transitive = true + } implementation('org.apache.parquet:parquet-hadoop:1.13.1') { exclude group: 'commons-pool', module: 'commons-pool' + transitive = true } } diff --git a/third-party/parquet-floor/src/main/java/blue/strategic/parquet/ParquetWriter.java b/third-party/parquet-floor/src/main/java/blue/strategic/parquet/ParquetWriter.java index 7d75b057..68d4ba76 100644 --- a/third-party/parquet-floor/src/main/java/blue/strategic/parquet/ParquetWriter.java +++ b/third-party/parquet-floor/src/main/java/blue/strategic/parquet/ParquetWriter.java @@ -62,7 +62,7 @@ public final class ParquetWriter implements Closeable { this.writer = new Builder(outputFile) .withType(schema) .withDehydrator(dehydrator) - .withCompressionCodec(CompressionCodecName.SNAPPY) + .withCompressionCodec(CompressionCodecName.ZSTD) .withWriterVersion(ParquetProperties.WriterVersion.PARQUET_2_0) .build(); }