CatgirlIntelligenceAgency/code/process-models/crawling-model/build.gradle
Viktor Lofgren 787a20cbaa (crawling-model) Implement a parquet format for crawl data
This is not hooked into anything yet.  The change also makes modifications to the parquet-floor library to support reading and writing of byte[] arrays.  This is desirable since we may in the future want to support inputs that are not text-based, and codifying the assumption that each document is a string will definitely cause us grief down the line.
2023-12-13 16:22:19 +01:00

44 lines
1.1 KiB
Groovy

plugins {
id 'java'
id 'jvm-test-suite'
}
java {
toolchain {
languageVersion.set(JavaLanguageVersion.of(21))
}
}
dependencies {
implementation project(':code:common:model')
implementation project(':code:common:db')
implementation project(':code:common:process')
implementation project(':code:libraries:big-string')
implementation project(':code:api:index-api')
implementation project(':code:common:service-discovery')
implementation project(':code:common:service-client')
implementation project(':code:features-crawl:content-type')
implementation project(':code:libraries:language-processing')
implementation project(':third-party:parquet-floor')
implementation libs.bundles.slf4j
implementation libs.notnull
implementation libs.bundles.parquet
implementation libs.jwarc
implementation libs.gson
implementation libs.commons.io
implementation libs.okhttp3
implementation libs.jsoup
implementation libs.snakeyaml
implementation libs.zstd
testImplementation libs.bundles.slf4j.test
testImplementation libs.bundles.junit
testImplementation libs.mockito
}