2022-05-19 17:45:26 +02:00
|
|
|
// Gradle plugins applied to this project.
plugins {
    id('java')

    // Lombok support for the Lombok annotations used by the sources.
    id('io.freefair.lombok') version '5.3.3.3'

    // Adds the `jmh` source set / `jmhJar` task configured further down.
    id('me.champeau.jmh') version '0.6.6'

    // Supplies the `Download` task type used by the download* tasks below.
    id('de.undercouch.download') version '5.1.0'
}
|
|
|
|
|
|
|
|
// Artifact repositories, consulted in declaration order.
repositories {
    mavenLocal()

    maven { url = 'https://artifactory.cronapp.io/public-release/' }
    maven { url = 'https://repo1.maven.org/maven2/' }
    maven { url = 'https://www2.ph.ed.ac.uk/maven2/' }
    maven { url = 'https://jitpack.io/' }

    // NOTE(review): JitPack is declared above as a general repository as well
    // as here as exclusive content -- confirm that both are intended.
    exclusiveContent {
        forRepository {
            maven { url = uri('https://jitpack.io') }
        }
        filter {
            // Only use JitPack for the `gson-record-type-adapter-factory` library
            includeModule 'com.github.Marcono1234', 'gson-record-type-adapter-factory'
        }
    }
}
|
|
|
|
|
2022-05-25 18:02:19 +02:00
|
|
|
// Additional source sets besides the conventional `main` and `test`.
//
// Both extra source sets compile and run against the output of `main` and
// `test`, so they can reuse production classes and shared test helpers.
sourceSets {
    // End-to-end tests, run by the `e2eTest` task.
    e2eTest {
        // The classpaths are properties of the source set itself, so they are
        // configured here rather than inside a (doubly nested) `java` closure.
        compileClasspath += main.output + test.output
        runtimeClasspath += main.output + test.output

        java {
            srcDir file('src/e2e/java')
        }
        resources {
            srcDir file('src/e2e/resources')
        }
    }

    // JMH benchmarks.
    jmh {
        compileClasspath += main.output + test.output
        runtimeClasspath += main.output + test.output

        java {
            srcDir file('src/jmh/java')
        }
        resources {
            srcDir file('src/jmh/resources')
        }
    }
}
|
|
|
|
|
2022-05-21 15:14:47 +02:00
|
|
|
// Compile and run with a Java 17 toolchain.
java {
    toolchain {
        languageVersion = JavaLanguageVersion.of(17)
    }
}
|
2022-08-15 13:58:18 +02:00
|
|
|
// Enable the zip64 extension for the benchmark jar.
jmhJar {
    zip64 = true
}
|
2022-05-19 17:45:26 +02:00
|
|
|
// Dependencies for the main, test, e2eTest and jmh configurations.
//
// Each module is declared once per configuration; the log4j modules used to
// be listed twice with identical coordinates.
dependencies {
    // Sibling projects
    implementation project(':third_party')
    implementation project(':protocol')

    implementation 'org.projectlombok:lombok:1.18.24'
    annotationProcessor 'org.projectlombok:lombok:1.18.24'

    implementation 'com.github.jknack:handlebars:4.3.0'
    implementation 'com.github.jknack:handlebars-markdown:4.2.1'

    implementation group: 'com.google.code.gson', name: 'gson', version: '2.9.0'
    implementation 'io.reactivex.rxjava3:rxjava:3.1.4'
    implementation "com.sparkjava:spark-core:2.9.3"
    implementation 'com.opencsv:opencsv:5.6'

    // Logging
    implementation group: 'org.apache.logging.log4j', name: 'log4j-api', version: '2.17.2'
    implementation group: 'org.apache.logging.log4j', name: 'log4j-core', version: '2.17.2'
    implementation group: 'org.apache.logging.log4j', name: 'log4j-slf4j-impl', version: '2.17.2'

    implementation 'org.slf4j:slf4j-api:1.7.36'

    implementation 'com.google.guava:guava:31.1-jre'
    implementation 'com.google.inject:guice:5.1.0'
    implementation 'com.github.jnr:jnr-ffi:2.2.12'
    implementation 'org.apache.httpcomponents:httpcore:4.4.15'
    implementation 'org.apache.httpcomponents:httpclient:4.5.13'
    implementation 'com.github.ThatJavaNerd:JRAW:1.1.0'

    implementation group: 'com.h2database', name: 'h2', version: '2.1.210'

    implementation 'org.jsoup:jsoup:1.14.3'
    implementation group: 'com.github.crawler-commons', name: 'crawler-commons', version: '1.2'

    implementation 'org.mariadb.jdbc:mariadb-java-client:3.0.4'
    implementation group: 'net.sf.trove4j', name: 'trove4j', version: '3.0.3'

    implementation 'com.zaxxer:HikariCP:5.0.1'

    implementation 'org.apache.opennlp:opennlp-tools:1.9.4'
    implementation 'io.prometheus:simpleclient:0.15.0'
    implementation 'io.prometheus:simpleclient_servlet:0.15.0'
    implementation 'io.prometheus:simpleclient_httpserver:0.15.0'
    implementation 'io.prometheus:simpleclient_hotspot:0.15.0'
    implementation 'com.fasterxml.jackson.core:jackson-databind:2.13.3'

    implementation group: 'org.yaml', name: 'snakeyaml', version: '1.30'

    implementation 'com.syncthemall:boilerpipe:1.2.2'
    implementation 'com.github.luben:zstd-jni:1.5.2-2'
    implementation 'com.github.vladimir-bukhtoyarov:bucket4j-core:7.5.0'
    implementation 'de.rototor.jeuclid:jeuclid-core:3.1.14'

    implementation 'org.imgscalr:imgscalr-lib:4.2'
    implementation 'org.jclarion:image4j:0.7'

    implementation 'commons-net:commons-net:3.6'
    implementation 'org.eclipse.jgit:org.eclipse.jgit:5.12.0.202106070339-r'
    implementation 'org.eclipse.jgit:org.eclipse.jgit.ssh.jsch:5.12.0.202106070339-r'
    implementation 'com.jcraft:jsch:0.1.55'

    implementation group: 'org.apache.commons', name: 'commons-compress', version: '1.21'
    implementation 'edu.stanford.nlp:stanford-corenlp:4.4.0'

    implementation group: 'it.unimi.dsi', name: 'fastutil', version: '8.5.8'
    implementation 'org.roaringbitmap:RoaringBitmap:0.9.27'
    implementation group: 'mysql', name: 'mysql-connector-java', version: '8.0.29'

    // Resolved from JitPack
    implementation 'com.github.Marcono1234:gson-record-type-adapter-factory:0.2.0'

    // Unit tests
    testImplementation 'org.junit.jupiter:junit-jupiter-api:5.8.2'
    testRuntimeOnly 'org.junit.jupiter:junit-jupiter-engine'
    testCompileOnly 'org.projectlombok:lombok:1.18.24'
    testImplementation 'org.projectlombok:lombok:1.18.24'
    testAnnotationProcessor 'org.projectlombok:lombok:1.18.24'

    testImplementation group: 'org.mockito', name: 'mockito-core', version: '4.5.1'

    testImplementation platform('org.testcontainers:testcontainers-bom:1.17.2')
    testImplementation 'org.testcontainers:mariadb:1.17.2'
    testImplementation "org.testcontainers:junit-jupiter:1.17.2"

    // End-to-end tests
    // NOTE(review): the testcontainers modules below mix 1.17.2 and 1.17.3, and
    // the selenium artifacts in this file mix 4.1.4, 4.2.1 and 4.3.0 -- confirm
    // whether these should be aligned on one version each.
    e2eTestImplementation 'org.junit.jupiter:junit-jupiter-api:5.8.2'
    e2eTestRuntimeOnly 'org.junit.jupiter:junit-jupiter-engine'
    e2eTestImplementation 'org.projectlombok:lombok:1.18.24'
    e2eTestAnnotationProcessor 'org.projectlombok:lombok:1.18.24'
    e2eTestImplementation 'org.testcontainers:nginx:1.17.3'
    e2eTestImplementation "org.testcontainers:junit-jupiter:1.17.2"
    e2eTestImplementation 'org.testcontainers:selenium:1.17.3'
    e2eTestImplementation 'org.seleniumhq.selenium:selenium-remote-driver:4.2.1'
    e2eTestImplementation 'org.seleniumhq.selenium:selenium-chrome-driver:4.2.1'

    implementation 'org.seleniumhq.selenium:selenium-chrome-driver:4.1.4'
    implementation 'org.seleniumhq.selenium:selenium-java:4.3.0'
    implementation 'org.sejda.imageio:webp-imageio:0.1.6'

    // Benchmarks
    jmh 'org.openjdk.jmh:jmh-core:1.35'
    jmh 'org.openjdk.jmh:jmh-generator-annprocess:1.35'

    implementation 'net.agkn:hll:1.6.0'
}
|
|
|
|
|
|
|
|
// The end-to-end test configuration inherits everything declared for
// `testImplementation`.
configurations {
    e2eTestImplementation.extendsFrom testImplementation
}
|
|
|
|
|
2022-08-04 21:14:17 +02:00
|
|
|
|
2022-05-19 17:45:26 +02:00
|
|
|
// Default test task: runs every JUnit Platform test that is not tagged "db".
test {
    useJUnitPlatform {
        excludeTags 'db'
    }

    // Up to 16 forked test JVMs in parallel, each limited to 8G of heap and
    // replaced after every test class.
    maxHeapSize = '8G'
    forkEvery = 1
    maxParallelForks = 16
}
|
|
|
|
|
|
|
|
// Runs only the JUnit Platform tests tagged "db", in a single forked JVM at a
// time.
task dbTest(type: Test) {
    useJUnitPlatform {
        includeTags 'db'
    }

    maxHeapSize = '8G'
    forkEvery = 1
    maxParallelForks = 1
}
|
|
|
|
|
2022-05-25 18:02:19 +02:00
|
|
|
// Runs the tests tagged "e2e" from the `e2eTest` source set, after the shadow
// jar and the data/model files they depend on have been produced.
task e2eTest(type: Test) {
    dependsOn ':shadowJar',
            'downloadTestData',
            'downloadRDRModelData',
            'downloadSentenceModelData',
            'downloadTokenModelData',
            'downloadTermFreqData',
            'IP2LocationFile'

    // Use the classes and classpath of the dedicated source set.
    testClassesDirs = sourceSets.e2eTest.output.classesDirs
    classpath = sourceSets.e2eTest.runtimeClasspath

    useJUnitPlatform {
        includeTags 'e2e'
    }

    maxHeapSize = '8G'
    forkEvery = 1
    maxParallelForks = 1
}
|
|
|
|
|
|
|
|
// Downloads the Wikipedia ZIM sample to data/test/; an existing file is not
// overwritten.
task downloadTestData(type: Download) {
    // NOTE(review): fetched over plain http -- confirm whether the mirror
    // offers https.
    src('http://hammurabi.acc.umu.se/mirror/kiwix.org/zim/wikipedia/wikipedia_en_100_nopic_2022-05.zim')
    dest(file('data/test/wikipedia_en_100_nopic.zim'))
    overwrite(false)
}
|
|
|
|
|
|
|
|
// Downloads the English RDRPOSTagger dictionary and rule files into
// data/models/; existing files are not overwritten.
task downloadRDRModelData(type: Download) {
    src([
            'https://raw.githubusercontent.com/datquocnguyen/RDRPOSTagger/master/Models/POS/English.DICT',
            'https://raw.githubusercontent.com/datquocnguyen/RDRPOSTagger/master/Models/POS/English.RDR'
    ])
    dest(file('data/models/'))
    overwrite(false)
}
|
|
|
|
|
|
|
|
// Downloads the OpenNLP English sentence model to
// data/models/opennlp-sentence.bin; an existing file is not overwritten.
task downloadSentenceModelData(type: Download) {
    src('https://dlcdn.apache.org/opennlp/models/ud-models-1.0/opennlp-en-ud-ewt-sentence-1.0-1.9.3.bin')
    dest(file('data/models/opennlp-sentence.bin'))
    overwrite(false)
}
|
|
|
|
// Downloads the OpenNLP English token model to
// data/models/opennlp-tokens.bin; an existing file is not overwritten.
task downloadTokenModelData(type: Download) {
    src('https://dlcdn.apache.org/opennlp/models/ud-models-1.0/opennlp-en-ud-ewt-tokens-1.0-1.9.3.bin')
    dest(file('data/models/opennlp-tokens.bin'))
    overwrite(false)
}
|
2022-05-27 14:27:44 +02:00
|
|
|
// Downloads the IP2Location LITE DB1 archive into data/models/; an existing
// file is not overwritten.
task downloadIP2LocationFile(type: Download) {
    src('https://download.ip2location.com/lite/IP2LOCATION-LITE-DB1.CSV.ZIP')
    dest(file('data/models/IP2LOCATION-LITE-DB1.CSV.ZIP'))
    overwrite(false)
}
|
|
|
|
// Unpacks the downloaded IP2Location archive into data/models/IP2LOC.
task IP2LocationFile(type: Copy) {
    dependsOn 'downloadIP2LocationFile'

    from zipTree(file('data/models/IP2LOCATION-LITE-DB1.CSV.ZIP'))
    into file('data/models/IP2LOC')
}
|
2022-05-25 18:02:19 +02:00
|
|
|
|
|
|
|
// Copies the term frequency data from a fixed location on the local machine
// into data/models/.
task downloadTermFreqData(type: Copy) {
    // TODO: Need hosting for this file
    from('/var/lib/wmsa/model/tfreq-new-algo3.bin')
    into('data/models/')
}
|
2022-05-19 17:45:26 +02:00
|
|
|
|