// Gradle build script for this module.
//
// Sets up the Java 17 toolchain, the repositories and dependencies, three
// test tasks (`test`, `dbTest`, `e2eTest`) selected by JUnit tags, and a
// handful of tasks that download or copy the data files that `e2eTest`
// depends on.

plugins {
    id 'java'
    id "io.freefair.lombok" version "5.3.3.3"
    id "me.champeau.jmh" version "0.6.6"
    id "de.undercouch.download" version "5.1.0"
}

repositories {
    mavenLocal()
    maven { url "https://artifactory.cronapp.io/public-release/" }
    maven { url "https://repo1.maven.org/maven2/" }
    maven { url "https://www2.ph.ed.ac.uk/maven2/" }
    // NOTE(review): this unrestricted JitPack declaration sits next to the
    // exclusive one below, so JitPack can still serve other modules. It is
    // kept because other `com.github.*` dependencies may rely on it --
    // confirm before removing.
    maven { url "https://jitpack.io/" }

    exclusiveContent {
        forRepository {
            maven { url = uri("https://jitpack.io") }
        }
        filter {
            // Only use JitPack for the `gson-record-type-adapter-factory` library
            includeModule("com.github.Marcono1234", "gson-record-type-adapter-factory")
        }
    }
}

sourceSets {
    // End-to-end tests live in src/e2e and compile/run against the output
    // of both the main and the test source sets.
    e2eTest {
        compileClasspath += sourceSets.main.output + sourceSets.test.output
        runtimeClasspath += sourceSets.main.output + sourceSets.test.output
        java.srcDir file('src/e2e/java')
        resources.srcDir file('src/e2e/resources')
    }
}

java {
    toolchain {
        languageVersion.set(JavaLanguageVersion.of(17))
    }
}

dependencies {
    implementation project(':third_party')

    implementation 'org.projectlombok:lombok:1.18.24'
    annotationProcessor 'org.projectlombok:lombok:1.18.24'

    implementation 'com.github.jknack:handlebars:4.3.0'
    implementation 'com.github.jknack:handlebars-markdown:4.2.1'

    implementation 'com.google.code.gson:gson:2.9.0'
    implementation 'io.reactivex.rxjava3:rxjava:3.1.4'
    implementation 'com.sparkjava:spark-core:2.9.3'
    implementation 'com.opencsv:opencsv:5.6'

    // Logging. Each log4j artifact is declared exactly once.
    implementation 'org.apache.logging.log4j:log4j-api:2.17.2'
    implementation 'org.apache.logging.log4j:log4j-core:2.17.2'
    implementation 'org.apache.logging.log4j:log4j-slf4j-impl:2.17.2'
    implementation 'org.slf4j:slf4j-api:1.7.36'

    implementation 'com.google.guava:guava:31.1-jre'
    implementation 'com.google.inject:guice:5.1.0'
    implementation 'com.github.jnr:jnr-ffi:2.2.12'
    implementation 'org.apache.httpcomponents:httpcore:4.4.15'
    implementation 'org.apache.httpcomponents:httpclient:4.5.13'
    implementation 'com.github.ThatJavaNerd:JRAW:1.1.0'

    implementation 'com.h2database:h2:2.1.210'
    implementation 'org.jsoup:jsoup:1.14.3'
    implementation 'com.github.crawler-commons:crawler-commons:1.2'
    implementation 'org.mariadb.jdbc:mariadb-java-client:3.0.4'
    implementation 'net.sf.trove4j:trove4j:3.0.3'
    implementation 'com.zaxxer:HikariCP:5.0.1'
    implementation 'org.apache.opennlp:opennlp-tools:1.9.4'

    implementation 'io.prometheus:simpleclient:0.15.0'
    implementation 'io.prometheus:simpleclient_servlet:0.15.0'
    implementation 'io.prometheus:simpleclient_httpserver:0.15.0'
    implementation 'io.prometheus:simpleclient_hotspot:0.15.0'

    implementation 'com.fasterxml.jackson.core:jackson-databind:2.13.3'
    implementation 'org.yaml:snakeyaml:1.30'
    implementation 'com.syncthemall:boilerpipe:1.2.2'
    implementation 'com.github.luben:zstd-jni:1.5.2-2'
    implementation 'com.github.vladimir-bukhtoyarov:bucket4j-core:7.5.0'
    implementation 'de.rototor.jeuclid:jeuclid-core:3.1.14'
    implementation 'org.imgscalr:imgscalr-lib:4.2'
    implementation 'org.jclarion:image4j:0.7'
    implementation 'commons-net:commons-net:3.6'

    implementation 'org.eclipse.jgit:org.eclipse.jgit:5.12.0.202106070339-r'
    implementation 'org.eclipse.jgit:org.eclipse.jgit.ssh.jsch:5.12.0.202106070339-r'
    implementation 'com.jcraft:jsch:0.1.55'

    implementation 'org.apache.commons:commons-compress:1.21'
    implementation 'edu.stanford.nlp:stanford-corenlp:4.4.0'
    implementation 'it.unimi.dsi:fastutil:8.5.8'
    implementation 'org.roaringbitmap:RoaringBitmap:0.9.27'
    implementation 'mysql:mysql-connector-java:8.0.29'
    implementation 'com.github.Marcono1234:gson-record-type-adapter-factory:0.2.0'

    // Unit-test dependencies.
    testImplementation 'org.junit.jupiter:junit-jupiter-api:5.8.2'
    testRuntimeOnly 'org.junit.jupiter:junit-jupiter-engine'

    testCompileOnly 'org.projectlombok:lombok:1.18.24'
    testImplementation 'org.projectlombok:lombok:1.18.24'
    testAnnotationProcessor 'org.projectlombok:lombok:1.18.24'

    testImplementation 'org.mockito:mockito-core:4.5.1'

    // NOTE(review): the BOM is imported, yet the Testcontainers modules
    // below still pin explicit versions (a mix of 1.17.2 and 1.17.3).
    // Consider dropping the explicit versions -- confirm intent first.
    testImplementation platform('org.testcontainers:testcontainers-bom:1.17.2')
    testImplementation 'org.testcontainers:mariadb:1.17.2'
    testImplementation 'org.testcontainers:junit-jupiter:1.17.2'

    // End-to-end test dependencies.
    e2eTestImplementation 'org.junit.jupiter:junit-jupiter-api:5.8.2'
    e2eTestRuntimeOnly 'org.junit.jupiter:junit-jupiter-engine'
    e2eTestImplementation 'org.projectlombok:lombok:1.18.24'
    e2eTestAnnotationProcessor 'org.projectlombok:lombok:1.18.24'
    e2eTestImplementation 'org.testcontainers:nginx:1.17.3'
    e2eTestImplementation 'org.testcontainers:junit-jupiter:1.17.2'
    e2eTestImplementation 'org.testcontainers:selenium:1.17.3'
    e2eTestImplementation 'org.seleniumhq.selenium:selenium-remote-driver:4.2.1'
    e2eTestImplementation 'org.seleniumhq.selenium:selenium-chrome-driver:4.2.1'

    // NOTE(review): Selenium is requested at 4.1.4, 4.2.1 and 4.3.0 in this
    // file. Gradle's default conflict resolution is expected to pick the
    // highest requested version -- consider aligning these explicitly.
    implementation 'org.seleniumhq.selenium:selenium-chrome-driver:4.1.4'
    implementation 'org.seleniumhq.selenium:selenium-java:4.3.0'
    implementation 'org.sejda.imageio:webp-imageio:0.1.6'
}

configurations {
    // e2e tests see everything the unit tests see.
    e2eTestImplementation.extendsFrom(testImplementation)
}

// Default unit tests: everything except tests tagged "db".
test {
    maxParallelForks = 16
    forkEvery = 1
    maxHeapSize = "8G"
    useJUnitPlatform {
        excludeTags "db"
    }
}

// Tests tagged "db", run in a single fork at a time.
task dbTest(type: Test) {
    maxParallelForks = 1
    forkEvery = 1
    maxHeapSize = "8G"
    useJUnitPlatform {
        includeTags "db"
    }
}

// Tests tagged "e2e", compiled from the e2eTest source set. Depends on the
// root project's shadowJar task and on the data tasks declared below.
task e2eTest(type: Test) {
    maxParallelForks = 1
    forkEvery = 1
    maxHeapSize = "8G"

    dependsOn ':shadowJar'
    dependsOn 'downloadTestData'
    dependsOn 'downloadRDRModelData'
    dependsOn 'downloadSentenceModelData'
    dependsOn 'downloadTokenModelData'
    dependsOn 'downloadTermFreqData'
    dependsOn 'IP2LocationFile'

    classpath = sourceSets.e2eTest.runtimeClasspath
    testClassesDirs = sourceSets.e2eTest.output.classesDirs
    useJUnitPlatform {
        includeTags "e2e"
    }
}

// Downloads a ZIM file into data/test/. `overwrite false` skips the download
// when the destination file already exists.
// NOTE(review): the source URL is plain HTTP and the download is not
// checksum-verified -- consider an HTTPS mirror if one is available.
task downloadTestData(type: Download) {
    src 'http://hammurabi.acc.umu.se/mirror/kiwix.org/zim/wikipedia/wikipedia_en_100_nopic_2022-05.zim'
    dest file('data/test/wikipedia_en_100_nopic.zim')
    overwrite false
}

// Downloads the English.DICT and English.RDR files into data/models/.
task downloadRDRModelData(type: Download) {
    src (['https://raw.githubusercontent.com/datquocnguyen/RDRPOSTagger/master/Models/POS/English.DICT',
          'https://raw.githubusercontent.com/datquocnguyen/RDRPOSTagger/master/Models/POS/English.RDR'])
    dest file('data/models/')
    overwrite false
}

// Downloads the OpenNLP sentence model to data/models/opennlp-sentence.bin.
task downloadSentenceModelData(type: Download) {
    src 'https://dlcdn.apache.org/opennlp/models/ud-models-1.0/opennlp-en-ud-ewt-sentence-1.0-1.9.3.bin'
    dest file('data/models/opennlp-sentence.bin')
    overwrite false
}

// Downloads the OpenNLP token model to data/models/opennlp-tokens.bin.
task downloadTokenModelData(type: Download) {
    src 'https://dlcdn.apache.org/opennlp/models/ud-models-1.0/opennlp-en-ud-ewt-tokens-1.0-1.9.3.bin'
    dest file('data/models/opennlp-tokens.bin')
    overwrite false
}

// Downloads the IP2Location archive into data/models/.
task downloadIP2LocationFile(type: Download) {
    src 'https://download.ip2location.com/lite/IP2LOCATION-LITE-DB1.CSV.ZIP'
    dest file('data/models/IP2LOCATION-LITE-DB1.CSV.ZIP')
    overwrite false
}

// Unpacks the downloaded IP2Location archive into data/models/IP2LOC.
task IP2LocationFile(type: Copy) {
    dependsOn 'downloadIP2LocationFile'

    def zipFile = file('data/models/IP2LOCATION-LITE-DB1.CSV.ZIP')
    def outputDir = file("data/models/IP2LOC")

    from zipTree(zipFile)
    into outputDir
}

// Copies the term-frequency file from a fixed absolute path on the build
// host into data/models/.
task downloadTermFreqData(type: Copy) {
    // TODO: Need hosting for this file
    from '/var/lib/wmsa/model/tfreq-new-algo3.bin'
    into 'data/models/'
}