CatgirlIntelligenceAgency/run/reconvert.sh
2023-03-04 14:50:08 +01:00

86 lines
1.7 KiB
Bash
Executable File

#!/bin/bash
set -e
## Configuration
SAMPLE_DIR="samples/crawl-${1:-l}/"
CONVERTER_PROCESS_OPTS="
-Xmx16G
-XX:-CompactStrings
-XX:+UseParallelGC
-XX:GCTimeRatio=14
-XX:ParallelGCThreads=15
"
LOADER_PROCESS_OPTS="
-Dsmall-ram=TRUE
-Dlocal-index-path=vol/iw
"
JAVA_OPTS="
-Dcrawl.rootDirRewrite=/crawl:${SAMPLE_DIR}
-Ddb.overrideJdbc=jdbc:mariadb://localhost:3306/WMSA_prod?rewriteBatchedStatements=true
"
## Configuration ends
function download_model {
model=$1
url=$2
if [ ! -f $model ]; then
echo "** Downloading $url"
wget -O $model $url
fi
}
pushd $(dirname $0)
if [ ! -d ${SAMPLE_DIR} ]; then
mkdir -p samples/
cmd/download_model ${SAMPLE_TARBALL} https://downloads.marginalia.nu/${SAMPLE_TARBALL} || rm ${SAMPLE_TARBALL}
if [ ! -f ${SAMPLE_TARBALL} ]; then
echo "!! Failed"
exit 255
fi
mkdir -p samples/crawl-${SAMPLE_NAME}
if [ ! -f $SAMPLE_DIR/plan.yaml ]; then
echo "Uncompressing"
tar zxf ${SAMPLE_TARBALL} --strip-components=1 -C ${SAMPLE_DIR}
fi
fi
## Wipe the old index data
rm -f ${SAMPLE_DIR}/process/process.log
rm -f vol/iw/dictionary.dat
rm -f vol/iw/index.dat
## Upgrade the tools
rm -rf install/loader-process install/converter-process
tar xf ../crawl/loading-process/build/distributions/loader-process.tar -C install/
tar xf ../crawl/converting-process/build/distributions/converter-process.tar -C install/
PATH+=":install/converter-process/bin"
PATH+=":install/loader-process/bin"
export WMSA_HOME=.
export PATH
export JAVA_OPTS
export CONVERTER_PROCESS_OPTS
export LOADER_PROCESS_OPTS
converter-process ${SAMPLE_DIR}/plan.yaml
loader-process ${SAMPLE_DIR}/plan.yaml
mv vol/iw/index.dat vol/iw/0/page-index.dat
popd