CatgirlIntelligenceAgency/run/reconvert.sh

88 lines
1.7 KiB
Bash
Raw Normal View History

2023-03-04 13:19:01 +01:00
#!/bin/bash
# Stop at the first command that exits with a non-zero status.
set -o errexit
2023-03-04 14:52:03 +01:00
# Name of the sample to operate on: "crawl-" followed by the first
# command-line argument, or by "l" when that argument is missing or empty.
SAMPLE_NAME="crawl-${1:-l}"
# Directory holding the unpacked sample (note the trailing slash).
SAMPLE_DIR="samples/${SAMPLE_NAME}/"
2023-03-04 13:19:01 +01:00
2023-03-04 14:52:03 +01:00
## Configuration
2023-03-04 13:19:01 +01:00
# Each option string is assembled with one option per line, preceded and
# followed by a newline (the empty first argument yields the leading one).
# The three variables are exported further down in the script.

# JVM flags for the converter process.
printf -v CONVERTER_PROCESS_OPTS '%s\n' '' \
  '-Xmx16G' \
  '-XX:-CompactStrings' \
  '-XX:+UseParallelGC' \
  '-XX:GCTimeRatio=14' \
  '-XX:ParallelGCThreads=15'

# System properties for the loader process.
printf -v LOADER_PROCESS_OPTS '%s\n' '' \
  '-Dsmall-ram=TRUE' \
  '-Dlocal-index-path=vol/iw'

# System properties placed in JAVA_OPTS: a /crawl -> sample directory
# rewrite rule and an override for the JDBC connection string.
printf -v JAVA_OPTS '%s\n' '' \
  "-Dcrawl.rootDirRewrite=/crawl:${SAMPLE_DIR}" \
  '-Ddb.overrideJdbc=jdbc:mariadb://localhost:3306/WMSA_prod?rewriteBatchedStatements=true'
## Configuration ends
2023-03-04 14:50:08 +01:00
# Download a file with wget unless it is already present.
# Arguments:
#   $1 - destination path; nothing is fetched when a regular file exists there
#   $2 - URL to fetch
# Outputs:
#   A "** Downloading <url>" line on stdout before a download is attempted.
# Returns:
#   wget's exit status when a download is attempted, 0 otherwise.
function download_model {
  # Function-local so the names do not leak into the caller's scope.
  local model=$1
  local url=$2

  # Expansions are quoted so a path or URL containing whitespace or glob
  # characters stays a single word; unquoted, such a path makes the test
  # itself error out and the download is skipped silently.
  if [ ! -f "$model" ]; then
    echo "** Downloading $url"
    wget -O "$model" "$url"
  fi
}
2023-03-04 13:19:01 +01:00
## Switch to the directory containing this script (undone by the popd at the
## end). Quoted so a script path containing whitespace is not word-split.
pushd "$(dirname "$0")"
2023-03-04 14:50:08 +01:00
## Download and unpack the sample when its directory does not exist yet
if [ ! -d "${SAMPLE_DIR}" ]; then
  mkdir -p samples/

  SAMPLE_TARBALL="samples/${SAMPLE_NAME}.tar.gz"

  # If the download fails, remove whatever was written so that the existence
  # check below reports the failure. -f keeps a tarball that was never created
  # from turning the cleanup itself into a fatal error under `set -e`.
  if ! download_model "${SAMPLE_TARBALL}" "https://downloads.marginalia.nu/${SAMPLE_TARBALL}"; then
    rm -f -- "${SAMPLE_TARBALL}"
  fi

  if [ ! -f "${SAMPLE_TARBALL}" ]; then
    echo "!! Failed"
    exit 255
  fi

  mkdir -p "samples/${SAMPLE_NAME}"

  # Unpack only when the sample's plan.yaml is not already in place; the
  # archive's top-level directory is stripped on extraction.
  if [ ! -f "${SAMPLE_DIR}/plan.yaml" ]; then
    echo "Uncompressing"
    tar zxf "${SAMPLE_TARBALL}" --strip-components=1 -C "${SAMPLE_DIR}"
  fi
fi
2023-03-04 13:19:01 +01:00
## Wipe the old index data
## (-f: files that do not exist are not an error; the sample path is quoted
## because it is derived from a command-line argument)
rm -f -- "${SAMPLE_DIR}/process/process.log"
rm -f -- vol/iw/dictionary.dat
rm -f -- vol/iw/index.dat
## Upgrade the tools: drop the installed copies, then unpack the distribution
## archives from the build output under ../crawl into install/
rm -rf install/loader-process install/converter-process
for dist_tar in \
  loading-process/build/distributions/loader-process.tar \
  converting-process/build/distributions/converter-process.tar
do
  tar xf "../crawl/${dist_tar}" -C install/
done
# Make the unpacked tools' launchers reachable by name. The entries are
# relative paths, so they resolve against the current working directory.
export PATH="${PATH}:install/converter-process/bin:install/loader-process/bin"
export WMSA_HOME=.
# Pass the option strings on to the child processes through the environment.
export JAVA_OPTS CONVERTER_PROCESS_OPTS LOADER_PROCESS_OPTS
## Run the converter, then the loader, on the sample's plan.yaml.
## The path is quoted so it reaches each tool as a single argument.
converter-process "${SAMPLE_DIR}/plan.yaml"
loader-process "${SAMPLE_DIR}/plan.yaml"

## Move index.dat into the 0/ subdirectory under the name page-index.dat
mv vol/iw/index.dat vol/iw/0/page-index.dat

## Return to the directory the script was started from
popd