#!/bin/bash
#
# Fetches a sample crawl data set (unless it is already present), reinstalls
# the converter and loader tools from the local build output, and runs both
# against the sample's plan.yaml.
#
# Usage: pass an optional sample suffix as the first argument; the sample
# name is "crawl-<suffix>".

# Abort on the first command that fails outside of a condition or || / && list.
set -e

# Name of the sample set; the first argument selects the suffix, default "l".
SAMPLE_NAME="crawl-${1:-l}"
# Directory (relative to the script directory) that holds the unpacked sample.
# The trailing slash is part of the value and ends up in JAVA_OPTS as well.
SAMPLE_DIR="samples/${SAMPLE_NAME}/"

## Configuration

# Each value is a newline-separated option list; all three are exported
# further down. NOTE(review): presumably the tool launcher scripts read them
# and word-split the value -- confirm against the generated start scripts.

# JVM flags for the converter process (heap size and parallel GC tuning).
CONVERTER_PROCESS_OPTS="
-Xmx16G
-XX:-CompactStrings
-XX:+UseParallelGC
-XX:GCTimeRatio=14
-XX:ParallelGCThreads=15
"

# System properties for the loader process.
LOADER_PROCESS_OPTS="
-Dsmall-ram=TRUE
-Dlocal-index-path=vol/iw
"

# System properties shared via JAVA_OPTS: a /crawl -> sample directory
# rewrite and a JDBC URL override pointing at a local MariaDB.
JAVA_OPTS="
-Dcrawl.rootDirRewrite=/crawl:${SAMPLE_DIR}
-Ddb.overrideJdbc=jdbc:mariadb://localhost:3306/WMSA_prod?rewriteBatchedStatements=true
"

## Configuration ends

#######################################
# Download a file with wget unless it is already present locally.
# Arguments:
#   $1 - local path to check and, if missing, write the download to
#   $2 - URL to fetch
# Outputs:
#   Prints "** Downloading <url>" to stdout before fetching.
# Returns:
#   0 if the file already exists or wget succeeds; otherwise wget's
#   non-zero status. Removing whatever wget left behind after a failure is
#   left to the caller.
#######################################
download_model() {
  local model=$1
  local url=$2

  if [ ! -f "$model" ]; then
    echo "** Downloading $url"
    wget -O "$model" "$url"
  fi
}

# Work from the directory that contains this script; every path below is
# relative to it. Quoted so a script path containing spaces still works.
pushd "$(dirname -- "$0")"

# Download and unpack the sample data, but only when the sample directory
# does not exist yet.
# NOTE(review): the directory is created before extraction, so a failed
# extraction leaves it in place and later runs skip this whole section.
if [ ! -d "${SAMPLE_DIR}" ]; then
  mkdir -p samples/

  SAMPLE_TARBALL="samples/${SAMPLE_NAME}.tar.gz"
  # On the left of ||, a failing download does not trigger set -e; instead the
  # (possibly partial) tarball is removed so the check below reports failure.
  download_model "${SAMPLE_TARBALL}" "https://downloads.marginalia.nu/${SAMPLE_TARBALL}" \
    || rm -f -- "${SAMPLE_TARBALL}"

  if [ ! -f "${SAMPLE_TARBALL}" ]; then
    echo "!! Failed"
    exit 255
  fi

  mkdir -p "samples/${SAMPLE_NAME}"
  if [ ! -f "${SAMPLE_DIR}/plan.yaml" ]; then
    echo "Uncompressing"
    # Drop the archive's top-level directory so its contents land directly
    # in the sample directory.
    tar zxf "${SAMPLE_TARBALL}" --strip-components=1 -C "${SAMPLE_DIR}"
  fi
fi

## Wipe the old index data

# -f keeps these from failing (and tripping set -e) when the files are absent,
# e.g. on a first run.
rm -f "${SAMPLE_DIR}/process/process.log"
rm -f vol/iw/dictionary.dat
rm -f vol/iw/index.dat

## Upgrade the tools

# Replace any previously installed tools with the distributions from the
# local build output.
rm -rf install/loader-process install/converter-process
# tar -C fails if the target directory is missing, so make sure it exists.
mkdir -p install
tar xf ../crawl/loading-process/build/distributions/loader-process.tar -C install/
tar xf ../crawl/converting-process/build/distributions/converter-process.tar -C install/

# Relative PATH entries: they resolve against the current directory, which is
# the script directory for the rest of the run.
PATH+=":install/converter-process/bin"
PATH+=":install/loader-process/bin"

# Environment handed to the converter and loader processes started below.
export WMSA_HOME=.
export PATH

export JAVA_OPTS
export CONVERTER_PROCESS_OPTS
export LOADER_PROCESS_OPTS

# Run the converter, then the loader, on the sample's plan. Under set -e the
# loader is not started if the converter exits non-zero.
converter-process "${SAMPLE_DIR}/plan.yaml"
loader-process "${SAMPLE_DIR}/plan.yaml"

# Move the freshly written index file to vol/iw/0/page-index.dat.
mv vol/iw/index.dat vol/iw/0/page-index.dat

# Undo the pushd done at the start of the run.
popd