2023-03-04 14:35:50 +01:00
|
|
|
#!/bin/bash
|
|
|
|
|
2023-04-02 14:44:43 +02:00
|
|
|
# This script performs a first-time setup of the run/ directory and downloads
# third-party language models and other files that aren't suitable for
# storing in git.
|
|
|
|
|
2023-03-04 14:35:50 +01:00
|
|
|
# Abort the whole setup as soon as any command fails.
set -o errexit
|
|
|
|
|
2023-03-04 14:45:35 +01:00
|
|
|
#######################################
# Download a file unless it is already present on disk.
# Arguments:
#   $1 - destination path of the file
#   $2 - URL to fetch the file from
# Outputs:
#   Prints "** Downloading <url>" to stdout when a download is started.
# Returns:
#   0 if the file already existed or was downloaded successfully;
#   curl's non-zero exit status if the transfer failed.
#######################################
function download_model {
  local model=$1
  local url=$2
  # Download next to the target so the final rename stays on one filesystem.
  local partial="${model}.part"
  local status=0

  if [[ ! -f "$model" ]]; then
    echo "** Downloading $url"

    # -f: fail on HTTP errors instead of saving the server's error page
    # -S: still print an error message even though -s silences progress
    # -L: follow redirects rather than saving the redirect response
    curl -s -S -f -L -o "$partial" "$url" || status=$?

    if (( status != 0 )); then
      # Never leave a truncated file behind: a later run would see it and
      # wrongly assume the download had already completed.
      rm -f -- "$partial"
      return "$status"
    fi

    mv -- "$partial" "$model"
  fi
}
|
|
|
|
|
2023-03-04 14:35:50 +01:00
|
|
|
# Work from the directory that contains this script, wherever it was invoked
# from. The expansions are quoted so a path containing whitespace still
# resolves to a single argument.
pushd "$(dirname -- "$0")"

# Shared top-level directories.
mkdir -p model logs db install data samples
# Per-node directory trees (brace expansion yields every node/subdir pair).
mkdir -p {node-1,node-2}/{work,index,backup,samples/export}
|
2023-03-04 14:42:24 +01:00
|
|
|
|
2023-03-04 14:45:35 +01:00
|
|
|
# Model files to fetch, one "<destination> <source URL>" pair per entry.
model_downloads=(
  "model/English.DICT https://raw.githubusercontent.com/datquocnguyen/RDRPOSTagger/master/Models/POS/English.DICT"
  "model/English.RDR https://raw.githubusercontent.com/datquocnguyen/RDRPOSTagger/master/Models/POS/English.RDR"
  "model/opennlp-sentence.bin https://mirrors.estointernet.in/apache/opennlp/models/ud-models-1.0/opennlp-en-ud-ewt-sentence-1.0-1.9.3.bin"
  "model/opennlp-tokens.bin https://mirrors.estointernet.in/apache/opennlp/models/ud-models-1.0/opennlp-en-ud-ewt-tokens-1.0-1.9.3.bin"
  "model/ngrams.bin https://downloads.marginalia.nu/model/ngrams.bin"
  "model/tfreq-new-algo3.bin https://downloads.marginalia.nu/model/tfreq-new-algo3.bin"
  "model/lid.176.ftz https://downloads.marginalia.nu/model/lid.176.ftz"
)

for spec in "${model_downloads[@]}"; do
  # Everything before the first space is the destination, the rest is the URL.
  download_model "${spec%% *}" "${spec#* }"
done
|
2023-03-05 13:47:40 +01:00
|
|
|
|
2023-03-05 14:12:13 +01:00
|
|
|
# Fetch the IP2Location LITE DB1 archive and unpack it into data/.
geoip_archive=data/IP2LOCATION-LITE-DB1.CSV.ZIP
download_model "$geoip_archive" "https://download.ip2location.com/lite/IP2LOCATION-LITE-DB1.CSV.ZIP"
# -q: quiet; -n: never overwrite files that have already been extracted.
unzip -q -n -d data "$geoip_archive"
|
2023-03-05 13:47:40 +01:00
|
|
|
|
2023-03-22 15:11:22 +01:00
|
|
|
download_model data/adblock.txt "https://downloads.marginalia.nu/data/adblock.txt"

# suggestions.txt is fetched as a gzip archive; download and unpack it only
# when the uncompressed file is not already in place.
suggestions=data/suggestions.txt
if [[ ! -f "$suggestions" ]]; then
  download_model "${suggestions}.gz" "https://downloads.marginalia.nu/data/suggestions.txt.gz"
  gunzip "${suggestions}.gz"
fi
|
|
|
|
|
2023-03-05 14:12:13 +01:00
|
|
|
# Seed conf/ from the bundled template; an existing conf/ is left untouched.
[[ -d conf ]] || cp -r template/conf .
|
2023-03-04 14:42:24 +01:00
|
|
|
|
2023-03-04 14:35:50 +01:00
|
|
|
# Return to the directory that was current before the pushd earlier in this script.
popd
|