From 9ce67029ca5c21bc85e4cdf6ba6084d564612ae2 Mon Sep 17 00:00:00 2001 From: Viktor Lofgren Date: Wed, 31 Jan 2024 13:02:16 +0100 Subject: [PATCH] (language-processing) Add a system property for configuring which language detection model to use The flag is `system.languageDetectionModelVersion`. * If -1, no model is used and every document is given an English probability of 1.0. * If 0 (the default) or any other unlisted value, both models are used. * If 1, the old crappy model is used. * If 2, the new fasttext model is used. --- .../language/filter/LanguageFilter.java | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/code/libraries/language-processing/src/main/java/nu/marginalia/language/filter/LanguageFilter.java b/code/libraries/language-processing/src/main/java/nu/marginalia/language/filter/LanguageFilter.java index bf817735..9049eb4d 100644 --- a/code/libraries/language-processing/src/main/java/nu/marginalia/language/filter/LanguageFilter.java +++ b/code/libraries/language-processing/src/main/java/nu/marginalia/language/filter/LanguageFilter.java @@ -20,16 +20,28 @@ public class LanguageFilter { private static final Logger logger = LoggerFactory.getLogger(LanguageFilter.class); + private static final int LANGUAGE_DETECTION_MODEL_VERSION = + Integer.getInteger("system.languageDetectionModelVersion", 0); private final LanguagePredictionModel languagePredictionModel1; private final LanguagePredictionModel languagePredictionModel2; /** Returns the probability the language is in English */ public double dictionaryAgreement(DocumentLanguageData dld) { - if (languagePredictionModel1.predictEnglish(dld) < 0.1) - return 0; + if(LANGUAGE_DETECTION_MODEL_VERSION == -1) return 1.0; + + if (LANGUAGE_DETECTION_MODEL_VERSION == 1) { + return languagePredictionModel2.predictEnglish(dld); + } + else if (LANGUAGE_DETECTION_MODEL_VERSION == 2) { + return languagePredictionModel1.predictEnglish(dld); + } + else { // default is to run both models + if (languagePredictionModel1.predictEnglish(dld) < 0.1) + return 0; + return 
languagePredictionModel2.predictEnglish(dld); + } - return languagePredictionModel2.predictEnglish(dld); } @Inject