(language-processing) Add a system property for configuring which language detection model to use

The flag is `system.languageDetectionModelVersion`.

* If negative, no model is used; `dictionaryAgreement` always returns 1.0.
* If 0, both models are used.
* If 1, the old crappy model is used.
* If 2, the new fasttext model is used.
This commit is contained in:
Viktor Lofgren 2024-01-31 13:02:16 +01:00
parent 98f3382cea
commit 9ce67029ca

View File

@ -20,16 +20,28 @@ public class LanguageFilter {
// Logger for this class.
private static final Logger logger = LoggerFactory.getLogger(LanguageFilter.class);
/**
 * Selects which language detection model(s) to consult. Read once, at class
 * initialization, from the {@code system.languageDetectionModelVersion} system property;
 * falls back to 0 when the property is not set.
 */
private static final int LANGUAGE_DETECTION_MODEL_VERSION =
Integer.getInteger("system.languageDetectionModelVersion", 0);
// First model; in the default (both-models) mode it acts as a pre-filter:
// a score below 0.1 rejects the document before the second model is consulted.
private final LanguagePredictionModel languagePredictionModel1;
// Second model; in the default (both-models) mode it supplies the returned score.
private final LanguagePredictionModel languagePredictionModel2;
/**
 * Estimates the probability that the given document is written in English.
 *
 * <p>Which model is consulted depends on {@code LANGUAGE_DETECTION_MODEL_VERSION}:
 * <ul>
 *   <li>negative: no model is consulted and 1.0 is returned;</li>
 *   <li>1: only {@code languagePredictionModel2} is consulted;</li>
 *   <li>2: only {@code languagePredictionModel1} is consulted;</li>
 *   <li>any other value (0 by default): {@code languagePredictionModel1} acts as a
 *       pre-filter, and {@code languagePredictionModel2} provides the score.</li>
 * </ul>
 *
 * @param dld the document language data to evaluate
 * @return the predicted probability of English; 0 when the pre-filter rejects the
 *         document in the default mode, 1.0 when detection is disabled
 */
public double dictionaryAgreement(DocumentLanguageData dld) {
    // Detection disabled: must be checked before touching any model, and must cover
    // every negative value, not just -1.
    if (LANGUAGE_DETECTION_MODEL_VERSION < 0) {
        return 1.0;
    }

    // NOTE(review): the version -> field mapping below is kept as written; confirm
    // against the constructor that version 1 / 2 select the intended model.
    if (LANGUAGE_DETECTION_MODEL_VERSION == 1) {
        return languagePredictionModel2.predictEnglish(dld);
    }
    if (LANGUAGE_DETECTION_MODEL_VERSION == 2) {
        return languagePredictionModel1.predictEnglish(dld);
    }

    // Default: run both models. The first one rejects documents that are clearly
    // not English; otherwise the second model's score is returned.
    if (languagePredictionModel1.predictEnglish(dld) < 0.1) {
        return 0;
    }
    return languagePredictionModel2.predictEnglish(dld);
}
@Inject