(language-processing) Add a system property for configuring which language detection model to use
The flag is `system.languageDetectionModelVersion`. If it is negative, no model is used. If it is 0 (the default), both models are used. If it is 1, only the old, crappy model is used. If it is 2, only the new fasttext model is used.
This commit is contained in:
parent
98f3382cea
commit
9ce67029ca
@ -20,16 +20,28 @@ public class LanguageFilter {
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(LanguageFilter.class);
|
||||
|
||||
private static final int LANGUAGE_DETECTION_MODEL_VERSION =
|
||||
Integer.getInteger("system.languageDetectionModelVersion", 0);
|
||||
private final LanguagePredictionModel languagePredictionModel1;
|
||||
private final LanguagePredictionModel languagePredictionModel2;
|
||||
|
||||
|
||||
/** Returns the probability the language is in English */
|
||||
public double dictionaryAgreement(DocumentLanguageData dld) {
|
||||
if (languagePredictionModel1.predictEnglish(dld) < 0.1)
|
||||
return 0;
|
||||
if(LANGUAGE_DETECTION_MODEL_VERSION == -1) return 1.0;
|
||||
|
||||
if (LANGUAGE_DETECTION_MODEL_VERSION == 1) {
|
||||
return languagePredictionModel2.predictEnglish(dld);
|
||||
}
|
||||
else if (LANGUAGE_DETECTION_MODEL_VERSION == 2) {
|
||||
return languagePredictionModel1.predictEnglish(dld);
|
||||
}
|
||||
else { // default is to run both models
|
||||
if (languagePredictionModel1.predictEnglish(dld) < 0.1)
|
||||
return 0;
|
||||
return languagePredictionModel2.predictEnglish(dld);
|
||||
}
|
||||
|
||||
return languagePredictionModel2.predictEnglish(dld);
|
||||
}
|
||||
|
||||
@Inject
|
||||
|
Loading…
Reference in New Issue
Block a user