(converter) Add upper 128KB limit to how much HTML we'll parse

This commit is contained in:
Viktor Lofgren 2024-01-03 23:14:03 +01:00
parent 33c2188c87
commit fdec565b34

View File

@@ -107,6 +107,10 @@ public class HtmlDocumentProcessorPlugin extends AbstractDocumentProcessorPlugin
throw new DisqualifiedException(DisqualificationReason.LANGUAGE);
}
if (documentBody.length() > 128_000) { // 128kb
documentBody = documentBody.substring(0, 128_000);
}
Document doc = Jsoup.parse(documentBody);
if (!metaRobotsTag.allowIndexingByMetaTag(doc)) {