(converter) Add upper 128KB limit to how much HTML we'll parse

This commit is contained in:
Viktor Lofgren 2024-01-03 23:14:03 +01:00
parent f7560cb1d8
commit 60361f88ed

View File

@@ -107,6 +107,10 @@ public class HtmlDocumentProcessorPlugin extends AbstractDocumentProcessorPlugin
throw new DisqualifiedException(DisqualificationReason.LANGUAGE);
}
if (documentBody.length() > 128_000) { // 128kb
documentBody = documentBody.substring(0, 128_000);
}
Document doc = Jsoup.parse(documentBody);
if (!metaRobotsTag.allowIndexingByMetaTag(doc)) {