(converter) Add an upper limit of 128 KB (128,000 characters) on how much HTML we'll parse
This commit is contained in:
parent
f7560cb1d8
commit
60361f88ed
@ -107,6 +107,10 @@ public class HtmlDocumentProcessorPlugin extends AbstractDocumentProcessorPlugin
|
||||
throw new DisqualifiedException(DisqualificationReason.LANGUAGE);
|
||||
}
|
||||
|
||||
if (documentBody.length() > 128_000) { // 128kb
|
||||
documentBody = documentBody.substring(0, 128_000);
|
||||
}
|
||||
|
||||
Document doc = Jsoup.parse(documentBody);
|
||||
|
||||
if (!metaRobotsTag.allowIndexingByMetaTag(doc)) {
|
||||
|
Loading…
Reference in New Issue
Block a user