(crawler) Fix rare exception in content type handling caused by insufficient length checking of the array returned by split()

This commit is contained in:
Viktor Lofgren 2024-01-17 21:14:21 +01:00
parent ca80957143
commit 321fa94b8f

View File

@ -1,6 +1,7 @@
package nu.marginalia.contenttype; package nu.marginalia.contenttype;
import crawlercommons.mimetypes.MimeTypeDetector; import crawlercommons.mimetypes.MimeTypeDetector;
import org.apache.commons.lang3.StringUtils;
import org.jetbrains.annotations.NotNull; import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable; import org.jetbrains.annotations.Nullable;
import org.jsoup.Jsoup; import org.jsoup.Jsoup;
@ -35,10 +36,12 @@ public class ContentTypeParser {
if (contentType == null) if (contentType == null)
return Optional.empty(); return Optional.empty();
if (!contentType.contains(";")) var parts = StringUtils.split(contentType, ';');
return Optional.empty();
if (parts.length != 2) {
return Optional.empty();
}
var parts = contentType.split(";");
var content = parts[0].trim(); var content = parts[0].trim();
var extra = parts[1].trim(); var extra = parts[1].trim();