From 321fa94b8f6cbf0c9aacea4afdd9b4e08d8ab9fd Mon Sep 17 00:00:00 2001 From: Viktor Lofgren Date: Wed, 17 Jan 2024 21:14:21 +0100 Subject: [PATCH] (crawler) Fix rare exception in content type handling due to improper length checking of a split() array --- .../nu/marginalia/contenttype/ContentTypeParser.java | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/code/features-crawl/content-type/src/main/java/nu/marginalia/contenttype/ContentTypeParser.java b/code/features-crawl/content-type/src/main/java/nu/marginalia/contenttype/ContentTypeParser.java index 5b794246..7b05bda0 100644 --- a/code/features-crawl/content-type/src/main/java/nu/marginalia/contenttype/ContentTypeParser.java +++ b/code/features-crawl/content-type/src/main/java/nu/marginalia/contenttype/ContentTypeParser.java @@ -1,6 +1,7 @@ package nu.marginalia.contenttype; import crawlercommons.mimetypes.MimeTypeDetector; +import org.apache.commons.lang3.StringUtils; import org.jetbrains.annotations.NotNull; import org.jetbrains.annotations.Nullable; import org.jsoup.Jsoup; @@ -35,10 +36,12 @@ public class ContentTypeParser { if (contentType == null) return Optional.empty(); - if (!contentType.contains(";")) - return Optional.empty(); + var parts = StringUtils.split(contentType, ';'); + + if (parts.length != 2) { + return Optional.empty(); + } - var parts = contentType.split(";"); var content = parts[0].trim(); var extra = parts[1].trim();