diff --git a/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/fetcher/warc/WarcRecorder.java b/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/fetcher/warc/WarcRecorder.java index e31585ef..1bd640ca 100644 --- a/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/fetcher/warc/WarcRecorder.java +++ b/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/fetcher/warc/WarcRecorder.java @@ -203,7 +203,14 @@ public class WarcRecorder implements AutoCloseable { WarcDigestBuilder responseDigestBuilder = new WarcDigestBuilder(); WarcDigestBuilder payloadDigestBuilder = new WarcDigestBuilder(); - byte[] bytes = documentBody.getBytes(); + byte[] bytes; + + if (documentBody == null) { + bytes = new byte[0]; + } + else { + bytes = documentBody.getBytes(); + } String fakeHeaders = STR.""" Content-Type: \{contentType} diff --git a/code/processes/crawling-process/src/test/java/nu/marginalia/crawl/retreival/fetcher/WarcRecorderTest.java b/code/processes/crawling-process/src/test/java/nu/marginalia/crawl/retreival/fetcher/WarcRecorderTest.java index cdc10bd2..4c533b13 100644 --- a/code/processes/crawling-process/src/test/java/nu/marginalia/crawl/retreival/fetcher/WarcRecorderTest.java +++ b/code/processes/crawling-process/src/test/java/nu/marginalia/crawl/retreival/fetcher/WarcRecorderTest.java @@ -91,6 +91,18 @@ class WarcRecorderTest { } } + @Test + public void flagAsSkippedNullBody() throws IOException, URISyntaxException { + + try (var recorder = new WarcRecorder(fileNameWarc)) { + recorder.flagAsSkipped(new EdgeUrl("https://www.marginalia.nu/"), + "text/html", + 200, + null); + } + + } + @Test public void testSaveImport() throws URISyntaxException, IOException { try (var recorder = new WarcRecorder(fileNameWarc)) {