(crawler) Clean up stale warc files
We should probably have an option to keep them, but not by default!
This commit is contained in:
parent
88551043cd
commit
25d086c4e1
@ -254,13 +254,14 @@ public class CrawlerMain {
|
|||||||
logger.info("Fetched {}", domain);
|
logger.info("Fetched {}", domain);
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
logger.error("Error fetching domain " + domain, e);
|
logger.error("Error fetching domain " + domain, e);
|
||||||
Files.deleteIfExists(newWarcFile);
|
|
||||||
Files.deleteIfExists(tempFile);
|
|
||||||
}
|
}
|
||||||
finally {
|
finally {
|
||||||
// We don't need to double-count these; it's also kept in the workLog
|
// We don't need to double-count these; it's also kept in the workLog
|
||||||
processingIds.remove(domain);
|
processingIds.remove(domain);
|
||||||
Thread.currentThread().setName("[idle]");
|
Thread.currentThread().setName("[idle]");
|
||||||
|
|
||||||
|
Files.deleteIfExists(newWarcFile);
|
||||||
|
Files.deleteIfExists(tempFile);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user