(sideload) Fix sideloading of explicitly selected stackexchange files

Fix a bug where sideloading stackexchange data by explicitly selecting a single .7z file would fail: the path to the original .7z file was passed along to the converter, rather than the path to the pre-converted .db file.
This commit is contained in:
Viktor Lofgren 2024-02-17 13:24:04 +01:00
parent a175b36382
commit ebbe49d17b
2 changed files with 28 additions and 9 deletions

View File

@ -198,13 +198,16 @@ public class ConvertActor extends RecordActorPrototype {
// Convert stackexchange data to sqlite database // Convert stackexchange data to sqlite database
// (we can't use a Predigest- step here because the conversion is too complicated) // (we can't use a Predigest- step here because the conversion is too complicated)
StackExchangeSideloadHelper.convertStackexchangeData(sourcePath); var preprocessedPath = StackExchangeSideloadHelper.convertStackexchangeData(sourcePath);
if (preprocessedPath.isEmpty())
yield new Error("Failed to convert stackexchange 7z file to sqlite database");
// Pre-send convert request // Pre-send convert request
yield new ConvertWait( yield new ConvertWait(
processedArea.id(), processedArea.id(),
mqConverterOutbox.sendAsync(ConvertRequest.forStackexchange(sourcePath, processedArea.id())) mqConverterOutbox.sendAsync(ConvertRequest.forStackexchange(preprocessedPath.get(), processedArea.id()))
); );
} }
case ConvertWait(FileStorageId destFid, long msgId) -> { case ConvertWait(FileStorageId destFid, long msgId) -> {

View File

@ -19,33 +19,46 @@ public class StackExchangeSideloadHelper {
* The function is idempotent, so it is safe to call it multiple times on the same path * The function is idempotent, so it is safe to call it multiple times on the same path
* (it will not re-convert files that have already been successfully converted) * (it will not re-convert files that have already been successfully converted)
* */ * */
public static void convertStackexchangeData(Path sourcePath) { public static Optional<Path> convertStackexchangeData(Path sourcePath) {
if (Files.isDirectory(sourcePath)) { if (Files.isDirectory(sourcePath)) {
try (var contents = Files.list(sourcePath)) { try (var contents = Files.list(sourcePath)) {
contents.filter(Files::isRegularFile) contents.filter(Files::isRegularFile)
.parallel() .parallel()
.forEach(StackExchangeSideloadHelper::convertSingleStackexchangeFile); .forEach(StackExchangeSideloadHelper::convertSingleStackexchangeFile);
// If we process a directory, then the converter step will find the .db files automatically
return Optional.of(sourcePath);
} catch (IOException ex) { } catch (IOException ex) {
logger.warn("Failed to convert stackexchange 7z file to sqlite database", ex); logger.warn("Failed to convert stackexchange 7z file to sqlite database", ex);
} }
} else if (Files.isRegularFile(sourcePath)) { } else if (Files.isRegularFile(sourcePath)) {
convertSingleStackexchangeFile(sourcePath); // If we process a single file, then we need to alter the input path to the converted file's name
return convertSingleStackexchangeFile(sourcePath);
} }
return Optional.empty();
} }
private static void convertSingleStackexchangeFile(Path sourcePath) { /** Converts a single stackexchange 7z file to a sqlite database.
* The function is idempotent, so it is safe to call it multiple times on the same file
* (it will not re-convert files that have already been successfully converted)
*
* @return The path to the converted sqlite database, or an empty optional if the conversion failed
* */
private static Optional<Path> convertSingleStackexchangeFile(Path sourcePath) {
String fileName = sourcePath.toFile().getName(); String fileName = sourcePath.toFile().getName();
if (fileName.endsWith(".db")) return; if (fileName.endsWith(".db")) return Optional.of(sourcePath);
if (!fileName.endsWith(".7z")) return; if (!fileName.endsWith(".7z")) return Optional.empty();
Optional<String> domain = getStackexchangeDomainFromFilename(fileName); Optional<String> domain = getStackexchangeDomainFromFilename(fileName);
if (domain.isEmpty()) if (domain.isEmpty())
return; return Optional.empty();
try { try {
Path destPath = getStackexchangeDbPath(sourcePath); Path destPath = getStackexchangeDbPath(sourcePath);
if (Files.exists(destPath)) return; if (Files.exists(destPath))
return Optional.of(destPath);
Path tempFile = Files.createTempFile(destPath.getParent(), "processed", "db.tmp"); Path tempFile = Files.createTempFile(destPath.getParent(), "processed", "db.tmp");
try { try {
@ -53,6 +66,8 @@ public class StackExchangeSideloadHelper {
StackExchangePostsDb.create(domain.get(), tempFile, sourcePath); StackExchangePostsDb.create(domain.get(), tempFile, sourcePath);
logger.info("Finished converting stackexchange 7z file {} to sqlite database", sourcePath); logger.info("Finished converting stackexchange 7z file {} to sqlite database", sourcePath);
Files.move(tempFile, destPath, StandardCopyOption.REPLACE_EXISTING); Files.move(tempFile, destPath, StandardCopyOption.REPLACE_EXISTING);
return Optional.of(destPath);
} catch (Exception e) { } catch (Exception e) {
logger.error("Failed to convert stackexchange 7z file to sqlite database", e); logger.error("Failed to convert stackexchange 7z file to sqlite database", e);
Files.deleteIfExists(tempFile); Files.deleteIfExists(tempFile);
@ -61,6 +76,7 @@ public class StackExchangeSideloadHelper {
} catch (IOException ex) { } catch (IOException ex) {
logger.warn("Failed to convert stackexchange 7z file to sqlite database", ex); logger.warn("Failed to convert stackexchange 7z file to sqlite database", ex);
} }
return Optional.empty();
} }
private static Path getStackexchangeDbPath(Path sourcePath) throws IOException { private static Path getStackexchangeDbPath(Path sourcePath) throws IOException {