(sideloader) Fix integration problems with sideloaders

In the encyclopedia sideloader, wrap the article body in a div with the class "mw-content-text", which the WikiSpecialization class looks for during pruning, so that the articles get fairer treatment.

Also add generator keywords based on the generator type provided, to ensure that these documents show up in appropriate filters.

Further, add a new document flag value 'Sideloaded' so that sideloaded entries can be distinguished.
This commit is contained in:
Viktor Lofgren 2023-12-17 13:28:17 +01:00
parent 5ab2a22e88
commit bcad6492d6
3 changed files with 15 additions and 5 deletions

View File

@ -8,7 +8,7 @@ public enum DocumentFlags {
GeneratorDocs,
GeneratorForum,
GeneratorWiki,
Unused6,
Sideloaded,
Unused7,
Unused8,
;

View File

@ -63,6 +63,11 @@ public class SideloaderProcessing {
for (String keyword : extraKeywords)
ret.words.add(keyword, WordFlags.Subjects.asBit());
if (type == GeneratorType.WIKI)
ret.words.add("generator:wiki", WordFlags.Subjects.asBit());
else if (type == GeneratorType.DOCS)
ret.words.add("generator:docs", WordFlags.Subjects.asBit());
ret.details = details.details();
// Add a few things that we know about the document
@ -80,8 +85,8 @@ public class SideloaderProcessing {
PubDate.toYearByte(ret.details.pubYear),
(int) -ret.details.quality,
switch (type) {
case WIKI -> EnumSet.of(DocumentFlags.GeneratorWiki);
case DOCS -> EnumSet.of(DocumentFlags.GeneratorDocs);
case WIKI -> EnumSet.of(DocumentFlags.GeneratorWiki, DocumentFlags.Sideloaded);
case DOCS -> EnumSet.of(DocumentFlags.GeneratorDocs, DocumentFlags.Sideloaded);
default -> EnumSet.noneOf(DocumentFlags.class);
});

View File

@ -110,13 +110,18 @@ public class EncyclopediaMarginaliaNuSideloader implements SideloadSource, AutoC
String fullUrl = baseUrl.toString() + url;
StringBuilder fullHtml = new StringBuilder();
fullHtml.append("<!DOCTYPE html><html><head><title>").append(title).append("</title></head><body>");
fullHtml
.append("<!DOCTYPE html><html><head><title>")
.append(title)
.append("</title></head><body>")
.append("<div class=\"mw-content-text\">");
for (String part : parts) {
fullHtml.append("<p>");
fullHtml.append(part);
fullHtml.append("</p>");
}
fullHtml.append("</body></html>");
fullHtml.append("</div></body></html>");
var doc = sideloaderProcessing
.processDocument(fullUrl,