(sideloader) Fix integration problems with sideloaders
In the encyclopedia sideloader, wrap the article body in an element with the class "mw-content-text", which the WikiSpecialization class looks for during pruning, so that the articles get fairer treatment. Also add generator keywords based on the provided generator type, to ensure that these documents show up in the appropriate filters. Further, add a new document flag value 'Sideloaded' so that these entries can be distinguished.
This commit is contained in:
parent
5ab2a22e88
commit
bcad6492d6
@ -8,7 +8,7 @@ public enum DocumentFlags {
|
|||||||
GeneratorDocs,
|
GeneratorDocs,
|
||||||
GeneratorForum,
|
GeneratorForum,
|
||||||
GeneratorWiki,
|
GeneratorWiki,
|
||||||
Unused6,
|
Sideloaded,
|
||||||
Unused7,
|
Unused7,
|
||||||
Unused8,
|
Unused8,
|
||||||
;
|
;
|
||||||
|
@ -63,6 +63,11 @@ public class SideloaderProcessing {
|
|||||||
for (String keyword : extraKeywords)
|
for (String keyword : extraKeywords)
|
||||||
ret.words.add(keyword, WordFlags.Subjects.asBit());
|
ret.words.add(keyword, WordFlags.Subjects.asBit());
|
||||||
|
|
||||||
|
if (type == GeneratorType.WIKI)
|
||||||
|
ret.words.add("generator:wiki", WordFlags.Subjects.asBit());
|
||||||
|
else if (type == GeneratorType.DOCS)
|
||||||
|
ret.words.add("generator:docs", WordFlags.Subjects.asBit());
|
||||||
|
|
||||||
ret.details = details.details();
|
ret.details = details.details();
|
||||||
|
|
||||||
// Add a few things that we know about the document
|
// Add a few things that we know about the document
|
||||||
@ -80,8 +85,8 @@ public class SideloaderProcessing {
|
|||||||
PubDate.toYearByte(ret.details.pubYear),
|
PubDate.toYearByte(ret.details.pubYear),
|
||||||
(int) -ret.details.quality,
|
(int) -ret.details.quality,
|
||||||
switch (type) {
|
switch (type) {
|
||||||
case WIKI -> EnumSet.of(DocumentFlags.GeneratorWiki);
|
case WIKI -> EnumSet.of(DocumentFlags.GeneratorWiki, DocumentFlags.Sideloaded);
|
||||||
case DOCS -> EnumSet.of(DocumentFlags.GeneratorDocs);
|
case DOCS -> EnumSet.of(DocumentFlags.GeneratorDocs, DocumentFlags.Sideloaded);
|
||||||
default -> EnumSet.noneOf(DocumentFlags.class);
|
default -> EnumSet.noneOf(DocumentFlags.class);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@ -110,13 +110,18 @@ public class EncyclopediaMarginaliaNuSideloader implements SideloadSource, AutoC
|
|||||||
String fullUrl = baseUrl.toString() + url;
|
String fullUrl = baseUrl.toString() + url;
|
||||||
|
|
||||||
StringBuilder fullHtml = new StringBuilder();
|
StringBuilder fullHtml = new StringBuilder();
|
||||||
fullHtml.append("<!DOCTYPE html><html><head><title>").append(title).append("</title></head><body>");
|
fullHtml
|
||||||
|
.append("<!DOCTYPE html><html><head><title>")
|
||||||
|
.append(title)
|
||||||
|
.append("</title></head><body>")
|
||||||
|
.append("<div class=\"mw-content-text\">");
|
||||||
|
|
||||||
for (String part : parts) {
|
for (String part : parts) {
|
||||||
fullHtml.append("<p>");
|
fullHtml.append("<p>");
|
||||||
fullHtml.append(part);
|
fullHtml.append(part);
|
||||||
fullHtml.append("</p>");
|
fullHtml.append("</p>");
|
||||||
}
|
}
|
||||||
fullHtml.append("</body></html>");
|
fullHtml.append("</div></body></html>");
|
||||||
|
|
||||||
var doc = sideloaderProcessing
|
var doc = sideloaderProcessing
|
||||||
.processDocument(fullUrl,
|
.processDocument(fullUrl,
|
||||||
|
Loading…
Reference in New Issue
Block a user