Specialization for javadocs
This commit is contained in:
parent
24dce8c03b
commit
42375f0e53
@ -13,15 +13,19 @@ public class HtmlProcessorSpecializations {
|
||||
private final LemmySpecialization lemmySpecialization;
|
||||
private final XenForoSpecialization xenforoSpecialization;
|
||||
private final PhpBBSpecialization phpBBSpecialization;
|
||||
private final JavadocSpecialization javadocSpecialization;
|
||||
private final DefaultSpecialization defaultSpecialization;
|
||||
|
||||
/**
 * Injection constructor: stores one instance of every available
 * specialization so the selection logic can hand out the matching one.
 *
 * @param lemmySpecialization   specialization stored for Lemmy documents
 * @param xenforoSpecialization specialization stored for XenForo documents
 * @param phpBBSpecialization   specialization stored for phpBB documents
 * @param javadocSpecialization specialization stored for Javadoc documents
 * @param defaultSpecialization fallback returned when no other specialization matches
 */
@Inject
public HtmlProcessorSpecializations(LemmySpecialization lemmySpecialization,
                                    XenForoSpecialization xenforoSpecialization,
                                    PhpBBSpecialization phpBBSpecialization,
                                    JavadocSpecialization javadocSpecialization,
                                    DefaultSpecialization defaultSpecialization) {
    this.lemmySpecialization = lemmySpecialization;
    this.xenforoSpecialization = xenforoSpecialization;
    this.phpBBSpecialization = phpBBSpecialization;
    this.javadocSpecialization = javadocSpecialization;
    this.defaultSpecialization = defaultSpecialization;
}
|
||||
|
||||
@ -36,6 +40,10 @@ public class HtmlProcessorSpecializations {
|
||||
// phpBB generators must be routed to the phpBB specialization; returning the
// XenForo one here would leave phpBBSpecialization injected but never used.
if (generator.keywords().contains("phpbb")) {
    return phpBBSpecialization;
}
|
||||
if (generator.keywords().contains("javadoc")) {
|
||||
return javadocSpecialization;
|
||||
}
|
||||
|
||||
return defaultSpecialization;
|
||||
}
|
||||
|
||||
|
@ -0,0 +1,40 @@
|
||||
package nu.marginalia.converting.processor.plugin.specialization;
|
||||
|
||||
import com.google.inject.Inject;
|
||||
import com.google.inject.Singleton;
|
||||
import nu.marginalia.summary.SummaryExtractor;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.util.Set;
|
||||
|
||||
@Singleton
|
||||
public class JavadocSpecialization extends DefaultSpecialization {
|
||||
private static final Logger logger = LoggerFactory.getLogger(JavadocSpecialization.class);
|
||||
|
||||
@Inject
|
||||
public JavadocSpecialization(SummaryExtractor summaryExtractor) {
|
||||
super(summaryExtractor);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Document prune(Document doc) {
|
||||
final var prunedDoc = super.prune(doc);
|
||||
|
||||
prunedDoc.getElementsByTag("noscript").remove();
|
||||
|
||||
return prunedDoc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getSummary(Document doc,
|
||||
Set<String> importantWords) {
|
||||
var block = doc.getElementsByClass("block").first();
|
||||
|
||||
if (block != null)
|
||||
return block.text();
|
||||
|
||||
return super.getSummary(doc, importantWords);
|
||||
}
|
||||
}
|
@ -0,0 +1,48 @@
|
||||
package nu.marginalia.converting.processor.plugin.specialization;
|
||||
|
||||
import nu.marginalia.converting.processor.logic.DocumentGeneratorExtractor;
|
||||
import nu.marginalia.summary.SummaryExtractor;
|
||||
import nu.marginalia.test.CommonTestData;
|
||||
import org.jsoup.Jsoup;
|
||||
import org.junit.jupiter.api.BeforeAll;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.util.Set;
|
||||
|
||||
class JavadocSpecializationTest {
|
||||
|
||||
static JavadocSpecialization specialization;
|
||||
static DocumentGeneratorExtractor generatorExtractor = new DocumentGeneratorExtractor();
|
||||
|
||||
String thread = CommonTestData.loadTestData("mock-crawl-data/javadoc/stream.html");
|
||||
|
||||
@BeforeAll
|
||||
public static void setUpAll() {
|
||||
specialization = new JavadocSpecialization(
|
||||
new SummaryExtractor(255,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null));
|
||||
}
|
||||
|
||||
@Test
|
||||
void prune() {
|
||||
System.out.println(specialization.prune(Jsoup.parse(thread)));
|
||||
}
|
||||
|
||||
@Test
|
||||
void generatorExtraction() {
|
||||
var gen = generatorExtractor.generatorCleaned(Jsoup.parse(thread));
|
||||
|
||||
System.out.println(gen);
|
||||
}
|
||||
|
||||
@Test
|
||||
void getSummary() {
|
||||
String summary = specialization.getSummary(Jsoup.parse(thread), Set.of(""));
|
||||
|
||||
System.out.println(summary);
|
||||
}
|
||||
}
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user