Remove unrelated code, break tools into their own directory.
This commit is contained in:
parent
449471a076
commit
2eb972dea1
@ -3,9 +3,7 @@ package nu.marginalia.model.idx;
|
||||
import java.util.EnumSet;
|
||||
|
||||
public enum DocumentFlags {
|
||||
/** Simple processing was done, this document should be de-prioritized as a search result */
|
||||
Simple,
|
||||
|
||||
UnusedBit1,
|
||||
PlainText,
|
||||
UnusedBit2,
|
||||
UnusedBit3,
|
||||
|
@ -67,7 +67,7 @@ public class DocumentKeywordExtractor {
|
||||
|
||||
String flatWord = AsciiFlattener.flattenUnicode(word.word);
|
||||
|
||||
if (WordPatterns.hasWordQualities(flatWord)) {
|
||||
if (!flatWord.isBlank()) {
|
||||
wordsBuilder.add(flatWord, metadata.getMetadataForWord(word.stemmed));
|
||||
}
|
||||
}
|
||||
|
@ -220,7 +220,7 @@ public class KeywordExtractor {
|
||||
}
|
||||
String word = sentence.constructWordFromSpan(w);
|
||||
|
||||
if (word.isBlank() || !WordPatterns.filter(word)) return false;
|
||||
if (word.isBlank() || !WordPatterns.isNotJunkWord(word)) return false;
|
||||
if (sentence.posTags[w.start].equals("CC")) return false;
|
||||
if (sentence.posTags[w.end-1].equals("IN")) return false;
|
||||
if (sentence.posTags[w.end-1].equals("DT")) return false;
|
||||
|
@ -12,11 +12,6 @@ public class IndexJournalEntryBuilder {
|
||||
this.documentMeta = documentMeta;
|
||||
}
|
||||
|
||||
public IndexJournalEntryBuilder capacity(int size) {
|
||||
items.ensureCapacity(size);
|
||||
return this;
|
||||
}
|
||||
|
||||
public IndexJournalEntryBuilder add(long wordId, long metadata) {
|
||||
|
||||
items.add(wordId);
|
||||
|
@ -13,12 +13,13 @@ java {
|
||||
}
|
||||
dependencies {
|
||||
implementation project(':code:libraries:language-processing')
|
||||
implementation project(':code:libraries:ngram-bloom-filter')
|
||||
implementation project(':code:libraries:term-frequency-dict')
|
||||
implementation project(':code:features-convert:keyword-extraction')
|
||||
implementation project(':code:common:config')
|
||||
implementation project(':code:common:model')
|
||||
|
||||
implementation project(':third-party:porterstemmer')
|
||||
|
||||
implementation libs.lombok
|
||||
annotationProcessor libs.lombok
|
||||
implementation libs.bundles.slf4j
|
||||
@ -26,6 +27,7 @@ dependencies {
|
||||
|
||||
implementation libs.bundles.handlebars
|
||||
implementation libs.trove
|
||||
implementation libs.guice
|
||||
|
||||
testImplementation libs.bundles.slf4j.test
|
||||
testImplementation libs.bundles.junit
|
||||
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.language.statistics;
|
||||
package nu.marginalia.language;
|
||||
|
||||
import com.google.inject.Inject;
|
||||
import nu.marginalia.term_frequency_dict.TermFrequencyDict;
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.ngram_bloom_filter;
|
||||
package nu.marginalia.ngrams;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.ByteBuffer;
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.ngram_bloom_filter;
|
||||
package nu.marginalia.ngrams;
|
||||
|
||||
import ca.rmen.porterstemmer.PorterStemmer;
|
||||
import com.google.common.hash.HashFunction;
|
@ -6,9 +6,9 @@ import lombok.Getter;
|
||||
import lombok.ToString;
|
||||
import nu.marginalia.LanguageModels;
|
||||
import nu.marginalia.keyword.KeywordExtractor;
|
||||
import nu.marginalia.language.statistics.EnglishDictionary;
|
||||
import nu.marginalia.language.EnglishDictionary;
|
||||
import nu.marginalia.language.sentence.SentenceExtractor;
|
||||
import nu.marginalia.ngram_bloom_filter.NGramBloomFilter;
|
||||
import nu.marginalia.ngrams.NGramBloomFilter;
|
||||
import nu.marginalia.term_frequency_dict.TermFrequencyDict;
|
||||
import nu.marginalia.language.model.DocumentSentence;
|
||||
import nu.marginalia.language.model.WordSpan;
|
||||
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.ngram_bloom_filter;
|
||||
package nu.marginalia.ngrams;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
@ -1,8 +1,8 @@
|
||||
package nu.marginalia.query_parser;
|
||||
|
||||
import nu.marginalia.LanguageModels;
|
||||
import nu.marginalia.language.statistics.EnglishDictionary;
|
||||
import nu.marginalia.ngram_bloom_filter.NGramBloomFilter;
|
||||
import nu.marginalia.language.EnglishDictionary;
|
||||
import nu.marginalia.ngrams.NGramBloomFilter;
|
||||
import nu.marginalia.term_frequency_dict.TermFrequencyDict;
|
||||
import nu.marginalia.query_parser.token.TokenType;
|
||||
import nu.marginalia.util.TestLanguageModels;
|
||||
|
@ -1,8 +1,8 @@
|
||||
package nu.marginalia.query_parser;
|
||||
|
||||
import nu.marginalia.LanguageModels;
|
||||
import nu.marginalia.language.statistics.EnglishDictionary;
|
||||
import nu.marginalia.ngram_bloom_filter.NGramBloomFilter;
|
||||
import nu.marginalia.language.EnglishDictionary;
|
||||
import nu.marginalia.ngrams.NGramBloomFilter;
|
||||
import nu.marginalia.term_frequency_dict.TermFrequencyDict;
|
||||
import nu.marginalia.util.TestLanguageModels;
|
||||
import nu.marginalia.language.sentence.SentenceExtractor;
|
||||
|
@ -41,7 +41,7 @@ class TermCoherenceFactorTest {
|
||||
assertEquals(0, termCoherenceFactor.calculate(allPositionsSet));
|
||||
}
|
||||
|
||||
@Test
|
||||
@Test @SuppressWarnings("unchecked")
|
||||
public void testLowPosMatches() {
|
||||
var allPositionsSet = createSet(
|
||||
List.of(0, 1, 2, 3), List.of(0, 1, 2, 3)
|
||||
@ -53,7 +53,7 @@ class TermCoherenceFactorTest {
|
||||
assertEquals(1.0, termCoherenceFactor.bitPositionFactor(mask), 0.01);
|
||||
}
|
||||
|
||||
@Test
|
||||
@Test @SuppressWarnings("unchecked")
|
||||
public void testHiPosMatches() {
|
||||
var allPositionsSet = createSet(
|
||||
List.of(28, 29, 30, 31), List.of(28, 29, 30, 31)
|
||||
|
@ -8,8 +8,6 @@ import java.io.InputStreamReader;
|
||||
import java.util.HashSet;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
import java.util.function.Predicate;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/** Regular expression patterns for deciding which words are eligible to be keywords.
|
||||
* <p/>
|
||||
@ -44,25 +42,17 @@ public class WordPatterns {
|
||||
}
|
||||
}
|
||||
|
||||
private static boolean hasMoreThanTwo(String s, char c, int max) {
|
||||
int idx = 0;
|
||||
for (int i = 0; i <= max; i++) {
|
||||
idx = s.indexOf(c, idx+1);
|
||||
if (idx < 0 || idx >= s.length() - 1)
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
public static boolean filter(String word) {
|
||||
/** Run checks on the word and exclude terms with too many special characters
|
||||
*/
|
||||
public static boolean isNotJunkWord(String word) {
|
||||
if (word.isBlank()) {
|
||||
return false;
|
||||
}
|
||||
if (hasMoreThanTwo(word, '-', 4)) {
|
||||
if (hasMoreThanN(word, '-', 4)) {
|
||||
return false;
|
||||
}
|
||||
if (hasMoreThanTwo(word, '+', 2)) {
|
||||
if (hasMoreThanN(word, '+', 2)) {
|
||||
return false;
|
||||
}
|
||||
if (word.startsWith("-")
|
||||
@ -83,29 +73,13 @@ public class WordPatterns {
|
||||
return true;
|
||||
}
|
||||
|
||||
public static boolean hasWordQualities(String s) {
|
||||
if (s.isBlank())
|
||||
return false;
|
||||
|
||||
int start = 0;
|
||||
int end = s.length();
|
||||
if (s.charAt(0) == '#') start++;
|
||||
if (end > 1 && s.charAt(end-1) == '#') end--;
|
||||
|
||||
for (int i = start; i < end; i++) {
|
||||
char c = s.charAt(i);
|
||||
if (("_@.'+-".indexOf(c) < 0)
|
||||
&& !(c >= 'a' && c <= 'z')
|
||||
&& !(c >= 'A' && c <= 'Z')
|
||||
&& !(c >= '0' && c <= '9')
|
||||
&& !(c >= '\u00C0' && c <= '\u00D6')
|
||||
&& !(c >= '\u00D8' && c <= '\u00f6')
|
||||
&& !(c >= '\u00f8' && c <= '\u00ff'))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
private static boolean hasMoreThanN(String s, char c, int max) {
|
||||
int idx = 0;
|
||||
for (int i = 0; i <= max; i++) {
|
||||
idx = s.indexOf(c, idx+1);
|
||||
if (idx < 0 || idx >= s.length() - 1)
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -113,10 +87,8 @@ public class WordPatterns {
|
||||
if (s.length() < MIN_WORD_LENGTH) {
|
||||
return true;
|
||||
}
|
||||
if (!hasWordQualities(s)) {
|
||||
return true;
|
||||
}
|
||||
if (!filter(s)) {
|
||||
|
||||
if (!isNotJunkWord(s)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -1,42 +0,0 @@
|
||||
plugins {
|
||||
id 'java'
|
||||
id "io.freefair.lombok" version "5.3.3.3"
|
||||
|
||||
id "de.undercouch.download" version "5.1.0"
|
||||
}
|
||||
|
||||
java {
|
||||
toolchain {
|
||||
languageVersion.set(JavaLanguageVersion.of(17))
|
||||
}
|
||||
}
|
||||
|
||||
dependencies {
|
||||
implementation project(':code:common:config')
|
||||
implementation project(':third-party:porterstemmer')
|
||||
|
||||
implementation libs.lombok
|
||||
annotationProcessor libs.lombok
|
||||
implementation libs.bundles.slf4j
|
||||
implementation libs.notnull
|
||||
|
||||
implementation libs.bundles.nlp
|
||||
implementation libs.guice
|
||||
implementation libs.trove
|
||||
implementation libs.fastutil
|
||||
|
||||
testImplementation libs.bundles.slf4j.test
|
||||
testImplementation libs.bundles.junit
|
||||
testImplementation libs.mockito
|
||||
}
|
||||
|
||||
|
||||
test {
|
||||
useJUnitPlatform()
|
||||
}
|
||||
|
||||
task fastTests(type: Test) {
|
||||
useJUnitPlatform {
|
||||
excludeTags "slow"
|
||||
}
|
||||
}
|
@ -21,6 +21,7 @@ dependencies {
|
||||
implementation project(':code:common:model')
|
||||
implementation project(':code:common:config')
|
||||
implementation project(':code:libraries:easy-lsh')
|
||||
implementation project(':code:libraries:array')
|
||||
|
||||
implementation libs.lombok
|
||||
annotationProcessor libs.lombok
|
||||
|
12
code/libraries/term-frequency-dict/readme.md
Normal file
12
code/libraries/term-frequency-dict/readme.md
Normal file
@ -0,0 +1,12 @@
|
||||
# Term Frequency Dictionary
|
||||
|
||||
This dictionary is used by various parts of the system to evaluate, for example,
|
||||
the TF-IDF score of a keyword.
|
||||
|
||||
## Central Classes
|
||||
|
||||
* [TermFrequencyDict](src/main/java/nu/marginalia/term_frequency_dict/TermFrequencyDict.java)
|
||||
|
||||
## See Also
|
||||
|
||||
* [tools/term-frequency-extractor](../../tools/term-frequency-extractor) constructs the dictionary file
|
@ -1,8 +1,10 @@
|
||||
package nu.marginalia.term_frequency_dict;
|
||||
|
||||
import ca.rmen.porterstemmer.PorterStemmer;
|
||||
import gnu.trove.map.hash.TLongIntHashMap;
|
||||
import it.unimi.dsi.fastutil.longs.Long2IntOpenHashMap;
|
||||
import lombok.SneakyThrows;
|
||||
import nu.marginalia.LanguageModels;
|
||||
import nu.marginalia.array.LongArray;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.jetbrains.annotations.NotNull;
|
||||
import org.slf4j.Logger;
|
||||
@ -14,39 +16,45 @@ import java.io.*;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
|
||||
/** Dictionary with term frequency information for (stemmed) words.
|
||||
*
|
||||
*/
|
||||
@Singleton
|
||||
public class TermFrequencyDict {
|
||||
private final TLongIntHashMap wordRates = new TLongIntHashMap(1_000_000, 0.5f, 0, 0);
|
||||
private final Logger logger = LoggerFactory.getLogger(getClass());
|
||||
private final Long2IntOpenHashMap wordRates;
|
||||
private static final Logger logger = LoggerFactory.getLogger(TermFrequencyDict.class);
|
||||
private static final PorterStemmer ps = new PorterStemmer();
|
||||
|
||||
private static final long DOC_COUNT_KEY = ~0L;
|
||||
public static final long DOC_COUNT_KEY = ~0L;
|
||||
|
||||
@Inject
|
||||
public TermFrequencyDict(@NotNull LanguageModels models) {
|
||||
this(models.termFrequencies);
|
||||
}
|
||||
|
||||
@SneakyThrows
|
||||
public TermFrequencyDict(Path file) {
|
||||
try (var frequencyData = new DataInputStream(new BufferedInputStream(new FileInputStream(file.toFile())))) {
|
||||
wordRates.ensureCapacity((int)(Files.size(file)/16));
|
||||
|
||||
for (;;) {
|
||||
wordRates.put(frequencyData.readLong(), (int) frequencyData.readLong());
|
||||
}
|
||||
} catch (EOFException eof) {
|
||||
// ok
|
||||
} catch (IOException e) {
|
||||
logger.error("IO Exception reading " + file, e);
|
||||
}
|
||||
|
||||
wordRates = load(file);
|
||||
logger.info("Read {} N-grams frequencies", wordRates.size());
|
||||
}
|
||||
|
||||
public TermFrequencyDict(TLongIntHashMap data) {
|
||||
wordRates.putAll(data);
|
||||
private static Long2IntOpenHashMap load(Path file) throws IOException {
|
||||
LongArray array = LongArray.mmapRead(file);
|
||||
|
||||
int size = (int) Files.size(file)/16;
|
||||
var ret = new Long2IntOpenHashMap(size, 0.5f);
|
||||
|
||||
ret.defaultReturnValue(0);
|
||||
|
||||
for (int i = 0; i < size; i++) {
|
||||
ret.put(array.get(2*i), (int) array.get(2*i + 1));
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/** Total number of documents in the corpus */
|
||||
public int docCount() {
|
||||
int cnt = wordRates.get(DOC_COUNT_KEY);
|
||||
|
||||
@ -56,91 +64,20 @@ public class TermFrequencyDict {
|
||||
return cnt;
|
||||
}
|
||||
|
||||
// WIP refactoring, this needs a new home:
|
||||
//
|
||||
// public static void main(String... args) throws IOException, InterruptedException {
|
||||
// if (args.length != 2) {
|
||||
// System.err.println("Expected arguments: plan.yaml out-file");
|
||||
// }
|
||||
// String outFile = args[1];
|
||||
//
|
||||
// var plan = new CrawlPlanLoader().load(Path.of(args[0]));
|
||||
//
|
||||
// ThreadLocal<SentenceExtractor> se = ThreadLocal.withInitial(() -> new SentenceExtractor(WmsaHome.getLanguageModels()));
|
||||
// LanguageFilter lf = new LanguageFilter();
|
||||
//
|
||||
// TLongIntHashMap counts = new TLongIntHashMap(100_000_000, 0.7f, -1, -1);
|
||||
//
|
||||
// ForkJoinPool fjp = new ForkJoinPool(24);
|
||||
// AtomicInteger docCount = new AtomicInteger();
|
||||
//
|
||||
// for (var domain : plan.domainsIterable()) { // leaks file descriptor, is fine
|
||||
//
|
||||
// if (domain.doc == null)
|
||||
// continue;
|
||||
//
|
||||
// fjp.execute(() -> {
|
||||
//
|
||||
// TLongHashSet words = new TLongHashSet(10_000);
|
||||
//
|
||||
// for (var doc : domain.doc) {
|
||||
//
|
||||
// if (doc.documentBody == null)
|
||||
// continue;
|
||||
// docCount.incrementAndGet();
|
||||
//
|
||||
// Document parsed = Jsoup.parse(doc.documentBody.decode());
|
||||
// parsed.body().filter(new DomPruningFilter(0.5));
|
||||
//
|
||||
// DocumentLanguageData dld = se.get().extractSentences(parsed);
|
||||
//
|
||||
// if (lf.dictionaryAgreement(dld) < 0.1) {
|
||||
// return;
|
||||
// }
|
||||
//
|
||||
// for (var sent : dld.sentences) {
|
||||
// for (var word : sent) {
|
||||
// words.add(longHash(word.stemmed().getBytes(StandardCharsets.UTF_8)));
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// synchronized (counts) {
|
||||
// words.forEach(w -> {
|
||||
// counts.adjustOrPutValue(w, 1, 1);
|
||||
// return true;
|
||||
// });
|
||||
// }
|
||||
//
|
||||
// words.clear();
|
||||
// }
|
||||
//
|
||||
// System.out.println(domain.domain + "\t" + counts.size());
|
||||
// });
|
||||
//
|
||||
//
|
||||
// }
|
||||
//
|
||||
// fjp.shutdown();
|
||||
// fjp.awaitTermination(10, TimeUnit.DAYS);
|
||||
//
|
||||
// try (var dos = new DataOutputStream(Files.newOutputStream(Path.of(outFile)))) {
|
||||
// synchronized (counts) {
|
||||
// counts.put(DOC_COUNT_KEY, docCount.get());
|
||||
//
|
||||
// counts.forEachEntry((hash, cnt) -> {
|
||||
// try {
|
||||
// dos.writeLong(hash);
|
||||
// dos.writeLong(cnt);
|
||||
// } catch (IOException e) {
|
||||
// throw new RuntimeException(e);
|
||||
// }
|
||||
// return true;
|
||||
// });
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// System.out.println(docCount.get());
|
||||
// }
|
||||
/** Get the term frequency for the string s */
|
||||
public long getTermFreq(String s) {
|
||||
return wordRates.get(getStringHash(s));
|
||||
}
|
||||
|
||||
/** Get the term frequency for the already stemmed string s */
|
||||
public long getTermFreqStemmed(String s) {
|
||||
return wordRates.get(longHash(s.getBytes()));
|
||||
}
|
||||
|
||||
/** Get the term frequency for the already stemmed and already hashed value 'hash' */
|
||||
public long getTermFreqHash(long hash) {
|
||||
return wordRates.get(hash);
|
||||
}
|
||||
|
||||
public static long getStringHash(String s) {
|
||||
if (s.indexOf(' ') >= 0 || s.indexOf('_') >= 0) {
|
||||
@ -156,17 +93,11 @@ public class TermFrequencyDict {
|
||||
}
|
||||
}
|
||||
|
||||
public long getTermFreqHash(long hash) {
|
||||
return wordRates.get(hash);
|
||||
}
|
||||
public long getTermFreq(String s) {
|
||||
return wordRates.get(getStringHash(s));
|
||||
}
|
||||
public long getTermFreqStemmed(String s) {
|
||||
return wordRates.get(longHash(s.getBytes()));
|
||||
}
|
||||
|
||||
// If this ever changes, we need to re-generate the term frequency dictionary
|
||||
/** The hashing function used by TermFrequencyDict
|
||||
* <p>
|
||||
* If this function changes its behavior in any way,
|
||||
* it is necessary to re-generate the dictionary.
|
||||
*/
|
||||
public static long longHash(byte[]... bytesSets) {
|
||||
if (bytesSets == null || bytesSets.length == 0)
|
||||
return 0;
|
||||
|
@ -1,4 +1,49 @@
|
||||
# Converting Models
|
||||
|
||||
Contains models shared by the [converting-process](../../processes/converting-process/) and
|
||||
[loading-process](../../processes/loading-process/).
|
||||
[loading-process](../../processes/loading-process/).
|
||||
|
||||
## Design
|
||||
|
||||
The two processes communicate through a file-based protocol. The converter serializes [instructions](src/main/java/nu/marginalia/converting/instruction/Instruction.java)
|
||||
to file, which are deserialized by the loader and fed into an [interpreter](src/main/java/nu/marginalia/converting/instruction/Interpreter.java).
|
||||
|
||||
The instructions implement a visitor pattern.
|
||||
|
||||
Conceptually, the pattern can be thought of as a bit like remote function calls over a file,
|
||||
or a crude instructions-based programming language.
|
||||
|
||||
This
|
||||
|
||||
```java
|
||||
producer.foo("cat");
|
||||
producer.bar("milk", "eggs", "bread");
|
||||
```
|
||||
|
||||
translates, through this paradigm, to this:
|
||||
|
||||
```
|
||||
(producer)
|
||||
writeInstruction(DoFoo("Cat"))
|
||||
writeInstruction(DoBar("Milk", "Eggs", "Bread"))
|
||||
|
||||
(consumer)
|
||||
while read instruction:
|
||||
interpreter.apply(instruction)
|
||||
|
||||
(Interpreter)
|
||||
doFoo(animal):
|
||||
...
|
||||
doBar(ingredients):
|
||||
...
|
||||
|
||||
(doFoo)
|
||||
DoFoo(animal):
|
||||
apply(interpreter):
|
||||
interpreter.doFoo(animal)
|
||||
|
||||
(doBar)
|
||||
DoBar(ingredients):
|
||||
apply(interpreter):
|
||||
interpreter.doBar(ingredients)
|
||||
```
|
||||
|
@ -7,6 +7,7 @@ Contains models shared by the [crawling-process](../../processes/crawling-proces
|
||||
|
||||
* [CrawledDocument](src/main/java/nu/marginalia/crawling/model/CrawledDocument.java)
|
||||
* [CrawledDomain](src/main/java/nu/marginalia/crawling/model/CrawledDomain.java)
|
||||
* [CrawlingSpecification](src/main/java/nu/marginalia/crawling/model/spec/CrawlingSpecification.java)
|
||||
|
||||
### Marshalling
|
||||
* [CrawledDomainReader](src/main/java/nu/marginalia/crawling/io/CrawledDomainReader.java)
|
||||
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.language;
|
||||
package nu.marginalia.converting.language;
|
||||
|
||||
import nu.marginalia.language.encoding.UnicodeRanges;
|
||||
import nu.marginalia.language.model.DocumentLanguageData;
|
@ -29,7 +29,7 @@ public class ProcessedDocument {
|
||||
if (details == null)
|
||||
return false;
|
||||
|
||||
return !details.metadata.hasFlag(DocumentFlags.Simple);
|
||||
return true;
|
||||
}
|
||||
|
||||
public OptionalDouble quality() {
|
||||
|
@ -2,7 +2,7 @@ package nu.marginalia.converting.processor.plugin;
|
||||
|
||||
import nu.marginalia.crawling.model.CrawledDocument;
|
||||
import nu.marginalia.crawling.model.CrawledDomain;
|
||||
import nu.marginalia.language.LanguageFilter;
|
||||
import nu.marginalia.converting.language.LanguageFilter;
|
||||
import nu.marginalia.language.model.DocumentLanguageData;
|
||||
import nu.marginalia.converting.model.HtmlStandard;
|
||||
import nu.marginalia.keyword.model.DocumentKeywordsBuilder;
|
||||
|
@ -4,6 +4,7 @@ package nu.marginalia.converting;
|
||||
import com.google.inject.Guice;
|
||||
import com.google.inject.Injector;
|
||||
import nu.marginalia.bigstring.BigString;
|
||||
import nu.marginalia.converting.model.HtmlStandard;
|
||||
import nu.marginalia.converting.processor.DomainProcessor;
|
||||
import nu.marginalia.crawling.model.CrawledDocument;
|
||||
import nu.marginalia.crawling.model.CrawledDomain;
|
||||
@ -22,8 +23,7 @@ import static org.junit.jupiter.api.Assertions.*;
|
||||
|
||||
public class ConvertingIntegrationTest {
|
||||
|
||||
|
||||
DomainProcessor domainProcessor;
|
||||
private DomainProcessor domainProcessor;
|
||||
|
||||
@BeforeEach
|
||||
public void setUp() {
|
||||
@ -60,7 +60,22 @@ public class ConvertingIntegrationTest {
|
||||
ret.documents.forEach(doc -> {
|
||||
resultsByStatusCount.merge(doc.state, 1, Integer::sum);
|
||||
});
|
||||
assertTrue(resultsByStatusCount.get(UrlIndexingState.OK) > 5);
|
||||
|
||||
assertTrue(resultsByStatusCount.get(UrlIndexingState.OK) > 25);
|
||||
|
||||
for (var doc : ret.documents) {
|
||||
|
||||
if (!doc.isProcessedFully()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
var details = doc.details;
|
||||
|
||||
assertTrue(details.title.length() > 4);
|
||||
assertTrue(details.description.length() > 4);
|
||||
assertEquals(HtmlStandard.HTML5, details.standard);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
private CrawledDomain readMarginaliaWorkingSet() throws IOException {
|
||||
|
@ -1,6 +1,5 @@
|
||||
package nu.marginalia.crawling;
|
||||
package nu.marginalia.converting.language;
|
||||
|
||||
import nu.marginalia.language.LanguageFilter;
|
||||
import org.jsoup.Jsoup;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
@ -0,0 +1,3 @@
|
||||
# Test Data
|
||||
|
||||
This is a snapshot of memex.marginalia.nu from 2023-03-17.
|
@ -1,4 +0,0 @@
|
||||
# Crawl Job Extractor
|
||||
|
||||
The crawl job extractor creates a file containing a list of domains
|
||||
along with known URLs. This is consumed by the [crawling-process](../crawling-process).
|
@ -1,22 +1,20 @@
|
||||
# Processes
|
||||
|
||||
## 1. Crawl Job Extractor
|
||||
|
||||
The [crawl-job-extractor-process](crawl-job-extractor-process/) creates a crawl job specification
|
||||
based on the content in the database.
|
||||
|
||||
## 2. Crawl Process
|
||||
## 1. Crawl Process
|
||||
|
||||
The [crawling-process](crawling-process/) fetches website contents and saves them
|
||||
as compressed JSON models described in [crawling-model](../process-models/crawling-model/).
|
||||
|
||||
## 3. Converting Process
|
||||
The operation is specified by a crawl job specification. This is generated by [tools/crawl-job-extractor](../tools/crawl-job-extractor/)
|
||||
based on the content in the database.
|
||||
|
||||
## 2. Converting Process
|
||||
|
||||
The [converting-process](converting-process/) reads crawl data from the crawling step and
|
||||
processes them, extracting keywords and metadata and saves them as compressed JSON models
|
||||
described in [converting-model](../process-models/converting-model/).
|
||||
|
||||
## 4. Loading Process
|
||||
## 3. Loading Process
|
||||
|
||||
The [loading-process](loading-process/) reads the processed data and creates an index journal
|
||||
and lexicon, and loads domains and addresses into the MariaDB-database.
|
||||
|
@ -21,11 +21,15 @@ You'll find a short description in each module of what it does and how it relate
|
||||
Processes are batch jobs that deal with data retrieval, processing and loading.
|
||||
|
||||
* [processes](processes/)
|
||||
* * [crawl-job-extractor](processes/crawl-job-extractor-process)
|
||||
* * [crawling-process](processes/crawling-process)
|
||||
* * [converting-process](processes/converting-process)
|
||||
* * [loading-process](processes/loading-process)
|
||||
|
||||
#### Tools
|
||||
|
||||
* * [crawl-job-extractor](tools/crawl-job-extractor)
|
||||
* * [term-frequency-extractor](tools/term-frequency-extractor)
|
||||
|
||||
### Features
|
||||
|
||||
Features are relatively stand-alone components that serve some part of the domain. They aren't domain-independent,
|
||||
|
@ -29,7 +29,6 @@ dependencies {
|
||||
implementation project(':code:libraries:easy-lsh')
|
||||
implementation project(':code:libraries:language-processing')
|
||||
implementation project(':code:libraries:braille-block-punch-cards')
|
||||
implementation project(':code:libraries:ngram-bloom-filter')
|
||||
implementation project(':code:libraries:term-frequency-dict')
|
||||
|
||||
implementation project(':code:api:assistant-api')
|
||||
|
@ -8,8 +8,8 @@ import nu.marginalia.index.client.model.query.SearchSubquery;
|
||||
import nu.marginalia.index.query.limit.QueryLimits;
|
||||
import nu.marginalia.index.query.limit.QueryStrategy;
|
||||
import nu.marginalia.index.query.limit.SpecificationLimit;
|
||||
import nu.marginalia.language.statistics.EnglishDictionary;
|
||||
import nu.marginalia.ngram_bloom_filter.NGramBloomFilter;
|
||||
import nu.marginalia.language.EnglishDictionary;
|
||||
import nu.marginalia.ngrams.NGramBloomFilter;
|
||||
import nu.marginalia.term_frequency_dict.TermFrequencyDict;
|
||||
import nu.marginalia.query_parser.QueryParser;
|
||||
import nu.marginalia.query_parser.QueryPermutation;
|
||||
|
@ -2,9 +2,9 @@ package nu.marginalia.search.query;
|
||||
|
||||
import nu.marginalia.WmsaHome;
|
||||
import nu.marginalia.index.query.limit.SpecificationLimitType;
|
||||
import nu.marginalia.language.statistics.EnglishDictionary;
|
||||
import nu.marginalia.language.EnglishDictionary;
|
||||
import nu.marginalia.index.client.model.query.SearchSpecification;
|
||||
import nu.marginalia.ngram_bloom_filter.NGramBloomFilter;
|
||||
import nu.marginalia.ngrams.NGramBloomFilter;
|
||||
import nu.marginalia.term_frequency_dict.TermFrequencyDict;
|
||||
import nu.marginalia.search.command.SearchJsParameter;
|
||||
import nu.marginalia.search.model.SearchProfile;
|
||||
|
@ -31,7 +31,7 @@ dependencies {
|
||||
|
||||
implementation libs.bundles.mariadb
|
||||
implementation libs.guice
|
||||
implementation libs.gson
|
||||
implementation libs.bundles.gson
|
||||
implementation libs.zstd
|
||||
|
||||
testImplementation libs.bundles.slf4j.test
|
6
code/tools/crawl-job-extractor/readme.md
Normal file
6
code/tools/crawl-job-extractor/readme.md
Normal file
@ -0,0 +1,6 @@
|
||||
# Crawl Job Extractor
|
||||
|
||||
The crawl job extractor creates a file containing a list of domains
|
||||
along with known URLs.
|
||||
|
||||
This is consumed by [processes/crawling-process](../../processes/crawling-process).
|
62
code/tools/term-frequency-extractor/build.gradle
Normal file
62
code/tools/term-frequency-extractor/build.gradle
Normal file
@ -0,0 +1,62 @@
|
||||
plugins {
|
||||
id 'java'
|
||||
id "io.freefair.lombok" version "5.3.3.3"
|
||||
id 'application'
|
||||
|
||||
id 'jvm-test-suite'
|
||||
}
|
||||
|
||||
java {
|
||||
toolchain {
|
||||
languageVersion.set(JavaLanguageVersion.of(17))
|
||||
}
|
||||
}
|
||||
|
||||
application {
|
||||
mainClass = 'nu.marginalia.tools.TermFrequencyExtractor'
|
||||
applicationName = 'term-frequency-extractor'
|
||||
}
|
||||
|
||||
tasks.distZip.enabled = false
|
||||
|
||||
dependencies {
|
||||
implementation project(':third-party:rdrpostagger')
|
||||
implementation project(':third-party:porterstemmer')
|
||||
implementation project(':third-party:monkey-patch-opennlp')
|
||||
implementation project(':code:common:model')
|
||||
implementation project(':code:common:config')
|
||||
implementation project(':code:common:process')
|
||||
implementation project(':code:libraries:language-processing')
|
||||
implementation project(':code:libraries:term-frequency-dict')
|
||||
implementation project(':code:libraries:big-string')
|
||||
implementation project(':code:processes:converting-process')
|
||||
implementation project(':code:process-models:crawling-model')
|
||||
|
||||
implementation libs.lombok
|
||||
annotationProcessor libs.lombok
|
||||
implementation libs.bundles.slf4j
|
||||
implementation libs.notnull
|
||||
|
||||
implementation libs.guice
|
||||
implementation libs.jsoup
|
||||
implementation libs.trove
|
||||
implementation libs.fastutil
|
||||
|
||||
implementation libs.bundles.nlp
|
||||
implementation libs.commons.lang3
|
||||
|
||||
testImplementation libs.bundles.slf4j.test
|
||||
testImplementation libs.bundles.junit
|
||||
testImplementation libs.mockito
|
||||
}
|
||||
|
||||
|
||||
test {
|
||||
useJUnitPlatform()
|
||||
}
|
||||
|
||||
task fastTests(type: Test) {
|
||||
useJUnitPlatform {
|
||||
excludeTags "slow"
|
||||
}
|
||||
}
|
16
code/tools/term-frequency-extractor/readme.md
Normal file
16
code/tools/term-frequency-extractor/readme.md
Normal file
@ -0,0 +1,16 @@
|
||||
# Term Frequency Extractor
|
||||
|
||||
Generates a term frequency dictionary file from a batch of crawl data.
|
||||
|
||||
Usage:
|
||||
|
||||
```shell
|
||||
PATH_TO_SAMPLES=run/samples/crawl-s
|
||||
export JAVA_OPTS=-Dcrawl.rootDirRewrite=/crawl:${PATH_TO_SAMPLES}
|
||||
|
||||
term-frequency-extractor ${PATH_TO_SAMPLES}/plan.yaml out.dat
|
||||
```
|
||||
|
||||
## See Also
|
||||
|
||||
* [libraries/term-frequency-dict](../../libraries/term-frequency-dict)
|
@ -0,0 +1,114 @@
|
||||
package nu.marginalia.tools;
|
||||
|
||||
import gnu.trove.map.hash.TLongIntHashMap;
|
||||
import gnu.trove.set.hash.TLongHashSet;
|
||||
import nu.marginalia.WmsaHome;
|
||||
import nu.marginalia.converting.language.LanguageFilter;
|
||||
import nu.marginalia.converting.processor.logic.dom.DomPruningFilter;
|
||||
import nu.marginalia.language.model.DocumentLanguageData;
|
||||
import nu.marginalia.language.sentence.SentenceExtractor;
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Document;
|
||||
import plan.CrawlPlanLoader;
|
||||
|
||||
import java.io.DataOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.concurrent.ForkJoinPool;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
import static nu.marginalia.term_frequency_dict.TermFrequencyDict.DOC_COUNT_KEY;
|
||||
import static nu.marginalia.term_frequency_dict.TermFrequencyDict.longHash;
|
||||
|
||||
public class TermFrequencyExtractor {
|
||||
|
||||
public static void main(String... args) throws IOException, InterruptedException {
|
||||
if (args.length != 2) {
|
||||
System.err.println("Expected arguments: plan.yaml out-file");
|
||||
return;
|
||||
}
|
||||
|
||||
String outFile = args[1];
|
||||
|
||||
var plan = new CrawlPlanLoader().load(Path.of(args[0]));
|
||||
|
||||
ThreadLocal<SentenceExtractor> se = ThreadLocal.withInitial(() -> new SentenceExtractor(WmsaHome.getLanguageModels()));
|
||||
LanguageFilter lf = new LanguageFilter();
|
||||
|
||||
TLongIntHashMap counts = new TLongIntHashMap(100_000_000, 0.7f, -1, -1);
|
||||
|
||||
ForkJoinPool fjp = new ForkJoinPool(24);
|
||||
AtomicInteger docCount = new AtomicInteger();
|
||||
|
||||
for (var domain : plan.domainsIterable()) { // leaks file descriptor, is fine
|
||||
|
||||
if (domain.doc == null)
|
||||
continue;
|
||||
|
||||
fjp.execute(() -> {
|
||||
|
||||
TLongHashSet words = new TLongHashSet(10_000);
|
||||
|
||||
for (var doc : domain.doc) {
|
||||
|
||||
if (doc.documentBody == null)
|
||||
continue;
|
||||
docCount.incrementAndGet();
|
||||
|
||||
Document parsed = Jsoup.parse(doc.documentBody.decode());
|
||||
parsed.body().filter(new DomPruningFilter(0.5));
|
||||
|
||||
DocumentLanguageData dld = se.get().extractSentences(parsed);
|
||||
|
||||
if (lf.dictionaryAgreement(dld) < 0.1) {
|
||||
return;
|
||||
}
|
||||
|
||||
for (var sent : dld.sentences) {
|
||||
for (var word : sent) {
|
||||
words.add(longHash(word.stemmed().getBytes(StandardCharsets.UTF_8)));
|
||||
}
|
||||
}
|
||||
|
||||
synchronized (counts) {
|
||||
words.forEach(w -> {
|
||||
counts.adjustOrPutValue(w, 1, 1);
|
||||
return true;
|
||||
});
|
||||
}
|
||||
|
||||
words.clear();
|
||||
}
|
||||
|
||||
System.out.println(domain.domain + "\t" + counts.size());
|
||||
});
|
||||
|
||||
|
||||
}
|
||||
|
||||
fjp.shutdown();
|
||||
fjp.awaitTermination(10, TimeUnit.DAYS);
|
||||
|
||||
try (var dos = new DataOutputStream(Files.newOutputStream(Path.of(outFile)))) {
|
||||
synchronized (counts) {
|
||||
counts.put(DOC_COUNT_KEY, docCount.get());
|
||||
|
||||
counts.forEachEntry((hash, cnt) -> {
|
||||
try {
|
||||
dos.writeLong(hash);
|
||||
dos.writeLong(cnt);
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
return true;
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
System.out.println(docCount.get());
|
||||
}
|
||||
|
||||
}
|
@ -1,242 +0,0 @@
|
||||
// Build plugins, applied in the same order as before.
plugins {
    id 'java'
    id 'io.freefair.lombok' version '5.3.3.3'

    id 'me.champeau.jmh' version '0.6.6'
    id 'de.undercouch.download' version '5.1.0'

    id 'jvm-test-suite'
}
|
||||
|
||||
repositories {
    mavenLocal()
    maven { url "https://artifactory.cronapp.io/public-release/" }
    maven { url "https://repo1.maven.org/maven2/" }
    maven { url "https://www2.ph.ed.ac.uk/maven2/" }

    // JitPack builds artifacts straight from arbitrary Git repositories, so it is
    // declared exactly once, inside exclusiveContent, and may serve only the module
    // listed in the filter. An additional unrestricted declaration of the same URL
    // would let any other dependency be resolved from JitPack as well.
    exclusiveContent {
        forRepository {
            maven {
                url = uri("https://jitpack.io")
            }
        }
        filter {
            // Only use JitPack for the `gson-record-type-adapter-factory` library
            includeModule("com.github.Marcono1234", "gson-record-type-adapter-factory")
        }
    }
}
|
||||
|
||||
// Extra source sets. Both see the compiled main and test classes.
sourceSets {
    // End-to-end tests, kept under src/e2e
    e2eTest {
        compileClasspath += main.output + test.output
        runtimeClasspath += main.output + test.output
        java.srcDir file('src/e2e/java')
        resources.srcDir file('src/e2e/resources')
    }
    // JMH benchmarks, kept under src/jmh
    jmh {
        compileClasspath += main.output + test.output
        runtimeClasspath += main.output + test.output
        java.srcDir file('src/jmh/java')
        resources.srcDir file('src/jmh/resources')
    }
}
|
||||
|
||||
// Compile and run with a Java 17 toolchain.
java.toolchain.languageVersion.set(JavaLanguageVersion.of(17))
|
||||
// Enable the zip64 option on the JMH jar task.
jmhJar.zip64 = true
|
||||
// All coordinates are written in the compact 'group:name:version' notation.
// Declaration order is kept exactly as before.
dependencies {
    // Sibling projects
    implementation project(':code:common:service')
    implementation project(':code:common:config')
    implementation project(':code:common:service-discovery')
    implementation project(':code:common:service-client')

    // Lombok and annotations
    implementation 'org.projectlombok:lombok:1.18.24'
    implementation 'org.jetbrains:annotations:20.1.0'
    annotationProcessor 'org.projectlombok:lombok:1.18.24'

    // Templating
    implementation 'com.github.jknack:handlebars:4.3.1'
    implementation 'com.github.jknack:handlebars-markdown:4.2.1'

    implementation 'com.google.code.gson:gson:2.9.0'
    implementation 'io.reactivex.rxjava3:rxjava:3.1.5'
    implementation 'com.sparkjava:spark-core:2.9.3'
    implementation 'com.opencsv:opencsv:5.6'

    // Logging
    implementation 'org.apache.logging.log4j:log4j-api:2.17.2'
    implementation 'org.apache.logging.log4j:log4j-core:2.17.2'
    implementation 'org.apache.logging.log4j:log4j-slf4j-impl:2.17.2'

    implementation 'org.slf4j:slf4j-api:1.7.36'
    testImplementation 'org.slf4j:slf4j-jdk14:2.0.3'

    implementation 'com.google.guava:guava:31.1-jre'
    implementation 'com.google.inject:guice:5.1.0'
    implementation 'com.github.jnr:jnr-ffi:2.2.12'
    implementation 'org.apache.httpcomponents:httpcore:4.4.15'
    implementation 'org.apache.httpcomponents:httpclient:4.5.13'

    implementation 'com.h2database:h2:2.1.210'

    implementation 'org.jsoup:jsoup:1.15.3'

    implementation 'org.mariadb.jdbc:mariadb-java-client:3.0.6'
    implementation 'net.sf.trove4j:trove4j:3.0.3'

    implementation 'com.zaxxer:HikariCP:5.0.1'

    implementation 'org.apache.opennlp:opennlp-tools:1.9.4'

    // Metrics
    implementation 'io.prometheus:simpleclient:0.16.0'
    implementation 'io.prometheus:simpleclient_servlet:0.16.0'
    implementation 'io.prometheus:simpleclient_httpserver:0.16.0'
    implementation 'io.prometheus:simpleclient_hotspot:0.16.0'
    implementation 'com.fasterxml.jackson.core:jackson-databind:2.13.3'

    implementation 'org.yaml:snakeyaml:1.30'

    // Compression
    implementation 'com.github.luben:zstd-jni:1.5.2-2'
    implementation 'org.lz4:lz4-java:1.8.0'

    implementation 'com.github.vladimir-bukhtoyarov:bucket4j-core:7.5.0'
    implementation 'de.rototor.jeuclid:jeuclid-core:3.1.14'

    // Imaging
    implementation 'org.imgscalr:imgscalr-lib:4.2'
    implementation 'org.jclarion:image4j:0.7'

    implementation 'commons-net:commons-net:3.8.0'
    implementation 'org.eclipse.jgit:org.eclipse.jgit:5.12.0.202106070339-r'
    implementation 'org.eclipse.jgit:org.eclipse.jgit.ssh.jsch:5.12.0.202106070339-r'
    implementation 'com.jcraft:jsch:0.1.55'

    implementation 'it.unimi.dsi:fastutil:8.5.8'
    implementation 'org.roaringbitmap:RoaringBitmap:0.9.32'

    // Unit tests
    testImplementation 'org.junit.jupiter:junit-jupiter-api:5.8.2'
    testImplementation 'org.mockito:mockito-junit-jupiter:4.5.1'
    testRuntimeOnly 'org.junit.jupiter:junit-jupiter-engine'
    testCompileOnly 'org.projectlombok:lombok:1.18.24'
    testImplementation 'org.projectlombok:lombok:1.18.24'
    testAnnotationProcessor 'org.projectlombok:lombok:1.18.24'

    testImplementation 'org.mockito:mockito-core:4.5.1'

    testImplementation platform('org.testcontainers:testcontainers-bom:1.17.4')
    testImplementation 'org.testcontainers:mariadb:1.17.4'
    testImplementation 'org.testcontainers:junit-jupiter:1.17.4'

    // End-to-end tests
    e2eTestImplementation 'org.junit.jupiter:junit-jupiter-api:5.9.0'
    e2eTestRuntimeOnly 'org.junit.jupiter:junit-jupiter-engine'
    e2eTestImplementation 'org.projectlombok:lombok:1.18.24'
    e2eTestAnnotationProcessor 'org.projectlombok:lombok:1.18.24'
    e2eTestImplementation 'org.testcontainers:nginx:1.17.4'
    e2eTestImplementation 'org.testcontainers:junit-jupiter:1.17.2'
    e2eTestImplementation 'org.testcontainers:selenium:1.17.4'
    e2eTestImplementation 'org.seleniumhq.selenium:selenium-remote-driver:4.5.3'
    e2eTestImplementation 'org.seleniumhq.selenium:selenium-chrome-driver:4.5.3'

    implementation 'org.seleniumhq.selenium:selenium-chrome-driver:4.5.3'
    implementation 'org.seleniumhq.selenium:selenium-java:4.5.3'
    implementation 'org.sejda.imageio:webp-imageio:0.1.6'

    // Benchmarks
    jmh 'org.openjdk.jmh:jmh-core:1.35'
    jmh 'org.openjdk.jmh:jmh-generator-annprocess:1.35'

    implementation 'net.agkn:hll:1.6.0'
}
|
||||
|
||||
// End-to-end tests inherit every dependency declared for the unit tests.
configurations.e2eTestImplementation.extendsFrom(configurations.testImplementation)
|
||||
|
||||
// Default unit test task: JUnit Platform, one fork per two cores (at least one).
test {
    useJUnitPlatform()
    maxHeapSize = "8G"
    maxParallelForks = Runtime.runtime.availableProcessors().intdiv(2) ?: 1
}
|
||||
|
||||
// Same settings as the default test task, minus everything tagged "slow".
task fastTests(type: Test) {
    useJUnitPlatform {
        excludeTags "slow"
    }
    maxHeapSize = "8G"
    maxParallelForks = Runtime.runtime.availableProcessors().intdiv(2) ?: 1
}
|
||||
|
||||
// End-to-end tests: run serially, with a fresh JVM per test class, and only
// after the shadow jar and all downloaded model/test data are in place.
task e2eTest(type: Test) {
    dependsOn ':shadowJar',
            'downloadTestData',
            'downloadRDRModelData',
            'downloadSentenceModelData',
            'downloadTokenModelData',
            'downloadTermFreqData',
            'IP2LocationFile'

    maxParallelForks = 1
    forkEvery = 1
    maxHeapSize = "8G"

    testClassesDirs = sourceSets.e2eTest.output.classesDirs
    classpath = sourceSets.e2eTest.runtimeClasspath

    useJUnitPlatform {
        includeTags "e2e"
    }
}
|
||||
|
||||
// Fetches the Wikipedia ZIM sample; an already existing file is left untouched.
// NOTE(review): fetched over plain HTTP with no checksum verification.
task downloadTestData(type: Download) {
    src('http://hammurabi.acc.umu.se/mirror/kiwix.org/zim/wikipedia/wikipedia_en_100_nopic_2022-05.zim')
    dest(file('data/test/wikipedia_en_100_nopic.zim'))
    overwrite(false)
}
|
||||
|
||||
// Fetches the English RDRPOSTagger dictionary and rule files into data/models/.
task downloadRDRModelData(type: Download) {
    src([
            'https://raw.githubusercontent.com/datquocnguyen/RDRPOSTagger/master/Models/POS/English.DICT',
            'https://raw.githubusercontent.com/datquocnguyen/RDRPOSTagger/master/Models/POS/English.RDR'
    ])
    dest(file('data/models/'))
    overwrite(false)
}
|
||||
|
||||
// Fetches the OpenNLP sentence model; an existing file is not overwritten.
task downloadSentenceModelData(type: Download) {
    src('https://dlcdn.apache.org/opennlp/models/ud-models-1.0/opennlp-en-ud-ewt-sentence-1.0-1.9.3.bin')
    dest(file('data/models/opennlp-sentence.bin'))
    overwrite(false)
}
|
||||
// Fetches the OpenNLP token model; an existing file is not overwritten.
task downloadTokenModelData(type: Download) {
    src('https://dlcdn.apache.org/opennlp/models/ud-models-1.0/opennlp-en-ud-ewt-tokens-1.0-1.9.3.bin')
    dest(file('data/models/opennlp-tokens.bin'))
    overwrite(false)
}
|
||||
// Fetches the zipped IP2Location LITE DB1 CSV; unpacked by the IP2LocationFile task.
task downloadIP2LocationFile(type: Download) {
    src('https://download.ip2location.com/lite/IP2LOCATION-LITE-DB1.CSV.ZIP')
    dest(file('data/models/IP2LOCATION-LITE-DB1.CSV.ZIP'))
    overwrite(false)
}
|
||||
// Unpacks the downloaded IP2Location archive into data/models/IP2LOC.
task IP2LocationFile(type: Copy) {
    dependsOn 'downloadIP2LocationFile'

    from zipTree(file('data/models/IP2LOCATION-LITE-DB1.CSV.ZIP'))
    into file("data/models/IP2LOC")
}
|
||||
|
||||
// Fetches the term frequency data file; an existing file is not overwritten.
task downloadTermFreqData(type: Download) {
    src('https://downloads.marginalia.nu/model/tfreq-new-algo3.bin')
    dest(file('data/models/tfreq-new-algo3.bin'))
    overwrite(false)
}
|
||||
|
@ -1,2 +0,0 @@
|
||||
# This file is generated by the 'io.freefair.lombok' Gradle plugin
|
||||
config.stopBubbling = true
|
@ -1,15 +0,0 @@
|
||||
package nu.marginalia.memex;
|
||||
|
||||
import nu.marginalia.memex.auth.AuthMain;
|
||||
import nu.marginalia.service.descriptor.ServiceDescriptor;
|
||||
import nu.marginalia.service.descriptor.ServiceDescriptors;
|
||||
import nu.marginalia.service.id.ServiceId;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
public class MemexServiceDescriptors {
|
||||
public static ServiceDescriptors descriptors = new ServiceDescriptors(
|
||||
List.of(
|
||||
new ServiceDescriptor(ServiceId.Other_Memex, 5030),
|
||||
new ServiceDescriptor (ServiceId.Other_Auth, 5003)));
|
||||
}
|
@ -1,14 +0,0 @@
|
||||
package nu.marginalia.memex.auth;
|
||||
|
||||
import com.google.inject.AbstractModule;
|
||||
import com.google.inject.name.Names;
|
||||
import nu.marginalia.service.descriptor.HostsFile;
|
||||
|
||||
import java.nio.file.Path;
|
||||
|
||||
public class AuthConfigurationModule extends AbstractModule {
|
||||
public void configure() {
|
||||
bind(Path.class).annotatedWith(Names.named("password-file")).toInstance(Path.of("/var/lib/wmsa/password.dat"));
|
||||
bind(HostsFile.class).toInstance(new HostsFile());
|
||||
}
|
||||
}
|
@ -1,27 +0,0 @@
|
||||
package nu.marginalia.memex.auth;
|
||||
|
||||
import com.google.inject.Guice;
|
||||
import com.google.inject.Inject;
|
||||
import com.google.inject.Injector;
|
||||
import nu.marginalia.memex.MemexServiceDescriptors;
|
||||
import nu.marginalia.service.MainClass;
|
||||
import nu.marginalia.service.id.ServiceId;
|
||||
import nu.marginalia.service.module.ConfigurationModule;
|
||||
import nu.marginalia.service.server.Initialization;
|
||||
|
||||
public class AuthMain extends MainClass {
|
||||
|
||||
@Inject
|
||||
public AuthMain(AuthService service) {
|
||||
}
|
||||
|
||||
public static void main(String... args) {
|
||||
MainClass.init(ServiceId.Other_Auth, args);
|
||||
|
||||
Injector injector = Guice.createInjector(
|
||||
new AuthConfigurationModule(),
|
||||
new ConfigurationModule(MemexServiceDescriptors.descriptors, ServiceId.Other_Auth));
|
||||
injector.getInstance(AuthMain.class);
|
||||
injector.getInstance(Initialization.class).setReady();
|
||||
}
|
||||
}
|
@ -1,118 +0,0 @@
|
||||
package nu.marginalia.memex.auth;
|
||||
|
||||
import com.google.inject.Inject;
|
||||
import com.google.inject.name.Named;
|
||||
import nu.marginalia.client.Context;
|
||||
import nu.marginalia.memex.auth.model.LoginFormModel;
|
||||
import nu.marginalia.memex.renderer.MustacheRenderer;
|
||||
import nu.marginalia.memex.renderer.RendererFactory;
|
||||
import nu.marginalia.service.server.Initialization;
|
||||
import nu.marginalia.service.server.MetricsServer;
|
||||
import nu.marginalia.service.server.RateLimiter;
|
||||
import nu.marginalia.service.server.Service;
|
||||
import org.apache.http.HttpStatus;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import spark.Request;
|
||||
import spark.Response;
|
||||
import spark.Spark;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.Objects;
|
||||
import java.util.Optional;
|
||||
import java.util.UUID;
|
||||
|
||||
import static spark.Spark.*;
|
||||
|
||||
public class AuthService extends Service {
|
||||
|
||||
private final Logger logger = LoggerFactory.getLogger(getClass());
|
||||
private String password;
|
||||
|
||||
private final RateLimiter rateLimiter = RateLimiter.forLogin();
|
||||
private final MustacheRenderer<LoginFormModel> loginFormRenderer;
|
||||
|
||||
@Inject
|
||||
public AuthService(@Named("service-host") String ip,
|
||||
@Named("service-port") Integer port,
|
||||
@Named("password-file") Path topSecretPasswordFile,
|
||||
RendererFactory rendererFactory,
|
||||
Initialization initialization,
|
||||
MetricsServer metricsServer) throws IOException {
|
||||
|
||||
super(ip, port, initialization, metricsServer);
|
||||
|
||||
password = initPassword(topSecretPasswordFile);
|
||||
|
||||
loginFormRenderer = rendererFactory.renderer("auth/login");
|
||||
|
||||
Spark.path("public/api", () -> {
|
||||
before((req, rsp) -> {
|
||||
logger.info("{} {}", req.requestMethod(), req.pathInfo());
|
||||
});
|
||||
|
||||
post("/login", this::login);
|
||||
get("/login", this::loginForm);
|
||||
});
|
||||
Spark.path("api", () -> {
|
||||
get("/is-logged-in", this::isLoggedIn);
|
||||
});
|
||||
}
|
||||
|
||||
private String initPassword(Path topSecretPasswordFile) {
|
||||
if (Files.exists(topSecretPasswordFile)) {
|
||||
try {
|
||||
return Files.readString(topSecretPasswordFile);
|
||||
} catch (IOException e) {
|
||||
logger.error("Could not read password from file " + topSecretPasswordFile, e);
|
||||
}
|
||||
}
|
||||
logger.error("Setting random password");
|
||||
return UUID.randomUUID().toString();
|
||||
}
|
||||
|
||||
private Object loginForm(Request request, Response response) {
|
||||
String redir = Objects.requireNonNull(request.queryParams("redirect"));
|
||||
String service = Objects.requireNonNull(request.queryParams("service"));
|
||||
|
||||
return loginFormRenderer.render(new LoginFormModel(service, redir));
|
||||
}
|
||||
|
||||
private Object login(Request request, Response response) {
|
||||
var redir = Objects.requireNonNullElse(request.queryParams("redirect"), "/");
|
||||
|
||||
if (isLoggedIn(request, response)) {
|
||||
response.redirect(redir);
|
||||
return "";
|
||||
}
|
||||
|
||||
if (!rateLimiter.isAllowed(Context.fromRequest(request))) {
|
||||
Spark.halt(429, "Too many requests");
|
||||
return null;
|
||||
}
|
||||
|
||||
if (Objects.equals(password, request.queryParams("password"))) {
|
||||
request.session(true).attribute("logged-in", true);
|
||||
response.redirect(redir);
|
||||
return "";
|
||||
}
|
||||
|
||||
response.status(HttpStatus.SC_FORBIDDEN);
|
||||
return "<h1>Bad password!</h1>";
|
||||
}
|
||||
|
||||
public boolean isLoggedIn(Request request, Response response) {
|
||||
var session = request.session(false);
|
||||
|
||||
if (null == session) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return Optional.ofNullable(session.attribute("logged-in"))
|
||||
.map(Boolean.class::cast)
|
||||
.orElse(false);
|
||||
}
|
||||
|
||||
}
|
@ -1,45 +0,0 @@
|
||||
package nu.marginalia.memex.auth.client;
|
||||
|
||||
import com.google.gson.GsonBuilder;
|
||||
import com.google.inject.Inject;
|
||||
import io.reactivex.rxjava3.core.Observable;
|
||||
import nu.marginalia.WmsaHome;
|
||||
import nu.marginalia.client.AbstractDynamicClient;
|
||||
import nu.marginalia.client.Context;
|
||||
import nu.marginalia.service.descriptor.ServiceDescriptors;
|
||||
import nu.marginalia.service.id.ServiceId;
|
||||
import org.apache.http.HttpStatus;
|
||||
import spark.Request;
|
||||
import spark.Response;
|
||||
import spark.Spark;
|
||||
|
||||
import java.net.URLEncoder;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
|
||||
public class AuthClient extends AbstractDynamicClient {
|
||||
@Inject
|
||||
public AuthClient(ServiceDescriptors descriptors) {
|
||||
super(descriptors.forId(ServiceId.Other_Auth), WmsaHome.getHostsFile(), new GsonBuilder()::create);
|
||||
}
|
||||
|
||||
public Observable<Boolean> isLoggedIn(Context ctx) {
|
||||
return get(ctx, "/api/is-logged-in").map(Boolean::parseBoolean);
|
||||
}
|
||||
|
||||
public void redirectToLoginIfUnauthenticated(String domain, Request req, Response rsp) {
|
||||
if (!isLoggedIn(Context.fromRequest(req)).timeout(1, TimeUnit.SECONDS).blockingFirst()) {
|
||||
rsp.redirect(req.headers("X-Extern-Domain") + "/auth/login?service="+domain
|
||||
+"&redirect="+ URLEncoder.encode(req.headers("X-Extern-Url"), StandardCharsets.UTF_8));
|
||||
Spark.halt();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public void requireLogIn(Context ctx) {
|
||||
if (!isLoggedIn(ctx).timeout(1, TimeUnit.SECONDS).blockingFirst()) {
|
||||
Spark.halt(HttpStatus.SC_FORBIDDEN);
|
||||
}
|
||||
}
|
||||
}
|
@ -1,10 +0,0 @@
|
||||
package nu.marginalia.memex.auth.model;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Getter;
|
||||
|
||||
@Getter @AllArgsConstructor
|
||||
public class LoginFormModel {
|
||||
public final String service;
|
||||
public final String redirect;
|
||||
}
|
@ -1,43 +0,0 @@
|
||||
package nu.marginalia.memex.gemini;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.net.InetAddress;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
|
||||
public class BadBotList {
|
||||
private final Set<InetAddress> shitlist = new HashSet<>();
|
||||
public static final BadBotList INSTANCE = new BadBotList();
|
||||
private final Logger logger = LoggerFactory.getLogger(getClass().getSimpleName());
|
||||
|
||||
private BadBotList() {}
|
||||
|
||||
public boolean isAllowed(InetAddress address) {
|
||||
return !shitlist.contains(address);
|
||||
}
|
||||
|
||||
public boolean isQueryPermitted(InetAddress address, String query) {
|
||||
if (isBadQuery(query)) {
|
||||
logger.info("Banning {}", address);
|
||||
shitlist.add(address);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean isBadQuery(String query) {
|
||||
if (query.startsWith("GET")) {
|
||||
return true;
|
||||
}
|
||||
if (query.startsWith("OPTIONS")) {
|
||||
return true;
|
||||
}
|
||||
if (query.contains("mstshash")) {
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
}
|
@ -1,17 +0,0 @@
|
||||
package nu.marginalia.memex.gemini;
|
||||
|
||||
import com.google.inject.AbstractModule;
|
||||
import com.google.inject.name.Names;
|
||||
|
||||
import java.nio.file.Path;
|
||||
|
||||
public class GeminiConfigurationModule extends AbstractModule {
|
||||
public void configure() {
|
||||
bind(Path.class).annotatedWith(Names.named("gemini-server-root")).toInstance(Path.of("/var/lib/wmsa/memex-gmi"));
|
||||
bind(Path.class).annotatedWith(Names.named("gemini-cert-file")).toInstance(Path.of("/var/lib/wmsa/gemini/crypto.jks"));
|
||||
bind(Path.class).annotatedWith(Names.named("gemini-cert-password-file")).toInstance(Path.of("/var/lib/wmsa/gemini/password.dat"));
|
||||
bind(Integer.class).annotatedWith(Names.named("gemini-server-port")).toInstance(1965);
|
||||
|
||||
}
|
||||
|
||||
}
|
@ -1,7 +0,0 @@
|
||||
package nu.marginalia.memex.gemini;
|
||||
|
||||
/** Contract of the gemini server component. */
public interface GeminiService {

    /** Default gemtext file name. */
    String DEFAULT_FILENAME = "index.gmi";

    /** Runs the service. */
    void run();

}
|
@ -1,10 +0,0 @@
|
||||
package nu.marginalia.memex.gemini;
|
||||
|
||||
import com.google.inject.Singleton;
|
||||
|
||||
@Singleton
|
||||
public class GeminiServiceDummy implements GeminiService {
|
||||
@Override
|
||||
public void run() {
|
||||
}
|
||||
}
|
@ -1,164 +0,0 @@
|
||||
package nu.marginalia.memex.gemini;
|
||||
|
||||
import com.google.inject.Inject;
|
||||
import com.google.inject.Singleton;
|
||||
import com.google.inject.name.Named;
|
||||
import nu.marginalia.memex.gemini.io.GeminiConnection;
|
||||
import nu.marginalia.memex.gemini.io.GeminiSSLSetUp;
|
||||
import nu.marginalia.memex.gemini.io.GeminiStatusCode;
|
||||
import nu.marginalia.memex.gemini.io.GeminiUserException;
|
||||
import nu.marginalia.memex.gemini.plugins.BareStaticPagePlugin;
|
||||
import nu.marginalia.memex.gemini.plugins.Plugin;
|
||||
import nu.marginalia.memex.gemini.plugins.SearchPlugin;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import javax.net.ssl.SSLException;
|
||||
import javax.net.ssl.SSLServerSocket;
|
||||
import javax.net.ssl.SSLServerSocketFactory;
|
||||
import javax.net.ssl.SSLSocket;
|
||||
import java.io.IOException;
|
||||
import java.net.URI;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.Optional;
|
||||
import java.util.concurrent.Executor;
|
||||
import java.util.concurrent.Executors;
|
||||
|
||||
@Singleton
|
||||
public class GeminiServiceImpl implements GeminiService {
|
||||
|
||||
public final Path serverRoot;
|
||||
|
||||
private final Logger logger = LoggerFactory.getLogger(getClass().getSimpleName());
|
||||
private final Executor pool = Executors.newFixedThreadPool(32);
|
||||
private final SSLServerSocket serverSocket;
|
||||
|
||||
private final Plugin[] plugins;
|
||||
private final BadBotList badBotList = BadBotList.INSTANCE;
|
||||
|
||||
@Inject
|
||||
public GeminiServiceImpl(@Named("gemini-server-root") Path serverRoot,
|
||||
@Named("gemini-server-port") Integer port,
|
||||
GeminiSSLSetUp sslSetUp,
|
||||
BareStaticPagePlugin pagePlugin,
|
||||
SearchPlugin searchPlugin) throws Exception {
|
||||
this.serverRoot = serverRoot;
|
||||
logger.info("Setting up crypto");
|
||||
final SSLServerSocketFactory socketFactory = sslSetUp.getServerSocketFactory();
|
||||
|
||||
serverSocket = (SSLServerSocket) socketFactory.createServerSocket(port /* 1965 */);
|
||||
serverSocket.setEnabledCipherSuites(socketFactory.getSupportedCipherSuites());
|
||||
serverSocket.setEnabledProtocols(new String[] {"TLSv1.3", "TLSv1.2"});
|
||||
|
||||
logger.info("Verifying setup");
|
||||
if (!Files.exists(this.serverRoot)) {
|
||||
logger.error("Could not find SERVER_ROOT {}", this.serverRoot);
|
||||
System.exit(255);
|
||||
}
|
||||
|
||||
plugins = new Plugin[] {
|
||||
pagePlugin,
|
||||
searchPlugin
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public void run() {
|
||||
logger.info("Awaiting connections");
|
||||
|
||||
try {
|
||||
for (;;) {
|
||||
SSLSocket connection = (SSLSocket) serverSocket.accept();
|
||||
connection.setSoTimeout(10_000);
|
||||
|
||||
if (!badBotList.isAllowed(connection.getInetAddress())) {
|
||||
connection.close();
|
||||
} else {
|
||||
pool.execute(() -> serve(connection));
|
||||
}
|
||||
}
|
||||
}
|
||||
catch (IOException ex) {
|
||||
logger.error("IO Exception in gemini server", ex);
|
||||
}
|
||||
}
|
||||
|
||||
private void serve(SSLSocket socket) {
|
||||
final GeminiConnection connection;
|
||||
try {
|
||||
connection = new GeminiConnection(socket);
|
||||
}
|
||||
catch (IOException ex) {
|
||||
logger.error("Failed to create connection object", ex);
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
handleRequest(connection);
|
||||
}
|
||||
catch (GeminiUserException ex) {
|
||||
errorResponse(connection, ex.getMessage());
|
||||
}
|
||||
catch (SSLException ex) {
|
||||
logger.error(connection.getAddress() + " SSL error");
|
||||
connection.close();
|
||||
}
|
||||
catch (Exception ex) {
|
||||
errorResponse(connection, "Error");
|
||||
logger.error(connection.getAddress(), ex);
|
||||
}
|
||||
finally {
|
||||
connection.close();
|
||||
}
|
||||
}
|
||||
|
||||
private void errorResponse(GeminiConnection connection, String message) {
|
||||
if (connection.isConnected()) {
|
||||
try {
|
||||
logger.error("=> " + connection.getAddress(), message);
|
||||
connection.writeStatusLine(GeminiStatusCode.ERROR_PERMANENT, message);
|
||||
}
|
||||
catch (IOException ex) {
|
||||
logger.error("Exception while sending error", ex);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void handleRequest(GeminiConnection connection) throws Exception {
|
||||
|
||||
final String address = connection.getAddress();
|
||||
logger.info("Connect: " + address);
|
||||
|
||||
final Optional<URI> maybeUri = connection.readUrl();
|
||||
if (maybeUri.isEmpty()) {
|
||||
logger.info("Done: {}", address);
|
||||
return;
|
||||
}
|
||||
|
||||
final URI uri = maybeUri.get();
|
||||
logger.info("Request {}", uri);
|
||||
|
||||
if (!uri.getScheme().equals("gemini")) {
|
||||
throw new GeminiUserException("Unsupported protocol");
|
||||
}
|
||||
|
||||
servePage(connection, uri);
|
||||
logger.info("Done: {}", address);
|
||||
}
|
||||
|
||||
private void servePage(GeminiConnection connection, URI url) throws IOException {
|
||||
String path = url.getPath();
|
||||
|
||||
for (Plugin p : plugins) {
|
||||
if (p.serve(url, connection)) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
logger.error("FileNotFound {}", path);
|
||||
connection.writeStatusLine(GeminiStatusCode.ERROR_TEMPORARY, "No such file");
|
||||
}
|
||||
|
||||
|
||||
}
|
@ -1,130 +0,0 @@
|
||||
package nu.marginalia.memex.gemini.client;
|
||||
|
||||
import javax.net.ssl.SSLContext;
|
||||
import javax.net.ssl.SSLSocketFactory;
|
||||
import javax.net.ssl.TrustManager;
|
||||
import javax.net.ssl.X509TrustManager;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.net.URI;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.security.cert.X509Certificate;
|
||||
|
||||
/**
 * Unstable code!
 *
 * <p>Minimal gemini client: sends the URL followed by CR LF, reads the status
 * line, then reads the rest of the stream as the response body.
 */
public class GeminiClient {

    private final SSLSocketFactory socketFactory;

    // Create a trust manager that does not validate anything
    // NOTE(review): with this trust manager no server certificate is verified.
    public static final TrustManager[] trustAllCerts = new TrustManager[]{
            new X509TrustManager() {
                @Override
                public void checkClientTrusted(X509Certificate[] chain,
                                               String authType) {
                }

                @Override
                public void checkServerTrusted(X509Certificate[] chain,
                                               String authType) {
                }

                @Override
                public X509Certificate[] getAcceptedIssuers() {
                    return new X509Certificate[0];
                }
            }
    };

    /** Builds a socket factory backed by {@link #trustAllCerts}. */
    public static SSLSocketFactory buildSocketFactory() throws Exception {
        // Install the all-trusting trust manager
        final SSLContext sslContext = SSLContext.getInstance("TLS");
        sslContext.init(null, trustAllCerts, new java.security.SecureRandom());

        return sslContext.getSocketFactory();
    }

    public GeminiClient() throws Exception {
        socketFactory = buildSocketFactory();
    }

    /**
     * Fetches the given URI. Port 1965 is used when the URI has no port.
     *
     * @throws IOException on I/O failure, or if the status line does not start
     *                     with a two-digit status code
     */
    public Response get(URI uri) throws IOException {

        final int port = uri.getPort() == -1 ? 1965 : uri.getPort();
        final String host = uri.getHost();
        var requestString = String.format("%s\r\n", uri).getBytes(StandardCharsets.UTF_8);

        try (var socket = socketFactory.createSocket(host, port)) {
            socket.setSoTimeout(10_000);
            socket.getOutputStream().write(requestString);

            var is = socket.getInputStream();
            String statusLine = new GeminiInput(is).get();

            if (statusLine.length() < 2) {
                throw new IOException("Malformed status line: " + statusLine);
            }

            final int code;
            try {
                code = Integer.parseInt(statusLine.substring(0, 2));
            }
            catch (NumberFormatException ex) {
                throw new IOException("Malformed status line: " + statusLine, ex);
            }

            // A status line may consist of the code alone; treat the meta as empty then.
            String meta = statusLine.length() > 3 ? statusLine.substring(3) : "";

            ByteArrayOutputStream baos = new ByteArrayOutputStream();
            is.transferTo(baos);

            return new Response(code, meta, baos.toByteArray());
        }

    }

    /** A gemini response: status code, meta string and raw body. */
    public static class Response {
        public final int code;
        public final String meta;
        public final byte[] data;

        Response(int code, String meta, byte[] data) {
            this.code = code;
            this.meta = meta;
            this.data = data;
        }
    }


    /**
     * Reads a single CR LF terminated line from a stream, one byte at a time,
     * so that nothing past the line terminator is consumed.
     */
    public static class GeminiInput {
        /** Two status digits, a space, up to 1024 bytes of meta, CR and LF. */
        private static final int MAX_LINE_LENGTH = 2 + 1 + 1024 + 2;

        private final InputStream is;
        private final byte[] buffer = new byte[MAX_LINE_LENGTH];
        private int idx;

        final String result;

        /**
         * @throws RuntimeException if the stream ends before CR LF, or if no
         *                          CR LF is found within the maximum line length
         */
        public GeminiInput(InputStream is) throws IOException {
            this.is = is;

            // The terminator is checked after every byte, including the last
            // one that fits in the buffer.
            for (idx = 0; !hasEndOfLine(); idx++) {
                if (idx >= buffer.length) {
                    throw new RuntimeException("String too long");
                }
                readCharacter();
            }

            result = new String(buffer, 0, idx - 2, StandardCharsets.UTF_8);
        }

        /** Returns the line without its trailing CR LF. */
        public String get() {
            return result;
        }

        private void readCharacter() throws IOException {
            int rb = is.read();
            if (-1 == rb) {
                throw new RuntimeException("URL incomplete (no CR LF)");
            }
            buffer[idx] = (byte) rb;
        }

        /** True if the bytes read so far end with CR LF. */
        public boolean hasEndOfLine() {
            return idx >= 2
                    && buffer[idx - 1] == (byte) '\n'
                    && buffer[idx - 2] == (byte) '\r';
        }

    }
}
|
@ -1,53 +0,0 @@
|
||||
package nu.marginalia.memex.gemini.gmi;
|
||||
|
||||
import lombok.Getter;
|
||||
import nu.marginalia.memex.gemini.gmi.line.AbstractGemtextLine;
|
||||
import nu.marginalia.memex.gemini.gmi.parser.GemtextParser;
|
||||
import nu.marginalia.memex.gemini.gmi.renderer.GemtextRenderer;
|
||||
import nu.marginalia.memex.memex.model.MemexNodeHeadingId;
|
||||
import nu.marginalia.memex.memex.model.MemexNodeUrl;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Writer;
|
||||
import java.util.Arrays;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
|
||||
@Getter
|
||||
public class Gemtext {
|
||||
private final AbstractGemtextLine[] lines;
|
||||
private final MemexNodeUrl url;
|
||||
|
||||
public Gemtext(MemexNodeUrl url, String[] lines, MemexNodeHeadingId headingRoot) {
|
||||
this.lines = GemtextParser.parse(lines, headingRoot);
|
||||
this.url = url;
|
||||
}
|
||||
public Gemtext(MemexNodeUrl url, String[] lines) {
|
||||
this.lines = GemtextParser.parse(lines, new MemexNodeHeadingId(0));
|
||||
this.url = url;
|
||||
}
|
||||
|
||||
public String render(GemtextRenderer renderer) {
|
||||
return Arrays.stream(lines).map(renderer::renderLine).collect(Collectors.joining());
|
||||
}
|
||||
|
||||
public void render(GemtextRenderer renderer, Writer w) throws IOException {
|
||||
for (var line : lines) {
|
||||
w.write(renderer.renderLine(line));
|
||||
w.write('\n');
|
||||
}
|
||||
}
|
||||
|
||||
public Stream<AbstractGemtextLine> stream() {
|
||||
return Arrays.stream(lines);
|
||||
}
|
||||
|
||||
public AbstractGemtextLine get(int idx) {
|
||||
return lines[idx];
|
||||
}
|
||||
public int size() {
|
||||
return lines.length;
|
||||
}
|
||||
|
||||
}
|
@ -1,71 +0,0 @@
|
||||
package nu.marginalia.memex.gemini.gmi;
|
||||
|
||||
import com.google.common.collect.Sets;
|
||||
import nu.marginalia.memex.gemini.gmi.line.GemtextLineVisitorAdapter;
|
||||
import nu.marginalia.memex.gemini.gmi.line.GemtextLink;
|
||||
import nu.marginalia.memex.memex.model.MemexNodeUrl;
|
||||
import nu.marginalia.memex.memex.model.MemexUrl;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.*;
|
||||
|
||||
public class GemtextDatabase extends Gemtext {
|
||||
public final Map<String, Integer> links;
|
||||
|
||||
public GemtextDatabase(MemexNodeUrl url, String[] lines) {
|
||||
super(url, lines);
|
||||
|
||||
links = new HashMap<>();
|
||||
for (int i = 0; i < size(); i++) {
|
||||
int linkIdx = i;
|
||||
|
||||
get(i).visit(new GemtextLineVisitorAdapter<>() {
|
||||
@Override
|
||||
public Object visit(GemtextLink g) {
|
||||
links.put(g.getUrl().toString(), linkIdx);
|
||||
return null;
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
public Set<String> keys() {
|
||||
return links.keySet();
|
||||
}
|
||||
|
||||
public Optional<String> getLinkData(MemexUrl url) {
|
||||
Integer idx = links.get(url.getUrl());
|
||||
if (idx != null) {
|
||||
return
|
||||
Optional.of(get(idx).mapLink(GemtextLink::getTitle).orElse(""));
|
||||
}
|
||||
return Optional.empty();
|
||||
}
|
||||
|
||||
|
||||
public static GemtextDatabase of(MemexNodeUrl url, String[] lines) {
|
||||
return new GemtextDatabase(url, lines);
|
||||
}
|
||||
|
||||
public static GemtextDatabase of(MemexNodeUrl url, Path file) throws IOException {
|
||||
try (var s = Files.lines(file)) {
|
||||
return new GemtextDatabase(url, s.toArray(String[]::new));
|
||||
}
|
||||
}
|
||||
|
||||
public Set<MemexNodeUrl> difference(GemtextDatabase other) {
|
||||
Set<MemexNodeUrl> differences = new HashSet<>();
|
||||
|
||||
Sets.difference(keys(), other.keys()).stream().map(MemexNodeUrl::new).forEach(differences::add);
|
||||
|
||||
Sets.intersection(keys(), other.keys())
|
||||
.stream()
|
||||
.map(MemexNodeUrl::new)
|
||||
.filter(url -> !Objects.equals(getLinkData(url), other.getLinkData(url)))
|
||||
.forEach(differences::add);
|
||||
|
||||
return differences;
|
||||
}
|
||||
}
|
@ -1,163 +0,0 @@
|
||||
package nu.marginalia.memex.gemini.gmi;
|
||||
|
||||
import lombok.Getter;
|
||||
import nu.marginalia.memex.gemini.gmi.renderer.GemtextRenderer;
|
||||
import nu.marginalia.memex.gemini.gmi.renderer.GemtextRendererFactory;
|
||||
import nu.marginalia.memex.gemini.gmi.line.*;
|
||||
import nu.marginalia.memex.memex.model.MemexNodeHeadingId;
|
||||
import nu.marginalia.memex.memex.model.MemexNodeTaskId;
|
||||
import nu.marginalia.memex.memex.model.MemexNodeUrl;
|
||||
import nu.marginalia.memex.memex.model.MemexTaskState;
|
||||
import org.apache.commons.lang3.tuple.Pair;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.*;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
@Getter
|
||||
public class GemtextDocument extends Gemtext {
|
||||
private final Map<MemexNodeHeadingId, String> headings;
|
||||
private final Map<String, List<MemexNodeHeadingId>> headingsByName;
|
||||
private final Set<String> pragmas;
|
||||
private final List<GemtextTask> tasks;
|
||||
|
||||
private final String title;
|
||||
private final String date;
|
||||
private final List<GemtextLink> links;
|
||||
private final int hashCode;
|
||||
|
||||
private static final Pattern datePattern = Pattern.compile(".*(\\d{4}-\\d{2}-\\d{2}).*");
|
||||
private static final GemtextRenderer rawRenderer = new GemtextRendererFactory().gemtextRendererAsIs();
|
||||
|
||||
public GemtextDocument(MemexNodeUrl url, String[] lines, MemexNodeHeadingId headingRoot) {
|
||||
super(url, lines, headingRoot);
|
||||
|
||||
this.hashCode = Arrays.hashCode(lines);
|
||||
|
||||
GemtextDataExtractor extractor = new GemtextDataExtractor();
|
||||
|
||||
Arrays.stream(this.getLines()).forEach(extractor::take);
|
||||
|
||||
this.headings = extractor.getHeadings();
|
||||
this.links = extractor.getLinks();
|
||||
this.title = Objects.requireNonNullElse(extractor.getTitle(), url.getUrl());
|
||||
this.pragmas = extractor.getPragmas();
|
||||
this.headingsByName = extractor.getHeadingsByName();
|
||||
this.tasks = extractor.getTasks();
|
||||
this.date = extractor.getDate();
|
||||
}
|
||||
|
||||
public String getHeadingForElement(AbstractGemtextLine line) {
|
||||
return headings.getOrDefault(line.getHeading(), "");
|
||||
}
|
||||
|
||||
public List<AbstractGemtextLine> getSection(MemexNodeHeadingId headingId) {
|
||||
return stream()
|
||||
.filter(line -> line.getHeading().isChildOf(headingId))
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
public String getSectionGemtext(MemexNodeHeadingId headingId) {
|
||||
if (headingId.equals(new MemexNodeHeadingId(0))) {
|
||||
return stream()
|
||||
.map(rawRenderer::renderLine)
|
||||
.collect(Collectors.joining("\n"));
|
||||
}
|
||||
|
||||
return stream()
|
||||
.filter(line -> line.getHeading().isChildOf(headingId))
|
||||
.map(rawRenderer::renderLine)
|
||||
.collect(Collectors.joining("\n"));
|
||||
}
|
||||
|
||||
public Map<MemexNodeTaskId, Pair<String, MemexTaskState>> getOpenTopTasks() {
|
||||
return tasks.stream()
|
||||
.filter(task -> MemexTaskState.TODO.equals(task.getState())
|
||||
|| MemexTaskState.URGENT.equals(task.getState()))
|
||||
.filter(task -> task.getId().level() == 1)
|
||||
.collect(Collectors.toMap(GemtextTask::getId, task -> Pair.of(task.getTask(), task.getState())));
|
||||
}
|
||||
|
||||
public static GemtextDocument of(MemexNodeUrl url, String... lines) {
|
||||
return new GemtextDocument(url, lines, new MemexNodeHeadingId(0));
|
||||
}
|
||||
|
||||
public static GemtextDocument of(MemexNodeUrl url, Path file) throws IOException {
|
||||
try (var s = Files.lines(file)) {
|
||||
return new GemtextDocument(url, s.toArray(String[]::new), new MemexNodeHeadingId(0));
|
||||
}
|
||||
}
|
||||
|
||||
public boolean isIndex() {
|
||||
return getUrl().getFilename().equals("index.gmi");
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return hashCode;
|
||||
}
|
||||
|
||||
public Optional<String> getHeading(MemexNodeHeadingId heading) {
|
||||
return Optional.ofNullable(headings.get(heading));
|
||||
}
|
||||
|
||||
public Optional<MemexNodeHeadingId> getHeadingByName(MemexNodeHeadingId parent, String name) {
|
||||
var headings = headingsByName.get(name);
|
||||
if (null == headings) {
|
||||
return Optional.empty();
|
||||
}
|
||||
return headings.stream().filter(heading -> heading.isChildOf(parent)).findAny();
|
||||
}
|
||||
|
||||
@Getter
|
||||
private static class GemtextDataExtractor extends GemtextLineVisitorAdapter<Object> {
|
||||
|
||||
private String title;
|
||||
private String date;
|
||||
private final Map<MemexNodeHeadingId, String> headings = new TreeMap<>((a, b) -> Arrays.compare(a.getIds(), b.getIds()));
|
||||
private final Map<String, List<MemexNodeHeadingId>> headingsByName = new HashMap<>();
|
||||
private final Set<String> pragmas = new HashSet<>();
|
||||
private final List<GemtextLink> links = new ArrayList<>();
|
||||
private final List<GemtextTask> tasks = new ArrayList<>();
|
||||
|
||||
@Override
|
||||
public Object visit(GemtextHeading g) {
|
||||
headings.put(g.getLevel(), g.getName());
|
||||
headingsByName.computeIfAbsent(g.getName(), t -> new ArrayList<>()).add(g.getLevel());
|
||||
|
||||
if (title == null) {
|
||||
title = g.getName();
|
||||
var dateMatcher = datePattern.matcher(title);
|
||||
if (dateMatcher.matches()) {
|
||||
date = dateMatcher.group(1);
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object visit(GemtextLink g) {
|
||||
links.add(g);
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object visit(GemtextTask g) {
|
||||
tasks.add(g);
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object visit(GemtextPragma g) {
|
||||
pragmas.add(g.getLine());
|
||||
|
||||
return null;
|
||||
}
|
||||
}
|
||||
}
|
@ -1,18 +0,0 @@
|
||||
package nu.marginalia.memex.gemini.gmi.line;
|
||||
|
||||
import nu.marginalia.memex.memex.model.MemexNodeHeadingId;
|
||||
|
||||
import java.util.Optional;
|
||||
import java.util.function.Function;
|
||||
|
||||
public abstract class AbstractGemtextLine {
|
||||
public <T> Optional<T> mapLink(Function<GemtextLink, T> mapper) {
|
||||
return Optional.empty();
|
||||
}
|
||||
public <T> Optional<T> mapHeading(Function<GemtextHeading, T> mapper) { return Optional.empty(); }
|
||||
public <T> Optional<T> mapTask(Function<GemtextTask, T> mapper) { return Optional.empty(); }
|
||||
public abstract <T> T visit(GemtextLineVisitor<T> visitor);
|
||||
|
||||
public abstract boolean breaksTask();
|
||||
public abstract MemexNodeHeadingId getHeading();
|
||||
}
|
@ -1,21 +0,0 @@
|
||||
package nu.marginalia.memex.gemini.gmi.line;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Getter;
|
||||
import lombok.ToString;
|
||||
import nu.marginalia.memex.memex.model.MemexNodeHeadingId;
|
||||
|
||||
@AllArgsConstructor @Getter @ToString
|
||||
public class GemtextAside extends AbstractGemtextLine {
|
||||
private final String line;
|
||||
private final MemexNodeHeadingId heading;
|
||||
|
||||
@Override
|
||||
public <T> T visit(GemtextLineVisitor<T> visitor) {
|
||||
return visitor.visit(this);
|
||||
}
|
||||
|
||||
public boolean breaksTask() {
|
||||
return false;
|
||||
}
|
||||
}
|
@ -1,32 +0,0 @@
|
||||
package nu.marginalia.memex.gemini.gmi.line;
|
||||
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Getter;
|
||||
import lombok.ToString;
|
||||
import nu.marginalia.memex.memex.model.MemexNodeHeadingId;
|
||||
|
||||
import java.util.Optional;
|
||||
import java.util.function.Function;
|
||||
|
||||
@AllArgsConstructor
|
||||
@Getter
|
||||
@ToString
|
||||
public class GemtextHeading extends AbstractGemtextLine {
|
||||
private final MemexNodeHeadingId level;
|
||||
private final String name;
|
||||
private final MemexNodeHeadingId heading;
|
||||
|
||||
public <T> Optional<T> mapHeading(Function<GemtextHeading, T> mapper) {
|
||||
return Optional.of(mapper.apply(this));
|
||||
}
|
||||
|
||||
@Override
|
||||
public <T> T visit(GemtextLineVisitor<T> visitor) {
|
||||
return visitor.visit(this);
|
||||
}
|
||||
|
||||
public boolean breaksTask() {
|
||||
return true;
|
||||
}
|
||||
}
|
@ -1,18 +0,0 @@
|
||||
package nu.marginalia.memex.gemini.gmi.line;
|
||||
|
||||
public interface GemtextLineVisitor<T> {
|
||||
default T take(AbstractGemtextLine line) {
|
||||
return line.visit(this);
|
||||
}
|
||||
|
||||
T visit(GemtextHeading g);
|
||||
T visit(GemtextLink g);
|
||||
T visit(GemtextList g);
|
||||
T visit(GemtextPreformat g);
|
||||
T visit(GemtextQuote g);
|
||||
T visit(GemtextText g);
|
||||
T visit(GemtextTextLiteral g);
|
||||
T visit(GemtextAside g);
|
||||
T visit(GemtextTask g);
|
||||
T visit(GemtextPragma g);
|
||||
}
|
@ -1,53 +0,0 @@
|
||||
package nu.marginalia.memex.gemini.gmi.line;
|
||||
|
||||
public class GemtextLineVisitorAdapter<T> implements GemtextLineVisitor<T> {
|
||||
@Override
|
||||
public T visit(GemtextHeading g) {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public T visit(GemtextLink g) {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public T visit(GemtextList g) {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public T visit(GemtextPreformat g) {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public T visit(GemtextQuote g) {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public T visit(GemtextText g) {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public T visit(GemtextTextLiteral g) {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public T visit(GemtextAside g) {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public T visit(GemtextTask g) {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public T visit(GemtextPragma g) {
|
||||
return null;
|
||||
}
|
||||
}
|
@ -1,33 +0,0 @@
|
||||
package nu.marginalia.memex.gemini.gmi.line;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Getter;
|
||||
import lombok.ToString;
|
||||
import nu.marginalia.memex.memex.model.MemexNodeHeadingId;
|
||||
import nu.marginalia.memex.memex.model.MemexUrl;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
import java.util.Optional;
|
||||
import java.util.function.Function;
|
||||
|
||||
@AllArgsConstructor @Getter @ToString
|
||||
public class GemtextLink extends AbstractGemtextLine {
|
||||
private final MemexUrl url;
|
||||
|
||||
@Nullable
|
||||
private final String title;
|
||||
private final MemexNodeHeadingId heading;
|
||||
|
||||
public <T> Optional<T> mapLink(Function<GemtextLink, T> mapper) {
|
||||
return Optional.ofNullable(mapper.apply(this));
|
||||
}
|
||||
|
||||
@Override
|
||||
public <T> T visit(GemtextLineVisitor<T> visitor) {
|
||||
return visitor.visit(this);
|
||||
}
|
||||
|
||||
public boolean breaksTask() {
|
||||
return false;
|
||||
}
|
||||
}
|
@ -1,23 +0,0 @@
|
||||
package nu.marginalia.memex.gemini.gmi.line;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Getter;
|
||||
import lombok.ToString;
|
||||
import nu.marginalia.memex.memex.model.MemexNodeHeadingId;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
@AllArgsConstructor @Getter @ToString
|
||||
public class GemtextList extends AbstractGemtextLine {
|
||||
private final List<String> items;
|
||||
private final MemexNodeHeadingId heading;
|
||||
|
||||
@Override
|
||||
public <T> T visit(GemtextLineVisitor<T> visitor) {
|
||||
return visitor.visit(this);
|
||||
}
|
||||
|
||||
public boolean breaksTask() {
|
||||
return true;
|
||||
}
|
||||
}
|
@ -1,21 +0,0 @@
|
||||
package nu.marginalia.memex.gemini.gmi.line;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Getter;
|
||||
import lombok.ToString;
|
||||
import nu.marginalia.memex.memex.model.MemexNodeHeadingId;
|
||||
|
||||
@AllArgsConstructor @Getter @ToString
|
||||
public class GemtextPragma extends AbstractGemtextLine {
|
||||
private final String line;
|
||||
private final MemexNodeHeadingId heading;
|
||||
|
||||
@Override
|
||||
public <T> T visit(GemtextLineVisitor<T> visitor) {
|
||||
return visitor.visit(this);
|
||||
}
|
||||
|
||||
public boolean breaksTask() {
|
||||
return false;
|
||||
}
|
||||
}
|
@ -1,23 +0,0 @@
|
||||
package nu.marginalia.memex.gemini.gmi.line;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Getter;
|
||||
import lombok.ToString;
|
||||
import nu.marginalia.memex.memex.model.MemexNodeHeadingId;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
@AllArgsConstructor @Getter @ToString
|
||||
public class GemtextPreformat extends AbstractGemtextLine {
|
||||
private final List<String> items;
|
||||
private final MemexNodeHeadingId heading;
|
||||
|
||||
@Override
|
||||
public <T> T visit(GemtextLineVisitor<T> visitor) {
|
||||
return visitor.visit(this);
|
||||
}
|
||||
|
||||
public boolean breaksTask() {
|
||||
return true;
|
||||
}
|
||||
}
|
@ -1,23 +0,0 @@
|
||||
package nu.marginalia.memex.gemini.gmi.line;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Getter;
|
||||
import lombok.ToString;
|
||||
import nu.marginalia.memex.memex.model.MemexNodeHeadingId;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
@AllArgsConstructor @Getter @ToString
|
||||
public class GemtextQuote extends AbstractGemtextLine {
|
||||
private final List<String> items;
|
||||
private final MemexNodeHeadingId heading;
|
||||
|
||||
@Override
|
||||
public <T> T visit(GemtextLineVisitor<T> visitor) {
|
||||
return visitor.visit(this);
|
||||
}
|
||||
|
||||
public boolean breaksTask() {
|
||||
return true;
|
||||
}
|
||||
}
|
@ -1,42 +0,0 @@
|
||||
package nu.marginalia.memex.gemini.gmi.line;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Getter;
|
||||
import lombok.ToString;
|
||||
import nu.marginalia.memex.memex.model.MemexNodeHeadingId;
|
||||
import nu.marginalia.memex.memex.model.MemexNodeTaskId;
|
||||
import nu.marginalia.memex.memex.model.MemexTaskState;
|
||||
import nu.marginalia.memex.memex.model.MemexTaskTags;
|
||||
|
||||
import java.util.Optional;
|
||||
import java.util.function.Function;
|
||||
|
||||
@AllArgsConstructor @Getter @ToString
|
||||
public class GemtextTask extends AbstractGemtextLine {
|
||||
private final MemexNodeTaskId id;
|
||||
private final String task;
|
||||
private final MemexNodeHeadingId heading;
|
||||
private final MemexTaskTags tags;
|
||||
|
||||
public MemexTaskState getState() {
|
||||
return MemexTaskState.of(tags);
|
||||
}
|
||||
|
||||
public int getLevel() {
|
||||
return id.level();
|
||||
}
|
||||
@Override
|
||||
public <T> T visit(GemtextLineVisitor<T> visitor) {
|
||||
return visitor.visit(this);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean breaksTask() {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public <T> Optional<T> mapTask(Function<GemtextTask, T> mapper) {
|
||||
return Optional.of(mapper.apply(this));
|
||||
}
|
||||
}
|
@ -1,21 +0,0 @@
|
||||
package nu.marginalia.memex.gemini.gmi.line;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Getter;
|
||||
import lombok.ToString;
|
||||
import nu.marginalia.memex.memex.model.MemexNodeHeadingId;
|
||||
|
||||
@AllArgsConstructor @Getter @ToString
|
||||
public class GemtextText extends AbstractGemtextLine {
|
||||
private final String line;
|
||||
private final MemexNodeHeadingId heading;
|
||||
|
||||
@Override
|
||||
public <T> T visit(GemtextLineVisitor<T> visitor) {
|
||||
return visitor.visit(this);
|
||||
}
|
||||
|
||||
public boolean breaksTask() {
|
||||
return !line.isBlank();
|
||||
}
|
||||
}
|
@ -1,23 +0,0 @@
|
||||
package nu.marginalia.memex.gemini.gmi.line;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Getter;
|
||||
import lombok.ToString;
|
||||
import nu.marginalia.memex.memex.model.MemexNodeHeadingId;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
@AllArgsConstructor @Getter @ToString
|
||||
public class GemtextTextLiteral extends AbstractGemtextLine {
|
||||
private final List<String> items;
|
||||
private final MemexNodeHeadingId heading;
|
||||
|
||||
@Override
|
||||
public <T> T visit(GemtextLineVisitor<T> visitor) {
|
||||
return visitor.visit(this);
|
||||
}
|
||||
|
||||
public boolean breaksTask() {
|
||||
return false;
|
||||
}
|
||||
}
|
@ -1,20 +0,0 @@
|
||||
package nu.marginalia.memex.gemini.gmi.parser;
|
||||
|
||||
import nu.marginalia.memex.gemini.gmi.line.GemtextAside;
|
||||
import nu.marginalia.memex.memex.model.MemexNodeHeadingId;
|
||||
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
public class GemtextAsideParser {
|
||||
private static final Pattern listItemPattern = Pattern.compile("^\\((.*)\\)$");
|
||||
|
||||
public static GemtextAside parse(String s, MemexNodeHeadingId heading) {
|
||||
var matcher = listItemPattern.matcher(s);
|
||||
|
||||
if (!matcher.matches()) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return new GemtextAside(matcher.group(1), heading);
|
||||
}
|
||||
}
|
@ -1,26 +0,0 @@
|
||||
package nu.marginalia.memex.gemini.gmi.parser;
|
||||
|
||||
import nu.marginalia.memex.gemini.gmi.line.AbstractGemtextLine;
|
||||
import nu.marginalia.memex.gemini.gmi.line.GemtextHeading;
|
||||
import nu.marginalia.memex.gemini.gmi.line.GemtextText;
|
||||
import nu.marginalia.memex.memex.model.MemexNodeHeadingId;
|
||||
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
public class GemtextHeadingParser {
|
||||
private static final Pattern headingPattern = Pattern.compile("^(#+)\\s*([^#].*|$)$");
|
||||
|
||||
public static AbstractGemtextLine parse(String s, MemexNodeHeadingId heading) {
|
||||
var matcher = headingPattern.matcher(s);
|
||||
|
||||
if (!matcher.matches()) {
|
||||
return new GemtextText(s, heading);
|
||||
}
|
||||
|
||||
int level = matcher.group(1).length() - 1;
|
||||
var newHeading = heading.next(level);
|
||||
|
||||
return new GemtextHeading(newHeading, matcher.group(2), newHeading);
|
||||
}
|
||||
|
||||
}
|
@ -1,42 +0,0 @@
|
||||
package nu.marginalia.memex.gemini.gmi.parser;
|
||||
|
||||
import nu.marginalia.memex.gemini.gmi.line.AbstractGemtextLine;
|
||||
import nu.marginalia.memex.gemini.gmi.line.GemtextLink;
|
||||
import nu.marginalia.memex.gemini.gmi.line.GemtextText;
|
||||
import nu.marginalia.memex.memex.model.MemexExternalUrl;
|
||||
import nu.marginalia.memex.memex.model.MemexNodeHeadingId;
|
||||
import nu.marginalia.memex.memex.model.MemexNodeUrl;
|
||||
import nu.marginalia.memex.memex.model.MemexUrl;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
public class GemtextLinkParser {
|
||||
private static final Pattern linkPattern = Pattern.compile("^=>\\s?([^\\s]+)\\s*(.+)?$");
|
||||
|
||||
@Nullable
|
||||
public static AbstractGemtextLine parse(String s, MemexNodeHeadingId heading) {
|
||||
var matcher = linkPattern.matcher(s);
|
||||
|
||||
if (!matcher.matches()) {
|
||||
return new GemtextText(s, heading);
|
||||
}
|
||||
if (matcher.groupCount() == 2) {
|
||||
return new GemtextLink(toMemexUrl(matcher.group(1)), matcher.group(2), heading);
|
||||
}
|
||||
else {
|
||||
return new GemtextLink(toMemexUrl(matcher.group(1)), null, heading);
|
||||
}
|
||||
}
|
||||
|
||||
private static MemexUrl toMemexUrl(String url) {
|
||||
if (url.startsWith("/")) {
|
||||
return new MemexNodeUrl(url);
|
||||
}
|
||||
else {
|
||||
return new MemexExternalUrl(url);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
@ -1,17 +0,0 @@
|
||||
package nu.marginalia.memex.gemini.gmi.parser;
|
||||
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/** Extracts the text of a list line ({@code *}, an optional whitespace character, then the text). */
public class GemtextListParser {
    private static final Pattern listItemPattern = Pattern.compile("^\\*\\s?(.+)$");

    /**
     * @param s the raw line
     * @return the item text, or null if {@code s} is not a list line or has
     *         no text after the marker
     */
    public static String parse(String s) {
        var m = listItemPattern.matcher(s);
        return m.matches() ? m.group(1) : null;
    }
}
|
@ -1,135 +0,0 @@
|
||||
package nu.marginalia.memex.gemini.gmi.parser;
|
||||
|
||||
import nu.marginalia.memex.gemini.gmi.line.*;
|
||||
import nu.marginalia.memex.memex.model.MemexNodeHeadingId;
|
||||
import nu.marginalia.memex.memex.model.MemexNodeTaskId;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
public class GemtextParser {
|
||||
|
||||
private static final String PREFORMAT_MARKER = "```";
|
||||
private static final String LITERAL_MARKER = " ";
|
||||
private static final String LINK_MARKER = "=>";
|
||||
private static final String HEADING_MARKER = "#";
|
||||
private static final String LIST_MARKER = "*";
|
||||
private static final String QUOTE_MARKER = ">";
|
||||
private static final String ASIDE_MARKER = "(";
|
||||
private static final String TASK_MARKER = "-";
|
||||
private static final String PRAGMA_MARKER = "%%%";
|
||||
|
||||
public static AbstractGemtextLine[] parse(String[] lines, MemexNodeHeadingId headingRoot) {
|
||||
List<AbstractGemtextLine> items = new ArrayList<>();
|
||||
MemexNodeHeadingId heading = headingRoot;
|
||||
MemexNodeTaskId task = new MemexNodeTaskId(0);
|
||||
|
||||
Set<String> pragmas = new HashSet<>();
|
||||
|
||||
for (int i = 0; i < lines.length; i++) {
|
||||
String line = lines[i];
|
||||
|
||||
if (line.startsWith(PREFORMAT_MARKER)) {
|
||||
i = getBlockQuote(items, lines, heading, i);
|
||||
}
|
||||
else if (line.startsWith(PRAGMA_MARKER)) {
|
||||
var pragma = GemtextPragmaParser.parse(line, heading);
|
||||
|
||||
if (pragma instanceof GemtextPragma) {
|
||||
GemtextPragma gtp = (GemtextPragma) pragma;
|
||||
pragmas.add(gtp.getLine());
|
||||
}
|
||||
|
||||
items.add(pragma);
|
||||
|
||||
}
|
||||
else if (line.startsWith(LINK_MARKER)) {
|
||||
var link = GemtextLinkParser.parse(line, heading);
|
||||
items.add(link);
|
||||
}
|
||||
else if (line.startsWith(HEADING_MARKER)) {
|
||||
var tag = GemtextHeadingParser.parse(line, heading);
|
||||
|
||||
heading = tag.mapHeading(GemtextHeading::getHeading).orElse(heading);
|
||||
|
||||
items.add(tag);
|
||||
}
|
||||
else if (line.startsWith(LIST_MARKER)) {
|
||||
i = getList(items, lines, heading, i);
|
||||
}
|
||||
else if (line.startsWith(LITERAL_MARKER)) {
|
||||
i = getLitteral(items, lines, heading, i);
|
||||
}
|
||||
else if (pragmas.contains("TASKS")
|
||||
&& line.startsWith(TASK_MARKER))
|
||||
{
|
||||
var tag = GemtextTaskParser.parse(line, heading, task);
|
||||
|
||||
task = tag.mapTask(GemtextTask::getId).orElse(task);
|
||||
|
||||
items.add(tag);
|
||||
}
|
||||
else if (line.startsWith(QUOTE_MARKER)) {
|
||||
i = getQuote(items, lines, heading, i);
|
||||
}
|
||||
else if (line.startsWith(ASIDE_MARKER)) {
|
||||
var aside = GemtextAsideParser.parse(line, heading);
|
||||
items.add(Objects.requireNonNullElse(aside, new GemtextText(line, heading)));
|
||||
}
|
||||
else {
|
||||
items.add(new GemtextText(line, heading));
|
||||
}
|
||||
}
|
||||
return items.toArray(AbstractGemtextLine[]::new);
|
||||
}
|
||||
|
||||
private static int getBlockQuote(List<AbstractGemtextLine> items, String[] lines, MemexNodeHeadingId heading, int i) {
|
||||
int j = i+1;
|
||||
List<String> quotedLines = new ArrayList<>();
|
||||
for (;j < lines.length; j++) {
|
||||
if (lines[j].startsWith(PREFORMAT_MARKER)) {
|
||||
break;
|
||||
}
|
||||
quotedLines.add(lines[j]);
|
||||
}
|
||||
items.add(new GemtextPreformat(quotedLines, heading));
|
||||
return j;
|
||||
}
|
||||
|
||||
private static int getList(List<AbstractGemtextLine> items, String[] lines, MemexNodeHeadingId heading, int i) {
|
||||
int j = i;
|
||||
List<String> listLines = new ArrayList<>();
|
||||
for (;j < lines.length; j++) {
|
||||
if (!lines[j].startsWith(LIST_MARKER)) {
|
||||
break;
|
||||
}
|
||||
listLines.add(GemtextListParser.parse(lines[j]));
|
||||
}
|
||||
items.add(new GemtextList(listLines, heading));
|
||||
return j-1;
|
||||
}
|
||||
private static int getLitteral(List<AbstractGemtextLine> items, String[] lines, MemexNodeHeadingId heading, int i) {
|
||||
int j = i;
|
||||
List<String> listLines = new ArrayList<>();
|
||||
for (;j < lines.length; j++) {
|
||||
if (!lines[j].startsWith(LITERAL_MARKER)) {
|
||||
break;
|
||||
}
|
||||
listLines.add(lines[j]);
|
||||
}
|
||||
items.add(new GemtextTextLiteral(listLines, heading));
|
||||
return j-1;
|
||||
}
|
||||
|
||||
private static int getQuote(List<AbstractGemtextLine> items, String[] lines, MemexNodeHeadingId heading, int i) {
|
||||
int j = i;
|
||||
List<String> listLines = new ArrayList<>();
|
||||
for (;j < lines.length; j++) {
|
||||
if (!lines[j].startsWith(QUOTE_MARKER)) {
|
||||
break;
|
||||
}
|
||||
listLines.add(GemtextQuoteParser.parse(lines[j]));
|
||||
}
|
||||
items.add(new GemtextQuote(listLines, heading));
|
||||
return j-1;
|
||||
}
|
||||
}
|
@ -1,26 +0,0 @@
|
||||
package nu.marginalia.memex.gemini.gmi.parser;
|
||||
|
||||
import nu.marginalia.memex.gemini.gmi.line.AbstractGemtextLine;
|
||||
import nu.marginalia.memex.gemini.gmi.line.GemtextPragma;
|
||||
import nu.marginalia.memex.gemini.gmi.line.GemtextText;
|
||||
import nu.marginalia.memex.memex.model.MemexNodeHeadingId;
|
||||
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
public class GemtextPragmaParser {
|
||||
private static final Pattern pragmaPattern = Pattern.compile("^%%%\\s*(.*|$)$");
|
||||
|
||||
public static AbstractGemtextLine parse(String s, MemexNodeHeadingId heading) {
|
||||
var matcher = pragmaPattern.matcher(s);
|
||||
|
||||
if (!matcher.matches()) {
|
||||
return new GemtextText(s, heading);
|
||||
}
|
||||
|
||||
String task = matcher.group(1);
|
||||
|
||||
return new GemtextPragma(task, heading);
|
||||
}
|
||||
|
||||
|
||||
}
|
@ -1,17 +0,0 @@
|
||||
package nu.marginalia.memex.gemini.gmi.parser;
|
||||
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/** Extracts the text of a quote line, i.e. everything after the leading {@code >}. */
public class GemtextQuoteParser {
    private static final Pattern quotePattern = Pattern.compile("^>(.+)$");

    /**
     * @param s the raw line
     * @return the quoted text (leading whitespace kept), or null if
     *         {@code s} is not a quote line or has nothing after the marker
     */
    public static String parse(String s) {
        var m = quotePattern.matcher(s);
        return m.matches() ? m.group(1) : null;
    }
}
|
@ -1,31 +0,0 @@
|
||||
package nu.marginalia.memex.gemini.gmi.parser;
|
||||
|
||||
import nu.marginalia.memex.gemini.gmi.line.AbstractGemtextLine;
|
||||
import nu.marginalia.memex.gemini.gmi.line.GemtextTask;
|
||||
import nu.marginalia.memex.gemini.gmi.line.GemtextText;
|
||||
import nu.marginalia.memex.memex.model.MemexNodeHeadingId;
|
||||
import nu.marginalia.memex.memex.model.MemexNodeTaskId;
|
||||
import nu.marginalia.memex.memex.model.MemexTaskTags;
|
||||
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
public class GemtextTaskParser {
|
||||
private static final Pattern taskPattern = Pattern.compile("^(-+)\\s*([^-].*|$)$");
|
||||
|
||||
public static AbstractGemtextLine parse(String s, MemexNodeHeadingId heading,
|
||||
MemexNodeTaskId taskId) {
|
||||
var matcher = taskPattern.matcher(s);
|
||||
|
||||
if (!matcher.matches()) {
|
||||
return new GemtextText(s, heading);
|
||||
}
|
||||
|
||||
int level = matcher.group(1).length() - 1;
|
||||
|
||||
String task = matcher.group(2);
|
||||
|
||||
return new GemtextTask(taskId.next(level), task, heading, new MemexTaskTags(task));
|
||||
}
|
||||
|
||||
|
||||
}
|
@ -1,91 +0,0 @@
|
||||
package nu.marginalia.memex.gemini.gmi.renderer;
|
||||
|
||||
import nu.marginalia.memex.gemini.gmi.line.*;
|
||||
|
||||
import java.util.function.Function;
|
||||
|
||||
public class GemtextRenderer implements GemtextLineVisitor<String> {
|
||||
|
||||
private final Function<GemtextHeading, String> headingConverter;
|
||||
private final Function<GemtextLink, String> linkConverter;
|
||||
private final Function<GemtextList, String> listConverter;
|
||||
private final Function<GemtextPreformat, String> preformatConverter;
|
||||
private final Function<GemtextQuote, String> quoteConverter;
|
||||
private final Function<GemtextText, String> textConverter;
|
||||
private final Function<GemtextAside, String> asideConverter;
|
||||
private final Function<GemtextTask, String> taskConverter;
|
||||
private final Function<GemtextTextLiteral, String> literalConverter;
|
||||
private final Function<GemtextPragma, String> pragmaConverter;
|
||||
|
||||
public GemtextRenderer(Function<GemtextHeading, String> headingConverter,
|
||||
Function<GemtextLink, String> linkConverter,
|
||||
Function<GemtextList, String> listConverter,
|
||||
Function<GemtextPreformat, String> preformatConverter,
|
||||
Function<GemtextQuote, String> quoteConverter,
|
||||
Function<GemtextText, String> textConverter,
|
||||
Function<GemtextAside, String> asideConverter,
|
||||
Function<GemtextTask, String> taskConverter,
|
||||
Function<GemtextTextLiteral, String> literalConverter,
|
||||
Function<GemtextPragma, String> pragmaConverter
|
||||
) {
|
||||
this.headingConverter = headingConverter;
|
||||
this.linkConverter = linkConverter;
|
||||
this.listConverter = listConverter;
|
||||
this.preformatConverter = preformatConverter;
|
||||
this.quoteConverter = quoteConverter;
|
||||
this.textConverter = textConverter;
|
||||
this.asideConverter = asideConverter;
|
||||
this.taskConverter = taskConverter;
|
||||
this.literalConverter = literalConverter;
|
||||
this.pragmaConverter = pragmaConverter;
|
||||
}
|
||||
|
||||
|
||||
public String renderLine(AbstractGemtextLine line) {
|
||||
return line.visit(this);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String visit(GemtextHeading g) {
|
||||
return headingConverter.apply(g);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String visit(GemtextLink g) {
|
||||
return linkConverter.apply(g);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String visit(GemtextList g) {
|
||||
return listConverter.apply(g);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String visit(GemtextPreformat g) {
|
||||
return preformatConverter.apply(g);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String visit(GemtextQuote g) {
|
||||
return quoteConverter.apply(g);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String visit(GemtextText g) {
|
||||
return textConverter.apply(g);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String visit(GemtextTextLiteral g) {
|
||||
return literalConverter.apply(g);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String visit(GemtextAside g) { return asideConverter.apply(g); }
|
||||
|
||||
@Override
|
||||
public String visit(GemtextTask g) { return taskConverter.apply(g); }
|
||||
|
||||
@Override
|
||||
public String visit(GemtextPragma g) { return pragmaConverter.apply(g); }
|
||||
}
|
@ -1,227 +0,0 @@
|
||||
package nu.marginalia.memex.gemini.gmi.renderer;
|
||||
|
||||
import nu.marginalia.memex.gemini.gmi.line.*;
|
||||
import nu.marginalia.memex.memex.model.MemexNodeUrl;
|
||||
import nu.marginalia.memex.memex.model.MemexUrl;
|
||||
import org.apache.logging.log4j.util.Strings;
|
||||
|
||||
import java.util.Objects;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
public class GemtextRendererFactory {
|
||||
|
||||
public final String urlBase;
|
||||
public final String docUrl;
|
||||
|
||||
public GemtextRendererFactory(String urlBase, String docUrl) {
|
||||
this.urlBase = Objects.requireNonNull(urlBase, "urlBase must not be null");
|
||||
this.docUrl = Objects.requireNonNull(docUrl, "docUrl must not be null");
|
||||
}
|
||||
|
||||
public GemtextRendererFactory(String urlBase) {
|
||||
this.urlBase = Objects.requireNonNull(urlBase, "urlBase must not be null");
|
||||
this.docUrl = null;
|
||||
}
|
||||
|
||||
public GemtextRendererFactory() {
|
||||
this.urlBase = null;
|
||||
this.docUrl = null;
|
||||
}
|
||||
|
||||
public GemtextRenderer htmlRendererEditable() {
|
||||
return new GemtextRenderer(this::htmlHeadingEditable,
|
||||
this::htmlLink, this::htmlList,
|
||||
this::htmlPre, this::htmlQuote,
|
||||
this::htmlText, this::htmlAside,
|
||||
this::htmlTask, this::htmlLiteral,
|
||||
this::htmlPragma);
|
||||
}
|
||||
|
||||
public GemtextRenderer htmlRendererReadOnly() {
|
||||
return new GemtextRenderer(this::htmlHeadingReadOnly,
|
||||
this::htmlLink, this::htmlList,
|
||||
this::htmlPre, this::htmlQuote,
|
||||
this::htmlText, this::htmlAside,
|
||||
this::htmlTask, this::htmlLiteral,
|
||||
this::htmlPragma);
|
||||
}
|
||||
|
||||
|
||||
public GemtextRenderer gemtextRendererAsIs() {
|
||||
return new GemtextRenderer(this::rawHeading,
|
||||
this::rawLink, this::rawList,
|
||||
this::rawPre, this::rawQuote,
|
||||
this::rawText, this::rawAside,
|
||||
this::rawTask, this::rawLiteral,
|
||||
this::rawPragma);
|
||||
}
|
||||
|
||||
|
||||
public GemtextRenderer gemtextRendererPublic() {
|
||||
return new GemtextRenderer(this::rawHeading,
|
||||
this::rawLink, this::rawList,
|
||||
this::rawPre, this::rawQuote,
|
||||
this::rawText, this::rawAside,
|
||||
this::rawTask, this::rawLiteral,
|
||||
this::rawSupressPragma);
|
||||
}
|
||||
|
||||
|
||||
private String htmlPragma(GemtextPragma gemtextPragma) {
|
||||
return "<!-- pragma: " + sanitizeText(gemtextPragma.getLine()) + " -->\n";
|
||||
}
|
||||
|
||||
public String htmlHeadingEditable(GemtextHeading g) {
|
||||
if (docUrl == null) {
|
||||
throw new UnsupportedOperationException("Wrong constructor used, need urlBase and docUrl");
|
||||
}
|
||||
// String editLink = String.format("\n<a class=\"utility\" href=\"%s/edit/%s\">Edit</a>\n", urlBase + docUrl, g.getLevel());
|
||||
|
||||
return htmlHeadingReadOnly(g);
|
||||
}
|
||||
|
||||
public String htmlHeadingReadOnly(GemtextHeading g) {
|
||||
if (g.getLevel().getLevel() == 1)
|
||||
return String.format("<h1 id=\"%s\">%s</h1>\n", g.getLevel(), sanitizeText(g.getName()));
|
||||
if (g.getLevel().getLevel() == 2)
|
||||
return String.format("<h2 id=\"%s\">%s</h2>\n", g.getLevel(), sanitizeText(g.getName()));
|
||||
if (g.getLevel().getLevel() == 3)
|
||||
return String.format("<h3 id=\"%s\">%s</h3>\n", g.getLevel(), sanitizeText(g.getName()));
|
||||
|
||||
return String.format("<h4 id=\"%s\">%s</h4>\n", g.getLevel(), sanitizeText(g.getName()));
|
||||
}
|
||||
|
||||
public String htmlLink(GemtextLink g) {
|
||||
if (urlBase == null) {
|
||||
throw new UnsupportedOperationException("Wrong constructor used, need urlBase");
|
||||
}
|
||||
final String linkClass = getLinkClass(g.getUrl());
|
||||
final String linkUrl = getLinkUrl(g.getUrl()).replaceFirst("^gemini://", "https://proxy.vulpes.one/gemini/");
|
||||
if (g.getTitle() != null) {
|
||||
return String.format("<dl class=\"link\"><dt><a class=\"%s\" href=\"%s\">%s</a></dt><dd>%s</dd></dl>\n",
|
||||
linkClass, linkUrl, g.getUrl(), sanitizeText(g.getTitle()));
|
||||
}
|
||||
else {
|
||||
return String.format("<a class=\"%s\" href=\"%s\">%s</a><br>\n",
|
||||
linkClass, linkUrl, g.getUrl());
|
||||
}
|
||||
}
|
||||
private String getLinkUrl(MemexUrl url) {
|
||||
if (url instanceof MemexNodeUrl || url.getUrl().startsWith("/")) {
|
||||
return urlBase + url;
|
||||
}
|
||||
return url.toString();
|
||||
}
|
||||
|
||||
private String getLinkClass(MemexUrl url) {
|
||||
if (url instanceof MemexNodeUrl) {
|
||||
return "internal";
|
||||
}
|
||||
return "external";
|
||||
}
|
||||
public String htmlList(GemtextList g) {
|
||||
return g.getItems()
|
||||
.stream()
|
||||
.map(s -> "<li>" + sanitizeText(s) + "</li>")
|
||||
.collect(
|
||||
Collectors.joining("\n", "<ul>\n", "</ul>\n"));
|
||||
}
|
||||
|
||||
public String htmlPre(GemtextPreformat g) {
|
||||
return g.getItems().stream()
|
||||
.map(this::sanitizeText)
|
||||
.collect(
|
||||
Collectors.joining("\n", "<pre>\n", "</pre>\n"));
|
||||
}
|
||||
|
||||
public String htmlLiteral(GemtextTextLiteral g) {
|
||||
return g.getItems().stream()
|
||||
.map(this::sanitizeText)
|
||||
.collect(
|
||||
Collectors.joining("\n", "<pre class=\"literal\">\n", "</pre>\n"));
|
||||
}
|
||||
public String htmlQuote(GemtextQuote g) {
|
||||
return g.getItems().stream()
|
||||
.map(this::sanitizeText)
|
||||
.collect(
|
||||
Collectors.joining("<br>\n", "<blockquote>\n", "</blockquote>\n"));
|
||||
|
||||
}
|
||||
public String htmlText(GemtextText g) {
|
||||
return sanitizeText(g.getLine()) + "<br>\n";
|
||||
}
|
||||
public String htmlAside(GemtextAside g) {
|
||||
return "<aside>" + sanitizeText(g.getLine()) + "</aside>\n";
|
||||
}
|
||||
|
||||
public String sanitizeText(String s) {
|
||||
return s.replaceAll("<", "<").replaceAll(">", ">");
|
||||
}
|
||||
|
||||
public String htmlTask(GemtextTask g) {
|
||||
return String.format("<a class=\"task-pointer\" name=\"t%s\"></a><div class=\"task %s\" id=\"%s\">%s %s</div>\n",
|
||||
g.getId(),
|
||||
g.getState().style,
|
||||
g.getId(),
|
||||
"-".repeat(g.getLevel()),
|
||||
g.getTask());
|
||||
}
|
||||
|
||||
public String rawHeading(GemtextHeading g) {
|
||||
if (g.getLevel().getLevel() == 1)
|
||||
return "# " + g.getName();
|
||||
if (g.getLevel().getLevel() == 2)
|
||||
return "## " + g.getName();
|
||||
if (g.getLevel().getLevel() == 3)
|
||||
return "### " + g.getName();
|
||||
|
||||
return "### " + g.getName();
|
||||
}
|
||||
|
||||
public String rawLink(GemtextLink g) {
|
||||
if (g.getTitle() != null && !g.getTitle().isBlank()) {
|
||||
return "=> " + g.getUrl().getUrl() + "\t" + g.getTitle();
|
||||
}
|
||||
return "=> " + g.getUrl().getUrl();
|
||||
}
|
||||
|
||||
public String rawList(GemtextList g) {
|
||||
return g.getItems()
|
||||
.stream()
|
||||
.map(s -> "* " + s)
|
||||
.collect(Collectors.joining("\n"));
|
||||
}
|
||||
|
||||
public String rawPre(GemtextPreformat g) {
|
||||
return g.getItems().stream()
|
||||
.collect(Collectors.joining("\n", "```\n", "\n```"));
|
||||
}
|
||||
|
||||
public String rawQuote(GemtextQuote g) {
|
||||
return g.getItems().stream()
|
||||
.map(s -> "> " + s)
|
||||
.collect(Collectors.joining());
|
||||
|
||||
}
|
||||
|
||||
public String rawText(GemtextText g) {
|
||||
return g.getLine();
|
||||
}
|
||||
|
||||
public String rawLiteral(GemtextTextLiteral g) {
|
||||
return Strings.join(g.getItems(), '\n');
|
||||
}
|
||||
|
||||
public String rawAside(GemtextAside g) {
|
||||
return "(" + g.getLine() + ")";
|
||||
}
|
||||
public String rawTask(GemtextTask g) {
|
||||
return "-".repeat(Math.max(0, g.getLevel())) + " " + g.getTask();
|
||||
}
|
||||
private String rawPragma(GemtextPragma gemtextPragma) {
|
||||
return "%%% " + gemtextPragma.getLine();
|
||||
}
|
||||
private String rawSupressPragma(GemtextPragma gemtextPragma) {
|
||||
return "";
|
||||
}
|
||||
}
|
@ -1,185 +0,0 @@
|
||||
package nu.marginalia.memex.gemini.io;
|
||||
|
||||
import nu.marginalia.memex.gemini.BadBotList;
|
||||
import nu.marginalia.memex.gemini.plugins.FileType;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import javax.net.ssl.SSLSocket;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.net.URI;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.Optional;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
public class GeminiConnection {
|
||||
private final SSLSocket connection;
|
||||
|
||||
private final Logger logger = LoggerFactory.getLogger("Server");
|
||||
private final OutputStream os;
|
||||
private final InputStream is;
|
||||
private static final BadBotList badBotList = BadBotList.INSTANCE;
|
||||
|
||||
public GeminiConnection(SSLSocket connection) throws IOException {
|
||||
this.connection = connection;
|
||||
|
||||
this.os = connection.getOutputStream();
|
||||
this.is = connection.getInputStream();
|
||||
|
||||
}
|
||||
|
||||
public String getAddress() {
|
||||
return connection.getInetAddress().getHostAddress();
|
||||
}
|
||||
|
||||
public Optional<URI> readUrl() throws Exception {
|
||||
|
||||
var str = new GeminiInput().get();
|
||||
if (!badBotList.isQueryPermitted(connection.getInetAddress(), str)) {
|
||||
return Optional.empty();
|
||||
}
|
||||
if (!str.isBlank()) {
|
||||
return Optional.of(new URI(str));
|
||||
}
|
||||
throw new GeminiUserException("Bad URI");
|
||||
}
|
||||
|
||||
public void redirect(String address) throws IOException {
|
||||
writeStatusLine(GeminiStatusCode.REDIRECT, address);
|
||||
}
|
||||
public void redirectPermanent(String address) throws IOException {
|
||||
writeStatusLine(GeminiStatusCode.REDIRECT_PERMANENT, address);
|
||||
}
|
||||
public GeminiConnection writeStatusLine(int code, String meta) throws IOException {
|
||||
write(String.format("%2d %s", code, meta));
|
||||
return this;
|
||||
}
|
||||
|
||||
public GeminiConnection writeBytes(byte[] data) throws IOException {
|
||||
write(data);
|
||||
return this;
|
||||
}
|
||||
|
||||
public GeminiConnection printf(String pattern, Object...args) throws IOException {
|
||||
write(String.format(pattern, args));
|
||||
return this;
|
||||
}
|
||||
|
||||
public GeminiConnection writeLines(String... lines) throws IOException {
|
||||
for (String s : lines) {
|
||||
write(s);
|
||||
}
|
||||
return this;
|
||||
}
|
||||
public GeminiConnection writeLinesFromFile(Path file) throws IOException {
|
||||
try (Stream<String> lines = Files.lines(file)) {
|
||||
lines.forEach(line -> {
|
||||
try {
|
||||
write(line);
|
||||
} catch (IOException e) {
|
||||
logger.error("IO Error", e);
|
||||
}
|
||||
});
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
public GeminiConnection acceptLines(Stream<String> lines) {
|
||||
lines.forEach(line -> {
|
||||
try {
|
||||
write(line);
|
||||
} catch (IOException e) {
|
||||
logger.error("IO exception", e);
|
||||
}
|
||||
});
|
||||
return this;
|
||||
}
|
||||
|
||||
private void write(String s) throws IOException {
|
||||
os.write(s.getBytes(StandardCharsets.UTF_8));
|
||||
os.write(new byte[] { '\r', '\n'});
|
||||
}
|
||||
|
||||
private void write(byte[] bs) throws IOException {
|
||||
os.write(bs);
|
||||
}
|
||||
// This is a weird pattern but it makes the listing code very much cleaner
|
||||
|
||||
public void error(String message) {
|
||||
logger.error("{}", message);
|
||||
|
||||
throw new GeminiUserException(message);
|
||||
}
|
||||
|
||||
public void close() {
|
||||
try {
|
||||
connection.shutdownOutput();
|
||||
connection.close();
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
public boolean isConnected() {
|
||||
return connection.isConnected();
|
||||
}
|
||||
|
||||
public void respondWithFile(Path serverPath, FileType fileType) throws IOException {
|
||||
if (fileType.binary) {
|
||||
writeStatusLine(GeminiStatusCode.SUCCESS, fileType.mime)
|
||||
.writeBytes(Files.readAllBytes(serverPath));
|
||||
}
|
||||
else {
|
||||
writeStatusLine(GeminiStatusCode.SUCCESS, fileType.mime)
|
||||
.writeLinesFromFile(serverPath);
|
||||
}
|
||||
}
|
||||
|
||||
public class GeminiInput {
|
||||
private final byte[] buffer = new byte[1024];
|
||||
private int idx = 0;
|
||||
|
||||
final String result;
|
||||
|
||||
public GeminiInput() throws IOException {
|
||||
|
||||
for (idx = 0; idx < buffer.length; idx++) {
|
||||
if (hasEndOfLine()) {
|
||||
result = new String(buffer, 0, idx-2, StandardCharsets.UTF_8);
|
||||
return;
|
||||
}
|
||||
|
||||
readCharacter();
|
||||
}
|
||||
|
||||
error("String too long");
|
||||
|
||||
// unreachable
|
||||
result = "";
|
||||
}
|
||||
|
||||
public String get() {
|
||||
return result;
|
||||
}
|
||||
|
||||
private void readCharacter() throws IOException {
|
||||
int rb = is.read();
|
||||
if (-1 == rb) {
|
||||
error("URL incomplete (no CR LF)");
|
||||
}
|
||||
buffer[idx] = (byte) rb;
|
||||
}
|
||||
|
||||
public boolean hasEndOfLine() {
|
||||
return idx > 2
|
||||
&& buffer[idx - 1] == (byte) '\n'
|
||||
&& buffer[idx - 2] == (byte) '\r';
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
@ -1,49 +0,0 @@
|
||||
package nu.marginalia.memex.gemini.io;
|
||||
|
||||
import com.google.inject.Inject;
|
||||
import com.google.inject.name.Named;
|
||||
|
||||
import javax.net.ssl.*;
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.security.KeyStore;
|
||||
import java.security.SecureRandom;
|
||||
|
||||
public class GeminiSSLSetUp {
|
||||
private final Path certPasswordFile;
|
||||
private final Path certFile;
|
||||
|
||||
@Inject
|
||||
public GeminiSSLSetUp(
|
||||
@Named("gemini-cert-file") Path certFile,
|
||||
@Named("gemini-cert-password-file") Path certPasswordFile) {
|
||||
this.certFile = certFile;
|
||||
this.certPasswordFile = certPasswordFile;
|
||||
}
|
||||
public String getCertPassword() throws IOException {
|
||||
return Files.readString(certPasswordFile);
|
||||
}
|
||||
|
||||
private SSLContext getContext() throws Exception {
|
||||
KeyStore ks = KeyStore.getInstance("JKS", "SUN");
|
||||
ks.load(Files.newInputStream(certFile), getCertPassword().toCharArray());
|
||||
|
||||
KeyManagerFactory kmf = KeyManagerFactory.getInstance("SunX509");
|
||||
kmf.init(ks, getCertPassword().toCharArray());
|
||||
KeyManager[] keyManagers = kmf.getKeyManagers();
|
||||
|
||||
TrustManagerFactory tmf = TrustManagerFactory.getInstance("X509");
|
||||
tmf.init(ks);
|
||||
TrustManager[] trustManagers = tmf.getTrustManagers();
|
||||
|
||||
var ctx = SSLContext.getInstance("TLSv1.3");
|
||||
ctx.init(keyManagers, trustManagers, new SecureRandom());
|
||||
return ctx;
|
||||
}
|
||||
|
||||
|
||||
public SSLServerSocketFactory getServerSocketFactory() throws Exception {
|
||||
return getContext().getServerSocketFactory();
|
||||
}
|
||||
}
|
@ -1,11 +0,0 @@
|
||||
package nu.marginalia.memex.gemini.io;
|
||||
|
||||
/**
 * Numeric status codes used in Gemini response status lines, listed in
 * ascending order.
 */
public class GeminiStatusCode {
    // 1x: input expected
    public static final int INPUT = 10;

    // 2x: success
    public static final int SUCCESS = 20;

    // 3x: redirects
    public static final int REDIRECT = 30;
    public static final int REDIRECT_PERMANENT = 31;

    // 4x: temporary failures
    public static final int ERROR_TEMPORARY = 40;
    public static final int PROXY_ERROR = 43;

    // 5x: permanent failures
    public static final int ERROR_PERMANENT = 50;
}
|
@ -1,8 +0,0 @@
|
||||
package nu.marginalia.memex.gemini.io;
|
||||
|
||||
/**
 * Unchecked exception whose message is meant to be reported to the user.
 */
public class GeminiUserException extends RuntimeException {

    /**
     * @param message the text to report to the user
     */
    public GeminiUserException(String message) {
        super(message);
    }

}
|
@ -1,52 +0,0 @@
|
||||
package nu.marginalia.memex.gemini.plugins;
|
||||
|
||||
import com.google.inject.Inject;
|
||||
import com.google.inject.name.Named;
|
||||
import nu.marginalia.memex.gemini.GeminiService;
|
||||
import nu.marginalia.memex.gemini.io.GeminiConnection;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.URI;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
|
||||
public class BareStaticPagePlugin implements Plugin {
|
||||
|
||||
private final Logger logger = LoggerFactory.getLogger(getClass());
|
||||
private final Path geminiServerRoot;
|
||||
|
||||
@Inject
|
||||
public BareStaticPagePlugin(@Named("gemini-server-root") Path geminiServerRoot) {
|
||||
this.geminiServerRoot = geminiServerRoot;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean serve(URI url, GeminiConnection connection) throws IOException {
|
||||
|
||||
final Path serverPath = getServerPath(url.getPath());
|
||||
|
||||
if (!Files.isRegularFile(serverPath)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
verifyPath(geminiServerRoot, serverPath);
|
||||
logger.info("Serving {}", serverPath);
|
||||
|
||||
connection.respondWithFile(serverPath, FileType.match(serverPath));
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
private Path getServerPath(String requestPath) {
|
||||
final Path serverPath = Path.of(geminiServerRoot + requestPath);
|
||||
|
||||
if (Files.isDirectory(serverPath) && Files.isRegularFile(serverPath.resolve(GeminiService.DEFAULT_FILENAME))) {
|
||||
return serverPath.resolve(GeminiService.DEFAULT_FILENAME);
|
||||
}
|
||||
|
||||
return serverPath;
|
||||
}
|
||||
|
||||
}
|
@ -1,58 +0,0 @@
|
||||
package nu.marginalia.memex.gemini.plugins;
|
||||
|
||||
import java.nio.file.Path;
|
||||
|
||||
/**
 * File types known to the server, identified by file name suffix. Each type
 * carries its MIME type, a display icon and whether the content is binary.
 */
public enum FileType {
    GMI("gmi", "text/gemini", FileIcons.DOCUMENT, false),
    GEM("gem", "text/gemini", FileIcons.DOCUMENT, false),
    TXT("txt", "text/plain", FileIcons.DOCUMENT, false),
    MARKDOWN("md", "text/markdown", FileIcons.DOCUMENT, false),
    JAVA("java", "text/java", FileIcons.JAVA, false),
    PROPERTIES("properties", "text/properties", FileIcons.SETTINGS, false),
    GRADLE("gradle", "text/gradle", FileIcons.SETTINGS, false),
    ZIP("zip", "application/zip", FileIcons.ZIP, true),
    PNG("png", "image/png", FileIcons.IMAGE, true),
    // image/jpeg is the registered media type for both spellings of the extension
    JPG("jpg", "image/jpeg", FileIcons.IMAGE, true),
    JPEG("jpeg", "image/jpeg", FileIcons.IMAGE, true),
    BIN("bin", "application/binary", FileIcons.BINARY, true),
    SH("sh", "text/sh", FileIcons.SETTINGS, false),
    XML("xml", "text/xml", FileIcons.DOCUMENT, false),
    DOCKERFILE("Dockerfile", "text/dockerfile", FileIcons.SETTINGS, false)
    ;

    public final String suffix;
    public final String mime;
    public final String icon;
    public final boolean binary;

    FileType(String suffix, String mime, String icon, boolean binary) {
        this.suffix = suffix;
        this.mime = mime;

        this.icon = icon;
        this.binary = binary;
    }

    /**
     * Determines the type of a file from its name.
     *
     * <p>Only the last path segment is considered. It matches a type when it
     * is exactly the type's suffix (e.g. {@code Dockerfile}) or ends in a dot
     * followed by the suffix (e.g. {@code notes.txt}); a name that merely ends
     * in the same letters, such as {@code trash} for {@code sh}, does not.</p>
     *
     * @param fileName a file name or path, must not be null
     * @return the first matching type in declaration order, or {@link #BIN} if none matches
     */
    public static FileType match(String fileName) {
        final int lastSeparator = Math.max(fileName.lastIndexOf('/'), fileName.lastIndexOf('\\'));
        final String baseName = fileName.substring(lastSeparator + 1);

        for (var type : values()) {
            if (baseName.equals(type.suffix) || baseName.endsWith("." + type.suffix)) {
                return type;
            }
        }
        return BIN;
    }

    /** Same as {@link #match(String)}, applied to the string form of the path. */
    public static FileType match(Path path) {
        return match(path.toString());
    }

}

/** Icons shown next to entries of the respective kind. */
final class FileIcons {
    public static final String DOCUMENT = "🗒";
    public static final String JAVA = "♨";
    public static final String SETTINGS = "💻";
    public static final String ZIP = "🗜";
    public static final String IMAGE = "🖼";
    public static final String DIRECTORY = "🗂";
    public static final String BINARY = "📚";

    private FileIcons() {
    }
}
|
@ -1,19 +0,0 @@
|
||||
package nu.marginalia.memex.gemini.plugins;
|
||||
|
||||
import nu.marginalia.memex.gemini.io.GeminiConnection;
|
||||
import nu.marginalia.memex.gemini.io.GeminiUserException;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.URI;
|
||||
import java.nio.file.Path;
|
||||
|
||||
public interface Plugin {
|
||||
/** @return true if content served */
|
||||
boolean serve(URI url, GeminiConnection connection) throws IOException;
|
||||
|
||||
default void verifyPath(Path root, Path p) {
|
||||
if (!p.normalize().startsWith(root)) {
|
||||
throw new GeminiUserException("ಠ_ಠ That path is off limits!");
|
||||
}
|
||||
}
|
||||
}
|
@ -1,78 +0,0 @@
|
||||
package nu.marginalia.memex.gemini.plugins;
|
||||
|
||||
import com.google.inject.Inject;
|
||||
import nu.marginalia.memex.gemini.io.GeminiConnection;
|
||||
import nu.marginalia.memex.gemini.io.GeminiStatusCode;
|
||||
import org.apache.http.HttpHost;
|
||||
import org.apache.http.client.methods.HttpGet;
|
||||
import org.apache.http.conn.routing.HttpRoute;
|
||||
import org.apache.http.impl.client.HttpClients;
|
||||
import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.URI;
|
||||
import java.net.URISyntaxException;
|
||||
|
||||
public class SearchPlugin implements Plugin {
|
||||
private final PoolingHttpClientConnectionManager connectionManager;
|
||||
private final Logger logger = LoggerFactory.getLogger(getClass());
|
||||
|
||||
@Inject
|
||||
public SearchPlugin() {
|
||||
|
||||
connectionManager = new PoolingHttpClientConnectionManager();
|
||||
connectionManager.setMaxTotal(200);
|
||||
connectionManager.setDefaultMaxPerRoute(20);
|
||||
HttpHost host = new HttpHost("https://search.marginalia.nu/");
|
||||
connectionManager.setMaxPerRoute(new HttpRoute(host), 20);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean serve(URI url, GeminiConnection connection) throws IOException {
|
||||
var client = HttpClients.custom()
|
||||
.setConnectionManager(connectionManager)
|
||||
.build();
|
||||
|
||||
if (!"/search".equals(url.getPath())) {
|
||||
return false;
|
||||
}
|
||||
|
||||
String query = url.getRawQuery();
|
||||
|
||||
if (null == query || "".equals(query)) {
|
||||
logger.info("Requesting search terms");
|
||||
connection.writeStatusLine(GeminiStatusCode.INPUT, "Please enter a search query");
|
||||
}
|
||||
else {
|
||||
logger.info("Delegating search query '{}'", query);
|
||||
|
||||
final HttpGet get = new HttpGet(createSearchUri(query));
|
||||
final byte[] binaryResponse;
|
||||
|
||||
try (var rsp = client.execute(get)) {
|
||||
binaryResponse = rsp.getEntity().getContent().readAllBytes();
|
||||
}
|
||||
catch (IOException ex) {
|
||||
logger.error("backend error", ex);
|
||||
|
||||
connection.writeStatusLine(GeminiStatusCode.PROXY_ERROR, "Failed to reach backend server");
|
||||
return true;
|
||||
}
|
||||
|
||||
connection
|
||||
.writeStatusLine(GeminiStatusCode.SUCCESS, "text/gemini")
|
||||
.writeBytes(binaryResponse);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private URI createSearchUri(String query) {
|
||||
try {
|
||||
return new URI("https://search.marginalia.nu/search?format=gmi&query="+query);
|
||||
} catch (URISyntaxException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
}
|
@ -1,244 +0,0 @@
|
||||
package nu.marginalia.memex.memex;
|
||||
|
||||
import com.google.inject.Inject;
|
||||
import com.google.inject.Singleton;
|
||||
import com.google.inject.name.Named;
|
||||
import io.reactivex.rxjava3.schedulers.Schedulers;
|
||||
import nu.marginalia.memex.gemini.GeminiService;
|
||||
import nu.marginalia.memex.gemini.gmi.GemtextDatabase;
|
||||
import nu.marginalia.memex.gemini.gmi.GemtextDocument;
|
||||
import nu.marginalia.memex.util.dithering.FloydSteinbergDither;
|
||||
import nu.marginalia.memex.util.dithering.Palettes;
|
||||
import nu.marginalia.memex.memex.change.GemtextTombstoneUpdateCaclulator;
|
||||
import nu.marginalia.memex.memex.model.MemexImage;
|
||||
import nu.marginalia.memex.memex.model.MemexNode;
|
||||
import nu.marginalia.memex.memex.model.MemexNodeUrl;
|
||||
import nu.marginalia.memex.memex.renderer.MemexRendererers;
|
||||
import nu.marginalia.memex.memex.system.MemexFileSystemMonitor;
|
||||
import nu.marginalia.memex.memex.system.MemexFileWriter;
|
||||
import nu.marginalia.memex.memex.system.git.MemexGitRepo;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
import javax.imageio.ImageIO;
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
@Singleton
|
||||
public class Memex {
|
||||
|
||||
private final MemexData data;
|
||||
private final MemexFileSystemMonitor monitor;
|
||||
private final MemexGitRepo gitRepo;
|
||||
private final MemexLoader loader;
|
||||
|
||||
private final MemexFileWriter resources;
|
||||
private final GemtextTombstoneUpdateCaclulator tombstoneUpdateCaclulator;
|
||||
|
||||
private final FloydSteinbergDither ditherer = new FloydSteinbergDither(Palettes.MARGINALIA_PALETTE, 640, 480);
|
||||
private final MemexRendererers renderers;
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(Memex.class);
|
||||
|
||||
/**
 * Stores the collaborators and kicks off background work: the initial load on
 * the I/O scheduler, a once-per-second refresh of updated URLs when a file
 * system monitor is available, and the gemini service on a thread of its own.
 */
@Inject
public Memex(MemexData data,
             @Nullable MemexFileSystemMonitor monitor,
             MemexGitRepo gitRepo, MemexLoader loader,
             @Named("html") MemexFileWriter htmlFiles,
             GemtextTombstoneUpdateCaclulator tombstoneUpdateCaclulator,
             MemexRendererers renderers,
             GeminiService geminiService) {
    // All fields are assigned before any task is scheduled
    this.renderers = renderers;
    this.tombstoneUpdateCaclulator = tombstoneUpdateCaclulator;
    this.resources = htmlFiles;
    this.loader = loader;
    this.gitRepo = gitRepo;
    this.monitor = monitor;
    this.data = data;

    Schedulers.io().scheduleDirect(this::load);

    // Without a monitor there is nothing to poll for changes
    if (null != monitor) {
        Schedulers.io().schedulePeriodicallyDirect(this::refreshUpdatedUrls, 1, 1, TimeUnit.SECONDS);
    }

    Schedulers.newThread().scheduleDirect(geminiService::run);
}
|
||||
|
||||
private void refreshUpdatedUrls() {
|
||||
var updatedUrls = monitor.getUpdatedUrls();
|
||||
for (var url : updatedUrls) {
|
||||
try {
|
||||
if (url.toString().endsWith(".gmi")) {
|
||||
var updates = loader.reloadNode(url);
|
||||
updates.forEach(renderers::render);
|
||||
|
||||
if (!updates.isEmpty()) {
|
||||
renderers.render(url.getParentUrl());
|
||||
}
|
||||
} else if (url.toString().endsWith(".png")) {
|
||||
var updates = loader.reloadImage(url);
|
||||
renderers.render(url);
|
||||
|
||||
if (!updates.isEmpty()) {
|
||||
renderers.render(url.getParentUrl());
|
||||
}
|
||||
}
|
||||
|
||||
if (tombstoneUpdateCaclulator.isTombstoneFile(url)) {
|
||||
loader.loadTombstones().forEach(renderers::render);
|
||||
}
|
||||
if (tombstoneUpdateCaclulator.isRedirectFile(url)) {
|
||||
loader.loadRedirects().forEach(renderers::render);
|
||||
}
|
||||
}
|
||||
catch (Exception ex) {
|
||||
logger.error("Failed to refresh URL " + url, ex);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void load() {
|
||||
copyStylesheet();
|
||||
|
||||
try {
|
||||
loader.load();
|
||||
renderAll();
|
||||
}
|
||||
catch (IOException ex) {
|
||||
logger.error("Failed to load", ex);
|
||||
}
|
||||
}
|
||||
|
||||
private void copyStylesheet() {
|
||||
try (var resource = Objects.requireNonNull(
|
||||
ClassLoader.getSystemResourceAsStream("static/memex/style-new.css"), "Could not load stylesheet")) {
|
||||
resources.write(new MemexNodeUrl("/style-new.css"), resource.readAllBytes());
|
||||
}
|
||||
catch (Exception ex) {
|
||||
logger.error("Failed to copy stylesheet", ex);
|
||||
}
|
||||
|
||||
try (var resource = Objects.requireNonNull(
|
||||
ClassLoader.getSystemResourceAsStream("static/memex/ico/dir.png"), "Could not copy file")) {
|
||||
resources.write(new MemexNodeUrl("/ico/dir.png"), resource.readAllBytes());
|
||||
}
|
||||
catch (Exception ex) {
|
||||
logger.error("Failed to copy file", ex);
|
||||
}
|
||||
|
||||
|
||||
try (var resource = Objects.requireNonNull(
|
||||
ClassLoader.getSystemResourceAsStream("static/memex/ico/file.png"), "Could not copy file")) {
|
||||
resources.write(new MemexNodeUrl("/ico/file.png"), resource.readAllBytes());
|
||||
}
|
||||
catch (Exception ex) {
|
||||
logger.error("Failed to copy file", ex);
|
||||
}
|
||||
|
||||
|
||||
try (var resource = Objects.requireNonNull(
|
||||
ClassLoader.getSystemResourceAsStream("static/memex/ico/root.png"), "Could not copy file")) {
|
||||
resources.write(new MemexNodeUrl("/ico/root.png"), resource.readAllBytes());
|
||||
}
|
||||
catch (Exception ex) {
|
||||
logger.error("Failed to copy file", ex);
|
||||
}
|
||||
|
||||
try (var resource = Objects.requireNonNull(
|
||||
ClassLoader.getSystemResourceAsStream("static/memex/ico/pic16.png"), "Could not copy file")) {
|
||||
resources.write(new MemexNodeUrl("/ico/pic16.png"), resource.readAllBytes());
|
||||
}
|
||||
catch (Exception ex) {
|
||||
logger.error("Failed to copy file", ex);
|
||||
}
|
||||
}
|
||||
|
||||
private void renderAll() {
|
||||
data.forEach((url, doc) -> {
|
||||
renderers.render(url);
|
||||
});
|
||||
data.getDirectories().forEach(renderers::render);
|
||||
data.getImages().forEach(img -> renderers.render(img.path));
|
||||
|
||||
data.getTombstones().ifPresent(this::renderTombstoneFromGemtextDb);
|
||||
data.getRedirects().ifPresent(this::renderTombstoneFromGemtextDb);
|
||||
}
|
||||
|
||||
|
||||
private void renderTombstoneFromGemtextDb(GemtextDatabase db) {
|
||||
db.keys()
|
||||
.stream()
|
||||
.map(MemexNodeUrl::new)
|
||||
.filter(url -> getDocument(url) == null)
|
||||
.forEach(renderers::render);
|
||||
}
|
||||
|
||||
public void updateNode(MemexNodeUrl node, String text) throws IOException {
|
||||
var nodes = loader.updateNode(node, text);
|
||||
|
||||
nodes.forEach(renderers::render);
|
||||
|
||||
renderers.render(node.getParentUrl());
|
||||
}
|
||||
|
||||
public GemtextDocument getDocument(MemexNodeUrl url) {
|
||||
return data.getDocument(url);
|
||||
}
|
||||
public MemexImage getImage(MemexNodeUrl url) {
|
||||
return data.getImage(url);
|
||||
}
|
||||
|
||||
|
||||
public void createNode(MemexNodeUrl node, String text) throws IOException {
|
||||
var nodes = loader.createNode(node, text);
|
||||
|
||||
nodes.forEach(renderers::render);
|
||||
|
||||
renderers.render(node.getParentUrl());
|
||||
}
|
||||
|
||||
|
||||
public void uploadImage(MemexNodeUrl url, byte[] bytes) throws IOException {
|
||||
|
||||
var image = ImageIO.read(new ByteArrayInputStream(bytes));
|
||||
var convertedImage = ditherer.convert(image);
|
||||
var baosOut = new ByteArrayOutputStream();
|
||||
ImageIO.write(convertedImage, "png", baosOut);
|
||||
|
||||
loader.uploadImage(url, baosOut.toByteArray());
|
||||
|
||||
renderers.render(url);
|
||||
renderers.render(url.getParentUrl());
|
||||
}
|
||||
|
||||
public void delete(MemexNode node, String message) throws IOException {
|
||||
tombstoneUpdateCaclulator.addTombstone(node.getUrl(), message)
|
||||
.visit(this);
|
||||
loader.loadTombstones();
|
||||
loader.delete(node).forEach(renderers::render);
|
||||
}
|
||||
|
||||
public List<GemtextDocument> getDocumentsByPath(MemexNodeUrl url) {
|
||||
return data.getDocumentsByPath(url);
|
||||
}
|
||||
|
||||
public void gitPull() {
|
||||
gitRepo.pull();
|
||||
}
|
||||
|
||||
public void rename(MemexNode src, MemexNodeUrl dst) throws IOException {
|
||||
tombstoneUpdateCaclulator.addRedirect(src.getUrl(), dst.toString())
|
||||
.visit(this);
|
||||
loader.loadRedirects();
|
||||
loader.rename(src, dst).forEach(renderers::render);
|
||||
}
|
||||
|
||||
public byte[] getRaw(MemexNodeUrl url) throws IOException {
|
||||
return loader.getRaw(url);
|
||||
}
|
||||
}
|
@ -1,87 +0,0 @@
|
||||
package nu.marginalia.memex.memex;
|
||||
|
||||
import com.google.inject.AbstractModule;
|
||||
import com.google.inject.Inject;
|
||||
import com.google.inject.Provider;
|
||||
import com.google.inject.name.Named;
|
||||
import com.google.inject.name.Names;
|
||||
import lombok.SneakyThrows;
|
||||
import nu.marginalia.memex.gemini.GeminiService;
|
||||
import nu.marginalia.memex.gemini.GeminiServiceDummy;
|
||||
import nu.marginalia.memex.gemini.GeminiServiceImpl;
|
||||
import nu.marginalia.memex.memex.system.MemexFileWriter;
|
||||
import nu.marginalia.memex.memex.system.git.MemexGitRepo;
|
||||
import nu.marginalia.memex.memex.system.git.MemexGitRepoDummy;
|
||||
import nu.marginalia.memex.memex.system.git.MemexGitRepoImpl;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.nio.file.Path;
|
||||
|
||||
public class MemexConfigurationModule extends AbstractModule {
|
||||
private static final Logger logger = LoggerFactory.getLogger(MemexConfigurationModule.class);
|
||||
|
||||
private static final String MEMEX_ROOT_PROPERTY = System.getProperty("memex-root", "/var/lib/wmsa/memex");
|
||||
private static final String MEMEX_HTML_PROPERTY = System.getProperty("memex-html-resources", "/var/lib/wmsa/memex-html");
|
||||
private static final String MEMEX_GMI_PROPERTY = System.getProperty("memex-gmi-resources", "/var/lib/wmsa/memex-gmi");
|
||||
|
||||
private static final boolean MEMEX_DISABLE_GIT = Boolean.getBoolean("memex-disable-git");
|
||||
private static final boolean MEMEX_DISABLE_GEMINI = Boolean.getBoolean("memex-disable-gemini");
|
||||
|
||||
@SneakyThrows
|
||||
public MemexConfigurationModule() {
|
||||
Thread.sleep(100);
|
||||
}
|
||||
|
||||
public void configure() {
|
||||
bind(Path.class).annotatedWith(Names.named("memex-root")).toInstance(Path.of(MEMEX_ROOT_PROPERTY));
|
||||
bind(Path.class).annotatedWith(Names.named("memex-html-resources")).toInstance(Path.of(MEMEX_HTML_PROPERTY));
|
||||
bind(Path.class).annotatedWith(Names.named("memex-gmi-resources")).toInstance(Path.of(MEMEX_GMI_PROPERTY));
|
||||
|
||||
bind(String.class).annotatedWith(Names.named("tombestone-special-file")).toInstance("/special/tombstone.gmi");
|
||||
bind(String.class).annotatedWith(Names.named("redirects-special-file")).toInstance("/special/redirect.gmi");
|
||||
|
||||
switchImpl(MemexGitRepo.class, MEMEX_DISABLE_GIT, MemexGitRepoDummy.class, MemexGitRepoImpl.class);
|
||||
switchImpl(GeminiService.class, MEMEX_DISABLE_GEMINI, GeminiServiceDummy.class, GeminiServiceImpl.class);
|
||||
|
||||
bind(MemexFileWriter.class).annotatedWith(Names.named("html")).toProvider(MemexHtmlWriterProvider.class);
|
||||
bind(MemexFileWriter.class).annotatedWith(Names.named("gmi")).toProvider(MemexGmiWriterProvider.class);
|
||||
}
|
||||
|
||||
<T> void switchImpl(Class<T> impl, boolean param, Class<? extends T> ifEnabled, Class<? extends T> ifDisabled) {
|
||||
final Class<? extends T> choice;
|
||||
if (param) {
|
||||
choice = ifEnabled;
|
||||
}
|
||||
else {
|
||||
choice = ifDisabled;
|
||||
}
|
||||
bind(impl).to(choice).asEagerSingleton();
|
||||
}
|
||||
|
||||
public static class MemexHtmlWriterProvider implements Provider<MemexFileWriter> {
|
||||
private final Path path;
|
||||
|
||||
@Inject
|
||||
public MemexHtmlWriterProvider(@Named("memex-html-resources") Path resources) {
|
||||
this.path = resources;
|
||||
}
|
||||
@Override
|
||||
public MemexFileWriter get() {
|
||||
return new MemexFileWriter(path);
|
||||
}
|
||||
}
|
||||
|
||||
public static class MemexGmiWriterProvider implements Provider<MemexFileWriter> {
|
||||
private final Path path;
|
||||
|
||||
@Inject
|
||||
public MemexGmiWriterProvider(@Named("memex-gmi-resources") Path resources) {
|
||||
this.path = resources;
|
||||
}
|
||||
@Override
|
||||
public MemexFileWriter get() {
|
||||
return new MemexFileWriter(path);
|
||||
}
|
||||
}
|
||||
}
|
@ -1,150 +0,0 @@
|
||||
package nu.marginalia.memex.memex;
|
||||
|
||||
import com.google.inject.Singleton;
|
||||
import nu.marginalia.memex.gemini.gmi.GemtextDatabase;
|
||||
import nu.marginalia.memex.gemini.gmi.GemtextDocument;
|
||||
import nu.marginalia.memex.memex.model.MemexImage;
|
||||
import nu.marginalia.memex.memex.model.MemexLink;
|
||||
import nu.marginalia.memex.memex.model.MemexNodeUrl;
|
||||
import nu.marginalia.memex.memex.model.fs.MemexFileSystem;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.util.*;
|
||||
import java.util.function.BiConsumer;
|
||||
|
||||
@Singleton
|
||||
public class MemexData {
|
||||
private final MemexLinks links = new MemexLinks();
|
||||
private final Map<MemexNodeUrl, GemtextDocument> documents = new HashMap<>();
|
||||
|
||||
private final Map<MemexNodeUrl, MemexImage> images = new HashMap<>();
|
||||
private final MemexFileSystem fileSystem = new MemexFileSystem();
|
||||
private final Logger logger = LoggerFactory.getLogger(getClass());
|
||||
private GemtextDatabase tombstones = null;
|
||||
private GemtextDatabase redirects = null;
|
||||
|
||||
public synchronized Collection<MemexImage> getImages() {
|
||||
return new ArrayList<>(images.values());
|
||||
}
|
||||
public synchronized Collection<GemtextDocument> getDocuments() { return new ArrayList<>(documents.values()); }
|
||||
|
||||
public synchronized void setTombstones(GemtextDatabase tombstones) {
|
||||
this.tombstones = tombstones;
|
||||
}
|
||||
public synchronized void setRedirects(GemtextDatabase redirects) {
|
||||
this.redirects = redirects;
|
||||
}
|
||||
|
||||
public synchronized void addDocument(MemexNodeUrl url, GemtextDocument doc) {
|
||||
logger.debug("addDocument({})", url);
|
||||
documents.put(url, doc);
|
||||
fileSystem.register(doc);
|
||||
}
|
||||
|
||||
public synchronized void addImage(MemexNodeUrl url, MemexImage img) {
|
||||
images.put(url, img);
|
||||
fileSystem.register(img);
|
||||
}
|
||||
|
||||
public Optional<GemtextDatabase> getTombstones() {
|
||||
return Optional.ofNullable(tombstones);
|
||||
}
|
||||
public Optional<GemtextDatabase> getRedirects() {
|
||||
return Optional.ofNullable(redirects);
|
||||
}
|
||||
|
||||
public synchronized void updateOutlinks(MemexNodeUrl url, GemtextDocument doc) {
|
||||
|
||||
var linksForNode = new TreeSet<>(Comparator.comparing(MemexLink::getDest));
|
||||
|
||||
MemexNodeUrl srcUrl = "index.gmi".equals(url.getFilename()) ? url.getParentUrl() : url;
|
||||
|
||||
for (var link : doc.getLinks()) {
|
||||
link.getUrl().visitNodeUrl(nodeUrl ->
|
||||
linksForNode.add(new MemexLink(nodeUrl, srcUrl, doc.getTitle(), doc.getHeadingForElement(link), link.getHeading()))
|
||||
);
|
||||
}
|
||||
|
||||
links.setOutlinks(srcUrl, linksForNode);
|
||||
}
|
||||
|
||||
public synchronized Set<MemexNodeUrl> getNeighbors(MemexNodeUrl url) {
|
||||
return links.getNeighbors(url);
|
||||
}
|
||||
|
||||
public synchronized void forEach(BiConsumer<MemexNodeUrl, GemtextDocument> consumer) {
|
||||
documents.forEach(consumer);
|
||||
}
|
||||
|
||||
public synchronized GemtextDocument getDocument(MemexNodeUrl url) {
|
||||
return documents.get(url);
|
||||
}
|
||||
|
||||
public synchronized MemexImage getImage(MemexNodeUrl url) {
|
||||
return images.get(url);
|
||||
}
|
||||
public synchronized List<MemexLink> getBacklinks(MemexNodeUrl... urls) {
|
||||
return links.getBacklinks(urls);
|
||||
}
|
||||
|
||||
public synchronized List<GemtextDocument> getDocumentsByPath(MemexNodeUrl url) {
|
||||
return fileSystem.getDocuments(url);
|
||||
}
|
||||
public synchronized List<MemexImage> getImagesByPath(MemexNodeUrl url) {
|
||||
return fileSystem.getImages(url);
|
||||
}
|
||||
public synchronized List<MemexNodeUrl> getSubdirsByPath(MemexNodeUrl url) {
|
||||
return fileSystem.getSubdirs(url);
|
||||
}
|
||||
|
||||
public MemexFileSystem getFilesystem() {
|
||||
return fileSystem;
|
||||
}
|
||||
|
||||
public List<MemexNodeUrl> getDirectories() {
|
||||
return fileSystem.getAllDirectories();
|
||||
}
|
||||
public boolean isDirectory(MemexNodeUrl url) {
|
||||
return fileSystem.isDirectory(url);
|
||||
}
|
||||
|
||||
public synchronized Set<MemexNodeUrl> deleteImage(MemexNodeUrl url) {
|
||||
images.remove(url);
|
||||
fileSystem.remove(url);
|
||||
|
||||
Set<MemexNodeUrl> affectedUrls = new HashSet<>();
|
||||
|
||||
affectedUrls.add(url);
|
||||
affectedUrls.add(url.getParentUrl());
|
||||
|
||||
return affectedUrls;
|
||||
}
|
||||
|
||||
public synchronized Set<MemexNodeUrl> deleteDocument(MemexNodeUrl url) {
|
||||
Set<MemexNodeUrl> affectedUrls = new HashSet<>();
|
||||
|
||||
affectedUrls.add(url);
|
||||
affectedUrls.add(url.getParentUrl());
|
||||
|
||||
links.getOutlinks(url)
|
||||
.stream()
|
||||
.map(MemexLink::getDest)
|
||||
.forEach(affectedUrls::add);
|
||||
|
||||
documents.remove(url);
|
||||
fileSystem.remove(url);
|
||||
|
||||
links.remove(url);
|
||||
|
||||
return affectedUrls;
|
||||
}
|
||||
|
||||
public boolean hasTombstone(MemexNodeUrl url) {
|
||||
if (tombstones != null && tombstones.getLinkData(url).isPresent())
|
||||
return true;
|
||||
if (redirects != null && redirects.getLinkData(url).isPresent())
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
}
|
@ -1,54 +0,0 @@
|
||||
package nu.marginalia.memex.memex;
|
||||
|
||||
import nu.marginalia.memex.memex.model.MemexLink;
|
||||
import nu.marginalia.memex.memex.model.MemexNodeUrl;
|
||||
|
||||
import java.util.*;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
public class MemexLinks {
|
||||
private Map<MemexNodeUrl, List<MemexLink>> backLinks = new HashMap<>();
|
||||
private final Map<MemexNodeUrl, Set<MemexLink>> links = new HashMap<>();
|
||||
|
||||
public void updateBacklinks() {
|
||||
backLinks.clear();
|
||||
backLinks = links.values().stream()
|
||||
.flatMap(Set::stream)
|
||||
.collect(Collectors.groupingBy(MemexLink::getDest));
|
||||
}
|
||||
|
||||
public Set<MemexNodeUrl> getNeighbors(MemexNodeUrl url) {
|
||||
final Set<MemexNodeUrl> neighbors = new HashSet<>();
|
||||
|
||||
links.getOrDefault(url, Collections.emptySet()).stream().map(MemexLink::getDest)
|
||||
.forEach(neighbors::add);
|
||||
backLinks.getOrDefault(url, Collections.emptyList()).stream()
|
||||
.map(MemexLink::getSrc)
|
||||
.forEach(neighbors::add);
|
||||
|
||||
return neighbors;
|
||||
}
|
||||
|
||||
public void setOutlinks(MemexNodeUrl url, TreeSet<MemexLink> linksForNode) {
|
||||
links.put(url, linksForNode);
|
||||
updateBacklinks();
|
||||
}
|
||||
|
||||
public List<MemexLink> getBacklinks(MemexNodeUrl... urls) {
|
||||
return Arrays.stream(urls)
|
||||
.map(backLinks::get)
|
||||
.filter(Objects::nonNull)
|
||||
.flatMap(List::stream)
|
||||
.sorted(Comparator.comparing(MemexLink::getSrc))
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
public Set<MemexLink> getOutlinks(MemexNodeUrl url) {
|
||||
return links.getOrDefault(url, Collections.emptySet());
|
||||
}
|
||||
|
||||
public void remove(MemexNodeUrl url) {
|
||||
links.remove(url);
|
||||
updateBacklinks();
|
||||
}
|
||||
}
|
@ -1,265 +0,0 @@
|
||||
package nu.marginalia.memex.memex;
|
||||
|
||||
import com.google.common.collect.Sets;
|
||||
import com.google.inject.Inject;
|
||||
import com.google.inject.name.Named;
|
||||
import nu.marginalia.memex.gemini.gmi.GemtextDatabase;
|
||||
import nu.marginalia.memex.gemini.gmi.GemtextDocument;
|
||||
import nu.marginalia.memex.memex.model.MemexImage;
|
||||
import nu.marginalia.memex.memex.model.MemexNode;
|
||||
import nu.marginalia.memex.memex.model.MemexNodeUrl;
|
||||
import nu.marginalia.memex.memex.system.MemexFileSystemModifiedTimes;
|
||||
import nu.marginalia.memex.memex.system.MemexSourceFileSystem;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import javax.annotation.CheckReturnValue;
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.nio.file.*;
|
||||
import java.util.*;
|
||||
|
||||
public class MemexLoader {
|
||||
private final MemexData data;
|
||||
private final MemexFileSystemModifiedTimes modifiedTimes;
|
||||
private final Path root;
|
||||
private final MemexSourceFileSystem sourceFileSystem;
|
||||
|
||||
private final String tombstonePath;
|
||||
private final String redirectsPath;
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(MemexLoader.class);
|
||||
|
||||
@Inject
|
||||
public MemexLoader(MemexData data,
|
||||
MemexFileSystemModifiedTimes modifiedTimes,
|
||||
MemexSourceFileSystem sourceFileSystem,
|
||||
@Named("memex-root") Path root,
|
||||
@Named("tombestone-special-file") String tombstonePath,
|
||||
@Named("redirects-special-file") String redirectsPath) {
|
||||
|
||||
this.data = data;
|
||||
this.modifiedTimes = modifiedTimes;
|
||||
this.sourceFileSystem = sourceFileSystem;
|
||||
this.root = root;
|
||||
this.tombstonePath = tombstonePath;
|
||||
this.redirectsPath = redirectsPath;
|
||||
}
|
||||
|
||||
|
||||
public void load() throws IOException {
|
||||
|
||||
loadTombstones();
|
||||
loadRedirects();
|
||||
|
||||
try (var files = Files.walk(root)) {
|
||||
files.forEach(this::loadFile);
|
||||
}
|
||||
|
||||
data.getFilesystem().recalculateDirectories();
|
||||
|
||||
}
|
||||
|
||||
private void loadFile(Path p) {
|
||||
var file = p.toFile();
|
||||
|
||||
try {
|
||||
if (p.toString().contains(".git")) {
|
||||
return;
|
||||
}
|
||||
if (file.isDirectory() && !file.getName().startsWith(".")) {
|
||||
data.getFilesystem().registerDir(MemexNodeUrl.ofRelativePath(root, p));
|
||||
} else if (isGemtext(file)) {
|
||||
loadNode(p);
|
||||
} else if (isImage(file)) {
|
||||
loadImage(p);
|
||||
}
|
||||
}
|
||||
catch (IOException ex) {
|
||||
logger.error("Failed to load file " + p, ex);
|
||||
}
|
||||
}
|
||||
|
||||
public void loadImage(Path p) throws IOException {
|
||||
if (!modifiedTimes.isFreshUpdate(p)) {
|
||||
return;
|
||||
}
|
||||
|
||||
var url = MemexNodeUrl.ofRelativePath(root, p);
|
||||
data.addImage(url, new MemexImage(url, p));
|
||||
logger.info("Loading {}", p);
|
||||
}
|
||||
|
||||
public Set<MemexNodeUrl> loadTombstones() {
|
||||
var oldValues = data.getTombstones();
|
||||
var newValues = loadGemtextDb(Path.of(root + tombstonePath));
|
||||
|
||||
newValues.ifPresent(data::setTombstones);
|
||||
|
||||
|
||||
if (newValues.isPresent()) {
|
||||
if (oldValues.isPresent()) {
|
||||
var oldTs = oldValues.get();
|
||||
var newTs = newValues.get();
|
||||
return oldTs.difference(newTs);
|
||||
}
|
||||
}
|
||||
|
||||
return Collections.emptySet();
|
||||
}
|
||||
|
||||
public Set<MemexNodeUrl> loadRedirects() {
|
||||
var oldValues = data.getTombstones();
|
||||
var newValues = loadGemtextDb(Path.of(root + redirectsPath));
|
||||
|
||||
newValues.ifPresent(data::setRedirects);
|
||||
|
||||
if (newValues.isPresent()) {
|
||||
if (oldValues.isPresent()) {
|
||||
var oldTs = oldValues.get();
|
||||
var newTs = newValues.get();
|
||||
return oldTs.difference(newTs);
|
||||
}
|
||||
}
|
||||
|
||||
return Collections.emptySet();
|
||||
}
|
||||
|
||||
private Optional<GemtextDatabase> loadGemtextDb(Path p) {
|
||||
if (Files.exists(p)) {
|
||||
try {
|
||||
return Optional.of(GemtextDatabase.of(MemexNodeUrl.ofRelativePath(root, p), p));
|
||||
} catch (IOException e) {
|
||||
logger.error("Failed to load database " + p, e);
|
||||
}
|
||||
}
|
||||
return Optional.empty();
|
||||
}
|
||||
|
||||
private boolean isGemtext(File f) {
|
||||
return f.isFile() && f.getName().endsWith(".gmi");
|
||||
}
|
||||
|
||||
private boolean isImage(File f) {
|
||||
return f.isFile() && f.getName().endsWith(".png");
|
||||
}
|
||||
|
||||
@CheckReturnValue
|
||||
public Collection<MemexNodeUrl> updateNode(MemexNodeUrl url, String contents) throws IOException {
|
||||
sourceFileSystem.replaceFile(url, contents);
|
||||
return loadNode(url);
|
||||
}
|
||||
|
||||
@CheckReturnValue
|
||||
public Collection<MemexNodeUrl> createNode(MemexNodeUrl url, String contents) throws IOException {
|
||||
sourceFileSystem.createFile(url, contents);
|
||||
return loadNode(url);
|
||||
}
|
||||
|
||||
|
||||
public MemexImage uploadImage(MemexNodeUrl url, byte[] bytes) throws IOException {
|
||||
sourceFileSystem.createFile(url, bytes);
|
||||
|
||||
var img = new MemexImage(url, url.asAbsolutePath(root));
|
||||
data.addImage(url, img);
|
||||
return img;
|
||||
}
|
||||
|
||||
|
||||
public Set<MemexNodeUrl> reloadImage(MemexNodeUrl url) throws IOException {
|
||||
var path = url.asAbsolutePath(root);
|
||||
if (!Files.exists(path)) {
|
||||
return data.deleteImage(url);
|
||||
}
|
||||
else {
|
||||
loadImage(path);
|
||||
Set<MemexNodeUrl> affectedUrls = new HashSet<>();
|
||||
affectedUrls.add(url);
|
||||
|
||||
for (var u = url.getParentUrl(); u != null; u = u.getParentUrl()) {
|
||||
affectedUrls.add(u);
|
||||
}
|
||||
|
||||
return affectedUrls;
|
||||
}
|
||||
}
|
||||
|
||||
public Set<MemexNodeUrl> reloadNode(MemexNodeUrl url) throws IOException {
|
||||
var path = url.asAbsolutePath(root);
|
||||
if (!Files.exists(path)) {
|
||||
return data.deleteDocument(url);
|
||||
}
|
||||
else {
|
||||
return loadNode(path);
|
||||
}
|
||||
}
|
||||
|
||||
public Set<MemexNodeUrl> loadNode(Path path) throws IOException {
|
||||
|
||||
if (!modifiedTimes.isFreshUpdate(path)) {
|
||||
return Set.of(MemexNodeUrl.ofRelativePath(root, path));
|
||||
}
|
||||
|
||||
logger.info("Loading {}", path);
|
||||
|
||||
return loadNode(MemexNodeUrl.ofRelativePath(root, path));
|
||||
}
|
||||
|
||||
public Set<MemexNodeUrl> loadNode(MemexNodeUrl url) throws IOException {
|
||||
|
||||
var doc = GemtextDocument.of(url, url.asAbsolutePath(root));
|
||||
|
||||
data.addDocument(url, doc);
|
||||
|
||||
Set<MemexNodeUrl> urlsAffected = data.getNeighbors(url);
|
||||
|
||||
data.updateOutlinks(url, doc);
|
||||
|
||||
urlsAffected.addAll(data.getNeighbors(url));
|
||||
urlsAffected.add(url);
|
||||
urlsAffected.removeIf(u -> null == data.getDocument(u));
|
||||
|
||||
for (var u = url.getParentUrl(); u != null; u = u.getParentUrl()) {
|
||||
urlsAffected.add(u);
|
||||
}
|
||||
|
||||
return urlsAffected;
|
||||
}
|
||||
|
||||
public Set<MemexNodeUrl> delete(MemexNode node) throws IOException {
|
||||
sourceFileSystem.delete(node.getUrl());
|
||||
return node.visit(new MemexNode.MemexNodeVisitor<>() {
|
||||
@Override
|
||||
public Set<MemexNodeUrl> onDocument(MemexNodeUrl url) {
|
||||
return data.deleteDocument(url);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Set<MemexNodeUrl> onImage(MemexNodeUrl url) {
|
||||
return data.deleteImage(url);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
public Set<MemexNodeUrl> rename(MemexNode src, MemexNodeUrl dst) throws IOException {
|
||||
sourceFileSystem.renameFile(src.getUrl(), dst);
|
||||
return src.visit(new MemexNode.MemexNodeVisitor<Set<MemexNodeUrl>>() {
|
||||
@Override
|
||||
public Set<MemexNodeUrl> onDocument(MemexNodeUrl url) throws IOException {
|
||||
var changes = data.deleteDocument(url);
|
||||
return Sets.union(changes, reloadNode(dst));
|
||||
}
|
||||
|
||||
@Override
|
||||
public Set<MemexNodeUrl> onImage(MemexNodeUrl url) throws IOException {
|
||||
var changes = data.deleteImage(url);
|
||||
return Sets.union(changes, reloadImage(dst));
|
||||
}
|
||||
});
|
||||
|
||||
}
|
||||
|
||||
public byte[] getRaw(MemexNodeUrl url) throws IOException {
|
||||
return sourceFileSystem.getRaw(url);
|
||||
}
|
||||
}
|
@ -1,31 +0,0 @@
|
||||
package nu.marginalia.memex.memex;
|
||||
|
||||
import com.google.inject.Guice;
|
||||
import com.google.inject.Inject;
|
||||
import com.google.inject.Injector;
|
||||
import nu.marginalia.memex.MemexServiceDescriptors;
|
||||
import nu.marginalia.memex.gemini.GeminiConfigurationModule;
|
||||
import nu.marginalia.service.MainClass;
|
||||
import nu.marginalia.service.id.ServiceId;
|
||||
import nu.marginalia.service.module.ConfigurationModule;
|
||||
import nu.marginalia.service.server.Initialization;
|
||||
|
||||
public class MemexMain extends MainClass {
|
||||
private final MemexService service;
|
||||
|
||||
@Inject
|
||||
public MemexMain(MemexService service) {
|
||||
this.service = service;
|
||||
}
|
||||
|
||||
public static void main(String... args) {
|
||||
MainClass.init(ServiceId.Other_Memex, args);
|
||||
|
||||
Injector injector = Guice.createInjector(
|
||||
new MemexConfigurationModule(),
|
||||
new GeminiConfigurationModule(),
|
||||
new ConfigurationModule(MemexServiceDescriptors.descriptors, ServiceId.Other_Memex));
|
||||
injector.getInstance(MemexMain.class);
|
||||
injector.getInstance(Initialization.class).setReady();
|
||||
}
|
||||
}
|
@ -1,292 +0,0 @@
|
||||
package nu.marginalia.memex.memex;
|
||||
|
||||
import com.google.inject.Inject;
|
||||
import com.google.inject.name.Named;
|
||||
import lombok.SneakyThrows;
|
||||
import nu.marginalia.client.Context;
|
||||
import nu.marginalia.memex.gemini.gmi.GemtextDocument;
|
||||
import nu.marginalia.memex.gemini.gmi.renderer.GemtextRendererFactory;
|
||||
import nu.marginalia.memex.auth.client.AuthClient;
|
||||
import nu.marginalia.memex.memex.model.render.*;
|
||||
import nu.marginalia.memex.memex.change.GemtextMutation;
|
||||
import nu.marginalia.memex.memex.change.update.GemtextDocumentUpdateCalculator;
|
||||
import nu.marginalia.memex.memex.model.MemexNodeHeadingId;
|
||||
import nu.marginalia.memex.memex.model.MemexNodeUrl;
|
||||
import nu.marginalia.memex.memex.renderer.MemexHtmlRenderer;
|
||||
import nu.marginalia.service.server.Initialization;
|
||||
import nu.marginalia.service.server.MetricsServer;
|
||||
import nu.marginalia.service.server.Service;
|
||||
import org.apache.http.HttpStatus;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import spark.Request;
|
||||
import spark.Response;
|
||||
import spark.Spark;
|
||||
|
||||
import javax.servlet.MultipartConfigElement;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.nio.file.Path;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.Objects;
|
||||
|
||||
import static spark.Spark.*;
|
||||
|
||||
public class MemexService extends Service {
|
||||
private final GemtextDocumentUpdateCalculator updateCalculator;
|
||||
private final Memex memex;
|
||||
private final MemexHtmlRenderer renderer;
|
||||
private final AuthClient authClient;
|
||||
|
||||
private final Logger logger = LoggerFactory.getLogger(getClass());
|
||||
|
||||
/**
 * Configures static file serving from the html resource directory and
 * registers the {@code git-pull} route and the {@code public/api} routes.
 */
@Inject
public MemexService(@Named("service-host") String ip,
                    @Named("service-port") Integer port,
                    GemtextDocumentUpdateCalculator updateCalculator,
                    Memex memex,
                    MemexHtmlRenderer renderer,
                    AuthClient authClient,
                    Initialization initialization,
                    MetricsServer metricsServer,
                    @Named("memex-html-resources") Path memexHtmlDir) {

    super(ip, port, initialization, metricsServer, () -> {
        staticFiles.externalLocation(memexHtmlDir.toString());
        staticFiles.disableMimeTypeGuessing();
        // Files with these extensions are served as text/html.
        staticFiles.registerMimeType("gmi", "text/html");
        staticFiles.registerMimeType("png", "text/html");
        staticFiles.expireTime(60);
        staticFiles.header("Cache-control", "public,proxy-revalidate");
    });

    this.updateCalculator = updateCalculator;
    this.memex = memex;
    this.renderer = renderer;
    this.authClient = authClient;

    Spark.get("git-pull", this::gitPull);

    Spark.path("public/api", () -> {
        // Log every API request and mark every API response as non-cacheable.
        before((req, rsp) -> logger.info("{} {}", req.requestMethod(), req.pathInfo()));
        after((req, rsp) -> rsp.header("Cache-control", "no-cache"));

        // Each operation has a POST action and a GET form rendered via renderModel.
        post("/create", this::create);
        get("/create", this::createForm, this::renderModel);
        post("/upload", this::upload);
        get("/upload", this::uploadForm, this::renderModel);
        post("/update", this::update);
        get("/update", this::updateForm, this::renderModel);
        post("/rename", this::rename);
        get("/rename", this::renameForm, this::renderModel);
        post("/delete", this::delete);
        get("/delete", this::deleteForm, this::renderModel);

        get("/raw", this::raw);
    });
}
|
||||
|
||||
private Object raw(Request request, Response response) throws IOException {
|
||||
final MemexNodeUrl url = new MemexNodeUrl(Objects.requireNonNull(request.queryParams("url")));
|
||||
|
||||
response.type(url.toNode().getType().mime);
|
||||
response.header("Content-Disposition", "attachment; filename=" + url.getFilename());
|
||||
response.raw().getOutputStream().write(memex.getRaw(url));
|
||||
|
||||
return "";
|
||||
}
|
||||
|
||||
private Object renameForm(Request request, Response response) {
|
||||
final String type = Objects.requireNonNull(request.queryParams("type"));
|
||||
final MemexNodeUrl url = new MemexNodeUrl(Objects.requireNonNull(request.queryParams("url")));
|
||||
|
||||
authClient.redirectToLoginIfUnauthenticated("MEMEX", request, response);
|
||||
|
||||
if ("gmi".equals(type)) {
|
||||
var doc = memex.getDocument(url);
|
||||
if (null == doc) {
|
||||
Spark.halt(404);
|
||||
}
|
||||
|
||||
final String docHtml = doc.render(new GemtextRendererFactory("", url.toString()).htmlRendererEditable());
|
||||
return new MemexRendererRenameFormModel(docHtml,
|
||||
null, url, "gmi");
|
||||
}
|
||||
else if ("img".equals(type)) {
|
||||
var img = memex.getImage(url);
|
||||
if (null == img) {
|
||||
Spark.halt(404);
|
||||
}
|
||||
return new MemexRendererRenameFormModel(null,
|
||||
new MemexRendererImageModel(img, Collections.emptyList(), null),
|
||||
url, "img");
|
||||
}
|
||||
|
||||
Spark.halt(HttpStatus.SC_BAD_REQUEST);
|
||||
return null;
|
||||
}
|
||||
|
||||
private Object rename(Request request, Response response) throws IOException {
|
||||
authClient.redirectToLoginIfUnauthenticated("MEMEX", request, response);
|
||||
|
||||
var url = Objects.requireNonNull(request.queryParams("url"));
|
||||
var name = Objects.requireNonNull(request.queryParams("name"));
|
||||
var type = Objects.requireNonNull(request.queryParams("type"));
|
||||
var confirm = Objects.requireNonNull(request.queryParams("confirm"));
|
||||
|
||||
if (!"on".equals(confirm)) {
|
||||
logger.error("Confirm dialog not checked, was {}", confirm);
|
||||
Spark.halt(HttpStatus.SC_BAD_REQUEST, "Confirm was not checked");
|
||||
}
|
||||
|
||||
memex.rename(new MemexNodeUrl(url).toNode(), new MemexNodeUrl(name));
|
||||
|
||||
response.redirect("https://memex.marginalia.nu/"+name);
|
||||
return null;
|
||||
|
||||
}
|
||||
|
||||
private Object gitPull(Request request, Response response) {
|
||||
logger.info("Git pull by request");
|
||||
memex.gitPull();
|
||||
return "Ok";
|
||||
}
|
||||
|
||||
private String renderModel(Object model) {
|
||||
return ((MemexRendererableDirect)model).render(renderer);
|
||||
}
|
||||
|
||||
private MemexRendererDeleteFormModel deleteForm(Request request, Response response) {
|
||||
final String type = Objects.requireNonNull(request.queryParams("type"));
|
||||
final MemexNodeUrl url = new MemexNodeUrl(Objects.requireNonNull(request.queryParams("url")));
|
||||
|
||||
authClient.redirectToLoginIfUnauthenticated("MEMEX", request, response);
|
||||
|
||||
if ("gmi".equals(type)) {
|
||||
var doc = memex.getDocument(url);
|
||||
if (null == doc) {
|
||||
Spark.halt(404);
|
||||
}
|
||||
|
||||
final String docHtml = doc.render(new GemtextRendererFactory("", url.toString()).htmlRendererEditable());
|
||||
return new MemexRendererDeleteFormModel(docHtml,
|
||||
null, url, "gmi");
|
||||
}
|
||||
else if ("img".equals(type)) {
|
||||
var img = memex.getImage(url);
|
||||
if (null == img) {
|
||||
Spark.halt(404);
|
||||
}
|
||||
return new MemexRendererDeleteFormModel(null,
|
||||
new MemexRendererImageModel(img, Collections.emptyList(), null),
|
||||
url, "img");
|
||||
}
|
||||
|
||||
Spark.halt(HttpStatus.SC_BAD_REQUEST);
|
||||
return null;
|
||||
}
|
||||
|
||||
private Object delete(Request request, Response response) throws IOException {
|
||||
authClient.requireLogIn(Context.fromRequest(request));
|
||||
|
||||
var url = Objects.requireNonNull(request.queryParams("url"));
|
||||
var message = Objects.requireNonNull(request.queryParams("note"));
|
||||
var type = Objects.requireNonNull(request.queryParams("type"));
|
||||
var confirm = Objects.requireNonNull(request.queryParams("confirm"));
|
||||
|
||||
if (!"on".equals(confirm)) {
|
||||
logger.error("Confirm dialog not checked, was {}", confirm);
|
||||
Spark.halt(HttpStatus.SC_BAD_REQUEST, "Confirm was not checked");
|
||||
}
|
||||
|
||||
memex.delete(new MemexNodeUrl(url).toNode(), message);
|
||||
|
||||
response.redirect("https://memex.marginalia.nu/"+url);
|
||||
return null;
|
||||
}
|
||||
|
||||
private Object update(Request request, Response response) throws IOException {
|
||||
authClient.requireLogIn(Context.fromRequest(request));
|
||||
|
||||
String extUrl = Objects.requireNonNull(request.queryParams("url"));
|
||||
String extSection = Objects.requireNonNull(request.queryParams("section"));
|
||||
String newSectionText = Objects.requireNonNull(request.queryParams("text"));
|
||||
|
||||
var url = new MemexNodeUrl(extUrl);
|
||||
var section = MemexNodeHeadingId.parse(extSection);
|
||||
var lines = Arrays.asList(newSectionText.split("\r?\n")).toArray(String[]:: new);
|
||||
|
||||
var sectionGemtext = new GemtextDocument(url, lines, section);
|
||||
var updates = updateCalculator.calculateUpdates(memex.getDocument(url), section, sectionGemtext);
|
||||
|
||||
for (GemtextMutation mutation : updates) {
|
||||
mutation.visit(memex);
|
||||
}
|
||||
|
||||
response.redirect("https://memex.marginalia.nu/"+extUrl);
|
||||
return "";
|
||||
}
|
||||
|
||||
private Object create(Request request, Response response) throws IOException {
|
||||
authClient.requireLogIn(Context.fromRequest(request));
|
||||
|
||||
String directory = Objects.requireNonNull(request.queryParams("directory"));
|
||||
String filename = Objects.requireNonNull(request.queryParams("filename"));
|
||||
String text = Objects.requireNonNull(request.queryParams("text"));
|
||||
var url = new MemexNodeUrl(Path.of(directory).resolve(filename).toString());
|
||||
|
||||
memex.createNode(url, text);
|
||||
|
||||
response.redirect("https://memex.marginalia.nu/"+directory + "/" + filename);
|
||||
return "";
|
||||
}
|
||||
|
||||
private Object createForm(Request request, Response response) {
|
||||
final MemexNodeUrl url = new MemexNodeUrl(Objects.requireNonNull(request.queryParams("url")));
|
||||
authClient.redirectToLoginIfUnauthenticated("MEMEX", request, response);
|
||||
|
||||
return new MemexRenderCreateFormModel(url, memex.getDocumentsByPath(url));
|
||||
}
|
||||
|
||||
private Object uploadForm(Request request, Response response) {
|
||||
final MemexNodeUrl url = new MemexNodeUrl(Objects.requireNonNull(request.queryParams("url")));
|
||||
authClient.redirectToLoginIfUnauthenticated("MEMEX", request, response);
|
||||
|
||||
return new MemexRenderUploadFormModel(url, memex.getDocumentsByPath(url));
|
||||
}
|
||||
|
||||
private Object updateForm(Request request, Response response) {
|
||||
final MemexNodeUrl url = new MemexNodeUrl(Objects.requireNonNull(request.queryParams("url")));
|
||||
authClient.redirectToLoginIfUnauthenticated("MEMEX", request, response);
|
||||
|
||||
var doc = memex.getDocument(url);
|
||||
|
||||
return new MemexRenderUpdateFormModel(url, doc.getTitle(), "0", doc.getSectionGemtext(MemexNodeHeadingId.ROOT));
|
||||
}
|
||||
|
||||
|
||||
@SneakyThrows
|
||||
private Object upload(Request request, Response response) {
|
||||
authClient.requireLogIn(Context.fromRequest(request));
|
||||
|
||||
request.attribute("org.eclipse.jetty.multipartConfig", new MultipartConfigElement("/temp", 50*1024*1024, 50*1024*1024, 25*1024*1024));
|
||||
|
||||
String directory = Objects.requireNonNull(request.queryParams("directory"));
|
||||
String filename = Objects.requireNonNull(request.queryParams("filename"));
|
||||
var url = new MemexNodeUrl(Path.of(directory).resolve(filename).toString());
|
||||
try (InputStream input = request.raw().getPart("file").getInputStream()) {
|
||||
byte[] data = input.readAllBytes();
|
||||
memex.uploadImage(url, data);
|
||||
}
|
||||
|
||||
response.redirect("https://memex.marginalia.nu/"+directory + "/" + filename);
|
||||
return "";
|
||||
}
|
||||
|
||||
}
|
@ -1,70 +0,0 @@
|
||||
package nu.marginalia.memex.memex.change;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.ToString;
|
||||
import nu.marginalia.memex.memex.Memex;
|
||||
import nu.marginalia.memex.gemini.gmi.GemtextDocument;
|
||||
import nu.marginalia.memex.gemini.gmi.renderer.GemtextRendererFactory;
|
||||
import nu.marginalia.memex.memex.model.MemexNodeHeadingId;
|
||||
import nu.marginalia.memex.memex.model.MemexNodeUrl;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
@AllArgsConstructor @ToString
|
||||
public class GemtextAppend implements GemtextMutation {
|
||||
public final MemexNodeUrl doc;
|
||||
public final MemexNodeHeadingId id;
|
||||
public final String[] lines;
|
||||
|
||||
@Override
|
||||
public void visit(Memex memex) throws IOException {
|
||||
memex.updateNode(doc, calculateAppend(memex.getDocument(doc)));
|
||||
}
|
||||
|
||||
public String calculateAppend(GemtextDocument document) {
|
||||
|
||||
StringBuilder result = new StringBuilder();
|
||||
var renderer = new GemtextRendererFactory().gemtextRendererAsIs();
|
||||
|
||||
var lines = document.getLines();
|
||||
|
||||
int i = 0;
|
||||
// Copy from before heading
|
||||
for (; i < lines.length; i++) {
|
||||
var item = lines[i];
|
||||
|
||||
if (item.getHeading().isChildOf(id)) {
|
||||
break;
|
||||
}
|
||||
else {
|
||||
result.append(item.visit(renderer)).append('\n');
|
||||
}
|
||||
}
|
||||
|
||||
// Copy contents of heading
|
||||
for (; i < lines.length; i++) {
|
||||
var item = lines[i];
|
||||
|
||||
if (!item.getHeading().isChildOf(id)) {
|
||||
break;
|
||||
}
|
||||
else {
|
||||
result.append(item.visit(renderer)).append('\n');
|
||||
}
|
||||
}
|
||||
|
||||
// Insert new lines
|
||||
for (String newLine : this.lines) {
|
||||
result.append(newLine).append('\n');
|
||||
}
|
||||
|
||||
// Copy contents from after heading
|
||||
for (;i < lines.length; i++) {
|
||||
var item = lines[i];
|
||||
result.append(item.visit(renderer)).append('\n');
|
||||
}
|
||||
|
||||
return result.toString();
|
||||
}
|
||||
|
||||
}
|
@ -1,19 +0,0 @@
|
||||
package nu.marginalia.memex.memex.change;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.ToString;
|
||||
import nu.marginalia.memex.memex.Memex;
|
||||
import nu.marginalia.memex.memex.model.MemexNodeUrl;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
@AllArgsConstructor @ToString
|
||||
public class GemtextCreate implements GemtextMutation {
|
||||
public final MemexNodeUrl doc;
|
||||
public final String text;
|
||||
|
||||
@Override
|
||||
public void visit(Memex memex) throws IOException {
|
||||
memex.createNode(doc, text);
|
||||
}
|
||||
}
|
@ -1,26 +0,0 @@
|
||||
package nu.marginalia.memex.memex.change;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.ToString;
|
||||
import nu.marginalia.memex.memex.Memex;
|
||||
import nu.marginalia.memex.memex.model.MemexNodeUrl;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
@AllArgsConstructor @ToString
|
||||
public class GemtextCreateOrMutate implements GemtextMutation {
|
||||
public final MemexNodeUrl doc;
|
||||
public final String text;
|
||||
public final GemtextMutation mutation;
|
||||
|
||||
@Override
|
||||
public void visit(Memex memex) throws IOException {
|
||||
if (memex.getDocument(doc) == null) {
|
||||
memex.createNode(doc, text);
|
||||
}
|
||||
if (memex.getDocument(doc) == null)
|
||||
throw new IllegalStateException();
|
||||
|
||||
mutation.visit(memex);
|
||||
}
|
||||
}
|
@ -1,18 +0,0 @@
|
||||
package nu.marginalia.memex.memex.change;
|
||||
|
||||
import nu.marginalia.memex.memex.Memex;
|
||||
import nu.marginalia.memex.memex.model.MemexNodeHeadingId;
|
||||
import nu.marginalia.memex.memex.model.MemexNodeUrl;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
public interface GemtextMutation {
|
||||
void visit(Memex memex) throws IOException;
|
||||
|
||||
static GemtextMutation createOrAppend(MemexNodeUrl url, String template, MemexNodeHeadingId heading, String... lines) {
|
||||
return new GemtextCreateOrMutate(url, template, new GemtextAppend(url, heading, lines));
|
||||
}
|
||||
static GemtextMutation createOrPrepend(MemexNodeUrl url, String template, MemexNodeHeadingId heading, String... lines) {
|
||||
return new GemtextCreateOrMutate(url, template, new GemtextPrepend(url, heading, lines));
|
||||
}
|
||||
}
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user