commit c24b978c51a7b6e041b614ccfe2aae197a440c92 Author: vlofgren Date: Thu May 19 17:45:26 2022 +0200 first commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..8b763f78 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +*.class +build/ +*~ diff --git a/README.md b/README.md new file mode 100644 index 00000000..098a6a57 --- /dev/null +++ b/README.md @@ -0,0 +1,9 @@ +# marginalia.nu + +This is the source code for marginalia.nu, including the search engine, +the MEMEX/gemini server, and the encyclopedia service. + +As it stands now, the project is a bit of a mess, as it wasn't developed +with the intention of going open source: a lot of tests and so on make +assumptions about the directory structure, much configuration is hard-coded, +and so on. It's a work in progress. \ No newline at end of file diff --git a/build.gradle b/build.gradle new file mode 100644 index 00000000..b25c2872 --- /dev/null +++ b/build.gradle @@ -0,0 +1,74 @@ +plugins { + id 'java' + + id 'com.github.johnrengelman.shadow' version '6.0.0' +} + +group 'nu.marginalia' +version 'SNAPSHOT' + +compileJava.options.encoding = "UTF-8" +compileTestJava.options.encoding = "UTF-8" +repositories { + mavenLocal() + maven { url "https://artifactory.cronapp.io/public-release/" } + maven { url "https://repo1.maven.org/maven2/" } + maven { url "https://www2.ph.ed.ac.uk/maven2/" } + maven { url "https://jitpack.io/" } + exclusiveContent { + forRepository { + maven { + url = uri("https://jitpack.io") + } + } + filter { + // Only use JitPack for the `gson-record-type-adapter-factory` library + includeModule("com.github.Marcono1234", "gson-record-type-adapter-factory") + } + } +} + +shadowJar { +} +jar { + manifest { + attributes 'Main-Class': "nu.marginalia.wmsa.configuration.ServiceDescriptor" + } + from { + configurations.shadow.collect { it.isDirectory() ? 
it : zipTree(it) } + } +} + +java { + toolchain { + languageVersion.set(JavaLanguageVersion.of(17)) + } +} + +dependencies { + implementation project(':marginalia_nu') +} +task version() { // +} + +test { + maxParallelForks = 16 + forkEvery = 1 + maxHeapSize = "8G" + useJUnitPlatform { + excludeTags "db" + excludeTags "nobuild" + } +} + +task dbTest(type: Test) { + maxParallelForks = 1 + forkEvery = 1 + maxHeapSize = "8G" + + useJUnitPlatform { + includeTags "db" + } +} + + diff --git a/gradle/wrapper/gradle-wrapper.jar b/gradle/wrapper/gradle-wrapper.jar new file mode 100644 index 00000000..41d9927a Binary files /dev/null and b/gradle/wrapper/gradle-wrapper.jar differ diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties new file mode 100644 index 00000000..41dfb879 --- /dev/null +++ b/gradle/wrapper/gradle-wrapper.properties @@ -0,0 +1,5 @@ +distributionBase=GRADLE_USER_HOME +distributionPath=wrapper/dists +distributionUrl=https\://services.gradle.org/distributions/gradle-7.4-bin.zip +zipStoreBase=GRADLE_USER_HOME +zipStorePath=wrapper/dists diff --git a/gradlew b/gradlew new file mode 100755 index 00000000..1b6c7873 --- /dev/null +++ b/gradlew @@ -0,0 +1,234 @@ +#!/bin/sh + +# +# Copyright © 2015-2021 the original authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +############################################################################## +# +# Gradle start up script for POSIX generated by Gradle. 
+# +# Important for running: +# +# (1) You need a POSIX-compliant shell to run this script. If your /bin/sh is +# noncompliant, but you have some other compliant shell such as ksh or +# bash, then to run this script, type that shell name before the whole +# command line, like: +# +# ksh Gradle +# +# Busybox and similar reduced shells will NOT work, because this script +# requires all of these POSIX shell features: +# * functions; +# * expansions «$var», «${var}», «${var:-default}», «${var+SET}», +# «${var#prefix}», «${var%suffix}», and «$( cmd )»; +# * compound commands having a testable exit status, especially «case»; +# * various built-in commands including «command», «set», and «ulimit». +# +# Important for patching: +# +# (2) This script targets any POSIX shell, so it avoids extensions provided +# by Bash, Ksh, etc; in particular arrays are avoided. +# +# The "traditional" practice of packing multiple parameters into a +# space-separated string is a well documented source of bugs and security +# problems, so this is (mostly) avoided, by progressively accumulating +# options in "$@", and eventually passing that to Java. +# +# Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS, +# and GRADLE_OPTS) rely on word-splitting, this is performed explicitly; +# see the in-line comments for details. +# +# There are tweaks for specific operating systems such as AIX, CygWin, +# Darwin, MinGW, and NonStop. +# +# (3) This script is generated from the Groovy template +# https://github.com/gradle/gradle/blob/master/subprojects/plugins/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt +# within the Gradle project. +# +# You can find Gradle at https://github.com/gradle/gradle/. +# +############################################################################## + +# Attempt to set APP_HOME + +# Resolve links: $0 may be a link +app_path=$0 + +# Need this for daisy-chained symlinks. 
+while + APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path + [ -h "$app_path" ] +do + ls=$( ls -ld "$app_path" ) + link=${ls#*' -> '} + case $link in #( + /*) app_path=$link ;; #( + *) app_path=$APP_HOME$link ;; + esac +done + +APP_HOME=$( cd "${APP_HOME:-./}" && pwd -P ) || exit + +APP_NAME="Gradle" +APP_BASE_NAME=${0##*/} + +# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' + +# Use the maximum available, or set MAX_FD != -1 to use that value. +MAX_FD=maximum + +warn () { + echo "$*" +} >&2 + +die () { + echo + echo "$*" + echo + exit 1 +} >&2 + +# OS specific support (must be 'true' or 'false'). +cygwin=false +msys=false +darwin=false +nonstop=false +case "$( uname )" in #( + CYGWIN* ) cygwin=true ;; #( + Darwin* ) darwin=true ;; #( + MSYS* | MINGW* ) msys=true ;; #( + NONSTOP* ) nonstop=true ;; +esac + +CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar + + +# Determine the Java command to use to start the JVM. +if [ -n "$JAVA_HOME" ] ; then + if [ -x "$JAVA_HOME/jre/sh/java" ] ; then + # IBM's JDK on AIX uses strange locations for the executables + JAVACMD=$JAVA_HOME/jre/sh/java + else + JAVACMD=$JAVA_HOME/bin/java + fi + if [ ! -x "$JAVACMD" ] ; then + die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." + fi +else + JAVACMD=java + which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." +fi + +# Increase the maximum file descriptors if we can. +if ! "$cygwin" && ! "$darwin" && ! 
"$nonstop" ; then + case $MAX_FD in #( + max*) + MAX_FD=$( ulimit -H -n ) || + warn "Could not query maximum file descriptor limit" + esac + case $MAX_FD in #( + '' | soft) :;; #( + *) + ulimit -n "$MAX_FD" || + warn "Could not set maximum file descriptor limit to $MAX_FD" + esac +fi + +# Collect all arguments for the java command, stacking in reverse order: +# * args from the command line +# * the main class name +# * -classpath +# * -D...appname settings +# * --module-path (only if needed) +# * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables. + +# For Cygwin or MSYS, switch paths to Windows format before running java +if "$cygwin" || "$msys" ; then + APP_HOME=$( cygpath --path --mixed "$APP_HOME" ) + CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" ) + + JAVACMD=$( cygpath --unix "$JAVACMD" ) + + # Now convert the arguments - kludge to limit ourselves to /bin/sh + for arg do + if + case $arg in #( + -*) false ;; # don't mess with options #( + /?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath + [ -e "$t" ] ;; #( + *) false ;; + esac + then + arg=$( cygpath --path --ignore --mixed "$arg" ) + fi + # Roll the args list around exactly as many times as the number of + # args, so each arg winds up back in the position where it started, but + # possibly modified. + # + # NB: a `for` loop captures its iteration list before it begins, so + # changing the positional parameters here affects neither the number of + # iterations, nor the values presented in `arg`. + shift # remove old arg + set -- "$@" "$arg" # push replacement arg + done +fi + +# Collect all arguments for the java command; +# * $DEFAULT_JVM_OPTS, $JAVA_OPTS, and $GRADLE_OPTS can contain fragments of +# shell script including quotes and variable substitutions, so put them in +# double quotes to make sure that they get re-expanded; and +# * put everything else in single quotes, so that it's not re-expanded. 
+ +set -- \ + "-Dorg.gradle.appname=$APP_BASE_NAME" \ + -classpath "$CLASSPATH" \ + org.gradle.wrapper.GradleWrapperMain \ + "$@" + +# Use "xargs" to parse quoted args. +# +# With -n1 it outputs one arg per line, with the quotes and backslashes removed. +# +# In Bash we could simply go: +# +# readarray ARGS < <( xargs -n1 <<<"$var" ) && +# set -- "${ARGS[@]}" "$@" +# +# but POSIX shell has neither arrays nor command substitution, so instead we +# post-process each arg (as a line of input to sed) to backslash-escape any +# character that might be a shell metacharacter, then use eval to reverse +# that process (while maintaining the separation between arguments), and wrap +# the whole thing up as a single "set" statement. +# +# This will of course break if any of these variables contains a newline or +# an unmatched quote. +# + +eval "set -- $( + printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" | + xargs -n1 | + sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' | + tr '\n' ' ' + )" '"$@"' + +exec "$JAVACMD" "$@" diff --git a/gradlew.bat b/gradlew.bat new file mode 100644 index 00000000..107acd32 --- /dev/null +++ b/gradlew.bat @@ -0,0 +1,89 @@ +@rem +@rem Copyright 2015 the original author or authors. +@rem +@rem Licensed under the Apache License, Version 2.0 (the "License"); +@rem you may not use this file except in compliance with the License. +@rem You may obtain a copy of the License at +@rem +@rem https://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, software +@rem distributed under the License is distributed on an "AS IS" BASIS, +@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@rem See the License for the specific language governing permissions and +@rem limitations under the License. 
+@rem + +@if "%DEBUG%" == "" @echo off +@rem ########################################################################## +@rem +@rem Gradle startup script for Windows +@rem +@rem ########################################################################## + +@rem Set local scope for the variables with windows NT shell +if "%OS%"=="Windows_NT" setlocal + +set DIRNAME=%~dp0 +if "%DIRNAME%" == "" set DIRNAME=. +set APP_BASE_NAME=%~n0 +set APP_HOME=%DIRNAME% + +@rem Resolve any "." and ".." in APP_HOME to make it shorter. +for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi + +@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m" + +@rem Find java.exe +if defined JAVA_HOME goto findJavaFromJavaHome + +set JAVA_EXE=java.exe +%JAVA_EXE% -version >NUL 2>&1 +if "%ERRORLEVEL%" == "0" goto execute + +echo. +echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. +echo. +echo Please set the JAVA_HOME variable in your environment to match the +echo location of your Java installation. + +goto fail + +:findJavaFromJavaHome +set JAVA_HOME=%JAVA_HOME:"=% +set JAVA_EXE=%JAVA_HOME%/bin/java.exe + +if exist "%JAVA_EXE%" goto execute + +echo. +echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% +echo. +echo Please set the JAVA_HOME variable in your environment to match the +echo location of your Java installation. + +goto fail + +:execute +@rem Setup the command line + +set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar + + +@rem Execute Gradle +"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %* + +:end +@rem End local scope for the variables with windows NT shell +if "%ERRORLEVEL%"=="0" goto mainEnd + +:fail +rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of +rem the _cmd.exe /c_ return code! 
+if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 +exit /b 1 + +:mainEnd +if "%OS%"=="Windows_NT" endlocal + +:omega diff --git a/marginalia_nu/build.gradle b/marginalia_nu/build.gradle new file mode 100644 index 00000000..dd6e06eb --- /dev/null +++ b/marginalia_nu/build.gradle @@ -0,0 +1,133 @@ +plugins { + id 'java' + id "io.freefair.lombok" version "5.3.3.3" + + id "me.champeau.jmh" version "0.6.6" +} + +repositories { + mavenLocal() + maven { url "https://artifactory.cronapp.io/public-release/" } + maven { url "https://repo1.maven.org/maven2/" } + maven { url "https://www2.ph.ed.ac.uk/maven2/" } + maven { url "https://jitpack.io/" } + exclusiveContent { + forRepository { + maven { + url = uri("https://jitpack.io") + } + } + filter { + // Only use JitPack for the `gson-record-type-adapter-factory` library + includeModule("com.github.Marcono1234", "gson-record-type-adapter-factory") + } + } +} + +dependencies { + implementation project(':third_party') + + implementation 'junit:junit:4.13.2' + testImplementation 'org.junit.jupiter:junit-jupiter-api:5.8.2' + testRuntimeOnly 'org.junit.jupiter:junit-jupiter-engine' + + implementation 'org.projectlombok:lombok:1.18.22' + annotationProcessor 'org.projectlombok:lombok:1.18.22' + + testCompileOnly 'org.projectlombok:lombok:1.18.22' + testImplementation 'org.projectlombok:lombok:1.18.22' + testAnnotationProcessor 'org.projectlombok:lombok:1.18.22' + + implementation 'com.github.jknack:handlebars:4.3.0' + implementation 'com.github.jknack:handlebars-markdown:4.2.1' + + implementation group: 'com.google.code.gson', name: 'gson', version: '2.9.0' + implementation 'io.reactivex.rxjava3:rxjava:3.1.4' + implementation "com.sparkjava:spark-core:2.9.3" + implementation 'com.opencsv:opencsv:5.6' + + implementation group: 'org.apache.logging.log4j', name: 'log4j-api', version: '2.17.1' + implementation group: 'org.apache.logging.log4j', name: 'log4j-core', version: '2.17.1' + implementation group: 'org.apache.logging.log4j', name: 
'log4j-slf4j-impl', version: '2.17.1' + implementation group: 'org.apache.logging.log4j', name: 'log4j-api', version: '2.17.1' + implementation group: 'org.apache.logging.log4j', name: 'log4j-core', version: '2.17.1' + implementation group: 'org.apache.logging.log4j', name: 'log4j-slf4j-impl', version: '2.17.1' + + implementation 'org.slf4j:slf4j-api:1.7.36' + + implementation 'com.google.guava:guava:31.1-jre' + implementation 'com.google.inject:guice:5.1.0' + implementation 'com.github.jnr:jnr-ffi:2.1.1' + implementation 'org.apache.httpcomponents:httpcore:4.4.15' + implementation 'org.apache.httpcomponents:httpclient:4.5.13' + implementation 'com.github.ThatJavaNerd:JRAW:1.1.0' + + implementation group: 'com.h2database', name: 'h2', version: '2.1.210' + testImplementation group: 'org.mockito', name: 'mockito-core', version: '4.3.1' + + implementation 'org.jsoup:jsoup:1.14.3' + implementation group: 'com.github.crawler-commons', name: 'crawler-commons', version: '1.2' + + implementation 'org.mariadb.jdbc:mariadb-java-client:3.0.3' + implementation group: 'net.sf.trove4j', name: 'trove4j', version: '3.0.3' + + implementation 'com.zaxxer:HikariCP:5.0.1' + + implementation 'org.apache.opennlp:opennlp-tools:1.9.4' + implementation 'io.prometheus:simpleclient:0.15.0' + implementation 'io.prometheus:simpleclient_servlet:0.15.0' + implementation 'io.prometheus:simpleclient_httpserver:0.15.0' + implementation 'io.prometheus:simpleclient_hotspot:0.15.0' + implementation 'com.fasterxml.jackson.core:jackson-databind:2.13.2.1' + implementation 'org.apache.opennlp:opennlp-tools:1.9.4' + implementation 'io.prometheus:simpleclient:0.15.0' + implementation 'io.prometheus:simpleclient_servlet:0.15.0' + implementation 'io.prometheus:simpleclient_httpserver:0.15.0' + implementation 'io.prometheus:simpleclient_hotspot:0.15.0' + implementation 'com.fasterxml.jackson.core:jackson-databind:2.13.2.1' + + implementation group: 'org.yaml', name: 'snakeyaml', version: '1.30' + + 
implementation 'com.syncthemall:boilerpipe:1.2.2' + implementation 'com.github.luben:zstd-jni:1.5.2-2' + implementation 'com.github.vladimir-bukhtoyarov:bucket4j-core:7.3.0' + implementation 'de.rototor.jeuclid:jeuclid-core:3.1.14' + + implementation 'org.imgscalr:imgscalr-lib:4.2' + implementation 'org.jclarion:image4j:0.7' + + implementation 'commons-net:commons-net:3.6' + implementation 'org.eclipse.jgit:org.eclipse.jgit:5.12.0.202106070339-r' + implementation 'org.eclipse.jgit:org.eclipse.jgit.ssh.jsch:5.12.0.202106070339-r' + implementation 'com.jcraft:jsch:0.1.55' + + implementation group: 'org.apache.commons', name: 'commons-compress', version: '1.21' + implementation 'edu.stanford.nlp:stanford-corenlp:4.4.0' + + implementation group: 'it.unimi.dsi', name: 'fastutil', version: '8.5.8' + implementation 'org.roaringbitmap:RoaringBitmap:[0.6,)' + implementation group: 'mysql', name: 'mysql-connector-java', version: '8.0.29' + + implementation 'com.github.Marcono1234:gson-record-type-adapter-factory:0.2.0' +} + +test { + maxParallelForks = 16 + forkEvery = 1 + maxHeapSize = "8G" + useJUnitPlatform { + excludeTags "db" + } +} + +task dbTest(type: Test) { + maxParallelForks = 1 + forkEvery = 1 + maxHeapSize = "8G" + + useJUnitPlatform { + includeTags "db" + } +} + + diff --git a/marginalia_nu/lombok.config b/marginalia_nu/lombok.config new file mode 100644 index 00000000..6aa51d71 --- /dev/null +++ b/marginalia_nu/lombok.config @@ -0,0 +1,2 @@ +# This file is generated by the 'io.freefair.lombok' Gradle plugin +config.stopBubbling = true diff --git a/marginalia_nu/src/jmh/java/bs_vs_ls/BinSearchVsLinSearch.java b/marginalia_nu/src/jmh/java/bs_vs_ls/BinSearchVsLinSearch.java new file mode 100644 index 00000000..c006838d --- /dev/null +++ b/marginalia_nu/src/jmh/java/bs_vs_ls/BinSearchVsLinSearch.java @@ -0,0 +1,37 @@ +package bs_vs_ls; + +import org.openjdk.jmh.annotations.*; + +import java.util.Arrays; +import java.util.stream.LongStream; + +public class 
BinSearchVsLinSearch { + static long[] data = LongStream.generate(() -> (long) (Long.MAX_VALUE * Math.random())).limit(512).sorted().toArray(); + + @State(Scope.Thread) + public static class Target { + long targetValue = 0; + + @Setup(Level.Invocation) + public void setUp() { + targetValue = data[(int)(data.length * Math.random())]; + } + + } + +// @Benchmark + public long testBs(Target t) { + return Arrays.binarySearch(data, t.targetValue); + } + +// @Benchmark + public long testLs(Target t) { + for (int i = 0; i < 512; i++) { + if (data[i] > t.targetValue) + break; + else if (data[i] == t.targetValue) + return i; + } + return -1; + } +} diff --git a/marginalia_nu/src/jmh/java/bs_vs_ls/BinSearchVsLinSearch2.java b/marginalia_nu/src/jmh/java/bs_vs_ls/BinSearchVsLinSearch2.java new file mode 100644 index 00000000..c449dd97 --- /dev/null +++ b/marginalia_nu/src/jmh/java/bs_vs_ls/BinSearchVsLinSearch2.java @@ -0,0 +1,68 @@ +package bs_vs_ls; + +import nu.marginalia.util.multimap.MultimapFileLong; +import nu.marginalia.util.multimap.MultimapSearcher; +import org.openjdk.jmh.annotations.*; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.stream.LongStream; + +public class BinSearchVsLinSearch2 { + static long[] data = LongStream.generate(() -> (long) (Long.MAX_VALUE * Math.random())).limit(512).sorted().toArray(); + + @State(Scope.Benchmark) + public static class Target { + Path tf; + MultimapFileLong file; + MultimapSearcher searcher; + long[] data = new long[512]; + + { + try { + tf = Files.createTempFile("tmpFileIOTest", "dat"); + file = MultimapFileLong.forOutput(tf, 1024); + searcher = file.createSearcher(); + for (int i = 0; i < 65535; i++) { + file.put(i, i); + } + } catch (IOException e) { + e.printStackTrace(); + } + } + } + + @Measurement(iterations = 1) + @Warmup(iterations = 1) + @Benchmark + public long testLs(Target t) { + int target = (int)(4096 + 512 * Math.random()); + for (int i = 4096; i < 
(4096+512); i++) { + long val = t.file.get(i); + if (val > target) + break; + if (val == target) + return val; + } + return -1; + } + + @Measurement(iterations = 1) + @Warmup(iterations = 1) + @Benchmark + public long testLs2(Target t) { + int target = (int)(4096 + 512 * Math.random()); + + t.file.read(t.data, 4096); + for (int i = 0; i < (512); i++) { + long val = t.data[i]; /* search the buffer handed to read() above; file.get(i) never touched that buffer */ + if (val > target) + break; + if (val == target) + return val; + } + return -1; + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/gemini/BadBotList.java b/marginalia_nu/src/main/java/nu/marginalia/gemini/BadBotList.java new file mode 100644 index 00000000..36961ee0 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/gemini/BadBotList.java @@ -0,0 +1,47 @@ +package nu.marginalia.gemini; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.net.InetAddress; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; + +/** Shared list of banned client addresses. The set is concurrent because GeminiService consults INSTANCE from its accept loop while a thread pool serves requests. */ +public class BadBotList { + private final Set<InetAddress> shitlist = ConcurrentHashMap.newKeySet(); + public static final BadBotList INSTANCE = new BadBotList(); + private final Logger logger = LoggerFactory.getLogger(getClass().getSimpleName()); + + private BadBotList() {} + + /** Returns true unless the address was banned by an earlier {@link #isQueryPermitted} call. */ + public boolean isAllowed(InetAddress address) { + return !shitlist.contains(address); + } + + /** Bans the address and returns false when the query is flagged by {@link #isBadQuery}; otherwise returns true. */ + public boolean isQueryPermitted(InetAddress address, String query) { + if (isBadQuery(query)) { + logger.info("Banning {}", address); + shitlist.add(address); + return false; + } + return true; + } + + /** Flags queries that start with "GET" or "OPTIONS" or contain "mstshash" (presumably HTTP/scanner traffic rather than Gemini requests). */ + private boolean isBadQuery(String query) { + if (query.startsWith("GET")) { + return true; + } + if (query.startsWith("OPTIONS")) { + return true; + } + if (query.contains("mstshash")) { + return true; + } + + return false; + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/gemini/GeminiConfigurationModule.java b/marginalia_nu/src/main/java/nu/marginalia/gemini/GeminiConfigurationModule.java new file mode 100644 index 00000000..f9ce793a --- /dev/null +++ 
b/marginalia_nu/src/main/java/nu/marginalia/gemini/GeminiConfigurationModule.java @@ -0,0 +1,21 @@ +package nu.marginalia.gemini; + +import com.google.inject.AbstractModule; +import com.google.inject.Inject; +import com.google.inject.Provider; +import com.google.inject.name.Named; +import com.google.inject.name.Names; +import nu.marginalia.wmsa.memex.system.MemexFileWriter; + +import java.nio.file.Path; + +public class GeminiConfigurationModule extends AbstractModule { + public void configure() { + bind(Path.class).annotatedWith(Names.named("gemini-server-root")).toInstance(Path.of("/var/lib/wmsa/memex-gmi")); + bind(Path.class).annotatedWith(Names.named("gemini-cert-file")).toInstance(Path.of("/var/lib/wmsa/gemini/crypto.jks")); + bind(Path.class).annotatedWith(Names.named("gemini-cert-password-file")).toInstance(Path.of("/var/lib/wmsa/gemini/password.dat")); + bind(Integer.class).annotatedWith(Names.named("gemini-server-port")).toInstance(1965); + + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/gemini/GeminiService.java b/marginalia_nu/src/main/java/nu/marginalia/gemini/GeminiService.java new file mode 100644 index 00000000..ffad695b --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/gemini/GeminiService.java @@ -0,0 +1,164 @@ +package nu.marginalia.gemini; + +import com.google.inject.Inject; +import com.google.inject.Singleton; +import com.google.inject.name.Named; +import nu.marginalia.gemini.io.GeminiConnection; +import nu.marginalia.gemini.io.GeminiSSLSetUp; +import nu.marginalia.gemini.io.GeminiStatusCode; +import nu.marginalia.gemini.io.GeminiUserException; +import nu.marginalia.gemini.plugins.BareStaticPagePlugin; +import nu.marginalia.gemini.plugins.Plugin; +import nu.marginalia.gemini.plugins.SearchPlugin; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.net.ssl.SSLException; +import javax.net.ssl.SSLServerSocket; +import javax.net.ssl.SSLServerSocketFactory; +import javax.net.ssl.SSLSocket; +import 
java.io.IOException; +import java.net.URI; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Optional; +import java.util.concurrent.Executor; +import java.util.concurrent.Executors; + +@Singleton +public class GeminiService { + + public static final String DEFAULT_FILENAME = "index.gmi"; + public final Path serverRoot; + + private final Logger logger = LoggerFactory.getLogger("GeminiServer"); + private final Executor pool = Executors.newFixedThreadPool(32); + private final SSLServerSocket serverSocket; + + private final Plugin[] plugins; + private final BadBotList badBotList = BadBotList.INSTANCE; + + @Inject + public GeminiService(@Named("gemini-server-root") Path serverRoot, + @Named("gemini-server-port") Integer port, + GeminiSSLSetUp sslSetUp, + BareStaticPagePlugin pagePlugin, + SearchPlugin searchPlugin) throws Exception { + this.serverRoot = serverRoot; + logger.info("Setting up crypto"); + final SSLServerSocketFactory socketFactory = sslSetUp.getServerSocketFactory(); + + serverSocket = (SSLServerSocket) socketFactory.createServerSocket(port /* 1965 */); + serverSocket.setEnabledCipherSuites(socketFactory.getSupportedCipherSuites()); + serverSocket.setEnabledProtocols(new String[] {"TLSv1.3", "TLSv1.2"}); + + logger.info("Verifying setup"); + if (!Files.exists(this.serverRoot)) { + logger.error("Could not find SERVER_ROOT {}", this.serverRoot); + System.exit(255); + } + + plugins = new Plugin[] { + pagePlugin, + searchPlugin + }; + } + + public void run() { + logger.info("Awaiting connections"); + + try { + for (; ; ) { + SSLSocket connection = (SSLSocket) serverSocket.accept(); + connection.setSoTimeout(10_000); + + if (!badBotList.isAllowed(connection.getInetAddress())) { + connection.close(); + } else { + pool.execute(() -> serve(connection)); + } + } + } + catch (IOException ex) { + logger.error("IO Exception in gemini server", ex); + } + } + + private void serve(SSLSocket socket) { + final GeminiConnection connection; + try { + 
connection = new GeminiConnection(socket); + } + catch (IOException ex) { + logger.error("Failed to create connection object", ex); + return; + } + + try { + handleRequest(connection); + } + catch (GeminiUserException ex) { + errorResponse(connection, ex.getMessage()); + } + catch (SSLException ex) { + logger.error(connection.getAddress() + " SSL error"); + connection.close(); + } + catch (Exception ex) { + errorResponse(connection, "Error"); + logger.error(connection.getAddress(), ex); + } + finally { + connection.close(); + } + } + + private void errorResponse(GeminiConnection connection, String message) { + if (connection.isConnected()) { + try { + logger.error("=> " + connection.getAddress(), message); + connection.writeStatusLine(GeminiStatusCode.ERROR_PERMANENT, message); + } + catch (IOException ex) { + logger.error("Exception while sending error", ex); + } + } + } + + private void handleRequest(GeminiConnection connection) throws Exception { + + final String address = connection.getAddress(); + logger.info("Connect: " + address); + + final Optional maybeUri = connection.readUrl(); + if (maybeUri.isEmpty()) { + logger.info("Done: {}", address); + return; + } + + final URI uri = maybeUri.get(); + logger.info("Request {}", uri); + + if (!uri.getScheme().equals("gemini")) { + throw new GeminiUserException("Unsupported protocol"); + } + + servePage(connection, uri); + logger.info("Done: {}", address); + } + + private void servePage(GeminiConnection connection, URI url) throws IOException { + String path = url.getPath(); + + for (Plugin p : plugins) { + if (p.serve(url, connection)) { + return; + } + } + + logger.error("FileNotFound {}", path); + connection.writeStatusLine(GeminiStatusCode.ERROR_TEMPORARY, "No such file"); + } + + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/gemini/client/GeminiClient.java b/marginalia_nu/src/main/java/nu/marginalia/gemini/client/GeminiClient.java new file mode 100644 index 00000000..e306e88f --- /dev/null +++ 
b/marginalia_nu/src/main/java/nu/marginalia/gemini/client/GeminiClient.java @@ -0,0 +1,130 @@ +package nu.marginalia.gemini.client; + +import javax.net.ssl.SSLContext; +import javax.net.ssl.SSLSocketFactory; +import javax.net.ssl.TrustManager; +import javax.net.ssl.X509TrustManager; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.net.URI; +import java.nio.charset.StandardCharsets; +import java.security.cert.X509Certificate; + +/** Unstable code! */ +public class GeminiClient { + + private final SSLSocketFactory socketFactory; + + // Create a trust manager that does not validate anything + public static final TrustManager[] trustAllCerts = new TrustManager[]{ + new X509TrustManager() { + @Override + public void checkClientTrusted(X509Certificate[] chain, + String authType) { + } + + @Override + public void checkServerTrusted(X509Certificate[] chain, + String authType) { + } + + @Override + public X509Certificate[] getAcceptedIssuers() { + return new X509Certificate[0]; + } + } + }; + + + public static SSLSocketFactory buildSocketFactory() throws Exception { + // Install the all-trusting trust manager + final SSLContext sslContext = SSLContext.getInstance("SSL"); + sslContext.init(null, trustAllCerts, new java.security.SecureRandom()); + + return sslContext.getSocketFactory(); + } + + public GeminiClient() throws Exception { + socketFactory = buildSocketFactory(); + } + + public Response get(URI uri) throws IOException { + + final int port = uri.getPort() == -1 ? 
1965 : uri.getPort(); + final String host = uri.getHost(); + var requestString = String.format("%s\r\n", uri).getBytes(StandardCharsets.UTF_8); + + try (var socket = socketFactory.createSocket(host, port)) { + socket.setSoTimeout(10_000); + socket.getOutputStream().write(requestString); + + var is = socket.getInputStream(); + String statusLine = new GeminiInput(is).get(); + + int code = Integer.parseInt(statusLine.substring(0,2)); + String meta = statusLine.length() > 3 ? statusLine.substring(3) : ""; /* tolerate a status line with no META instead of throwing StringIndexOutOfBoundsException */ + + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + is.transferTo(baos); + + return new Response(code, meta, baos.toByteArray()); + } + + } + + public static class Response { + public final int code; + public final String meta; + public final byte[] data; + + Response(int code, String meta, byte[] data) { + this.code = code; + this.meta = meta; + this.data = data; + } + } + + + public static class GeminiInput { /* reads one CRLF-terminated line from the stream, byte by byte */ + private final InputStream is; + private final byte[] buffer = new byte[1029]; /* longest header: 2 status digits + space + 1024-byte META + CR LF */ + private int idx; + + final String result; + + public GeminiInput(InputStream is) throws IOException { + this.is = is; + + for (idx = 0; idx <= buffer.length; idx++) { /* idx = bytes read so far; the final pass checks a line that exactly fills the buffer */ + if (hasEndOfLine()) { + result = new String(buffer, 0, idx-2, StandardCharsets.UTF_8); + return; + } + if (idx == buffer.length) break; + readCharacter(); + } + + throw new RuntimeException("String too long"); + } + + public String get() { + return result; + } + + private void readCharacter() throws IOException { + int rb = is.read(); + if (-1 == rb) { + throw new RuntimeException("URL incomplete (no CR LF)"); + } + buffer[idx] = (byte) rb; + } + + public boolean hasEndOfLine() { + return idx > 2 + && buffer[idx - 1] == (byte) '\n' + && buffer[idx - 2] == (byte) '\r'; + } + + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/gemini/gmi/Gemtext.java b/marginalia_nu/src/main/java/nu/marginalia/gemini/gmi/Gemtext.java new file mode 100644 index 00000000..3b07f4cc --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/gemini/gmi/Gemtext.java @@ -0,0 
+1,53 @@ +package nu.marginalia.gemini.gmi; + +import lombok.Getter; +import nu.marginalia.gemini.gmi.line.AbstractGemtextLine; +import nu.marginalia.gemini.gmi.parser.GemtextParser; +import nu.marginalia.gemini.gmi.renderer.GemtextRenderer; +import nu.marginalia.wmsa.memex.model.MemexNodeHeadingId; +import nu.marginalia.wmsa.memex.model.MemexNodeUrl; + +import java.io.IOException; +import java.io.Writer; +import java.util.Arrays; +import java.util.stream.Collectors; +import java.util.stream.Stream; + + +@Getter +public class Gemtext { + private final AbstractGemtextLine[] lines; + private final MemexNodeUrl url; + + public Gemtext(MemexNodeUrl url, String[] lines, MemexNodeHeadingId headingRoot) { + this.lines = GemtextParser.parse(lines, headingRoot); + this.url = url; + } + public Gemtext(MemexNodeUrl url, String[] lines) { + this.lines = GemtextParser.parse(lines, new MemexNodeHeadingId(0)); + this.url = url; + } + + public String render(GemtextRenderer renderer) { + return Arrays.stream(lines).map(renderer::renderLine).collect(Collectors.joining()); + } + + public void render(GemtextRenderer renderer, Writer w) throws IOException { + for (var line : lines) { + w.write(renderer.renderLine(line)); + w.write('\n'); + } + } + + public Stream stream() { + return Arrays.stream(lines); + } + + public AbstractGemtextLine get(int idx) { + return lines[idx]; + } + public int size() { + return lines.length; + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/gemini/gmi/GemtextDatabase.java b/marginalia_nu/src/main/java/nu/marginalia/gemini/gmi/GemtextDatabase.java new file mode 100644 index 00000000..c86ce518 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/gemini/gmi/GemtextDatabase.java @@ -0,0 +1,72 @@ +package nu.marginalia.gemini.gmi; + +import com.google.common.collect.Sets; +import nu.marginalia.gemini.gmi.line.GemtextLineVisitorAdapter; +import nu.marginalia.gemini.gmi.line.GemtextLink; +import 
nu.marginalia.wmsa.memex.model.MemexNodeHeadingId; +import nu.marginalia.wmsa.memex.model.MemexNodeUrl; +import nu.marginalia.wmsa.memex.model.MemexUrl; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.*; + +public class GemtextDatabase extends Gemtext { + public Map links; + + public GemtextDatabase(MemexNodeUrl url, String[] lines) { + super(url, lines); + + links = new HashMap<>(); + for (int i = 0; i < size(); i++) { + int linkIdx = i; + + get(i).visit(new GemtextLineVisitorAdapter<>() { + @Override + public Object visit(GemtextLink g) { + links.put(g.getUrl().toString(), linkIdx); + return null; + } + }); + } + } + + public Set keys() { + return links.keySet(); + } + + public Optional getLinkData(MemexUrl url) { + Integer idx = links.get(url.getUrl()); + if (idx != null) { + return + Optional.of(get(idx).mapLink(GemtextLink::getTitle).orElse("")); + } + return Optional.empty(); + } + + + public static GemtextDatabase of(MemexNodeUrl url, String[] lines) { + return new GemtextDatabase(url, lines); + } + + public static GemtextDatabase of(MemexNodeUrl url, Path file) throws IOException { + try (var s = Files.lines(file)) { + return new GemtextDatabase(url, s.toArray(String[]::new)); + } + } + + public Set difference(GemtextDatabase other) { + Set differences = new HashSet<>(); + + Sets.difference(keys(), other.keys()).stream().map(MemexNodeUrl::new).forEach(differences::add); + + Sets.intersection(keys(), other.keys()) + .stream() + .map(MemexNodeUrl::new) + .filter(url -> !Objects.equals(getLinkData(url), other.getLinkData(url))) + .forEach(differences::add); + + return differences; + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/gemini/gmi/GemtextDocument.java b/marginalia_nu/src/main/java/nu/marginalia/gemini/gmi/GemtextDocument.java new file mode 100644 index 00000000..6f6bc40f --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/gemini/gmi/GemtextDocument.java @@ -0,0 +1,163 @@ 
+package nu.marginalia.gemini.gmi; + +import lombok.Getter; +import nu.marginalia.gemini.gmi.line.*; +import nu.marginalia.gemini.gmi.renderer.GemtextRenderer; +import nu.marginalia.gemini.gmi.renderer.GemtextRendererFactory; +import nu.marginalia.wmsa.memex.model.MemexNodeHeadingId; +import nu.marginalia.wmsa.memex.model.MemexNodeTaskId; +import nu.marginalia.wmsa.memex.model.MemexNodeUrl; +import nu.marginalia.wmsa.memex.model.MemexTaskState; +import org.apache.commons.lang3.tuple.Pair; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.*; +import java.util.regex.Pattern; +import java.util.stream.Collectors; + +@Getter +public class GemtextDocument extends Gemtext { + private final Map headings; + private final Map> headingsByName; + private final Set pragmas; + private final List tasks; + + private final String title; + private final String date; + private final List links; + private final int hashCode; + + private static final Pattern datePattern = Pattern.compile(".*(\\d{4}-\\d{2}-\\d{2}).*"); + private static final GemtextRenderer rawRenderer = new GemtextRendererFactory().gemtextRendererAsIs(); + + public GemtextDocument(MemexNodeUrl url, String[] lines, MemexNodeHeadingId headingRoot) { + super(url, lines, headingRoot); + + this.hashCode = Arrays.hashCode(lines); + + GemtextDataExtractor extractor = new GemtextDataExtractor(); + + Arrays.stream(this.getLines()).forEach(extractor::take); + + this.headings = extractor.getHeadings(); + this.links = extractor.getLinks(); + this.title = Objects.requireNonNullElse(extractor.getTitle(), url.getUrl()); + this.pragmas = extractor.getPragmas(); + this.headingsByName = extractor.getHeadingsByName(); + this.tasks = extractor.getTasks(); + this.date = extractor.getDate(); + } + + public String getHeadingForElement(AbstractGemtextLine line) { + return headings.getOrDefault(line.getHeading(), ""); + } + + public List getSection(MemexNodeHeadingId headingId) { + return 
stream() + .filter(line -> line.getHeading().isChildOf(headingId)) + .collect(Collectors.toList()); + } + + public String getSectionGemtext(MemexNodeHeadingId headingId) { + if (headingId.equals(new MemexNodeHeadingId(0))) { + return stream() + .map(rawRenderer::renderLine) + .collect(Collectors.joining("\n")); + } + + return stream() + .filter(line -> line.getHeading().isChildOf(headingId)) + .map(rawRenderer::renderLine) + .collect(Collectors.joining("\n")); + } + + public Map> getOpenTopTasks() { + return tasks.stream() + .filter(task -> MemexTaskState.TODO.equals(task.getState()) + || MemexTaskState.URGENT.equals(task.getState())) + .filter(task -> task.getId().level() == 1) + .collect(Collectors.toMap(GemtextTask::getId, task -> Pair.of(task.getTask(), task.getState()))); + } + + public static GemtextDocument of(MemexNodeUrl url, String... lines) { + return new GemtextDocument(url, lines, new MemexNodeHeadingId(0)); + } + + public static GemtextDocument of(MemexNodeUrl url, Path file) throws IOException { + try (var s = Files.lines(file)) { + return new GemtextDocument(url, s.toArray(String[]::new), new MemexNodeHeadingId(0)); + } + } + + public boolean isIndex() { + return getUrl().getFilename().equals("index.gmi"); + } + + @Override + public int hashCode() { + return hashCode; + } + + public Optional getHeading(MemexNodeHeadingId heading) { + return Optional.ofNullable(headings.get(heading)); + } + + public Optional getHeadingByName(MemexNodeHeadingId parent, String name) { + var headings = headingsByName.get(name); + if (null == headings) { + return Optional.empty(); + } + return headings.stream().filter(heading -> heading.isChildOf(parent)).findAny(); + } + + @Getter + private static class GemtextDataExtractor extends GemtextLineVisitorAdapter { + + private String title; + private String date; + private final Map headings = new TreeMap<>((a, b) -> Arrays.compare(a.getIds(), b.getIds())); + private final Map> headingsByName = new HashMap<>(); + private 
final Set pragmas = new HashSet<>(); + private final List links = new ArrayList<>(); + private final List tasks = new ArrayList<>(); + + @Override + public Object visit(GemtextHeading g) { + headings.put(g.getLevel(), g.getName()); + headingsByName.computeIfAbsent(g.getName(), t -> new ArrayList<>()).add(g.getLevel()); + + if (title == null) { + title = g.getName(); + var dateMatcher = datePattern.matcher(title); + if (dateMatcher.matches()) { + date = dateMatcher.group(1); + } + } + + return null; + } + + @Override + public Object visit(GemtextLink g) { + links.add(g); + + return null; + } + + @Override + public Object visit(GemtextTask g) { + tasks.add(g); + + return null; + } + + @Override + public Object visit(GemtextPragma g) { + pragmas.add(g.getLine()); + + return null; + } + }; +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/gemini/gmi/line/AbstractGemtextLine.java b/marginalia_nu/src/main/java/nu/marginalia/gemini/gmi/line/AbstractGemtextLine.java new file mode 100644 index 00000000..f1307b9b --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/gemini/gmi/line/AbstractGemtextLine.java @@ -0,0 +1,18 @@ +package nu.marginalia.gemini.gmi.line; + +import nu.marginalia.wmsa.memex.model.MemexNodeHeadingId; + +import java.util.Optional; +import java.util.function.Function; + +public abstract class AbstractGemtextLine { + public Optional mapLink(Function mapper) { + return Optional.empty(); + } + public Optional mapHeading(Function mapper) { return Optional.empty(); } + public Optional mapTask(Function mapper) { return Optional.empty(); } + public abstract T visit(GemtextLineVisitor visitor); + + public abstract boolean breaksTask(); + public abstract MemexNodeHeadingId getHeading(); +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/gemini/gmi/line/GemtextAside.java b/marginalia_nu/src/main/java/nu/marginalia/gemini/gmi/line/GemtextAside.java new file mode 100644 index 00000000..ef73accc --- /dev/null +++ 
b/marginalia_nu/src/main/java/nu/marginalia/gemini/gmi/line/GemtextAside.java @@ -0,0 +1,21 @@ +package nu.marginalia.gemini.gmi.line; + +import lombok.AllArgsConstructor; +import lombok.Getter; +import lombok.ToString; +import nu.marginalia.wmsa.memex.model.MemexNodeHeadingId; + +@AllArgsConstructor @Getter @ToString +public class GemtextAside extends AbstractGemtextLine { + private final String line; + private final MemexNodeHeadingId heading; + + @Override + public T visit(GemtextLineVisitor visitor) { + return visitor.visit(this); + } + + public boolean breaksTask() { + return false; + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/gemini/gmi/line/GemtextHeading.java b/marginalia_nu/src/main/java/nu/marginalia/gemini/gmi/line/GemtextHeading.java new file mode 100644 index 00000000..a2c9f309 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/gemini/gmi/line/GemtextHeading.java @@ -0,0 +1,32 @@ +package nu.marginalia.gemini.gmi.line; + + +import lombok.AllArgsConstructor; +import lombok.Getter; +import lombok.ToString; +import nu.marginalia.wmsa.memex.model.MemexNodeHeadingId; + +import java.util.Optional; +import java.util.function.Function; + +@AllArgsConstructor +@Getter +@ToString +public class GemtextHeading extends AbstractGemtextLine { + private final MemexNodeHeadingId level; + private final String name; + private final MemexNodeHeadingId heading; + + public Optional mapHeading(Function mapper) { + return Optional.of(mapper.apply(this)); + } + + @Override + public T visit(GemtextLineVisitor visitor) { + return visitor.visit(this); + } + + public boolean breaksTask() { + return true; + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/gemini/gmi/line/GemtextLineVisitor.java b/marginalia_nu/src/main/java/nu/marginalia/gemini/gmi/line/GemtextLineVisitor.java new file mode 100644 index 00000000..219267ca --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/gemini/gmi/line/GemtextLineVisitor.java @@ -0,0 +1,18 @@ 
+package nu.marginalia.gemini.gmi.line; + +public interface GemtextLineVisitor { + default T take(AbstractGemtextLine line) { + return line.visit(this); + } + + T visit(GemtextHeading g); + T visit(GemtextLink g); + T visit(GemtextList g); + T visit(GemtextPreformat g); + T visit(GemtextQuote g); + T visit(GemtextText g); + T visit(GemtextTextLiteral g); + T visit(GemtextAside g); + T visit(GemtextTask g); + T visit(GemtextPragma g); +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/gemini/gmi/line/GemtextLineVisitorAdapter.java b/marginalia_nu/src/main/java/nu/marginalia/gemini/gmi/line/GemtextLineVisitorAdapter.java new file mode 100644 index 00000000..cb0a7544 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/gemini/gmi/line/GemtextLineVisitorAdapter.java @@ -0,0 +1,53 @@ +package nu.marginalia.gemini.gmi.line; + +public class GemtextLineVisitorAdapter implements GemtextLineVisitor { + @Override + public T visit(GemtextHeading g) { + return null; + } + + @Override + public T visit(GemtextLink g) { + return null; + } + + @Override + public T visit(GemtextList g) { + return null; + } + + @Override + public T visit(GemtextPreformat g) { + return null; + } + + @Override + public T visit(GemtextQuote g) { + return null; + } + + @Override + public T visit(GemtextText g) { + return null; + } + + @Override + public T visit(GemtextTextLiteral g) { + return null; + } + + @Override + public T visit(GemtextAside g) { + return null; + } + + @Override + public T visit(GemtextTask g) { + return null; + } + + @Override + public T visit(GemtextPragma g) { + return null; + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/gemini/gmi/line/GemtextLink.java b/marginalia_nu/src/main/java/nu/marginalia/gemini/gmi/line/GemtextLink.java new file mode 100644 index 00000000..27aa1a5c --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/gemini/gmi/line/GemtextLink.java @@ -0,0 +1,33 @@ +package nu.marginalia.gemini.gmi.line; + +import 
lombok.AllArgsConstructor; +import lombok.Getter; +import lombok.ToString; +import nu.marginalia.wmsa.memex.model.MemexNodeHeadingId; +import nu.marginalia.wmsa.memex.model.MemexUrl; + +import javax.annotation.Nullable; +import java.util.Optional; +import java.util.function.Function; + +@AllArgsConstructor @Getter @ToString +public class GemtextLink extends AbstractGemtextLine { + private final MemexUrl url; + + @Nullable + private final String title; + private final MemexNodeHeadingId heading; + + public Optional mapLink(Function mapper) { + return Optional.ofNullable(mapper.apply(this)); + } + + @Override + public T visit(GemtextLineVisitor visitor) { + return visitor.visit(this); + } + + public boolean breaksTask() { + return false; + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/gemini/gmi/line/GemtextList.java b/marginalia_nu/src/main/java/nu/marginalia/gemini/gmi/line/GemtextList.java new file mode 100644 index 00000000..c06c1e6a --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/gemini/gmi/line/GemtextList.java @@ -0,0 +1,23 @@ +package nu.marginalia.gemini.gmi.line; + +import lombok.AllArgsConstructor; +import lombok.Getter; +import lombok.ToString; +import nu.marginalia.wmsa.memex.model.MemexNodeHeadingId; + +import java.util.List; + +@AllArgsConstructor @Getter @ToString +public class GemtextList extends AbstractGemtextLine { + private final List items; + private final MemexNodeHeadingId heading; + + @Override + public T visit(GemtextLineVisitor visitor) { + return visitor.visit(this); + } + + public boolean breaksTask() { + return true; + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/gemini/gmi/line/GemtextPragma.java b/marginalia_nu/src/main/java/nu/marginalia/gemini/gmi/line/GemtextPragma.java new file mode 100644 index 00000000..082cef26 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/gemini/gmi/line/GemtextPragma.java @@ -0,0 +1,21 @@ +package nu.marginalia.gemini.gmi.line; + +import 
lombok.AllArgsConstructor; +import lombok.Getter; +import lombok.ToString; +import nu.marginalia.wmsa.memex.model.MemexNodeHeadingId; + +@AllArgsConstructor @Getter @ToString +public class GemtextPragma extends AbstractGemtextLine { + private final String line; + private final MemexNodeHeadingId heading; + + @Override + public T visit(GemtextLineVisitor visitor) { + return visitor.visit(this); + } + + public boolean breaksTask() { + return false; + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/gemini/gmi/line/GemtextPreformat.java b/marginalia_nu/src/main/java/nu/marginalia/gemini/gmi/line/GemtextPreformat.java new file mode 100644 index 00000000..56a1f196 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/gemini/gmi/line/GemtextPreformat.java @@ -0,0 +1,23 @@ +package nu.marginalia.gemini.gmi.line; + +import lombok.AllArgsConstructor; +import lombok.Getter; +import lombok.ToString; +import nu.marginalia.wmsa.memex.model.MemexNodeHeadingId; + +import java.util.List; + +@AllArgsConstructor @Getter @ToString +public class GemtextPreformat extends AbstractGemtextLine { + private final List items; + private final MemexNodeHeadingId heading; + + @Override + public T visit(GemtextLineVisitor visitor) { + return visitor.visit(this); + } + + public boolean breaksTask() { + return true; + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/gemini/gmi/line/GemtextQuote.java b/marginalia_nu/src/main/java/nu/marginalia/gemini/gmi/line/GemtextQuote.java new file mode 100644 index 00000000..ad9f2e9b --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/gemini/gmi/line/GemtextQuote.java @@ -0,0 +1,23 @@ +package nu.marginalia.gemini.gmi.line; + +import lombok.AllArgsConstructor; +import lombok.Getter; +import lombok.ToString; +import nu.marginalia.wmsa.memex.model.MemexNodeHeadingId; + +import java.util.List; + +@AllArgsConstructor @Getter @ToString +public class GemtextQuote extends AbstractGemtextLine { + private final List items; + 
private final MemexNodeHeadingId heading; + + @Override + public T visit(GemtextLineVisitor visitor) { + return visitor.visit(this); + } + + public boolean breaksTask() { + return true; + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/gemini/gmi/line/GemtextTask.java b/marginalia_nu/src/main/java/nu/marginalia/gemini/gmi/line/GemtextTask.java new file mode 100644 index 00000000..d2360afc --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/gemini/gmi/line/GemtextTask.java @@ -0,0 +1,42 @@ +package nu.marginalia.gemini.gmi.line; + +import lombok.AllArgsConstructor; +import lombok.Getter; +import lombok.ToString; +import nu.marginalia.wmsa.memex.model.MemexNodeHeadingId; +import nu.marginalia.wmsa.memex.model.MemexNodeTaskId; +import nu.marginalia.wmsa.memex.model.MemexTaskState; +import nu.marginalia.wmsa.memex.model.MemexTaskTags; + +import java.util.Optional; +import java.util.function.Function; + +@AllArgsConstructor @Getter @ToString +public class GemtextTask extends AbstractGemtextLine { + private final MemexNodeTaskId id; + private final String task; + private final MemexNodeHeadingId heading; + private final MemexTaskTags tags; + + public MemexTaskState getState() { + return MemexTaskState.of(tags); + } + + public int getLevel() { + return id.level(); + } + @Override + public T visit(GemtextLineVisitor visitor) { + return visitor.visit(this); + } + + @Override + public boolean breaksTask() { + return true; + } + + @Override + public Optional mapTask(Function mapper) { + return Optional.of(mapper.apply(this)); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/gemini/gmi/line/GemtextText.java b/marginalia_nu/src/main/java/nu/marginalia/gemini/gmi/line/GemtextText.java new file mode 100644 index 00000000..15394533 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/gemini/gmi/line/GemtextText.java @@ -0,0 +1,21 @@ +package nu.marginalia.gemini.gmi.line; + +import lombok.AllArgsConstructor; +import lombok.Getter; 
+import lombok.ToString; +import nu.marginalia.wmsa.memex.model.MemexNodeHeadingId; + +@AllArgsConstructor @Getter @ToString +public class GemtextText extends AbstractGemtextLine { + private final String line; + private final MemexNodeHeadingId heading; + + @Override + public T visit(GemtextLineVisitor visitor) { + return visitor.visit(this); + } + + public boolean breaksTask() { + return !line.isBlank(); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/gemini/gmi/line/GemtextTextLiteral.java b/marginalia_nu/src/main/java/nu/marginalia/gemini/gmi/line/GemtextTextLiteral.java new file mode 100644 index 00000000..7e44702f --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/gemini/gmi/line/GemtextTextLiteral.java @@ -0,0 +1,23 @@ +package nu.marginalia.gemini.gmi.line; + +import lombok.AllArgsConstructor; +import lombok.Getter; +import lombok.ToString; +import nu.marginalia.wmsa.memex.model.MemexNodeHeadingId; + +import java.util.List; + +@AllArgsConstructor @Getter @ToString +public class GemtextTextLiteral extends AbstractGemtextLine { + private final List items; + private final MemexNodeHeadingId heading; + + @Override + public T visit(GemtextLineVisitor visitor) { + return visitor.visit(this); + } + + public boolean breaksTask() { + return false; + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/gemini/gmi/parser/GemtextAsideParser.java b/marginalia_nu/src/main/java/nu/marginalia/gemini/gmi/parser/GemtextAsideParser.java new file mode 100644 index 00000000..541ada0c --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/gemini/gmi/parser/GemtextAsideParser.java @@ -0,0 +1,20 @@ +package nu.marginalia.gemini.gmi.parser; + +import nu.marginalia.gemini.gmi.line.GemtextAside; +import nu.marginalia.wmsa.memex.model.MemexNodeHeadingId; + +import java.util.regex.Pattern; + +public class GemtextAsideParser { + private static final Pattern listItemPattern = Pattern.compile("^\\((.*)\\)$"); + + public static GemtextAside 
parse(String s, MemexNodeHeadingId heading) { + var matcher = listItemPattern.matcher(s); + + if (!matcher.matches()) { + return null; + } + + return new GemtextAside(matcher.group(1), heading); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/gemini/gmi/parser/GemtextHeadingParser.java b/marginalia_nu/src/main/java/nu/marginalia/gemini/gmi/parser/GemtextHeadingParser.java new file mode 100644 index 00000000..c91d2a45 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/gemini/gmi/parser/GemtextHeadingParser.java @@ -0,0 +1,26 @@ +package nu.marginalia.gemini.gmi.parser; + +import nu.marginalia.gemini.gmi.line.AbstractGemtextLine; +import nu.marginalia.gemini.gmi.line.GemtextHeading; +import nu.marginalia.gemini.gmi.line.GemtextText; +import nu.marginalia.wmsa.memex.model.MemexNodeHeadingId; + +import java.util.regex.Pattern; + +public class GemtextHeadingParser { + private static final Pattern headingPattern = Pattern.compile("^(#+)\\s*([^#].*|$)$"); + + public static AbstractGemtextLine parse(String s, MemexNodeHeadingId heading) { + var matcher = headingPattern.matcher(s); + + if (!matcher.matches()) { + return new GemtextText(s, heading); + } + + int level = matcher.group(1).length() - 1; + var newHeading = heading.next(level); + + return new GemtextHeading(newHeading, matcher.group(2), newHeading); + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/gemini/gmi/parser/GemtextLinkParser.java b/marginalia_nu/src/main/java/nu/marginalia/gemini/gmi/parser/GemtextLinkParser.java new file mode 100644 index 00000000..8ed5a281 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/gemini/gmi/parser/GemtextLinkParser.java @@ -0,0 +1,42 @@ +package nu.marginalia.gemini.gmi.parser; + +import nu.marginalia.gemini.gmi.line.AbstractGemtextLine; +import nu.marginalia.gemini.gmi.line.GemtextLink; +import nu.marginalia.gemini.gmi.line.GemtextText; +import nu.marginalia.wmsa.memex.model.MemexExternalUrl; +import 
/** Extracts the item text from a gemtext list line ({@code "* item"}). */
public class GemtextListParser {
    // The item text may be empty so that a bare "*" parses to "" rather than to null.
    private static final Pattern listItemPattern = Pattern.compile("^\\*\\s?(.*)$");

    /**
     * @param s a single line of gemtext
     * @return the text after the {@code *} marker and at most one whitespace character
     *         (possibly empty), or {@code null} when the line does not start with {@code *}
     */
    public static String parse(String s) {
        var matcher = listItemPattern.matcher(s);

        if (!matcher.matches()) {
            return null;
        }

        return matcher.group(1);
    }
}
+package nu.marginalia.gemini.gmi.parser; + +import nu.marginalia.gemini.gmi.line.*; +import nu.marginalia.wmsa.memex.model.MemexNodeHeadingId; +import nu.marginalia.wmsa.memex.model.MemexNodeTaskId; + +import java.util.*; + +public class GemtextParser { + + private static final String PREFORMAT_MARKER = "```"; + private static final String LITERAL_MARKER = " "; + private static final String LINK_MARKER = "=>"; + private static final String HEADING_MARKER = "#"; + private static final String LIST_MARKER = "*"; + private static final String QUOTE_MARKER = ">"; + private static final String ASIDE_MARKER = "("; + private static final String TASK_MARKER = "-"; + private static final String PRAGMA_MARKER = "%%%"; + + public static AbstractGemtextLine[] parse(String[] lines, MemexNodeHeadingId headingRoot) { + List items = new ArrayList<>(); + MemexNodeHeadingId heading = headingRoot; + MemexNodeTaskId task = new MemexNodeTaskId(0); + + Set pragmas = new HashSet<>(); + + for (int i = 0; i < lines.length; i++) { + String line = lines[i]; + + if (line.startsWith(PREFORMAT_MARKER)) { + i = getBlockQuote(items, lines, heading, i); + } + else if (line.startsWith(PRAGMA_MARKER)) { + var pragma = GemtextPragmaParser.parse(line, heading); + + if (pragma instanceof GemtextPragma) { + GemtextPragma gtp = (GemtextPragma) pragma; + pragmas.add(gtp.getLine()); + } + + items.add(pragma); + + } + else if (line.startsWith(LINK_MARKER)) { + var link = GemtextLinkParser.parse(line, heading); + items.add(link); + } + else if (line.startsWith(HEADING_MARKER)) { + var tag = GemtextHeadingParser.parse(line, heading); + + heading = tag.mapHeading(GemtextHeading::getHeading).orElse(heading); + + items.add(tag); + } + else if (line.startsWith(LIST_MARKER)) { + i = getList(items, lines, heading, i); + } + else if (line.startsWith(LITERAL_MARKER)) { + i = getLitteral(items, lines, heading, i); + } + else if (pragmas.contains("TASKS") + && line.startsWith(TASK_MARKER)) + { + var tag = 
GemtextTaskParser.parse(line, heading, task); + + task = tag.mapTask(GemtextTask::getId).orElse(task); + + items.add(tag); + } + else if (line.startsWith(QUOTE_MARKER)) { + i = getQuote(items, lines, heading, i); + } + else if (line.startsWith(ASIDE_MARKER)) { + var aside = GemtextAsideParser.parse(line, heading); + items.add(Objects.requireNonNullElse(aside, new GemtextText(line, heading))); + } + else { + items.add(new GemtextText(line, heading)); + } + } + return items.toArray(AbstractGemtextLine[]::new); + } + + private static int getBlockQuote(List items, String[] lines, MemexNodeHeadingId heading, int i) { + int j = i+1; + List quotedLines = new ArrayList<>(); + for (;j < lines.length; j++) { + if (lines[j].startsWith(PREFORMAT_MARKER)) { + break; + } + quotedLines.add(lines[j]); + } + items.add(new GemtextPreformat(quotedLines, heading)); + return j; + } + + private static int getList(List items, String[] lines, MemexNodeHeadingId heading, int i) { + int j = i; + List listLines = new ArrayList<>(); + for (;j < lines.length; j++) { + if (!lines[j].startsWith(LIST_MARKER)) { + break; + } + listLines.add(GemtextListParser.parse(lines[j])); + } + items.add(new GemtextList(listLines, heading)); + return j-1; + } + private static int getLitteral(List items, String[] lines, MemexNodeHeadingId heading, int i) { + int j = i; + List listLines = new ArrayList<>(); + for (;j < lines.length; j++) { + if (!lines[j].startsWith(LITERAL_MARKER)) { + break; + } + listLines.add(lines[j]); + } + items.add(new GemtextTextLiteral(listLines, heading)); + return j-1; + } + + private static int getQuote(List items, String[] lines, MemexNodeHeadingId heading, int i) { + int j = i; + List listLines = new ArrayList<>(); + for (;j < lines.length; j++) { + if (!lines[j].startsWith(QUOTE_MARKER)) { + break; + } + listLines.add(GemtextQuoteParser.parse(lines[j])); + } + items.add(new GemtextQuote(listLines, heading)); + return j-1; + } +} diff --git 
/** Extracts the quoted text from a gemtext quote line ({@code ">text"}). */
public class GemtextQuoteParser {
    // The quoted text may be empty so that a bare ">" parses to "" rather than to null.
    private static final Pattern listItemPattern = Pattern.compile("^>(.*)$");

    /**
     * @param s a single line of gemtext
     * @return everything after the leading {@code >} (possibly empty; whitespace is kept),
     *         or {@code null} when the line does not start with {@code >}
     */
    public static String parse(String s) {
        var matcher = listItemPattern.matcher(s);

        if (!matcher.matches()) {
            return null;
        }

        return matcher.group(1);
    }
}
// /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/gemini/gmi/parser/GemtextTaskParser.java @@ -0,0 +1,31 @@
package nu.marginalia.gemini.gmi.parser;

import nu.marginalia.gemini.gmi.line.AbstractGemtextLine;
import nu.marginalia.gemini.gmi.line.GemtextTask;
import nu.marginalia.gemini.gmi.line.GemtextText;
import nu.marginalia.wmsa.memex.model.MemexNodeHeadingId;
import nu.marginalia.wmsa.memex.model.MemexNodeTaskId;
import nu.marginalia.wmsa.memex.model.MemexTaskTags;

import java.util.regex.Pattern;

/**
 * Recognises task lines: one or more leading dashes followed by the task text.
 */
public class GemtextTaskParser {
    /**
     * Group 1 is the run of leading dashes; group 2 is the task text, which is either empty
     * or starts with a non-dash character.
     */
    private static final Pattern taskPattern = Pattern.compile("^(-+)\\s*([^-].*|$)$");

    /**
     * Parses a single line.
     *
     * @param s       the raw line
     * @param heading the heading the line belongs to; passed through to the created line object
     * @param taskId  the id from which the new task's id is derived via {@code taskId.next(level)}
     * @return a {@link GemtextTask} when the line matches the task pattern, otherwise a
     *         {@link GemtextText} wrapping the unmodified line
     */
    public static AbstractGemtextLine parse(String s, MemexNodeHeadingId heading,
                                            MemexNodeTaskId taskId) {
        var matcher = taskPattern.matcher(s);

        if (!matcher.matches()) {
            return new GemtextText(s, heading);
        }

        // A single dash is level 0; every additional dash nests one level deeper.
        int level = matcher.group(1).length() - 1;

        String task = matcher.group(2);

        return new GemtextTask(taskId.next(level), task, heading, new MemexTaskTags(task));
    }

}

// diff --git a/marginalia_nu/src/main/java/nu/marginalia/gemini/gmi/renderer/GemtextRenderer.java b/marginalia_nu/src/main/java/nu/marginalia/gemini/gmi/renderer/GemtextRenderer.java new file mode 100644 index 00000000..1697c8df --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/gemini/gmi/renderer/GemtextRenderer.java @@ -0,0 +1,91 @@
package nu.marginalia.gemini.gmi.renderer;

import nu.marginalia.gemini.gmi.line.*;

import java.util.function.Function;

/**
 * Visitor that renders a gemtext line to a {@code String} by delegating to one converter
 * function per line type.
 *
 * <p>The converters are supplied at construction time, so the same class serves every
 * output format; the choice of format lives entirely in the functions passed in.
 */
// NOTE(review): GemtextLineVisitor is declared outside this view; if it takes a type
// parameter, the argument must be added to the implements clause below.
public class GemtextRenderer implements GemtextLineVisitor {

    // Each converter is parameterized on the line type it accepts. Raw Function types
    // would make apply() return Object, which cannot be returned from the String-typed
    // visit methods below.
    private final Function<GemtextHeading, String> headingConverter;
    private final Function<GemtextLink, String> linkConverter;
    private final Function<GemtextList, String> listConverter;
    private final Function<GemtextPreformat, String> preformatConverter;
    private final Function<GemtextQuote, String> quoteConverter;
    private final Function<GemtextText, String> textConverter;
    private final Function<GemtextAside, String> asideConverter;
    private final Function<GemtextTask, String> taskConverter;
    private final Function<GemtextTextLiteral, String> literalConverter;
    private final Function<GemtextPragma, String> pragmaConverter;

    /**
     * Creates a renderer from one converter per line type. The parameter order is
     * significant to callers that pass method references positionally.
     */
    public GemtextRenderer(Function<GemtextHeading, String> headingConverter,
                           Function<GemtextLink, String> linkConverter,
                           Function<GemtextList, String> listConverter,
                           Function<GemtextPreformat, String> preformatConverter,
                           Function<GemtextQuote, String> quoteConverter,
                           Function<GemtextText, String> textConverter,
                           Function<GemtextAside, String> asideConverter,
                           Function<GemtextTask, String> taskConverter,
                           Function<GemtextTextLiteral, String> literalConverter,
                           Function<GemtextPragma, String> pragmaConverter
                           ) {
        this.headingConverter = headingConverter;
        this.linkConverter = linkConverter;
        this.listConverter = listConverter;
        this.preformatConverter = preformatConverter;
        this.quoteConverter = quoteConverter;
        this.textConverter = textConverter;
        this.asideConverter = asideConverter;
        this.taskConverter = taskConverter;
        this.literalConverter = literalConverter;
        this.pragmaConverter = pragmaConverter;
    }

    /**
     * Renders a line by letting it dispatch back to the matching {@code visit} overload.
     *
     * @param line the line to render
     * @return whatever the converter registered for the line's type produces
     */
    public String renderLine(AbstractGemtextLine line) {
        return line.visit(this);
    }

    @Override
    public String visit(GemtextHeading g) {
        return headingConverter.apply(g);
    }

    @Override
    public String visit(GemtextLink g) {
        return linkConverter.apply(g);
    }

    @Override
    public String visit(GemtextList g) {
        return listConverter.apply(g);
    }

    @Override
    public String visit(GemtextPreformat g) {
        return preformatConverter.apply(g);
    }

    @Override
    public String visit(GemtextQuote g) {
        return quoteConverter.apply(g);
    }

    @Override
    public String visit(GemtextText g) {
        return textConverter.apply(g);
    }

    @Override
    public String visit(GemtextTextLiteral g) {
        return literalConverter.apply(g);
    }

    @Override
    public String visit(GemtextAside g) {
        return asideConverter.apply(g);
    }

    @Override
    public String visit(GemtextTask g) {
        return taskConverter.apply(g);
    }

    @Override
    public String visit(GemtextPragma g) {
        return pragmaConverter.apply(g);
    }
}

// diff --git a/marginalia_nu/src/main/java/nu/marginalia/gemini/gmi/renderer/GemtextRendererFactory.java b/marginalia_nu/src/main/java/nu/marginalia/gemini/gmi/renderer/GemtextRendererFactory.java new file mode 100644 index 00000000..257cfc1c ---
/dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/gemini/gmi/renderer/GemtextRendererFactory.java @@ -0,0 +1,227 @@ +package nu.marginalia.gemini.gmi.renderer; + +import nu.marginalia.gemini.gmi.line.*; +import nu.marginalia.wmsa.memex.model.MemexNodeUrl; +import nu.marginalia.wmsa.memex.model.MemexUrl; +import org.apache.logging.log4j.util.Strings; + +import java.util.Objects; +import java.util.stream.Collectors; + +public class GemtextRendererFactory { + + public final String urlBase; + public final String docUrl; + + public GemtextRendererFactory(String urlBase, String docUrl) { + this.urlBase = Objects.requireNonNull(urlBase, "urlBase must not be null"); + this.docUrl = Objects.requireNonNull(docUrl, "docUrl must not be null"); + } + + public GemtextRendererFactory(String urlBase) { + this.urlBase = Objects.requireNonNull(urlBase, "urlBase must not be null"); + this.docUrl = null; + } + + public GemtextRendererFactory() { + this.urlBase = null; + this.docUrl = null; + } + + public GemtextRenderer htmlRendererEditable() { + return new GemtextRenderer(this::htmlHeadingEditable, + this::htmlLink, this::htmlList, + this::htmlPre, this::htmlQuote, + this::htmlText, this::htmlAside, + this::htmlTask, this::htmlLiteral, + this::htmlPragma); + } + + public GemtextRenderer htmlRendererReadOnly() { + return new GemtextRenderer(this::htmlHeadingReadOnly, + this::htmlLink, this::htmlList, + this::htmlPre, this::htmlQuote, + this::htmlText, this::htmlAside, + this::htmlTask, this::htmlLiteral, + this::htmlPragma); + } + + + public GemtextRenderer gemtextRendererAsIs() { + return new GemtextRenderer(this::rawHeading, + this::rawLink, this::rawList, + this::rawPre, this::rawQuote, + this::rawText, this::rawAside, + this::rawTask, this::rawLiteral, + this::rawPragma); + } + + + public GemtextRenderer gemtextRendererPublic() { + return new GemtextRenderer(this::rawHeading, + this::rawLink, this::rawList, + this::rawPre, this::rawQuote, + this::rawText, this::rawAside, + 
this::rawTask, this::rawLiteral, + this::rawSupressPragma); + } + + + private String htmlPragma(GemtextPragma gemtextPragma) { + return "\n"; + } + + public String htmlHeadingEditable(GemtextHeading g) { + if (docUrl == null) { + throw new UnsupportedOperationException("Wrong constructor used, need urlBase and docUrl"); + } +// String editLink = String.format("\nEdit\n", urlBase + docUrl, g.getLevel()); + + return htmlHeadingReadOnly(g); + } + + public String htmlHeadingReadOnly(GemtextHeading g) { + if (g.getLevel().getLevel() == 1) + return String.format("

%s

\n", g.getLevel(), sanitizeText(g.getName())); + if (g.getLevel().getLevel() == 2) + return String.format("

%s

\n", g.getLevel(), sanitizeText(g.getName())); + if (g.getLevel().getLevel() == 3) + return String.format("

%s

\n", g.getLevel(), sanitizeText(g.getName())); + + return String.format("

%s

\n", g.getLevel(), sanitizeText(g.getName())); + } + + public String htmlLink(GemtextLink g) { + if (urlBase == null) { + throw new UnsupportedOperationException("Wrong constructor used, need urlBase"); + } + final String linkClass = getLinkClass(g.getUrl()); + final String linkUrl = getLinkUrl(g.getUrl()).replaceFirst("^gemini://", "https://proxy.vulpes.one/gemini/"); + if (g.getTitle() != null) { + return String.format("
%s
%s
\n", + linkClass, linkUrl, g.getUrl(), sanitizeText(g.getTitle())); + } + else { + return String.format("%s
\n", + linkClass, linkUrl, g.getUrl()); + } + } + private String getLinkUrl(MemexUrl url) { + if (url instanceof MemexNodeUrl || url.getUrl().startsWith("/")) { + return urlBase + url; + } + return url.toString(); + } + + private String getLinkClass(MemexUrl url) { + if (url instanceof MemexNodeUrl) { + return "internal"; + } + return "external"; + } + public String htmlList(GemtextList g) { + return g.getItems() + .stream() + .map(s -> "
  • " + sanitizeText(s) + "
  • ") + .collect( + Collectors.joining("\n", "
      \n", "
    \n")); + } + + public String htmlPre(GemtextPreformat g) { + return g.getItems().stream() + .map(this::sanitizeText) + .collect( + Collectors.joining("\n", "
    \n", "
    \n")); + } + + public String htmlLiteral(GemtextTextLiteral g) { + return g.getItems().stream() + .map(this::sanitizeText) + .collect( + Collectors.joining("\n", "
    \n", "
    \n")); + } + public String htmlQuote(GemtextQuote g) { + return g.getItems().stream() + .map(this::sanitizeText) + .collect( + Collectors.joining("
    \n", "
    \n", "
    \n")); + + } + public String htmlText(GemtextText g) { + return sanitizeText(g.getLine()) + "
    \n"; + } + public String htmlAside(GemtextAside g) { + return "\n"; + } + + public String sanitizeText(String s) { + return s.replaceAll("<", "<").replaceAll(">", ">"); + } + + public String htmlTask(GemtextTask g) { + return String.format("
    %s %s
    \n", + g.getId(), + g.getState().style, + g.getId(), + "-".repeat(g.getLevel()), + g.getTask()); + } + + public String rawHeading(GemtextHeading g) { + if (g.getLevel().getLevel() == 1) + return "# " + g.getName(); + if (g.getLevel().getLevel() == 2) + return "## " + g.getName(); + if (g.getLevel().getLevel() == 3) + return "### " + g.getName(); + + return "### " + g.getName(); + } + + public String rawLink(GemtextLink g) { + if (g.getTitle() != null && !g.getTitle().isBlank()) { + return "=> " + g.getUrl().getUrl() + "\t" + g.getTitle(); + } + return "=> " + g.getUrl().getUrl(); + } + + public String rawList(GemtextList g) { + return g.getItems() + .stream() + .map(s -> "* " + s) + .collect(Collectors.joining("\n")); + } + + public String rawPre(GemtextPreformat g) { + return g.getItems().stream() + .collect(Collectors.joining("\n", "```\n", "\n```")); + } + + public String rawQuote(GemtextQuote g) { + return g.getItems().stream() + .map(s -> "> " + s) + .collect(Collectors.joining()); + + } + + public String rawText(GemtextText g) { + return g.getLine(); + } + + public String rawLiteral(GemtextTextLiteral g) { + return Strings.join(g.getItems(), '\n'); + } + + public String rawAside(GemtextAside g) { + return "(" + g.getLine() + ")"; + } + public String rawTask(GemtextTask g) { + return "-".repeat(Math.max(0, g.getLevel())) + " " + g.getTask(); + } + private String rawPragma(GemtextPragma gemtextPragma) { + return "%%% " + gemtextPragma.getLine(); + } + private String rawSupressPragma(GemtextPragma gemtextPragma) { + return ""; + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/gemini/io/GeminiConnection.java b/marginalia_nu/src/main/java/nu/marginalia/gemini/io/GeminiConnection.java new file mode 100644 index 00000000..6d032a2e --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/gemini/io/GeminiConnection.java @@ -0,0 +1,185 @@ +package nu.marginalia.gemini.io; + +import nu.marginalia.gemini.BadBotList; +import 
nu.marginalia.gemini.plugins.FileType; // completes the import statement whose keyword ends the preceding line
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.net.ssl.SSLSocket;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URI;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Optional;
import java.util.stream.Stream;

/**
 * Wraps one accepted TLS socket and offers helpers for reading the request line and
 * writing responses. Every text line written through this class is encoded as UTF-8 and
 * terminated with CR LF.
 */
public class GeminiConnection {
    private final SSLSocket connection;

    private final Logger logger = LoggerFactory.getLogger("Server");
    private final OutputStream os;
    private final InputStream is;
    private static final BadBotList badBotList = BadBotList.INSTANCE;

    /**
     * @param connection the accepted socket; its input and output streams are obtained here
     * @throws IOException if either stream cannot be obtained
     */
    public GeminiConnection(SSLSocket connection) throws IOException {
        this.connection = connection;

        this.os = connection.getOutputStream();
        this.is = connection.getInputStream();
    }

    /** @return the textual IP address of the remote peer */
    public String getAddress() {
        return connection.getInetAddress().getHostAddress();
    }

    /**
     * Reads the request line and parses it as a URI.
     *
     * @return the requested URI, or empty when the bad-bot list rejects the query
     * @throws GeminiUserException if the request line is blank, longer than the input
     *                             buffer, or the stream ends before CR LF
     * @throws Exception           on I/O failure or if the line is not a valid URI
     */
    public Optional<URI> readUrl() throws Exception {

        var str = new GeminiInput().get();
        if (!badBotList.isQueryPermitted(connection.getInetAddress(), str)) {
            return Optional.empty();
        }
        if (!str.isBlank()) {
            return Optional.of(new URI(str));
        }
        throw new GeminiUserException("Bad URI");
    }

    /** Writes a {@link GeminiStatusCode#REDIRECT} status line pointing at {@code address}. */
    public void redirect(String address) throws IOException {
        writeStatusLine(GeminiStatusCode.REDIRECT, address);
    }

    /** Writes a {@link GeminiStatusCode#REDIRECT_PERMANENT} status line pointing at {@code address}. */
    public void redirectPermanent(String address) throws IOException {
        writeStatusLine(GeminiStatusCode.REDIRECT_PERMANENT, address);
    }

    /**
     * Writes the response header: the code, a space, and the meta string.
     *
     * @return this connection, for chaining
     */
    public GeminiConnection writeStatusLine(int code, String meta) throws IOException {
        write(String.format("%2d %s", code, meta));
        return this;
    }

    /** Writes raw bytes with no line terminator appended. */
    public GeminiConnection writeBytes(byte[] data) throws IOException {
        write(data);
        return this;
    }

    /** Formats with {@link String#format} and writes the result as one line. */
    public GeminiConnection printf(String pattern, Object...args) throws IOException {
        write(String.format(pattern, args));
        return this;
    }

    /** Writes each argument as its own line. */
    public GeminiConnection writeLines(String... lines) throws IOException {
        for (String s : lines) {
            write(s);
        }
        return this;
    }

    /**
     * Streams a text file to the client line by line. A failed write of an individual
     * line is logged and does not abort the remaining lines.
     *
     * @throws IOException if the file cannot be opened
     */
    public GeminiConnection writeLinesFromFile(Path file) throws IOException {
        try (Stream<String> lines = Files.lines(file)) {
            lines.forEach(line -> {
                try {
                    write(line);
                } catch (IOException e) {
                    logger.error("IO Error", e);
                }
            });
        }
        return this;
    }

    /**
     * Writes every element of the stream as its own line. A failed write of an individual
     * line is logged and does not abort the remaining lines. The stream is not closed here.
     */
    public GeminiConnection acceptLines(Stream<String> lines) {
        lines.forEach(line -> {
            try {
                write(line);
            } catch (IOException e) {
                logger.error("IO exception", e);
            }
        });
        return this;
    }

    private void write(String s) throws IOException {
        os.write(s.getBytes(StandardCharsets.UTF_8));
        os.write(new byte[] { '\r', '\n'});
    }

    private void write(byte[] bs) throws IOException {
        os.write(bs);
    }

    // This is a weird pattern but it makes the listing code very much cleaner

    /**
     * Logs the message and then always throws.
     *
     * @throws GeminiUserException unconditionally, carrying {@code message}
     */
    public void error(String message) {
        logger.error("{}", message);

        throw new GeminiUserException(message);
    }

    /**
     * Shuts down the output side and closes the socket. Failures are logged rather than
     * thrown, and the socket is closed even if the output shutdown fails.
     */
    public void close() {
        try {
            connection.shutdownOutput();
        } catch (IOException e) {
            logger.warn("Failed to shut down connection output", e);
        } finally {
            try {
                connection.close();
            } catch (IOException e) {
                logger.warn("Failed to close connection", e);
            }
        }
    }

    public boolean isConnected() {
        return connection.isConnected();
    }

    /**
     * Sends a success header with the file type's MIME string, followed by the file:
     * verbatim bytes for binary types, line by line for text types.
     */
    public void respondWithFile(Path serverPath, FileType fileType) throws IOException {
        if (fileType.binary) {
            writeStatusLine(GeminiStatusCode.SUCCESS, fileType.mime)
                    .writeBytes(Files.readAllBytes(serverPath));
        }
        else {
            writeStatusLine(GeminiStatusCode.SUCCESS, fileType.mime)
                    .writeLinesFromFile(serverPath);
        }
    }

    /**
     * Reads the request line from the connection's input stream, one byte at a time,
     * until CR LF is seen. All reading happens in the constructor.
     */
    public class GeminiInput {
        private final byte[] buffer = new byte[1024];
        private int idx = 0;

        final String result;

        /**
         * @throws IOException         on read failure
         * @throws GeminiUserException via {@link GeminiConnection#error} when the buffer
         *                             fills up or the stream ends before CR LF is seen
         */
        public GeminiInput() throws IOException {

            for (idx = 0; idx < buffer.length; idx++) {
                if (hasEndOfLine()) {
                    // Drop the two terminator bytes.
                    result = new String(buffer, 0, idx-2, StandardCharsets.UTF_8);
                    return;
                }

                readCharacter();
            }

            error("String too long");

            // unreachable, but the compiler requires the final field to be assigned
            result = "";
        }

        public String get() {
            return result;
        }

        private void readCharacter() throws IOException {
            int rb = is.read();
            if (-1 == rb) {
                error("URL incomplete (no CR LF)");
            }
            buffer[idx] = (byte) rb;
        }

        /** @return true once more than two bytes are buffered and the last two are CR LF */
        public boolean hasEndOfLine() {
            return idx > 2
                    && buffer[idx - 1] == (byte) '\n'
                    && buffer[idx - 2] == (byte) '\r';
        }

    }

}

// diff --git a/marginalia_nu/src/main/java/nu/marginalia/gemini/io/GeminiSSLSetUp.java b/marginalia_nu/src/main/java/nu/marginalia/gemini/io/GeminiSSLSetUp.java new file mode 100644 index 00000000..525515f3 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/gemini/io/GeminiSSLSetUp.java @@ -0,0 +1,49 @@
package nu.marginalia.gemini.io;

import com.google.inject.Inject;
import com.google.inject.name.Named;

import javax.net.ssl.*;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.security.KeyStore;
import java.security.SecureRandom;

/**
 * Builds the TLS server socket factory from a JKS key store and a password file.
 */
public class GeminiSSLSetUp {
    private final Path certPasswordFile;
    private final Path certFile;

    @Inject
    public GeminiSSLSetUp(
            @Named("gemini-cert-file") Path certFile,
            @Named("gemini-cert-password-file") Path certPasswordFile) {
        this.certFile = certFile;
        this.certPasswordFile = certPasswordFile;
    }

    /**
     * @return the full contents of the password file, exactly as stored
     * @throws IOException if the file cannot be read
     */
    public String getCertPassword() throws IOException {
        return Files.readString(certPasswordFile);
    }

    private SSLContext getContext() throws Exception {
        // Read the password once and reuse it for both the store and the key manager.
        final char[] password = getCertPassword().toCharArray();

        KeyStore ks = KeyStore.getInstance("JKS", "SUN");
        // Close the key store stream once loaded; it was previously left open.
        try (var in = Files.newInputStream(certFile)) {
            ks.load(in, password);
        }

        KeyManagerFactory kmf = KeyManagerFactory.getInstance("SunX509");
        kmf.init(ks, password);
        KeyManager[] keyManagers = kmf.getKeyManagers();

        TrustManagerFactory tmf = TrustManagerFactory.getInstance("X509");
        tmf.init(ks);
        TrustManager[] trustManagers = tmf.getTrustManagers();

        var ctx = SSLContext.getInstance("TLSv1.3");
        ctx.init(keyManagers, trustManagers, new SecureRandom());
        return ctx;
    }

    /**
     * @return a server socket factory backed by a freshly built TLSv1.3 context
     * @throws Exception if the key store cannot be read or the context cannot be initialised
     */
    public SSLServerSocketFactory getServerSocketFactory() throws Exception {
        return getContext().getServerSocketFactory();
    }
}

// diff --git a/marginalia_nu/src/main/java/nu/marginalia/gemini/io/GeminiStatusCode.java b/marginalia_nu/src/main/java/nu/marginalia/gemini/io/GeminiStatusCode.java new file mode 100644 index 00000000..f201e331 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/gemini/io/GeminiStatusCode.java @@ -0,0 +1,11 @@
package nu.marginalia.gemini.io;

/** Numeric status codes written at the start of a response header. */
public class GeminiStatusCode {
    public static final int INPUT = 10;
    public static final int SUCCESS = 20;
    public static final int ERROR_PERMANENT = 50;
    public static final int ERROR_TEMPORARY = 40;
    public static final int PROXY_ERROR = 43;
    public static final int REDIRECT = 30;
    public static final int REDIRECT_PERMANENT = 31;
}

// diff --git a/marginalia_nu/src/main/java/nu/marginalia/gemini/io/GeminiUserException.java b/marginalia_nu/src/main/java/nu/marginalia/gemini/io/GeminiUserException.java new file mode 100644 index 00000000..937da4fe --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/gemini/io/GeminiUserException.java @@ -0,0 +1,8 @@
package nu.marginalia.gemini.io;

/** Throw to report message to user */
public class GeminiUserException extends RuntimeException {
    public GeminiUserException(String message) {
        super(message);
    }
}

// diff --git a/marginalia_nu/src/main/java/nu/marginalia/gemini/plugins/BareStaticPagePlugin.java b/marginalia_nu/src/main/java/nu/marginalia/gemini/plugins/BareStaticPagePlugin.java new file mode 100644 index 00000000..b0d4fe05 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/gemini/plugins/BareStaticPagePlugin.java @@ -0,0 +1,53 @@
package nu.marginalia.gemini.plugins;

import com.google.inject.Inject;
import com.google.inject.name.Named;
import nu.marginalia.gemini.io.GeminiConnection;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.net.URI;
import java.nio.file.Files;
import
java.nio.file.Path; + +import static nu.marginalia.gemini.GeminiService.DEFAULT_FILENAME; + +public class BareStaticPagePlugin implements Plugin { + + private final Logger logger = LoggerFactory.getLogger(getClass()); + private Path geminiServerRoot; + + @Inject + public BareStaticPagePlugin(@Named("gemini-server-root") Path geminiServerRoot) { + this.geminiServerRoot = geminiServerRoot; + } + + @Override + public boolean serve(URI url, GeminiConnection connection) throws IOException { + + final Path serverPath = getServerPath(url.getPath()); + + if (!Files.isRegularFile(serverPath)) { + return false; + } + + verifyPath(geminiServerRoot, serverPath); + logger.info("Serving {}", serverPath); + + connection.respondWithFile(serverPath, FileType.match(serverPath)); + + return true; + } + + private Path getServerPath(String requestPath) { + final Path serverPath = Path.of(geminiServerRoot + requestPath); + + if (Files.isDirectory(serverPath) && Files.isRegularFile(serverPath.resolve(DEFAULT_FILENAME))) { + return serverPath.resolve(DEFAULT_FILENAME); + } + + return serverPath; + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/gemini/plugins/FileType.java b/marginalia_nu/src/main/java/nu/marginalia/gemini/plugins/FileType.java new file mode 100644 index 00000000..587a9894 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/gemini/plugins/FileType.java @@ -0,0 +1,58 @@ +package nu.marginalia.gemini.plugins; + +import java.nio.file.Path; + +public enum FileType { + GMI("gmi", "text/gemini", FileIcons.DOCUMENT, false), + GEM("gem", "text/gemini", FileIcons.DOCUMENT, false), + TXT("txt", "text/plain", FileIcons.DOCUMENT, false), + MARKDOWN("md", "text/markdown", FileIcons.DOCUMENT, false), + JAVA("java", "text/java", FileIcons.JAVA, false), + PROPERTIES("properties", "text/properties", FileIcons.SETTINGS, false), + GRADLE("gradle", "text/gradle", FileIcons.SETTINGS, false), + ZIP("zip", "application/zip", FileIcons.ZIP, true), + PNG("png", 
"image/png", FileIcons.IMAGE, true), + JPG("jpg", "image/jpg", FileIcons.IMAGE, true), + JPEG("jpeg", "image/jpg", FileIcons.IMAGE, true), + BIN("bin", "application/binary", FileIcons.BINARY, true), + SH("sh", "text/sh", FileIcons.SETTINGS, false), + XML("xml", "text/xml", FileIcons.DOCUMENT, false), + DOCKERFILE("Dockerfile", "text/dockerfile", FileIcons.SETTINGS, false) + ; + + public static FileType match(String fileName) { + for (var type : values()) { + if (fileName.endsWith(type.suffix)) { + return type; + } + } + return BIN; + } + + public static FileType match(Path path) { + return match(path.toString()); + } + + FileType(String suffix, String mime, String icon, boolean binary) { + this.suffix = suffix; + this.mime = mime; + + this.icon = icon; + this.binary = binary; + } + public final String suffix; + public final String mime; + public final String icon; + public final boolean binary; + +} + +class FileIcons { + public static final String DOCUMENT = "🗒"; + public static final String JAVA = "♨"; + public static final String SETTINGS = "💻"; + public static final String ZIP = "🗜"; + public static final String IMAGE = "🖼"; + public static final String DIRECTORY = "🗂"; + public static final String BINARY = "📚"; +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/gemini/plugins/Plugin.java b/marginalia_nu/src/main/java/nu/marginalia/gemini/plugins/Plugin.java new file mode 100644 index 00000000..3765e1ca --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/gemini/plugins/Plugin.java @@ -0,0 +1,19 @@ +package nu.marginalia.gemini.plugins; + +import nu.marginalia.gemini.io.GeminiConnection; +import nu.marginalia.gemini.io.GeminiUserException; + +import java.io.IOException; +import java.net.URI; +import java.nio.file.Path; + +public interface Plugin { + /** @return true if content served */ + boolean serve(URI url, GeminiConnection connection) throws IOException; + + default void verifyPath(Path root, Path p) { + if (!p.normalize().startsWith(root)) 
{ + throw new GeminiUserException("ಠ_ಠ That path is off limits!"); + } + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/gemini/plugins/SearchPlugin.java b/marginalia_nu/src/main/java/nu/marginalia/gemini/plugins/SearchPlugin.java new file mode 100644 index 00000000..e0122d75 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/gemini/plugins/SearchPlugin.java @@ -0,0 +1,78 @@ +package nu.marginalia.gemini.plugins; + +import com.google.inject.Inject; +import nu.marginalia.gemini.io.GeminiConnection; +import nu.marginalia.gemini.io.GeminiStatusCode; +import org.apache.http.HttpHost; +import org.apache.http.client.methods.HttpGet; +import org.apache.http.conn.routing.HttpRoute; +import org.apache.http.impl.client.HttpClients; +import org.apache.http.impl.conn.PoolingHttpClientConnectionManager; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.net.URI; +import java.net.URISyntaxException; + +public class SearchPlugin implements Plugin { + private final PoolingHttpClientConnectionManager connectionManager; + private final Logger logger = LoggerFactory.getLogger(getClass()); + + @Inject + public SearchPlugin() { + + connectionManager = new PoolingHttpClientConnectionManager(); + connectionManager.setMaxTotal(200); + connectionManager.setDefaultMaxPerRoute(20); + HttpHost host = new HttpHost("https://search.marginalia.nu/"); + connectionManager.setMaxPerRoute(new HttpRoute(host), 20); + } + + @Override + public boolean serve(URI url, GeminiConnection connection) throws IOException { + var client = HttpClients.custom() + .setConnectionManager(connectionManager) + .build(); + + if (!"/search".equals(url.getPath())) { + return false; + } + + String query = url.getRawQuery(); + + if (null == query || "".equals(query)) { + logger.info("Requesting search terms"); + connection.writeStatusLine(GeminiStatusCode.INPUT, "Please enter a search query"); + } + else { + logger.info("Delegating search query 
'{}'", query); + + final HttpGet get = new HttpGet(createSearchUri(query)); + final byte[] binaryResponse; + + try (var rsp = client.execute(get)) { + binaryResponse = rsp.getEntity().getContent().readAllBytes(); + } + catch (IOException ex) { + logger.error("backend error", ex); + + connection.writeStatusLine(GeminiStatusCode.PROXY_ERROR, "Failed to reach backend server"); + return true; + } + + connection + .writeStatusLine(GeminiStatusCode.SUCCESS, "text/gemini") + .writeBytes(binaryResponse); + } + return true; + } + + private URI createSearchUri(String query) { + try { + return new URI("https://search.marginalia.nu/search?format=gmi&query="+query); + } catch (URISyntaxException e) { + throw new RuntimeException(e); + } + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/util/ByteFolder.java b/marginalia_nu/src/main/java/nu/marginalia/util/ByteFolder.java new file mode 100644 index 00000000..0406e06c --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/util/ByteFolder.java @@ -0,0 +1,80 @@ +package nu.marginalia.util; + +public class ByteFolder { + + public byte[] foldBytes(int p, int q) { + + int pw = bitWidth(p); + int qw = bitWidth(q); + int qpw = qw + pw; + + long qp = Integer.toUnsignedLong(q) << pw | Integer.toUnsignedLong(p); + + int qpwBytes = ((qpw - 1) / Byte.SIZE) + 1; + + byte[] bytes = new byte[qpwBytes + 1]; + bytes[0] = (byte) pw; + for (int i = 1; i < bytes.length; i++) { + bytes[i] = (byte) (qp >>> (qpwBytes - i) * Byte.SIZE & 0xff); + } + + return bytes; + } + + // Function such that (decodeBytes o foldBytes) = identity + public static int[] decodeBytes(byte[] data) { + int[] dest = new int[2]; + decodeBytes(data, data.length, dest); + return dest; + } + + public static void decodeBytes(byte[] data, int length, int[] dest) { + long val = 0; + + for (int i = 1; i < length; i++) { + val = (val << 8) | ((0xFF)&data[i]); + } + + dest[1] = (int)(val >>> data[0]); + dest[0] = (int)(val & ~(dest[1]<= 0; i--) { + s.append((b[j] & 
(1L << i)) > 0 ? 1 : 0); + } + } + return s.toString(); + } + public static String intBits(int v) { + StringBuilder s = new StringBuilder(); + for (int i = 32; i >=0; i--) { + s.append((v & (1L << i)) > 0 ? 1 : 0); + } + return s.toString(); + } + public static String longBits(long v) { + StringBuilder s = new StringBuilder(); + for (int i = 64; i >=0; i--) { + s.append((v & (1L << i)) > 0 ? 1 : 0); + } + return s.toString(); + } + + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/util/FileSizeUtil.java b/marginalia_nu/src/main/java/nu/marginalia/util/FileSizeUtil.java new file mode 100644 index 00000000..de0cb17b --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/util/FileSizeUtil.java @@ -0,0 +1,18 @@ +package nu.marginalia.util; + +public class FileSizeUtil { + public static String readableSize(long byteCount) { + if (byteCount < 1024L) { + return String.format("%db", byteCount); + } + if (byteCount < 1024*1024L) { + return String.format("%2.2fKb", byteCount/1024.); + } + if (byteCount < 1024*1024*1024L) { + return String.format("%2.2fMb", byteCount/1024/1024.); + } + + return String.format("%2.2fGb", byteCount/1024/1024L/1024.); + + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/util/ParallelPipe.java b/marginalia_nu/src/main/java/nu/marginalia/util/ParallelPipe.java new file mode 100644 index 00000000..fd1ae119 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/util/ParallelPipe.java @@ -0,0 +1,101 @@ +package nu.marginalia.util; + +import lombok.SneakyThrows; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.TimeUnit; + +public abstract class ParallelPipe { + private final LinkedBlockingQueue inputs; + private final LinkedBlockingQueue intermediates; + + private final Logger logger = LoggerFactory.getLogger(getClass()); + + private final List processThreads = new 
ArrayList<>(); + private final Thread receiverThread; + + private volatile boolean expectingInput = true; + private volatile boolean expectingOutput = true; + + public ParallelPipe(String name, int numberOfThreads, int inputQueueSize, int intermediateQueueSize) { + inputs = new LinkedBlockingQueue<>(inputQueueSize); + intermediates = new LinkedBlockingQueue<>(intermediateQueueSize); + + for (int i = 0; i < numberOfThreads; i++) { + processThreads.add(new Thread(this::runProcessThread, name + "-process["+i+"]")); + } + receiverThread = new Thread(this::runReceiverThread, name + "-receiver"); + + processThreads.forEach(Thread::start); + receiverThread.start(); + } + + public void clearQueues() { + inputs.clear(); + intermediates.clear(); + } + + @SneakyThrows + private void runProcessThread() { + while (expectingInput || !inputs.isEmpty()) { + var in = inputs.poll(1, TimeUnit.SECONDS); + + if (in != null) { + try { + var ret = onProcess(in); + if (ret != null) { + intermediates.put(ret); + } + } + catch (InterruptedException ex) { + throw ex; + } + catch (Exception ex) { + logger.error("Exception", ex); + } + + } + } + + logger.debug("Terminating {}", Thread.currentThread().getName()); + } + @SneakyThrows + private void runReceiverThread() { + while (expectingOutput || !inputs.isEmpty() || !intermediates.isEmpty()) { + var intermediate = intermediates.poll(997, TimeUnit.MILLISECONDS); + if (intermediate != null) { + try { + onReceive(intermediate); + } + catch (Exception ex) { + logger.error("Exception", ex); + } + } + } + + logger.info("Terminating {}", Thread.currentThread().getName()); + } + + @SneakyThrows + public void accept(INPUT input) { + inputs.put(input); + } + + protected abstract INTERMEDIATE onProcess(INPUT input) throws Exception; + protected abstract void onReceive(INTERMEDIATE intermediate) throws Exception; + + public void join() throws InterruptedException { + expectingInput = false; + + for (var thread : processThreads) { + thread.join(); + } + + 
expectingOutput = false; + receiverThread.join(); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/util/PrimeUtil.java b/marginalia_nu/src/main/java/nu/marginalia/util/PrimeUtil.java new file mode 100644 index 00000000..0effbde9 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/util/PrimeUtil.java @@ -0,0 +1,41 @@ +package nu.marginalia.util; + +// This is not a fast way of finding primes +public class PrimeUtil { + + public static long nextPrime(long start, long step) { + if (isDivisible(start, 2)) { + start = start + step; + } + + long val; + for (val = start; !isPrime(val); val += 2*step) {} + return val; + } + + public static boolean isPrime(long v) { + if (v <= 2) { + return true; + } + if ((v & 1) == 0) { + return false; + } + for (long t = 3; t <= v/3; t++) { + if ((v % t) == 0) { + return false; + } + } + return true; + } + + public static boolean isDivisible(long a, long b) { + if (a == 0 || b == 0) { + return false; + } + + if (a > b) { + return (a % b) == 0; + } + return (b % a) == 0; + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/util/RandomWriteFunnel.java b/marginalia_nu/src/main/java/nu/marginalia/util/RandomWriteFunnel.java new file mode 100644 index 00000000..94c1d3f4 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/util/RandomWriteFunnel.java @@ -0,0 +1,139 @@ +package nu.marginalia.util; + +import io.prometheus.client.Gauge; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.io.IOException; +import java.io.RandomAccessFile; +import java.nio.ByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.file.Files; +import java.nio.file.Path; + +/** For managing random writes on SSDs + * + * See https://en.wikipedia.org/wiki/Write_amplification + * */ +public class RandomWriteFunnel implements AutoCloseable { + + private final static Gauge write_rate = Gauge.build("wmsa_rwf_write_bytes", "Bytes/s") + .register(); + private final static Gauge 
transfer_rate = Gauge.build("wmsa_rwf_transfer_bytes", "Bytes/s") + .register(); + private static final Logger logger = LoggerFactory.getLogger(RandomWriteFunnel.class); + private DataBin[] bins; + + private final int binSize; + + public RandomWriteFunnel(Path tempDir, long size, int binSize) throws IOException { + this.binSize = binSize; + + if (size > 0) { + int binCount = (int) (size / binSize + ((size % binSize) != 0L ? 1 : 0)); + bins = new DataBin[binCount]; + for (int i = 0; i < binCount; i++) { + bins[i] = new DataBin(tempDir, (int) Math.min(size - binSize * i, binSize)); + } + } + else { + bins = new DataBin[0]; + } + } + + public void put(long address, long data) throws IOException { + bins[((int)(address / binSize))].put((int)(address%binSize), data); + } + + public void write(FileChannel o) throws IOException { + ByteBuffer buffer = ByteBuffer.allocateDirect(binSize*8); + logger.debug("Writing from RWF"); + + for (int i = 0; i < bins.length; i++) { + var bin = bins[i]; + buffer.clear(); + bin.eval(buffer); + + while (buffer.hasRemaining()) { + int wb = o.write(buffer); + write_rate.set(wb); + } + } + logger.debug("Done"); + } + + @Override + public void close() throws IOException { + for (DataBin bin : bins) { + bin.close(); + } + } + + static class DataBin implements AutoCloseable { + private final ByteBuffer buffer; + private int size; + private final FileChannel channel; + private final File file; + + DataBin(Path tempDir, int size) throws IOException { + buffer = ByteBuffer.allocateDirect(360_000); + this.size = size; + file = Files.createTempFile(tempDir, "scatter-writer", ".dat").toFile(); + channel = new RandomAccessFile(file, "rw").getChannel(); + } + + void put(int address, long data) throws IOException { + buffer.putInt(address); + buffer.putLong(data); + + if (buffer.capacity() - buffer.position() < 12) { + flushBuffer(); + } + } + + private void flushBuffer() throws IOException { + if (buffer.position() == 0) + return; + + buffer.flip(); + 
while (channel.write(buffer) > 0); + buffer.clear(); + } + + private void eval(ByteBuffer dest) throws IOException { + flushBuffer(); + + channel.position(0); + buffer.clear(); + dest.clear(); + for (int i = 0; i < size; i++) { + dest.putLong(0L); + } + dest.position(0); + dest.limit(size*8); + while (channel.position() < channel.size()) { + int rb = channel.read(buffer); + if (rb < 0) { + break; + } + else { + transfer_rate.set(rb); + } + buffer.flip(); + while (buffer.limit() - buffer.position() >= 12) { + int addr = buffer.getInt(); + long data = buffer.getLong(); + dest.putLong(8*addr, data); + } + buffer.compact(); + } + } + + @Override + public void close() throws IOException { + channel.close(); + file.delete(); + } + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/util/SeekDictionary.java b/marginalia_nu/src/main/java/nu/marginalia/util/SeekDictionary.java new file mode 100644 index 00000000..a49544e4 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/util/SeekDictionary.java @@ -0,0 +1,73 @@ +package nu.marginalia.util; + +import gnu.trove.list.array.TIntArrayList; + +import java.util.ArrayList; +import java.util.function.ToIntFunction; + +public abstract class SeekDictionary { + private final ArrayList banks = new ArrayList<>(); + private final TIntArrayList offsets = new TIntArrayList(); + + public static SeekDictionary of(ToIntFunction length) { + return new SeekDictionary() { + @Override + public int length(T obj) { + return length.applyAsInt(obj); + } + }; + } + public T last() { + return banks.get(banks.size()-1); + } + public int lastStart() { + return offsets.get(offsets.size()-1); + } + + public abstract int length(T obj); + public int end() { + if (banks.isEmpty()) return 0; + + return (offsets.getQuick(offsets.size()-1) + length(last())); + } + + public void add(T obj) { + + if (banks.isEmpty()) { + banks.add(obj); + offsets.add(0); + } + else { + offsets.add(end()); + banks.add(obj); + } + } + + public T bankForOffset(int 
offset) { + return banks.get(idxForOffset(offset)); + } + + public int idxForOffset(int offset) { + + int high = offsets.size() - 1; + int low = 0; + + while ( low <= high ) { + int mid = ( low + high ) >>> 1; + int midVal = offsets.getQuick(mid); + + if ( midVal < offset ) { + low = mid + 1; + } + else if ( midVal > offset ) { + high = mid - 1; + } + else { + return mid; + } + } + return low-1; + + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/util/btree/BTreeReader.java b/marginalia_nu/src/main/java/nu/marginalia/util/btree/BTreeReader.java new file mode 100644 index 00000000..ec8f204b --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/util/btree/BTreeReader.java @@ -0,0 +1,104 @@ +package nu.marginalia.util.btree; + +import nu.marginalia.util.btree.model.BTreeContext; +import nu.marginalia.util.btree.model.BTreeHeader; +import nu.marginalia.util.multimap.MultimapFileLong; +import nu.marginalia.util.multimap.MultimapSearcher; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class BTreeReader { + + private final MultimapFileLong file; + private final BTreeContext ctx; + private final Logger logger = LoggerFactory.getLogger(BTreeReader.class); + private final long mask; + private final MultimapSearcher searcher; + + public BTreeReader(MultimapFileLong file, BTreeContext ctx) { + this.file = file; + this.searcher = file.createSearcher(); + this.ctx = ctx; + this.mask = ctx.equalityMask(); + } + + public long fileSize() { + return file.size(); + } + + public BTreeHeader getHeader(long offset) { + return new BTreeHeader(file.get(offset), file.get(offset+1), file.get(offset+2)); + } + + public long offsetForEntry(BTreeHeader header, final long keyRaw) { + final long key = keyRaw & mask; + + if (header.layers() == 0) { + return trivialSearch(header, key); + } + + long p = searchEntireTopLayer(header, key); + if (p < 0) return -1; + + long cumOffset = p * ctx.BLOCK_SIZE_WORDS(); + for (int i = header.layers() - 2; i >= 0; 
--i) { + long offsetBase = header.indexOffsetLongs() + header.relativeLayerOffset(ctx, i); + p = searchLayerBlock(key, offsetBase+cumOffset); + if (p < 0) + return -1; + cumOffset = ctx.BLOCK_SIZE_WORDS()*(p + cumOffset); + } + + long dataMax = header.dataOffsetLongs() + (long) header.numEntries() * ctx.entrySize(); + return searchDataBlock(key, + header.dataOffsetLongs() + ctx.entrySize()*cumOffset, + dataMax); + } + + + private long searchEntireTopLayer(BTreeHeader header, long key) { + long offset = header.indexOffsetLongs(); + + return searcher.binarySearchUpperBound(key, offset, offset + ctx.BLOCK_SIZE_WORDS()) - offset; + } + + private long searchLayerBlock(long key, long blockOffset) { + if (blockOffset < 0) + return blockOffset; + + return searcher.binarySearchUpperBound(key, blockOffset, blockOffset + ctx.BLOCK_SIZE_WORDS()) - blockOffset; + } + + + private long searchDataBlock(long key, long blockOffset, long dataMax) { + if (blockOffset < 0) + return blockOffset; + + long lastOffset = Math.min(blockOffset+ctx.BLOCK_SIZE_WORDS()*(long)ctx.entrySize(), dataMax); + int length = (int)(lastOffset - blockOffset); + + if (ctx.entrySize() == 1) { + if (mask == ~0L) return searcher.binarySearchUpperBoundNoMiss(key, blockOffset, blockOffset+length); + return searcher.binarySearchUpperBoundNoMiss(key, blockOffset, blockOffset+length, mask); + } + + return searcher.binarySearchUpperBoundNoMiss(key, blockOffset, ctx.entrySize(), length/ctx.entrySize(), mask); + } + + private long trivialSearch(BTreeHeader header, long key) { + long offset = header.dataOffsetLongs(); + + if (ctx.entrySize() == 1) { + if (mask == ~0L) { + return searcher.binarySearchUpperBoundNoMiss(key, offset, offset+header.numEntries()); + } + else { + return searcher.binarySearchUpperBoundNoMiss(key, offset, offset+header.numEntries(), mask); + } + } + + return searcher.binarySearchUpperBoundNoMiss(key, offset, ctx.entrySize(), header.numEntries(), mask); + + } + +} diff --git 
a/marginalia_nu/src/main/java/nu/marginalia/util/btree/BTreeWriter.java b/marginalia_nu/src/main/java/nu/marginalia/util/btree/BTreeWriter.java new file mode 100644 index 00000000..28ac4914 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/util/btree/BTreeWriter.java @@ -0,0 +1,110 @@ +package nu.marginalia.util.btree; + +import nu.marginalia.util.btree.model.BTreeContext; +import nu.marginalia.util.btree.model.BTreeHeader; +import nu.marginalia.util.multimap.MultimapFileLong; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; + + +public class BTreeWriter { + private final Logger logger = LoggerFactory.getLogger(BTreeWriter.class); + private final BTreeContext ctx; + private final MultimapFileLong map; + + public BTreeWriter(MultimapFileLong map, BTreeContext ctx) { + this.map = map; + this.ctx = ctx; + } + + private static long indexSize(BTreeContext ctx, int numWords, int numLayers) { + if (numLayers == 0) { + return 0; // Special treatment for small tables + } + + long size = 0; + for (int layer = 0; layer < numLayers; layer++) { + size += ctx.layerSize(numWords, layer); + } + return size; + } + + public long write(long offset, int numEntries, WriteCallback writeIndex) + throws IOException + { + var header = makeHeader(offset, numEntries); + + header.write(map, offset); + writeIndex.write(header.dataOffsetLongs()); + + if (header.layers() < 1) { + return ctx.calculateSize(numEntries); + } + + writeIndex(header); + + return ctx.calculateSize(numEntries); + } + + public static BTreeHeader makeHeader(BTreeContext ctx, long offset, int numEntries) { + final int numLayers = ctx.numLayers(numEntries); + + final int padding = BTreeHeader.getPadding(ctx, offset, numLayers); + + final long indexOffset = offset + BTreeHeader.BTreeHeaderSizeLongs + padding; + final long dataOffset = indexOffset + indexSize(ctx, numEntries, numLayers); + + return new BTreeHeader(numLayers, numEntries, indexOffset, dataOffset); + } + + 
public BTreeHeader makeHeader(long offset, int numEntries) { + return makeHeader(ctx, offset, numEntries); + } + + + private void writeIndex(BTreeHeader header) { + var layerOffsets = getRelativeLayerOffsets(header); + + long stride = ctx.BLOCK_SIZE_WORDS(); + for (int layer = 0; layer < header.layers(); layer++, + stride*=ctx.BLOCK_SIZE_WORDS()) { + long indexWord = 0; + long offsetBase = layerOffsets[layer] + header.indexOffsetLongs(); + long numEntries = header.numEntries(); + for (long idx = 0; idx < numEntries; idx += stride, indexWord++) { + long dataOffset = header.dataOffsetLongs() + (idx + (stride-1)) * ctx.entrySize(); + long val; + + if (idx + (stride-1) < numEntries) { + val = map.get(dataOffset) & ctx.equalityMask(); + } + else { + val = Long.MAX_VALUE; + } + if (offsetBase + indexWord < 0) { + logger.error("bad put @ {}", offsetBase + indexWord); + logger.error("layer{}", layer); + logger.error("layer offsets {}", layerOffsets); + logger.error("offsetBase = {}", offsetBase); + logger.error("numEntries = {}", numEntries); + logger.error("indexWord = {}", indexWord); + } + map.put(offsetBase + indexWord, val); + } + for (; (indexWord % ctx.BLOCK_SIZE_WORDS()) != 0; indexWord++) { + map.put(offsetBase + indexWord, Long.MAX_VALUE); + } + } + + } + + private long[] getRelativeLayerOffsets(BTreeHeader header) { + long[] layerOffsets = new long[header.layers()]; + for (int i = 0; i < header.layers(); i++) { + layerOffsets[i] = header.relativeLayerOffset(ctx, i); + } + return layerOffsets; + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/util/btree/WriteCallback.java b/marginalia_nu/src/main/java/nu/marginalia/util/btree/WriteCallback.java new file mode 100644 index 00000000..70bd8132 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/util/btree/WriteCallback.java @@ -0,0 +1,7 @@ +package nu.marginalia.util.btree; + +import java.io.IOException; + +public interface WriteCallback { + void write(long offset) throws IOException; +} diff 
--git a/marginalia_nu/src/main/java/nu/marginalia/util/btree/model/BTreeContext.java b/marginalia_nu/src/main/java/nu/marginalia/util/btree/model/BTreeContext.java new file mode 100644 index 00000000..4655946c --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/util/btree/model/BTreeContext.java @@ -0,0 +1,56 @@ +package nu.marginalia.util.btree.model; + +import nu.marginalia.util.btree.BTreeWriter; + +public record BTreeContext(int MAX_LAYERS, + int entrySize, + long equalityMask, + int BLOCK_SIZE_BITS, + int BLOCK_SIZE_WORDS) { + + public BTreeContext(int MAX_LAYERS, int entrySize, long equalityMask, int BLOCK_SIZE_BITS) { + this(MAX_LAYERS, entrySize, equalityMask, BLOCK_SIZE_BITS, 1 << BLOCK_SIZE_BITS); + + } + + public long calculateSize(int numEntries) { + var header = BTreeWriter.makeHeader(this, 0, numEntries); + + return header.dataOffsetLongs() + (long)numEntries * entrySize; + } + + public int numLayers(int numEntries) { + if (numEntries <= BLOCK_SIZE_WORDS*2) { + return 0; + } + for (int i = 1; i < MAX_LAYERS; i++) { + long div = (1L << (BLOCK_SIZE_BITS*i)); + long frq = numEntries / div; + if (frq < (1L << BLOCK_SIZE_BITS)) { + if (numEntries == (numEntries & div)) { + return i; + } + return i+1; + } + } + return MAX_LAYERS; + } + + public long layerSize(int numEntries, int level) { + return BLOCK_SIZE_WORDS * numBlocks(numEntries, level); + } + + private long numBlocks(int numWords, int level) { + + long layerSize = 1L<<(BLOCK_SIZE_BITS*(level+1)); + int numBlocks = 0; + + numBlocks += numWords / layerSize; + if (numWords % layerSize != 0) { + numBlocks++; + } + + return numBlocks; + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/util/btree/model/BTreeHeader.java b/marginalia_nu/src/main/java/nu/marginalia/util/btree/model/BTreeHeader.java new file mode 100644 index 00000000..17dae46a --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/util/btree/model/BTreeHeader.java @@ -0,0 +1,46 @@ +package 
nu.marginalia.util.btree.model; + +import nu.marginalia.util.multimap.MultimapFileLong; + +public record BTreeHeader(int layers, int numEntries, long indexOffsetLongs, long dataOffsetLongs) { + public BTreeHeader { + assert (layers >= 0); + assert (numEntries >= 0); + assert (indexOffsetLongs >= 0); + assert (dataOffsetLongs >= 0); + assert (dataOffsetLongs >= indexOffsetLongs); + } + + public static int BTreeHeaderSizeLongs = 3; + + public BTreeHeader(long a, long b, long c) { + this((int)(a >>> 32), (int)(a & 0xFFFF_FFFFL), b, c); + } + + public static int getPadding(BTreeContext ctx, long offset, int numLayers) { + final int padding; + if (numLayers == 0) { + padding = 0; + } + else { + padding = (int) (ctx.BLOCK_SIZE_WORDS() - ((offset + BTreeHeader.BTreeHeaderSizeLongs) % ctx.BLOCK_SIZE_WORDS())); + } + return padding; + } + + public void write(MultimapFileLong dest, long offset) { + dest.put(offset, ((long) layers << 32L) | ((long)numEntries & 0xFFFF_FFFFL)); + dest.put(offset+1, indexOffsetLongs); + dest.put(offset+2, dataOffsetLongs); + } + + + public long relativeLayerOffset(BTreeContext ctx, int n) { + long offset = 0; + for (int i = n+1; i < layers; i++) { + offset += ctx.layerSize( numEntries, i); + } + return offset; + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/util/dict/DictionaryData.java b/marginalia_nu/src/main/java/nu/marginalia/util/dict/DictionaryData.java new file mode 100644 index 00000000..847259db --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/util/dict/DictionaryData.java @@ -0,0 +1,186 @@ +package nu.marginalia.util.dict; + +import nu.marginalia.util.SeekDictionary; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.nio.ByteBuffer; +import java.util.Arrays; + +public class DictionaryData { + + private final int DICTIONARY_BANK_SIZE; + private static final Logger logger = LoggerFactory.getLogger(DictionaryData.class); + + private final SeekDictionary banks = 
SeekDictionary.of(DictionaryDataBank::getSize); + + public DictionaryData(int bankSize) { + DICTIONARY_BANK_SIZE = bankSize; + + banks.add(new DictionaryDataBank(0)); + } + + public int size() { + return banks.end(); + } + + public int add(byte[] data, int value) { + var activeBank = banks.last(); + int rb = activeBank.add(data, value); + + if (rb == -1) { + int end = activeBank.getEnd(); + logger.debug("Switching bank @ {}", end); + var newBank = new DictionaryDataBank(end); + rb = newBank.add(data, value); + + banks.add(newBank); + } + + return rb; + } + + + public byte[] getBytes(int offset) { + return banks.bankForOffset(offset).getBytes(offset); + } + public boolean keyEquals(int offset, byte[] data) { + return banks.bankForOffset(offset).keyEquals(offset, data); + } + + public int getValue(int offset) { + return banks.bankForOffset(offset).getValue(offset); + } + + public class DictionaryDataBank { + + private final int start_idx; + private final ByteBuffer data; + + private int size; + private int[] offset; + private int[] value; + + public DictionaryDataBank(int start_idx) { + this.start_idx = start_idx; + + data = ByteBuffer.allocateDirect(DICTIONARY_BANK_SIZE); + + offset = new int[DICTIONARY_BANK_SIZE/16]; + value = new int[DICTIONARY_BANK_SIZE/16]; + size = 0; + } + + public int getStart() { + return start_idx; + } + + public int getEnd() { + return start_idx + size; + } + + public byte[] getBytes(int idx) { + if (idx < start_idx || idx - start_idx >= size) { + throw new IndexOutOfBoundsException(idx); + } + + idx = idx - start_idx; + + final int start; + final int end = offset[idx]; + + if (idx == 0) start = 0; + else start = offset[idx-1]; + + byte[] dst = new byte[end-start]; + data.get(start, dst); + return dst; + } + + public int getValue(int idx) { + if (idx < start_idx || idx - start_idx >= size) { + throw new IndexOutOfBoundsException(idx); + } + return value[idx - start_idx]; + } + + public boolean keyEquals(int idx, byte[] data) { + if (idx < 
start_idx || idx - start_idx >= size) { + throw new IndexOutOfBoundsException(idx); + } + + idx = idx - start_idx; + int start; + int end = offset[idx]; + + if (idx == 0) { + start = 0; + } + else { + start = offset[idx-1]; + } + if (data.length != end - start) { + return false; + } + for (int i = 0; i < data.length; i++) { + if (this.data.get(start + i) != data[i]) { + return false; + } + } + return true; + } + + public long longHashCode(int idx) { + if (idx < start_idx || idx - start_idx >= size) { + throw new IndexOutOfBoundsException(idx); + } + + idx = idx - start_idx; + int start; + int end = offset[idx]; + + if (idx == 0) { + start = 0; + } + else { + start = offset[idx-1]; + } + + long result = 1; + for (int i = start; i < end; i++) + result = 31 * result + data.get(i); + + return result; + } + + public int add(byte[] newData, int newValue) { + if (size == offset.length) { + logger.debug("Growing bank from {} to {}", offset.length, offset.length*2); + offset = Arrays.copyOf(offset, offset.length*2); + value = Arrays.copyOf(value, value.length*2); + } + + if (size > 0 && offset[size-1]+newData.length >= DICTIONARY_BANK_SIZE) { + if (offset.length > size+1) { + logger.debug("Shrinking bank from {} to {}", offset.length, size - 1); + offset = Arrays.copyOf(offset, size + 1); + value = Arrays.copyOf(value, size + 1); + } + return -1; // Full + } + + int dataOffset = size > 0 ? 
offset[size-1] : 0; + + data.put(dataOffset, newData); + + offset[size] = dataOffset + newData.length; + value[size] = newValue; + + return start_idx + size++; + } + + public int getSize() { + return size; + } + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/util/dict/DictionaryHashMap.java b/marginalia_nu/src/main/java/nu/marginalia/util/dict/DictionaryHashMap.java new file mode 100644 index 00000000..57a8b6f3 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/util/dict/DictionaryHashMap.java @@ -0,0 +1,208 @@ +package nu.marginalia.util.dict; + +import io.prometheus.client.Gauge; +import nu.marginalia.util.PrimeUtil; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.nio.ByteBuffer; +import java.nio.IntBuffer; +import java.util.concurrent.atomic.AtomicInteger; + +import static java.lang.Math.round; +import static nu.marginalia.util.FileSizeUtil.readableSize; + +/** + * Spiritually influenced by GNU Trove's hash maps + * LGPL 2.1 + */ +public class DictionaryHashMap { + private static final Logger logger = LoggerFactory.getLogger(DictionaryHashMap.class); + private static final Gauge probe_count_metrics + = Gauge.build("wmsa_dictionary_hash_map_probe_count", "Probing Count") + .register(); + + private final int bufferCount; + private final IntBuffer[] buffers; + public static final int NO_VALUE = Integer.MIN_VALUE; + + private final DictionaryData dictionaryData; + + private final long hashTableSize; + private final int bufferSizeBytes; + private final int intsPerBuffer; + private final long maxProbeLength; + + private AtomicInteger sz = new AtomicInteger(0); + + public DictionaryHashMap(long sizeMemory) { + final int intSize = 4; + + bufferCount = 1 + (int) ((intSize*sizeMemory) / (1<<30)); + buffers = new IntBuffer[bufferCount]; + + // Actually use a prime size for Donald Knuth reasons + hashTableSize = PrimeUtil.nextPrime(sizeMemory, -1); + + intsPerBuffer = 1 + (int)(sizeMemory/ bufferCount); + bufferSizeBytes = 
intSize*intsPerBuffer; + maxProbeLength = sizeMemory/10; + + logger.info("Allocating dictionary hash map of size {}, capacity: {}", + readableSize((long) bufferCount * bufferSizeBytes), + hashTableSize); + + logger.info("available-size:{} memory-size:{} buffer-count: {}, buffer-size:{} ints-per-buffer:{} max-probe-length:{}", + hashTableSize, sizeMemory, bufferCount, bufferSizeBytes, intsPerBuffer, maxProbeLength); + + if (((long) bufferCount * intsPerBuffer) < sizeMemory) { + logger.error("Buffer memory is less than requested memory: {}*{} = {} < {}; this data structure is not safe to use", + bufferCount, + bufferSizeBytes, (long) bufferCount * bufferSizeBytes, + sizeMemory); + throw new Error("Irrecoverable logic error"); + } + else { + logger.debug("Buffer size sanity checked passed"); + } + + + dictionaryData = new DictionaryData(Math.min(1<<30, Math.max(32, (int)(sizeMemory/4)))); + + initializeBuffers(); + } + + private void initializeBuffers() { + for (int b = 0; b < bufferCount; b++) { + buffers[b] = ByteBuffer.allocateDirect(bufferSizeBytes).asIntBuffer(); + + for (int i = 0; i < intsPerBuffer; i++) { + buffers[b].put(i, NO_VALUE); + } + } + } + + public int memSz() { + return dictionaryData.size(); + } + public int size() { + return sz.get(); + } + + private int getCell(long idx) { + int buffer = (int)(idx / intsPerBuffer); + int bufferIdx = (int)(idx % intsPerBuffer); + return buffers[buffer].get(bufferIdx); + } + private void setCell(long idx, int val) { + int buffer = (int)(idx / intsPerBuffer); + int bufferIdx = (int)(idx % intsPerBuffer); + + buffers[buffer].put(bufferIdx, val); + } + + public int put(byte[] data, int value) { + + long hash = longHash(data) & 0x7FFF_FFFF_FFFF_FFFFL; + + long idx = hash % hashTableSize; + + if (getCell(idx) == NO_VALUE) { + return setValue(data, value, idx); + } + + return putRehash(data, value, idx, hash); + } + + private int putRehash(byte[] data, int value, long idx, long hash) { + final long pStride = 1 + (hash % 
(hashTableSize - 2)); + + for (long j = 1; j < maxProbeLength; j++) { + idx = idx - pStride; + + if (idx < 0) { + idx += hashTableSize; + } + + final int val = getCell(idx); + + if (val == NO_VALUE) { + probe_count_metrics.set(j); + + return setValue(data, value, idx); + } + else if (dictionaryData.keyEquals(val, data)) { + return val; + } + } + + throw new IllegalStateException("DictionaryHashMap full @ size " + size() + "/" + hashTableSize + ", " + round((100.0*size()) / hashTableSize) + "%"); + } + + private int setValue(byte[] data, int value, long cell) { + sz.incrementAndGet(); + + int di = dictionaryData.add(data, value); + setCell(cell, di); + return di; + } + + public int get(byte[] data) { + final long hash = longHash(data) & 0x7FFF_FFFF_FFFF_FFFFL; + final long cell = hash % hashTableSize; + + if (getCell(cell) == NO_VALUE) { + return NO_VALUE; + } + else { + int val = getCell(cell); + + if (dictionaryData.keyEquals(val, data)) { + return dictionaryData.getValue(val); + } + } + + return getRehash(data, cell, hash); + } + + private int getRehash(byte[] data, long idx, long hash) { + final long pStride = 1 + (hash % (hashTableSize - 2)); + + for (long j = 1; j < maxProbeLength; j++) { + idx = idx - pStride; + + if (idx < 0) { + idx += hashTableSize; + } + + final var val = getCell(idx); + + if (val == NO_VALUE) { + return NO_VALUE; + } + else if (dictionaryData.keyEquals(val, data)) { + return dictionaryData.getValue(val); + } + } + + throw new IllegalStateException("DictionaryHashMap full @ size " + size() + "/" + hashTableSize + ", " + round((100.0*size()) / hashTableSize) + "%"); + } + + private long longHash(byte[] bytes) { + if (bytes == null) + return 0; + + // https://cp-algorithms.com/string/string-hashing.html + int p = 127; + long m = (1L<<61)-1; + long p_power = 1; + long hash_val = 0; + + for (byte element : bytes) { + hash_val = (hash_val + (element+1) * p_power) % m; + p_power = (p_power * p) % m; + } + return hash_val; + } + +} diff --git 
a/marginalia_nu/src/main/java/nu/marginalia/util/graphics/dithering/FloydSteinbergDither.java b/marginalia_nu/src/main/java/nu/marginalia/util/graphics/dithering/FloydSteinbergDither.java new file mode 100644 index 00000000..59d0f848 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/util/graphics/dithering/FloydSteinbergDither.java @@ -0,0 +1,243 @@ +package nu.marginalia.util.graphics.dithering; + +import lombok.AllArgsConstructor; +import net.sf.image4j.util.ConvertUtil; +import org.imgscalr.Scalr; + +import java.awt.image.BufferedImage; +import java.awt.image.IndexColorModel; +import java.util.Arrays; +import java.util.Comparator; + +public class FloydSteinbergDither { + private final Color[] palette; + + private final int maxWidth; + private final int maxHeight; + + public FloydSteinbergDither(int[] colors, int maxWidth, int maxHeight) { + this.maxWidth = maxWidth; + this.maxHeight = maxHeight; + + palette = Arrays.stream(colors) + .mapToObj(Color::new) + .toArray(Color[]::new); + } + + public BufferedImage convert(BufferedImage src) { + BufferedImage out = dither(resize(src)); + + if (palette.length <= 16) { + int[] cmap = new int[palette.length]; + for (int i = 0; i < palette.length; i++) { + cmap[i] = palette[i].toInt(); + } + return ConvertUtil.convert4(out, cmap); + } + return out; + } + + private BufferedImage dither(BufferedImage in) { + + Errors errors = new Errors(in.getWidth(), in.getHeight()); + + final BufferedImage out = createOutBuffer(in); + + for (int y = 0; y < in.getHeight(); y++) { + for (int x = 0; x < in.getWidth(); x++) { + setOutPixel(errors, out, in, x, y, 1); + } + if (++y >= in.getHeight()) { + break; + } + for (int x = in.getWidth()-1; x >= 0; x--) { + setOutPixel(errors, out, in, x, y, -1); + } + } + return out; + } + + private void setOutPixel(Errors errors, BufferedImage out, BufferedImage in, int x, int y, int dx) { + final Color color = new Color(in.getRGB(x, y)); + final Color adjustedColor = errors.adjust(color, x, 
y); + + final int newColor = getNearestColorAndDiffuseError(errors, + x, dx, y, + adjustedColor, color); + + out.setRGB(x, y, newColor); + } + + private BufferedImage createOutBuffer(BufferedImage in) { + + var indexModel = createIndexColorModel(); + + return new BufferedImage(indexModel, + indexModel.createCompatibleWritableRaster(in.getWidth(), in.getHeight()), + false, null); + + } + + private BufferedImage resize(BufferedImage src) { + if (maxWidth < 0 || maxHeight < 0) { + return src; + } + final int width = src.getWidth(); + final int height = src.getHeight(); + + double scaleF = Math.min(scaleFactor(width, maxWidth), + scaleFactor(height, maxHeight)); + + if (scaleF < 1.0) { + int newWidth = (int)Math.min(maxWidth, scaleF * width); + int newHeight = (int)Math.min(maxHeight, scaleF * height); + + return Scalr.resize(src, + Scalr.Method.QUALITY, + Scalr.Mode.AUTOMATIC, + newWidth, newHeight); + } + + return src; + } + + private double scaleFactor(int actualValue, int desiredValue) { + if (actualValue <= desiredValue) { + return 1.; + } + return desiredValue / (double) actualValue; + } + + private IndexColorModel createIndexColorModel() { + byte[] reds = new byte[palette.length]; + byte[] greens = new byte[palette.length]; + byte[] blues = new byte[palette.length]; + + for (int i = 0; i < palette.length; i++) { + int colorInt = palette[i].toInt(); + + reds[i] = (byte) ((colorInt >>> 16) & 0xFF); + greens[i] = (byte) ((colorInt >>> 8) & 0xFF); + blues[i] = (byte) ((colorInt) & 0xFF); + } + + return new IndexColorModel(getPaletteBits(palette), palette.length, reds, greens, blues); + + } + + private int getPaletteBits(Color[] palette) { + if (palette.length <= 16) { + return 4; + } + else { + return 8; + } + } + + private int getNearestColorAndDiffuseError(Errors errors, int x, int dx, int y, Color color, Color colorOrig) { + + var match = Arrays.stream(palette).min(Comparator.comparing(c -> c.delta(color))); + assert match.isPresent(); + + var retC = match.get(); 
+ var error = colorOrig.minus(retC); + + errors.add(x+dx, y, error.scale(7/16.)); + errors.add(x+dx, y+1, error.scale(1/16.)); + errors.add(x, y+1, error.scale(5/16.)); + errors.add(x-dx, y+1, error.scale(3/16.)); + + return retC.toInt(); + } +} + +class Errors { + private final int width; + private final int height; + private final Color[] errors; + + Errors(int width, int height) { + this.width = width; + this.height = height; + + errors = new Color[width * height]; + } + + public void add(int x, int y, Color color) { + if (x > 0 && y > 0 && x + 1 < width && y + 1 < height) { + int index = getIndex(x, y); + if (errors[index] == null) { + errors[index] = color; + } + else { + errors[index] = errors[index].plus(color); + } + } + } + + public Color adjust(Color in, int x, int y) { + int idx = getIndex(x, y); + + if (errors[idx] != null) { + return in.plus(errors[idx]); + } + return in; + } + + private int getIndex(int x, int y) { + return x * height + y; + } +} + +@AllArgsConstructor +class Color { + private final double r; + private final double g; + private final double b; + + Color(int hex) { + this.b = ((hex) & 0xFF); + this.g = ((hex >>> 8) & 0xFF); + this.r = ((hex >>> 16) & 0xFF); + } + + int toInt() { + double bv = clampByteRange(b); + double gv = clampByteRange(g); + double rv = clampByteRange(r); + + return (((int)bv&0xFF) | (((int)gv & 0xFF) << 8) | (((int)rv & 0xFF) << 16)); + } + + double clampByteRange(double v) { + if (v < 0) return 0; + if (v > 255) return 255; + return v; + } + + public Color scale(double factor) { + return new Color(r*factor, g*factor, b*factor); + } + + public Color plus(Color other) { + return new Color(r+other.r, g+other.g, b+other.b); + } + + public Color minus(Color other) { + return new Color(r-other.r, g-other.g, b-other.b); + } + + public double delta(Color other) { + double avgr = (r + other.r)/2; + double dr = r - other.r; + double dg = g - other.g; + double db = b - other.b; + + if (avgr > 128) { + return Math.sqrt(2 * 
dr * dr + 4 * dg * dg + 3 * db * db); + } + else { + return Math.sqrt(3 * dr * dr + 4 * dg * dg + 2 * db * db); + } + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/util/graphics/dithering/Palettes.java b/marginalia_nu/src/main/java/nu/marginalia/util/graphics/dithering/Palettes.java new file mode 100644 index 00000000..afd6e99a --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/util/graphics/dithering/Palettes.java @@ -0,0 +1,29 @@ +package nu.marginalia.util.graphics.dithering; + +public class Palettes { + + public static int[] MARGINALIA_PALETTE = new int[] { + 0x000000, + 0x000000, + 0x808080, + 0x404040, + + 0xefefc0, + 0xf8f8ee, + 0x274fa5, + 0x85172f, + + 0x808060, + 0x60a060, + 0xFFFFFF, + }; + + public static int[] CGA_PALETTE = new int[]{ + 0x000000, 0xFFFFFF, 0x808080, 0xFF0000, + 0x800000, 0x00FF00, 0x008000, 0x0000FF, + 0x000080, 0xFFFF00, 0x808000, 0x00FFFF, + 0x008080, 0xFF00FF, 0x800080, 0x404040 + }; + + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/util/hash/LongPairHashMap.java b/marginalia_nu/src/main/java/nu/marginalia/util/hash/LongPairHashMap.java new file mode 100644 index 00000000..ae5f41ae --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/util/hash/LongPairHashMap.java @@ -0,0 +1,183 @@ +package nu.marginalia.util.hash; + +import io.prometheus.client.Gauge; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import nu.marginalia.wmsa.edge.index.service.index.wordstable.IndexWordsTable; +import nu.marginalia.util.multimap.MultimapFileLong; +import nu.marginalia.util.PrimeUtil; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import static java.lang.Math.round; + +/** + * Spiritually influenced by GNU Trove's hash maps + * LGPL 2.1 + */ +public class LongPairHashMap { + private static final Logger logger = LoggerFactory.getLogger(LongPairHashMap.class); + private static final Gauge probe_count_metrics + = Gauge.build("wmsa_wordfile_hash_map_probe_count", "Probing Count") + 
.register(); + + private final long hashTableSize; + private final MultimapFileLong data; + private final long maxProbeLength; + private int sz = 0; + private static final int HEADER_SIZE = 2; + + public LongPairHashMap(MultimapFileLong data, long size) { + this.data = data; + // Actually use a prime size for Donald Knuth reasons + hashTableSize = PrimeUtil.nextPrime(size, 1); + maxProbeLength = hashTableSize / 2; + + logger.debug("Table size = " + hashTableSize); + + data.put(0, IndexWordsTable.Strategy.HASH.ordinal()); + data.put(1, hashTableSize); + for (int i = 2; i < hashTableSize; i++) { + data.put(HEADER_SIZE + 2L*i, 0); + } + } + public LongPairHashMap(MultimapFileLong data) { + this.data = data; + hashTableSize = data.get(1); + maxProbeLength = hashTableSize / 10; + + logger.debug("Table size = " + hashTableSize); + } + + public int size() { + return sz; + } + + private CellData getCell(long idx) { + long bufferIdx = 2*idx + HEADER_SIZE; + long a = data.get(bufferIdx); + long b = data.get(bufferIdx+1); + return new CellData(a, b); + } + private void setCell(long idx, CellData cell) { + long bufferIdx = 2*idx + HEADER_SIZE; + data.put(bufferIdx, cell.first); + data.put(bufferIdx+1, cell.second); + } + + public CellData put(CellData data) { + + long hash = longHash(data.getKey()) & 0x7FFF_FFFFL; + + long idx = hash% hashTableSize; + if (!getCell(hash% hashTableSize).isSet()) { + return setValue(data, hash% hashTableSize); + } + + return putRehash(data, idx, hash); + + } + + private CellData putRehash(CellData data, long idx, long hash) { + final long pStride = 1 + (hash % (hashTableSize - 2)); + + for (long j = 1; j < maxProbeLength; j++) { + idx = idx - pStride; + + if (idx < 0) { + idx += hashTableSize; + } + + final var val = getCell(idx); + + if (!val.isSet()) { + probe_count_metrics.set(j); + + return setValue(data, idx); + } + else if (val.getKey() == data.getKey()) { + logger.error("Double write?"); + return val; + } + } + + throw new 
IllegalStateException("DictionaryHashMap full @ size " + size() + "/" + hashTableSize + ", " + round((100.0*size()) / hashTableSize) + "%, key = " + data.getKey() + ",#"+hash); + } + + private CellData setValue(CellData data, long cell) { + sz++; + + setCell(cell, data); + return data; + } + + public CellData get(int key) { + if (hashTableSize == 0) { + return new CellData(0, 0); + } + final long hash = longHash(key) & 0x7FFF_FFFFL; + + var val = getCell(hash % hashTableSize); + if (!val.isSet()) { + return val; + } + else if (val.getKey() == key) { + return val; + } + + return getRehash(key, hash % hashTableSize, hash); + } + + private CellData getRehash(int key, long idx, long hash) { + final long pStride = 1 + (hash % (hashTableSize - 2)); + + for (long j = 1; j < maxProbeLength; j++) { + idx = idx - pStride; + + if (idx < 0) { + idx += hashTableSize; + } + + final var val = getCell(idx); + + if (!val.isSet()) { + return val; + } + else if (val.getKey() == key) { + return val; + } + } + + throw new IllegalStateException("DictionaryHashMap full @ size " + size() + "/" + hashTableSize + ", " + round((100.0*size()) / hashTableSize) + "%"); + } + + private long longHash(long x) { + return x; + } + + @Getter @EqualsAndHashCode + public static class CellData { + long first; + long second; + + public CellData(long key, long offset) { + first = key | 0x8000_0000_000_000L; + second = offset; + } + + public long getKey() { + return first & ~0x8000_0000_000_000L; + } + public long getOffset() { + return second; + } + + public boolean isSet() { + return first != 0 || second != 0L; + } + } + + public void close() throws Exception { + data.close(); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/util/multimap/MultimapFileLong.java b/marginalia_nu/src/main/java/nu/marginalia/util/multimap/MultimapFileLong.java new file mode 100644 index 00000000..1788fb0a --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/util/multimap/MultimapFileLong.java @@ -0,0 
+1,366 @@ +package nu.marginalia.util.multimap; + +import com.upserve.uppend.blobs.NativeIO; +import lombok.SneakyThrows; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.io.IOException; +import java.io.RandomAccessFile; +import java.nio.ByteBuffer; +import java.nio.LongBuffer; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.ArrayList; + +import static java.nio.channels.FileChannel.MapMode.READ_ONLY; +import static java.nio.channels.FileChannel.MapMode.READ_WRITE; +import static nu.marginalia.util.FileSizeUtil.readableSize; + + +public class MultimapFileLong implements AutoCloseable { + + private final ArrayList buffers = new ArrayList<>(); + private final ArrayList mappedByteBuffers = new ArrayList<>(); + private final FileChannel.MapMode mode; + private final int bufferSize; + private final Logger logger = LoggerFactory.getLogger(getClass()); + + private final FileChannel channel; + + private final long mapSize; + private final long fileLength; + private long mappedSize; + final static long WORD_SIZE = 8; + + private boolean loadAggressively; + + private NativeIO.Advice advice = null; + + public static MultimapFileLong forReading(Path file) throws IOException { + long fileSize = Files.size(file); + int bufferSize = getBufferSize(fileSize, false); + + return new MultimapFileLong(file.toFile(), READ_ONLY, Files.size(file), bufferSize); + } + + public static MultimapFileLong forOutput(Path file, long estimatedSize) throws IOException { + return new MultimapFileLong(file.toFile(), READ_WRITE, 0, getBufferSize(estimatedSize, true)); + } + + private static int getBufferSize(long totalSize, boolean write) { + if (totalSize > Integer.MAX_VALUE/WORD_SIZE) { + return (int)(Integer.MAX_VALUE/WORD_SIZE); + } + else if (write && totalSize < 8*1024*1024) { + return 8*1024*1024; + } + else { + return (int) Math.min(totalSize, 
Integer.MAX_VALUE/WORD_SIZE); + } + } + + + public MultimapFileLong(File file, + FileChannel.MapMode mode, + long mapSize, + int bufferSize) throws IOException { + + this(new RandomAccessFile(file, translateToRAFMode(mode)), mode, mapSize, bufferSize, false); + } + + public MultimapFileLong loadAggressively(boolean v) { + this.loadAggressively = v; + return this; + } + + private static String translateToRAFMode(FileChannel.MapMode mode) { + if (READ_ONLY.equals(mode)) { + return "r"; + } else if (READ_WRITE.equals(mode)) { + return "rw"; + } + return "rw"; + } + + + public MultimapFileLong(RandomAccessFile file, + FileChannel.MapMode mode, + long mapSizeBytes, + int bufferSizeWords, + boolean loadAggressively) throws IOException { + this.mode = mode; + this.bufferSize = bufferSizeWords; + this.mapSize = mapSizeBytes; + this.fileLength = file.length(); + this.loadAggressively = loadAggressively; + + channel = file.getChannel(); + mappedSize = 0; + + logger.debug("Creating multimap file size = {} / buffer size = {}, mode = {}", + readableSize(mapSizeBytes), readableSize(8L*bufferSizeWords), mode); + } + + public MultimapSearcher createSearcher() { + return new MultimapSearcher(this); + } + public MultimapSorter createSorter(Path tmpFile, int internalSortLimit) { + return new MultimapSorter(this, tmpFile, internalSortLimit); + } + + @SneakyThrows + public void advice(NativeIO.Advice advice) { + for (var buffer : mappedByteBuffers) { + NativeIO.madvise(buffer, advice); + }; + } + + @SneakyThrows + public void advice0(NativeIO.Advice advice) { + NativeIO.madvise(mappedByteBuffers.get(0), advice); + } + + @SneakyThrows + public void adviceRange(NativeIO.Advice advice, long startLongs, long lengthLongs) { + long endLongs = (startLongs+lengthLongs); + + if (endLongs >= mappedSize) + grow(endLongs); + + var buff = mappedByteBuffers.get((int)(startLongs / bufferSize)); + + if ((int)(startLongs / bufferSize) != (int)((endLongs) / bufferSize)) { + logger.warn("Misaligned 
madvise, skipping"); + return; + } + + NativeIO.madviseRange(buff, advice, (startLongs % bufferSize) * WORD_SIZE, (int)(lengthLongs*WORD_SIZE)); + } + + public void pokeRange(long offset, int length) { + for (int i = 0; i < length; i += 4096/8) { + get(offset + i); + } + } + + public void force() { + logger.debug("Forcing"); + + for (MappedByteBuffer buffer: mappedByteBuffers) { + buffer.force(); + } + } + + @SneakyThrows + private void grow(long posIdxRequired) { + if (posIdxRequired*WORD_SIZE > mapSize && mode == READ_ONLY) { + throw new IndexOutOfBoundsException(posIdxRequired + " (max " + mapSize + ")"); + } + logger.trace("Growing to encompass {}i/{}b", posIdxRequired, posIdxRequired*WORD_SIZE); + long start; + if (buffers.isEmpty()) { + start = 0; + } + else { + start = (long) buffers.size() * bufferSize; + } + for (long posIdx = start; posIdxRequired >= posIdx; posIdx += bufferSize) { + long posBytes = posIdx * WORD_SIZE; + long bzBytes; + if (mode == READ_ONLY) { + bzBytes = Math.min(WORD_SIZE*bufferSize, mapSize - posBytes); + } + else { + bzBytes = WORD_SIZE*bufferSize; + } + logger.trace("Allocating {}-{}", posBytes, posBytes+bzBytes); + + var buffer = channel.map(mode, posBytes, bzBytes); + + if (loadAggressively) + buffer.load(); + + if (advice != null) { + NativeIO.madvise(buffer, advice); + } + + buffers.add(buffer.asLongBuffer()); + mappedByteBuffers.add(buffer); + + mappedSize += bzBytes/WORD_SIZE; + } + } + + public long size() { + return fileLength; + } + + public void put(long idx, long val) { + if (idx >= mappedSize) + grow(idx); + + try { + buffers.get((int)(idx / bufferSize)).put((int) (idx % bufferSize), val); + } + catch (IndexOutOfBoundsException ex) { + logger.error("Index out of bounds {} -> {}:{} cap {}", idx, buffers.get((int)(idx / bufferSize)), idx % bufferSize, + buffers.get((int)(idx / bufferSize)).capacity()); + throw new RuntimeException(ex); + } + } + + public long get(long idx) { + if (idx >= mappedSize) + grow(idx); + + try { 
+ return buffers.get((int)(idx / bufferSize)).get((int)(idx % bufferSize)); + } + catch (IndexOutOfBoundsException ex) { + logger.error("Index out of bounds {} -> {}:{} cap {}", idx, buffers.get((int)(idx / bufferSize)), idx % bufferSize, + buffers.get((int)(idx / bufferSize)).capacity()); + throw new RuntimeException(ex); + } + } + + + public void read(long[] vals, long idx) { + read(vals, vals.length, idx); + } + + public void read(long[] vals, int n, long idx) { + if (idx+n >= mappedSize) { + grow(idx+n); + } + + int iN = (int)((idx + n) / bufferSize); + + for (int i = 0; i < n; ) { + int i0 = (int)((idx + i) / bufferSize); + int bufferOffset = (int) ((idx+i) % bufferSize); + var buffer = buffers.get(i0); + + final int l; + + if (i0 < iN) l = bufferSize - bufferOffset; + else l = Math.min(n - i, bufferSize - bufferOffset); + + buffer.get(bufferOffset, vals, i, l); + i+=l; + + } + + } + + public void write(long[] vals, long idx) { + write(vals, vals.length, idx); + } + + public void write(long[] vals, int n, long idx) { + if (idx+n >= mappedSize) { + grow(idx+n); + } + + int iN = (int)((idx + n) / bufferSize); + + for (int i = 0; i < n; ) { + int i0 = (int)((idx + i) / bufferSize); + int bufferOffset = (int) ((idx+i) % bufferSize); + var buffer = buffers.get(i0); + + final int l; + + if (i0 < iN) l = bufferSize - bufferOffset; + else l = Math.min(n - i, bufferSize - bufferOffset); + + buffer.put(bufferOffset, vals, i, l); + i+=l; + + } + + } + + public void write(LongBuffer vals, long idx) { + int n = vals.limit() - vals.position(); + if (idx+n >= mappedSize) { + grow(idx+n); + } + int iN = (int)((idx + n) / bufferSize); + + for (int i = 0; i < n; ) { + int i0 = (int)((idx + i) / bufferSize); + + int bufferOffset = (int) ((idx+i) % bufferSize); + var buffer = buffers.get(i0); + + final int l; + + if (i0 < iN) l = bufferSize - bufferOffset; + else l = Math.min(n - i, bufferSize - bufferOffset); + + buffer.put(bufferOffset, vals, vals.position() + i, l); + i+=l; + 
} + + } + + + public void transferFromFileChannel(FileChannel sourceChannel, long destOffset, long sourceStart, long sourceEnd) throws IOException { + + int length = (int)(sourceEnd - sourceStart); + + if (destOffset+length >= mappedSize) { + grow(destOffset+length); + } + + int i0 = (int)((destOffset) / bufferSize); + int iN = (int)((destOffset + length) / bufferSize); + + int numBuffers = iN - i0 + 1; + ByteBuffer[] buffers = new ByteBuffer[numBuffers]; + for (int i = 0; i < numBuffers; i++) { + buffers[i] = mappedByteBuffers.get(i0 + i); + buffers[i].clear(); + } + if (i0 != iN) { + int startBuf0 = (int) ((destOffset) % bufferSize) * 8; + int endBuf0 = buffers[0].capacity() - (int) ((destOffset) % bufferSize) * 8; + int endBufN = (int)((destOffset + length) % bufferSize)*8; + buffers[0] = buffers[0].slice(startBuf0, endBuf0); + buffers[numBuffers-1] = buffers[numBuffers-1].slice(0, endBufN); + } + else { + buffers[0] = buffers[0].slice((int) ((destOffset) % bufferSize) * 8, 8*length); + } + + sourceChannel.position(sourceStart*8); + + long twb = 0; + while (twb < length * 8L) { + long rb = sourceChannel.read(buffers, 0, buffers.length); + if (rb < 0) + throw new IOException(); + twb += rb; + } + + } + + + @Override + public void close() throws IOException { + force(); + mappedByteBuffers.clear(); + buffers.clear(); + channel.close(); + + // I want to believe + System.runFinalization(); + System.gc(); + } + + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/util/multimap/MultimapSearcher.java b/marginalia_nu/src/main/java/nu/marginalia/util/multimap/MultimapSearcher.java new file mode 100644 index 00000000..c961ac0e --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/util/multimap/MultimapSearcher.java @@ -0,0 +1,128 @@ +package nu.marginalia.util.multimap; + +import lombok.experimental.Delegate; + +public class MultimapSearcher { + @Delegate + private final MultimapFileLong mmf; + + public MultimapSearcher(MultimapFileLong mmf) { + this.mmf = 
mmf; + } + + public boolean binarySearch(long key, long fromIndex, long toIndex) { + + long low = fromIndex; + long high = toIndex - 1; + + while (low <= high) { + long mid = (low + high) >>> 1; + long midVal = get(mid); + + if (midVal < key) + low = mid + 1; + else if (midVal > key) + high = mid - 1; + else + return true; // key found + } + return false; // key not found. + } + + public long binarySearchUpperBound(long key, long fromIndex, long toIndex) { + + long low = fromIndex; + long high = toIndex - 1; + + while (low <= high) { + long mid = (low + high) >>> 1; + long midVal = get(mid); + + if (midVal < key) + low = mid + 1; + else if (midVal > key) + high = mid - 1; + else + return mid; + } + return low; + } + + public long binarySearchUpperBound(long key, long fromIndex, long toIndex, long mask) { + + long low = fromIndex; + long high = toIndex - 1; + + while (low <= high) { + long mid = (low + high) >>> 1; + long midVal = get(mid) & mask; + + if (midVal < key) + low = mid + 1; + else if (midVal > key) + high = mid - 1; + else + return mid; + } + return low; + } + + public long binarySearchUpperBoundNoMiss(long key, long fromIndex, long toIndex) { + + long low = fromIndex; + long high = toIndex - 1; + + while (low <= high) { + long mid = (low + high) >>> 1; + long midVal = get(mid); + + if (midVal < key) + low = mid + 1; + else if (midVal > key) + high = mid - 1; + else + return mid; + } + return -1; + } + + + public long binarySearchUpperBoundNoMiss(long key, long fromIndex, long toIndex, long mask) { + + long low = fromIndex; + long high = toIndex - 1; + + while (low <= high) { + long mid = (low + high) >>> 1; + long midVal = get(mid) & mask; + + if (midVal < key) + low = mid + 1; + else if (midVal > key) + high = mid - 1; + else + return mid; + } + return -1; + } + + + public long binarySearchUpperBoundNoMiss(long key, long fromIndex, long step, long steps, long mask) { + + long low = 0; + long high = steps - 1; + + while (low <= high) { + long mid = (low 
+ high) >>> 1; + long midVal = get(fromIndex + mid*step) & mask; + + if (midVal < key) + low = mid + 1; + else if (midVal > key) + high = mid - 1; + else + return fromIndex + mid*step; + } + return -1; + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/util/multimap/MultimapSorter.java b/marginalia_nu/src/main/java/nu/marginalia/util/multimap/MultimapSorter.java new file mode 100644 index 00000000..6ca4f64f --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/util/multimap/MultimapSorter.java @@ -0,0 +1,89 @@ +package nu.marginalia.util.multimap; + +import java.io.IOException; +import java.io.RandomAccessFile; +import java.nio.LongBuffer; +import java.nio.channels.FileChannel; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Arrays; + +import static nu.marginalia.util.multimap.MultimapFileLong.WORD_SIZE; + +public class MultimapSorter { + private final Path tmpFileDir; + private final int internalSortLimit; + private final MultimapFileLong multimapFileLong; + private final long[] buffer; + + public MultimapSorter(MultimapFileLong multimapFileLong, Path tmpFileDir, int internalSortLimit) { + this.multimapFileLong = multimapFileLong; + this.tmpFileDir = tmpFileDir; + this.internalSortLimit = internalSortLimit; + buffer = new long[internalSortLimit]; + } + + public void sort(long start, int length) throws IOException { + if (length <= internalSortLimit) { + multimapFileLong.read(buffer, length, start); + Arrays.sort(buffer, 0, length); + multimapFileLong.write(buffer, length, start); + } + else { + externalSort(start, length); + } + } + + + private void externalSort(long start, int length) throws IOException { + Path tmpFile = Files.createTempFile(tmpFileDir,"sort-"+start+"-"+(start+length), ".dat"); + + try (var raf = new RandomAccessFile(tmpFile.toFile(), "rw"); var channel = raf.getChannel()) { + var workBuffer = + channel.map(FileChannel.MapMode.READ_WRITE, 0, length * WORD_SIZE) + .asLongBuffer(); + + int width = 
Math.min(Integer.highestOneBit(length), Integer.highestOneBit(internalSortLimit)); + + // Do in-memory sorting up until internalSortLimit first + for (int i = 0; i < length; i += width) { + sort(start + i, Math.min(width, length-i)); + } + + // Then merge sort on disk for the rest + for (; width < length; width*=2) { + + for (int i = 0; i < length; i += 2*width) { + merge(start, i, Math.min(i+width, length), Math.min(i+2*width, length), workBuffer); + } + + workBuffer.clear(); + multimapFileLong.write(workBuffer, start); + } + + } + finally { + tmpFile.toFile().delete(); + } + } + + void merge(long offset, int left, int right, int end, LongBuffer workBuffer) { + int i = left; + int j = right; + + for (int k = left; k < end; k++) { + final long bufferI = multimapFileLong.get(offset+i); + final long bufferJ = multimapFileLong.get(offset+j); + + if (i < right && (j >= end || bufferI < bufferJ)) { + workBuffer.put(k, bufferI); + i++; + } + else { + workBuffer.put(k, bufferJ); + j++; + } + } + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/auth/AuthConfigurationModule.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/auth/AuthConfigurationModule.java new file mode 100644 index 00000000..3a6772ae --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/auth/AuthConfigurationModule.java @@ -0,0 +1,12 @@ +package nu.marginalia.wmsa.auth; + +import com.google.inject.AbstractModule; +import com.google.inject.name.Names; + +import java.nio.file.Path; + +public class AuthConfigurationModule extends AbstractModule { + public void configure() { + bind(Path.class).annotatedWith(Names.named("password-file")).toInstance(Path.of("/var/lib/wmsa/password.dat")); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/auth/AuthMain.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/auth/AuthMain.java new file mode 100644 index 00000000..385dfc47 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/auth/AuthMain.java @@ -0,0 
+1,28 @@ +package nu.marginalia.wmsa.auth; + +import com.google.inject.Guice; +import com.google.inject.Inject; +import com.google.inject.Injector; +import nu.marginalia.wmsa.configuration.MainClass; +import nu.marginalia.wmsa.configuration.ServiceDescriptor; +import nu.marginalia.wmsa.configuration.module.ConfigurationModule; +import nu.marginalia.wmsa.configuration.server.Initialization; + +import java.io.IOException; + +public class AuthMain extends MainClass { + + @Inject + public AuthMain(AuthService service) throws IOException { + } + + public static void main(String... args) { + init(ServiceDescriptor.AUTH, args); + + Injector injector = Guice.createInjector( + new AuthConfigurationModule(), + new ConfigurationModule()); + injector.getInstance(AuthMain.class); + injector.getInstance(Initialization.class).setReady(); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/auth/AuthService.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/auth/AuthService.java new file mode 100644 index 00000000..d34de843 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/auth/AuthService.java @@ -0,0 +1,105 @@ +package nu.marginalia.wmsa.auth; + +import com.github.jknack.handlebars.internal.Files; +import com.google.inject.Inject; +import com.google.inject.name.Named; +import nu.marginalia.wmsa.auth.model.LoginFormModel; +import nu.marginalia.wmsa.configuration.server.*; +import nu.marginalia.wmsa.renderer.mustache.MustacheRenderer; +import nu.marginalia.wmsa.renderer.mustache.RendererFactory; +import org.apache.http.HttpStatus; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import spark.Request; +import spark.Response; +import spark.Spark; + +import java.io.FileReader; +import java.io.IOException; +import java.nio.file.Path; +import java.util.Objects; +import java.util.Optional; + +import static spark.Spark.*; + +public class AuthService extends Service { + + private final Logger logger = LoggerFactory.getLogger(getClass()); + 
private String password; + + private final RateLimiter rateLimiter = RateLimiter.forLogin(); + private final MustacheRenderer loginFormRenderer; + + @Inject + public AuthService(@Named("service-host") String ip, + @Named("service-port") Integer port, + @Named("password-file") Path topSecretPasswordFile, + RendererFactory rendererFactory, + Initialization initialization, + MetricsServer metricsServer) throws IOException { + + super(ip, port, initialization, metricsServer); + + try (var is = new FileReader(topSecretPasswordFile.toFile())) { + password = Files.read(is); + } catch (IOException e) { + logger.error("Could not read password from file " + topSecretPasswordFile, e); + } + loginFormRenderer = rendererFactory.renderer("auth/login"); + + Spark.path("public/api", () -> { + before((req, rsp) -> { + logger.info("{} {}", req.requestMethod(), req.pathInfo()); + }); + + post("/login", this::login); + get("/login", this::loginForm); + }); + Spark.path("api", () -> { + get("/is-logged-in", this::isLoggedIn); + }); + } + + private Object loginForm(Request request, Response response) throws IOException { + String redir = Objects.requireNonNull(request.queryParams("redirect")); + String service = Objects.requireNonNull(request.queryParams("service")); + + return loginFormRenderer.render(new LoginFormModel(service, redir)); + } + + private Object login(Request request, Response response) { + var redir = Objects.requireNonNullElse(request.queryParams("redirect"), "/"); + + if (isLoggedIn(request, response)) { + response.redirect(redir); + return ""; + } + + if (!rateLimiter.isAllowed(Context.fromRequest(request))) { + Spark.halt(429, "Too many requests"); + return null; + } + + if (Objects.equals(password, request.queryParams("password"))) { + request.session(true).attribute("logged-in", true); + response.redirect(redir); + return ""; + } + + response.status(HttpStatus.SC_FORBIDDEN); + return "

    Bad password!

    "; + } + + public boolean isLoggedIn(Request request, Response response) { + var session = request.session(false); + + if (null == session) { + return false; + } + + return Optional.ofNullable(session.attribute("logged-in")) + .map(Boolean.class::cast) + .orElse(false); + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/auth/api/ApiMain.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/auth/api/ApiMain.java new file mode 100644 index 00000000..03f7ef31 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/auth/api/ApiMain.java @@ -0,0 +1,32 @@ +package nu.marginalia.wmsa.auth.api; + +import com.google.inject.Guice; +import com.google.inject.Inject; +import com.google.inject.Injector; +import nu.marginalia.wmsa.auth.AuthConfigurationModule; +import nu.marginalia.wmsa.auth.AuthMain; +import nu.marginalia.wmsa.auth.AuthService; +import nu.marginalia.wmsa.configuration.MainClass; +import nu.marginalia.wmsa.configuration.ServiceDescriptor; +import nu.marginalia.wmsa.configuration.module.ConfigurationModule; +import nu.marginalia.wmsa.configuration.module.DatabaseModule; +import nu.marginalia.wmsa.configuration.server.Initialization; + +import java.io.IOException; + +public class ApiMain extends MainClass { + + @Inject + public ApiMain(ApiService service) throws IOException { + } + + public static void main(String... 
args) { + init(ServiceDescriptor.API, args); + + Injector injector = Guice.createInjector( + new DatabaseModule(), + new ConfigurationModule()); + injector.getInstance(ApiMain.class); + injector.getInstance(Initialization.class).setReady(); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/auth/api/ApiService.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/auth/api/ApiService.java new file mode 100644 index 00000000..45874c92 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/auth/api/ApiService.java @@ -0,0 +1,127 @@ +package nu.marginalia.wmsa.auth.api; + +import com.google.common.base.Strings; +import com.google.gson.Gson; +import com.google.gson.GsonBuilder; +import com.google.inject.Inject; +import com.google.inject.name.Named; +import com.zaxxer.hikari.HikariDataSource; +import nu.marginalia.wmsa.auth.api.model.ApiLicense; +import nu.marginalia.wmsa.configuration.server.*; +import nu.marginalia.wmsa.edge.search.client.EdgeSearchClient; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import spark.Request; +import spark.Response; +import spark.Spark; + +import java.io.IOException; +import java.util.concurrent.ConcurrentHashMap; + +public class ApiService extends Service { + + private final Logger logger = LoggerFactory.getLogger(getClass()); + private final Gson gson = new GsonBuilder().create(); + private final EdgeSearchClient searchClient; + private final HikariDataSource dataSource; + private final ConcurrentHashMap licenseCache = new ConcurrentHashMap<>(); + private final ConcurrentHashMap rateLimiters = new ConcurrentHashMap<>(); + + @Inject + public ApiService(@Named("service-host") String ip, + @Named("service-port") Integer port, + Initialization initialization, + MetricsServer metricsServer, + EdgeSearchClient searchClient, + HikariDataSource dataSource) + throws IOException + { + super(ip, port, initialization, metricsServer); + this.searchClient = searchClient; + this.dataSource = dataSource; + + 
Spark.get("/public/api/", (rq, rsp) -> { + logger.info("Redireting to info"); + rsp.redirect("https://memex.marginalia.nu/projects/edge/api.gmi"); + return ""; + }); + Spark.get("/public/api/:key/", this::getKeyInfo, gson::toJson); + Spark.get("/public/api/:key/search/*", this::search, gson::toJson); + } + + private Object getKeyInfo(Request request, Response response) { + return getLicense(request); + } + + private Object search(Request request, Response response) { + response.type("application/json"); + + String[] args = request.splat(); + if (args.length != 1) { + Spark.halt(400); + } + + var license = getLicense(request); + if (null == license) { + Spark.halt(401); + return "Forbidden"; + } + + RateLimiter rl = getRateLimiter(license); + + if (rl != null && !rl.isAllowed()) { + Spark.halt(503); + return "Slow down"; + } + + int count = Integer.parseInt(request.queryParamOrDefault("count", "20")); + int index = Integer.parseInt(request.queryParamOrDefault("index", "3")); + + logger.info("{} Search {}", license.key, args[0]); + + return searchClient.query(Context.fromRequest(request), args[0], count, index) + .blockingFirst().withLicense(license.getLicense()); + } + + private RateLimiter getRateLimiter(ApiLicense license) { + if (license.rate > 0) { + return rateLimiters.computeIfAbsent(license, l -> RateLimiter.custom(license.rate)); + } + else { + return null; + } + } + + + private ApiLicense getLicense(Request request) { + final String key = request.params("key"); + + if (Strings.isNullOrEmpty(key)) { + Spark.halt(400); + } + + var cachedLicense = licenseCache.get(key.toLowerCase()); + if (cachedLicense != null) { + return cachedLicense; + } + + try (var conn = dataSource.getConnection()) { + try (var stmt = conn.prepareStatement("SELECT LICENSE,NAME,RATE FROM EC_API_KEY WHERE LICENSE_KEY=?")) { + stmt.setString(1, key); + var rsp = stmt.executeQuery(); + if (rsp.next()) { + var license = new ApiLicense(key.toLowerCase(), rsp.getString(1), rsp.getString(2), 
rsp.getInt(3)); + licenseCache.put(key.toLowerCase(), license); + return license; + } + } + } + catch (Exception ex) { + logger.error("Bad request", ex); + Spark.halt(500); + } + + Spark.halt(401); + return null; // unreachable + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/auth/api/model/ApiLicense.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/auth/api/model/ApiLicense.java new file mode 100644 index 00000000..21997a2b --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/auth/api/model/ApiLicense.java @@ -0,0 +1,19 @@ +package nu.marginalia.wmsa.auth.api.model; + +import lombok.AllArgsConstructor; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.NonNull; + +@Getter +@AllArgsConstructor +@EqualsAndHashCode +public class ApiLicense { + @NonNull + public String key; + @NonNull + public String license; + @NonNull + public String name; + public int rate; +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/auth/api/model/ApiSearchResult.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/auth/api/model/ApiSearchResult.java new file mode 100644 index 00000000..1a6baf26 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/auth/api/model/ApiSearchResult.java @@ -0,0 +1,20 @@ +package nu.marginalia.wmsa.auth.api.model; + +import lombok.AllArgsConstructor; +import lombok.Getter; +import nu.marginalia.wmsa.edge.model.search.EdgeUrlDetails; + +@AllArgsConstructor @Getter +public class ApiSearchResult { + public String url; + public String title; + public String description; + public double quality; + + public ApiSearchResult(EdgeUrlDetails url) { + this.url = url.url.toString(); + this.title = url.getTitle(); + this.description = url.getDescription(); + this.quality = url.getTermScore(); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/auth/api/model/ApiSearchResults.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/auth/api/model/ApiSearchResults.java new file 
mode 100644 index 00000000..4551755f --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/auth/api/model/ApiSearchResults.java @@ -0,0 +1,17 @@ +package nu.marginalia.wmsa.auth.api.model; + +import lombok.AllArgsConstructor; +import lombok.Getter; +import lombok.With; + +import java.util.List; + +@AllArgsConstructor +@Getter +@With +public class ApiSearchResults { + private final String license; + + private final String query; + private final List results; +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/auth/client/AuthClient.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/auth/client/AuthClient.java new file mode 100644 index 00000000..81cc95ba --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/auth/client/AuthClient.java @@ -0,0 +1,42 @@ +package nu.marginalia.wmsa.auth.client; + +import com.google.inject.Inject; +import io.reactivex.rxjava3.core.Observable; +import kotlin.text.Charsets; +import nu.marginalia.wmsa.client.AbstractDynamicClient; +import nu.marginalia.wmsa.configuration.ServiceDescriptor; +import nu.marginalia.wmsa.configuration.server.Context; +import org.apache.http.HttpStatus; +import spark.Request; +import spark.Response; +import spark.Spark; + +import java.net.URLEncoder; +import java.util.concurrent.TimeUnit; + + +public class AuthClient extends AbstractDynamicClient { + @Inject + public AuthClient() { + super(ServiceDescriptor.AUTH); + } + + public Observable isLoggedIn(Context ctx) { + return get(ctx, "/api/is-logged-in").map(Boolean::parseBoolean); + } + + public void redirectToLoginIfUnauthenticated(String domain, Request req, Response rsp) { + if (!isLoggedIn(Context.fromRequest(req)).timeout(1, TimeUnit.SECONDS).blockingFirst()) { + rsp.redirect(req.headers("X-Extern-Domain") + "/auth/login?service="+domain + +"&redirect="+ URLEncoder.encode(req.headers("X-Extern-Url"), Charsets.UTF_8)); + Spark.halt(); + } + } + + + public void requireLogIn(Context ctx) { + if 
(!isLoggedIn(ctx).timeout(1, TimeUnit.SECONDS).blockingFirst()) { + Spark.halt(HttpStatus.SC_FORBIDDEN); + } + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/auth/model/LoginFormModel.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/auth/model/LoginFormModel.java new file mode 100644 index 00000000..09029161 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/auth/model/LoginFormModel.java @@ -0,0 +1,10 @@ +package nu.marginalia.wmsa.auth.model; + +import lombok.AllArgsConstructor; +import lombok.Getter; + +@Getter @AllArgsConstructor +public class LoginFormModel { + public final String service; + public final String redirect; +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/client/AbortingScheduler.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/client/AbortingScheduler.java new file mode 100644 index 00000000..fc06bd26 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/client/AbortingScheduler.java @@ -0,0 +1,63 @@ +package nu.marginalia.wmsa.client; + +import com.google.common.util.concurrent.ThreadFactoryBuilder; +import io.reactivex.rxjava3.core.Scheduler; +import io.reactivex.rxjava3.schedulers.Schedulers; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.ThreadFactory; + +public class AbortingScheduler implements AutoCloseable { + private final String name; + private final ThreadFactory threadFactory; + + private final Logger logger = LoggerFactory.getLogger(getClass()); + + @Nullable + private ExecutorService executorService; + + public AbortingScheduler(String name) { + this.name = name; + threadFactory = new ThreadFactoryBuilder() + .setNameFormat(name+"client--%d") + .setUncaughtExceptionHandler(this::handleException) + .build(); + } + + private void handleException(Thread thread, Throwable 
throwable) { + logger.error("Uncaught exception during Client IO in thread {}", thread.getName(), throwable); + } + + public synchronized Scheduler get() { + return Schedulers.from(getExecutorService(), + true, + false); + } + + public synchronized void abort() { + if (null != executorService) { + executorService.shutdownNow(); + executorService = Executors.newFixedThreadPool(16, threadFactory); + } + } + + @Nonnull + private synchronized ExecutorService getExecutorService() { + if (null == executorService) { + executorService = Executors.newFixedThreadPool(16, threadFactory); + } + return executorService; + } + + @Override + public synchronized void close() { + if (null != executorService) { + executorService.shutdown(); + } + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/client/AbstractClient.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/client/AbstractClient.java new file mode 100644 index 00000000..f82e7167 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/client/AbstractClient.java @@ -0,0 +1,501 @@ +package nu.marginalia.wmsa.client; + +import com.google.gson.Gson; +import com.google.gson.GsonBuilder; +import io.reactivex.rxjava3.core.Observable; +import io.reactivex.rxjava3.core.ObservableSource; +import io.reactivex.rxjava3.plugins.RxJavaPlugins; +import lombok.SneakyThrows; +import nu.marginalia.wmsa.client.exception.LocalException; +import nu.marginalia.wmsa.client.exception.NetworkException; +import nu.marginalia.wmsa.client.exception.RemoteException; +import nu.marginalia.wmsa.client.exception.RouteNotConfiguredException; +import nu.marginalia.wmsa.configuration.server.Context; +import okhttp3.*; +import org.apache.http.HttpHost; +import org.apache.logging.log4j.ThreadContext; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.slf4j.Marker; +import org.slf4j.MarkerFactory; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.OutputStreamWriter; +import 
java.net.ConnectException; +import java.util.Arrays; +import java.util.List; +import java.util.concurrent.TimeUnit; +import java.util.zip.GZIPOutputStream; + +public abstract class AbstractClient implements AutoCloseable { + public static final String CONTEXT_OUTBOUND_REQUEST = "outbound-request"; + private final Gson gson = new GsonBuilder().create(); + private final Logger logger = LoggerFactory.getLogger(getClass()); + private final Marker httpMarker = MarkerFactory.getMarker("HTTP"); + + private final OkHttpClient client; + + private boolean quiet; + private String url; + + public void setTimeout(int timeout) { + this.timeout = timeout; + } + + private int timeout; + private volatile boolean alive; + + private final Thread livenessMonitor; + + public AbstractClient(String host, int port, int timeout) { + logger.info("Creating client for {}", getClass().getSimpleName()); + + this.timeout = timeout; + client = new OkHttpClient.Builder() + .connectTimeout(100, TimeUnit.MILLISECONDS) + .readTimeout(6000, TimeUnit.SECONDS) + .retryOnConnectionFailure(true) + .followRedirects(true) + .build(); + url = new HttpHost(host, port).toURI(); + + RxJavaPlugins.setErrorHandler(e -> { + if (e.getMessage() == null) { + logger.error("Error", e); + } + else { + logger.error("Error {}: {}", e.getClass().getSimpleName(), e.getMessage()); + } + }); + livenessMonitor = new Thread(this::monitorLiveness, host + "-monitor"); + livenessMonitor.setDaemon(true); + livenessMonitor.start(); + + logger.info("Finished creating client for {}", getClass().getSimpleName()); + } + + public void setServiceRoute(String hostname, int port) { + scheduler().abort(); + url = new HttpHost(hostname, port).toURI(); + } + + @SneakyThrows + private void monitorLiveness() { + Thread.sleep(100); // Wait for initialization + for (;;) { + try { + alive = isResponsive(); + } + catch (java.util.concurrent.TimeoutException tex) { + // + } + catch (Exception ex) { + logger.warn("Oops", ex); + } + synchronized 
(livenessMonitor) { + if (alive) { + livenessMonitor.wait(1000); + } + } + if (!alive) { + Thread.sleep(100); + } + } + } + + @Override + public void close() { + livenessMonitor.interrupt(); + scheduler().close(); + } + + public abstract AbortingScheduler scheduler(); + + public void setQuiet(boolean quiet) { + this.quiet = quiet; + } + + public abstract String name(); + + public synchronized boolean isResponsive() throws java.util.concurrent.TimeoutException { + Context ctx = Context.internal("ping"); + var req = ctx.paint(new Request.Builder()).url(url + "/internal/ping").get().build(); + + var call = client.newCall(req); + + return Observable.just(call) + .subscribeOn(scheduler().get()) + .map(Call::execute) + .map(this::getResponseStatus) + .flatMap(line -> validateStatus(line, req).timeout(5000, TimeUnit.SECONDS).onErrorReturn(e -> 500)) + .onErrorReturn(error -> 500) + .map(HttpStatusCode::new) + .map(HttpStatusCode::isGood) + .blockingFirst(); + } + + public synchronized boolean isAccepting() { + Context ctx = Context.internal("ready"); + + var req = ctx.paint(new Request.Builder()).url(url + "/internal/ready").get().build(); + + var call = client.newCall(req); + + return Observable.just(call) + .subscribeOn(scheduler().get()) + .map(Call::execute) + .map(this::getResponseStatus) + .flatMap(line -> validateStatus(line, req)) + .timeout(100, TimeUnit.MILLISECONDS) + .onErrorReturn(error -> 500) + .map(HttpStatusCode::new) + .map(HttpStatusCode::isGood) + .blockingFirst(); + } + + @SneakyThrows + protected synchronized Observable post(Context ctx, String endpoint, Object data) { + + ensureAlive(); + + RequestBody body = RequestBody.create( + MediaType.parse("application/json; charset=utf-8"), + json(data)); + + var req = ctx.paint(new Request.Builder()).url(url + endpoint).post(body).build(); + var call = client.newCall(req); + + return Observable + .just(call) + .map((c) -> { + ThreadContext.put("outbound-request", url + endpoint); + return c; + }) + 
.subscribeOn(scheduler().get()) + .map(this::logInbound) + .map(Call::execute) + .map(this::logOutbound) + .map(this::getResponseStatus) + .retryWhen(this::retryHandler) + .flatMap(line -> validateStatus(line, req)) + .map(HttpStatusCode::new) + .timeout(timeout, TimeUnit.SECONDS) + .doFinally(() -> ThreadContext.remove("outbound-request")); + } + + + @SneakyThrows + protected synchronized Observable postGet(Context ctx, String endpoint, Object data, Class returnType) { + + ensureAlive(); + + RequestBody body = RequestBody.create( + MediaType.parse("application/json"), + json(data)); + + var req = ctx.paint(new Request.Builder()).url(url + endpoint).post(body).build(); + var call = client.newCall(req); + + return Observable.just(call) + .map((c) -> { + ThreadContext.put("outbound-request", url + endpoint); + return c; + }) + .subscribeOn(scheduler().get()) + .map(this::logInbound) + .map(Call::execute) + .map(this::logOutbound) + .retryWhen(this::retryHandler) + .map(rsp -> validateResponseStatus(rsp, req, 200)) + .map(rsp -> getEntity(rsp, returnType)) + .timeout(timeout, TimeUnit.SECONDS) + .doFinally(() -> ThreadContext.remove("outbound-request")); + } + + protected synchronized Observable post(Context ctx, String endpoint, String data, MediaType mediaType) { + ensureAlive(); + + var body = RequestBody.create(mediaType, data); + + var req = ctx.paint(new Request.Builder()).url(url + endpoint).post(body).build(); + var call = client.newCall(req); + + + return Observable.just(call) + .map((c) -> { + ThreadContext.put(CONTEXT_OUTBOUND_REQUEST, url + endpoint); + return c; + }) + .subscribeOn(scheduler().get()) + .map(this::logInbound) + .map(Call::execute) + .map(this::logOutbound) + .map(this::getResponseStatus) + .retryWhen(this::retryHandler) + .flatMap(line -> validateStatus(line, req)) + .map(HttpStatusCode::new) + .timeout(timeout, TimeUnit.SECONDS) + .doFinally(() -> ThreadContext.remove("outbound-request")); + } + + protected synchronized Observable 
get(Context ctx, String endpoint, Class type) { + ensureAlive(); + + var req = ctx.paint(new Request.Builder()).url(url + endpoint).get().build(); + var call = client.newCall(req); + + return Observable.just(call) + .map((c) -> { + ThreadContext.put("outbound-request", url + endpoint); + return c; + }) + .subscribeOn(scheduler().get()) + .map(this::logInbound) + .map(Call::execute) + .map(this::logOutbound) + .map(rsp -> validateResponseStatus(rsp, req, 200)) + .map(rsp -> getEntity(rsp, type)) + .retryWhen(this::retryHandler) + .timeout(timeout, TimeUnit.SECONDS) + .doFinally(() -> ThreadContext.remove("outbound-request")); + } + + @SuppressWarnings("unchecked") + protected synchronized Observable> getList(Context ctx, String endpoint, Class type) { + ensureAlive(); + + var req = ctx.paint(new Request.Builder()).url(url + endpoint).get().build(); + var call = client.newCall(req); + + return Observable.just(call) + .map((c) -> { + ThreadContext.put("outbound-request", url + endpoint); + return c; + }) + .subscribeOn(scheduler().get()) + .map(this::logInbound) + .map(Call::execute) + .map(this::logOutbound) + .map(rsp -> validateResponseStatus(rsp, req, 200)) + .map(rsp -> Arrays.asList((T[])getEntity(rsp, type.arrayType()))) + .retryWhen(this::retryHandler) + .timeout(timeout, TimeUnit.SECONDS) + .doFinally(() -> ThreadContext.remove("outbound-request")); + } + + + protected synchronized Observable getBinary(Context ctx, String endpoint) { + ensureAlive(); + + var req = ctx.paint(new Request.Builder()).url(url + endpoint).get().build(); + var call = client.newCall(req); + + return Observable.just(call) + .map((c) -> { + ThreadContext.put("outbound-request", url + endpoint); + return c; + }) + .subscribeOn(scheduler().get()) + .map(this::logInbound) + .map(Call::execute) + .map(this::logOutbound) + .map(rsp -> validateResponseStatus(rsp, req, 200)) + .map(this::getBinaryEntity) + .retryWhen(this::retryHandler) + .timeout(timeout, TimeUnit.SECONDS) + .doFinally(() -> 
ThreadContext.remove("outbound-request")); + } + + protected synchronized Observable get(Context ctx, String endpoint) { + ensureAlive(); + + var req = ctx.paint(new Request.Builder()).url(url + endpoint).get().build(); + var call = client.newCall(req); + + return Observable.just(call) + .map((c) -> { + ThreadContext.put("outbound-request", url + endpoint); + return c; + }) + .subscribeOn(scheduler().get()) + .map(this::logInbound) + .map(Call::execute) + .map(this::logOutbound) + .map(rsp -> validateResponseStatus(rsp, req,200)) + .map(this::getText) + .retryWhen(this::retryHandler) + .timeout(timeout, TimeUnit.SECONDS) + .doFinally(() -> ThreadContext.remove("outbound-request")); + } + + protected synchronized Observable delete(Context ctx, String endpoint) { + ensureAlive(); + + var req = ctx.paint(new Request.Builder()).url(url + endpoint).delete().build(); + var call = client.newCall(req); + + return Observable.just(call) + .map((c) -> { + ThreadContext.put("outbound-request", url + endpoint); + return c; + }) + .subscribeOn(scheduler().get()) + .map(this::logInbound) + .map(Call::execute) + .map(this::logOutbound) + .map(this::getResponseStatus) + .flatMap(line -> validateStatus(line, req)) + .map(HttpStatusCode::new) + .retryWhen(this::retryHandler) + .timeout(timeout, TimeUnit.SECONDS) + .doFinally(() -> ThreadContext.remove("outbound-request")); + } + + + @SneakyThrows + private Call logInbound(Call outgoing) { + return outgoing; + } + + @SneakyThrows + private Response logOutbound(Response incoming) { + return incoming; + } + + @SneakyThrows + private void ensureAlive() { + if (!isAlive()) { + wait(2000); + if (!isAlive()) { + throw new RouteNotConfiguredException("Route not configured for " + name()); + } + } + } + + + @SneakyThrows + public void waitReady() { + boolean accepting = isAccepting(); + if (accepting) { + return; + } + + logger.info("Waiting for " + name()); + do { + Thread.sleep(1000); + } while (!isAccepting()); + } + + + private 
ObservableSource retryHandler(Observable error) { + return error.flatMap(this::filterRetryableExceptions); + } + + private Observable filterRetryableExceptions(Throwable error) throws Throwable { + + synchronized (livenessMonitor) { + livenessMonitor.notifyAll(); + } + + if (error.getClass().equals(RouteNotConfiguredException.class)) { + logger.error("Network error {}", error.getMessage()); + return Observable.empty().delay(50, TimeUnit.MILLISECONDS); + } + else if (error.getClass().equals(NetworkException.class)) { + logger.error("Network error {}", error.getMessage()); + return Observable.empty().delay(1, TimeUnit.SECONDS); + } + else if (error.getClass().equals(ConnectException.class)) { + logger.error("Network error {}", error.getMessage()); + return Observable.empty().delay(1, TimeUnit.SECONDS); + } + + if (!quiet) { + if (error.getMessage() != null) { + logger.error("{} {}", error.getClass().getSimpleName(), error.getMessage()); + } + else { + logger.error("Error ", error); + } + } + throw error; + } + + private Observable validateStatus(int status, Request request) { + if (status == org.apache.http.HttpStatus.SC_OK) + return Observable.just(status); + if (status == org.apache.http.HttpStatus.SC_ACCEPTED) + return Observable.just(status); + if (status == org.apache.http.HttpStatus.SC_CREATED) + return Observable.just(status); + + return Observable.error(new RemoteException(name() + " responded status code " + status + " " + request.url())); + } + + private Response validateResponseStatus(Response response, Request req, int expected) { + if (expected != response.code()) { + response.close(); + + throw new RemoteException(name() + " responded status code " + response.code() + ", " + req.method() + " " + req.url().toString()); + } + return response; + } + + private int getResponseStatus(Response response) { + try (response) { + return response.code(); + } + } + + + @SneakyThrows + private T getEntity(Response response, Class clazz) { + try (response) { + return 
gson.fromJson(response.body().charStream(), clazz); + } + catch (Exception ex) { + throw ex; + } + } + @SneakyThrows + private String getText(Response response) { + try (response) { + return response.body().string(); + } + + } + + @SneakyThrows + private byte[] getBinaryEntity(Response response) { + try (response) { + return response.body().bytes(); + } + } + public boolean isAlive() { + return alive; + } + + private String json(Object o) { + try { + return gson.toJson(o); + } + catch (Exception ex) { + throw new LocalException(ex); + } + } + + private byte[] compressedJson(Object o) throws IOException { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + GZIPOutputStream gos = new GZIPOutputStream(baos); + try { + gson.toJson(o, new OutputStreamWriter(gos)); + gos.finish(); + return baos.toByteArray(); + } + catch (Exception ex) { + throw new LocalException(ex); + } + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/client/AbstractDynamicClient.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/client/AbstractDynamicClient.java new file mode 100644 index 00000000..dcdbbbc7 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/client/AbstractDynamicClient.java @@ -0,0 +1,52 @@ +package nu.marginalia.wmsa.client; + +import io.reactivex.rxjava3.core.Observable; +import lombok.SneakyThrows; +import nu.marginalia.wmsa.configuration.ServiceDescriptor; +import nu.marginalia.wmsa.configuration.server.Context; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.annotation.Nonnull; + +public class AbstractDynamicClient extends AbstractClient { + private final ServiceDescriptor service; + private final Logger logger = LoggerFactory.getLogger(getClass()); + private final AbortingScheduler scheduler; + + public AbstractDynamicClient(@Nonnull ServiceDescriptor service) { + super("localhost", service.port, 10); + + this.service = service; + this.scheduler = new AbortingScheduler(name()); + } + + @Override + 
/** Wraps a numeric HTTP status code and classifies it as successful or not. */
public final class HttpStatusCode {

    /** The raw status code, exactly as passed to the constructor. */
    public final int code;

    public HttpStatusCode(int code) {
        this.code = code;
    }

    /**
     * @return {@code true} for 200 (OK), 201 (Created) and 202 (Accepted); {@code false} for
     *         every other code
     */
    public boolean isGood() {
        return switch (code) {
            case java.net.HttpURLConnection.HTTP_OK,
                 java.net.HttpURLConnection.HTTP_CREATED,
                 java.net.HttpURLConnection.HTTP_ACCEPTED -> true;
            default -> false;
        };
    }
}
/**
 * Unchecked base type of the client exceptions ({@code LocalException},
 * {@code NetworkException}, {@code RemoteException}, ...).
 *
 * <p>{@link #fillInStackTrace()} is overridden to do nothing, so instances are created without
 * recording a stack trace.</p>
 */
public class MessagingException extends RuntimeException {

    public MessagingException() {
        super();
    }

    public MessagingException(String message) {
        super(message);
    }

    public MessagingException(Throwable cause) {
        super(cause);
    }

    public MessagingException(String message, Throwable cause) {
        super(message, cause);
    }

    /**
     * Skips stack trace capture.
     *
     * @return this exception, unchanged
     */
    @Override
    public Throwable fillInStackTrace() {
        // Deliberately does not delegate to super: no stack trace is recorded.
        return this;
    }
}
RemoteException() { + } + public RemoteException(String message) { + super(message); + } + public RemoteException(Throwable cause) { + super(cause); + } + public RemoteException(String message, Throwable cause) { + super(message, cause); + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/client/exception/RouteNotConfiguredException.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/client/exception/RouteNotConfiguredException.java new file mode 100644 index 00000000..7f2a4c40 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/client/exception/RouteNotConfiguredException.java @@ -0,0 +1,15 @@ +package nu.marginalia.wmsa.client.exception; + +public class RouteNotConfiguredException extends MessagingException { + public RouteNotConfiguredException() { + } + public RouteNotConfiguredException(String message) { + super(message); + } + public RouteNotConfiguredException(Throwable cause) { + super(cause); + } + public RouteNotConfiguredException(String message, Throwable cause) { + super(message, cause); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/client/exception/TimeoutException.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/client/exception/TimeoutException.java new file mode 100644 index 00000000..35adc152 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/client/exception/TimeoutException.java @@ -0,0 +1,15 @@ +package nu.marginalia.wmsa.client.exception; + +public class TimeoutException extends MessagingException { + public TimeoutException() { + } + public TimeoutException(String message) { + super(message); + } + public TimeoutException(Throwable cause) { + super(cause); + } + public TimeoutException(String message, Throwable cause) { + super(message, cause); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/MainClass.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/MainClass.java new file mode 100644 index 00000000..ab347f46 --- 
/dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/MainClass.java @@ -0,0 +1,56 @@ +package nu.marginalia.wmsa.configuration; + +import io.prometheus.client.hotspot.DefaultExports; +import io.reactivex.rxjava3.exceptions.UndeliverableException; +import io.reactivex.rxjava3.plugins.RxJavaPlugins; +import lombok.SneakyThrows; +import nu.marginalia.wmsa.client.exception.NetworkException; +import org.mariadb.jdbc.Driver; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.net.SocketTimeoutException; +import java.net.UnknownHostException; +import java.util.Arrays; + +public abstract class MainClass { + private Logger logger = LoggerFactory.getLogger(getClass()); + + public MainClass() { + + RxJavaPlugins.setErrorHandler(ex -> { + if (ex instanceof UndeliverableException) { + ex = ex.getCause(); + } + + if (ex instanceof SocketTimeoutException) { + logger.warn("SocketTimeoutException"); + } + else if (ex instanceof UnknownHostException) { + logger.warn("UnknownHostException"); + } + else if (ex instanceof NetworkException) { + logger.warn("NetworkException", ex); + } + else { + logger.error("Uncaught exception", ex); + } + }); + + } + + @SneakyThrows + protected static void init(ServiceDescriptor service, String... 
args) { + System.setProperty("log4j2.isThreadContextMapInheritable", "true"); + System.setProperty("isThreadContextMapInheritable", "true"); + System.setProperty("service-name", service.name); + + org.mariadb.jdbc.Driver driver = new Driver(); + + if (Arrays.asList(args).contains("go-no-go")) { + System.setProperty("go-no-go", "true"); + } + DefaultExports.initialize(); + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/ServiceDescriptor.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/ServiceDescriptor.java new file mode 100644 index 00000000..9bfe2a2b --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/ServiceDescriptor.java @@ -0,0 +1,101 @@ +package nu.marginalia.wmsa.configuration; + +import nu.marginalia.wmsa.auth.AuthMain; +import nu.marginalia.wmsa.auth.api.ApiMain; +import nu.marginalia.wmsa.configuration.command.Command; +import nu.marginalia.wmsa.configuration.command.ListCommand; +import nu.marginalia.wmsa.configuration.command.StartCommand; +import nu.marginalia.wmsa.configuration.command.VersionCommand; +import nu.marginalia.wmsa.data_store.DataStoreMain; +import nu.marginalia.wmsa.edge.archive.EdgeArchiveMain; +import nu.marginalia.wmsa.edge.assistant.EdgeAssistantMain; +import nu.marginalia.wmsa.edge.crawler.EdgeCrawlerMain; +import nu.marginalia.wmsa.edge.dating.DatingMain; +import nu.marginalia.wmsa.edge.director.EdgeDirectorMain; +import nu.marginalia.wmsa.edge.index.EdgeIndexMain; +import nu.marginalia.wmsa.edge.search.EdgeSearchMain; +import nu.marginalia.wmsa.memex.MemexMain; +import nu.marginalia.wmsa.podcasts.PodcastScraperMain; +import nu.marginalia.wmsa.renderer.RendererMain; +import nu.marginalia.wmsa.resource_store.ResourceStoreMain; +import nu.marginalia.wmsa.smhi.scraper.SmhiScraperMain; +import org.apache.logging.log4j.core.lookup.MainMapLookup; + +import java.util.Map; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +public enum 
ServiceDescriptor { + RESOURCE_STORE("resource-store", 5000, ResourceStoreMain.class), + DATA_STORE("data-store", 5001, DataStoreMain.class), + RENDERER("renderer", 5002, RendererMain.class), + AUTH("auth", 5003, AuthMain.class), + API("api", 5004, ApiMain.class), + + SMHI_SCRAPER("smhi-scraper",5012, SmhiScraperMain.class), + PODCST_SCRAPER("podcast-scraper", 5013, PodcastScraperMain.class), + + EDGE_CRAWLER("edge-crawler", 5020, EdgeCrawlerMain.class), + EDGE_INDEX("edge-index", 5021, EdgeIndexMain.class), + EDGE_DIRECTOR("edge-director", 5022, EdgeDirectorMain.class), + EDGE_SEARCH("edge-search", 5023, EdgeSearchMain.class), + EDGE_ARCHIVE("edge-archive", 5024, EdgeArchiveMain.class), + EDGE_ASSISTANT("edge-assistant", 5025, EdgeAssistantMain.class), + + EDGE_MEMEX("memex", 5030, MemexMain.class), + + DATING("dating", 5070, DatingMain.class), + + TEST_1("test-1", 0, null), + TEST_2("test-2", 0, null); + + public static ServiceDescriptor byName(String name) { + for (var v : values()) { + if (v.name.equals(name)) { + return v; + } + } + throw new IllegalArgumentException(name); + } + public final String name; + public final Class mainClass; + public final int port; + + ServiceDescriptor(String name, int port, Class mainClass) { + this.name = name; + this.port = port; + this.mainClass = mainClass; + } + + public String toString() { + return name; + } + + public String describeService() { + return String.format("%s %s", name, mainClass.getName()); + } + + public static void main(String... args) { + + MainMapLookup.setMainArguments(args); + Map functions = Stream.of(new ListCommand(), + new StartCommand(), + new VersionCommand() + ).collect(Collectors.toMap(c -> c.name, c -> c)); + + if(args.length > 0) { + functions.getOrDefault(args[0], new Command("") { + @Override + public void execute(String... 
/**
 * Locates the WMSA home directory.
 *
 * <p>The location is taken from the first of these that is set: the {@code WMSA_HOME} JVM
 * system property, the {@code WMSA_HOME} environment variable, the built-in default
 * {@code /var/lib/wmsa}.</p>
 */
public class WmsaHome {
    private static final String HOME_KEY = "WMSA_HOME";
    private static final String DEFAULT = "/var/lib/wmsa";

    /**
     * Resolves the home directory.
     *
     * @return the configured home directory
     * @throws IllegalStateException if the resolved path is not an existing directory
     */
    public static Path get() {
        var ret = Path.of(configuredLocation());
        if (!Files.isDirectory(ret)) {
            throw new IllegalStateException("Could not find WMSA_HOME at " + ret
                    + ", either set the WMSA_HOME system property or environment variable, or ensure "
                    + DEFAULT + " exists");
        }
        return ret;
    }

    /** Picks the configured location: system property first, then environment variable, then the default. */
    private static String configuredLocation() {
        String fromProperty = System.getProperty(HOME_KEY);
        if (fromProperty != null) {
            return fromProperty;
        }

        // The error message has always told operators to set an environment variable; honour it.
        String fromEnvironment = System.getenv(HOME_KEY);
        if (fromEnvironment != null) {
            return fromEnvironment;
        }

        return DEFAULT;
    }
}
args); + + static ServiceDescriptor getKind(String arg) { + + try { + return Arrays.stream(ServiceDescriptor.values()) + .filter(sd -> Objects.equals(arg, sd.name)) + .findFirst() + .orElseThrow(IllegalArgumentException::new) + ; + } catch (IllegalArgumentException ex) { + System.err.println("Unknown service '" + arg + "'"); + System.exit(1); + } + return null; + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/command/ListCommand.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/command/ListCommand.java new file mode 100644 index 00000000..0bd2c3eb --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/command/ListCommand.java @@ -0,0 +1,23 @@ +package nu.marginalia.wmsa.configuration.command; + +import lombok.SneakyThrows; +import nu.marginalia.wmsa.configuration.ServiceDescriptor; + +import java.util.Arrays; +import java.util.Objects; + +public class ListCommand extends Command { + public ListCommand() { + super("list"); + } + + @Override + @SneakyThrows + public void execute(String... args) { + Arrays.stream(ServiceDescriptor.values()) + .filter(sd -> Objects.nonNull(sd.mainClass)) + .map(ServiceDescriptor::describeService) + .forEach(System.out::println); + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/command/StartCommand.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/command/StartCommand.java new file mode 100644 index 00000000..55d46813 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/command/StartCommand.java @@ -0,0 +1,24 @@ +package nu.marginalia.wmsa.configuration.command; + +import lombok.SneakyThrows; + +import java.util.Arrays; + +public class StartCommand extends Command { + public StartCommand() { + super("start"); + } + + @Override + @SneakyThrows + public void execute(String... 
args) { + if (args.length < 2) { + System.err.println("Usage: start service-descriptor"); + System.exit(255); + } + + var mainMethod = getKind(args[1]).mainClass.getMethod("main", String[].class); + String[] args2 = Arrays.copyOfRange(args, 2, args.length); + mainMethod.invoke(null, (Object) args2); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/command/VersionCommand.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/command/VersionCommand.java new file mode 100644 index 00000000..179a0300 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/command/VersionCommand.java @@ -0,0 +1,20 @@ +package nu.marginalia.wmsa.configuration.command; + +import lombok.SneakyThrows; + +public class VersionCommand extends Command { + public VersionCommand() { + super("version"); + } + + @Override + @SneakyThrows + public void execute(String... args) { + try (var str = ClassLoader.getSystemResourceAsStream("_version.txt")) { + if (null == str) { + System.err.println("Bad jar, missing _version.txt"); + return; + } + } + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/module/ConfigurationModule.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/module/ConfigurationModule.java new file mode 100644 index 00000000..9f762cb1 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/module/ConfigurationModule.java @@ -0,0 +1,45 @@ +package nu.marginalia.wmsa.configuration.module; + +import com.google.inject.AbstractModule; +import com.google.inject.Provides; +import com.google.inject.Singleton; +import com.google.inject.name.Named; +import lombok.SneakyThrows; + +import java.time.LocalDateTime; +import java.time.format.DateTimeFormatter; +import java.util.Objects; + +import static com.google.inject.name.Names.named; + +public class ConfigurationModule extends AbstractModule { + private static final String SERVICE_NAME = 
System.getProperty("service-name"); + public static int MONITOR_PORT = Integer.getInteger("monitor.port", 5000); + public static String MONITOR_HOST = System.getProperty("monitor.host", "127.0.0.1"); + + public void configure() { + bind(Integer.class).annotatedWith(named("monitor-port")).toInstance(MONITOR_PORT); + bind(String.class).annotatedWith(named("monitor-host")).toInstance(MONITOR_HOST); + bind(Integer.class).annotatedWith(named("monitor-boot-timeout")).toInstance(10); + + bind(String.class).annotatedWith(named("service-name")).toInstance(Objects.requireNonNull(SERVICE_NAME)); + bind(String.class).annotatedWith(named("service-host")).toProvider(HostnameProvider.class).in(Singleton.class); + bind(Integer.class).annotatedWith(named("service-port")).toProvider(PortProvider.class).in(Singleton.class); + bind(Integer.class).annotatedWith(named("metrics-server-port")).toProvider(MetricsPortProvider.class).in(Singleton.class); + + } + + @Provides + @Named("build-version") + @SneakyThrows + public String buildVersion() { + try (var str = ClassLoader.getSystemResourceAsStream("_version.txt")) { + if (null == str) { + System.err.println("Missing _version.txt from classpath"); + return LocalDateTime.now().format(DateTimeFormatter.ISO_LOCAL_DATE_TIME); + } + return new String(str.readAllBytes()); + } + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/module/DatabaseModule.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/module/DatabaseModule.java new file mode 100644 index 00000000..8ce96c3a --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/module/DatabaseModule.java @@ -0,0 +1,114 @@ +package nu.marginalia.wmsa.configuration.module; + +import com.google.inject.AbstractModule; +import com.google.inject.Provides; +import com.google.inject.Singleton; +import com.zaxxer.hikari.HikariConfig; +import com.zaxxer.hikari.HikariDataSource; +import lombok.SneakyThrows; +import 
nu.marginalia.wmsa.configuration.WmsaHome; +import org.h2.tools.RunScript; +import org.mariadb.jdbc.Driver; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStreamReader; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Properties; + +public class DatabaseModule extends AbstractModule { + private static final Logger logger = LoggerFactory.getLogger(DatabaseModule.class); + + private static final String DB_USER_KEY="db.user"; + private static final String DB_PASS_KEY ="db.pass"; + private static final String DB_CONN_KEY ="db.conn"; + + private final Properties dbProperties; + + public DatabaseModule() { + new Driver(); + + dbProperties = loadDbProperties(); + } + + private Properties loadDbProperties() { + Path propDir = WmsaHome.get().resolve("db.properties"); + if (!Files.isRegularFile(propDir)) { + throw new IllegalStateException("Database properties file " + propDir + " does not exist"); + } + + try (var is = new FileInputStream(propDir.toFile())) { + var props = new Properties(); + props.load(is); + + if (!props.containsKey(DB_USER_KEY)) throw new IllegalStateException(propDir + " missing required attribute " + DB_USER_KEY); + if (!props.containsKey(DB_PASS_KEY)) throw new IllegalStateException(propDir + " missing required attribute " + DB_PASS_KEY); + if (!props.containsKey(DB_CONN_KEY)) throw new IllegalStateException(propDir + " missing required attribute " + DB_CONN_KEY); + + return props; + } + catch (IOException ex) { + throw new RuntimeException(ex); + } + + } + + @SneakyThrows + @Singleton + @Provides + public HikariDataSource provideConnection() { + if (Boolean.getBoolean("data-store-h2")) { + return getH2(); + } + else { + return getMariaDB(); + } + + } + + @SneakyThrows + private HikariDataSource getMariaDB() { + var connStr = dbProperties.getProperty(DB_CONN_KEY); + + try { + HikariConfig config = new HikariConfig(); + + + 
config.setJdbcUrl(connStr); + config.setUsername(dbProperties.getProperty(DB_USER_KEY)); + config.setPassword(dbProperties.getProperty(DB_PASS_KEY)); + + config.addDataSourceProperty("cachePrepStmts", "true"); + config.addDataSourceProperty("prepStmtCacheSize", "250"); + config.addDataSourceProperty("prepStmtCacheSqlLimit", "2048"); + config.setMaximumPoolSize(100); + config.setMinimumIdle(10); + return new HikariDataSource(config); + } + finally { + logger.info("Created HikariPool for {}", connStr); + } + } + + + @SneakyThrows + private HikariDataSource getH2() { + HikariConfig config = new HikariConfig(); + config.setJdbcUrl("jdbc:h2:~/wmsa-db"); + config.setUsername("wmsa"); + config.setPassword(""); + + var ds = new HikariDataSource(config); + + try (var stream = ClassLoader.getSystemResourceAsStream("sql/data-store-init.sql")) { + RunScript.execute(ds.getConnection(), new InputStreamReader(stream)); + } + try (var stream = ClassLoader.getSystemResourceAsStream("sql/edge-crawler-cache.sql")) { + RunScript.execute(ds.getConnection(), new InputStreamReader(stream)); + } + return ds; + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/module/HostnameProvider.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/module/HostnameProvider.java new file mode 100644 index 00000000..86276f91 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/module/HostnameProvider.java @@ -0,0 +1,36 @@ +package nu.marginalia.wmsa.configuration.module; + +import com.google.inject.name.Named; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.inject.Inject; +import javax.inject.Provider; + +public class HostnameProvider implements Provider { + private static final String DEFAULT_HOSTNAME = "127.0.0.1"; + private final int monitorPort; + private final String monitorHost; + private final int timeout; + private Logger logger = LoggerFactory.getLogger(getClass()); + + @Inject + public 
HostnameProvider(@Named("monitor-port") Integer monitorPort, + @Named("monitor-host") String monitorHost, + @Named("monitor-boot-timeout") Integer timeout + ) { + this.monitorHost = monitorHost; + this.monitorPort = monitorPort; + this.timeout = timeout; + } + + @Override + public String get() { + var override = System.getProperty("service-host"); + if (null != override) { + return override; + } + return DEFAULT_HOSTNAME; + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/module/LoggerConfiguration.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/module/LoggerConfiguration.java new file mode 100644 index 00000000..9557b433 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/module/LoggerConfiguration.java @@ -0,0 +1,13 @@ +package nu.marginalia.wmsa.configuration.module; + +import com.google.inject.name.Named; + +import javax.inject.Inject; + +public class LoggerConfiguration { + @Inject + public LoggerConfiguration(@Named("service-name") String serviceName) { + System.setProperty("service-name", serviceName); + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/module/MetricsPortProvider.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/module/MetricsPortProvider.java new file mode 100644 index 00000000..cca5bbfc --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/module/MetricsPortProvider.java @@ -0,0 +1,21 @@ +package nu.marginalia.wmsa.configuration.module; + +import com.google.inject.name.Named; + +import javax.inject.Inject; +import javax.inject.Provider; + +public class MetricsPortProvider implements Provider { + private final Integer servicePort; + + @Inject + public MetricsPortProvider(@Named("service-port") Integer servicePort) { + this.servicePort = servicePort; + } + + @Override + public Integer get() { + return servicePort+1000; + } + +} diff --git 
a/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/module/PortProvider.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/module/PortProvider.java new file mode 100644 index 00000000..090bc46d --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/module/PortProvider.java @@ -0,0 +1,46 @@ +package nu.marginalia.wmsa.configuration.module; + +import com.google.inject.name.Named; +import io.reactivex.rxjava3.core.Flowable; +import nu.marginalia.wmsa.configuration.ServiceDescriptor; +import org.apache.http.HttpResponse; +import org.reactivestreams.Publisher; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.inject.Inject; +import javax.inject.Provider; +import java.io.IOException; +import java.util.concurrent.TimeUnit; + +public class PortProvider implements Provider { + private static final Integer DEFAULT_PORT = 5000; + private final int monitorPort; + private final String monitorHost; + private Logger logger = LoggerFactory.getLogger(getClass()); + private final int timeout = 10; + @Inject + public PortProvider(@Named("monitor-port") Integer monitorPort, + @Named("monitor-host") String monitorHost, + @Named("monitor-boot-timeout") Integer timeout) { + this.monitorHost = monitorHost; + this.monitorPort = monitorPort; + } + + @Override + public Integer get() { + return ServiceDescriptor.byName(System.getProperty("service-name")).port; + } + + private Publisher repeatDelay(Flowable error) { + return error.delay(1, TimeUnit.SECONDS); + } + + private String accept200(HttpResponse rsp) throws IOException { + if (rsp.getStatusLine().getStatusCode() != 200) { + throw new RuntimeException("Monitor responded unexpected status " + + rsp.getStatusLine().getStatusCode()); + } + return new String(rsp.getEntity().getContent().readAllBytes()); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/server/Context.java 
b/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/server/Context.java new file mode 100644 index 00000000..b98e5d3b --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/server/Context.java @@ -0,0 +1,139 @@ +package nu.marginalia.wmsa.configuration.server; + +import io.reactivex.rxjava3.schedulers.Schedulers; +import org.apache.logging.log4j.ThreadContext; +import spark.Request; + +import java.util.*; +import java.util.concurrent.TimeUnit; + +public class Context { + public static final String CONTEXT_HEADER = "X-Context"; + public static final String SESSION_HEADER = "Cookie"; + public static final String PUBLIC_HEADER = "X-Public"; + private static final Random random; + + private static volatile byte[] seed = new byte[12]; + + private static byte[] generateSalt() { + byte[] oldHash = seed; + + int hash1 = Long.hashCode(random.nextLong()); + int hash2 = Objects.hash(System.currentTimeMillis()); + int hash3 = Arrays.hashCode(oldHash); + + return new byte[]{ + (byte) (hash1 & 0xFF), + (byte) (hash1 >>> 8 & 0xFF), + (byte) (hash1 >>> 16 & 0xFF), + (byte) (hash1 >>> 24 & 0xFF), + (byte) (hash2 & 0xFF), + (byte) (hash2 >>> 8 & 0xFF), + (byte) (hash2 >>> 16 & 0xFF), + (byte) (hash2 >>> 24 & 0xFF), + (byte) (hash3 & 0xFF), + (byte) (hash3 >>> 8 & 0xFF), + (byte) (hash3 >>> 16 & 0xFF), + (byte) (hash3 >>> 24 & 0xFF) + }; + } + + static { + random = new Random(); + for (int i = 0; i < 1_000_000; i++) { + random.nextLong(); + } + random.nextBytes(seed); + + updateSeed(); + } + + private static void updateSeed() { + seed = generateSalt(); + + Schedulers.computation().scheduleDirect(Context::updateSeed, + 60*5000 + (int)(1000*60*10*Math.random()), + TimeUnit.MILLISECONDS); + } + + private String id; + private String session; + private boolean treatAsPublic; + + private Context(String id, String session) { + this.id = id; + this.session = session; + } + + public Context treatAsPublic() { + this.treatAsPublic = true; + return this; + } 
+ + public static Context internal() { + return new Context(UUID.randomUUID().toString(), null); + } + public static Context internal(String hwat) { + return new Context(hwat, null); + } + + public static Context fromRequest(Request request) { + + if (Boolean.getBoolean("unit-test")) { + return Context.internal(); + } + + final var ctxHeader = hashPublicIp(request.headers(CONTEXT_HEADER)); + final var sessHeader = request.headers(SESSION_HEADER); + + ThreadContext.put("context", ctxHeader+"-"+sessHeader); + ThreadContext.put("outbound-request", "none"); + + return new Context(ctxHeader, sessHeader); + } + + private static String hashPublicIp(String header) { + + if (header != null && header.contains("-")) { + + byte[] hashData = Arrays.copyOf(seed, seed.length+4); + int hashi = Objects.hash(header.split("-", 2)[0]); + + for (int i = 0; i < 4; i++) { + hashData[seed.length] = (byte)(hashi & 0xFF); + hashData[seed.length+1] = (byte)(hashi>>>8 & 0xFF); + hashData[seed.length+2] = (byte)(hashi>>>16 & 0xFF); + hashData[seed.length+3] = (byte)(hashi>>>24 & 0xFF); + } + + return String.format("#%x", Arrays.hashCode(hashData)); + } + else { + return header; + } + } + + public okhttp3.Request.Builder paint(okhttp3.Request.Builder requestBuilder) { + requestBuilder.addHeader(CONTEXT_HEADER, id); + + if (session != null) { + requestBuilder.addHeader(SESSION_HEADER, session); + } + + if (treatAsPublic) { + requestBuilder.header(PUBLIC_HEADER, "1"); + } + + return requestBuilder; + } + + public Optional getIpHash() { + + if (id.startsWith("#")) { + return Optional.of(id); + } + + return Optional.empty(); + } + +} \ No newline at end of file diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/server/Initialization.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/server/Initialization.java new file mode 100644 index 00000000..6b146672 --- /dev/null +++ 
b/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/server/Initialization.java @@ -0,0 +1,47 @@ +package nu.marginalia.wmsa.configuration.server; + +import com.google.inject.Singleton; +import lombok.SneakyThrows; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +@Singleton +public class Initialization { + boolean initialized; + private final Logger logger = LoggerFactory.getLogger(getClass()); + + public static Initialization already() { + Initialization init = new Initialization(); + init.setReady(); + return init; + } + + public void setReady() { + synchronized (this) { + logger.info("Initialized"); + initialized = true; + notifyAll(); + } + + if (Boolean.getBoolean("go-no-go")) { + logger.info("Self-test OK"); + System.exit(0); + } + } + + public boolean isReady() { + synchronized (this) { + return initialized; + } + } + + @SneakyThrows + public boolean waitReady() { + synchronized (this) { + while (!initialized) { + wait(); + } + return initialized; + } + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/server/MetricsServer.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/server/MetricsServer.java new file mode 100644 index 00000000..c8da5e97 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/server/MetricsServer.java @@ -0,0 +1,25 @@ +package nu.marginalia.wmsa.configuration.server; + +import com.google.inject.Inject; +import com.google.inject.name.Named; +import io.prometheus.client.exporter.MetricsServlet; +import lombok.SneakyThrows; +import org.eclipse.jetty.server.Server; +import org.eclipse.jetty.servlet.ServletContextHandler; +import org.eclipse.jetty.servlet.ServletHolder; + +public class MetricsServer { + + @SneakyThrows + @Inject + public MetricsServer(@Named("metrics-server-port") int port) { + Server server = new Server(port); + ServletContextHandler context = new ServletContextHandler(); + context.setContextPath("/"); + 
server.setHandler(context); + + context.addServlet(new ServletHolder(new MetricsServlet()), "/metrics"); + + server.start(); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/server/RateLimiter.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/server/RateLimiter.java new file mode 100644 index 00000000..4dc4c8da --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/server/RateLimiter.java @@ -0,0 +1,71 @@ +package nu.marginalia.wmsa.configuration.server; + +import io.github.bucket4j.Bandwidth; +import io.github.bucket4j.Bucket; +import io.github.bucket4j.Bucket4j; +import io.github.bucket4j.Refill; +import io.reactivex.rxjava3.schedulers.Schedulers; + +import java.time.Duration; +import java.util.Map; +import java.util.Optional; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.TimeUnit; + +public class RateLimiter { + + private final Map bucketMap = new ConcurrentHashMap<>(); + + private final int capacity; + private final int refillRate; + + public RateLimiter(int capacity, int refillRate) { + this.capacity = capacity; + this.refillRate = refillRate; + + Schedulers.io().schedulePeriodicallyDirect(this::cleanIdleBuckets, 30, 30, TimeUnit.MINUTES); + } + + + public static RateLimiter forExpensiveRequest() { + return new RateLimiter(5, 10); + } + + public static RateLimiter custom(int perMinute) { + return new RateLimiter(perMinute, 60); + } + + public static RateLimiter forSpamBots() { + return new RateLimiter(120, 3600); + } + + + public static RateLimiter forLogin() { + return new RateLimiter(3, 15); + } + + private void cleanIdleBuckets() { + bucketMap.clear(); + } + + public boolean isAllowed(Context ctx) { + final Optional maybeIp = ctx.getIpHash(); + + if (maybeIp.isEmpty()) { // Internal server->server request + return true; + } + + return bucketMap.computeIfAbsent(maybeIp.get(), + (ip) -> createBucket()).tryConsume(1); + } + + public boolean isAllowed() { 
+ return bucketMap.computeIfAbsent("any", + (ip) -> createBucket()).tryConsume(1); + } + private Bucket createBucket() { + var refill = Refill.greedy(1, Duration.ofSeconds(refillRate)); + var bw = Bandwidth.classic(capacity, refill); + return Bucket4j.builder().addLimit(bw).build(); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/server/Service.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/server/Service.java new file mode 100644 index 00000000..c9f618da --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/server/Service.java @@ -0,0 +1,146 @@ +package nu.marginalia.wmsa.configuration.server; + +import com.google.common.base.Strings; +import io.prometheus.client.Counter; +import nu.marginalia.wmsa.client.exception.MessagingException; +import org.apache.http.HttpStatus; +import org.apache.logging.log4j.ThreadContext; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.slf4j.Marker; +import org.slf4j.MarkerFactory; +import spark.Request; +import spark.Response; +import spark.Spark; + +import java.util.Optional; + +public class Service { + private final Logger logger = LoggerFactory.getLogger(getClass()); + private final Marker httpMarker = MarkerFactory.getMarker("HTTP"); + + private final Initialization initialization; + + private final static Counter request_counter = Counter.build("wmsa_service_in_request_counter", "Request Counter") + .labelNames("service") + .register(); + private final static Counter request_counter_good = Counter.build("wmsa_service_good_request_counter", "Good Requests") + .labelNames("service") + .register(); + private final static Counter request_counter_bad = Counter.build("wmsa_service_bad_request_counter", "Bad Requests") + .labelNames("service") + .register(); + private final static Counter request_counter_err = Counter.build("wmsa_service_error_request_counter", "Error Requests") + .labelNames("service") + .register(); + private final 
String serviceName; + + private static volatile boolean initialized = false; + + public Service(String ip, int port, Initialization initialization, MetricsServer metricsServer) { + this.initialization = initialization; + + serviceName = System.getProperty("service-name"); + + if (!initialization.isReady() && ! initialized ) { + initialized = true; + + Spark.threadPool(32, 4, 60_000); + Spark.ipAddress(ip); + Spark.port(port); + + logger.info("{} Listening to {}:{}", getClass().getSimpleName(), ip == null ? "" : ip, port); + + Spark.staticFiles.expireTime(3600); + Spark.staticFiles.header("Cache-control", "public"); + + Spark.before(this::filterPublicRequests); + Spark.before(this::auditRequestIn); + Spark.after(this::auditRequestOut); + Spark.exception(MessagingException.class, this::handleException); + + Spark.get("/internal/ping", (rq,rp) -> "pong"); + Spark.get("/internal/started", this::isInitialized); + Spark.get("/internal/ready", this::isReady); + Spark.get("/public/who", (rq,rp) -> getClass().getSimpleName()); + } + } + + private void filterPublicRequests(Request request, Response response) { + if (null != request.headers("X-Public")) { + + String context = Optional + .ofNullable(request.headers("X-Context")) + .orElseGet(request::ip); + + if (!request.pathInfo().startsWith("/public/")) { + logger.warn(httpMarker, "External connection to internal API: {} -> {} {}", context, request.requestMethod(), request.pathInfo()); + Spark.halt(HttpStatus.SC_FORBIDDEN); + } + + String url = request.pathInfo(); + if (request.queryString() != null) { + url = url + "?" 
+ request.queryString(); + } + logger.info(httpMarker, "PUBLIC {}: {} {}", Context.fromRequest(request).getIpHash().orElse("?"), request.requestMethod(), url); + } + } + + private Object isInitialized(Request request, Response response) { + if (initialization.isReady()) { + return "ok"; + } + else { + response.status(HttpStatus.SC_FAILED_DEPENDENCY); + return "bad"; + } + } + + public boolean isReady() { + return true; + } + + private String isReady(Request request, Response response) { + if (isReady()) { + return "ok"; + } + else { + response.status(HttpStatus.SC_FAILED_DEPENDENCY); + return "bad"; + } + } + + private void auditRequestIn(Request request, Response response) { + request_counter.labels(serviceName).inc(); + + // Paint context + if (!Strings.isNullOrEmpty(request.headers(Context.CONTEXT_HEADER))) { + Context.fromRequest(request); + } + } + private void auditRequestOut(Request request, Response response) { + ThreadContext.clearMap(); + + if (response.status() < 400) { + request_counter_good.labels(serviceName).inc(); + } + else { + request_counter_bad.labels(serviceName).inc(); + } + + if (null != request.headers("X-Public")) { + logger.info(httpMarker, "RSP {}", response.status()); + } + } + + private void handleException(Exception ex, Request request, Response response) { + request_counter_err.labels(serviceName).inc(); + if (ex instanceof MessagingException) { + logger.error("{} {}", ex.getClass().getSimpleName(), ex.getMessage()); + } + else { + logger.error("Uncaught exception", ex); + } + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/data_store/DataStoreMain.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/data_store/DataStoreMain.java new file mode 100644 index 00000000..56c61293 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/data_store/DataStoreMain.java @@ -0,0 +1,35 @@ +package nu.marginalia.wmsa.data_store; + +import com.google.inject.Guice; +import com.google.inject.Inject; +import 
com.google.inject.Injector; +import nu.marginalia.wmsa.configuration.MainClass; +import nu.marginalia.wmsa.configuration.ServiceDescriptor; +import nu.marginalia.wmsa.configuration.module.ConfigurationModule; +import nu.marginalia.wmsa.configuration.module.DatabaseModule; +import nu.marginalia.wmsa.configuration.server.Initialization; +import nu.marginalia.wmsa.edge.index.EdgeTablesModule; + +import java.io.IOException; + +public class DataStoreMain extends MainClass { + private DataStoreService service; + + @Inject + public DataStoreMain(DataStoreService service) throws IOException { + this.service = service; + } + + public static void main(String... args) { + init(ServiceDescriptor.DATA_STORE, args); + Injector injector = Guice.createInjector( + new DataStoreModule(), + new EdgeTablesModule(), + new DatabaseModule(), + new ConfigurationModule() + ); + injector.getInstance(DataStoreMain.class); + + injector.getInstance(Initialization.class).setReady(); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/data_store/DataStoreModule.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/data_store/DataStoreModule.java new file mode 100644 index 00000000..185b16c5 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/data_store/DataStoreModule.java @@ -0,0 +1,14 @@ +package nu.marginalia.wmsa.data_store; + +import com.google.inject.AbstractModule; +import com.google.inject.name.Names; + +public class DataStoreModule extends AbstractModule { + + public void configure() { + bind(String.class).annotatedWith(Names.named("file-storage-dir")).toInstance("/tmp/files"); + bind(String.class).annotatedWith(Names.named("distro-file-name")).toInstance("wmsa.jar"); + bind(String.class).annotatedWith(Names.named("file-tmp-dir")).toInstance("/tmp"); + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/data_store/DataStoreService.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/data_store/DataStoreService.java new file mode 100644 index 
00000000..3aa8799a --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/data_store/DataStoreService.java @@ -0,0 +1,152 @@ +package nu.marginalia.wmsa.data_store; + +import com.google.gson.Gson; +import com.google.gson.GsonBuilder; +import com.google.inject.Inject; +import com.google.inject.name.Named; +import com.zaxxer.hikari.HikariDataSource; +import lombok.SneakyThrows; +import nu.marginalia.wmsa.configuration.server.Initialization; +import nu.marginalia.wmsa.configuration.server.MetricsServer; +import nu.marginalia.wmsa.configuration.server.Service; +import org.eclipse.jetty.http.HttpStatus; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import spark.Request; +import spark.Response; +import spark.Spark; + +import java.io.InputStreamReader; +import java.util.ArrayList; +import java.util.List; + +import static spark.Spark.*; + +public class DataStoreService extends Service { + private final HikariDataSource dataSource; + private final EdgeDataStoreService edgeService; + private Logger logger = LoggerFactory.getLogger(getClass()); + private final Gson gson = new GsonBuilder().create(); + + @Inject + public DataStoreService( + @Named("service-host") String ip, + @Named("service-port") Integer port, + FileRepository fileRepo, + HikariDataSource dataSource, + EdgeDataStoreService edgeService, + Initialization init, + MetricsServer metricsServer + ) { + super(ip, port, init, metricsServer); + + this.dataSource = dataSource; + this.edgeService = edgeService; + + Spark.get("data/:domain/:model/:resource", this::getResource); + Spark.get("data/:domain/:model", this::getResourceIdsForModel, this::convertToJson); + post("data/:domain/:model/:resource", this::storeResource); + + post("release", fileRepo::uploadFile); + Spark.get("release", fileRepo::downloadFile); + Spark.get("release/upload", fileRepo::uploadForm); + Spark.get("release/version", fileRepo::version); + + Spark.path("edge", () -> { + post("/domain-alias/*/*", 
edgeService::putDomainAlias, this::convertToJson); + post("/link", edgeService::putLink, this::convertToJson); + + post("/url", edgeService::putUrl, this::convertToJson); + post("/url-visited", edgeService::putUrlVisited, this::convertToJson); + get("/url/:id", edgeService::getUrlName, this::convertToJson); + get("/domain-id/*", edgeService::getDomainId, this::convertToJson); + get("/domain/:id", edgeService::getDomainName, this::convertToJson); + get("/meta/:site", edgeService::domainInfo, this::convertToJson); + + }); + + + } + + private String convertToJson(Object o) { + return gson.toJson(o); + } + + @SneakyThrows + private Object getResourceIdsForModel(Request request, Response response) { + try (var connection = dataSource.getConnection()) { + + String model = request.params("model"); + String domain = request.params("domain"); + + String query = String.format("SELECT ID FROM JSON_DATA WHERE MODEL='%s' AND DOM='%s'", + model, domain); + + + List ids = new ArrayList<>(); + try (var stmt = connection.createStatement()) { + var rs = stmt.executeQuery(query); + while (rs.next()) { + ids.add(rs.getString(1)); + } + } + + return ids; + } + } + + + @SneakyThrows + private Object getResource(Request request, Response response) { + try (var connection = dataSource.getConnection()) { + + String resource = request.params("resource"); + String model = request.params("model"); + String domain = request.params("domain"); + + String query = String.format("SELECT DATA FROM JSON_DATA WHERE ID='%s' AND MODEL='%s' AND DOM='%s'", + resource, model, domain); + + try (var stmt = connection.createStatement()) { + var rs = stmt.executeQuery(query); + if (!rs.next()) { + halt(404); + } + + rs.getAsciiStream(1).transferTo(response.raw().getOutputStream()); + + if (rs.next()) { + logger.warn("Duplicate data for {}/{}/{}", domain, model, resource); + } + } + + return ""; + } + } + + @SneakyThrows + private Object storeResource(Request request, Response response) { + try (var connection 
= dataSource.getConnection()) { + + String resource = request.params("resource"); + String model = request.params("model"); + String domain = request.params("domain"); + + try (var stmt = connection.prepareStatement("INSERT INTO JSON_DATA(dom, model, id, data) VALUES (?,?,?,?)")) { + stmt.setString(1, domain); + stmt.setString(2, model); + stmt.setString(3, resource); + stmt.setCharacterStream(4, new InputStreamReader(request.raw().getInputStream())); + + stmt.executeUpdate(); + + if (stmt.getUpdateCount() != 1) { + logger.warn("Query failed"); + halt(500); + } + } + halt(HttpStatus.ACCEPTED_202); + return null; + } + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/data_store/EdgeDataStoreService.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/data_store/EdgeDataStoreService.java new file mode 100644 index 00000000..febbe8d3 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/data_store/EdgeDataStoreService.java @@ -0,0 +1,201 @@ +package nu.marginalia.wmsa.data_store; + +import com.google.gson.Gson; +import com.google.gson.GsonBuilder; +import com.google.inject.Inject; +import com.google.inject.Singleton; +import io.prometheus.client.Histogram; +import lombok.SneakyThrows; +import nu.marginalia.wmsa.data_store.meta.DomainInformation; +import nu.marginalia.wmsa.edge.data.dao.EdgeDataStoreDao; +import nu.marginalia.wmsa.edge.model.*; +import nu.marginalia.wmsa.edge.model.crawl.EdgeDomainIndexingState; +import nu.marginalia.wmsa.edge.model.crawl.EdgeDomainLink; +import nu.marginalia.wmsa.edge.model.crawl.EdgeUrlVisit; +import org.eclipse.jetty.http.HttpStatus; +import org.eclipse.jetty.util.UrlEncoded; +import spark.Request; +import spark.Response; + +import java.io.IOException; +import java.io.InputStreamReader; +import java.net.URISyntaxException; +import java.util.List; +import java.util.NoSuchElementException; +import java.util.zip.GZIPInputStream; + +@Singleton +public class EdgeDataStoreService { + + private final 
EdgeDataStoreDao dataStore; + private Gson gson = new GsonBuilder().create(); + + + static final Histogram request_time_metrics + = Histogram.build("wmsa_edge_data_store_request_time", "DB Request Time") + .labelNames("request") + .register(); + + @Inject + public EdgeDataStoreService(EdgeDataStoreDao dataStore) { + this.dataStore = dataStore; + } + + @SneakyThrows + public Object putLink(Request request, Response response) { + final long start = System.currentTimeMillis(); + + var model = readFromJson(request, EdgeDomainLink[].class); + + dataStore.putLink(false, model); + + request_time_metrics.labels("put_link").observe(System.currentTimeMillis() - start); + + response.status(HttpStatus.CREATED_201); + return ""; + } + + @SneakyThrows + public Object putUrl(Request request, Response response) { + final long start = System.currentTimeMillis(); + + var model = readFromJson(request, EdgeUrl[].class); + + double quality = Double.parseDouble(request.queryParams("quality")); + dataStore.putUrl(quality, model); + + request_time_metrics.labels("put_url").observe(System.currentTimeMillis() - start); + + response.status(HttpStatus.CREATED_201); + return ""; + } + + @SneakyThrows + public Object putUrlVisited(Request request, Response response) { + final long start = System.currentTimeMillis(); + + var model = readFromJson(request, EdgeUrlVisit[].class); + + dataStore.putUrlVisited(model); + + request_time_metrics.labels("put_url_visited").observe(System.currentTimeMillis() - start); + + response.status(HttpStatus.CREATED_201); + return ""; + } + + public Object putDomainAlias(Request request, Response response) { + final long start = System.currentTimeMillis(); + + var src = UrlEncoded.decodeString(request.splat()[0]); + var dst = UrlEncoded.decodeString(request.splat()[1]); + + dataStore.putDomainAlias(new EdgeDomain(src), new EdgeDomain(dst)); + + request_time_metrics.labels("put_domain_alias").observe(System.currentTimeMillis() - start); + + 
response.status(HttpStatus.ACCEPTED_202); + return ""; + } + + public Object getUrlName(Request request, Response response) { + final long start = System.currentTimeMillis(); + + try { + var id = Integer.parseInt(request.params("id")); + var ret = dataStore.getUrl(new EdgeId<>(id)); + + request_time_metrics.labels("get_url_name").observe(System.currentTimeMillis() - start); + return ret; + } + catch (NoSuchElementException ex) { + response.status(404); + return ""; + } + + } + + public Object getDomainId(Request request, Response response) { + final long start = System.currentTimeMillis(); + + var domain = UrlEncoded.decodeString(request.splat()[0]); + + try { + var ret = dataStore.getDomainId(new EdgeDomain(domain)); + + request_time_metrics.labels("get_domain_id").observe(System.currentTimeMillis() - start); + return ret; + } + catch (NoSuchElementException ex) { + response.status(404); + return ""; + } + + } + + public Object getDomainName(Request request, Response response) { + final long start = System.currentTimeMillis(); + + try { + var id = Integer.parseInt(request.params("id")); + var ret = dataStore.getDomain(new EdgeId<>(id)); + + request_time_metrics.labels("get_domain_name").observe(System.currentTimeMillis() - start); + return ret; + } + catch (NoSuchElementException ex) { + response.status(404); + return ""; + } + + } + + public T readFromJson(Request request, Class clazz) throws IOException { + if ("gzip".equals(request.headers("Content-Encoding"))) { + return gson.fromJson(new InputStreamReader(new GZIPInputStream(request.raw().getInputStream())), clazz); + } + else { + return gson.fromJson(new InputStreamReader(request.raw().getInputStream()), clazz); + } + } + + + public DomainInformation domainInfo(Request request, Response response) throws URISyntaxException { + final String site = request.params("site"); + + EdgeId domainId = getDomainFromPartial(site); + if (domainId == null) { + response.status(404); + return null; + } + EdgeDomain domain = 
dataStore.getDomain(domainId); + + boolean blacklisted = dataStore.isBlacklisted(domain); + int pagesKnown = dataStore.getPagesKnown(domainId); + int pagesVisited = dataStore.getPagesVisited(domainId); + int pagesIndexed = dataStore.getPagesIndexed(domainId); + int incomingLinks = dataStore.getIncomingLinks(domainId); + int outboundLinks = dataStore.getOutboundLinks(domainId); + /* Divide by 100. rather than 100: Math.round() yields a long, so integer division would discard the two decimals the 10000.0 scaling is meant to keep (nominalQuality below already does this). */ + double rank = Math.round(10000.0*(1.0-dataStore.getRank(domainId)))/100.; + EdgeDomainIndexingState state = dataStore.getDomainState(domainId); + double nominalQuality = Math.round(100*100*Math.exp(dataStore.getDomainQuality(domainId)))/100.; + List linkingDomains = dataStore.getLinkingDomains(domainId); + + return new DomainInformation(domain, blacklisted, pagesKnown, pagesVisited, pagesIndexed, incomingLinks, outboundLinks, nominalQuality, rank, state, linkingDomains); + } + + /* Resolves a site string to a domain id, returning null when the lookup fails. NOTE(review): the fallback in the catch block repeats the exact same lookup as the first attempt - presumably a different, looser lookup was intended; confirm. */ + private EdgeId getDomainFromPartial(String site) throws URISyntaxException { + try { + return dataStore.getDomainId(new EdgeDomain(site)); + } + catch (Exception ex) { + try { + return dataStore.getDomainId(new EdgeDomain(site)); + } + catch (Exception ex2) { + return null; + } + } + + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/data_store/FileRepository.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/data_store/FileRepository.java new file mode 100644 index 00000000..54c023fd --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/data_store/FileRepository.java @@ -0,0 +1,138 @@ +package nu.marginalia.wmsa.data_store; + +import com.google.inject.Inject; +import com.google.inject.name.Named; +import lombok.SneakyThrows; +import org.eclipse.jetty.http.HttpStatus; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import spark.Request; +import spark.Response; + +import javax.servlet.MultipartConfigElement; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.InputStream; +import java.nio.file.Files; +import
java.nio.file.Path; +import java.nio.file.StandardCopyOption; +import java.util.Objects; +import java.util.Optional; +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReadWriteLock; +import java.util.concurrent.locks.ReentrantReadWriteLock; +import java.util.zip.ZipFile; + + +public class FileRepository { + @Inject @Named("file-storage-dir") + private String fileStoreDir; + + @Inject @Named("file-tmp-dir") + private String fileTempDir; + + @Inject @Named("distro-file-name") + private String distroFileName; + + private String version; + + private final Logger logger = LoggerFactory.getLogger(getClass()); + + ReadWriteLock rwl = new ReentrantReadWriteLock(); + + @SneakyThrows + public Object uploadFile(Request request, Response response) { + + request.attribute("org.eclipse.jetty.multipartConfig", + new MultipartConfigElement(fileTempDir, Integer.MAX_VALUE, Integer.MAX_VALUE, Integer.MAX_VALUE) + ); + + final var part = Objects.requireNonNull(request.raw().getPart("uploaded_file"), "Missing part"); + + var lock = rwl.writeLock(); + try (InputStream is = part.getInputStream()) { + lock.lock(); + + var tempPath + = Files.createTempFile(Path.of(fileStoreDir), + "upload-", ".jar"); + + var tempFile = tempPath.toFile(); + + try (var os = new FileOutputStream(tempFile)) { + is.transferTo(os); + } + + var oldVersion = Optional.ofNullable(version) + .orElseGet(() -> readJarVersion(getReleasePath().toFile())); + var newVersion = readJarVersion(tempFile); + + logger.info("Uploading new version {}, replacing {}", newVersion, oldVersion); + + Files.move(tempPath, Path.of(fileStoreDir).resolve(distroFileName), + StandardCopyOption.ATOMIC_MOVE, + StandardCopyOption.REPLACE_EXISTING); + + version = newVersion; + } + finally { + lock.unlock(); + } + + response.status(HttpStatus.ACCEPTED_202); + return ""; + } + + @SneakyThrows + private String readJarVersion(File tempFile) { + try (var zipFile = new ZipFile(tempFile)) { + return new 
String(zipFile.getInputStream(zipFile.getEntry("_version.txt")).readAllBytes()); + } + } + + /** + * Streams the current release file to the response as an attachment while holding the read lock. + */ + @SneakyThrows + public Object downloadFile(Request request, Response response) { + response.type("application/java-archive"); + response.header("Content-Disposition", "attachment; filename=" + distroFileName); + + Lock lock = rwl.readLock(); + /* Lock before try: opening the file throws when no release exists, and finally must only unlock a lock that is actually held. */ + lock.lock(); + try (var is = new FileInputStream(getReleasePath().toFile())) { + is.transferTo(response.raw().getOutputStream()); + } + finally { + lock.unlock(); + } + return ""; + }; + + private Path getReleasePath() { + return Path.of(fileStoreDir, distroFileName); + } + + /** + * Returns the cached version if one is set, otherwise reads _version.txt from the release jar. + */ + @SneakyThrows + public synchronized Object version(Request request, Response response) { + Lock lock = rwl.readLock(); + lock.lock(); + try { + + if (null != version) { + return version; + } + + return readJarVersion(getReleasePath().toFile()); + } + finally { + lock.unlock(); + } + } + + public Object uploadForm(Request request, Response response) { + return "
    " // note the enctype + + " " + + " " + + "
    "; + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/data_store/client/DataStoreClient.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/data_store/client/DataStoreClient.java new file mode 100644 index 00000000..6fec2ca8 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/data_store/client/DataStoreClient.java @@ -0,0 +1,106 @@ +package nu.marginalia.wmsa.data_store.client; + +import com.google.gson.Gson; +import com.google.gson.GsonBuilder; +import io.reactivex.rxjava3.core.Observable; +import lombok.SneakyThrows; +import nu.marginalia.wmsa.client.AbstractDynamicClient; +import nu.marginalia.wmsa.client.HttpStatusCode; +import nu.marginalia.wmsa.configuration.ServiceDescriptor; +import nu.marginalia.wmsa.configuration.server.Context; +import nu.marginalia.wmsa.data_store.meta.DomainInformation; +import nu.marginalia.wmsa.edge.model.*; +import nu.marginalia.wmsa.edge.model.crawl.EdgeDomainLink; +import nu.marginalia.wmsa.edge.model.crawl.EdgeUrlVisit; +import org.eclipse.jetty.util.UrlEncoded; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.annotation.CheckReturnValue; +import javax.inject.Inject; +import java.util.List; + +public class DataStoreClient extends AbstractDynamicClient { + private final Gson gson = new GsonBuilder() + .create(); + + private final Logger logger = LoggerFactory.getLogger(getClass()); + @Inject + public DataStoreClient() { + super(ServiceDescriptor.DATA_STORE); + } + + @CheckReturnValue + public Observable getJson(Context ctx, Class type, String domain, String resource) { + var route = "/data/"+domain+"/"+type.getSimpleName()+"/"+resource; + + return super.get(ctx, route, type); + } + + @CheckReturnValue + @SuppressWarnings("unchecked") + public Observable> getJsonIndicies(Context ctx, Class type, String domain) { + var route = "/data/"+domain+"/"+type.getSimpleName(); + + return super.get(ctx, route) + .map(data -> (List) gson.fromJson(data, List.class)) + ; + } + + 
@CheckReturnValue + @SneakyThrows + public Observable offerJson(Context ctx, Class type, T object, String domain, String resource) { + var route = "/data/"+domain+"/"+type.getSimpleName()+"/"+resource; + return super.post(ctx, route, object) + ; + } + @CheckReturnValue + @Deprecated + public Observable putLink(Context ctx, EdgeDomainLink... data) { + return super.post(ctx, "/edge/link", data); + } + @CheckReturnValue + @Deprecated + public Observable putUrl(Context ctx, double quality, EdgeUrl... data) { + return super.post(ctx, "/edge/url?quality="+quality, data); + } + @CheckReturnValue + @Deprecated + public Observable putUrlVisited(Context ctx, EdgeUrlVisit... data) { + return super.post(ctx, "/edge/url-visited", data); + } + @CheckReturnValue + @Deprecated + public Observable putDomainAlias(Context ctx, EdgeDomain source, EdgeDomain dest) { + var srcEnc = UrlEncoded.encodeString(source.toString()); + var dstEnc = UrlEncoded.encodeString(dest.toString()); + + return super.post(ctx, "/edge/domain-alias/" + srcEnc + "/" + dstEnc, ""); + } + + + + @CheckReturnValue + public Observable getUrl(Context ctx, EdgeId url) { + return super.get(ctx, "/edge/url/"+url.getId(), EdgeUrl.class); + } + + @CheckReturnValue + @SuppressWarnings("unchecked") + public Observable> getDomainId(Context ctx, EdgeDomain domain) { + var dom = UrlEncoded.encodeString(domain.toString()); + + return super.get(ctx, "/edge/domain-id/"+dom, EdgeId.class) + .map(id -> (EdgeId) id); + } + + + @CheckReturnValue + public Observable getDomain(Context ctx, EdgeId url) { + return super.get(ctx, "/edge/domain/"+url.getId(), EdgeDomain.class); + } + + public Observable siteInfo(Context ctx, String site) { + return super.get(ctx,"/edge/meta/" + UrlEncoded.encodeString(site), DomainInformation.class); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/data_store/meta/DomainInformation.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/data_store/meta/DomainInformation.java new file mode 
100644 index 00000000..5bc7c90b --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/data_store/meta/DomainInformation.java @@ -0,0 +1,26 @@ +package nu.marginalia.wmsa.data_store.meta; + +import lombok.AllArgsConstructor; +import lombok.Getter; +import lombok.ToString; +import nu.marginalia.wmsa.edge.model.EdgeDomain; +import nu.marginalia.wmsa.edge.model.crawl.EdgeDomainIndexingState; + +import java.util.List; + +@Getter @AllArgsConstructor @ToString +public class DomainInformation { + EdgeDomain domain; + + boolean blacklisted; + int pagesKnown; + int pagesFetched; + int pagesIndexed; + int incomingLinks; + int outboundLinks; + double nominalQuality; + double ranking; + + EdgeDomainIndexingState state; + List linkingDomains; +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/archive/EdgeArchiveMain.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/archive/EdgeArchiveMain.java new file mode 100644 index 00000000..70b1e3e1 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/archive/EdgeArchiveMain.java @@ -0,0 +1,33 @@ +package nu.marginalia.wmsa.edge.archive; + +import com.google.inject.Guice; +import com.google.inject.Inject; +import com.google.inject.Injector; +import nu.marginalia.wmsa.configuration.MainClass; +import nu.marginalia.wmsa.configuration.ServiceDescriptor; +import nu.marginalia.wmsa.configuration.module.ConfigurationModule; +import nu.marginalia.wmsa.configuration.module.DatabaseModule; +import nu.marginalia.wmsa.configuration.server.Initialization; + +public class EdgeArchiveMain extends MainClass { + private EdgeArchiveService service; + + @Inject + public EdgeArchiveMain(EdgeArchiveService service) { + this.service = service; + } + + public static void main(String... 
args) { + init(ServiceDescriptor.EDGE_ARCHIVE, args); + + Injector injector = Guice.createInjector( + new EdgeArchiveModule(), + new ConfigurationModule(), + new DatabaseModule() + ); + + injector.getInstance(EdgeArchiveMain.class); + injector.getInstance(Initialization.class).setReady(); + + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/archive/EdgeArchiveModule.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/archive/EdgeArchiveModule.java new file mode 100644 index 00000000..1d3c8215 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/archive/EdgeArchiveModule.java @@ -0,0 +1,15 @@ +package nu.marginalia.wmsa.edge.archive; + +import com.google.inject.AbstractModule; +import com.google.inject.name.Names; + +import java.nio.file.Path; + +public class EdgeArchiveModule extends AbstractModule { + public void configure() { + bind(Path.class).annotatedWith(Names.named("archive-path")).toInstance(Path.of("/var/lib/wmsa/archive/webpage/")); + bind(Path.class).annotatedWith(Names.named("wiki-path")).toInstance(Path.of("/var/lib/wmsa/archive.fast/wiki/")); + bind(Integer.class).annotatedWith(Names.named("archive-size")).toInstance(10_000); + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/archive/EdgeArchiveService.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/archive/EdgeArchiveService.java new file mode 100644 index 00000000..bbacb600 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/archive/EdgeArchiveService.java @@ -0,0 +1,182 @@ +package nu.marginalia.wmsa.edge.archive; + +import com.google.gson.Gson; +import com.google.gson.GsonBuilder; +import com.google.inject.Inject; +import com.google.inject.name.Named; +import io.prometheus.client.Histogram; +import lombok.SneakyThrows; +import nu.marginalia.wmsa.configuration.server.Initialization; +import nu.marginalia.wmsa.configuration.server.MetricsServer; +import 
nu.marginalia.wmsa.configuration.server.Service; +import nu.marginalia.wmsa.edge.archive.archiver.ArchivedFile; +import nu.marginalia.wmsa.edge.archive.archiver.Archiver; +import nu.marginalia.wmsa.edge.archive.request.EdgeArchiveSubmissionReq; +import nu.marginalia.wmsa.edge.model.EdgeUrl; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import spark.Request; +import spark.Response; +import spark.Spark; + +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.stream.Collectors; +import java.util.zip.GZIPInputStream; +import java.util.zip.GZIPOutputStream; + +public class EdgeArchiveService extends Service { + + private final Logger logger = LoggerFactory.getLogger(getClass()); + private final Gson gson = new GsonBuilder().create(); + + private static final Histogram wmsa_archive_store_time = Histogram.build().name("wmsa_archive_store_time").help("-").register(); + private static final Histogram wmsa_archive_fetch_time = Histogram.build().name("wmsa_archive_fetch_time").help("-").register(); + + private final Path wikiPath; + private final Archiver archiver; + + @SneakyThrows + @Inject + public EdgeArchiveService(@Named("service-host") String ip, + @Named("service-port") Integer port, + @Named("wiki-path") Path wikiPath, + Archiver archiver, + Initialization initialization, + MetricsServer metricsServer) + { + super(ip, port, initialization, metricsServer); + this.wikiPath = wikiPath; + this.archiver = archiver; + + Spark.staticFiles.expireTime(600); + + Spark.post("/page/submit", this::pathPageSubmit); + + Spark.post("/wiki/submit", this::pathWikiSubmit); + Spark.get("/wiki/has", this::pathWikiHas); + Spark.get("/wiki/get", this::pathWikiGet); + + Spark.awaitInitialization(); + } + + @SneakyThrows + private Object pathPageSubmit(Request request, Response response) { + var timer = wmsa_archive_store_time.startTimer(); + try { 
+ var body = request.body(); + var data = gson.fromJson(body, EdgeArchiveSubmissionReq.class); + + String domainNamePart = data.getUrl().domain.domain.length() > 32 ? data.getUrl().domain.domain.substring(0, 32) : data.getUrl().domain.domain; + String fileName = String.format("%s-%10d", domainNamePart, data.getUrl().hashCode()); + + archiver.writeData(new ArchivedFile(fileName, body.getBytes())); + + return "ok"; + } finally { + timer.observeDuration(); + } + + } + + + @SneakyThrows + private Object pathWikiSubmit(Request request, Response response) { + var timer = wmsa_archive_store_time.startTimer(); + + try { + byte[] data = request.bodyAsBytes(); + + String wikiUrl = request.queryParams("url"); + Path filename = getWikiFilename(wikiPath, wikiUrl); + + Files.createDirectories(filename.getParent()); + + System.out.println(new String(data)); + logger.debug("Writing {} to {}", wikiUrl, filename); + + try (var gos = new GZIPOutputStream(new FileOutputStream(filename.toFile()))) { + gos.write(data); + gos.flush(); + } + + return "ok"; + } finally { + timer.observeDuration(); + } + + } + + + private Path getWikiFilename(Path base, String url) { + Path p = base; + + int urlHash = url.hashCode(); + + p = p.resolve(Integer.toString(urlHash & 0xFF)); + p = p.resolve(Integer.toString((urlHash>>>8) & 0xFF)); + p = p.resolve(Integer.toString((urlHash>>>16) & 0xFF)); + p = p.resolve(Integer.toString((urlHash>>>24) & 0xFF)); + + String fileName = url.chars() + .mapToObj(this::encodeUrlChar) + .collect(Collectors.joining()); + + if (fileName.length() > 128) { + fileName = fileName.substring(0, 128) + (((long)urlHash)&0xFFFFFFFFL); + } + + return p.resolve(fileName + ".gz"); + } + + + private String encodeUrlChar(int i) { + if (i >= 'a' && i <= 'z') { + return Character.toString(i); + } + if (i >= 'A' && i <= 'Z') { + return Character.toString(i); + } + if (i >= '0' && i <= '9') { + return Character.toString(i); + } + if (i == '.') { + return Character.toString(i); + } + else { 
+ /* %02X zero-pads to two hex digits; the previous %2X space-padded code points below 0x10 and so put a space into the file name. */ + return String.format("%%%02X", i); + } + } + + @SneakyThrows + private Object pathWikiHas(Request request, Response response) { + return Files.exists(getWikiFilename(wikiPath, request.queryParams("url"))); + } + + + /** + * Returns the gunzipped stored document for the "url" query parameter, or halts with 404 when no file exists for it. + */ + @SneakyThrows + private String pathWikiGet(Request request, Response response) { + var timer = wmsa_archive_fetch_time.startTimer(); + + try { + String url = request.queryParams("url"); + + var filename = getWikiFilename(wikiPath, url); + + if (Files.exists(filename)) { + try (var stream = new GZIPInputStream(new FileInputStream(filename.toFile()))) { + /* Decode explicitly as UTF-8 (the client submits text/plain; charset=UTF-8) instead of relying on the platform default charset. */ + return new String(stream.readAllBytes(), StandardCharsets.UTF_8); + } + } else { + Spark.halt(404); + return null; + } + } + finally { + timer.observeDuration(); + } + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/archive/archiver/ArchiveExtractor.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/archive/archiver/ArchiveExtractor.java new file mode 100644 index 00000000..e8d920ef --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/archive/archiver/ArchiveExtractor.java @@ -0,0 +1,65 @@ +package nu.marginalia.wmsa.edge.archive.archiver; + +import com.google.gson.Gson; +import com.google.gson.GsonBuilder; +import nu.marginalia.wmsa.edge.archive.request.EdgeArchiveSubmissionReq; +import nu.marginalia.wmsa.edge.model.crawl.EdgeRawPageContents; +import org.apache.commons.compress.archivers.tar.TarArchiveEntry; +import org.apache.commons.compress.archivers.tar.TarArchiveInputStream; +import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.BufferedInputStream; +import java.io.FileInputStream; +import java.io.InputStreamReader; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.function.Consumer; + +public class ArchiveExtractor { + private final Path archivePath; + private final String arhivePattern = "archive-%04d.tar.gz"; + + private final Logger logger =
LoggerFactory.getLogger(getClass()); + private final Gson gson = new GsonBuilder().create(); + + public ArchiveExtractor(Path archivePath) { + this.archivePath = archivePath; + + } + + public void forEach(Consumer contents) { + for (int i = 0; ; ++i) { + var fn = getArchiveFile(i); + logger.info("{}", fn); + if (!Files.exists(fn)) { + break; + } + try (var stream = new TarArchiveInputStream(new GzipCompressorInputStream(new BufferedInputStream(new FileInputStream(fn.toFile()))))) { + TarArchiveEntry entry; + while ((entry = stream.getNextTarEntry()) != null) { + if (entry.isFile()) { + try { + var obj = gson.fromJson(new InputStreamReader(stream), EdgeArchiveSubmissionReq.class); + if (obj != null) { + contents.accept(obj.getData()); + } + } + catch (Exception ex) { + logger.error("Could not unpack {} - {} {}", entry.getName(), ex.getClass().getSimpleName(), ex.getMessage()); + } + } + } + } catch (Exception e) { + e.printStackTrace(); + } + } + } + + private Path getArchiveFile(int number) { + final String fileName = String.format(arhivePattern, number); + return archivePath.resolve(fileName); + } +} + diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/archive/archiver/ArchivedFile.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/archive/archiver/ArchivedFile.java new file mode 100644 index 00000000..5a477495 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/archive/archiver/ArchivedFile.java @@ -0,0 +1,7 @@ +package nu.marginalia.wmsa.edge.archive.archiver; + +import lombok.Data; + + +public record ArchivedFile(String filename,byte[] data ) { +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/archive/archiver/Archiver.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/archive/archiver/Archiver.java new file mode 100644 index 00000000..63a45b3c --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/archive/archiver/Archiver.java @@ -0,0 +1,116 @@ +package 
nu.marginalia.wmsa.edge.archive.archiver; + +import com.google.inject.name.Named; +import lombok.SneakyThrows; +import org.apache.commons.compress.archivers.tar.TarArchiveEntry; +import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream; +import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream; +import org.apache.commons.io.IOUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.inject.Inject; +import javax.inject.Singleton; +import java.io.ByteArrayInputStream; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.concurrent.LinkedBlockingDeque; +import java.util.concurrent.TimeUnit; + +@Singleton +public class Archiver implements AutoCloseable { + private final Path archivePath; + private final int filesPerArchive; + private final String arhivePattern = "archive-%04d.tar.gz"; + + private final LinkedBlockingDeque writeQueue = new LinkedBlockingDeque<>(10); + private final Thread writeThread; + + private volatile int archiveNumber; + private volatile boolean running; + + private final Logger logger = LoggerFactory.getLogger(getClass()); + + @Inject + public Archiver(@Named("archive-path") Path archivePath, @Named("archive-size") Integer filesPerArchive) { + this.archivePath = archivePath; + this.filesPerArchive = filesPerArchive; + + if (!Files.exists(archivePath)) { + throw new IllegalArgumentException("Archive path does not exist"); + } + for (int i = 0;; ++i) { + if (!Files.exists(getArchiveFile(i))) { + archiveNumber = i; + break; + } + } + + running = true; + writeThread = new Thread(this::writeThreadMain, "ArchiveWriteThread"); + writeThread.start(); + } + + private Path getArchiveFile(int number) { + final String fileName = String.format(arhivePattern, number); + return archivePath.resolve(fileName); + } + + public void writeData(ArchivedFile file) throws 
InterruptedException { + if (!running) throw new IllegalStateException("Archiver is closing or closed"); + writeQueue.put(file); + } + + private void writeThreadMain() { + try { + while (running || !writeQueue.isEmpty()) { + writeToFile(archiveNumber); + archiveNumber++; + } + running = false; + } + catch (Exception ex) { + logger.error("Uncaught exception in writer thread!!"); + } + } + + private void writeToFile(int archiveNumber) { + var archiveFile = getArchiveFile(archiveNumber); + + logger.info("Switching to file {}", archiveFile); + + try (TarArchiveOutputStream taos = new TarArchiveOutputStream(new GzipCompressorOutputStream(new FileOutputStream(archiveFile.toFile())))) { + for (int i = 0; i < filesPerArchive; i++) { + + ArchivedFile writeJob = null; + while (writeJob == null) { + writeJob = writeQueue.poll(1, TimeUnit.SECONDS); + if (!running) return; + } + + var entry = new TarArchiveEntry(String.format("%06d-%s", i, writeJob.filename())); + entry.setSize(writeJob.data().length); + taos.putArchiveEntry(entry); + logger.debug("Writing {} to {}", writeJob.filename(), archiveFile); + try (var bais = new ByteArrayInputStream(writeJob.data())) { + IOUtils.copy(bais, taos); + } + taos.closeArchiveEntry(); + } + taos.finish(); + logger.debug("Finishing {}", archiveFile); + } catch (Exception e) { + logger.error("Error", e); + } + + } + + @Override + public void close() throws Exception { + running = false; + writeThread.join(); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/archive/client/ArchiveClient.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/archive/client/ArchiveClient.java new file mode 100644 index 00000000..0e56ac53 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/archive/client/ArchiveClient.java @@ -0,0 +1,56 @@ +package nu.marginalia.wmsa.edge.archive.client; + +import com.google.inject.Inject; +import com.google.inject.Singleton; +import io.reactivex.rxjava3.core.Observable; +import 
nu.marginalia.wmsa.client.AbstractDynamicClient; +import nu.marginalia.wmsa.client.HttpStatusCode; +import nu.marginalia.wmsa.configuration.ServiceDescriptor; +import nu.marginalia.wmsa.configuration.server.Context; +import nu.marginalia.wmsa.edge.archive.request.EdgeArchiveSubmissionReq; +import nu.marginalia.wmsa.edge.model.crawl.EdgeRawPageContents; +import nu.marginalia.wmsa.edge.model.EdgeUrl; +import okhttp3.MediaType; +import org.eclipse.jetty.util.UrlEncoded; + +import javax.annotation.CheckReturnValue; +import java.util.concurrent.Semaphore; + +@Singleton +public class ArchiveClient extends AbstractDynamicClient { + + private final Semaphore submitPageSem = new Semaphore(3, true); + + @Inject + public ArchiveClient() { + super(ServiceDescriptor.EDGE_ARCHIVE); + } + + /** + * Posts the page to /page/submit and blocks until the request completes; at most three submissions run concurrently. + * + * @throws InterruptedException if interrupted while waiting for a submission permit + */ + @CheckReturnValue + public void submitPage(Context ctx, EdgeUrl url, EdgeRawPageContents data) throws InterruptedException { + /* Acquire outside the try: if acquire() is interrupted, finally must not release a permit that was never obtained. */ + submitPageSem.acquire(); + try { + super.post(ctx, "/page/submit", new EdgeArchiveSubmissionReq(url, data)).blockingSubscribe(); + } + finally { + submitPageSem.release(); + } + + } + + @CheckReturnValue + public Observable submitWiki(Context ctx, String url, String data) { + return super.post(ctx, "/wiki/submit?url="+UrlEncoded.encodeString(url), data, MediaType.parse("text/plain; charset=UTF-8")); + } + + @CheckReturnValue + public Observable hasWiki(Context ctx, String url) { + return super.get(ctx, "/wiki/has?url="+UrlEncoded.encodeString(url), Boolean.class); + } + + @CheckReturnValue + public Observable getWiki(Context ctx, String url) { + return super.get(ctx, "/wiki/get?url="+UrlEncoded.encodeString(url)); + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/archive/request/EdgeArchiveSubmissionReq.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/archive/request/EdgeArchiveSubmissionReq.java new file mode 100644 index 00000000..fbd97452 --- /dev/null +++
b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/archive/request/EdgeArchiveSubmissionReq.java @@ -0,0 +1,13 @@ +package nu.marginalia.wmsa.edge.archive.request; + +import lombok.AllArgsConstructor; +import lombok.Getter; +import lombok.ToString; +import nu.marginalia.wmsa.edge.model.crawl.EdgeRawPageContents; +import nu.marginalia.wmsa.edge.model.EdgeUrl; + +@AllArgsConstructor @Getter @ToString +public class EdgeArchiveSubmissionReq { + EdgeUrl url; + EdgeRawPageContents data; +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/assistant/EdgeAssistantMain.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/assistant/EdgeAssistantMain.java new file mode 100644 index 00000000..44041723 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/assistant/EdgeAssistantMain.java @@ -0,0 +1,33 @@ +package nu.marginalia.wmsa.edge.assistant; + +import com.google.inject.Guice; +import com.google.inject.Inject; +import com.google.inject.Injector; +import nu.marginalia.wmsa.configuration.MainClass; +import nu.marginalia.wmsa.configuration.ServiceDescriptor; +import nu.marginalia.wmsa.configuration.module.ConfigurationModule; +import nu.marginalia.wmsa.configuration.module.DatabaseModule; +import nu.marginalia.wmsa.configuration.server.Initialization; + +public class EdgeAssistantMain extends MainClass { + private EdgeAssistantService service; + + @Inject + public EdgeAssistantMain(EdgeAssistantService service) { + this.service = service; + } + + public static void main(String... 
args) { + init(ServiceDescriptor.EDGE_ASSISTANT, args); + + Injector injector = Guice.createInjector( + new EdgeAssistantModule(), + new ConfigurationModule(), + new DatabaseModule() + ); + + injector.getInstance(EdgeAssistantMain.class); + injector.getInstance(Initialization.class).setReady(); + + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/assistant/EdgeAssistantModule.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/assistant/EdgeAssistantModule.java new file mode 100644 index 00000000..1632bbf2 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/assistant/EdgeAssistantModule.java @@ -0,0 +1,22 @@ +package nu.marginalia.wmsa.edge.assistant; + +import com.google.inject.AbstractModule; +import nu.marginalia.wmsa.edge.crawler.domain.language.conf.LanguageModels; + +import java.nio.file.Path; + +import static com.google.inject.name.Names.named; + +public class EdgeAssistantModule extends AbstractModule { + public void configure() { + bind(Path.class).annotatedWith(named("suggestions-file")).toInstance(Path.of("/var/lib/wmsa/suggestions.txt")); + bind(LanguageModels.class).toInstance(new LanguageModels( + Path.of("/var/lib/wmsa/model/ngrams-generous-emstr.bin"), + Path.of("/var/lib/wmsa/model/tfreq-new-algo3.bin"), + Path.of("/var/lib/wmsa/model/opennlp-sentence.bin"), + Path.of("/var/lib/wmsa/model/English.RDR"), + Path.of("/var/lib/wmsa/model/English.DICT"), + Path.of("/var/lib/wmsa/model/opennlp-tok.bin") + )); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/assistant/EdgeAssistantService.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/assistant/EdgeAssistantService.java new file mode 100644 index 00000000..3e595d47 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/assistant/EdgeAssistantService.java @@ -0,0 +1,192 @@ +package nu.marginalia.wmsa.edge.assistant; + +import com.google.gson.Gson; +import com.google.gson.GsonBuilder; +import 
com.google.inject.Inject; +import com.google.inject.name.Named; +import io.reactivex.rxjava3.core.Observable; +import lombok.SneakyThrows; +import nu.marginalia.wmsa.client.HttpStatusCode; +import nu.marginalia.wmsa.configuration.server.*; +import nu.marginalia.wmsa.edge.archive.client.ArchiveClient; +import nu.marginalia.wmsa.edge.assistant.dict.DictionaryService; +import nu.marginalia.wmsa.edge.assistant.eval.MathParser; +import nu.marginalia.wmsa.edge.assistant.eval.Units; +import nu.marginalia.wmsa.edge.assistant.screenshot.ScreenshotService; +import nu.marginalia.wmsa.edge.assistant.suggest.Suggestions; +import nu.marginalia.wmsa.renderer.mustache.MustacheRenderer; +import nu.marginalia.wmsa.renderer.mustache.RendererFactory; +import org.eclipse.jetty.websocket.api.StatusCode; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import spark.Request; +import spark.Response; +import spark.Spark; + +import java.util.Map; + +public class EdgeAssistantService extends Service { + + private final Logger logger = LoggerFactory.getLogger(getClass()); + private final Gson gson = new GsonBuilder().create(); + private final Units units; + private final DictionaryService dictionaryService; + private final MathParser mathParser; + private final ArchiveClient archiveClient; + private final ScreenshotService screenshotService; + private final MustacheRenderer wikiErrorPageRenderer; + private final MustacheRenderer wikiSearchResultRenderer; + private final Suggestions suggestions; + + @SneakyThrows + @Inject + public EdgeAssistantService(@Named("service-host") String ip, + @Named("service-port") Integer port, + Initialization initialization, + MetricsServer metricsServer, + DictionaryService dictionaryService, + MathParser mathParser, + Units units, + ArchiveClient archiveClient, + RendererFactory rendererFactory, + ScreenshotService screenshotService, + Suggestions suggestions + ) + { + super(ip, port, initialization, metricsServer); + this.dictionaryService = 
dictionaryService; + this.mathParser = mathParser; + this.units = units; + this.archiveClient = archiveClient; + this.screenshotService = screenshotService; + this.suggestions = suggestions; + + Spark.staticFiles.expireTime(600); + + if (rendererFactory != null) { + wikiErrorPageRenderer = rendererFactory.renderer("encyclopedia/wiki-error"); + wikiSearchResultRenderer = rendererFactory.renderer("encyclopedia/wiki-search"); + } + else { + wikiErrorPageRenderer = null; + wikiSearchResultRenderer = null; + } + + Spark.get("/public/wiki/*", this::getWikiPage); + Spark.get("/public/wiki-search", this::searchWikiPage); + + Spark.get("/public/screenshot/:id", screenshotService::serveScreenshotRequest); + Spark.get("/screenshot/:id", screenshotService::serveScreenshotRequest); + + Spark.get("/dictionary/:word", (req, rsp) -> dictionaryService.define(req.params("word")), this::convertToJson); + Spark.get("/spell-check/:term", (req, rsp) -> dictionaryService.spellCheck(req.params("term").toLowerCase()), this::convertToJson); + Spark.get("/encyclopedia/:term", (req, rsp) -> dictionaryService.encyclopedia(req.params("term")), this::convertToJson); + Spark.get("/unit-conversion", (req, rsp) -> unitConversion( + rsp, + req.queryParams("value"), + req.queryParams("from"), + req.queryParams("to") + + )); + Spark.get("/eval-expression", (req, rsp) -> evalExpression( + rsp, + req.queryParams("value") + )); + + Spark.get("/public/suggest/", this::getSuggestions, this::convertToJson); + + Spark.awaitInitialization(); + } + + private Object getSuggestions(Request request, Response response) { + response.type("application/json"); + var param = request.queryParams("partial"); + if (param == null) { + logger.warn("Bad parameter, partial is null"); + Spark.halt(500); + } + return suggestions.getSuggestions(10, param); + } + + @SneakyThrows + private Object getWikiPage(Request req, Response rsp) { + final var ctx = Context.fromRequest(req); + + final String[] splats = req.splat(); + if 
(splats.length == 0) + rsp.redirect("https://encyclopedia.marginalia.nu/wiki-start.html"); + + + final String s = splats[0]; + + String pageName = dictionaryService.resolveEncylopediaRedirect(s).orElse(s); + logger.info("Resolved {} -> {}", s, pageName); + return archiveClient.getWiki(ctx, pageName) + .onErrorResumeWith(resolveWikiPageNameWrongCase(ctx, s)) + .blockingFirst(); + } + + private Observable resolveWikiPageNameWrongCase(Context ctx, String s) { + var rsp = dictionaryService.findEncyclopediaPageDirect(s); + if (rsp.isEmpty()) { + return renderSearchPage(s); + } + return archiveClient.getWiki(ctx, rsp.get().getInternalName()) + .onErrorResumeWith(renderSearchPage(s)); + } + + private Observable renderSearchPage(String s) { + return Observable.fromCallable(() -> wikiSearchResultRenderer.render( + Map.of("query", s, + "error", "true", + "results", dictionaryService.findEncyclopediaPages(s)))); + } + + @SneakyThrows + private Object searchWikiPage(Request req, Response rsp) { + final var ctx = Context.fromRequest(req); + + String term = req.queryParams("query"); + if (null == term) { + rsp.redirect("https://encyclopedia.marginalia.nu/wiki-start.html"); + return ""; + } + + return wikiSearchResultRenderer.render( + Map.of("query", term, + "results", + dictionaryService.findEncyclopediaPages(term)) + ); + } + + private Object evalExpression(Response rsp, String value) { + try { + var val = mathParser.evalFormatted(value); + if (val.isBlank()) { + Spark.halt(400); + return null; + } + return val; + } + catch (Exception ex) { + Spark.halt(400); + return null; + } + } + + private Object unitConversion(Response rsp, String value, String fromUnit, String toUnit) { + var result = units.convert(value, fromUnit, toUnit); + if (result.isPresent()) { + return result.get(); + } + { + Spark.halt(400); + return null; + } + } + + private String convertToJson(Object o) { + return gson.toJson(o); + } + +} diff --git 
// ==== file: marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/assistant/client/AssistantClient.java ====
package nu.marginalia.wmsa.edge.assistant.client;

import com.google.inject.Inject;
import com.google.inject.Singleton;
import io.reactivex.rxjava3.core.Observable;
import nu.marginalia.wmsa.client.AbstractDynamicClient;
import nu.marginalia.wmsa.configuration.ServiceDescriptor;
import nu.marginalia.wmsa.configuration.server.Context;
import nu.marginalia.wmsa.edge.assistant.dict.DictionaryResponse;
import nu.marginalia.wmsa.edge.assistant.dict.WikiArticles;
import org.eclipse.jetty.util.UrlEncoded;

import java.util.List;

/**
 * Client for the HTTP endpoints of the edge assistant service. Every
 * caller-supplied value is URL-encoded before it is placed in the request URL.
 */
@Singleton
public class AssistantClient extends AbstractDynamicClient {

    @Inject
    public AssistantClient() {
        super(ServiceDescriptor.EDGE_ASSISTANT);
    }

    /** Requests {@code /dictionary/<word>}. */
    public Observable<DictionaryResponse> dictionaryLookup(Context ctx, String word) {
        return super.get(ctx, "/dictionary/" + encode(word), DictionaryResponse.class);
    }

    /** Requests {@code /encyclopedia/<word>}. */
    public Observable<WikiArticles> encyclopediaLookup(Context ctx, String word) {
        return super.get(ctx, "/encyclopedia/" + encode(word), WikiArticles.class);
    }

    /** Requests {@code /spell-check/<word>}; the response is deserialized as a list. */
    @SuppressWarnings("unchecked")
    public Observable<List<String>> spellCheck(Context ctx, String word) {
        // Only a raw List.class token is available, hence the double cast.
        return (Observable<List<String>>) (Object) super.get(ctx, "/spell-check/" + encode(word), List.class);
    }

    /**
     * Requests {@code /unit-conversion}. All three arguments are URL-encoded,
     * exactly like the expression in {@link #evalMath}; they used to be
     * concatenated raw, so a space, {@code &}, {@code +} or {@code /} in any
     * of them corrupted the query string.
     *
     * @param value amount to convert, must not be {@code null}
     * @param from  source unit, must not be {@code null}
     * @param to    target unit, must not be {@code null}
     */
    public Observable<String> unitConversion(Context ctx, String value, String from, String to) {
        return super.get(ctx, "/unit-conversion?value=" + encode(value)
                + "&from=" + encode(from)
                + "&to=" + encode(to));
    }

    /** Requests {@code /eval-expression} for the given expression. */
    public Observable<String> evalMath(Context ctx, String expression) {
        return super.get(ctx, "/eval-expression?value=" + encode(expression));
    }

    /** Single place for the URL encoding used by all requests of this client. */
    private static String encode(String s) {
        return UrlEncoded.encodeString(s);
    }
}
// ==== file: marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/assistant/dict/DictionaryEntry.java ====
package nu.marginalia.wmsa.edge.assistant.dict;

import lombok.AllArgsConstructor;
import lombok.Getter;
import lombok.ToString;

/**
 * One dictionary row: the entry type, the word and its definition.
 * Immutable; Lombok generates the constructor, getters and {@code toString()}.
 */
@AllArgsConstructor
@Getter
@ToString
public class DictionaryEntry {
    public final String type;
    public final String word;
    public final String definition;
}

// ==== file: marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/assistant/dict/DictionaryResponse.java ====
package nu.marginalia.wmsa.edge.assistant.dict;

import lombok.AllArgsConstructor;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.ToString;

import java.util.List;

/**
 * Result of a dictionary lookup. The fields are public and mutable and a
 * no-args constructor exists, so the object can be filled in after creation.
 */
@ToString @Getter @AllArgsConstructor @NoArgsConstructor
public class DictionaryResponse {
    public String word;
    // Element type restored: the entries added to this list are DictionaryEntry objects.
    public List<DictionaryEntry> entries;
}

// ==== file: marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/assistant/dict/DictionaryService.java (header; continues on the next source line) ====
package nu.marginalia.wmsa.edge.assistant.dict;

import com.google.inject.Inject;
import com.google.inject.Singleton;
import com.zaxxer.hikari.HikariDataSource;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.*;
import java.util.stream.Collectors;

/**
 * Database-backed lookups for dictionary definitions and encyclopedia (wiki)
 * titles, plus delegation to the {@link SpellChecker}.
 *
 * Every JDBC connection, statement and result set is opened in
 * try-with-resources so that it is released even when a query fails.
 */
@Singleton
public class DictionaryService {

    private final HikariDataSource dataSource;
    private final SpellChecker spellChecker;
    private final Logger logger = LoggerFactory.getLogger(getClass());

    @Inject
    public DictionaryService(HikariDataSource dataSource, SpellChecker spellChecker)
    {
        this.spellChecker = spellChecker;
        this.dataSource = dataSource;
    }

    /**
     * Fetches all REF_DICTIONARY rows whose WORD equals {@code word}.
     *
     * NOTE(review): {@code response.word} is never populated here - confirm
     * whether consumers expect it.
     *
     * @throws RuntimeException wrapping any failure of the database access
     */
    public DictionaryResponse define(String word) {
        DictionaryResponse response = new DictionaryResponse();
        response.entries = new ArrayList<>();

        try (var connection = dataSource.getConnection();
             var stmt = connection.prepareStatement("SELECT TYPE,WORD,DEFINITION FROM REF_DICTIONARY WHERE WORD=?")) {
            stmt.setString(1, word);

            try (var rsp = stmt.executeQuery()) {
                while (rsp.next()) {
                    response.entries.add(new DictionaryEntry(rsp.getString(1), rsp.getString(2), rsp.getString(3)));
                }
            }
        }
        catch (Exception ex) {
            throw new RuntimeException(ex);
        }

        return response;
    }

    /**
     * Lists the distinct lower-case wiki titles equal to {@code term}, each
     * re-capitalized word by word. A database failure is logged and answered
     * with an empty result instead of an exception.
     */
    public WikiArticles encyclopedia(String term) {
        WikiArticles response = new WikiArticles();
        response.entries = new ArrayList<>();

        try (var connection = dataSource.getConnection();
             var stmt = connection.prepareStatement("SELECT DISTINCT(NAME_LOWER) FROM REF_WIKI_TITLE WHERE NAME_LOWER=?")) {
            stmt.setString(1, term);

            try (var rsp = stmt.executeQuery()) {
                while (rsp.next()) {
                    response.entries.add(capitalizeWikiString(rsp.getString(1)));
                }
            }
        }
        catch (Exception ex) {
            logger.error("Failed to fetch articles", ex);
            return new WikiArticles();
        }

        return response;
    }

    /**
     * Resolves the redirect target of a wiki title, matched case-insensitively.
     * <ul>
     *   <li>A row whose NAME equals {@code term} exactly, or a row without
     *       REF_NAME, decides immediately: its REF_NAME (possibly absent) is
     *       returned.</li>
     *   <li>Otherwise the REF_NAME of the first row seen is returned.</li>
     * </ul>
     *
     * @throws RuntimeException wrapping any failure of the database access
     */
    public Optional<String> resolveEncylopediaRedirect(String term) {
        String firstInexactTarget = null;

        try (var connection = dataSource.getConnection();
             var stmt = connection.prepareStatement("SELECT NAME, REF_NAME FROM REF_WIKI_TITLE WHERE NAME_LOWER=LOWER(?)")) {
            stmt.setString(1, term);

            try (var rsp = stmt.executeQuery()) {
                while (rsp.next()) {
                    final String name = rsp.getString(1);
                    final String refName = rsp.getString(2);

                    if (term.equals(name) || refName == null) {
                        return Optional.ofNullable(refName);
                    }
                    if (firstInexactTarget == null) {
                        firstInexactTarget = refName;
                    }
                }
            }
        }
        catch (Exception ex) {
            throw new RuntimeException(ex);
        }

        return Optional.ofNullable(firstInexactTarget);
    }


    /**
     * Finds the first title row for {@code term} (spaces replaced by
     * underscores, case-insensitive) that is not a redirect, i.e. has no
     * REF_NAME.
     *
     * @throws RuntimeException wrapping any failure of the database access
     */
    public Optional<WikiSearchResult> findEncyclopediaPageDirect(String term) {

        try (var connection = dataSource.getConnection();
             var stmt = connection.prepareStatement("SELECT NAME, REF_NAME FROM REF_WIKI_TITLE WHERE NAME_LOWER=LOWER(?)")) {
            stmt.setString(1, term.replace(' ', '_'));

            try (var rsp = stmt.executeQuery()) {
                while (rsp.next()) {
                    String name = rsp.getString(1);
                    String refName = rsp.getString(2);

                    if (refName == null) {
                        return Optional.of(new WikiSearchResult(name, null));
                    }
                }
            }
        }
        catch (Exception ex) {
            throw new RuntimeException(ex);
        }

        return Optional.empty();
    }

    /**
     * Searches wiki titles for {@code term}. The result lists, in this order:
     * exact non-redirect matches, redirect matches (exact or by prefix), and
     * non-redirect prefix matches (at most 10 rows are read for the prefix
     * query). Entries already present in an earlier group are dropped from the
     * later ones.
     *
     * @throws RuntimeException wrapping any failure of the database access
     */
    public List<WikiSearchResult> findEncyclopediaPages(String term) {
        final List<WikiSearchResult> directMatches = new ArrayList<>();
        final Set<WikiSearchResult> directSearchMatches = new HashSet<>();
        final Set<WikiSearchResult> indirectMatches = new HashSet<>();

        try (var connection = dataSource.getConnection()) {

            try (var stmt = connection.prepareStatement("SELECT NAME, REF_NAME FROM REF_WIKI_TITLE WHERE NAME_LOWER=LOWER(?)")) {
                stmt.setString(1, term.replace(' ', '_'));

                try (var rsp = stmt.executeQuery()) {
                    while (rsp.next()) {
                        String name = rsp.getString(1);
                        String refName = rsp.getString(2);

                        if (refName == null) {
                            directMatches.add(new WikiSearchResult(name, null));
                        } else {
                            indirectMatches.add(new WikiSearchResult(name, refName));
                        }
                    }
                }
            }

            // Prefix search. The user's text is escaped with an explicit ESCAPE
            // character so that '%' and '_' in it are matched literally.
            try (var stmt = connection.prepareStatement("SELECT NAME, REF_NAME FROM REF_WIKI_TITLE WHERE NAME_LOWER LIKE ? ESCAPE '!' LIMIT 10")) {
                stmt.setString(1, escapeLike(term.replace(' ', '_').toLowerCase(Locale.ROOT)) + "%");

                try (var rsp = stmt.executeQuery()) {
                    while (rsp.next()) {
                        String name = rsp.getString(1);
                        String refName = rsp.getString(2);

                        if (refName == null) {
                            directSearchMatches.add(new WikiSearchResult(name, null));
                        } else {
                            indirectMatches.add(new WikiSearchResult(name, refName));
                        }
                    }
                }
            }
        }
        catch (Exception ex) {
            throw new RuntimeException(ex);
        }

        directMatches.forEach(indirectMatches::remove);
        indirectMatches.removeAll(directSearchMatches);
        directMatches.forEach(directSearchMatches::remove);
        directMatches.addAll(indirectMatches);
        directMatches.addAll(directSearchMatches);
        return directMatches;
    }

    /**
     * Escapes the LIKE metacharacters {@code %} and {@code _} (and the escape
     * character {@code !} itself) for use with {@code ESCAPE '!'}.
     *
     * The former {@code replaceAll("%", "\\%")} had no effect: in a regex
     * replacement string the backslash merely quotes the next character, so
     * "%" was replaced by "%".
     */
    private static String escapeLike(String s) {
        return s.replace("!", "!!")
                .replace("%", "!%")
                .replace("_", "!_");
    }

    /**
     * Upper-cases the first letter and lower-cases the rest of every
     * underscore-separated part of {@code string}.
     */
    private String capitalizeWikiString(String string) {
        if (string.contains("_")) {
            return Arrays.stream(string.split("_")).map(this::capitalizeWikiString).collect(Collectors.joining("_"));
        }
        if (string.length() < 2) {
            return string.toUpperCase(Locale.ROOT);
        }
        return Character.toUpperCase(string.charAt(0)) + string.substring(1).toLowerCase(Locale.ROOT);
    }

    /** Delegates to {@link SpellChecker#correct(String)}. */
    public List<String> spellCheck(String word) {
        return spellChecker.correct(word);
    }
}

// ==== file: marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/assistant/dict/NGramDict.java (header; continues on the next source line) ====
package nu.marginalia.wmsa.edge.assistant.dict;

import ca.rmen.porterstemmer.PorterStemmer;
import gnu.trove.map.hash.TLongIntHashMap;
import gnu.trove.map.hash.TLongLongHashMap;
import gnu.trove.set.hash.TLongHashSet;
import nu.marginalia.wmsa.edge.crawler.domain.language.conf.LanguageModels;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.annotation.Nullable;
import javax.inject.Inject;
import javax.inject.Singleton;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Arrays;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

/**
 * In-memory table of N-gram frequencies keyed by a 64 bit hash of the stemmed
 * term. The table is read once, in the constructor, from the file named by
 * {@code LanguageModels.ngramFrequency}.
 */
@Singleton
public class NGramDict {

    private final TLongIntHashMap wordRates = new TLongIntHashMap(1_000_000, 0.5f, 0, 0);

    private final Logger logger = LoggerFactory.getLogger(getClass());
    private static final Pattern separator = Pattern.compile("[_ ]+");
    // NOTE(review): shared by all threads; assumes PorterStemmer.stemWord keeps no state - confirm.
    private static final PorterStemmer ps = new PorterStemmer();

    private static long fileSize(Path p) throws IOException {
        return Files.size(p);
    }

    /**
     * Loads the frequency file as a sequence of (long hash, long count)
     * pairs; each count is narrowed to int. Reading stops at end of file.
     * An I/O error is logged and leaves whatever was read so far in place.
     *
     * @param models may be {@code null}, in which case the dictionary stays empty
     */
    @Inject
    public NGramDict(@Nullable LanguageModels models) {
        if (models == null) {
            return;
        }

        if (models.ngramFrequency != null) {

            try (var frequencyData = new DataInputStream(new BufferedInputStream(Files.newInputStream(models.ngramFrequency)))) {

                // 16 bytes per record: two longs
                wordRates.ensureCapacity((int)(fileSize(models.ngramFrequency)/16));

                for (;;) {
                    wordRates.put(frequencyData.readLong(), (int) frequencyData.readLong());
                }
            } catch (EOFException ignored) {
                // expected: this is how the read loop above terminates
            } catch (IOException e) {
                logger.error("IO Exception reading {}", models.ngramFrequency, e);
            }
        }

        logger.info("Read {} N-grams frequencies", wordRates.size());
    }


    /**
     * Command line helper: writes the hash of every line of {@code in-file}
     * that consists of underscore-joined words to {@code out-file}, one long
     * per line.
     *
     * Returns right after the usage message when the argument count is wrong
     * (it used to continue into an ArrayIndexOutOfBoundsException), and stops
     * at the first write error instead of printing one trace per line.
     */
    public static void main(String... args) {
        if (args.length != 2) {
            System.err.println("Expected arguments: in-file out-file");
            return;
        }
        String inFile = args[0];
        String outFile = args[1];

        var wordPattern = Pattern.compile("\\w+(_\\w+)*").asMatchPredicate();
        try (var linesStr = Files.lines(Path.of(inFile));
             var dos = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(outFile)))
        ) {
            linesStr
                    .filter(wordPattern)
                    .mapToLong(NGramDict::getStringHash)
                    .forEach(hash -> writeLong(dos, hash));
        } catch (IOException | UncheckedIOException e) {
            e.printStackTrace();
        }
    }

    /** Writes one long, converting the checked exception so it can leave the stream pipeline. */
    private static void writeLong(DataOutputStream dos, long value) {
        try {
            dos.writeLong(value);
        } catch (IOException e) {
            throw new UncheckedIOException(e);
        }
    }

    /**
     * Hashes a term. Strings longer than one character are split on runs of
     * '_' or ' ', each part is stemmed, and the stemmed parts are hashed
     * together; shorter strings are hashed as they are. Bytes are taken as
     * UTF-8 so the hash does not depend on the platform default charset.
     */
    public static long getStringHash(String s) {
        String[] strings = separator.split(s);
        if (s.length() > 1) {
            byte[][] parts = new byte[strings.length][];
            for (int i = 0; i < parts.length; i++) {
                parts[i] = ps.stemWord(strings[i]).getBytes(StandardCharsets.UTF_8);
            }
            return longHash(parts);
        }
        else {
            return longHash(s.getBytes(StandardCharsets.UTF_8));
        }
    }

    /** Frequency stored for a precomputed hash; 0 when absent. */
    public long getTermFreqHash(long hash) {
        return wordRates.get(hash);
    }

    /** Frequency of {@code s} after stemming; 0 when absent. */
    public long getTermFreq(String s) {
        return wordRates.get(getStringHash(s));
    }

    /** Frequency of an already stemmed term: the bytes are hashed without splitting or stemming. */
    public long getTermFreqStemmed(String s) {
        return wordRates.get(longHash(s.getBytes(StandardCharsets.UTF_8)));
    }

    /** Stems every '_' or ' ' separated part of {@code s} and joins the parts with '_'. */
    public static String getStemmedString(String s) {
        String[] strings = separator.split(s);
        if (s.length() > 1) {
            return Arrays.stream(strings).map(ps::stemWord).collect(Collectors.joining("_"));
        }
        else {
            return s;
        }

    }

    /**
     * Polynomial string hash (base 127, modulus 2^61-1) over the bytes of all
     * arrays in order; returns 0 for a null or empty argument list.
     *
     * NOTE(review): the intermediate products can exceed the range of long and
     * wrap around. The arithmetic is intentionally left exactly as it was,
     * because any change would alter the hash values - presumably the
     * frequency file loaded by the constructor is keyed by them.
     */
    public static long longHash(byte[]... bytesSets) {
        if (bytesSets == null || bytesSets.length == 0)
            return 0;

        // https://cp-algorithms.com/string/string-hashing.html
        int p = 127;
        long m = (1L<<61)-1;
        long p_power = 1;
        long hash_val = 0;

        for (byte[] bytes: bytesSets) {
            for (byte element : bytes) {
                hash_val = (hash_val + (element + 1) * p_power) % m;
                p_power = (p_power * p) % m;
            }
        }
        return hash_val;
    }

}

// ==== file: marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/assistant/dict/SpellChecker.java ====
package nu.marginalia.wmsa.edge.assistant.dict;

import com.google.inject.Inject;
import com.google.inject.Singleton;

import java.util.Comparator;
import java.util.List;
import java.util.stream.Collectors;

/** Thin wrapper around {@link SymSpell} returning plain strings. */
@Singleton
public class SpellChecker {

    private final SymSpell symSpell = new SymSpell();

    public SpellChecker() {

    }

    /**
     * @return the suggested corrections for {@code word}, ordered by
     *         ascending edit distance (stable with respect to the order
     *         delivered by SymSpell)
     */
    public List<String> correct(String word) {
        return symSpell.Correct(word).stream()
                .sorted(Comparator.comparingInt((SymSpell.SuggestionItem item) -> item.distance))
                .map(item -> item.term)
                .collect(Collectors.toList());
    }
}

// ==== file: marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/assistant/dict/SymSpell.java (header; continues on the next source line) ====
package nu.marginalia.wmsa.edge.assistant.dict;


import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

// SymSpell: 1 million times faster through Symmetric Delete spelling correction algorithm
//
// The Symmetric Delete spelling correction algorithm
reduces the complexity of edit candidate generation and dictionary lookup +// for a given Damerau-Levenshtein distance. It is six orders of magnitude faster and language independent. +// Opposite to other algorithms only deletes are required, no transposes + replaces + inserts. +// Transposes + replaces + inserts of the input term are transformed into deletes of the dictionary term. +// Replaces and inserts are expensive and language dependent: e.g. Chinese has 70,000 Unicode Han characters! +// +// Copyright (C) 2015 Wolf Garbe +// Version: 3.0 +// Author: Wolf Garbe +// Maintainer: Wolf Garbe +// URL: http://blog.faroo.com/2012/06/07/improved-edit-distance-based-spelling-correction/ +// Description: http://blog.faroo.com/2012/06/07/improved-edit-distance-based-spelling-correction/ +// +// License: +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License, +// version 3.0 (LGPL-3.0) as published by the Free Software Foundation. +// http://www.opensource.org/licenses/LGPL-3.0 +// +// Usage: single word + Enter: Display spelling suggestions +// Enter without input: Terminate the program + +public class SymSpell +{ + private int editDistanceMax=2; + private int verbose = 1; + //0: top suggestion + //1: all suggestions of smallest edit distance + //2: all suggestions <= editDistanceMax (slower, no early termination) + + public static class dictionaryItem + { + public List suggestions = new ArrayList(); + public int count = 0; + } + + public static class SuggestionItem + { + public String term = ""; + public int distance = 0; + public int count = 0; + + @Override + public boolean equals(Object obj) + { + return term.equals(((SuggestionItem)obj).term); + } + + @Override + public int hashCode() + { + return term.hashCode(); + } + } + + //Dictionary that contains both the original words and the deletes derived from them. A term might be both word and delete from another word at the same time. 
+ //For space reduction a item might be either of type dictionaryItem or Int. + //A dictionaryItem is used for word, word/delete, and delete with multiple suggestions. Int is used for deletes with a single suggestion (the majority of entries). + private HashMap dictionary = new HashMap(); //initialisierung + + //List of unique words. By using the suggestions (Int) as index for this list they are translated into the original String. + private List wordlist = new ArrayList(); + + //create a non-unique wordlist from sample text + //language independent (e.g. works with Chinese characters) + private Iterable parseWords(String text) + { + // \w Alphanumeric characters (including non-latin characters, umlaut characters and digits) plus "_" + // \d Digits + // Provides identical results to Norvigs regex "[a-z]+" for latin characters, while additionally providing compatibility with non-latin characters + List allMatches = new ArrayList(); + Matcher m = Pattern.compile("[\\w-[\\d_]]+").matcher(text.toLowerCase()); + while (m.find()) { + allMatches.add(m.group()); + } + return allMatches; + } + + public int maxlength = 0;//maximum dictionary term length + + //for every word there all deletes with an edit distance of 1..editDistanceMax created and added to the dictionary + //every delete entry has a suggestions list, which points to the original term(s) it was created from + //The dictionary may be dynamically updated (word frequency and new words) at any time by calling createDictionaryEntry + private boolean CreateDictionaryEntry(String key, String language) + { + boolean result = false; + dictionaryItem value=null; + Object valueo; + valueo = dictionary.get(language+key); + if (valueo!=null) + { + //int or dictionaryItem? delete existed before word! + if (valueo instanceof Integer) { + int tmp = (int)valueo; + value = new dictionaryItem(); + value.suggestions.add(tmp); + dictionary.put(language + key,value); + } + + //already exists: + //1. 
word appears several times + //2. word1==deletes(word2) + else + { + value = (dictionaryItem)valueo; + } + + //prevent overflow + if (value.count < Integer.MAX_VALUE) value.count++; + } + else if (wordlist.size() < Integer.MAX_VALUE) + { + value = new dictionaryItem(); + value.count++; + dictionary.put(language + key, value); + + if (key.length() > maxlength) maxlength = key.length(); + } + + //edits/suggestions are created only once, no matter how often word occurs + //edits/suggestions are created only as soon as the word occurs in the corpus, + //even if the same term existed before in the dictionary as an edit from another word + //a treshold might be specifid, when a term occurs so frequently in the corpus that it is considered a valid word for spelling correction + if(value.count == 1) + { + //word2index + wordlist.add(key); + int keyint = (int)(wordlist.size() - 1); + + result = true; + + //create deletes + for (String delete : Edits(key, 0, new HashSet())) + { + Object value2; + value2 = dictionary.get(language+delete); + if (value2!=null) + { + //already exists: + //1. word1==deletes(word2) + //2. deletes(word1)==deletes(word2) + //int or dictionaryItem? single delete existed before! 
+ if (value2 instanceof Integer) + { + //transformes int to dictionaryItem + int tmp = (int)value2; + dictionaryItem di = new dictionaryItem(); + di.suggestions.add(tmp); + dictionary.put(language + delete,di); + if (!di.suggestions.contains(keyint)) AddLowestDistance(di, key, keyint, delete); + } + else if (!((dictionaryItem)value2).suggestions.contains(keyint)) AddLowestDistance((dictionaryItem) value2, key, keyint, delete); + } + else + { + dictionary.put(language + delete, keyint); + } + + } + } + return result; + } + + //create a frequency dictionary from a corpus + private void CreateDictionary(String path, String language) + { + + try (var resource = Objects.requireNonNull(ClassLoader.getSystemResourceAsStream(path), + "Could not load dictionary"); + BufferedReader br = new BufferedReader(new InputStreamReader(resource))){ + String line; + while ((line = br.readLine()) != null) + { + for (String key : parseWords(line)) + { + CreateDictionaryEntry(key.toLowerCase(), language); + } + } + } + catch (Exception e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + } + + //save some time and space + private void AddLowestDistance(dictionaryItem item, String suggestion, int suggestionint, String delete) + { + //remove all existing suggestions of higher distance, if verbose<2 + //index2word + //TODO check + if ((verbose < 2) && (item.suggestions.size() > 0) && (wordlist.get(item.suggestions.get(0)).length()-delete.length() > suggestion.length() - delete.length())) item.suggestions.clear(); + //do not add suggestion of higher distance than existing, if verbose<2 + if ((verbose == 2) || (item.suggestions.size() == 0) || (wordlist.get(item.suggestions.get(0)).length()-delete.length() >= suggestion.length() - delete.length())) item.suggestions.add(suggestionint); + } + + //inexpensive and language independent: only deletes, no transposes + replaces + inserts + //replaces and inserts are expensive and language dependent (Chinese has 70,000 Unicode Han 
characters) + private HashSet Edits(String word, int editDistance, HashSet deletes) + { + editDistance++; + if (word.length() > 1) + { + for (int i = 0; i < word.length(); i++) + { + //delete ith character + String delete = word.substring(0,i)+word.substring(i+1); + if (deletes.add(delete)) + { + //recursion, if maximum edit distance not yet reached + if (editDistance < editDistanceMax) Edits(delete, editDistance, deletes); + } + } + } + return deletes; + } + + private List Lookup(String input, String language, int editDistanceMax) + { + //save some time + if (input.length() - editDistanceMax > maxlength) + return new ArrayList(); + + List candidates = new ArrayList(); + HashSet hashset1 = new HashSet(); + + List suggestions = new ArrayList(); + HashSet hashset2 = new HashSet(); + + Object valueo; + + //add original term + candidates.add(input); + + while (candidates.size()>0) + { + String candidate = candidates.get(0); + candidates.remove(0); + + //save some time + //early termination + //suggestion distance=candidate.distance... 
candidate.distance+editDistanceMax + //if canddate distance is already higher than suggestion distance, than there are no better suggestions to be expected + + //label for c# goto replacement + nosort:{ + + if ((verbose < 2) && (suggestions.size() > 0) && (input.length()-candidate.length() > suggestions.get(0).distance)) + break nosort; + + //read candidate entry from dictionary + valueo = dictionary.get(language + candidate); + if (valueo != null) + { + dictionaryItem value= new dictionaryItem(); + if (valueo instanceof Integer) + value.suggestions.add((int)valueo); + else value = (dictionaryItem)valueo; + + //if count>0 then candidate entry is correct dictionary term, not only delete item + if ((value.count > 0) && hashset2.add(candidate)) + { + //add correct dictionary term term to suggestion list + SuggestionItem si = new SuggestionItem(); + si.term = candidate; + si.count = value.count; + si.distance = input.length() - candidate.length(); + suggestions.add(si); + //early termination + if ((verbose < 2) && (input.length() - candidate.length() == 0)) + break nosort; + } + + //iterate through suggestions (to other correct dictionary items) of delete item and add them to suggestion list + Object value2; + for (int suggestionint : value.suggestions) + { + //save some time + //skipping double items early: different deletes of the input term can lead to the same suggestion + //index2word + //TODO + String suggestion = wordlist.get(suggestionint); + if (hashset2.add(suggestion)) + { + //True Damerau-Levenshtein Edit Distance: adjust distance, if both distances>0 + //We allow simultaneous edits (deletes) of editDistanceMax on on both the dictionary and the input term. + //For replaces and adjacent transposes the resulting edit distance stays <= editDistanceMax. + //For inserts and deletes the resulting edit distance might exceed editDistanceMax. 
+ //To prevent suggestions of a higher edit distance, we need to calculate the resulting edit distance, if there are simultaneous edits on both sides. + //Example: (bank==bnak and bank==bink, but bank!=kanb and bank!=xban and bank!=baxn for editDistanceMaxe=1) + //Two deletes on each side of a pair makes them all equal, but the first two pairs have edit distance=1, the others edit distance=2. + int distance = 0; + if (suggestion != input) + { + if (suggestion.length() == candidate.length()) distance = input.length() - candidate.length(); + else if (input.length() == candidate.length()) distance = suggestion.length() - candidate.length(); + else + { + //common prefixes and suffixes are ignored, because this speeds up the Damerau-levenshtein-Distance calculation without changing it. + int ii = 0; + int jj = 0; + while ((ii < suggestion.length()) && (ii < input.length()) && (suggestion.charAt(ii) == input.charAt(ii))) ii++; + while ((jj < suggestion.length() - ii) && (jj < input.length() - ii) && (suggestion.charAt(suggestion.length() - jj - 1) == input.charAt(input.length() - jj - 1))) jj++; + if ((ii > 0) || (jj > 0)) { + distance = DamerauLevenshteinDistance(suggestion.substring(ii, suggestion.length() - jj), input.substring(ii, input.length() - jj)); + } + else distance = DamerauLevenshteinDistance(suggestion, input); + } + } + + //save some time. 
+ //remove all existing suggestions of higher distance, if verbose<2 + if ((verbose < 2) && (suggestions.size() > 0) && (suggestions.get(0).distance > distance)) suggestions.clear(); + //do not process higher distances than those already found, if verbose<2 + if ((verbose < 2) && (suggestions.size() > 0) && (distance > suggestions.get(0).distance)) continue; + + if (distance <= editDistanceMax) + { + value2 = dictionary.get(language + suggestion); + if (value2!=null) + { + SuggestionItem si = new SuggestionItem(); + si.term = suggestion; + si.count = ((dictionaryItem)value2).count; + si.distance = distance; + suggestions.add(si); + } + } + } + }//end foreach + }//end if + + //add edits + //derive edits (deletes) from candidate (input) and add them to candidates list + //this is a recursive process until the maximum edit distance has been reached + if (input.length() - candidate.length() < editDistanceMax) + { + //save some time + //do not create edits with edit distance smaller than suggestions already found + if ((verbose < 2) && (suggestions.size() > 0) && (input.length() - candidate.length() >= suggestions.get(0).distance)) continue; + + for (int i = 0; i < candidate.length(); i++) + { + String delete = candidate.substring(0, i)+candidate.substring(i+1); + if (hashset1.add(delete)) candidates.add(delete); + } + } + } //end lable nosort + } //end while + + //sort by ascending edit distance, then by descending word frequency + if (verbose < 2) + //suggestions.Sort((x, y) => -x.count.CompareTo(y.count)); + Collections.sort(suggestions, new Comparator() + { + public int compare(SuggestionItem f1, SuggestionItem f2) + { + return -(f1.count-f2.count); + } + }); + else + //suggestions.Sort((x, y) => 2*x.distance.CompareTo(y.distance) - x.count.CompareTo(y.count)); + Collections.sort(suggestions, new Comparator() + { + public int compare(SuggestionItem x, SuggestionItem y) + { + return ((2*x.distance-y.distance)>0?1:0) - ((x.count - y.count)>0?1:0); + } + }); + if 
((verbose == 0)&&(suggestions.size()>1)) + return suggestions.subList(0, 1); + else return suggestions; + } + + public List Correct(String input) + { + return Lookup(input, "", editDistanceMax); + } + + public SymSpell() { + CreateDictionary("dictionary/en-words", ""); + } + + // Damerau–Levenshtein distance algorithm and code + // from http://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance (as retrieved in June 2012) + public int DamerauLevenshteinDistance(String a, String b) { + final int inf = a.length() + b.length() + 1; + int[][] H = new int[a.length() + 2][b.length() + 2]; + for (int i = 0; i <= a.length(); i++) { + H[i + 1][1] = i; + H[i + 1][0] = inf; + } + for (int j = 0; j <= b.length(); j++) { + H[1][j + 1] = j; + H[0][j + 1] = inf; + } + HashMap DA = new HashMap(); + for (int d = 0; d < a.length(); d++) + if (!DA.containsKey(a.charAt(d))) + DA.put(a.charAt(d), 0); + + + for (int d = 0; d < b.length(); d++) + if (!DA.containsKey(b.charAt(d))) + DA.put(b.charAt(d), 0); + + for (int i = 1; i <= a.length(); i++) { + int DB = 0; + for (int j = 1; j <= b.length(); j++) { + final int i1 = DA.get(b.charAt(j - 1)); + final int j1 = DB; + int d = 1; + if (a.charAt(i - 1) == b.charAt(j - 1)) { + d = 0; + DB = j; + } + H[i + 1][j + 1] = min( + H[i][j] + d, + H[i + 1][j] + 1, + H[i][j + 1] + 1, + H[i1][j1] + ((i - i1 - 1)) + + 1 + ((j - j1 - 1))); + } + DA.put(a.charAt(i - 1), i); + } + return H[a.length() + 1][b.length() + 1]; + } + public int min(int a, int b, int c, int d) { + return Math.min(a, Math.min(b, Math.min(c, d))); + } +} \ No newline at end of file diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/assistant/dict/WikiArticles.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/assistant/dict/WikiArticles.java new file mode 100644 index 00000000..29cf187e --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/assistant/dict/WikiArticles.java @@ -0,0 +1,23 @@ +package 
nu.marginalia.wmsa.edge.assistant.dict; + +import lombok.Getter; +import lombok.ToString; + +import java.util.List; + +@ToString @Getter +public class WikiArticles { + public List entries; + + public WikiArticles(String... args) { + entries = List.of(args); + } + public String getPage() { + if (entries.isEmpty()) { + return null; + } + else { + return entries.get(0); + } + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/assistant/dict/WikiCleaner.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/assistant/dict/WikiCleaner.java new file mode 100644 index 00000000..80560be1 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/assistant/dict/WikiCleaner.java @@ -0,0 +1,393 @@ +package nu.marginalia.wmsa.edge.assistant.dict; + +import lombok.SneakyThrows; +import net.sourceforge.jeuclid.MathMLParserSupport; +import net.sourceforge.jeuclid.context.Display; +import net.sourceforge.jeuclid.context.LayoutContextImpl; +import net.sourceforge.jeuclid.context.Parameter; +import net.sourceforge.jeuclid.font.FontFactory; +import org.apache.commons.lang3.tuple.Pair; +import org.jetbrains.annotations.NotNull; +import org.jsoup.Jsoup; +import org.jsoup.nodes.*; +import org.jsoup.select.Elements; +import org.jsoup.select.NodeFilter; + +import java.awt.*; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.util.List; +import java.util.*; +import java.util.stream.Collectors; + + +public class WikiCleaner { + + static { + try (var font = ClassLoader.getSystemResourceAsStream("fonts/LM-regular.ttf")) { + FontFactory.getInstance().registerFont(Font.TRUETYPE_FONT, font); + } catch (IOException | FontFormatException e) { + e.printStackTrace(); + } + try (var font = ClassLoader.getSystemResourceAsStream("fonts/STIXTwoMath-Regular.ttf")) { + FontFactory.getInstance().registerFont(Font.TRUETYPE_FONT, font); + } catch (IOException | FontFormatException e) { + e.printStackTrace(); + } + } + public String 
cleanWikiJunk(String url, String html) { + return cleanWikiJunk(url, Jsoup.parse(html)); + } + + public List extractLinkWords(String data) { + var doc = Jsoup.parse(data); + return getWikiPageLinkText(doc); + } + + public String cleanWikiJunk(String url, Document doc) { + + if (doc.getElementById("content") == null) { + return null; + } + List> disambig = getDisambiguationLinks(doc); + List> topLinks = getWikiPageLinks(doc); + + removeTag(doc, "script", "object", "embed", "audio", "style", "noscript", "link", "meta", "img"); + doc.getElementsByClass("mwe-math-element").forEach(this::convertMathTag); + removeByClass(doc, "infobox", "collapsible", "navbar", "printfooter", + "mw-editsection", "thumb", "sidebar", "navbox", "mw-jump-link", + "vertical-navbox"); + removeByClass(doc, "mw-indicators", "noprint", "sistersitebox"); + removeIds(doc, "coordinates", "mw-page-base", "mw-head-base", "site-notice", "contentSub", "contentSub2"); + + doc.getElementsByAttributeValue("role", "presentation").remove(); + + doc.getElementsByTag("a").forEach(atag -> { + var href = atag.attr("href"); + var parent = atag.parent(); + + if ("li".equals(parent.tagName())) { + atag.removeAttr("title"); + if (href.startsWith("http://")) { + atag.addClass("extern-link"); + atag.attr("rel", "nofollow"); + return; + } + } + else { + atag.replaceWith(new TextNode(atag.text())); + } + }); + + Optional.ofNullable(doc.getElementsByTag("cite")).ifPresent(cite -> cite.forEach(c -> { + c.tagName("span"); + })); + + + removeIds(doc, "toc", "catlinks", "Notes", "mw-navigation", "mw-data-after-content", "jump-to-nav"); + removeByClass(doc, "mw-references-wrap", "references", "reference", "siteSub", "refbegin"); + + // doc.getElementById("mw-content-text").insertChildren(0, doc.getElementById("firstHeading")); + doc.getElementById("content").tagName("article"); + doc.getAllElements().forEach(elem -> { + if (elem.parent() != null + && "summary".equals(elem.parent().tagName())) + { + 
elem.parent().replaceWith(elem); + } + }); + + doc.getElementsByTag("span").forEach(elem -> { + if ("pre".equals(elem.parent().tagName())) { + if (elem.hasClass("linenos")) { + elem.replaceWith(new TextNode(String.format("%-4s", elem.text()))); + } + else { + elem.replaceWith(new TextNode(elem.text())); + } + } + else { + elem.replaceWith(new TextNode(" " + elem.text() + " ")); + } + }); + + doc.getElementsByTag("details").forEach(deets -> { + if (deets.children().size() == 1) { + deets.replaceWith(deets.children().first()); + } + else { + deets.tagName("div"); + } + }); + + removeEmptyTags(doc, "li"); + removeEmptyTags(doc, "ul"); + removeEmptyTags(doc, "div"); + + doc.getElementsByTag("p").forEach(elem -> { + if ("blockquote".equals(elem.parent().tagName())) { + elem.replaceWith(new TextNode(elem.text())); + } + }); + + removeEmptyTags(doc, "p"); + + doc.getElementsByTag("h4").forEach(elem -> { + var next = elem.nextElementSibling(); + if (next == null) { + elem.remove(); + return; + } + String nextTagName = next.tagName(); + if ("h4".equals(nextTagName) || "h3".equals(nextTagName) || "h2".equals(nextTagName)) { + elem.remove(); + } + }); + + + doc.getElementsByTag("h3").forEach(elem -> { + var next = elem.nextElementSibling(); + if (next == null) { + elem.remove(); + return; + } + String nextTagName = next.tagName(); + if ("h3".equals(nextTagName) || "h2".equals(nextTagName)) { + elem.remove(); + } + }); + + doc.getElementsByTag("h2").forEach(elem -> { + var next = elem.nextElementSibling(); + if (next == null) { + elem.remove(); + return; + } + if ("h2".equals(next.tagName())) { + elem.remove(); + } + }); + doc.getElementsByTag("footer").remove(); + doc.getElementsByTag("table").forEach(table -> { + table.attr("border", "1"); + }); + doc.getElementsByTag("table").forEach(table -> { + if ("right".equals(table.attr("align"))) { + table.remove(); + } + }); + + doc.getElementsByTag("head").append(""); + doc.getElementsByTag("head").append(""); + 
doc.getElementsByTag("head").append(""); + doc.getElementsByTag("head").append(""); + doc.getElementsByTag("head").append(""); + doc.getElementsByTag("head").append(""); + + if (!topLinks.isEmpty()) { + doc.getElementsByTag("article").append("

    Index of References

    "); + } + + if (!disambig.isEmpty()) { + doc.getElementsByTag("h1").first().nextElementSibling().prepend("
    See Also" + + disambig.stream().map(href -> ""+href.getValue()+"").collect(Collectors.joining("
    ")) + + ""); + } + + doc.getElementsByTag("article").first().parent().prepend("
    "); + doc.getElementsByTag("article").first().parent().append(""); + + doc.getElementsByTag("div").forEach(tag -> { + if (tag.text().startsWith("This article is issued from Wikipedia")) { + tag.remove(); // we have our own + } + }); + doc.getAllElements().forEach(elem -> { + var classes = elem.classNames().stream().filter(this::isWikiClass).collect(Collectors.toList()); + classes.forEach(elem::removeClass); + elem.removeAttr("lang"); + elem.removeAttr("dir"); + elem.removeAttr("id"); + elem.removeAttr("role"); + elem.removeAttr("style"); + elem.removeAttr("tabindex"); + elem.removeAttr("aria-haspopup"); + elem.removeAttr("data-section-id"); + elem.removeAttr("aria-expanded"); + elem.removeAttr("aria-pressed"); + elem.removeAttr("open"); + elem.removeAttr("data-level"); + }); + + marginifyHeaders(doc); + + + doc.filter(new NodeFilter() { + @Override + public FilterResult head(Node node, int depth) { + if (node instanceof Comment) { + return FilterResult.REMOVE; + } + return FilterResult.CONTINUE; + } + + @Override + public FilterResult tail(Node node, int depth) { + if (node instanceof Comment) { + return FilterResult.REMOVE; + } + return FilterResult.CONTINUE; + } + }); + return doc.html(); + } + + @SneakyThrows + private void convertMathTag(Element math) { + + try { + var formula = math.getElementsByTag("math"); + var converter = net.sourceforge.jeuclid.converter.Converter.getInstance(); + var sos = new ByteArrayOutputStream(); + var alt = Optional.ofNullable(formula.attr("alttext")) + .or(() -> Optional.ofNullable(math.getElementsByTag("annotation").text())) + .orElse(""); + + var layoutContext = new LayoutContextImpl(LayoutContextImpl.getDefaultLayoutContext()); + + String parentTag = math.parent().tag().getName(); + boolean topLevel = "dd".equals(parentTag) || "div".equals(parentTag) + || (math.nextElementSibling() == null && math.previousElementSibling() == null); + + int mathSize = 16; + if (topLevel) + mathSize = 24; + if ("h1".equals(parentTag)) { + 
mathSize = 28; + } + if ("h2".equals(parentTag)) { + mathSize = 24; + } + if ("h3".equals(parentTag)) { + mathSize = 22; + } + layoutContext.setParameter(Parameter.MATHSIZE, mathSize); + + layoutContext.setParameter(Parameter.ANTIALIAS, true); + layoutContext.setParameter(Parameter.SCRIPTMINSIZE, 8); + layoutContext.setParameter(Parameter.FONTS_SERIF, "STIX Two Math"); + layoutContext.setParameter(Parameter.FONTS_SCRIPT, "STIX Two Math"); + layoutContext.setParameter(Parameter.DISPLAY, topLevel ? Display.BLOCK : Display.INLINE); + + converter.convert(MathMLParserSupport.parseString( + formula.html().replace(" ", " ")), sos, + "image/png", + layoutContext).toString(); + + math.tagName("img") + .text("") + .attr("src", "data:image/png;base64," + Base64.getEncoder().encodeToString(sos.toByteArray())) + .attr("alt", alt); + + } + catch (Exception ex) { + ex.printStackTrace(); + } + } + + private void removeEmptyTags(Document doc, String tag) { + doc.getElementsByTag(tag).forEach(elem -> { + if (elem.text().isBlank() && elem.getElementsByTag("img").isEmpty()) { + elem.replaceWith(new TextNode(" ")); + } + + }); + } + + @NotNull + private List> getWikiPageLinks(Document doc) { + List> topLinks = new ArrayList<>(); + Optional.ofNullable(doc.select("p a")).ifPresent(links -> links.forEach(atag -> { + String href = atag.attr("href"); + + if (href != null && !href.isBlank() + && !href.contains(":") + && !href.startsWith("#") + ) { + topLinks.add(Pair.of(href, atag.attr("title"))); + } + })); + return topLinks; + } + + + @NotNull + private List getWikiPageLinkText(Document doc) { + List topLinks = new ArrayList<>(); + + doc.select("p a,h1,h2,h3,h4,i,em,strong,b").forEach(e -> topLinks.add(e.text())); + + return topLinks; + } + + @NotNull + private List> getDisambiguationLinks(Document doc) { + List> disambig = new ArrayList<>(); + + + Optional.ofNullable(doc.getElementsByClass("hatnote")).ifPresent(hatnotes -> { + hatnotes.forEach(note -> { + 
Optional.ofNullable(note.getElementsByTag("a")) + .ifPresent(links -> links.forEach(atag -> { + String href = atag.attr("href"); + if (atag.hasClass("mw-disambig") && href != null) { + disambig.add(Pair.of(href, atag.attr("title"))); + } + })); + }); + }); + Optional.ofNullable(doc.getElementsByClass("hatnote")).ifPresent(Elements::remove); + return disambig; + } + + private void removeTag(Document doc, String... tags) { + for (String tag : tags) { + doc.getElementsByTag(tag).remove(); + } + } + private void removeByClass(Document doc, String... classes) { + for (String clas: classes) { + doc.getElementsByClass(clas).remove(); + } + } + private void removeIds(Document doc, String... ids) { + Arrays.stream(ids) + .map(doc::getElementById) + .filter(Objects::nonNull) + .forEach(Element::remove); + } + + private void marginifyHeaders(Document doc) { + Elements headers = doc.getElementsByTag("h4"); + if (headers.size() == 0) { + headers = doc.getElementsByTag("h3"); + } + headers.addClass("margin-note"); + } + + boolean isWikiClass(String clazz) { + if ("verb".equals(clazz)) { + return false; + } + if ("extern-link".equals(clazz)) { + return false; + } + if ("margin-note".equals(clazz)) { + return false; + } + return true; + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/assistant/dict/WikiSearchResult.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/assistant/dict/WikiSearchResult.java new file mode 100644 index 00000000..f3e0f7ac --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/assistant/dict/WikiSearchResult.java @@ -0,0 +1,55 @@ +package nu.marginalia.wmsa.edge.assistant.dict; + +import lombok.AllArgsConstructor; + +import javax.annotation.Nullable; +import java.net.URLEncoder; +import java.nio.charset.StandardCharsets; +import java.util.Optional; + +@AllArgsConstructor +public class WikiSearchResult { + private final String name; + @Nullable + private final String refName; + + public String getName() { + 
return name.replace('_', ' '); + } + @Nullable + public String getRefName() { + if (refName == null) + return null; + + return refName.replace('_', ' '); + } + + public String getUrl() { + return "https://encyclopedia.marginalia.nu/wiki/" + URLEncoder.encode(getRealName(), StandardCharsets.UTF_8); + } + + public String getRealName() { + return Optional.ofNullable(refName).orElse(name); + } + + public String getInternalName() { + return name; + } + + @Override + public int hashCode() { + return getRealName().hashCode(); + } + + public boolean equals(Object other) { + if (other == this) { + return true; + } + if (other instanceof WikiSearchResult) { + WikiSearchResult r = (WikiSearchResult) other; + return r.getRealName().equals(getRealName()); + } + return false; + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/assistant/eval/MathParser.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/assistant/eval/MathParser.java new file mode 100644 index 00000000..6bb316c1 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/assistant/eval/MathParser.java @@ -0,0 +1,394 @@ +package nu.marginalia.wmsa.edge.assistant.eval; + +import lombok.AllArgsConstructor; +import lombok.SneakyThrows; +import lombok.ToString; + +import javax.inject.Singleton; +import java.math.RoundingMode; +import java.text.DecimalFormat; +import java.text.NumberFormat; +import java.text.ParseException; +import java.util.ArrayList; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.function.Function; +import java.util.function.Predicate; +import java.util.regex.Pattern; + +@Singleton +public class MathParser { + private final NumberFormat df; + static final Map constants = Map.of("e", Math.E, "pi", Math.PI, "2pi", 2*Math.PI); + + Predicate isTrivial = Pattern.compile("([0-9]+\\.[0-9]*|\\.[0-9]+)").asMatchPredicate(); + + public MathParser() { + df = DecimalFormat.getInstance(Locale.US); + 
df.setRoundingMode(RoundingMode.HALF_UP); + df.setMaximumFractionDigits(6); + } + + public String evalFormatted(String inputExpression) throws ParseException { + if (isTrivial.test(inputExpression)) { + return ""; + } + + return df.format(eval(inputExpression)); + } + + @SneakyThrows + public double eval(String inputExpression) { + if (isTrivial.test(inputExpression)) { + return Double.parseDouble(inputExpression); + } + + List tokens = tokenize(inputExpression); + + tokens = parenthesize(tokens); + tokens = negate(tokens); + tokens = functions(tokens); + tokens = binaryExpression(tokens, "^"); + tokens = binaryExpression(tokens, "*/"); + tokens = binaryExpression(tokens, "+-"); + + return new GroupExpression(' ', tokens).evaluate(); + } + + List negate(List tokens) { + if (tokens.isEmpty()) { + return tokens; + } + for (int i = 0; i < tokens.size(); i++) { + var t = tokens.get(i); + t.transform(this::negate); + } + + + for (int i = 0; i < tokens.size()-1;) { + var t = tokens.get(i); + + if (t.tokenType != '-') { + i++; + continue; + } + + if (i == 0) { + tokens.set(0, new UniExpression('~', tokens.get(1))); + tokens.remove(1); + continue; + } + + var t2 = tokens.get(i-1); + if ("+-%*/A".indexOf(t2.tokenType) >= 0) { + tokens.set(i, new UniExpression('~', tokens.get(i+1))); + tokens.remove(i+1); + continue; + } + + i++; + } + return tokens; + } + + List functions(List tokens) { + if (tokens.isEmpty()) { + return tokens; + } + + for (int i = 0; i < tokens.size(); i++) { + var t = tokens.get(i); + t.transform(this::functions); + } + + + for (int i = 0; i < tokens.size()-1;) { + var t = tokens.get(i); + + if (t.tokenType != 'A') { + i++; + continue; + } + + tokens.set(i, new BiExpression('F', tokens.get(i), tokens.get(i+1))); + tokens.remove(i+1); + } + return tokens; + } + + + List binaryExpression(List tokens, String operators) { + for (int i = 0; i < tokens.size(); i++) { + var t = tokens.get(i); + + t.transform(toks-> binaryExpression(toks, operators)); + } + + 
for (int i = 1; i < tokens.size()-1; i++) { + var t = tokens.get(i); + + if (operators.indexOf(t.tokenType) >= 0) { + Token newToken = new BiExpression(t.tokenType, tokens.get(i-1), tokens.get(i+1)); + tokens.set(i, newToken); + tokens.remove(i+1); + tokens.remove(i-1); + i = i-1; + } + + } + return tokens; + } + + List parenthesize(List tokens) { + int depth = 0; + for (int i = 0; i < tokens.size(); i++) { + var t = tokens.get(i); + if (t.tokenType == ')') { + throw new IllegalArgumentException("Unbalanced parentheses"); + } + if (t.tokenType == '(') { + int j; + for (j = i+1; j < tokens.size(); j++) { + var t2 = tokens.get(j); + if (t2.tokenType == '(') { + depth++; + } + else if (t2.tokenType == ')') { + if (depth == 0) { + break; + } + else { + depth--; + } + } + } + if (j == tokens.size()) { + throw new IllegalArgumentException("Unbalanced parentheses, depth = " + depth); + } + else { + var newToken = new GroupExpression(' ', parenthesize(new ArrayList<>(tokens.subList(i+1, j)))); + tokens.set(i, newToken); + tokens.subList(i+1, j+1).clear(); + } + } + } + return tokens; + } + + List tokenize(String inputExpression) throws ParseException { + List tokens = new ArrayList<>(); + + for (int i = 0; i < inputExpression.length(); i++) { + char c = inputExpression.charAt(i); + if ("()+-/*^".indexOf(c) >= 0) { + tokens.add(new Token(c)); + } + else if (Character.isDigit(c)) { + int j; + boolean hasPeriod = false; + for (j = i+1; j < inputExpression.length(); j++) { + char c2 = inputExpression.charAt(j); + if (Character.isDigit(c2)) { + continue; + } + if (c2 == '.') { + if (!hasPeriod) { + hasPeriod = true; + continue; + } + else { + throw new ParseException("Malformatted number in " + inputExpression, j); + } + } + break; + } + tokens.add(new StringToken('0', inputExpression.substring(i, j))); + i = j-1; + } + else if (Character.isAlphabetic(c)) { + int j; + for (j = i+1; j < inputExpression.length(); j++) { + char c2 = inputExpression.charAt(j); + if 
(Character.isAlphabetic(c2)) { + continue; + } + break; + } + var str = inputExpression.substring(i, j); + if (constants.containsKey(str)) { + tokens.add(new StringToken('C', str)); + } + else { + tokens.add(new StringToken('A', str)); + } + i = j-1; + } + else if(Character.isSpaceChar(c)) { + // + } + else { + throw new ParseException(inputExpression, i); + } + } + return tokens; + } +} + +@AllArgsConstructor @ToString +class Token { + public final char tokenType; + + public double evaluate() { + throw new IllegalArgumentException("Can't evaluate" + this); + } + + public void transform(Function, List> mapper) { + + } +} + +@ToString +class StringToken extends Token { + public final String value; + + public StringToken(char tokenType, String value) { + super(tokenType); + + this.value = value; + } + + public double evaluate() { + var cv = MathParser.constants.get(value); + if (cv != null) { + return cv; + } + + return Double.parseDouble(value); + } +} + +class UniExpression extends Token { + public final Token argument; + + public UniExpression(char tokenType, Token argument) { + super(tokenType); + + this.argument = argument; + } + + public String toString() { + return String.format("(%s %s)", tokenType, argument); + } + + @Override + public double evaluate() { + if (tokenType == '~') { + return -argument.evaluate(); + } + throw new IllegalArgumentException("Can't evaluate" + this); + } + + public void transform(Function, List> mapper) { + argument.transform(mapper); + } +} + +@ToString +class GroupExpression extends Token { + public List argument; + + public GroupExpression(char tokenType, List argument) { + super(tokenType); + + this.argument = argument; + } + + @Override + public double evaluate() { + if (argument.size() == 1) { + return argument.get(0).evaluate(); + } + throw new IllegalArgumentException("Can't evaluate" + this); + } + + public void transform(Function, List> mapper) { + argument = mapper.apply(argument); + } +} + + +class BiExpression extends 
Token { + public final Token left; + public final Token right; + + BiExpression(char tokenType, Token left, Token right) { + super(tokenType); + + this.left = left; + this.right = right; + } + + public String toString() { + return String.format("(%s %s %s)", tokenType, left, right); + } + + public void transform(Function, List> mapper) { + left.transform(mapper); + right.transform(mapper); + } + + @Override + public double evaluate() { + double rightVal = right.evaluate(); + switch (tokenType) { + case '+': + return left.evaluate() + rightVal; + case '-': + return left.evaluate() - rightVal; + case '*': + return left.evaluate() * rightVal; + case '/': { + if (rightVal == 0) { + return Double.NaN; + } + return left.evaluate() / rightVal; + } + case '%': + { + if (rightVal == 0) { + return Double.NaN; + } + return left.evaluate() % rightVal; + } + case '^': + return Math.pow(left.evaluate(), rightVal); + case 'F': + return evalFunction(rightVal); + default: + throw new IllegalArgumentException("Can't evaluate" + this); + } + } + + private double evalFunction(double rightVal) { + StringToken left2 = (StringToken) left; + switch (left2.value.toLowerCase()) { + case "sqrt": + return Math.sqrt(rightVal); + case "log": + return Math.log(rightVal); + case "log10": + return Math.log10(rightVal); + case "log2": + return Math.log(rightVal)/Math.log(2); + case "cos": + return Math.cos(rightVal); + case "sin": + return Math.sin(rightVal); + case "tan": + return Math.tan(rightVal); + default: + throw new IllegalArgumentException("Can't evaluate" + this); + } + } +} \ No newline at end of file diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/assistant/eval/Unit.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/assistant/eval/Unit.java new file mode 100644 index 00000000..e8da905e --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/assistant/eval/Unit.java @@ -0,0 +1,15 @@ +package nu.marginalia.wmsa.edge.assistant.eval; + +public class 
Unit { + + public final String name; + public final String type; + public final double baseValue; + + public Unit(String type, double value, String name) { + this.type = type; + this.name = name; + this.baseValue = value; + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/assistant/eval/Units.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/assistant/eval/Units.java new file mode 100644 index 00000000..6a0d4be8 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/assistant/eval/Units.java @@ -0,0 +1,122 @@ +package nu.marginalia.wmsa.edge.assistant.eval; + +import com.opencsv.CSVReader; +import lombok.SneakyThrows; + +import javax.inject.Inject; +import javax.inject.Singleton; +import java.io.InputStreamReader; +import java.nio.charset.StandardCharsets; +import java.text.DecimalFormat; +import java.util.HashMap; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; + +@Singleton +public class Units { + + private final Map unitsByName = new HashMap<>(); + private final MathParser mathParser; + + @SneakyThrows + @Inject + public Units(MathParser mathParser) { + this.mathParser = mathParser; + + var resource = Objects.requireNonNull(ClassLoader.getSystemResourceAsStream("units.csv"), + "Could not load IP location db"); + + try (var reader = new CSVReader(new InputStreamReader(resource, StandardCharsets.UTF_8))) { + for (;;) { + String[] vals = reader.readNext(); + if (vals == null) { + break; + } + + var unit = new Unit(vals[1], Double.parseDouble(vals[0]), vals[2]); + + for (int i = 2; i < vals.length; i++) { + unitsByName.put(vals[i].toLowerCase(), unit); + } + } + } + + } + + public Optional convert(String value, String fromUnitName, String toUnitName) { + var fromUnit = unitsByName.get(fromUnitName.toLowerCase()); + var toUnit = unitsByName.get(toUnitName.toLowerCase()); + + if (Objects.equals(fromUnit, toUnit)) { + return Optional.of(value + " " + fromUnit.name); + } + if (null == 
fromUnit || null == toUnit) { + return Optional.empty(); + } + + if (!Objects.equals(toUnit.type, fromUnit.type)) { + return Optional.empty(); + } + + double valNum; + try { + valNum = mathParser.eval(value); + } + catch (Exception ex) { + return Optional.empty(); + } + double convertedValue; + if ("TEMPERATURE".equals(fromUnit.type)) { + convertedValue = convertTemperature(valNum, fromUnit, toUnit); + } + else { + convertedValue = fromUnit.baseValue * valNum / toUnit.baseValue; + } + + boolean negative = convertedValue < 0; + if (negative) { + convertedValue = -convertedValue; + } + + long intFraction = (int) Math.log10(convertedValue); + + int sigFigs = countSigFigs(value); + var nf = new DecimalFormat(); + nf.setMaximumIntegerDigits(1 + (int) intFraction); + nf.setMaximumFractionDigits(1 + sigFigs - (int)intFraction); + return Optional.of((negative ? "-":"") + nf.format(convertedValue) + " " + toUnit.name); + } + + private double convertTemperature(double valNum, Unit fromUnit, Unit toUnit) { + if ("C".equals(fromUnit.name)) { + if ("K".equals(toUnit.name)) { + return valNum + 273.15; + } + else if ("F".equals(toUnit.name)) { + return 32. + 9*valNum/5; + } + } + else if ("F".equals(fromUnit.name)) { + if ("C".equals(toUnit.name)) { + return 5*(valNum - 32.)/9; + } + if ("K".equals(toUnit.name)) { + return 5*(valNum - 32.)/9 + 273.15; + } + } + else if ("K".equals(fromUnit.name)) { + if ("C".equals(toUnit.name)) { + return valNum - 273.15; + } + else if ("F".equals(toUnit.name)) { + return 32. 
+ 9*(valNum-273.15)/5; + } + } + return 0; + } + + private int countSigFigs(String value) { + return (int) value.chars().filter(Character::isDigit).count(); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/assistant/screenshot/ScreenshotService.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/assistant/screenshot/ScreenshotService.java new file mode 100644 index 00000000..59a719b2 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/assistant/screenshot/ScreenshotService.java @@ -0,0 +1,132 @@ +package nu.marginalia.wmsa.edge.assistant.screenshot; + +import com.google.common.base.Strings; +import com.google.inject.Inject; +import lombok.SneakyThrows; +import nu.marginalia.wmsa.edge.data.dao.EdgeDataStoreDao; +import nu.marginalia.wmsa.edge.model.EdgeDomain; +import nu.marginalia.wmsa.edge.model.EdgeId; +import spark.Request; +import spark.Response; +import spark.utils.IOUtils; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.NoSuchElementException; + +import static java.lang.Integer.parseInt; + +public class ScreenshotService { + + private final Path screenshotsRoot = Path.of("/var/lib/wmsa/archive/screenshots/screenshots/"); + private final Path screenshotsRootWebp = Path.of("/var/lib/wmsa/archive.fast/screenshots/"); + private final EdgeDataStoreDao edgeDataStoreDao; + private final long MIN_FILE_SIZE = 4096; + + @Inject + public ScreenshotService(EdgeDataStoreDao edgeDataStoreDao) { + this.edgeDataStoreDao = edgeDataStoreDao; + } + + public boolean hasScreenshot(EdgeId domainId) { + EdgeDomain domain = edgeDataStoreDao.getDomain(domainId); + + Path p = getScreenshotPath(screenshotsRootWebp, domain, ".webp"); + if (p == null) { + p = getScreenshotPath(screenshotsRoot, domain, ".png"); + } + + try { + return p != null && Files.size(p) >= MIN_FILE_SIZE; + } catch (IOException e) { + return false; + } + } + + 
@SneakyThrows + public Object serveScreenshotRequest(Request request, Response response) { + if (Strings.isNullOrEmpty(request.params("id"))) { + response.redirect("https://search.marginalia.nu/"); + return null; + } + + int id = parseInt(request.params("id")); + + Path p = null; + if (id == 0) { + p = screenshotsRootWebp.resolve("dummy-snapshot.webp"); + } else { + EdgeDomain domain; + try { + domain = edgeDataStoreDao.getDomain(new EdgeId<>(id)); + p = getScreenshotPath(screenshotsRootWebp, domain, ".webp"); + if (p == null) { + p = getScreenshotPath(screenshotsRoot, domain, ".png"); + } + + if (p != null && Files.size(p) <= MIN_FILE_SIZE) { + p = null; + } + } catch (NoSuchElementException ex) { + domain = new EdgeDomain("error.example.com"); + } + + if (p == null) { + response.type("image/svg+xml"); + + return String.format("\n" + + "\n" + + " \n" + + " \n" + + " Placeholder\n" + + " %s\n" + + " \n" + + "\n", domain); + } + } + response.status(200); + response.header("Cache-control", "public,max-age=3600"); + if (p.toString().endsWith("webp")) { + response.type("image/webp"); + } else { + response.type("image/png"); + } + IOUtils.copy(new ByteArrayInputStream(Files.readAllBytes(p)), response.raw().getOutputStream()); + return ""; + } + + private Path getScreenshotPath(Path root, EdgeDomain domain, String ending) { + + var p = root.resolve(domain.toString() + ending); + if (!p.normalize().startsWith(root)) { + return null; + } + + if (!Files.exists(p)) { + return null; + } + + return p; + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/assistant/suggest/Suggestions.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/assistant/suggest/Suggestions.java new file mode 100644 index 00000000..69b3f7f1 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/assistant/suggest/Suggestions.java @@ -0,0 +1,145 @@ +package nu.marginalia.wmsa.edge.assistant.suggest; + +import com.google.common.cache.Cache; +import 
com.google.common.cache.CacheBuilder; +import com.google.inject.Inject; +import com.google.inject.name.Named; +import lombok.SneakyThrows; +import nu.marginalia.wmsa.edge.assistant.dict.NGramDict; +import nu.marginalia.wmsa.edge.assistant.dict.SpellChecker; +import org.apache.commons.collections4.trie.PatriciaTrie; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.*; +import java.util.function.Function; +import java.util.function.Supplier; +import java.util.regex.Pattern; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +public class Suggestions { + private final PatriciaTrie suggestionsTrie; + private final NGramDict nGramDict; + private final SpellChecker spellChecker; + + private static final Pattern suggestionPattern = Pattern.compile("^[a-zA-Z0-9]+( [a-zA-Z0-9]+)*$"); + private static final Logger logger = LoggerFactory.getLogger(Suggestions.class); + + private static final int MIN_SUGGEST_LENGTH = 3; + @Inject + public Suggestions(@Named("suggestions-file") Path suggestionsFile, + SpellChecker spellChecker, + NGramDict dict + ) { + this.spellChecker = spellChecker; + + suggestionsTrie = loadSuggestions(suggestionsFile); + nGramDict = dict; + + logger.info("Loaded {} suggestions", suggestionsTrie.size()); + } + + private static PatriciaTrie loadSuggestions(Path file) { + try (var lines = Files.lines(file)) { + var ret = new PatriciaTrie(); + + lines.filter(suggestionPattern.asPredicate()) + .filter(line -> line.length()<32) + .map(String::toLowerCase) + .forEach(w -> ret.put(w, w)); + + return ret; + } + catch (IOException ex) { + logger.error("Failed to load suggestions file", ex); + return new PatriciaTrie(); + } + } + + private record SuggestionStream(String prefix, Stream suggestionStream) { + public Stream stream() { + return suggestionStream.map(s -> prefix + s); + } + + } + + public List getSuggestions(int count, 
String searchWord) { + if (searchWord.length() < MIN_SUGGEST_LENGTH) { + return Collections.emptyList(); + } + + searchWord = trimLeading(searchWord.toLowerCase()); + + List streams = new ArrayList<>(4); + streams.add(new SuggestionStream("", getSuggestionsForKeyword(count, searchWord))); + + int sp = searchWord.lastIndexOf(' '); + if (sp >= 0) { + String prefixString = searchWord.substring(0, sp+1); + String suggestString = searchWord.substring(sp+1); + + if (suggestString.length() >= MIN_SUGGEST_LENGTH) { + streams.add(new SuggestionStream(prefixString, getSuggestionsForKeyword(count, suggestString))); + } + + } + streams.add(spellCheckStream(searchWord)); + + return streams.stream().flatMap(SuggestionStream::stream).limit(count).collect(Collectors.toList()); + } + + private SuggestionStream spellCheckStream(String word) { + int start = word.lastIndexOf(' '); + String prefix; + String corrWord; + + if (start < 0) { + corrWord = word; + prefix = ""; + } + else { + prefix = word.substring(0, start + 1); + corrWord = word.substring(start + 1); + } + + if (corrWord.length() >= MIN_SUGGEST_LENGTH) { + Supplier> suggestionsLazyEval = () -> spellChecker.correct(corrWord).stream(); + return new SuggestionStream(prefix, Stream.of(suggestionsLazyEval).flatMap(Supplier::get)); + } + else { + return new SuggestionStream("", Stream.empty()); + } + } + + private String trimLeading(String word) { + + for (int i = 0; i < word.length(); i++) { + if (!Character.isWhitespace(word.charAt(i))) + return word.substring(i); + } + + return ""; + } + + public Stream getSuggestionsForKeyword(int count, String prefix) { + var start = suggestionsTrie.select(prefix); + + if (!start.getKey().startsWith(prefix)) { + return Stream.empty(); + } + + Map scach = new HashMap<>(512); + Function valr = s -> -nGramDict.getTermFreqHash(scach.computeIfAbsent(s, NGramDict::getStringHash)); + + return Stream.iterate(start.getKey(), Objects::nonNull, suggestionsTrie::nextKey) + .takeWhile(s -> 
s.startsWith(prefix)) + .limit(256) + .sorted(Comparator.comparing(valr).thenComparing(String::length).thenComparing(Comparator.naturalOrder())) + .limit(count); + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/ConvertedDomainReader.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/ConvertedDomainReader.java new file mode 100644 index 00000000..7c3621cb --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/ConvertedDomainReader.java @@ -0,0 +1,62 @@ +package nu.marginalia.wmsa.edge.converting; + +import com.github.luben.zstd.ZstdInputStream; +import com.google.gson.Gson; +import com.google.gson.GsonBuilder; +import com.google.gson.JsonParseException; +import com.google.gson.JsonSyntaxException; +import crawlercommons.utils.Strings; +import nu.marginalia.wmsa.edge.converting.interpreter.Instruction; +import nu.marginalia.wmsa.edge.converting.interpreter.InstructionTag; +import nu.marginalia.wmsa.edge.crawling.model.CrawledDomain; +import org.apache.commons.lang3.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.inject.Inject; +import java.io.*; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.List; +import java.util.stream.Stream; + +public class ConvertedDomainReader { + private static final Logger logger = LoggerFactory.getLogger(ConvertedDomainReader.class); + private final Gson gson; + + @Inject + public ConvertedDomainReader(Gson gson) { + this.gson = gson; + } + + public List read(Path path, int cntHint) throws IOException { + List ret = new ArrayList<>(cntHint); + + try (var br = new BufferedReader(new InputStreamReader(new ZstdInputStream(new BufferedInputStream(new FileInputStream(path.toFile())))))) { + String line; + for (;;) { + line = br.readLine(); + + if (line == null) { + break; + } + if (Strings.isBlank(line)) { + continue; + } + var parts= line.split(" ", 2); + var type = 
InstructionTag.valueOf(parts[0]).clazz; + + try { + ret.add(gson.fromJson(parts[1], type)); + } + catch (JsonParseException ex) { + logger.warn("Failed to deserialize {} {}", type.getSimpleName(), StringUtils.abbreviate(parts[1], 255)); + logger.warn("Json error", ex); + } + } + } + + return ret; + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/ConverterMain.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/ConverterMain.java new file mode 100644 index 00000000..84a5d2f0 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/ConverterMain.java @@ -0,0 +1,137 @@ +package nu.marginalia.wmsa.edge.converting; + +import com.google.gson.*; +import com.google.inject.Guice; +import com.google.inject.Inject; +import com.google.inject.Injector; +import nu.marginalia.wmsa.edge.converting.interpreter.Instruction; +import nu.marginalia.wmsa.edge.converting.processor.DomainProcessor; +import nu.marginalia.wmsa.edge.converting.processor.InstructionsCompiler; +import nu.marginalia.wmsa.edge.crawling.CrawlPlanLoader; +import nu.marginalia.wmsa.edge.crawling.CrawledDomainReader; +import nu.marginalia.wmsa.edge.crawling.WorkLog; +import nu.marginalia.wmsa.edge.crawling.CrawlerSpecificationLoader; +import nu.marginalia.wmsa.edge.crawling.model.CrawledDomain; +import nu.marginalia.util.ParallelPipe; +import nu.marginalia.wmsa.edge.model.EdgeCrawlPlan; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.nio.file.Path; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +public class ConverterMain { + + private final Logger logger = LoggerFactory.getLogger(getClass()); + private final DomainProcessor processor; + private final InstructionsCompiler compiler; + private final WorkLog processLog; + private final CrawledInstructionWriter instructionWriter; + + private Gson gson; + private final CrawledDomainReader reader = new 
/**
 * Runs the whole conversion: every crawled domain listed in the job spec that the
 * process log does not mark as finished is read, processed, compiled to instructions
 * and written out.
 *
 * <p>The constructor does not return normally; it ends the JVM with status 0 once all
 * domains have been handled.
 *
 * @param plan      supplies the job spec and the crawl and process directories and logs
 * @param processor turns crawled domain data into its processed form
 * @param compiler  turns a processed domain into loader instructions
 * @param gson      serializer handed to the instruction writer
 * @throws Exception if loading the spec or logs, or waiting for the pipe, fails
 */
@Inject
public ConverterMain(
        EdgeCrawlPlan plan,
        DomainProcessor processor,
        InstructionsCompiler compiler,
        Gson gson
        ) throws Exception {
    this.processor = processor;
    this.compiler = compiler;
    this.gson = gson;

    instructionWriter = new CrawledInstructionWriter(plan.process.getDir(), gson);

    logger.info("Loading input spec");
    CrawlerSpecificationLoader.readInputSpec(plan.getJobSpec(),
            spec -> domainToId.put(spec.domain, spec.id));

    logger.info("Replaying crawl log");
    WorkLog.readLog(plan.crawl.getLogFile(),
            entry -> idToFileName.put(entry.id(), entry.path()));

    logger.info("Starting pipe");
    processLog = new WorkLog(plan.process.getLogFile());


    var pipe = new ParallelPipe<CrawledDomain, ProcessingInstructions>("Crawler", 48, 4, 2) {
        @Override
        protected ProcessingInstructions onProcess(CrawledDomain domainData) throws Exception {
            var processed = processor.process(domainData);
            return new ProcessingInstructions(domainData.id, compiler.compile(processed));
        }

        @Override
        protected void onReceive(ProcessingInstructions processedInstructions) throws IOException {
            var instructions = processedInstructions.instructions;
            instructions.removeIf(Instruction::isNoOp);

            String where = instructionWriter.accept(processedInstructions.id, instructions);
            processLog.setJobToFinished(processedInstructions.id, where, instructions.size());
        }
    };

    domainToId.forEach((domain, id) -> {
        String fileName = idToFileName.get(id);

        // A domain can be in the spec without an entry in the crawl log. Skip it rather
        // than let getFilePath() fail on a null name and abort the entire run.
        if (fileName == null) {
            logger.warn("No crawl log entry for {} - {}, skipping", domain, id);
            return;
        }

        Path dest = getFilePath(plan.crawl.getDir(), fileName);
        logger.info("{} - {} - {}", domain, id, dest);

        if (!processLog.isJobFinished(id)) {
            try {
                var cd = reader.read(dest);
                pipe.accept(cd);

            } catch (IOException e) {
                // trailing throwable argument: SLF4J logs it with its stack trace
                logger.error("Failed to read {}", dest, e);
            }
        }
    });

    pipe.join();

    processLog.close();

    logger.info("Finished");

    System.exit(0);
}
/**
 * Builds the Gson instance bound by this module.
 *
 * <p>{@code EdgeUrl} and {@code EdgeDomain} are written as a single JSON string holding
 * their {@code toString()} form and read back by passing that string to their
 * constructors. Records are handled by {@code RecordTypeAdapterFactory}, configured to
 * allow missing component values.
 */
private Gson createGson() {

    return new GsonBuilder()
            .registerTypeAdapter(EdgeUrl.class, (JsonSerializer) (src, typeOfSrc, context) -> new JsonPrimitive(src.toString()))
            .registerTypeAdapter(EdgeDomain.class, (JsonSerializer) (src, typeOfSrc, context) -> new JsonPrimitive(src.toString()))
            .registerTypeAdapter(EdgeUrl.class, (JsonDeserializer) (json, typeOfT, context) -> {
                try {
                    return new EdgeUrl(json.getAsString());
                } catch (URISyntaxException e) {
                    // Report an unparseable URL as a JSON error, keeping the cause
                    throw new JsonParseException("URL Parse Exception", e);
                }
            })
            .registerTypeAdapter(EdgeDomain.class, (JsonDeserializer) (json, typeOfT, context) -> new EdgeDomain(json.getAsString()))
            .registerTypeAdapterFactory(RecordTypeAdapterFactory.builder().allowMissingComponentValues().create())
            .create();
}
+import com.google.gson.Gson; +import com.google.gson.GsonBuilder; +import nu.marginalia.wmsa.edge.converting.interpreter.Instruction; +import nu.marginalia.wmsa.edge.crawling.model.CrawledDomain; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.BufferedOutputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStreamWriter; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.List; + +public class CrawledInstructionWriter { + private final Path outputDir; + private Gson gson; + private static final Logger logger = LoggerFactory.getLogger(CrawledInstructionWriter.class); + + public CrawledInstructionWriter(Path outputDir, Gson gson) { + this.outputDir = outputDir; + this.gson = gson; + + if (!Files.isDirectory(outputDir)) { + throw new IllegalArgumentException("Output dir " + outputDir + " does not exist"); + } + } + + public String accept(String id, List instructionList) throws IOException { + Path outputFile = getOutputFile(id); + + if (Files.exists(outputFile)) { + Files.delete(outputFile); + } + + try (var outputStream = new OutputStreamWriter(new ZstdOutputStream(new BufferedOutputStream(new FileOutputStream(outputFile.toFile()))))) { + logger.info("Writing {} - {}", id, instructionList.size()); + + for (var instr : instructionList) { + outputStream.append(instr.tag().name()); + outputStream.append(' '); + gson.toJson(instr, outputStream); + outputStream.append('\n'); + } + } + + return outputFile.getFileName().toString(); + } + + private Path getOutputFile(String id) throws IOException { + String first = id.substring(0, 2); + String second = id.substring(2, 4); + + Path destDir = outputDir.resolve(first).resolve(second); + if (!Files.exists(destDir)) { + Files.createDirectories(destDir); + } + return destDir.resolve(id + ".pzstd"); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/LoaderMain.java 
b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/LoaderMain.java new file mode 100644 index 00000000..95a9fbb4 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/LoaderMain.java @@ -0,0 +1,140 @@ +package nu.marginalia.wmsa.edge.converting; + +import com.google.inject.Guice; +import com.google.inject.Inject; +import com.google.inject.Injector; +import com.zaxxer.hikari.HikariDataSource; +import lombok.SneakyThrows; +import nu.marginalia.wmsa.configuration.module.DatabaseModule; +import nu.marginalia.wmsa.edge.converting.interpreter.Instruction; +import nu.marginalia.wmsa.edge.converting.loader.Loader; +import nu.marginalia.wmsa.edge.converting.loader.LoaderFactory; +import nu.marginalia.wmsa.edge.crawling.CrawlPlanLoader; +import nu.marginalia.wmsa.edge.crawling.WorkLog; +import nu.marginalia.wmsa.edge.index.client.EdgeIndexClient; +import nu.marginalia.wmsa.edge.model.EdgeCrawlPlan; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.nio.file.Path; +import java.util.List; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; + +public class LoaderMain { + + private final Path processDir; + private EdgeCrawlPlan plan; + private final ConvertedDomainReader instructionsReader; + private final HikariDataSource dataSource; + + private static final Logger logger = LoggerFactory.getLogger(LoaderMain.class); + private final LoaderFactory loaderFactory; + private EdgeIndexClient indexClient; + private volatile boolean running = true; + + Thread processorThread = new Thread(this::processor, "Processor Thread"); + + public static void main(String... 
args) throws IOException { + if (args.length != 1) { + System.err.println("Arguments: crawl-plan.yaml"); + System.exit(0); + } + var plan = new CrawlPlanLoader().load(Path.of(args[0])); + + Injector injector = Guice.createInjector( + new ConverterModule(plan), + new DatabaseModule() + ); + + var instance = injector.getInstance(LoaderMain.class); + instance.run(); + } + + @Inject + public LoaderMain(EdgeCrawlPlan plan, + ConvertedDomainReader instructionsReader, + HikariDataSource dataSource, + LoaderFactory loaderFactory, + EdgeIndexClient indexClient) { + + this.processDir = plan.process.getDir(); + this.plan = plan; + this.instructionsReader = instructionsReader; + this.dataSource = dataSource; + this.loaderFactory = loaderFactory; + this.indexClient = indexClient; + + processorThread.start(); + } + + @SneakyThrows + public void run() { + var logFile = plan.process.getLogFile(); + + AtomicInteger loadTotal = new AtomicInteger(); + WorkLog.readLog(logFile, entry -> { loadTotal.incrementAndGet(); }); + LoaderMain.loadTotal = loadTotal.get(); + + WorkLog.readLog(logFile, entry -> { + load(entry.path(), entry.cnt()); + }); + + processorThread.join(); + indexClient.close(); + } + + private volatile static int loadTotal; + private volatile static int loaded = 0; + + private void load(String path, int cnt) { + String first = path.substring(0, 2); + String second = path.substring(2, 4); + Path destDir = processDir.resolve(first).resolve(second).resolve(path); + + + + try { + var loader = loaderFactory.create(cnt); + var instructions = instructionsReader.read(destDir, cnt); + processQueue.put(new LoadJob(path, loader, instructions)); + } catch (Exception e) { + logger.error("Failed to load " + destDir, e); + } + } + + static TaskStats taskStats = new TaskStats(100); + + private record LoadJob(String path, Loader loader, List instructionList) { + public void run() { + long startTime = System.currentTimeMillis(); + for (var i : instructionList) { + i.apply(loader); + } + + 
/**
 * Tracks task durations: a running count and total, plus a fixed-size window of the
 * most recent durations from which a throughput figure is derived.
 *
 * <p>All methods are synchronized on the instance, so one {@code TaskStats} may be
 * shared between threads.
 */
public class TaskStats {
    private final long[] taskTimes;
    private int count = 0;
    private long total = 0;

    /**
     * @param windowSize number of most recent durations that {@link #avgTime()} considers
     * @throws IllegalArgumentException if {@code windowSize} is not positive
     */
    public TaskStats(int windowSize) {
        if (windowSize <= 0) {
            throw new IllegalArgumentException("windowSize must be positive, was " + windowSize);
        }
        taskTimes = new long[windowSize];
    }

    /**
     * Records the duration of one finished task, overwriting the oldest entry once the
     * window is full.
     */
    public synchronized void observe(long time) {
        taskTimes[count++%taskTimes.length] = time;
        total += time;
    }

    /**
     * Returns the throughput over the window rather than an average duration: the number
     * of windowed tasks per 1000 units of observed time, truncated to one decimal. With
     * durations in milliseconds this is tasks per second.
     *
     * @return the throughput, or 0 if the windowed durations sum to zero (in particular
     *         before the first observation)
     */
    public synchronized double avgTime() {
        final int filled = Math.min(count, taskTimes.length);

        long windowSum = 0;
        for (int i = 0; i < filled; i++) {
            windowSum += taskTimes[i];
        }

        // Guard the long division below: it would throw ArithmeticException on zero
        if (windowSum == 0) {
            return 0.;
        }

        return (filled * 10_000L / windowSum)/10.;
    }

    /** Returns the sum of every duration observed so far, not limited to the window. */
    public synchronized double totalTime() {
        return total;
    }

    /** Returns the number of durations observed so far. */
    public synchronized int getCount() {
        return count;
    }

}
@@ -0,0 +1,8 @@ +package nu.marginalia.wmsa.edge.converting.interpreter; + +public interface Instruction { + void apply(Interpreter interpreter); + boolean isNoOp(); + + InstructionTag tag(); +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/interpreter/InstructionTag.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/interpreter/InstructionTag.java new file mode 100644 index 00000000..398ad430 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/interpreter/InstructionTag.java @@ -0,0 +1,23 @@ +package nu.marginalia.wmsa.edge.converting.interpreter; + +import nu.marginalia.wmsa.edge.converting.interpreter.instruction.*; + +public enum InstructionTag { + + DOMAIN(LoadDomain.class), + URL(LoadUrl.class), + LINK(LoadDomainLink.class), + REDIRECT(LoadDomainRedirect.class), + WORDS(LoadKeywords.class), + PROC_DOCUMENT(LoadProcessedDocument.class), + PROC_DOCUMENT_ERR(LoadProcessedDocumentWithError.class), + PROC_DOMAIN(LoadProcessedDomain.class), + RSS(LoadRssFeed.class); + + public final Class clazz; + + InstructionTag(Class clazz) { + this.clazz = clazz; + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/interpreter/Interpreter.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/interpreter/Interpreter.java new file mode 100644 index 00000000..1d9d13a8 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/interpreter/Interpreter.java @@ -0,0 +1,28 @@ +package nu.marginalia.wmsa.edge.converting.interpreter; + +import nu.marginalia.wmsa.edge.converting.interpreter.instruction.DocumentKeywords; +import nu.marginalia.wmsa.edge.converting.interpreter.instruction.DomainLink; +import nu.marginalia.wmsa.edge.converting.interpreter.instruction.LoadProcessedDocument; +import nu.marginalia.wmsa.edge.converting.interpreter.instruction.LoadProcessedDocumentWithError; +import nu.marginalia.wmsa.edge.index.model.IndexBlock; 
+import nu.marginalia.wmsa.edge.model.EdgeDomain; +import nu.marginalia.wmsa.edge.model.EdgeUrl; +import nu.marginalia.wmsa.edge.model.crawl.EdgeDomainIndexingState; +import nu.marginalia.wmsa.edge.model.crawl.EdgeHtmlStandard; +import nu.marginalia.wmsa.edge.model.crawl.EdgePageWordSet; +import nu.marginalia.wmsa.edge.model.crawl.EdgeUrlState; + +public interface Interpreter { + void loadUrl(EdgeUrl[] url); + void loadDomain(EdgeDomain[] domain); + void loadRssFeed(EdgeUrl[] rssFeed); + void loadDomainLink(DomainLink[] links); + + void loadProcessedDomain(EdgeDomain domain, EdgeDomainIndexingState state, double quality); + void loadProcessedDocument(LoadProcessedDocument loadProcessedDocument); + void loadProcessedDocumentWithError(LoadProcessedDocumentWithError loadProcessedDocumentWithError); + + void loadKeywords(EdgeUrl url, DocumentKeywords[] words); + + void loadDomainRedirect(DomainLink link); +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/interpreter/instruction/DocumentKeywords.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/interpreter/instruction/DocumentKeywords.java new file mode 100644 index 00000000..e9d2471f --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/interpreter/instruction/DocumentKeywords.java @@ -0,0 +1,17 @@ +package nu.marginalia.wmsa.edge.converting.interpreter.instruction; + +import nu.marginalia.wmsa.edge.index.model.IndexBlock; +import nu.marginalia.wmsa.edge.model.crawl.EdgePageWords; + +import java.util.Arrays; + +public record DocumentKeywords(IndexBlock block, String... 
keywords) { + public DocumentKeywords(EdgePageWords words) { + this(words.block, words.words.toArray(String[]::new)); + } + + @Override + public String toString() { + return getClass().getSimpleName()+"["+block +", "+Arrays.toString(keywords)+"]"; + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/interpreter/instruction/DomainLink.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/interpreter/instruction/DomainLink.java new file mode 100644 index 00000000..338e345c --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/interpreter/instruction/DomainLink.java @@ -0,0 +1,6 @@ +package nu.marginalia.wmsa.edge.converting.interpreter.instruction; + +import nu.marginalia.wmsa.edge.model.EdgeDomain; + +public record DomainLink(EdgeDomain from, EdgeDomain to) { +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/interpreter/instruction/LoadDomain.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/interpreter/instruction/LoadDomain.java new file mode 100644 index 00000000..7cf88b06 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/interpreter/instruction/LoadDomain.java @@ -0,0 +1,31 @@ +package nu.marginalia.wmsa.edge.converting.interpreter.instruction; + +import nu.marginalia.wmsa.edge.converting.interpreter.Instruction; +import nu.marginalia.wmsa.edge.converting.interpreter.InstructionTag; +import nu.marginalia.wmsa.edge.converting.interpreter.Interpreter; +import nu.marginalia.wmsa.edge.model.EdgeDomain; + +import java.util.Arrays; + +public record LoadDomain(EdgeDomain... 
domain) implements Instruction { + + @Override + public void apply(Interpreter interpreter) { + interpreter.loadDomain(domain); + } + + @Override + public boolean isNoOp() { + return domain.length == 0; + } + + @Override + public InstructionTag tag() { + return InstructionTag.DOMAIN; + } + + @Override + public String toString() { + return getClass().getSimpleName()+"["+Arrays.toString(domain)+"]"; + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/interpreter/instruction/LoadDomainLink.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/interpreter/instruction/LoadDomainLink.java new file mode 100644 index 00000000..2d302ddf --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/interpreter/instruction/LoadDomainLink.java @@ -0,0 +1,31 @@ +package nu.marginalia.wmsa.edge.converting.interpreter.instruction; + +import nu.marginalia.wmsa.edge.converting.interpreter.Instruction; +import nu.marginalia.wmsa.edge.converting.interpreter.InstructionTag; +import nu.marginalia.wmsa.edge.converting.interpreter.Interpreter; + +import java.util.Arrays; + +public record LoadDomainLink(DomainLink... 
links) implements Instruction { + + @Override + public void apply(Interpreter interpreter) { + interpreter.loadDomainLink(links); + } + + @Override + public String toString() { + return getClass().getSimpleName()+"["+ Arrays.toString(links)+"]"; + } + + @Override + public InstructionTag tag() { + return InstructionTag.LINK; + } + + @Override + public boolean isNoOp() { + return links.length == 0; + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/interpreter/instruction/LoadDomainRedirect.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/interpreter/instruction/LoadDomainRedirect.java new file mode 100644 index 00000000..742ad5cb --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/interpreter/instruction/LoadDomainRedirect.java @@ -0,0 +1,31 @@ +package nu.marginalia.wmsa.edge.converting.interpreter.instruction; + +import nu.marginalia.wmsa.edge.converting.interpreter.Instruction; +import nu.marginalia.wmsa.edge.converting.interpreter.InstructionTag; +import nu.marginalia.wmsa.edge.converting.interpreter.Interpreter; + +import java.util.Arrays; + +public record LoadDomainRedirect(DomainLink links) implements Instruction { + + @Override + public void apply(Interpreter interpreter) { + interpreter.loadDomainRedirect(links); + } + + @Override + public String toString() { + return getClass().getSimpleName()+"["+ links+"]"; + } + + @Override + public InstructionTag tag() { + return InstructionTag.REDIRECT; + } + + @Override + public boolean isNoOp() { + return false; + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/interpreter/instruction/LoadKeywords.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/interpreter/instruction/LoadKeywords.java new file mode 100644 index 00000000..7f12bf67 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/interpreter/instruction/LoadKeywords.java @@ -0,0 +1,32 @@ 
+package nu.marginalia.wmsa.edge.converting.interpreter.instruction; + +import nu.marginalia.wmsa.edge.converting.interpreter.Instruction; +import nu.marginalia.wmsa.edge.converting.interpreter.InstructionTag; +import nu.marginalia.wmsa.edge.converting.interpreter.Interpreter; +import nu.marginalia.wmsa.edge.model.EdgeUrl; + +import java.util.Arrays; + +public record LoadKeywords(EdgeUrl url, DocumentKeywords... words) implements Instruction { + + @Override + public void apply(Interpreter interpreter) { + interpreter.loadKeywords(url, words); + } + + @Override + public boolean isNoOp() { + return words.length == 0; + } + + @Override + public InstructionTag tag() { + return InstructionTag.WORDS; + } + + @Override + public String toString() { + return getClass().getSimpleName()+"["+ Arrays.toString(words)+"]"; + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/interpreter/instruction/LoadProcessedDocument.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/interpreter/instruction/LoadProcessedDocument.java new file mode 100644 index 00000000..9a35c58b --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/interpreter/instruction/LoadProcessedDocument.java @@ -0,0 +1,35 @@ +package nu.marginalia.wmsa.edge.converting.interpreter.instruction; + +import nu.marginalia.wmsa.edge.converting.interpreter.Instruction; +import nu.marginalia.wmsa.edge.converting.interpreter.InstructionTag; +import nu.marginalia.wmsa.edge.converting.interpreter.Interpreter; +import nu.marginalia.wmsa.edge.model.EdgeUrl; +import nu.marginalia.wmsa.edge.model.crawl.EdgeHtmlStandard; +import nu.marginalia.wmsa.edge.model.crawl.EdgeUrlState; + + +public record LoadProcessedDocument(EdgeUrl url, + EdgeUrlState state, + String title, + String description, + int htmlFeatures, + EdgeHtmlStandard standard, + int length, + long hash, + double quality) implements Instruction +{ + @Override + public void apply(Interpreter 
interpreter) { + interpreter.loadProcessedDocument(this); + } + + @Override + public InstructionTag tag() { + return InstructionTag.PROC_DOCUMENT; + } + + @Override + public boolean isNoOp() { + return false; + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/interpreter/instruction/LoadProcessedDocumentWithError.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/interpreter/instruction/LoadProcessedDocumentWithError.java new file mode 100644 index 00000000..28d96989 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/interpreter/instruction/LoadProcessedDocumentWithError.java @@ -0,0 +1,28 @@ +package nu.marginalia.wmsa.edge.converting.interpreter.instruction; + +import nu.marginalia.wmsa.edge.converting.interpreter.Instruction; +import nu.marginalia.wmsa.edge.converting.interpreter.InstructionTag; +import nu.marginalia.wmsa.edge.converting.interpreter.Interpreter; +import nu.marginalia.wmsa.edge.model.EdgeUrl; +import nu.marginalia.wmsa.edge.model.crawl.EdgeDomainIndexingState; +import nu.marginalia.wmsa.edge.model.crawl.EdgeUrlState; + + +public record LoadProcessedDocumentWithError(EdgeUrl url, + EdgeUrlState state) implements Instruction +{ + @Override + public void apply(Interpreter interpreter) { + interpreter.loadProcessedDocumentWithError(this); + } + + @Override + public InstructionTag tag() { + return InstructionTag.PROC_DOCUMENT_ERR; + } + + @Override + public boolean isNoOp() { + return false; + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/interpreter/instruction/LoadProcessedDomain.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/interpreter/instruction/LoadProcessedDomain.java new file mode 100644 index 00000000..065d6211 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/interpreter/instruction/LoadProcessedDomain.java @@ -0,0 +1,26 @@ +package 
nu.marginalia.wmsa.edge.converting.interpreter.instruction; + +import nu.marginalia.wmsa.edge.converting.interpreter.Instruction; +import nu.marginalia.wmsa.edge.converting.interpreter.InstructionTag; +import nu.marginalia.wmsa.edge.converting.interpreter.Interpreter; +import nu.marginalia.wmsa.edge.model.EdgeDomain; +import nu.marginalia.wmsa.edge.model.crawl.EdgeDomainIndexingState; + +public record LoadProcessedDomain(EdgeDomain domain, EdgeDomainIndexingState state, double quality) implements Instruction { + + @Override + public void apply(Interpreter interpreter) { + interpreter.loadProcessedDomain(domain, state, quality); + } + + @Override + public InstructionTag tag() { + return InstructionTag.PROC_DOMAIN; + } + + @Override + public boolean isNoOp() { + return false; + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/interpreter/instruction/LoadRssFeed.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/interpreter/instruction/LoadRssFeed.java new file mode 100644 index 00000000..d4dbe0eb --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/interpreter/instruction/LoadRssFeed.java @@ -0,0 +1,32 @@ +package nu.marginalia.wmsa.edge.converting.interpreter.instruction; + +import nu.marginalia.wmsa.edge.converting.interpreter.Instruction; +import nu.marginalia.wmsa.edge.converting.interpreter.InstructionTag; +import nu.marginalia.wmsa.edge.converting.interpreter.Interpreter; +import nu.marginalia.wmsa.edge.model.EdgeUrl; + +import java.util.Arrays; + +public record LoadRssFeed(EdgeUrl... 
feeds) implements Instruction { + + @Override + public void apply(Interpreter interpreter) { + interpreter.loadRssFeed(feeds); + } + + @Override + public String toString() { + return getClass().getSimpleName()+"["+ Arrays.toString(feeds)+"]"; + } + + @Override + public InstructionTag tag() { + return InstructionTag.RSS; + } + + @Override + public boolean isNoOp() { + return feeds.length == 0; + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/interpreter/instruction/LoadUrl.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/interpreter/instruction/LoadUrl.java new file mode 100644 index 00000000..50c2b34c --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/interpreter/instruction/LoadUrl.java @@ -0,0 +1,31 @@ +package nu.marginalia.wmsa.edge.converting.interpreter.instruction; + +import nu.marginalia.wmsa.edge.converting.interpreter.Instruction; +import nu.marginalia.wmsa.edge.converting.interpreter.InstructionTag; +import nu.marginalia.wmsa.edge.converting.interpreter.Interpreter; +import nu.marginalia.wmsa.edge.model.EdgeUrl; + +import java.util.Arrays; + +public record LoadUrl(EdgeUrl... 
url) implements Instruction { + + @Override + public void apply(Interpreter interpreter) { + interpreter.loadUrl(url); + } + + @Override + public String toString() { + return getClass().getSimpleName()+"["+ Arrays.toString(url)+"]"; + } + + @Override + public InstructionTag tag() { + return InstructionTag.URL; + } + + @Override + public boolean isNoOp() { + return url.length == 0; + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/loader/IndexLoadKeywords.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/loader/IndexLoadKeywords.java new file mode 100644 index 00000000..491af2de --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/loader/IndexLoadKeywords.java @@ -0,0 +1,64 @@ +package nu.marginalia.wmsa.edge.converting.loader; + +import com.google.inject.Inject; +import lombok.SneakyThrows; +import nu.marginalia.wmsa.configuration.server.Context; +import nu.marginalia.wmsa.edge.converting.interpreter.instruction.DocumentKeywords; +import nu.marginalia.wmsa.edge.index.client.EdgeIndexClient; +import nu.marginalia.wmsa.edge.model.EdgeId; +import nu.marginalia.wmsa.edge.model.EdgeUrl; +import nu.marginalia.wmsa.edge.model.crawl.EdgePageWordSet; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Arrays; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.TimeUnit; + +public class IndexLoadKeywords implements Runnable { + private EdgeIndexClient client; + private static final Logger logger = LoggerFactory.getLogger(IndexLoadKeywords.class); + private final LinkedBlockingQueue insertQueue = new LinkedBlockingQueue<>(32); + + private record InsertTask(int urlId, int domainId, EdgePageWordSet wordSet) {}; + private final Thread runThread; + private volatile boolean canceled = false; + + @Inject + public IndexLoadKeywords(EdgeIndexClient client) { + this.client = client; + runThread = new Thread(this, getClass().getSimpleName()); + 
runThread.start(); + } + + @SneakyThrows + public void run() { + while (!canceled) { + var data = insertQueue.poll(1, TimeUnit.SECONDS); + if (data != null) { + client.putWords(Context.internal(), new EdgeId<>(data.domainId), new EdgeId<>(data.urlId), -5., data.wordSet, 1).blockingSubscribe(); + } + } + } + + public void close() throws InterruptedException { + canceled = true; + runThread.join(); + } + + public void load(LoaderData loaderData, EdgeUrl url, DocumentKeywords[] words) throws InterruptedException { + int domainId = loaderData.getDomainId(url.domain); + int urlId = loaderData.getUrlId(url); + + if (urlId < 0 || domainId < 0) { + logger.warn("Failed to get IDs for {} -- d={},u={}", url, domainId, urlId); + } + + var ws = new EdgePageWordSet(); + for (var doc : words) { + ws.append(doc.block(), Arrays.asList(doc.keywords())); + } + + insertQueue.put(new InsertTask(urlId, domainId, ws)); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/loader/Loader.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/loader/Loader.java new file mode 100644 index 00000000..bdd0a5b3 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/loader/Loader.java @@ -0,0 +1,115 @@ +package nu.marginalia.wmsa.edge.converting.loader; + +import io.reactivex.rxjava3.core.Observable; +import nu.marginalia.wmsa.edge.converting.interpreter.Interpreter; +import nu.marginalia.wmsa.edge.converting.interpreter.instruction.DocumentKeywords; +import nu.marginalia.wmsa.edge.converting.interpreter.instruction.DomainLink; +import nu.marginalia.wmsa.edge.converting.interpreter.instruction.LoadProcessedDocument; +import nu.marginalia.wmsa.edge.converting.interpreter.instruction.LoadProcessedDocumentWithError; +import nu.marginalia.wmsa.edge.model.EdgeDomain; +import nu.marginalia.wmsa.edge.model.EdgeUrl; +import nu.marginalia.wmsa.edge.model.crawl.EdgeDomainIndexingState; +import org.slf4j.Logger; +import 
org.slf4j.LoggerFactory; + +import java.util.ArrayList; +import java.util.List; + +public class Loader implements Interpreter { + private final SqlLoadUrls sqlLoadUrls; + private final SqlLoadDomains sqlLoadDomains; + private final SqlLoadDomainLinks sqlLoadDomainLinks; + private final SqlLoadProcessedDomain sqlLoadProcessedDomain; + private final SqlLoadProcessedDocument sqlLoadProcessedDocument; + private final IndexLoadKeywords indexLoadKeywords; + + private static final Logger logger = LoggerFactory.getLogger(Loader.class); + + private final List processedDocumentList; + private final List processedDocumentWithErrorList; + + public final LoaderData data; + + public Loader(int sizeHint, + SqlLoadUrls sqlLoadUrls, + SqlLoadDomains sqlLoadDomains, + SqlLoadDomainLinks sqlLoadDomainLinks, + SqlLoadProcessedDomain sqlLoadProcessedDomain, + SqlLoadProcessedDocument sqlLoadProcessedDocument, + IndexLoadKeywords indexLoadKeywords) + { + data = new LoaderData(sizeHint); + + this.sqlLoadUrls = sqlLoadUrls; + this.sqlLoadDomains = sqlLoadDomains; + this.sqlLoadDomainLinks = sqlLoadDomainLinks; + this.sqlLoadProcessedDomain = sqlLoadProcessedDomain; + this.sqlLoadProcessedDocument = sqlLoadProcessedDocument; + this.indexLoadKeywords = indexLoadKeywords; + + processedDocumentList = new ArrayList<>(sizeHint); + processedDocumentWithErrorList = new ArrayList<>(sizeHint); + } + + + @Override + public void loadUrl(EdgeUrl[] urls) { + logger.debug("loadUrl({})", urls, null); + + sqlLoadUrls.load(data, urls); + } + + @Override + public void loadDomain(EdgeDomain[] domains) { + logger.debug("loadDomain({})", domains, null); + sqlLoadDomains.load(data, domains); + } + + @Override + public void loadRssFeed(EdgeUrl[] rssFeed) { + logger.debug("loadRssFeed({})", rssFeed, null); + } + + @Override + public void loadDomainLink(DomainLink[] links) { + logger.debug("loadDomainLink({})", links, null); + sqlLoadDomainLinks.load(links); + } + + @Override + public void 
loadProcessedDomain(EdgeDomain domain, EdgeDomainIndexingState state, double quality) { + logger.debug("loadProcessedDomain({}, {}, {})", domain, state, quality); + sqlLoadProcessedDomain.load(data, domain, state, quality); + } + + @Override + public void loadProcessedDocument(LoadProcessedDocument document) { + processedDocumentList.add(document); + } + + @Override + public void loadProcessedDocumentWithError(LoadProcessedDocumentWithError document) { + processedDocumentWithErrorList.add(document); + } + + @Override + public void loadKeywords(EdgeUrl url, DocumentKeywords[] words) { + logger.debug("loadKeywords(#{})", words.length); + try { + indexLoadKeywords.load(data, url, words); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + } + + @Override + public void loadDomainRedirect(DomainLink link) { + logger.debug("loadDomainRedirect({})", link); + sqlLoadProcessedDomain.loadAlias(data, link); + } + + public void finish() { + sqlLoadProcessedDocument.load(data, processedDocumentList); + sqlLoadProcessedDocument.loadWithError(data, processedDocumentWithErrorList); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/loader/LoaderData.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/loader/LoaderData.java new file mode 100644 index 00000000..5c9dc4a1 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/loader/LoaderData.java @@ -0,0 +1,43 @@ +package nu.marginalia.wmsa.edge.converting.loader; + +import gnu.trove.map.hash.TObjectIntHashMap; +import nu.marginalia.wmsa.edge.model.EdgeDomain; +import nu.marginalia.wmsa.edge.model.EdgeUrl; + +public class LoaderData { + + private final TObjectIntHashMap urlIds; + private final TObjectIntHashMap domainIds; + private EdgeDomain targetDomain; + public final int sizeHint; + + public LoaderData(int sizeHint) { + urlIds = new TObjectIntHashMap<>(sizeHint+1); + domainIds = new TObjectIntHashMap<>(10); + this.sizeHint = 
sizeHint; + } + + public void setTargetDomain(EdgeDomain domain) { + this.targetDomain = domain; + } + public EdgeDomain getTargetDomain() { + return targetDomain; + } + + + public void addDomain(EdgeDomain domain, int id) { + domainIds.put(domain, id); + } + + public void addUrl(EdgeUrl url, int id) { + urlIds.put(url, id); + } + + public int getUrlId(EdgeUrl url) { + return urlIds.get(url); + } + + public int getDomainId(EdgeDomain domain) { + return domainIds.get(domain); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/loader/LoaderFactory.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/loader/LoaderFactory.java new file mode 100644 index 00000000..f92319aa --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/loader/LoaderFactory.java @@ -0,0 +1,32 @@ +package nu.marginalia.wmsa.edge.converting.loader; + +import com.google.inject.Inject; + +public class LoaderFactory { + private final SqlLoadUrls sqlLoadUrls; + private final SqlLoadDomains sqlLoadDomains; + private final SqlLoadDomainLinks sqlLoadDomainLinks; + private final SqlLoadProcessedDomain sqlLoadProcessedDomain; + private final SqlLoadProcessedDocument sqlLoadProcessedDocument; + private final IndexLoadKeywords indexLoadKeywords; + + @Inject + public LoaderFactory(SqlLoadUrls sqlLoadUrls, + SqlLoadDomains sqlLoadDomains, + SqlLoadDomainLinks sqlLoadDomainLinks, + SqlLoadProcessedDomain sqlLoadProcessedDomain, + SqlLoadProcessedDocument sqlLoadProcessedDocument, + IndexLoadKeywords indexLoadKeywords) { + + this.sqlLoadUrls = sqlLoadUrls; + this.sqlLoadDomains = sqlLoadDomains; + this.sqlLoadDomainLinks = sqlLoadDomainLinks; + this.sqlLoadProcessedDomain = sqlLoadProcessedDomain; + this.sqlLoadProcessedDocument = sqlLoadProcessedDocument; + this.indexLoadKeywords = indexLoadKeywords; + } + + public Loader create(int sizeHint) { + return new Loader(sizeHint, sqlLoadUrls, sqlLoadDomains, sqlLoadDomainLinks, 
sqlLoadProcessedDomain, sqlLoadProcessedDocument, indexLoadKeywords); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/loader/SqlLoadDomainLinks.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/loader/SqlLoadDomainLinks.java new file mode 100644 index 00000000..e0978828 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/loader/SqlLoadDomainLinks.java @@ -0,0 +1,69 @@ +package nu.marginalia.wmsa.edge.converting.loader; + +import com.google.inject.Inject; +import com.zaxxer.hikari.HikariDataSource; +import nu.marginalia.wmsa.edge.converting.interpreter.instruction.DomainLink; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.sql.SQLException; + +import static java.sql.Statement.SUCCESS_NO_INFO; + +public class SqlLoadDomainLinks { + + private final HikariDataSource dataSource; + private static final Logger logger = LoggerFactory.getLogger(SqlLoadDomainLinks.class); + + @Inject + public SqlLoadDomainLinks(HikariDataSource dataSource) { + this.dataSource = dataSource; + try (var conn = dataSource.getConnection()) { + try (var stmt = conn.createStatement()) { + stmt.execute("DROP PROCEDURE IF EXISTS INSERT_LINK"); + stmt.execute(""" + CREATE PROCEDURE INSERT_LINK ( + IN FROM_DOMAIN VARCHAR(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci, + IN TO_DOMAIN VARCHAR(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci + ) + BEGIN + INSERT IGNORE INTO EC_DOMAIN_LINK (SOURCE_DOMAIN_ID, DEST_DOMAIN_ID) + SELECT SOURCE.ID,DEST.ID + FROM EC_DOMAIN SOURCE INNER JOIN EC_DOMAIN DEST + ON SOURCE.URL_PART=FROM_DOMAIN AND DEST.URL_PART=TO_DOMAIN; + END + """); + } + } + catch (SQLException ex) { + throw new RuntimeException("Failed to set up loader", ex); + } + } + + public void load(DomainLink[] links) { + + try (var connection = dataSource.getConnection(); + var stmt = + connection.prepareCall("CALL INSERT_LINK(?,?)")) + { + + for (DomainLink link : links) { + 
stmt.setString(1, link.from().toString()); + stmt.setString(2, link.to().toString()); + + stmt.addBatch(); + } + + var ret = stmt.executeBatch(); + for (int rv = 0; rv < links.length; rv++) { + if (ret[rv] != 1 && ret[rv] != SUCCESS_NO_INFO) { + logger.warn("load({}) -- bad row count {}", links[rv], ret[rv]); + } + } + } + catch (SQLException sql) { + sql.printStackTrace(); + } + + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/loader/SqlLoadDomains.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/loader/SqlLoadDomains.java new file mode 100644 index 00000000..18cc40bd --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/loader/SqlLoadDomains.java @@ -0,0 +1,124 @@ +package nu.marginalia.wmsa.edge.converting.loader; + +import com.google.inject.Inject; +import com.zaxxer.hikari.HikariDataSource; +import nu.marginalia.wmsa.edge.model.EdgeDomain; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.sql.Connection; +import java.sql.SQLException; + +import static java.sql.Statement.SUCCESS_NO_INFO; + +public class SqlLoadDomains { + + private final HikariDataSource dataSource; + private static final Logger logger = LoggerFactory.getLogger(SqlLoadDomains.class); + + @Inject + public SqlLoadDomains(HikariDataSource dataSource) { + this.dataSource = dataSource; + try (var conn = dataSource.getConnection()) { + try (var stmt = conn.createStatement()) { + stmt.execute("DROP PROCEDURE IF EXISTS INSERT_DOMAIN"); + stmt.execute(""" + CREATE PROCEDURE INSERT_DOMAIN ( + IN DOMAIN_NAME VARCHAR(255), + IN SUB_DOMAIN VARCHAR(255), + IN TOP_DOMAIN VARCHAR(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci) + BEGIN + INSERT IGNORE INTO EC_TOP_DOMAIN (URL_PART) VALUES (TOP_DOMAIN); + + INSERT IGNORE INTO EC_DOMAIN(URL_PART, URL_SUBDOMAIN, URL_TOP_DOMAIN_ID) + SELECT DOMAIN_NAME,SUB_DOMAIN,ID + FROM EC_TOP_DOMAIN + WHERE EC_TOP_DOMAIN.URL_PART=TOP_DOMAIN; + END + """); + } + } + 
catch (SQLException ex) { + throw new RuntimeException("Failed to set up loader", ex); + } + } + + public void load(LoaderData data, EdgeDomain domain) { + + try (var connection = dataSource.getConnection()) { + try (var insertCall = connection.prepareCall("CALL INSERT_DOMAIN(?,?,?)")) { + insertCall.setString(1, domain.toString()); + insertCall.setString(2, domain.subDomain); + insertCall.setString(3, domain.domain); + insertCall.addBatch(); + + var ret = insertCall.executeUpdate(); + if (ret < 0) { + logger.warn("load({}) -- bad row count {}", domain, ret); + } + + connection.commit(); + findIdForTargetDomain(connection, data); + } + } + catch (SQLException ex) { + ex.printStackTrace(); + } + + + } + + public void load(LoaderData data, EdgeDomain[] domains) { + + try (var connection = dataSource.getConnection()) { + connection.setAutoCommit(false); + + try (var insertCall = connection.prepareCall("CALL INSERT_DOMAIN(?,?,?)")) { + + for (var domain : domains) { + insertCall.setString(1, domain.toString()); + insertCall.setString(2, domain.subDomain); + insertCall.setString(3, domain.domain); + insertCall.addBatch(); + } + var ret = insertCall.executeBatch(); + + for (int rv = 0; rv < domains.length; rv++) { + if (ret[rv] < 0 && ret[rv] != SUCCESS_NO_INFO) { + logger.warn("load({}) -- bad row count {}", domains[rv], ret[rv]); + } + } + + } + connection.commit(); + connection.setAutoCommit(true); + findIdForTargetDomain(connection, data); + } + catch (SQLException ex) { + ex.printStackTrace(); + } + } + + void findIdForTargetDomain(Connection connection, LoaderData data) { + if (data.getTargetDomain() == null || data.getDomainId(data.getTargetDomain()) > 0) { + return; + } + + try (var query = connection.prepareStatement("SELECT ID FROM EC_DOMAIN WHERE URL_PART=?")) + { + + var targetDomain = data.getTargetDomain(); + query.setString(1, targetDomain.toString()); + var rsp = query.executeQuery(); + if (rsp.next()) { + data.addDomain(targetDomain, rsp.getInt(1)); + } 
+ else { + logger.warn("load() -- could not find ID for target domain {}", targetDomain); + } + } + catch (SQLException ex) { + ex.printStackTrace(); + } + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/loader/SqlLoadProcessedDocument.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/loader/SqlLoadProcessedDocument.java new file mode 100644 index 00000000..5bc48caa --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/loader/SqlLoadProcessedDocument.java @@ -0,0 +1,126 @@ +package nu.marginalia.wmsa.edge.converting.loader; + +import com.google.inject.Inject; +import com.zaxxer.hikari.HikariDataSource; +import nu.marginalia.wmsa.edge.converting.interpreter.instruction.LoadProcessedDocument; +import nu.marginalia.wmsa.edge.converting.interpreter.instruction.LoadProcessedDocumentWithError; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.sql.SQLException; +import java.util.Collection; +import java.util.List; + +import static java.sql.Statement.SUCCESS_NO_INFO; + +public class SqlLoadProcessedDocument { + private final HikariDataSource dataSource; + private static final Logger logger = LoggerFactory.getLogger(SqlLoadProcessedDocument.class); + + @Inject + public SqlLoadProcessedDocument(HikariDataSource dataSource) { + this.dataSource = dataSource; + + try (var conn = dataSource.getConnection()) { + try (var stmt = conn.createStatement()) { + stmt.execute("DROP PROCEDURE IF EXISTS INSERT_PAGE_VISIT"); + stmt.execute("DROP PROCEDURE IF EXISTS INSERT_PAGE_VISIT_BAD"); + stmt.execute(""" + CREATE PROCEDURE INSERT_PAGE_VISIT ( + IN URL_ID INT, + IN STATE VARCHAR(32), + IN TITLE VARCHAR(255), + IN DESCRIPTION VARCHAR(255), + IN LENGTH INT, + IN QUALITY_MEASURE DOUBLE, + IN FEATURES INT, + IN STANDARD VARCHAR(32), + IN HASH INT) + BEGIN + SET FOREIGN_KEY_CHECKS=0; + REPLACE INTO EC_PAGE_DATA(ID, TITLE, DESCRIPTION, WORDS_TOTAL, FORMAT, FEATURES) VALUES (URL_ID, TITLE, 
DESCRIPTION, LENGTH, STANDARD, FEATURES); + UPDATE EC_URL SET VISITED=1, STATE=STATE, QUALITY_MEASURE=QUALITY_MEASURE, DATA_HASH=HASH WHERE ID=URL_ID; + SET FOREIGN_KEY_CHECKS=1; + END + """); + stmt.execute(""" + CREATE PROCEDURE INSERT_PAGE_VISIT_BAD ( + IN URL_ID INT, + IN STATE VARCHAR(32)) + BEGIN + UPDATE EC_URL SET VISITED=1, STATE=STATE, QUALITY_MEASURE=-100, DATA_HASH=NULL WHERE ID=URL_ID; + END + """); + + } + } + catch (SQLException ex) { + throw new RuntimeException("Failed to set up loader", ex); + } + } + + public void load(LoaderData data, List documents) { + try (var conn = dataSource.getConnection(); + var stmt = conn.prepareCall("CALL INSERT_PAGE_VISIT(?, ?, ?, ?, ?, ?, ?, ?, ?)")) { + + for (var doc : documents) { + int urlId = data.getUrlId(doc.url()); + if (urlId < 0) { + logger.warn("Failed to resolve ID for URL {}", doc.url()); + return; + } + + stmt.setInt(1, urlId); + stmt.setString(2, doc.state().name()); + stmt.setString(3, doc.title()); + stmt.setString(4, doc.description()); + stmt.setInt(5, doc.length()); + stmt.setDouble(6, doc.quality()); + stmt.setInt(7, doc.htmlFeatures()); + stmt.setString(8, doc.standard().name()); + stmt.setInt(9, (int) doc.hash()); + stmt.addBatch(); + } + var ret = stmt.executeBatch(); + + for (int rv = 0; rv < documents.size(); rv++) { + if (ret[rv] < 1 && ret[rv] != SUCCESS_NO_INFO) { + logger.warn("load({}) -- bad row count {}", documents.get(rv), ret[rv]); + } + } + + conn.commit(); + } catch (SQLException e) { + e.printStackTrace(); + } + + + } + + public void loadWithError(LoaderData data, List documents) { + try (var conn = dataSource.getConnection(); + var stmt = conn.prepareCall("CALL INSERT_PAGE_VISIT_BAD(?, ?)")) { + + for (var doc : documents) { + int urlId = data.getUrlId(doc.url()); + if (urlId < 0) { + logger.warn("Failed to resolve ID for URL {}", doc.url()); + return; + } + + stmt.setInt(1, urlId); + stmt.setString(2, doc.state().name()); + stmt.addBatch(); + } + var ret = stmt.executeBatch(); 
+ for (int rv = 0; rv < documents.size(); rv++) { + if (ret[rv] < 0 && ret[rv] != SUCCESS_NO_INFO) { + logger.warn("load({}) -- bad row count {}", documents.get(rv), ret[rv]); + } + } + } catch (SQLException e) { + e.printStackTrace(); + } + + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/loader/SqlLoadProcessedDomain.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/loader/SqlLoadProcessedDomain.java new file mode 100644 index 00000000..64607b3a --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/loader/SqlLoadProcessedDomain.java @@ -0,0 +1,87 @@ +package nu.marginalia.wmsa.edge.converting.loader; + +import com.google.inject.Inject; +import com.zaxxer.hikari.HikariDataSource; +import nu.marginalia.wmsa.edge.converting.interpreter.instruction.DomainLink; +import nu.marginalia.wmsa.edge.model.EdgeDomain; +import nu.marginalia.wmsa.edge.model.crawl.EdgeDomainIndexingState; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.sql.SQLException; + +public class SqlLoadProcessedDomain { + private final HikariDataSource dataSource; + private final SqlLoadDomains loadDomains; + private static final Logger logger = LoggerFactory.getLogger(SqlLoadProcessedDomain.class); + @Inject + public SqlLoadProcessedDomain(HikariDataSource dataSource, SqlLoadDomains loadDomains) { + this.dataSource = dataSource; + this.loadDomains = loadDomains; + + + try (var conn = dataSource.getConnection()) { + try (var stmt = conn.createStatement()) { + stmt.execute("DROP PROCEDURE IF EXISTS INITIALIZE_DOMAIN"); + stmt.execute(""" + CREATE PROCEDURE INITIALIZE_DOMAIN ( + IN ST INT, + IN IDX INT, + IN QUAL DOUBLE, + IN DID INT) + BEGIN + UPDATE EC_DOMAIN SET INDEX_DATE=NOW(), STATE=ST, DOMAIN_ALIAS=NULL, INDEXED=GREATEST(INDEXED,IDX), QUALITY=QUAL, QUALITY_RAW=QUAL, QUALITY_ORIGINAL=QUAL WHERE ID=DID; + DELETE FROM EC_DOMAIN_LINK WHERE SOURCE_DOMAIN_ID=DID; + END + """); + } + } + catch 
(SQLException ex) { + throw new RuntimeException("Failed to set up loader", ex); + } + } + + public void load(LoaderData data, EdgeDomain domain, EdgeDomainIndexingState state, double quality) { + data.setTargetDomain(domain); + + loadDomains.load(data, domain); + + try (var conn = dataSource.getConnection(); + var initCall = conn.prepareCall("CALL INITIALIZE_DOMAIN(?,?,?,?)")) + { + initCall.setInt(1, state.code); + initCall.setInt(2, 1 + data.sizeHint / 100); + initCall.setDouble(3, quality); + initCall.setInt(4, data.getDomainId(domain)); + int rc = initCall.executeUpdate(); + if (rc < 1) { + logger.warn("load({},{},{}) -- bad rowcount {}", domain, state, quality, rc); + } + conn.commit(); + } + catch (SQLException ex) { + ex.printStackTrace(); + } + + } + + public void loadAlias(LoaderData data, DomainLink link) { + try (var conn = dataSource.getConnection(); + var stmt = conn.prepareStatement(""" + UPDATE EC_DOMAIN TARGET + INNER JOIN EC_DOMAIN ALIAS ON ALIAS.URL_PART=? + SET TARGET.DOMAIN_ALIAS=ALIAS.ID + WHERE TARGET.URL_PART=? 
+ """)) { + stmt.setString(1, link.to().toString()); + stmt.setString(2, link.from().toString()); + int rc = stmt.executeUpdate(); + if (rc != 1) { + logger.warn("loadAlias({}) - unexpected row count {}", link, rc); + } + } + catch (SQLException ex) { + ex.printStackTrace(); + } + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/loader/SqlLoadUrls.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/loader/SqlLoadUrls.java new file mode 100644 index 00000000..7d8851ca --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/loader/SqlLoadUrls.java @@ -0,0 +1,92 @@ +package nu.marginalia.wmsa.edge.converting.loader; + +import com.google.inject.Inject; +import com.zaxxer.hikari.HikariDataSource; +import nu.marginalia.wmsa.edge.model.EdgeUrl; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.sql.SQLException; +import java.sql.Types; + +import static java.sql.Statement.SUCCESS_NO_INFO; + +public class SqlLoadUrls { + + private final HikariDataSource dataSource; + private static final Logger logger = LoggerFactory.getLogger(SqlLoadUrls.class); + + @Inject + public SqlLoadUrls(HikariDataSource dataSource) { + this.dataSource = dataSource; + try (var conn = dataSource.getConnection()) { + try (var stmt = conn.createStatement()) { + stmt.execute("DROP PROCEDURE IF EXISTS INSERT_URL"); + stmt.execute(""" + CREATE PROCEDURE INSERT_URL ( + IN PROTO VARCHAR(255), + IN DOMAIN_NAME VARCHAR(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci, + IN PORT INT, + IN URL VARCHAR(255) + ) + BEGIN + INSERT IGNORE INTO EC_URL (PROTO,DOMAIN_ID,PORT,URL) SELECT PROTO,ID,PORT,URL FROM EC_DOMAIN WHERE URL_PART=DOMAIN_NAME; + END + """); + } + } + catch (SQLException ex) { + throw new RuntimeException("Failed to set up loader", ex); + } + } + + public void load(LoaderData data, EdgeUrl[] urls) { + try (var conn = dataSource.getConnection(); + var insertCall = conn.prepareCall("CALL 
INSERT_URL(?,?,?,?)"); + var queryCall = conn.prepareStatement("SELECT ID, PROTO, URL FROM EC_URL WHERE DOMAIN_ID=?") + ) + { + conn.setAutoCommit(false); + for (var url : urls) { + + insertCall.setString(1, url.proto); + insertCall.setString(2, url.domain.toString()); + if (url.port != null) { + insertCall.setInt(3, url.port); + } + else { + insertCall.setNull(3, Types.INTEGER); + } + insertCall.setString(4, url.path); + insertCall.addBatch(); + } + var ret = insertCall.executeBatch(); + for (int rv = 0; rv < urls.length; rv++) { + if (ret[rv] < 0 && ret[rv] != SUCCESS_NO_INFO) { + logger.warn("load({}) -- bad row count {}", urls[rv], ret[rv]); + } + } + + conn.commit(); + conn.setAutoCommit(true); + + + var targetDomain = data.getTargetDomain(); + queryCall.setInt(1, data.getDomainId(targetDomain)); + + var rsp = queryCall.executeQuery(); + + while (rsp.next()) { + int urlId = rsp.getInt(1); + String proto = rsp.getString(2); + String path = rsp.getString(3); + + data.addUrl(new EdgeUrl(proto, targetDomain, null, path), urlId); + } + + } + catch (SQLException ex) { + ex.printStackTrace(); + } + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/model/DisqualifiedException.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/model/DisqualifiedException.java new file mode 100644 index 00000000..1c785371 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/model/DisqualifiedException.java @@ -0,0 +1,17 @@ +package nu.marginalia.wmsa.edge.converting.model; + +public class DisqualifiedException extends Exception { + public final DisqualificationReason reason; + + public DisqualifiedException(DisqualificationReason reason) { + this.reason = reason; + } + @Override + public Throwable fillInStackTrace() { + return this; + } + + public enum DisqualificationReason { + LENGTH, CONTENT_TYPE, LANGUAGE, STATUS, QUALITY + } +} diff --git 
a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/model/ProcessedDocument.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/model/ProcessedDocument.java new file mode 100644 index 00000000..e73b6a8f --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/model/ProcessedDocument.java @@ -0,0 +1,25 @@ +package nu.marginalia.wmsa.edge.converting.model; + +import lombok.ToString; +import nu.marginalia.wmsa.edge.model.EdgeUrl; +import nu.marginalia.wmsa.edge.model.crawl.EdgePageWordSet; +import nu.marginalia.wmsa.edge.model.crawl.EdgeUrlState; + +import java.util.OptionalDouble; + +@ToString +public class ProcessedDocument { + public EdgeUrl url; + + public ProcessedDocumentDetails details; + public EdgePageWordSet words; + + public EdgeUrlState state; + + public OptionalDouble quality() { + if (details != null) { + return OptionalDouble.of(details.quality); + } + return OptionalDouble.empty(); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/model/ProcessedDocumentDetails.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/model/ProcessedDocumentDetails.java new file mode 100644 index 00000000..f21e86ae --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/model/ProcessedDocumentDetails.java @@ -0,0 +1,26 @@ +package nu.marginalia.wmsa.edge.converting.model; + +import lombok.ToString; +import nu.marginalia.wmsa.edge.crawler.domain.processor.HtmlFeature; +import nu.marginalia.wmsa.edge.model.EdgeUrl; +import nu.marginalia.wmsa.edge.model.crawl.EdgeHtmlStandard; + +import java.util.List; +import java.util.Set; + +@ToString +public class ProcessedDocumentDetails { + public String title; + public String description; + + public int length; + public double quality; + public long hashCode; + + public Set features; + public EdgeHtmlStandard standard; + + public List linksInternal; + public List linksExternal; + public List feedLinks; +} 
diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/model/ProcessedDomain.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/model/ProcessedDomain.java new file mode 100644 index 00000000..101d1fb8 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/model/ProcessedDomain.java @@ -0,0 +1,34 @@ +package nu.marginalia.wmsa.edge.converting.model; + +import lombok.ToString; +import nu.marginalia.wmsa.edge.model.EdgeDomain; +import nu.marginalia.wmsa.edge.model.crawl.EdgeDomainIndexingState; + +import java.util.List; +import java.util.Optional; +import java.util.OptionalDouble; + +@ToString +public class ProcessedDomain { + public EdgeDomain domain; + + public List documents; + public EdgeDomainIndexingState state; + public EdgeDomain redirect; + public String ip; + + public OptionalDouble averageQuality() { + if (documents == null) { + return OptionalDouble.empty(); + } + return documents.stream() + .map(ProcessedDocument::quality) + .filter(OptionalDouble::isPresent) + .mapToDouble(OptionalDouble::getAsDouble) + .average(); + } + + public int size() { + return Optional.ofNullable(documents).map(List::size).orElse(1); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/processor/DocumentProcessor.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/processor/DocumentProcessor.java new file mode 100644 index 00000000..2d7cda5e --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/processor/DocumentProcessor.java @@ -0,0 +1,245 @@ +package nu.marginalia.wmsa.edge.converting.processor; + +import com.google.common.hash.HashCode; +import com.google.inject.Inject; +import com.google.inject.name.Named; +import nu.marginalia.wmsa.edge.converting.model.DisqualifiedException; +import nu.marginalia.wmsa.edge.converting.model.DisqualifiedException.DisqualificationReason; +import 
nu.marginalia.wmsa.edge.converting.model.ProcessedDocument; +import nu.marginalia.wmsa.edge.converting.model.ProcessedDocumentDetails; +import nu.marginalia.wmsa.edge.converting.processor.logic.*; +import nu.marginalia.wmsa.edge.crawler.domain.FeedExtractor; +import nu.marginalia.wmsa.edge.crawler.domain.LinkParser; +import nu.marginalia.wmsa.edge.crawler.domain.language.LanguageFilter; +import nu.marginalia.wmsa.edge.crawler.domain.language.processing.DocumentKeywordExtractor; +import nu.marginalia.wmsa.edge.crawler.domain.language.processing.SentenceExtractor; +import nu.marginalia.wmsa.edge.crawler.domain.language.processing.model.DocumentLanguageData; +import nu.marginalia.wmsa.edge.crawler.domain.processor.HtmlFeature; +import nu.marginalia.wmsa.edge.crawler.domain.processor.HtmlStandardExtractor; +import nu.marginalia.wmsa.edge.crawling.model.CrawledDocument; +import nu.marginalia.wmsa.edge.crawling.model.CrawledDomain; +import nu.marginalia.wmsa.edge.crawling.model.CrawlerDocumentStatus; +import nu.marginalia.wmsa.edge.index.model.IndexBlock; +import nu.marginalia.wmsa.edge.model.EdgeUrl; +import nu.marginalia.wmsa.edge.model.crawl.EdgeHtmlStandard; +import nu.marginalia.wmsa.edge.model.crawl.EdgePageWordSet; +import nu.marginalia.wmsa.edge.model.crawl.EdgeUrlState; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.net.URISyntaxException; +import java.util.*; + +import static nu.marginalia.wmsa.edge.model.crawl.EdgeHtmlStandard.UNKNOWN; + +public class DocumentProcessor { + + private final Logger logger = LoggerFactory.getLogger(getClass()); + + private final int minDocumentLength; + private final double minDocumentQuality; + + private static final Set acceptedContentTypes = Set.of("application/xhtml+xml", "application/xhtml", "text/html"); + + private final SentenceExtractor sentenceExtractor; + private final FeatureExtractor featureExtractor; + private final TitleExtractor 
titleExtractor; + private final DocumentKeywordExtractor keywordExtractor; + private final SummaryExtractor summaryExtractor; + + private static final DocumentValuator documentValuator = new DocumentValuator(); + private static final LanguageFilter languageFilter = new LanguageFilter(); + private static final LinkParser linkParser = new LinkParser(); + private static final FeedExtractor feedExtractor = new FeedExtractor(linkParser); + + @Inject + public DocumentProcessor(@Named("min-document-length") Integer minDocumentLength, + @Named("min-document-quality") Double minDocumentQuality, + SentenceExtractor sentenceExtractor, + FeatureExtractor featureExtractor, + TitleExtractor titleExtractor, + DocumentKeywordExtractor keywordExtractor, + SummaryExtractor summaryExtractor) + { + this.minDocumentLength = minDocumentLength; + this.minDocumentQuality = minDocumentQuality; + this.sentenceExtractor = sentenceExtractor; + this.featureExtractor = featureExtractor; + this.titleExtractor = titleExtractor; + this.keywordExtractor = keywordExtractor; + this.summaryExtractor = summaryExtractor; + } + + public ProcessedDocument process(CrawledDocument crawledDocument, CrawledDomain crawledDomain) { + ProcessedDocument ret = new ProcessedDocument(); + + try { + ret.url = new EdgeUrl(crawledDocument.url); + ret.state = crawlerStatusToUrlState(crawledDocument.crawlerStatus, crawledDocument.httpStatus); + + if (ret.state == EdgeUrlState.OK && isAcceptedContentType(crawledDocument)) { + var detailsWords = createDetails(crawledDomain, crawledDocument); + + if (detailsWords.details().quality < minDocumentQuality) { + throw new DisqualifiedException(DisqualificationReason.QUALITY); + } + + ret.details = detailsWords.details(); + ret.words = detailsWords.words(); + } + else { + throw new DisqualifiedException(DisqualificationReason.STATUS); + } + } + catch (DisqualifiedException ex) { + ret.state = EdgeUrlState.DISQUALIFIED; + logger.debug("Disqualified {}: {}", ret.url, ex.reason); + } 
+ catch (Exception ex) { + ret.state = EdgeUrlState.DISQUALIFIED; + logger.info("Failed to convert " + ret.url, ex); + ex.printStackTrace(); + } + + return ret; + } + + private boolean isAcceptedContentType(CrawledDocument crawledDocument) { + return crawledDocument.contentType != null && acceptedContentTypes.contains(crawledDocument.contentType.toLowerCase()); + } + + private EdgeUrlState crawlerStatusToUrlState(String crawlerStatus, int httpStatus) { + return switch (CrawlerDocumentStatus.valueOf(crawlerStatus)) { + case OK -> httpStatus < 300 ? EdgeUrlState.OK : EdgeUrlState.DEAD; + case REDIRECT -> EdgeUrlState.REDIRECT; + default -> EdgeUrlState.DEAD; + }; + } + + private DetailsWithWords createDetails(CrawledDomain crawledDomain, CrawledDocument crawledDocument) + throws DisqualifiedException, URISyntaxException { + + var doc = Jsoup.parse(crawledDocument.documentBody); + var dld = sentenceExtractor.extractSentences(doc.clone()); + + checkDocumentLanguage(dld); + + var ret = new ProcessedDocumentDetails(); + + ret.description = getDescription(doc); + ret.length = getLength(doc); + ret.standard = getHtmlStandard(doc); + ret.title = titleExtractor.getTitleAbbreviated(doc, dld, crawledDocument.url); + ret.features = featureExtractor.getFeatures(crawledDomain, doc); + ret.quality = documentValuator.getQuality(ret.standard, doc, dld); + ret.hashCode = HashCode.fromString(crawledDocument.documentBodyHash).asLong(); + + var words = getWords(dld); + + var url = new EdgeUrl(crawledDocument.url); + addMetaWords(ret, url, crawledDomain, words); + + getLinks(url, ret, doc, words); + + return new DetailsWithWords(ret, words); + } + + private void addMetaWords(ProcessedDocumentDetails ret, EdgeUrl url, CrawledDomain domain, EdgePageWordSet words) { + List tagWords = new ArrayList<>(); + + var edgeDomain = url.domain; + tagWords.add("format:"+ret.standard.toString().toLowerCase()); + + + tagWords.add("site:" + edgeDomain.toString().toLowerCase()); + if 
(!Objects.equals(edgeDomain.toString(), edgeDomain.domain)) { + tagWords.add("site:" + edgeDomain.domain.toLowerCase()); + } + + tagWords.add("proto:"+url.proto.toLowerCase()); + tagWords.add("js:" + Boolean.toString(ret.features.contains(HtmlFeature.JS)).toLowerCase()); + + if (ret.features.contains(HtmlFeature.MEDIA)) { + tagWords.add("special:media"); + } + if (ret.features.contains(HtmlFeature.TRACKING)) { + tagWords.add("special:tracking"); + } + if (ret.features.contains(HtmlFeature.AFFILIATE_LINK)) { + tagWords.add("special:affiliate"); + } + if (ret.features.contains(HtmlFeature.COOKIES)) { + tagWords.add("special:cookies"); + } + + words.append(IndexBlock.Meta, tagWords); + words.append(IndexBlock.Words, tagWords); + } + + private void getLinks(EdgeUrl baseUrl, ProcessedDocumentDetails ret, Document doc, EdgePageWordSet words) { + var links = doc.getElementsByTag("a"); + var frames = doc.getElementsByTag("frame"); + var feeds = doc.select("link[rel=alternate]"); + + LinkProcessor lp = new LinkProcessor(ret, baseUrl); + + for (var atag : links) { + linkParser.parseLink(baseUrl, atag).ifPresent(lp::accept); + } + for (var frame : frames) { + linkParser.parseFrame(baseUrl, frame).ifPresent(lp::accept); + } + + for (var link : feeds) { + feedExtractor + .getFeedFromAlternateTag(baseUrl, link) + .ifPresent(lp::acceptFeed); + } + + Set linkTerms = new HashSet<>(); + + for (var domain : lp.getForeignDomains()) { + linkTerms.add("links:"+domain.toString().toLowerCase()); + linkTerms.add("links:"+domain.getDomain().toLowerCase()); + } + + words.append(IndexBlock.Meta, linkTerms); + + } + + private void checkDocumentLanguage(DocumentLanguageData dld) throws DisqualifiedException { + if (dld.totalNumWords() < minDocumentLength) { + throw new DisqualifiedException(DisqualificationReason.LENGTH); + } + + double languageAgreement = languageFilter.dictionaryAgreement(dld); + if (languageAgreement < 0.1) { + throw new 
DisqualifiedException(DisqualificationReason.LANGUAGE); + } + } + + private EdgeHtmlStandard getHtmlStandard(Document doc) { + EdgeHtmlStandard htmlStandard = HtmlStandardExtractor.parseDocType(doc.documentType()); + + if (UNKNOWN.equals(htmlStandard)) { + return HtmlStandardExtractor.sniffHtmlStandard(doc); + } + return htmlStandard; + } + + private EdgePageWordSet getWords(DocumentLanguageData dld) { + return keywordExtractor.extractKeywords(dld); + } + + private String getDescription(Document doc) { + return summaryExtractor.extractSummary(doc).orElse(""); + } + + private int getLength(Document doc) { + return doc.text().length(); + } + + private record DetailsWithWords(ProcessedDocumentDetails details, EdgePageWordSet words) {}; +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/processor/DomainProcessor.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/processor/DomainProcessor.java new file mode 100644 index 00000000..4343b0c3 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/processor/DomainProcessor.java @@ -0,0 +1,59 @@ +package nu.marginalia.wmsa.edge.converting.processor; + +import com.google.inject.Inject; +import nu.marginalia.wmsa.edge.converting.model.ProcessedDomain; +import nu.marginalia.wmsa.edge.crawling.model.CrawledDomain; +import nu.marginalia.wmsa.edge.crawling.model.CrawlerDomainStatus; +import nu.marginalia.wmsa.edge.model.EdgeDomain; +import nu.marginalia.wmsa.edge.model.crawl.EdgeDomainIndexingState; + +import java.util.ArrayList; +import java.util.Collections; + +public class DomainProcessor { + private final DocumentProcessor documentProcessor; + + @Inject + public DomainProcessor(DocumentProcessor documentProcessor) { + this.documentProcessor = documentProcessor; + } + + public ProcessedDomain process(CrawledDomain crawledDomain) { + var ret = new ProcessedDomain(); + + ret.domain = new EdgeDomain(crawledDomain.domain); + ret.ip = crawledDomain.ip; + + if 
(crawledDomain.redirectDomain != null) { + ret.redirect = new EdgeDomain(crawledDomain.redirectDomain); + } + + if (crawledDomain.doc != null) { + ret.documents = new ArrayList<>(crawledDomain.doc.size()); + + for (var doc : crawledDomain.doc) { + var processedDoc = documentProcessor.process(doc, crawledDomain); + if (processedDoc.url != null) { + ret.documents.add(processedDoc); + } + } + + } + else { + ret.documents = Collections.emptyList(); + } + + ret.state = getState(crawledDomain.crawlerStatus); + + return ret; + } + + private EdgeDomainIndexingState getState(String crawlerStatus) { + return switch (CrawlerDomainStatus.valueOf(crawlerStatus)) { + case OK -> EdgeDomainIndexingState.ACTIVE; + case REDIRECT -> EdgeDomainIndexingState.REDIR; + case BLOCKED -> EdgeDomainIndexingState.BLOCKED; + default -> EdgeDomainIndexingState.ERROR; + }; + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/processor/InstructionsCompiler.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/processor/InstructionsCompiler.java new file mode 100644 index 00000000..13be9939 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/processor/InstructionsCompiler.java @@ -0,0 +1,116 @@ +package nu.marginalia.wmsa.edge.converting.processor; + +import nu.marginalia.wmsa.edge.converting.interpreter.Instruction; +import nu.marginalia.wmsa.edge.converting.interpreter.instruction.*; +import nu.marginalia.wmsa.edge.converting.model.ProcessedDocument; +import nu.marginalia.wmsa.edge.converting.model.ProcessedDomain; +import nu.marginalia.wmsa.edge.crawler.domain.processor.HtmlFeature; +import nu.marginalia.wmsa.edge.model.EdgeDomain; +import nu.marginalia.wmsa.edge.model.EdgeUrl; + +import java.util.*; + +public class InstructionsCompiler { + + public List compile(ProcessedDomain domain) { + List ret = new ArrayList<>(domain.size()*4); + + ret.add(new LoadProcessedDomain(domain.domain, domain.state, 
domain.averageQuality().orElse(-5.))); + + if (domain.documents != null) { + compileUrls(ret, domain.documents); + compileDocuments(ret, domain.documents); + compileFeeds(ret, domain.documents); + + compileLinks(ret, domain.domain, domain.documents); + } + if (domain.redirect != null) { + compileRedirect(ret, domain.domain, domain.redirect); + + } + + return ret; + } + + private void compileRedirect(List ret, EdgeDomain from, EdgeDomain to) { + ret.add(new LoadDomain(to)); + ret.add(new LoadDomainLink(new DomainLink(from, to))); + ret.add(new LoadDomainRedirect(new DomainLink(from, to))); + } + + private void compileUrls(List ret, List documents) { + Set seenUrls = new HashSet<>(documents.size()*4); + Set seenDomains = new HashSet<>(documents.size()); + + documents.stream().map(doc -> doc.url).forEach(seenUrls::add); + + for (var doc : documents) { + if (doc.details == null) continue; + for (var url : doc.details.linksExternal) { + seenDomains.add(url.domain); + } + seenUrls.addAll(doc.details.linksExternal); + seenUrls.addAll(doc.details.linksInternal); + } + + ret.add(new LoadDomain(seenDomains.toArray(EdgeDomain[]::new))); + ret.add(new LoadUrl(seenUrls.toArray(EdgeUrl[]::new))); + } + + private void compileLinks(List ret, EdgeDomain from, List documents) { + DomainLink[] links = documents.stream().map(doc -> doc.details) + .filter(Objects::nonNull) + .flatMap(dets -> dets.linksExternal.stream()) + .map(link -> link.domain) + .distinct() + .map(domain -> new DomainLink(from, domain)) + .toArray(DomainLink[]::new); + + ret.add(new LoadDomainLink(links)); + } + + private void compileFeeds(List ret, List documents) { + + EdgeUrl[] feeds = documents.stream().map(doc -> doc.details) + .filter(Objects::nonNull) + .flatMap(dets -> dets.feedLinks.stream()) + .distinct() + .toArray(EdgeUrl[]::new); + + ret.add(new LoadRssFeed(feeds)); + } + + private void compileDocuments(List ret, List documents) { + + for (var doc : documents) { + compileDocumentDetails(ret, doc); + } 
+ + for (var doc : documents) { + compileWords(ret, doc); + } + + } + + private void compileDocumentDetails(List ret, ProcessedDocument doc) { + var details = doc.details; + + if (details != null) { + ret.add(new LoadProcessedDocument(doc.url, doc.state, details.title, details.description, HtmlFeature.encode(details.features), details.standard, details.length, details.hashCode, details.quality)); + } + else { + ret.add(new LoadProcessedDocumentWithError(doc.url, doc.state)); + } + } + + private void compileWords(List ret, ProcessedDocument doc) { + var words = doc.words; + if (words != null) { + var wordsArray = words.values().stream() + .map(DocumentKeywords::new) + .toArray(DocumentKeywords[]::new); + + ret.add(new LoadKeywords(doc.url, wordsArray)); + } + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/processor/logic/DocumentValuator.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/processor/logic/DocumentValuator.java new file mode 100644 index 00000000..bd49b6f8 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/processor/logic/DocumentValuator.java @@ -0,0 +1,70 @@ +package nu.marginalia.wmsa.edge.converting.processor.logic; + +import crawlercommons.utils.Strings; +import nu.marginalia.wmsa.edge.converting.model.DisqualifiedException; +import nu.marginalia.wmsa.edge.crawler.domain.language.processing.model.DocumentLanguageData; +import nu.marginalia.wmsa.edge.model.crawl.EdgeHtmlStandard; +import org.jsoup.nodes.Document; + +import java.util.Set; + +import static nu.marginalia.wmsa.edge.converting.model.DisqualifiedException.DisqualificationReason.LENGTH; + +public class DocumentValuator { + + private static final Set filthTable = Set.of( + "xxx", "sex", "anal", "sexy", + "bdsm", "fetish", "porn", "camgirls", "dildo", + "gangbang", "buttplug", "orgasm", "vibrator", + "cameltoe", "download", "iso", "botox", "torrent", + "jackpot", "vegas", "casino", "coinbase", "poloniex", + 
"myetherwallet", "ethereum", "binance", "bitcoin", + "litecoin", "seo", "serp" + + ); + + public double getQuality(EdgeHtmlStandard htmlStandard, Document doc, DocumentLanguageData dld) throws DisqualifiedException { + double smutCoefficient = dld.streamLowerCase().filter(filthTable::contains).count(); + double scriptPenalty = getScriptPenalty(doc); + + + int textBodyLength = doc.text().length(); + int rawLength = doc.html().length(); + + if (textBodyLength == 0) { + throw new DisqualifiedException(LENGTH); + } + + return Math.log(textBodyLength / (double) rawLength)*htmlStandard.scale + + htmlStandard.offset + - scriptPenalty + - smutCoefficient; + } + + + private int getScriptPenalty(Document parsed) { + var scriptTags = parsed.getElementsByTag("script"); + String scriptText = scriptTags.html(); + int badScript = 0; + if (scriptText.contains(".createElement(")) { + badScript = 1; + } + + double scriptPenalty = 0; + for (var tag : scriptTags) { + String srcTag = tag.attr("src"); + if (Strings.isBlank(srcTag)) { + scriptPenalty += 1; + } + else if (srcTag.contains("wp-content") || srcTag.contains("wp-includes") || srcTag.contains("jquery")) { + scriptPenalty += 0.49; + } + else { + scriptPenalty += 1; + } + + } + return (int)(scriptPenalty + badScript + (scriptText.length())/1000.); + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/processor/logic/FeatureExtractor.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/processor/logic/FeatureExtractor.java new file mode 100644 index 00000000..e790d9ec --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/processor/logic/FeatureExtractor.java @@ -0,0 +1,70 @@ +package nu.marginalia.wmsa.edge.converting.processor.logic; + +import nu.marginalia.wmsa.edge.crawler.domain.processor.HtmlFeature; +import nu.marginalia.wmsa.edge.crawling.model.CrawledDomain; +import org.jsoup.nodes.Document; + +import java.util.HashSet; +import java.util.List; 
+import java.util.Set; + +public class FeatureExtractor { + + private static final List trackers = List.of("adform.net", + "connect.facebook", + "googletagmanager.com", + "googlesyndication.com", + "google.com", + "twitter.com", + "smartadserver.com", + "doubleclick.com", + "2mdn.com", + "dmtry.com", + "bing.com", + "msn.com", + "amazon-adsystem.com", + "alexametrics.com", + "rubiconproject.com", + "chango.com", + "d5nxst8fruw4z.cloudfront.net", + "d31qbv1cthcecs.cloudfront.net", + "linkedin.com"); + + public Set getFeatures(CrawledDomain domain, Document doc) { + Set features = new HashSet<>(); + + var scriptTags = doc.getElementsByTag("script"); + + if (scriptTags.size() > 0) { + features.add(HtmlFeature.JS); + } + + if (!doc.getElementsByTag("object").isEmpty() + || !doc.getElementsByTag("audio").isEmpty() + || !doc.getElementsByTag("video").isEmpty()) { + features.add(HtmlFeature.MEDIA); + } + + if (scriptTags.stream() + .anyMatch(tag -> trackers.stream().anyMatch(tracker -> tag.attr("src").contains(tracker)))) { + features.add(HtmlFeature.TRACKING); + } + + if (scriptTags.html().contains("google-analytics.com")) { + features.add(HtmlFeature.TRACKING); + } + + if (doc.getElementsByTag("a").stream().map(e -> e.attr("href")) + .map(String::toLowerCase) + .anyMatch(href -> + href.contains("amzn.to/") || href.contains("amazon.com/"))) { + features.add(HtmlFeature.AFFILIATE_LINK); + } + + if (!domain.cookies.isEmpty()) { + features.add(HtmlFeature.COOKIES); + } + + return features; + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/processor/logic/LinkProcessor.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/processor/logic/LinkProcessor.java new file mode 100644 index 00000000..75daa00b --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/processor/logic/LinkProcessor.java @@ -0,0 +1,90 @@ +package nu.marginalia.wmsa.edge.converting.processor.logic; + +import 
nu.marginalia.wmsa.edge.converting.model.ProcessedDocumentDetails; +import nu.marginalia.wmsa.edge.crawler.worker.UrlBlocklist; +import nu.marginalia.wmsa.edge.model.EdgeDomain; +import nu.marginalia.wmsa.edge.model.EdgeUrl; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.Objects; +import java.util.Set; + +public class LinkProcessor { + private final ProcessedDocumentDetails ret; + private final EdgeUrl baseUrl; + private final Set seenUrls = new HashSet<>(); + private final Set foreignDomains = new HashSet<>(); + + private static final int MAX_INTERNAL_LINK = 250; + private static final int MAX_EXTERNAL_LINK = 100; + private static final UrlBlocklist urlBlocklist = new UrlBlocklist(); + + public LinkProcessor(ProcessedDocumentDetails documentDetails, EdgeUrl baseUrl) { + this.ret = documentDetails; + this.baseUrl = baseUrl; + + ret.linksExternal = new ArrayList<>(); + ret.linksInternal = new ArrayList<>(); + ret.feedLinks = new ArrayList<>(); + } + + public Set getForeignDomains() { + return foreignDomains; + } + + public void accept(EdgeUrl link) { + if (!isLinkPermitted(link)) { + return; + } + + if (!seenUrls.add(link)) { + return; + } + + if (Objects.equals(link.domain, baseUrl.domain)) { // internal link + if (ret.linksInternal.size() < MAX_INTERNAL_LINK) { + ret.linksInternal.add(link); + } + } + else { + if (ret.linksExternal.size() < MAX_EXTERNAL_LINK) { + ret.linksExternal.add(link); + foreignDomains.add(link.domain); + } + } + } + + public void acceptFeed(EdgeUrl link) { + if (!isLinkPermitted(link)) { + return; + } + + if (!seenUrls.add(link)) { + return; + } + + ret.feedLinks.add(link); + } + + private boolean isLinkPermitted(EdgeUrl link) { + if (!isProtoSupported(link.proto)) { + return false; + } + + if (urlBlocklist.isForumLink(link)) { + return false; + } + + if (urlBlocklist.isUrlBlocked(link)) { + return false; + } + + return true; + } + + private boolean isProtoSupported(String proto) { + return 
proto.equalsIgnoreCase("http") + || proto.equalsIgnoreCase("https"); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/processor/logic/SummaryExtractor.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/processor/logic/SummaryExtractor.java new file mode 100644 index 00000000..438c0cfa --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/processor/logic/SummaryExtractor.java @@ -0,0 +1,62 @@ +package nu.marginalia.wmsa.edge.converting.processor.logic; + +import com.google.inject.Inject; +import com.google.inject.name.Named; +import org.apache.commons.lang3.StringUtils; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; + +import java.util.Optional; +import java.util.regex.Pattern; + +public class SummaryExtractor { + private final int maxSummaryLength; + + private final Pattern truncatedCharacters = Pattern.compile("[^a-zA-Z0-9.,!?\\-'\"]+"); + + @Inject + public SummaryExtractor(@Named("max-summary-length") Integer maxSummaryLength) { + this.maxSummaryLength = maxSummaryLength; + } + + public Optional extractSummary(Document parsed) { + var cleanDoc = parsed.clone(); + cleanDoc.select("h1,h2,h3,header,nav,#header,#nav,#navigation,.header,.nav,.navigation,ul,li").remove(); + + return extractSummaryRaw(cleanDoc) + .map(String::trim) + .filter(s -> !s.isBlank() && s.length() > 20) + .or(() -> getOgDescription(parsed)) + .or(() -> getMetaDescription(parsed)) + .map(this::trimLongSpaces) + .map(s -> StringUtils.abbreviate(s, "", maxSummaryLength)) + ; + } + + private String trimLongSpaces(String s) { + return truncatedCharacters.matcher(s).replaceAll(" "); + } + + private Optional extractSummaryRaw(Document parsed) { + StringBuilder content = new StringBuilder(); + + parsed.select("p,div,section,article").stream() + .takeWhile(e -> content.length() <= maxSummaryLength) + .filter(elem -> elem.text().length() > elem.html().length()/2) + .map(Element::text) + 
.forEach(content::append); + + if (content.length() > 10) { + return Optional.of(content.toString()); + } + return Optional.empty(); + } + + private Optional getMetaDescription(Document parsed) { + return Optional.of(parsed.select("meta[name=description]").attr("content")).filter(s -> !s.isBlank()); + } + + private Optional getOgDescription(Document parsed) { + return Optional.of(parsed.select("meta[name=og:description]").attr("content")).filter(s -> !s.isBlank()); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/processor/logic/TitleExtractor.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/processor/logic/TitleExtractor.java new file mode 100644 index 00000000..33b89c03 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/processor/logic/TitleExtractor.java @@ -0,0 +1,57 @@ +package nu.marginalia.wmsa.edge.converting.processor.logic; + +import com.google.inject.Inject; +import com.google.inject.name.Named; +import nu.marginalia.wmsa.edge.crawler.domain.language.processing.model.DocumentLanguageData; +import org.apache.commons.lang3.StringUtils; +import org.jsoup.nodes.Document; + +public class TitleExtractor { + + private final int maxTitleLength; + + @Inject + public TitleExtractor(@Named("max-title-length") Integer maxTitleLength) { + this.maxTitleLength = maxTitleLength; + + } + + public String getTitleAbbreviated(Document doc, DocumentLanguageData dld, String url) { + return StringUtils.abbreviate(getFullTitle(doc, dld, url), maxTitleLength); + } + public String getFullTitle(Document doc, DocumentLanguageData dld, String url) { + String title; + + title = getFirstTagText(doc, "head > title"); + if (title != null) return title; + + title = getFirstTagText(doc, "h1"); + if (title != null) return title; + + title = getFirstTagText(doc, "h2"); + if (title != null) return title; + + title = getFirstTagText(doc, "h3"); + if (title != null) return title; + + title = 
getFirstTagText(doc, "h4"); + if (title != null) return title; + + title = getFirstTagText(doc, "h5"); + if (title != null) return title; + + if (dld.sentences.length > 0) { + return dld.sentences[0].originalSentence; + } + + return url; + } + + private String getFirstTagText(Document doc, String selector) { + var firstTag = doc.selectFirst(selector); + if (firstTag != null) { + return firstTag.text(); + } + return null; + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/CrawlJobsSpecificationSet.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/CrawlJobsSpecificationSet.java new file mode 100644 index 00000000..dfac5bc2 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/CrawlJobsSpecificationSet.java @@ -0,0 +1,52 @@ +package nu.marginalia.wmsa.edge.crawler; + +import com.google.inject.Inject; +import com.google.inject.name.Named; +import lombok.SneakyThrows; +import nu.marginalia.wmsa.edge.crawler.worker.data.CrawlJobsSpecification; +import org.apache.commons.lang3.StringUtils; + +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.List; +import java.util.stream.Stream; + +public class CrawlJobsSpecificationSet { + private final List specs = new ArrayList<>(); + + @SneakyThrows + @Inject + public CrawlJobsSpecificationSet(@Named("crawl-specifications-path") Path specsFile) { + Files.readAllLines(specsFile) + .stream() + .map(this::stripComments) + .filter(StringUtils::isNotBlank) + .flatMap(this::generateSpecsFromLine) + .map(CrawlJobsSpecification::new) + .forEach(specs::add); + } + + private Stream generateSpecsFromLine(String s) { + String[] parts = s.split("\\s"); + if (parts.length == 1) { + return Stream.of(Integer.parseInt(s)); + } + else { + int times = Integer.parseInt(parts[0]); + int config = Integer.parseInt(parts[1]); + return Stream.generate(() -> config).limit(times); + } + } + + private String stripComments(String s) { + 
return s.replaceAll("#.*", ""); + } + + CrawlJobsSpecification get(int i) { + return specs.get(i); + } + int size() { + return specs.size(); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/EdgeCrawlerMain.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/EdgeCrawlerMain.java new file mode 100644 index 00000000..36eabc5f --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/EdgeCrawlerMain.java @@ -0,0 +1,35 @@ +package nu.marginalia.wmsa.edge.crawler; + +import com.google.inject.Guice; +import com.google.inject.Inject; +import com.google.inject.Injector; +import nu.marginalia.wmsa.configuration.MainClass; +import nu.marginalia.wmsa.configuration.ServiceDescriptor; +import nu.marginalia.wmsa.configuration.module.ConfigurationModule; +import nu.marginalia.wmsa.configuration.module.DatabaseModule; +import nu.marginalia.wmsa.configuration.server.Initialization; + +import java.io.IOException; + +public class EdgeCrawlerMain extends MainClass { + private EdgeCrawlerService service; + + @Inject + public EdgeCrawlerMain(EdgeCrawlerService service) throws IOException { + this.service = service; + } + + public static void main(String... 
args) { + init(ServiceDescriptor.EDGE_CRAWLER, args); + + Injector injector = Guice.createInjector( + new EdgeCrawlerModule(), + new ConfigurationModule(), + new DatabaseModule() + ); + + injector.getInstance(EdgeCrawlerMain.class); + injector.getInstance(Initialization.class).setReady(); + + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/EdgeCrawlerModule.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/EdgeCrawlerModule.java new file mode 100644 index 00000000..13e171c6 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/EdgeCrawlerModule.java @@ -0,0 +1,52 @@ +package nu.marginalia.wmsa.edge.crawler; + +import com.google.inject.AbstractModule; +import com.google.inject.Provides; +import com.google.inject.name.Names; +import lombok.SneakyThrows; +import nu.marginalia.wmsa.edge.crawler.domain.language.conf.LanguageModels; +import opennlp.tools.langdetect.LanguageDetector; +import opennlp.tools.langdetect.LanguageDetectorME; +import opennlp.tools.langdetect.LanguageDetectorModel; + +import java.io.FileNotFoundException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Arrays; +import java.util.Optional; + +public class EdgeCrawlerModule extends AbstractModule { + + + public void configure() { + bind(String.class).annotatedWith(Names.named("user-agent")).toInstance("search.marginalia.nu"); + bind(String.class).annotatedWith(Names.named("user-agent-robots")).toInstance("search.marginalia.nu"); + bind(Path.class).annotatedWith(Names.named("crawl-specifications-path")).toInstance(Path.of("/var/lib/wmsa/crawler-specs.dat")); + + bind(LanguageModels.class).toInstance(new LanguageModels( + Path.of("/var/lib/wmsa/model/ngrams-generous-emstr.bin"), + Path.of("/var/lib/wmsa/model/tfreq-new-algo3.bin"), + Path.of("/var/lib/wmsa/model/opennlp-sentence.bin"), + Path.of("/var/lib/wmsa/model/English.RDR"), + Path.of("/var/lib/wmsa/model/English.DICT"), + 
Path.of("/var/lib/wmsa/model/opennlp-tok.bin") + )); + } + + @Provides @SneakyThrows + LanguageDetector detector() { + final Path[] paths = new Path[]{ + Path.of("/app/resources/langdetect-183.bin"), + Path.of("/home/vlofgren/Code/wmsa-b/src/main/nlp-models/langdetect-183.bin") + }; + Optional path = Arrays.stream(paths).filter(p->p.toFile().exists()).findAny(); + if (path.isEmpty()) { + throw new FileNotFoundException("Could not find langdetect-183.bin"); + } + + try (var is = Files.newInputStream(path.get())) { + return new LanguageDetectorME(new LanguageDetectorModel(is)); + } + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/EdgeCrawlerService.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/EdgeCrawlerService.java new file mode 100644 index 00000000..dc25088c --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/EdgeCrawlerService.java @@ -0,0 +1,107 @@ +package nu.marginalia.wmsa.edge.crawler; + +import com.google.inject.Inject; +import com.google.inject.name.Named; +import io.reactivex.rxjava3.schedulers.Schedulers; +import lombok.SneakyThrows; +import nu.marginalia.wmsa.configuration.server.Context; +import nu.marginalia.wmsa.configuration.server.Initialization; +import nu.marginalia.wmsa.configuration.server.MetricsServer; +import nu.marginalia.wmsa.configuration.server.Service; +import nu.marginalia.wmsa.data_store.client.DataStoreClient; +import nu.marginalia.wmsa.edge.crawler.worker.UploaderWorker; +import nu.marginalia.wmsa.edge.crawler.worker.Worker; +import nu.marginalia.wmsa.edge.crawler.worker.WorkerFactory; +import nu.marginalia.wmsa.edge.crawler.worker.data.CrawlJobsSpecification; +import nu.marginalia.wmsa.edge.crawler.worker.results.WorkerResults; +import nu.marginalia.wmsa.edge.director.client.EdgeDirectorClient; +import nu.marginalia.wmsa.edge.index.client.EdgeIndexClient; +import nu.marginalia.wmsa.edge.model.EdgeUrl; +import org.slf4j.Logger; +import 
org.slf4j.LoggerFactory; + +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.LinkedBlockingQueue; + +public class EdgeCrawlerService extends Service { + private final EdgeIndexClient indexClient; + private final EdgeDirectorClient directorClient; + private final Initialization init; + private final WorkerFactory workerFactory; + private final CrawlJobsSpecificationSet specifications; + private final DataStoreClient dataStoreClient; + private final Logger logger = LoggerFactory.getLogger(getClass()); + + private UploaderWorker uploader; + private final List crawlers = new ArrayList<>(); + + @Inject + public EdgeCrawlerService(@Named("service-host") String ip, + @Named("service-port") Integer port, + DataStoreClient dataStoreClient, + EdgeIndexClient indexClient, + EdgeDirectorClient directorClient, + Initialization init, + WorkerFactory workerFactory, + CrawlJobsSpecificationSet specifications, + Initialization initialization, + MetricsServer metricsServer + ) { + super(ip, port, initialization, metricsServer); + this.dataStoreClient = dataStoreClient; + this.indexClient = indexClient; + this.directorClient = directorClient; + this.init = init; + this.workerFactory = workerFactory; + this.specifications = specifications; + + Schedulers.newThread().scheduleDirect(this::run); + + } + + @SneakyThrows + private void run() { + init.waitReady(); + indexClient.waitReady(); + directorClient.waitReady(); + dataStoreClient.waitReady(); + + directorClient.flushOngoingJobs(Context.internal()); + + dataStoreClient.putUrl(Context.internal(), 0, new EdgeUrl("https://memex.marginalia.nu/")).blockingSubscribe(); + dataStoreClient.putUrl(Context.internal(), 0, new EdgeUrl("http://www.cs.uni.edu/")).blockingSubscribe(); + dataStoreClient.putUrl(Context.internal(), 0, new EdgeUrl("https://www.leonardcohenfiles.com/")).blockingSubscribe(); + dataStoreClient.putUrl(Context.internal(), 0, new EdgeUrl("http://atsf.railfan.net/")).blockingSubscribe(); + 
dataStoreClient.putUrl(Context.internal(), 0, new EdgeUrl("http://sprott.physics.wisc.edu/")).blockingSubscribe(); + dataStoreClient.putUrl(Context.internal(), 0, new EdgeUrl("http://www.attalus.org/")).blockingSubscribe(); + dataStoreClient.putUrl(Context.internal(), 0, new EdgeUrl("http://www.attalus.org/")).blockingSubscribe(); + + final List> queues = new ArrayList<>(specifications.size()); + + for (int i = 0; i < specifications.size(); i++) { + queues.add(new LinkedBlockingQueue<>(1)); + } + + for (int i = 0; i < specifications.size(); i++) { + var spec = specifications.get(i); + var queue = queues.get(i); + + Worker worker; + if (spec.pass == 0) { + worker = workerFactory.buildDiscoverWorker(queue); + } + else { + worker = workerFactory.buildIndexWorker(queue, spec.pass); + } + + crawlers.add(worker); + + new Thread(worker, "Fetcher-"+i).start(); + } + + uploader = workerFactory.buildUploader(queues); + new Thread(uploader, "Uploader").start(); + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/RssScraperMain.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/RssScraperMain.java new file mode 100644 index 00000000..5018063b --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/RssScraperMain.java @@ -0,0 +1,31 @@ +package nu.marginalia.wmsa.edge.crawler; + + +import nu.marginalia.wmsa.configuration.module.DatabaseModule; +import nu.marginalia.wmsa.edge.crawler.domain.DomainCrawlerRobotsTxt; +import nu.marginalia.wmsa.edge.crawler.domain.RssCrawler; +import nu.marginalia.wmsa.edge.crawler.fetcher.HttpFetcher; +import nu.marginalia.wmsa.edge.data.dao.EdgeDataStoreDaoImpl; +import nu.marginalia.wmsa.edge.index.client.EdgeIndexClient; +import org.mariadb.jdbc.Driver; + +import java.io.IOException; + +public class RssScraperMain { + + public static void main(String... 
args) throws IOException { +// Driver driver = new Driver(); +// +// var conn = new DatabaseModule().provideConnection(); +// var fetcher = new HttpFetcher("search.marginalia.nu"); +// var indexClient = new EdgeIndexClient(); +// indexClient.waitReady(); +// +// new RssCrawler(conn, +// new DomainCrawlerRobotsTxt(fetcher, "search.marginalia.nu"), +// new EdgeDataStoreDaoImpl(conn), +// fetcher, +// keywordExtractor, sentenceExtractor, indexClient).run(); + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/DomainCrawlResults.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/DomainCrawlResults.java new file mode 100644 index 00000000..b9b51abf --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/DomainCrawlResults.java @@ -0,0 +1,55 @@ +package nu.marginalia.wmsa.edge.crawler.domain; + +import lombok.AllArgsConstructor; +import lombok.ToString; +import nu.marginalia.wmsa.edge.model.*; +import nu.marginalia.wmsa.edge.model.crawl.EdgeDomainLink; +import nu.marginalia.wmsa.edge.model.crawl.EdgePageContent; +import nu.marginalia.wmsa.edge.model.crawl.EdgeUrlState; +import nu.marginalia.wmsa.edge.model.crawl.EdgeUrlVisit; + +import java.util.*; +import java.util.stream.Collectors; + +@AllArgsConstructor @ToString +public class DomainCrawlResults { + public final EdgeDomain domain; + public final double rank; + public final int pass; + + public final long crawlStart = System.currentTimeMillis(); + + public final Set extUrl = new HashSet<>(); + public final Set intUrl = new HashSet<>(); + public final Set visitedUrl = new HashSet<>(); + public final Set feeds = new HashSet<>(); + public final Map urlStates = new HashMap<>(); + + public final Map pageContents = new HashMap<>(); + public final HashSet links = new HashSet<>(); + + public final List visits() { + return visitedUrl.stream().map(url -> { + var page = pageContents.get(url); + if (page != null) { + return new 
EdgeUrlVisit(url, + page.hash, + page.getMetadata().quality(), + page.metadata.title.replaceAll("[^\\x20-\\x7E\\xA0-\\xFF]", ""), + page.metadata.description.replaceAll("[^\\x20-\\x7E\\xA0-\\xFF]", ""), + page.ipAddress, + page.metadata.htmlStandard.toString(), + page.metadata.features, + page.metadata.textDistinctWords, + page.metadata.totalWords, + urlStates.getOrDefault(url, EdgeUrlState.OK) + ); + } + else { + return new EdgeUrlVisit(url, null, null, null, null,null, "text", 0, 0, 0, + urlStates.getOrDefault(url, EdgeUrlState.OK)); + } + }).collect(Collectors.toList()); + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/DomainCrawler.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/DomainCrawler.java new file mode 100644 index 00000000..2f92519b --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/DomainCrawler.java @@ -0,0 +1,509 @@ +package nu.marginalia.wmsa.edge.crawler.domain; + +import crawlercommons.robots.SimpleRobotRules; +import gnu.trove.set.hash.TIntHashSet; +import io.reactivex.rxjava3.core.Observable; +import lombok.SneakyThrows; +import nu.marginalia.wmsa.configuration.server.Context; +import nu.marginalia.wmsa.edge.archive.client.ArchiveClient; +import nu.marginalia.wmsa.edge.crawler.domain.language.LanguageFilter; +import nu.marginalia.wmsa.edge.crawler.domain.processor.HtmlProcessor; +import nu.marginalia.wmsa.edge.crawler.domain.processor.PlainTextProcessor; +import nu.marginalia.wmsa.edge.crawler.fetcher.HttpFetcher; +import nu.marginalia.wmsa.edge.crawler.worker.IpBlockList; +import nu.marginalia.wmsa.edge.crawler.worker.UrlBlocklist; +import nu.marginalia.wmsa.edge.index.model.IndexBlock; +import nu.marginalia.wmsa.edge.model.*; +import nu.marginalia.wmsa.edge.model.crawl.*; +import org.apache.logging.log4j.util.Strings; +import org.apache.logging.log4j.util.Supplier; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import 
org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.*; + +public class DomainCrawler { + private final HttpFetcher fetcher; + private final EdgeDomain indexDomain; + private int maxDepth; + private final Logger logger = LoggerFactory.getLogger(getClass()); + private final double EXT_LINK_SCORE_THRESHOLD = -15; + public final static int MIN_WORDS_PER_DOCUMENT = 100; + public final static int DEFAULT_CRAWL_DELAY_MS = 1000; + + private final LinkedList queue = new LinkedList<>(); + private final Set visited = new HashSet<>(); + private final TIntHashSet visitedHash = new TIntHashSet(); + + private static final LinkParser linkParser = new LinkParser(); + private static final FeedExtractor feedExtractor = new FeedExtractor(linkParser); + + + private static final UrlsCache URLS_CACHE = new UrlsCache<>(); + private final int pass; + private final int maxExtLinks; + private final int maxIntLinks; + + private final IpBlockList ipBlockList; + + private final PlainTextProcessor plainTextProcessor; + private final HtmlProcessor htmlProcessor; + private final ArchiveClient archiveClient; + private final DomainCrawlerRobotsTxt domainRobotsTxtFetcher; + private final LanguageFilter languageFilter; + private static final UrlBlocklist urlBlocklist = new UrlBlocklist(); + private final double rank; + + public EdgeDomain domain() { + return indexDomain; + } + + public DomainCrawler(HttpFetcher fetcher, + PlainTextProcessor plainTextProcessor, + HtmlProcessor htmlProcessor, + ArchiveClient archiveClient, + DomainCrawlerRobotsTxt domainRobotsTxtFetcher, + LanguageFilter languageFilter, + EdgeIndexTask ingress, + IpBlockList ipBlockList) { + this.fetcher = fetcher; + this.plainTextProcessor = plainTextProcessor; + this.htmlProcessor = htmlProcessor; + this.archiveClient = archiveClient; + this.domainRobotsTxtFetcher = domainRobotsTxtFetcher; + this.languageFilter = languageFilter; + this.ipBlockList = ipBlockList; + this.indexDomain = ingress.domain; + + this.pass = 
ingress.pass; + this.rank = ingress.rank; + this.maxExtLinks = ingress.limit * 50; + this.maxIntLinks = 100 + ingress.limit * 5; + + if (ingress.pass == 0) { + this.maxDepth = 25; + } + else { + this.maxDepth = 100; + } + + ingress.streamUrls().forEach(queue::add); + visitedHash.addAll(ingress.visited); + } + + + public DomainCrawlResults crawlToExhaustion(int maxCount, Supplier continueSignal) { + maxDepth = maxCount; + + var robotsTxtRules = domainRobotsTxtFetcher.fetchRulesCached(indexDomain); + final DomainCrawlResults results = new DomainCrawlResults(indexDomain, rank, pass); + + fetcher.clearCookies(); + + crawlDelay(0, robotsTxtRules); + + int count = 0; + while (!queue.isEmpty() && count < maxCount) { + if (!continueSignal.get()) break; + + if (crawlNextUrl(results, robotsTxtRules) != 0) + count++; + } + + addLinkWords(results); + + results.feeds.removeIf(url -> isDeadUrl(robotsTxtRules, url)); + results.intUrl.removeAll(results.visitedUrl); + + return results; + } + + public DomainCrawlResults crawl() { + var robotsTxtRules = domainRobotsTxtFetcher.fetchRulesCached(indexDomain); + final DomainCrawlResults results = new DomainCrawlResults(indexDomain, rank, pass); + + fetcher.clearCookies(); + + double scoreRemaining = 2*maxDepth; + + Comparator comparator = Comparator.comparing(u -> + results.pageContents.values().stream().filter(contents -> contents.hasHotLink(u)).count()); + Comparator comparator2 = comparator.thenComparing(EdgeUrl::depth); + + crawlDelay(0, robotsTxtRules); + + while (!queue.isEmpty() && scoreRemaining > 0) { + queue.sort(comparator2); + scoreRemaining += crawlNextUrl(results, robotsTxtRules); + } + + addLinkWords(results); + + results.feeds.removeIf(url -> isDeadUrl(robotsTxtRules, url)); + results.intUrl.removeAll(results.visitedUrl); + + return results; + } + + private double crawlNextUrl(DomainCrawlResults results, SimpleRobotRules robotsTxtRules) { + EdgeUrl url = queue.removeFirst(); + + if (visitedHash.contains(url.hashCode())) { 
+ return 0.; + } + results.visitedUrl.add(url); + + if (!robotsTxtRules.isAllowed(url.toString()) || + urlBlocklist.isUrlBlocked(url) || + isUrlTooLong(url)) { + results.urlStates.put(url, EdgeUrlState.DISQUALIFIED); + return 0.; + } + + if (!visited.add(url)) + { + return 0.; + } + + logger.debug("{}", url); + + return fetchUrl(robotsTxtRules, results, url); + } + + private void addLinkWords(DomainCrawlResults results) { + + results.pageContents.values().forEach(page -> { + page.linkWords.forEach((url,words) -> { + if (words.isEmpty()) return; + + var dest = results.pageContents.get(url); + if (dest != null) { + logger.debug("Amending title words {} -> {}", url, words); + var namesWords = dest.words.get(IndexBlock.Link); + namesWords.words.forEach(words::remove); + dest.words.append(IndexBlock.Link, words); + } + }); + }); + + } + + private boolean isDeadUrl(SimpleRobotRules robotsTxtRules, EdgeUrl edgeUrl) { + try { + if (!robotsTxtRules.isAllowed(edgeUrl.toString())) { + return false; + } + + long tStart = System.currentTimeMillis(); + fetcher.fetchContent(edgeUrl); + + crawlDelay(System.currentTimeMillis() - tStart, robotsTxtRules); + + return true; + } + catch (Exception ex) { + return false; + } + } + + private boolean isUrlTooLong(EdgeUrl url) { + return url.path.length() > 255; + } + + private boolean isEquivalentUrl(EdgeUrl a, EdgeUrl b) { + if ((a == null) != (b == null)) + return false; + if (a == b) + return true; + if (!Objects.equals(a.domain, b.domain)) { + return false; + } + if (!Objects.equals(a.path, b.path)) { + return false; + } + return true; + } + + private double fetchUrl(SimpleRobotRules rules, DomainCrawlResults results, EdgeUrl url) { + try { + var page = fetcher.fetchContent(url); + + if (!isEquivalentUrl(page.redirectUrl, page.url)) { + handleLink(results, page.redirectUrl, -1); + results.urlStates.put(url, EdgeUrlState.REDIRECT); + + logger.debug("Redirect {} -> {}", url, page.redirectUrl); + + return -1; + } + final long startTime = 
System.currentTimeMillis(); + final long stopTime; + + var contents = parseContent(results, page); + if (contents.isPresent()) { + var content = contents.get(); + + results.pageContents.put(content.url, content); + + archiveClient.submitPage(Context.internal(), url, page); + } + else { + results.urlStates.put(url, EdgeUrlState.DISQUALIFIED); + } + + stopTime = System.currentTimeMillis(); + crawlDelay(stopTime - startTime, rules); + + return contents.map(c -> c.getMetadata().quality()).orElse(-5.); + } + catch (HttpFetcher.BadContentType ex) { + results.urlStates.put(url, EdgeUrlState.DISQUALIFIED); + + logger.debug("Bad content type {}", ex.getMessage()); + return -.1; + } + catch (Exception ex) { + results.urlStates.put(url, EdgeUrlState.DEAD); + + if (logger.isDebugEnabled()) { + ex.printStackTrace(); + logger.debug("Failed to crawl url {} : {} - {}", url, ex.getClass().getSimpleName(), ex.getMessage()); + } + return -.5; + } + + } + + private Optional parseContent(DomainCrawlResults results, EdgeRawPageContents content) { + var contentType = content.contentType.getContentType(); + + if (content.data.length() < 500) { + return Optional.empty(); + } + switch (contentType) { + case "application/xhtml+xml": + case "application/xhtml": + case "text/html": return parseHtmlContent(results, content); + case "text/plain": return plainTextProcessor.parsePlainText(content); + default: { + logger.debug("Skipping contentType {}", content.contentType); + return Optional.empty(); + } + } + } + + private Optional parseHtmlContent(DomainCrawlResults results, EdgeRawPageContents rawContents) { + + var parsed = parseRawContents(rawContents.data); + + var langTagIsInteresting + = languageFilter.isPageInterestingByHtmlTag(parsed) + .or(() -> languageFilter.isPageInterestingByMetaLanguage(parsed)); + + if (langTagIsInteresting.isPresent() && !langTagIsInteresting.get()) { + logger.debug("Rejected due to language tag"); + return Optional.empty(); + } + + var canonicalUrl = 
Optional.ofNullable(parsed.select("meta[rel=canonical]")) + .map(tag -> tag.attr("href")) + .filter(Strings::isNotBlank) + .flatMap(url -> linkParser.parseLink(rawContents.url, url)) + .filter(url -> !url.equals(rawContents.url)); + + if (canonicalUrl.isPresent()) { + logger.debug("Noncanonical {} -> {}", rawContents.url, canonicalUrl.get()); + handleLink(results, canonicalUrl.get(), -1); + return Optional.empty(); + } + + var processedPageContents = htmlProcessor.processHtmlPage(rawContents, parsed); + if (processedPageContents == null) { + logger.debug("Empty Processed Data for {}", rawContents.url); + return Optional.empty(); + } + + extractLinks(results, rawContents, parsed, processedPageContents); + + if (processedPageContents.numWords() < MIN_WORDS_PER_DOCUMENT) { + logger.debug("Rejected because too few words {} - {}", rawContents.url, processedPageContents.numWords()); + return Optional.empty(); + } + if (processedPageContents.metadata.title.startsWith("Index of")) { + return Optional.empty(); + } + + return Optional.of(processedPageContents); + } + + private Document parseRawContents(String data) { + return Jsoup.parse(data); + } + + private void extractLinks(DomainCrawlResults results, EdgeRawPageContents rawContents, Document parsed, EdgePageContent processedPageContents) { + var links = parsed.getElementsByTag("a"); + + Observable.fromStream(links.stream()) + .mapOptional(elem -> linkParser.parseLink(rawContents.url, elem)) + .blockingForEach(link -> handleLink(results, link, processedPageContents.metadata.quality())); + + var frames = parsed.getElementsByTag("frame"); + + Observable.fromStream(frames.stream()) + .mapOptional(elem -> linkParser.parseFrame(rawContents.url, elem)) + .blockingForEach(link -> handleLink(results, link, processedPageContents.metadata.quality())); + + parsed.select("link[rel=alternate]").forEach(alternateTag -> + feedExtractor + .getFeedFromAlternateTag(rawContents.url, alternateTag) + .ifPresent(results.feeds::add) + + ); + } 
+ + private void handleLink(DomainCrawlResults results, EdgeUrl linkUrl, double pageScore) { + if (!isProtoSupported(linkUrl.proto)) { + return; + } + + if (!linkUrl.domain.equals(indexDomain)) { + handleExternalLink(results, pageScore, linkUrl); + } + else if (!isLinkVisited(linkUrl) + && !urlBlocklist.isForumLink(linkUrl) + && !urlBlocklist.isUrlBlocked(linkUrl) + ) { + enqueueUrl(linkUrl); + + if (results.intUrl.size() < maxIntLinks) { + results.intUrl.add(linkUrl); + } + } + } + + private boolean isProtoSupported(String proto) { + return switch (proto.toLowerCase()) { + case "http", "https" -> true; + default -> false; + }; + } + + private void handleExternalLink(DomainCrawlResults results, + double pageScore, + EdgeUrl linkUrl) { + if (pageScore <= EXT_LINK_SCORE_THRESHOLD) + return; + if (results.extUrl.size() > maxExtLinks) + return; + if (isBlacklistedDomain(linkUrl.domain)) + return; + if (urlBlocklist.isForumLink(linkUrl)) { + return; + } + if (urlBlocklist.isUrlBlocked(linkUrl)) { + return; + } + + if (URLS_CACHE.add(linkUrl)) { + results.extUrl.add(linkUrl); + } + results.links.add(new EdgeDomainLink(indexDomain, linkUrl.domain)); + } + + + private boolean isBlacklistedDomain(EdgeDomain domain) { + if ((isRestrictedTLD(domain.getAddress())) + && !("".equals(domain.subDomain) || "www".equals(domain.subDomain))) + return true; + + if (!ipBlockList.isAllowed(domain)) { + return true; + } + + if (isBlacklistedSubdomain(domain.subDomain)) + return true; + + return false; + } + + private boolean isRestrictedTLD(String domain) { + if (domain.contains("blog")) { + return true; + } + + if (domain.endsWith(".se")) + return false; + if (domain.endsWith(".nu")) + return false; + if (domain.endsWith(".uk")) + return false; + if (domain.endsWith(".jp")) + return false; + if (domain.endsWith(".com")) + return false; + if (domain.endsWith(".net")) + return false; + if (domain.endsWith(".org")) + return false; + if (domain.endsWith(".edu")) + return false; + + return 
true; + } + + private boolean isBlacklistedSubdomain(String subDomain) { + if (subDomain.equals("git")) { + return true; + } + else if (subDomain.contains("mirror")) { + return true; + } + else if (subDomain.equals("docs")) { + return true; + } + else if (subDomain.equals("mail")) { + return true; + } + else if (subDomain.contains("list")) { + return true; + } + else if (subDomain.startsWith("ftp")) { + return true; + } + + return false; + } + + private boolean isLinkVisited(EdgeUrl linkUrl) { + return visited.contains(linkUrl) + || visitedHash.contains(linkUrl.hashCode()); + } + + private void enqueueUrl(EdgeUrl url) { + if (visited.size() + queue.size() > maxDepth) { + return; + } + queue.addLast(url); + } + + @SneakyThrows + public static void crawlDelay(long timeParsed, SimpleRobotRules rules) { + var delay = rules.getCrawlDelay(); + if (delay >= 1) { + if (timeParsed*1000 > delay) + return; + + Thread.sleep(Math.min(1000*delay-timeParsed, 5000)); + } + else { + if (timeParsed > DEFAULT_CRAWL_DELAY_MS) + return; + + Thread.sleep(DEFAULT_CRAWL_DELAY_MS - timeParsed); + } + } + + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/DomainCrawlerFactory.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/DomainCrawlerFactory.java new file mode 100644 index 00000000..bd856d28 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/DomainCrawlerFactory.java @@ -0,0 +1,40 @@ +package nu.marginalia.wmsa.edge.crawler.domain; + +import com.google.inject.Inject; +import nu.marginalia.wmsa.edge.archive.client.ArchiveClient; +import nu.marginalia.wmsa.edge.crawler.domain.language.LanguageFilter; +import nu.marginalia.wmsa.edge.crawler.domain.processor.HtmlProcessor; +import nu.marginalia.wmsa.edge.crawler.domain.processor.PlainTextProcessor; +import nu.marginalia.wmsa.edge.crawler.fetcher.HttpFetcher; +import nu.marginalia.wmsa.edge.crawler.worker.IpBlockList; +import 
nu.marginalia.wmsa.edge.model.crawl.EdgeIndexTask; + +public class DomainCrawlerFactory { + private final HttpFetcher fetcher; + private final HtmlProcessor htmlProcessor; + private final ArchiveClient archiveClient; + private DomainCrawlerRobotsTxt domainCrawlerRobotsTxt; + private LanguageFilter languageFilter; + private final IpBlockList blockList; + private final PlainTextProcessor plainTextProcessor; + + @Inject + public DomainCrawlerFactory(HttpFetcher fetcher, + HtmlProcessor htmlProcessor, + PlainTextProcessor plainTextProcessor, ArchiveClient archiveClient, + DomainCrawlerRobotsTxt domainCrawlerRobotsTxt, + LanguageFilter languageFilter, + IpBlockList blockList) { + this.fetcher = fetcher; + this.htmlProcessor = htmlProcessor; + this.plainTextProcessor = plainTextProcessor; + this.archiveClient = archiveClient; + this.domainCrawlerRobotsTxt = domainCrawlerRobotsTxt; + this.languageFilter = languageFilter; + this.blockList = blockList; + } + + public DomainCrawler domainCrawler(EdgeIndexTask indexTask) { + return new DomainCrawler(fetcher, plainTextProcessor, htmlProcessor, archiveClient, domainCrawlerRobotsTxt, languageFilter, indexTask, blockList); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/DomainCrawlerRobotsTxt.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/DomainCrawlerRobotsTxt.java new file mode 100644 index 00000000..e8c152ec --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/DomainCrawlerRobotsTxt.java @@ -0,0 +1,67 @@ +package nu.marginalia.wmsa.edge.crawler.domain; + +import com.google.common.cache.Cache; +import com.google.common.cache.CacheBuilder; +import com.google.inject.name.Named; +import crawlercommons.robots.SimpleRobotRules; +import crawlercommons.robots.SimpleRobotRulesParser; +import lombok.SneakyThrows; +import nu.marginalia.wmsa.edge.crawler.fetcher.HttpFetcher; +import nu.marginalia.wmsa.edge.model.EdgeDomain; +import 
nu.marginalia.wmsa.edge.model.crawl.EdgeRawPageContents; +import nu.marginalia.wmsa.edge.model.EdgeUrl; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.inject.Inject; +import javax.inject.Singleton; +import java.nio.charset.StandardCharsets; +import java.util.Optional; + +@Singleton +public class DomainCrawlerRobotsTxt { + + private static final SimpleRobotRulesParser parser = new SimpleRobotRulesParser(); + + private final Cache urlIdCache = CacheBuilder.newBuilder().maximumSize(1000).build(); + + private final String userAgent; + private final HttpFetcher fetcher; + private final Logger logger = LoggerFactory.getLogger(getClass()); + + @Inject + public DomainCrawlerRobotsTxt(HttpFetcher fetcher, + @Named("user-agent-robots") String userAgent) { + this.userAgent = userAgent; + this.fetcher = fetcher; + } + + @SneakyThrows + public SimpleRobotRules fetchRulesCached(EdgeDomain domain) { + return urlIdCache.get(domain, () -> fetchRulesRaw(domain)); + } + + private SimpleRobotRules fetchRulesRaw(EdgeDomain domain) { + return fetchRobotsForProto("https", domain) + .or(() -> fetchRobotsForProto("http", domain)) + .orElseGet(() -> new SimpleRobotRules(SimpleRobotRules.RobotRulesMode.ALLOW_ALL)); + } + + private Optional fetchRobotsForProto(String proto, EdgeDomain domain) { + try { + var url = new EdgeUrl(proto, domain, null, "/robots.txt"); + return Optional.of(parseRobotsTxt(fetcher.fetchContent(url))); + } + catch (Exception ex) { + return Optional.empty(); + } + } + + private SimpleRobotRules parseRobotsTxt(EdgeRawPageContents edgePageContent) { + return parser.parseContent(edgePageContent.url.toString(), + edgePageContent.data.getBytes(StandardCharsets.UTF_8), + edgePageContent.contentType.contentType, + userAgent); + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/FeedExtractor.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/FeedExtractor.java new file mode 100644 index 
00000000..c256786e --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/FeedExtractor.java @@ -0,0 +1,55 @@ +package nu.marginalia.wmsa.edge.crawler.domain; + +import nu.marginalia.wmsa.edge.model.EdgeUrl; +import org.jsoup.nodes.Element; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Objects; +import java.util.Optional; + +public class FeedExtractor { + private final LinkParser linkParser; + private final Logger logger = LoggerFactory.getLogger(getClass()); + + public FeedExtractor(LinkParser linkParser) { + this.linkParser = linkParser; + } + + public Optional getFeedFromAlternateTag(EdgeUrl crawlUrl, Element alternateTag) { + var type = alternateTag.attr("type"); + if (type == null) { + return Optional.empty(); + } + + try { + var url = linkParser.parseLink(crawlUrl, alternateTag.attr("href")); + + if (url.isEmpty()) + return Optional.empty(); + + if (!Objects.equals(crawlUrl.domain, url.get().domain)) + return Optional.empty(); + + if ("application/atom+xml".equalsIgnoreCase(type)) { + return url; + } + + if ("application/rss+xml".equalsIgnoreCase(type)) { + return url; + } + + if ("application/rdf+xml".equalsIgnoreCase(type)) { + return url; + } + + + } + catch (Exception ex) { + logger.debug("Bad URI syntax", ex); + } + return Optional.empty(); + } + + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/LinkParser.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/LinkParser.java new file mode 100644 index 00000000..f29fb658 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/LinkParser.java @@ -0,0 +1,166 @@ +package nu.marginalia.wmsa.edge.crawler.domain; + +import com.google.common.base.CharMatcher; +import io.reactivex.rxjava3.core.Maybe; +import lombok.SneakyThrows; +import nu.marginalia.wmsa.edge.model.EdgeUrl; +import org.jetbrains.annotations.Contract; +import org.jsoup.nodes.Element; +import 
org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.net.URI; +import java.net.URISyntaxException; +import java.util.List; +import java.util.Optional; +import java.util.regex.Pattern; + +public class LinkParser { + private final Logger logger = LoggerFactory.getLogger(getClass()); + private final List blockPrefixList = List.of( + "mailto:", "javascript:", "tel:", "itpc:", "#", "file:"); + private final List blockSuffixList = List.of( + ".pdf", ".mp3", ".wmv", ".avi", ".zip", ".7z", + ".bin", ".exe", ".tar.gz", ".tar.bz2", ".xml", ".swf", + ".wav", ".ogg", ".jpg", ".jpeg", ".png", ".gif", ".webp", + ".webm", ".bmp", ".doc", ".docx", ".ppt", ".pptx", ".xls", ".xlsx", + ".gz", ".asc", ".md5", ".asf", ".mov", ".sig", ".pub", ".iso"); + + @Contract(pure=true) + public Optional parseLink(EdgeUrl baseUrl, Element l) { + return Optional.of(l) + .filter(this::shouldIndexLink) + .map(this::getUrl) + .map(link -> resolveUrl(baseUrl, link)) + .flatMap(this::createURI) + .map(URI::normalize) + .map(this::renormalize) + .flatMap(this::createEdgeUrl); + } + + private Optional createURI(String s) { + try { + return Optional.of(new URI(s)); + } + catch (URISyntaxException e) { + logger.debug("Bad URI {}", s); + return Optional.empty(); + } + } + + private Optional createEdgeUrl(URI uri) { + try { + return Optional.of(new EdgeUrl(uri)); + } + catch (Exception ex) { + logger.debug("Bad URI {}", uri); + return Optional.empty(); + } + } + + @Contract(pure=true) + public Optional parseLink(EdgeUrl baseUrl, String str) { + return Optional.of(str) + .map(link -> resolveUrl(baseUrl, link)) + .flatMap(this::createURI) + .map(URI::normalize) + .map(this::renormalize) + .flatMap(this::createEdgeUrl); + } + + @Contract(pure=true) + public Optional parseFrame(EdgeUrl baseUrl, Element frame) { + return Optional.of(frame) + .map(l -> l.attr("src")) + .map(link -> resolveUrl(baseUrl, link)) + .flatMap(this::createURI) + .map(URI::normalize) + .map(this::renormalize) + 
.flatMap(this::createEdgeUrl); + } + + @SneakyThrows + private URI renormalize(URI uri) { + if (uri.getPath() == null) { + return renormalize(new URI(uri.getScheme(), uri.getHost(), "/", uri.getFragment())); + } + if (uri.getPath().startsWith("/../")) { + return renormalize(new URI(uri.getScheme(), uri.getHost(), uri.getPath().substring(3), uri.getFragment())); + } + return uri; + } + + private String getUrl(Element element) { + var url = CharMatcher.noneOf(" \r\n\t").retainFrom(element.attr("href")); + + int anchorIndex = url.indexOf('#'); + if (anchorIndex > 0) { + return url.substring(0, anchorIndex); + } + return url; + } + + private static Pattern paramRegex = Pattern.compile("\\?.*$"); + @SneakyThrows + private String resolveUrl(EdgeUrl baseUrl, String s) { + s = paramRegex.matcher(s).replaceAll(""); + + // url looks like http://www.marginalia.nu/ + if (isAbsoluteDomain(s)) { + return s; + } + + // url looks like /my-page + if (s.startsWith("/")) { + return baseUrl.sibling(s).toString(); + } + + return baseUrl.sibling(relativeNavigation(baseUrl) + s.replaceAll(" ", "%20")).toString(); + } + + // for a relative url that looks like /foo or /foo/bar; return / or /foo + private String relativeNavigation(EdgeUrl url) { + + var lastSlash = url.path.lastIndexOf("/"); + if (lastSlash < 0) { + return "/"; + } + return url.path.substring(0, lastSlash+1); + } + + private boolean isAbsoluteDomain(String s) { + return s.matches("^[a-zA-Z]+:.*$"); + } + + private boolean shouldIndexLink(Element link) { + return isUrlRelevant(link.attr("href")) + && isRelRelevant(link.attr("rel")); + + } + + private boolean isRelRelevant(String rel) { + if (null == rel) { + return true; + } + return switch (rel) { + case "noindex" -> false; + default -> true; + }; + } + + private boolean isUrlRelevant(String href) { + if (null == href || "".equals(href)) { + return false; + } + if (blockPrefixList.stream().anyMatch(href::startsWith)) { + return false; + } + if 
(blockSuffixList.stream().anyMatch(href::endsWith)) { + return false; + } + if (href.length() > 128) { + return false; + } + return true; + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/RssCrawler.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/RssCrawler.java new file mode 100644 index 00000000..44de1344 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/RssCrawler.java @@ -0,0 +1,217 @@ +package nu.marginalia.wmsa.edge.crawler.domain; + +import com.zaxxer.hikari.HikariDataSource; +import gnu.trove.list.array.TIntArrayList; +import gnu.trove.map.hash.TIntIntHashMap; +import it.unimi.dsi.fastutil.ints.IntArrays; +import lombok.AllArgsConstructor; +import lombok.SneakyThrows; +import nu.marginalia.wmsa.configuration.module.DatabaseModule; +import nu.marginalia.wmsa.edge.crawler.fetcher.HttpFetcher; +import nu.marginalia.wmsa.edge.data.dao.EdgeDataStoreDaoImpl; +import nu.marginalia.wmsa.edge.data.dao.task.EdgeDomainBlacklistImpl; +import nu.marginalia.wmsa.edge.index.service.util.ranking.BuggyStandardPageRank; +import nu.marginalia.wmsa.edge.model.EdgeDomain; +import nu.marginalia.wmsa.edge.model.EdgeUrl; +import org.jsoup.Jsoup; +import org.mariadb.jdbc.Driver; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.inject.Inject; +import javax.inject.Singleton; +import java.io.IOException; +import java.util.*; +import java.util.concurrent.LinkedBlockingQueue; + +@Singleton +public class RssCrawler { + + + static LinkedBlockingQueue feedsQueue = new LinkedBlockingQueue<>(); + static LinkedBlockingQueue uploadQueue = new LinkedBlockingQueue<>(2); + + @AllArgsConstructor + static class UploadJob { + int domainId; + EdgeUrl[] urls; + } + private final HikariDataSource dataSource; + + private final HttpFetcher fetcher; + private final LinkParser lp = new LinkParser(); + + private final Logger logger = LoggerFactory.getLogger(getClass()); + + public 
static void main(String[] args) throws IOException { + org.mariadb.jdbc.Driver driver = new Driver(); + + var dbModule = new DatabaseModule(); + new RssCrawler(dbModule.provideConnection()).run(); + } + + @Inject + public RssCrawler(HikariDataSource dataSource) { + + this.dataSource = dataSource; + this.fetcher = new HttpFetcher("search.marginalia.nu"); + fetcher.setAllowAllContentTypes(true); + } + + @SneakyThrows + public void run() throws IOException { + var rank = new BuggyStandardPageRank(dataSource, "memex.marginalia.nu"); + var nodes = rank.pageRankWithPeripheralNodes(rank.size(), false); + + EdgeDomainBlacklistImpl blacklist = new EdgeDomainBlacklistImpl(dataSource); + + TIntIntHashMap domainRankById = new TIntIntHashMap(nodes.size(), 0.5f, 0, Integer.MAX_VALUE); + + for (int i = 0; i < nodes.size(); i++) { + if (!blacklist.isBlacklisted(nodes.get(i))) { + domainRankById.put(nodes.get(i), i); + } + } + + List feedUrls = new ArrayList<>(15_000); + + TIntArrayList feedDomainIds = new TIntArrayList(); + try (var conn = dataSource.getConnection()) { + + try (var stmt = conn.prepareStatement("SELECT DISTINCT(DOMAIN_ID) FROM EC_FEED_URL INNER JOIN EC_DOMAIN ON EC_DOMAIN.ID=DOMAIN_ID ORDER BY RANK ASC")) { + stmt.setFetchSize(1000); + var rsp = stmt.executeQuery(); + while (rsp.next()) { + int id = rsp.getInt(1); + + if (domainRankById.get(id) < rank.size()) { + feedDomainIds.add(id); + } + } + } + + int[] ids = feedDomainIds.toArray(); + IntArrays.quickSort(ids, (a,b) -> domainRankById.get(a) - domainRankById.get(b)); + + for (int i = 0; i < ids.length; i++) { + try (var stmt = conn.prepareStatement("SELECT DOMAIN_ID, PROTO, URL_PART, PORT, URL from EC_FEED_URL INNER JOIN EC_DOMAIN ON EC_DOMAIN.ID=DOMAIN_ID WHERE DOMAIN_ID=? 
ORDER BY LENGTH(URL) ASC LIMIT 1")) { + stmt.setFetchSize(10); + stmt.setInt(1, ids[i]); + var rsp = stmt.executeQuery(); + while (rsp.next()) { + var url = new EdgeUrl(rsp.getString(2), new EdgeDomain(rsp.getString(3)), rsp.getInt(4), rsp.getString(5)); + feedUrls.add(url); + } + } + } + } + catch (Exception ex) { + logger.error("SQL error", ex); + } + + + + feedsQueue.addAll(feedUrls); + + List threads = new ArrayList<>(); + threads.add(new Thread(this::uploadThread, "Uploader")); + for (int i = 0; i < 256; i++) { + threads.add(new Thread(this::processor, "Processor")); + } + + threads.forEach(Thread::start); + + Thread.sleep(5*60_000); + threads.forEach(Thread::interrupt); + Thread.sleep(60_000); + + System.exit(0); + } + + @SneakyThrows + private void processor() { + EdgeDataStoreDaoImpl dataStoreDao = new EdgeDataStoreDaoImpl(dataSource); + + while (!feedsQueue.isEmpty()) { + try { + + var url = feedsQueue.take(); + logger.info("{}", url); + + var domainId = dataStoreDao.getDomainId(url.getDomain()); + var contents = fetcher.fetchContent(url); + + if (null != contents) { + List urls = getLinks(url, contents.data); + urls = dataStoreDao.getNewUrls(domainId, urls); + if (!urls.isEmpty()) { + uploadQueue.put(new UploadJob(domainId.getId(), urls.toArray(EdgeUrl[]::new))); + } + } + } + catch (InterruptedException ex) { + break; + } + catch (Exception ex) { + // + } + } + logger.info("Processor done"); + } + + private List getLinks(EdgeUrl base, String str) { + + var doc = Jsoup.parse(str.replaceAll("link", "lnk")); + + Set urls = new LinkedHashSet<>(); + + doc.select("entry > lnk[rel=alternate]").forEach(element -> { + var href = element.attr("href"); + if (href != null && !href.isBlank()) { + lp.parseLink(base, href) + .filter(u -> Objects.equals(u.domain.domain, base.domain.domain)) + .filter(u -> u.proto.startsWith("http")) + .ifPresent(urls::add); + } + }); + + doc.getElementsByTag("lnk").forEach(element -> { + var href = element.text(); + if (href != null && 
!href.isBlank()) { + lp.parseLink(base, href) + .filter(u -> Objects.equals(u.domain.domain, base.domain.domain)) + .filter(u -> u.proto.startsWith("http")) + .ifPresent(urls::add); + } + }); + + doc.select("item > guid[isPermalink=true]").forEach(element -> { + var href = element.text(); + if (href != null && !href.isBlank()) { + lp.parseLink(base, href) + .filter(u -> Objects.equals(u.domain.domain, base.domain.domain)) + .filter(u -> u.proto.startsWith("http")) + .ifPresent(urls::add); + } + }); + + return new ArrayList<>(urls); +} + + @SneakyThrows + public void uploadThread() { + EdgeDataStoreDaoImpl dao = new EdgeDataStoreDaoImpl(dataSource); + try (var conn = dataSource.getConnection(); var stmt = conn.prepareStatement("UPDATE EC_DOMAIN SET STATE=0, INDEXED=LEAST(8, INDEXED) WHERE ID=?")) { + while (!feedsQueue.isEmpty() || !uploadQueue.isEmpty()) { + var job = uploadQueue.take(); + dao.putUrl(-5., job.urls); + + stmt.setInt(1, job.domainId); + stmt.executeUpdate(); + + logger.info("{}[{}]", job.urls[0].domain, job.urls.length); + } + } + logger.info("Uploader done"); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/UrlsCache.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/UrlsCache.java new file mode 100644 index 00000000..4f83b5ab --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/UrlsCache.java @@ -0,0 +1,110 @@ +package nu.marginalia.wmsa.edge.crawler.domain; + +import gnu.trove.set.hash.TLongHashSet; +import nu.marginalia.wmsa.edge.model.WideHashable; + +import java.util.Arrays; +import java.util.concurrent.atomic.AtomicBoolean; + +public class UrlsCache { + private final TLongHashSet _int_set_not_thread_safe = new TLongHashSet(); + private final long[] _inserts_not_thread_safe; + + private int insertP = 0; + private long size = 0; + private final int maxSize; + + private final AtomicBoolean spinLock = new AtomicBoolean(); + + public UrlsCache() { + 
this(50000); + } + + public UrlsCache(final int maxSize) { + this.maxSize = maxSize; + _inserts_not_thread_safe = new long[maxSize]; + } + + /** + * + * @return true if the set was modified + */ + public boolean add(T entity) { + try { + while (!spinLock.compareAndSet(false, true)); + + return addEntityThreadUnsafe(entity.wideHash()); + } + finally { + spinLock.set(false); + } + } + + private boolean addEntityThreadUnsafe(long hash) { + + if (!_int_set_not_thread_safe.add(hash)) { + return false; + } + + if (size == maxSize) { + _int_set_not_thread_safe.remove(_inserts_not_thread_safe[insertP]); + } + else { + size++; + } + + _inserts_not_thread_safe[insertP] = hash; + insertP = (insertP+1) % maxSize; + + return true; + } + + public void addAll(T... entities) { + try { + while (!spinLock.compareAndSet(false, true)); + + Arrays.stream(entities) + .mapToLong(WideHashable::wideHash) + .forEach(this::addEntityThreadUnsafe); + } + finally { + spinLock.set(false); + } + } + + public boolean contains(T entity) { + try { + while (!spinLock.compareAndSet(false, true)); + + return _int_set_not_thread_safe.contains(entity.wideHash()); + } + finally { + spinLock.set(false); + } + } + + public boolean isMissing(T entity) { + try { + while (!spinLock.compareAndSet(false, true)); + + return !_int_set_not_thread_safe.contains(entity.wideHash()); + } + finally { + spinLock.set(false); + } + } + + + public void clear() { + try { + while (!spinLock.compareAndSet(false, true)) ; + _int_set_not_thread_safe.clear(); + size = 0; + insertP = 0; + } + finally { + spinLock.set(false); + } + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/language/DocumentDebugger.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/language/DocumentDebugger.java new file mode 100644 index 00000000..439c84fb --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/language/DocumentDebugger.java @@ -0,0 +1,132 @@ 
+package nu.marginalia.wmsa.edge.crawler.domain.language; + +import nu.marginalia.wmsa.edge.assistant.dict.NGramDict; +import nu.marginalia.wmsa.edge.crawler.domain.language.conf.LanguageModels; +import nu.marginalia.wmsa.edge.crawler.domain.language.processing.KeywordCounter; +import nu.marginalia.wmsa.edge.crawler.domain.language.processing.KeywordExtractor; +import nu.marginalia.wmsa.edge.crawler.domain.language.processing.NameCounter; +import nu.marginalia.wmsa.edge.crawler.domain.language.processing.SentenceExtractor; +import nu.marginalia.wmsa.edge.crawler.domain.language.processing.model.DocumentSentence; +import nu.marginalia.wmsa.edge.crawler.domain.language.processing.model.WordRep; +import nu.marginalia.wmsa.edge.crawler.domain.language.processing.model.tag.WordSeparator; +import org.jsoup.nodes.Document; + +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.PrintWriter; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.*; +import java.util.stream.Collectors; + +public class DocumentDebugger { + private final KeywordCounter kc; + private final SentenceExtractor se; + private final KeywordExtractor ke; + private final NameCounter nc; + + Map docsByPath = new TreeMap<>(); + Path tempDir; + public DocumentDebugger(LanguageModels lm) throws IOException { + se = new SentenceExtractor(lm); + var dict = new NGramDict(lm); + ke = new KeywordExtractor(); + + kc = new KeywordCounter(dict, ke); + nc = new NameCounter(ke); + + tempDir = Files.createTempDirectory("documentdebugger"); + } + + public void writeIndex() throws FileNotFoundException { + var output = tempDir.resolve("index.html"); + + try (var pw = new PrintWriter(new FileOutputStream(output.toFile()))) { + pw.println("
      "); + + docsByPath.forEach((name, path) -> { + pw.println("
    • "); + pw.printf("%s", path, name); + pw.println("
    • "); + }); + + + pw.println("
    "); + } + + System.out.println(output); + } + + public Path debugDocument(String name, Document document) throws IOException { + + var output = tempDir.resolve(name.substring(name.lastIndexOf("/")+1)+".html"); + docsByPath.put(name, output); + + document.select("table,sup,.reference").remove(); + var languageData = se.extractSentences(document); + + Set reps = new HashSet<>(); + +// kc.count(languageData, 0.75).forEach(rep -> reps.add(rep.stemmed)); + kc.count(languageData, 0.75).forEach(rep -> reps.add(rep.stemmed)); + + try (var pw = new PrintWriter(new FileOutputStream(output.toFile()))) { + + for (var sent : languageData.titleSentences) { + pw.print("

    "); + printSent(pw, sent, reps); + pw.println("

    "); + } + + for (var sent : languageData.sentences) { + pw.println("
    "); + printSent(pw, sent, reps); + pw.println("
    "); + } + } + + return output; + } + + private void printSent(PrintWriter pw, DocumentSentence sent, Set words) { + TreeMap> spans = new TreeMap<>(); + + var names = ke.getKeywordsFromSentence(sent); + + for (var span : names) { + for (int j = 0; j < span.size(); j++) { + spans.computeIfAbsent(span.start + j, n -> new HashSet<>()).add(new WordRep(sent, span)); + } + } + + for (int i = 0; i < sent.words.length; i++) { + List matches = spans.getOrDefault(i, Collections.emptySet()).stream().filter(rep -> true || words.contains(rep.stemmed)).collect(Collectors.toList()); + + printTag(pw, sent, i, matches); + } + } + + private void printTag(PrintWriter pw, DocumentSentence sent, int i, List matches) { + + String style; + if (matches.isEmpty()) { + style = ""; + } + else if (matches.size() == 1 && !matches.get(0).word.contains("_")) { + style = "text-decoration: underline; color: #00f"; + } + else { + style = "text-decoration: underline; color: #f00"; + } + pw.printf("", + matches.stream().map(rep -> rep.word).collect(Collectors.joining(", ")), + style + ); + pw.print(sent.words[i]); + pw.print(""); pw.println(sent.posTags[i]); pw.print(""); + pw.print(" "); + if (sent.separators[i] == WordSeparator.COMMA) + pw.printf(", "); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/language/LanguageFilter.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/language/LanguageFilter.java new file mode 100644 index 00000000..7a5a1411 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/language/LanguageFilter.java @@ -0,0 +1,90 @@ +package nu.marginalia.wmsa.edge.crawler.domain.language; + +import com.google.common.collect.Sets; +import nu.marginalia.wmsa.edge.crawler.domain.language.processing.model.DocumentLanguageData; +import nu.marginalia.wmsa.edge.crawler.domain.language.processing.model.DocumentSentence; +import opennlp.tools.langdetect.LanguageDetector; +import 
org.jsoup.nodes.Document; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.inject.Inject; +import javax.inject.Singleton; +import java.io.BufferedReader; +import java.io.InputStreamReader; +import java.text.BreakIterator; +import java.util.*; +import java.util.function.Predicate; +import java.util.regex.Pattern; + +@Singleton +public class LanguageFilter { + + private static final Set interestingLanguages = Set.of("en", "en-us", "en-gb", "eng", "english"); + + private static final Set englishWords = new HashSet<>(); + private static final Logger logger = LoggerFactory.getLogger(LanguageFilter.class); + static { + try (var resource = Objects.requireNonNull(ClassLoader.getSystemResourceAsStream("dictionary/en-1000"), + "Could not load word frequency table"); + var br = new BufferedReader(new InputStreamReader(resource)) + ) { + for (;;) { + String s = br.readLine(); + if (s == null) { + break; + } + englishWords.add(s.toLowerCase()); + } + } + catch (Exception ex) { + throw new RuntimeException(ex); + } + + } + + public double dictionaryAgreement(DocumentLanguageData dld) { + Set seenWords = new HashSet<>(); + int englishCount = 0; + + for (var sent : dld.sentences) { + for (var word : sent.wordsLowerCase) { + if (seenWords.add(word) && englishWords.contains(word)) { + englishCount++; + } + } + } + + double englishAgreement = englishCount / (double) Math.min(seenWords.size(), englishWords.size()); + + logger.debug("Agreement: {}", englishAgreement); + + return englishAgreement; + } + + @Inject + public LanguageFilter() { + } + + public Optional isPageInterestingByHtmlTag(Document parsed) { + return Optional.of(parsed.getElementsByTag("html")) + .map(tag -> tag.attr("lang")) + .filter(s -> !s.isBlank()) + .map(String::toLowerCase) + .map(interestingLanguages::contains); + } + + public Optional isPageInterestingByMetaLanguage(Document parsed) { + return parsed.getElementsByTag("meta").stream().filter(elem -> 
"content-language".equalsIgnoreCase(elem.attr("http-equiv"))) + .map(elem -> elem.attr("content")) + .filter(s -> !s.isBlank()) + .map(String::toLowerCase) + .map(interestingLanguages::contains) + .findAny(); + } + + public boolean isBlockedUnicodeRange(String data) { + return Arrays.stream(UnicodeRanges.values()) + .parallel().anyMatch(range -> range.test(data)); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/language/UnicodeRanges.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/language/UnicodeRanges.java new file mode 100644 index 00000000..347b2f7f --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/language/UnicodeRanges.java @@ -0,0 +1,77 @@ +package nu.marginalia.wmsa.edge.crawler.domain.language; + +public enum UnicodeRanges { + GREEK(false, 0x0370,0x03FF), + CYRILLIC(false, 0x0400,0x04FF), + CYRILLIC2(false, 0x0500,0x052F), + ARMENIAN(false, 0x0530,0x058F), + HEBREW(false, 0x0590,0x05FF), + ARABIC(false, 0x0600,0x06FF), + SYRIAC(false, 0x0700,0x074F), + THAANA(false, 0x0780,0x07BF), + DEVANAGARI(false, 0x0900,0x097F), + BENGALI(false, 0x0980,0x09FF), + GURMUKHI(false, 0x0A00,0x0A7F), + GUJARATI(false, 0x0A80,0x0AFF), + ORIYA(false, 0x0B00,0x0B7F), + TAMIL(false, 0x0B80,0x0BFF), + TELUGU(false, 0x0C00,0x0C7F), + KANNADA(false, 0x0C80,0x0CFF), + MALAYALAM(false, 0x0D00,0x0D7F), + SINHALA(false, 0x0D80,0x0DFF), + THAI(false, 0x0E00,0x0E7F), + LAO(false, 0x0E80,0x0EFF), + TIBETAN(false, 0x0F00,0x0FFF), + MYANMAR(false, 0x1000,0x109F), + GEORGIAN(false, 0x10A0,0x10FF), + HANGUL(false, 0x1100,0x11FF), + ETHIOPIC(false, 0x1200,0x137F), + CHEROKEE(false, 0x13A0,0x13FF), + ABORIGINAL(false, 0x1400,0x167F), + OGHAM(false, 0x1680,0x169F), + RUNIC(false, 0x16A0,0x16FF), + TAGALOG(false, 0x1700,0x171F), + HANUNOO(false, 0x1720,0x173F), + BUHID(false, 0x1740,0x175F), + TAGBANWA(false, 0x1760,0x177F), + KHMER(false, 0x1780,0x17FF), + MONGOLIAN(false, 0x1800,0x18AF), + 
LIMBU(false, 0x1900,0x194F), + TAILE(false, 0x1950,0x197F), + KHMER2(false, 0x19E0,0x19FF), + CJKRADICALS(true,0x2E80,0x2EFF), + KANGXIRADICALS(true, 0x2F00,0x2FDF), + IDEOGRAPHICDESCRIPTION(true,0x2FF0,0x2FFF), + CJKSYMBOLS(true, 0x3000,0x303F), + HIRAGANA(true,0x3040,0x309F), + KATAKANA(true,0x30A0,0x30FF), + BOPOMOFO(true,0x3100,0x312F), + HANGULJAMO(true,0x3130,0x318F), + KANBUN(true,0x3190,0x319F), + BOPOMOFOEXTENDED(true,0x31A0,0x31BF), + KATAKANAPHONETIC(true, 0x31F0,0x31FF), + ENCLOSEDCJK(true, 0x3200,0x32FF), + CJKCOMPATIBILITY(true,0x3300,0x33FF), + CJKUNIFIEDA(true,0x3400,0x4DBF), + YIJINGHEXAGRAMSYMBOLS(true,0x4DC0,0x4DFF), + CJKUNIFIEDIDEOGRAPHS(true,0x4E00,0x9FFF), + YISYLLABLES(true,0xA000,0xA48F), + YIRADICALS(true, 0xA490,0xA4CF), + HANGULSYLLABLES(true, 0xAC00,0xD7AF) + ; + final int min; + final int max; + final boolean sensitive; + UnicodeRanges(boolean sensitive, int min, int max) { + this.sensitive = sensitive; + this.min = min; + this.max = max; + } + + boolean test(String text) { + return text.chars().limit(1000).parallel() + .filter(i -> i >= min && i < max) + .count() >= (sensitive ? 
15 : 100); + + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/language/WordPatterns.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/language/WordPatterns.java new file mode 100644 index 00000000..6b178853 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/language/WordPatterns.java @@ -0,0 +1,117 @@ +package nu.marginalia.wmsa.edge.crawler.domain.language; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.util.*; +import java.util.function.Predicate; +import java.util.regex.Pattern; + +public class WordPatterns { + public static final int MIN_WORD_LENGTH = 1; + public static final int MAX_WORD_LENGTH = 64; + + public static final String WORD_TOKEN_JOINER = "_"; + public static final Pattern wordPattern = Pattern.compile("[#]?[_@.a-zA-Z0-9'+\\-\\u00C0-\\u00D6\\u00D8-\\u00f6\\u00f8-\\u00ff]+[#]?"); + public static final Pattern wordPatternRestrictive = Pattern.compile("[#]?[@a-zA-Z0-9'+\\-\\u00C0-\\u00D6\\u00D8-\\u00f6\\u00f8-\\u00ff]+[#]?"); + public static final Pattern keyWordPattern = Pattern.compile("[A-Z\\u00C0-\\u00D6][_a-zA-Z\\u00C0-\\u00D6\\u00D8-\\u00f6\\u00f8-\\u00ff]{0,32}('[a-zA-Z])?"); + public static final Pattern wordAppendixPattern = Pattern.compile("[.]?[0-9a-zA-Z\\u00C0-\\u00D6\\u00D8-\\u00f6\\u00f8-\\u00ff]{1,3}[0-9]?"); + public static final Pattern joinWord = Pattern.compile("(as|an|the|of|in|a)"); + public static final Pattern keywordAppendixPattern = Pattern.compile("([0-9A-Z][A-Z0-9]{0,3})"); + public static final Pattern wordBreakPattern = Pattern.compile("([^_#@.a-zA-Z'+\\-0-9\\u00C0-\\u00D6\\u00D8-\\u00f6\\u00f8-\\u00ff]+)|[|]|(\\.(\\s+|$))"); + public static final Pattern characterNoisePattern = Pattern.compile("^[/+\\-]+$"); + + public static final Predicate wordQualitiesPredicate = wordPattern.asMatchPredicate(); + public static final Predicate restrictivePredicate = 
wordPatternRestrictive.asMatchPredicate(); + public static final Predicate wordAppendixPredicate = wordAppendixPattern.asMatchPredicate(); + public static final Predicate keywordPredicate = keyWordPattern.asMatchPredicate(); + public static final Predicate keywordAppendixPredicate = keywordAppendixPattern.asMatchPredicate(); + public static final Predicate wordPredicateEither = wordQualitiesPredicate.or(wordAppendixPredicate); + public static final Predicate keywordPredicateEither = keywordPredicate.or(keywordAppendixPredicate); + public static final Predicate characterNoisePredicate = characterNoisePattern.asMatchPredicate(); + + public static final Set topWords; + static { + topWords = new HashSet<>(200); + try (var resource = Objects.requireNonNull(ClassLoader.getSystemResourceAsStream("dictionary/en-stopwords"), + "Could not load word frequency table"); + var br = new BufferedReader(new InputStreamReader(resource)) + ) { + while (true) { + String s = br.readLine(); + if (s == null) { + break; + } + topWords.add(s); + } + } catch (IOException e) { + e.printStackTrace(); + } + } + + private static boolean hasMoreThanTwo(String s, char c, int max) { + int idx = 0; + for (int i = 0; i <= max; i++) { + idx = s.indexOf(c, idx+1); + if (idx < 0 || idx >= s.length() - 1) + return false; + } + return true; + } + + + public static boolean filter(String word) { + if (word.isBlank()) { + return false; + } + if (hasMoreThanTwo(word, '-', 2)) { + return false; + } + if (hasMoreThanTwo(word, '+', 2)) { + return false; + } + if (word.startsWith("-") + || word.endsWith("-") + ) { + return false; + } + + int numDigits = 0; + for (int i = 0; i < word.length(); i++) { + if (Character.isDigit(word.charAt(i))) { + numDigits++; + } + if (numDigits > 6) + return false; + } + + return true; + } + + public static boolean filterStrict(String word) { + + int numDigits = (int) word.chars().filter(Character::isDigit).count(); + if (numDigits == word.length()) { + return false; + } + + 
return true; + } + + public static boolean isStopWord(String s) { + if (s.length() < MIN_WORD_LENGTH) { + return true; + } + if (!wordQualitiesPredicate.test(s)) { + return true; + } + if (!filter(s)) { + return true; + } + if (topWords.contains(s.toLowerCase())) { + return true; + } + return false; + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/language/conf/LanguageModels.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/language/conf/LanguageModels.java new file mode 100644 index 00000000..3823b5ae --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/language/conf/LanguageModels.java @@ -0,0 +1,15 @@ +package nu.marginalia.wmsa.edge.crawler.domain.language.conf; + +import lombok.AllArgsConstructor; + +import java.nio.file.Path; + +@AllArgsConstructor +public class LanguageModels { + public final Path ngramDictionary; + public final Path ngramFrequency; + public final Path openNLPSentenceDetectionData; + public final Path posRules; + public final Path posDict; + public final Path openNLPTokenData; +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/language/processing/AsciiFlattener.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/language/processing/AsciiFlattener.java new file mode 100644 index 00000000..56e540ad --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/language/processing/AsciiFlattener.java @@ -0,0 +1,51 @@ +package nu.marginalia.wmsa.edge.crawler.domain.language.processing; + +import java.util.function.Predicate; +import java.util.regex.Pattern; + +public class AsciiFlattener { + + private static final Pattern nonAscii = Pattern.compile("[^a-zA-Z0-9_.'+@#:]+"); + private static final Pattern plainAsciiPattern = Pattern.compile("^[a-zA-Z0-9_.'+@#:]+$"); + private static final Predicate plainAscii = plainAsciiPattern.asMatchPredicate(); + + public static String 
flattenUnicode(String s) { + if (plainAscii.test(s)) { + return s; + } + + var cdata = s.toCharArray(); + var newCdata = new char[cdata.length]; + for (int i = 0; i < cdata.length; i++) { + if ("àáâãäåæ".indexOf(cdata[i]) >= 0) { + newCdata[i] = 'a'; + } + else if ("ç".indexOf(cdata[i]) >= 0) { + newCdata[i] = 'g'; + } + else if ("òóôõöø".indexOf(cdata[i]) >= 0) { + newCdata[i] = 'o'; + } + else if ("ùúûü".indexOf(cdata[i]) >= 0) { + newCdata[i] = 'u'; + } + else if ("ýÿÞþ".indexOf(cdata[i]) >= 0) { + newCdata[i] = 'y'; + } + else if ("ìíîï".indexOf(cdata[i]) >= 0) { + newCdata[i] = 'i'; + } + else if ("èéêë".indexOf(cdata[i]) >= 0) { + newCdata[i] = 'e'; + } + else if ("ß".indexOf(cdata[i]) >= 0) { + newCdata[i] = 's'; + } + else { + newCdata[i] = cdata[i]; + } + } + return nonAscii.matcher(new String(newCdata)).replaceAll(""); + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/language/processing/DocumentKeywordExtractor.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/language/processing/DocumentKeywordExtractor.java new file mode 100644 index 00000000..19f2a117 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/language/processing/DocumentKeywordExtractor.java @@ -0,0 +1,150 @@ +package nu.marginalia.wmsa.edge.crawler.domain.language.processing; + +import com.google.common.collect.Sets; +import nu.marginalia.wmsa.edge.assistant.dict.NGramDict; +import nu.marginalia.wmsa.edge.crawler.domain.language.WordPatterns; +import nu.marginalia.wmsa.edge.crawler.domain.language.processing.model.DocumentLanguageData; +import nu.marginalia.wmsa.edge.crawler.domain.language.processing.model.WordRep; +import nu.marginalia.wmsa.edge.index.model.IndexBlock; +import nu.marginalia.wmsa.edge.model.crawl.EdgePageWordSet; +import nu.marginalia.wmsa.edge.model.crawl.EdgePageWords; +import org.jetbrains.annotations.NotNull; + +import javax.inject.Inject; +import java.util.*; +import 
java.util.stream.Collectors; +import java.util.stream.Stream; + +public class DocumentKeywordExtractor { + + private final KeywordExtractor keywordExtractor; + private final KeywordCounter tfIdfCounter; + private final NameCounter nameCounter; + private final LongNameCounter longNameCounter; + private final SubjectCounter subjectCounter; + + private final NGramDict dict; + + @Inject + public DocumentKeywordExtractor(NGramDict dict) { + this.dict = dict; + + keywordExtractor = new KeywordExtractor(); + + tfIdfCounter = new KeywordCounter(dict, keywordExtractor); + nameCounter = new NameCounter(keywordExtractor); + longNameCounter = new LongNameCounter(dict, keywordExtractor); + subjectCounter = new SubjectCounter(keywordExtractor); + } + + public EdgePageWordSet extractKeywords(DocumentLanguageData documentLanguageData) { + + var titleWords = extractTitleWords(documentLanguageData); + + var wordsTfIdf = tfIdfCounter.count(documentLanguageData, 0.75); + var wordsNamesRepeated = nameCounter.count(documentLanguageData, 2); + var wordsNamesAll = nameCounter.count(documentLanguageData, 1); + var subjects = subjectCounter.count(documentLanguageData); + + List wordsLongName = longNameCounter.count(documentLanguageData); + + int totalSize = wordsTfIdf.size(); + + List lowKeywords = new ArrayList<>(totalSize / 2); + List midKeywords = new ArrayList<>(totalSize / 2); + List topKeywords = new ArrayList<>(totalSize / 2); + + for(var v : wordsTfIdf) { + if (topKeywords.size() < totalSize / 10) topKeywords.add(v); + else if (midKeywords.size() < totalSize / 5) midKeywords.add(v); + else lowKeywords.add(v); + } + + var wordsToMatchWithTitle = joinWordLists(topKeywords, midKeywords, wordsNamesRepeated, subjects); + + var words = getSimpleWords(documentLanguageData); + + for (var w : wordsLongName) + words.add(w.word); + for (var w : lowKeywords) + words.remove(w.word); + for (var w : midKeywords) + words.remove(w.word); + for (var w : topKeywords) + words.remove(w.word); + + var 
wordSet = new EdgePageWordSet( + createWords(IndexBlock.TitleKeywords, overlappingStems(titleWords, wordsToMatchWithTitle)), + createWords(IndexBlock.Topic, subjects), + createWords(IndexBlock.Title, titleWords), + createWords(IndexBlock.NamesWords, wordsNamesAll), + createWords(IndexBlock.Top, topKeywords), + createWords(IndexBlock.Middle, midKeywords), + createWords(IndexBlock.Low, lowKeywords) + ); + + wordSet.append(IndexBlock.Words, words); + + return wordSet; + } + + private List extractTitleWords(DocumentLanguageData documentLanguageData) { + return Arrays.stream(documentLanguageData.titleSentences).flatMap(sent -> + keywordExtractor.getWordsFromSentence(sent).stream().sorted().distinct().map(w -> new WordRep(sent, w))) + .limit(100) + .collect(Collectors.toList()); + } + + private Collection joinWordLists(List... words) { + int size = 0; + for (var lst : words) { + size += lst.size(); + } + if (size == 0) + return Collections.emptyList(); + + final LinkedHashSet ret = new LinkedHashSet<>(size); + for (var lst : words) { + ret.addAll(lst); + } + return ret; + } + + @NotNull + private Set getSimpleWords(DocumentLanguageData documentLanguageData) { + Map counts = new HashMap<>(documentLanguageData.totalNumWords()); + + for (var sent : documentLanguageData.sentences) { + for (int i = 0; i < sent.length(); i++) { + if (!sent.isStopWord(i)) { + String w = AsciiFlattener.flattenUnicode(sent.wordsLowerCase[i]); + if (counts.containsKey(w) || (WordPatterns.wordQualitiesPredicate.test(w) && WordPatterns.filter(w))) { + counts.merge(w, 1, Integer::sum); + } + } + } + } + + return counts.entrySet().stream().filter(c2 -> c2.getValue()>=1) + .sorted(Comparator.comparing(this::value)) + .map(Map.Entry::getKey) + .limit(512).collect(Collectors.toSet()); + } + + private double value(Map.Entry e) { + double N = 11820118.; // Number of documents in term freq dictionary + + return (1+Math.log(e.getValue())) * Math.log((1.+dict.getTermFreq(e.getKey()))/N); + } + + public 
EdgePageWords createWords(IndexBlock block, Collection words) { + return new EdgePageWords(block, words.stream().map(w -> w.word).map(AsciiFlattener::flattenUnicode).filter(WordPatterns.wordQualitiesPredicate).collect(Collectors.toSet())); + } + + private Set overlappingStems(Collection wordsA, Collection wordsB) { + Set stemmedA = wordsA.stream().map(WordRep::getStemmed).collect(Collectors.toSet()); + Set stemmedB = wordsB.stream().map(WordRep::getStemmed).collect(Collectors.toSet()); + Set stemmedIntersect = Sets.intersection(stemmedA, stemmedB); + return Stream.concat(wordsA.stream(), wordsB.stream()).filter(w -> stemmedIntersect.contains(w.getStemmed())).collect(Collectors.toSet()); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/language/processing/KeywordCounter.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/language/processing/KeywordCounter.java new file mode 100644 index 00000000..89bd9950 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/language/processing/KeywordCounter.java @@ -0,0 +1,93 @@ +package nu.marginalia.wmsa.edge.crawler.domain.language.processing; + +import nu.marginalia.wmsa.edge.assistant.dict.NGramDict; +import nu.marginalia.wmsa.edge.crawler.domain.language.processing.model.DocumentLanguageData; +import nu.marginalia.wmsa.edge.crawler.domain.language.processing.model.DocumentSentence; +import nu.marginalia.wmsa.edge.crawler.domain.language.processing.model.WordRep; +import nu.marginalia.wmsa.edge.crawler.domain.language.processing.model.WordSpan; + +import java.util.*; +import java.util.regex.Pattern; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +public class KeywordCounter { + private final KeywordExtractor keywordExtractor; + private final NGramDict dict; + + public KeywordCounter(NGramDict dict, KeywordExtractor keywordExtractor) { + this.dict = dict; + this.keywordExtractor = keywordExtractor; + } + + 
public List count(DocumentLanguageData dld, double cutoff) { + HashMap counts = new HashMap<>(1000); + HashMap> instances = new HashMap<>(1000); + + for (int i = 0; i < dld.sentences.length; i++) { + DocumentSentence sent = dld.sentences[i]; + double value = 1.0 / Math.log(1+i); + var keywords = keywordExtractor.getKeywordsFromSentence(sent); + for (var span : keywords) { + var stemmed = sent.constructStemmedWordFromSpan(span); + if (stemmed.isBlank()) + continue; + + counts.merge(stemmed, value, Double::sum); + + instances.computeIfAbsent(stemmed, k -> new HashSet<>()).add(sent.constructWordFromSpan(span)); + } + }; + + var topWords = counts.entrySet().stream() + .filter(w -> w.getValue() > cutoff) + .sorted(Comparator.comparing(this::getTermValue)) + .limit(Math.min(100, counts.size()/2)) + .map(Map.Entry::getKey) + .collect(Collectors.toList()); + + var topWordsSet = new HashSet<>(topWords); + + final Set keywords = new HashSet<>(); + + for (var sentence : dld.sentences) { + for (WordSpan kw : keywordExtractor.getKeywordsFromSentence(sentence)) { + String stemmedWord = sentence.constructStemmedWordFromSpan(kw); + if (topWords.contains(stemmedWord)) { + keywords.add(new WordRep(sentence, kw)); + } + } + } + + for (var sentence : dld.sentences) { + for (var kw : keywordExtractor.getKeywordsFromSentenceStrict(sentence, topWordsSet, true)) { + keywords.add(new WordRep(sentence, kw)); + } + } + + Map sortOrder = IntStream.range(0, topWords.size()).boxed().collect(Collectors.toMap(topWords::get, i->i)); + + Comparator comp = Comparator.comparing(wr -> sortOrder.getOrDefault(wr.stemmed, topWords.size())); + + var ret = new ArrayList<>(keywords); + ret.sort(comp); + return ret; + } + + private static Pattern separator = Pattern.compile("_"); + + public double getTermValue(Map.Entry e) { + String[] parts = separator.split(e.getKey()); + double totalValue = 0.; + for (String part : parts) { + totalValue += value(part, e.getValue()); + } + return totalValue / 
Math.sqrt(parts.length); + } + + double value(String key, double value) { + return (1+Math.log(value)) * Math.log((1.+dict.getTermFreq(key))/11820118.); + } + + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/language/processing/KeywordExtractor.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/language/processing/KeywordExtractor.java new file mode 100644 index 00000000..6c847daf --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/language/processing/KeywordExtractor.java @@ -0,0 +1,380 @@ +package nu.marginalia.wmsa.edge.crawler.domain.language.processing; + +import nu.marginalia.wmsa.edge.crawler.domain.language.WordPatterns; +import nu.marginalia.wmsa.edge.crawler.domain.language.processing.model.DocumentSentence; +import nu.marginalia.wmsa.edge.crawler.domain.language.processing.model.tag.WordSeparator; +import nu.marginalia.wmsa.edge.crawler.domain.language.processing.model.WordSpan; + +import java.lang.ref.SoftReference; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Set; +import java.util.function.Function; +import java.util.stream.IntStream; +import java.util.stream.Stream; + +public class KeywordExtractor { + + public boolean isLegacy() { + return legacy; + } + + public void setLegacy(boolean legacy) { + this.legacy = legacy; + } + + private boolean legacy; + + public WordSpan[] getNameLikes(DocumentSentence sentence) { + var direct = IntStream.range(0, sentence.length()) + .filter(i -> sentence.posTags[i].startsWith("N")) + .mapToObj(i -> new WordSpan(i, i+1)) + ; + var two = IntStream.range(1, sentence.length()) + .filter(i -> sentence.separators[i-1] == WordSeparator.SPACE) + .filter(i -> isName(i, sentence, Collections.emptySet())) + .filter(i -> isName(i -1, sentence, Collections.emptySet())) + .mapToObj(i -> new WordSpan(i-1, i+1)) + ; + + var a_in_b = IntStream.range(2, sentence.length()) + .filter(i 
-> sentence.separators[i-1] == WordSeparator.SPACE) + .filter(i -> isProperNoun(i, sentence)) + .filter(i -> isJoiner(sentence, i-1)) + .filter(i -> isProperNoun(i-2, sentence)) + .mapToObj(i -> new WordSpan(i-2, i+1)) + ; + + var a_in_det_b = IntStream.range(3, sentence.length()) + .filter(i -> sentence.separators[i-1] == WordSeparator.SPACE + && sentence.separators[i-2] == WordSeparator.SPACE) + .filter(i -> isProperNoun(i, sentence)) + .filter(i -> isJoiner(sentence, i-1)) + .filter(i -> sentence.posTags[i-2].equals("DT")) + .filter(i -> isProperNoun(i-3, sentence)) + .mapToObj(i -> new WordSpan(i-3, i+1)) + ; + var a_in_in_b = IntStream.range(3, sentence.length()) + .filter(i -> sentence.separators[i-1] == WordSeparator.SPACE + && sentence.separators[i-2] == WordSeparator.SPACE) + .filter(i -> isProperNoun(i, sentence)) + .filter(i -> isJoiner(sentence, i-1) || isProperNoun(i-1, sentence)) + .filter(i -> isJoiner(sentence, i-2) || isProperNoun(i-2, sentence)) + .filter(i -> isProperNoun(i-3, sentence)) + .mapToObj(i -> new WordSpan(i-3, i+1)) + ; + var three = IntStream.range(2, sentence.length()) + .filter(i -> sentence.separators[i-1] == WordSeparator.SPACE + && sentence.separators[i-2] == WordSeparator.SPACE) + .filter(i -> isName(i, sentence, Collections.emptySet())) + .filter(i -> isName(i-1, sentence, Collections.emptySet())) + .filter(i -> isName(i-2, sentence, Collections.emptySet())) + .mapToObj(i -> new WordSpan(i-2, i+1)) + ; + var four = IntStream.range(3, sentence.length()) + .filter(i -> sentence.separators[i-1] == WordSeparator.SPACE + && sentence.separators[i-2] == WordSeparator.SPACE + && sentence.separators[i-3] == WordSeparator.SPACE) + .filter(i -> isName(i, sentence, Collections.emptySet())) + .filter(i -> isName(i - 1, sentence, Collections.emptySet())) + .filter(i -> isName(i - 2, sentence, Collections.emptySet())) + .filter(i -> isName(i - 3, sentence, Collections.emptySet())) + .mapToObj(i -> new WordSpan(i-3, i+1)) + ; + + return 
Stream.of(direct, two, a_in_b, a_in_in_b, a_in_det_b, three, four).flatMap(Function.identity()) + .toArray(WordSpan[]::new); + } + + + public WordSpan[] getNames(DocumentSentence sentence) { + List spans = new ArrayList<>(sentence.length()); + + for (int i = 0; i < sentence.length(); i++) { + if (isProperNoun(i, sentence)) + spans.add(new WordSpan(i, i+1)); + } + + for (int i = 1; i < sentence.length(); i++) { + if (sentence.separators[i-1] == WordSeparator.COMMA) { continue; } + + if (isProperNoun(i, sentence) && isProperNoun(i-1, sentence)) + spans.add(new WordSpan(i-1, i+1)); + } + + for (int i = 2; i < sentence.length(); i++) { + if (sentence.separators[i-2] == WordSeparator.COMMA) { continue; } + if (sentence.separators[i-1] == WordSeparator.COMMA) { i++; continue; } + + if (isProperNoun(i, sentence) && (isJoiner(sentence, i-1) || isProperNoun(i-1, sentence)) && isProperNoun(i-2, sentence)) + spans.add(new WordSpan(i-2, i+1)); + } + + for (int i = 3; i < sentence.length(); i++) { + if (sentence.separators[i-3] == WordSeparator.COMMA) { continue; } + if (sentence.separators[i-2] == WordSeparator.COMMA) { i++; continue; } + if (sentence.separators[i-1] == WordSeparator.COMMA) { i+=2; continue; } + + if (isProperNoun(i, sentence) && isProperNoun(i-3, sentence)) { + if (isProperNoun(i - 1, sentence) && isProperNoun(i - 2, sentence)) { + spans.add(new WordSpan(i-3, i+1)); + } + else if (isJoiner(sentence, i-2) && sentence.posTags[i-1].equals("DT")) { + spans.add(new WordSpan(i-3, i+1)); + } + else if ((isJoiner(sentence, i-1)||isProperNoun(i - 1, sentence)) && (isJoiner(sentence, i-2)||isProperNoun(i - 2, sentence))) { + spans.add(new WordSpan(i-3, i+1)); + } + } + } + + return spans.toArray(WordSpan[]::new); + } + + public WordSpan[] getNamesStrict(DocumentSentence sentence) { + List spans = new ArrayList<>(sentence.length()); + + + for (int i = 0; i < sentence.length(); i++) { + if (isProperNoun(i, sentence)) + spans.add(new WordSpan(i, i+1)); + } + + for (int i 
= 1; i < sentence.length(); i++) { + if (sentence.separators[i-1] == WordSeparator.COMMA) { continue; } + if (isProperNoun(i, sentence) && isProperNoun(i-1, sentence)) + spans.add(new WordSpan(i-1, i+1)); + } + + for (int i = 2; i < sentence.length(); i++) { + if (sentence.separators[i-2] == WordSeparator.COMMA) { continue; } + if (sentence.separators[i-1] == WordSeparator.COMMA) { i++; continue; } + if (isProperNoun(i, sentence) && (isJoiner(sentence, i-1) || isProperNoun(i-1, sentence)) && isProperNoun(i-2, sentence)) + spans.add(new WordSpan(i-2, i+1)); + } + + for (int i = 3; i < sentence.length(); i++) { + if (sentence.separators[i-3] == WordSeparator.COMMA) { continue; } + if (sentence.separators[i-2] == WordSeparator.COMMA) { i++; continue; } + if (sentence.separators[i-1] == WordSeparator.COMMA) { i+=2; continue; } + + if (isProperNoun(i, sentence) && isProperNoun(i-3, sentence)) { + if (isProperNoun(i - 1, sentence) && isProperNoun(i - 2, sentence)) { + spans.add(new WordSpan(i-3, i+1)); + } + else if (isJoiner(sentence, i-1) && sentence.posTags[i-2].equals("DT")) { + spans.add(new WordSpan(i-3, i+1)); + } + } + } + + return spans.toArray(WordSpan[]::new); + } + + public boolean isProperNoun(int i, DocumentSentence sent) { + return "NNP".equals(sent.posTags[i]) || "NNPS".equals(sent.posTags[i]); + } + + public boolean isJoiner(DocumentSentence sent, int i) { + if(sent.posTags[i].equals("IN")) { + return true; + } + if (sent.posTags[i].equals("TO")) { + return true; + } + if (sent.posTags[i].equals("CC")) { + return sent.wordsLowerCase[i].equals("and"); + } + return false; + } + + public List getWordsFromSentence(DocumentSentence sentence) { + List spans = new ArrayList<>(); + + for (int k = 0; k < 4; k++) { + for (int i = k; i < sentence.length(); i++) { + var w = new WordSpan(i-k, i + 1); + + if (isViableSpanForWord(sentence, w)) { + spans.add(w); + } + } + } + + return spans; + } + + private boolean isViableSpanForWord(DocumentSentence sentence, WordSpan 
w) { + + for (int i = w.start; i < w.end-1; i++) { + if (sentence.separators[i] == WordSeparator.COMMA) { + return false; + } + } + String word = sentence.constructWordFromSpan(w); + + if (word.isBlank() || WordPatterns.isStopWord(word)) return false; + if (sentence.posTags[w.start].equals("CC")) return false; + if (sentence.posTags[w.end-1].equals("IN")) return false; + if (sentence.posTags[w.end-1].equals("DT")) return false; + if (sentence.posTags[w.end-1].equals("CC")) return false; + if (sentence.posTags[w.end-1].equals("TO")) return false; + + return true; + } + + public WordSpan[] getKeywordsFromSentence(DocumentSentence sentence) { + if (sentence.keywords != null) { + return sentence.keywords.get(); + } + List spans = new ArrayList<>(sentence.length()); + + Set topWords = Collections.emptySet(); + + for (int i = 0; i < sentence.length(); i++) { + if (isName(i, sentence, topWords) || isTopAdj(i, sentence, topWords)) + spans.add(new WordSpan(i, i+1)); + } + + for (int i = 1; i < sentence.length(); i++) { + if (sentence.separators[i-1] == WordSeparator.COMMA) { continue; } + + if (isName(i, sentence, topWords)) { + if (isName(i - 1, sentence, topWords) || isTopAdj(i-1, sentence, topWords)) + spans.add(new WordSpan(i - 1, i + 1)); + } + if (sentence.posTags[i].equals("CD") && isName(i-1, sentence, topWords)) { + spans.add(new WordSpan(i - 1, i + 1)); + } + } + + for (int i = 2; i < sentence.length(); i++) { + if (sentence.separators[i-1] == WordSeparator.COMMA) { i++; continue; } + if (sentence.separators[i-2] == WordSeparator.COMMA) { continue; } + + if (isName(i, sentence, topWords)) { + if ((isName(i-1, sentence, topWords) || isTopAdj(i-1, sentence, topWords)) + && (isName(i-2, sentence, topWords) || isTopAdj(i-2, sentence, topWords))) { + spans.add(new WordSpan(i - 2, i + 1)); + } + else if ((isProperNoun(i-1, sentence) || isJoiner(sentence, i-1)) && isProperNoun(i-2, sentence)) { + spans.add(new WordSpan(i - 2, i + 1)); + } + } + else if 
(sentence.posTags[i].equals("CD") && isName(i-1, sentence, topWords) && isName(i-2, sentence, topWords)) { + spans.add(new WordSpan(i - 2, i + 1)); + } + } + + for (int i = 3; i < sentence.length(); i++) { + if (sentence.separators[i-1] == WordSeparator.COMMA) { i+=2; continue; } + if (sentence.separators[i-2] == WordSeparator.COMMA) { i++; continue; } + if (sentence.separators[i-3] == WordSeparator.COMMA) { continue; } + + if (isName(i, sentence, topWords) && + (isName(i-1, sentence, topWords) || isTopAdj(i-1, sentence, topWords)) && + (isName(i-2, sentence, topWords) || isTopAdj(i-2, sentence, topWords)) && + (isName(i-3, sentence, topWords) || isTopAdj(i-3, sentence, topWords))) { + spans.add(new WordSpan(i - 3, i + 1)); + } + else if (isProperNoun(i, sentence) && isProperNoun(i-3, sentence)) { + if (isProperNoun(i - 1, sentence) && isProperNoun(i - 2, sentence)) { + spans.add(new WordSpan(i-3, i+1)); + } + else if (isJoiner(sentence, i-1) && sentence.posTags[i-2].equals("DT")) { + spans.add(new WordSpan(i-3, i+1)); + } + else if ((isProperNoun(i-1, sentence) || isJoiner(sentence, i-1)) && (isProperNoun(i-2, sentence)|| isJoiner(sentence, i-2))) { + spans.add(new WordSpan(i-3, i + 1)); + } + } + + } + + var ret = spans.toArray(WordSpan[]::new); + sentence.keywords = new SoftReference<>(ret); + + return ret; + } + + public WordSpan[] getKeywordsFromSentenceStrict(DocumentSentence sentence, Set topWords, boolean reducePartials) { + List spans = new ArrayList<>(sentence.length()); + + if (!reducePartials) { + for (int i = 0; i < sentence.length(); i++) { + if (topWords.contains(sentence.stemmedWords[i])) + spans.add(new WordSpan(i, i + 1)); + } + } + + for (int i = 1; i < sentence.length(); i++) { + if (sentence.separators[i-1] == WordSeparator.COMMA) { continue; } + + if (topWords.contains(sentence.stemmedWords[i]) + && !sentence.words[i].endsWith("'s") + && topWords.contains(sentence.stemmedWords[i-1])) { + spans.add(new WordSpan(i-1, i + 1)); + } + } + for (int 
i = 2; i < sentence.length(); i++) { + if (sentence.separators[i-2] == WordSeparator.COMMA) { continue; } + if (sentence.separators[i-1] == WordSeparator.COMMA) { i++; continue; } + + if (topWords.contains(sentence.stemmedWords[i]) + && !sentence.words[i].endsWith("'s") + && (topWords.contains(sentence.stemmedWords[i-1]) || isJoiner(sentence, i-1)) + && topWords.contains(sentence.stemmedWords[i-2]) + ) { + spans.add(new WordSpan(i-2, i + 1)); + } + } + + for (int i = 3; i < sentence.length(); i++) { + if (sentence.separators[i-3] == WordSeparator.COMMA) { continue; } + if (sentence.separators[i-2] == WordSeparator.COMMA) { i++; continue; } + if (sentence.separators[i-1] == WordSeparator.COMMA) { i+=2; continue; } + if (!sentence.words[i-2].endsWith("'s")) { continue; } + if (!sentence.words[i-3].endsWith("'s")) { continue; } + + if (topWords.contains(sentence.stemmedWords[i]) + && !sentence.words[i].endsWith("'s") && topWords.contains(sentence.stemmedWords[i-3])) { + if (topWords.contains(sentence.stemmedWords[i-1]) && topWords.contains(sentence.stemmedWords[i-2])) { + spans.add(new WordSpan(i-3, i + 1)); + } + else if (topWords.contains(sentence.stemmedWords[i-1]) && isJoiner(sentence, i-2)) { + spans.add(new WordSpan(i-3, i + 1)); + } + else if (isJoiner(sentence, i-2) && sentence.posTags[i-1].equals("DT")) { + spans.add(new WordSpan(i-3, i + 1)); + } + else if (isJoiner(sentence, i-2) && isJoiner(sentence, i-1)) { + spans.add(new WordSpan(i-3, i + 1)); + } + } + } + + return spans.toArray(WordSpan[]::new); + } + + private boolean isName(int i, DocumentSentence sentence, Set topWords) { + if (!topWords.isEmpty()) { + String posTag = sentence.posTags[i]; + String word = sentence.stemmedWords[i]; + + return ((topWords.contains(word)) && (posTag.startsWith("N") || "VBN".equals(posTag)) && !sentence.isStopWord(i)); + } + + + String posTag = sentence.posTags[i]; + +// if (posTag.startsWith("N") || posTag.startsWith("V") || posTag.startsWith("R") || 
posTag.startsWith("J")) + return (posTag.startsWith("N") || "VBN".equals(posTag)) && !sentence.isStopWord(i); + } + + private boolean isTopAdj(int i, DocumentSentence sentence, Set topWords) { + String posTag = sentence.posTags[i]; + + return (posTag.startsWith("JJ") || posTag.startsWith("R") || posTag.startsWith("VBG")); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/language/processing/LongNameCounter.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/language/processing/LongNameCounter.java new file mode 100644 index 00000000..332a6bbe --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/language/processing/LongNameCounter.java @@ -0,0 +1,63 @@ +package nu.marginalia.wmsa.edge.crawler.domain.language.processing; + +import nu.marginalia.wmsa.edge.assistant.dict.NGramDict; +import nu.marginalia.wmsa.edge.crawler.domain.language.processing.model.DocumentLanguageData; +import nu.marginalia.wmsa.edge.crawler.domain.language.processing.model.DocumentSentence; +import nu.marginalia.wmsa.edge.crawler.domain.language.processing.model.WordRep; + +import java.util.*; +import java.util.regex.Pattern; +import java.util.stream.Collectors; + +public class LongNameCounter { + private final KeywordExtractor keywordExtractor; + + private final NGramDict dict; + public LongNameCounter(NGramDict dict, KeywordExtractor keywordExtractor) { + this.dict = dict; + this.keywordExtractor = keywordExtractor; + } + + public List count(DocumentLanguageData dld) { + HashMap counts = new HashMap<>(1000); + HashMap> instances = new HashMap<>(1000); + + for (int i = 0; i < dld.sentences.length; i++) { + DocumentSentence sent = dld.sentences[i]; + var keywords = keywordExtractor.getNamesStrict(sent); + for (var span : keywords) { + var stemmed = sent.constructStemmedWordFromSpan(span); + counts.merge(stemmed, 1., Double::sum); + instances.computeIfAbsent(stemmed, k -> new HashSet<>()).add(new 
WordRep(sent, span)); + } + }; + + return counts.entrySet().stream().filter(e -> termSize(e.getKey()) > 1) + .sorted(Comparator.comparing(this::getTermValue)) + .limit(Math.min(50, counts.size()/3)) + .map(Map.Entry::getKey) + .flatMap(w -> instances.get(w).stream()).collect(Collectors.toList()); + } + + int termSize(String word) { + return 1 + (int) word.chars().filter(c -> c == '_').count(); + } + + + Pattern separator = Pattern.compile("_"); + + public double getTermValue(Map.Entry e) { + String[] parts = separator.split(e.getKey()); + double totalValue = 0.; + for (String part : parts) { + totalValue += value(part, e.getValue()); + } + return totalValue / Math.sqrt(parts.length); + } + + double value(String key, double value) { + return (1+Math.log(value)) * Math.log((1.+dict.getTermFreq(key))/11820118.); + } + + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/language/processing/NameCounter.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/language/processing/NameCounter.java new file mode 100644 index 00000000..4023154b --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/language/processing/NameCounter.java @@ -0,0 +1,40 @@ +package nu.marginalia.wmsa.edge.crawler.domain.language.processing; + +import nu.marginalia.wmsa.edge.crawler.domain.language.processing.model.DocumentLanguageData; +import nu.marginalia.wmsa.edge.crawler.domain.language.processing.model.DocumentSentence; +import nu.marginalia.wmsa.edge.crawler.domain.language.processing.model.WordRep; + +import java.util.*; +import java.util.stream.Collectors; + +public class NameCounter { + private final KeywordExtractor keywordExtractor; + + public NameCounter(KeywordExtractor keywordExtractor) { + this.keywordExtractor = keywordExtractor; + } + + public List count(DocumentLanguageData dld, int minCount) { + HashMap counts = new HashMap<>(1000); + HashMap> instances = new HashMap<>(1000); + + for (int i = 0; i 
< dld.sentences.length; i++) { + DocumentSentence sent = dld.sentences[i]; + var keywords = keywordExtractor.getNames(sent); + for (var span : keywords) { + var stemmed = sent.constructStemmedWordFromSpan(span); + + counts.merge(stemmed, 1., Double::sum); + instances.computeIfAbsent(stemmed, k -> new HashSet<>()).add(new WordRep(sent, span)); + } + }; + + return counts.entrySet().stream() + .filter(e -> e.getValue() >= minCount) + .sorted(Comparator.comparing(e -> -e.getValue())) + .limit(150) + .map(Map.Entry::getKey) + .flatMap(w -> instances.get(w).stream()).collect(Collectors.toList()); + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/language/processing/SentenceExtractor.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/language/processing/SentenceExtractor.java new file mode 100644 index 00000000..564ff100 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/language/processing/SentenceExtractor.java @@ -0,0 +1,299 @@ +package nu.marginalia.wmsa.edge.crawler.domain.language.processing; + +import com.github.datquocnguyen.RDRPOSTagger; +import gnu.trove.list.array.TIntArrayList; +import gnu.trove.map.hash.TObjectIntHashMap; +import lombok.AllArgsConstructor; +import lombok.Getter; +import lombok.SneakyThrows; +import nu.marginalia.wmsa.edge.crawler.domain.language.conf.LanguageModels; +import nu.marginalia.wmsa.edge.crawler.domain.language.processing.model.DocumentLanguageData; +import nu.marginalia.wmsa.edge.crawler.domain.language.processing.model.DocumentSentence; +import nu.marginalia.wmsa.edge.crawler.domain.language.processing.model.tag.WordSeparator; +import nu.marginalia.wmsa.edge.crawler.domain.processor.HtmlTagCleaner; +import opennlp.tools.sentdetect.SentenceDetectorME; +import opennlp.tools.sentdetect.SentenceModel; +import opennlp.tools.stemmer.PorterStemmer; +import org.jetbrains.annotations.NotNull; +import org.jsoup.nodes.Document; +import 
org.jsoup.nodes.Element; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.inject.Inject; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Optional; +import java.util.regex.Pattern; + +import static nu.marginalia.wmsa.edge.crawler.domain.language.WordPatterns.*; + +public class SentenceExtractor { + + private SentenceDetectorME sentenceDetector; + private RDRPOSTagger rdrposTagger; + + private final PorterStemmer porterStemmer = new PorterStemmer(); + private boolean legacyMode = false; + private static final Logger logger = LoggerFactory.getLogger(SentenceExtractor.class); + + private static final HtmlTagCleaner tagCleaner = new HtmlTagCleaner(); + + @SneakyThrows @Inject + public SentenceExtractor(LanguageModels models) { + try (InputStream modelIn = new FileInputStream(models.openNLPSentenceDetectionData.toFile())) { + var sentenceModel = new SentenceModel(modelIn); + sentenceDetector = new SentenceDetectorME(sentenceModel); + } + catch (IOException ex) { + sentenceDetector = null; + logger.error("Could not initialize sentence detector", ex); + } + + try { + rdrposTagger = new RDRPOSTagger(models.posDict, models.posRules); + } + catch (Exception ex) { + throw new IllegalStateException(ex); + } + } + + public DocumentLanguageData extractSentences(Document doc) { + final String text = asText(doc); + final DocumentSentence[] textSentences = extractSentencesFromString(text); + + String title = doc.getElementsByTag("title").text() + " . 
" + + Optional.ofNullable(doc.getElementsByTag("h1").first()).map(Element::text).orElse(""); + + if (title.trim().length() < 3) { + title = Optional.ofNullable(doc.getElementsByTag("h2").first()).map(Element::text).orElse(""); + } + + if (title.trim().length() < 3 && textSentences.length > 0) { + for (DocumentSentence textSentence : textSentences) { + if (textSentence.length() > 0) { + title = textSentence.originalSentence.toLowerCase(); + break; + } + } + } + + TObjectIntHashMap counts = calculateWordCounts(textSentences); + var titleSentences = extractSentencesFromString(title.toLowerCase()); + return new DocumentLanguageData(textSentences, titleSentences, counts); + } + + public DocumentLanguageData extractSentences(String text) { + final DocumentSentence[] textSentences = extractSentencesFromString(text); + + String title = ""; + for (DocumentSentence textSentence : textSentences) { + if (textSentence.length() > 0) { + title = textSentence.originalSentence.toLowerCase(); + break; + } + } + + + TObjectIntHashMap counts = calculateWordCounts(textSentences); + + return new DocumentLanguageData(textSentences, extractSentencesFromString(title.toLowerCase()), counts); + } + + + public DocumentLanguageData extractSentences(String text, String title) { + final DocumentSentence[] textSentences = extractSentencesFromString(text); + + TObjectIntHashMap counts = calculateWordCounts(textSentences); + + return new DocumentLanguageData(textSentences, extractSentencesFromString(title.toLowerCase()), counts); + } + + + @NotNull + private TObjectIntHashMap calculateWordCounts(DocumentSentence[] textSentences) { + TObjectIntHashMap counts = new TObjectIntHashMap<>(textSentences.length*10, 0.5f, 0); + + for (var sent : textSentences) { + for (var word : sent.stemmedWords) { + counts.adjustOrPutValue(word, 1, 1); + } + } + return counts; + } + + private static final Pattern dotPattern = Pattern.compile("\\.+$"); + private static final Pattern splitPattern = Pattern.compile("( -|- 
|\\|)"); + private static final Pattern badCharPattern = Pattern.compile("([^_#@.a-zA-Z'+\\-0-9\\u00C0-\\u00D6\\u00D8-\\u00f6\\u00f8-\\u00ff]+)|(\\.(\\s+|$))"); + private static final Pattern possessivePattern = Pattern.compile("'(s)?$"); + + public DocumentSentence extractSentence(String text) { + var wordsAndSeps = splitSegment(text); + + var words = wordsAndSeps.words; + var seps = wordsAndSeps.separators; + var lc = toLc(wordsAndSeps.words); + + return new DocumentSentence( + badCharPattern.matcher(text).replaceAll(" "), words, seps, lc, rdrposTagger.tagsForEnSentence(words), stemSentence(lc) + ); + } + + public DocumentSentence[] extractSentencesFromString(String text) { + String[] sentences; + + String textNormalizedSpaces = text.replaceAll("\\s", " "); + try { + sentences = sentenceDetector.sentDetect(textNormalizedSpaces); + } + catch (Exception ex) { + sentences = textNormalizedSpaces.split("[.]"); + } + + if (sentences.length > 250) { + sentences = Arrays.copyOf(sentences, 250); + } + + sentences = Arrays.stream(sentences) + .filter(s -> !s.isBlank()) + .flatMap(s -> Arrays.stream(splitPattern.split(s))) + .toArray(String[]::new); + + final String[][] tokens = new String[sentences.length][]; + final int[][] separators = new int[sentences.length][]; + final String[][] posTags = new String[sentences.length][]; + final String[][] tokensLc = new String[sentences.length][]; + final String[][] stemmedWords = new String[sentences.length][]; + + for (int i = 0; i < tokens.length; i++) { + + var wordsAndSeps = splitSegment(sentences[i]); //tokenizer.tokenize(sentences[i]); + tokens[i] = wordsAndSeps.words; + separators[i] = wordsAndSeps.separators; + if (tokens[i].length > 250) { + tokens[i] = Arrays.copyOf(tokens[i], 250); + separators[i] = Arrays.copyOf(separators[i], 250); + } + for (int j = 0; j < tokens[i].length; j++) { + tokens[i][j] = dotPattern.matcher(tokens[i][j]).replaceAll( ""); + } + } + + for (int i = 0; i < tokens.length; i++) { + posTags[i] = 
rdrposTagger.tagsForEnSentence(tokens[i]); + } + + for (int i = 0; i < tokens.length; i++) { + tokensLc[i] = toLc(tokens[i]); + } + + for (int i = 0; i < tokens.length; i++) { + stemmedWords[i] = stemSentence(tokensLc[i]); + } + + DocumentSentence[] ret = new DocumentSentence[sentences.length]; + for (int i = 0; i < ret.length; i++) { + ret[i] = new DocumentSentence(badCharPattern.matcher(sentences[i]).replaceAll(" "), tokens[i], separators[i], tokensLc[i], posTags[i], stemmedWords[i]); + } + return ret; + } + + private String[] stemSentence(String[] strings) { + String[] stemmed = new String[strings.length]; + for (int i = 0; i < stemmed.length; i++) { + var sent = possessivePattern.matcher(strings[i]).replaceAll(""); + try { + stemmed[i] = porterStemmer.stem(sent); + } + catch (Exception ex) { + stemmed[i] = "NN"; // ??? + } + } + return stemmed; + } + + private String[] toLc(String[] words) { + String[] lower = new String[words.length]; + for (int i = 0; i < lower.length; i++) { + lower[i] = possessivePattern.matcher(words[i].toLowerCase()).replaceAll(""); + } + return lower; + } + + public String asText(Document dc) { + + tagCleaner.clean(dc); + + String text = dc.getElementsByTag("body").text(); + + return text.substring(0, (int) (text.length()*0.95)); + } + + @AllArgsConstructor @Getter + private static class WordsAndSeparators { + String[] words; + int[] separators; + } + + private WordsAndSeparators splitSegment(String segment) { + var matcher = wordBreakPattern.matcher(segment); + + List words = new ArrayList<>(segment.length()/6); + TIntArrayList separators = new TIntArrayList(segment.length()/6); + + int start = 0; + int wordStart = 0; + while (wordStart <= segment.length()) { + if (!matcher.find(wordStart)) { + words.add(segment.substring(wordStart)); + separators.add(WordSeparator.SPACE); + break; + } + + if (wordStart != matcher.start()) { + words.add(segment.substring(wordStart, matcher.start())); + separators.add(segment.substring(matcher.start(), 
matcher.end()).isBlank() ? WordSeparator.SPACE : WordSeparator.COMMA); + } + wordStart = matcher.end(); + } + + String[] parts = words.toArray(String[]::new); + int length = 0; + for (int i = 0; i < parts.length; i++) { + if (parts[i].isBlank() || parts[i].length() >= MAX_WORD_LENGTH || characterNoisePredicate.test(parts[i])) { + parts[i] = null; + } + else { + length++; + } + } + + String[] ret = new String[length]; + int[] seps = new int[length]; + for (int i = 0, j=0; i < parts.length; i++) { + if (parts[i] != null) { + seps[j] = separators.getQuick(i); + ret[j++] = parts[i]; + } + } + + for (int i = 0; i < ret.length; i++) { + if (ret[i].startsWith("'") && ret[i].length() > 1) { ret[i] = ret[i].substring(1); } + if (ret[i].endsWith("'") && ret[i].length() > 1) { ret[i] = ret[i].substring(0, ret[i].length()-1); } + } + return new WordsAndSeparators(ret, seps); + } + + + public boolean isLegacyMode() { + return legacyMode; + } + public void setLegacyMode(boolean legacyMode) { + this.legacyMode = legacyMode; + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/language/processing/SubjectCounter.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/language/processing/SubjectCounter.java new file mode 100644 index 00000000..24a71c22 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/language/processing/SubjectCounter.java @@ -0,0 +1,47 @@ +package nu.marginalia.wmsa.edge.crawler.domain.language.processing; + +import nu.marginalia.wmsa.edge.assistant.dict.NGramDict; +import nu.marginalia.wmsa.edge.crawler.domain.language.processing.model.DocumentLanguageData; +import nu.marginalia.wmsa.edge.crawler.domain.language.processing.model.WordRep; +import nu.marginalia.wmsa.edge.crawler.domain.language.processing.model.WordSpan; +import nu.marginalia.wmsa.edge.crawler.domain.language.processing.model.tag.WordSeparator; + +import java.util.*; +import java.util.stream.Collectors; + 
+public class SubjectCounter { + private final KeywordExtractor keywordExtractor; + + public SubjectCounter(KeywordExtractor keywordExtractor) { + this.keywordExtractor = keywordExtractor; + } + + public List count(DocumentLanguageData dld) { + + Map counts = new HashMap<>(); + for (var sentence : dld.sentences) { + for (WordSpan kw : keywordExtractor.getNames(sentence)) { + if (kw.end + 2 >= sentence.length()) { + continue; + } + if (sentence.separators[kw.end] == WordSeparator.COMMA + || sentence.separators[kw.end + 1] == WordSeparator.COMMA) + break; + + if (("VBZ".equals(sentence.posTags[kw.end]) || "VBP".equals(sentence.posTags[kw.end])) + && ("DT".equals(sentence.posTags[kw.end + 1]) || "RB".equals(sentence.posTags[kw.end]) || sentence.posTags[kw.end].startsWith("VB")) + ) { + counts.merge(new WordRep(sentence, new WordSpan(kw.start, kw.end)), -1, Integer::sum); + } + } + } + + int best = counts.values().stream().mapToInt(Integer::valueOf).min().orElse(0); + + return counts.entrySet().stream().sorted(Map.Entry.comparingByValue()) + .filter(e -> e.getValue()<-2 && e.getValue() wordCount; + + public int totalNumWords() { + int ret = 0; + for (int i = 0; i < sentences.length; i++) { + ret += sentences[i].length(); + } + return ret; + } + + public Stream streamLowerCase() { + return Arrays.stream(sentences).map(sent -> sent.wordsLowerCase).flatMap(Arrays::stream); + } + + public Stream stream() { + return Arrays.stream(sentences).map(sent -> sent.words).flatMap(Arrays::stream); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/language/processing/model/DocumentSentence.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/language/processing/model/DocumentSentence.java new file mode 100644 index 00000000..690683a8 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/language/processing/model/DocumentSentence.java @@ -0,0 +1,88 @@ +package 
nu.marginalia.wmsa.edge.crawler.domain.language.processing.model; + + +import nu.marginalia.wmsa.edge.crawler.domain.language.WordPatterns; + +import java.lang.ref.SoftReference; +import java.util.BitSet; +import java.util.StringJoiner; +import java.util.regex.Pattern; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +public class DocumentSentence { + public final String originalSentence; + public final String[] words; + public final int[] separators; + public final String[] wordsLowerCase; + public final String[] posTags; + public final String[] stemmedWords; + + private final BitSet isStopWord; + + public SoftReference keywords; + + public DocumentSentence(String originalSentence, String[] words, int[] separators, String[] wordsLowerCase, String[] posTags, String[] stemmedWords) { + this.originalSentence = originalSentence; + this.words = words; + this.separators = separators; + this.wordsLowerCase = wordsLowerCase; + this.posTags = posTags; + this.stemmedWords = stemmedWords; + + isStopWord = new BitSet(words.length); + + for (int i = 0; i < words.length; i++) { + if (WordPatterns.isStopWord(words[i])) + isStopWord.set(i); + } + } + + public boolean isStopWord(int idx) { + return isStopWord.get(idx); + } + public void setIsStopWord(int idx, boolean val) { + if (val) + isStopWord.set(idx); + else + isStopWord.clear(); + } + public int length() { + return words.length; + } + + private final static Pattern trailingJunkPattern = Pattern.compile("(^[\"'_*]+|[_*'\"]+$)"); + private final static Pattern joinerPattern = Pattern.compile("[-+.]+"); + + public String constructWordFromSpan(WordSpan span) { + StringJoiner sj = new StringJoiner("_"); + for (int i = span.start; i < span.end; i++) { + sj.add(wordsLowerCase[i]); + } + + return trailingJunkPattern.matcher(sj.toString()).replaceAll(""); + } + + public String constructStemmedWordFromSpan(WordSpan span) { + StringJoiner sj = new StringJoiner("_"); + for (int i = span.start; i < span.end; 
i++) { + if (includeInStemming(i)) + sj.add(joinerPattern.matcher(stemmedWords[i]).replaceAll("_")); + + } + return sj.toString(); + } + + private boolean includeInStemming(int i) { + if (posTags[i].equals("IN") || posTags[i].equals("TO") || posTags[i].equals("CC") || posTags[i].equals("DT")) { + return false; + } + return true; + } + + + @Override + public String toString() { + return IntStream.range(0, length()).mapToObj(i -> String.format("%s[%s]", words[i], posTags[i])).collect(Collectors.joining(" ")); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/language/processing/model/WordRef.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/language/processing/model/WordRef.java new file mode 100644 index 00000000..34a6d79a --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/language/processing/model/WordRef.java @@ -0,0 +1,46 @@ +package nu.marginalia.wmsa.edge.crawler.domain.language.processing.model; + +import lombok.AllArgsConstructor; + +import java.util.Objects; +import java.util.Optional; + +@AllArgsConstructor +public class WordRef { + public final int sentenceIndex; + public final int wordIndex; + + public String getWord(DocumentLanguageData dld) { + return dld.sentences[sentenceIndex].words[wordIndex]; + } + + public String getWordStemmed(DocumentLanguageData dld) { + return dld.sentences[sentenceIndex].stemmedWords[wordIndex]; + } + + public Optional next(DocumentLanguageData dld) { + if (wordIndex + 1 < dld.sentences[sentenceIndex].length()) { + return Optional.of(new WordRef(sentenceIndex, wordIndex+1)); + } + return Optional.empty(); + } + public Optional prev() { + if (wordIndex - 1 >= 0) { + return Optional.of(new WordRef(sentenceIndex, wordIndex-1)); + } + return Optional.empty(); + } + + @Override + public int hashCode() { + return Objects.hash(sentenceIndex, wordIndex); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + 
if (o == null || getClass() != o.getClass()) return false; + WordRef wordRef = (WordRef) o; + return sentenceIndex == wordRef.sentenceIndex && wordIndex == wordRef.wordIndex; + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/language/processing/model/WordRep.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/language/processing/model/WordRep.java new file mode 100644 index 00000000..946545b2 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/language/processing/model/WordRep.java @@ -0,0 +1,31 @@ +package nu.marginalia.wmsa.edge.crawler.domain.language.processing.model; + +import lombok.AllArgsConstructor; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import nu.marginalia.wmsa.edge.assistant.dict.NGramDict; +import org.jetbrains.annotations.NotNull; + +import java.util.Objects; + +@AllArgsConstructor @EqualsAndHashCode @Getter +public class WordRep implements Comparable { + public WordRep(DocumentSentence sent, WordSpan span) { + word = sent.constructWordFromSpan(span); + stemmed = sent.constructStemmedWordFromSpan(span); + length = span.end - span.start; + } + public final int length; + public final String word; + public final String stemmed; + + @Override + public int compareTo(@NotNull WordRep o) { + return stemmed.compareTo(o.stemmed); + } + + @Override + public String toString() { + return word; + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/language/processing/model/WordSpan.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/language/processing/model/WordSpan.java new file mode 100644 index 00000000..5ec4f912 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/language/processing/model/WordSpan.java @@ -0,0 +1,53 @@ +package nu.marginalia.wmsa.edge.crawler.domain.language.processing.model; + +import lombok.AllArgsConstructor; +import 
lombok.EqualsAndHashCode; +import org.jetbrains.annotations.NotNull; + +@AllArgsConstructor @EqualsAndHashCode +public class WordSpan implements Comparable{ + public final int start; + public final int end; + + public int size() { + return end - start; + } + @Override + public int compareTo(@NotNull WordSpan o) { + return start - o.start; + } + + public boolean overlaps(WordSpan other) { + if (other.start >= start && other.start <= end) return true; + if (other.end >= start && other.end <= end) return true; + if (start >= other.start && start <= other.end) return true; + return false; + } + + public int distance(WordSpan other) { + if (overlaps(other)) { + return 0; + } + if (start < other.start) { + return end - other.start; + } + else { + return other.end - start; + } + + } + + public boolean hasSimilarWords(DocumentSentence s, WordSpan other) { + for (int i = start; i < end; i++) { + for (int j = other.start; j < other.end; j++) { + if (s.stemmedWords[i].equals(s.stemmedWords[j])) + return true; + } + } + return false; + } + + public String toString() { + return String.format("WordSpan[%s,%s]", start, end); + } +} \ No newline at end of file diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/language/processing/model/tag/WordSeparator.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/language/processing/model/tag/WordSeparator.java new file mode 100644 index 00000000..cfaa3e9b --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/language/processing/model/tag/WordSeparator.java @@ -0,0 +1,6 @@ +package nu.marginalia.wmsa.edge.crawler.domain.language.processing.model.tag; + +public final class WordSeparator { + public static final int COMMA = 0; + public static final int SPACE = 1; +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/language/processing/model/tag/WordTag.java 
/**
 * Features detected on an HTML page. Each constant owns one bit position so that a
 * set of features can be packed into a single int.
 */
public enum HtmlFeature {
    MEDIA(0),
    JS(1),
    AFFILIATE_LINK(2),
    TRACKING(3),
    COOKIES(4)
    ;

    /** Bit position of this feature in the packed value; final so a constant cannot be re-mapped. */
    public final int bit;

    HtmlFeature(int bit) {
        this.bit = bit;
    }

    /**
     * Packs a collection of features into a bit mask.
     *
     * @param featuresAll the features to encode; duplicates are harmless
     * @return an int with bit {@code f.bit} set for every feature f, 0 for an empty collection
     */
    public static int encode(Collection<HtmlFeature> featuresAll) {
        int mask = 0;
        for (HtmlFeature feature : featuresAll) {
            mask |= 1 << feature.bit;
        }
        return mask;
    }

    /** @return true if the bit belonging to {@code feature} is set in {@code value} */
    public static boolean hasFeature(int value, HtmlFeature feature) {
        return (value & (1 << feature.bit)) != 0;
    }

    /** @return {@code value} with the bit belonging to {@code feature} set */
    public static int addFeature(int value, HtmlFeature feature) {
        return (value | (1 << feature.bit));
    }
}
+ +//import net.sf.classifier4J.summariser.SimpleSummariser; + +import nu.marginalia.wmsa.edge.crawler.domain.LinkParser; +import nu.marginalia.wmsa.edge.crawler.domain.language.LanguageFilter; +import nu.marginalia.wmsa.edge.crawler.domain.language.WordPatterns; +import nu.marginalia.wmsa.edge.crawler.domain.language.processing.DocumentKeywordExtractor; +import nu.marginalia.wmsa.edge.crawler.domain.language.processing.KeywordExtractor; +import nu.marginalia.wmsa.edge.crawler.domain.language.processing.SentenceExtractor; +import nu.marginalia.wmsa.edge.crawler.domain.language.processing.model.DocumentSentence; +import nu.marginalia.wmsa.edge.index.model.IndexBlock; +import nu.marginalia.wmsa.edge.model.*; +import nu.marginalia.wmsa.edge.model.crawl.*; +import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.tuple.Pair; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.inject.Inject; +import javax.inject.Singleton; +import java.util.*; +import java.util.concurrent.ConcurrentHashMap; + +import static nu.marginalia.wmsa.edge.model.crawl.EdgeHtmlStandard.UNKNOWN; + +@Singleton +public class HtmlProcessor { + private static final LanguageFilter languageFilter = new LanguageFilter(); + private static final Logger logger = LoggerFactory.getLogger(HtmlProcessor.class); + + private final DocumentKeywordExtractor documentKeywordExtractor; + private final SentenceExtractor sentenceExtractor; + private final KeywordExtractor keywordExtractor = new KeywordExtractor(); + + private static final Set filthTable = Set.of( + "xxx", "sex", "anal", "sexy", + "bdsm", "fetish", "porn", "camgirls", "dildo", + "gangbang", "buttplug", "orgasm", "vibrator", + "cameltoe", "download", "iso", "botox", "torrent", + "jackpot", "vegas", "casino", "coinbase", "poloniex", + "myetherwallet", "ethereum", "binance", "bitcoin", + "litecoin", "seo", "serp" 
+ + ); + + private static final LinkParser linkParser = new LinkParser(); + + @Inject + public HtmlProcessor(DocumentKeywordExtractor documentKeywordExtractor, SentenceExtractor sentenceExtractor) { + this.documentKeywordExtractor = documentKeywordExtractor; + this.sentenceExtractor = sentenceExtractor; + } + + public EdgePageContent processHtmlPage(EdgeRawPageContents rawPageContent, Document parsed) { + + var parsed2 = parsed.clone(); + final String text = parsed.getElementsByTag("body").text(); + + if (languageFilter.isBlockedUnicodeRange(text)) { + logger.debug("Skipping {} , foreign unicode ranges in excessive presence", rawPageContent.url); + return null; + } + + int rawLength = rawPageContent.data.length(); + int scriptTags = getScriptPenalty(parsed); + int textLength = text.length(); + + EdgeHtmlStandard htmlStandard = HtmlStandardExtractor.parseDocType(parsed.documentType()); + if (UNKNOWN.equals(htmlStandard)) { + htmlStandard = HtmlStandardExtractor.sniffHtmlStandard(parsed); + } + + var dld = sentenceExtractor.extractSentences(parsed.clone()); + var keywords = documentKeywordExtractor.extractKeywords(dld); + + var featureSet = getFeatureSet(parsed, scriptTags, rawPageContent.hasCookies); + addTags(keywords, htmlStandard, rawPageContent.url, featureSet); + + final String title = Optional.ofNullable(parsed.getElementsByTag("title")) + .map(Elements::first) + .map(Element::text) + .or(() -> Optional.ofNullable(parsed.getElementsByTag("h1").first()).map(Element::text)) + .or(() -> Optional.ofNullable(parsed.getElementsByTag("h2").first()).map(Element::text)) + .or(() -> Optional.ofNullable(parsed.getElementsByTag("h3").first()).map(Element::text)) + .or(() -> { + if (dld.sentences.length > 0) return Optional.of(dld.sentences[0].originalSentence); + return Optional.empty(); + }) + .map(str -> StringUtils.truncate(str, 128)) + .orElseGet(rawPageContent.url::toString); + + int wc = dld.totalNumWords(); + + var bodyWords = keywords.get(IndexBlock.Words); + if 
(wc > 100) { + double languageAgreement = languageFilter.dictionaryAgreement(dld); + if (languageAgreement < 0.01 || (wc > 200 && languageAgreement < 0.05) ) { + logger.debug("Skipping {} , poor language agreement {}", rawPageContent.url, languageAgreement); + return null; + } + } + + double smutCoefficient = bodyWords.words.stream().filter(filthTable::contains).count(); + + Set summaryKeywords = new HashSet<>(); + + summaryKeywords.addAll(keywords.get(IndexBlock.Low).words); + summaryKeywords.addAll(keywords.get(IndexBlock.Middle).words); + summaryKeywords.addAll(keywords.get(IndexBlock.Top).words); + summaryKeywords.addAll(keywords.get(IndexBlock.Title).words); + + var description = extractSummary(parsed2, summaryKeywords) + .or(() -> getOgDescription(parsed2)) + .or(() -> getMetaDescription(parsed2)); + + int totalWords = Arrays.stream(dld.sentences).mapToInt(DocumentSentence::length).sum(); + + final var metadata = new EdgePageMetadata( + HtmlFeature.encode(featureSet), scriptTags, rawLength, textLength, wc, + title, description.orElse(""), smutCoefficient, + totalWords, htmlStandard); + + Map> linkWords = extractLinkWords(keywords, rawPageContent.getUrl(), parsed); + + return new EdgePageContent(rawPageContent.url, keywords, linkWords, metadata, rawPageContent.getData().hashCode(), + rawPageContent.ip); + } + + List trackers = List.of("adform.net", + "connect.facebook", + "googletagmanager.com", + "googlesyndication.com", + "google.com", + "twitter.com", + "smartadserver.com", + "doubleclick.com", + "2mdn.com", + "dmtry.com", + "bing.com", + "msn.com", + "amazon-adsystem.com", + "alexametrics.com", + "rubiconproject.com", + "chango.com", + "d5nxst8fruw4z.cloudfront.net", + "d31qbv1cthcecs.cloudfront.net", + "linkedin.com"); + + private Set getFeatureSet(Document parsed, int scriptTags, boolean cookies) { + Set features = new HashSet<>(); + + if (scriptTags > 0) { + features.add(HtmlFeature.JS); + } + if (!parsed.getElementsByTag("object").isEmpty() + || 
!parsed.getElementsByTag("audio").isEmpty() + || !parsed.getElementsByTag("video").isEmpty()) { + features.add(HtmlFeature.MEDIA); + } + if (parsed.getElementsByTag("script").stream() + .filter(tag -> tag.attr("src") != null) + .anyMatch(tag -> trackers.stream().anyMatch(tracker -> tag.attr("src").contains(tracker)))) { + features.add(HtmlFeature.TRACKING); + } + if (parsed.getElementsByTag("script").html().contains("google-analytics.com")) { + features.add(HtmlFeature.TRACKING); + } + if (parsed.getElementsByTag("a").stream().map(e -> e.attr("href")) + .filter(Objects::nonNull) + .map(String::toLowerCase) + .anyMatch(href -> + href.contains("amzn.to/") || href.contains("amazon.com/"))) { + features.add(HtmlFeature.AFFILIATE_LINK); + } + if (cookies) { + features.add(HtmlFeature.COOKIES); + } + + return features; + } + + private void addTags(EdgePageWordSet wordSet, EdgeHtmlStandard htmlStandard, EdgeUrl url, Set features) { + List tagWords = new ArrayList<>(); + tagWords.add("format:"+htmlStandard.toString().toLowerCase()); + tagWords.add("site:"+url.domain.toString().toLowerCase()); + tagWords.add("proto:"+url.proto.toLowerCase()); + tagWords.add("js:" + Boolean.toString(features.contains(HtmlFeature.JS)).toLowerCase()); + if (features.contains(HtmlFeature.MEDIA)) { + tagWords.add("special:media"); + } + if (features.contains(HtmlFeature.TRACKING)) { + tagWords.add("special:tracking"); + } + if (features.contains(HtmlFeature.AFFILIATE_LINK)) { + tagWords.add("special:affiliate"); + } + if (features.contains(HtmlFeature.COOKIES)) { + tagWords.add("special:cookies"); + } + wordSet.append(IndexBlock.Meta, tagWords); + wordSet.append(IndexBlock.Words, tagWords); + } + + private int getScriptPenalty(Document parsed) { + var scriptTags = parsed.getElementsByTag("script"); + String scriptText = scriptTags.html(); + int badScript = 0; + if (scriptText.contains(".createElement(")) { + badScript = 1; + } + + double scriptPenalty = 0; + for (var tag : scriptTags) { + String 
srcTag = tag.attr("src"); + if (srcTag == null) { + scriptPenalty += 1; + } + else if (srcTag.contains("wp-content") || srcTag.contains("wp-includes") || srcTag.contains("jquery")) { + scriptPenalty += 0.49; + } + else { + scriptPenalty += 1; + } + + } + return (int)(scriptPenalty + badScript + (scriptText.length())/1000.); + } + + private Map> extractLinkWords(EdgePageWordSet keywords, EdgeUrl pageUrl, Document parsed) { + + + List> urls = new ArrayList<>(); + + Set linkKeywords = new HashSet<>(); + + Map> linkTextWords = new ConcurrentHashMap<>(); + + for (var tag : parsed.getElementsByTag("a")) { + if (!tag.hasAttr("href")) { + continue; + } + if (urls.size() > 100) { + break; + } + + var linkOpt = linkParser.parseLink(pageUrl, tag); + if (linkOpt.isEmpty()) + continue; + + var link = linkOpt.get(); + + urls.add(Pair.of(link, tag.text())); + + if (!Objects.equals(link.domain.domain, pageUrl.domain.domain) + && linkKeywords.size() <= 25) + { + linkKeywords.add("links:" + link.domain.domain); + } + + Set words = new HashSet<>(); + + for (var sent : sentenceExtractor.extractSentencesFromString(tag.text())) { + for (var keyword : keywordExtractor.getWordsFromSentence(sent)) { + words.add(sent.constructWordFromSpan(keyword)); + } + } + + linkTextWords.compute(link, (k, set) -> { + if (set == null) return words; + else { set.addAll(words); return set; } + }); + + } + + keywords.get(IndexBlock.Meta).addAll(linkKeywords); + + if (WordPatterns.wordQualitiesPredicate.test(pageUrl.domain.domain.toLowerCase())) { + keywords.get(IndexBlock.Link).addJust(pageUrl.domain.domain.toLowerCase()); + } + + return linkTextWords; + } + + public Optional extractSummary(Document parsed, Set keywords) { + var cleanDoc = parsed.clone(); + cleanDoc.getElementsByTag("nav").remove(); + cleanDoc.getElementsByTag("header").remove(); + Optional.ofNullable(cleanDoc.getElementById("header")).ifPresent(Element::remove); + cleanDoc.getElementsByClass("header").remove(); + 
cleanDoc.getElementsByClass("nav").remove(); + + return extractSummaryRaw(cleanDoc) + .map(String::trim) + .filter(s -> !s.isEmpty()) + .filter(s -> !s.isBlank() && s.length() > 20) + .map(s -> s.substring(0, Math.min(500, s.length()))) + ; + } + + private Optional extractSummaryRaw(Document parsed) { + StringBuilder content = new StringBuilder(); + + parsed.getElementsByTag("p").forEach( + elem -> { + if (elem.text().length() > elem.html().length()/2) { + content.append(elem.text()); + } + } + ); + + if (content.length() > 10) { + return Optional.of(content.toString()); + } + return Optional.empty(); + } + + private Optional getMetaDescription(Document parsed) { + return Optional.ofNullable(parsed.select("meta[name=description]")).map(tag -> tag.attr("content")).filter(s -> !s.isBlank()); + } + private Optional getOgDescription(Document parsed) { + return Optional.ofNullable(parsed.select("meta[name=og:description]")).map(tag -> tag.attr("content")).filter(s -> !s.isBlank()); + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/processor/HtmlStandardExtractor.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/processor/HtmlStandardExtractor.java new file mode 100644 index 00000000..862293b5 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/processor/HtmlStandardExtractor.java @@ -0,0 +1,84 @@ +package nu.marginalia.wmsa.edge.crawler.domain.processor; + +import com.google.common.base.Strings; +import nu.marginalia.wmsa.edge.model.crawl.EdgeHtmlStandard; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.DocumentType; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import static nu.marginalia.wmsa.edge.model.crawl.EdgeHtmlStandard.*; + +public class HtmlStandardExtractor { + + + private static final Logger logger = LoggerFactory.getLogger(HtmlStandardExtractor.class); + + public static EdgeHtmlStandard parseDocType(DocumentType docType) { + if (null 
== docType) { + return UNKNOWN; + } + String publicId = docType.publicId(); + if (Strings.isNullOrEmpty(publicId)) + return HTML5; + + publicId = publicId.toUpperCase(); + if (publicId.startsWith("-//SOFTQUAD SOFTWARE//DTD") && publicId.contains("HTML 4")) { + return HTML4; + } + if (publicId.startsWith("-//SOFTQUAD SOFTWARE//DTD") && publicId.contains("HTML 3")) { + return HTML123; + } + if (publicId.startsWith("-//INTERNET/RFC XXXX//EN")) + return HTML123; + if (publicId.startsWith("-//NETSCAPE COMM. CORP")) + return HTML123; + if (publicId.startsWith("-//SQ//DTD HTML 2")) + return HTML123; + if (publicId.startsWith("-//SOFTQUAD//DTD HTML 2")) + return HTML123; + if (publicId.startsWith("-//W3O//DTD W3 HTML 2")) + return HTML123; + if (publicId.startsWith("-//IETF//DTD HTML 2")) + return HTML123; + if (publicId.startsWith("-//IETF//DTD HTML//EN")) + return HTML123; + if (publicId.startsWith("-/W3C//DTD HTML 3")) + return HTML123; + if (publicId.startsWith("-/W3C/DTD HTML 3")) + return HTML123; + if (publicId.startsWith("-//IETF//DTD HTML 3")) + return HTML123; + if (publicId.startsWith("-//W3C//DTD XHTML")) + return XHTML; + if (publicId.startsWith("ISO/IEC 15445:2000//DTD")) + return XHTML; + if (publicId.startsWith("-//W3C//DTD HTML")) + return HTML4; + + logger.debug("Unknown publicID standard {}", publicId); + return UNKNOWN; + } + + public static EdgeHtmlStandard sniffHtmlStandard(Document parsed) { + int html4Attributes = 0; + int html5Attributes = 0; + + if (parsed.getElementsByTag("article").size() > 0) html5Attributes++; + if (parsed.getElementsByTag("header").size() > 0) html5Attributes++; + if (parsed.getElementsByTag("footer").size() > 0) html5Attributes++; + if (parsed.getElementsByTag("video").size() > 0) html5Attributes++; + if (parsed.getElementsByTag("audio").size() > 0) html5Attributes++; + if (parsed.getElementsByTag("canvas").size() > 0) html5Attributes++; + if (parsed.getElementsByTag("link").stream().anyMatch(elem -> 
"stylesheet".equals(elem.attr("rel")))) { + html4Attributes++; + } + if (html5Attributes > 0) { + return HTML5; + } + if (html4Attributes > 0) { + return HTML4; + } + return HTML123; + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/processor/HtmlSummarizer.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/processor/HtmlSummarizer.java new file mode 100644 index 00000000..36e41573 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/processor/HtmlSummarizer.java @@ -0,0 +1,139 @@ +package nu.marginalia.wmsa.edge.crawler.domain.processor; + +import gnu.trove.list.array.TIntArrayList; +import it.unimi.dsi.fastutil.ints.IntArrays; +import nu.marginalia.wmsa.edge.crawler.domain.language.processing.SentenceExtractor; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; + +import java.util.*; +import java.util.regex.Pattern; + +public class HtmlSummarizer { + private static Pattern extendedJunk = Pattern.compile("[^a-zA-Z0-9]{4,}"); + + private static final int MAX_CONSIDERABLE_SENTENCES = 200; + private static final int MIN_SUMMARY_LENGTH = 20; + private static final int MIN_TAG_LENGTH = 25; + private static final int MAX_SUMMARY_LENGTH = 255; + + private final SentenceExtractor sentenceExtractor; + + + public HtmlSummarizer(SentenceExtractor sentenceExtractor) { + this.sentenceExtractor = sentenceExtractor; + } + + public Optional getSummary(Document parsed, Set keywords) { + List candidates = extractCandidates(parsed); + TIntArrayList scores = new TIntArrayList(candidates.size()); + + for (String sentence : candidates) { + scores.add(calculateScore(sentence, keywords)); + } + + String summary = constructSummary(candidates, scores); + if (summary.isBlank() || summary.length() < MIN_SUMMARY_LENGTH) { + return Optional.empty(); + } + return Optional.of(summary); + } + + private String constructSummary(List candidates, TIntArrayList scores) { + int[] scoresReversed 
= getIndicesByScore(scores, candidates); + TIntArrayList includedParts = new TIntArrayList(); + + int length = 0; + for (int i = 0; length < MAX_SUMMARY_LENGTH && i < scoresReversed.length; i++) { + String sentence = candidates.get(scoresReversed[i]); + length += sentence.length(); + includedParts.add(scoresReversed[i]); + } + includedParts.sort(); + + StringBuilder summary = new StringBuilder(); + includedParts.forEach(i -> { + var candidate = candidates.get(i).trim(); + + summary.append(candidate); + if (endsInLetterOrNumber(candidate.trim())) { + summary.append(". "); + } + else if (!candidate.isEmpty() && !Character.isSpaceChar(candidate.charAt(candidate.length()-1))) { + summary.append(" "); + } + return true; + }); + + return summary.toString(); + } + + private boolean endsInLetterOrNumber(String candidate) { + if (candidate.isBlank()) return false; + char lastChar = candidate.charAt(candidate.length()-1); + + return (Character.isAlphabetic(lastChar) || Character.isDigit(lastChar)); + } + + private int[] getIndicesByScore(TIntArrayList scores, List candidates) { + int[] scoresReversed = new int[scores.size()]; + for (int i = 0; i { + int d = scores.get(b) - scores.get(a); + if (d == 0) { + return candidates.get(a).length() - candidates.get(b).length(); + } + return d; + }); + return scoresReversed; + } + + private List extractCandidates(Document parsed) { + var clone = parsed.clone(); + clone.getElementsByTag("br").remove(); + + List ret = new ArrayList<>(); + + for (var elem : clone.select("p,div,section,article")) { + if (isCandidate(elem)) { + ret.add(cleanText(elem.text())); + } + if (ret.size() > MAX_CONSIDERABLE_SENTENCES) { + break; + } + }; + + return ret; + } + private String cleanText(String text) { + return extendedJunk.matcher(text).replaceAll(" "); + + } + + private int calculateScore(String sentence, Set keywords) { + int score = 0; + + final var data = sentenceExtractor.extractSentencesFromString(sentence); + + for (var s : data) { + for (var 
word : s.wordsLowerCase) { + if (keywords.contains(word)) { + score++; + } + } + } + + return score; + } + + private boolean isCandidate(Element elem) { + if (elem.html().length() < MIN_TAG_LENGTH) { + return false; + } + + if (elem.childrenSize() > 3) + return false; + + return elem.text().length() > 0.75*elem.html().length(); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/processor/HtmlTagCleaner.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/processor/HtmlTagCleaner.java new file mode 100644 index 00000000..98fa6918 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/processor/HtmlTagCleaner.java @@ -0,0 +1,40 @@ +package nu.marginalia.wmsa.edge.crawler.domain.processor; + +import org.jsoup.nodes.Document; +import org.jsoup.nodes.TextNode; + +import java.util.regex.Pattern; + +public class HtmlTagCleaner { + public final int MAX_CODE_TAG_LENGTH = 32; + public final Pattern codeTagJunkPattern = Pattern.compile("(\\.|<|>|<|>|\\([^)]*\\)[;]?$)"); + + public void clean(Document doc) { + cleanCodeTags(doc); + + doc.select("nav,form,input,code,body>title").remove(); + + // Create "sentences" out of elements that sometimes lack a period at the end to help + // NLP work better + doc.select("li,h1,h2,h3,h4,h5,h6,td,th,p,div,title").forEach(e -> e.appendText(". ")); + doc.select("br,hr").forEach(e -> e.prependText(". 
")); + } + + private void cleanCodeTags(Document doc) { + for (var codeTag : doc.getElementsByTag("code")) { + var text = codeTag.text(); + + if (text.length() <= MAX_CODE_TAG_LENGTH) { + codeTag.replaceWith(new TextNode(trimCodeTagContents(text))); + } + else { + codeTag.remove(); + } + + } + } + + private String trimCodeTagContents(String text) { + return codeTagJunkPattern.matcher(text).replaceAll(" "); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/processor/PlainTextProcessor.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/processor/PlainTextProcessor.java new file mode 100644 index 00000000..16721708 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/domain/processor/PlainTextProcessor.java @@ -0,0 +1,74 @@ +package nu.marginalia.wmsa.edge.crawler.domain.processor; + +import nu.marginalia.wmsa.edge.crawler.domain.language.processing.DocumentKeywordExtractor; +import nu.marginalia.wmsa.edge.crawler.domain.language.processing.SentenceExtractor; +import nu.marginalia.wmsa.edge.index.model.IndexBlock; +import nu.marginalia.wmsa.edge.model.*; +import nu.marginalia.wmsa.edge.model.crawl.EdgeHtmlStandard; +import nu.marginalia.wmsa.edge.model.crawl.EdgePageContent; +import nu.marginalia.wmsa.edge.model.crawl.EdgePageMetadata; +import nu.marginalia.wmsa.edge.model.crawl.EdgeRawPageContents; +import org.apache.commons.lang3.StringUtils; +import org.apache.logging.log4j.util.Strings; + +import javax.inject.Inject; +import java.util.Arrays; +import java.util.Collections; +import java.util.Optional; +import java.util.stream.Collectors; + +public class PlainTextProcessor { + + private final DocumentKeywordExtractor keywordExtractor; + private final SentenceExtractor sentenceExtractor; + @Inject + public PlainTextProcessor(DocumentKeywordExtractor keywordExtractor, SentenceExtractor sentenceExtractor) { + this.keywordExtractor = keywordExtractor; + this.sentenceExtractor = 
sentenceExtractor; + } + + public Optional parsePlainText(EdgeRawPageContents rawContents) { + if (!isFileEndingAllowed(rawContents.url)) { + return Optional.empty(); + } + final String textData = rawContents.getData(); + final String[] textLines = textData.substring(0, Math.min(5000, textData.length())).split("\n"); + + var dld = sentenceExtractor.extractSentences(textData); + var keywords = keywordExtractor.extractKeywords(dld); + keywords.get(IndexBlock.Meta).addJust("format:plain"); + keywords.get(IndexBlock.Words).addJust("format:plain"); + + final var metadata = new EdgePageMetadata(0, 0, textData.length(), + textData.length(), dld.totalNumWords(), + rawContents.url.fileName(), + getDescription(textLines), 0., 1, + EdgeHtmlStandard.PLAIN); + + return Optional.of(new EdgePageContent(rawContents.url, + keywords, + Collections.emptyMap(), + metadata, + rawContents.getData().hashCode(), + rawContents.ip)); + } + + private boolean isFileEndingAllowed(EdgeUrl url) { + String urlString = url.toString().toLowerCase(); + if (urlString.endsWith(".txt")) { + return true; + } + if (urlString.endsWith(".md")) { + return true; + } + if (urlString.endsWith(".gmi")) { + return true; + } + return false; + } + + private String getDescription(String[] textLines) { + return StringUtils.truncate(Arrays.stream(textLines).filter(Strings::isNotBlank).limit(10).collect(Collectors.joining("\n")), 200); + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/fetcher/ContentTypeParser.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/fetcher/ContentTypeParser.java new file mode 100644 index 00000000..b86fa118 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/fetcher/ContentTypeParser.java @@ -0,0 +1,77 @@ +package nu.marginalia.wmsa.edge.crawler.fetcher; + +import crawlercommons.mimetypes.MimeTypeDetector; +import nu.marginalia.wmsa.edge.model.crawl.EdgeContentType; +import org.jsoup.Jsoup; + +import 
java.util.Arrays; +import java.util.Optional; + +public class ContentTypeParser { + + static MimeTypeDetector mimeTypeDetector = new MimeTypeDetector(); + + public static EdgeContentType parse(String contentType, byte[] data) { + return getContentTypeFromContentTypeString(contentType) + .or(() -> getContentTypeStringFromTag(data)) + .orElseGet(() -> { + Optional charset = getCharsetFromTag(data); + return new EdgeContentType( + Optional.ofNullable(contentType) + .or(() -> Optional.ofNullable(mimeTypeDetector.detect(data))) + .orElseGet(() -> ContentTypeParser.shittyMimeSniffer(data)), charset.orElse("ISO_8859_1")); + }); + } + + private static Optional getContentTypeFromContentTypeString(String contentType) { + if (contentType != null && contentType.contains(";")) { + var parts = contentType.split(";"); + var content = parts[0].trim(); + var extra = parts[1].trim(); + if (extra.startsWith("charset=")) { + return Optional.of(new EdgeContentType(content, extra.substring("charset=".length()))); + } + } + return Optional.empty(); + } + + private static String shittyMimeSniffer(byte[] data) { + + for (int i = 0; i < data.length && i < 128; i++) { + if (data[i] < 32) { + return "application/binary"; + } + } + + String startStr = new String(Arrays.copyOf(data, Math.min(128, data.length))).trim().toLowerCase(); + if (startStr.contains(" getContentTypeStringFromTag(byte[] data) { + String header = new String(Arrays.copyOf(data, Math.min(1024, data.length))); + var doc = Jsoup.parse(header); + for (var metaTag : doc.getElementsByTag("meta")) { + if ("content-type".equalsIgnoreCase(metaTag.attr("http-equiv"))) { + return getContentTypeFromContentTypeString(metaTag.attr("content")); + } + } + return Optional.empty(); + } + + private static Optional getCharsetFromTag(byte[] data) { + String header = new String(Arrays.copyOf(data, Math.min(1024, data.length))); + var doc = Jsoup.parse(header); + for (var metaTag : doc.getElementsByTag("meta")) { + if (metaTag.hasAttr("charset")) 
{ + return Optional.of(metaTag.attr("charset")); + } + } + return Optional.empty(); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/fetcher/Cookies.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/fetcher/Cookies.java new file mode 100644 index 00000000..0264259f --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/fetcher/Cookies.java @@ -0,0 +1,44 @@ +package nu.marginalia.wmsa.edge.crawler.fetcher; + +import okhttp3.Cookie; +import okhttp3.CookieJar; +import okhttp3.HttpUrl; + +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.WeakHashMap; +import java.util.concurrent.ConcurrentHashMap; + +public class Cookies { + final ThreadLocal>> cookieJar = ThreadLocal.withInitial(ConcurrentHashMap::new); + + public CookieJar getJar() { + return new CookieJar() { + + @Override + public void saveFromResponse(HttpUrl url, List cookies) { + if (!cookies.isEmpty()) { + cookieJar.get().put(url, cookies); + } + } + + @Override + public List loadForRequest(HttpUrl url) { + return cookieJar.get().getOrDefault(url, Collections.emptyList()); + } + }; + } + + public void clear() { + cookieJar.get().clear(); + } + + public boolean hasCookies() { + return !cookieJar.get().isEmpty(); + } + + public List getCookies() { + return cookieJar.get().values().stream().flatMap(List::stream).map(Cookie::toString).toList(); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/fetcher/HttpFetcher.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/fetcher/HttpFetcher.java new file mode 100644 index 00000000..1770e6d9 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/fetcher/HttpFetcher.java @@ -0,0 +1,256 @@ +package nu.marginalia.wmsa.edge.crawler.fetcher; + +import com.google.inject.Inject; +import com.google.inject.name.Named; +import lombok.AllArgsConstructor; +import lombok.SneakyThrows; +import 
lombok.ToString; +import nu.marginalia.wmsa.client.exception.NetworkException; +import nu.marginalia.wmsa.edge.crawler.domain.LinkParser; +import nu.marginalia.wmsa.edge.model.EdgeDomain; +import nu.marginalia.wmsa.edge.model.crawl.EdgeRawPageContents; +import nu.marginalia.wmsa.edge.model.EdgeUrl; +import okhttp3.OkHttpClient; +import okhttp3.Request; +import okhttp3.Response; +import org.apache.commons.io.input.BOMInputStream; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.net.ssl.X509TrustManager; +import java.io.IOException; +import java.net.InetAddress; +import java.net.URISyntaxException; +import java.nio.charset.Charset; +import java.time.LocalDateTime; +import java.util.Objects; +import java.util.concurrent.TimeUnit; +import java.util.function.Predicate; +import java.util.regex.Pattern; +import java.util.zip.GZIPInputStream; + +public class HttpFetcher { + + private final Logger logger = LoggerFactory.getLogger(getClass()); + private final String userAgent; + private final int maxFetchSize = 1024*512; + private Cookies cookies = new Cookies(); + + private final LinkParser linkParser = new LinkParser(); + + public void setAllowAllContentTypes(boolean allowAllContentTypes) { + this.allowAllContentTypes = allowAllContentTypes; + } + + private boolean allowAllContentTypes = false; + + private final OkHttpClient client = createClient(); + + public enum FetchResultState { + OK, + REDIRECT, + ERROR; + }; + + @AllArgsConstructor @ToString + public static class FetchResult { + public final FetchResultState state; + public final EdgeDomain domain; + + public boolean ok() { + return state == FetchResultState.OK; + } + }; + + @SneakyThrows + private OkHttpClient createClient() { + return new OkHttpClient.Builder() + .sslSocketFactory(NoSecuritySSL.buildSocketFactory(), (X509TrustManager) NoSecuritySSL.trustAllCerts[0]) + .hostnameVerifier(NoSecuritySSL.buildHostnameVerifyer()) + .cookieJar(cookies.getJar()) + .followRedirects(true) + 
.followSslRedirects(true) + .connectTimeout(8, TimeUnit.SECONDS) + .readTimeout(10, TimeUnit.SECONDS) + .writeTimeout(10, TimeUnit.SECONDS) + .build(); + } + + public boolean hasCookies() { + return cookies.hasCookies(); + } + + public void clearCookies() { + cookies.clear(); + } + + @Inject + public HttpFetcher(@Named("user-agent") String userAgent) { + this.userAgent = userAgent; + } + + @SneakyThrows + public FetchResult probeDomain(EdgeUrl url) { + var head = new Request.Builder().head().addHeader("User-agent", userAgent) + .url(new EdgeUrl(url.proto, url.domain, url.port, "/").toString()) + .build(); + + var call = client.newCall(head); + + try (var rsp = call.execute()) { + var requestUrl = rsp.request().url().toString(); + EdgeDomain requestDomain = new EdgeUrl(requestUrl).domain; + + if (!Objects.equals(requestDomain, url.domain)) { + return new FetchResult(FetchResultState.REDIRECT, requestDomain); + } + return new FetchResult(FetchResultState.OK, requestDomain); + } + catch (Exception ex) { + return new FetchResult(FetchResultState.ERROR, url.domain); + } + + + } + + @SneakyThrows + public EdgeRawPageContents fetchContent(EdgeUrl url) { + if (isUrlLikeBinary(url) && !probeContentType(url)) { + return null; + } + + var get = new Request.Builder().get().addHeader("User-agent", userAgent) + .url(url.toString()) + .addHeader("Accept-Encoding", "gzip") + .build(); + + var call = client.newCall(get); + + try (var rsp = call.execute()) { + if (rsp.code() >= 400) { + throw new NetworkException("Bad status " + rsp.code()); + } + return extractBody(url, rsp); + } + } + + private final Predicate probableHtmlPattern = Pattern.compile("^.*\\.(htm|html|php|txt)(\\?.*)?$").asPredicate(); + private final Predicate probableBinaryPattern = Pattern.compile("^.*\\.[a-z]+$").asPredicate(); + + public boolean isUrlLikeBinary(EdgeUrl url) { + String urlString = url.toString().toLowerCase(); + + return (!probableHtmlPattern.test(urlString) && 
probableBinaryPattern.test(urlString)); + } + + @SneakyThrows + private boolean probeContentType(EdgeUrl url) { + logger.debug("Probing suspected binary {}", url); + + var head = new Request.Builder().get().addHeader("User-agent", userAgent) + .url(url.toString()) + .addHeader("Accept-Encoding", "gzip") + .build(); + + var call = client.newCall(head); + + try (var rsp = call.execute()) { + if (rsp.code() >= 400) { + throw new NetworkException("Bad status " + rsp.code()); + } + var contentTypeHeader = rsp.header("Content-type"); + if (contentTypeHeader != null && !isAllowableContentType(contentTypeHeader)) { + return false; + } + } + + return true; + } + + @SneakyThrows + private EdgeRawPageContents extractBody(EdgeUrl url, Response response) { + try { + var body = response.body(); + if (null == body) { + throw new NetworkException("No body in response"); + } + + var byteStream = body.byteStream(); + if (null == byteStream) { + throw new NetworkException("No body in response"); + } + if ("gzip".equals(response.header("Content-encoding"))) { + byteStream = new GZIPInputStream(byteStream); + } + byteStream = new BOMInputStream(byteStream); + + var contentTypeHeader = response.header("Content-type"); + if (contentTypeHeader != null && !isAllowableContentType(contentTypeHeader)) { + throw new BadContentType(contentTypeHeader); + } + + byte[] data = byteStream.readNBytes(maxFetchSize); + + var contentType = ContentTypeParser.parse(contentTypeHeader, data); + if (!isAllowableContentType(contentType.contentType)) { + throw new BadContentType(contentType.contentType); + } + + if ("Shift_JIS".equalsIgnoreCase(contentType.charset)) { + throw new BadContentType(contentType.contentType); + } + + var strData = new String(data, Charset.forName(contentType.charset)); + + return new EdgeRawPageContents(url, + getRedirectUrl(url, response), + strData, + contentType, + InetAddress.getByName(url.domain.getAddress()).getHostAddress(), + hasCookies(), + LocalDateTime.now().toString()); 
+ } + catch (IOException ex) { + throw new NetworkException(ex); + } + } + + private EdgeUrl getRedirectUrl(EdgeUrl url, Response response) throws URISyntaxException { + + final String canonicalHeader = response.header("rel=canonical"); + if (null != canonicalHeader) { + var ret = linkParser.parseLink(url, canonicalHeader); + return ret.orElse(url); + } + + var responseUrl = new EdgeUrl(response.request().url().toString()); + if (!responseUrl.equals(url)) { + return new EdgeUrl(response.request().url().toString()); + } + + return url; + } + + private boolean isAllowableContentType(String contentType) { + return allowAllContentTypes || contentType.startsWith("text") + || contentType.startsWith("application/xhtml") + || contentType.startsWith("application/xml") + || contentType.startsWith("application/atom+xml") + || contentType.startsWith("application/rss+xml") + || contentType.startsWith("application/x-rss+xml") + || contentType.startsWith("application/rdf+xml") + || contentType.startsWith("x-rss+xml"); + } + + + public static class BadContentType extends RuntimeException { + public BadContentType(String type) { + super(type); + } + + @Override + public Throwable fillInStackTrace() { + return this; + } + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/fetcher/HttpRedirectResolver.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/fetcher/HttpRedirectResolver.java new file mode 100644 index 00000000..44def33f --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/fetcher/HttpRedirectResolver.java @@ -0,0 +1,105 @@ +package nu.marginalia.wmsa.edge.crawler.fetcher; + +import com.google.inject.Inject; +import com.google.inject.Singleton; +import com.google.inject.name.Named; +import io.reactivex.rxjava3.core.Observable; +import lombok.SneakyThrows; +import nu.marginalia.wmsa.client.exception.NetworkException; +import nu.marginalia.wmsa.edge.crawler.domain.LinkParser; +import 
nu.marginalia.wmsa.edge.model.EdgeUrl; +import okhttp3.Call; +import okhttp3.OkHttpClient; +import okhttp3.Request; +import okhttp3.Response; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.net.ssl.X509TrustManager; +import java.util.concurrent.TimeUnit; + +@Singleton +public class HttpRedirectResolver { + private static final LinkParser linkParser = new LinkParser(); + + private final Logger logger = LoggerFactory.getLogger(getClass()); + private final String userAgent; + private Cookies cookies = new Cookies(); + + private final OkHttpClient client = createClient(); + + @SneakyThrows + private OkHttpClient createClient() { + + return new OkHttpClient.Builder() + .sslSocketFactory(NoSecuritySSL.buildSocketFactory(), (X509TrustManager) NoSecuritySSL.trustAllCerts[0]) + .hostnameVerifier(NoSecuritySSL.buildHostnameVerifyer()) + .cookieJar(cookies.getJar()) + .followRedirects(false) + .followSslRedirects(false) + .connectTimeout(8, TimeUnit.SECONDS) + .build(); + } + + @Inject + public HttpRedirectResolver(@Named("user-agent") String userAgent) { + this.userAgent = userAgent; + } + + @SneakyThrows + public Observable probe(EdgeUrl url) { + return probe(url, 0); + } + + private Observable probe(EdgeUrl url, int depth) { + if (depth > 10) { + return Observable.error(new IllegalStateException("Too many redirects")); + } + if (!url.proto.toLowerCase().startsWith("http")) { + return Observable.empty(); + } + var head = new Request.Builder().get().addHeader("User-agent", userAgent) + .url(url.toString()) + .addHeader("Accept-Encoding", "gzip") + .build(); + + return Observable.just(client.newCall(head)) + .map(Call::execute) + .flatMap(data -> resolveRedirects(depth, url, data)) + .timeout(10, TimeUnit.SECONDS); + } + + @SneakyThrows + private Observable resolveRedirects(int depth, EdgeUrl url, Response response) { + int code = response.code(); + response.close(); + + if (code < 300) { + return Observable.just(url); + } + if (code < 309) { + 
String newUrl = response.header("Location"); + return Observable.fromOptional(linkParser.parseLink(url, newUrl)) + .flatMap(u -> probe(u, depth + 1)); + } + if (code >= 400) { + return Observable.just(url); + } + return Observable.error(new IllegalStateException("HttpStatusCode " + code)); + } + + + private boolean failOnBadStatus(Response response) { + if (response.code() >= 400) { + response.close(); + throw new NetworkException("Bad status " + response.code()); + } + return true; + }; + + public static class BadContentType extends RuntimeException { + public BadContentType(String type) { + super(type); + } + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/fetcher/NoSecuritySSL.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/fetcher/NoSecuritySSL.java new file mode 100644 index 00000000..ba70f868 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/fetcher/NoSecuritySSL.java @@ -0,0 +1,44 @@ +package nu.marginalia.wmsa.edge.crawler.fetcher; + +import lombok.SneakyThrows; + +import javax.net.ssl.*; +import java.security.cert.CertificateException; +import java.security.cert.X509Certificate; + +public class NoSecuritySSL { + + // Create a trust manager that does not validate certificate chains + public static final TrustManager[] trustAllCerts = new TrustManager[]{ + new X509TrustManager() { + @Override + public void checkClientTrusted(java.security.cert.X509Certificate[] chain, + String authType) throws CertificateException { + } + + @Override + public void checkServerTrusted(java.security.cert.X509Certificate[] chain, + String authType) throws CertificateException { + } + + @Override + public java.security.cert.X509Certificate[] getAcceptedIssuers() { + return new X509Certificate[0]; + } + } + }; + + + @SneakyThrows + public static SSLSocketFactory buildSocketFactory() { + // Install the all-trusting trust manager + final SSLContext sslContext = SSLContext.getInstance("SSL"); + 
sslContext.init(null, trustAllCerts, new java.security.SecureRandom()); + // Create an ssl socket factory with our all-trusting manager + return sslContext.getSocketFactory(); + } + + public static HostnameVerifier buildHostnameVerifyer() { + return (hn, session) -> true; + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/worker/CrawlerDiscoverWorker.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/worker/CrawlerDiscoverWorker.java new file mode 100644 index 00000000..509706eb --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/worker/CrawlerDiscoverWorker.java @@ -0,0 +1,131 @@ +package nu.marginalia.wmsa.edge.crawler.worker; + +import io.reactivex.rxjava3.core.Observable; +import nu.marginalia.wmsa.edge.crawler.domain.DomainCrawlerFactory; +import nu.marginalia.wmsa.edge.crawler.fetcher.HttpRedirectResolver; +import nu.marginalia.wmsa.edge.crawler.worker.facade.TaskProvider; +import nu.marginalia.wmsa.edge.crawler.worker.results.DomainAliasResult; +import nu.marginalia.wmsa.edge.crawler.worker.results.DomainCrawlerWorkerResults; +import nu.marginalia.wmsa.edge.crawler.worker.results.InvalidTaskResult; +import nu.marginalia.wmsa.edge.crawler.worker.results.WorkerResults; +import nu.marginalia.wmsa.edge.model.crawl.EdgeIndexTask; +import nu.marginalia.wmsa.edge.model.EdgeUrl; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Objects; +import java.util.Optional; +import java.util.concurrent.LinkedBlockingQueue; + +public class CrawlerDiscoverWorker implements Worker { + + private HttpRedirectResolver redirectResolver; + private TaskProvider taskProvider; + private final DomainCrawlerFactory domainCrawlerFactory; + private final IpBlockList blockList; + private final LinkedBlockingQueue queue; + + private final Logger logger = LoggerFactory.getLogger(getClass()); + + public CrawlerDiscoverWorker( + DomainCrawlerFactory domainCrawlerFactory, TaskProvider 
taskProvider, HttpRedirectResolver redirectResolver, + IpBlockList blockList, LinkedBlockingQueue queue) + { + this.redirectResolver = redirectResolver; + this.taskProvider = taskProvider; + this.domainCrawlerFactory = domainCrawlerFactory; + this.blockList = blockList; + this.queue = queue; + } + + @Override + public void runCycle() throws InterruptedException { + + var ingress = taskProvider.getDiscoverTask(); + + + if (ingress.isEmpty()) { + wmsa_edge_crawler_idle_worker.inc(); + Thread.sleep(1000); + return; + } + + try { + if (ingress.rank > 0.25 && !blockList.isAllowed(ingress.domain) + ) { + logger.info("{} IP-blacklisted", ingress.domain); + queue.put(new InvalidTaskResult(ingress.domain, "IP blacklisted")); + return; + } + + Optional results + = resolveRedirects(ingress); + if (results.isPresent()) { + queue.put(results.get()); + return; + } + + long start = System.currentTimeMillis(); + + var dc = domainCrawlerFactory.domainCrawler(ingress); + var res = dc.crawl(); + + wmsa_edge_crawler_thread_run_times.observe(System.currentTimeMillis() - start); + + queue.put(new DomainCrawlerWorkerResults(res)); + } + catch (RuntimeException ex) { + logger.warn("Leaking {}", ingress.domain); + logger.error("Uncaught exception", ex); + } + catch (StackOverflowError er) { + logger.error("Stack Overflow on {}", ingress.domain); + queue.put(new InvalidTaskResult(ingress.domain, "Stack overflow")); + } + catch (InterruptedException e) { + logger.warn("ex", e); + } + } + + @Override + public void run() { + try { + for (;;) { + runCycle(); + } + } + catch (InterruptedException ex) { + logger.error("Interrupted", ex); + } + catch (Throwable t) { + logger.error("Fetcher thread terminating on uncaught exception", t); + throw t; + } + } + + private Optional resolveRedirects(EdgeIndexTask ingress) { + try { + EdgeUrl firstUrl = ingress.urls.get(0); + EdgeUrl homeUrl = new EdgeUrl(firstUrl.proto, firstUrl.domain, firstUrl.port, "/"); + + EdgeUrl[] resolvedUrl = 
Observable.just(homeUrl) + .flatMap(url -> redirectResolver.probe(url).onErrorComplete()) + .blockingStream().toArray(EdgeUrl[]::new); + + if (resolvedUrl.length == 0) { + return Optional.of(new InvalidTaskResult(ingress.domain, "Failed to resolve redirect 1 @ " + ingress.urls.get(0))); + } + if (Objects.equals(resolvedUrl[0].domain, ingress.domain)) { + return Optional.empty(); + } + + logger.debug("Aliased domain {} -> {}", ingress.domain, resolvedUrl[0].domain); + + return Optional.of(new DomainAliasResult(ingress.domain, resolvedUrl[0].domain, resolvedUrl)); + } + catch (Exception ex) { + logger.info("Could not alias ingress {}", ingress.domain); + } + return Optional.of(new InvalidTaskResult(ingress.domain, "Failed to resolve redirect 2")); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/worker/CrawlerIndexWorker.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/worker/CrawlerIndexWorker.java new file mode 100644 index 00000000..c7ec5643 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/worker/CrawlerIndexWorker.java @@ -0,0 +1,89 @@ +package nu.marginalia.wmsa.edge.crawler.worker; + +import nu.marginalia.wmsa.edge.crawler.domain.DomainCrawlerFactory; +import nu.marginalia.wmsa.edge.crawler.worker.facade.TaskProvider; +import nu.marginalia.wmsa.edge.crawler.worker.results.DomainCrawlerWorkerResults; +import nu.marginalia.wmsa.edge.crawler.worker.results.InvalidTaskResult; +import nu.marginalia.wmsa.edge.crawler.worker.results.WorkerResults; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.concurrent.LinkedBlockingQueue; + +public class CrawlerIndexWorker implements Worker { + private final DomainCrawlerFactory domainCrawlerFactory; + private final TaskProvider taskProvider; + private final IpBlockList blockList; + + private final LinkedBlockingQueue queue; + private final int pass; + + private final Logger logger = LoggerFactory.getLogger(getClass()); + 
+ public CrawlerIndexWorker( + DomainCrawlerFactory domainCrawlerFactory, + TaskProvider taskProvider, + IpBlockList blockList, LinkedBlockingQueue queue, int pass) { + this.domainCrawlerFactory = domainCrawlerFactory; + this.taskProvider = taskProvider; + this.blockList = blockList; + this.queue = queue; + this.pass = pass; + } + + @Override + public void runCycle() throws InterruptedException { + + var ingress = taskProvider + .getIndexTask(pass); + + if (ingress.isEmpty()) { + wmsa_edge_crawler_idle_worker.inc(); + Thread.sleep(100); + return; + } + try { + if (ingress.rank > 0.25 && !blockList.isAllowed(ingress.domain)) { + queue.put(new InvalidTaskResult(ingress.domain, "IP blocked")); + logger.info("{} IP-blacklisted", ingress.domain); + return; + } + + long start = System.currentTimeMillis(); + + var dc = domainCrawlerFactory.domainCrawler(ingress); + var res = dc.crawl(); + + wmsa_edge_crawler_thread_run_times.observe(System.currentTimeMillis() - start); + + queue.put(new DomainCrawlerWorkerResults(res)); + } + catch (RuntimeException ex) { + logger.warn("Leaking {}", ingress.domain); + logger.error("Uncaught exception", ex); + } + catch (StackOverflowError er) { + logger.error("Stack Overflow on {}", ingress.domain); + queue.put(new InvalidTaskResult(ingress.domain, "Stack overflow")); + } + catch (InterruptedException e) { + throw e; + } + } + + @Override + public void run() { + for (;;) { + try { + runCycle(); + } + catch (InterruptedException ex) { + logger.error("Interrupted", ex); + break; + } + catch (Exception ex) { + logger.error("Uncaught exception in Fetcher thread", ex); + } + } + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/worker/GeoIpBlocklist.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/worker/GeoIpBlocklist.java new file mode 100644 index 00000000..482f5bbf --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/worker/GeoIpBlocklist.java @@ -0,0 +1,101 @@ 
+package nu.marginalia.wmsa.edge.crawler.worker; + +import com.google.common.cache.Cache; +import com.google.common.cache.CacheBuilder; +import com.google.inject.Singleton; +import com.opencsv.CSVReader; +import com.opencsv.exceptions.CsvValidationException; +import lombok.AllArgsConstructor; +import nu.marginalia.wmsa.edge.model.EdgeDomain; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.io.InputStreamReader; +import java.net.InetAddress; +import java.nio.charset.StandardCharsets; +import java.util.Objects; +import java.util.Set; +import java.util.TreeMap; +import java.util.concurrent.ExecutionException; + +@Singleton +public class GeoIpBlocklist { + private final TreeMap ranges = new TreeMap<>(); + private Set blacklist = Set.of("CN", "HK"); + private Set graylist = Set.of("RU", "TW", "IN", "ZA", "SG", "UA"); + + private final Cache countryCache = CacheBuilder.newBuilder().maximumSize(100_000).build(); + + private static final Logger logger = LoggerFactory.getLogger(GeoIpBlocklist.class); + + @AllArgsConstructor + static class IpRange { + public final long from; + public final long to; + public final String country; + }; + + public GeoIpBlocklist() throws IOException, CsvValidationException { + var resource = Objects.requireNonNull(ClassLoader.getSystemResourceAsStream("IP2LOCATION-LITE-DB1.CSV"), + "Could not load IP location db"); + + try (var reader = new CSVReader(new InputStreamReader(resource, StandardCharsets.UTF_8))) { + for (;;) { + String[] vals = reader.readNext(); + if (vals == null) { + break; + } + if (!(blacklist.contains(vals[2]) || graylist.contains(vals[2]))) { + continue; + } + var range = new GeoIpBlocklist.IpRange(Long.parseLong(vals[0]), + Long.parseLong(vals[1]), + vals[2]); + ranges.put(range.from, range); + } + } + + logger.info("Loaded {} IP ranges", ranges.size()); + } + + public String getCountry(InetAddress address) { + byte[] bytes = address.getAddress(); + long ival = 
((long)bytes[0]&0xFF) << 24 | ((long)bytes[1]&0xFF) << 16 | ((long)bytes[2]&0xFF)<< 8 | ((long)bytes[3]&0xFF); + + Long key = ranges.floorKey(ival); + if (null == key) { + return "-"; + } + + var range = ranges.get(key); + if (ival >= key && ival < range.to) { + return range.country; + } + + return "-"; + } + + public boolean isAllowed(EdgeDomain domain) { + String country = getCountry(domain); + + if (blacklist.contains(country)) { + return false; + } + if (graylist.contains(country)) { + return "www".equals(domain.subDomain); + } + + return true; + } + + public String getCountry(EdgeDomain domain) { + try { + return getCountry(InetAddressCache.getAddress(domain)); + } + catch (Throwable ex) { + logger.debug("Failed to resolve {}", domain); + return "-"; + } + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/worker/InetAddressCache.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/worker/InetAddressCache.java new file mode 100644 index 00000000..3bf5d616 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/worker/InetAddressCache.java @@ -0,0 +1,23 @@ +package nu.marginalia.wmsa.edge.crawler.worker; + +import com.google.common.cache.Cache; +import com.google.common.cache.CacheBuilder; +import nu.marginalia.wmsa.edge.model.EdgeDomain; + +import java.net.InetAddress; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeUnit; + +public class InetAddressCache { + private static Cache cache = CacheBuilder.newBuilder().maximumSize(10_000_000).expireAfterAccess(1, TimeUnit.HOURS).build(); + public static InetAddress getAddress(EdgeDomain domain) throws Throwable { + try { + return cache.get(domain, ()->{ + return InetAddress.getByName(domain.getAddress()); + }); + } + catch (ExecutionException ex) { + throw ex.getCause(); + } + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/worker/IpBlockList.java 
b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/worker/IpBlockList.java new file mode 100644 index 00000000..ce33abcb --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/worker/IpBlockList.java @@ -0,0 +1,88 @@ +package nu.marginalia.wmsa.edge.crawler.worker; + +import com.google.common.cache.Cache; +import com.google.common.cache.CacheBuilder; +import com.google.inject.Inject; +import com.google.inject.Singleton; +import nu.marginalia.wmsa.edge.model.EdgeDomain; +import org.apache.commons.net.util.SubnetUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.net.InetAddress; +import java.net.UnknownHostException; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.List; +import java.util.Objects; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeUnit; +import java.util.function.Predicate; +import java.util.regex.Pattern; + +@Singleton +public class IpBlockList { + private final GeoIpBlocklist geoIpBlocklist; + private final Logger logger = LoggerFactory.getLogger(getClass()); + private final List badSubnets = new ArrayList<>(); + + @Inject + public IpBlockList(GeoIpBlocklist geoIpBlocklist) { + this.geoIpBlocklist = geoIpBlocklist; + + var resource = Objects.requireNonNull( + ClassLoader.getSystemResourceAsStream("ip-banned-cidr.txt"), + "Could not load IP blacklist"); + + try (var reader = new BufferedReader(new InputStreamReader(resource, StandardCharsets.UTF_8))) { + + for (;;) { + var cidr = reader.readLine(); + if (cidr == null) { + break; + } + if (!cidr.isBlank() && !cidr.startsWith("#") && !cidr.contains(":")) { + badSubnets.add(new SubnetUtils(cidr).getInfo()); + } + } + } catch (IOException e) { + logger.error("Failed to read IP list"); + } + + logger.info("Loaded {} CIDRs", badSubnets.size()); + } + + Predicate 
numericPattern = Pattern.compile(".*\\d{4}.*").asMatchPredicate(); + + public boolean isAllowed(EdgeDomain domain) { + if (domain.domain.endsWith(".cn")) { + logger.debug("Blocking {} on .cn-end", domain); + return false; + } + if (numericPattern.test(domain.toString())) { + logger.debug("Blocking {} on numeric", domain); + return false; + } + + try { + var hostAddress = InetAddressCache.getAddress(domain).getHostAddress(); + var subnet = badSubnets.stream().filter(sn -> sn.isInRange(hostAddress)).findFirst(); + if (subnet.isPresent()) { + logger.debug("Blocking {} on IP range: {}", domain, subnet.get()); + return false; + } + } catch (Throwable t) { + return false; + } + + var geo = geoIpBlocklist.isAllowed(domain); + if (!geo) { + logger.debug("Blocking {} on geo blocklist", domain); + } + return geo; + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/worker/UploaderWorker.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/worker/UploaderWorker.java new file mode 100644 index 00000000..08b71868 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/worker/UploaderWorker.java @@ -0,0 +1,200 @@ +package nu.marginalia.wmsa.edge.crawler.worker; + +import io.prometheus.client.Counter; +import io.prometheus.client.Histogram; +import lombok.SneakyThrows; +import nu.marginalia.wmsa.edge.crawler.domain.DomainCrawlResults; +import nu.marginalia.wmsa.edge.crawler.worker.facade.UploadFacade; +import nu.marginalia.wmsa.edge.crawler.worker.results.WorkerResults; +import nu.marginalia.wmsa.edge.model.EdgeDomain; +import nu.marginalia.wmsa.edge.model.crawl.EdgeDomainIndexingState; +import nu.marginalia.wmsa.edge.model.EdgeUrl; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Arrays; +import java.util.List; +import java.util.OptionalDouble; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.TimeUnit; + +public class UploaderWorker implements Runnable { 
+ private final List> queues; + private final UploadFacade uploadFacade; + private final Logger logger = LoggerFactory.getLogger(getClass()); + + private final static double UNKNOWN_SITE_ATTRACTOR = -2.5; + public final static double QUALITY_LOWER_BOUND_CUTOFF = -15; + + private static final Counter wmsa_edge_crawler_pages_indexed = Counter.build("wmsa_edge_crawler_pages_indexed", "Pages Indexed") + .register(); + private static final Counter wmsa_edge_crawler_domains_indexed = Counter.build("wmsa_edge_crawler_domains_indexed", "Domains Indexed") + .register(); + private static final Counter wmsa_edge_crawler_links_discovered = Counter.build("wmsa_edge_crawler_links_discovered", "Links Discovered") + .register(); + private static final Counter wmsa_edge_crawler_duds = Counter.build("wmsa_edge_crawler_duds", "Duds") + .register(); + private static final Counter wmsa_edge_crawler_domain_alias = Counter.build("wmsa_edge_crawler_domain_alias", "Alias") + .register(); + private static final Histogram wmsa_edge_crawler_publish_time = Histogram.build("wmsa_edge_crawler_publish_time", "Post wait") + .register(); + private static final Histogram wmsa_uploader_job_wait_time = Histogram.build("wmsa_uploader_job_wait_time", "Underrun Time") + .register(); + + public UploaderWorker(List> queues, UploadFacade uploadFacade) { + this.queues = queues; + this.uploadFacade = uploadFacade; + } + + volatile int queueDepth = 0; + + @SneakyThrows + @Override + public void run() { + uploaderThread(); + } + + @SneakyThrows + private void uploaderThread() { + for (;;) { + long waitStart = System.currentTimeMillis(); + int waitTicks = 0; + + updateQueueDepth(); + + for (var queue : queues) { + WorkerResults res; + + if (waitTicks++ < queues.size()) { + res = queue.poll(); + } + else { + res = queue.poll(10, TimeUnit.MILLISECONDS); + } + + try { + if (null != res) { + waitTicks = 0; + wmsa_uploader_job_wait_time.observe(System.currentTimeMillis() - waitStart); + res.upload(this); + waitStart 
= System.currentTimeMillis(); + } + } catch (Exception ex) { + logger.error("Error", ex); + } + } + } + } + + private void updateQueueDepth() { + int qd = 0; + for (var queue : queues) { + qd += queue.size(); + } + queueDepth = qd; + } + + + @SneakyThrows + public void onDomainCrawlResults(DomainCrawlResults dc) { + while (uploadFacade.isBlocked()) { + Thread.sleep(1000); + } + var domain = dc.domain; + + long start = System.currentTimeMillis(); + + updateStatsForResults(dc); + + double avgQuality = calculateMedianQuality(dc).orElse(-5.); + + if (logger.isInfoEnabled()) { + + String log = String.format("QD:%2d\t%2d\tQ:%4.2f\tR:%4.2f\t%3d\t%4.2f\t%s", + queueDepth, dc.pass, + Math.round(100 * avgQuality) / 100., + Math.round(10000 * (1 - dc.rank)) / 100., + dc.pageContents.size(), + (System.currentTimeMillis() - dc.crawlStart) / 1000., + domain); + + logger.info(log); + } + + uploadResults(dc, avgQuality); + + double depthPenalty = dc.pass / 250.; + uploadFacade.finishTask(domain, avgQuality - depthPenalty, EdgeDomainIndexingState.ACTIVE); + + wmsa_edge_crawler_publish_time.observe(System.currentTimeMillis() - start); + } + + private void updateStatsForResults(DomainCrawlResults dc) { + if (dc.pageContents.isEmpty()) { + wmsa_edge_crawler_duds.inc(); + } + + wmsa_edge_crawler_domains_indexed.inc(); + wmsa_edge_crawler_pages_indexed.inc(dc.pageContents.size()); + wmsa_edge_crawler_links_discovered.inc(dc.extUrl.size()); + } + + public static OptionalDouble calculateMedianQuality(DomainCrawlResults dc) { + + double[] qualities = dc.pageContents.values().stream().mapToDouble(page -> page.metadata.quality()).sorted().toArray(); + if (qualities.length <= 5) { + return Arrays.stream(qualities).average(); + } + else { + return OptionalDouble.of(qualities[qualities.length/2]); + } + } + + public static double calculateExternalLinkPenalty(DomainCrawlResults dc) { + return dc.extUrl.size() / ((1+dc.pageContents.size())*50.); + } + + private void 
uploadResults(DomainCrawlResults dc, double avgQuality) { + + final double extLinkPenalty = calculateExternalLinkPenalty(dc); + if (uploadFacade.isBlacklisted(dc.domain)) { + return; + } + + final double linkQualityRating = -5; //(avgQuality + UNKNOWN_SITE_ATTRACTOR)/2 - extLinkPenalty; + + uploadFacade.putUrls(dc.extUrl, linkQualityRating); + uploadFacade.putUrls(dc.intUrl, linkQualityRating); + uploadFacade.putUrlVisits(dc.visits()); + uploadFacade.putFeeds(dc.feeds); + + if (avgQuality < QUALITY_LOWER_BOUND_CUTOFF) { + return; + } + + uploadFacade.putLinks(dc.links, false); + uploadFacade.putWords(dc.pageContents.values(), 0); + } + + public void onDomainAlias(EdgeDomain source, EdgeDomain dest, EdgeUrl[] urls) { + wmsa_edge_crawler_domain_alias.inc(); + + if (urls.length == 0) { + wmsa_edge_crawler_duds.inc(); + } + + long start = System.currentTimeMillis(); + uploadFacade.putDomainAlias(source, dest); + uploadFacade.putUrls(Arrays.asList(urls), -2); + uploadFacade.finishTask(source, -1000, EdgeDomainIndexingState.REDIR); + + wmsa_edge_crawler_publish_time.observe(System.currentTimeMillis() - start); + } + + public void onInvalidDomain(EdgeDomain domain, String why) { + logger.warn("Setting domain {} state to ERROR: {}", domain, why); + long start = System.currentTimeMillis(); + uploadFacade.finishTask(domain, -1000, EdgeDomainIndexingState.ERROR); + wmsa_edge_crawler_publish_time.observe(System.currentTimeMillis() - start); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/worker/UrlBlocklist.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/worker/UrlBlocklist.java new file mode 100644 index 00000000..9ed749bf --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/worker/UrlBlocklist.java @@ -0,0 +1,55 @@ +package nu.marginalia.wmsa.edge.crawler.worker; + +import nu.marginalia.wmsa.edge.model.EdgeUrl; + +import java.util.ArrayList; +import java.util.List; +import 
java.util.function.Predicate; +import java.util.regex.Pattern; + +public class UrlBlocklist { + private final List> patterns = new ArrayList<>(); + + public UrlBlocklist() { + patterns.add(Pattern.compile(".*/[a-f0-9]{40}(/|$)").asPredicate()); + patterns.add(Pattern.compile("/download(-([A-Za-z]+|[0-9]+)){4,}\\.(htm|html|php)$").asPredicate()); + patterns.add(Pattern.compile("/download(-([A-Za-z]+|[0-9]+)){4,}\\.(htm|html|php)$").asPredicate()); + patterns.add(Pattern.compile("/permalink/[a-z]+(-([A-Za-z]+|[0-9]+)){3,}\\.(htm|html|php)$").asPredicate()); + patterns.add(Pattern.compile("(webrx3|lib|pdf|book|720p).*/[A-Za-z]+(-([A-Za-z]+|[0-9]+)){3,}((-[0-9]+)?/|\\.(php|htm|html))$").asPredicate()); + patterns.add(Pattern.compile("/node/.*/[a-z]+(-[a-z0-9]+)+.htm$").asPredicate()); + patterns.add(Pattern.compile(".*-download-free$").asPredicate()); + } + + public boolean isUrlBlocked(EdgeUrl url) { + try { + if ("github.com".equals(url.domain.domain)) { + return url.path.chars().filter(c -> c == '/').count() > 2; + } + + return patterns.stream().anyMatch(p -> p.test(url.path)); + } + catch (StackOverflowError ex) { + return true; + } + } + + public boolean isForumLink(EdgeUrl linkUrl) { + var path = linkUrl.path; + if (path.startsWith("/forum")) { + return true; + } + if (path.startsWith("/lists/")) { + return true; + } + if (path.startsWith("mailinglist")) { + return true; + } + if (path.contains("phpbb")) { + return true; + } + return false; + } + + + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/worker/Worker.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/worker/Worker.java new file mode 100644 index 00000000..4454ec48 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/worker/Worker.java @@ -0,0 +1,15 @@ +package nu.marginalia.wmsa.edge.crawler.worker; + +import io.prometheus.client.Counter; +import io.prometheus.client.Histogram; + +public interface Worker extends Runnable { + static 
Histogram wmsa_edge_crawler_thread_run_times = + Histogram.build("wmsa_edge_crawler_thread_run_times", "Run Times") + .register(); + static Counter wmsa_edge_crawler_idle_worker = + Counter.build("wmsa_edge_crawler_idle_worke", "No work, no money") + .register(); + + void runCycle() throws InterruptedException; +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/worker/WorkerFactory.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/worker/WorkerFactory.java new file mode 100644 index 00000000..3a58ff9b --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/worker/WorkerFactory.java @@ -0,0 +1,47 @@ +package nu.marginalia.wmsa.edge.crawler.worker; + +import com.google.inject.Inject; +import nu.marginalia.wmsa.edge.crawler.domain.DomainCrawlerFactory; +import nu.marginalia.wmsa.edge.crawler.fetcher.HttpRedirectResolver; +import nu.marginalia.wmsa.edge.crawler.worker.facade.TaskProvider; +import nu.marginalia.wmsa.edge.crawler.worker.facade.UploadFacade; +import nu.marginalia.wmsa.edge.crawler.worker.results.WorkerResults; + +import java.util.List; +import java.util.concurrent.LinkedBlockingQueue; + +public class WorkerFactory { + + private final DomainCrawlerFactory domainCrawlerFactory; + private final TaskProvider taskProvider; + private final HttpRedirectResolver redirectResolver; + private final UploadFacade uploadFacade; + private final IpBlockList blockList; + + @Inject + public WorkerFactory(DomainCrawlerFactory domainCrawlerFactory, + TaskProvider taskProvider, + + HttpRedirectResolver redirectResolver, + UploadFacade uploadFacade, IpBlockList blockList) + { + this.domainCrawlerFactory = domainCrawlerFactory; + this.taskProvider = taskProvider; + this.uploadFacade = uploadFacade; + this.redirectResolver = redirectResolver; + this.blockList = blockList; + } + + public CrawlerIndexWorker buildIndexWorker(LinkedBlockingQueue queue, int pass) { + return new CrawlerIndexWorker(domainCrawlerFactory, 
taskProvider, blockList, queue, pass); + } + + + public CrawlerDiscoverWorker buildDiscoverWorker(LinkedBlockingQueue queue) { + return new CrawlerDiscoverWorker(domainCrawlerFactory, taskProvider, redirectResolver, blockList, queue); + } + + public UploaderWorker buildUploader(List> queues) { + return new UploaderWorker(queues, uploadFacade); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/worker/data/CrawlJobsSpecification.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/worker/data/CrawlJobsSpecification.java new file mode 100644 index 00000000..4e4aa5d7 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/worker/data/CrawlJobsSpecification.java @@ -0,0 +1,8 @@ +package nu.marginalia.wmsa.edge.crawler.worker.data; + +import lombok.AllArgsConstructor; + +@AllArgsConstructor +public class CrawlJobsSpecification { + public final int pass; +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/worker/data/UploaderMetrics.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/worker/data/UploaderMetrics.java new file mode 100644 index 00000000..e325a6b7 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/worker/data/UploaderMetrics.java @@ -0,0 +1,15 @@ +package nu.marginalia.wmsa.edge.crawler.worker.data; + +import lombok.AllArgsConstructor; +import lombok.NoArgsConstructor; +import lombok.With; + +@NoArgsConstructor @AllArgsConstructor @With +public class UploaderMetrics { + public long pagesIndexed = 0L; + public long domainsIndexed = 0L; + public long extLinksDiscovered = 0L; + public long duds = 0L; + public long waitTime = 0L; + public long aliasedDomains = 0L; +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/worker/facade/TaskProvider.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/worker/facade/TaskProvider.java new file mode 100644 index 00000000..73355310 --- /dev/null +++ 
b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/worker/facade/TaskProvider.java @@ -0,0 +1,10 @@ +package nu.marginalia.wmsa.edge.crawler.worker.facade; + +import com.google.inject.ImplementedBy; +import nu.marginalia.wmsa.edge.model.crawl.EdgeIndexTask; + +@ImplementedBy(TaskProviderImpl.class) +public interface TaskProvider { + EdgeIndexTask getIndexTask(int pass); + EdgeIndexTask getDiscoverTask(); +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/worker/facade/TaskProviderImpl.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/worker/facade/TaskProviderImpl.java new file mode 100644 index 00000000..01274d82 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/worker/facade/TaskProviderImpl.java @@ -0,0 +1,47 @@ +package nu.marginalia.wmsa.edge.crawler.worker.facade; + +import nu.marginalia.wmsa.client.exception.RouteNotConfiguredException; +import nu.marginalia.wmsa.configuration.server.Context; +import nu.marginalia.wmsa.edge.director.client.EdgeDirectorClient; +import nu.marginalia.wmsa.edge.model.crawl.EdgeIndexTask; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.inject.Inject; + +public class TaskProviderImpl implements TaskProvider { + + private final EdgeDirectorClient client; + private final Logger logger = LoggerFactory.getLogger(getClass()); + + @Inject + public TaskProviderImpl(EdgeDirectorClient client) { + this.client = client; + } + + @Override + public EdgeIndexTask getIndexTask(int pass) { + try { + return client.getIndexTask(Context.internal(), pass, 100) + .onErrorReturn(t -> new EdgeIndexTask(null, 0, 0, 1.)) + .blockingFirst(); + } + catch (RouteNotConfiguredException ex) { + logger.warn("No route to Director"); + return new EdgeIndexTask(null, 0, 0, 1.); + } + } + + @Override + public EdgeIndexTask getDiscoverTask() { + try { + return client.getDiscoverTask(Context.internal()) + .onErrorReturn(t -> new EdgeIndexTask(null, 0, 0, 1.)) 
+ .blockingFirst(); + } + catch (RouteNotConfiguredException ex) { + logger.warn("No route to Data Store"); + return new EdgeIndexTask(null, 0, 0, 1.); + } + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/worker/facade/UploadFacade.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/worker/facade/UploadFacade.java new file mode 100644 index 00000000..03e63c7c --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/worker/facade/UploadFacade.java @@ -0,0 +1,25 @@ +package nu.marginalia.wmsa.edge.crawler.worker.facade; + +import com.google.inject.ImplementedBy; +import nu.marginalia.wmsa.edge.model.*; +import nu.marginalia.wmsa.edge.model.crawl.EdgeDomainIndexingState; +import nu.marginalia.wmsa.edge.model.crawl.EdgeDomainLink; +import nu.marginalia.wmsa.edge.model.crawl.EdgePageContent; +import nu.marginalia.wmsa.edge.model.crawl.EdgeUrlVisit; + +import java.util.Collection; + +@ImplementedBy(UploadFacadeDirectImpl.class) +public interface UploadFacade { + void putLinks(Collection links, boolean wipeExisting); + void putUrls(Collection urls, double quality); + void putFeeds(Collection urls); + void putUrlVisits(Collection visits); + void putDomainAlias(EdgeDomain src, EdgeDomain dst); + void finishTask(EdgeDomain domain, double quality, EdgeDomainIndexingState state); + + void putWords(Collection pages, int writer); + + boolean isBlacklisted(EdgeDomain domain); + boolean isBlocked(); +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/worker/facade/UploadFacadeDirectImpl.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/worker/facade/UploadFacadeDirectImpl.java new file mode 100644 index 00000000..aa29d787 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/worker/facade/UploadFacadeDirectImpl.java @@ -0,0 +1,145 @@ +package nu.marginalia.wmsa.edge.crawler.worker.facade; + +import com.google.inject.Inject; +import 
io.prometheus.client.Histogram; +import io.reactivex.rxjava3.core.BackpressureStrategy; +import io.reactivex.rxjava3.core.Flowable; +import io.reactivex.rxjava3.core.Observable; +import io.reactivex.rxjava3.schedulers.Schedulers; +import nu.marginalia.wmsa.configuration.server.Context; +import nu.marginalia.wmsa.edge.data.dao.EdgeDataStoreDao; +import nu.marginalia.wmsa.edge.director.client.EdgeDirectorClient; +import nu.marginalia.wmsa.edge.index.client.EdgeIndexClient; +import nu.marginalia.wmsa.edge.model.*; +import nu.marginalia.wmsa.edge.model.crawl.EdgeDomainIndexingState; +import nu.marginalia.wmsa.edge.model.crawl.EdgeDomainLink; +import nu.marginalia.wmsa.edge.model.crawl.EdgePageContent; +import nu.marginalia.wmsa.edge.model.crawl.EdgeUrlVisit; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Collection; +import java.util.concurrent.TimeUnit; + +public class UploadFacadeDirectImpl implements UploadFacade { + private final EdgeDataStoreDao dataStore; + private final EdgeIndexClient indexClient; + private final EdgeDirectorClient directorClient; + private final Logger logger = LoggerFactory.getLogger(getClass()); + + private static final Histogram wmsa_edge_upload_metrics = Histogram + .build("wmsa_edge_upload_metrics", "upload times") + .labelNames("action") + .register(); + + @Inject + public UploadFacadeDirectImpl(EdgeDataStoreDao dataStore, + EdgeIndexClient indexClient, + EdgeDirectorClient directorClient) { + this.dataStore = dataStore; + this.indexClient = indexClient; + this.directorClient = directorClient; + } + + @Override + public void putLinks(Collection links, boolean wipeExisting) { + wmsa_edge_upload_metrics + .labels("putLinks") + .time(() -> { + dataStore.putLink(wipeExisting, links.toArray(EdgeDomainLink[]::new)); + }); + } + + @Override + public void putUrls(Collection urls, double quality) { + wmsa_edge_upload_metrics + .labels("putUrls") + .time(() -> { + dataStore.putUrl(quality, 
urls.toArray(EdgeUrl[]::new)); + }); + } + @Override + public void putFeeds(Collection feeds) { + if (feeds.isEmpty()) { + return; + } + wmsa_edge_upload_metrics + .labels("putFeeds") + .time(() -> { + dataStore.putFeeds(feeds.toArray(EdgeUrl[]::new)); + }); + } + + @Override + public void putUrlVisits(Collection visits) { + wmsa_edge_upload_metrics + .labels("putUrlVisits") + .time(() -> { + dataStore.putUrlVisited(visits.toArray(EdgeUrlVisit[]::new)); + }); + } + + @Override + public void putDomainAlias(EdgeDomain src, EdgeDomain dst) { + wmsa_edge_upload_metrics + .labels("putDomainAlias") + .time(() -> { + dataStore.putDomainAlias(src, dst); + }); + } + + @Override + public void finishTask(EdgeDomain domain, double quality, EdgeDomainIndexingState state) { + wmsa_edge_upload_metrics + .labels("finishTask") + .time(() -> { + directorClient.finishTask(Context.internal(), domain, quality, state).blockingSubscribe(); + }); + } + + @Override + public void putWords(Collection pages, int writer) { + wmsa_edge_upload_metrics + .labels("putWords") + .time(() -> { + Flowable.fromIterable(pages) + + .parallel(4) + .flatMap(page -> indexClient + .putWords(Context.internal(), + dataStore.getDomainId(page.url.domain), + dataStore.getUrlId(page.url), + page.metadata.quality(), + page.words, + writer + + ).subscribeOn(Schedulers.io()) + .retryWhen((Observable f) -> f.take(5).delay(30, TimeUnit.SECONDS)) + .toFlowable(BackpressureStrategy.BUFFER) + ).reduce((a,b)->a) + .blockingSubscribe(); + }); + } + + @Override + public boolean isBlacklisted(EdgeDomain domain) { + return dataStore.isBlacklisted(domain); + } + + @Override + public boolean isBlocked() { + var ctx = Context.internal(); + + try { + return directorClient.isBlocked(ctx).blockingFirst() + || indexClient.isBlocked(ctx).blockingFirst(); + } + catch (Exception ex) { + return false; + } + } + + public void updateDomainIndexTimestamp(EdgeDomain domain, EdgeDomainIndexingState state, EdgeDomain alias, int minIndexed) { + 
dataStore.updateDomainIndexTimestamp(domain, state, alias, minIndexed); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/worker/results/DomainAliasResult.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/worker/results/DomainAliasResult.java new file mode 100644 index 00000000..c6e1ea06 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/worker/results/DomainAliasResult.java @@ -0,0 +1,19 @@ +package nu.marginalia.wmsa.edge.crawler.worker.results; + +import lombok.AllArgsConstructor; +import lombok.ToString; +import nu.marginalia.wmsa.edge.crawler.worker.UploaderWorker; +import nu.marginalia.wmsa.edge.model.EdgeDomain; +import nu.marginalia.wmsa.edge.model.EdgeUrl; + +@AllArgsConstructor @ToString +public class DomainAliasResult implements WorkerResults { + private final EdgeDomain source; + private final EdgeDomain dest; + private final EdgeUrl[] urls; + + @Override + public void upload(UploaderWorker uploader) { + uploader.onDomainAlias(source, dest, urls); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/worker/results/DomainCrawlerWorkerResults.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/worker/results/DomainCrawlerWorkerResults.java new file mode 100644 index 00000000..5c9f4ee6 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/worker/results/DomainCrawlerWorkerResults.java @@ -0,0 +1,16 @@ +package nu.marginalia.wmsa.edge.crawler.worker.results; + +import lombok.AllArgsConstructor; +import lombok.ToString; +import nu.marginalia.wmsa.edge.crawler.domain.DomainCrawlResults; +import nu.marginalia.wmsa.edge.crawler.worker.UploaderWorker; + +@AllArgsConstructor @ToString +public class DomainCrawlerWorkerResults implements WorkerResults { + private final DomainCrawlResults results; + + @Override + public void upload(UploaderWorker uploader) { + uploader.onDomainCrawlResults(results); + } +} diff --git 
a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/worker/results/InvalidTaskResult.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/worker/results/InvalidTaskResult.java new file mode 100644 index 00000000..d4ae6a8c --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/worker/results/InvalidTaskResult.java @@ -0,0 +1,18 @@ +package nu.marginalia.wmsa.edge.crawler.worker.results; + +import lombok.AllArgsConstructor; +import lombok.ToString; +import nu.marginalia.wmsa.edge.crawler.worker.UploaderWorker; +import nu.marginalia.wmsa.edge.model.EdgeDomain; + +@AllArgsConstructor @ToString +public class InvalidTaskResult implements WorkerResults { + private final EdgeDomain domain; + public String why; + + + @Override + public void upload(UploaderWorker uploader) { + uploader.onInvalidDomain(domain, why); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/worker/results/WorkerResults.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/worker/results/WorkerResults.java new file mode 100644 index 00000000..77dfd162 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawler/worker/results/WorkerResults.java @@ -0,0 +1,9 @@ +package nu.marginalia.wmsa.edge.crawler.worker.results; + +import nu.marginalia.wmsa.edge.crawler.worker.UploaderWorker; + +public interface WorkerResults { + + void upload(UploaderWorker uploader); + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawling/AbortMonitor.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawling/AbortMonitor.java new file mode 100644 index 00000000..4a87acb5 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawling/AbortMonitor.java @@ -0,0 +1,47 @@ +package nu.marginalia.wmsa.edge.crawling; + + +import lombok.SneakyThrows; +import nu.marginalia.wmsa.edge.tools.ReindexMain; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import 
java.nio.file.Files;
+import java.nio.file.Path;
+
+/**
+ * Process-wide abort flag driven by a marker file.
+ *
+ * <p>The first call to {@link #getInstance()} starts a background thread that checks once
+ * per second whether {@code /tmp/stop} exists. When it does, the flag is raised (it is never
+ * lowered again) and the marker file is removed.
+ */
+public class AbortMonitor {
+    private static final Logger logger = LoggerFactory.getLogger(AbortMonitor.class);
+
+    /** Marker file whose presence requests an abort. */
+    private static final Path ABORT_FILE = Path.of("/tmp/stop");
+
+    private static volatile AbortMonitor instance = null;
+
+    private volatile boolean abort = false;
+
+    /**
+     * Returns the shared monitor, creating it and starting its polling thread on first use.
+     *
+     * @return the singleton monitor
+     */
+    public static AbortMonitor getInstance() {
+        if (instance == null) {
+            // Lock on this class; the original locked on an unrelated class from the tools package.
+            synchronized (AbortMonitor.class) {
+                if (instance == null) {
+                    instance = new AbortMonitor();
+
+                    Thread poller = new Thread(instance::run, "AbortMon");
+                    // Daemon, so that the endless polling loop cannot keep the JVM alive
+                    // once every real worker thread has finished.
+                    poller.setDaemon(true);
+                    poller.start();
+                }
+            }
+        }
+        return instance;
+    }
+
+    private AbortMonitor() {
+    }
+
+    /**
+     * Polling loop; never returns normally. An {@code InterruptedException} or
+     * {@code IOException} propagates unchecked (via {@code @SneakyThrows}) and ends the thread.
+     */
+    @SneakyThrows
+    public void run() {
+        for (;;) {
+            Thread.sleep(1000);
+            if (Files.exists(ABORT_FILE)) {
+                logger.warn("Abort file found");
+                abort = true;
+                // deleteIfExists: the file may vanish between the check and the delete,
+                // which must not kill the monitor thread.
+                Files.deleteIfExists(ABORT_FILE);
+            }
+        }
+    }
+
+    /**
+     * @return {@code true} until an abort has been requested, {@code false} afterwards
+     */
+    public boolean isAlive() {
+        return !abort;
+    }
+}
diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawling/CrawlJobExtractorMain.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawling/CrawlJobExtractorMain.java
new file mode 100644
index 00000000..a13d630a
--- /dev/null
+++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawling/CrawlJobExtractorMain.java
@@ -0,0 +1,224 @@
+package nu.marginalia.wmsa.edge.crawling;
+
+import com.github.luben.zstd.ZstdOutputStream;
+import com.google.common.hash.HashFunction;
+import com.google.common.hash.Hashing;
+import com.google.gson.Gson;
+import com.google.gson.GsonBuilder;
+import com.zaxxer.hikari.HikariDataSource;
+import nu.marginalia.wmsa.configuration.module.DatabaseModule;
+import nu.marginalia.wmsa.edge.crawling.model.CrawlingSpecification;
+import nu.marginalia.wmsa.edge.data.dao.task.EdgeDomainBlacklistImpl;
+import nu.marginalia.wmsa.edge.model.EdgeDomain;
+import org.mariadb.jdbc.Driver;
+
+import java.io.BufferedOutputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.nio.file.Path;
+import java.sql.Connection;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.nio.charset.StandardCharsets;
+import java.util.*;
+import java.util.stream.Stream;
+
+/**
+ * Command-line tool that writes crawl job specifications as zstd-compressed,
+ * newline-delimited JSON.
+ *
+ * <p>Arguments: {@code output-file [domain ...]}. When domains are given only those are
+ * extracted; otherwise every domain returned by {@code domainsSql} that is not blacklisted
+ * is exported in shuffled order.
+ */
+public class CrawlJobExtractorMain {
+
+    private static final String specificDomainSql =
+            """
+                SELECT ID
+                FROM EC_DOMAIN
+                WHERE URL_PART=?
+            """;
+
+    private static final String domainsSql =
+            """
+                SELECT ID, LOWER(EC_DOMAIN.URL_PART)
+                FROM EC_DOMAIN
+                WHERE QUALITY_RAW>-100
+                AND INDEXED>0
+                AND STATE<2
+                ORDER BY
+                    INDEX_DATE ASC,
+                    DISCOVER_DATE ASC,
+                    STATE DESC,
+                    INDEXED DESC,
+                    EC_DOMAIN.ID
+            """;
+
+    private static final String urlsSql =
+            """
+                SELECT CONCAT(PROTO, "://", ?, URL)
+                FROM EC_URL
+                WHERE DOMAIN_ID=?
+                ORDER BY
+                    VISITED DESC,
+                    DATA_HASH IS NOT NULL DESC,
+                    ID
+                LIMIT 25000
+            """;
+
+    private static final String visitedUrlsSql =
+            """
+                SELECT COUNT(*)
+                FROM EC_URL
+                WHERE DOMAIN_ID=?
+                AND VISITED
+                ;
+            """;
+    private static final int MIN_VISIT_COUNT = 100;
+    private static final int MAX_VISIT_COUNT = 5000;
+
+    private final EdgeDomainBlacklistImpl blacklist;
+
+    private final Connection conn;
+    private final HashFunction hasher = Hashing.murmur3_128(0);
+
+    /**
+     * Entry point.
+     *
+     * @param args {@code args[0]} is the output file; any further arguments are domain names
+     * @throws SQLException if the database connection cannot be obtained
+     * @throws IOException  if the output file cannot be opened
+     */
+    public static void main(String... args) throws SQLException, IOException {
+        if (args.length < 1) {
+            // Previously this surfaced as a bare ArrayIndexOutOfBoundsException.
+            System.err.println("Arguments: output-file [domain ...]");
+            System.exit(1);
+        }
+
+        // NOTE(review): never used afterwards; presumably instantiated to make sure the
+        // MariaDB driver class is loaded - confirm before removing.
+        Driver driver = new Driver();
+        var outFile = Path.of(args[0]);
+
+        Gson gson = new GsonBuilder().create();
+        String[] targetDomains = Arrays.stream(args).skip(1).toArray(String[]::new);
+
+        // The JSON is written as UTF-8 explicitly rather than in the platform default charset.
+        try (var out = new PrintWriter(
+                new ZstdOutputStream(new BufferedOutputStream(new FileOutputStream(outFile.toFile()))),
+                false, StandardCharsets.UTF_8)) {
+            final var extractor = new CrawlJobExtractorMain(new DatabaseModule().provideConnection());
+            final Stream<CrawlingSpecification> jobs;
+
+            if (targetDomains.length > 0) {
+                jobs = Arrays.stream(targetDomains).map(EdgeDomain::new).map(extractor::extractDomain);
+            } else {
+                jobs = extractor.extractDomains();
+            }
+
+            jobs.map(gson::toJson).forEach(out::println);
+        }
+    }
+
+    /** A domain name together with its {@code EC_DOMAIN.ID}. */
+    private record DomainWithId(String domainName, int id) {}
+
+    /**
+     * Loads all candidate domains, shuffles them and lazily maps each non-blacklisted one to a
+     * crawl job. An {@code SQLException} is printed and whatever was read so far is used.
+     */
+    private Stream<CrawlingSpecification> extractDomains() {
+        List<DomainWithId> ids = new ArrayList<>(100_000);
+
+        try (var stmt = conn.prepareStatement(domainsSql)) {
+            stmt.setFetchSize(10_000);
+            var rsp = stmt.executeQuery();
+            while (rsp.next()) {
+                ids.add(new DomainWithId(rsp.getString(2), rsp.getInt(1)));
+            }
+        }
+        catch (SQLException ex) {
+            ex.printStackTrace();
+        }
+
+        // Note that the shuffle discards the ordering requested by the query.
+        Collections.shuffle(ids);
+        return ids.stream()
+                .filter(id -> !blacklist.isBlacklisted(id.id))
+                .map(this::createCrawlJobForDomain);
+    }
+
+    /**
+     * Builds the job for a domain whose id is already known. The URL list is sorted
+     * lexicographically and may be empty.
+     */
+    private CrawlingSpecification createCrawlJobForDomain(DomainWithId domainWithId) {
+        var spec = new CrawlingSpecification();
+        spec.id = createId(domainWithId);
+        spec.domain = domainWithId.domainName;
+        spec.urls = new ArrayList<>();
+        spec.crawlDepth = getCrawlDepth(domainWithId);
+
+        try (var stmt = conn.prepareStatement(urlsSql)) {
+            stmt.setFetchSize(1000);
+            stmt.setString(1, domainWithId.domainName);
+            stmt.setInt(2, domainWithId.id);
+            var rsp = stmt.executeQuery();
+
+            while (rsp.next()) {
+                spec.urls.add(rsp.getString(1));
+            }
+        }
+        catch (SQLException ex) {
+            ex.printStackTrace();
+        }
+
+        spec.urls.sort(Comparator.naturalOrder());
+
+        return spec;
+    }
+
+    /**
+     * @param ds data source used both for the blacklist and for this extractor's own connection
+     * @throws SQLException if no connection can be obtained
+     */
+    public CrawlJobExtractorMain(HikariDataSource ds) throws SQLException {
+        blacklist = new EdgeDomainBlacklistImpl(ds);
+        conn = ds.getConnection();
+    }
+
+    /**
+     * Builds the job for a domain given by name.
+     *
+     * <p>If the domain is not in {@code EC_DOMAIN} the id -1 is used for the follow-up queries.
+     * When no URLs are found the job is seeded with {@code https://<domain>/}.
+     *
+     * @param domain the domain to extract
+     * @return the specification; SQL errors are printed and leave it partially filled
+     */
+    public CrawlingSpecification extractDomain(EdgeDomain domain) {
+        CrawlingSpecification spec = new CrawlingSpecification();
+        spec.domain = domain.toString();
+        spec.id = createId(domain);
+        spec.urls = new ArrayList<>(1000);
+
+        try (var domainQuery = conn.prepareStatement(specificDomainSql);
+             var urlQuery = conn.prepareStatement(urlsSql))
+        {
+            domainQuery.setString(1, domain.toString());
+            ResultSet rsp = domainQuery.executeQuery();
+            int domainId = rsp.next() ? rsp.getInt(1) : -1;
+
+            spec.crawlDepth = getCrawlDepth(new DomainWithId(domain.toString(), domainId));
+
+            urlQuery.setString(1, domain.toString());
+            urlQuery.setInt(2, domainId);
+            urlQuery.setFetchSize(1000);
+            rsp = urlQuery.executeQuery();
+
+            while (rsp.next()) {
+                spec.urls.add(rsp.getString(1));
+            }
+
+        } catch (SQLException e) {
+            e.printStackTrace();
+        }
+
+        if (spec.urls.isEmpty()) {
+            spec.urls.add("https://"+domain+"/");
+        }
+
+        return spec;
+    }
+
+    /** Job id: murmur3-128 hash (seed 0) of the domain name. */
+    private String createId(DomainWithId domainWithId) {
+        return hasher.hashUnencodedChars(domainWithId.domainName).toString();
+    }
+
+    /** Job id: murmur3-128 hash (seed 0) of the domain's string form. */
+    private String createId(EdgeDomain domain) {
+        return hasher.hashUnencodedChars(domain.toString()).toString();
+    }
+
+    /**
+     * Derives the crawl depth from the number of visited URLs of the domain; falls back to
+     * {@code MIN_VISIT_COUNT} when the count cannot be read.
+     */
+    private int getCrawlDepth(DomainWithId domainWithId) {
+        try (var domainQuery = conn.prepareStatement(visitedUrlsSql)) {
+            domainQuery.setInt(1, domainWithId.id);
+            var rsp = domainQuery.executeQuery();
+            if (rsp.next()) {
+                return calculateCrawlDepthFromVisitedCount(rsp.getInt(1));
+            }
+        } catch (SQLException e) {
+            e.printStackTrace();
+        }
+
+        return MIN_VISIT_COUNT;
+    }
+
+    /** {@code count + 100 + count/4}, clamped to [MIN_VISIT_COUNT, MAX_VISIT_COUNT]. */
+    private int calculateCrawlDepthFromVisitedCount(int count) {
+        int depth = count + 100 + count / 4;
+        return Math.max(MIN_VISIT_COUNT, Math.min(MAX_VISIT_COUNT, depth));
+    }
+}
diff --git
a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawling/CrawlJobExtractorPageRankMain.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawling/CrawlJobExtractorPageRankMain.java
new file mode 100644
index 00000000..5865935a
--- /dev/null
+++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawling/CrawlJobExtractorPageRankMain.java
@@ -0,0 +1,203 @@
+package nu.marginalia.wmsa.edge.crawling;
+
+import com.github.luben.zstd.ZstdOutputStream;
+import com.google.common.hash.HashFunction;
+import com.google.common.hash.Hashing;
+import com.google.gson.Gson;
+import com.google.gson.GsonBuilder;
+import com.zaxxer.hikari.HikariDataSource;
+import gnu.trove.list.array.TIntArrayList;
+import nu.marginalia.wmsa.configuration.module.DatabaseModule;
+import nu.marginalia.wmsa.edge.crawling.model.CrawlingSpecification;
+import nu.marginalia.wmsa.edge.data.dao.task.EdgeDomainBlacklistImpl;
+import nu.marginalia.wmsa.edge.index.service.util.ranking.BetterReversePageRank;
+import nu.marginalia.wmsa.edge.model.EdgeDomain;
+import nu.marginalia.wmsa.edge.model.EdgeId;
+import org.mariadb.jdbc.Driver;
+
+import java.io.BufferedOutputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Path;
+import java.sql.Connection;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.util.*;
+import java.util.stream.Stream;
+
+/**
+ * Command-line tool that writes crawl job specifications, as zstd-compressed newline-delimited
+ * JSON, for the domain ids returned by a {@link BetterReversePageRank} run.
+ *
+ * <p>Arguments: {@code output-file}.
+ */
+public class CrawlJobExtractorPageRankMain {
+
+    private static final String specificDomainSql =
+            """
+                SELECT ID
+                FROM EC_DOMAIN
+                WHERE URL_PART=?
+            """;
+    private static final String specificDomainSqlFromId =
+            """
+                SELECT LOWER(URL_PART)
+                FROM EC_DOMAIN
+                WHERE ID=?
+            """;
+
+    private static final String urlsSql =
+            """
+                SELECT CONCAT(PROTO, "://", ?, URL)
+                FROM EC_URL
+                WHERE DOMAIN_ID=?
+                ORDER BY
+                    VISITED DESC,
+                    DATA_HASH IS NOT NULL DESC,
+                    ID
+                LIMIT 25000
+            """;
+
+    private static final String visitedUrlsSql =
+            """
+                SELECT COUNT(*)
+                FROM EC_URL
+                WHERE DOMAIN_ID=?
+                AND VISITED
+                ;
+            """;
+    private static final int MIN_VISIT_COUNT = 100;
+    private static final int MAX_VISIT_COUNT = 5000;
+
+    // Assigned in the constructor but not read anywhere in this class.
+    private final EdgeDomainBlacklistImpl blacklist;
+
+    private final Connection conn;
+    private final HashFunction hasher = Hashing.murmur3_128(0);
+
+    /**
+     * Entry point.
+     *
+     * @param args {@code args[0]} is the output file
+     * @throws SQLException if the database connection cannot be obtained
+     * @throws IOException  if the output file cannot be opened
+     */
+    public static void main(String... args) throws SQLException, IOException {
+        if (args.length < 1) {
+            // Previously this surfaced as a bare ArrayIndexOutOfBoundsException.
+            System.err.println("Arguments: output-file");
+            System.exit(1);
+        }
+
+        // NOTE(review): never used afterwards; presumably instantiated to make sure the
+        // MariaDB driver class is loaded - confirm before removing.
+        Driver driver = new Driver();
+        var outFile = Path.of(args[0]);
+
+        Gson gson = new GsonBuilder().create();
+
+        var rpr = new BetterReversePageRank(new DatabaseModule().provideConnection(), "bikobatanari.art", "sadgrl.online", "wiki.xxiivv.com", "%neocities.org");
+        rpr.setMaxKnownUrls(750);
+
+        var targetDomainIds = rpr.pageRankWithPeripheralNodes(rpr.size(), false);
+
+        // The JSON is written as UTF-8 explicitly rather than in the platform default charset.
+        try (var out = new PrintWriter(
+                new ZstdOutputStream(new BufferedOutputStream(new FileOutputStream(outFile.toFile()))),
+                false, StandardCharsets.UTF_8)) {
+            final var extractor = new CrawlJobExtractorPageRankMain(new DatabaseModule().provideConnection());
+
+            targetDomainIds.forEach(i -> {
+                out.println(gson.toJson(extractor.extractDomain(new EdgeId<>(i))));
+                return true;
+            });
+        }
+    }
+
+    /** A domain name together with its {@code EC_DOMAIN.ID}. */
+    private record DomainWithId(String domainName, int id) {}
+
+    /**
+     * @param ds data source used both for the blacklist and for this extractor's own connection
+     * @throws SQLException if no connection can be obtained
+     */
+    public CrawlJobExtractorPageRankMain(HikariDataSource ds) throws SQLException {
+        blacklist = new EdgeDomainBlacklistImpl(ds);
+        conn = ds.getConnection();
+    }
+
+    /**
+     * Builds the job for a domain given by database id.
+     *
+     * <p>An unknown id yields the empty string as domain name. When no URLs are found the job
+     * is seeded with {@code https://<domain>/}.
+     *
+     * @param domainId id of the domain
+     * @return the specification; SQL errors are printed and leave it partially filled
+     */
+    // NOTE(review): EdgeId is used with a diamond elsewhere in this file, so its type argument
+    // appears to have been lost here; left raw because the intended argument is not visible.
+    public CrawlingSpecification extractDomain(EdgeId domainId) {
+        CrawlingSpecification spec = new CrawlingSpecification();
+        // Allocated before any query runs: the fallback below dereferences the list, which
+        // previously was still null when a statement failed early.
+        spec.urls = new ArrayList<>(1000);
+
+        String domainName = "";
+        try (var domainQuery = conn.prepareStatement(specificDomainSqlFromId);
+             var urlQuery = conn.prepareStatement(urlsSql))
+        {
+            domainQuery.setInt(1, domainId.getId());
+            ResultSet rsp = domainQuery.executeQuery();
+            domainName = rsp.next() ? rsp.getString(1) : "";
+
+            spec.domain = domainName;
+            spec.id = createId(new EdgeDomain(domainName));
+
+            spec.crawlDepth = getCrawlDepth(new DomainWithId(domainName, domainId.getId()));
+
+            urlQuery.setString(1, domainName);
+            urlQuery.setInt(2, domainId.getId());
+            urlQuery.setFetchSize(1000);
+            rsp = urlQuery.executeQuery();
+
+            while (rsp.next()) {
+                spec.urls.add(rsp.getString(1));
+            }
+
+        } catch (SQLException e) {
+            e.printStackTrace();
+        }
+
+        if (spec.urls.isEmpty()) {
+            spec.urls.add("https://"+domainName+"/");
+        }
+
+        return spec;
+    }
+
+    /**
+     * Builds the job for a domain given by name.
+     *
+     * <p>If the domain is not in {@code EC_DOMAIN} the id -1 is used for the follow-up queries.
+     * When no URLs are found the job is seeded with {@code https://<domain>/}.
+     *
+     * @param domain the domain to extract
+     * @return the specification; SQL errors are printed and leave it partially filled
+     */
+    public CrawlingSpecification extractDomain(EdgeDomain domain) {
+        CrawlingSpecification spec = new CrawlingSpecification();
+        spec.domain = domain.toString();
+        spec.id = createId(domain);
+        spec.urls = new ArrayList<>(1000);
+
+        try (var domainQuery = conn.prepareStatement(specificDomainSql);
+             var urlQuery = conn.prepareStatement(urlsSql))
+        {
+            domainQuery.setString(1, domain.toString());
+            ResultSet rsp = domainQuery.executeQuery();
+            int domainId = rsp.next() ? rsp.getInt(1) : -1;
+
+            spec.crawlDepth = getCrawlDepth(new DomainWithId(domain.toString(), domainId));
+
+            urlQuery.setString(1, domain.toString());
+            urlQuery.setInt(2, domainId);
+            urlQuery.setFetchSize(1000);
+            rsp = urlQuery.executeQuery();
+
+            while (rsp.next()) {
+                spec.urls.add(rsp.getString(1));
+            }
+
+        } catch (SQLException e) {
+            e.printStackTrace();
+        }
+
+        if (spec.urls.isEmpty()) {
+            spec.urls.add("https://"+domain+"/");
+        }
+
+        return spec;
+    }
+
+    /** Job id: murmur3-128 hash (seed 0) of the domain's string form. */
+    private String createId(EdgeDomain domain) {
+        return hasher.hashUnencodedChars(domain.toString()).toString();
+    }
+
+    /**
+     * Derives the crawl depth from the number of visited URLs of the domain; falls back to
+     * {@code MIN_VISIT_COUNT} when the count cannot be read.
+     */
+    private int getCrawlDepth(DomainWithId domainWithId) {
+        try (var domainQuery = conn.prepareStatement(visitedUrlsSql)) {
+            domainQuery.setInt(1, domainWithId.id);
+            var rsp = domainQuery.executeQuery();
+            if (rsp.next()) {
+                return calculateCrawlDepthFromVisitedCount(rsp.getInt(1));
+            }
+        } catch (SQLException e) {
+            e.printStackTrace();
+        }
+
+        return MIN_VISIT_COUNT;
+    }
+
+    /** {@code count + 100 + count/4}, clamped to [MIN_VISIT_COUNT, MAX_VISIT_COUNT]. */
+    private int calculateCrawlDepthFromVisitedCount(int count) {
+        count = count + 100 + count / 4;
+
+        if (count < MIN_VISIT_COUNT) {
+            count = MIN_VISIT_COUNT;
+        }
+
+        if (count > MAX_VISIT_COUNT) {
+            count = MAX_VISIT_COUNT;
+        }
+
+        return count;
+    }
+}
diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawling/CrawlPlanLoader.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawling/CrawlPlanLoader.java
new file mode 100644
index 00000000..f4060aff
--- /dev/null
+++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawling/CrawlPlanLoader.java
@@ -0,0 +1,26 @@
+package nu.marginalia.wmsa.edge.crawling;
+
+import nu.marginalia.wmsa.edge.model.EdgeCrawlPlan;
+import org.yaml.snakeyaml.Yaml;
+
+import java.io.FileReader;
+import java.io.IOException;
+import java.nio.file.Path;
+
+/** Loads an {@link EdgeCrawlPlan} from a YAML file. */
+public class CrawlPlanLoader {
+    private final Yaml yaml;
+
+    public CrawlPlanLoader() {
+        yaml = new Yaml();
+    }
+
+    public EdgeCrawlPlan load(Path yamlFile) throws IOException {
+        try (var reader = new
FileReader(yamlFile.toFile())) {
+            return yaml.loadAs(reader, EdgeCrawlPlan.class);
+        }
+        catch (IOException ex) {
+            // Re-thrown with the file name so the failing plan is identifiable; cause preserved.
+            throw new IOException("Failed to load crawl plan " + yamlFile, ex);
+        }
+    }
+
+}
diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawling/CrawledDomainReader.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawling/CrawledDomainReader.java
new file mode 100644
index 00000000..e9b0b72e
--- /dev/null
+++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawling/CrawledDomainReader.java
@@ -0,0 +1,28 @@
+package nu.marginalia.wmsa.edge.crawling;
+
+import com.github.luben.zstd.ZstdInputStream;
+import com.github.luben.zstd.ZstdOutputStream;
+import com.google.gson.Gson;
+import com.google.gson.GsonBuilder;
+import nu.marginalia.wmsa.edge.crawling.model.CrawledDomain;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.*;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+
+/**
+ * Reads a single {@link CrawledDomain} from a zstd-compressed JSON file.
+ */
+public class CrawledDomainReader {
+    private final Gson gson = new GsonBuilder().create();
+    private static final Logger logger = LoggerFactory.getLogger(CrawledDomainReader.class);
+
+    public CrawledDomainReader() {
+    }
+
+    /**
+     * Decompresses and parses the given file.
+     *
+     * @param path zstd-compressed JSON file
+     * @return the value produced by Gson for the file's content
+     * @throws IOException if the file cannot be opened or read
+     */
+    public CrawledDomain read(Path path) throws IOException {
+        // Decode as UTF-8 explicitly; without a charset the platform default would be used.
+        try (var br = new BufferedReader(new InputStreamReader(
+                new ZstdInputStream(new BufferedInputStream(new FileInputStream(path.toFile()))),
+                StandardCharsets.UTF_8))) {
+            return gson.fromJson(br, CrawledDomain.class);
+        }
+    }
+
+}
diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawling/CrawledDomainWriter.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawling/CrawledDomainWriter.java
new file mode 100644
index 00000000..ce0c7216
--- /dev/null
+++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawling/CrawledDomainWriter.java
@@ -0,0 +1,66 @@
+package nu.marginalia.wmsa.edge.crawling;
+
+import com.github.luben.zstd.ZstdOutputStream;
+import com.google.gson.Gson;
+import com.google.gson.GsonBuilder;
+import
nu.marginalia.wmsa.edge.crawling.model.CrawledDomain;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.BufferedOutputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+
+/**
+ * Writes {@link CrawledDomain} objects as zstd-compressed JSON files below an output
+ * directory, fanned out into two levels of sub-directories taken from the job id.
+ */
+public class CrawledDomainWriter {
+    private final Path outputDir;
+    private final Gson gson = new GsonBuilder().create();
+    private static final Logger logger = LoggerFactory.getLogger(CrawledDomainWriter.class);
+
+    /**
+     * @param outputDir existing directory that receives the files
+     * @throws IllegalArgumentException if {@code outputDir} is not a directory
+     */
+    public CrawledDomainWriter(Path outputDir) {
+        this.outputDir = outputDir;
+
+        if (!Files.isDirectory(outputDir)) {
+            throw new IllegalArgumentException("Output dir " + outputDir + " does not exist");
+        }
+    }
+
+    /**
+     * Serializes one crawled domain to its own file.
+     *
+     * @param domainData the data to write; its {@code id} and {@code domain} determine the path
+     * @return the file name (last path element only) that was written
+     * @throws IOException if directories or the file cannot be created or written
+     */
+    public String accept(CrawledDomain domainData) throws IOException {
+        Path outputFile = getOutputFile(domainData.id, domainData.domain);
+
+        // Encode as UTF-8 explicitly; without a charset the platform default would be used.
+        try (var outputStream = new OutputStreamWriter(
+                new ZstdOutputStream(new BufferedOutputStream(new FileOutputStream(outputFile.toFile()))),
+                StandardCharsets.UTF_8)) {
+            logger.info("Writing {} - {}", domainData.id, domainData.domain);
+
+            gson.toJson(domainData, outputStream);
+        }
+
+        return outputFile.getFileName().toString();
+    }
+
+    /**
+     * Resolves {@code <outputDir>/<id[0..2)>/<id[2..4)>/<id>-<safe name>.zstd}, creating the
+     * directories when missing. The id must therefore be at least four characters long.
+     */
+    private Path getOutputFile(String id, String name) throws IOException {
+        String first = id.substring(0, 2);
+        String second = id.substring(2, 4);
+
+        Path destDir = outputDir.resolve(first).resolve(second);
+        if (!Files.exists(destDir)) {
+            Files.createDirectories(destDir);
+        }
+        return destDir.resolve(id + "-" + filesystemSafeName(name) + ".zstd");
+    }
+
+    /**
+     * Lower-cases the name, replaces every character that is not an ASCII letter, digit or
+     * {@code '.'} with {@code 'X'} and keeps at most the first 128 characters.
+     */
+    private String filesystemSafeName(String name) {
+        StringBuilder nameSaneBuilder = new StringBuilder();
+
+        name.chars()
+                .map(Character::toLowerCase)
+                .map(c -> (c & ~0x7F) == 0 ? c : 'X')
+                .map(c -> (Character.isDigit(c) || Character.isAlphabetic(c) || c == '.') ? c : 'X')
+                .limit(128)
+                .forEach(c -> nameSaneBuilder.append((char) c));
+
+        return nameSaneBuilder.toString();
+
+    }
+}
diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawling/CrawlerMain.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawling/CrawlerMain.java
new file mode 100644
index 00000000..ea62e742
--- /dev/null
+++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawling/CrawlerMain.java
@@ -0,0 +1,131 @@
+package nu.marginalia.wmsa.edge.crawling;
+
+import com.google.gson.Gson;
+import com.google.gson.GsonBuilder;
+import nu.marginalia.wmsa.edge.crawling.model.CrawledDomain;
+import nu.marginalia.wmsa.edge.crawling.model.CrawlingSpecification;
+import nu.marginalia.wmsa.edge.crawling.retreival.CrawlerRetreiver;
+import nu.marginalia.wmsa.edge.crawling.retreival.HttpFetcher;
+import nu.marginalia.util.ParallelPipe;
+import nu.marginalia.wmsa.edge.model.EdgeCrawlPlan;
+import okhttp3.Dispatcher;
+import okhttp3.internal.Util;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.*;
+import java.nio.file.Path;
+import java.util.concurrent.Semaphore;
+import java.util.concurrent.SynchronousQueue;
+import java.util.concurrent.ThreadPoolExecutor;
+import java.util.concurrent.TimeUnit;
+
+/**
+ * Crawler entry point: reads crawl specifications from the plan's job spec, fetches each
+ * domain through a {@link ParallelPipe} and records finished jobs in a {@link WorkLog}.
+ */
+public class CrawlerMain implements AutoCloseable {
+    public static Gson gson = new GsonBuilder().create();
+    private final Logger logger = LoggerFactory.getLogger(getClass());
+
+    private final Path inputSpec;
+
+    private final WorkLog workLog;
+    private final CrawledDomainWriter domainWriter;
+
+    private final int numberOfThreads;
+    // Type arguments restored from the anonymous subclass created in the constructor, which
+    // processes CrawlingSpecification and receives CrawledDomain.
+    // NOTE(review): assumes ParallelPipe declares them in (input, result) order - confirm.
+    private final ParallelPipe<CrawlingSpecification, CrawledDomain> pipe;
+    private final Dispatcher dispatcher = new Dispatcher(new ThreadPoolExecutor(0, Integer.MAX_VALUE, 5, TimeUnit.SECONDS,
+            new SynchronousQueue<>(), Util.threadFactory("OkHttp Dispatcher", true)));
+
+
+    public CrawlerMain(EdgeCrawlPlan plan) throws Exception {
+        this.inputSpec = plan.getJobSpec();
+        this.numberOfThreads = 512;
+
+        workLog = new
WorkLog(plan.crawl.getLogFile());
+        domainWriter = new CrawledDomainWriter(plan.crawl.getDir());
+
+        // Each job takes one permit per URL in its specification while it is being fetched,
+        // so the URL lists of all jobs in flight sum to at most 250,000.
+        Semaphore sem = new Semaphore(250_000);
+
+        pipe = new ParallelPipe<>("Crawler", numberOfThreads, 2, 1) {
+            @Override
+            protected CrawledDomain onProcess(CrawlingSpecification crawlingSpecification) throws Exception {
+                int toAcquire = crawlingSpecification.urls.size();
+                sem.acquire(toAcquire);
+                try {
+                    return fetchDomain(crawlingSpecification);
+                }
+                finally {
+                    sem.release(toAcquire);
+                }
+            }
+
+            @Override
+            protected void onReceive(CrawledDomain crawledDomain) throws IOException {
+                writeDomain(crawledDomain);
+            }
+        };
+    }
+
+    /**
+     * Entry point.
+     *
+     * @param args exactly one argument: the crawl plan YAML file
+     * @throws Exception if the plan cannot be loaded or the crawl fails
+     */
+    public static void main(String... args) throws Exception {
+        if (!AbortMonitor.getInstance().isAlive()) {
+            System.err.println("Remove abort file first");
+            return;
+        }
+
+        if (args.length != 1) {
+            System.err.println("Arguments: crawl-plan.yaml");
+            // A usage error is a failure; it used to be reported with exit status 0.
+            System.exit(1);
+        }
+        var plan = new CrawlPlanLoader().load(Path.of(args[0]));
+
+        try (var crawler = new CrawlerMain(plan)) {
+            crawler.run();
+        }
+    }
+
+    /**
+     * Fetches one domain.
+     *
+     * @return the crawl result, or {@code null} when the job is already in the work log or
+     *         the fetch threw (the error is logged)
+     */
+    private CrawledDomain fetchDomain(CrawlingSpecification specification) {
+        if (workLog.isJobFinished(specification.id))
+            return null;
+
+        var fetcher = new HttpFetcher("search.marginalia.nu", dispatcher);
+
+        try {
+            var retreiver = new CrawlerRetreiver(fetcher, specification);
+
+            return retreiver.fetch();
+        } catch (Exception e) {
+            logger.error("Error fetching domain", e);
+            return null;
+        }
+    }
+
+    /** Writes the result to disk and marks the job as finished in the work log. */
+    private void writeDomain(CrawledDomain crawledDomain) throws IOException {
+        // fetchDomain() yields null for skipped or failed jobs. Whether ParallelPipe forwards
+        // such results is not visible here, so guard instead of risking an NPE.
+        if (crawledDomain == null) {
+            return;
+        }
+
+        String name = domainWriter.accept(crawledDomain);
+        workLog.setJobToFinished(crawledDomain.id, name, crawledDomain.size());
+    }
+
+    /**
+     * Parses the whole input once as a validation pass, then feeds it to the pipe and waits
+     * for the pipe to drain. If an abort was requested the pipe's queues are cleared first.
+     *
+     * @throws InterruptedException if interrupted while feeding or joining the pipe
+     */
+    public void run() throws InterruptedException {
+        // First a validation run to ensure the file is all good to parse
+
+        logger.info("Validating JSON");
+        CrawlerSpecificationLoader.readInputSpec(inputSpec, spec -> {});
+
+        logger.info("Starting pipe");
+        CrawlerSpecificationLoader.readInputSpec(inputSpec, pipe::accept);
+
+        if (!AbortMonitor.getInstance().isAlive()) {
+            logger.info("Aborting");
+            pipe.clearQueues();
+        }
+        else {
+            logger.info("All jobs queued, waiting for pipe to finish");
+        }
+        pipe.join();
+
+        logger.info("All finished");
+    }
+
+    /** Closes the work log and shuts the HTTP dispatcher's executor down. */
+    @Override
+    public void close() throws Exception {
+        try {
+            workLog.close();
+        }
+        finally {
+            // Must run even when closing the log fails, or the executor's threads leak.
+            dispatcher.executorService().shutdownNow();
+        }
+    }
+
+}
diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawling/CrawlerSpecificationLoader.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawling/CrawlerSpecificationLoader.java
new file mode 100644
index 00000000..c9e6afc1
--- /dev/null
+++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawling/CrawlerSpecificationLoader.java
@@ -0,0 +1,32 @@
+package nu.marginalia.wmsa.edge.crawling;
+
+import com.github.luben.zstd.ZstdInputStream;
+import com.google.gson.Gson;
+import com.google.gson.GsonBuilder;
+import nu.marginalia.wmsa.edge.crawling.model.CrawlingSpecification;
+import org.apache.logging.log4j.util.Strings;
+
+import java.io.*;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Path;
+import java.util.function.Consumer;
+
+/**
+ * Reads crawl specifications from a zstd-compressed file holding one JSON document per line.
+ */
+public class CrawlerSpecificationLoader {
+    private final static Gson gson = new GsonBuilder().create();
+
+    /**
+     * Streams every non-blank line of the file to the consumer as a parsed specification.
+     * Reading stops at end of file or as soon as {@link AbortMonitor} reports an abort.
+     * An {@code IOException} is printed, not propagated.
+     *
+     * @param inputSpec zstd-compressed, newline-delimited JSON file
+     * @param consumer  receives each specification in file order
+     */
+    public static void readInputSpec(Path inputSpec, Consumer<CrawlingSpecification> consumer) {
+        // Decode as UTF-8 explicitly; without a charset the platform default would be used.
+        try (var inputStream = new BufferedReader(new InputStreamReader(
+                new ZstdInputStream(new FileInputStream(inputSpec.toFile())), StandardCharsets.UTF_8))) {
+
+            for (;;) {
+                var line = inputStream.readLine();
+                if (line == null || !AbortMonitor.getInstance().isAlive())
+                    break;
+
+                if (Strings.isNotBlank(line)) {
+                    consumer.accept(gson.fromJson(line, CrawlingSpecification.class));
+                }
+            }
+        } catch (IOException e) {
+            e.printStackTrace();
+        }
+    }
+}
diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawling/WorkLog.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawling/WorkLog.java
new file mode 100644
index 00000000..276d3651
--- /dev/null
+++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawling/WorkLog.java
@@ -0,0
+1,86 @@
+package nu.marginalia.wmsa.edge.crawling;
+
+import nu.marginalia.wmsa.edge.crawling.model.CrawlLogEntry;
+import org.apache.logging.log4j.util.Strings;
+
+import java.io.*;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.time.LocalDateTime;
+import java.util.HashSet;
+import java.util.Set;
+import java.util.function.Consumer;
+import java.util.regex.Pattern;
+
+/**
+ * Append-only log of finished crawl jobs.
+ *
+ * <p>Each entry is one line of four tab-separated fields (job id, timestamp, file name,
+ * size); lines starting with {@code #} are comments. On construction the existing log is
+ * read so that {@link #isJobFinished(String)} also knows jobs finished by earlier runs.
+ */
+public class WorkLog implements AutoCloseable {
+    private static final Pattern splitPattern = Pattern.compile("\\s+");
+
+    private final Set<String> finishedJobs = new HashSet<>();
+    private final FileOutputStream logWriter;
+
+    /**
+     * Loads the existing entries (if the file exists) and opens the file for appending.
+     *
+     * @param logFile the log file; created when missing
+     * @throws IOException if the file cannot be read or opened
+     */
+    public WorkLog(Path logFile) throws IOException {
+        loadLog(logFile);
+
+        logWriter = new FileOutputStream(logFile.toFile(), true);
+        writeLogEntry("# Starting WorkLog @ " + LocalDateTime.now());
+    }
+
+    /**
+     * Parses a log file and passes each entry to the consumer. A missing file yields no
+     * entries; an {@code IOException} is printed, not propagated.
+     *
+     * @param logFile       the log file to read
+     * @param entryConsumer receives the entries in file order
+     * @throws NumberFormatException if the fourth field of a line is not an integer
+     */
+    public static void readLog(Path logFile, Consumer<CrawlLogEntry> entryConsumer) {
+        if (!Files.exists(logFile)) {
+            return;
+        }
+
+        try (var lines = Files.lines(logFile)) {
+            lines.filter(WorkLog::isJobId)
+                    .map(line -> splitPattern.split(line))
+                    // Skip lines with fewer than four fields (e.g. a line cut short by a
+                    // crash mid-write) instead of failing on the array access below.
+                    .filter(parts -> parts.length >= 4)
+                    .map(parts -> new CrawlLogEntry(parts[0], parts[1], parts[2], Integer.parseInt(parts[3])))
+                    .forEach(entryConsumer);
+        } catch (IOException e) {
+            e.printStackTrace();
+        }
+    }
+
+    /** Adds the id of every entry in the file to the set of finished jobs. */
+    private void loadLog(Path logFile) throws IOException {
+        if (!Files.exists(logFile)) {
+            return;
+        }
+
+        try (var lines = Files.lines(logFile)) {
+            lines.filter(WorkLog::isJobId).map(this::getJobIdFromWrittenString).forEach(finishedJobs::add);
+        }
+    }
+
+    /** True for lines that carry an entry, i.e. are neither blank nor {@code #} comments. */
+    private static boolean isJobId(String s) {
+        return Strings.isNotBlank(s) && !s.startsWith("#");
+    }
+
+    /** Returns the first whitespace-delimited field of a log line. */
+    private String getJobIdFromWrittenString(String s) {
+        return splitPattern.split(s, 2)[0];
+    }
+
+    /**
+     * @param id job id
+     * @return whether the job was recorded as finished, in this run or a previous one
+     */
+    public synchronized boolean isJobFinished(String id) {
+        return finishedJobs.contains(id);
+    }
+
+    // Use synchronization over a concurrent set to avoid competing writes
+    // - correct is better than fast here, since the set and the log file
+    //   must be updated together
+
+    /**
+     * Records a job as finished, in memory and as a new line in the log file.
+     *
+     * @param id    job id
+     * @param where name of the file the result was written to
+     * @param size  size figure stored with the entry
+     * @throws IOException if the log file cannot be written
+     */
+    public synchronized void setJobToFinished(String id, String where, int size) throws IOException {
+        finishedJobs.add(id);
+
+        writeLogEntry(String.format("%s\t%s\t%s\t%d",id, LocalDateTime.now(), where, size));
+    }
+
+    /** Appends one UTF-8 encoded line and flushes. */
+    private void writeLogEntry(String entry) throws IOException {
+        logWriter.write(entry.getBytes(StandardCharsets.UTF_8));
+        logWriter.write("\n".getBytes(StandardCharsets.UTF_8));
+        logWriter.flush();
+    }
+
+    @Override
+    public void close() throws Exception {
+        logWriter.flush();
+        logWriter.close();
+    }
+}
diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawling/model/CrawlLogEntry.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawling/model/CrawlLogEntry.java
new file mode 100644
index 00000000..cdad9a70
--- /dev/null
+++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawling/model/CrawlLogEntry.java
@@ -0,0 +1,4 @@
+package nu.marginalia.wmsa.edge.crawling.model;
+
+/** One parsed line of the crawl work log: job id, timestamp, file name and count. */
+public record CrawlLogEntry(String id, String ts, String path, int cnt) {
+}
diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawling/model/CrawledDocument.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawling/model/CrawledDocument.java
new file mode 100644
index 00000000..5d8d7e54
--- /dev/null
+++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawling/model/CrawledDocument.java
@@ -0,0 +1,25 @@
+package nu.marginalia.wmsa.edge.crawling.model;
+
+import lombok.Builder;
+
+/** Plain data holder for one fetched document; all fields are public and mutable. */
+@Builder
+public class CrawledDocument {
+    public String crawlId;
+
+    public String url;
+    public String contentType;
+
+    public String timestamp;
+    public int httpStatus;
+
+    public String crawlerStatus;
+    public String crawlerStatusDesc;
+
+    public String headers;
+    public String documentBody;
+
+    public String documentBodyHash;
+
+    public String canonicalUrl;
+    public String redirectUrl;
+}
diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawling/model/CrawledDomain.java
b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawling/model/CrawledDomain.java new file mode 100644 index 00000000..1a0a3f46 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawling/model/CrawledDomain.java @@ -0,0 +1,27 @@ +package nu.marginalia.wmsa.edge.crawling.model; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; + +import java.util.List; + +@AllArgsConstructor @Data @Builder +public class CrawledDomain { + public String id; + public String domain; + + public String redirectDomain; + + public String crawlerStatus; + public String crawlerStatusDesc; + public String ip; + + public List doc; + public List cookies; + + public int size() { + if (doc == null) return 0; + return doc.size(); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawling/model/CrawlerDocumentStatus.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawling/model/CrawlerDocumentStatus.java new file mode 100644 index 00000000..38f17abd --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawling/model/CrawlerDocumentStatus.java @@ -0,0 +1,10 @@ +package nu.marginalia.wmsa.edge.crawling.model; + +public enum CrawlerDocumentStatus { + OK, + BAD_CONTENT_TYPE, + BAD_CHARSET, + REDIRECT, + ROBOTS_TXT, + ERROR +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawling/model/CrawlerDomainStatus.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawling/model/CrawlerDomainStatus.java new file mode 100644 index 00000000..1c22067c --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawling/model/CrawlerDomainStatus.java @@ -0,0 +1,5 @@ +package nu.marginalia.wmsa.edge.crawling.model; + +public enum CrawlerDomainStatus { + OK, ERROR, BLOCKED, REDIRECT +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawling/model/CrawlingSpecification.java 
b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawling/model/CrawlingSpecification.java new file mode 100644 index 00000000..d55cd2bb --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawling/model/CrawlingSpecification.java @@ -0,0 +1,13 @@ +package nu.marginalia.wmsa.edge.crawling.model; + +import java.util.List; + +public class CrawlingSpecification { + public String id; + + public int crawlDepth; + + // Don't make this EdgeUrl, EdgeDomain etc. -- we want this plastic to change! + public String domain; + public List urls; +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawling/retreival/CrawlerRetreiver.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawling/retreival/CrawlerRetreiver.java new file mode 100644 index 00000000..467376f5 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawling/retreival/CrawlerRetreiver.java @@ -0,0 +1,297 @@ +package nu.marginalia.wmsa.edge.crawling.retreival; + +import com.google.common.hash.HashFunction; +import com.google.common.hash.Hashing; +import lombok.SneakyThrows; +import nu.marginalia.wmsa.edge.crawler.domain.LinkParser; +import nu.marginalia.wmsa.edge.crawler.worker.GeoIpBlocklist; +import nu.marginalia.wmsa.edge.crawler.worker.IpBlockList; +import nu.marginalia.wmsa.edge.crawler.worker.UrlBlocklist; +import nu.marginalia.wmsa.edge.crawling.model.*; +import nu.marginalia.wmsa.edge.model.EdgeUrl; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.net.InetAddress; +import java.net.UnknownHostException; +import java.time.LocalDateTime; +import java.util.*; + +public class CrawlerRetreiver { + private static final long DEFAULT_CRAWL_DELAY_MS = 1000; + private final LinkedList queue = new LinkedList<>(); + private final HttpFetcher fetcher; + private final HashSet visited; + private final HashSet known; + + private final int depth; + private final String 
id; + private final String domain; + + private static final LinkParser linkParser = new LinkParser(); + private static final Logger logger = LoggerFactory.getLogger(CrawlerRetreiver.class); + + private static final HashFunction hashMethod = Hashing.murmur3_128(0); + private static final IpBlockList ipBlocklist; + private static final UrlBlocklist urlBlocklist = new UrlBlocklist(); + + static { + try { + ipBlocklist = new IpBlockList(new GeoIpBlocklist()); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + public CrawlerRetreiver(HttpFetcher fetcher, CrawlingSpecification specs) { + this.fetcher = fetcher; + visited = new HashSet<>((int)(specs.urls.size() * 1.5)); + known = new HashSet<>(specs.urls.size() * 10); + + depth = specs.crawlDepth; + id = specs.id; + domain = specs.domain; + + specs.urls.stream() + .map(this::parseUrl) + .filter(Optional::isPresent) + .map(Optional::get) + .filter(known::add) + .forEach(queue::addLast); + + if (queue.peek() != null) { + var fst = queue.peek(); + var root = new EdgeUrl(fst.proto, fst.domain, fst.port, "/"); + if (known.add(root)) + queue.addFirst(root); + } + } + + private Optional parseUrl(String str) { + try { + return Optional.of(new EdgeUrl(str)); + } + catch (Exception ex) { + return Optional.empty(); + } + } + + public CrawledDomain fetch() { + logger.info("Fetching {}", domain); + + Optional probeResult = probeDomainForProblems(domain); + + return probeResult.orElseGet(this::crawlDomain); + } + + private Optional probeDomainForProblems(String domain) { + EdgeUrl fst = queue.peek(); + + + if (fst == null) { + logger.warn("No URLs for domain {}", domain); + + return Optional.of(CrawledDomain.builder() + .crawlerStatus(CrawlerDomainStatus.ERROR.name()) + .crawlerStatusDesc("No known URLs") + .id(id) + .domain(domain) + .build()); + } + + if (!ipBlocklist.isAllowed(fst.domain)) { + return Optional.of(CrawledDomain.builder() + .crawlerStatus(CrawlerDomainStatus.BLOCKED.name()) + .id(id) + 
.domain(domain) + .ip(findIp(domain)) + .build()); + } + + var fetchResult = fetcher.probeDomain(new EdgeUrl(fst.proto, fst.domain, fst.port, "/")); + if (!fetchResult.ok()) { + logger.debug("Bad status on {}", domain); + return Optional.of(createErrorPostFromStatus(fetchResult)); + } + return Optional.empty(); + } + + private CrawledDomain crawlDomain() { + String ip = findIp(domain); + + var robotsRules = fetcher.fetchRobotRules(queue.peek().domain); + long crawlDelay = robotsRules.getCrawlDelay(); + + List docs = new ArrayList<>(depth); + CrawledDomain ret = new CrawledDomain(id, domain, null, CrawlerDomainStatus.OK.name(), null, ip, docs, null); + + int visitedCount = 0; + while (!queue.isEmpty() && visitedCount < depth) { + var top = queue.removeFirst(); + + if (!robotsRules.isAllowed(top.toString())) { + ret.doc.add(createRobotsError(top)); + continue; + } + + if (urlBlocklist.isUrlBlocked(top)) + continue; + if (top.toString().length() > 255) + continue; + + if (!visited.add(top)) { + continue; + } + + logger.debug("Fetching {}", top); + long startTime = System.currentTimeMillis(); + + fetchUrl(top).ifPresent(ret.doc::add); + + long crawledTime = System.currentTimeMillis() - startTime; + delay(crawlDelay, crawledTime); + + visitedCount ++; + } + + ret.cookies = fetcher.getCookies(); + + return ret; + } + + private Optional fetchUrl(EdgeUrl top) { + try { + + var doc = fetcher.fetchContent(top); + + if (doc.documentBody != null) { + + doc.documentBodyHash = createHash(doc.documentBody); + + Optional parsedDoc = parseDoc(doc); + EdgeUrl url = new EdgeUrl(doc.url); + + parsedDoc.ifPresent(parsed -> findLinks(url, parsed)); + parsedDoc.flatMap(parsed -> findCanonicalUrl(url, parsed)) + .ifPresent(canonicalLink -> doc.canonicalUrl = canonicalLink.toString()); + } + + return Optional.of(doc); + } + catch (Exception ex) { + logger.warn("Failed to process document {}", top); + } + + return Optional.empty(); + + } + + private String createHash(String 
documentBodyHash) { + return hashMethod.hashUnencodedChars(documentBodyHash).toString(); + } + + private Optional parseDoc(CrawledDocument doc) { + if (doc.documentBody == null) + return Optional.empty(); + return Optional.of(Jsoup.parse(doc.documentBody)); + } + + public boolean isSameDomain(EdgeUrl url) { + return domain.equals(url.domain.toString().toLowerCase()); + } + + private void findLinks(EdgeUrl url, Document parsed) { + + for (var link : parsed.getElementsByTag("a")) { + linkParser.parseLink(url, link) + .filter(this::isSameDomain) + .filter(u -> !urlBlocklist.isUrlBlocked(u)) + .filter(u -> !urlBlocklist.isForumLink(u)) + .filter(known::add) + .ifPresent(queue::addLast); + } + for (var link : parsed.getElementsByTag("frame")) { + linkParser.parseFrame(url, link) + .filter(this::isSameDomain) + .filter(u -> !urlBlocklist.isUrlBlocked(u)) + .filter(u -> !urlBlocklist.isForumLink(u)) + .filter(known::add) + .ifPresent(queue::addLast); + } + for (var link : parsed.getElementsByTag("iframe")) { + linkParser.parseFrame(url, link) + .filter(this::isSameDomain) + .filter(u -> !urlBlocklist.isUrlBlocked(u)) + .filter(u -> !urlBlocklist.isForumLink(u)) + .filter(known::add) + .ifPresent(queue::addLast); + } + } + + private Optional findCanonicalUrl(EdgeUrl url, Document parsed) { + + for (var link : parsed.select("link[rel=canonical]")) { + return linkParser.parseLink(url, link); + } + + return Optional.empty(); + } + + private String findIp(String domain) { + try { + return InetAddress.getByName(domain).getHostAddress(); + } catch (UnknownHostException e) { + return ""; + } + } + + @SneakyThrows + private void delay(long crawlDelay, long timeParsed) { + if (crawlDelay >= 1) { + if (timeParsed/1000 > crawlDelay) + return; + + Thread.sleep(Math.min(1000*crawlDelay-timeParsed, 5000)); + } + else { + if (timeParsed > DEFAULT_CRAWL_DELAY_MS) + return; + + Thread.sleep(DEFAULT_CRAWL_DELAY_MS - timeParsed); + } + } + + private CrawledDocument createRobotsError(EdgeUrl 
url) { + return CrawledDocument.builder() + .url(url.toString()) + .timestamp(LocalDateTime.now().toString()) + .httpStatus(-1) + .crawlerStatus(CrawlerDocumentStatus.ROBOTS_TXT.name()) + .build(); + } + + private CrawledDomain createErrorPostFromStatus(HttpFetcher.FetchResult ret) { + String ip = findIp(domain); + + if (ret.state == HttpFetcher.FetchResultState.ERROR) { + return CrawledDomain.builder() + .crawlerStatus(CrawlerDomainStatus.ERROR.name()) + .id(id).domain(domain) + .ip(ip) + .build(); + } + if (ret.state == HttpFetcher.FetchResultState.REDIRECT) { + return CrawledDomain.builder() + .crawlerStatus(CrawlerDomainStatus.REDIRECT.name()) + .id(id) + .domain(domain) + .redirectDomain(ret.domain.toString()) + .ip(ip) + .build(); + } + throw new AssertionError("Unexpected case"); + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawling/retreival/HttpFetcher.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawling/retreival/HttpFetcher.java new file mode 100644 index 00000000..e3260608 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawling/retreival/HttpFetcher.java @@ -0,0 +1,305 @@ +package nu.marginalia.wmsa.edge.crawling.retreival; + +import com.google.inject.Inject; +import com.google.inject.name.Named; +import crawlercommons.robots.SimpleRobotRules; +import crawlercommons.robots.SimpleRobotRulesParser; +import lombok.AllArgsConstructor; +import lombok.SneakyThrows; +import lombok.ToString; +import nu.marginalia.wmsa.edge.crawler.domain.LinkParser; +import nu.marginalia.wmsa.edge.crawler.fetcher.ContentTypeParser; +import nu.marginalia.wmsa.edge.crawler.fetcher.Cookies; +import nu.marginalia.wmsa.edge.crawler.fetcher.NoSecuritySSL; +import nu.marginalia.wmsa.edge.crawling.model.CrawledDocument; +import nu.marginalia.wmsa.edge.crawling.model.CrawlerDocumentStatus; +import nu.marginalia.wmsa.edge.model.EdgeDomain; +import nu.marginalia.wmsa.edge.model.EdgeUrl; +import okhttp3.Dispatcher; 
+import okhttp3.OkHttpClient; +import okhttp3.Request; +import okhttp3.Response; +import org.apache.commons.io.input.BOMInputStream; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.net.ssl.X509TrustManager; +import java.io.IOException; +import java.net.URISyntaxException; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; +import java.time.LocalDateTime; +import java.util.List; +import java.util.Objects; +import java.util.Optional; +import java.util.concurrent.TimeUnit; +import java.util.function.Predicate; +import java.util.regex.Pattern; +import java.util.zip.GZIPInputStream; + +public class HttpFetcher { + + private final Logger logger = LoggerFactory.getLogger(getClass()); + private final String userAgent; + private final int maxFetchSize = 1024*512; + private Cookies cookies = new Cookies(); + + private static final SimpleRobotRulesParser robotsParser = new SimpleRobotRulesParser(); + + private final LinkParser linkParser = new LinkParser(); + + public void setAllowAllContentTypes(boolean allowAllContentTypes) { + this.allowAllContentTypes = allowAllContentTypes; + } + + private boolean allowAllContentTypes = false; + + private final OkHttpClient client; + + public enum FetchResultState { + OK, + REDIRECT, + ERROR; + }; + + @AllArgsConstructor @ToString + public static class FetchResult { + public final FetchResultState state; + public final EdgeDomain domain; + + public boolean ok() { + return state == FetchResultState.OK; + } + }; + + @SneakyThrows + private OkHttpClient createClient(Dispatcher dispatcher) { + return new OkHttpClient.Builder() + .dispatcher(dispatcher) + .sslSocketFactory(NoSecuritySSL.buildSocketFactory(), (X509TrustManager) NoSecuritySSL.trustAllCerts[0]) + .hostnameVerifier(NoSecuritySSL.buildHostnameVerifyer()) + .cookieJar(cookies.getJar()) + .followRedirects(true) + .followSslRedirects(true) + .connectTimeout(8, TimeUnit.SECONDS) + .readTimeout(10, TimeUnit.SECONDS) + 
.writeTimeout(10, TimeUnit.SECONDS) + .build(); + } + + public List getCookies() { + return cookies.getCookies(); + } + + public void clearCookies() { + cookies.clear(); + } + + @Inject + public HttpFetcher(@Named("user-agent") String userAgent, Dispatcher dispatcher) { + this.client = createClient(dispatcher); + this.userAgent = userAgent; + } + + @SneakyThrows + public FetchResult probeDomain(EdgeUrl url) { + var head = new Request.Builder().head().addHeader("User-agent", userAgent) + .url(new EdgeUrl(url.proto, url.domain, url.port, "/").toString()) + .build(); + + var call = client.newCall(head); + + try (var rsp = call.execute()) { + var requestUrl = rsp.request().url().toString(); + EdgeDomain requestDomain = new EdgeUrl(requestUrl).domain; + + if (!Objects.equals(requestDomain, url.domain)) { + return new FetchResult(FetchResultState.REDIRECT, requestDomain); + } + return new FetchResult(FetchResultState.OK, requestDomain); + } + catch (Exception ex) { + return new FetchResult(FetchResultState.ERROR, url.domain); + } + } + + private Request createHeadRequest(EdgeUrl url) { + return new Request.Builder().head().addHeader("User-agent", userAgent) + .url(url.toString()) + .addHeader("Accept-Encoding", "gzip") + .build(); + } + + private Request createGetRequest(EdgeUrl url) { + return new Request.Builder().get().addHeader("User-agent", userAgent) + .url(url.toString()) + .addHeader("Accept-Encoding", "gzip") + .build(); + + } + + @SneakyThrows + public CrawledDocument fetchContent(EdgeUrl url) { + if (isUrlLikeBinary(url)) { + + logger.debug("Probing suspected binary {}", url); + + var head = createHeadRequest(url); + var call = client.newCall(head); + + try (var rsp = call.execute()) { + var contentTypeHeader = rsp.header("Content-type"); + if (contentTypeHeader != null && !isAllowableContentType(contentTypeHeader)) { + return createErrorResponse(url, rsp, CrawlerDocumentStatus.BAD_CONTENT_TYPE, "Early probe failed"); + } + } + catch (Exception ex) { + return 
createHardErrorRsp(url, ex); + } + } + + var get = createGetRequest(url); + var call = client.newCall(get); + + + + + try (var rsp = call.execute()) { + return extractBody(url, rsp); + } + catch (Exception ex) { + return createHardErrorRsp(url, ex); + } + } + + private CrawledDocument createHardErrorRsp(EdgeUrl url, Exception why) { + return CrawledDocument.builder() + .crawlerStatus(CrawlerDocumentStatus.ERROR.toString()) + .crawlerStatusDesc(why.getClass().getSimpleName() + ": " + why.getMessage()) + .timestamp(LocalDateTime.now().toString()) + .url(url.toString()) + .build(); + } + + private CrawledDocument createErrorResponse(EdgeUrl url, Response rsp, CrawlerDocumentStatus status, String why) { + return CrawledDocument.builder() + .crawlerStatus(status.toString()) + .crawlerStatusDesc(why) + .headers(rsp.headers().toString()) + .contentType(rsp.header("Content-type")) + .timestamp(LocalDateTime.now().toString()) + .httpStatus(rsp.code()) + .url(url.toString()) + .build(); + } + + private CrawledDocument extractBody(EdgeUrl url, Response rsp) throws IOException, URISyntaxException { + + var responseUrl = new EdgeUrl(rsp.request().url().toString()); + if (!responseUrl.equals(url)) { + return createRedirectResponse(url, rsp, responseUrl); + } + + var body = rsp.body(); + if (null == body) { + return createErrorResponse(url, rsp, CrawlerDocumentStatus.ERROR, "No body"); + } + + var byteStream = body.byteStream(); + if (null == byteStream) { + return createErrorResponse(url, rsp, CrawlerDocumentStatus.ERROR, "No body"); + } + if ("gzip".equals(rsp.header("Content-encoding"))) { + byteStream = new GZIPInputStream(byteStream); + } + byteStream = new BOMInputStream(byteStream); + + var contentTypeHeader = rsp.header("Content-type"); + if (contentTypeHeader != null && !isAllowableContentType(contentTypeHeader)) { + return createErrorResponse(url, rsp, CrawlerDocumentStatus.BAD_CONTENT_TYPE, ""); + } + + byte[] data = byteStream.readNBytes(maxFetchSize); + + var 
contentType = ContentTypeParser.parse(contentTypeHeader, data); + if (!isAllowableContentType(contentType.contentType)) { + return createErrorResponse(url, rsp, CrawlerDocumentStatus.BAD_CONTENT_TYPE, ""); + } + + if ("Shift_JIS".equalsIgnoreCase(contentType.charset)) { + return createErrorResponse(url, rsp, CrawlerDocumentStatus.BAD_CHARSET, ""); + } + + var strData = new String(data, Charset.forName(contentType.charset)); + var canonical = rsp.header("rel=canonical", ""); + + return CrawledDocument.builder() + .crawlerStatus(CrawlerDocumentStatus.OK.name()) + .headers(rsp.headers().toString()) + .contentType(rsp.header("Content-type")) + .timestamp(LocalDateTime.now().toString()) + .canonicalUrl(canonical) + .httpStatus(rsp.code()) + .url(url.toString()) + .documentBody(strData) + .build(); + } + + private CrawledDocument createRedirectResponse(EdgeUrl url, Response rsp, EdgeUrl responseUrl) { + + return CrawledDocument.builder() + .crawlerStatus(CrawlerDocumentStatus.REDIRECT.name()) + .redirectUrl(responseUrl.toString()) + .headers(rsp.headers().toString()) + .contentType(rsp.header("Content-type")) + .timestamp(LocalDateTime.now().toString()) + .httpStatus(rsp.code()) + .url(url.toString()) + .build(); + + } + + + private final Predicate probableHtmlPattern = Pattern.compile("^.*\\.(htm|html|php|txt)(\\?.*)?$").asPredicate(); + private final Predicate probableBinaryPattern = Pattern.compile("^.*\\.[a-z]+$").asPredicate(); + + public boolean isUrlLikeBinary(EdgeUrl url) { + String urlString = url.toString().toLowerCase(); + + return (!probableHtmlPattern.test(urlString) && probableBinaryPattern.test(urlString)); + } + + private boolean isAllowableContentType(String contentType) { + return allowAllContentTypes || contentType.startsWith("text") + || contentType.startsWith("application/xhtml") + || contentType.startsWith("application/xml") + || contentType.startsWith("application/atom+xml") + || contentType.startsWith("application/rss+xml") + || 
contentType.startsWith("application/x-rss+xml") + || contentType.startsWith("application/rdf+xml") + || contentType.startsWith("x-rss+xml"); + } + + public SimpleRobotRules fetchRobotRules(EdgeDomain domain) { + return fetchRobotsForProto("https", domain) + .or(() -> fetchRobotsForProto("http", domain)) + .orElseGet(() -> new SimpleRobotRules(SimpleRobotRules.RobotRulesMode.ALLOW_ALL)); + } + + private Optional fetchRobotsForProto(String proto, EdgeDomain domain) { + try { + var url = new EdgeUrl(proto, domain, null, "/robots.txt"); + return Optional.of(parseRobotsTxt(fetchContent(url))); + } + catch (Exception ex) { + return Optional.empty(); + } + } + + private SimpleRobotRules parseRobotsTxt(CrawledDocument doc) { + return robotsParser.parseContent(doc.url, + doc.documentBody.getBytes(StandardCharsets.UTF_8), + doc.contentType, + userAgent); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/data/dao/EdgeDataStoreDao.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/data/dao/EdgeDataStoreDao.java new file mode 100644 index 00000000..9b1ea05e --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/data/dao/EdgeDataStoreDao.java @@ -0,0 +1,65 @@ +package nu.marginalia.wmsa.edge.data.dao; + +import com.google.inject.ImplementedBy; +import nu.marginalia.wmsa.edge.data.dao.task.EdgeDomainBlacklist; +import nu.marginalia.wmsa.edge.model.*; +import nu.marginalia.wmsa.edge.model.crawl.EdgeDomainIndexingState; +import nu.marginalia.wmsa.edge.model.crawl.EdgeDomainLink; +import nu.marginalia.wmsa.edge.model.crawl.EdgeUrlVisit; +import nu.marginalia.wmsa.edge.model.search.EdgeUrlDetails; +import nu.marginalia.wmsa.edge.search.BrowseResult; + +import java.util.Collection; +import java.util.List; +import java.util.Optional; + +@ImplementedBy(EdgeDataStoreDaoImpl.class) +public interface EdgeDataStoreDao { + void putUrl(double quality, EdgeUrl... url); + void putFeeds(EdgeUrl... url); + + void putUrlVisited(EdgeUrlVisit... 
urls); + + void putLink(boolean wipeExisting, EdgeDomainLink... links); + boolean isBlacklisted(EdgeDomain domain); + + EdgeId getDomainId(EdgeDomain domain); + EdgeId getUrlId(EdgeUrl domain); + EdgeUrl getUrl(EdgeId id); + EdgeUrlDetails getUrlDetails(EdgeId id); + + List getDomainNeighbors(EdgeId domainId, EdgeDomainBlacklist backlist, int count); + List getDomainNeighborsAdjacent(EdgeId domainId, EdgeDomainBlacklist backlist, int count); + List getRandomDomains(int count, EdgeDomainBlacklist backlist); + List getUrlDetailsMulti(List> ids); + List> getDomainIdsFromUrlIds(Collection> urlIds); + + + EdgeDomain getDomain(EdgeId id); + + List> inboudUrls(EdgeId id, int limit); + List> outboundUrls(EdgeId id, int limit); + + Optional> resolveAmbiguousDomain(String name); + + void putDomainAlias(EdgeDomain src, EdgeDomain dst); + + int getPagesKnown(EdgeId domainId); + int getPagesVisited(EdgeId domainId); + int getPagesIndexed(EdgeId domainId); + + int getIncomingLinks(EdgeId domainId); + int getOutboundLinks(EdgeId domainId); + + double getDomainQuality(EdgeId domainId); + + EdgeDomainIndexingState getDomainState(EdgeId domainId); + + List getLinkingDomains(EdgeId domainId); + + List getNewUrls(EdgeId domainId, Collection links); + + double getRank(EdgeId domainId); + + void updateDomainIndexTimestamp(EdgeDomain domain, EdgeDomainIndexingState state, EdgeDomain alias, int minIndexed); +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/data/dao/EdgeDataStoreDaoImpl.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/data/dao/EdgeDataStoreDaoImpl.java new file mode 100644 index 00000000..58870ffa --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/data/dao/EdgeDataStoreDaoImpl.java @@ -0,0 +1,1180 @@ +package nu.marginalia.wmsa.edge.data.dao; + +import com.google.common.cache.Cache; +import com.google.common.cache.CacheBuilder; +import com.google.common.util.concurrent.UncheckedExecutionException; +import 
com.google.inject.Inject; +import com.zaxxer.hikari.HikariDataSource; +import edu.stanford.nlp.parser.lexparser.Edge; +import lombok.SneakyThrows; +import nu.marginalia.wmsa.edge.crawler.domain.UrlsCache; +import nu.marginalia.wmsa.edge.data.dao.task.EdgeDomainBlacklist; +import nu.marginalia.wmsa.edge.model.*; +import nu.marginalia.wmsa.edge.model.crawl.EdgeDomainIndexingState; +import nu.marginalia.wmsa.edge.model.crawl.EdgeDomainLink; +import nu.marginalia.wmsa.edge.model.crawl.EdgeUrlVisit; +import nu.marginalia.wmsa.edge.model.search.EdgePageScoreAdjustment; +import nu.marginalia.wmsa.edge.model.search.EdgeUrlDetails; +import nu.marginalia.wmsa.edge.search.BrowseResult; +import org.apache.commons.lang3.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.sql.Connection; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Types; +import java.util.*; +import java.util.function.Function; +import java.util.regex.Pattern; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import static nu.marginalia.wmsa.edge.crawler.worker.UploaderWorker.QUALITY_LOWER_BOUND_CUTOFF; + +public class EdgeDataStoreDaoImpl implements EdgeDataStoreDao { + private static final int DB_LOCK_RETRIES = 3; + + private final HikariDataSource dataSource; + private final Logger logger = LoggerFactory.getLogger(getClass()); + + private final Cache> urlIdCache = CacheBuilder.newBuilder().maximumSize(100_000).build(); + private final Cache> domainIdCache = CacheBuilder.newBuilder().maximumSize(10_000).build(); + + private final UrlsCache URLS_INSERTED_CACHE = new UrlsCache<>(); + private final UrlsCache DOMAINS_INSERTED_CACHE = new UrlsCache<>(); + + private static final String DEFAULT_PROTOCOL = "http"; + + @Inject + public EdgeDataStoreDaoImpl(HikariDataSource dataSource) + { + this.dataSource = dataSource; + } + + + public synchronized void clearCaches() + { + urlIdCache.invalidateAll(); + 
domainIdCache.invalidateAll(); + URLS_INSERTED_CACHE.clear(); + DOMAINS_INSERTED_CACHE.clear(); + } + + @Override + @SneakyThrows + public void putUrl(double quality, EdgeUrl... urls) { + if (quality > 0.5) { + logger.warn("Put URL q={} {}", quality, urls); + } + + if (urls.length == 0) { + return; + } + + try (var connection = dataSource.getConnection()) { + + connection.setAutoCommit(false); + + for (int i = 0; i < DB_LOCK_RETRIES; i++) { + try { + var domains = Arrays.stream(urls) + .map(EdgeUrl::getDomain) + .distinct().toArray(EdgeDomain[]::new); + + insert(connection, domains, quality); + insert(connection, urls); + connection.commit(); + break; + } catch (Exception ex) { + logger.error("DB error", ex); + connection.rollback(); + } finally { + connection.setAutoCommit(true); + } + } + } + } + + + @Override + @SneakyThrows + public void putFeeds(EdgeUrl... urls) { + if (urls.length == 0) { + return; + } + + try (var connection = dataSource.getConnection()) { + connection.setAutoCommit(false); + + for (int i = 0; i < DB_LOCK_RETRIES; i++) { + try { + insertFeed(connection, urls); + connection.commit(); + break; + } catch (Exception ex) { + logger.error("DB error", ex); + connection.rollback(); + } + finally { + connection.setAutoCommit(true); + } + } + } + } + + @Override + @SneakyThrows + public void putUrlVisited(EdgeUrlVisit... 
urls) { + if (urls.length == 0) { + return; + } + + try (var connection = dataSource.getConnection()) { + + connection.setAutoCommit(false); + + for (int i = 0; i < DB_LOCK_RETRIES; i++) { + try { + insert(connection, Arrays.stream(urls).map(url -> url.getUrl().domain).toArray(EdgeDomain[]::new), Optional.ofNullable(urls[0].quality).orElse(-2.)); + visited(connection, urls); + connection.commit(); + break; + } catch (Exception ex) { + logger.error("DB error", ex); + connection.rollback(); + } finally { + connection.setAutoCommit(true); + } + } + } + } + + @SneakyThrows + private void insert(Connection connection, EdgeUrl[] urls) { + + EdgeUrl[] toCommitUrls = Arrays + .stream(urls) + .filter(URLS_INSERTED_CACHE::isMissing) + .sorted(Comparator.comparing(url -> url.toString().length())) + .distinct() + .toArray(EdgeUrl[]::new); + + int size = 0; + try (var stmt = + connection.prepareStatement("INSERT IGNORE INTO EC_URL (URL, DOMAIN_ID, PROTO, PORT) SELECT ? AS URL, ID, ?, ? AS DOMAIN_ID FROM EC_DOMAIN WHERE URL_PART=?")) { + for (var url : toCommitUrls) { + logger.trace("insert({})", url); + + if (url.path.length() > 255) { + logger.warn("(insert) URL too long: {}", url); + continue; + } + + stmt.setString(1, url.path); + stmt.setString(2, url.proto); + if (url.port != null) { + stmt.setInt(3, url.port); + } + else { + stmt.setNull(3, Types.INTEGER); + } + stmt.setString(4, url.domain.toString()); + stmt.addBatch(); + if ((++size % 100) == 0) { + int[] status = stmt.executeBatch(); + checkExecuteStatus("insert", status); + } + } + int[] status = stmt.executeBatch(); + checkExecuteStatus("insert", status); + + URLS_INSERTED_CACHE.addAll(toCommitUrls); + } + + } + + @SneakyThrows + private void visited(Connection connection, EdgeUrlVisit[] visits) { + int size = 0; + + try (var stmt = + connection.prepareStatement( + "UPDATE EC_URL INNER JOIN EC_DOMAIN ON DOMAIN_ID=EC_DOMAIN.ID " + + "SET QUALITY_MEASURE=?, DATA_HASH=?, IP=?, EC_URL.STATE=?, VISITED=TRUE " + + " WHERE 
URL_PART=? AND URL=?")) { + for (var visit : visits) { + logger.trace("(visit) insert({})", visit); + + if (visit.url.path.length() > 255) { + logger.warn("URL too long: {}", visit.url); + continue; + } + + if (visit.quality != null) { + stmt.setDouble(1, visit.quality); + } else { + stmt.setNull(1, Types.DOUBLE); + } + + if (visit.data_hash_code != null) { + stmt.setInt(2, visit.data_hash_code); + } else { + stmt.setNull(2, Types.INTEGER); + } + + stmt.setString(3, visit.ipAddress); + stmt.setString(4, visit.urlState.toString()); + stmt.setString(5, visit.url.domain.toString()); + stmt.setString(6, visit.url.path); + stmt.addBatch(); + if ((++size % 100) == 0) { + int[] status = stmt.executeBatch(); + checkExecuteStatus("set-visited", status); + } + } + var status = stmt.executeBatch(); + + checkExecuteStatus("set-visited", status); + } + + try (var stmt = + connection.prepareStatement("REPLACE INTO EC_PAGE_DATA (ID, TITLE, DESCRIPTION, WORDS_DISTINCT, WORDS_TOTAL, FORMAT, FEATURES) SELECT ID, ?,?,?,?,?,? FROM EC_URL_VIEW WHERE URL_DOMAIN=? AND URL_PATH=? AND URL_PROTO=? 
AND IFNULL(URL_PORT,-1)=IFNULL(?,-1)")) { + for (var visit : visits) { + + + if (visit.title != null) { + stmt.setString(1, StringUtils.truncate(visit.title, 255)); + } + else { + stmt.setNull(1, Types.VARCHAR); + } + + if (visit.description != null) { + stmt.setString(2, StringUtils.truncate(visit.description, 255)); + } + else { + stmt.setNull(2, Types.VARCHAR); + } + + stmt.setInt(3, visit.wordCountDistinct); + stmt.setInt(4, visit.wordCountTotal); + stmt.setString(5, visit.format); + stmt.setInt(6, visit.features); + stmt.setString(7, visit.url.domain.toString()); + stmt.setString(8, visit.url.path); + stmt.setString(9, visit.url.proto); + if (visit.url.port == null) { + stmt.setNull(10, Types.INTEGER); + } else { + stmt.setInt(10, visit.url.port); + } + stmt.addBatch(); + } + var status = stmt.executeBatch(); + checkExecuteStatus("set-visited2", status); + + } + } + + private void checkExecuteStatus(String operation, int[] status) { + } + + @SneakyThrows + private void insertFeed(Connection connection, EdgeUrl[] urls) { + + int size = 0; + try (var stmt = + connection.prepareStatement("INSERT IGNORE INTO EC_FEED_URL (URL, DOMAIN_ID, PROTO, PORT) SELECT ? AS URL, ID, ?, ? 
AS DOMAIN_ID FROM EC_DOMAIN WHERE URL_PART=?")) { + for (var url : urls) { + logger.trace("insert({})", url); + + if (url.path.length() > 255) { + logger.warn("(insert) URL too long: {}", url); + continue; + } + + stmt.setString(1, url.path); + stmt.setString(2, url.proto); + if (url.port != null) { + stmt.setInt(3, url.port); + } + else { + stmt.setNull(3, Types.INTEGER); + } + stmt.setString(4, url.domain.toString()); + stmt.addBatch(); + if ((++size % 100) == 0) { + int[] status = stmt.executeBatch(); + checkExecuteStatus("insert", status); + } + } + int[] status = stmt.executeBatch(); + checkExecuteStatus("insert", status); + } + + } + @SneakyThrows + private void insert(Connection connection, EdgeDomain[] domains, double quality) { + EdgeDomain[] toCommitDomains = Arrays.stream(domains).filter(DOMAINS_INSERTED_CACHE::isMissing).distinct().toArray(EdgeDomain[]::new); + + try (var stmt = + connection.prepareStatement("INSERT IGNORE INTO EC_TOP_DOMAIN (URL_PART) VALUES (?)")) { + for (var domain : toCommitDomains) { + stmt.setString(1, domain.getDomain()); + stmt.addBatch(); + } + var status = stmt.executeBatch(); + checkExecuteStatus("insert", status); + } + + int size = 0; + + if (quality > 0.5) { + logger.warn("1 quality insert? {}", quality); + } + + try (var stmt = + connection.prepareStatement("INSERT IGNORE INTO EC_DOMAIN (URL_PART, QUALITY, QUALITY_ORIGINAL, URL_TOP_DOMAIN_ID, URL_SUBDOMAIN, RANK) SELECT ?, IFNULL(EC_DOMAIN_HISTORY.QUALITY_MEASURE*IFNULL(EC_DOMAIN_HISTORY.RANK, 1), ?), ?, EC_TOP_DOMAIN.ID, ?, IFNULL(EC_DOMAIN_HISTORY.RANK,1) FROM EC_TOP_DOMAIN LEFT JOIN EC_DOMAIN_HISTORY ON EC_DOMAIN_HISTORY.URL_PART=? 
WHERE EC_TOP_DOMAIN.URL_PART=?")) { + for (var domain : toCommitDomains) { + logger.trace("insert({})", domain); + stmt.setString(1, domain.toString()); + stmt.setDouble(2, quality); + stmt.setDouble(3, quality); + + stmt.setString(4, domain.subDomain); + + stmt.setString(5, domain.toString()); + stmt.setString(6, domain.domain); + + stmt.addBatch(); + + if ((++size % 100) == 0) { + int[] status = stmt.executeBatch(); + checkExecuteStatus("insert", status); + } + } + var status = stmt.executeBatch(); + checkExecuteStatus("insert", status); + + DOMAINS_INSERTED_CACHE.addAll(toCommitDomains); + } + + } + + @Override + @SneakyThrows + public void putLink(boolean wipeExisting, EdgeDomainLink... links) { + if (links.length == 0) { + return; + } + + try (var connection = dataSource.getConnection()) { + connection.setAutoCommit(false); + + var domains = Arrays.stream(links).flatMap(link -> + Stream.concat(Stream.of(link.destination), + Stream.of(link.source))) + .distinct().toArray(EdgeDomain[]::new); + + for (int i = 0; i < DB_LOCK_RETRIES; i++) { + try { + insert(connection, domains, -5); + insert(connection, links, wipeExisting); + connection.commit(); + break; + + } catch (Exception ex) { + logger.error("DB error", ex); + connection.rollback(); + } finally { + connection.setAutoCommit(true); + } + } + } + } + + @SneakyThrows + @Override + public boolean isBlacklisted(EdgeDomain domain) { + + try (var connection = dataSource.getConnection()) { + try (var stmt = connection.prepareStatement("SELECT ID FROM EC_DOMAIN_BLACKLIST WHERE URL_DOMAIN=?")) { + stmt.setString(1, domain.domain); + var rsp = stmt.executeQuery(); + if (rsp.next()) { + return true; + } else { + return false; + } + } + } + } + + @SneakyThrows + private void insert(Connection connection, EdgeDomainLink[] links, boolean wipeExisting) { + + int size = 0; + if (wipeExisting) { + try (var stmt = connection.prepareStatement("DELETE FROM EC_DOMAIN_LINK WHERE SOURCE_DOMAIN_ID=?")) { + EdgeDomain[] sources = 
Arrays.stream(links).map(EdgeDomainLink::getSource).distinct().toArray(EdgeDomain[]::new); + for (var source : sources) { + stmt.setInt(1, getDomainId(source).getId()); + stmt.executeUpdate(); + } + } + } + + try (var stmt = + connection.prepareStatement( + "INSERT IGNORE INTO EC_DOMAIN_LINK (SOURCE_DOMAIN_ID, DEST_DOMAIN_ID) SELECT SRC.DOMAIN_ID, DEST.DOMAIN_ID FROM (SELECT EC_DOMAIN.ID AS DOMAIN_ID FROM EC_DOMAIN WHERE EC_DOMAIN.URL_PART=?) AS SRC, (SELECT EC_DOMAIN.ID AS DOMAIN_ID FROM EC_DOMAIN WHERE EC_DOMAIN.URL_PART=?) AS DEST")) { + + for (EdgeDomainLink link : links) { + stmt.setString(1, link.source.toString()); + stmt.setString(2, link.destination.toString()); + + stmt.addBatch(); + + if ((++size % 100) == 0) { + int[] status = stmt.executeBatch(); + checkExecuteStatus("insert", status); + } + } + + var status = stmt.executeBatch(); + checkExecuteStatus("insert", status); + } + } + + @SneakyThrows + @Override + public EdgeId getDomainId(EdgeDomain domain) { + try (var connection = dataSource.getConnection()) { + + return domainIdCache.get(domain, () -> { + try (var stmt = connection.prepareStatement("SELECT ID FROM EC_DOMAIN WHERE URL_PART=?")) { + stmt.setString(1, domain.toString()); + var rsp = stmt.executeQuery(); + if (rsp.next()) { + return new EdgeId<>(rsp.getInt(1)); + } + } + throw new NoSuchElementException(); + }); + } + catch (UncheckedExecutionException ex) { + throw ex.getCause(); + } + } + + @Override + @SneakyThrows + public EdgeId getUrlId(EdgeUrl url) { + try (var connection = dataSource.getConnection()) { + + return urlIdCache.get(url, () -> { + try (var stmt = connection.prepareStatement("SELECT ID FROM EC_URL_VIEW WHERE URL_PATH=? AND URL_DOMAIN=? 
AND URL_PROTO=?")) { + stmt.setString(1, url.path); + stmt.setString(2, url.domain.toString()); + stmt.setString(3, url.proto); + + var rsp = stmt.executeQuery(); + if (rsp.next()) { + return new EdgeId<>(rsp.getInt(1)); + } + } + // Lenient mode for http->https upgrades etc + try (var stmt = connection.prepareStatement("SELECT ID FROM EC_URL_VIEW WHERE URL_PATH=? AND URL_DOMAIN=?")) { + stmt.setString(1, url.path); + stmt.setString(2, url.domain.toString()); + + var rsp = stmt.executeQuery(); + if (rsp.next()) { + return new EdgeId<>(rsp.getInt(1)); + } + } + throw new NoSuchElementException(url.toString()); + }); + } + catch (UncheckedExecutionException ex) { + throw ex.getCause(); + } + } + + + @SneakyThrows + @Override + public List> getDomainIdsFromUrlIds(Collection> urlIds) { + List> results = new ArrayList<>(urlIds.size()); + + if (urlIds.isEmpty()) + return results; + + try (var connection = dataSource.getConnection()) { + + try (var stmt = connection.prepareStatement("SELECT DOMAIN_ID FROM EC_URL WHERE ID IN " + urlIds + .stream() + .map(EdgeId::getId) + .map(Object::toString) + .collect(Collectors.joining(",", "(", ")")))) + { + var rsp = stmt.executeQuery(); + while (rsp.next()) { + results.add(new EdgeId<>(rsp.getInt(1))); + } + + } + } + + return results; + } + + static Pattern badChars = Pattern.compile("[';\\\\]"); + private String saneString(String s) { + return "\'"+badChars.matcher(s).replaceAll("?")+"\'"; + } + @SneakyThrows + @Override + public EdgeUrl getUrl(EdgeId id) { + try (var connection = dataSource.getConnection()) { + + try (var stmt = connection.createStatement()) { + var rsp = stmt.executeQuery("SELECT URL_PROTO, URL_DOMAIN,URL_PORT,URL_PATH FROM EC_URL_VIEW WHERE ID=" + id.getId()); + if (rsp.next()) { + return new EdgeUrl(rsp.getString(1), new EdgeDomain(rsp.getString(2)), rsp.getInt(3), rsp.getString(4)); + } + throw new NoSuchElementException(); + } + } + } + + @SneakyThrows + @Override + public EdgeUrlDetails getUrlDetails(EdgeId 
id) { + try (var connection = dataSource.getConnection()) { + + try (var stmt = connection.createStatement()) { + var rsp = stmt.executeQuery("SELECT ID,URL_PROTO,URL_DOMAIN,URL_PORT,URL_PATH,TITLE,DESCRIPTION,URL_QUALITY_MEASURE,DOMAIN_QUALITY_MEASURE,IFNULL(EC_DOMAIN_LINK_AGGREGATE.LINKS,1),WORDS_TOTAL,FORMAT,FEATURES,\"\",QUALITY_RAW,DOMAIN_STATE,DATA_HASH FROM EC_URL_VIEW LEFT JOIN EC_DOMAIN_LINK_AGGREGATE ON EC_DOMAIN_LINK_AGGREGATE.DOMAIN_ID=EC_URL_VIEW.DOMAIN_ID WHERE ID=" + id.getId()); + if (rsp.next()) { + EdgeUrl url = new EdgeUrl(rsp.getString(2), new EdgeDomain(rsp.getString(3)), rsp.getInt(4), rsp.getString(5)); + return new EdgeUrlDetails(rsp.getInt(1), url, rsp.getString(6), rsp.getString(7), rsp.getDouble(8), rsp.getDouble(15), rsp.getDouble(9), rsp.getInt(10), rsp.getInt(11), rsp.getString(12), rsp.getInt(13), EdgePageScoreAdjustment.zero(), Integer.MAX_VALUE, Double.MAX_VALUE, rsp.getString(14), rsp.getInt(16), 0, rsp.getInt(17)); + } + throw new NoSuchElementException(); + } + } + } + + + @SneakyThrows + @Override + public List getUrlDetailsMulti(List> ids) { + if (ids.isEmpty()) { + return Collections.emptyList(); + } + List result = new ArrayList<>(ids.size()); + + try (var connection = dataSource.getConnection()) { + // This is SQL-injection safe, the IDs are of type int + String idString = ids.stream().map(EdgeId::getId).map(Objects::toString).collect(Collectors.joining(",", "(", ")")); + + try (var stmt = connection.prepareStatement("SELECT ID,URL_PROTO,URL_DOMAIN,URL_PORT,URL_PATH,TITLE,DESCRIPTION,URL_QUALITY_MEASURE,DOMAIN_QUALITY_MEASURE,IFNULL(EC_DOMAIN_LINK_AGGREGATE.LINKS,1),WORDS_TOTAL,FORMAT,FEATURES,\"\",QUALITY_RAW,DOMAIN_STATE,DATA_HASH FROM EC_URL_VIEW LEFT JOIN EC_DOMAIN_LINK_AGGREGATE ON EC_DOMAIN_LINK_AGGREGATE.DOMAIN_ID=EC_URL_VIEW.DOMAIN_ID WHERE ID IN " + idString)) { + stmt.setFetchSize(ids.size()); + + var rsp = stmt.executeQuery(); + while (rsp.next()) { + EdgeUrl url = new EdgeUrl(rsp.getString(2), new 
EdgeDomain(rsp.getString(3)), rsp.getInt(4), rsp.getString(5)); + var val = new EdgeUrlDetails(rsp.getInt(1), url, rsp.getString(6), rsp.getString(7), rsp.getDouble(8), rsp.getDouble(15), rsp.getDouble(9), rsp.getInt(10), rsp.getInt(11), rsp.getString(12), rsp.getInt(13), EdgePageScoreAdjustment.zero(), Integer.MAX_VALUE, Double.MAX_VALUE, rsp.getString(14), rsp.getInt(16), 0, rsp.getInt(17)); + if (val.urlQuality >= QUALITY_LOWER_BOUND_CUTOFF) { + result.add(val); + } + + } + } + } + + return result; + } + + @Override + public List getDomainNeighbors(EdgeId domainId, EdgeDomainBlacklist blacklist, int count) { + final Set domains = new HashSet<>(count*3); + + final String q = "SELECT EC_DOMAIN.ID AS NEIGHBOR_ID, URL_PART from EC_DOMAIN_NEIGHBORS INNER JOIN EC_DOMAIN ON NEIGHBOR_ID=EC_DOMAIN.ID WHERE STATE<2 AND DOMAIN_ALIAS IS NULL AND EC_DOMAIN_NEIGHBORS.DOMAIN_ID = ? ORDER BY ADJ_IDX LIMIT ?"; + + try (var connection = dataSource.getConnection()) { + try (var stmt = connection.prepareStatement(q)) { + stmt.setFetchSize(count); + stmt.setInt(1, domainId.getId()); + stmt.setInt(2, count); + var rsp = stmt.executeQuery(); + while (rsp.next()) { + int id = rsp.getInt(1); + String domain = rsp.getString(2); + + if (!blacklist.isBlacklisted(id)) { + var url = new EdgeUrl(DEFAULT_PROTOCOL, new EdgeDomain(domain), null, "/"); + + domains.add(new BrowseResult(url, id)); + } + } + } + + final String q2 = "SELECT EC_DOMAIN.ID, URL_PART FROM EC_DOMAIN_LINK INNER JOIN EC_DOMAIN ON DEST_DOMAIN_ID=EC_DOMAIN.ID WHERE SOURCE_DOMAIN_ID=? 
AND STATE<2 AND DOMAIN_ALIAS IS NULL GROUP BY EC_DOMAIN.ID ORDER BY RANK ASC LIMIT ?"; + try (var stmt = connection.prepareStatement(q2)) { + + stmt.setFetchSize(count); + stmt.setInt(1, domainId.getId()); + stmt.setInt(2, count); + var rsp = stmt.executeQuery(); + while (rsp.next()) { + int id = rsp.getInt(1); + String domain = rsp.getString(2); + + if (!blacklist.isBlacklisted(id)) { + var url = new EdgeUrl(DEFAULT_PROTOCOL, new EdgeDomain(domain), null, "/"); + + domains.add(new BrowseResult(url, id)); + } + } + } + + final String q3 = "SELECT EC_DOMAIN.ID, URL_PART FROM EC_DOMAIN_LINK INNER JOIN EC_DOMAIN ON DEST_DOMAIN_ID=EC_DOMAIN.ID WHERE DEST_DOMAIN_ID=? AND STATE<2 AND DOMAIN_ALIAS IS NULL GROUP BY EC_DOMAIN.ID ORDER BY RANK ASC LIMIT ?"; + try (var stmt = connection.prepareStatement(q3)) { + stmt.setFetchSize(count); + stmt.setInt(1, domainId.getId()); + stmt.setInt(2, count); + + var rsp = stmt.executeQuery(); + while (rsp.next()) { + int id = rsp.getInt(1); + String domain = rsp.getString(2); + + if (!blacklist.isBlacklisted(id)) { + var url = new EdgeUrl(DEFAULT_PROTOCOL, new EdgeDomain(domain), null, "/"); + + domains.add(new BrowseResult(url, id)); + } + } + } + } catch (SQLException throwables) { + throwables.printStackTrace(); + } + + + return new ArrayList<>(domains); + } + + + @Override + public List getDomainNeighborsAdjacent(EdgeId domainId, EdgeDomainBlacklist blacklist, int count) { + final Set domains = new HashSet<>(count*3); + + final String q = """ + SELECT EC_DOMAIN.ID AS NEIGHBOR_ID, URL_PART, COUNT(*) AS CNT + FROM EC_DOMAIN_NEIGHBORS + INNER JOIN EC_DOMAIN ON NEIGHBOR_ID=EC_DOMAIN.ID + INNER JOIN DOMAIN_METADATA ON EC_DOMAIN.ID=DOMAIN_METADATA.ID + INNER JOIN EC_DOMAIN_LINK ON DEST_DOMAIN_ID=EC_DOMAIN.ID + WHERE + STATE<2 + AND KNOWN_URLS<1000 + AND DOMAIN_ALIAS IS NULL + AND EC_DOMAIN_NEIGHBORS.DOMAIN_ID = ? + GROUP BY EC_DOMAIN.ID + HAVING CNT < 100 + ORDER BY ADJ_IDX + LIMIT ? 
+ """; + + try (var connection = dataSource.getConnection()) { + try (var stmt = connection.prepareStatement(q)) { + stmt.setFetchSize(count); + stmt.setInt(1, domainId.getId()); + stmt.setInt(2, count); + var rsp = stmt.executeQuery(); + while (rsp.next()) { + int id = rsp.getInt(1); + String domain = rsp.getString(2); + + if (!blacklist.isBlacklisted(id)) { + var url = new EdgeUrl(DEFAULT_PROTOCOL, new EdgeDomain(domain), null, "/"); + + domains.add(new BrowseResult(url, id)); + } + } + } + + if (domains.size() < count/2) { + final String q2 = """ + SELECT EC_DOMAIN.ID, URL_PART + FROM EC_DOMAIN + INNER JOIN DOMAIN_METADATA ON EC_DOMAIN.ID=DOMAIN_METADATA.ID + INNER JOIN EC_DOMAIN_LINK B ON DEST_DOMAIN_ID=EC_DOMAIN.ID + INNER JOIN EC_DOMAIN_LINK O ON O.DEST_DOMAIN_ID=EC_DOMAIN.ID + WHERE B.SOURCE_DOMAIN_ID=? + AND STATE<2 + AND KNOWN_URLS<1000 + AND DOMAIN_ALIAS IS NULL + GROUP BY EC_DOMAIN.ID + HAVING COUNT(*) < 100 ORDER BY RANK ASC LIMIT ?"""; + try (var stmt = connection.prepareStatement(q2)) { + + stmt.setFetchSize(count/2); + stmt.setInt(1, domainId.getId()); + stmt.setInt(2, count/2 - domains.size()); + var rsp = stmt.executeQuery(); + while (rsp.next() && domains.size() < count/2) { + int id = rsp.getInt(1); + String domain = rsp.getString(2); + + if (!blacklist.isBlacklisted(id)) { + var url = new EdgeUrl(DEFAULT_PROTOCOL, new EdgeDomain(domain), null, "/"); + + domains.add(new BrowseResult(url, id)); + } + } + } + } + + if (domains.size() < count/2) { + final String q3 = """ + SELECT EC_DOMAIN.ID, URL_PART + FROM EC_DOMAIN + INNER JOIN DOMAIN_METADATA ON EC_DOMAIN.ID=DOMAIN_METADATA.ID + INNER JOIN EC_DOMAIN_LINK B ON B.SOURCE_DOMAIN_ID=EC_DOMAIN.ID + INNER JOIN EC_DOMAIN_LINK O ON O.DEST_DOMAIN_ID=EC_DOMAIN.ID + WHERE B.DEST_DOMAIN_ID=? 
+ AND STATE<2 + AND KNOWN_URLS<1000 + AND DOMAIN_ALIAS IS NULL + GROUP BY EC_DOMAIN.ID + HAVING COUNT(*) < 100 + ORDER BY RANK ASC + LIMIT ?"""; + try (var stmt = connection.prepareStatement(q3)) { + stmt.setFetchSize(count/2); + stmt.setInt(1, domainId.getId()); + stmt.setInt(2, count/2 - domains.size()); + + var rsp = stmt.executeQuery(); + while (rsp.next() && domains.size() < count/2) { + int id = rsp.getInt(1); + String domain = rsp.getString(2); + + if (!blacklist.isBlacklisted(id)) { + var url = new EdgeUrl(DEFAULT_PROTOCOL, new EdgeDomain(domain), null, "/"); + + domains.add(new BrowseResult(url, id)); + } + } + } + } + } catch (SQLException throwables) { + throwables.printStackTrace(); + } + + + return new ArrayList<>(domains); + } + + @Override + public List getRandomDomains(int count, EdgeDomainBlacklist blacklist) { + + final String q = "SELECT DOMAIN_ID,URL_PART FROM EC_RANDOM_DOMAINS INNER JOIN EC_DOMAIN ON EC_DOMAIN.ID=DOMAIN_ID WHERE STATE<2 AND DOMAIN_ALIAS IS NULL ORDER BY RAND() LIMIT ?"; + List domains = new ArrayList<>(count); + try (var conn = dataSource.getConnection()) { + try (var stmt = conn.prepareStatement(q)) { + stmt.setInt(1, count); + var rsp = stmt.executeQuery(); + while (rsp.next()) { + int id = rsp.getInt(1); + String domain = rsp.getString(2); + + if (!blacklist.isBlacklisted(id)) { + var url = new EdgeUrl(DEFAULT_PROTOCOL, new EdgeDomain(domain), null, "/"); + + domains.add(new BrowseResult(url, id)); + } + } + } + } + catch (SQLException ex) { + logger.error("SQL error", ex); + } + return domains; + } + + @Override + @SneakyThrows + public EdgeDomain getDomain(EdgeId id) { + try (var connection = dataSource.getConnection()) { + + try (var stmt = connection.prepareStatement("SELECT URL_PART FROM EC_DOMAIN WHERE ID=?")) { + stmt.setInt(1, id.getId()); + var rsp = stmt.executeQuery(); + if (rsp.next()) { + return new EdgeDomain(rsp.getString(1)); + } + throw new NoSuchElementException(); + } + } + } + + @Override @SneakyThrows + 
public List> inboudUrls(EdgeId id, int limit) { + + List> ret = new ArrayList<>(); + try (var connection = dataSource.getConnection()) { + + try (var stmt = + connection.prepareStatement("SELECT SRC_URL_ID FROM EC_RELATED_LINKS_IN WHERE DEST_URL_ID=? ORDER BY SRC_URL_QUALITY DESC LIMIT ?")) { + stmt.setFetchSize(limit); + stmt.setInt(1, id.getId()); + stmt.setInt(2, limit); + var rsp = stmt.executeQuery(); + while (rsp.next()) { + ret.add(new EdgeId<>(rsp.getInt(1))); + } + } + + } + + return ret; + } + + + @Override @SneakyThrows + public List> outboundUrls(EdgeId id, int limit) { + + List> ret = new ArrayList<>(); + try (var connection = dataSource.getConnection()) { + + try (var stmt = + connection.prepareStatement("SELECT DEST_URL_ID FROM EC_RELATED_LINKS_IN WHERE SRC_URL_ID=? ORDER BY SRC_URL_QUALITY DESC LIMIT ?")) { + stmt.setFetchSize(limit); + stmt.setInt(1, id.getId()); + stmt.setInt(2, limit); + var rsp = stmt.executeQuery(); + while (rsp.next()) { + ret.add(new EdgeId<>(rsp.getInt(1))); + } + } + + } + + return ret; + } + + @Override + public Optional> resolveAmbiguousDomain(String name) { + try (var connection = dataSource.getConnection()) { + try (var stmt = connection.prepareStatement("SELECT IFNULL(DOMAIN_ALIAS,ID) FROM EC_DOMAIN WHERE URL_PART=?")) { + stmt.setString(1, name); + var rsp = stmt.executeQuery(); + if (rsp.next()) { + return Optional.of(new EdgeId<>(rsp.getInt(1))); + } + } + + try (var stmt = connection.prepareStatement("SELECT IFNULL(DOMAIN_ALIAS,ID) FROM EC_DOMAIN WHERE URL_PART=?")) { + stmt.setString(1, "https://"+name); + var rsp = stmt.executeQuery(); + if (rsp.next()) { + return Optional.of(new EdgeId<>(rsp.getInt(1))); + } + } + + try (var stmt = connection.prepareStatement("SELECT IFNULL(DOMAIN_ALIAS,ID) FROM EC_DOMAIN WHERE URL_PART=?")) { + stmt.setString(1, "http://"+name); + var rsp = stmt.executeQuery(); + if (rsp.next()) { + return Optional.of(new EdgeId<>(rsp.getInt(1))); + } + } + + try (var stmt = 
connection.prepareStatement("SELECT IFNULL(DOMAIN_ALIAS,ID) FROM EC_DOMAIN WHERE URL_PART=?")) { + stmt.setString(1, "https://www."+name); + var rsp = stmt.executeQuery(); + if (rsp.next()) { + return Optional.of(new EdgeId<>(rsp.getInt(1))); + } + } + + try (var stmt = connection.prepareStatement("SELECT IFNULL(DOMAIN_ALIAS,ID) FROM EC_DOMAIN WHERE URL_PART=?")) { + stmt.setString(1, "http://www."+name); + var rsp = stmt.executeQuery(); + if (rsp.next()) { + return Optional.of(new EdgeId<>(rsp.getInt(1))); + } + } + + } catch (SQLException throwables) { + logger.info("Could not resolve domain id for {}", name); + } + + return Optional.empty(); + } + + @SneakyThrows + @Override + public void putDomainAlias(EdgeDomain src, EdgeDomain dst) { + try (var connection = dataSource.getConnection()) { + + for (int i = 0; i < DB_LOCK_RETRIES; i++) { + connection.setAutoCommit(false); + + if (!DOMAINS_INSERTED_CACHE.contains(dst)) { + insert(connection, new EdgeDomain[] { dst }, getDomainQuality(connection, src)); + } + + try (var stmt = connection.prepareStatement("UPDATE EC_DOMAIN AS D, EC_DOMAIN AS S SET S.DOMAIN_ALIAS=D.ID WHERE S.URL_PART=? 
AND D.URL_PART=?")) { + stmt.setString(1, src.toString()); + stmt.setString(2, dst.toString()); + stmt.executeUpdate(); + connection.commit(); + break; + } catch (SQLException ex) { + logger.error("DB Error", ex); + connection.rollback(); + } + finally { + connection.setAutoCommit(true); + } + } + + } + + } + + @SneakyThrows + @Override + public int getPagesKnown(EdgeId domainId) { + try (var connection = dataSource.getConnection()) { + + try (var stmt = connection.prepareStatement("SELECT KNOWN_URLS FROM DOMAIN_METADATA WHERE ID=?")) { + stmt.setInt(1, domainId.getId()); + var rsp = stmt.executeQuery(); + if (rsp.next()) { + return rsp.getInt(1); + } + } catch (Exception ex) { + logger.error("DB error", ex); + } + return 0; + } + } + + @SneakyThrows + @Override + public int getPagesVisited(EdgeId domainId) { + try (var connection = dataSource.getConnection()) { + + try (var stmt = connection.prepareStatement("SELECT VISITED_URLS FROM DOMAIN_METADATA WHERE ID=?")) { + stmt.setInt(1, domainId.getId()); + var rsp = stmt.executeQuery(); + if (rsp.next()) { + return rsp.getInt(1); + } + } catch (Exception ex) { + logger.error("DB error", ex); + } + return 0; + } + } + + + @SneakyThrows + @Override + public int getPagesIndexed(EdgeId domainId) { + try (var connection = dataSource.getConnection()) { + + try (var stmt = connection.prepareStatement("SELECT GOOD_URLS FROM DOMAIN_METADATA WHERE ID=?")) { + stmt.setInt(1, domainId.getId()); + var rsp = stmt.executeQuery(); + if (rsp.next()) { + return rsp.getInt(1); + } + } catch (Exception ex) { + logger.error("DB error", ex); + } + return 0; + } + } + + @SneakyThrows + @Override + public int getIncomingLinks(EdgeId domainId) { + try (var connection = dataSource.getConnection()) { + + try (var stmt = connection.prepareStatement("SELECT COUNT(ID) FROM EC_DOMAIN_LINK WHERE DEST_DOMAIN_ID=?")) { + stmt.setInt(1, domainId.getId()); + var rsp = stmt.executeQuery(); + if (rsp.next()) { + return rsp.getInt(1); + } + } catch 
(Exception ex) { + logger.error("DB error", ex); + } + return 0; + } + } + @SneakyThrows + @Override + public int getOutboundLinks(EdgeId domainId) { + try (var connection = dataSource.getConnection()) { + + try (var stmt = connection.prepareStatement("SELECT COUNT(ID) FROM EC_DOMAIN_LINK WHERE SOURCE_DOMAIN_ID=?")) { + stmt.setInt(1, domainId.getId()); + var rsp = stmt.executeQuery(); + if (rsp.next()) { + return rsp.getInt(1); + } + } catch (Exception ex) { + logger.error("DB error", ex); + } + return 0; + } + } + + @SneakyThrows + @Override + public double getDomainQuality(EdgeId domainId) { + try (var connection = dataSource.getConnection()) { + + try (var stmt = connection.prepareStatement("SELECT QUALITY FROM EC_DOMAIN WHERE ID=?")) { + stmt.setInt(1, domainId.getId()); + var rsp = stmt.executeQuery(); + if (rsp.next()) { + return rsp.getDouble(1); + } + } catch (Exception ex) { + logger.error("DB error", ex); + } + return -5; + } + } + + @Override + public EdgeDomainIndexingState getDomainState(EdgeId domainId) { + try (var connection = dataSource.getConnection()) { + + try (var stmt = connection.prepareStatement("SELECT STATE FROM EC_DOMAIN WHERE ID=?")) { + stmt.setInt(1, domainId.getId()); + var rsp = stmt.executeQuery(); + if (rsp.next()) { + return EdgeDomainIndexingState.fromCode(rsp.getInt(1)); + } + } catch (Exception ex) { + logger.error("DB error", ex); + } + } catch (SQLException throwables) { + throwables.printStackTrace(); + } + return EdgeDomainIndexingState.ERROR; + } + + @Override + public List getLinkingDomains(EdgeId domainId) { + try (var connection = dataSource.getConnection()) { + List results = new ArrayList<>(25); + try (var stmt = connection.prepareStatement("SELECT SOURCE_URL FROM EC_RELATED_LINKS_VIEW WHERE DEST_DOMAIN_ID=? 
ORDER BY SOURCE_DOMAIN_ID LIMIT 25")) { + stmt.setInt(1, domainId.getId()); + var rsp = stmt.executeQuery(); + while (rsp.next()) { + results.add(new EdgeDomain(rsp.getString(1))); + } + return results; + } catch (Exception ex) { + logger.error("DB error", ex); + } + + } catch (SQLException throwables) { + throwables.printStackTrace(); + } + return Collections.emptyList(); + } + + @Override + public List getNewUrls(EdgeId domainId, Collection links) { + Map edgeUrlByPath = links.stream().collect(Collectors.toMap(EdgeUrl::getPath, Function.identity(), (a,b)->a)); + + try (var connection = dataSource.getConnection()) { + try (var stmt = connection.prepareStatement("SELECT URL FROM EC_URL WHERE DOMAIN_ID=?")) { + stmt.setFetchSize(500); + stmt.setInt(1, domainId.getId()); + var rs = stmt.executeQuery(); + while (rs.next()) { + edgeUrlByPath.remove(rs.getString(1)); + } + } + } + catch (Exception ex) { + return Collections.emptyList(); + } + return new ArrayList<>(edgeUrlByPath.values()); + + } + + @Override + public double getRank(EdgeId domainId) { + try (var connection = dataSource.getConnection()) { + + try (var stmt = connection.prepareStatement("SELECT IFNULL(RANK, 1) FROM EC_DOMAIN WHERE ID=?")) { + stmt.setInt(1, domainId.getId()); + var rsp = stmt.executeQuery(); + if (rsp.next()) { + return rsp.getDouble(1); + } + } catch (Exception ex) { + logger.error("DB error", ex); + } + } catch (SQLException throwables) { + throwables.printStackTrace(); + } + return 1; + } + + @Override + public void updateDomainIndexTimestamp(EdgeDomain domain, EdgeDomainIndexingState state, EdgeDomain alias, int minIndexed) { + try (var connection = dataSource.getConnection(); + var stmt = connection.prepareStatement("UPDATE EC_DOMAIN SET INDEX_DATE=NOW(), STATE=?, DOMAIN_ALIAS=?, INDEXED=GREATEST(INDEXED,?) 
WHERE ID=?")) { + stmt.setInt(1, state.code); + if (null == alias) { + stmt.setNull(2, Types.INTEGER); + } + else { + stmt.setInt(2, getDomainId(alias).getId()); + } + + stmt.setInt(3, minIndexed); + stmt.setInt(4, getDomainId(domain).getId()); + stmt.executeUpdate(); + connection.commit(); + } + catch (SQLException throwables) { + logger.error("SQL error", throwables); + } + } + + @SneakyThrows + private double getDomainQuality(Connection connection, EdgeDomain src) { + try (var stmt = connection.prepareStatement("SELECT QUALITY_RAW FROM EC_DOMAIN WHERE URL_PART=?")) { + stmt.setString(1, src.toString()); + var res = stmt.executeQuery(); + + if (res.next()) { + var q = res.getDouble(1); + if (q > 0.5) { + logger.warn("gDQ({}) -> 1", src); + } + return 0; + } + } + catch (SQLException ex) { + logger.error("DB error", ex); + } + + return -5; + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/data/dao/EdgeDataStoreTaskDao.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/data/dao/EdgeDataStoreTaskDao.java new file mode 100644 index 00000000..3bcfc79f --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/data/dao/EdgeDataStoreTaskDao.java @@ -0,0 +1,18 @@ +package nu.marginalia.wmsa.edge.data.dao; + +import com.google.inject.ImplementedBy; +import nu.marginalia.wmsa.edge.data.dao.task.EdgeDataStoreTaskDaoImpl; +import nu.marginalia.wmsa.edge.model.EdgeDomain; +import nu.marginalia.wmsa.edge.model.crawl.EdgeDomainIndexingState; +import nu.marginalia.wmsa.edge.model.crawl.EdgeIndexTask; + + +@ImplementedBy(EdgeDataStoreTaskDaoImpl.class) +public interface EdgeDataStoreTaskDao { + EdgeIndexTask getIndexTask(int pass, int limit); + EdgeIndexTask getDiscoverTask(); + void finishIndexTask(EdgeDomain domain, double quality, EdgeDomainIndexingState state); + void finishBadIndexTask(EdgeDomain domain, EdgeDomainIndexingState state); + void flushOngoingJobs(); + boolean isBlocked(); +} diff --git 
a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/data/dao/task/EdgeDataStoreTaskDaoImpl.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/data/dao/task/EdgeDataStoreTaskDaoImpl.java new file mode 100644 index 00000000..e1bd5f6d --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/data/dao/task/EdgeDataStoreTaskDaoImpl.java @@ -0,0 +1,496 @@ +package nu.marginalia.wmsa.edge.data.dao.task; + +import com.google.inject.Inject; +import com.zaxxer.hikari.HikariDataSource; +import io.prometheus.client.Gauge; +import io.reactivex.rxjava3.schedulers.Schedulers; +import lombok.AllArgsConstructor; +import lombok.SneakyThrows; +import nu.marginalia.wmsa.configuration.server.Initialization; +import nu.marginalia.wmsa.edge.data.dao.EdgeDataStoreDaoImpl; +import nu.marginalia.wmsa.edge.data.dao.EdgeDataStoreTaskDao; +import nu.marginalia.wmsa.edge.model.*; +import nu.marginalia.wmsa.edge.model.crawl.EdgeDomainIndexingState; +import nu.marginalia.wmsa.edge.model.crawl.EdgeIndexTask; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ArrayList; +import java.util.List; +import java.util.Optional; +import java.util.concurrent.LinkedBlockingDeque; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.Semaphore; +import java.util.concurrent.TimeUnit; + +import static java.sql.Connection.TRANSACTION_READ_COMMITTED; +import static java.sql.Connection.TRANSACTION_READ_UNCOMMITTED; + +public class EdgeDataStoreTaskDaoImpl implements EdgeDataStoreTaskDao { + + private final HikariDataSource dataSource; + private final Logger logger = LoggerFactory.getLogger(getClass()); + private static final Gauge wmsa_index_task_quality = Gauge.build("wmsa_discover_index_task_quality", "wmsa_discover_index_task_quality").labelNames("depth").register(); + + private final LinkedBlockingQueue discoverDomainQueue = new LinkedBlockingQueue<>(200); + private final LinkedBlockingQueue indexDomainQueue1 = new 
LinkedBlockingQueue<>(200); + + private final EdgeDomainBlacklist blacklist; + private final EdgeDataStoreTaskTuner taskQueryTuner; + private final EdgeDataStoreDaoImpl baseDao; + private final EdgeDataStoreTaskOngoingJobs ongoingJobs; + private final EdgeFinishTasksQueue finishTasksQueue; + private final Initialization initialization; + private final Semaphore taskFetchSem = new Semaphore(3, true); + private final LinkedBlockingDeque blockingJobs = new LinkedBlockingDeque<>(); + + + @Inject + public EdgeDataStoreTaskDaoImpl(HikariDataSource dataSource, + EdgeDomainBlacklist blacklist, + EdgeDataStoreTaskTuner taskQueryTuner, + EdgeDataStoreTaskOngoingJobs ongoingJobs, + EdgeFinishTasksQueue finishTasksQueue, + Initialization initialization) + { + this.dataSource = dataSource; + baseDao = new EdgeDataStoreDaoImpl(dataSource); + this.blacklist = blacklist; + this.taskQueryTuner = taskQueryTuner; + this.ongoingJobs = ongoingJobs; + this.finishTasksQueue = finishTasksQueue; + this.initialization = initialization; + + Schedulers.io().schedulePeriodicallyDirect(this::repopulateUrlLinkDensity, 7, 360, TimeUnit.MINUTES); +// Schedulers.io().schedulePeriodicallyDirect(this::blacklistLinkfarms, 60, 600, TimeUnit.SECONDS); + + var updateDiscoverQueue = new Thread(this::updateDiscoverQueue, "UpdateDiscoverQueue"); + updateDiscoverQueue.setDaemon(true); + updateDiscoverQueue.start(); + + var updateIndexQueue = new Thread(this::updateIndexQueue, "UpdateIndexQueue"); + updateIndexQueue.setDaemon(true); + updateIndexQueue.start(); + + } + + @Override + public boolean isBlocked() { + return !blockingJobs.isEmpty(); + } + + + @SneakyThrows + private void blacklistLinkfarms() { + try (var connection = dataSource.getConnection()) { + connection.setAutoCommit(false); + + List ids = new ArrayList<>(1000); + + try (var stmt = connection.prepareStatement("SELECT SQL_BUFFER_RESULT URL_TOP_DOMAIN_ID from EC_DOMAIN USE INDEX(EC_DOMAIN_ID_INDEXED_INDEX) WHERE INDEXED>=1 GROUP BY 
URL_TOP_DOMAIN_ID HAVING COUNT(ID)>100")) { + connection.setTransactionIsolation(TRANSACTION_READ_UNCOMMITTED); + var rsp = stmt.executeQuery(); + while (rsp.next()) { + ids.add(rsp.getInt(1)); + } + } + catch (Exception ex) { + logger.error("DB error", ex); + return; + } + finally { + connection.setTransactionIsolation(TRANSACTION_READ_COMMITTED); + } + + try (var stmt = connection.prepareStatement("UPDATE EC_TOP_DOMAIN SET ALIVE=0 WHERE ID=?")) { + for (int id : ids) { + stmt.setInt(1, id); + stmt.addBatch(); + } + stmt.executeBatch(); + connection.commit(); + } + catch (Exception ex) { + logger.error("DB error", ex); + connection.rollback(); + } + } + } + + public synchronized void clearCaches() + { + ongoingJobs.clear(); + } + + + @SneakyThrows + private EdgeIndexTask updateIndexQueue() { + + List fetchedDomains = new ArrayList<>(100); + + initialization.waitReady(); + + for (;;) { + if (!blockingJobs.isEmpty()) { + Thread.sleep(1000); + continue; + } + + try (var connection = dataSource.getConnection()) { + try (var stmt = + connection.prepareStatement("SELECT ID,URL_PART,IFNULL(RANK,1) FROM EC_DOMAIN USE INDEX(EC_DOMAIN_TRIO) WHERE DOMAIN_ALIAS IS NULL AND STATE=0 AND QUALITY > ? 
AND INDEXED = 1 ORDER BY QUALITY DESC LIMIT 100")) { + + stmt.setDouble(1, taskQueryTuner.getIndexQualityLimit()); + stmt.setFetchSize(100); + + var rsp = stmt.executeQuery(); + + while (rsp.next()) { + int domainId = rsp.getInt(1); + var domain = new EdgeDomain(rsp.getString(2)); + + if (blacklist.isBlacklisted(domainId)) { + finishBadIndexTask(domain, EdgeDomainIndexingState.BLOCKED); + continue; + } + + if (!ongoingJobs.isOngoing(domain)) { + fetchedDomains.add(new EdgeDomainDiscoverTask(domain, rsp.getInt(1), rsp.getDouble(3))); + } + } + } catch (Exception ex) { + logger.error("DB error", ex); + } + } + + indexDomainQueue1.removeIf(d -> ongoingJobs.isOngoing(d.domain)); + + for (var d : fetchedDomains) { + if (!blacklist.isBlacklisted(d.id)) { + if (!indexDomainQueue1.contains(d) && !ongoingJobs.isOngoing(d.domain)) { + indexDomainQueue1.put(d); + } + } + else { + finishBadIndexTask(d.domain, EdgeDomainIndexingState.BLOCKED); + } + } + fetchedDomains.clear(); + } + } + + + @Override + @SneakyThrows + public EdgeIndexTask getIndexTask(int pass, int limit) { + + if (!blockingJobs.isEmpty()) { + return new EdgeIndexTask(null, 0, limit, 1.); + } + boolean acquired = taskFetchSem.tryAcquire(); + if (!acquired) { + return new EdgeIndexTask(null, 0, 1, 1.); + } + + try { + + if (pass == 1) { + var task = tryGetIndexTask(0, pass, limit); + if (task.isPresent()) { + return task.get(); + } + } + + try (var connection = dataSource.getConnection()) { + + for (double adj = 1; adj < 10; adj *= 1.5) { + try (var stmt = + connection.prepareStatement("SELECT ID,URL_PART,INDEXED,QUALITY,IFNULL(RANK,1) FROM EC_DOMAIN USE INDEX(EC_DOMAIN_TRIO) WHERE DOMAIN_ALIAS IS NULL AND STATE=0 AND QUALITY > ? AND INDEXED > ? AND INDEXED <= ? 
ORDER BY QUALITY DESC LIMIT 100")) { + stmt.setFetchSize(100); + stmt.setDouble(1, taskQueryTuner.getIndexQualityLimit() - (adj - 1) - Math.random()); + stmt.setInt(2, pass / 10); + stmt.setInt(3, pass); + var rsp = stmt.executeQuery(); + while (rsp.next()) { + var domain = new EdgeDomain(rsp.getString(2)); + int domainId = rsp.getInt(1); + + if (blacklist.isBlacklisted(domainId)) { + finishBadIndexTask(domain, EdgeDomainIndexingState.BLOCKED); + continue; + } + + if (ongoingJobs.add(domain)) { + var task = getUrlsForIndexTask(domain, domainId, rsp.getInt(3), limit, rsp.getDouble(5)); + + if (task.isEmpty()) { + finishBadIndexTask(domain, EdgeDomainIndexingState.EXHAUSTED); + } else { + wmsa_index_task_quality + .labels(String.format("%02d", pass)) + .set(rsp.getDouble(4)); + + return task; + } + } + } + } catch (Exception ex) { + logger.error("DB error", ex); + } + } + + return new EdgeIndexTask(null, 0, limit, 1.); + } + } + finally { + taskFetchSem.release(); + } + } + + private Optional tryGetIndexTask(int attempt, int pass, int limit) { + if (attempt > 5) { + return Optional.of(new EdgeIndexTask(null, 1, limit, 1.)); + } + + var t = indexDomainQueue1.poll(); + + if (t != null) { + if (!validateIndexedState(t.id, 1)) { + return tryGetIndexTask(attempt + 1, pass, limit); + } + if (!ongoingJobs.add(t.domain)) { + return tryGetIndexTask(attempt + 1, pass, limit); + } + + var task = getUrlsForIndexTask(t.domain, t.id, pass, limit, t.rank); + + if (task.isEmpty()) { + finishBadIndexTask(t.domain, EdgeDomainIndexingState.EXHAUSTED); + return tryGetIndexTask(attempt + 1, pass, limit); + } + return Optional.of(task); + } + return Optional.of(new EdgeIndexTask(null, 1, limit, 1.)); + } + + + @SneakyThrows + private EdgeIndexTask updateDiscoverQueue() { + + List fetchedDomains = new ArrayList<>(100); + + initialization.waitReady(); + + for (;;) { + if (!blockingJobs.isEmpty()) { + Thread.sleep(1000); + continue; + } + + try (var connection = dataSource.getConnection()) { 
+ try (var stmt = + connection.prepareStatement("SELECT EC_DOMAIN.ID,EC_DOMAIN.URL_PART,IFNULL(RANK, 1) FROM EC_DOMAIN USE INDEX(EC_DOMAIN_TRIO) INNER JOIN EC_TOP_DOMAIN ON EC_TOP_DOMAIN.ID=URL_TOP_DOMAIN_ID WHERE DOMAIN_ALIAS IS NULL AND STATE=0 AND QUALITY > ? AND INDEXED = 0 AND ALIVE = 1 ORDER BY QUALITY DESC LIMIT 100")) { + + stmt.setDouble(1, taskQueryTuner.getDiscoverQualityLimit()); + stmt.setFetchSize(100); + + var rsp = stmt.executeQuery(); + + while (rsp.next()) { + var domain = new EdgeDomain(rsp.getString(2)); + + if (!ongoingJobs.isOngoing(domain)) { + fetchedDomains.add(new EdgeDomainDiscoverTask(domain, rsp.getInt(1), rsp.getDouble(3))); + } + } + } catch (Exception ex) { + logger.error("DB error", ex); + } + } + + discoverDomainQueue.removeIf(d -> ongoingJobs.isOngoing(d.domain)); + + for (var d : fetchedDomains) { + if (!blacklist.isBlacklisted(d.id)) { + if (!discoverDomainQueue.contains(d) && !ongoingJobs.isOngoing(d.domain)) { + discoverDomainQueue.put(d); + } + } + else { + finishIndexTask(d.domain, -1000, EdgeDomainIndexingState.BLOCKED); + } + } + fetchedDomains.clear(); + } + } + + @Override + @SneakyThrows + public EdgeIndexTask getDiscoverTask() { + boolean acquired = taskFetchSem.tryAcquire(); + if (!acquired) { + return new EdgeIndexTask(null, 0, 1, 1.); + } + + try { + + if (!blockingJobs.isEmpty()) { + return new EdgeIndexTask(null, 0, 1, 1.); + } + + return tryGetDiscoverTask(0) + .orElseGet(() -> new EdgeIndexTask(null, 0, 1, 1.)); + } + finally { + taskFetchSem.release(); + } + } + + @SneakyThrows + private Optional tryGetDiscoverTask(int attempt) { + if (attempt > 5) { + return Optional.empty(); + } + var t = discoverDomainQueue.poll(50, TimeUnit.MILLISECONDS); + + if (t != null) { + if (!validateIndexedState(t.id, 0)) { + return tryGetDiscoverTask(attempt+1); + } + if (!ongoingJobs.add(t.domain)) { + return tryGetDiscoverTask(attempt+1); + } + + var task = getUrlsForIndexTask(t.domain, t.id, 0, 10, t.rank); + if (task.isEmpty()) 
{ + if (task.visited.isEmpty()) { + logger.warn("No url for {}", t.domain); + var rootUrl = new EdgeUrl("https", t.domain, null, "/"); + baseDao.putUrl(-5, rootUrl); + + task.urls.add(rootUrl); + } else { + ongoingJobs.remove(t.domain); + return tryGetDiscoverTask(attempt+1); + } + } + + return Optional.of(task); + } + return Optional.of(new EdgeIndexTask(null, 0, 1, 1.)); + } + + @Override + @SneakyThrows + public void finishIndexTask(EdgeDomain domain, double quality, EdgeDomainIndexingState state) { + finishTasksQueue.add(domain, quality, state); + } + @SneakyThrows + public void finishBadIndexTask(EdgeDomain domain, EdgeDomainIndexingState state) { + finishTasksQueue.addError(domain, state); + } + + @Override + public void flushOngoingJobs() { + ongoingJobs.clear(); + } + + private void repopulateUrlLinkDensity() { + try (var connection = dataSource.getConnection(); + var stmt = connection.createStatement() + ) { + blockingJobs.push("Repopulate URL Link Density"); + logger.info("Starting link details sync"); + stmt.executeUpdate("INSERT INTO EC_DOMAIN_LINK_AGGREGATE(DOMAIN_ID,LINKS) SELECT DEST_DOMAIN_ID AS ID, 100*SUM(EXP(EC_DOMAIN.QUALITY_RAW))/SQRT(GREATEST(1,COUNT(EC_DOMAIN.ID))) AS LINKS FROM EC_DOMAIN_LINK INNER JOIN EC_DOMAIN ON EC_DOMAIN.ID=EC_DOMAIN_LINK.SOURCE_DOMAIN_ID GROUP BY EC_DOMAIN_LINK.DEST_DOMAIN_ID ON DUPLICATE KEY UPDATE LINKS=VALUES(LINKS)"); + logger.info("Finished link details sync"); + } + catch (Exception ex) { + logger.error("DB error", ex); + } + finally { + blockingJobs.pop(); + } + } + + private boolean validateIndexedState(int domainId, int expected) { + try (var connection = dataSource.getConnection()) { + + try (var stmt = + connection.prepareStatement("select INDEXED from EC_DOMAIN WHERE ID=?")) { + stmt.setInt(1, domainId); + var rsp = stmt.executeQuery(); + if (rsp.next()) { + return rsp.getInt(1) == expected; + } + else { + return false; + } + } catch (Exception ex) { + logger.error("DB error", ex); + } + } + catch 
(Exception ex) { + logger.error("DB error", ex); + } + return false; + + } + + @SneakyThrows + private EdgeIndexTask getUrlsForIndexTask(EdgeDomain domain, int domainId, int pass, int limit, double rank) { + try (var connection = dataSource.getConnection()) { + + EdgeIndexTask indexTask = new EdgeIndexTask(domain, pass, limit, rank); + + try (var stmt = + connection.prepareStatement("select SQL_BUFFER_RESULT proto,url,port,visited from EC_URL USE INDEX (EC_URL_DOMAIN_ID) WHERE DOMAIN_ID=?")) { + stmt.setFetchSize(limit); + stmt.setInt(1, domainId); + var rsp = stmt.executeQuery(); + while (rsp.next()) { + var url = new EdgeUrl(rsp.getString(1), + domain, + rsp.getInt(3), + rsp.getString(2) + ); + + if (rsp.getBoolean(4)) { + indexTask.visited.add(url.hashCode()); + } else if (indexTask.urls.size() < limit) { + indexTask.urls.add(url); + } + } + } catch (Exception ex) { + logger.error("DB error", ex); + } + return indexTask; + } + } + + +} + +@AllArgsConstructor +class EdgeDomainDiscoverTask { + public final EdgeDomain domain; + public final int id; + public final double rank; + + @Override + public int hashCode() { + return domain.hashCode(); + } + + @Override + public boolean equals(Object other) { + if (other == this) { + return true; + } + if (other instanceof EdgeDomainDiscoverTask) { + EdgeDomainDiscoverTask o = (EdgeDomainDiscoverTask)other; + return id == o.id; + } + return false; + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/data/dao/task/EdgeDataStoreTaskOngoingJobs.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/data/dao/task/EdgeDataStoreTaskOngoingJobs.java new file mode 100644 index 00000000..ad3d4cfe --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/data/dao/task/EdgeDataStoreTaskOngoingJobs.java @@ -0,0 +1,49 @@ +package nu.marginalia.wmsa.edge.data.dao.task; + +import com.google.inject.Singleton; +import io.prometheus.client.Gauge; +import io.reactivex.rxjava3.schedulers.Schedulers; 
+import nu.marginalia.wmsa.edge.model.EdgeDomain; + +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.TimeUnit; + +@Singleton +public class EdgeDataStoreTaskOngoingJobs { + private final ConcurrentHashMap indexingDomains = new ConcurrentHashMap<>(); + private static final long STALE_JOB_TIMEOUT = 30*60; + + private static final Gauge wmsa_director_ongoing_jobs = Gauge.build("wmsa_director_ongoing_jobs", + "wmsa_director_ongoing_jobs").register(); + + public EdgeDataStoreTaskOngoingJobs() { + Schedulers.computation().schedulePeriodicallyDirect(this::purgeOngoingJobs, 60, 60, TimeUnit.SECONDS); + } + + + private void purgeOngoingJobs() { + final long now = System.currentTimeMillis(); + + indexingDomains + .entrySet() + .removeIf(e -> (now - e.getValue()) / 1000 >= STALE_JOB_TIMEOUT); + + wmsa_director_ongoing_jobs.set(indexingDomains.size()); + } + + public boolean isOngoing(EdgeDomain domain) { + return indexingDomains.containsKey(domain.getDomainKey()); + } + + public boolean add(EdgeDomain job) { + return indexingDomains.putIfAbsent(job.getDomainKey(), System.currentTimeMillis()) == null; + } + + public void clear() { + indexingDomains.clear(); + } + + public void remove(EdgeDomain domain) { + indexingDomains.remove(domain.getDomainKey()); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/data/dao/task/EdgeDataStoreTaskTuner.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/data/dao/task/EdgeDataStoreTaskTuner.java new file mode 100644 index 00000000..1e7da96d --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/data/dao/task/EdgeDataStoreTaskTuner.java @@ -0,0 +1,142 @@ +package nu.marginalia.wmsa.edge.data.dao.task; + +import com.google.inject.Inject; +import com.google.inject.Singleton; +import com.zaxxer.hikari.HikariDataSource; +import io.prometheus.client.Gauge; +import io.prometheus.client.Histogram; +import io.reactivex.rxjava3.schedulers.Schedulers; +import lombok.SneakyThrows; 
+import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.sql.PreparedStatement; +import java.util.concurrent.TimeUnit; + +@Singleton +public class EdgeDataStoreTaskTuner { + + private static final Gauge wmsa_discover_queue_discover_quality_limit = Gauge.build("wmsa_discover_queue_discover_quality_limit", + "wmsa_discover_queue_discover_quality_limit").register(); + private static final Gauge wmsa_discover_queue_index_quality_limit = Gauge.build("wmsa_discover_queue_index_quality_limit", + "wmsa_discover_queue_index_quality_limit").register(); + private static final Gauge wmsa_discover_queue_discover_quality_pool_size = Gauge.build("wmsa_discover_queue_discover_quality_pool_size", + "wmsa_discover_queue_discover_quality_pool_size").register(); + private static final Gauge wmsa_discover_queue_index_quality_pool_size = Gauge.build("wmsa_discover_queue_index_quality_pool_size", + "wmsa_discover_queue_index_quality_pool_size").register(); + private static final Histogram wmsa_discover_queue_tune_time = Histogram.build("wmsa_discover_queue_tune_time", + "wmsa_discover_queue_tune_time").register(); + private static final int INDEX_TARGET = 50; + private static final int DISCOVER_TARGET = 100; + + + private volatile double discoverQualityLimit = -2.; + private volatile double indexQualityLimit = -2.; + + private final HikariDataSource dataSource; + private Logger logger = LoggerFactory.getLogger(getClass()); + + @Inject + public EdgeDataStoreTaskTuner(HikariDataSource dataSource) { + this.dataSource = dataSource; + + Schedulers.io().schedulePeriodicallyDirect(this::tuneDiscoverQualityLimit, 1, 5, TimeUnit.SECONDS); + } + + public double getIndexQualityLimit() { + return indexQualityLimit; + } + public double getDiscoverQualityLimit() { + return discoverQualityLimit; + } + + @SneakyThrows + private double linearSearchBounds(PreparedStatement stmt, + int target, + double delta, + double min, + double max) { + for (double i = max; i >= min; i-=delta) { + int 
cnt; + stmt.setDouble(1, i); + try (var rsp = stmt.executeQuery()) { + rsp.next(); + cnt = rsp.getInt(1); + } + if (cnt >= target) { + return i; + } + } + + return min; + } + @SneakyThrows + private double binarySearchBounds(PreparedStatement stmt, + int target, + double eps, + double min, + double max) { + while (max - min >= eps) { + double v = (max + min)/2; + stmt.setDouble(1, v); + int cnt; + try (var rsp = stmt.executeQuery()) { + rsp.next(); + cnt = rsp.getInt(1); + } + + if (cnt == target) { + return v; + } else if (cnt > target) { + min = v; + } else { + max = v; + } + } + return min; + } + + @SneakyThrows + private void tuneDiscoverQualityLimit() { + var timer = wmsa_discover_queue_tune_time.startTimer(); + + + try (var connection = dataSource.getConnection()) { + + double delta = 0.1; + double epsilon = 0.000001; + try (var stmt = + connection.prepareStatement("SELECT COUNT(EC_DOMAIN.ID) FROM EC_DOMAIN USE INDEX(EC_DOMAIN_TRIO) WHERE DOMAIN_ALIAS IS NULL AND STATE = 0 AND QUALITY > ? AND INDEXED = 1")) { + + double lower = linearSearchBounds(stmt, INDEX_TARGET, delta, -100, 0); + indexQualityLimit = binarySearchBounds(stmt, INDEX_TARGET, epsilon, lower, lower+delta); + wmsa_discover_queue_index_quality_limit.set(indexQualityLimit); + + var rsp = stmt.executeQuery(); + rsp.next(); + wmsa_discover_queue_index_quality_pool_size.set(rsp.getInt(1)); + } + + + try (var stmt = + connection.prepareStatement("SELECT COUNT(EC_DOMAIN.ID) FROM EC_DOMAIN USE INDEX(EC_DOMAIN_TRIO) INNER JOIN EC_TOP_DOMAIN ON EC_TOP_DOMAIN.ID=URL_TOP_DOMAIN_ID WHERE ALIVE = 1 AND DOMAIN_ALIAS IS NULL AND STATE = 0 AND QUALITY > ? 
AND INDEXED = 0")) { + + double lower = linearSearchBounds(stmt, DISCOVER_TARGET, delta, -100, 0); + discoverQualityLimit = binarySearchBounds(stmt, DISCOVER_TARGET, epsilon, lower, lower+delta); + + wmsa_discover_queue_discover_quality_limit.set(discoverQualityLimit); + + var rsp = stmt.executeQuery(); + rsp.next(); + wmsa_discover_queue_discover_quality_pool_size.set(rsp.getInt(1)); + } + + } + catch (Exception ex) { + logger.error("Failed to tune quality limits", ex); + } + + timer.observeDuration(); + + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/data/dao/task/EdgeDomainBlacklist.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/data/dao/task/EdgeDomainBlacklist.java new file mode 100644 index 00000000..fa1899b1 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/data/dao/task/EdgeDomainBlacklist.java @@ -0,0 +1,17 @@ +package nu.marginalia.wmsa.edge.data.dao.task; + +import com.google.inject.ImplementedBy; +import gnu.trove.set.hash.TIntHashSet; +import nu.marginalia.wmsa.edge.model.EdgeDomain; +import nu.marginalia.wmsa.edge.model.EdgeId; + +@ImplementedBy(EdgeDomainBlacklistImpl.class) +public interface EdgeDomainBlacklist { + boolean isBlacklisted(int domainId); + default boolean isBlacklisted(EdgeId domainId) { + return isBlacklisted(domainId.getId()); + } + default TIntHashSet getSpamDomains() { + return new TIntHashSet(); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/data/dao/task/EdgeDomainBlacklistImpl.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/data/dao/task/EdgeDomainBlacklistImpl.java new file mode 100644 index 00000000..ea89e7fb --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/data/dao/task/EdgeDomainBlacklistImpl.java @@ -0,0 +1,77 @@ +package nu.marginalia.wmsa.edge.data.dao.task; + +import com.google.inject.Inject; +import com.google.inject.Singleton; +import com.zaxxer.hikari.HikariDataSource; +import 
gnu.trove.set.hash.TIntHashSet; +import io.prometheus.client.Counter; +import io.reactivex.rxjava3.schedulers.Schedulers; +import lombok.SneakyThrows; +import nu.marginalia.wmsa.edge.model.EdgeDomain; +import nu.marginalia.wmsa.edge.model.EdgeId; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.concurrent.TimeUnit; + +@Singleton +public class EdgeDomainBlacklistImpl implements EdgeDomainBlacklist { + private volatile TIntHashSet spamDomainSet = new TIntHashSet(); + private final HikariDataSource dataSource; + private final Logger logger = LoggerFactory.getLogger(getClass()); + + private static final Counter wmsa_blacklist_intercept = Counter.build("wmsa_blacklist_intercept", + "wmsa_blacklist_intercept").register(); + @Inject + public EdgeDomainBlacklistImpl(HikariDataSource dataSource) { + this.dataSource = dataSource; + + Schedulers.io().schedulePeriodicallyDirect(this::updateSpamList, 5, 600, TimeUnit.SECONDS); + + updateSpamList(); + } + + private void updateSpamList() { + try { + int oldSetSize = spamDomainSet.size(); + + spamDomainSet = getSpamDomains(); + + if (oldSetSize == 0) { + logger.info("Synchronized {} spam domains", spamDomainSet.size()); + } + } + catch (Exception ex) { + logger.error("Failed to synchronize spam domains", ex); + } + } + + + @SneakyThrows + public TIntHashSet getSpamDomains() { + final TIntHashSet result = new TIntHashSet(1_000_000); + + try (var connection = dataSource.getConnection()) { + try (var stmt = connection.prepareStatement("SELECT EC_DOMAIN.ID FROM EC_DOMAIN INNER JOIN EC_TOP_DOMAIN ON EC_DOMAIN.URL_TOP_DOMAIN_ID = EC_TOP_DOMAIN.ID INNER JOIN EC_DOMAIN_BLACKLIST ON EC_DOMAIN_BLACKLIST.URL_DOMAIN = EC_TOP_DOMAIN.URL_PART")) { + stmt.setFetchSize(1000); + var rsp = stmt.executeQuery(); + while (rsp.next()) { + result.add(rsp.getInt(1)); + } + } + } + + return result; + } + + @Override + public boolean isBlacklisted(int domainId) { + if (spamDomainSet.contains(domainId)) { + 
wmsa_blacklist_intercept.inc(); + return true; + } + + return false; + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/data/dao/task/EdgeFinishTasksQueue.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/data/dao/task/EdgeFinishTasksQueue.java new file mode 100644 index 00000000..d374eea4 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/data/dao/task/EdgeFinishTasksQueue.java @@ -0,0 +1,91 @@ +package nu.marginalia.wmsa.edge.data.dao.task; + +import com.google.inject.Inject; +import com.google.inject.Singleton; +import com.zaxxer.hikari.HikariDataSource; +import lombok.AllArgsConstructor; +import nu.marginalia.wmsa.edge.model.EdgeDomain; +import nu.marginalia.wmsa.edge.model.crawl.EdgeDomainIndexingState; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.concurrent.LinkedBlockingQueue; + +@Singleton +public class EdgeFinishTasksQueue { + private final HikariDataSource dataSource; + private final EdgeDataStoreTaskOngoingJobs ongoingJobs; + private final LinkedBlockingQueue finishTasksQueue = new LinkedBlockingQueue<>(10); + private final Logger logger = LoggerFactory.getLogger(getClass()); + + @Inject + public EdgeFinishTasksQueue(HikariDataSource dataSource, EdgeDataStoreTaskOngoingJobs ongoingJobs) { + this.dataSource = dataSource; + this.ongoingJobs = ongoingJobs; + + var finishIndexTasks = new Thread(this::finishIndexTasks, "FinishIndexTasks"); + finishIndexTasks.setDaemon(true); + finishIndexTasks.start(); + } + + + private void finishIndexTasks() { + for (;;) { + try (var connection = dataSource.getConnection()) { + + var task = finishTasksQueue.take(); + + connection.setAutoCommit(false); + + if (task.quality != null) { + + try (var stmt = + connection.prepareStatement("UPDATE EC_DOMAIN LEFT JOIN (SELECT DOMAIN_ID, AVG(QUALITY_MEASURE) AVGQ FROM EC_URL GROUP BY DOMAIN_ID) QUALITY ON EC_DOMAIN.ID = QUALITY.DOMAIN_ID SET INDEXED=INDEXED+1, INDEX_DATE=NOW(), 
QUALITY=IFNULL(AVGQ,?-INDEXED/2)*IFNULL(RANK,1), QUALITY_RAW=IFNULL(AVGQ,-5), STATE=? WHERE EC_DOMAIN.URL_PART=?")) { + stmt.setDouble(1, task.quality); + stmt.setInt(2, task.state.code); + stmt.setString(3, task.domain.toString()); + stmt.execute(); + + connection.commit(); + ongoingJobs.remove(task.domain); + } catch (Exception ex) { + logger.error("DB error", ex); + connection.rollback(); + } + } + else { + try (var stmt = + connection.prepareStatement("UPDATE EC_DOMAIN SET STATE=? WHERE URL_PART=?")) { + stmt.setInt(1, task.state.code); + stmt.setString(2, task.domain.toString()); + stmt.execute(); + + connection.commit(); + ongoingJobs.remove(task.domain); + } catch (Exception ex) { + logger.error("DB error", ex); + connection.rollback(); + } + } + } + catch (Exception ex) { + logger.error("DB error", ex); + } + } + } + + public void add(EdgeDomain domain, double quality, EdgeDomainIndexingState state) throws InterruptedException { + finishTasksQueue.put(new EdgeFinishTaskSpecs(domain, quality, state)); + } + public void addError(EdgeDomain domain, EdgeDomainIndexingState state) throws InterruptedException { + finishTasksQueue.put(new EdgeFinishTaskSpecs(domain, null, state)); + } + @AllArgsConstructor + private static class EdgeFinishTaskSpecs { + public final EdgeDomain domain; + public final Double quality; + public final EdgeDomainIndexingState state; + } + +} + diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/dating/DatingMain.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/dating/DatingMain.java new file mode 100644 index 00000000..1405f981 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/dating/DatingMain.java @@ -0,0 +1,37 @@ +package nu.marginalia.wmsa.edge.dating; + +import com.google.inject.Guice; +import com.google.inject.Inject; +import com.google.inject.Injector; +import nu.marginalia.wmsa.configuration.MainClass; +import nu.marginalia.wmsa.configuration.ServiceDescriptor; +import 
nu.marginalia.wmsa.configuration.module.ConfigurationModule; +import nu.marginalia.wmsa.configuration.module.DatabaseModule; +import nu.marginalia.wmsa.configuration.server.Initialization; +import spark.Spark; + +import java.io.IOException; + +public class DatingMain extends MainClass { + DatingService service; + + @Inject + public DatingMain(DatingService service) throws IOException { + this.service = service; + } + + public static void main(String... args) { + init(ServiceDescriptor.DATING, args); + + Spark.staticFileLocation("/static/dating/"); + + Injector injector = Guice.createInjector( + new DatingModule(), + new ConfigurationModule(), + new DatabaseModule() + ); + + injector.getInstance(DatingMain.class); + injector.getInstance(Initialization.class).setReady(); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/dating/DatingModule.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/dating/DatingModule.java new file mode 100644 index 00000000..b92f67ba --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/dating/DatingModule.java @@ -0,0 +1,6 @@ +package nu.marginalia.wmsa.edge.dating; + +import com.google.inject.AbstractModule; + +public class DatingModule extends AbstractModule { +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/dating/DatingService.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/dating/DatingService.java new file mode 100644 index 00000000..406a7dfd --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/dating/DatingService.java @@ -0,0 +1,184 @@ +package nu.marginalia.wmsa.edge.dating; + +import com.google.inject.Inject; +import com.google.inject.name.Named; +import lombok.SneakyThrows; +import nu.marginalia.wmsa.configuration.server.Initialization; +import nu.marginalia.wmsa.configuration.server.MetricsServer; +import nu.marginalia.wmsa.configuration.server.Service; +import nu.marginalia.wmsa.edge.assistant.screenshot.ScreenshotService; +import 
nu.marginalia.wmsa.edge.data.dao.EdgeDataStoreDao; +import nu.marginalia.wmsa.edge.data.dao.task.EdgeDomainBlacklist; +import nu.marginalia.wmsa.edge.model.EdgeId; +import nu.marginalia.wmsa.edge.search.BrowseResult; +import nu.marginalia.wmsa.renderer.mustache.MustacheRenderer; +import nu.marginalia.wmsa.renderer.mustache.RendererFactory; +import org.jetbrains.annotations.NotNull; +import spark.Request; +import spark.Response; +import spark.Spark; +import spark.resource.ClassPathResource; + +import java.io.FileNotFoundException; +import java.util.Map; +import java.util.Optional; + +public class DatingService extends Service { + private final EdgeDataStoreDao edgeDataStoreDao; + private final EdgeDomainBlacklist blacklist; + private final MustacheRenderer datingRenderer; + private final ScreenshotService screenshotService; + private final String SESSION_OBJECT_NAME = "so"; + @SneakyThrows + @Inject + public DatingService(@Named("service-host") String ip, + @Named("service-port") Integer port, + EdgeDataStoreDao edgeDataStoreDao, + RendererFactory rendererFactory, + Initialization initialization, + MetricsServer metricsServer, + EdgeDomainBlacklist blacklist, + ScreenshotService screenshotService) { + + super(ip, port, initialization, metricsServer); + + this.edgeDataStoreDao = edgeDataStoreDao; + this.blacklist = blacklist; + + datingRenderer = rendererFactory.renderer("dating/dating-view"); + this.screenshotService = screenshotService; + + Spark.get("/public/reset", this::getReset); + Spark.get("/public/", this::serveIndex); + Spark.get("/public/view", this::getCurrent); + Spark.get("/public/next", this::getNext); + Spark.get("/public/similar/:id", this::getSimilar); + Spark.get("/public/rewind", this::getRewind); + Spark.get("/public/init", this::getInitSession); + } + + @SneakyThrows + private Object serveIndex(Request request, Response response) { + try { + ClassPathResource resource = new ClassPathResource("static/dating/index.html"); + 
resource.getInputStream().transferTo(response.raw().getOutputStream()); + } + catch (IllegalArgumentException| FileNotFoundException ex) { + return false; + } + return ""; + } + + private Object getInitSession(Request request, Response response) { + var sessionObjectOpt = getSession(request); + if (sessionObjectOpt.isEmpty()) { + request.session(true).attribute(SESSION_OBJECT_NAME, new DatingSessionObject()); + } + response.redirect("https://explore.marginalia.nu/view"); + return ""; + } + + private String getReset(Request request, Response response) { + var sessionObjectOpt = getSession(request); + if (sessionObjectOpt.isEmpty()) { + response.redirect("https://explore.marginalia.nu/"); + return ""; + } + var session = sessionObjectOpt.get(); + session.resetQueue(); + + return getNext(request, response); + } + + private String getCurrent(Request request, Response response) { + var sessionObjectOpt = getSession(request); + if (sessionObjectOpt.isEmpty()) { + response.redirect("https://explore.marginalia.nu/"); + return ""; + } + var session = sessionObjectOpt.get(); + + var current = session.getCurrent(); + if (current == null) { + BrowseResult res = session.next(edgeDataStoreDao, blacklist); + res = findViableDomain(session, res); + session.browseForward(res); + current = session.getCurrent(); + } + + return datingRenderer.render(current, Map.of("back", session.hasHistory())); + } + + private String getNext(Request request, Response response) { + var sessionObjectOpt = getSession(request); + if (sessionObjectOpt.isEmpty()) { + response.redirect("https://explore.marginalia.nu/"); + return ""; + } + var session = sessionObjectOpt.get(); + + BrowseResult res = session.next(edgeDataStoreDao, blacklist); + + res = findViableDomain(session, res); + + session.browseForward(res); + + response.redirect("https://explore.marginalia.nu/view"); + return ""; + } + + private String getRewind(Request request, Response response) { + var sessionObjectOpt = getSession(request); + if 
(sessionObjectOpt.isEmpty()) { + response.redirect("https://explore.marginalia.nu/"); + return ""; + } + var session = sessionObjectOpt.get(); + + BrowseResult res = session.takeFromHistory(); + if (res == null) { + Spark.halt(404); + return ""; + } + + session.browseBackward(res); + + response.redirect("https://explore.marginalia.nu/view"); + return ""; + } + + + private String getSimilar(Request request, Response response) { + var sessionObjectOpt = getSession(request); + if (sessionObjectOpt.isEmpty()) { + response.redirect("https://explore.marginalia.nu/"); + return ""; + } + var session = sessionObjectOpt.get(); + + int id = Integer.parseInt(request.params("id")); + BrowseResult res = session.nextSimilar(new EdgeId<>(id), edgeDataStoreDao, blacklist); + + res = findViableDomain(session, res); + + session.browseForward(res); + + response.redirect("https://explore.marginalia.nu/view"); + return ""; + } + + @NotNull + private BrowseResult findViableDomain(DatingSessionObject session, BrowseResult res) { + while (!screenshotService.hasScreenshot(new EdgeId<>(res.domainId)) || session.isRecent(res)) { + res = session.next(edgeDataStoreDao, blacklist); + } + return res; + } + + + private Optional getSession(Request request) { + return Optional.ofNullable(request.session(false)) + .map(s -> s.attribute(SESSION_OBJECT_NAME)) + .map(DatingSessionObject.class::cast); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/dating/DatingSessionObject.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/dating/DatingSessionObject.java new file mode 100644 index 00000000..c67cb51d --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/dating/DatingSessionObject.java @@ -0,0 +1,89 @@ +package nu.marginalia.wmsa.edge.dating; + +import nu.marginalia.wmsa.edge.data.dao.EdgeDataStoreDao; +import nu.marginalia.wmsa.edge.data.dao.task.EdgeDomainBlacklist; +import nu.marginalia.wmsa.edge.model.EdgeDomain; +import 
nu.marginalia.wmsa.edge.model.EdgeId; +import nu.marginalia.wmsa.edge.search.BrowseResult; + +import java.util.LinkedList; + +public class DatingSessionObject { + public final LinkedList queue = new LinkedList<>(); + public final LinkedList recentlyViewed = new LinkedList<>(); + private BrowseResult current; + + private static final int MAX_HISTORY_SIZE = 100; + private static final int MAX_QUEUE_SIZE = 100; + + public BrowseResult setCurrent(BrowseResult result) { + current = result; + return current; + } + + public BrowseResult next(EdgeDataStoreDao dao, EdgeDomainBlacklist blacklist) { + if (queue.isEmpty()) { + dao.getRandomDomains(25, blacklist).forEach(queue::addLast); + } + return queue.pollFirst(); + } + + public BrowseResult nextSimilar(EdgeId id, EdgeDataStoreDao dao, EdgeDomainBlacklist blacklist) { + dao.getDomainNeighborsAdjacent(id, blacklist, 25).forEach(queue::addFirst); + + while (queue.size() > MAX_QUEUE_SIZE) { + queue.removeLast(); + } + + return queue.pollFirst(); + } + + public void browseForward(BrowseResult res) { + if (current != null) { + addToHistory(current); + } + setCurrent(res); + } + + public void browseBackward(BrowseResult res) { + if (current != null) { + addToQueue(current); + } + setCurrent(res); + } + + public BrowseResult addToHistory(BrowseResult res) { + recentlyViewed.addFirst(res); + while (recentlyViewed.size() > MAX_HISTORY_SIZE) { + recentlyViewed.removeLast(); + } + return res; + } + + public BrowseResult addToQueue(BrowseResult res) { + queue.addFirst(res); + while (queue.size() > MAX_QUEUE_SIZE) { + queue.removeLast(); + } + return res; + } + + public BrowseResult takeFromHistory() { + return recentlyViewed.pollFirst(); + } + + public boolean hasHistory() { + return !recentlyViewed.isEmpty(); + } + + public boolean isRecent(BrowseResult res) { + return recentlyViewed.contains(res) || res.equals(current); + } + public void resetQueue() { + queue.clear(); + } + + public BrowseResult getCurrent() { + return current; + } 
+} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/director/EdgeDirectorMain.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/director/EdgeDirectorMain.java new file mode 100644 index 00000000..66b3e998 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/director/EdgeDirectorMain.java @@ -0,0 +1,35 @@ +package nu.marginalia.wmsa.edge.director; + +import com.google.inject.Guice; +import com.google.inject.Inject; +import com.google.inject.Injector; +import nu.marginalia.wmsa.configuration.MainClass; +import nu.marginalia.wmsa.configuration.ServiceDescriptor; +import nu.marginalia.wmsa.configuration.module.ConfigurationModule; +import nu.marginalia.wmsa.configuration.module.DatabaseModule; +import nu.marginalia.wmsa.configuration.server.Initialization; + +import java.io.IOException; + +public class EdgeDirectorMain extends MainClass { + private EdgeDirectorService service; + + @Inject + public EdgeDirectorMain(EdgeDirectorService service) throws IOException { + this.service = service; + } + + public static void main(String... 
args) { + init(ServiceDescriptor.EDGE_DIRECTOR, args); + + Injector injector = Guice.createInjector( + new DatabaseModule(), + new EdgeDirectorModule(), + new ConfigurationModule() + ); + + injector.getInstance(EdgeDirectorMain.class); + injector.getInstance(Initialization.class).setReady(); + + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/director/EdgeDirectorModule.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/director/EdgeDirectorModule.java new file mode 100644 index 00000000..c0271b07 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/director/EdgeDirectorModule.java @@ -0,0 +1,6 @@ +package nu.marginalia.wmsa.edge.director; + +import com.google.inject.AbstractModule; + +public class EdgeDirectorModule extends AbstractModule { +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/director/EdgeDirectorService.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/director/EdgeDirectorService.java new file mode 100644 index 00000000..1edfd17e --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/director/EdgeDirectorService.java @@ -0,0 +1,109 @@ +package nu.marginalia.wmsa.edge.director; + + +import com.google.gson.Gson; +import com.google.gson.GsonBuilder; +import com.google.inject.Inject; +import com.google.inject.name.Named; +import io.prometheus.client.Histogram; +import nu.marginalia.wmsa.configuration.server.Initialization; +import nu.marginalia.wmsa.configuration.server.MetricsServer; +import nu.marginalia.wmsa.configuration.server.Service; +import nu.marginalia.wmsa.edge.data.dao.EdgeDataStoreTaskDao; +import nu.marginalia.wmsa.edge.model.EdgeDomain; +import nu.marginalia.wmsa.edge.model.crawl.EdgeDomainIndexingState; +import org.eclipse.jetty.util.UrlEncoded; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import spark.Request; +import spark.Response; +import spark.Spark; + +import static spark.Spark.*; + +public class EdgeDirectorService extends 
Service { + private final Gson gson = new GsonBuilder().create(); + private final EdgeDataStoreTaskDao taskDao; + + static final Histogram request_time_metrics + = Histogram.build("wmsa_edge_director_request_time", "DB Request Time") + .labelNames("request") + .register(); + private Logger logger = LoggerFactory.getLogger(getClass()); + + @Inject + public EdgeDirectorService(@Named("service-host") String ip, + @Named("service-port") Integer port, + Initialization init, + EdgeDataStoreTaskDao taskDao, + MetricsServer metricsServer) + { + super(ip, port, init, metricsServer); + this.taskDao = taskDao; + + + Spark.path("edge", () -> { + get("/task/index/:pass", this::getIndexTask, this::convertToJson); + get("/task/discover/", this::getDiscoverTask, this::convertToJson); + delete("/task/*", this::finishTask, this::convertToJson); + get("/task/blocked", this::isBlocked, this::convertToJson); + post("/task/flush", this::flushTasks, this::convertToJson); + }); + + } + + private Object flushTasks(Request request, Response response) { + logger.info("Flushing ongoing jobs"); + taskDao.flushOngoingJobs(); + return "Ok"; + } + + private Object isBlocked(Request request, Response response) { + return taskDao.isBlocked(); + } + + public Object getIndexTask(Request request, Response response) { + final long start = System.currentTimeMillis(); + + response.header("Content-Encoding", "gzip"); + var ret = taskDao.getIndexTask(Integer.parseInt(request.params("pass")), Integer.parseInt(request.queryParams("limit"))); + + request_time_metrics.labels("get_index_task").observe(System.currentTimeMillis() - start); + + return ret; + } + + public Object getDiscoverTask(Request request, Response response) { + final long start = System.currentTimeMillis(); + + response.header("Content-Encoding", "gzip"); + var ret = taskDao.getDiscoverTask(); + + request_time_metrics.labels("get_discover_task").observe(System.currentTimeMillis() - start); + + return ret; + } + + public Object 
finishTask(Request request, Response response) { + final long start = System.currentTimeMillis(); + + var domain = UrlEncoded.decodeString(request.splat()[0]); + EdgeDomainIndexingState state = EdgeDomainIndexingState.valueOf(request.queryParams("state")); + + if (state.code < 0) { + taskDao.finishBadIndexTask(new EdgeDomain(domain), state); + } + else { + double quality = Double.parseDouble(request.queryParams("quality")); + taskDao.finishIndexTask(new EdgeDomain(domain), quality, state); + } + + request_time_metrics.labels("finish_task").observe(System.currentTimeMillis() - start); + return null; + } + + + private String convertToJson(Object o) { + return gson.toJson(o); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/director/client/EdgeDirectorClient.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/director/client/EdgeDirectorClient.java new file mode 100644 index 00000000..3ff70623 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/director/client/EdgeDirectorClient.java @@ -0,0 +1,49 @@ +package nu.marginalia.wmsa.edge.director.client; + +import com.google.inject.Inject; +import com.google.inject.Singleton; +import io.reactivex.rxjava3.core.Observable; +import nu.marginalia.wmsa.client.AbstractDynamicClient; +import nu.marginalia.wmsa.client.HttpStatusCode; +import nu.marginalia.wmsa.configuration.ServiceDescriptor; +import nu.marginalia.wmsa.configuration.server.Context; +import nu.marginalia.wmsa.edge.model.EdgeDomain; +import nu.marginalia.wmsa.edge.model.crawl.EdgeDomainIndexingState; +import nu.marginalia.wmsa.edge.model.crawl.EdgeIndexTask; +import org.eclipse.jetty.util.UrlEncoded; + +import javax.annotation.CheckReturnValue; + +@Singleton +public class EdgeDirectorClient extends AbstractDynamicClient { + + @Inject + public EdgeDirectorClient() { + super(ServiceDescriptor.EDGE_DIRECTOR); + } + + @CheckReturnValue + public Observable getIndexTask(Context ctx, int pass, int limit) { + return 
super.get(ctx, "/edge/task/index/"+pass+"?limit="+limit, EdgeIndexTask.class); + } + @CheckReturnValue + public Observable getDiscoverTask(Context ctx) { + return super.get(ctx, "/edge/task/discover/", EdgeIndexTask.class); + } + + @CheckReturnValue + public Observable finishTask(Context ctx, EdgeDomain domain, double quality, EdgeDomainIndexingState state) { + return super.delete(ctx, "/edge/task/"+ UrlEncoded.encodeString(domain.toString())+"?quality="+quality+"&state="+state.toString()); + } + + @CheckReturnValue + public Observable isBlocked(Context ctx) { + return super.get(ctx, "/edge/task/blocked", Boolean.class); + } + + @CheckReturnValue + public void flushOngoingJobs(Context ctx) { + super.post(ctx, "/edge/task/flush", new Object()).blockingSubscribe(); + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/EdgeIndexControl.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/EdgeIndexControl.java new file mode 100644 index 00000000..3c65464e --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/EdgeIndexControl.java @@ -0,0 +1,40 @@ +package nu.marginalia.wmsa.edge.index; + + +import com.google.inject.Inject; +import nu.marginalia.wmsa.edge.index.model.IndexBlock; + + +public class EdgeIndexControl { + + private final IndexServicesFactory servicesFactory; + + @Inject + public EdgeIndexControl(IndexServicesFactory servicesFactory) { + this.servicesFactory = servicesFactory; + } + + public void regenerateIndex(int id) { + System.runFinalization(); + System.gc(); + + for (IndexBlock block : IndexBlock.values()) { + + servicesFactory.getIndexConverter(id, block); + + System.runFinalization(); + System.gc(); + } + + System.runFinalization(); + System.gc(); + } + + public long wordCount(int id) { + return servicesFactory.wordCount(id); + } + + public void switchIndexFiles(int id) throws Exception { + servicesFactory.switchFilesJob(id).call(); + } +} diff --git 
a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/EdgeIndexMain.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/EdgeIndexMain.java new file mode 100644 index 00000000..61b57dbb --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/EdgeIndexMain.java @@ -0,0 +1,36 @@ +package nu.marginalia.wmsa.edge.index; + +import com.google.inject.Guice; +import com.google.inject.Inject; +import com.google.inject.Injector; +import nu.marginalia.wmsa.configuration.MainClass; +import nu.marginalia.wmsa.configuration.ServiceDescriptor; +import nu.marginalia.wmsa.configuration.module.ConfigurationModule; +import nu.marginalia.wmsa.configuration.module.DatabaseModule; +import nu.marginalia.wmsa.configuration.server.Initialization; + +import java.io.IOException; + +public class EdgeIndexMain extends MainClass { + private EdgeIndexService service; + + @Inject + public EdgeIndexMain(EdgeIndexService service) throws IOException { + this.service = service; + } + + public static void main(String... 
args) { + init(ServiceDescriptor.EDGE_INDEX, args); + + Injector injector = Guice.createInjector( + new EdgeTablesModule(), + new EdgeIndexModule(), + new DatabaseModule(), + new ConfigurationModule() + ); + + injector.getInstance(EdgeIndexMain.class); + injector.getInstance(Initialization.class).setReady(); + + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/EdgeIndexModule.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/EdgeIndexModule.java new file mode 100644 index 00000000..f12212ec --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/EdgeIndexModule.java @@ -0,0 +1,12 @@ +package nu.marginalia.wmsa.edge.index; + +import com.google.inject.AbstractModule; +import com.google.inject.name.Names; + +public class EdgeIndexModule extends AbstractModule { + + public void configure() { + bind(Long.class).annotatedWith(Names.named("edge-dictionary-hash-map-size")).toInstance(1L << 31); + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/EdgeIndexService.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/EdgeIndexService.java new file mode 100644 index 00000000..14ce0673 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/EdgeIndexService.java @@ -0,0 +1,506 @@ +package nu.marginalia.wmsa.edge.index; + +import com.google.gson.Gson; +import com.google.gson.GsonBuilder; +import com.google.inject.Inject; +import com.google.inject.name.Named; +import gnu.trove.map.TLongIntMap; +import gnu.trove.map.hash.TIntIntHashMap; +import gnu.trove.map.hash.TLongIntHashMap; +import gnu.trove.set.hash.TIntHashSet; +import io.prometheus.client.Counter; +import io.prometheus.client.Histogram; +import io.reactivex.rxjava3.schedulers.Schedulers; +import nu.marginalia.wmsa.configuration.server.Initialization; +import nu.marginalia.wmsa.configuration.server.MetricsServer; +import nu.marginalia.wmsa.configuration.server.Service; +import 
nu.marginalia.wmsa.edge.index.model.*; +import nu.marginalia.wmsa.edge.index.service.SearchIndexes; +import nu.marginalia.wmsa.edge.index.service.index.SearchIndexWriterImpl; +import nu.marginalia.wmsa.edge.index.service.query.IndexSearchBudget; +import nu.marginalia.util.dict.DictionaryHashMap; +import nu.marginalia.wmsa.edge.model.*; +import nu.marginalia.wmsa.edge.model.crawl.EdgePageWordSet; +import nu.marginalia.wmsa.edge.model.crawl.EdgePageWords; +import nu.marginalia.wmsa.edge.model.search.*; +import org.apache.http.HttpStatus; +import org.jetbrains.annotations.NotNull; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import spark.HaltException; +import spark.Request; +import spark.Response; +import spark.Spark; + +import java.util.*; +import java.util.concurrent.TimeUnit; +import java.util.function.LongPredicate; +import java.util.stream.LongStream; + +import static spark.Spark.get; +import static spark.Spark.halt; + +public class EdgeIndexService extends Service { + private static final int SEARCH_BUDGET_LIMIT = 1_000_000; + + private final Logger logger = LoggerFactory.getLogger(getClass()); + + @NotNull + private final Initialization init; + private final SearchIndexes indexes; + + private final Gson gson = new GsonBuilder().create(); + + private static final Histogram wmsa_edge_index_query_time + = Histogram.build().name("wmsa_edge_index_query_time").help("-").register(); + private static final Counter wmsa_edge_index_query_count + = Counter.build().name("wmsa_edge_index_query_count").help("-").register(); + private static final Histogram wmsa_edge_index_put_words_time + = Histogram.build().name("wmsa_edge_index_put_words_time").help("-").register(); + + public static final int DYNAMIC_BUCKET_LENGTH = 7; + + + @Inject + public EdgeIndexService(@Named("service-host") String ip, + @Named("service-port") Integer port, + Initialization init, + MetricsServer metricsServer, + SearchIndexes indexes + ) { + super(ip, port, init, metricsServer); + + 
this.init = init; + this.indexes = indexes; + + Spark.post("/words/", this::putWords); + Spark.post("/search/", this::search, gson::toJson); + + Spark.post("/dictionary/*", this::getWordId, gson::toJson); + + Spark.post("/ops/repartition", this::repartitionEndpoint); + Spark.post("/ops/preconvert", this::preconvertEndpoint); + Spark.post("/ops/reindex/:id", this::reindexEndpoint); + + get("/is-blocked", this::isBlocked, gson::toJson); + + Schedulers.newThread().scheduleDirect(this::initialize, 5, TimeUnit.SECONDS); + } + + private Object getWordId(Request request, Response response) { + final String word = request.splat()[0]; + + var dr = indexes.getDictionaryReader(); + if (null == dr) { + response.status(HttpStatus.SC_FAILED_DEPENDENCY); + return ""; + } + + final int wordId = dr.get(word); + + if (DictionaryHashMap.NO_VALUE == wordId) { + response.status(404); + return ""; + } + + return wordId; + } + + private Object repartitionEndpoint(Request request, Response response) { + + if (!indexes.repartition()) { + Spark.halt(503, "Operations busy"); + } + return "OK"; + } + + private Object preconvertEndpoint(Request request, Response response) { + if (!indexes.preconvert()) { + Spark.halt(503, "Operations busy"); + } + return "OK"; + } + + private Object reindexEndpoint(Request request, Response response) { + int id = Integer.parseInt(request.params("id")); + + if (!indexes.reindex(id)) { + Spark.halt(503, "Operations busy"); + } + return "OK"; + } + + private Object isBlocked(Request request, Response response) { + return indexes.isBusy() || !initialized; + } + + volatile boolean initialized = false; + public void initialize() { + if (!initialized) { + initialized = true; + } + else { + return; + } + indexes.initialize(init); + } + + private Object putWords(Request request, Response response) { + var putWordsRequest = gson.fromJson(request.body(), EdgePutWordsRequest.class); + + synchronized (this) { + putWords(putWordsRequest.getDomainId(), 
putWordsRequest.getUrlId(), + putWordsRequest.wordSet, putWordsRequest.getIndex()); + } + + response.status(HttpStatus.SC_ACCEPTED); + return ""; + } + + public void putWords(EdgeId domainId, EdgeId urlId, + EdgePageWordSet wordSet, int idx + ) { + + wmsa_edge_index_put_words_time.time(() -> { + for (EdgePageWords words : wordSet.values()) { + putWords(domainId, urlId, words, idx); + } + }); + + } + + public void putWords(EdgeId domainId, EdgeId urlId, + EdgePageWords words, int idx + ) { + SearchIndexWriterImpl indexWriter = indexes.getIndexWriter(idx); + + if (!words.words.isEmpty()) { + if (words.size() < 1000) { + indexWriter.put(domainId, urlId, words.block, words.words); + } else { + chunks(words.words, 1000).forEach(chunk -> { + indexWriter.put(domainId, urlId, words.block, chunk); + }); + } + } + } + + + private List> chunks(Collection coll, int size) { + List> ret = new ArrayList<>(); + List data = List.copyOf(coll); + + for (int i = 0; i < data.size(); i+=size) { + ret.add(data.subList(i, Math.min(data.size(), i+size))); + } + + return ret; + } + + private Object search(Request request, Response response) { + if (indexes.getDictionaryReader() == null) { + halt(HttpStatus.SC_SERVICE_UNAVAILABLE, "Come back in a few minutes"); + } + + String json = request.body(); + EdgeSearchSpecification specsSet = gson.fromJson(json, EdgeSearchSpecification.class); + + long start = System.currentTimeMillis(); + + + try { + if (specsSet.isStagger()) { + return new EdgeSearchResultSet(searchStaggered(specsSet)); + } + else { + return new EdgeSearchResultSet(searchStraight(specsSet)); + } + } + catch (HaltException ex) { + logger.warn("Halt", ex); + throw ex; + } + catch (Exception ex) { + logger.info("Error during search {}({}) (query: {})", ex.getClass().getSimpleName(), ex.getMessage(), json); + logger.info("Error", ex); + Spark.halt(500, "Error"); + return null; + } + finally { + wmsa_edge_index_query_time.observe(System.currentTimeMillis() - start); + 
wmsa_edge_index_query_count.inc(); + } + } + + private Map> searchStaggered(EdgeSearchSpecification specsSet) { + int count = 0; + + final Map> results = new HashMap<>(); + final TIntHashSet seenResults = new TIntHashSet(); + + final DomainResultCountFilter[] domainCountFilter = new DomainResultCountFilter[] { + new DomainResultCountFilter(specsSet.limitByDomain), + new DomainResultCountFilter(specsSet.limitByDomain) + }; + + final IndexSearchBudget budget = new IndexSearchBudget(SEARCH_BUDGET_LIMIT); + final TIntIntHashMap limitsPerBucketRemaining = new TIntIntHashMap(6, 0.7f, 0, specsSet.limitByBucket); + + for (int i = 0; i < specsSet.buckets.size(); i+=2) { + for (var sq : specsSet.subqueries) { + for (int j = 0; j < 2 && i + j < specsSet.buckets.size(); j++) { + Optional searchTerms = getSearchTerms(sq); + + if (searchTerms.isEmpty()) + continue; + + var result = performSearch(searchTerms.get(), + budget, + seenResults, + domainCountFilter[j], + sq, + List.of(specsSet.buckets.get(i+j)), + specsSet, + Math.min(limitsPerBucketRemaining.get(i+j), specsSet.limitTotal - count) + ); + + if (logger.isDebugEnabled()) { + logger.debug("{} -> {} {} {}", sq.block, specsSet.buckets.get(i+j), sq.searchTermsInclude, result.results.values().stream().mapToInt(List::size).sum()); + } + + int sz = result.size(); + count += sz; + limitsPerBucketRemaining.adjustOrPutValue(i+j, -sz, specsSet.limitByBucket-sz); + + if (sz > 0) { + results.computeIfAbsent(sq.block, s -> new ArrayList<>()).add(result); + } + } + } + } + + if (budget.used() > 0) { + logger.debug("Query used ${}", budget.used()); + } + + return results; + } + + @NotNull + private Map> searchStraight(EdgeSearchSpecification specsSet) { + Map> results = new HashMap<>(); + int count = 0; + TIntHashSet seenResults = new TIntHashSet(); + + final DomainResultCountFilter domainCountFilter = new DomainResultCountFilter(specsSet.limitByDomain); + + IndexSearchBudget budget = new IndexSearchBudget(SEARCH_BUDGET_LIMIT); + for 
(var sq : specsSet.subqueries) { + Optional searchTerms = getSearchTerms(sq); + + if (searchTerms.isEmpty()) + continue; + + var result = performSearch(searchTerms.get(), + budget, seenResults, domainCountFilter, + sq, specsSet.buckets, specsSet, + specsSet.limitTotal - count); + + if (logger.isDebugEnabled()) { + logger.debug("{} -> {} {}", sq.block, sq.searchTermsInclude, result.size()); + } + + count += result.size(); + if (result.size() > 0) { + results.computeIfAbsent(sq.block, s -> new ArrayList<>()).add(result); + } + } + + if (budget.used() > 0) { + logger.debug("Query used ${}", budget.used()); + } + + return results; + } + + private EdgeSearchResults performSearch(EdgeIndexSearchTerms searchTerms, + IndexSearchBudget budget, + TIntHashSet seenResults, + DomainResultCountFilter domainCountFilter, + EdgeSearchSubquery sq, + List specBuckets, + EdgeSearchSpecification specs, + int limit) + { + if (limit <= 0) { + return new EdgeSearchResults(); + } + + final Map> results = new HashMap<>(); + final DomainResultCountFilter localFilter = new DomainResultCountFilter(specs.limitByDomain); + + boolean debug = sq.searchTermsExclude.contains("special:debug"); + + for (int i : specBuckets) { + int foundResultsCount = results.values().stream().mapToInt(List::size).sum(); + + if (foundResultsCount >= specs.limitTotal || foundResultsCount >= limit) + break; + + List resultsForBucket = new ArrayList<>(specs.limitByBucket); + + if (debug) { + getQuery(i, budget, sq.block, lv -> localFilter.filterRawValue(i, lv), searchTerms) + .peek(l -> logger.info("Considering {}", Long.toHexString(l))) + .mapToObj(id -> new EdgeSearchResultItem(i, sq.termSize(), id)) + .filter(ri -> { + if (seenResults.contains(ri.url.getId())) { + logger.info("Seen before: {}", Integer.toHexString(ri.url.getId())); + return false; + } + else if (!localFilter.test(i, domainCountFilter, ri)) { + logger.info("DCF: {} - {}:{}", ri.blockId, Integer.toHexString(ri.domain.getId()), 
Integer.toHexString(ri.url.getId())); + return false; + } + return true; + }) + .limit(specs.limitTotal * 3L) + .distinct() + .limit(Math.min(specs.limitByBucket + - results.values().stream().mapToInt(Collection::size).sum(), limit - foundResultsCount)) + .forEach(resultsForBucket::add); + } + else { + getQuery(i, budget, sq.block, lv -> localFilter.filterRawValue(i, lv), searchTerms) + .mapToObj(id -> new EdgeSearchResultItem(i, sq.termSize(), id)) + .filter(ri -> !seenResults.contains(ri.url.getId()) && localFilter.test(i, domainCountFilter, ri)) + .limit(specs.limitTotal * 3L) + .distinct() + .limit(Math.min(specs.limitByBucket + - results.values().stream().mapToInt(Collection::size).sum(), limit - foundResultsCount)) + .forEach(resultsForBucket::add); + } + + for (var result : resultsForBucket) { + seenResults.add(result.url.getId()); + } + for (var result : resultsForBucket) { + for (var searchTerm : sq.searchTermsInclude) { + result.scores.add(getSearchTermScore(i, searchTerm, result.getCombinedId())); + } + } + + domainCountFilter.addAll(i, resultsForBucket); + + if (!resultsForBucket.isEmpty()) { + results.put(i, resultsForBucket); + } + } + + return new EdgeSearchResults(results); + } + + private EdgeSearchResultKeywordScore getSearchTermScore(int bucketId, String term, long urlId) { + final int termId = indexes.getDictionaryReader().get(term); + + var bucket = indexes.getBucket(bucketId); + + return new EdgeSearchResultKeywordScore(term, + bucket.getTermScore(termId, urlId), + bucket.isTermInBucket(IndexBlock.Title, termId, urlId), + bucket.isTermInBucket(IndexBlock.Link, termId, urlId) + ); + + } + + private LongStream getQuery(int bucket, IndexSearchBudget budget, IndexBlock block, + LongPredicate filter, EdgeIndexSearchTerms searchTerms) { + if (!indexes.isValidBucket(bucket)) { + logger.warn("Invalid bucket {}", bucket); + return LongStream.empty(); + } + return indexes.getBucket(bucket).getQuery(block, filter, budget, searchTerms); + } + + static 
class DomainResultCountFilter { + final TLongIntMap resultsByDomain = new TLongIntHashMap(200, 0.75f, -1, 0); + final int limitByDomain; + + DomainResultCountFilter(int limitByDomain) { + this.limitByDomain = limitByDomain; + } + + public boolean filterRawValue(int bucket, long value) { + var domain = new EdgeId((int)(value >>> 32)); + + if (domain.getId() == Integer.MAX_VALUE) { + return true; + } + + return resultsByDomain.get(getKey(bucket, domain)) <= limitByDomain; + } + + long getKey(int bucket, EdgeId id) { + return ((long)bucket) << 32 | id.getId(); + } + + public boolean test(int bucket, EdgeSearchResultItem item) { + if (item.domain.getId() == Integer.MAX_VALUE) { + return true; + } + + return resultsByDomain.adjustOrPutValue(getKey(bucket, item.domain), 1, 1) <= limitByDomain; + } + + int getCount(int bucket, EdgeSearchResultItem item) { + return resultsByDomain.get(getKey(bucket, item.domain)); + } + + public void addAll(int bucket, List items) { + items.forEach(item -> { + resultsByDomain.adjustOrPutValue(getKey(bucket, item.domain), 1, 1); + }); + } + + public boolean test(int bucket, DomainResultCountFilter root, EdgeSearchResultItem item) { + if (item.domain.getId() == Integer.MAX_VALUE) { + return true; + } + return root.getCount(bucket, item) + resultsByDomain.adjustOrPutValue(getKey(bucket, item.domain), 1, 1) <= limitByDomain; + } + } + + private Optional getSearchTerms(EdgeSearchSubquery request) { + final List excludes = new ArrayList<>(); + final List includes = new ArrayList<>(); + + for (var include : request.searchTermsInclude) { + var word = lookUpWord(include); + if (word.isEmpty()) { + logger.debug("Unknown search term: " + include); + return Optional.empty(); + } + includes.add(word.getAsInt()); + } + + for (var exclude : request.searchTermsExclude) { + lookUpWord(exclude).ifPresent(excludes::add); + } + + if (includes.isEmpty()) { + return Optional.empty(); + } + + return Optional.of(new EdgeIndexSearchTerms(includes, excludes)); + } 
+ + private OptionalInt lookUpWord(String s) { + int ret = indexes.getDictionaryReader().get(s); + if (ret == DictionaryHashMap.NO_VALUE) { + return OptionalInt.empty(); + } + return OptionalInt.of(ret); + } + +} + diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/EdgeTablesModule.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/EdgeTablesModule.java new file mode 100644 index 00000000..bc9c2f44 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/EdgeTablesModule.java @@ -0,0 +1,28 @@ +package nu.marginalia.wmsa.edge.index; + +import com.google.inject.AbstractModule; +import com.google.inject.name.Names; + +import java.nio.file.Path; + +public class EdgeTablesModule extends AbstractModule { + + public void configure() { + bind(Path.class).annotatedWith(Names.named("partition-root-slow")).toInstance(Path.of("/var/lib/wmsa/index/write")); + bind(Path.class).annotatedWith(Names.named("partition-root-slow-tmp")).toInstance(Path.of("/backup/work/index-tmp/")); + + bind(Path.class).annotatedWith(Names.named("partition-root-fast")).toInstance(Path.of("/var/lib/wmsa/index/read")); + bind(Path.class).annotatedWith(Names.named("tmp-file-dir")).toInstance(Path.of("/var/lib/wmsa/index/read")); + + bind(String.class).annotatedWith(Names.named("edge-writer-page-index-file")).toInstance("page-index.dat"); + bind(String.class).annotatedWith(Names.named("edge-writer-dictionary-file")).toInstance("dictionary.dat"); + + bind(String.class).annotatedWith(Names.named("edge-index-write-words-file")).toInstance("words.dat.wip"); + bind(String.class).annotatedWith(Names.named("edge-index-write-urls-file")).toInstance("urls.dat.wip"); + + bind(String.class).annotatedWith(Names.named("edge-index-read-words-file")).toInstance("words.dat"); + bind(String.class).annotatedWith(Names.named("edge-index-read-urls-file")).toInstance("urls.dat"); + + } + +} diff --git 
a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/IndexServicesFactory.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/IndexServicesFactory.java new file mode 100644 index 00000000..9b26989c --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/IndexServicesFactory.java @@ -0,0 +1,223 @@ +package nu.marginalia.wmsa.edge.index; + +import com.google.inject.Inject; +import com.google.inject.Singleton; +import com.google.inject.name.Named; +import lombok.SneakyThrows; +import nu.marginalia.wmsa.edge.data.dao.task.EdgeDomainBlacklist; +import nu.marginalia.wmsa.edge.index.model.IndexBlock; +import nu.marginalia.wmsa.edge.index.radix.EdgeIndexBucket; +import nu.marginalia.wmsa.edge.index.service.dictionary.DictionaryReader; +import nu.marginalia.wmsa.edge.index.service.dictionary.DictionaryWriter; +import nu.marginalia.wmsa.edge.index.service.index.*; +import nu.marginalia.wmsa.edge.index.service.query.SearchIndexPartitioner; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.StandardCopyOption; +import java.util.EnumMap; +import java.util.concurrent.Callable; + +import static nu.marginalia.wmsa.edge.index.EdgeIndexService.DYNAMIC_BUCKET_LENGTH; + +@Singleton +public class IndexServicesFactory { + private final Path tmpFileDir; + private final EdgeDomainBlacklist domainBlacklist; + + private final Logger logger = LoggerFactory.getLogger(getClass()); + + private final PartitionedDataFile writerIndexFile; + private final RootDataFile writerDictionaryFile; + private final PartitionedDataFile preconverterOutputFile; + private final DoublePartitionedDataFile indexReadWordsFile; + private final DoublePartitionedDataFile indexReadUrlsFile; + private final DoublePartitionedDataFile indexWriteWordsFile; + private final DoublePartitionedDataFile indexWriteUrlsFile; + private volatile static 
DictionaryWriter dictionaryWriter; + private final Long dictionaryHashMapSize; + private final SearchIndexPartitioner partitoner; + @Inject + public IndexServicesFactory( + @Named("tmp-file-dir") Path tmpFileDir, + @Named("partition-root-slow") Path partitionRootSlow, + @Named("partition-root-slow-tmp") Path partitionRootSlowTmp, + @Named("partition-root-fast") Path partitionRootFast, + @Named("edge-writer-page-index-file") String writerIndexFile, + @Named("edge-writer-dictionary-file") String writerDictionaryFile, + @Named("edge-index-read-words-file") String indexReadWordsFile, + @Named("edge-index-read-urls-file") String indexReadUrlsFile, + @Named("edge-index-write-words-file") String indexWriteWordsFile, + @Named("edge-index-write-urls-file") String indexWriteUrlsFile, + @Named("edge-dictionary-hash-map-size") Long dictionaryHashMapSize, + EdgeDomainBlacklist domainBlacklist, + SearchIndexPartitioner partitoner + ) { + + this.tmpFileDir = tmpFileDir; + this.dictionaryHashMapSize = dictionaryHashMapSize; + this.domainBlacklist = domainBlacklist; + + this.writerIndexFile = new PartitionedDataFile(partitionRootSlow, writerIndexFile); + this.writerDictionaryFile = new RootDataFile(partitionRootSlow, writerDictionaryFile); + this.indexReadWordsFile = new DoublePartitionedDataFile(partitionRootFast, indexReadWordsFile); + this.indexReadUrlsFile = new DoublePartitionedDataFile(partitionRootFast, indexReadUrlsFile); + this.indexWriteWordsFile = new DoublePartitionedDataFile(partitionRootFast, indexWriteWordsFile); + this.indexWriteUrlsFile = new DoublePartitionedDataFile(partitionRootFast, indexWriteUrlsFile); + this.preconverterOutputFile = new PartitionedDataFile(partitionRootSlowTmp, "preconverted.dat"); + this.partitoner = partitoner; + } + + public SearchIndexWriterImpl getIndexWriter(int idx) { + return new SearchIndexWriterImpl(getDictionaryWriter(), writerIndexFile.get(idx)); + } + + public DictionaryWriter getDictionaryWriter() { + if (dictionaryWriter == 
null) { + dictionaryWriter = new DictionaryWriter(writerDictionaryFile.get(), dictionaryHashMapSize, true); + } + return dictionaryWriter; + } + + @SneakyThrows + public DictionaryReader getDictionaryReader() { + return new DictionaryReader(getDictionaryWriter()); + + } + @SneakyThrows + public SearchIndexConverter getIndexConverter(int id, IndexBlock block) { + return new SearchIndexConverter(block, id, tmpFileDir, + preconverterOutputFile.get(id), + indexWriteWordsFile.get(id, block.id), + indexWriteUrlsFile.get(id, block.id), + partitoner, + domainBlacklist + ); + } + @SneakyThrows + public SearchIndexPreconverter getIndexPreconverter() { + File[] outputFiles = new File[DYNAMIC_BUCKET_LENGTH+1]; + for (int i = 0; i < outputFiles.length; i++) { + outputFiles[i] = getPreconverterOutputFile(i); + } + return new SearchIndexPreconverter(writerIndexFile.get(0), + outputFiles, + partitoner, + domainBlacklist + ); + } + + private File getPreconverterOutputFile(int i) { + return preconverterOutputFile.get(i); + } + + public long wordCount(int id) { + return SearchIndexConverter.wordCount(writerIndexFile.get(0)); + } + + @SneakyThrows + public SearchIndexReader getIndexReader(int id) { + EnumMap indexMap = new EnumMap<>(IndexBlock.class); + for (IndexBlock block : IndexBlock.values()) { + try { + indexMap.put(block, createSearchIndex(id, block)); + } + catch (Exception ex) { + logger.error("Could not create index {}-{}", id, block); + } + } + return new SearchIndexReader(indexMap); + } + + private SearchIndex createSearchIndex(int bucketId, IndexBlock block) { + try { + return new SearchIndex("IndexReader"+bucketId+":"+ block.name(), + indexReadUrlsFile.get(bucketId, block.id), + indexReadWordsFile.get(bucketId, block.id)); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + public Callable switchFilesJob(int id) { + return () -> { + for (int block = 0; block < IndexBlock.values().length; block++) { + Files.move( + indexWriteWordsFile.get(id, 
block).toPath(), + indexReadWordsFile.get(id, block).toPath(), + StandardCopyOption.REPLACE_EXISTING); + Files.move( + indexWriteUrlsFile.get(id, block).toPath(), + indexReadUrlsFile.get(id, block).toPath(), + StandardCopyOption.REPLACE_EXISTING); + } + return true; + }; + } + + public EdgeIndexBucket createIndexBucket(int id) { + return new EdgeIndexBucket(this, new EdgeIndexControl(this), id); + } +} + +class RootDataFile { + private final Path partition; + private final String pattern; + + RootDataFile(Path partition, String pattern) { + this.partition = partition; + this.pattern = pattern; + } + + public File get() { + return partition.resolve(pattern).toFile(); + } +} + + +class PartitionedDataFile { + private final Path partition; + private final String pattern; + + PartitionedDataFile(Path partition, String pattern) { + this.partition = partition; + this.pattern = pattern; + } + + public File get(int id) { + Path partitionDir = partition.resolve(Integer.toString(id)); + if (!partitionDir.toFile().exists()) { + partitionDir.toFile().mkdir(); + } + return partitionDir.resolve(pattern).toFile(); + } +} + +class DoublePartitionedDataFile { + private final Path partition; + private final String pattern; + + DoublePartitionedDataFile(Path partition, String pattern) { + this.partition = partition; + this.pattern = pattern; + } + + public File get(int id, int id2) { + Path partitionDir = partition.resolve(Integer.toString(id)); + + if (!partitionDir.toFile().exists()) { + partitionDir.toFile().mkdir(); + } + partitionDir = partitionDir.resolve(Integer.toString(id2)); + if (!partitionDir.toFile().exists()) { + partitionDir.toFile().mkdir(); + } + + return partitionDir.resolve(pattern).toFile(); + } + +} \ No newline at end of file diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/client/EdgeIndexClient.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/client/EdgeIndexClient.java new file mode 100644 index 00000000..6f64ceae --- 
/dev/null
+++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/client/EdgeIndexClient.java
@@ -0,0 +1,69 @@
package nu.marginalia.wmsa.edge.index.client;

import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import com.google.inject.Singleton;
import io.reactivex.rxjava3.core.Observable;
import io.reactivex.rxjava3.schedulers.Schedulers;
import nu.marginalia.wmsa.client.AbstractDynamicClient;
import nu.marginalia.wmsa.client.HttpStatusCode;
import nu.marginalia.wmsa.configuration.ServiceDescriptor;
import nu.marginalia.wmsa.configuration.server.Context;
import nu.marginalia.wmsa.edge.index.model.EdgePutWordsRequest;
import nu.marginalia.wmsa.edge.model.*;
import nu.marginalia.wmsa.edge.model.crawl.EdgePageWordSet;
import nu.marginalia.wmsa.edge.model.search.EdgeSearchResultSet;
import nu.marginalia.wmsa.edge.model.search.EdgeSearchSpecification;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.annotation.CheckReturnValue;
import java.util.List;
import java.util.concurrent.TimeUnit;

/** HTTP client for the edge index service ({@code /words/}, {@code /search/}, {@code /is-blocked}). */
@Singleton
public class EdgeIndexClient extends AbstractDynamicClient {
    private final Gson gson = new GsonBuilder()
            .create();
    private final Logger logger = LoggerFactory.getLogger(getClass());

    public EdgeIndexClient() {
        super(ServiceDescriptor.EDGE_INDEX);
        setTimeout(30);
    }

    /**
     * Posts the words of one document to {@code /words/}.
     */
    // NOTE(review): the element type of the returned Observable is not visible in this
    // file (presumably HttpStatusCode, which is imported but otherwise unused) - confirm.
    @CheckReturnValue
    public Observable putWords(Context ctx, EdgeId domain, EdgeId url, double quality,
                               EdgePageWordSet wordSet, int writer
                               )
    {
        EdgePutWordsRequest request = new EdgePutWordsRequest(domain, url, quality, wordSet, writer);

        return this.post(ctx, "/words/", request);
    }


    /** Posts one search specification to {@code /search/} and blocks for the first response. */
    @CheckReturnValue
    public EdgeSearchResultSet query(Context ctx, EdgeSearchSpecification specs) {
        return this.postGet(ctx, "/search/", specs, EdgeSearchResultSet.class).blockingFirst();
    }

    /**
     * Posts each specification to {@code /search/} and blocks until all have finished.
     * A request that fails or takes longer than one second is dropped from the
     * result, so the returned list may be shorter than {@code specs}.
     */
    @CheckReturnValue
    public List<EdgeSearchResultSet> multiQuery(Context ctx, EdgeSearchSpecification... specs) {

        return Observable.fromArray(specs)
                .concatMap(s -> postGet(ctx, "/search/", s, EdgeSearchResultSet.class)
                        .subscribeOn(Schedulers.io())
                        .timeout(1, TimeUnit.SECONDS)
                        .onErrorComplete())
                .toList()
                .blockingGet();
    }


    @CheckReturnValue
    public Observable<Boolean> isBlocked(Context ctx) {
        return super.get(ctx, "/is-blocked", Boolean.class);
    }

}
diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/model/EdgeIndexSearchTerms.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/model/EdgeIndexSearchTerms.java
new file mode 100644
index 00000000..6d4119e1
--- /dev/null
+++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/model/EdgeIndexSearchTerms.java
@@ -0,0 +1,12 @@
package nu.marginalia.wmsa.edge.index.model;

import lombok.AllArgsConstructor;

import java.util.ArrayList;
import java.util.List;

/** Dictionary ids of the terms a query must include and must not include. */
@AllArgsConstructor
public class EdgeIndexSearchTerms {
    public List<Integer> includes = new ArrayList<>();
    public List<Integer> excludes = new ArrayList<>();
}
diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/model/EdgePutWordsRequest.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/model/EdgePutWordsRequest.java
new file mode 100644
index 00000000..dc541c5b
--- /dev/null
+++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/model/EdgePutWordsRequest.java
@@ -0,0 +1,20 @@
package nu.marginalia.wmsa.edge.index.model;

import lombok.AllArgsConstructor;
import lombok.Getter;
import lombok.ToString;
import nu.marginalia.wmsa.edge.model.EdgeDomain;
import nu.marginalia.wmsa.edge.model.EdgeId;
import nu.marginalia.wmsa.edge.model.crawl.EdgePageWordSet;
import nu.marginalia.wmsa.edge.model.EdgeUrl;

/** Request body sent to {@code /words/} by EdgeIndexClient#putWords. */
// NOTE(review): EdgeId is left raw as shown in this file; the otherwise-unused
// EdgeDomain/EdgeUrl imports suggest it carried type arguments - confirm.
@AllArgsConstructor @Getter
@ToString
public class EdgePutWordsRequest {
    public final EdgeId domainId;
    public final EdgeId urlId;
    public final double quality;

    public final EdgePageWordSet wordSet;
    private int index = 0;
}
diff --git
a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/model/IndexBlock.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/model/IndexBlock.java
new file mode 100644
index 00000000..a347d2e4
--- /dev/null
+++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/model/IndexBlock.java
@@ -0,0 +1,34 @@
package nu.marginalia.wmsa.edge.index.model;

/**
 * The blocks an index is divided into. Each block carries a numeric id
 * (resolvable through {@link #byId(int)}) and a sort order.
 */
public enum IndexBlock {
    TitleKeywords(0, 0),
    Title(1, 1),
    Link(2, 1.25),
    Top(3, 2),
    Middle(4, 3),
    Low(5, 4),
    Words(6, 6),
    Meta(7, 7),
    PositionWords(8, 4.5),
    NamesWords(9, 5),
    TermFreq(10, 10),
    Topic(11, 0.5);

    public final int id;
    public final double sortOrder;

    IndexBlock(int id, double sortOrder) {
        this.sortOrder = sortOrder;
        this.id = id;
    }

    /**
     * Finds the block with the given id.
     *
     * @throws IllegalArgumentException if no block has that id
     */
    public static IndexBlock byId(int id) {
        final IndexBlock[] candidates = values();
        int pos = 0;
        while (pos < candidates.length) {
            final IndexBlock candidate = candidates[pos++];
            if (candidate.id == id) {
                return candidate;
            }
        }
        throw new IllegalArgumentException("Bad block id");
    }
}
diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/radix/EdgeIndexBucket.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/radix/EdgeIndexBucket.java
new file mode 100644
index 00000000..a257e5f3
--- /dev/null
+++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/radix/EdgeIndexBucket.java
@@ -0,0 +1,155 @@
package nu.marginalia.wmsa.edge.index.radix;

import nu.marginalia.wmsa.edge.index.EdgeIndexControl;
import nu.marginalia.wmsa.edge.index.IndexServicesFactory;
import nu.marginalia.wmsa.edge.index.model.EdgeIndexSearchTerms;
import nu.marginalia.wmsa.edge.index.model.IndexBlock;
import nu.marginalia.wmsa.edge.index.service.index.SearchIndexReader;
import nu.marginalia.wmsa.edge.index.service.index.SearchIndexWriter;
import nu.marginalia.wmsa.edge.index.service.query.IndexSearchBudget;
import nu.marginalia.wmsa.edge.index.service.query.Query;
import nu.marginalia.wmsa.edge.model.EdgeDomain;
import nu.marginalia.wmsa.edge.model.EdgeId;
import
org.jetbrains.annotations.NotNull; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.annotation.Nullable; +import java.util.*; +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReadWriteLock; +import java.util.concurrent.locks.ReentrantReadWriteLock; +import java.util.function.LongPredicate; +import java.util.stream.Collectors; +import java.util.stream.LongStream; + +public class EdgeIndexBucket { + + private final Logger logger = LoggerFactory.getLogger(getClass()); + + private volatile SearchIndexReader indexReader; + + private final ReadWriteLock indexReplacementLock = new ReentrantReadWriteLock(); + + @NotNull + private final IndexServicesFactory servicesFactory; + private final EdgeIndexControl indexControl; + private final SearchIndexWriter writer; + + private final int id; + + public EdgeIndexBucket(@NotNull IndexServicesFactory servicesFactory, EdgeIndexControl indexControl, int id) { + this.servicesFactory = servicesFactory; + this.indexControl = indexControl; + this.id = id; + + writer = servicesFactory.getIndexWriter(0); + } + + public void init() { + Lock lock = indexReplacementLock.writeLock(); + try { + lock.lock(); + logger.info("Initializing bucket {}", id); + + if (indexReader == null) { + indexReader = servicesFactory.getIndexReader(id); + } + + } + catch (Exception ex) { + logger.error("Uncaught exception", ex); + } + finally { + lock.unlock(); + } + } + + public void preconvert() { + + writer.forceWrite(); + writer.flushWords(); + + servicesFactory.getIndexPreconverter(); + + System.runFinalization(); + System.gc(); + + } + public void switchIndex() { + + indexControl.regenerateIndex(id); + + Lock lock = indexReplacementLock.writeLock(); + try { + lock.lock(); + + indexControl.switchIndexFiles(id); + + if (indexReader != null) { + indexReader.close(); + } + + indexReader = servicesFactory.getIndexReader(id); + + } + catch (Exception ex) { + logger.error("Uncaught exception", ex); + } + finally { 
+ lock.unlock(); + } + } + + + public boolean isAvailable() { + return indexReader != null; + } + + public LongStream getQuery(IndexBlock block, LongPredicate filter, IndexSearchBudget budget, EdgeIndexSearchTerms searchTerms) { + if (null == indexReader) { + logger.warn("Index reader not neady {}", block); + return LongStream.empty(); + } + + var orderedIncludes = searchTerms.includes + .stream() + .sorted(Comparator.comparingLong(i -> indexReader.numHits(block, i))) + .distinct() + .mapToInt(Integer::intValue) + .toArray(); + + + if (logger.isDebugEnabled()) { + logger.debug("Includes: ({}); excludes: ({})", Arrays. + stream(orderedIncludes) + .mapToObj(String::valueOf) + .collect(Collectors.joining(",")), + searchTerms.excludes.stream().map(String::valueOf).collect(Collectors.joining(","))); + } + Query query; + if (orderedIncludes.length == 1) { + query = indexReader.findUnderspecified(block, budget, filter, orderedIncludes[0]); + } + else { + query = indexReader.findWord(block, budget, filter, orderedIncludes[0]); + } + + for (int i = 1; i < orderedIncludes.length; i++) { + query = query.also(orderedIncludes[i]); + } + for (int term : searchTerms.excludes) { + query = query.not(term); + } + return query.stream(); + } + + + public IndexBlock getTermScore(int termId, long urlId) { + return indexReader.getBlockForResult(termId, urlId); + } + + public boolean isTermInBucket(IndexBlock block, int termId, long urlId) { + return indexReader.isTermInBucket(block, termId, urlId); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/SearchEngineRanking.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/SearchEngineRanking.java new file mode 100644 index 00000000..abaced82 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/SearchEngineRanking.java @@ -0,0 +1,56 @@ +package nu.marginalia.wmsa.edge.index.service; + +import gnu.trove.list.TIntList; +import 
gnu.trove.map.hash.TIntIntHashMap; +import gnu.trove.set.hash.TIntHashSet; + +import static nu.marginalia.wmsa.edge.index.EdgeIndexService.DYNAMIC_BUCKET_LENGTH; + +public class SearchEngineRanking { + + private final TIntIntHashMap domainToId + = new TIntIntHashMap(1_000_000, 0.5f, -1, Integer.MAX_VALUE); + + private final TIntHashSet[] domainToBucket = new TIntHashSet[DYNAMIC_BUCKET_LENGTH+1]; + + private final int offset; + private final double[] limits; + + public SearchEngineRanking(int offset, TIntList domains, double... limits) { + this.offset = offset; + this.limits = limits; + + for (int i = offset; i < offset+limits.length; i++) { + domainToBucket[i] = new TIntHashSet(100, 0.5f, DYNAMIC_BUCKET_LENGTH); + } + + for (int i = 0; i < domains.size(); i++) { + double relPortion = i / (double) domains.size(); + + for (int limit = 0; limit < limits.length; limit++) { + if (relPortion < limits[limit]) { + domainToBucket[limit+offset].add(domains.get(i)); + break; + } + } + + domainToId.put(domains.get(i), i); + } + } + + public boolean ownsBucket(int bucketId) { + return bucketId >= offset && bucketId < offset + limits.length; + } + + public boolean hasBucket(int bucket, int domain) { + var set = domainToBucket[bucket]; + if (set == null) { + return false; + } + return set.contains(domain); + } + + public int translateId(int id) { + return domainToId.get(id); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/SearchIndexDao.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/SearchIndexDao.java new file mode 100644 index 00000000..0ecf8f42 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/SearchIndexDao.java @@ -0,0 +1,113 @@ +package nu.marginalia.wmsa.edge.index.service; + +import com.google.inject.Inject; +import com.google.inject.Singleton; +import com.zaxxer.hikari.HikariDataSource; +import gnu.trove.list.TIntList; +import gnu.trove.list.array.TIntArrayList; +import 
gnu.trove.set.hash.TIntHashSet; +import lombok.SneakyThrows; +import nu.marginalia.wmsa.configuration.module.DatabaseModule; +import nu.marginalia.wmsa.edge.index.service.util.ranking.*; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +@Singleton +public class SearchIndexDao { + private final HikariDataSource dataSource; + private final Logger logger = LoggerFactory.getLogger(getClass()); + + @Inject + public SearchIndexDao(HikariDataSource dataSource) + { + this.dataSource = dataSource; + } + + @SneakyThrows + public TIntHashSet getSpamDomains() { + final TIntHashSet result = new TIntHashSet(1_000_000); + + try (var connection = dataSource.getConnection()) { + try (var stmt = connection.prepareStatement("SELECT EC_DOMAIN.ID FROM EC_DOMAIN INNER JOIN EC_TOP_DOMAIN ON EC_DOMAIN.URL_TOP_DOMAIN_ID = EC_TOP_DOMAIN.ID INNER JOIN EC_DOMAIN_BLACKLIST ON EC_DOMAIN_BLACKLIST.URL_DOMAIN = EC_TOP_DOMAIN.URL_PART")) { + var rsp = stmt.executeQuery(); + while (rsp.next()) { + result.add(rsp.getInt(1)); + } + } + } + + return result; + } + + @SneakyThrows + public TIntHashSet goodUrls() { + TIntHashSet domains = new TIntHashSet(10_000_000, 0.5f, -1); + TIntHashSet urls = new TIntHashSet(100_000_000, 0.5f, -1); + + try (var connection = dataSource.getConnection()) { + try (var stmt = connection.prepareStatement("SELECT ID FROM EC_DOMAIN WHERE DOMAIN_ALIAS IS NULL AND STATE>=0")) { + stmt.setFetchSize(10_000); + var rsp = stmt.executeQuery(); + while (rsp.next()) { + domains.add(rsp.getInt(1)); + } + } + + // For some reason, doing this "INNER JOIN" in Java is significantly faster than doing it in SQL + + try (var stmt = connection.prepareStatement("SELECT ID,DOMAIN_ID FROM EC_URL WHERE VISITED AND EC_URL.STATE='OK'")) { + stmt.setFetchSize(10_000); + var rsp = stmt.executeQuery(); + while (rsp.next()) { + if (domains.contains(rsp.getInt(2))) { + urls.add(rsp.getInt(1)); + } + } + } + + } + + return urls; + } + + @SneakyThrows + public TIntList 
getDomainsByRealPageRank() { + var spr = new BetterStandardPageRank(dataSource,"www.rep.routledge.com", "www.personal.kent.edu", "xroads.virginia.edu", "classics.mit.edu", "faculty.washington.edu", "monadnock.net", "memex.marginalia.nu", "wiki.xxiivv.com", "bikobatanari.art", "sadgrl.online", "lileks.com"); + return spr.pageRankWithPeripheralNodes(spr.size()/2, false); + } + + @SneakyThrows + public TIntList getSmallWebDomains() { + var rpr = new BetterReversePageRank(new DatabaseModule().provideConnection(), "bikobatanari.art", "sadgrl.online", "wiki.xxiivv.com", "%neocities.org"); + + rpr.setMaxKnownUrls(750); + + return rpr.pageRankWithPeripheralNodes(rpr.size(), false); + } + + @SneakyThrows + public TIntList getAcademiaDomains() { + var spr = new BetterStandardPageRank(new DatabaseModule().provideConnection(), "%edu"); + return spr.pageRankWithPeripheralNodes(spr.size()/2, false); + } + + @SneakyThrows + public TIntList getDomainsByStandardPageRank() { + var spr = new BuggyStandardPageRank(dataSource,"memex.marginalia.nu"); + return spr.pageRankWithPeripheralNodes(spr.size()/2, false); + } + + @SneakyThrows + public TIntList getSpecialDomains() { + TIntArrayList results = new TIntArrayList(); + try (var connection = dataSource.getConnection(); + var stmt = connection.prepareStatement("SELECT ID FROM EC_DOMAIN WHERE STATE=2") + ) { + var rs = stmt.executeQuery(); + while (rs.next()) { + results.add(rs.getInt(1)); + } + } + return results; + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/SearchIndexes.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/SearchIndexes.java new file mode 100644 index 00000000..91065101 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/SearchIndexes.java @@ -0,0 +1,153 @@ +package nu.marginalia.wmsa.edge.index.service; + +import com.google.inject.Inject; +import com.google.inject.Singleton; +import 
nu.marginalia.wmsa.configuration.server.Initialization; +import nu.marginalia.wmsa.edge.index.IndexServicesFactory; +import nu.marginalia.wmsa.edge.index.radix.EdgeIndexBucket; +import nu.marginalia.wmsa.edge.index.service.dictionary.DictionaryReader; +import nu.marginalia.wmsa.edge.index.service.index.SearchIndexWriterImpl; +import nu.marginalia.wmsa.edge.index.service.query.SearchIndexPartitioner; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.annotation.Nullable; +import java.util.concurrent.locks.ReentrantLock; + +import static nu.marginalia.wmsa.edge.index.EdgeIndexService.DYNAMIC_BUCKET_LENGTH; + +@Singleton +public class SearchIndexes { + private final Logger logger = LoggerFactory.getLogger(getClass()); + + private final EdgeIndexBucket[] buckets + = new EdgeIndexBucket[DYNAMIC_BUCKET_LENGTH + 1]; + private final IndexServicesFactory servicesFactory; + private final SearchIndexPartitioner partitioner; + + private final ReentrantLock opsLock = new ReentrantLock(false); + + private final SearchIndexWriterImpl primaryIndexWriter; + private final SearchIndexWriterImpl secondaryIndexWriter; + private DictionaryReader dictionaryReader = null; + + @Inject + public SearchIndexes(IndexServicesFactory servicesFactory, SearchIndexPartitioner partitioner) { + this.servicesFactory = servicesFactory; + this.partitioner = partitioner; + + this.primaryIndexWriter = servicesFactory.getIndexWriter(0); + this.secondaryIndexWriter = servicesFactory.getIndexWriter(1); + + for (int i = 0; i < buckets.length; i++) { + buckets[i] = servicesFactory.createIndexBucket(i); + } + } + + public boolean repartition() { + + if (!opsLock.tryLock()) { + return false; + } + try { + partitioner.reloadPartitions(); + } + finally { + opsLock.unlock(); + } + + return true; + } + + public boolean preconvert() { + + if (!opsLock.tryLock()) { + return false; + } + try { + buckets[0].preconvert(); + } + finally { + opsLock.unlock(); + } + + return true; + } + + public 
boolean reindex(int id) { + + if (!opsLock.tryLock()) { + return false; + } + try { + buckets[id].switchIndex(); + } + finally { + opsLock.unlock(); + } + + return true; + } + + public boolean reindexAll() { + if (!opsLock.tryLock()) { + return false; + } + try { + for (var bucket : buckets) { + bucket.switchIndex(); + } + } finally { + opsLock.unlock(); + } + + return true; + } + + @Nullable + public DictionaryReader getDictionaryReader() { + return dictionaryReader; + } + + + public boolean isBusy() { + return partitioner.isBusy(); + } + + public void initialize(Initialization init) { + + logger.info("Waiting for init"); + init.waitReady(); + + opsLock.lock(); + try { + logger.info("Initializing buckets"); + for (EdgeIndexBucket bucket : buckets) { + bucket.init(); + } + + logger.info("Initializing dictionary reader"); + dictionaryReader = servicesFactory.getDictionaryReader(); + } + finally { + opsLock.unlock(); + } + } + + public SearchIndexWriterImpl getIndexWriter(int idx) { + if (idx == 0) { + return primaryIndexWriter; + } + else { + return secondaryIndexWriter; + } + } + + public EdgeIndexBucket getBucket(int bucketId) { + return buckets[bucketId]; + } + public boolean isValidBucket(int bucketId) { + return bucketId >= 0 && bucketId < buckets.length; + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/SearchOrder.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/SearchOrder.java new file mode 100644 index 00000000..d1c9f10a --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/SearchOrder.java @@ -0,0 +1,6 @@ +package nu.marginalia.wmsa.edge.index.service; + +public enum SearchOrder { + ASCENDING, + REVERSED +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/dictionary/DictionaryReader.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/dictionary/DictionaryReader.java new file mode 100644 index 00000000..90d270d2 
--- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/dictionary/DictionaryReader.java @@ -0,0 +1,27 @@ +package nu.marginalia.wmsa.edge.index.service.dictionary; + +import com.google.common.cache.Cache; +import com.google.common.cache.CacheBuilder; +import com.google.inject.Inject; +import com.google.inject.Singleton; +import lombok.SneakyThrows; + +import java.util.concurrent.TimeUnit; + +@Singleton +public class DictionaryReader { + private final DictionaryWriter writer; + + private final Cache cache = CacheBuilder.newBuilder().maximumSize(10_000).expireAfterAccess(60, TimeUnit.SECONDS).build(); + + @SneakyThrows @Inject + public DictionaryReader(DictionaryWriter writer) { + this.writer = writer; + } + + @SneakyThrows + public int get(String word) { + return cache.get(word, () -> writer.getReadOnly(word)); + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/dictionary/DictionaryWriter.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/dictionary/DictionaryWriter.java new file mode 100644 index 00000000..c943e50b --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/dictionary/DictionaryWriter.java @@ -0,0 +1,376 @@ +package nu.marginalia.wmsa.edge.index.service.dictionary; + +import com.google.inject.Inject; +import com.google.inject.Singleton; +import com.google.inject.name.Named; +import io.prometheus.client.Gauge; +import lombok.SneakyThrows; +import nu.marginalia.wmsa.edge.crawler.domain.language.WordPatterns; +import nu.marginalia.util.dict.DictionaryHashMap; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.*; +import java.nio.ByteBuffer; +import java.util.*; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReadWriteLock; +import java.util.concurrent.locks.ReentrantReadWriteLock; + +@Singleton +public class DictionaryWriter 
implements AutoCloseable { + private final ArrayList<byte[]> commitQueue = new ArrayList<>(10_000); /* encoded words added by get() that commitToDisk() has not yet written */ + + private final DictionaryHashMap reverseIndex; + private boolean prepopulate; + + private final ReadWriteLock memoryLock = new ReentrantReadWriteLock(); + private final ReadWriteLock diskLock = new ReentrantReadWriteLock(); + private final RandomAccessFile raf; + + private final Map<String, Integer> stats = new HashMap<>(); /* NOTE(review): the type arguments were lost in this dump; <String, Integer> is inferred from printStats() - confirm */ + + private final Logger logger = LoggerFactory.getLogger(getClass()); + + static volatile AtomicInteger instances = new AtomicInteger(); + + private final TokenCompressor readOnlyTokenCompressor = new TokenCompressor(this::getReadOnly); + private final TokenCompressor tokenCompressor = new TokenCompressor(this::get); + + private static final Gauge request_time_metrics + = Gauge.build("wmsa_edge_index_dictionary_size", "Dictionary Size") + .register(); + + private volatile boolean running = true; + private final Thread commitToDiskThread; + @SneakyThrows + public long getPos() { + return raf.getFilePointer(); + } + public void printStats() { + stats + .entrySet() + .stream() + .filter(e -> e.getValue() > 10) + .sorted(Map.Entry.comparingByValue()) + .forEach(e -> System.out.println(e.getKey() + " " + e.getValue())); + } + @SneakyThrows @Inject + public DictionaryWriter( + @Named("edge-writer-dictionary-file") File dictionaryFile, + @Named("edge-dictionary-hash-map-size") Long hashMapSize, + boolean prepopulate) { + logger.info("Creating dictionary writer"); + raf = new RandomAccessFile(dictionaryFile, "rw"); + reverseIndex = new DictionaryHashMap(hashMapSize); + this.prepopulate = prepopulate; + + Lock writeLock = diskLock.writeLock(); + writeLock.lock(); /* acquire before the try so the finally block only ever releases a lock that is actually held */ + try { + loadFile(dictionaryFile); + } + finally { + writeLock.unlock(); + } + + commitToDiskThread = new Thread(this::commitToDiskRunner, "CommitToDiskThread"); + commitToDiskThread.start(); + + Runtime.getRuntime().addShutdownHook(new Thread(this::commitToDisk)); + + if (!instances.compareAndSet(0, 1)) { +
logger.error("MULTIPLE WRITER INSTANCES!"); + } + logger.info("Done creating dictionary writer"); + } + + + public void commitToDiskRunner() { + while (running) { + try { + Thread.sleep(1000); + } catch (InterruptedException e) { + e.printStackTrace(); + } + + commitToDisk(); + } + } + + public void prepare() { + if (!prepopulate) + return; + + try (var resource = Objects.requireNonNull(ClassLoader.getSystemResourceAsStream("dictionary/word-frequency"), + "Could not load word frequency table"); + var br = new BufferedReader(new InputStreamReader(resource)) + ) { + for (;;) { + var line = br.readLine(); + if (line == null) { + break; + } + if (WordPatterns.wordPredicateEither.test(line)) { + get(line); + } + } + } catch (IOException e) { + e.printStackTrace(); + } + + } + @SneakyThrows + private void loadFile(File dictionaryFile) { + if (!dictionaryFile.exists()) { + logger.info("File {} does not exist, can't load", dictionaryFile); + return; + } + + logger.info("Reading {}", dictionaryFile); + + long pos; + if (raf.length() < 8) { + pos = 8; + raf.writeLong(pos); + } + else { + pos = raf.readLong(); + } + + logger.info("Length {} ({})", pos, raf.length()); + if (pos == 8) { + logger.info("Empty DB, prepopulating"); + prepare(); + } + + ByteBuffer buffer = ByteBuffer.allocateDirect(8192); + + var channel = raf.getChannel(); + + long cp = channel.position(); + int debugNext = 0; + try { + buffer.limit(0); + long loaded = 0; + + while (cp < pos || buffer.hasRemaining()) { + if (buffer.limit() - buffer.position() < 4) { + buffer.compact(); + + long rb = channel.read(buffer); + if (rb <= 0) { + break; + } + cp += rb; + + buffer.flip(); + } + + int len = buffer.get(); + if (debugNext > 0) { + logger.warn("NextLen: {} ({})", len, (char) len); + } + while (buffer.limit() - buffer.position() < len) { + buffer.compact(); + int rb = channel.read(buffer); + if (rb <= 0) break; + cp += rb; + buffer.flip(); + } + + if (buffer.limit() < len) { + + logger.warn("Partial write at 
end-of-file!"); + + if (cp >= pos) { + logger.info("... but it's ok"); + } + break; + } + + boolean negativeLen = false; + if (len < 0) { + len = (len&0xFF); + negativeLen = true; + + } + + byte[] data = new byte[len]; + buffer.get(data); + if ((++loaded % 10_000_000) == 0L) { + logger.info("Loaded {} million items", loaded/1_000_000); + } + + if (debugNext > 0) { + logger.warn("Next word {}", new String(data)); + if (--debugNext == 0) { + logger.info(" "); + } + } + if (negativeLen) { + logger.warn("Negative length of word {} {}@{}", len, new String(data), reverseIndex.size()); + debugNext = 10; + } + +// if (reverseIndex.get(data) != DictionaryHashMap.NO_VALUE) { +// logger.error("Duplicate insert"); +// } + reverseIndex.put(data, reverseIndex.size()); + } + } + catch (Exception ex) { + logger.error("IO Exception", ex); + } + + raf.seek(pos); + request_time_metrics.set(reverseIndex.size()); + + logger.info("Initial loading done, dictionary size {}", reverseIndex.size()); + } + + private final ByteBuffer commitBuffer = ByteBuffer.allocateDirect(4096); + public volatile boolean noCommit = false; + @SneakyThrows + public void commitToDisk() { + if (noCommit) return; + + if (!raf.getChannel().isOpen()) { + logger.error("commitToDisk() with closed channel! 
Cannot commit!"); + return; + } + + Lock memLock = memoryLock.readLock(); + List data; + try { + memLock.lock(); + if (commitQueue.isEmpty()) + return; + data = new ArrayList<>(commitQueue); + commitQueue.clear(); + } + finally { + memLock.unlock(); + } + + var channel = raf.getChannel(); + commitBuffer.clear(); + + Lock writeLock = diskLock.writeLock(); + // Only acquire memory lock if there's a risk of backpressure + if (data.size() < 1000) { + memLock = null; + } + + try { + if (memLock != null) memLock.lock(); + writeLock.lock(); + + long start = System.currentTimeMillis(); + int ct = data.size(); + + for (byte[] item : data) { + commitBuffer.clear(); + commitBuffer.put((byte) item.length); + commitBuffer.put(item); + commitBuffer.flip(); + + while (commitBuffer.position() < commitBuffer.limit()) + channel.write(commitBuffer, channel.size()); + } + + long pos = channel.size(); + commitBuffer.clear(); + commitBuffer.putLong(pos); + commitBuffer.flip(); + channel.write(commitBuffer, 0); + + channel.force(false); + + logger.debug("Comitted {} items in {} ms", ct, System.currentTimeMillis() - start); + } + catch (Exception ex) { + logger.error("Error during dictionary commit!!!", ex); + } + finally { + writeLock.unlock(); + if (memLock != null) { + memLock.unlock(); + } + } + } + + public int get(String macroWord) { + byte[] word = tokenCompressor.getWordBytes(macroWord); + + Lock lock = memoryLock.readLock(); + try { + lock.lock(); + int idx = reverseIndex.get(word); + if (idx >= 0) { + return idx; + } + } + finally { + lock.unlock(); + } + + lock = memoryLock.writeLock(); + try { + lock.lock(); + int idx = reverseIndex.get(word); + if (idx >= 0) { + return idx; + } + + if (!noCommit) { + commitQueue.add(word); + } + + idx = reverseIndex.size(); + + reverseIndex.put(word, idx); + + request_time_metrics.set(reverseIndex.size()); + + return idx; + } + finally { + + lock.unlock(); + } + } + + public int getReadOnly(String word) { + var bytes = 
readOnlyTokenCompressor.getWordBytes(word); + if (bytes.length == 0) { + return DictionaryHashMap.NO_VALUE; + } + return reverseIndex.get(bytes); + } + + public int size() { + Lock lock = memoryLock.readLock(); + lock.lock(); + try { + return reverseIndex.size(); + } + finally { + lock.unlock(); + } + } + + @Override + public void close() throws Exception { /* Stops the commit thread, performs a final commit and closes the dictionary file. */ + logger.warn("Closing DictionaryWriter"); + + running = false; + commitToDiskThread.join(); + commitToDisk(); + + raf.close(); + } + +} + diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/dictionary/TokenCompressor.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/dictionary/TokenCompressor.java new file mode 100644 index 00000000..9f26fffd --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/dictionary/TokenCompressor.java @@ -0,0 +1,83 @@ +package nu.marginalia.wmsa.edge.index.service.dictionary; + +import nu.marginalia.util.ByteFolder; +import nu.marginalia.util.dict.DictionaryHashMap; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Arrays; +import java.util.function.Predicate; +import java.util.function.ToIntFunction; +import java.util.regex.Pattern; + +public class TokenCompressor { /* Turns a word into the byte key used by the dictionary; compound words are split at their first separator and folded from the ids of the two halves. */ + private final ToIntFunction<String> mapper; /* resolves a sub-word to its dictionary id */ + private final ByteFolder folder = new ByteFolder(); + public static final byte[] EMPTY = new byte[0]; + + private static final Logger logger = LoggerFactory.getLogger(TokenCompressor.class); + + private static final Predicate<String> intPatternMatcher = Pattern.compile("[1-9][0-9]{1,8}").asMatchPredicate(); /* 2-9 digit numbers without a leading zero */ + + + public TokenCompressor(ToIntFunction<String> mapper) { + this.mapper = mapper; + } + final char[] separators = new char[] { '_', '-', '.', '/' }; + public synchronized byte[] getWordBytes(String macroWord) { + int ui = -1; + + for (char c : separators) { + int ui2 = macroWord.indexOf(c); + if (ui < 0) ui = ui2; + else if (ui2 >= 0) ui = Math.min(ui, ui2); + } + + if (ui <= 0 || ui
>= macroWord.length()-1) { + return getByteRepresentation(macroWord); + } + + String car = macroWord.substring(0, ui); + String cdr = macroWord.substring(ui+1); + + int carId = mapper.applyAsInt(car); + int cdrId = mapper.applyAsInt(cdr); + + if (carId == DictionaryHashMap.NO_VALUE || cdrId == DictionaryHashMap.NO_VALUE) { + return EMPTY; + } + + return folder.foldBytes(carId, cdrId); + } + + private byte[] getByteRepresentation(String word) { + if (intPatternMatcher.test(word)) { + long val = Long.parseLong(word); + if (val < 0x100) { + return new byte[] { 'A', (byte) (val & 0xFF)}; + } + else if (val < 0x10000) { + return new byte[] { 'B', (byte)((val & 0xFF00)>>8), (byte) (val & 0xFF)}; + } + else if (val < 0x1000000) { + return new byte[] { 'C', (byte)((val & 0xFF0000)>>12), (byte)((val & 0xFF00)>>8), (byte) (val & 0xFF)}; + } + else if (val < 0x100000000L) { + return new byte[] { 'D', (byte)((val & 0xFF0000)>>16), (byte)((val & 0xFF0000)>>12), (byte)((val & 0xFF00)>>8), (byte) (val & 0xFF)}; + } + } + + var bytes = word.getBytes(); + for (int i = 0; i < bytes.length; i++) { + if (bytes[i] < 32 && (bytes[i] & 0x80) == 0) { + logger.error("Bad byte in {} -> {} ({})", word, bytes[i], (char) bytes[i]); + bytes[i] = '?'; + } + } + if (bytes.length >= Byte.MAX_VALUE) { + return Arrays.copyOf(bytes, Byte.MAX_VALUE); + } + return bytes; + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/index/SearchIndex.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/index/SearchIndex.java new file mode 100644 index 00000000..c25100f4 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/index/SearchIndex.java @@ -0,0 +1,115 @@ +package nu.marginalia.wmsa.edge.index.service.index; + +import com.google.inject.Inject; +import com.google.inject.name.Named; +import com.upserve.uppend.blobs.NativeIO; +import nu.marginalia.wmsa.edge.index.service.index.wordstable.IndexWordsTable; +import 
nu.marginalia.util.btree.BTreeReader; +import nu.marginalia.util.multimap.MultimapFileLong; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.io.IOException; +import java.io.RandomAccessFile; +import java.util.stream.LongStream; + +public class SearchIndex implements AutoCloseable { + + private final MultimapFileLong urls; + private final IndexWordsTable words; + private final RandomAccessFile wordsFile; + private final BTreeReader bTreeReader; + private final Logger logger; + + @Inject + public SearchIndex( + String name, + @Named("edge-index-read-urls-file") File inUrls, + @Named("edge-index-read-words-file") File inWords) + throws IOException { + + logger = LoggerFactory.getLogger(name); + wordsFile = new RandomAccessFile(inWords, "r"); + + logger.info("{} : Loading {}", name, inUrls); + logger.info("{} : Loading {}", name, inWords); + + urls = MultimapFileLong.forReading(inUrls.toPath()); + words = IndexWordsTable.ofFile(wordsFile); + + bTreeReader = new BTreeReader(urls, SearchIndexConverter.urlsBTreeContext); + + madvise(urls, bTreeReader); + } + + private void madvise(MultimapFileLong urls, BTreeReader reader) { + + urls.advice(NativeIO.Advice.Sequential); + words.forEachWordsOffset(offset -> { + var h = reader.getHeader(offset); + int length = (int) (h.dataOffsetLongs() - h.indexOffsetLongs()); + + if (length > 0) { + urls.adviceRange(NativeIO.Advice.WillNeed, h.indexOffsetLongs(), length); + urls.pokeRange(h.indexOffsetLongs(), length); + } + }); + } + + + public long numUrls(int wordId) { + int length = words.wordLength(wordId); + if (length < 0) return 0; + if (length > 0) return length; + + var range = rangeForWord(wordId); + if (range.isPresent()) { + return bTreeReader.getHeader(range.dataOffset).numEntries(); + } + return 0; + } + + public UrlIndexTree rangeForWord(int wordId) { + return new UrlIndexTree(words.positionForWord(wordId)); + } + + public boolean hasUrl(long url, UrlIndexTree range) { + if 
(!range.isPresent()) + return false; + + return bTreeReader.offsetForEntry(bTreeReader.getHeader(range.dataOffset), url) >= 0; + } + + public class UrlIndexTree { + final long dataOffset; + + public UrlIndexTree(long dataOffset) { + this.dataOffset = dataOffset; + } + + public LongStream stream() { + if (dataOffset < 0) { + return LongStream.empty(); + } + var header = bTreeReader.getHeader(dataOffset); + + long urlOffset = header.dataOffsetLongs(); + return LongStream.range(urlOffset, urlOffset + header.numEntries()).map(urls::get); + } + + public boolean isPresent() { + return dataOffset >= 0; + } + } + + + + @Override + public void close() throws Exception { + urls.close(); + words.close(); + + wordsFile.close(); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/index/SearchIndexConverter.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/index/SearchIndexConverter.java new file mode 100644 index 00000000..95a47a69 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/index/SearchIndexConverter.java @@ -0,0 +1,383 @@ +package nu.marginalia.wmsa.edge.index.service.index; + +import com.google.inject.Inject; +import com.google.inject.name.Named; +import gnu.trove.set.hash.TIntHashSet; +import lombok.RequiredArgsConstructor; +import lombok.SneakyThrows; +import nu.marginalia.wmsa.edge.data.dao.task.EdgeDomainBlacklist; +import nu.marginalia.wmsa.edge.index.model.IndexBlock; +import nu.marginalia.wmsa.edge.index.service.index.wordstable.WordsTableWriter; +import nu.marginalia.wmsa.edge.index.service.query.SearchIndexPartitioner; +import nu.marginalia.util.btree.BTreeWriter; +import nu.marginalia.util.btree.model.BTreeContext; +import nu.marginalia.util.multimap.MultimapFileLong; +import nu.marginalia.util.RandomWriteFunnel; +import nu.marginalia.util.multimap.MultimapSorter; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.*; +import 
java.nio.ByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.concurrent.locks.Lock; + +public class SearchIndexConverter { + private static final long FILE_HEADER_SIZE = 12; + private static final int CHUNK_HEADER_SIZE = 16; + + public static final BTreeContext urlsBTreeContext = new BTreeContext(5, 1, ~0, 8); + + private final long fileLength; + private final long urlsFileSize; + private final FileChannel urlsTmpFileChannel; + private final int wordCount; + private final MultimapFileLong urlsTmpFileMap; + private final Logger logger = LoggerFactory.getLogger(getClass()); + private final IndexBlock block; + private final int bucketId; + @org.jetbrains.annotations.NotNull + private final File urlsFile; + private final SearchIndexPartitioner partitioner; + private final TIntHashSet spamDomains; + private final MultimapSorter urlTmpFileSorter; + + @SneakyThrows + public static long wordCount(File inputFile) { + try (RandomAccessFile raf = new RandomAccessFile(inputFile, "r")) { + raf.readLong(); + return raf.readInt(); + } + } + + @SneakyThrows + @Inject + public SearchIndexConverter(IndexBlock block, + int bucketId, @Named("tmp-file-dir") Path tmpFileDir, + @Named("edge-writer-page-index-file") File inputFile, + @Named("edge-index-write-words-file") File outputFileWords, + @Named("edge-index-write-urls-file") File outputFileUrls, + SearchIndexPartitioner partitioner, + EdgeDomainBlacklist blacklist) + { + this.block = block; + this.bucketId = bucketId; + urlsFile = outputFileUrls; + this.partitioner = partitioner; + this.spamDomains = blacklist.getSpamDomains(); + logger.info("Converting {} ({}) {}", block.id, block, inputFile); + + Files.deleteIfExists(outputFileWords.toPath()); + Files.deleteIfExists(outputFileUrls.toPath()); + + final RandomAccessFile raf = new RandomAccessFile(inputFile, "r"); + + this.fileLength = raf.readLong(); + this.wordCount = raf.readInt(); + + var inputChannel = 
raf.getChannel(); + + ByteBuffer buffer = ByteBuffer.allocateDirect(10_000); + + urlsFileSize = getUrlsSize(buffer, raf); + + var tmpUrlsFile = Files.createTempFile(tmpFileDir, "urls-sorted", ".dat"); + + var urlsTmpFileRaf = new RandomAccessFile(tmpUrlsFile.toFile(), "rw"); + urlsTmpFileChannel = new RandomAccessFile(tmpUrlsFile.toFile(), "rw").getChannel(); + urlsTmpFileMap = new MultimapFileLong(urlsTmpFileRaf, FileChannel.MapMode.READ_WRITE, urlsFileSize, 8*1024*1024, false); + urlTmpFileSorter = urlsTmpFileMap.createSorter(tmpFileDir, 1024*1024*256); + + logger.info("Creating word index table {} for block {} ({})", outputFileWords, block.id, block); + long[] wordIndexTable = createWordIndexTable(outputFileWords, inputChannel); + + logger.info("Creating word urls table {} for block {} ({})", outputFileUrls, block.id, block); + createUrlTable(tmpFileDir, buffer, raf, wordIndexTable); + + Files.delete(tmpUrlsFile); + raf.close(); + + urlsTmpFileChannel.close(); + urlsTmpFileMap.force(); + + } + + private boolean isUrlAllowed(long url) { + return !spamDomains.contains((int)(url >>> 32)); + } + + public long translateUrl(long url) { + int domainId = partitioner.translateId(bucketId, (int) (url >>> 32)); + return ((long)domainId << 32) | (url & 0xFFFFFFFFL); + } + + + @RequiredArgsConstructor + private class IndexReader { + private final ByteBuffer buffer; + private final FileChannel channel; + public long filtered; + + public void read() throws IOException { + var lock = partitioner.getReadLock(); + try { + lock.lock(); + outer: + while (channel.position() < fileLength) { + buffer.clear(); + buffer.limit(CHUNK_HEADER_SIZE); + channel.read(buffer); + buffer.flip(); + long urlId = buffer.getLong(); + int chunkBlock = buffer.getInt(); + int count = buffer.getInt(); + + if (count > 1000) { + + int tries = 0; + logger.warn("Terminating garbage @{}b, attempting repair", channel.position()); + + for (; ; ) { + tries++; + long p = channel.position(); + buffer.clear(); + 
buffer.limit(8); + if (channel.read(buffer) != 8) { + break outer; // EOF...? + } + + buffer.flip(); + int pcb = buffer.getInt(); + int pct = buffer.getInt(); + if ((pcb == 0 || pcb == 1) && pct >= 0 && pct <= 1000) { /* parenthesised: && binds tighter than ||, so the unparenthesised form accepted pcb == 0 with any count */ + chunkBlock = pcb; + count = pct; + break; + } else { + channel.position(p + 1); + } + } + logger.warn("Skipped {}b", tries); + } + + buffer.clear(); + buffer.limit(count * 4); + + int trb = 0; + while (trb < count * 4) { /* keep reading until all count 4-byte word ids are in the buffer */ + int rb = channel.read(buffer); + if (rb <= 0) { + throw new ArrayIndexOutOfBoundsException(trb + " - " + count * 4 + " " + rb); + } + trb += rb; + } + + buffer.flip(); + + if (isUrlAllowed(urlId)) { + if (block.id == chunkBlock) { + eachUrl(lock, count, urlId); + } + } else { + filtered++; + } + } + } + finally { + lock.unlock(); + } + } + + public void eachUrl(Lock lock, int count, long urlId) throws IOException { /* reads count word ids from the buffer and passes each accepted one to eachWord() */ + for (int i = 0; i < count; i++) { + int wordId = buffer.getInt(); + if (acceptWord(lock, urlId, wordId, i, block.id)) { + eachWord(urlId, wordId); + } + } + } + public void eachWord(long urlId, int wordId) throws IOException { /* no-op hook, overridden by the anonymous subclasses below */ + + } + } + + private long getUrlsSize(ByteBuffer buffer, RandomAccessFile raf) throws IOException { /* Scans the input once and returns the number of (url, word) pairs accepted for this block and bucket. */ + raf.seek(FILE_HEADER_SIZE); + + var channel = raf.getChannel(); + + var reader = new IndexReader(buffer, channel) { + public long size; + + @Override + public void eachWord(long urlId, int wordId) { + size++; + } + }; + + reader.read(); + + logger.info("Blacklist filtered {} URLs", reader.filtered); + logger.debug("URLs Size {} Mb", channel.position()/(1024*1024)); + + return reader.size; + } + + private void createUrlTable(Path tmpFileDir, ByteBuffer buffer, RandomAccessFile raf, long[] wordIndexTable) throws IOException { + logger.debug("Table size = {}", wordIndexTable.length); + int[] wordIndex = new int[wordIndexTable.length]; + raf.seek(FILE_HEADER_SIZE); + + var channel = raf.getChannel(); + + try (RandomWriteFunnel rwf = new RandomWriteFunnel(tmpFileDir, urlsFileSize, 10_000_000)) { + var
reader = new IndexReader(buffer, channel) { + @Override + public void eachWord(long urlId, int wordId) throws IOException { + if (wordId >= wordIndex.length) + return; + + if (wordId != 0) { + if (!(wordIndexTable[wordId - 1] + wordIndex[wordId] <= wordIndexTable[wordId])) { + logger.error("Crazy state: wordId={}, index={}, lower={}, upper={}", + wordId, + wordIndex[wordId], + wordIndexTable[wordId - 1], + wordIndexTable[wordId]); + throw new IllegalStateException(); + } + } + if (wordId > 0) { + rwf.put(wordIndexTable[wordId - 1] + wordIndex[wordId]++, translateUrl(urlId)); + } else { + rwf.put(wordIndex[wordId]++, translateUrl(urlId)); + } + } + }; + + reader.read(); + + rwf.write(urlsTmpFileChannel); + } + + urlsTmpFileChannel.force(false); + + logger.debug("URL TMP Table: {} Mb", channel.position()/(1024*1024)); + + if (wordIndexTable.length > 0) { + logger.debug("Sorting urls table"); + sortUrls(wordIndexTable); + urlsTmpFileMap.force(); + } + else { + logger.warn("urls table empty -- nothing to sort"); + } + + + long idx = 0; + + var copyBuffer = ByteBuffer.allocateDirect(4096); + try (var urlsFileMap = MultimapFileLong.forOutput(urlsFile.toPath(), 1024)) { + var writer = new BTreeWriter(urlsFileMap, urlsBTreeContext); + + if (wordIndexTable[0] != 0) { + int start = 0; + int end = (int) wordIndexTable[0]; + + idx += writer.write(idx, (int) wordIndexTable[0], + offset -> urlsFileMap.transferFromFileChannel(urlsTmpFileChannel, offset, start, end)); + } + + for (int i = 1; i < wordIndexTable.length; i++) { + if (wordIndexTable[i] != wordIndexTable[i - 1]) { + long start = wordIndexTable[i-1]; + long end = wordIndexTable[i]; + + idx += writer.write(idx, (int) (end-start), + offset -> urlsFileMap.transferFromFileChannel(urlsTmpFileChannel, offset, start, end)); + } + } + } catch (Exception e) { + e.printStackTrace(); + } + + logger.warn("BTrees generated"); + } + + public void transfer(ByteBuffer buffer, MultimapFileLong dest, FileChannel sourceChannel, long 
destOffset, long sourceStart, long sourceEnd) throws IOException { /* Copies the longs [sourceStart, sourceEnd) of sourceChannel into dest starting at destOffset, staged through buffer. Offsets and tbw count longs; bw counts bytes read into the current chunk. Throws IOException if the source ends early. */ + int tbw = 0; + buffer.clear(); + buffer.limit((int) Math.min((long) buffer.capacity(), (sourceEnd - sourceStart)*8)); + while (sourceEnd - sourceStart - tbw > buffer.limit()/8) { + int bw = 0; + while (buffer.position() < buffer.limit()) { + int r = sourceChannel.read(buffer, (sourceStart + tbw)*8 + bw); /* byte position: skip the tbw longs already copied (previously every chunk re-read the first one) */ + if (r < 0) { + throw new IOException(""); + } + bw += r; + } + buffer.flip(); + dest.write(buffer.asLongBuffer(), destOffset + tbw); + tbw += bw/8; + buffer.clear(); + buffer.limit((int) Math.min((long) buffer.capacity(), (sourceEnd - sourceStart - tbw)*8)); /* remaining longs converted to bytes; the old expression subtracted a long count from a byte count */ + } + buffer.clear(); + buffer.limit((int)(sourceEnd - (sourceStart + tbw))*8); + int bw = 0; + while (bw < buffer.limit()) { + int r = sourceChannel.read(buffer, (sourceStart + tbw)*8 + bw); if (r < 0) { throw new IOException("Unexpected end of file"); } bw += r; /* byte position as above; fail on EOF instead of adding -1 forever */ + } + buffer.flip(); + dest.write(buffer.asLongBuffer(), destOffset + tbw); + } + + @SneakyThrows + private void sortUrls(long[] wordIndices) { + urlTmpFileSorter.sort( 0, (int) wordIndices[0]); + + for (int i = 1; i < wordIndices.length; i++) { + urlTmpFileSorter.sort(wordIndices[i-1], (int) (wordIndices[i] - wordIndices[i-1])); + } + } + + private long[] createWordIndexTable(File outputFileWords, FileChannel inputChannel) throws Exception { + inputChannel.position(FILE_HEADER_SIZE); + + logger.debug("Table size = {}", wordCount); + WordsTableWriter wordsTableWriter = new WordsTableWriter(wordCount); + ByteBuffer buffer = ByteBuffer.allocateDirect(8*SearchIndexWriterImpl.MAX_BLOCK_SIZE); + + logger.debug("Reading words"); + + var reader = new IndexReader(buffer, inputChannel) { + @Override + public void eachWord(long urlId, int wordId) { + wordsTableWriter.acceptWord(wordId); + } + }; + reader.read(); + + logger.debug("Rearranging table"); + + inputChannel.position(FILE_HEADER_SIZE); + + wordsTableWriter.write(outputFileWords); + + return wordsTableWriter.getTable(); + } + + boolean acceptWord(Lock lock, long urlId, int wordId, int wordIdx, int block) { + int domainId = (int) (urlId >>> 32L); + + if
(!partitioner.filterUnsafe(lock, domainId, bucketId)) { + return false; + } + + return true; + } +} + diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/index/SearchIndexPreconverter.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/index/SearchIndexPreconverter.java new file mode 100644 index 00000000..02197e3d --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/index/SearchIndexPreconverter.java @@ -0,0 +1,135 @@ +package nu.marginalia.wmsa.edge.index.service.index; + +import com.google.inject.Inject; +import gnu.trove.set.hash.TIntHashSet; +import lombok.SneakyThrows; +import nu.marginalia.wmsa.edge.data.dao.task.EdgeDomainBlacklist; +import nu.marginalia.wmsa.edge.index.service.query.SearchIndexPartitioner; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.io.RandomAccessFile; +import java.nio.ByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.file.Files; +import java.util.Objects; + +public class SearchIndexPreconverter { + private static final int CHUNK_HEADER_SIZE = 16; + + private final Logger logger = LoggerFactory.getLogger(getClass()); + private final SearchIndexPartitioner partitioner; + private final TIntHashSet spamDomains; + + @SneakyThrows + public static long wordCount(File inputFile) { + try (RandomAccessFile raf = new RandomAccessFile(inputFile, "r")) { + raf.readLong(); + return raf.readInt(); + } + } + + @SneakyThrows + @Inject + public SearchIndexPreconverter(File inputFile, + File[] outputFiles, + SearchIndexPartitioner partitioner, + EdgeDomainBlacklist blacklist) + { + this.partitioner = partitioner; + this.spamDomains = blacklist.getSpamDomains(); + logger.info("Preconverting {}", inputFile); + + for (File f : outputFiles) { + if (f.exists()) { + Files.deleteIfExists(Objects.requireNonNull(f).toPath()); + } + } + + final RandomAccessFile raf = new RandomAccessFile(inputFile, "r"); + + var 
fileLength = raf.readLong(); + var wordCount = raf.readInt(); + final int wordCountOriginal = wordCount; + + logger.info("Word Count: {}", wordCount); + logger.info("File Length: {}", fileLength); + + var channel = raf.getChannel(); + + ByteBuffer inByteBuffer = ByteBuffer.allocateDirect(10_000); + + RandomAccessFile[] randomAccessFiles = new RandomAccessFile[outputFiles.length]; + for (int i = 0; i < randomAccessFiles.length; i++) { + randomAccessFiles[i] = new RandomAccessFile(outputFiles[i], "rw"); + randomAccessFiles[i].seek(12); + } + FileChannel[] fileChannels = new FileChannel[outputFiles.length]; + for (int i = 0; i < fileChannels.length; i++) { + fileChannels[i] = randomAccessFiles[i].getChannel(); + } + + + var lock = partitioner.getReadLock(); + try { + lock.lock(); + + while (channel.position() < fileLength) { + inByteBuffer.clear(); + inByteBuffer.limit(CHUNK_HEADER_SIZE); + channel.read(inByteBuffer); + inByteBuffer.flip(); + long urlId = inByteBuffer.getLong(); + int chunkBlock = inByteBuffer.getInt(); + int count = inByteBuffer.getInt(); + // inByteBuffer.clear(); + inByteBuffer.limit(count * 4 + CHUNK_HEADER_SIZE); + channel.read(inByteBuffer); + inByteBuffer.position(CHUNK_HEADER_SIZE); + + for (int i = 0; i < count; i++) { + wordCount = Math.max(wordCount, 1 + inByteBuffer.getInt()); + } + + inByteBuffer.position(count * 4 + CHUNK_HEADER_SIZE); + + + if (isUrlAllowed(urlId)) { + for (int i = 0; i < randomAccessFiles.length; i++) { + if (partitioner.filterUnsafe(lock, (int) (urlId >>> 32L), i)) { + inByteBuffer.flip(); + fileChannels[i].write(inByteBuffer); + } + } + } + } + } + finally { + lock.unlock(); + } + + if (wordCountOriginal < wordCount) { + logger.warn("Raised word count {} => {}", wordCountOriginal, wordCount); + } + + for (int i = 0; i < randomAccessFiles.length; i++) { + long pos = randomAccessFiles[i].getFilePointer(); + randomAccessFiles[i].seek(0); + randomAccessFiles[i].writeLong(pos); + randomAccessFiles[i].writeInt(wordCount); 
+ fileChannels[i].force(true); + fileChannels[i].close(); + randomAccessFiles[i].close(); + } + }; + + private boolean isUrlAllowed(long url) { /* low 32 bits of url are the url id, high 32 bits the domain id */ + int urlId = (int)(url & 0xFFFF_FFFFL); + int domainId = (int)(url >>> 32); + + return partitioner.isGoodUrl(urlId) && !spamDomains.contains(domainId); + } + +} + diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/index/SearchIndexReader.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/index/SearchIndexReader.java new file mode 100644 index 00000000..df269034 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/index/SearchIndexReader.java @@ -0,0 +1,134 @@ +package nu.marginalia.wmsa.edge.index.service.index; + +import com.google.common.cache.Cache; +import com.google.common.cache.CacheBuilder; +import com.google.inject.Inject; +import lombok.SneakyThrows; +import nu.marginalia.wmsa.edge.index.model.IndexBlock; +import nu.marginalia.wmsa.edge.index.service.query.IndexQueryBuilder; +import nu.marginalia.wmsa.edge.index.service.query.IndexSearchBudget; +import nu.marginalia.wmsa.edge.index.service.query.Query; +import org.apache.commons.lang3.tuple.Pair; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.EnumMap; +import java.util.function.LongPredicate; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +public class SearchIndexReader implements AutoCloseable { /* Query front-end over one SearchIndex per IndexBlock. */ + + private final EnumMap<IndexBlock, SearchIndex> indices; + + private final EnumMap<IndexBlock, IndexQueryBuilder> queryBuilders; + private final EnumMap<IndexBlock, IndexQueryBuilder> underspecifiedQueryBuilders; + + private final Logger logger = LoggerFactory.getLogger(getClass()); + private final Cache<Pair<IndexBlock, Integer>, Long> numHitsCache = CacheBuilder.newBuilder().maximumSize(1000).build(); /* (block, word id) -> hit count, filled by numHits() */ + + private static final IndexBlock[] indicesBySearchOrder = new IndexBlock[] { + IndexBlock.Top, + IndexBlock.Middle, + IndexBlock.Low, + IndexBlock.Words, + IndexBlock.NamesWords, + }; + + @Inject + public SearchIndexReader( + EnumMap<IndexBlock, SearchIndex>
indices) { + this.indices = indices; + + var lowIndex = indices.get(IndexBlock.Low); + var midIndex = indices.get(IndexBlock.Middle); + var topIndex = indices.get(IndexBlock.Top); + var linkIndex = indices.get(IndexBlock.Link); + var titleIndex = indices.get(IndexBlock.Title); + var namesIndex = indices.get(IndexBlock.NamesWords); + var positionIndex = indices.get(IndexBlock.PositionWords); + var titleKeywordsIndex = indices.get(IndexBlock.TitleKeywords); + var wordsIndex = indices.get(IndexBlock.Words); + var metaIndex = indices.get(IndexBlock.Meta); + var topicIndex = indices.get(IndexBlock.Topic); + + queryBuilders = new EnumMap<>(IndexBlock.class); + underspecifiedQueryBuilders = new EnumMap<>(IndexBlock.class); + + queryBuilders.put(IndexBlock.Words, new IndexQueryBuilder(Stream.of(metaIndex, titleKeywordsIndex, topicIndex, titleIndex, topIndex, midIndex, lowIndex, namesIndex, wordsIndex).collect(Collectors.toList()), wordsIndex)); + queryBuilders.put(IndexBlock.Low, new IndexQueryBuilder(Stream.of(metaIndex, titleKeywordsIndex, topicIndex, titleIndex, topIndex, midIndex, lowIndex, namesIndex).collect(Collectors.toList()), wordsIndex)); + queryBuilders.put(IndexBlock.Middle, new IndexQueryBuilder(Stream.of(metaIndex, titleKeywordsIndex, topicIndex, titleIndex, topIndex, midIndex).collect(Collectors.toList()), wordsIndex)); + queryBuilders.put(IndexBlock.Top, new IndexQueryBuilder(Stream.of(metaIndex, titleKeywordsIndex, topicIndex, titleIndex, topIndex).collect(Collectors.toList()), wordsIndex)); + queryBuilders.put(IndexBlock.PositionWords, new IndexQueryBuilder(Stream.of(metaIndex, titleKeywordsIndex, topicIndex, titleIndex, namesIndex, positionIndex).collect(Collectors.toList()), wordsIndex)); + queryBuilders.put(IndexBlock.NamesWords, new IndexQueryBuilder(Stream.of(metaIndex, titleKeywordsIndex, topicIndex, titleIndex, namesIndex).collect(Collectors.toList()), wordsIndex)); + queryBuilders.put(IndexBlock.Link, new IndexQueryBuilder(Stream.of(metaIndex, 
titleKeywordsIndex, topicIndex, titleIndex, linkIndex).collect(Collectors.toList()), wordsIndex)); + queryBuilders.put(IndexBlock.Title, new IndexQueryBuilder(Stream.of(metaIndex, titleKeywordsIndex, topicIndex, titleIndex).collect(Collectors.toList()), wordsIndex)); + queryBuilders.put(IndexBlock.TitleKeywords, new IndexQueryBuilder(Stream.of(metaIndex, titleKeywordsIndex).collect(Collectors.toList()), wordsIndex)); + + underspecifiedQueryBuilders.put(IndexBlock.TitleKeywords, new IndexQueryBuilder(Stream.of(titleKeywordsIndex, linkIndex, topicIndex, topIndex, midIndex, lowIndex, namesIndex, positionIndex, metaIndex).collect(Collectors.toList()), wordsIndex)); + underspecifiedQueryBuilders.put(IndexBlock.Link, new IndexQueryBuilder(Stream.of(linkIndex, topicIndex, topIndex, midIndex, lowIndex, namesIndex, positionIndex, metaIndex).collect(Collectors.toList()), wordsIndex)); + } + + public Query findUnderspecified( + IndexBlock block, + IndexSearchBudget budget, + LongPredicate filter, + int wordId) { + var builder = underspecifiedQueryBuilders.get(block); + if (null != builder) { + return builder.buildUnderspecified(budget, filter, wordId); + } + return findWord(block, budget, filter, wordId); + } + + public Query findWord(IndexBlock block, IndexSearchBudget budget, LongPredicate filter, int wordId) { + return queryBuilders.get(block).build(budget, filter, wordId); + } + + @Override + public void close() throws Exception { + for (var idx : indices.values()) { + idx.close(); + } + numHitsCache.invalidateAll(); + numHitsCache.cleanUp(); + } + + @SneakyThrows + public long numHits(IndexBlock block, int word) { + return numHitsCache.get(Pair.of(block, word), + () -> queryBuilders.get(block) + .getIndicies() + .stream() + .mapToLong(idx -> idx.numUrls(word)) + .sum() + ); + + } + + + public IndexBlock getBlockForResult(int searchTerm, long urlId) { + for (var block : indicesBySearchOrder) { + var index = indices.get(block); + + if (null == index) { + continue; + } + + 
var range = index.rangeForWord(searchTerm); + if (index.hasUrl(urlId, range)) { + return block; + } + } + return IndexBlock.Words; + } + + public boolean isTermInBucket(IndexBlock block, int searchTerm, long urlId) { + final var index = indices.get(block); + if (null == index) return false; + + final var range = index.rangeForWord(searchTerm); + + return index.hasUrl(urlId, range); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/index/SearchIndexScrubberMain.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/index/SearchIndexScrubberMain.java new file mode 100644 index 00000000..7225dbb5 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/index/SearchIndexScrubberMain.java @@ -0,0 +1,79 @@ +package nu.marginalia.wmsa.edge.index.service.index; + +import nu.marginalia.wmsa.edge.index.model.IndexBlock; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.io.RandomAccessFile; +import java.nio.ByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.file.Path; + +public class SearchIndexScrubberMain { + public static final Logger logger = LoggerFactory.getLogger(SearchIndexScrubberMain.class); + private static final int CHUNK_HEADER_SIZE = 16; + + public static void main(String... 
args) throws IOException { + var inputFile = Path.of(args[0]).toFile(); + var outputFile = Path.of(args[1]).toFile(); + + logger.info("Scrubbing {}", inputFile); + + final RandomAccessFile raf = new RandomAccessFile(inputFile, "r"); + + var fileLength = raf.readLong(); + var wordCount = raf.readInt(); + + logger.info("Word Count: {}", wordCount); + logger.info("File Length: {}", fileLength); + + var channel = raf.getChannel(); + + ByteBuffer inByteBuffer = ByteBuffer.allocateDirect(10_000); + + RandomAccessFile[] randomAccessFiles = new RandomAccessFile[1]; + + for (int i = 0; i < randomAccessFiles.length; i++) { + randomAccessFiles[i] = new RandomAccessFile(outputFile, "rw"); + randomAccessFiles[i].seek(12); + } + FileChannel[] fileChannels = new FileChannel[1]; + for (int i = 0; i < fileChannels.length; i++) { + fileChannels[i] = randomAccessFiles[i].getChannel(); + } + + while (channel.position() < fileLength) { + inByteBuffer.clear(); + inByteBuffer.limit(CHUNK_HEADER_SIZE); + channel.read(inByteBuffer); + inByteBuffer.flip(); + long urlId = inByteBuffer.getLong(); + int chunkBlock = inByteBuffer.getInt(); + int count = inByteBuffer.getInt(); + inByteBuffer.clear(); + inByteBuffer.limit(count*4+CHUNK_HEADER_SIZE); + inByteBuffer.putLong(urlId); + inByteBuffer.putInt(chunkBlock); + inByteBuffer.putInt(count); + channel.read(inByteBuffer); + + + if (chunkBlock == IndexBlock.Link.id) { + for (int i = 0; i < randomAccessFiles.length; i++) { + inByteBuffer.flip(); + fileChannels[i].write(inByteBuffer); + } + } + + } + + long size = randomAccessFiles[0].getFilePointer(); + + randomAccessFiles[0].seek(0); + randomAccessFiles[0].writeLong(size); + randomAccessFiles[0].writeInt(wordCount); + + randomAccessFiles[0].close(); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/index/SearchIndexWriter.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/index/SearchIndexWriter.java new file mode 100644 index 
00000000..ca5d70b3 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/index/SearchIndexWriter.java @@ -0,0 +1,16 @@ +package nu.marginalia.wmsa.edge.index.service.index; + +import nu.marginalia.wmsa.edge.index.model.IndexBlock; +import nu.marginalia.wmsa.edge.model.EdgeDomain; +import nu.marginalia.wmsa.edge.model.EdgeId; +import nu.marginalia.wmsa.edge.model.EdgeUrl; + +import java.util.List; + +public interface SearchIndexWriter { + void put(EdgeId domainId, EdgeId urlId, IndexBlock block, List words); + void forceWrite(); + + void flushWords(); + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/index/SearchIndexWriterImpl.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/index/SearchIndexWriterImpl.java new file mode 100644 index 00000000..2f482815 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/index/SearchIndexWriterImpl.java @@ -0,0 +1,121 @@ +package nu.marginalia.wmsa.edge.index.service.index; + +import io.reactivex.rxjava3.disposables.Disposable; +import io.reactivex.rxjava3.schedulers.Schedulers; +import lombok.SneakyThrows; +import nu.marginalia.wmsa.edge.index.model.IndexBlock; +import nu.marginalia.wmsa.edge.index.service.dictionary.DictionaryWriter; +import nu.marginalia.wmsa.edge.model.EdgeDomain; +import nu.marginalia.wmsa.edge.model.EdgeId; +import nu.marginalia.wmsa.edge.model.EdgeUrl; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.EOFException; +import java.io.File; +import java.io.IOException; +import java.io.RandomAccessFile; +import java.nio.ByteBuffer; +import java.nio.channels.FileChannel; +import java.util.List; +import java.util.concurrent.TimeUnit; + +public class SearchIndexWriterImpl implements SearchIndexWriter { + private final DictionaryWriter dictionaryWriter; + + private final Logger logger = LoggerFactory.getLogger(getClass()); + private final Disposable writerTask; + 
private RandomAccessFile raf; + private FileChannel channel; + + public static final int MAX_BLOCK_SIZE = 1000*32*8*4; + private final ByteBuffer byteBuffer; + private long pos; + + @SneakyThrows + public SearchIndexWriterImpl(DictionaryWriter dictionaryWriter, File indexFile) { + this.dictionaryWriter = dictionaryWriter; + initializeIndexFile(indexFile); + + byteBuffer = ByteBuffer.allocate(MAX_BLOCK_SIZE); + + writerTask = Schedulers.io().schedulePeriodicallyDirect(this::forceWrite, 1, 1, TimeUnit.SECONDS); + Runtime.getRuntime().addShutdownHook(new Thread(this::forceWrite)); + } + + private void initializeIndexFile(File indexFile) throws IOException { + raf = new RandomAccessFile(indexFile, "rw"); + channel = raf.getChannel(); + + try { + pos = raf.readLong(); + raf.seek(pos); + logger.info("Resuming index file of size {}", pos); + } + catch (EOFException ex) { + logger.info("Clean index file"); + writePositionMarker(); + writePositionMarker(); + } + } + + @Override + @SneakyThrows + public synchronized void put(EdgeId domainId, EdgeId urlId, IndexBlock block, List wordsSuspect) { + int numGoodWords = 0; + for (String word : wordsSuspect) { + if (word.length() < Byte.MAX_VALUE) numGoodWords++; + } + + byteBuffer.clear(); + long url_id = ((long) domainId.getId() << 32) | urlId.getId(); + byteBuffer.putLong(url_id); + byteBuffer.putInt(block.id); + byteBuffer.putInt(numGoodWords); + + for (String word : wordsSuspect) { + if (word.length() < Byte.MAX_VALUE) { + byteBuffer.putInt(dictionaryWriter.get(word)); + } + } + byteBuffer.limit(byteBuffer.position()); + byteBuffer.rewind(); + + while (byteBuffer.position() < byteBuffer.limit()) + channel.write(byteBuffer); + + writePositionMarker(); + } + + @Override + public synchronized void forceWrite() { + try { + channel.force(false); + } + catch (IOException ex) { + logger.error("IO Exception", ex); + } + } + + + @Override + public void flushWords() { + dictionaryWriter.commitToDisk(); + } + + private void 
writePositionMarker() throws IOException { + var lock = channel.lock(0, 12, false); + pos = channel.size(); + raf.seek(0); + raf.writeLong(pos); + raf.writeInt(dictionaryWriter.size()); + raf.seek(pos); + lock.release(); + } + + public synchronized void close() throws IOException { + writerTask.dispose(); + channel.close(); + raf.close(); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/index/wordstable/BtreeWordsTable.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/index/wordstable/BtreeWordsTable.java new file mode 100644 index 00000000..0a6a70c0 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/index/wordstable/BtreeWordsTable.java @@ -0,0 +1,88 @@ +package nu.marginalia.wmsa.edge.index.service.index.wordstable; + +import com.upserve.uppend.blobs.NativeIO; +import nu.marginalia.util.btree.BTreeReader; +import nu.marginalia.util.btree.model.BTreeHeader; +import nu.marginalia.util.multimap.MultimapFileLong; + +import java.util.function.LongConsumer; + +import static nu.marginalia.wmsa.edge.index.service.index.wordstable.WordsTableWriter.wordsBTreeContext; + +public class BtreeWordsTable extends IndexWordsTable{ + private final MultimapFileLong words; + private final BTreeReader reader; + private final BTreeHeader header; + private final int HEADER_OFFSET = 1; + + public BtreeWordsTable(MultimapFileLong words) { + this.words = words; + + + reader = new BTreeReader(words, wordsBTreeContext); + header = reader.getHeader(HEADER_OFFSET); + + madvise(); + } + + private void madvise() { + words.advice(NativeIO.Advice.Random); + words.advice0(NativeIO.Advice.WillNeed); + + var h = reader.getHeader(HEADER_OFFSET); + int length = (int)(h.dataOffsetLongs() - h.indexOffsetLongs()); + words.adviceRange(NativeIO.Advice.WillNeed, h.indexOffsetLongs(), length); + words.pokeRange(h.indexOffsetLongs(), length); + } + + public void forEachWordsOffset(LongConsumer offsetConsumer) { + int 
n = header.numEntries(); + long offset = header.dataOffsetLongs(); + + for (int i = 0; i < n; i++) { + try { + long posOffset = 2*(offset + i); + if (posOffset * 8 >= words.size()) { + break; + } + + long sz = words.get(posOffset); + if ((sz>> 32) > 0) { + offsetConsumer.accept(words.get(posOffset+1)); + } + } + catch (RuntimeException ex) { + logger.warn("Error @ " + i, ex); + break; + } + } + } + + @Override + public long positionForWord(int wordId) { + + long offset = reader.offsetForEntry(header, wordId); + if (offset < 0) { + return -1L; + } + + return words.get(offset+1); + } + + @Override + public int wordLength(int wordId) { + + long offset = reader.offsetForEntry(header, wordId); + if (offset < 0) { + return -1; + } + + return (int)(words.get(offset) >> 32); + } + + @Override + public void close() throws Exception { + words.close(); + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/index/wordstable/IndexWordsTable.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/index/wordstable/IndexWordsTable.java new file mode 100644 index 00000000..5b557db1 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/index/wordstable/IndexWordsTable.java @@ -0,0 +1,48 @@ +package nu.marginalia.wmsa.edge.index.service.index.wordstable; + +import nu.marginalia.util.multimap.MultimapFileLong; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.io.RandomAccessFile; +import java.nio.channels.FileChannel; +import java.util.function.LongConsumer; + +public abstract class IndexWordsTable implements AutoCloseable { + final Logger logger = LoggerFactory.getLogger(getClass()); + + private static final int BUFFER_SIZE = 1024*1024*64; + + public static IndexWordsTable ofFile(RandomAccessFile file) throws IOException { + var wordsFile = openWordsFile(file); + long signature = wordsFile.get(0); + + if (signature == Strategy.BTREE.ordinal()) { + 
return new BtreeWordsTable(wordsFile); + } + throw new IllegalArgumentException("Unknown signature " + signature); + } + + private static MultimapFileLong openWordsFile(RandomAccessFile wordsFile) throws IOException { + return new MultimapFileLong(wordsFile, + FileChannel.MapMode.READ_ONLY, wordsFile.length(), BUFFER_SIZE, false); + } + + public abstract long positionForWord(int wordId); + + public abstract int wordLength(int wordId); + public abstract void forEachWordsOffset(LongConsumer offsetConsumer); + + @Override + public void close() throws Exception { + + } + + public record TableWordRange(long start, long end) {} + + public enum Strategy { + FLAT, HASH, BTREE_OLD, BTREE + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/index/wordstable/WordsTableWriter.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/index/wordstable/WordsTableWriter.java new file mode 100644 index 00000000..3097dd47 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/index/wordstable/WordsTableWriter.java @@ -0,0 +1,85 @@ +package nu.marginalia.wmsa.edge.index.service.index.wordstable; + +import nu.marginalia.util.btree.BTreeWriter; +import nu.marginalia.util.btree.model.BTreeContext; +import nu.marginalia.util.multimap.MultimapFileLong; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.*; + +import static nu.marginalia.wmsa.edge.index.service.index.SearchIndexConverter.urlsBTreeContext; + +public class WordsTableWriter { + private final long[] table; + private final Logger logger = LoggerFactory.getLogger(getClass()); + + public static final BTreeContext wordsBTreeContext = new BTreeContext(7, 2, 0x0000_0000_FFFF_FFFFL, 8); + + public WordsTableWriter(int length) { + table = new long[length]; + } + + public void acceptWord(int wordId) { + if (wordId >= table.length) { + logger.warn("Invalid word-id {}", wordId); + } + else { + table[wordId]++; + } + } + + public 
long[] getTable() { + return table; + } + public void write(File file) throws Exception { + + int tableSize = 0; + + if (table[0] != 0) tableSize = 1; + + for (int i = 1; i < table.length; i++) { + if (table[i] != 0) { + tableSize++; + } + table[i] += table[i-1]; + } + + logger.info("Writing table {} words {} max", tableSize, table.length); + + writeBtreeWordsFile(file, table, tableSize); + + } + + private void writeBtreeWordsFile(File outputFileWords, long[] table, int tableSize) throws Exception { + try (var mmf = MultimapFileLong.forOutput(outputFileWords.toPath(), tableSize/8L)) { + mmf.put(0, IndexWordsTable.Strategy.BTREE.ordinal()); + long offset = 1; + + var writer = new BTreeWriter(mmf, wordsBTreeContext); + + writer.write(offset, tableSize, (idx) -> { + long urlFileOffset = 0; + + if (table[0] != 0) { + int length = (int) table[0]; + mmf.put(idx++, (long)length<<32); + mmf.put(idx++, 0); + + urlFileOffset += (urlsBTreeContext.calculateSize(length)); + } + + for (int i = 1; i < table.length; i++) { + if (table[i] != table[i - 1]) { + int length = (int)(table[i] - table[i-1]); + mmf.put(idx++, (long)length << 32 | i); + mmf.put(idx++, urlFileOffset); + + urlFileOffset += (urlsBTreeContext.calculateSize(length)); + } + } + }); + } + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/query/IndexQueryBuilder.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/query/IndexQueryBuilder.java new file mode 100644 index 00000000..de3f1435 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/query/IndexQueryBuilder.java @@ -0,0 +1,128 @@ +package nu.marginalia.wmsa.edge.index.service.query; + +import com.google.common.collect.Streams; +import nu.marginalia.wmsa.edge.index.service.index.SearchIndex; + +import java.util.Collection; +import java.util.List; +import java.util.Objects; +import java.util.function.Function; +import java.util.function.LongPredicate; +import 
java.util.function.Supplier; +import java.util.stream.Collectors; +import java.util.stream.IntStream; +import java.util.stream.LongStream; + +public class IndexQueryBuilder { + private final List requiredIndices; + private final SearchIndex excludeIndex; + + public Collection getIndicies() { + return requiredIndices; + } + + public IndexQueryBuilder(List requiredIndices, SearchIndex excludeIndex) { + this.requiredIndices = requiredIndices.stream().filter(Objects::nonNull).collect(Collectors.toList()); + this.excludeIndex = excludeIndex; + } + + public Query build(IndexSearchBudget budget, + LongPredicate filter, + int wordId) { + return new QueryForIndices(budget, filter, wordId); + } + + public Query buildUnderspecified(IndexSearchBudget budget, LongPredicate filter, int wordId) { + if (requiredIndices.size() == 1) { + return build(budget, filter, wordId); + } + + var ranges = requiredIndices.stream().map(idx -> idx.rangeForWord(wordId)).toArray(SearchIndex.UrlIndexTree[]::new); + var relevantIndices = IntStream.range(0, requiredIndices.size()).filter(i -> ranges[i].isPresent()).toArray(); + + if (relevantIndices.length == 0) { + return new QueryForIndices(budget, LongStream::empty); + } + else if (relevantIndices.length == 1 || relevantIndices[0] != 0) { + return build(budget, filter, wordId); + } + + var fstRange = requiredIndices.get(relevantIndices[0]).rangeForWord(wordId); + + return new QueryForIndices(budget, () -> + Streams.concat(IntStream.range(1, relevantIndices.length) + .mapToObj(i -> underspecifiedPairStream(budget, (int) budget.limit()/(relevantIndices.length*2), relevantIndices[0], relevantIndices[i], wordId)) + .flatMapToLong(Function.identity()), + fstRange.stream().takeWhile(budget::take)) + .filter(filter) + ); + } + + private LongStream underspecifiedPairStream(IndexSearchBudget budget, int limit, int firstIdx, int otherIdx, int wordId) { + SearchIndex first = requiredIndices.get(firstIdx), + second = requiredIndices.get(otherIdx); + + if 
(first.numUrls(wordId) > second.numUrls(wordId)) { + SearchIndex tmp = first; + first = second; + second = tmp; + } + + SearchIndex fst = first; + SearchIndex snd = second; + + var sndRange = snd.rangeForWord(wordId); + + return fst.rangeForWord(wordId).stream().takeWhile(budget::take).limit(limit).filter( + url -> snd.hasUrl(url, sndRange) + ); + } + + + + private class QueryForIndices implements Query { + private final Supplier supp; + private final IndexSearchBudget budget; + + private QueryForIndices(IndexSearchBudget budget, LongPredicate filter, int wordId) { + this.budget = budget; + supp = () -> + requiredIndices.stream().flatMapToLong(idx -> { + var range = idx.rangeForWord(wordId); + return range.stream().takeWhile(budget::take); + }) + .filter(filter); + } + + private QueryForIndices(IndexSearchBudget budget, Supplier supp) { + this.budget = budget; + this.supp = supp; + } + + @Override + public Query also(int wordId) { + return new QueryForIndices(budget, + () -> requiredIndices.stream().flatMapToLong(idx -> alsoStream(idx, wordId))); + } + + @Override + public Query not(int wordId) { + return new QueryForIndices(budget, () -> notStream(wordId)); + } + + private LongStream alsoStream(SearchIndex idx, int wordId) { + var range = idx.rangeForWord(wordId); + + return stream().filter(url -> idx.hasUrl(url, range)).takeWhile(budget::take); + } + + private LongStream notStream(int wordId) { + var bodyRange = excludeIndex.rangeForWord(wordId); + return stream().filter(url -> !excludeIndex.hasUrl(url, bodyRange)).takeWhile(budget::take); + } + + public LongStream stream() { + return supp.get(); + } + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/query/IndexSearchBudget.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/query/IndexSearchBudget.java new file mode 100644 index 00000000..96940b07 --- /dev/null +++ 
/**
 * Counter that allows a fixed number of "takes".
 *
 * <p>The counter is a plain field without synchronization, so an instance
 * is not safe to share between threads.
 */
public class IndexSearchBudget {
    private final long limit;
    private long used = 0;

    /** @param limit number of calls to {@link #take} that will succeed */
    public IndexSearchBudget(long limit) {
        this.limit = limit;
    }

    /**
     * Consumes one unit of the budget. The signature takes a long so that
     * the method can serve directly as a {@code LongPredicate}.
     *
     * @param unused ignored
     * @return {@code true} while fewer than {@code limit} units had been
     *         taken before this call
     */
    public boolean take(long unused) {
        return used++ < limit;
    }

    /** Number of calls made to {@link #take}, including the refused ones. */
    public long used() {
        return used;
    }

    public long limit() { return limit; }
}
java.util.concurrent.locks.ReentrantReadWriteLock; + +import static nu.marginalia.wmsa.edge.index.EdgeIndexService.DYNAMIC_BUCKET_LENGTH; + +@Singleton +public class SearchIndexPartitioner { + + private final Logger logger = LoggerFactory.getLogger(getClass()); + private final PartitionSet partitionSet; + + private SearchEngineRanking retroRanking = null; + private SearchEngineRanking smallWebRanking = null; + private SearchEngineRanking prWebRanking = null; + private SearchEngineRanking specialDomainRanking = null; + private SearchEngineRanking academiaRanking = null; + + private volatile TIntHashSet goodUrls; + + private final SearchIndexDao dao; + private final ReadWriteLock rwl = new ReentrantReadWriteLock(); + + @Inject + public SearchIndexPartitioner(SearchIndexDao dao) { + this.dao = dao; + + if (null == dao) { + partitionSet = this::yesFilter; + } + else { + partitionSet = this::byPartitionTable; + } + } + + public boolean isBusy() { + var readLock = rwl.readLock(); + try { + return !readLock.tryLock(); + } + finally { + readLock.unlock(); + } + } + + public void reloadPartitions() { + if (dao == null) { + logger.info("No dao = no partition table"); + return; + } + + logger.info("Fetching URLs"); + + if (goodUrls != null) { + goodUrls.clear(); + } + goodUrls = dao.goodUrls(); + + logger.info("Fetching domains"); + + var retroDomains = dao.getDomainsByRealPageRank(); + var smallWebDomains = dao.getSmallWebDomains(); + var academiaDomains = dao.getAcademiaDomains(); + var prWebDomains = dao.getDomainsByStandardPageRank(); + var specialDomains = dao.getSpecialDomains(); + + logger.info("Got {} retro domains", retroDomains.size()); + logger.info("Got {} small domains", smallWebDomains.size()); + logger.info("Got {} academia domains", academiaDomains.size()); + logger.info("Got {} corpo domains", prWebDomains.size()); + logger.info("Got {} special domains", specialDomains.size()); + + var lock = rwl.writeLock(); + try { + lock.lock(); + retroRanking = new 
SearchEngineRanking(0, retroDomains, 0.2, 1); + smallWebRanking = new SearchEngineRanking(2, smallWebDomains, 0.15); + academiaRanking = new SearchEngineRanking(3, academiaDomains, 1); + prWebRanking = new SearchEngineRanking(4, prWebDomains, 0.2, 1); + specialDomainRanking = new SearchEngineRanking(6, specialDomains, 1); + logger.info("Finished building partitions table"); + } + finally { + lock.unlock(); + } + } + + public boolean isGoodUrl(int urlId) { + if (goodUrls == null) + return true; + return goodUrls.contains(urlId); + } + + private boolean yesFilter(int domainId, int bucketId) { + return true; + } + private boolean byPartitionTable(int domainId, int bucketId) { + if (retroRanking.hasBucket(bucketId, domainId)) + return true; + if (smallWebRanking.hasBucket(bucketId, domainId)) + return true; + if (academiaRanking.hasBucket(bucketId, domainId)) + return true; + if (prWebRanking.hasBucket(bucketId, domainId)) + return true; + if (specialDomainRanking.hasBucket(bucketId, domainId)) + return true; + + return DYNAMIC_BUCKET_LENGTH == bucketId; + } + + @SneakyThrows + public Lock getReadLock() { + return rwl.readLock(); + } + public boolean filterUnsafe(Lock lock, int domainId, int bucketId) { + return partitionSet.test(domainId, bucketId); + } + + @Deprecated + public boolean filter(int domainId, int bucketId) { + var lock = rwl.readLock(); + try { + lock.lock(); + return partitionSet.test(domainId, bucketId); + } + finally { + lock.unlock(); + } + } + + public int translateId(int bucketId, int id) { + if (retroRanking != null && retroRanking.ownsBucket(bucketId)) { + return retroRanking.translateId(id); + } + if (smallWebRanking != null && smallWebRanking.ownsBucket(bucketId)) { + return smallWebRanking.translateId(id); + } + if (academiaRanking != null && academiaRanking.ownsBucket(bucketId)) { + return academiaRanking.translateId(id); + } + if (prWebRanking != null && prWebRanking.ownsBucket(bucketId)) { + return prWebRanking.translateId(id); + } + if 
(specialDomainRanking != null && specialDomainRanking.ownsBucket(bucketId)) { + return specialDomainRanking.translateId(id); + } + if (retroRanking != null) { + return retroRanking.translateId(id); + } + return id; + } + + interface PartitionSet { + boolean test(int domainId, int bucketId); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/util/ranking/AcademiaRank.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/util/ranking/AcademiaRank.java new file mode 100644 index 00000000..b14dc405 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/util/ranking/AcademiaRank.java @@ -0,0 +1,49 @@ +package nu.marginalia.wmsa.edge.index.service.util.ranking; + +import com.zaxxer.hikari.HikariDataSource; +import gnu.trove.list.TIntList; +import gnu.trove.list.array.TIntArrayList; +import gnu.trove.map.hash.TIntIntHashMap; +import it.unimi.dsi.fastutil.ints.IntArrays; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.sql.SQLException; + +public class AcademiaRank { + private final TIntArrayList result; + private static final Logger logger = LoggerFactory.getLogger(AcademiaRank.class); + + public AcademiaRank(HikariDataSource ds, String... 
origins) throws IOException { + + TIntList rankingResults = new BetterStandardPageRank(ds, origins).pageRank(100_000); + TIntIntHashMap idToRanking = new TIntIntHashMap(100_000, 0.5f, -1, 1_000_000_000); + + for (int i = 0; i < rankingResults.size(); i++) { + idToRanking.put(rankingResults.get(i), i); + } + + result = new TIntArrayList(10000); + try (var conn = ds.getConnection(); + var stmt = conn.prepareStatement("select EC_DOMAIN.ID,COUNT(SOURCE_DOMAIN_ID) AS CNT from EC_DOMAIN INNER JOIN DOMAIN_METADATA ON DOMAIN_METADATA.ID=EC_DOMAIN.ID INNER JOIN EC_DOMAIN_LINK ON EC_DOMAIN_LINK.DEST_DOMAIN_ID=EC_DOMAIN.ID WHERE INDEXED>0 AND STATE>=0 AND STATE<2 AND ((VISITED_URLS>1000+1500*RANK AND RANK<1) OR (GOOD_URLS>1000 AND URL_PART LIKE '%edu')) GROUP BY EC_DOMAIN.ID HAVING CNT<1500 ORDER BY RANK ASC")) { + + stmt.setFetchSize(1000); + var rsp = stmt.executeQuery(); + while (rsp.next()) { + result.add(rsp.getInt(1)); + } + } + catch (SQLException ex) { + logger.error("SQL error", ex); + } + + int[] internalArray = result.toArray(); + IntArrays.quickSort(internalArray, (a,b) -> idToRanking.get(a) - idToRanking.get(b)); + result.set(0, internalArray); + } + + public TIntArrayList getResult() { + return result; + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/util/ranking/BetterReversePageRank.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/util/ranking/BetterReversePageRank.java new file mode 100644 index 00000000..798be55a --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/util/ranking/BetterReversePageRank.java @@ -0,0 +1,46 @@ +package nu.marginalia.wmsa.edge.index.service.util.ranking; + + +import com.zaxxer.hikari.HikariDataSource; + +import java.io.IOException; + +public class BetterReversePageRank extends RankingAlgorithm { + + + public BetterReversePageRank(HikariDataSource dataSource, String... 
origins) throws IOException { + super(dataSource, origins); + } + + @Override + RankVector createNewRankVector(RankVector rank) { + + double rankNorm = rank.norm(); + RankVector newRank = new RankVector(0); + + for (int domainId = 0; domainId < domainIndexToId.size(); domainId++) { + + var links = linkDataSrc2Dest[domainId]; + double newRankValue = 0; + + if (links != null && links.size() > 0) { + + + for (int j = 0; j < links.size(); j++) { + var revLinks = linkDataDest2Src[links.getQuick(j)]; + newRankValue += rank.get(links.getQuick(j)) / revLinks.size(); + } + } + + newRank.set(domainId, 0.85*newRankValue/rankNorm); + } + + return newRank; + } + + @Override + void adjustRankVector(RankVector vector, double dNorm, double oldNorm) { + originDomainIds.forEach(id -> vector.increment(id, 1.0 / originDomainIds.size())); + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/util/ranking/BetterStandardPageRank.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/util/ranking/BetterStandardPageRank.java new file mode 100644 index 00000000..497ac146 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/util/ranking/BetterStandardPageRank.java @@ -0,0 +1,50 @@ +package nu.marginalia.wmsa.edge.index.service.util.ranking; + + +import com.zaxxer.hikari.HikariDataSource; +import gnu.trove.list.array.TIntArrayList; + +import java.io.IOException; + +public class BetterStandardPageRank extends RankingAlgorithm { + + public BetterStandardPageRank(HikariDataSource dataSource, String... 
origins) throws IOException { + super(dataSource, origins); + } + + @Override + RankVector createNewRankVector(RankVector rank) { + RankVector newRank = new RankVector(0); + + for (int domainId = 0; domainId < domainIndexToId.size(); domainId++) { + + var links = linkDataDest2Src[domainId]; + double newRankValue = 0; + + if (links != null && links.size() > 0) { + for (int j = 0; j < links.size(); j++) { + int linkedDomain = links.getQuick(j); + + int linkSize = 1; + var bl = linkDataSrc2Dest[linkedDomain]; + if (bl != null) { + linkSize = bl.size(); + } + + newRankValue += rank.get(linkedDomain) / linkSize; + + } + } + + newRank.set(domainId, 0.85 * newRankValue); + } + return newRank; + } + + @Override + void adjustRankVector(RankVector vector, double dNorm, double oldNorm) { + originDomainIds.forEach(id -> vector.increment(id, 0.15 / originDomainIds.size() /* dNorm/originDomainIds.size() */ )); +// vector.incrementAll(0.14*dNorm/vector.size()); + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/util/ranking/BuggyReversePageRank.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/util/ranking/BuggyReversePageRank.java new file mode 100644 index 00000000..1fd696ab --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/util/ranking/BuggyReversePageRank.java @@ -0,0 +1,43 @@ +package nu.marginalia.wmsa.edge.index.service.util.ranking; + + +import com.zaxxer.hikari.HikariDataSource; + +import java.io.IOException; + +public class BuggyReversePageRank extends RankingAlgorithm { + + + public BuggyReversePageRank(HikariDataSource dataSource, String... 
origins) throws IOException { + super(dataSource, origins); + } + + @Override + RankVector createNewRankVector(RankVector rank) { + + double rankNorm = rank.norm(); + RankVector newRank = new RankVector(0); + + for (int domainId = 0; domainId < domainIndexToId.size(); domainId++) { + + var links = linkDataSrc2Dest[domainId]; + + if (links != null && links.size() > 0) { + double newRankValue = 0; + + for (int j = 0; j < links.size(); j++) { + newRankValue += rank.get(links.getQuick(j)) / links.size(); + } + + newRank.set(domainId, 0.85*newRankValue/rankNorm); + } + } + return newRank; + } + + @Override + void adjustRankVector(RankVector vector, double dNorm, double oldNorm) { + originDomainIds.forEach(id -> vector.increment(domainIdToIndex.get(id), dNorm/oldNorm)); + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/util/ranking/BuggyStandardPageRank.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/util/ranking/BuggyStandardPageRank.java new file mode 100644 index 00000000..c2bf65b4 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/util/ranking/BuggyStandardPageRank.java @@ -0,0 +1,49 @@ +package nu.marginalia.wmsa.edge.index.service.util.ranking; + + +import com.zaxxer.hikari.HikariDataSource; + +import java.io.IOException; + +public class BuggyStandardPageRank extends RankingAlgorithm { + + public BuggyStandardPageRank(HikariDataSource dataSource, String... 
origins) throws IOException { + super(dataSource, origins); + } + + @Override + RankingAlgorithm.RankVector createNewRankVector(RankingAlgorithm.RankVector rank) { + RankVector newRank = new RankVector(0); + + for (int domainId = 0; domainId < domainIndexToId.size(); domainId++) { + + var links = linkDataSrc2Dest[domainId]; + double newRankValue = 0; + + if (links != null && links.size() > 0) { + for (int j = 0; j < links.size(); j++) { + int linkedDomain = links.getQuick(j); + + int linkSize = 1; + var bl = linkDataSrc2Dest[linkedDomain]; + if (bl != null) { + linkSize = bl.size(); + } + + newRankValue += rank.get(linkedDomain) / linkSize; + + } + } + + newRank.set(domainId, 0.85 * newRankValue); + } + return newRank; + } + + @Override + void adjustRankVector(RankingAlgorithm.RankVector vector, double dNorm, double oldNorm) { + originDomainIds.forEach(id -> vector.increment(id, dNorm/originDomainIds.size())); + vector.incrementAll(0.14*dNorm/vector.size()); + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/util/ranking/RankingAlgorithm.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/util/ranking/RankingAlgorithm.java new file mode 100644 index 00000000..ce63c0a6 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/util/ranking/RankingAlgorithm.java @@ -0,0 +1,476 @@ +package nu.marginalia.wmsa.edge.index.service.util.ranking; + +import com.zaxxer.hikari.HikariDataSource; +import gnu.trove.list.TIntList; +import gnu.trove.list.array.TIntArrayList; +import gnu.trove.map.hash.TIntIntHashMap; +import gnu.trove.map.hash.TIntObjectHashMap; +import gnu.trove.set.hash.TIntHashSet; +import it.unimi.dsi.fastutil.ints.IntComparator; +import lombok.AllArgsConstructor; +import lombok.Data; +import nu.marginalia.wmsa.configuration.module.DatabaseModule; +import nu.marginalia.wmsa.edge.data.dao.task.EdgeDomainBlacklistImpl; +import 
nu.marginalia.wmsa.edge.model.crawl.EdgeDomainIndexingState; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.sql.SQLException; +import java.util.*; +import java.util.function.IntToDoubleFunction; +import java.util.stream.IntStream; +import it.unimi.dsi.fastutil.ints.IntArrays; + +public abstract class RankingAlgorithm { + final TIntObjectHashMap domainsById = new TIntObjectHashMap<>(); + final TIntIntHashMap domainIndexToId = new TIntIntHashMap(); + final TIntIntHashMap domainIdToIndex = new TIntIntHashMap(); + + private final TIntHashSet spamDomains; + private final HikariDataSource dataSource; + + TIntArrayList[] linkDataSrc2Dest; + TIntArrayList[] linkDataDest2Src; + + public Set originDomains = new HashSet<>(); + public Set originDomainIds = new HashSet<>(); + + private int maxKnownUrls = Integer.MAX_VALUE; + + private static boolean getNames = true; + + private final Logger logger = LoggerFactory.getLogger(getClass()); + + public static void main(String... args) throws IOException { + var rpr = new BuggyReversePageRank(new DatabaseModule().provideConnection(), "wiki.xxiivv.com"); + var spr = new BuggyStandardPageRank(new DatabaseModule().provideConnection(), "memex.marginalia.nu"); + + var rankVector = spr.pageRankVector(); + var norm = rankVector.norm(); + rpr.pageRank(i -> rankVector.get(i) / norm, 25).forEach(i -> { + System.out.println(spr.domainNameFromId(i)); + return true; + }); + } + + public String domainNameFromId(int id) { + return domainsById.get(id).name; + } + public boolean isPeripheral(int id) { + return domainsById.get(id).peripheral; + } + + public RankingAlgorithm(HikariDataSource dataSource, String... 
origins) { + this.dataSource = dataSource; + var blacklist = new EdgeDomainBlacklistImpl(dataSource); + + spamDomains = blacklist.getSpamDomains(); + originDomains.addAll(Arrays.asList(origins)); + + try (var conn = dataSource.getConnection()) { + + String s; + if (getNames) { + s = "SELECT EC_DOMAIN.ID,URL_PART,DOMAIN_ALIAS,STATE,KNOWN_URLS FROM EC_DOMAIN INNER JOIN DOMAIN_METADATA ON EC_DOMAIN.ID=DOMAIN_METADATA.ID INNER JOIN EC_DOMAIN_LINK ON SOURCE_DOMAIN_ID=EC_DOMAIN.ID WHERE ((INDEXED>1 AND STATE >= 0) OR (INDEXED=1 AND VISITED_URLS=KNOWN_URLS AND GOOD_URLS>0)) AND QUALITY_RAW>=-20 AND SOURCE_DOMAIN_ID!=DEST_DOMAIN_ID GROUP BY EC_DOMAIN.ID"; + } + else { + s = "SELECT EC_DOMAIN.ID,\"\",DOMAIN_ALIAS,STATE,KNOWN_URLS FROM EC_DOMAIN INNER JOIN DOMAIN_METADATA ON EC_DOMAIN.ID=DOMAIN_METADATA.ID INNER JOIN EC_DOMAIN_LINK ON SOURCE_DOMAIN_ID=EC_DOMAIN.ID WHERE ((INDEXED>1 AND STATE >= 0) OR (INDEXED=1 AND VISITED_URLS=KNOWN_URLS AND GOOD_URLS>0)) AND QUALITY_RAW>=-20 AND SOURCE_DOMAIN_ID!=DEST_DOMAIN_ID GROUP BY EC_DOMAIN.ID"; + } + try (var stmt = conn.prepareStatement(s)) { + stmt.setFetchSize(10000); + var rsp = stmt.executeQuery(); + while (rsp.next()) { + int id = rsp.getInt(1); + if (!spamDomains.contains(id)) { + + domainsById.put(id, new DomainData(id, rsp.getString(2), rsp.getInt(3), rsp.getInt(4), rsp.getInt(5), false)); + + domainIndexToId.put(domainIndexToId.size(), id); + domainIdToIndex.put(id, domainIdToIndex.size()); + } + } + } + + + linkDataSrc2Dest = new TIntArrayList[domainIndexToId.size()]; + linkDataDest2Src = new TIntArrayList[domainIndexToId.size()]; + + try (var stmt = conn.prepareStatement("SELECT SOURCE_DOMAIN_ID, DEST_DOMAIN_ID FROM EC_DOMAIN_LINK")) { + stmt.setFetchSize(10000); + + var rsp = stmt.executeQuery(); + + while (rsp.next()) { + int src = rsp.getInt(1); + int dst = rsp.getInt(2); + + if (src == dst) continue; + + if (domainsById.contains(src) && domainsById.contains(dst)) { + + int srcIdx = domainIdToIndex.get(src); + int 
dstIdx = domainIdToIndex.get(domainsById.get(dst).resolveAlias()); + + if (linkDataSrc2Dest[srcIdx] == null) { + linkDataSrc2Dest[srcIdx] = new TIntArrayList(); + } + linkDataSrc2Dest[srcIdx].add(dstIdx); + + if (linkDataDest2Src[dstIdx] == null) { + linkDataDest2Src[dstIdx] = new TIntArrayList(); + } + linkDataDest2Src[dstIdx].add(srcIdx); + } + } + } + + try (var stmt = conn.prepareStatement("SELECT ID FROM EC_DOMAIN WHERE URL_PART LIKE ?")) { + for (var seed : this.originDomains) { + stmt.setString(1, seed); + var rsp = stmt.executeQuery(); + while (rsp.next()) { + int i = rsp.getInt(1); + int ival = domainIdToIndex.get(i); + if (ival != domainIdToIndex.getNoEntryValue() || domainIndexToId.get(0) == i) { + originDomainIds.add(ival); + } + else { + logger.debug("No value for {}", i); + } + } + logger.debug("{} -> {}", seed, originDomainIds.size()); + } + } + + logger.info("Origin Domains: {}", originDomainIds.size()); + + } catch (SQLException throwables) { + logger.error("SQL error", throwables); + } + } + + public void addPeripheralNodes(boolean includeErrorStates) { + + int newNodesIdxCutoff = domainIdToIndex.size(); + + logger.info("Inserting peripheral nodes"); + + try (var conn = dataSource.getConnection()) { + String s; + if (getNames) { + s = "SELECT EC_DOMAIN.ID,URL_PART,DOMAIN_ALIAS,STATE,KNOWN_URLS FROM EC_DOMAIN INNER JOIN DOMAIN_METADATA ON EC_DOMAIN.ID=DOMAIN_METADATA.ID LEFT JOIN EC_DOMAIN_LINK ON SOURCE_DOMAIN_ID=EC_DOMAIN.ID WHERE ((INDEXED>1 AND STATE >= 0) OR (INDEXED=1 AND VISITED_URLS=KNOWN_URLS AND GOOD_URLS>0)) AND QUALITY_RAW>=-20 AND EC_DOMAIN_LINK.ID IS NULL GROUP BY EC_DOMAIN.ID"; + } + else { + s = "SELECT EC_DOMAIN.ID,\"\",DOMAIN_ALIAS,STATE,KNOWN_URLS FROM EC_DOMAIN INNER JOIN DOMAIN_METADATA ON EC_DOMAIN.ID=DOMAIN_METADATA.ID LEFT JOIN EC_DOMAIN_LINK ON SOURCE_DOMAIN_ID=EC_DOMAIN.ID WHERE ((INDEXED>1 AND STATE >= 0) OR (INDEXED=1 AND VISITED_URLS=KNOWN_URLS AND GOOD_URLS>0)) AND QUALITY_RAW>=-20 AND EC_DOMAIN_LINK.ID IS NULL GROUP 
BY EC_DOMAIN.ID"; + } + try (var stmt = conn.prepareStatement(s)) { + stmt.setFetchSize(10000); + var rsp = stmt.executeQuery(); + + while (rsp.next()) { + int id = rsp.getInt(1); + + if (!spamDomains.contains(id)) { + domainsById.put(id, new DomainData(id, rsp.getString(2), rsp.getInt(3), rsp.getInt(4), rsp.getInt(5), true)); + + domainIndexToId.put(domainIndexToId.size(), id); + domainIdToIndex.put(id, domainIdToIndex.size()); + } + } + + } + + linkDataSrc2Dest = Arrays.copyOf(linkDataSrc2Dest, domainIndexToId.size()); + linkDataDest2Src = Arrays.copyOf(linkDataDest2Src, domainIndexToId.size()); + + try (var stmt = conn.prepareStatement("SELECT SOURCE_DOMAIN_ID, DEST_DOMAIN_ID FROM EC_DOMAIN_LINK")) { + stmt.setFetchSize(10000); + + var rsp = stmt.executeQuery(); + + while (rsp.next()) { + int src = rsp.getInt(1); + int dst = rsp.getInt(2); + + if (src == dst) continue; + + if (domainsById.contains(src) && domainsById.contains(dst)) { + + int srcIdx = domainIdToIndex.get(src); + int dstIdx = domainIdToIndex.get(domainsById.get(dst).resolveAlias()); + + // This looks like a bug, but it improves the results + if (srcIdx < newNodesIdxCutoff || dstIdx < newNodesIdxCutoff) + continue; + + if (linkDataSrc2Dest[srcIdx] == null) { + linkDataSrc2Dest[srcIdx] = new TIntArrayList(); + } + linkDataSrc2Dest[srcIdx].add(dstIdx); + + if (linkDataDest2Src[dstIdx] == null) { + linkDataDest2Src[dstIdx] = new TIntArrayList(); + } + linkDataDest2Src[dstIdx].add(srcIdx); + } + } + } + } catch (SQLException throwables) { + logger.error("SQL error", throwables); + } + + logger.info("Peripheral nodes inserted {} -> {}", newNodesIdxCutoff, domainIdToIndex.size()); + } + + public int size() { + return domainsById.size(); + } + + + public RankVector pageRankVector() { + RankVector rank = new RankVector(1.d / domainsById.size()); + + int iter_max = 100; + for (int i = 0; i < iter_max; i++) { + RankVector newRank = createNewRankVector(rank); + + double oldNorm = rank.norm(); + double newNorm 
= newRank.norm(); + double dNorm = oldNorm - newNorm ; + if (i < iter_max-1) { + adjustRankVector(newRank, dNorm, oldNorm); + } + + rank = newRank; + } + + return rank; + } + + + public TIntList pageRank(int resultCount) { + RankVector rank = new RankVector(1.d / domainsById.size()); + + int iter_max = 100; + for (int i = 0; i < iter_max; i++) { + RankVector newRank = createNewRankVector(rank); + + double oldNorm = rank.norm(); + double newNorm = newRank.norm(); + double dNorm = oldNorm - newNorm; + + if (i < iter_max-1) { + adjustRankVector(newRank, dNorm, oldNorm); + } + + rank = newRank; + } + + + return rank.getRanking(resultCount); + } + + public TIntList pageRankWithPeripheralNodes(int resultCount, boolean includeErrorStates) { + RankVector rank = new RankVector(1.d / domainsById.size()); + + int iter_max = 100; + + for (int i = 0; i < iter_max; i++) { + if (i == iter_max-1) { + addPeripheralNodes(includeErrorStates); + } + RankVector newRank = createNewRankVector(rank); + + double oldNorm = rank.norm(); + double newNorm = newRank.norm(); + double dNorm = oldNorm - newNorm; + + if (i < iter_max-1) { + adjustRankVector(newRank, dNorm, oldNorm); + } + + rank = newRank; + } + + logger.info("PRWPN iteration done"); + + return rank.getRanking(resultCount); + } + + abstract void adjustRankVector(RankVector vector, double dNorm, double oldNorm); + + public TIntList pageRank(IntToDoubleFunction weight, int resultCount) { + RankVector rank = new RankVector(1.d / domainsById.size()); + + int iter_max = 100; + for (int i = 0; i < iter_max; i++) { + RankVector newRank = createNewRankVector(rank); + + double oldNorm = rank.norm(); + double newNorm = newRank.norm(); + double dNorm = oldNorm - newNorm ; + + if (i < iter_max-1) { + adjustRankVector(newRank, dNorm, oldNorm); + } + + rank = newRank; + } + + return rank.getRanking(weight, resultCount); + } + + abstract RankVector createNewRankVector(RankVector rank); + + public boolean includeInRanking(DomainData data) { + if 
(data.isAlias()) + return false; + if (data.isSpecial()) + return false; + if (data.isSocialMedia()) + return false; + if (data.knownUrls > maxKnownUrls) + return false; + + return true; + } + + public void setMaxKnownUrls(int maxKnownUrls) { + this.maxKnownUrls = maxKnownUrls; + } + + public class RankVector { + private final double[] rank; + public RankVector(double defaultValue) { + rank = new double[domainIndexToId.size()]; + if (defaultValue != 0.) { + Arrays.fill(rank, defaultValue); + } + } + + public void set(int id, double value) { + rank[id] = value; + } + + public void increment(int id, double value) { + rank[id] += value; + } + + public double get(int id) { + if (id >= rank.length) return 0.; + + return rank[id]; + } + + public double norm() { + double v = 0.; + for (int i = 0; i < rank.length; i++) { + if (rank[i] > 0) { v+=rank[i]; } + else { v -= rank[i]; } + } + return v; + } + + public double norm(RankVector other) { + double v = 0.; + for (int i = 0; i < rank.length; i++) { + double dv = rank[i] - other.get(i); + + if (dv > 0) { v+=dv; } + else { v -= dv; } + } + return v; + } + + public TIntList getRanking(IntToDoubleFunction other, int numResults) { + TIntArrayList list = new TIntArrayList(numResults); + + Comparator comparator = Comparator.comparing(i -> Math.sqrt(other.applyAsDouble(domainIdToIndex.get(i)) * rank[i])); + + IntStream.range(0, rank.length) + .boxed() + .sorted(comparator.reversed()) + .map(domainIndexToId::get) + .limit(numResults) + .forEach(list::add); + + return list; + } + + public TIntList getRanking(int numResults) { + if (numResults < 0) { + numResults = domainIdToIndex.size(); + } + if (numResults >= rank.length) { + numResults = rank.length; + } + + TIntArrayList list = new TIntArrayList(numResults); + + int[] nodes = new int[rank.length]; + Arrays.setAll(nodes, i->i); + IntComparator comp = (i,j) -> (int) Math.signum(rank[j] - rank[i]); + IntArrays.quickSort(nodes, comp); + + int i; + + for (i = 0; i < numResults; i++) 
{ + int id = domainIndexToId.get(nodes[i]); + + if (includeInRanking(domainsById.get(id))) + list.add(id); + } + + for (; i < nodes.length && domainsById.size() < numResults; i++) { + int id = domainIndexToId.get(nodes[i]); + + if (includeInRanking(domainsById.get(id))) + list.add(id); + } + + + return list; + } + + + public void incrementAll(double v) { + for (int i = 0; i < rank.length; i++) { + rank[i]+=v; + } + } + + int size() { + return domainsById.size(); + } + } + + @Data + @AllArgsConstructor + static class DomainData { + public final int id; + public final String name; + private int alias; + private int state; + public final int knownUrls; + public boolean peripheral; + + public int resolveAlias() { + if (alias == 0) return id; + return alias; + } + + public boolean isAlias() { + return alias != 0; + } + + public boolean isSpecial() { + return EdgeDomainIndexingState.SPECIAL.code == state; + } + + public boolean isSocialMedia() { + return EdgeDomainIndexingState.SOCIAL_MEDIA.code == state; + } + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/util/ranking/old/OldReversePageRankV2.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/util/ranking/old/OldReversePageRankV2.java new file mode 100644 index 00000000..54b88edc --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/util/ranking/old/OldReversePageRankV2.java @@ -0,0 +1,261 @@ +package nu.marginalia.wmsa.edge.index.service.util.ranking.old; + + +import com.zaxxer.hikari.HikariDataSource; +import gnu.trove.list.TIntList; +import gnu.trove.list.array.TIntArrayList; +import gnu.trove.map.hash.TIntDoubleHashMap; +import gnu.trove.map.hash.TIntObjectHashMap; +import lombok.AllArgsConstructor; +import lombok.Data; +import org.jetbrains.annotations.NotNull; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import 
java.sql.SQLException;
import java.util.*;

/**
 * Earlier, map-based implementation of the reverse ranking, kept for reference and
 * benchmarking. Rank flows along outgoing links; the rank lost in each step is
 * re-injected at the origin domains.
 */
public class OldReversePageRankV2 {

    private final TIntObjectHashMap<DomainData> domains = new TIntObjectHashMap<>();
    private final TIntObjectHashMap<TIntArrayList> linkData = new TIntObjectHashMap<>();
    private final TIntObjectHashMap<TIntArrayList> reverseLinkData = new TIntObjectHashMap<>();
    private final Logger logger = LoggerFactory.getLogger(getClass());

    public Set<String> originDomains = new HashSet<>();
    public Set<Integer> originDomainIds = new HashSet<>();

    public static void main(String... args) throws IOException {
        new OldReversePageRankV2(
//                "wiki.xxiivv.com",
//                "stpeter.im",
//                "datagubbe.se", "midnight.pub",
//                "www.gameboomers.com",
//                "www.wild-seven.org", "iocane-powder.net", "www.doujinshi.org", "ohmydarling.org",
//                "lobste.rs",
//                "dataswamp.org", "www.ohtori.nu",
//                "lukesmith.xyz", "internetgirlfriend.club",
//                "tilde.town", "tilde.team",
//                "felix.plesoianu.ro",
//                "www.neustadt.fr",
                "memex.marginalia.nu"
        );
    }

    /**
     * Loads the graph from the local data files and runs a timing loop over
     * {@link #pageRank(int)}, printing the result sizes and the elapsed seconds.
     *
     * @param seedDomains names of the origin domains
     */
    public OldReversePageRankV2(String... seedDomains) throws IOException {
        // The seeds must be registered before loading: loadDataFromFile() resolves
        // originDomainIds from originDomains. Previously the argument was ignored,
        // leaving the origin set empty.
        originDomains.addAll(Arrays.asList(seedDomains));
        loadDataFromFile();

        long start = System.currentTimeMillis();
        for (int i = 0; i < 100; i++) {
            if (domains.contains(i)) {
                int[] ids = pageRank(10).toArray();
                System.out.printf("%d %d\n", i, ids.length);
            }
//            Arrays.stream(ids).mapToObj(domains::get).map(data ->
//                    String.format("%3d %2.2f %s", Optional.ofNullable(reverseLinkData.get(data.id)).map(TIntArrayList::size).orElse(0), data.quality, data.name)
//            ).forEach(System.out::println);
        }
        long end = System.currentTimeMillis();
        System.out.printf("%2.2f", (end - start)/1000.0);
    }

    /**
     * Loads the graph from the database, using "memex.marginalia.nu" as the only origin.
     * SQL errors are logged and otherwise ignored.
     */
    public OldReversePageRankV2(HikariDataSource dataSource) throws IOException {
        originDomains.add("memex.marginalia.nu");

        try (var conn = dataSource.getConnection()) {
            try (var stmt = conn.prepareStatement("SELECT ID,INDEXED,STATE FROM EC_DOMAIN WHERE INDEXED>1 AND STATE>=0 AND QUALITY_RAW>=-10")) {
                stmt.setFetchSize(10000);
                var rsp = stmt.executeQuery();
                while (rsp.next()) {
                    domains.put(rsp.getInt(1), new DomainData("", 0.0, rsp.getInt(1), rsp.getInt(2), rsp.getInt(3)));
                }
            }
            try (var stmt = conn.prepareStatement("SELECT SOURCE_DOMAIN_ID, DEST_DOMAIN_ID FROM EC_DOMAIN_LINK")) {
                stmt.setFetchSize(10000);

                var rsp = stmt.executeQuery();

                while (rsp.next()) {
                    int src = rsp.getInt(1);
                    int dst = rsp.getInt(2);
                    addForwardLink(src, dst);
                }
            }

            try (var stmt = conn.prepareStatement("SELECT ID FROM EC_DOMAIN WHERE URL_PART=?")) {
                stmt.setFetchSize(10000);

                for (var seed : this.originDomains) {
                    stmt.setString(1, seed);
                    var rsp = stmt.executeQuery();
                    if (rsp.next()) {
                        originDomainIds.add(rsp.getInt(1));
                    }
                }
            }

        } catch (SQLException throwables) {
            logger.error("SQL error", throwables);
        }

    }

    /** Records src -> dst when both domains are known and the source's quality is at least -5. */
    private void addForwardLink(int src, int dst) {
        if (domains.contains(src) && domains.contains(dst) && domains.get(src).quality >= -5) {
            if (!linkData.contains(src)) {
                linkData.put(src, new TIntArrayList());
            }
            linkData.get(src).add(dst);
        }
    }

    public int size() {
        return domains.size();
    }

    /**
     * Runs 100 iterations and returns the best-ranked domain ids. After the last
     * iteration every origin's rank is reduced by 1, which moves the origins themselves
     * towards the end of the ordering.
     *
     * @param resultCount maximum number of ids to return
     */
    public TIntList pageRank(int resultCount) {
        RankVector rank = new RankVector(1.d / domains.size());

        for (int i = 0; i < 100; i++) {
            RankVector newRank = createNewRankVector(rank);

            double oldNorm = rank.norm();
            double newNorm = newRank.norm();
            double dNorm = oldNorm - newNorm ;
            originDomainIds.forEach(id -> newRank.increment(id, dNorm/oldNorm));
//            newRank.increment(14880, dNorm/rank.norm());
            rank = newRank;
        }

        for (var id : originDomainIds) {
            rank.increment(id, -1);
        }

        return rank.getRanking(resultCount);
    }

    /**
     * One propagation step: each domain with outgoing links gets the mean rank of its
     * link targets, scaled by 0.85 and divided by the norm of {@code rank}.
     */
    @NotNull
    private RankVector createNewRankVector(RankVector rank) {

        final TIntArrayList empty = new TIntArrayList();

        double rankNorm = rank.norm();
        RankVector newRank = new RankVector(0);

        for (DomainData domain : domains.values(new DomainData[domains.size()])) {

            var links = Optional.ofNullable(linkData.get(domain.id)).orElse(empty);
            if (links.size() > 0) {
                double newRankValue = 0;
                for (int linkedDomain : links.toArray()) {
                    newRankValue += rank.get(linkedDomain) / links.size();
                }

                newRank.set(domain.id, 0.85*newRankValue/rankNorm);
            }
        }
        return newRank;
    }

    /**
     * Reads domains and links from two whitespace-separated text files at hard-coded
     * paths. Only domains with {@code indexed > 1} and {@code state >= 1} are kept.
     */
    private void loadDataFromFile() throws IOException {

        try (var str = Files.lines(Path.of("/home/vlofgren/Work/data-domains.txt"))) {
            str.map(DomainData::new)
                    .filter(domain -> domain.indexed>1)
                    .filter(domain -> domain.state>=1)
                    .forEach(domain -> {
                        if (originDomains.contains(domain.name)) {
                            originDomainIds.add(domain.id);
                        }
                        domains.put(domain.id, domain);
                    });
        }

        try (var str = Files.lines(Path.of("/home/vlofgren/Work/data-links.txt"))) {
            str.map(s->s.split("\\s+")).forEach(bits -> {

                int src = Integer.parseInt(bits[0]);
                int dst = Integer.parseInt(bits[1]);

                addForwardLink(src, dst);

                // The reverse table is filled for every line, including links that
                // the forward table rejected.
                if (!reverseLinkData.contains(dst)) {
                    reverseLinkData.put(dst, new TIntArrayList());
                }
                reverseLinkData.get(dst).add(src);
            });
        }
    }

    /** Sparse rank vector keyed by domain id; missing keys read as the default value. */
    private class RankVector {
        private final TIntDoubleHashMap rank;
        private final double defaultValue;
        public RankVector(double defaultValue) {
            rank = new TIntDoubleHashMap(domains.size(), 0.75f, -1, defaultValue);
            this.defaultValue = defaultValue;
        }

        public void set(int id, double value) {
            rank.put(id, value);
        }


        public void increment(int id, double value) {
            rank.adjustOrPutValue(id, value, value);
        }

        public double get(int id) {
            return rank.get(id);
        }

        /** @return the sum of absolute stored values, or default * domain count when nothing is stored */
        public double norm() {
            if (rank.isEmpty()) {
                return defaultValue * domains.size();
            }
            return Arrays.stream(rank.values()).map(Math::abs).sum();
        }

        public double norm(RankVector other) {
            return Arrays.stream(rank.keys()).mapToDouble(k -> Math.abs(rank.get(k) - other.get(k))).sum();
        }

        /** @return up to {@code numResults} domain ids, highest rank first */
        public TIntList getRanking(int numResults) {
            TIntArrayList list = new TIntArrayList(numResults);

            Comparator<DomainData> comparator = Comparator.comparing(e -> rank.get(e.id));

            domains.valueCollection().stream()
                    .sorted(comparator.reversed())
                    .map(DomainData::getId)
                    .limit(numResults)
                    .forEach(list::add);

            return list;
        }

    }

    /** Domain record; the String constructor parses "id quality name indexed state". */
    @Data @AllArgsConstructor
    static class DomainData {

        public DomainData(String str) {
            String[] parts = str.split("\\s+");

            id = Integer.parseInt(parts[0]);
            quality = Double.parseDouble(parts[1]);
            name = parts[2];
            indexed = Integer.parseInt(parts[3]);
            state = Integer.parseInt(parts[4]);
        }
        public final String name;
        public final double quality;
        public final int id;
        public final int indexed;
        public final int state;
    }

}
diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/util/ranking/old/StandardPageRank.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/util/ranking/old/StandardPageRank.java
new file mode 100644
index
00000000..613a8aa2
--- /dev/null
+++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/util/ranking/old/StandardPageRank.java
@@ -0,0 +1,270 @@
package nu.marginalia.wmsa.edge.index.service.util.ranking.old;


import com.zaxxer.hikari.HikariDataSource;
import gnu.trove.list.TIntList;
import gnu.trove.list.array.TIntArrayList;
import gnu.trove.map.hash.TIntDoubleHashMap;
import gnu.trove.map.hash.TIntObjectHashMap;
import gnu.trove.set.hash.TIntHashSet;
import lombok.AllArgsConstructor;
import lombok.Data;
import nu.marginalia.wmsa.configuration.module.DatabaseModule;
import org.jetbrains.annotations.NotNull;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.sql.SQLException;
import java.util.*;
import java.util.function.IntToDoubleFunction;

/**
 * Earlier, map-based PageRank implementation: rank flows from a domain to the domains
 * it links to, and the rank lost in each step is re-injected mostly at the origin
 * domains and partly across all stored entries.
 */
public class StandardPageRank {

    private final TIntObjectHashMap<DomainData> domains = new TIntObjectHashMap<>();
    private final TIntObjectHashMap<TIntArrayList> linkData = new TIntObjectHashMap<>();
    private final TIntObjectHashMap<TIntArrayList> reverseLinkData = new TIntObjectHashMap<>();
    private final Logger logger = LoggerFactory.getLogger(getClass());

    public Set<String> originDomains = new HashSet<>();
    public Set<Integer> originDomainIds = new HashSet<>();

    /**
     * Loads the graph from the local data files, ranks it and prints the top 1000
     * domains (inbound link count, quality, name) to standard output.
     */
    public StandardPageRank(IntToDoubleFunction weight, String... seedDomains) throws IOException {
        originDomains.addAll(Arrays.asList(seedDomains));
        loadDataFromFile();

        int[] ids = pageRank(weight, 1000).toArray();
        Arrays.stream(ids).mapToObj(domains::get).map(data ->
                String.format("%3d %2.2f %s", Optional.ofNullable(reverseLinkData.get(data.id)).map(TIntArrayList::size).orElse(0), data.quality, data.name)
        ).forEach(System.out::println);
    }

    public String domainNameFromId(int id) {
        return domains.get(id).name;
    }

    /**
     * Loads domains, links and origin ids from the database.
     * SQL errors are logged and otherwise ignored.
     */
    public StandardPageRank(HikariDataSource dataSource, String... origins) throws IOException {
        originDomains.addAll(Arrays.asList(origins));

        try (var conn = dataSource.getConnection()) {
            try (var stmt = conn.prepareStatement("SELECT ID,INDEXED,STATE,URL_PART FROM EC_DOMAIN WHERE INDEXED>1 AND STATE>=0 AND QUALITY>=-10")) {
                stmt.setFetchSize(10000);
                var rsp = stmt.executeQuery();
                while (rsp.next()) {
                    domains.put(rsp.getInt(1), new DomainData(rsp.getInt(1), rsp.getString(4), rsp.getInt(2), rsp.getInt(3), 0));
                }
            }
            try (var stmt = conn.prepareStatement("SELECT SOURCE_DOMAIN_ID, DEST_DOMAIN_ID FROM EC_DOMAIN_LINK")) {
                stmt.setFetchSize(10000);

                var rsp = stmt.executeQuery();

                while (rsp.next()) {
                    int src = rsp.getInt(1);
                    int dst = rsp.getInt(2);

                    addLink(src, dst);
                }
            }

            try (var stmt = conn.prepareStatement("SELECT ID FROM EC_DOMAIN WHERE URL_PART=?")) {
                for (var seed : this.originDomains) {
                    stmt.setString(1, seed);
                    var rsp = stmt.executeQuery();
                    if (rsp.next()) {
                        originDomainIds.add(rsp.getInt(1));
                    }
                }
            }

        } catch (SQLException throwables) {
            logger.error("SQL error", throwables);
        }

    }

    /**
     * Records src -> dst in both link tables when both domains are known and the
     * source's quality is at least -5.
     */
    private void addLink(int src, int dst) {
        if (domains.contains(src) && domains.contains(dst) && domains.get(src).quality >= -5) {
            if (!linkData.contains(src)) {
                linkData.put(src, new TIntArrayList());
            }
            linkData.get(src).add(dst);

            if (!reverseLinkData.contains(dst)) {
                reverseLinkData.put(dst, new TIntArrayList());
            }
            reverseLinkData.get(dst).add(src);
        }
    }

    public int size() {
        return domains.size();
    }

    /**
     * Runs 100 iterations and returns the best domains according to the rank combined
     * with {@code weight}. The re-injection step is skipped on the final iteration.
     *
     * @param weight      per-domain-id weight multiplied into the rank before ordering
     * @param resultCount maximum number of ids to return
     */
    public TIntList pageRank(IntToDoubleFunction weight, int resultCount) {
        RankVector rank = new RankVector(1.d / domains.size());

        int iter_max = 100;
        for (int i = 0; i < iter_max; i++) {
            RankVector newRank = createNewRankVector(rank);

            double oldNorm = rank.norm();
            double newNorm = newRank.norm();
            double dNorm = oldNorm - newNorm;
            if (i < iter_max-1) {
                originDomainIds.forEach(id -> newRank.increment(id, dNorm/originDomainIds.size()));
                newRank.incrementAll(0.14*dNorm/rank.size());
            }
            // The difference norm is only needed for the log line, so skip it otherwise.
            if (logger.isDebugEnabled()) {
                logger.debug("{} {} {}", dNorm, newNorm, rank.norm(newRank));
            }
            rank = newRank;
        }


        return rank.getRanking(weight, resultCount);
    }

    /**
     * One propagation step: each domain receives, from every domain linking to it,
     * that domain's rank divided by its number of outgoing links, damped by 0.85.
     */
    @NotNull
    private RankVector createNewRankVector(RankVector rank) {

        final TIntArrayList empty = new TIntArrayList();

        RankVector newRank = new RankVector(0);

        for (DomainData domain : domains.valueCollection()) {

            var links = Optional.ofNullable(reverseLinkData.get(domain.id)).orElse(empty);
            double newRankValue = 0;
            if (links.size() > 0) {
                for (int linkedDomain : links.toArray()) {
                    // Every id in the reverse table was added together with a forward
                    // entry for the same source, so this lookup is never null.
                    newRankValue += rank.get(linkedDomain) / linkData.get(linkedDomain).size();
                }
            }

            newRank.set(domain.id, 0.85 * newRankValue);
        }
        return newRank;
    }

    /**
     * Reads domains and links from two whitespace-separated text files at hard-coded
     * paths. Only domains with {@code indexed > 1} and {@code quality >= 0.1} are kept.
     */
    private void loadDataFromFile() throws IOException {

        try (var str = Files.lines(Path.of("/home/vlofgren/Work/data-domains.txt"))) {
            str.map(DomainData::new)
                    .filter(domain -> domain.indexed>1)
                    .filter(domain -> domain.quality>=0.1)
                    .forEach(domain -> {
                        if (originDomains.contains(domain.name)) {
                            originDomainIds.add(domain.id);
                        }
                        domains.put(domain.id, domain);
                    });
        }

        try (var str = Files.lines(Path.of("/home/vlofgren/Work/data-links.txt"))) {
            str.map(s->s.split("\\s+")).forEach(bits -> {

                int src = Integer.parseInt(bits[0]);
                int dst = Integer.parseInt(bits[1]);

                addLink(src, dst);
            });
        }
    }

    /** Sparse rank vector keyed by domain id; missing keys read as the default value. */
    private class RankVector {
        private final TIntDoubleHashMap rank;
        private final double defaultValue;
        public RankVector(double defaultValue) {
            rank = new TIntDoubleHashMap(domains.size(), 0.75f, -1, defaultValue);
            this.defaultValue = defaultValue;
        }

        public void set(int id, double value) {
            rank.put(id, value);
        }

        public void increment(int id, double value) {
            rank.adjustOrPutValue(id, value, value);
        }

        public double get(int id) {
            return rank.get(id);
        }

        /** @return the sum of absolute stored values, or default * domain count when nothing is stored */
        public double norm() {
            if (rank.isEmpty()) {
                return defaultValue * domains.size();
            }
            return Arrays.stream(rank.values()).map(Math::abs).sum();
        }

        public double norm(RankVector other) {
            return Arrays.stream(rank.keys()).mapToDouble(k -> Math.abs(rank.get(k) - other.get(k))).sum();
        }

        /** @return up to {@code numResults} ids ordered by {@code sqrt(other(id) * rank(id))}, best first */
        public TIntList getRanking(IntToDoubleFunction other, int numResults) {
            TIntArrayList list = new TIntArrayList(numResults);

            Comparator<DomainData> comparator = Comparator.comparing(e -> Math.sqrt(other.applyAsDouble(e.id) * rank.get(e.id)));

            domains.valueCollection().stream()
                    .sorted(comparator.reversed())
                    .map(DomainData::getId)
                    .limit(numResults)
                    .forEach(list::add);

            return list;
        }

        /** @return up to {@code numResults} ids ordered by rank alone, best first */
        public TIntList getRanking2(int numResults) {
            TIntArrayList list = new TIntArrayList(numResults);

            Comparator<DomainData> comparator = Comparator.comparing(e -> rank.get(e.id));

            domains.valueCollection().stream()
                    .sorted(comparator.reversed())
                    .map(DomainData::getId)
                    .limit(numResults)
                    .forEach(list::add);

            return list;
        }

        /** Adds {@code v} to every stored entry. */
        public void incrementAll(double v) {
            rank.transformValues(oldv -> oldv + v);
        }

        int size() {
            return domains.size();
        }
    }

    /** Domain record; the String constructor reads columns 0, 2, 3, 4 and 5 of a data line. */
    @Data @AllArgsConstructor
    static class DomainData {

        public DomainData(String str) {
            String[] parts = str.split("\\s+");

            id = Integer.parseInt(parts[0]);
            name = parts[2];
            indexed = Integer.parseInt(parts[3]);
            state = Integer.parseInt(parts[4]);
            quality = Double.parseDouble(parts[5]);
        }
        public final int id;
        public final String name;
        public final int indexed;
        public final int state;
        public double quality;
    }

}
diff --git
a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/util/ranking/tool/DedupTool.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/util/ranking/tool/DedupTool.java
new file mode 100644
index 00000000..9e0423cd
--- /dev/null
+++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/util/ranking/tool/DedupTool.java
@@ -0,0 +1,89 @@
package nu.marginalia.wmsa.edge.index.service.util.ranking.tool;

import lombok.AllArgsConstructor;
import lombok.Getter;
import lombok.SneakyThrows;
import lombok.ToString;
import nu.marginalia.wmsa.configuration.module.DatabaseModule;
import org.mariadb.jdbc.Driver;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.*;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.stream.Collectors;
import java.util.stream.Stream;

/**
 * One-off maintenance tool: finds URLs that share the same DATA_HASH within the same
 * top domain and marks all but one of each group as {@code 'redirect'} in EC_URL.
 */
public class DedupTool {

    private static final Logger logger = LoggerFactory.getLogger(DedupTool.class);

    public Set<String> originDomains = new HashSet<>();
    public Set<Integer> originDomainIds = new HashSet<>();
    public long domainIdMax = -1;
    public int domainCount;
    private volatile static int rankMax;

    public int maxId() {
        return (int) domainIdMax;
    }
    public int domainCount() {
        return domainCount;
    }

    // NOTE(review): the element type is not visible in this file; left as declared.
    static LinkedBlockingQueue uploadQueue = new LinkedBlockingQueue<>(10);
    volatile static boolean running = true;

    /** One URL row: its text, its EC_URL id and the name of its domain. */
    @AllArgsConstructor @ToString @Getter
    static class Data {
        String url;
        int id;
        String domain;
    }

    /**
     * Reads every URL with a data hash, groups them by (top domain id, hash) and issues
     * one UPDATE per redundant URL.
     */
    @SneakyThrows
    public static void main(String... args) throws IOException {
        Driver driver = new Driver();
        var ds = new DatabaseModule().provideConnection();

        Map<Integer, Map<Integer, List<Data>>> domainToHashToUrl = new HashMap<>();

        try (var conn = ds.getConnection();
             var fetchStmt = conn.prepareStatement("SELECT URL_TOP_DOMAIN_ID,DATA_HASH,URL,EC_URL.ID,EC_DOMAIN.URL_PART FROM EC_URL INNER JOIN EC_DOMAIN ON EC_DOMAIN.ID=DOMAIN_ID WHERE DATA_HASH IS NOT NULL");
             var updateStmt = conn.prepareStatement("UPDATE EC_URL SET STATE='redirect' WHERE ID=?")
        ) {
            fetchStmt.setFetchSize(10_000);
            try (var rsp = fetchStmt.executeQuery()) {
                while (rsp.next()) {
                    domainToHashToUrl.computeIfAbsent(rsp.getInt(1), i -> new HashMap<>())
                            .computeIfAbsent(rsp.getInt(2), i -> new ArrayList<>()).add(new Data(rsp.getString(3), rsp.getInt(4), rsp.getString(5)));
                }
            }

            List<Integer> updateIds = new ArrayList<>();

            domainToHashToUrl.forEach((domain, hashes) ->
                    hashes.forEach((hash, urls) -> updateIds.addAll(redundantIds(urls))));

            for (int id : updateIds) {
                updateStmt.setInt(1, id);
                updateStmt.executeUpdate();
            }
        }
    }

    /**
     * Picks the URLs to retire from a group of duplicates. The group is ordered by domain
     * length, then URL length; URLs ending in "/" are placed before the others; the first
     * entry of that ordering is kept and the ids of the rest are returned.
     *
     * @param urls URLs sharing one data hash within one top domain
     * @return ids of every URL except the preferred one; empty for groups of fewer than two
     */
    private static List<Integer> redundantIds(List<Data> urls) {
        if (urls.size() <= 1) {
            return List.of();
        }

        Comparator<Data> c = Comparator.comparing(d -> d.domain.length());
        var urls2 = urls.stream().sorted(c.thenComparing(d -> d.url.length()))
                .collect(Collectors.partitioningBy(d -> d.url.endsWith("/")));

        return Stream
                .concat(urls2.get(true).stream(), urls2.get(false).stream()).skip(1)
                .map(Data::getId)
                .collect(Collectors.toList());
    }

}
diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/util/ranking/tool/PerusePageRankV2.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/util/ranking/tool/PerusePageRankV2.java
new file mode 100644
index 00000000..7f525daf
--- /dev/null
+++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/util/ranking/tool/PerusePageRankV2.java
@@ -0,0 +1,340 @@
package nu.marginalia.wmsa.edge.index.service.util.ranking.tool;


import com.zaxxer.hikari.HikariDataSource;
import gnu.trove.list.TIntList;
import
gnu.trove.list.array.TIntArrayList; +import gnu.trove.map.hash.TIntDoubleHashMap; +import gnu.trove.map.hash.TIntIntHashMap; +import gnu.trove.map.hash.TIntObjectHashMap; +import gnu.trove.set.hash.TIntHashSet; +import it.unimi.dsi.fastutil.ints.IntArrays; +import it.unimi.dsi.fastutil.ints.IntComparator; +import lombok.AllArgsConstructor; +import lombok.Data; +import lombok.SneakyThrows; +import nu.marginalia.wmsa.configuration.module.DatabaseModule; +import nu.marginalia.wmsa.edge.data.dao.task.EdgeDomainBlacklistImpl; +import nu.marginalia.wmsa.edge.index.service.util.ranking.RankingAlgorithm; +import org.jetbrains.annotations.NotNull; +import org.mariadb.jdbc.Driver; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.sql.SQLException; +import java.util.*; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.function.IntToDoubleFunction; +import java.util.stream.IntStream; + +public class PerusePageRankV2 { + + final TIntObjectHashMap domainsById = new TIntObjectHashMap<>(); + final TIntIntHashMap domainIndexToId = new TIntIntHashMap(); + final TIntIntHashMap domainIdToIndex = new TIntIntHashMap(); + + private final TIntHashSet spamDomains; + private final HikariDataSource dataSource; + + TIntArrayList[] linkDataSrc2Dest; + TIntArrayList[] linkDataDest2Src; + + private static boolean getNames = true; + + private final Logger logger = LoggerFactory.getLogger(getClass()); + + static LinkedBlockingQueue uploadQueue = new LinkedBlockingQueue<>(10); + volatile static boolean running = true; + + public int indexMax() { + return domainIndexToId.size(); + } + + public int getDomainId(int idx) { + return domainIndexToId.get(idx); + } + + @SneakyThrows + public static void main(String... 
args) throws IOException { + org.mariadb.jdbc.Driver driver = new Driver(); + var conn = new DatabaseModule().provideConnection(); + var rank = new PerusePageRankV2(conn); + + long start = System.currentTimeMillis(); + var uploader = new Thread(() -> uploadThread(conn)); + uploader.start(); + + IntStream.range(0, rank.indexMax()).parallel().forEach(i -> { + int[] ids = rank.pageRank(i, 25).toArray(); + try { + uploadQueue.put(new LinkAdjacencies(rank.getDomainId(i), ids)); + } catch (InterruptedException e) { + e.printStackTrace(); + } + }); + + long end = System.currentTimeMillis(); + running = false; + uploader.join(); + System.out.printf("%2.2f", (end - start)/1000.0); + } + + @AllArgsConstructor + static class LinkAdjacencies { + public final int id; + public final int[] neighbors; + }; + + public static void uploadThread(HikariDataSource dataSource) { + try (var conn = dataSource.getConnection()) { + try (var stmt = conn.prepareStatement("INSERT INTO EC_DOMAIN_NEIGHBORS(DOMAIN_ID, NEIGHBOR_ID, ADJ_IDX) VALUES (?,?,?) 
ON DUPLICATE KEY UPDATE NEIGHBOR_ID=VALUES(NEIGHBOR_ID)")) { + // Drain the queue with a timed poll instead of take(): take() blocks indefinitely once the producer has finished and the queue is empty, so the running flag would never be re-checked and the uploader thread could not terminate. + while (running || !uploadQueue.isEmpty()) { + var job = uploadQueue.poll(1, java.util.concurrent.TimeUnit.SECONDS); + if (job == null) { + continue; + } + for (int i = 0; i < job.neighbors.length; i++) { + stmt.setInt(1, job.id); + stmt.setInt(2, job.neighbors[i]); + stmt.setInt(3, i); + stmt.addBatch(); + } + stmt.executeBatch(); + } + } + } catch (SQLException | InterruptedException throwables) { + throwables.printStackTrace(); + } + } + + public PerusePageRankV2(HikariDataSource dataSource) throws IOException { + var blacklist = new EdgeDomainBlacklistImpl(dataSource); + spamDomains = blacklist.getSpamDomains(); + this.dataSource = dataSource; + + try (var conn = dataSource.getConnection()) { + String s; + if (getNames) { + s = "SELECT EC_DOMAIN.ID,URL_PART,DOMAIN_ALIAS FROM EC_DOMAIN INNER JOIN DOMAIN_METADATA ON EC_DOMAIN.ID=DOMAIN_METADATA.ID WHERE ((INDEXED>1 AND STATE >= 0) OR (INDEXED=1 AND VISITED_URLS=KNOWN_URLS AND GOOD_URLS>0)) AND QUALITY_RAW>=-20 GROUP BY EC_DOMAIN.ID"; + } + else { + // The ON clause references DOMAIN_METADATA, so the table has to be joined exactly as in the query above. + s = "SELECT EC_DOMAIN.ID,\"\",DOMAIN_ALIAS FROM EC_DOMAIN INNER JOIN DOMAIN_METADATA ON EC_DOMAIN.ID=DOMAIN_METADATA.ID WHERE ((INDEXED>1 AND STATE >= 0) OR (INDEXED=1 AND VISITED_URLS=KNOWN_URLS AND GOOD_URLS>0)) AND QUALITY_RAW>=-20 GROUP BY EC_DOMAIN.ID"; + } + try (var stmt = conn.prepareStatement(s)) { + stmt.setFetchSize(10000); + var rsp = stmt.executeQuery(); + while (rsp.next()) { + int id = rsp.getInt(1); + if (!spamDomains.contains(id)) { + + domainsById.put(id, new DomainData(id, rsp.getString(2), rsp.getInt(3), false)); + + domainIndexToId.put(domainIndexToId.size(), id); + domainIdToIndex.put(id, domainIdToIndex.size()); + } + } + } + + + linkDataSrc2Dest = new TIntArrayList[domainIndexToId.size()]; + linkDataDest2Src = new TIntArrayList[domainIndexToId.size()]; + + try (var stmt = conn.prepareStatement("SELECT SOURCE_DOMAIN_ID, DEST_DOMAIN_ID FROM EC_DOMAIN_LINK")) { + stmt.setFetchSize(10000); + + var rsp = stmt.executeQuery(); + + while (rsp.next()) { + int src = rsp.getInt(1); + 
int dst = rsp.getInt(2); + + if (src == dst) continue; + + if (domainsById.contains(src) && domainsById.contains(dst)) { + + int srcIdx = domainIdToIndex.get(src); + int dstIdx = domainIdToIndex.get(domainsById.get(dst).resolveAlias()); + + if (linkDataSrc2Dest[srcIdx] == null) { + linkDataSrc2Dest[srcIdx] = new TIntArrayList(); + } + linkDataSrc2Dest[srcIdx].add(dstIdx); + + if (linkDataDest2Src[dstIdx] == null) { + linkDataDest2Src[dstIdx] = new TIntArrayList(); + } + linkDataDest2Src[dstIdx].add(srcIdx); + } + } + } + + } catch (SQLException throwables) { + logger.error("SQL error", throwables); + } + + } + + public TIntList pageRank(int origin, int resultCount) { + RankVector rank = new RankVector(1.d / domainsById.size()); + + int iter_max = 10; + for (int i = 0; i < iter_max; i++) { + RankVector newRank = createNewRankVector(rank); + + double oldNorm = rank.norm(); + double newNorm = newRank.norm(); + double dNorm = oldNorm - newNorm ; + + newRank.increment(origin, dNorm/oldNorm); + + rank = newRank; + } + + rank.increment(origin, -1); + + return rank.getRanking(resultCount); + } + + @NotNull + private RankVector createNewRankVector(RankVector rank) { + + double rankNorm = rank.norm(); + RankVector newRank = new RankVector(0); + + for (int domainId = 0; domainId < domainIndexToId.size(); domainId++) { + + var links = linkDataSrc2Dest[domainId]; + double newRankValue = 0; + + if (links != null && links.size() > 0) { + + + for (int j = 0; j < links.size(); j++) { + var revLinks = linkDataDest2Src[links.getQuick(j)]; + newRankValue += rank.get(links.getQuick(j)) / revLinks.size(); + } + } + + newRank.set(domainId, 0.85*newRankValue/rankNorm); + } + + return newRank; + } + + public class RankVector { + private final double[] rank; + public RankVector(double defaultValue) { + rank = new double[domainIndexToId.size()]; + if (defaultValue != 0.) 
{ + Arrays.fill(rank, defaultValue); + } + } + + public void set(int id, double value) { + rank[id] = value; + } + + public void increment(int id, double value) { + rank[id] += value; + } + + public double get(int id) { + if (id >= rank.length) return 0.; + + return rank[id]; + } + + public double norm() { + double v = 0.; + for (int i = 0; i < rank.length; i++) { + if (rank[i] > 0) { v+=rank[i]; } + else { v -= rank[i]; } + } + return v; + } + + public double norm(RankingAlgorithm.RankVector other) { + double v = 0.; + for (int i = 0; i < rank.length; i++) { + double dv = rank[i] - other.get(i); + + if (dv > 0) { v+=dv; } + else { v -= dv; } + } + return v; + } + + public TIntList getRanking(IntToDoubleFunction other, int numResults) { + TIntArrayList list = new TIntArrayList(numResults); + + Comparator comparator = Comparator.comparing(i -> Math.sqrt(other.applyAsDouble(domainIdToIndex.get(i)) * rank[i])); + + IntStream.range(0, rank.length) + .boxed() + .sorted(comparator.reversed()) + .map(domainIndexToId::get) + .limit(numResults) + .forEach(list::add); + + return list; + } + + /** Returns up to numResults non-alias domain ids in descending rank order; a negative numResults is replaced by the number of known domains. */ + public TIntList getRanking(int numResults) { + if (numResults < 0) { + numResults = domainIdToIndex.size(); + } + TIntArrayList list = new TIntArrayList(numResults); + + int[] nodes = new int[rank.length]; + Arrays.setAll(nodes, i->i); + // Sort node indices by descending rank; Double.compare keeps the ordering consistent even if a rank is NaN. + IntComparator comp = (i,j) -> Double.compare(rank[j], rank[i]); + IntArrays.quickSort(nodes, comp); + + // Walk the sorted nodes, skipping aliases, until numResults ids are collected or the nodes run out. + // Bounding by nodes.length prevents reading past the array when numResults exceeds the node count, + // and testing list.size() lets skipped aliases be backfilled from lower-ranked nodes. + for (int i = 0; i < nodes.length && list.size() < numResults; i++) { + int id = domainIndexToId.get(nodes[i]); + + if (!domainsById.get(id).isAlias()) + list.add(id); + } + + return list; + } + + public void incrementAll(double v) { + for (int i = 0; i < rank.length; i++) { + rank[i]+=v; + } + } + + int size() { + return domainsById.size(); + } + } + + @Data + @AllArgsConstructor + static class 
DomainData { + public final int id; + public final String name; + private int alias; + + public int resolveAlias() { + if (alias == 0) return id; + return alias; + } + + public boolean isAlias() { + return alias != 0; + } + + public boolean peripheral; + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/util/ranking/tool/TestAcademiaRankTool.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/util/ranking/tool/TestAcademiaRankTool.java new file mode 100644 index 00000000..638e3f6d --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/util/ranking/tool/TestAcademiaRankTool.java @@ -0,0 +1,30 @@ +package nu.marginalia.wmsa.edge.index.service.util.ranking.tool; + +import lombok.SneakyThrows; +import nu.marginalia.wmsa.configuration.module.DatabaseModule; +import nu.marginalia.wmsa.edge.index.service.util.ranking.AcademiaRank; +import org.mariadb.jdbc.Driver; + +import java.io.IOException; + +public class TestAcademiaRankTool { + + @SneakyThrows + public static void main(String... 
args) throws IOException { + Driver driver = new Driver(); + var conn = new DatabaseModule().provideConnection(); + + var rank = new AcademiaRank(new DatabaseModule().provideConnection(), "www.perseus.tufts.edu", "xroads.virginia.edu"); + var res = rank.getResult(); + + try (var c = conn.getConnection(); var stmt = c.prepareStatement("SELECT URL_PART FROM EC_DOMAIN WHERE ID=?")) { + for (int i = 0; i < Math.min(res.size(), 100); i++) { + stmt.setInt(1, res.getQuick(i)); + var rsp = stmt.executeQuery(); + while (rsp.next()) + System.out.println(rsp.getString(1)); + } + } + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/util/ranking/tool/UpdateDomainRanksTool.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/util/ranking/tool/UpdateDomainRanksTool.java new file mode 100644 index 00000000..a78dae31 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/util/ranking/tool/UpdateDomainRanksTool.java @@ -0,0 +1,95 @@ +package nu.marginalia.wmsa.edge.index.service.util.ranking.tool; + +import com.zaxxer.hikari.HikariDataSource; +import lombok.SneakyThrows; +import nu.marginalia.wmsa.configuration.module.DatabaseModule; +import nu.marginalia.wmsa.edge.index.service.util.ranking.BuggyStandardPageRank; +import org.mariadb.jdbc.Driver; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.sql.SQLException; +import java.util.HashSet; +import java.util.Set; +import java.util.concurrent.LinkedBlockingQueue; + +public class UpdateDomainRanksTool { + + private static final Logger logger = LoggerFactory.getLogger(UpdateDomainRanksTool.class); + + public Set originDomains = new HashSet<>(); + public Set originDomainIds = new HashSet<>(); + public long domainIdMax = -1; + public int domainCount; + private volatile static int rankMax; + + public int maxId() { + return (int) domainIdMax; + } + public int domainCount() { + return domainCount; + } + 
+ static LinkedBlockingQueue uploadQueue = new LinkedBlockingQueue<>(10); + volatile static boolean running = true; + + @SneakyThrows + public static void main(String... args) throws IOException { + org.mariadb.jdbc.Driver driver = new Driver(); + var conn = new DatabaseModule().provideConnection(); + + long start = System.currentTimeMillis(); + var uploader = new Thread(() -> uploadThread(conn), "Uploader"); + + logger.info("Ranking"); + var spr = new BuggyStandardPageRank(new DatabaseModule().provideConnection(),"memex.marginalia.nu"); + + rankMax = spr.size()*2; + uploader.start(); + + spr.pageRankWithPeripheralNodes(rankMax, false).forEach(i -> { + try { + uploadQueue.put(i); + } catch (InterruptedException e) { + e.printStackTrace(); + } + return true; + }); + + long end = System.currentTimeMillis(); + running = false; + uploader.join(); + + logger.info("Done in {}", (end - start)/1000.0); + } + + public static void uploadThread(HikariDataSource dataSource) { + int i = 0; + + try (var conn = dataSource.getConnection()) { + logger.info("Resetting rank"); + try (var stmt = conn.prepareStatement("UPDATE EC_DOMAIN SET RANK=1")) { + stmt.executeUpdate(); + } + + logger.info("Updating ranks"); + try (var stmt = conn.prepareStatement("UPDATE EC_DOMAIN SET RANK=? 
WHERE ID=?")) { + // Drain the queue with a timed poll instead of take(): take() blocks indefinitely if the queue is already empty when the producer clears the running flag, which would leave main() stuck in uploader.join(). + while (running || !uploadQueue.isEmpty()) { + var job = uploadQueue.poll(1, java.util.concurrent.TimeUnit.SECONDS); + if (job == null) { + continue; + } + stmt.setDouble(1, i++ / (double) rankMax); + stmt.setInt(2, job); + stmt.executeUpdate(); + } + } + + logger.info("Recalculating quality"); + try (var stmt = conn.prepareStatement("UPDATE EC_DOMAIN SET QUALITY=-5*RANK+IF(RANK=1,RANK*GREATEST(QUALITY_RAW,QUALITY_ORIGINAL)/2, 0)")) { + stmt.executeUpdate(); + } + + } catch (SQLException | InterruptedException throwables) { + throwables.printStackTrace(); + } + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/util/ranking/tool/UpdateDomainRanksTool2.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/util/ranking/tool/UpdateDomainRanksTool2.java new file mode 100644 index 00000000..4ac2600d --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/service/util/ranking/tool/UpdateDomainRanksTool2.java @@ -0,0 +1,105 @@ +package nu.marginalia.wmsa.edge.index.service.util.ranking.tool; + +import com.zaxxer.hikari.HikariDataSource; +import lombok.SneakyThrows; +import nu.marginalia.wmsa.configuration.module.DatabaseModule; +import nu.marginalia.wmsa.edge.index.service.util.ranking.BetterReversePageRank; +import org.mariadb.jdbc.Driver; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.sql.SQLException; +import java.util.HashSet; +import java.util.Set; +import java.util.concurrent.LinkedBlockingQueue; + +public class UpdateDomainRanksTool2 { + + private static final Logger logger = LoggerFactory.getLogger(UpdateDomainRanksTool2.class); + + public Set originDomains = new HashSet<>(); + public Set originDomainIds = new HashSet<>(); + public long domainIdMax = -1; + public int domainCount; + private volatile static int rankMax; + + public int maxId() { + return (int) domainIdMax; + } + public int domainCount() { + return domainCount; + } + + static LinkedBlockingQueue uploadQueue = new 
LinkedBlockingQueue<>(10); + volatile static boolean running = true; + + @SneakyThrows + public static void main(String... args) throws IOException { + Driver driver = new Driver(); + var conn = new DatabaseModule().provideConnection(); + + long start = System.currentTimeMillis(); + var uploader = new Thread(() -> uploadThread(conn), "Uploader"); + + logger.info("Ranking"); + // "memex.marginalia.nu", "wiki.xxiivv.com", "bikobatanari.art", "sadgrl.online", "lileks.com", + // "www.rep.routledge.com", "www.personal.kent.edu", "xroads.virginia.edu", "classics.mit.edu", "faculty.washington.edu", "monadnock.net" + var rpr = new BetterReversePageRank(new DatabaseModule().provideConnection(), "memex.marginalia.nu", "bikobatanari.art", "sadgrl.online", "wiki.xxiivv.com", "%neocities.org"); +// var rpr = new BetterStandardPageRank(new DatabaseModule().provideConnection(), "%edu"); +// var spr = new BetterStandardPageRank(new DatabaseModule().provideConnection(), "memex.marginalia.nu"); + + var rankVector = rpr.pageRankVector(); + var norm = rankVector.norm(); + rankMax = rpr.size(); + uploader.start(); + + + rankMax = rpr.size(); + + + rpr.pageRankWithPeripheralNodes(rankMax, false).forEach(i -> { + try { + uploadQueue.put(i); + } catch (InterruptedException e) { + e.printStackTrace(); + } + return true; + }); + + long end = System.currentTimeMillis(); + running = false; + uploader.join(); + + logger.info("Done in {}", (end - start)/1000.0); + } + + public static void uploadThread(HikariDataSource dataSource) { + int i = 0; + + try (var conn = dataSource.getConnection()) { + logger.info("Resetting rank"); + try (var stmt = conn.prepareStatement("UPDATE EC_DOMAIN SET RANK=1")) { + stmt.executeUpdate(); + } + + logger.info("Updating ranks"); + try (var stmt = conn.prepareStatement("UPDATE EC_DOMAIN SET RANK=? 
WHERE ID=?")) { + // Drain the queue with a timed poll instead of take(): take() blocks indefinitely if the queue is already empty when the producer clears the running flag, which would leave main() stuck in uploader.join(). + while (running || !uploadQueue.isEmpty()) { + var job = uploadQueue.poll(1, java.util.concurrent.TimeUnit.SECONDS); + if (job == null) { + continue; + } + stmt.setDouble(1, i++ / (double) rankMax); + stmt.setInt(2, job); + stmt.executeUpdate(); + } + } + + logger.info("Recalculating quality"); + try (var stmt = conn.prepareStatement("UPDATE EC_DOMAIN SET QUALITY=-5*RANK+IF(RANK=1,RANK*GREATEST(QUALITY_RAW,QUALITY_ORIGINAL)/2, 0)")) { + stmt.executeUpdate(); + } + + } catch (SQLException | InterruptedException throwables) { + throwables.printStackTrace(); + } + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/integration/BasicPageUploader.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/integration/BasicPageUploader.java new file mode 100644 index 00000000..3408bd25 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/integration/BasicPageUploader.java @@ -0,0 +1,57 @@ +package nu.marginalia.wmsa.edge.integration; + +import com.google.inject.Inject; +import nu.marginalia.wmsa.configuration.server.Context; +import nu.marginalia.wmsa.edge.crawler.domain.processor.HtmlFeature; +import nu.marginalia.wmsa.edge.data.dao.EdgeDataStoreDao; +import nu.marginalia.wmsa.edge.index.client.EdgeIndexClient; +import nu.marginalia.wmsa.edge.integration.model.BasicDocumentData; +import nu.marginalia.wmsa.edge.model.*; +import nu.marginalia.wmsa.edge.model.crawl.EdgeHtmlStandard; +import nu.marginalia.wmsa.edge.model.crawl.EdgePageWordSet; +import nu.marginalia.wmsa.edge.model.crawl.EdgeUrlState; +import nu.marginalia.wmsa.edge.model.crawl.EdgeUrlVisit; + +import java.util.EnumSet; + +public class BasicPageUploader { + private final EdgeDataStoreDao edgeStoreDao; + private final EdgeIndexClient indexClient; + + private final int features; + + @Inject + public BasicPageUploader(EdgeDataStoreDao edgeStoreDao, EdgeIndexClient indexClient, + EnumSet features) { + + this.edgeStoreDao = edgeStoreDao; + this.indexClient = indexClient; + this.features = HtmlFeature.encode(features); + + 
} + + public void upload(BasicDocumentData indexData) { + var url = indexData.getUrl(); + + edgeStoreDao.putUrl(-2, url); + edgeStoreDao.putUrlVisited(new EdgeUrlVisit(url, indexData.getHashCode(), -2., + indexData.getTitle(), + indexData.getDescription() + , "", + EdgeHtmlStandard.HTML5.toString(), + features, + indexData.wordCount, indexData.wordCount, EdgeUrlState.OK)); + edgeStoreDao.putLink(false, indexData.domainLinks); + + putWords(edgeStoreDao.getDomainId(url.domain).getId(), + edgeStoreDao.getUrlId(url).getId(), + -2, + indexData.words); + } + + void putWords(int didx, int idx, double quality, EdgePageWordSet wordsSet) { + indexClient.putWords(Context.internal(), new EdgeId<>(didx), new EdgeId<>(idx), quality, + wordsSet, 0).blockingSubscribe(); + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/integration/arxiv/ArxivParser.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/integration/arxiv/ArxivParser.java new file mode 100644 index 00000000..ed6a656b --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/integration/arxiv/ArxivParser.java @@ -0,0 +1,29 @@ +package nu.marginalia.wmsa.edge.integration.arxiv; + +import com.google.gson.Gson; +import com.google.gson.GsonBuilder; +import nu.marginalia.wmsa.edge.integration.arxiv.model.ArxivMetadata; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.util.ArrayList; +import java.util.List; + +public class ArxivParser { + private final Gson gson = new GsonBuilder().create(); + + public ArxivParser() { + + } + + public List parse(File jsonFile) throws IOException { + + List ret = new ArrayList<>(); + try (var lines = Files.lines(jsonFile.toPath())) { + lines.map(line -> gson.fromJson(line, ArxivMetadata.class)).forEach(ret::add); + } + + return ret; + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/integration/arxiv/model/ArxivMetadata.java 
b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/integration/arxiv/model/ArxivMetadata.java new file mode 100644 index 00000000..ba6307a8 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/integration/arxiv/model/ArxivMetadata.java @@ -0,0 +1,21 @@ +package nu.marginalia.wmsa.edge.integration.arxiv.model; + +import com.google.gson.annotations.SerializedName; +import lombok.AllArgsConstructor; +import lombok.Getter; +import lombok.NoArgsConstructor; + +@Getter +@AllArgsConstructor @NoArgsConstructor +public class ArxivMetadata { + public String id; + public String submitter; + public String authors; + public String title; + @SerializedName("abstract") + public String _abstract; + + public String getAbstract() { + return _abstract; + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/integration/model/BasicDocumentData.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/integration/model/BasicDocumentData.java new file mode 100644 index 00000000..905b7486 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/integration/model/BasicDocumentData.java @@ -0,0 +1,22 @@ +package nu.marginalia.wmsa.edge.integration.model; + +import lombok.AllArgsConstructor; +import lombok.Data; +import nu.marginalia.wmsa.edge.model.crawl.EdgeDomainLink; +import nu.marginalia.wmsa.edge.model.crawl.EdgePageWordSet; +import nu.marginalia.wmsa.edge.model.EdgeUrl; + + +@Data +@AllArgsConstructor +public class BasicDocumentData { + public final EdgeUrl url; + + public final String title; + public final String description; + public int hashCode; + + public final EdgePageWordSet words; + public final EdgeDomainLink[] domainLinks; + public final int wordCount; +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/integration/stackoverflow/StackOverflowPostProcessor.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/integration/stackoverflow/StackOverflowPostProcessor.java new file mode 100644 index 
00000000..dcb29ace --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/integration/stackoverflow/StackOverflowPostProcessor.java @@ -0,0 +1,83 @@ +package nu.marginalia.wmsa.edge.integration.stackoverflow; + +import com.google.inject.Inject; +import nu.marginalia.wmsa.edge.crawler.domain.LinkParser; +import nu.marginalia.wmsa.edge.crawler.domain.language.processing.DocumentKeywordExtractor; +import nu.marginalia.wmsa.edge.crawler.domain.language.processing.SentenceExtractor; +import nu.marginalia.wmsa.edge.index.model.IndexBlock; +import nu.marginalia.wmsa.edge.integration.model.BasicDocumentData; +import nu.marginalia.wmsa.edge.integration.stackoverflow.model.StackOverflowPost; +import nu.marginalia.wmsa.edge.model.*; +import nu.marginalia.wmsa.edge.model.crawl.EdgeDomainLink; +import org.apache.commons.lang3.StringUtils; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; + +import java.util.ArrayList; +import java.util.List; +import java.util.Objects; + +public class StackOverflowPostProcessor { + private final LinkParser linkParser = new LinkParser(); + + private final SentenceExtractor sentenceExtractor; + private final DocumentKeywordExtractor documentKeywordExtractor; + + @Inject + public StackOverflowPostProcessor(SentenceExtractor sentenceExtractor, DocumentKeywordExtractor documentKeywordExtractor) { + this.sentenceExtractor = sentenceExtractor; + this.documentKeywordExtractor = documentKeywordExtractor; + } + + public BasicDocumentData process(StackOverflowPost post) { + + final var docUrl = post.getUrl(); + final var doc = Jsoup.parseBodyFragment(""+post.getTitle()+"" + post.getFullBody()); + + EdgeDomainLink[] domainLinks = getDomainLinks(docUrl, doc); + + for (var tag : doc.getElementsByTag("code")) { + if (tag.text().length() > 32) { + tag.remove(); + } + } + + var dld = sentenceExtractor.extractSentences(doc); + var keywords = documentKeywordExtractor.extractKeywords(dld); + + 
keywords.get(IndexBlock.Meta).addJust("site:"+post.getUrl().domain); + keywords.get(IndexBlock.Words).addJust("site:"+post.getUrl().domain); + keywords.get(IndexBlock.Words).addJust("special:wikipedia"); + keywords.get(IndexBlock.Meta).addJust("special:wikipedia"); + keywords.get(IndexBlock.Meta).addJust("js:true"); + + String title = StringUtils.abbreviate(post.getTitle(), 255); + String description = StringUtils.abbreviate(Jsoup.parseBodyFragment(post.getJustBody()).text(), 255); + + return new BasicDocumentData(docUrl, title, description, post.fullBody.hashCode(), keywords, domainLinks, + dld.totalNumWords()); + + } + + private EdgeDomainLink[] getDomainLinks(EdgeUrl docUrl, Document doc) { + List links = new ArrayList<>(10); + + for (var tag : doc.getElementsByTag("a")) { + if (!tag.hasAttr("href")) { + continue; + } + String href = tag.attr("href"); + if (href.length()<10 || !href.contains(".") || !href.contains("://")) { + continue; + } + + linkParser.parseLink(docUrl, tag) + .filter(url -> !Objects.equals(docUrl.getDomain(), url.getDomain())) + .ifPresent(links::add); + } + + return links.stream().map(EdgeUrl::getDomain).map(domain -> new EdgeDomainLink(docUrl.domain, domain)) + .distinct().toArray(EdgeDomainLink[]::new); + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/integration/stackoverflow/StackOverflowPostsReader.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/integration/stackoverflow/StackOverflowPostsReader.java new file mode 100644 index 00000000..17b88447 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/integration/stackoverflow/StackOverflowPostsReader.java @@ -0,0 +1,123 @@ +package nu.marginalia.wmsa.edge.integration.stackoverflow; + +import gnu.trove.map.hash.TIntObjectHashMap; +import lombok.SneakyThrows; +import nu.marginalia.wmsa.edge.integration.stackoverflow.model.StackOverflowPost; +import nu.marginalia.wmsa.edge.integration.stackoverflow.model.StackOverflowQuestionData; +import 
nu.marginalia.wmsa.edge.model.EdgeDomain; +import nu.marginalia.wmsa.edge.model.EdgeUrl; +import org.xml.sax.Attributes; +import org.xml.sax.SAXException; +import org.xml.sax.helpers.DefaultHandler; + +import javax.xml.parsers.SAXParser; +import javax.xml.parsers.SAXParserFactory; +import java.util.ArrayList; +import java.util.Deque; +import java.util.LinkedList; +import java.util.function.Consumer; + +public class StackOverflowPostsReader extends DefaultHandler { + private static final int MAX_QUESTION_WINDOW_SIZE = 10_000; + + private final Thread runThread; + private final String postsFile; + private final EdgeDomain domain; + private final Consumer postConsumer; + + private Deque questionWindow = new LinkedList<>(); + private final TIntObjectHashMap questionsById = new TIntObjectHashMap<>(1_000_000); + + public StackOverflowPostsReader(String postsFile, EdgeDomain domain, Consumer postConsumer) { + this.postsFile = postsFile; + this.domain = domain; + this.postConsumer = postConsumer; + runThread = new Thread(this::run, "StackOverflowPostReader"); + runThread.start(); + + } + + @Override + public void startElement(String uri, String lName, String qName, Attributes attr) throws SAXException { + if (!"row".equals(qName)) { + return; + } + + if ("1".equals(attr.getValue("PostTypeId"))) { + onQuestion(attr); + } + if ("2".equals(attr.getValue("PostTypeId"))) { + onReply(attr); + } + + while (questionWindow.size() > MAX_QUESTION_WINDOW_SIZE) { + var data = questionWindow.removeFirst(); + finalizeQuestion(data); + } + + } + + private void finalizeQuestion(StackOverflowQuestionData data) { + questionsById.remove(data.getId()); + var post = createPost(data); + postConsumer.accept(post); + } + + private StackOverflowPost createPost(StackOverflowQuestionData data) { + EdgeUrl url = new EdgeUrl("https", domain, null, "/questions/"+data.getId()); + + StringBuilder body = new StringBuilder(); + body.append(data.getQuestion()); + data.getReplies().forEach(body::append); + + 
return new StackOverflowPost(url, data.getTitle(), body.toString(), data.getQuestion()); + } + + + private void onQuestion(Attributes attr) { + String id = attr.getValue("Id"); + String title = attr.getValue("Title"); + String body = attr.getValue("Body"); + String score = attr.getValue("Score"); + if (parseInt(score) < 0) + return; + + var data = new StackOverflowQuestionData(parseInt(id), title, body, new ArrayList<>()); + questionsById.put(data.getId(), data); + questionWindow.addLast(data); + } + + private void onReply(Attributes attr) { + String parentId = attr.getValue("ParentId"); + String body = attr.getValue("Body"); + String score = attr.getValue("Score"); + if (parseInt(score) < 0) + return; + + var data = questionsById.get(parseInt(parentId)); + if (data != null) { + data.getReplies().add(body); + } + } + + private int parseInt(String id) { + return Integer.parseInt(id); + } + + @SneakyThrows + private void run() { + SAXParserFactory factory = SAXParserFactory.newInstance(); + SAXParser saxParser = factory.newSAXParser(); + + saxParser.parse(postsFile, this); + + while (!questionWindow.isEmpty()) { + var data = questionWindow.removeFirst(); + finalizeQuestion(data); + } + } + + public void join() throws InterruptedException { + runThread.join(); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/integration/stackoverflow/model/StackOverflowPost.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/integration/stackoverflow/model/StackOverflowPost.java new file mode 100644 index 00000000..03cc1c90 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/integration/stackoverflow/model/StackOverflowPost.java @@ -0,0 +1,14 @@ +package nu.marginalia.wmsa.edge.integration.stackoverflow.model; + +import lombok.AllArgsConstructor; +import lombok.Data; +import lombok.ToString; +import nu.marginalia.wmsa.edge.model.EdgeUrl; + +@Data @AllArgsConstructor @ToString +public class StackOverflowPost { + public EdgeUrl url; + 
public String title; + public String fullBody; + public String justBody; +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/integration/stackoverflow/model/StackOverflowQuestionData.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/integration/stackoverflow/model/StackOverflowQuestionData.java new file mode 100644 index 00000000..52e2ff6e --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/integration/stackoverflow/model/StackOverflowQuestionData.java @@ -0,0 +1,14 @@ +package nu.marginalia.wmsa.edge.integration.stackoverflow.model; + +import lombok.AllArgsConstructor; +import lombok.Data; + +import java.util.List; + +@Data @AllArgsConstructor +public class StackOverflowQuestionData { + int id; + String title; + String question; + List replies; +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/integration/wikipedia/WikipediaProcessor.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/integration/wikipedia/WikipediaProcessor.java new file mode 100644 index 00000000..c2577b93 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/integration/wikipedia/WikipediaProcessor.java @@ -0,0 +1,82 @@ +package nu.marginalia.wmsa.edge.integration.wikipedia; + +import nu.marginalia.wmsa.edge.crawler.domain.LinkParser; +import nu.marginalia.wmsa.edge.crawler.domain.language.processing.DocumentKeywordExtractor; +import nu.marginalia.wmsa.edge.crawler.domain.language.processing.SentenceExtractor; +import nu.marginalia.wmsa.edge.index.model.IndexBlock; +import nu.marginalia.wmsa.edge.integration.model.BasicDocumentData; +import nu.marginalia.wmsa.edge.integration.wikipedia.model.WikipediaArticle; +import nu.marginalia.wmsa.edge.model.crawl.EdgeDomainLink; +import nu.marginalia.wmsa.edge.model.EdgeUrl; +import org.apache.commons.lang3.StringUtils; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; + +import java.util.ArrayList; +import java.util.List; +import java.util.Objects; + +public 
class WikipediaProcessor { + private final LinkParser linkParser = new LinkParser(); + + private final SentenceExtractor sentenceExtractor; + private final DocumentKeywordExtractor documentKeywordExtractor; + + public WikipediaProcessor(SentenceExtractor sentenceExtractor, DocumentKeywordExtractor documentKeywordExtractor) { + this.sentenceExtractor = sentenceExtractor; + this.documentKeywordExtractor = documentKeywordExtractor; + } + + + public BasicDocumentData process(WikipediaArticle post) { + + final var docUrl = post.getUrl(); + final var doc = Jsoup.parseBodyFragment(post.body); + + String title = StringUtils.abbreviate(doc.getElementsByTag("title").text(), 255); + String description = getSummary(doc); + + EdgeDomainLink[] domainLinks = getDomainLinks(docUrl, doc); + + var dld = sentenceExtractor.extractSentences(doc); + var keywords = documentKeywordExtractor.extractKeywords(dld); + + keywords.get(IndexBlock.Meta).addJust("site:"+post.getUrl().domain); + keywords.get(IndexBlock.Words).addJust("site:"+post.getUrl().domain); + keywords.get(IndexBlock.Words).addJust("special:stackoverflow"); + keywords.get(IndexBlock.Meta).addJust("special:stackoverflow"); + keywords.get(IndexBlock.Meta).addJust("js:true"); + + return new BasicDocumentData(docUrl, title, description, post.body.hashCode(), keywords, domainLinks, + dld.totalNumWords()); + + } + + private String getSummary(Document doc) { + doc = doc.clone(); + doc.select("table,sup,.reference").remove(); + return StringUtils.abbreviate(doc.select("#bodyContent p").text(), 255); + } + + private EdgeDomainLink[] getDomainLinks(EdgeUrl docUrl, Document doc) { + List links = new ArrayList<>(10); + + for (var tag : doc.getElementsByTag("a")) { + if (!tag.hasAttr("href")) { + continue; + } + String href = tag.attr("href"); + if (href.length()<10 || !href.contains(".") || !href.contains("://")) { + continue; + } + + linkParser.parseLink(docUrl, tag) + .filter(url -> !Objects.equals(docUrl.getDomain(), url.getDomain())) 
+ .ifPresent(links::add); + } + + return links.stream().map(EdgeUrl::getDomain).map(domain -> new EdgeDomainLink(docUrl.domain, domain)) + .distinct().toArray(EdgeDomainLink[]::new); + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/integration/wikipedia/WikipediaReader.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/integration/wikipedia/WikipediaReader.java new file mode 100644 index 00000000..12bfec3f --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/integration/wikipedia/WikipediaReader.java @@ -0,0 +1,46 @@ +package nu.marginalia.wmsa.edge.integration.wikipedia; + +import lombok.SneakyThrows; +import nu.marginalia.wmsa.edge.integration.wikipedia.model.WikipediaArticle; +import nu.marginalia.wmsa.edge.model.EdgeDomain; +import nu.marginalia.wmsa.edge.model.EdgeUrl; +import org.openzim.ZIMTypes.ZIMFile; +import org.openzim.ZIMTypes.ZIMReader; + +import java.util.function.Consumer; + +public class WikipediaReader { + + private final Thread runThread; + private final String zimFile; + private final EdgeDomain domain; + private final Consumer postConsumer; + + public WikipediaReader(String zimFile, EdgeDomain domain, Consumer postConsumer) { + this.zimFile = zimFile; + this.domain = domain; + this.postConsumer = postConsumer; + + runThread = new Thread(this::run, "WikipediaReader"); + runThread.start(); + } + + @SneakyThrows + private void run() { + var zr = new ZIMReader(new ZIMFile(zimFile)); + + zr.forEachArticles((originalUrl, art) -> { + if (art != null) { + postConsumer.accept(new WikipediaArticle(synthesizeUrl(originalUrl), art)); + } + }, p -> true); + } + + private EdgeUrl synthesizeUrl(String originalUrl) { + return new EdgeUrl("https", domain, null, "/wiki/"+originalUrl); + } + + public void join() throws InterruptedException { + runThread.join(); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/integration/wikipedia/model/WikipediaArticle.java 
b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/integration/wikipedia/model/WikipediaArticle.java new file mode 100644 index 00000000..bc221492 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/integration/wikipedia/model/WikipediaArticle.java @@ -0,0 +1,12 @@ +package nu.marginalia.wmsa.edge.integration.wikipedia.model; + +import lombok.AllArgsConstructor; +import lombok.Data; +import nu.marginalia.wmsa.edge.model.EdgeUrl; + +@Data +@AllArgsConstructor +public class WikipediaArticle { + public final EdgeUrl url; + public final String body; +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/EdgeCrawlPlan.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/EdgeCrawlPlan.java new file mode 100644 index 00000000..4e237908 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/EdgeCrawlPlan.java @@ -0,0 +1,30 @@ +package nu.marginalia.wmsa.edge.model; + +import lombok.*; + +import java.nio.file.Path; + +@AllArgsConstructor @NoArgsConstructor @ToString +public class EdgeCrawlPlan { + public String jobSpec; + public WorkDir crawl; + public WorkDir process; + + public Path getJobSpec() { + return Path.of(jobSpec); + } + + @AllArgsConstructor @NoArgsConstructor @ToString + public static class WorkDir { + public String dir; + public String logName; + + public Path getDir() { + return Path.of(dir); + } + public Path getLogFile() { + return Path.of(dir).resolve(logName); + } + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/EdgeDomain.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/EdgeDomain.java new file mode 100644 index 00000000..cb778947 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/EdgeDomain.java @@ -0,0 +1,130 @@ +package nu.marginalia.wmsa.edge.model; + +import lombok.*; + +import javax.annotation.Nonnull; +import java.util.Objects; +import java.util.function.Predicate; +import java.util.regex.Pattern; + 
+@AllArgsConstructor +@Getter @Setter @Builder +public class EdgeDomain implements WideHashable { + + private static final Predicate ipPatternTest = Pattern.compile("[\\d]{1,3}\\.[\\d]{1,3}\\.[\\d]{1,3}\\.[\\d]{1,3}").asMatchPredicate(); + private static final Predicate govListTest = Pattern.compile(".*\\.(ac|co|org|gov|edu|com)\\.[a-z]{2}").asMatchPredicate(); + + @Nonnull + public final String subDomain; + @Nonnull + public final String domain; + + @SneakyThrows + public EdgeDomain(String host) { + + var dot = host.lastIndexOf('.'); + + if (dot < 0 || ipPatternTest.test(host)) { // IPV6 >.> + subDomain = ""; + domain = host; + } + else { + int dot2 = host.substring(0, dot).lastIndexOf('.'); + if (dot2 < 0) { + subDomain = ""; + domain = host; + } + else { + if (govListTest.test(host)) + { // Capture .ac.jp, .co.uk + int dot3 = host.substring(0, dot2).lastIndexOf('.'); + if (dot3 >= 0) { + dot2 = dot3; + subDomain = host.substring(0, dot2); + domain = host.substring(dot2 + 1); + } + else { + subDomain = ""; + domain = host; + } + } + else { + subDomain = host.substring(0, dot2); + domain = host.substring(dot2 + 1); + } + } + } + + + } + + public String toString() { + return getAddress(); + } + + public String getAddress() { + if (!subDomain.isEmpty()) { + return subDomain + "." 
+ domain; + } + return domain; + } + + public String getDomainKey() { + int cutPoint = domain.indexOf('.'); + if (cutPoint < 0) { + return domain; + } + return domain.substring(0, cutPoint).toLowerCase(); + } + public String getLongDomainKey() { + StringBuilder ret = new StringBuilder(); + + int cutPoint = domain.indexOf('.'); + if (cutPoint < 0) { + ret.append(domain); + } + else { + ret.append(domain, 0, cutPoint); + } + + if (!"".equals(subDomain) && !"www".equals(subDomain)) { + ret.append(":"); + ret.append(subDomain); + } + + return ret.toString().toLowerCase(); + } + + @Override + public long wideHash() { + return ((long) Objects.hash(domain, subDomain) << 32) | toString().hashCode(); + } + + public boolean equals(final Object o) { + if (o == this) return true; + if (!(o instanceof EdgeDomain)) return false; + final EdgeDomain other = (EdgeDomain) o; + if (!other.canEqual((Object) this)) return false; + final String this$subDomain = this.getSubDomain(); + final String other$subDomain = other.getSubDomain(); + if (!this$subDomain.equalsIgnoreCase(other$subDomain)) return false; + final String this$domain = this.getDomain(); + final String other$domain = other.getDomain(); + if (!this$domain.equalsIgnoreCase(other$domain)) return false; + return true; + } + + protected boolean canEqual(final Object other) { + return other instanceof EdgeDomain; + } + + public int hashCode() { + final int PRIME = 59; + int result = 1; + final Object $subDomain = this.getSubDomain().toLowerCase(); + result = result * PRIME + $subDomain.hashCode(); + final Object $domain = this.getDomain().toLowerCase(); + result = result * PRIME + $domain.hashCode(); + return result; + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/EdgeId.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/EdgeId.java new file mode 100644 index 00000000..f2be15fa --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/EdgeId.java @@ -0,0 +1,15 @@ 
+package nu.marginalia.wmsa.edge.model; + +import lombok.AllArgsConstructor; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.ToString; + +/** This exists entirely for strengthening the typing of IDs + * + * @param + */ +@AllArgsConstructor @Getter @EqualsAndHashCode @ToString +public class EdgeId { + private final int id; +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/EdgeUrl.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/EdgeUrl.java new file mode 100644 index 00000000..39bc475b --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/EdgeUrl.java @@ -0,0 +1,124 @@ +package nu.marginalia.wmsa.edge.model; + +import lombok.Builder; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.Setter; + +import java.net.URI; +import java.net.URISyntaxException; +import java.util.regex.Pattern; + +@Getter @Setter @Builder @EqualsAndHashCode +public class EdgeUrl implements WideHashable { + public final String proto; + public final EdgeDomain domain; + public final Integer port; + public final String path; + + public EdgeUrl(String proto, EdgeDomain domain, Integer port, String path) { + this.proto = proto; + this.domain = domain; + this.port = port(port, proto); + this.path = path; + } + + public EdgeUrl(String url) throws URISyntaxException { + this(new URI(urlencodeFixer(url))); + } + + private static Pattern badCharPattern = Pattern.compile("[ \t\n\"<>\\[\\]()',|]"); + + public static String urlencodeFixer(String url) throws URISyntaxException { + var s = new StringBuilder(); + String goodChars = "&.?:/-;+$"; + String hexChars = "0123456789abcdefABCDEF"; + + int pathIdx = findPathIdx(url); + if (pathIdx < 0) { + return url; + } + s.append(url, 0, pathIdx); + + for (int i = pathIdx; i < url.length(); i++) { + int c = url.charAt(i); + + if (goodChars.indexOf(c) >= 0 || (c >= 'A' && c <='Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9')) { + s.appendCodePoint(c); 
+ } + else if (c == '%' && i+2= 0 && hexChars.indexOf(cnn) >= 0) { + s.appendCodePoint(c); + } + else { + s.append("%25"); + } + } + else { + s.append(String.format("%%%02X", c)); + } + } + + return s.toString(); + } + + private static int findPathIdx(String url) throws URISyntaxException { + int colonIdx = url.indexOf(':'); + if (colonIdx < 0 || colonIdx + 2 >= url.length()) { + throw new URISyntaxException(url, "Lacking protocol"); + } + return url.indexOf('/', colonIdx+2); + } + + public EdgeUrl(URI URI) { + this.domain = new EdgeDomain(URI.getHost()); + this.path = URI.getPath().isEmpty() ? "/" : URI.getPath(); + this.proto = URI.getScheme().toLowerCase(); + this.port = port(URI.getPort(), proto); + } + + public EdgeUrl sibling(String newPath) { + return new EdgeUrl(proto, domain, port, newPath); + } + + + private static Integer port(Integer port, String protocol) { + if (null == port || port < 1) { + return null; + } + if (protocol.equals("http") && port == 80) { + return null; + } + else if (protocol.equals("https") && port == 443) { + return null; + } + return port; + } + + public String toString() { + String portPart = port == null ? 
"" : (":" + port); + + return proto + "://" + domain + portPart + "" + path; + } + + public String dir() { + return path.replaceAll("/[^/]+$", "/"); + } + public String fileName() { + return path.replaceAll(".*/", ""); + } + + public long wideHash() { + long domainHash = domain.hashCode(); + long thisHash = hashCode(); + return (domainHash << 32) | thisHash; + } + + public int depth() { + return (int) path.chars().filter(c -> c=='/').count(); + } + + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/WideHashable.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/WideHashable.java new file mode 100644 index 00000000..3b95711d --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/WideHashable.java @@ -0,0 +1,5 @@ +package nu.marginalia.wmsa.edge.model; + +public interface WideHashable { + long wideHash(); +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/crawl/EdgeContentType.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/crawl/EdgeContentType.java new file mode 100644 index 00000000..70978166 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/crawl/EdgeContentType.java @@ -0,0 +1,15 @@ +package nu.marginalia.wmsa.edge.model.crawl; + + +import lombok.*; + +@AllArgsConstructor +@EqualsAndHashCode +@Getter +@Setter +@Builder +@ToString +public class EdgeContentType { + public final String contentType; + public final String charset; +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/crawl/EdgeDomainIndexingState.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/crawl/EdgeDomainIndexingState.java new file mode 100644 index 00000000..119da59d --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/crawl/EdgeDomainIndexingState.java @@ -0,0 +1,27 @@ +package nu.marginalia.wmsa.edge.model.crawl; + +public enum EdgeDomainIndexingState { + ACTIVE(0), + EXHAUSTED(1), + SPECIAL(2), + 
SOCIAL_MEDIA(3), + BLOCKED(-1), + REDIR(-2), + ERROR(-3), + UNKNOWN(-100); + + public final int code; + + EdgeDomainIndexingState(int code) { + this.code = code; + } + + public static EdgeDomainIndexingState fromCode(int code) { + for (var state : values()) { + if (state.code == code) { + return state; + } + } + return UNKNOWN; + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/crawl/EdgeDomainLink.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/crawl/EdgeDomainLink.java new file mode 100644 index 00000000..7486fec3 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/crawl/EdgeDomainLink.java @@ -0,0 +1,10 @@ +package nu.marginalia.wmsa.edge.model.crawl; + +import lombok.*; +import nu.marginalia.wmsa.edge.model.EdgeDomain; + +@AllArgsConstructor @EqualsAndHashCode @Getter @Setter @Builder @ToString +public class EdgeDomainLink { + public final EdgeDomain source; + public final EdgeDomain destination; +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/crawl/EdgeHtmlStandard.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/crawl/EdgeHtmlStandard.java new file mode 100644 index 00000000..18142da2 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/crawl/EdgeHtmlStandard.java @@ -0,0 +1,19 @@ +package nu.marginalia.wmsa.edge.model.crawl; + +public enum EdgeHtmlStandard { + PLAIN(0, 1), + UNKNOWN(0, 1), + HTML123(0, 1), + HTML4(-0.1, 1.05), + XHTML(-0.1, 1.05), + HTML5(0.5, 1.1); + + public final double offset; + public final double scale; + + EdgeHtmlStandard(double offset, double scale) { + this.offset = offset; + this.scale = scale; + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/crawl/EdgeIndexTask.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/crawl/EdgeIndexTask.java new file mode 100644 index 00000000..1212bfa9 --- /dev/null +++ 
b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/crawl/EdgeIndexTask.java @@ -0,0 +1,33 @@ +package nu.marginalia.wmsa.edge.model.crawl; + +import lombok.AllArgsConstructor; +import lombok.Getter; +import lombok.ToString; +import nu.marginalia.wmsa.edge.model.EdgeDomain; +import nu.marginalia.wmsa.edge.model.EdgeUrl; + +import java.util.ArrayList; +import java.util.List; +import java.util.stream.Stream; + +@Getter @AllArgsConstructor @ToString +public class EdgeIndexTask { + public final EdgeDomain domain; + public final List visited = new ArrayList<>(); + public final List urls = new ArrayList<>(); + public final int pass; + public final int limit; + public double rank; + + public boolean isEmpty() { + return domain == null || urls.isEmpty(); + } + + public Stream streamUrls() { + return urls.stream(); + } + + public int size() { + return urls.size(); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/crawl/EdgePageContent.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/crawl/EdgePageContent.java new file mode 100644 index 00000000..997d25c1 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/crawl/EdgePageContent.java @@ -0,0 +1,25 @@ +package nu.marginalia.wmsa.edge.model.crawl; + +import lombok.Data; +import nu.marginalia.wmsa.edge.model.EdgeUrl; + +import java.util.Map; +import java.util.Set; + +@Data +public class EdgePageContent { + public final EdgeUrl url; + public final EdgePageWordSet words; + public final Map> linkWords; + public final EdgePageMetadata metadata; + public final int hash; + public final String ipAddress; + + public boolean hasHotLink(EdgeUrl url) { + return linkWords.containsKey(url); + } + + public int numWords() { + return metadata.totalWords; + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/crawl/EdgePageMetadata.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/crawl/EdgePageMetadata.java new file mode 100644 
index 00000000..bb192f9e --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/crawl/EdgePageMetadata.java @@ -0,0 +1,50 @@ +package nu.marginalia.wmsa.edge.model.crawl; + +import lombok.*; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +@AllArgsConstructor @EqualsAndHashCode @Getter @Setter @ToString @With +public class EdgePageMetadata { + public final int features; + public final int scriptTags; + public final int rawLength; + public final int textBodyLength; + public final int textDistinctWords; + public final String title; + public final String description; + public final double smutCoefficient; + public final int totalWords; + public final EdgeHtmlStandard htmlStandard; + private static final Logger logger = LoggerFactory.getLogger(EdgePageMetadata.class); + private static EdgePageMetadata _empty + = new EdgePageMetadata(0, 0, + 0, + 0, + 0, + "", + "", + 0., + 1, + EdgeHtmlStandard.UNKNOWN); + public static EdgePageMetadata empty() { + return _empty; + } + + public double quality() { + if (rawLength == 0 || textBodyLength == 0) { + return -5.; + } + +/* double dictionaryFactor = textDistinctWords / 10000.; + if (dictionaryFactor < 0.1) { + dictionaryFactor = 0; + }*/ + + return Math.log(textBodyLength / (double) rawLength)*htmlStandard.scale + + htmlStandard.offset + - scriptTags + // - dictionaryFactor + - smutCoefficient; + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/crawl/EdgePageWordSet.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/crawl/EdgePageWordSet.java new file mode 100644 index 00000000..c4355ae3 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/crawl/EdgePageWordSet.java @@ -0,0 +1,48 @@ +package nu.marginalia.wmsa.edge.model.crawl; + +import lombok.Data; +import nu.marginalia.wmsa.edge.index.model.IndexBlock; + +import java.util.*; + +@Data +public class EdgePageWordSet { + public final Map wordSets; + + public 
EdgePageWordSet(EdgePageWords... words) { + wordSets = new EnumMap<>(IndexBlock.class); + for (EdgePageWords w : words) { + wordSets.put(w.block, w); + } + } + + public EdgePageWords get(IndexBlock block) { + var words = wordSets.get(block); + if (words == null) { + return new EdgePageWords(block); + } + return words; + } + + public void append(IndexBlock block, Collection words) { + wordSets.computeIfAbsent(block, b -> new EdgePageWords(block)).addAll(words); + } + + public Collection values() { + return new ArrayList<>(wordSets.values()); + } + + public boolean isEmpty() { + return 0 == wordSets.values().stream().mapToInt(EdgePageWords::size).sum(); + } + + public String toString() { + var sj = new StringJoiner("\n", "EdgePageWordSet:\n", ""); + wordSets.forEach((block, words) -> { + if (words.size() > 0) { + sj.add("\t" + block + "\t" + words.getWords()); + } + }); + return sj.toString(); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/crawl/EdgePageWords.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/crawl/EdgePageWords.java new file mode 100644 index 00000000..efb20dcc --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/crawl/EdgePageWords.java @@ -0,0 +1,34 @@ +package nu.marginalia.wmsa.edge.model.crawl; +import lombok.Getter; +import lombok.ToString; +import nu.marginalia.wmsa.edge.index.model.IndexBlock; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; + +@ToString @Getter +public class EdgePageWords { + public final IndexBlock block; + public final List words = new ArrayList<>(); + + public EdgePageWords(IndexBlock block) { + this.block = block; + } + public EdgePageWords(IndexBlock block, Collection initial) { + this.block = block; + + addAll(initial); + } + + public void addAll(Collection words) { + this.words.addAll(words); + } + public void addAllMax(Collection words, int limit) { + words.stream().limit(limit).forEach(this.words::add); + } + 
public int size() { + return words.size(); + } + public void addJust(String word) { words.add(word); } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/crawl/EdgeRawPageContents.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/crawl/EdgeRawPageContents.java new file mode 100644 index 00000000..8bc5c8d2 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/crawl/EdgeRawPageContents.java @@ -0,0 +1,24 @@ +package nu.marginalia.wmsa.edge.model.crawl; + +import lombok.AllArgsConstructor; +import lombok.Data; +import lombok.Getter; +import nu.marginalia.wmsa.edge.model.EdgeUrl; + +@Data @Getter @AllArgsConstructor +public class EdgeRawPageContents { + public final EdgeUrl url; + public final EdgeUrl redirectUrl; + public final String data; + public final EdgeContentType contentType; + public final String ip; + public boolean hasCookies; + public final String fetchTimestamp; + + public boolean isAfter(String dateIso8601) { + if (fetchTimestamp == null) { + return false; + } + return fetchTimestamp.compareTo(dateIso8601) >= 0; + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/crawl/EdgeRobotsTxt.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/crawl/EdgeRobotsTxt.java new file mode 100644 index 00000000..5a8bfaea --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/crawl/EdgeRobotsTxt.java @@ -0,0 +1,10 @@ +package nu.marginalia.wmsa.edge.model.crawl; + +import lombok.*; +import nu.marginalia.wmsa.edge.model.EdgeDomain; + +@AllArgsConstructor @EqualsAndHashCode @Getter @Setter @Builder +public class EdgeRobotsTxt { + public final EdgeDomain domain; + public final String robotsTxt; +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/crawl/EdgeUrlState.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/crawl/EdgeUrlState.java new file mode 100644 index 00000000..67fc2b61 --- /dev/null +++ 
b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/crawl/EdgeUrlState.java @@ -0,0 +1,10 @@ +package nu.marginalia.wmsa.edge.model.crawl; + +/** This should correspond to EC_URL.STATE */ +public enum EdgeUrlState { + OK, + REDIRECT, + DEAD, + ARCHIVED, + DISQUALIFIED +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/crawl/EdgeUrlVisit.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/crawl/EdgeUrlVisit.java new file mode 100644 index 00000000..07d7492e --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/crawl/EdgeUrlVisit.java @@ -0,0 +1,21 @@ +package nu.marginalia.wmsa.edge.model.crawl; + +import lombok.Data; +import nu.marginalia.wmsa.edge.model.EdgeUrl; + +@Data +public class EdgeUrlVisit { + public final EdgeUrl url; + public final Integer data_hash_code; + public final Double quality; + public final String title; + public final String description; + public final String ipAddress; + public final String format; + public final int features; + + public final int wordCountDistinct; + public final int wordCountTotal; + + public final EdgeUrlState urlState; +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/search/EdgePageScoreAdjustment.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/search/EdgePageScoreAdjustment.java new file mode 100644 index 00000000..e0cda818 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/search/EdgePageScoreAdjustment.java @@ -0,0 +1,30 @@ +package nu.marginalia.wmsa.edge.model.search; + +import lombok.Builder; +import lombok.Getter; + +@Getter +@Builder +public class EdgePageScoreAdjustment { + final double titleAdj; + final double titleFullHit; + final double urlAdj; + final double domainAdj; + final double descAdj; + final double descHitsAdj; + + private static final EdgePageScoreAdjustment zero = new EdgePageScoreAdjustment(0,0, 0,0,0, 0); + public static EdgePageScoreAdjustment zero() { + return 
zero; + } + + public double getScore() { + return titleAdj + titleFullHit + urlAdj + domainAdj + descAdj + descHitsAdj; + } + + @Override + public String toString() { + return String.format("(%2.2f %2.2f %2.2f %2.2f %2.2f %2.2f)=%2.2f", + titleAdj, titleFullHit, urlAdj, domainAdj, descAdj, descHitsAdj, getScore()); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/search/EdgeSearchResultItem.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/search/EdgeSearchResultItem.java new file mode 100644 index 00000000..dece8aca --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/search/EdgeSearchResultItem.java @@ -0,0 +1,39 @@ +package nu.marginalia.wmsa.edge.model.search; + +import lombok.AllArgsConstructor; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.ToString; +import nu.marginalia.wmsa.edge.model.EdgeDomain; +import nu.marginalia.wmsa.edge.model.EdgeId; +import nu.marginalia.wmsa.edge.model.EdgeUrl; + +import java.util.ArrayList; +import java.util.List; +import java.util.Objects; + +@AllArgsConstructor @ToString @Getter @EqualsAndHashCode +public class EdgeSearchResultItem { + public final int blockId; + public final int queryLength; + public final EdgeId domain; + public final EdgeId url; + public final List scores; + + public EdgeSearchResultItem(int blockId, int queryLength, long val) { + int urlId = (int) (val & 0xFFFFFFFFL); + int domainId = (int) (val >>> 32); + + this.queryLength = queryLength; + this.blockId = blockId; + + url = new EdgeId<>(urlId); + domain = new EdgeId<>(domainId); + scores = new ArrayList<>(); + } + + public long getCombinedId() { + return ((long) domain.getId() << 32L) | url.getId(); + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/search/EdgeSearchResultKeywordScore.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/search/EdgeSearchResultKeywordScore.java new file mode 100644 index 00000000..e20dfbcd 
--- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/search/EdgeSearchResultKeywordScore.java @@ -0,0 +1,14 @@ +package nu.marginalia.wmsa.edge.model.search; + +import lombok.AllArgsConstructor; +import lombok.EqualsAndHashCode; +import lombok.ToString; +import nu.marginalia.wmsa.edge.index.model.IndexBlock; + +@AllArgsConstructor @ToString @EqualsAndHashCode +public class EdgeSearchResultKeywordScore { + public final String keyword; + public final IndexBlock index; + public boolean title; + public boolean link; +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/search/EdgeSearchResultSet.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/search/EdgeSearchResultSet.java new file mode 100644 index 00000000..a703636c --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/search/EdgeSearchResultSet.java @@ -0,0 +1,19 @@ +package nu.marginalia.wmsa.edge.model.search; + +import lombok.AllArgsConstructor; +import lombok.Getter; +import lombok.ToString; +import nu.marginalia.wmsa.edge.index.model.IndexBlock; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +@AllArgsConstructor @Getter @ToString +public class EdgeSearchResultSet { + public Map> resultsList; + + public int size() { + return resultsList.values().stream().mapToInt(List::size).sum(); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/search/EdgeSearchResults.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/search/EdgeSearchResults.java new file mode 100644 index 00000000..e23496c4 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/search/EdgeSearchResults.java @@ -0,0 +1,32 @@ +package nu.marginalia.wmsa.edge.model.search; + +import lombok.AllArgsConstructor; +import lombok.Getter; +import lombok.ToString; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import 
java.util.stream.Stream; + +@AllArgsConstructor @Getter @ToString +public class EdgeSearchResults { + public final Map> results; + + public EdgeSearchResults() { + results = new HashMap<>(); + } + + public int size() { + return results.values().stream().mapToInt(List::size).sum(); + } + + public Stream stream() { + return results.values().stream().flatMap(List::stream); + } + + public List getAllItems() { + return stream().collect(Collectors.toList()); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/search/EdgeSearchResultsKey.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/search/EdgeSearchResultsKey.java new file mode 100644 index 00000000..aefee330 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/search/EdgeSearchResultsKey.java @@ -0,0 +1,13 @@ +package nu.marginalia.wmsa.edge.model.search; + +import lombok.AllArgsConstructor; +import lombok.EqualsAndHashCode; +import lombok.Getter; + +@EqualsAndHashCode +@AllArgsConstructor +@Getter +public class EdgeSearchResultsKey { + public final int bucket; + public final int searchTermCount; +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/search/EdgeSearchSpecification.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/search/EdgeSearchSpecification.java new file mode 100644 index 00000000..1f88e518 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/search/EdgeSearchSpecification.java @@ -0,0 +1,32 @@ +package nu.marginalia.wmsa.edge.model.search; + +import lombok.*; +import nu.marginalia.wmsa.edge.index.model.IndexBlock; +import nu.marginalia.wmsa.edge.index.service.SearchOrder; +import nu.marginalia.wmsa.edge.model.EdgeDomain; +import nu.marginalia.wmsa.edge.model.EdgeId; + +import javax.annotation.Nullable; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +@ToString @Getter @Builder @With @AllArgsConstructor +public class 
EdgeSearchSpecification { + + public List buckets; + public List subqueries; + public final int limitByBucket; + public final int limitByDomain; + public final int limitTotal; + + public final String humanQuery; + public final SearchOrder searchOrder; + public boolean stagger; + public boolean experimental; + + public static EdgeSearchSpecification justIncludes(String... words) { + return new EdgeSearchSpecification(Collections.emptyList(), Collections.singletonList(new EdgeSearchSubquery(Arrays.asList(words), Collections.emptyList(), IndexBlock.Title)), 10, 10, 10, "", SearchOrder.ASCENDING, false, false); + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/search/EdgeSearchSubquery.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/search/EdgeSearchSubquery.java new file mode 100644 index 00000000..9de07248 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/search/EdgeSearchSubquery.java @@ -0,0 +1,34 @@ +package nu.marginalia.wmsa.edge.model.search; + +import lombok.*; +import lombok.experimental.FieldNameConstants; +import nu.marginalia.wmsa.edge.index.model.IndexBlock; +import org.h2.index.Index; + +import java.util.List; + +@ToString +@Getter +@AllArgsConstructor +public class EdgeSearchSubquery { + + public final List searchTermsInclude; + public final List searchTermsExclude; + public final IndexBlock block; + + private final int termSize; + public EdgeSearchSubquery(List searchTermsInclude, List searchTermsExclude, IndexBlock block) { + this.searchTermsInclude = searchTermsInclude; + this.searchTermsExclude = searchTermsExclude; + this.block = block; + this.termSize = (int) searchTermsInclude.stream().flatMapToInt(String::chars).filter(i -> '_'==i).count(); + } + + public EdgeSearchSubquery withBlock(IndexBlock block) { + return new EdgeSearchSubquery(searchTermsInclude, searchTermsExclude, block); + } + + public int termSize() { + return termSize; + } +} diff --git 
a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/search/EdgeUrlDetails.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/search/EdgeUrlDetails.java new file mode 100644 index 00000000..769f32f8 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/search/EdgeUrlDetails.java @@ -0,0 +1,151 @@ +package nu.marginalia.wmsa.edge.model.search; + +import lombok.*; +import nu.marginalia.wmsa.edge.crawler.domain.processor.HtmlFeature; +import nu.marginalia.wmsa.edge.index.EdgeIndexService; +import nu.marginalia.wmsa.edge.model.crawl.EdgeDomainIndexingState; +import nu.marginalia.wmsa.edge.model.EdgeUrl; +import nu.marginalia.wmsa.edge.search.EdgeSearchRankingSymbols; + +import java.util.Objects; + +@AllArgsConstructor @NoArgsConstructor @With @Getter @ToString +public class EdgeUrlDetails { + public int id; + public EdgeUrl url; + public String title; + public String description; + + public double urlQuality; + public double urlQualityRaw; + public double domainQuality; + + public int links; // DEAD + public int words; + public String format; + public int features; + + public EdgePageScoreAdjustment urlQualityAdjustment; + + public long rankingId; + public double termScore; + + public String ip; // BROKEN + public int domainState; + public int queryLength; + + public int dataHash; + + public long rankingIdAdjustment() { + int penalty = 0; + + if (words < 500) { + penalty -= 1; + } + if (urlQuality < -10) { + penalty -= 1; + } + if (isSpecialDomain()) { + penalty -= 1; + } + return penalty; //(int)(Math.log(1+rankingId) / Math.log(100))-1-penalty; + } + + public String getFormat() { + if (null == format) { + return "?"; + } + switch (format) { + case "HTML123": + return "HTML 1-3"; + case "HTML4": + return "HTML 4"; + case "XHTML": + return "XHTML"; + case "HTML5": + return "HTML 5"; + case "PLAIN": + return "Plain Text"; + default: + return "?"; + } + } + + public int hashCode() { + return Integer.hashCode(id); + } + + 
public boolean equals(Object other) { + if (other == null) { + return false; + } + if (other == this) { + return true; + } + if (other instanceof EdgeUrlDetails) { + return ((EdgeUrlDetails) other).id == id; + } + return false; + } + public String getTitle() { + if (title == null || title.isBlank()) { + return url.toString(); + } + return title; + } + + public String getQualityPercent() { + return String.format("%2.2f%%", 100*Math.exp(urlQuality+urlQualityAdjustment.getScore())); + } + public double getRanking() { + double lengthAdjustment = Math.max(1, words / (words + 1000.)); + return (1+termScore)*Math.sqrt(1+rankingId)/Math.max(1E-10, lengthAdjustment *(0.7+0.3*Math.exp(urlQualityAdjustment.getScore()))); + } + + public int getSuperficialHash() { + return Objects.hash(url.path, title); + } + public String getSuperficialHashStr() { + return String.format("%8X", getSuperficialHash()); + } + + + public String getGeminiLink() { + return url.proto + "://" + url.domain.toString() + url.path.replace(" ", "%20").replace("\"", "%22"); + } + public String getGeminiDescription() { + return description.trim(); + } + + public boolean isPlainText() { + return "PLAIN".equals(format); + } + + public boolean isScripts() { + return HtmlFeature.hasFeature(features, HtmlFeature.JS); + } + public boolean isTracking() { + return HtmlFeature.hasFeature(features, HtmlFeature.TRACKING); + } + public boolean isAffiliate() { + return HtmlFeature.hasFeature(features, HtmlFeature.AFFILIATE_LINK); + } + public boolean isMedia() { + return HtmlFeature.hasFeature(features, HtmlFeature.MEDIA); + } + public boolean isCookies() { + return HtmlFeature.hasFeature(features, HtmlFeature.COOKIES); + } + public boolean isSpecialDomain() { + return domainState == EdgeDomainIndexingState.SPECIAL.code; + } + public int getLogRank() { return (int) Math.round(Math.min(Math.log(1+rankingId),10)); } + + public String getRankingSymbol() { + return EdgeSearchRankingSymbols.getRankingSymbol(termScore); + } + + 
public String getRankingSymbolDesc() { + return EdgeSearchRankingSymbols.getRankingSymbolDescription(termScore); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/BrowseResult.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/BrowseResult.java new file mode 100644 index 00000000..df23d75d --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/BrowseResult.java @@ -0,0 +1,11 @@ +package nu.marginalia.wmsa.edge.search; + +import lombok.Data; +import lombok.EqualsAndHashCode; +import nu.marginalia.wmsa.edge.model.EdgeUrl; + +@Data @EqualsAndHashCode +public class BrowseResult { + public final EdgeUrl url; + public final int domainId; +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/BrowseResultSet.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/BrowseResultSet.java new file mode 100644 index 00000000..01f3be99 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/BrowseResultSet.java @@ -0,0 +1,12 @@ +package nu.marginalia.wmsa.edge.search; + +import lombok.AllArgsConstructor; +import lombok.Getter; + +import java.util.List; + +@AllArgsConstructor +@Getter +public class BrowseResultSet { + public final List results; +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/DecoratedSearchResultSet.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/DecoratedSearchResultSet.java new file mode 100644 index 00000000..3b0e2074 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/DecoratedSearchResultSet.java @@ -0,0 +1,22 @@ +package nu.marginalia.wmsa.edge.search; + +import lombok.Getter; +import lombok.ToString; +import nu.marginalia.wmsa.edge.model.search.EdgeUrlDetails; + +import java.util.List; +import java.util.Objects; + +@ToString @Getter +public class DecoratedSearchResultSet { + public final List resultSet; + + public int size() { + return resultSet.size(); + } + + 
public DecoratedSearchResultSet(List resultSet) { + this.resultSet = Objects.requireNonNull(resultSet); + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/DecoratedSearchResults.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/DecoratedSearchResults.java new file mode 100644 index 00000000..de831c77 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/DecoratedSearchResults.java @@ -0,0 +1,33 @@ +package nu.marginalia.wmsa.edge.search; + +import lombok.AllArgsConstructor; +import lombok.Getter; +import nu.marginalia.wmsa.edge.assistant.dict.WikiArticles; +import nu.marginalia.wmsa.edge.model.search.EdgeUrlDetails; +import nu.marginalia.wmsa.edge.search.query.model.EdgeUserSearchParameters; + +import java.net.URLEncoder; +import java.nio.charset.StandardCharsets; +import java.util.List; + +@AllArgsConstructor @Getter +public class DecoratedSearchResults { + private final EdgeUserSearchParameters params; + private final List problems; + private final String evalResult; + private final WikiArticles wiki; + private final List results; + + private final String focusDomain; + private final int focusDomainId; + + public String getQuery() { + return params.humanQuery; + } + public String getProfile() { + return params.getProfile().name; + } + public String getJs() { + return params.jsSetting; + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/EdgeSearchMain.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/EdgeSearchMain.java new file mode 100644 index 00000000..cc8b9c0d --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/EdgeSearchMain.java @@ -0,0 +1,38 @@ +package nu.marginalia.wmsa.edge.search; + +import com.google.inject.Guice; +import com.google.inject.Inject; +import com.google.inject.Injector; +import nu.marginalia.wmsa.configuration.MainClass; +import nu.marginalia.wmsa.configuration.ServiceDescriptor; +import 
nu.marginalia.wmsa.configuration.module.ConfigurationModule; +import nu.marginalia.wmsa.configuration.module.DatabaseModule; +import nu.marginalia.wmsa.configuration.server.Initialization; +import spark.Spark; + +import java.io.IOException; + +public class EdgeSearchMain extends MainClass { + private EdgeSearchService service; + + @Inject + public EdgeSearchMain(EdgeSearchService service) throws IOException { + this.service = service; + } + + public static void main(String... args) { + init(ServiceDescriptor.EDGE_SEARCH, args); + + Spark.staticFileLocation("/static/edge/"); + + Injector injector = Guice.createInjector( + new EdgeSearchModule(), + new ConfigurationModule(), + new DatabaseModule() + ); + + injector.getInstance(EdgeSearchMain.class); + injector.getInstance(Initialization.class).setReady(); + + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/EdgeSearchModule.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/EdgeSearchModule.java new file mode 100644 index 00000000..d40f147a --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/EdgeSearchModule.java @@ -0,0 +1,22 @@ +package nu.marginalia.wmsa.edge.search; + +import com.google.inject.AbstractModule; +import nu.marginalia.wmsa.edge.crawler.domain.language.conf.LanguageModels; + +import java.nio.file.Path; + +public class EdgeSearchModule extends AbstractModule { + + public void configure() { + + bind(LanguageModels.class).toInstance(new LanguageModels( + Path.of("/var/lib/wmsa/model/ngrams-generous-emstr.bin"), + Path.of("/var/lib/wmsa/model/tfreq-new-algo3.bin"), + Path.of("/var/lib/wmsa/model/opennlp-sentence.bin"), + Path.of("/var/lib/wmsa/model/English.RDR"), + Path.of("/var/lib/wmsa/model/English.DICT"), + Path.of("/var/lib/wmsa/model/opennlp-tok.bin") + )); + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/EdgeSearchOperator.java 
b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/EdgeSearchOperator.java new file mode 100644 index 00000000..e8f236e2 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/EdgeSearchOperator.java @@ -0,0 +1,334 @@ +package nu.marginalia.wmsa.edge.search; + +import com.google.inject.Inject; +import com.google.inject.Singleton; +import io.reactivex.rxjava3.core.Observable; +import io.reactivex.rxjava3.schedulers.Schedulers; +import nu.marginalia.wmsa.configuration.server.Context; +import nu.marginalia.wmsa.edge.assistant.client.AssistantClient; +import nu.marginalia.wmsa.edge.assistant.dict.WikiArticles; +import nu.marginalia.wmsa.edge.data.dao.EdgeDataStoreDao; +import nu.marginalia.wmsa.edge.index.client.EdgeIndexClient; +import nu.marginalia.wmsa.edge.index.model.IndexBlock; +import nu.marginalia.wmsa.edge.index.service.SearchOrder; +import nu.marginalia.wmsa.edge.model.*; +import nu.marginalia.wmsa.edge.model.search.*; +import nu.marginalia.wmsa.edge.search.query.model.EdgeSearchQuery; +import nu.marginalia.wmsa.edge.search.query.QueryFactory; +import nu.marginalia.wmsa.edge.search.query.model.EdgeUserSearchParameters; +import nu.marginalia.wmsa.edge.search.results.SearchResultValuator; +import nu.marginalia.wmsa.edge.search.results.model.AccumulatedQueryResults; +import nu.marginalia.wmsa.edge.search.results.SearchResultDecorator; +import nu.marginalia.wmsa.edge.search.results.UrlDeduplicator; +import org.apache.logging.log4j.util.Strings; +import org.jetbrains.annotations.NotNull; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.*; +import java.util.stream.Collectors; + +@Singleton +public class EdgeSearchOperator { + + private static final Logger logger = LoggerFactory.getLogger(EdgeSearchOperator.class); + private final AssistantClient assistantClient; + private final EdgeDataStoreDao edgeDataStoreDao; + private final EdgeIndexClient indexClient; + private final QueryFactory queryFactory; + 
private final SearchResultDecorator resultDecorator; + private final SearchResultValuator valuator; + private final Comparator resultListComparator; + + @Inject + public EdgeSearchOperator(AssistantClient assistantClient, + EdgeDataStoreDao edgeDataStoreDao, + EdgeIndexClient indexClient, + QueryFactory queryFactory, + SearchResultDecorator resultDecorator, + SearchResultValuator valuator + ) { + + this.assistantClient = assistantClient; + this.edgeDataStoreDao = edgeDataStoreDao; + this.indexClient = indexClient; + this.queryFactory = queryFactory; + this.resultDecorator = resultDecorator; + this.valuator = valuator; + + Comparator c = Comparator.comparing(ud -> Math.round(10*(ud.getTermScore() - ud.rankingIdAdjustment()))); + resultListComparator = c.thenComparing(EdgeUrlDetails::getRanking).thenComparing(EdgeUrlDetails::getId); + } + + public List doApiSearch(Context ctx, + EdgeUserSearchParameters params) { + + + var processedQuery = queryFactory.createQuery(params); + + logger.info("Human terms (API): {}", Strings.join(processedQuery.searchTermsHuman, ',')); + + DecoratedSearchResultSet queryResults = performQuery(ctx, processedQuery, true); + + return queryResults.resultSet; + } + + public DecoratedSearchResults doSearch(Context ctx, EdgeUserSearchParameters params, String evalResult) { + + + Observable definitions = getWikiArticle(ctx, params.getHumanQuery()); + + var processedQuery = queryFactory.createQuery(params); + + logger.info("Human terms: {}", Strings.join(processedQuery.searchTermsHuman, ',')); + + DecoratedSearchResultSet queryResults = performQuery(ctx, processedQuery, false); + + return new DecoratedSearchResults(params, + getProblems(ctx, params.getHumanQuery(), evalResult, queryResults, processedQuery), + evalResult, + definitions.onErrorReturn((e) -> new WikiArticles()).blockingFirst(), + queryResults.resultSet, + processedQuery.domain, + getDomainId(processedQuery.domain)); + } + + private int getDomainId(String domain) { + int domainId = 
-1; + try { + if (domain != null) { + return edgeDataStoreDao.getDomainId(new EdgeDomain(domain)).getId(); + } + } + catch (NoSuchElementException ex) { + + } + return domainId; + } + + public DecoratedSearchResultSet performDumbQuery(Context ctx, EdgeSearchProfile profile, IndexBlock block, int limitPerDomain, int limitTotal, String... termsInclude) { + List sqs = new ArrayList<>(); + + sqs.add(new EdgeSearchSubquery(Arrays.asList(termsInclude), Collections.emptyList(), block)); + + EdgeSearchSpecification specs = new EdgeSearchSpecification(profile.buckets, sqs, 100, limitPerDomain, limitTotal, "", SearchOrder.ASCENDING, EdgeSearchProfile.YOLO.equals(profile), false); + + return performQuery(ctx, new EdgeSearchQuery(specs), true); + } + + private DecoratedSearchResultSet performQuery(Context ctx, EdgeSearchQuery processedQuery, boolean asFastAsPossible) { + + AccumulatedQueryResults queryResults = new AccumulatedQueryResults(); + UrlDeduplicator deduplicator = new UrlDeduplicator(processedQuery.specs.limitByDomain); + + if (processedQuery.searchTermsHuman.size()<=4 && !asFastAsPossible) { + fetchResultsMulti(ctx, processedQuery, queryResults, deduplicator); + } + else { + fetchResultsSimple(ctx, processedQuery, queryResults, deduplicator); + } + + List resultList = new ArrayList<>(queryResults.size()); + + for (var details : queryResults.results) { + if (details.getUrlQuality() < -100) { + continue; + } + var scoreAdjustment = adjustScoreBasedOnQuery(details, processedQuery.specs); + details = details.withUrlQualityAdjustment(scoreAdjustment); + + resultList.add(details); + }; + + resultList.sort(resultListComparator); + + return new DecoratedSearchResultSet(resultList); + } + + private List getProblems(Context ctx, String humanQuery, String evalResult, DecoratedSearchResultSet queryResults, EdgeSearchQuery processedQuery) { + final List problems = new ArrayList<>(processedQuery.problems); + boolean siteSearch = processedQuery.domain != null; + + if (!siteSearch) 
{ + if (queryResults.size() <= 5 && null == evalResult) { + spellCheckTerms(ctx, processedQuery).forEach(problems::add); + } + + if (queryResults.size() <= 5) { + problems.add("Try rephrasing the query, changing the word order or using synonyms to get different results. Tips."); + } + + if (humanQuery.toLowerCase().matches(".*(definition|define).*")) { + problems.add("Tip: Try using a query that looks like define:word if you want a dictionary definition"); + } + } + + if (humanQuery.contains("/")) { + problems.clear(); + problems.add("There is a known bug with search terms that contain a slash that causes them to be marked as unsupported; as a workaround, try using a dash instead. AC-DC will work, AC/DC does not."); + } + + return problems; + } + + + private EdgePageScoreAdjustment adjustScoreBasedOnQuery(EdgeUrlDetails p, EdgeSearchSpecification specs) { + String titleLC = p.title == null ? "" : p.title.toLowerCase(); + String descLC = p.description == null ? "" : p.description.toLowerCase(); + String urlLC = p.url == null ? "" : p.url.path.toLowerCase(); + String domainLC = p.url == null ? 
"" : p.url.domain.toString().toLowerCase(); + + String[] searchTermsLC = specs.subqueries.get(0).searchTermsInclude.stream() + .map(String::toLowerCase) + .flatMap(s -> Arrays.stream(s.split("_"))) + .toArray(String[]::new); + int termCount = searchTermsLC.length; + + String[] titleParts = titleLC.split("[:!|./]|(\\s-|-\\s)|\\s{2,}"); + double titleHitsAdj = 0.; + for (String titlePart : titleParts) { + titleHitsAdj += Arrays.stream(searchTermsLC).filter(titlePart::contains).mapToInt(String::length).sum() + / (double) Math.max(1, titlePart.trim().length()); + } + + double titleFullHit = 0.; + if (termCount > 1 && titleLC.contains(specs.humanQuery.replaceAll("\"", "").toLowerCase())) { + titleFullHit = termCount; + } + long descHits = Arrays.stream(searchTermsLC).filter(descLC::contains).count(); + long urlHits = Arrays.stream(searchTermsLC).filter(urlLC::contains).count(); + long domainHits = Arrays.stream(searchTermsLC).filter(domainLC::contains).count(); + + double descHitsAdj = 0.; + for (String word : descLC.split("[^\\w]+")) { + descHitsAdj += Arrays.stream(searchTermsLC) + .filter(term -> term.length() > word.length()) + .filter(term -> term.contains(word)) + .mapToDouble(term -> word.length() / (double) term.length()) + .sum(); + } + + return EdgePageScoreAdjustment.builder() + .descAdj(Math.min(termCount, descHits) / (10. * termCount)) + .descHitsAdj(descHitsAdj / 10.) + .domainAdj(2 * Math.min(termCount, domainHits) / (double) termCount) + .urlAdj(Math.min(termCount, urlHits) / (10. 
* termCount)) + .titleAdj(5 * titleHitsAdj / (Math.max(1, titleParts.length) * Math.log(titleLC.length() + 2))) + .titleFullHit(titleFullHit) + .build(); + } + + @NotNull + private Observable getWikiArticle(Context ctx, String humanQuery) { + return assistantClient + .encyclopediaLookup(ctx, + humanQuery.replaceAll("\\s+", "_") + .replaceAll("\"", "") + ).subscribeOn(Schedulers.io()); + } + + private void fetchResultsMulti(Context ctx, EdgeSearchQuery processedQuery, AccumulatedQueryResults queryResults, UrlDeduplicator deduplicator) { + + boolean debug = processedQuery.specs.subqueries.get(0).searchTermsExclude.contains("special:debug"); + + var blocksOrder = processedQuery.specs.subqueries.stream().map(sq -> sq.block).distinct().sorted(Comparator.comparing(block -> block.sortOrder)).toList(); + + EdgeSearchSpecification[] specsArray = + processedQuery.specs.subqueries.stream() + .filter(sq -> sq.block == IndexBlock.TitleKeywords) + .map(sq -> processedQuery.specs.withSubqueries(blocksOrder.stream().map(sq::withBlock).collect(Collectors.toList()))) + //.flatMap(specs -> processedQuery.specs.buckets.stream().map(bucket -> specs.withBuckets(List.of(bucket)))) + .toArray(EdgeSearchSpecification[]::new); + var resultSets = indexClient.multiQuery(ctx, specsArray); + + if (debug) { + for (var s : specsArray) { + logger.info("{}", s); + } + for (IndexBlock block : indexBlockSearchOrder) { + resultSets.forEach(res -> { + res.resultsList.getOrDefault(block, Collections.emptyList()).forEach(b2 -> { + b2.results.forEach((idx,items) -> { + items.forEach(i -> + logger.info("{} {} - {}", block, idx, i) + ); + }); + }); + }); + } + } + + Set> seenUrls = new HashSet<>(); + for (IndexBlock block : indexBlockSearchOrder) { + var resultsJoined = resultSets.stream().flatMap(rs -> rs.resultsList.getOrDefault(block, Collections.emptyList()).stream()) + .map(EdgeSearchResults::getResults) + .flatMap(m -> m.entrySet().stream()) + .flatMap(m -> m.getValue().stream()) + 
.sorted(Comparator.comparing(item -> preEvaluateItem(item, block))) + .filter(item -> seenUrls.add(item.url)) + .collect(Collectors.toList()); + + queryResults.append( 100, resultDecorator.decorateSearchResults(resultsJoined, block, deduplicator)); + + if (debug) { + logger.info("{} -> {} items", resultsJoined, queryResults.size()); + } + + } + if (debug) { + logger.info("-> {} items", queryResults.size()); + } + + + } + + private final WeakHashMap scoreCache = new WeakHashMap<>(); + private double preEvaluateItem(EdgeSearchResultItem item, IndexBlock block) { + synchronized (scoreCache) { + return scoreCache.computeIfAbsent(item, i -> valuator.evaluateTerms(i.scores, block, 1000)); + } + } + + private void fetchResultsSimple(Context ctx, EdgeSearchQuery processedQuery, AccumulatedQueryResults queryResults, UrlDeduplicator deduplicator) { + var resultSet = indexClient.query(ctx, processedQuery.specs); + + logger.debug("{}", resultSet); + + for (IndexBlock block : indexBlockSearchOrder) { + for (var results : resultSet.resultsList.getOrDefault(block, Collections.emptyList())) { + var items = results.getAllItems(); + queryResults.append(100, resultDecorator.decorateSearchResults(items, block, deduplicator)); + } + } + } + + static IndexBlock[] indexBlockSearchOrder = Arrays.stream(IndexBlock.values()).sorted(Comparator.comparing(i -> i.sortOrder)).toArray(IndexBlock[]::new); + + private Iterable spellCheckTerms(Context ctx, EdgeSearchQuery disjointedQuery) { + return Observable.fromIterable(disjointedQuery.searchTermsHuman) + .subscribeOn(Schedulers.io()) + .flatMap(term -> assistantClient.spellCheck(ctx, term) + .onErrorReturn(e -> Collections.emptyList()) + .filter(results -> hasSpellSuggestions(term, results)) + .map(suggestions -> searchTermToProblemDescription(term, suggestions)) + ) + .blockingIterable(); + } + + private boolean hasSpellSuggestions(String term, List results) { + if (results.size() > 1) { + return true; + } + else if (results.size() == 1) { + 
return !term.equalsIgnoreCase(results.get(0)); + } + return false; + } + + private String searchTermToProblemDescription(String term, List suggestions) { + return "\"" + term + "\" could be spelled " + + suggestions.stream().map(s -> "\""+s+"\"").collect(Collectors.joining(", ")); + } + + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/EdgeSearchProfile.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/EdgeSearchProfile.java new file mode 100644 index 00000000..05fcaa04 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/EdgeSearchProfile.java @@ -0,0 +1,68 @@ +package nu.marginalia.wmsa.edge.search; + +import nu.marginalia.wmsa.edge.index.model.IndexBlock; +import nu.marginalia.wmsa.edge.index.service.SearchOrder; + +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.stream.Collectors; + +public enum EdgeSearchProfile { + DEFAULT("default", SearchOrder.ASCENDING, + Collections.emptyList(), + List.of(IndexBlock.TitleKeywords, IndexBlock.Title, IndexBlock.Top, IndexBlock.Middle, IndexBlock.Low, IndexBlock.Link, IndexBlock.Words, IndexBlock.NamesWords), + 0, 1), + MODERN("modern", SearchOrder.ASCENDING, + Collections.emptyList(), + List.of(IndexBlock.TitleKeywords, IndexBlock.Title, IndexBlock.Top, IndexBlock.Middle, IndexBlock.Low, IndexBlock.Link, IndexBlock.Words, IndexBlock.NamesWords), + 2), + CORPO("corpo", SearchOrder.ASCENDING, + Collections.emptyList(), + List.of(IndexBlock.TitleKeywords, IndexBlock.Title, IndexBlock.Top, IndexBlock.Middle, IndexBlock.Low, IndexBlock.Link, IndexBlock.Words, IndexBlock.NamesWords), + 4, 5, 6, 7), + YOLO("yolo", SearchOrder.ASCENDING, + Collections.emptyList(), + List.of(IndexBlock.TitleKeywords, IndexBlock.Title, IndexBlock.Top, IndexBlock.Middle, IndexBlock.Low, IndexBlock.Link, IndexBlock.Words, IndexBlock.NamesWords), + 0, 2, 1, 3, 4, 6), + CORPO_CLEAN("corpo-clean", SearchOrder.ASCENDING, + 
Collections.emptyList(), + List.of(IndexBlock.TitleKeywords, IndexBlock.Title, IndexBlock.Top, IndexBlock.Middle, IndexBlock.Low, IndexBlock.Link, IndexBlock.Words, IndexBlock.NamesWords), + 4, 5), + ACADEMIA("academia", SearchOrder.ASCENDING, + Collections.emptyList(), + List.of(IndexBlock.TitleKeywords, IndexBlock.Title, IndexBlock.Top, IndexBlock.Middle, IndexBlock.Low, IndexBlock.Link, IndexBlock.Words, IndexBlock.NamesWords), + 3), + ; + + + public final String name; + public final SearchOrder order; + public final List additionalSearchTerm; + public final List buckets; + public final List indexBlocks; + + EdgeSearchProfile(String name, SearchOrder order, + List additionalSearchTerm, + List indexBlocks, + int... buckets) { + this.name = name; + this.order = order; + this.additionalSearchTerm = additionalSearchTerm; + this.indexBlocks = indexBlocks; + this.buckets = Arrays.stream(buckets).boxed().collect(Collectors.toList()); + } + + static EdgeSearchProfile getSearchProfile(String param) { + if (null == param) { + return YOLO; + } + return switch (param) { + case "modern" -> MODERN; + case "default" -> DEFAULT; + case "corpo" -> CORPO; + case "academia" -> ACADEMIA; + default -> YOLO; + }; + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/EdgeSearchRankingSymbols.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/EdgeSearchRankingSymbols.java new file mode 100644 index 00000000..43ffd0a8 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/EdgeSearchRankingSymbols.java @@ -0,0 +1,34 @@ +package nu.marginalia.wmsa.edge.search; + +import java.util.TreeMap; + +public class EdgeSearchRankingSymbols { + + private static final TreeMap symbols; + static { + symbols = new TreeMap<>(); + symbols.put(1.0, new RankingSymbol("⭐", "Fits search terms very well")); + symbols.put(2.0, new RankingSymbol("🟢", "Fits search terms well")); + symbols.put(4.0, new RankingSymbol("🟡", "Fits search terms decently")); 
+ symbols.put(6.0, new RankingSymbol("🟠", "Could fit search terms")); + symbols.put(100.0, new RankingSymbol("🟤", "Poor fit for search terms, grasping at straws")); + } + + public static String getRankingSymbol(double termScore) { + return forScore(termScore).symbol; + } + public static String getRankingSymbolDescription(double termScore) { + return forScore(termScore).description; + } + + private static RankingSymbol forScore(double score) { + var e = symbols.ceilingEntry(score); + if (e == null) { + e = symbols.lastEntry(); + } + return e.getValue(); + } + + private record RankingSymbol(String symbol, String description) { + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/EdgeSearchService.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/EdgeSearchService.java new file mode 100644 index 00000000..c7e8bbda --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/EdgeSearchService.java @@ -0,0 +1,369 @@ +package nu.marginalia.wmsa.edge.search; + +import com.google.common.base.Strings; +import com.google.gson.Gson; +import com.google.gson.GsonBuilder; +import com.google.inject.Inject; +import com.google.inject.name.Named; +import io.reactivex.rxjava3.schedulers.Schedulers; +import lombok.SneakyThrows; +import nu.marginalia.wmsa.auth.api.model.ApiSearchResult; +import nu.marginalia.wmsa.auth.api.model.ApiSearchResults; +import nu.marginalia.wmsa.client.exception.TimeoutException; +import nu.marginalia.wmsa.configuration.server.Context; +import nu.marginalia.wmsa.configuration.server.Initialization; +import nu.marginalia.wmsa.configuration.server.MetricsServer; +import nu.marginalia.wmsa.configuration.server.Service; +import nu.marginalia.wmsa.data_store.client.DataStoreClient; +import nu.marginalia.wmsa.data_store.meta.DomainInformation; +import nu.marginalia.wmsa.edge.assistant.client.AssistantClient; +import nu.marginalia.wmsa.edge.assistant.dict.DictionaryResponse; +import 
nu.marginalia.wmsa.edge.assistant.screenshot.ScreenshotService; +import nu.marginalia.wmsa.edge.data.dao.EdgeDataStoreDao; +import nu.marginalia.wmsa.edge.data.dao.task.EdgeDomainBlacklist; +import nu.marginalia.wmsa.edge.index.client.EdgeIndexClient; +import nu.marginalia.wmsa.edge.index.model.IndexBlock; +import nu.marginalia.wmsa.edge.model.EdgeDomain; +import nu.marginalia.wmsa.edge.model.EdgeId; +import nu.marginalia.wmsa.edge.model.crawl.EdgeDomainIndexingState; +import nu.marginalia.wmsa.edge.model.EdgeUrl; +import nu.marginalia.wmsa.edge.search.query.model.EdgeUserSearchParameters; +import nu.marginalia.wmsa.renderer.mustache.MustacheRenderer; +import nu.marginalia.wmsa.renderer.mustache.RendererFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import spark.Request; +import spark.Response; +import spark.Spark; + +import java.net.URLEncoder; +import java.nio.charset.StandardCharsets; +import java.nio.file.Path; +import java.util.*; +import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; + +public class EdgeSearchService extends Service { + + private final EdgeDataStoreDao edgeDataStoreDao; + private final EdgeIndexClient indexClient; + private final DataStoreClient dataStoreClient; + private final AssistantClient assistantClient; + private final UnitConversion unitConversion; + private final EdgeSearchOperator searchOperator; + private final EdgeDomainBlacklist blacklist; + private final ScreenshotService screenshotService; + + private final MustacheRenderer browseResultsRenderer; + private final MustacheRenderer searchResultsRenderer; + private final MustacheRenderer searchResultsRendererGmi; + private final MustacheRenderer dictionaryRenderer; + private final MustacheRenderer dictionaryRendererGmi; + private final MustacheRenderer> conversionRenderer; + private final MustacheRenderer> conversionRendererGmi; + + private final MustacheRenderer siteInfoRenderer; + private final MustacheRenderer siteInfoRendererGmi; + + 
private final Gson gson = new GsonBuilder().create(); + + private static final Logger logger = LoggerFactory.getLogger(EdgeSearchService.class); + volatile private int indexSize = 0; + + private final String maintenanceMessage = null; + + @SneakyThrows + @Inject + public EdgeSearchService(@Named("service-host") String ip, + @Named("service-port") Integer port, + EdgeDataStoreDao edgeDataStoreDao, + EdgeIndexClient indexClient, + RendererFactory rendererFactory, + Initialization initialization, + MetricsServer metricsServer, + DataStoreClient dataStoreClient, + AssistantClient assistantClient, + UnitConversion unitConversion, + EdgeSearchOperator searchOperator, + EdgeDomainBlacklist blacklist, + ScreenshotService screenshotService + ) { + super(ip, port, initialization, metricsServer); + this.edgeDataStoreDao = edgeDataStoreDao; + this.indexClient = indexClient; + + browseResultsRenderer = rendererFactory.renderer("edge/browse-results"); + + searchResultsRenderer = rendererFactory.renderer("edge/search-results"); + searchResultsRendererGmi = rendererFactory.renderer("edge/search-results-gmi"); + + dictionaryRenderer = rendererFactory.renderer("edge/dictionary-results"); + dictionaryRendererGmi = rendererFactory.renderer("edge/dictionary-results-gmi"); + + siteInfoRenderer = rendererFactory.renderer("edge/site-info"); + siteInfoRendererGmi = rendererFactory.renderer("edge/site-info-gmi"); + + conversionRenderer = rendererFactory.renderer("edge/conversion-results"); + conversionRendererGmi = rendererFactory.renderer("edge/conversion-results-gmi"); + + this.dataStoreClient = dataStoreClient; + this.assistantClient = assistantClient; + this.unitConversion = unitConversion; + this.searchOperator = searchOperator; + this.blacklist = blacklist; + this.screenshotService = screenshotService; + + Spark.staticFiles.expireTime(600); + + Spark.get("/search", this::pathSearch); + + Spark.get("/api/search", this::apiSearch, gson::toJson); + + Spark.get("/public/search", 
this::pathSearch); + Spark.get("/public/submit", this::pathSubmit); + + Spark.get("/site-search/:site/*", this::siteSearchRedir); + Spark.get("/public/site-search/:site/*", this::siteSearchRedir); + + Spark.exception(Exception.class, (e,p,q) -> { + logger.error("Error during processing", e); + serveError(Context.fromRequest(p), q); + }); + + Spark.awaitInitialization(); + } + + private Object siteSearchRedir(Request request, Response response) { + final String site = request.params("site"); + final String queryRaw = request.splat()[0]; + + final String query = URLEncoder.encode(String.format("%s site:%s", queryRaw, site), StandardCharsets.UTF_8); + final String profile = request.queryParamOrDefault("profile", "yolo"); + + response.redirect("https://search.marginalia.nu/search?query="+query+"&profile="+profile); + + return null; + } + + + private void serveError(Context ctx, Response rsp) { + boolean isIndexUp = indexClient.isAlive(); + + try { + if (!isIndexUp) { + rsp.body("Error

    Error

    Oops! It appears the index server is offline.

    The server was probably restarted to bring online some changes. Restarting the index typically takes a few minutes, during which searches can't be served.

    This page will attempt to refresh automatically every few seconds.

    "); + } else if (indexClient.isBlocked(ctx).blockingFirst()) { + rsp.body("Error

    Error

    Oops! It appears the index server is starting up.

    The server was probably restarted to bring online some changes. Restarting the index typically takes a few minutes, during which searches can't be served.

    This page will attempt to refresh automatically every few seconds.

    "); + } + else { + rsp.body("Error

    Error

    Oops! An unknown error occurred. The index server seems to be up, so I don't know why this is. Please send an email to kontakt@marginalia.nu telling me what you did :-)

    "); + } + } + catch (Exception ex) { + logger.error("Error", ex); + rsp.body("Error

    Error

    Oops! It appears the index server is unresponsive.

    The server was probably restarted to bring online some changes. Restarting the index typically takes a few minutes, during which searches can't be served.

    This page will attempt to refresh automatically every few seconds.

    "); + } + + } + + @SneakyThrows + private Object pathSubmit(Request request, Response response) { + String url = request.queryString(); + + var urlToSubmit = new EdgeUrl(url); + + logger.info("Submitting {}", url); + edgeDataStoreDao.putUrl(0, urlToSubmit); + + return "ok"; + } + + @SneakyThrows + private Object apiSearch(Request request, Response response) { + + final var ctx = Context.fromRequest(request); + final String queryParam = request.queryParams("query"); + final int limit; + EdgeSearchProfile profile = EdgeSearchProfile.YOLO; + + String count = request.queryParamOrDefault("count", "20"); + limit = Integer.parseInt(count); + + String index = request.queryParamOrDefault("index", "0"); + if (!Strings.isNullOrEmpty(index)) { + profile = switch (index) { + case "0" -> EdgeSearchProfile.YOLO; + case "1" -> EdgeSearchProfile.MODERN; + case "2" -> EdgeSearchProfile.DEFAULT; + case "3" -> EdgeSearchProfile.CORPO_CLEAN; + default -> EdgeSearchProfile.CORPO_CLEAN; + }; + } + + final String humanQuery = queryParam.trim(); + + var results = searchOperator.doApiSearch(ctx, new EdgeUserSearchParameters(humanQuery, profile, "")); + + return new ApiSearchResults("RESTRICTED", humanQuery, results.stream().map(ApiSearchResult::new).limit(limit).collect(Collectors.toList())); + } + + @SneakyThrows + private Object pathSearch(Request request, Response response) { + + final var ctx = Context.fromRequest(request); + + final String queryParam = request.queryParams("query"); + if (null == queryParam || queryParam.isBlank()) { + response.redirect("https://search.marginalia.nu/"); + return null; + } + + final String profileStr = Optional.ofNullable(request.queryParams("profile")).orElse("yolo"); + + try { + final String humanQuery = queryParam.trim(); + final String format = request.queryParams("format"); + + var eval = unitConversion.tryEval(ctx, humanQuery); + var conversion = unitConversion.tryConversion(ctx, humanQuery); + if (conversion.isPresent()) { + if 
("gmi".equals(format)) { + response.type("text/gemini"); + return conversionRendererGmi.render(Map.of("query", humanQuery, "result", conversion.get())); + } else { + return conversionRenderer.render(Map.of("query", humanQuery, "result", conversion.get(), "profile", profileStr)); + } + } + if (humanQuery.matches("define:[A-Za-z\\s-0-9]+")) { + var results = lookupDefinition(ctx, humanQuery); + + if ("gmi".equals(format)) { + response.type("text/gemini"); + return dictionaryRendererGmi.render(results, Map.of("query", humanQuery)); + } else { + return dictionaryRenderer.render(results, Map.of("query", humanQuery, "profile", profileStr)); + } + } else if (humanQuery.matches("site:[.A-Za-z\\-0-9]+")) { + var results = siteInfo(ctx, humanQuery); + + + var domain = results.getDomain(); + logger.info("Domain: {}", domain); + + DecoratedSearchResultSet resultSet; + Path screenshotPath = null; + if (null != domain) { + resultSet = searchOperator.performDumbQuery(ctx, EdgeSearchProfile.CORPO, IndexBlock.Words, 100, 100, "site:"+domain); + + screenshotPath = Path.of("/screenshot/" + edgeDataStoreDao.getDomainId(domain).getId()); + } + else { + resultSet = new DecoratedSearchResultSet(Collections.emptyList()); + } + + if ("gmi".equals(format)) { + response.type("text/gemini"); + return siteInfoRendererGmi.render(results, Map.of("query", humanQuery)); + } else { + return siteInfoRenderer.render(results, Map.of("query", humanQuery, "focusDomain", Objects.requireNonNullElse(domain, ""), "profile", profileStr, "results", resultSet.resultSet, "screenshot", screenshotPath == null ? 
"" : screenshotPath.toString())); + } + } else if (humanQuery.matches("browse:[.A-Za-z\\-0-9]+")) { + var results = browseSite(ctx, humanQuery); + + if (null != results) { + return browseResultsRenderer.render(results, Map.of("query", humanQuery, "profile", profileStr)); + } + } + + + final var jsSetting = Optional.ofNullable(request.queryParams("js")).orElse("default"); + var results = searchOperator.doSearch(ctx, new EdgeUserSearchParameters(humanQuery, + EdgeSearchProfile.getSearchProfile(profileStr), jsSetting), eval.orElse(null) + ); + + results.getResults().removeIf(detail -> blacklist.isBlacklisted(edgeDataStoreDao.getDomainId(detail.url.domain))); + + if ("gmi".equals(format)) { + response.type("text/gemini"); + return searchResultsRendererGmi.render(results); + } else { + if (maintenanceMessage != null) { + return searchResultsRenderer.render(results, Map.of("maintenanceMessage", maintenanceMessage)); + } + else { + return searchResultsRenderer.render(results); + } + } + } + catch (TimeoutException te) { + serveError(ctx, response); + return null; + } + catch (Exception ex) { + logger.error("Error", ex); + serveError(ctx, response); + return null; + } + } + + private DomainInformation siteInfo(Context ctx, String humanQuery) { + String definePrefix = "site:"; + String word = humanQuery.substring(definePrefix.length()).toLowerCase(); + + logger.info("Fetching Site Info: {}", word); + try { + var results = dataStoreClient + .siteInfo(ctx, word) + .blockingFirst(); + logger.debug("Results = {}", results); + + return results; + } + catch (Exception ex) { + logger.debug("No Results"); + + return new DomainInformation(null, false, 0, 0, 0, 0, 0, 0, 0, EdgeDomainIndexingState.UNKNOWN, Collections.emptyList()); + } + + } + + private BrowseResultSet browseSite(Context ctx, String humanQuery) { + String definePrefix = "browse:"; + String word = humanQuery.substring(definePrefix.length()).toLowerCase(); + + try { + if ("random".equals(word)) { + var results = 
edgeDataStoreDao.getRandomDomains(25, blacklist); + results.removeIf(res -> !screenshotService.hasScreenshot(new EdgeId<>(res.domainId))); + return new BrowseResultSet(results); + } + else { + var domain = edgeDataStoreDao.getDomainId(new EdgeDomain(word)); + var neighbors = edgeDataStoreDao.getDomainNeighborsAdjacent(domain, blacklist, 45); + + neighbors.removeIf(res -> !screenshotService.hasScreenshot(new EdgeId<>(res.domainId))); + + return new BrowseResultSet(neighbors); + } + } + catch (Exception ex) { + logger.info("No Results"); + return null; + } + } + + @SneakyThrows + private DictionaryResponse lookupDefinition(Context ctx, String humanQuery) { + String definePrefix = "define:"; + String word = humanQuery.substring(definePrefix.length()).toLowerCase(); + + logger.info("Defining: {}", word); + var results = assistantClient + .dictionaryLookup(ctx, word) + .blockingFirst(); + logger.debug("Results = {}", results); + + return results; + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/UnitConversion.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/UnitConversion.java new file mode 100644 index 00000000..15cbafeb --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/UnitConversion.java @@ -0,0 +1,77 @@ +package nu.marginalia.wmsa.edge.search; + +import nu.marginalia.wmsa.client.exception.RemoteException; +import nu.marginalia.wmsa.configuration.server.Context; +import nu.marginalia.wmsa.edge.assistant.client.AssistantClient; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.inject.Inject; +import javax.inject.Singleton; +import java.util.Optional; +import java.util.function.Predicate; +import java.util.regex.Pattern; + +@Singleton +public class UnitConversion { + private final Logger logger = LoggerFactory.getLogger(getClass()); + private final Pattern conversionPattern = 
Pattern.compile("((\\d+|\\s+|[.()\\-^+%*/]|log[^a-z]|log2[^a-z]|sqrt[^a-z]|log10|cos[^a-z]|sin[^a-z]|tan[^a-z]|log2|pi[^a-z]|e[^a-z]|2pi[^a-z])+)\\s*([a-zA-Z][a-zA-Z^.0-9]*\\s?[a-zA-Z^.0-9]*)\\s+in\\s+([a-zA-Z^.0-9]+\\s?[a-zA-Z^.0-9]*)"); + private final Predicate evalPredicate = Pattern.compile("(\\d+|\\s+|[.()\\-^+%*/]|log|log2|sqrt|log10|cos|sin|tan|pi|e|2pi)+").asMatchPredicate(); + + private final AssistantClient assistantClient; + + @Inject + public UnitConversion(AssistantClient assistantClient) { + this.assistantClient = assistantClient; + } + + public Optional tryConversion(Context context, String query) { + var matcher = conversionPattern.matcher(query); + if (!matcher.matches()) + return Optional.empty(); + + String value = matcher.group(1); + String from = matcher.group(3); + String to = matcher.group(4); + + logger.info("{} -> '{}' '{}' '{}'", query, value, from, to); + + try { + return Optional.of(assistantClient.unitConversion(context, value, from, to).blockingFirst()); + } + catch (RemoteException ex) { + return Optional.empty(); + } + } + + public boolean isNumeric(String str) { + try { + Double.parseDouble(str); + return true; + } + catch (NumberFormatException ex) { + return false; + } + } + + public Optional tryEval(Context context, String query) { + if (!evalPredicate.test(query)) { + return Optional.empty(); + } + + var expr = query.toLowerCase().trim(); + + if (expr.chars().allMatch(Character::isDigit)) { + return Optional.empty(); + } + + logger.info("eval({})", expr); + + try { + return Optional.of(assistantClient.evalMath(context, expr).blockingFirst()); + } + catch (RemoteException ex) { + return Optional.empty(); + } + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/client/EdgeSearchClient.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/client/EdgeSearchClient.java new file mode 100644 index 00000000..53a29067 --- /dev/null +++ 
b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/client/EdgeSearchClient.java @@ -0,0 +1,29 @@ +package nu.marginalia.wmsa.edge.search.client; + +import com.google.inject.Singleton; +import io.reactivex.rxjava3.core.Observable; +import nu.marginalia.wmsa.auth.api.model.ApiSearchResults; +import nu.marginalia.wmsa.client.AbstractDynamicClient; +import nu.marginalia.wmsa.configuration.ServiceDescriptor; +import nu.marginalia.wmsa.configuration.server.Context; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.annotation.CheckReturnValue; +import java.net.URLEncoder; +import java.nio.charset.StandardCharsets; + +@Singleton +public class EdgeSearchClient extends AbstractDynamicClient { + private final Logger logger = LoggerFactory.getLogger(getClass()); + + public EdgeSearchClient() { + super(ServiceDescriptor.EDGE_SEARCH); + } + + @CheckReturnValue + public Observable query(Context ctx, String queryString, int count, int profile) { + return this.get(ctx, String.format("/api/search?query=%s&count=%d&index=%d", URLEncoder.encode(queryString, StandardCharsets.UTF_8), count, profile), ApiSearchResults.class); + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/query/EnglishDictionary.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/query/EnglishDictionary.java new file mode 100644 index 00000000..b45cec7d --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/query/EnglishDictionary.java @@ -0,0 +1,161 @@ +package nu.marginalia.wmsa.edge.search.query; + +import com.google.inject.Inject; +import nu.marginalia.wmsa.edge.assistant.dict.NGramDict; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.BufferedReader; +import java.io.InputStreamReader; +import java.util.*; +import java.util.regex.Pattern; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +public class EnglishDictionary { + private final Set englishWords = new 
HashSet<>(); + private final NGramDict dict; + private final Logger logger = LoggerFactory.getLogger(getClass()); + + @Inject + public EnglishDictionary(NGramDict dict) { + this.dict = dict; + try (var resource = Objects.requireNonNull(ClassLoader.getSystemResourceAsStream("dictionary/en-words"), + "Could not load word frequency table"); + var br = new BufferedReader(new InputStreamReader(resource)) + ) { + for (;;) { + String s = br.readLine(); + if (s == null) { + break; + } + englishWords.add(s.toLowerCase()); + } + } + catch (Exception ex) { + throw new RuntimeException(ex); + } + } + + public boolean isWord(String word) { + return englishWords.contains(word); + } + + private static Pattern ingPattern = Pattern.compile(".*(\\w)\\1ing$"); + + public Collection getWordVariants(String s) { + var variants = findWordVariants(s); + long freqBaseline = dict.getTermFreq(s); + + return variants.stream() + .filter(var -> freqBaseline*10 > dict.getTermFreq(var) && freqBaseline/10 < dict.getTermFreq(var) + ).collect(Collectors.toList()); + } + + + public Collection findWordVariants(String s) { + int sl = s.length(); + + if (sl < 2) { + return Collections.emptyList(); + } + if (s.endsWith("s")) { + String a = s.substring(0, sl-1); + String b = s + "es"; + if (isWord(a) && isWord(b)) { + return List.of(a, b); + } + else if (isWord(a)) { + return List.of(a); + } + else if (isWord(b)) { + return List.of(b); + } + } + if (s.endsWith("sm")) { + String a = s.substring(0, sl-1)+"t"; + String b = s.substring(0, sl-1)+"ts"; + if (isWord(a) && isWord(b)) { + return List.of(a, b); + } + else if (isWord(a)) { + return List.of(a); + } + else if (isWord(b)) { + return List.of(b); + } + } + if (s.endsWith("st")) { + String a = s.substring(0, sl-1)+"m"; + String b = s + "s"; + if (isWord(a) && isWord(b)) { + return List.of(a, b); + } + else if (isWord(a)) { + return List.of(a); + } + else if (isWord(b)) { + return List.of(b); + } + } + else if (ingPattern.matcher(s).matches() && sl > 4) { 
// humming, clapping + var a = s.substring(0, sl-4); + var b = s.substring(0, sl-3) + "ed"; + + if (isWord(a) && isWord(b)) { + return List.of(a, b); + } + else if (isWord(a)) { + return List.of(a); + } + else if (isWord(b)) { + return List.of(b); + } + } + else { + String a = s + "s"; + String b = ingForm(s); + String c = s + "ed"; + + if (isWord(a) && isWord(b) && isWord(c)) { + return List.of(a, b, c); + } + else if (isWord(a) && isWord(b)) { + return List.of(a, b); + } + else if (isWord(b) && isWord(c)) { + return List.of(b, c); + } + else if (isWord(a) && isWord(c)) { + return List.of(a, c); + } + else if (isWord(a)) { + return List.of(a); + } + else if (isWord(b)) { + return List.of(b); + } + else if (isWord(c)) { + return List.of(c); + } + } + + return Collections.emptyList(); + } + + public String ingForm(String s) { + if (s.endsWith("t") && !s.endsWith("tt")) { + return s + "ting"; + } + if (s.endsWith("n") && !s.endsWith("nn")) { + return s + "ning"; + } + if (s.endsWith("m") && !s.endsWith("mm")) { + return s + "ming"; + } + if (s.endsWith("r") && !s.endsWith("rr")) { + return s + "ring"; + } + return s + "ing"; + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/query/QueryFactory.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/query/QueryFactory.java new file mode 100644 index 00000000..2dd9bab0 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/query/QueryFactory.java @@ -0,0 +1,186 @@ +package nu.marginalia.wmsa.edge.search.query; + +import com.google.inject.Inject; +import com.google.inject.Singleton; +import nu.marginalia.wmsa.edge.assistant.dict.NGramDict; +import nu.marginalia.wmsa.edge.crawler.domain.language.WordPatterns; +import nu.marginalia.wmsa.edge.crawler.domain.language.conf.LanguageModels; +import nu.marginalia.wmsa.edge.index.model.IndexBlock; +import nu.marginalia.wmsa.edge.model.search.EdgeSearchSpecification; +import 
nu.marginalia.wmsa.edge.model.search.EdgeSearchSubquery; +import nu.marginalia.wmsa.edge.search.EdgeSearchProfile; +import nu.marginalia.wmsa.edge.search.query.model.EdgeSearchQuery; +import nu.marginalia.wmsa.edge.search.query.model.EdgeUserSearchParameters; +import org.eclipse.jetty.http.HttpStatus; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import spark.Spark; + +import java.util.*; + +@Singleton +public class QueryFactory { + + private final LanguageModels lm; + private final NGramDict dict; + private final EnglishDictionary englishDictionary; + private final Logger logger = LoggerFactory.getLogger(getClass()); + + @Inject + public QueryFactory(LanguageModels lm, NGramDict dict, EnglishDictionary englishDictionary) { + this.lm = lm; + this.dict = dict; + + this.englishDictionary = englishDictionary; + } + + public QueryParser getParser() { + return new QueryParser(englishDictionary, new QueryVariants(lm ,dict, englishDictionary)); + } + + public EdgeSearchQuery createQuery(EdgeUserSearchParameters params) { + final var profile = params.getProfile(); + final var jsSetting = params.getJsSetting(); + + final var processedQuery = createQuery(getParser(), params); + + processedQuery.specs.experimental = EdgeSearchProfile.CORPO.equals(profile); + processedQuery.specs.stagger = EdgeSearchProfile.YOLO.equals(profile); + + List subqueries = new ArrayList<>(processedQuery.specs.subqueries.size() * profile.indexBlocks.size()); + + for (var sq : processedQuery.specs.subqueries) { + for (var block : profile.indexBlocks) { + subqueries.add(sq.withBlock(block)); + } + } + + processedQuery.specs.subqueries.clear(); + processedQuery.specs.subqueries.addAll(subqueries); + + processedQuery.specs.subqueries.forEach(sq -> { + sq.searchTermsInclude.addAll(profile.additionalSearchTerm); + if (jsSetting.equals("yes-js")) { + sq.searchTermsExclude.add("js:false"); + } + if (jsSetting.equals("no-js")) { + sq.searchTermsExclude.add("js:true"); + } + }); + + 
processedQuery.specs.subqueries.sort(Comparator.comparing(sq -> -sq.termSize()*2.3 + sq.block.sortOrder)); + + return processedQuery; + } + + + public EdgeSearchQuery createQuery(QueryParser queryParser, EdgeUserSearchParameters params) { + final var query = params.humanQuery; + final var profile = params.getProfile(); + + if (query.length() > 1000) { + Spark.halt(HttpStatus.BAD_REQUEST_400, "That's too much, man"); + } + + List searchTermsHuman = new ArrayList<>(); + List problems = new ArrayList<>(); + String domain = null; + + var basicQuery = queryParser.parse(query); + + if (basicQuery.size() >= 8) { + problems.add("Your search query is too long"); + basicQuery.clear(); + } + + for (Token t : basicQuery) { + if (t.type == TokenType.QUOT_TERM || t.type == TokenType.LITERAL_TERM) { + if (t.str.startsWith("site:")) { + t.str = normalizeDomainName(t.str); + } + + searchTermsHuman.addAll(toHumanSearchTerms(t)); + analyzeSearchTerm(problems, t); + } + } + + var queryPermutations = queryParser.permuteQueriesNew(basicQuery); + List subqueries = new ArrayList<>(); + + + for (var parts : queryPermutations) { + List searchTermsExclude = new ArrayList<>(); + List searchTermsInclude = new ArrayList<>(); + + for (Token t : parts) { + switch (t.type) { + case EXCLUDE_TERM: + searchTermsExclude.add(t.str); + break; + case LITERAL_TERM: // fallthrough; + case QUOT_TERM: + searchTermsInclude.add(t.str); + if (t.str.toLowerCase().startsWith("site:")) { + domain = t.str.substring("site:".length()); + } + + break; + default: + logger.warn("Unexpected token type {}", t); + } + } + + subqueries.add(new EdgeSearchSubquery(searchTermsInclude, searchTermsExclude, IndexBlock.TitleKeywords)); + } + + + var specsBuilder = EdgeSearchSpecification.builder() + .subqueries(subqueries) + .limitByBucket(50) + .limitTotal(100) + .searchOrder(profile.order) + .humanQuery(query) + .buckets(profile.buckets); + + if (domain != null) { + specsBuilder = specsBuilder.limitByDomain(100); + } else { + 
specsBuilder = specsBuilder.limitByDomain(2); + } + + EdgeSearchSpecification specs = specsBuilder.build(); + + return new EdgeSearchQuery(specs, searchTermsHuman, domain); + + } + + private String normalizeDomainName(String str) { + return str.toLowerCase(); + } + + private List toHumanSearchTerms(Token t) { + if (t.type == TokenType.LITERAL_TERM) { + return Arrays.asList(t.displayStr.split("\\s+")); + } + else if (t.type == TokenType.QUOT_TERM) { + return Arrays.asList(t.displayStr.replace("\"", "").split("\\s+")); + + } + return Collections.emptyList(); + } + + private void analyzeSearchTerm(List problems, Token term) { + final String word = term.str; + + if (word.length() < WordPatterns.MIN_WORD_LENGTH) { + problems.add("Search term \"" + term.displayStr + "\" too short"); + } + if (!word.contains("_") && word.length() >= WordPatterns.MAX_WORD_LENGTH) { + problems.add("Search term \"" + term.displayStr + "\" too long"); + } + if (!word.contains("_") && !WordPatterns.wordPattern.matcher(word.replaceAll("[_:]","")).matches()) { + problems.add("The term \"" + term.displayStr + "\" contains characters that are not currently supported"); + } + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/query/QueryParser.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/query/QueryParser.java new file mode 100644 index 00000000..ef0ce398 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/query/QueryParser.java @@ -0,0 +1,428 @@ +package nu.marginalia.wmsa.edge.search.query; + +import lombok.EqualsAndHashCode; +import lombok.ToString; +import nu.marginalia.wmsa.edge.crawler.domain.language.WordPatterns; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.*; +import java.util.regex.Pattern; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import static java.util.stream.Stream.concat; + +public class QueryParser { + private static final Logger logger = 
LoggerFactory.getLogger(QueryParser.class); + + private final EnglishDictionary englishDictionary; + private final QueryVariants queryVariants; + + public QueryParser(EnglishDictionary englishDictionary, QueryVariants queryVariants) { + this.englishDictionary = englishDictionary; + this.queryVariants = queryVariants; + } + + public List parse(String query) { + List tokens = extractBasicTokens(query); + + for (int i = 0; i < tokens.size(); i++) { + var t = tokens.get(i); + if (t.type == TokenType.QUOT) { + tokens.set(i, new Token(TokenType.QUOT_TERM, + t.str.replaceAll("\\s+", WordPatterns.WORD_TOKEN_JOINER), + t.displayStr)); + } + else if (t.type == TokenType.LITERAL_TERM + && (t.str.endsWith(":")||t.str.endsWith(".")) + && t.str.length() > 1) + { + tokens.set(i, + new Token(TokenType.LITERAL_TERM, t.str.substring(0, t.str.length()-1), + t.displayStr)); + } + } + + for (int i = 0; i < tokens.size() - 1; i++) { + var t = tokens.get(i); + var tn = tokens.get(i+1); + + if (t.type == TokenType.MINUS) { + tokens.set(i, new Token(TokenType.EXCLUDE_TERM, tn.str, "-"+tn.str)); + tokens.remove(i+1); + } + } + + return tokens; + } + + private static final Pattern noisePattern = Pattern.compile("[(),]"); + + public List extractBasicTokens(String rawQuery) { + List tokens = new ArrayList<>(); + + String query = noisePattern.matcher(rawQuery).replaceAll(" "); + + for (int i = 0; i < query.length(); i++) { + int chr = query.charAt(i); + if ('"' == chr) { + int end = query.indexOf('"', i+1); + if (end == -1) { + end = query.length(); + } + tokens.add(new Token(TokenType.QUOT, + query.substring(i+1, end).toLowerCase(), + query.substring(i, Math.min(query.length(), end+1)))); + i = end; + } + else if ('\u201C' == chr) { + int end = query.indexOf('\u201D', i+1); + if (end == -1) { + end = query.length(); + } + tokens.add(new Token(TokenType.QUOT, + query.substring(i+1, end).toLowerCase(), + query.substring(i, Math.min(query.length(), end+1)))); + i = end; + } + else if ('-' == chr) 
{ + tokens.add(new Token(TokenType.MINUS, "\"")); + } + else if (Character.isSpaceChar(chr)) { + // + } + else { + int end = query.indexOf(' ', i); + if (end == -1) { + end = query.length(); + } + tokens.add(new Token(TokenType.LITERAL_TERM, + query.substring(i, end).toLowerCase(), + query.substring(i, end))); + i = end; + } + } + return tokens; + } + + + public List> variantQueries(List items) { + int start = -1; + int end = items.size(); + + for (int i = 0; i < items.size(); i++) { + var token = items.get(i); + + if (start < 0) { + if (token.type == TokenType.LITERAL_TERM && WordPatterns.wordQualitiesPredicate.test(token.str)) { + start = i; + } + } + else { + if (token.type != TokenType.LITERAL_TERM || !WordPatterns.wordPredicateEither.test(token.str)) { + end = i; + break; + } + } + } + + if (start >= 0 && end - start > 1) { + List> variantParts = getVariantSearchTerms(items.subList(start, end)); + int s = start; + int e = end; + return variantParts.stream().map(part -> + concat(items.subList(0, s).stream(), concat(part.stream(), items.subList(e, items.size()).stream())) + .collect(Collectors.toList())) + .peek(lst -> lst.removeIf(this::isJunkWord)) + .limit(24) + .collect(Collectors.toList()); + } + else { + return List.of(items); + } + } + + + + public List> permuteQueries(List items) { + int start = -1; + int end = items.size(); + + for (int i = 0; i < items.size(); i++) { + var token = items.get(i); + + if (start < 0) { + if (token.type == TokenType.LITERAL_TERM && WordPatterns.wordQualitiesPredicate.test(token.str)) { + start = i; + } + } + else { + if (token.type != TokenType.LITERAL_TERM || !WordPatterns.wordPredicateEither.test(token.str)) { + end = i; + break; + } + } + } + + if (start >= 0 && end - start > 1) { + List> permuteParts = combineSearchTerms(items.subList(start, end)); + int s = start; + int e = end; + return permuteParts.stream().map(part -> + concat(items.subList(0, s).stream(), concat(part.stream(), items.subList(e, 
items.size()).stream())) + .collect(Collectors.toList())) + .peek(lst -> lst.removeIf(this::isJunkWord)) + .limit(24) + .collect(Collectors.toList()); + } + else { + return List.of(items); + } + } + + + public List> permuteQueriesNew(List items) { + int start = -1; + int end = items.size(); + + for (int i = 0; i < items.size(); i++) { + var token = items.get(i); + + if (start < 0) { + if (token.type == TokenType.LITERAL_TERM && WordPatterns.wordQualitiesPredicate.test(token.str)) { + start = i; + } + } + else { + if (token.type != TokenType.LITERAL_TERM || !WordPatterns.wordPredicateEither.test(token.str)) { + end = i; + break; + } + } + } + + if (start >= 0 && end - start >= 1) { + var result = queryVariants.getQueryVariants(items.subList(start, end)); + + logger.debug("{}", result); + + if (result.isEmpty()) { + logger.warn("Empty variants result, falling back on old code"); + return permuteQueries(items); + } + + List> basic = new ArrayList<>(); + + for (var query : result.faithful) { + var tokens = query.terms.stream().map(term -> new Token(TokenType.LITERAL_TERM, term)).collect(Collectors.toList()); + + basic.add(tokens); + } + for (var query : result.alternative) { + var tokens = query.terms.stream().map(term -> new Token(TokenType.LITERAL_TERM, term)).collect(Collectors.toList()); + + basic.add(tokens); + } + + int si = start; + int ei = end; + return basic.stream().map(part -> + concat(items.subList(0, si).stream(), concat(part.stream(), items.subList(ei, items.size()).stream())).collect(Collectors.toList())) + .collect(Collectors.toList()); + } + else { + return List.of(items); + } + } + + private boolean isJunkWord(Token token) { + if (WordPatterns.isStopWord(token.str) && + !token.str.matches("^(\\d+|([a-z]+:.*))$")) { + return true; + } + return switch (token.str) { + case "vs", "versus", "or", "and" -> true; + default -> false; + }; + } + + private List> getVariantSearchTerms(List subList) { + int size = subList.size(); + if (size < 1) { + return 
Collections.emptyList(); + } + else if (size == 1) { + if (WordPatterns.isStopWord(subList.get(0).str)) { + return Collections.emptyList(); + } + return getWordVariants(subList.get(0)).map(List::of).collect(Collectors.toList()); + } + + List> cdrs = getVariantSearchTerms(subList.subList(1, subList.size())); + List cars = getWordVariants(subList.get(0)).collect(Collectors.toList()); + + List> ret = new ArrayList<>(cars.size() * cdrs.size()); + for (var car : cars) { + if (ret.size() >= 32) { + break; + } + for (var cdr : cdrs) { + ret.add(List.of(joinTokens(prepend(car, cdr)))); + } + } + return ret; + } + + private Stream getWordVariants(Token token) { + var s = token.str; + int sl = s.length(); + Stream base = Stream.of(token); + Stream alternatives; + if (sl < 2) { + return base; + } + if (s.endsWith("s")) { + alternatives = Stream.of(s.substring(0, sl-1), s + "es"); + } + else if (s.matches(".*(\\w)\\1ing$") && sl > 4) { // humming, clapping + var basea = s.substring(0, sl-4); + var baseb = s.substring(0, sl-3); + alternatives = Stream.of(basea, baseb + "ed"); + } + else { + alternatives = Stream.of(s+"s", s+"ing", s+"ed"); + } + + return Stream.concat(Stream.of(token), alternatives.filter(englishDictionary::isWord).map(str -> new Token(token.type, str, token.displayStr))); + } + + private List prepend(Token t, List lst) { + List ret = new ArrayList<>(lst.size() + 1); + ret.add(t); + ret.addAll(lst); + return ret; + } + + private List> combineSearchTerms(List subList) { + int size = subList.size(); + if (size < 1) { + return Collections.emptyList(); + } + else if (size == 1) { + if (WordPatterns.isStopWord(subList.get(0).str)) { + return Collections.emptyList(); + } + return List.of(subList); + } + + List> results = new ArrayList<>(size*(size+1)/2); + + if (subList.size() <= 4 && subList.get(0).str.length() >= 2 && !isPrefixWord(subList.get(subList.size()-1).str)) { + results.add(List.of(joinTokens(subList))); + } +outer: for (int i = size - 1; i >= 1; i--) { + 
+ var left = combineSearchTerms(subList.subList(0, i)); + var right = combineSearchTerms(subList.subList(i, size)); + + for (var l : left) { + if (results.size() > 48) { + break outer; + } + + for (var r : right) { + if (results.size() > 48) { + break outer; + } + + List combined = new ArrayList<>(l.size() + r.size()); + combined.addAll(l); + combined.addAll(r); + if (!results.contains(combined)) { + results.add(combined); + } + } + } + } + if (!results.contains(subList)) { + results.add(subList); + } + Comparator> tc = (o1, o2) -> { + int dJoininess = o2.stream().mapToInt(s->(int)Math.pow(joininess(s.str), 2)).sum() - + o1.stream().mapToInt(s->(int)Math.pow(joininess(s.str), 2)).sum(); + if (dJoininess == 0) { + return (o2.stream().mapToInt(s->(int)Math.pow(rightiness(s.str), 2)).sum() - + o1.stream().mapToInt(s->(int)Math.pow(rightiness(s.str), 2)).sum()); + } + return (int) Math.signum(dJoininess); + }; + results.sort(tc); + return results; + } + + private boolean isPrefixWord(String str) { + return switch (str) { + case "the", "of", "when" -> true; + default -> false; + }; + } + + private boolean isSuffixWord(String str) { + return (str.length() < 2); + } + + + int joininess(String s) { + return (int) s.chars().filter(c -> c == '_').count(); + } + int rightiness(String s) { + int rightiness = 0; + for (int i = 0; i < s.length(); i++) { + if (s.charAt(i) == '_') { + rightiness+=i; + } + } + return rightiness; + } + + private Token joinTokens(List subList) { + return new Token(TokenType.LITERAL_TERM, + subList.stream().map(t -> t.str).collect(Collectors.joining("_")), + subList.stream().map(t -> t.str).collect(Collectors.joining(" "))); + } +} + +@ToString @EqualsAndHashCode +class Token { + public final TokenType type; + public String str; + public final String displayStr; + + Token(TokenType type, String str, String displayStr) { + this.type = type; + this.str = str; + this.displayStr = safeString(displayStr); + } + + + Token(TokenType type, String str) { + 
this.type = type; + this.str = str; + this.displayStr = safeString(str); + } + + private static String safeString(String s) { + return s.replaceAll("<", "<") + .replaceAll(">", ">"); + } +} + +enum TokenType { + TERM, + QUOT, + MINUS, + LITERAL_TERM, + QUOT_TERM, + EXCLUDE_TERM, +}; \ No newline at end of file diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/query/QueryVariants.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/query/QueryVariants.java new file mode 100644 index 00000000..92b6cc7e --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/query/QueryVariants.java @@ -0,0 +1,396 @@ +package nu.marginalia.wmsa.edge.search.query; + +import com.google.inject.Inject; +import com.google.inject.Singleton; +import lombok.AllArgsConstructor; +import lombok.Getter; +import lombok.ToString; +import nu.marginalia.wmsa.edge.assistant.dict.NGramDict; +import nu.marginalia.wmsa.edge.crawler.domain.language.conf.LanguageModels; +import nu.marginalia.wmsa.edge.crawler.domain.language.processing.KeywordExtractor; +import nu.marginalia.wmsa.edge.crawler.domain.language.processing.SentenceExtractor; +import nu.marginalia.wmsa.edge.crawler.domain.language.processing.model.DocumentSentence; +import nu.marginalia.wmsa.edge.crawler.domain.language.processing.model.WordSpan; +import opennlp.tools.stemmer.PorterStemmer; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.*; +import java.util.regex.Pattern; + +@Singleton +public class QueryVariants { + + private final Logger logger = LoggerFactory.getLogger(getClass()); + private final KeywordExtractor keywordExtractor; + private final SentenceExtractor sentenceExtractor; + private final NGramDict dict; + private final PorterStemmer ps = new PorterStemmer(); + + private final static int MAX_NGRAM_LENGTH = 4; + private final EnglishDictionary englishDictionary; + + @Inject + public QueryVariants(LanguageModels lm, NGramDict dict, 
EnglishDictionary englishDictionary) { + this.englishDictionary = englishDictionary; + this.keywordExtractor = new KeywordExtractor(); + this.sentenceExtractor = new SentenceExtractor(lm); + this.dict = dict; + } + + + final Pattern numWordBoundary = Pattern.compile("[0-9][a-zA-Z]|[a-zA-Z][0-9]"); + final Pattern dashBoundary = Pattern.compile("-"); + + @AllArgsConstructor + private static class Word { + public final String stemmed; + public final String word; + public final String wordOriginal; + } + + @AllArgsConstructor @Getter @ToString + public static class QueryVariant { + public final List terms; + public final double value; + } + + @Getter @ToString + public static class QueryVariantSet { + List faithful = new ArrayList<>(); + List alternative = new ArrayList<>(); + + public boolean isEmpty() { + return faithful.isEmpty() && alternative.isEmpty(); + } + } + + public QueryVariantSet getQueryVariants(List query) { + final String queryAsString = joinQuery(query); + + final TreeMap> byStart = new TreeMap<>(); + + logger.debug("QAS: {}", queryAsString); + + var sentence = sentenceExtractor.extractSentence(queryAsString); + + for (int i = 0; i < sentence.posTags.length; i++) { + if (sentence.posTags[i].startsWith("N") || sentence.posTags[i].startsWith("V")) { + sentence.posTags[i] = "NNP"; + } + else if ("JJ".equals(sentence.posTags[i]) || "CD".equals(sentence.posTags[i]) || sentence.posTags[i].startsWith("P")) { + sentence.posTags[i] = "NNP"; + sentence.setIsStopWord(i, false); + } + } + + for (var kw : keywordExtractor.getKeywordsFromSentence(sentence)) { + byStart.computeIfAbsent(kw.start, k -> new ArrayList<>()).add(kw); + } + + final List> livingSpans = new ArrayList<>(); + + var first = byStart.firstEntry(); + if (first == null) { + byStart.put(0, List.of(new WordSpan(0, sentence.length()))); + } + else if (first.getKey() > 0) { + List elongatedFirstWords = new ArrayList<>(first.getValue().size()); + + first.getValue().forEach(span -> { + 
elongatedFirstWords.add(new WordSpan(0, span.end)); + }); + + byStart.put(0, elongatedFirstWords); + } + + final List> goodSpans = getWordSpans(byStart, sentence, livingSpans); + + List> faithfulQueries = new ArrayList<>(); + List> alternativeQueries = new ArrayList<>(); + + for (var ls : goodSpans) { + faithfulQueries.addAll(createTokens(ls)); + } + + for (var span : goodSpans) { + alternativeQueries.addAll(joinTerms(span)); +// alternativeQueries.addAll(swapTerms(span)); + } + + for (var ls : goodSpans) { + var last = ls.get(ls.size() - 1); + + if (!last.wordOriginal.isBlank() && !Character.isUpperCase(last.wordOriginal.charAt(0))) { + var altLast = englishDictionary.getWordVariants(last.word); + for (String s : altLast) { + List newList = new ArrayList<>(ls.size()); + for (int i = 0; i < ls.size() - 1; i++) { + newList.add(ls.get(i).word); + } + newList.add(s); + alternativeQueries.add(newList); + } + } + + } + QueryVariantSet returnValue = new QueryVariantSet(); + returnValue.faithful.addAll(evaluateQueries(faithfulQueries)); + returnValue.faithful.addAll(evaluateQueries(alternativeQueries)); + + returnValue.faithful.sort(Comparator.comparing(QueryVariant::getValue)); + returnValue.alternative.sort(Comparator.comparing(QueryVariant::getValue)); + + return returnValue; + } + + Pattern underscore = Pattern.compile("_"); + + private List evaluateQueries(List> queryStrings) { + List ret = new ArrayList<>(); + for (var lst : queryStrings) { + double q = 0; + for (var word : lst) { + String[] parts = underscore.split(word); + StringJoiner combined = new StringJoiner("_"); + for (String part : parts) { + combined.add(ps.stem(part)); + } + q += Math.log(1 + dict.getTermFreqStemmed(combined.toString())); + } + ret.add(new QueryVariant(lst, q)); + } + return ret; + } + + private Collection> createTokens(List ls) { + List asTokens = new ArrayList<>(); + List> ret = new ArrayList<>(); + + + boolean dash = false; + boolean num = false; + + for (var span : ls) { + dash |= 
dashBoundary.matcher(span.word).find(); + num |= numWordBoundary.matcher(span.word).find(); + if (ls.size() == 1 || !isOmittableWord(span.word)) { + asTokens.add(span.word); + } + }; + ret.add(asTokens); + + if (dash) { + ret.addAll(combineDashWords(ls)); + } + + if (num) { + ret.addAll(splitWordNum(ls)); + } + + return ret; + } + + private boolean isOmittableWord(String word) { + return switch (word) { + case "vs", "or", "and", "versus", "is", "the", "why", "when", "if", "who", "are", "am" -> true; + default -> false; + }; + } + + private Collection> splitWordNum(List ls) { + List asTokens2 = new ArrayList<>(); + + boolean num = false; + + for (var span : ls) { + var wordMatcher = numWordBoundary.matcher(span.word); + var stemmedMatcher = numWordBoundary.matcher(span.stemmed); + + int ws = 0; + int ss = 0; + boolean didSplit = false; + while (wordMatcher.find(ws) && stemmedMatcher.find(ss)) { + ws = wordMatcher.start()+1; + ss = stemmedMatcher.start()+1; + if (dict.getTermFreqStemmed(splitAtNumBoundaryAndStem(span.word, stemmedMatcher.start(), "_")) > 0 + || dict.getTermFreqStemmed(splitAtNumBoundaryAndStem(span.word, stemmedMatcher.start(), "-")) > 0) + { + String combined = splitAtNumBoundary(span.word, wordMatcher.start(), "_"); + asTokens2.add(combined); + didSplit = true; + num = true; + } + } + + if (!didSplit) { + asTokens2.add(span.word); + } + }; + + if (num) { + return List.of(asTokens2); + } + return Collections.emptyList(); + } + + private Collection> combineDashWords(List ls) { + List asTokens2 = new ArrayList<>(); + boolean dash = false; + + for (var span : ls) { + var matcher = dashBoundary.matcher(span.word); + if (matcher.find() && dict.getTermFreqStemmed(ps.stem(dashBoundary.matcher(span.word).replaceAll(""))) > 0) { + dash = true; + String combined = dashBoundary.matcher(span.word).replaceAll(""); + asTokens2.add(combined); + } + else { + asTokens2.add(span.word); + } + }; + + if (dash) { + return List.of(asTokens2); + } + return 
Collections.emptyList(); + } + + private String splitAtNumBoundary(String in, int splitPoint, String joiner) { + return in.substring(0, splitPoint+1) + joiner + in.substring(splitPoint+1); + } + + private String splitAtNumBoundaryAndStem(String in, int splitPoint, String joiner) { + return ps.stem(in.substring(0, splitPoint+1)) + joiner + ps.stem(in.substring(splitPoint+1)); + } + + private List> getWordSpans(TreeMap> byStart, DocumentSentence sentence, List> livingSpans) { + List> goodSpans = new ArrayList<>(); + for (int i = 0; i < sentence.length(); i++) { + var spans = byStart.get(i); + + + if (spans == null ) + continue; + + for (var span : spans) { + ArrayList fragment = new ArrayList<>(); + fragment.add(span); + livingSpans.add(fragment); + } + + if (sentence.posTags[i].startsWith("N") || sentence.posTags[i].startsWith("V")) break; + } + + + while (!livingSpans.isEmpty()) { + + final List> newLivingSpans = new ArrayList<>(livingSpans.size()); + + for (var span : livingSpans) { + int end = span.get(span.size()-1).end; + + if (end == sentence.length()) { + var gs = new ArrayList(span.size()); + for (var s : span) { + gs.add(new Word(sentence.constructStemmedWordFromSpan(s), sentence.constructWordFromSpan(s), + s.size() == 1 ? 
sentence.words[s.start] : "")); + } + goodSpans.add(gs); + } + var nextWordsKey = byStart.ceilingKey(end); + + if (null == nextWordsKey) + continue; + + for (var next : byStart.get(nextWordsKey)) { + var newSpan = new ArrayList(span.size() + 1); + newSpan.addAll(span); + newSpan.add(next); + newLivingSpans.add(newSpan); + } + } + + livingSpans.clear(); + livingSpans.addAll(newLivingSpans); + } + + return goodSpans; + } + + private List> swapTerms(List span) { + List> ret = new ArrayList<>(); + + for (int i = 0; i < span.size()-1; i++) { + var a = span.get(i); + var b = span.get(i+1); + + var stemmed = b.stemmed + "_" + a.stemmed; + + if (dict.getTermFreqStemmed(stemmed) > 0) { + List asTokens = new ArrayList<>(); + + for (int j = 0; j < i; j++) { + var word = span.get(j).word; + asTokens.add(word); + } + { + var word = b.word + "_" + a.word; + asTokens.add(word); + } + for (int j = i+2; j < span.size(); j++) { + var word = span.get(j).word; + asTokens.add(word); + } + + ret.add(asTokens); + } + } + + return ret; + } + + + private List> joinTerms(List span) { + List> ret = new ArrayList<>(); + + for (int i = 0; i < span.size()-1; i++) { + var a = span.get(i); + var b = span.get(i+1); + + var stemmed = ps.stem(a.word + b.word); + + double scoreCombo = dict.getTermFreqStemmed(stemmed); + if (scoreCombo > 0) { + List asTokens = new ArrayList<>(); + + for (int j = 0; j < i; j++) { + var word = span.get(j).word; + asTokens.add(word); + } + { + var word = a.word + b.word; + asTokens.add(word); + } + for (int j = i+2; j < span.size(); j++) { + var word = span.get(j).word; + asTokens.add(word); + } + + ret.add(asTokens); + } + } + + return ret; + } + + private String joinQuery(List query) { + StringJoiner s = new StringJoiner(" "); + + for (var t : query) { + s.add(t.displayStr); + } + + return s.toString(); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/query/model/EdgeSearchQuery.java 
/**
 * Immutable bundle of the parameters supplied with a user search request.
 *
 * <p>Lombok generates a constructor taking all three fields in declaration
 * order, and a getter for each field.
 */
@AllArgsConstructor @Getter
public class EdgeUserSearchParameters {
    // NOTE(review): presumably the query text exactly as entered by the user - confirm against callers.
    public final String humanQuery;
    // The search profile selected for this request.
    public final EdgeSearchProfile profile;
    // NOTE(review): semantics of this setting are not visible here; verify accepted values at the call site.
    public final String jsSetting;
}
com.google.inject.Inject; +import gnu.trove.list.array.TIntArrayList; +import gnu.trove.map.hash.TIntObjectHashMap; +import io.reactivex.rxjava3.annotations.NonNull; +import nu.marginalia.wmsa.edge.data.dao.EdgeDataStoreDao; +import nu.marginalia.wmsa.edge.index.model.IndexBlock; +import nu.marginalia.wmsa.edge.model.crawl.EdgeDomainIndexingState; +import nu.marginalia.wmsa.edge.model.search.EdgeSearchResultItem; +import nu.marginalia.wmsa.edge.model.search.EdgeUrlDetails; +import nu.marginalia.wmsa.edge.search.results.model.TieredSearchResult; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ArrayList; +import java.util.Comparator; +import java.util.List; +import java.util.TreeMap; +import java.util.stream.Collectors; + +public class SearchResultDecorator { + private final EdgeDataStoreDao edgeDataStoreDao; + private final SearchResultValuator valuator; + private final Logger logger = LoggerFactory.getLogger(getClass()); + + @Inject + public SearchResultDecorator(EdgeDataStoreDao edgeDataStoreDao, SearchResultValuator valuator) { + this.edgeDataStoreDao = edgeDataStoreDao; + this.valuator = valuator; + } + + @NonNull + public List decorateSearchResults(List items, IndexBlock block, UrlDeduplicator deduplicator) { + List results = new ArrayList<>(); + + int dedups = 0; + for (var details : getAllUrlDetails(items, block)) { + if (deduplicator.filter(details)) { + results.add(new TieredSearchResult(details.queryLength, getEffectiveBlock(details), details)); + } + else { + dedups++; + } + } + if (dedups > 0) { + logger.debug("dedups: {}", dedups); + } + + return results; + } + + + private static final TreeMap blocksByOrder = new TreeMap<>(); + static { + for (var block : IndexBlock.values()) { + blocksByOrder.put((double) block.sortOrder, block); + } + } + + private IndexBlock getEffectiveBlock(EdgeUrlDetails details) { + return blocksByOrder.floorEntry(details.termScore).getValue(); + } + + private List getAllUrlDetails(List resultItems, 
IndexBlock block) { + TIntObjectHashMap detailsById = new TIntObjectHashMap<>(resultItems.size()); + + var idList = resultItems.stream().map(EdgeSearchResultItem::getUrl).collect(Collectors.toList()); + + List ret = edgeDataStoreDao.getUrlDetailsMulti(idList); + + for (var val : ret) { + detailsById.put(val.id, val); + } + + List retList = new ArrayList<>(resultItems.size()); + + TIntArrayList missedIds = new TIntArrayList(); + for (var resultItem : resultItems) { + + var did = resultItem.getDomain().getId(); + var uid = resultItem.getUrl().getId(); + + var details = detailsById.get(uid); + if (details == null) { + missedIds.add(uid); + continue; + } + + if (details.rankingId == Integer.MAX_VALUE) { + details.rankingId = did; + } + + details.termScore = calculateTermScore(block, resultItem, details); + details.queryLength = resultItem.queryLength; + + logger.debug("{} -> {}", details.url, details.termScore); + + retList.add(details); + } + if (!missedIds.isEmpty()) { + logger.warn("Could not look up documents: {}", missedIds.toArray()); + } + retList.sort(Comparator.comparing(EdgeUrlDetails::getTermScore)); + return retList; + } + + private double calculateTermScore(IndexBlock block, EdgeSearchResultItem resultItem, EdgeUrlDetails details) { + return valuator.evaluateTerms(resultItem.scores, block, details.words) / Math.sqrt(1 + resultItem.queryLength) + + ((details.domainState == EdgeDomainIndexingState.SPECIAL.code) ? 
1.25 : 0); + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/results/SearchResultValuator.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/results/SearchResultValuator.java new file mode 100644 index 00000000..c2cb781e --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/results/SearchResultValuator.java @@ -0,0 +1,92 @@ +package nu.marginalia.wmsa.edge.search.results; + +import com.google.inject.Inject; +import com.google.inject.Singleton; +import nu.marginalia.wmsa.edge.assistant.dict.NGramDict; +import nu.marginalia.wmsa.edge.crawler.domain.language.WordPatterns; +import nu.marginalia.wmsa.edge.index.model.IndexBlock; +import nu.marginalia.wmsa.edge.model.search.EdgeSearchResultKeywordScore; + +import java.util.List; +import java.util.regex.Pattern; + +@Singleton +public class SearchResultValuator { + private final NGramDict dict; + + private static final Pattern separator = Pattern.compile("_"); + + private static final int MIN_LENGTH = 500; + private static final int AVG_LENGTH = 1400; + + @Inject + public SearchResultValuator(NGramDict dict) { + this.dict = dict; + } + + + // This is basically a bargain bin BM25 + public double evaluateTerms(List rawScores, IndexBlock block, int length) { + EdgeSearchResultKeywordScore[] scores = rawScores.stream().filter(w -> !w.keyword.contains(":")).toArray(EdgeSearchResultKeywordScore[]::new); + + if (scores.length == 0) { + return IndexBlock.Words.sortOrder; + } + + final double[] weights = getTermWeights(scores); + final double lengthPenalty = getLengthPenalty(length); + + double termSum = 0.; + double factorSum = 0.; + + for (int i = 0; i < scores.length; i++) { + final double factor = 1.0 / (1.0 + weights[i]); + factorSum += factor; + + double termValue = (scores[i].index.sortOrder + 0.5) * factor; + + if (!scores[i].link && !scores[i].title) { + termValue *= lengthPenalty; + } + + termSum += termValue; + } + + assert factorSum != 0 ; + + if 
(block == IndexBlock.Title || block == IndexBlock.TitleKeywords) { + return block.sortOrder + (termSum / factorSum) / 5; + } + + return termSum / factorSum; + } + + private double getLengthPenalty(int length) { + if (length < MIN_LENGTH) { + length = MIN_LENGTH; + } + return (0.7 + 0.3 * length / AVG_LENGTH); + } + + private double[] getTermWeights(EdgeSearchResultKeywordScore[] scores) { + double[] weights = new double[scores.length]; + + for (int i = 0; i < scores.length; i++) { + String[] parts = separator.split(scores[i].keyword); + double sumScore = 0.; + + int count = 0; + for (String part : parts) { + if (!WordPatterns.isStopWord(part)) { + sumScore += dict.getTermFreq(part); + count++; + } + } + if (count == 0) count = 1; + + weights[i] = Math.sqrt(sumScore)/count; + } + + return weights; + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/results/UrlDeduplicator.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/results/UrlDeduplicator.java new file mode 100644 index 00000000..79b2647f --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/results/UrlDeduplicator.java @@ -0,0 +1,41 @@ +package nu.marginalia.wmsa.edge.search.results; + +import com.google.common.base.Strings; +import gnu.trove.map.hash.TObjectIntHashMap; +import gnu.trove.set.hash.TIntHashSet; +import nu.marginalia.wmsa.edge.model.search.EdgeUrlDetails; + +public class UrlDeduplicator { + private final TIntHashSet seenSuperficialhashes = new TIntHashSet(100); + private final TIntHashSet seenDataHashes = new TIntHashSet(100); + private final TObjectIntHashMap ipCount = new TObjectIntHashMap<>(100, 0.75f, 0); + + private final int resultsPerIp; + public UrlDeduplicator(int resultsPerIp) { + this.resultsPerIp = resultsPerIp; + } + + public synchronized boolean filter(EdgeUrlDetails details) { + if (!seenSuperficialhashes.add(details.getSuperficialHash())) { + return false; + } + if (!seenDataHashes.add(details.getDataHash())) { + 
return false; + } + if (Strings.isNullOrEmpty(details.getIp())) { + final var domain = details.getUrl().getDomain(); + final String key; + + if (!details.isSpecialDomain()) { + key = domain.getLongDomainKey(); + } + else { + key = domain.getDomainKey(); + } + + return ipCount.adjustOrPutValue(key, 1, 1) <= resultsPerIp; + } + + return ipCount.adjustOrPutValue(details.getIp(), 1, 1) < resultsPerIp; + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/results/model/AccumulatedQueryResults.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/results/model/AccumulatedQueryResults.java new file mode 100644 index 00000000..ec8d1aa2 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/results/model/AccumulatedQueryResults.java @@ -0,0 +1,39 @@ +package nu.marginalia.wmsa.edge.search.results.model; + +import nu.marginalia.wmsa.edge.model.search.EdgeUrlDetails; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +public class AccumulatedQueryResults { + + private static final Logger logger = LoggerFactory.getLogger(AccumulatedQueryResults.class); + + public Set results = new HashSet<>(); + + public void add(EdgeUrlDetails details) { + results.add(details); + } + + public void append(int maxSize, List details) { + for (var result : details) { + + if (size() >= maxSize) { + break; + } + + add(result.details); + } + } + + public int size() { + return results.size(); + } + + public int count() { + return results.size(); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/results/model/TieredSearchResult.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/results/model/TieredSearchResult.java new file mode 100644 index 00000000..d98ec566 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/results/model/TieredSearchResult.java @@ -0,0 +1,12 @@ +package 
/**
 * Immutable pairing of a search result's details with the index block it
 * is attributed to.
 *
 * <p>Lombok generates a constructor taking the three fields in declaration
 * order.
 */
@AllArgsConstructor
public class TieredSearchResult {
    // NOTE(review): populated from EdgeUrlDetails.queryLength by SearchResultDecorator - confirm no other producers.
    public final int length;
    // The index block this result is attributed to.
    public final IndexBlock block;
    // The URL details the result wraps.
    public final EdgeUrlDetails details;
}
nu.marginalia.wmsa.edge.model.crawl.EdgeHtmlStandard; +import nu.marginalia.wmsa.edge.model.crawl.EdgePageWordSet; +import nu.marginalia.wmsa.edge.model.crawl.EdgeRawPageContents; +import org.apache.commons.lang3.tuple.Pair; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.mariadb.jdbc.Driver; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Path; +import java.sql.SQLException; +import java.util.*; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.LinkedBlockingQueue; + +import static nu.marginalia.wmsa.edge.model.crawl.EdgeHtmlStandard.UNKNOWN; + +public class ConverterMain { + static LinkedBlockingQueue processQueue = new LinkedBlockingQueue<>(20); + static LinkedBlockingQueue uploadQueue = new LinkedBlockingQueue<>(2); + + static TObjectIntHashMap urlToIdMap = new TObjectIntHashMap<>(50_000_000, 0.5f, -1); + static TObjectIntHashMap domainToIdMap = new TObjectIntHashMap<>(5_000_000, 0.5f, -1); + static TIntObjectHashMap idToDomainMap = new TIntObjectHashMap<>(5_000_000, 0.5f, -1); + static HikariDataSource conn; + + private static SearchIndexWriterImpl indexWriter; + private static DictionaryWriter dictionaryWriter; + + @AllArgsConstructor + static class UploadJob { + EdgeId domainId; + EdgeId urlId; + EdgePageWordSet words; + int wordCount; + }; + static volatile boolean running = true; + + public static void main(String... 
args) throws IOException { + org.mariadb.jdbc.Driver driver = new Driver(); + + dictionaryWriter = new DictionaryWriter(new File(args[0]), 1L << 30, true); + indexWriter = new SearchIndexWriterImpl(dictionaryWriter, new File(args[1])); + + new Thread(ConverterMain::uploadThread, "Uploader").start(); + + for (int i = 0; i < 24; i++) { + new Thread(ConverterMain::processorThread, "Processor-"+i).start(); + } + + conn = new DatabaseModule().provideConnection(); + + System.out.println("Loading URLs and domains"); + try (var c = conn.getConnection(); + var getUrlsStmt = c.prepareStatement("SELECT EC_URL.ID, DOMAIN_ID, PROTO, URL FROM EC_URL WHERE VISITED"); + var getDomainsStmt = c.prepareStatement("SELECT ID, URL_PART FROM EC_DOMAIN WHERE INDEXED>0") + ) { + getUrlsStmt.setFetchSize(10_000); + getDomainsStmt.setFetchSize(10_000); + + System.out.println("Fetch domains"); + var domainRsp = getDomainsStmt.executeQuery(); + while (domainRsp.next()) { + domainToIdMap.put(domainRsp.getString(2), domainRsp.getInt(1)); + idToDomainMap.put(domainRsp.getInt(1), domainRsp.getString(2)); + } + + System.out.println("Fetch URLs"); + var urlRsp = getUrlsStmt.executeQuery(); + while (urlRsp.next()) { + String urlStr = urlRsp.getString(3) + "://" + idToDomainMap.get(urlRsp.getInt(2)) + urlRsp.getString(4); + urlToIdMap.put(urlStr, urlRsp.getInt(1)); + } + } catch (SQLException throwables) { + throwables.printStackTrace(); + } + +// new Thread(ConverterMain::uploadThread, "Uploader").start(); +// +// for (int i = 0; i < 24; i++) { +// new Thread(ConverterMain::processorThread, "Processor-"+i).start(); +// } + + System.out.println("Loaded URLs and domains"); + + new ArchiveExtractor(Path.of(args[2])).forEach( + page -> { + if (page.contentType.contentType.startsWith("application/xhtml") + || page.contentType.contentType.startsWith("text/html")) { + try { + int domainId = domainToIdMap.get(page.url.domain.toString()); + if (domainId >= 0 && page.redirectUrl == null) { + int urlId = 
urlToIdMap.get(page.url.toString()); + int dataHash = page.data.hashCode(); + try (var c = conn.getConnection(); + var updateHash = c.prepareStatement("UPDATE EC_URL SET DATA_HASH=? WHERE ID=?")) + { + updateHash.setInt(1, dataHash); + updateHash.setInt(2, urlId); + updateHash.executeUpdate(); + } + catch (Exception ex) { + ex.printStackTrace(); + } + } + } catch (Exception e) { + e.printStackTrace(); + } + } + }); + + running = false; + } + + static LanguageModels lm = new LanguageModels( + Path.of("/var/lib/wmsa/model/ngrams-generous-emstr.bin"), + Path.of("/var/lib/wmsa/model/tfreq-new-algo3.bin"), + Path.of("/var/lib/wmsa/model/opennlp-sentence.bin"), + Path.of("/var/lib/wmsa/model/English.RDR"), + Path.of("/var/lib/wmsa/model/English.DICT"), + Path.of("/var/lib/wmsa/model/opennlp-tok.bin") + ); + static NGramDict dict = new NGramDict(lm); + + private static final LanguageFilter languageFilter = new LanguageFilter(); + private static final LinkParser linkParser = new LinkParser(); + public static void processorThread() { + SentenceExtractor newSe = new SentenceExtractor(lm); + DocumentKeywordExtractor documentKeywordExtractor = new DocumentKeywordExtractor(dict); + + try { + while (running || !processQueue.isEmpty()) { + var job = processQueue.take(); + if (job.data.length() > 512*1024) { + System.out.println(job.url + " too big, skipping"); + } + + var parsed = Jsoup.parse(job.data); + var text = parsed.text(); + + if (languageFilter.isBlockedUnicodeRange(text)) { + continue; + } + + var dld = newSe.extractSentences(parsed.clone()); + var keywords = documentKeywordExtractor.extractKeywords(dld); + int wc = dld.totalNumWords(); + + if (wc > 100) { + double languageAgreement = languageFilter.dictionaryAgreement(dld); + if (languageAgreement < 0.05) { + continue; + } + } + + + EdgeHtmlStandard htmlStandard = HtmlStandardExtractor.parseDocType(parsed.documentType()); + if (UNKNOWN.equals(htmlStandard)) { + htmlStandard = 
HtmlStandardExtractor.sniffHtmlStandard(parsed); + } + + int scriptTags = getScriptPenalty(parsed); + var featureSet = getFeatureSet(parsed, scriptTags, job.hasCookies); + addTags(keywords, htmlStandard, job.url, featureSet); + + extractLinkWords(keywords, job.getUrl(), parsed); + + uploadQueue.put(new UploadJob( + new EdgeId<>(domainToIdMap.get(job.url.domain.toString())), + new EdgeId<>(urlToIdMap.get(job.url.toString())), + keywords, + 0 + )); + + } + } + catch (InterruptedException ex) { + ex.printStackTrace(); + } + } + + + private static Map> extractLinkWords(EdgePageWordSet keywords, EdgeUrl pageUrl, Document parsed) { + + List> urls = new ArrayList<>(); + Set linkKeywords = new HashSet<>(); + Map> linkTextWords = new ConcurrentHashMap<>(); + + for (var tag : parsed.getElementsByTag("a")) { + if (!tag.hasAttr("href")) { + continue; + } + if (urls.size() > 100) { + break; + } + + var linkOpt = linkParser.parseLink(pageUrl, tag); + if (linkOpt.isEmpty()) + continue; + + var link = linkOpt.get(); + + urls.add(Pair.of(link, tag.text())); + + if (!Objects.equals(link.domain.domain, pageUrl.domain.domain) + && linkKeywords.size() <= 25) + { + linkKeywords.add("links:" + link.domain.domain); + } +// +// Set words = new HashSet<>(); +// +// for (var sent : sentenceExtractor.extractSentencesFromString(tag.text())) { +// for (var keyword : keywordExtractor.getWordsFromSentence(sent)) { +// words.add(sent.constructWordFromSpan(keyword)); +// } +// } +// +// linkTextWords.compute(link, (k, set) -> { +// if (set == null) return words; +// else { set.addAll(words); return set; } +// }); + + } + + keywords.get(IndexBlock.Meta).addAll(linkKeywords); + + if (WordPatterns.wordQualitiesPredicate.test(pageUrl.domain.domain.toLowerCase())) { + keywords.get(IndexBlock.Link).addJust(pageUrl.domain.domain.toLowerCase()); + } + + return linkTextWords; + } + + private static int getScriptPenalty(Document parsed) { + var scriptTags = parsed.getElementsByTag("script"); + String 
scriptText = scriptTags.html(); + int badScript = 0; + if (scriptText.contains(".createElement(")) { + badScript = 1; + } + return scriptTags.size() + badScript + (scriptText.length())/1000; + } + + static List trackers = List.of("adform.net", + "connect.facebook", + "googletagmanager.com", + "googlesyndication.com", + "google.com", + "twitter.com", + "smartadserver.com", + "doubleclick.com", + "2mdn.com", + "dmtry.com", + "bing.com", + "msn.com", + "amazon-adsystem.com", + "alexametrics.com", + "rubiconproject.com", + "chango.com", + "d5nxst8fruw4z.cloudfront.net", + "d31qbv1cthcecs.cloudfront.net", + "linkedin.com"); + + private static Set getFeatureSet(Document parsed, int scriptTags, boolean cookies) { + Set features = new HashSet<>(); + + if (scriptTags > 0) { + features.add(HtmlFeature.JS); + } + if (!parsed.getElementsByTag("object").isEmpty() + || !parsed.getElementsByTag("audio").isEmpty() + || !parsed.getElementsByTag("video").isEmpty()) { + features.add(HtmlFeature.MEDIA); + } + if (parsed.getElementsByTag("script").stream() + .filter(tag -> tag.attr("src") != null) + .anyMatch(tag -> trackers.stream().anyMatch(tracker -> tag.attr("src").contains(tracker)))) { + features.add(HtmlFeature.TRACKING); + } + if (parsed.getElementsByTag("script").html().contains("google-analytics.com")) { + features.add(HtmlFeature.TRACKING); + } + if (parsed.getElementsByTag("a").stream().map(e -> e.attr("href")) + .filter(Objects::nonNull) + .map(String::toLowerCase) + .anyMatch(href -> + href.contains("amzn.to/") || href.contains("amazon.com/"))) { + features.add(HtmlFeature.AFFILIATE_LINK); + } + if (cookies) { + features.add(HtmlFeature.COOKIES); + } + + return features; + } + + private static void addTags(EdgePageWordSet wordSet, EdgeHtmlStandard htmlStandard, EdgeUrl url, Set features) { + List tagWords = new ArrayList<>(); + tagWords.add("format:"+htmlStandard.toString().toLowerCase()); + tagWords.add("site:"+url.domain.toString().toLowerCase()); + 
tagWords.add("proto:"+url.proto.toLowerCase()); + tagWords.add("js:" + Boolean.toString(features.contains(HtmlFeature.JS)).toLowerCase()); + if (features.contains(HtmlFeature.MEDIA)) { + tagWords.add("special:media"); + } + if (features.contains(HtmlFeature.TRACKING)) { + tagWords.add("special:tracking"); + } + if (features.contains(HtmlFeature.AFFILIATE_LINK)) { + tagWords.add("special:affiliate"); + } + if (features.contains(HtmlFeature.COOKIES)) { + tagWords.add("special:cookies"); + } + wordSet.append(IndexBlock.Meta, tagWords); + wordSet.append(IndexBlock.Words, tagWords); + } + + @SneakyThrows + public static void uploadThread() { + + while (running || !processQueue.isEmpty() || !uploadQueue.isEmpty()) { + var data = uploadQueue.take(); + + if (!data.words.isEmpty()) { + for (var words : data.words.values()) { + if (!words.getWords().isEmpty()) { + if (words.size() < 1000) { + indexWriter.put(data.domainId, data.urlId, words.block, words.words); + } else { + chunks(words.words, 1000).forEach(chunk -> { + indexWriter.put(data.domainId, data.urlId, words.block, chunk); + }); + } + } + } + } + } + + System.out.println("Closing"); + dictionaryWriter.commitToDisk(); + indexWriter.forceWrite(); + dictionaryWriter.close(); + indexWriter.close(); + System.out.println("Done"); + } + + private static List> chunks(Collection coll, int size) { + List> ret = new ArrayList<>(); + List data = List.copyOf(coll); + + for (int i = 0; i < data.size(); i+=size) { + ret.add(data.subList(i, Math.min(data.size(), i+size))); + } + + return ret; + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/tools/CrawlDomainMain.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/tools/CrawlDomainMain.java new file mode 100644 index 00000000..199dcdee --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/tools/CrawlDomainMain.java @@ -0,0 +1,390 @@ +package nu.marginalia.wmsa.edge.tools; + + +import com.opencsv.exceptions.CsvValidationException; 
+import com.zaxxer.hikari.HikariDataSource; +import gnu.trove.list.array.TIntArrayList; +import lombok.AllArgsConstructor; +import lombok.SneakyThrows; +import nu.marginalia.wmsa.configuration.module.DatabaseModule; +import nu.marginalia.wmsa.edge.archive.client.ArchiveClient; +import nu.marginalia.wmsa.edge.assistant.dict.NGramDict; +import nu.marginalia.wmsa.edge.crawler.domain.DomainCrawlResults; +import nu.marginalia.wmsa.edge.crawler.domain.DomainCrawlerFactory; +import nu.marginalia.wmsa.edge.crawler.domain.DomainCrawlerRobotsTxt; +import nu.marginalia.wmsa.edge.crawler.domain.language.LanguageFilter; +import nu.marginalia.wmsa.edge.crawler.domain.language.conf.LanguageModels; +import nu.marginalia.wmsa.edge.crawler.domain.language.processing.DocumentKeywordExtractor; +import nu.marginalia.wmsa.edge.crawler.domain.language.processing.SentenceExtractor; +import nu.marginalia.wmsa.edge.crawler.domain.processor.HtmlProcessor; +import nu.marginalia.wmsa.edge.crawler.domain.processor.PlainTextProcessor; +import nu.marginalia.wmsa.edge.crawler.fetcher.HttpFetcher; +import nu.marginalia.wmsa.edge.crawler.worker.GeoIpBlocklist; +import nu.marginalia.wmsa.edge.crawler.worker.IpBlockList; +import nu.marginalia.wmsa.edge.crawler.worker.UploaderWorker; +import nu.marginalia.wmsa.edge.crawler.worker.facade.UploadFacadeDirectImpl; +import nu.marginalia.wmsa.edge.data.dao.EdgeDataStoreDaoImpl; +import nu.marginalia.wmsa.edge.data.dao.task.EdgeDomainBlacklistImpl; +import nu.marginalia.wmsa.edge.director.client.EdgeDirectorClient; +import nu.marginalia.wmsa.edge.index.client.EdgeIndexClient; +import nu.marginalia.wmsa.edge.model.*; +import nu.marginalia.wmsa.edge.model.crawl.EdgeDomainIndexingState; +import nu.marginalia.wmsa.edge.model.crawl.EdgeDomainLink; +import nu.marginalia.wmsa.edge.model.crawl.EdgeIndexTask; +import nu.marginalia.wmsa.edge.model.crawl.EdgeUrlVisit; +import org.mariadb.jdbc.Driver; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import 
java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.*; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.Semaphore; +import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; + +public class CrawlDomainMain { + static LinkedBlockingQueue processQueue = new LinkedBlockingQueue<>(5); + static LinkedBlockingQueue uploadQueue = new LinkedBlockingQueue<>(2); + + static Logger logger = LoggerFactory.getLogger(CrawlDomainMain.class); + + static HikariDataSource conn; + + private static EdgeIndexClient indexClient; + private static IpBlockList blocklist; + private static HttpFetcher fetcher; + private static UploadFacadeDirectImpl uploadFacade; + + static { + try { + blocklist = new IpBlockList(new GeoIpBlocklist()); + } catch (IOException e) { + e.printStackTrace(); + } catch (CsvValidationException e) { + e.printStackTrace(); + } + } + + @AllArgsConstructor + static class ReindexJob { + EdgeIndexTask task; + Map hashes; + int visitedCount; + }; + + @AllArgsConstructor + static class UploadJob { + DomainCrawlResults results; + ReindexJob job; + }; + + + static volatile boolean running = true; + + public static class AbortMonitor { + private volatile boolean abort = false; + private static volatile AbortMonitor instance = null; + + public static AbortMonitor getInstance() { + if (instance == null) { + synchronized (AbortMonitor.class) { + if (instance == null) { + instance = new AbortMonitor(); + new Thread(instance::run, "AbortMon").start(); + } + } + } + return instance; + } + + private AbortMonitor() { + } + + @SneakyThrows + public void run() { + for (;;) { + Thread.sleep(1000); + if (Files.exists(Path.of("/tmp/stop"))) { + logger.warn("Abort file found"); + abort = true; + Files.delete(Path.of("/tmp/stop")); + } + } + } + public boolean isAlive() { + return !abort; + } + } + + @SneakyThrows + public static void 
main(String... args) throws IOException { + Driver driver = new Driver(); + + indexClient = new EdgeIndexClient(); + + + conn = new DatabaseModule().provideConnection(); + var blacklist = new EdgeDomainBlacklistImpl(conn); + + EdgeDataStoreDaoImpl dataStoreDao = new EdgeDataStoreDaoImpl(conn); + + TIntArrayList domainIndexOrder = new TIntArrayList(); + + final Thread uploadThread = new Thread(CrawlDomainMain::uploadThread, "Uploader"); + uploadThread.start(); + + SentenceExtractor newSe = new SentenceExtractor(lm); + DocumentKeywordExtractor documentKeywordExtractor = new DocumentKeywordExtractor(dict); + + uploadFacade = new UploadFacadeDirectImpl(new EdgeDataStoreDaoImpl(conn), new EdgeIndexClient(), new EdgeDirectorClient()); + + fetcher = new HttpFetcher("search.marginalia.nu"); + var dcf = new DomainCrawlerFactory(fetcher, + new HtmlProcessor(documentKeywordExtractor, newSe), + new PlainTextProcessor(documentKeywordExtractor, newSe), + new ArchiveClient(), + new DomainCrawlerRobotsTxt(fetcher, "search.marginalia.nu"), + new LanguageFilter(), + blocklist); + + for (int i = 0; i < 64; i++) { + new Thread(() -> processorThread(dcf), "Processor-"+i).start(); + } + + List urls = new ArrayList<>(); + + try (var br = new BufferedReader(new InputStreamReader(System.in))) { + for (;;) { + var urlStr = br.readLine(); + if (urlStr == null) { + break; + } else if (!urlStr.isBlank()) { + urls.add(new EdgeUrl(urlStr)); + } + } + } + uploadFacade.putUrls(urls, -5); + + try (var c = conn.getConnection(); + var fetchDomains = c.prepareStatement("select ID FROM EC_DOMAIN WHERE URL_PART=?"); + var fetchUrlsForDomain = c.prepareStatement("select ID,DATA_HASH,VISITED FROM EC_URL WHERE DOMAIN_ID=? 
ORDER BY VISITED DESC, DATA_HASH IS NOT NULL DESC, ID") + ) { + + fetchDomains.setFetchSize(1000); + + for (var url : urls) { + fetchDomains.setString(1, url.getDomain().toString()); + fetchDomains.executeQuery(); + + var domainRsp = fetchDomains.executeQuery(); + + logger.info("Fetched {}", url); + + while (domainRsp.next()) { + if (!blacklist.isBlacklisted(domainRsp.getInt(1))) { + domainIndexOrder.add(domainRsp.getInt(1)); + } + } + } + + fetchUrlsForDomain.setFetchSize(10_000); + + for (int i = 0; i < domainIndexOrder.size(); i++) { + if (!AbortMonitor.getInstance().isAlive()) { + break; + } + + int domainId = domainIndexOrder.getQuick(i); + var domain = dataStoreDao.getDomain(new EdgeId<>(domainId)); + + fetchUrlsForDomain.setInt(1, domainId); + var urlRsp = fetchUrlsForDomain.executeQuery(); + + EdgeIndexTask task = new EdgeIndexTask(domain, 1000, 1000, 0); + Map hashes = new HashMap<>(); + + int visitedCount = 0; + while (urlRsp.next()) { + var url = dataStoreDao.getUrl(new EdgeId<>(urlRsp.getInt(1))); + task.urls.add(url); + + if (urlRsp.getBoolean(3)) { + visitedCount++; + } + + int hash = urlRsp.getInt(2); + if (hash != 0) + hashes.put(url, hash); + } + + processQueue.put(new ReindexJob(task, hashes, visitedCount)); + } + } + catch (Exception ex) { + ex.printStackTrace(); + } + + uploadThread.join(); + System.exit(0); + } + + static LanguageModels lm = new LanguageModels( + Path.of("/var/lib/wmsa/model/ngrams-generous-emstr.bin"), + Path.of("/var/lib/wmsa/model/tfreq-new-algo3.bin"), + Path.of("/var/lib/wmsa/model/opennlp-sentence.bin"), + Path.of("/var/lib/wmsa/model/English.RDR"), + Path.of("/var/lib/wmsa/model/English.DICT"), + Path.of("/var/lib/wmsa/model/opennlp-tok.bin") + ); + static NGramDict dict = new NGramDict(lm); + + private static final Semaphore processSem = new Semaphore(500, true); + @SneakyThrows + public static void processorThread(DomainCrawlerFactory dcf) { + String name = Thread.currentThread().getName(); + try { +outer: + while 
(AbortMonitor.getInstance().isAlive() && (running || !processQueue.isEmpty())) { + + Thread.currentThread().setName(name); + ReindexJob job = null; + + while (job == null) { + job = processQueue.poll(30, TimeUnit.SECONDS); + if (!AbortMonitor.getInstance().isAlive()) { + break outer; + } + } + + Thread.currentThread().setName(name + ":" + job.task.domain); + if (!blocklist.isAllowed(job.task.domain)) { + setDomainError(job.task.domain, new HttpFetcher.FetchResult(HttpFetcher.FetchResultState.ERROR, job.task.domain)); + } + + var probe = fetcher.probeDomain(job.task.urls.get(0)); + + if (!AbortMonitor.getInstance().isAlive()) { + break; + } + + if (!probe.ok()) { + setDomainError(job.task.domain, probe); + } else { + var dc = dcf.domainCrawler(job.task); + + int countProp = Objects.requireNonNullElse(Integer.getInteger("wmsa.edge.crawl.maxCeiling"), 0); + + int countMaxAll = 4000; + int countVisited = (int) (job.visitedCount * 1.1); + int countHash = 100 + job.hashes.size() * 2; + + int maxCount = Math.max(countProp, Math.min(countMaxAll, Math.max(countVisited, countHash))); + + DomainCrawlResults result; + int tokens = Math.max(1,maxCount/1000); + try { + while (!processSem.tryAcquire(tokens)) + Thread.sleep((int)(100 + Math.random() * 100)); + result = dc.crawlToExhaustion(maxCount, AbortMonitor.getInstance()::isAlive); + + if (!AbortMonitor.getInstance().isAlive()) { + break; + } + } + finally { + processSem.release(tokens); + } + + uploadQueue.put(new UploadJob(result, job)); + } + } + } + catch (InterruptedException ex) { + ex.printStackTrace(); + } + + logger.warn("Terminating {}", Thread.currentThread().getName()); + } + + private static void setDomainError(EdgeDomain domain, HttpFetcher.FetchResult probe) { + List links = new ArrayList<>(1); + EdgeDomain alias = null; + if (probe.state == HttpFetcher.FetchResultState.REDIRECT) { + links.add(new EdgeDomainLink(domain, probe.domain)); + alias = probe.domain; + } + uploadFacade.putLinks(links, true); + 
uploadFacade.updateDomainIndexTimestamp(domain, EdgeDomainIndexingState.ERROR, alias, 1); + } + + + @SneakyThrows + public static void uploadThread() { + + + int count = 0; + long allUrls = 0; + long newUrls = 0; +outer: + while (AbortMonitor.getInstance().isAlive() && (running || !processQueue.isEmpty() || !uploadQueue.isEmpty())) { + + UploadJob job = null; + + while (job == null) { + job = uploadQueue.poll(30, TimeUnit.SECONDS); + if (!AbortMonitor.getInstance().isAlive()) { + break outer; + } + } + + UploadJob data = job; + + logger.info("{} Done - {} : {} : {}", ++count, data.results.domain, allUrls, newUrls); + + var dc = data.results; + + double avgQuality = UploaderWorker.calculateMedianQuality(dc).orElse(-5.); + + if (uploadFacade.isBlacklisted(dc.domain)) { + continue; + } + + final double linkQualityRating = -5; //(avgQuality + UNKNOWN_SITE_ATTRACTOR)/2 - extLinkPenalty; + + var visits = dc.visits(); + allUrls += visits.size(); + + var newContents = visits.stream().filter(visit -> { + var hash = data.job.hashes.get(visit.url); + return (hash == null || !Objects.equals(visit.data_hash_code, hash)); + }).collect(Collectors.toList()); + + var goodUrls = newContents.stream().map(EdgeUrlVisit::getUrl).collect(Collectors.toSet()); + + newUrls += newContents.size(); + + uploadFacade.putUrls(dc.extUrl, linkQualityRating); + uploadFacade.putUrls(dc.intUrl, linkQualityRating); + uploadFacade.putUrlVisits(newContents); + uploadFacade.putFeeds(dc.feeds); + + if (avgQuality < UploaderWorker.QUALITY_LOWER_BOUND_CUTOFF) { + uploadFacade.updateDomainIndexTimestamp(dc.domain, EdgeDomainIndexingState.ACTIVE, null, 1); + continue; + } + + uploadFacade.putLinks(dc.links, true); + uploadFacade.putWords(dc.pageContents.values().stream().filter(pc -> goodUrls.contains(pc.url)).collect(Collectors.toList()), 1); + + uploadFacade.updateDomainIndexTimestamp(dc.domain, EdgeDomainIndexingState.ACTIVE, null, 1); + } + + logger.warn("Terminating {}", Thread.currentThread().getName()); 
+ } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/tools/DomainInserterMain.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/tools/DomainInserterMain.java new file mode 100644 index 00000000..28af0926 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/tools/DomainInserterMain.java @@ -0,0 +1,83 @@ +package nu.marginalia.wmsa.edge.tools; + +import lombok.SneakyThrows; +import nu.marginalia.wmsa.configuration.module.DatabaseModule; +import nu.marginalia.wmsa.edge.data.dao.EdgeDataStoreDaoImpl; +import nu.marginalia.wmsa.edge.model.EdgeDomain; +import nu.marginalia.wmsa.edge.model.EdgeUrl; +import org.mariadb.jdbc.Driver; + +import java.net.URISyntaxException; +import java.net.URLEncoder; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +public class DomainInserterMain { + public static void main(String... args) throws Exception { + org.mariadb.jdbc.Driver driver = new Driver(); + + var conn = new DatabaseModule().provideConnection(); + + var dao = new EdgeDataStoreDaoImpl(conn); + Set domains = new HashSet<>(); + + var connection = conn.getConnection(); + + try (var br = Files.newBufferedReader(Path.of(args[0])); + var stmt = connection.prepareStatement("SELECT ID FROM EC_DOMAIN WHERE URL_PART=?"); + var setRankStmt = connection.prepareStatement("UPDATE EC_DOMAIN SET RANK=? 
WHERE URL_PART=?") + ) { + String line; + + List loadUrls = new ArrayList<>(100); + + for (;;) { + loadUrls.clear(); + + double quality; + + line = br.readLine(); + if (null == line) break; + quality = Double.parseDouble(line); + + line = br.readLine(); + if (null == line) break; + var url = getUrl(line); + stmt.setString(1, url.domain.toString()); + var rsp = stmt.executeQuery(); + if (rsp.next()) { + System.out.println("Known: " + line); + while (null != (line = br.readLine()) && !line.isBlank()) { + if (".".equals(line)) break; + } + setRankStmt.setString(2, url.getDomain().toString()); + setRankStmt.setDouble(1, quality); + setRankStmt.executeUpdate(); + } + else { + loadUrls.add(url); + while (null != (line = br.readLine()) && !line.isBlank()) { + if (".".equals(line)) break; + loadUrls.add(getUrl(line)); + } + + dao.putUrl(-2*quality, loadUrls.toArray(EdgeUrl[]::new)); + + System.out.println(loadUrls); + } + } + } + } + + @SneakyThrows + static EdgeUrl getUrl(String line) { + String[] parts = line.split("/", 4); + return new EdgeUrl(parts[0]+"//"+parts[2]+"/" + URLEncoder.encode(parts[3])); + } + + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/tools/IndexMergerMain.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/tools/IndexMergerMain.java new file mode 100644 index 00000000..fafa68f1 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/tools/IndexMergerMain.java @@ -0,0 +1,198 @@ +package nu.marginalia.wmsa.edge.tools; + +import com.google.inject.Inject; +import gnu.trove.set.hash.TIntHashSet; +import lombok.SneakyThrows; +import nu.marginalia.wmsa.configuration.module.DatabaseModule; +import nu.marginalia.wmsa.edge.data.dao.task.EdgeDomainBlacklist; +import nu.marginalia.wmsa.edge.data.dao.task.EdgeDomainBlacklistImpl; +import nu.marginalia.wmsa.edge.index.service.SearchIndexDao; +import nu.marginalia.wmsa.edge.index.service.query.SearchIndexPartitioner; +import org.mariadb.jdbc.Driver; +import 
org.roaringbitmap.longlong.Roaring64Bitmap; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.io.IOException; +import java.io.RandomAccessFile; +import java.nio.ByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.file.Files; +import java.util.Objects; + +public class IndexMergerMain { + private static final int CHUNK_HEADER_SIZE = 16; + + private final Logger logger = LoggerFactory.getLogger(getClass()); + private final SearchIndexPartitioner partitioner; + private final TIntHashSet spamDomains; + + @SneakyThrows + public static long wordCount(File inputFile) { + try (RandomAccessFile raf = new RandomAccessFile(inputFile, "r")) { + raf.readLong(); + return raf.readInt(); + } + } + + public static void main(String... args) { + Driver driver = new Driver(); + + File file1 = new File(args[0]); + File file2 = new File(args[1]); + File outputFile = new File(args[2]); + + if (!file1.exists()) { + System.err.println("File " + file1 + " does not exist"); + return; + } + if (!file2.exists()) { + System.err.println("File " + file2 + " does not exist"); + return; + } + + if (outputFile.exists()) { // Footgun prevention + System.err.println("File " + outputFile + " already exists"); + return; + } + + var hikari = new DatabaseModule().provideConnection(); + var partitioner = new SearchIndexPartitioner(new SearchIndexDao(hikari)); + var blacklist = new EdgeDomainBlacklistImpl(hikari); + + new IndexMergerMain(file1, file2, outputFile, partitioner, blacklist); + } + + + @SneakyThrows + @Inject + public IndexMergerMain(File inputFile1, File inputFile2, + File outputFile, + SearchIndexPartitioner partitioner, + EdgeDomainBlacklist blacklist) + { + this.partitioner = partitioner; + this.spamDomains = blacklist.getSpamDomains(); + + if (outputFile.exists()) { + Files.deleteIfExists(Objects.requireNonNull(outputFile).toPath()); + } + + Roaring64Bitmap secondFileIndices = findIndices(inputFile2); + + RandomAccessFile 
randomAccessFile = new RandomAccessFile(outputFile, "rw"); + randomAccessFile.seek(12); + + FileChannel outputFileChannel = randomAccessFile.getChannel(); + + int wc1 = copyToOutputFile(inputFile2, outputFileChannel, secondFileIndices, true); + int wc2 = copyToOutputFile(inputFile1, outputFileChannel, secondFileIndices, false); + + long pos = randomAccessFile.getFilePointer(); + + randomAccessFile.seek(0); + randomAccessFile.writeLong(pos); + randomAccessFile.writeInt(Math.max(wc1, wc2)); + outputFileChannel.force(true); + outputFileChannel.close(); + randomAccessFile.close(); + } + + private Roaring64Bitmap findIndices(File file) throws IOException { + Roaring64Bitmap ret = new Roaring64Bitmap(); + + logger.info("Mapping indices in {}", file); + + try (final RandomAccessFile raf = new RandomAccessFile(file, "r"); var channel = raf.getChannel()) { + + var fileLength = raf.readLong(); + raf.readInt(); + + ByteBuffer inByteBuffer = ByteBuffer.allocateDirect(10_000); + + while (channel.position() < fileLength) { + inByteBuffer.clear(); + inByteBuffer.limit(CHUNK_HEADER_SIZE); + channel.read(inByteBuffer); + inByteBuffer.flip(); + long urlId = inByteBuffer.getLong(); + int chunkBlock = inByteBuffer.getInt(); + int count = inByteBuffer.getInt(); + inByteBuffer.limit(count * 4 + CHUNK_HEADER_SIZE); + channel.read(inByteBuffer); + + ret.add(encodeId(urlId, chunkBlock)); + } + } + + logger.info("Cardinality = {}", ret.getLongCardinality()); + + return ret; + } + + private int copyToOutputFile(File inFile, FileChannel outFile, Roaring64Bitmap urlIdAndBlock, boolean ifInSet) throws IOException { + int wordCount = 0; + + logger.info("Copying from {}", inFile); + long skippedWrongFile = 0; + long skippedBadUrl = 0; + try (final RandomAccessFile raf = new RandomAccessFile(inFile, "r"); var channel = raf.getChannel()) { + + var fileLength = raf.readLong(); + raf.readInt(); + + ByteBuffer inByteBuffer = ByteBuffer.allocateDirect(10_000); + + while (channel.position() < 
fileLength) { + inByteBuffer.clear(); + inByteBuffer.limit(CHUNK_HEADER_SIZE); + channel.read(inByteBuffer); + inByteBuffer.flip(); + long urlId = inByteBuffer.getLong(); + int chunkBlock = inByteBuffer.getInt(); + int count = inByteBuffer.getInt(); + inByteBuffer.limit(count*4+CHUNK_HEADER_SIZE); + channel.read(inByteBuffer); + inByteBuffer.position(CHUNK_HEADER_SIZE); + + for (int i = 0; i < count; i++) { + wordCount = Math.max(wordCount, 1+inByteBuffer.getInt()); + } + + inByteBuffer.position(count*4+CHUNK_HEADER_SIZE); + + if (urlIdAndBlock.contains(encodeId(urlId, chunkBlock)) == ifInSet) { + if (isUrlAllowed(urlId)) { + inByteBuffer.flip(); + + while (inByteBuffer.position() < inByteBuffer.limit()) + outFile.write(inByteBuffer); + } + else { + skippedBadUrl++; + } + } + else { + skippedWrongFile++; + } + } + + } + + logger.info("Skipped {}, {}", skippedBadUrl, skippedWrongFile); + return wordCount; + } + + private long encodeId(long urlId, int chunkBlock) { + return ((urlId & 0xFFFF_FFFFL) << 4L) | chunkBlock; + } + + private boolean isUrlAllowed(long url) { + int urlId = (int)(url & 0xFFFF_FFFFL); + int domainId = (int)(url >>> 32); + + return partitioner.isGoodUrl(urlId) && !spamDomains.contains(domainId); + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/tools/ReindexMain.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/tools/ReindexMain.java new file mode 100644 index 00000000..04b6056c --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/tools/ReindexMain.java @@ -0,0 +1,354 @@ +package nu.marginalia.wmsa.edge.tools; + +import com.opencsv.exceptions.CsvValidationException; +import com.zaxxer.hikari.HikariDataSource; +import gnu.trove.list.array.TIntArrayList; +import lombok.AllArgsConstructor; +import lombok.SneakyThrows; +import nu.marginalia.wmsa.configuration.module.DatabaseModule; +import nu.marginalia.wmsa.edge.archive.client.ArchiveClient; +import nu.marginalia.wmsa.edge.assistant.dict.NGramDict; 
+import nu.marginalia.wmsa.edge.crawler.domain.*; +import nu.marginalia.wmsa.edge.crawler.domain.language.LanguageFilter; +import nu.marginalia.wmsa.edge.crawler.domain.language.conf.LanguageModels; +import nu.marginalia.wmsa.edge.crawler.domain.language.processing.DocumentKeywordExtractor; +import nu.marginalia.wmsa.edge.crawler.domain.language.processing.SentenceExtractor; +import nu.marginalia.wmsa.edge.crawler.domain.processor.HtmlProcessor; +import nu.marginalia.wmsa.edge.crawler.domain.processor.PlainTextProcessor; +import nu.marginalia.wmsa.edge.crawler.fetcher.HttpFetcher; +import nu.marginalia.wmsa.edge.crawler.worker.GeoIpBlocklist; +import nu.marginalia.wmsa.edge.crawler.worker.IpBlockList; +import nu.marginalia.wmsa.edge.crawler.worker.UploaderWorker; +import nu.marginalia.wmsa.edge.crawler.worker.facade.UploadFacadeDirectImpl; +import nu.marginalia.wmsa.edge.data.dao.EdgeDataStoreDaoImpl; +import nu.marginalia.wmsa.edge.data.dao.task.EdgeDomainBlacklistImpl; +import nu.marginalia.wmsa.edge.director.client.EdgeDirectorClient; +import nu.marginalia.wmsa.edge.index.client.EdgeIndexClient; +import nu.marginalia.wmsa.edge.model.*; +import nu.marginalia.wmsa.edge.model.crawl.EdgeDomainIndexingState; +import nu.marginalia.wmsa.edge.model.crawl.EdgeDomainLink; +import nu.marginalia.wmsa.edge.model.crawl.EdgeIndexTask; +import nu.marginalia.wmsa.edge.model.crawl.EdgeUrlVisit; +import org.mariadb.jdbc.Driver; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.*; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.Semaphore; +import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; + +public class ReindexMain { + static LinkedBlockingQueue processQueue = new LinkedBlockingQueue<>(5); + static LinkedBlockingQueue uploadQueue = new LinkedBlockingQueue<>(2); + + static Logger logger = 
LoggerFactory.getLogger(ReindexMain.class); + + static HikariDataSource conn; + + private static IpBlockList blocklist; + private static HttpFetcher fetcher; + private static UploadFacadeDirectImpl uploadFacade; + + static { + try { + blocklist = new IpBlockList(new GeoIpBlocklist()); + } catch (IOException | CsvValidationException e) { + e.printStackTrace(); + } + } + + @AllArgsConstructor + static class ReindexJob { + EdgeIndexTask task; + Map hashes; + int visitedCount; + }; + + @AllArgsConstructor + static class UploadJob { + DomainCrawlResults results; + ReindexJob job; + }; + + + static volatile boolean running = true; + + public static class AbortMonitor { + private volatile boolean abort = false; + private static volatile AbortMonitor instance = null; + + public static AbortMonitor getInstance() { + if (instance == null) { + synchronized (AbortMonitor.class) { + if (instance == null) { + instance = new AbortMonitor(); + new Thread(instance::run, "AbortMon").start(); + } + } + } + return instance; + } + + private AbortMonitor() { + } + + @SneakyThrows + public void run() { + for (;;) { + Thread.sleep(1000); + if (Files.exists(Path.of("/tmp/stop"))) { + logger.warn("Abort file found"); + abort = true; + Files.delete(Path.of("/tmp/stop")); + } + } + } + public boolean isAlive() { + return !abort; + } + } + + @SneakyThrows + public static void main(String... 
args) throws IOException { + Driver driver = new Driver(); + + conn = new DatabaseModule().provideConnection(); + var blacklist = new EdgeDomainBlacklistImpl(conn); + + EdgeDataStoreDaoImpl dataStoreDao = new EdgeDataStoreDaoImpl(conn); + + TIntArrayList domainIndexOrder = new TIntArrayList(); + + new Thread(ReindexMain::uploadThread, "Uploader").start(); + + SentenceExtractor newSe = new SentenceExtractor(lm); + DocumentKeywordExtractor documentKeywordExtractor = new DocumentKeywordExtractor(dict); + + uploadFacade = new UploadFacadeDirectImpl(new EdgeDataStoreDaoImpl(conn), new EdgeIndexClient(), new EdgeDirectorClient()); + + fetcher = new HttpFetcher("search.marginalia.nu"); + var dcf = new DomainCrawlerFactory(fetcher, + new HtmlProcessor(documentKeywordExtractor, newSe), + new PlainTextProcessor(documentKeywordExtractor, newSe), + new ArchiveClient(), + new DomainCrawlerRobotsTxt(fetcher, "search.marginalia.nu"), + new LanguageFilter(), + blocklist); + + for (int i = 0; i < 512; i++) { + new Thread(() -> processorThread(dcf), "Processor-"+i).start(); + } + + try (var c = conn.getConnection(); + var fetchDomains = c.prepareStatement("select ID, EC_DOMAIN.URL_PART from EC_DOMAIN WHERE QUALITY_RAW>-100 AND INDEXED>0 AND INDEX_DATE<'2022-03-17' AND STATE<2 ORDER BY INDEX_DATE ASC,DISCOVER_DATE ASC,STATE DESC,INDEXED DESC,EC_DOMAIN.ID"); + var fetchUrlsForDomain = c.prepareStatement("select ID,DATA_HASH,VISITED FROM EC_URL WHERE DOMAIN_ID=? 
ORDER BY VISITED DESC, DATA_HASH IS NOT NULL DESC, ID") + ) { + fetchDomains.setFetchSize(10_000); + fetchDomains.executeQuery(); + var domainRsp = fetchDomains.executeQuery(); + + logger.info("Fetched domains"); + while (domainRsp.next()) { + if (!blacklist.isBlacklisted(domainRsp.getInt(1))) { + domainIndexOrder.add(domainRsp.getInt(1)); + } + } + + fetchUrlsForDomain.setFetchSize(10_000); + + for (int i = 0; i < domainIndexOrder.size(); i++) { + if (!AbortMonitor.getInstance().isAlive()) { + break; + } + + int domainId = domainIndexOrder.getQuick(i); + var domain = dataStoreDao.getDomain(new EdgeId<>(domainId)); + + fetchUrlsForDomain.setInt(1, domainId); + var urlRsp = fetchUrlsForDomain.executeQuery(); + + EdgeIndexTask task = new EdgeIndexTask(domain, 1000, 1000, 0); + Map hashes = new HashMap<>(); + + int visitedCount = 0; + while (urlRsp.next()) { + var url = dataStoreDao.getUrl(new EdgeId<>(urlRsp.getInt(1))); + task.urls.add(url); + + if (urlRsp.getBoolean(3)) { + visitedCount++; + } + + int hash = urlRsp.getInt(2); + if (hash != 0) + hashes.put(url, hash); + } + + processQueue.put(new ReindexJob(task, hashes, visitedCount)); + } + } + catch (Exception ex) { + ex.printStackTrace(); + } + + + logger.warn("Terminating Main"); + } + + static LanguageModels lm = new LanguageModels( + Path.of("/var/lib/wmsa/model/ngrams-generous-emstr.bin"), + Path.of("/var/lib/wmsa/model/tfreq-new-algo3.bin"), + Path.of("/var/lib/wmsa/model/opennlp-sentence.bin"), + Path.of("/var/lib/wmsa/model/English.RDR"), + Path.of("/var/lib/wmsa/model/English.DICT"), + Path.of("/var/lib/wmsa/model/opennlp-tok.bin") + ); + static NGramDict dict = new NGramDict(lm); + + private static final Semaphore processSem = new Semaphore(500, true); + @SneakyThrows + public static void processorThread(DomainCrawlerFactory dcf) { + String name = Thread.currentThread().getName(); + try { +outer: + while (AbortMonitor.getInstance().isAlive() && (running || !processQueue.isEmpty())) { + 
Thread.currentThread().setName(name); + ReindexJob job = null; + + while (job == null) { + job = processQueue.poll(30, TimeUnit.SECONDS); + if (!AbortMonitor.getInstance().isAlive()) { + break outer; + } + } + + Thread.currentThread().setName(name + ":" + job.task.domain); + if (!blocklist.isAllowed(job.task.domain)) { + setDomainError(job.task.domain, new HttpFetcher.FetchResult(HttpFetcher.FetchResultState.ERROR, job.task.domain)); + } + + var probe = fetcher.probeDomain(job.task.urls.get(0)); + + if (!AbortMonitor.getInstance().isAlive()) { + break; + } + + if (!probe.ok()) { + setDomainError(job.task.domain, probe); + } else { + var dc = dcf.domainCrawler(job.task); + + int countMaxAll = 4000; + int countVisited = (int) (job.visitedCount * 1.1); + int countHash = 100 + job.hashes.size() * 2; + + int maxCount = Math.min(countMaxAll, Math.max(countVisited, countHash)); + + DomainCrawlResults result; + int tokens = Math.max(1,maxCount/1000); + try { + while (!processSem.tryAcquire(tokens)) + Thread.sleep((int)(100 + Math.random() * 100)); + result = dc.crawlToExhaustion(maxCount, AbortMonitor.getInstance()::isAlive); + + if (!AbortMonitor.getInstance().isAlive()) { + break; + } + } + finally { + processSem.release(tokens); + } + + uploadQueue.put(new UploadJob(result, job)); + } + } + } + catch (InterruptedException ex) { + ex.printStackTrace(); + } + + logger.warn("Terminating {}", Thread.currentThread().getName()); + } + + private static void setDomainError(EdgeDomain domain, HttpFetcher.FetchResult probe) { + List links = new ArrayList<>(1); + EdgeDomain alias = null; + if (probe.state == HttpFetcher.FetchResultState.REDIRECT) { + links.add(new EdgeDomainLink(domain, probe.domain)); + alias = probe.domain; + } + uploadFacade.putLinks(links, true); + uploadFacade.updateDomainIndexTimestamp(domain, EdgeDomainIndexingState.ERROR, alias, 1); + } + + @SneakyThrows + public static void uploadThread() { + int count = 0; + long allUrls = 0; + long newUrls = 0; +outer: 
+ while (AbortMonitor.getInstance().isAlive() && (running || !processQueue.isEmpty() || !uploadQueue.isEmpty())) { + + UploadJob job = null; + + while (job == null) { + job = uploadQueue.poll(30, TimeUnit.SECONDS); + if (!AbortMonitor.getInstance().isAlive()) { + break outer; + } + } + + UploadJob data = job; + + + var dc = data.results; + + double avgQuality = UploaderWorker.calculateMedianQuality(dc).orElse(-5.); + + if (uploadFacade.isBlacklisted(dc.domain)) { + continue; + } + + final double linkQualityRating = -5; //(avgQuality + UNKNOWN_SITE_ATTRACTOR)/2 - extLinkPenalty; + + var visits = dc.visits(); + allUrls += visits.size(); + + var newContents = visits; + + /*.stream().filter(visit -> { + var hash = data.job.hashes.get(visit.url); + return (hash == null || !Objects.equals(visit.data_hash_code, hash)); + }).collect(Collectors.toList());*/ + + var goodUrls = newContents.stream().map(EdgeUrlVisit::getUrl).collect(Collectors.toSet()); + + newUrls += newContents.size(); + + uploadFacade.putUrls(dc.extUrl, linkQualityRating); + uploadFacade.putUrls(dc.intUrl, linkQualityRating); + uploadFacade.putUrlVisits(newContents); + uploadFacade.putFeeds(dc.feeds); + + if (avgQuality < UploaderWorker.QUALITY_LOWER_BOUND_CUTOFF) { + uploadFacade.updateDomainIndexTimestamp(dc.domain, EdgeDomainIndexingState.ACTIVE, null, 1); + continue; + } + + uploadFacade.putLinks(dc.links, true); + uploadFacade.putWords(dc.pageContents.values().stream().filter(pc -> goodUrls.contains(pc.url)).collect(Collectors.toList()), 1); + + uploadFacade.updateDomainIndexTimestamp(dc.domain, EdgeDomainIndexingState.ACTIVE, null, Math.max(1, visits.size()/50)); + + logger.info("{} Done - {} : {} : {}", ++count, data.results.domain, allUrls, newUrls); + } + + logger.warn("Terminating {}", Thread.currentThread().getName()); + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/tools/StackOverflowLoaderMain.java 
b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/tools/StackOverflowLoaderMain.java new file mode 100644 index 00000000..8173c1a1 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/tools/StackOverflowLoaderMain.java @@ -0,0 +1,96 @@ +package nu.marginalia.wmsa.edge.tools; + +import com.zaxxer.hikari.HikariDataSource; +import nu.marginalia.util.ParallelPipe; +import nu.marginalia.wmsa.configuration.module.DatabaseModule; +import nu.marginalia.wmsa.edge.assistant.dict.NGramDict; +import nu.marginalia.wmsa.edge.crawler.domain.language.conf.LanguageModels; +import nu.marginalia.wmsa.edge.crawler.domain.language.processing.DocumentKeywordExtractor; +import nu.marginalia.wmsa.edge.crawler.domain.language.processing.SentenceExtractor; +import nu.marginalia.wmsa.edge.crawler.domain.processor.HtmlFeature; +import nu.marginalia.wmsa.edge.data.dao.EdgeDataStoreDaoImpl; +import nu.marginalia.wmsa.edge.index.client.EdgeIndexClient; +import nu.marginalia.wmsa.edge.integration.stackoverflow.StackOverflowPostProcessor; +import nu.marginalia.wmsa.edge.integration.BasicPageUploader; +import nu.marginalia.wmsa.edge.integration.stackoverflow.StackOverflowPostsReader; +import nu.marginalia.wmsa.edge.integration.model.BasicDocumentData; +import nu.marginalia.wmsa.edge.integration.stackoverflow.model.StackOverflowPost; +import nu.marginalia.wmsa.edge.model.EdgeDomain; +import nu.marginalia.wmsa.edge.model.crawl.EdgeDomainIndexingState; +import nu.marginalia.wmsa.edge.model.EdgeUrl; +import org.mariadb.jdbc.Driver; + +import java.nio.file.Files; +import java.nio.file.Path; +import java.sql.SQLException; +import java.util.EnumSet; + +public class StackOverflowLoaderMain { + public static void main(String[] args) throws InterruptedException { + String site = args[0]; + String file = args[1]; + + if (!Files.exists(Path.of(file))) { + System.err.println("Invalid file " + file); + return; + } + + org.mariadb.jdbc.Driver driver = new Driver(); + + EdgeDomain domain = 
new EdgeDomain(site); + + var ds = new DatabaseModule().provideConnection(); + + EdgeDataStoreDaoImpl dataStoreDao = new EdgeDataStoreDaoImpl(ds); + EdgeIndexClient indexClient = new EdgeIndexClient(); + + dataStoreDao.putUrl(-2, new EdgeUrl("https", domain, null, "/")); + setDomainToSpecial(ds, domain); + + var lm = new LanguageModels( + Path.of("/var/lib/wmsa/model/ngrams-generous-emstr.bin"), + Path.of("/var/lib/wmsa/model/tfreq-new-algo3.bin"), + Path.of("/var/lib/wmsa/model/opennlp-sentence.bin"), + Path.of("/var/lib/wmsa/model/English.RDR"), + Path.of("/var/lib/wmsa/model/English.DICT"), + Path.of("/var/lib/wmsa/model/opennlp-tok.bin") + ); + + + var documentKeywordExtractor = new DocumentKeywordExtractor(new NGramDict(lm)); + BasicPageUploader uploader = new BasicPageUploader(dataStoreDao, indexClient, + EnumSet.of(HtmlFeature.TRACKING, HtmlFeature.JS)); + ThreadLocal processor = ThreadLocal.withInitial(() -> new StackOverflowPostProcessor(new SentenceExtractor(lm), documentKeywordExtractor)); + + var pipe = new ParallelPipe("pipe", 32, 5, 2) { + @Override + public BasicDocumentData onProcess(StackOverflowPost stackOverflowPost) { + return processor.get().process(stackOverflowPost); + } + + @Override + public void onReceive(BasicDocumentData stackOverflowIndexData) { + uploader.upload(stackOverflowIndexData); + } + }; + + System.out.println(domain); + var reader = new StackOverflowPostsReader(file, domain, pipe::accept); + reader.join(); + pipe.join(); + + ds.close(); + indexClient.close(); + } + + private static void setDomainToSpecial(HikariDataSource ds, EdgeDomain domain) { + try (var conn = ds.getConnection(); var stmt = conn.prepareStatement("UPDATE EC_DOMAIN SET STATE=? 
WHERE URL_PART=?")) { + stmt.setInt(1, EdgeDomainIndexingState.SPECIAL.code); + stmt.setString(2, domain.toString()); + stmt.executeUpdate(); + } + catch (SQLException ex) { + ex.printStackTrace(); + } + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/tools/TermFrequencyCounterMain.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/tools/TermFrequencyCounterMain.java new file mode 100644 index 00000000..efdc9fa6 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/tools/TermFrequencyCounterMain.java @@ -0,0 +1,142 @@ +package nu.marginalia.wmsa.edge.tools; + + +import gnu.trove.set.hash.TLongHashSet; +import lombok.SneakyThrows; +import nu.marginalia.wmsa.edge.archive.archiver.ArchiveExtractor; +import nu.marginalia.wmsa.edge.assistant.dict.NGramDict; +import nu.marginalia.wmsa.edge.crawler.domain.language.conf.LanguageModels; +import nu.marginalia.wmsa.edge.crawler.domain.language.processing.KeywordExtractor; +import nu.marginalia.wmsa.edge.crawler.domain.language.processing.SentenceExtractor; +import nu.marginalia.wmsa.edge.model.crawl.EdgeRawPageContents; +import opennlp.tools.stemmer.PorterStemmer; +import org.jsoup.Jsoup; + +import java.io.BufferedOutputStream; +import java.io.DataOutputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.nio.file.Path; +import java.util.*; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.atomic.AtomicLong; + +public class TermFrequencyCounterMain { + + static LinkedBlockingQueue processQueue = new LinkedBlockingQueue<>(20); + + public static final String OUTPUT_FILE = "/var/lib/wmsa/archive/tfreq-2022-04-04.bin"; + public static final String ARCHIVE_PATH = "/var/lib/wmsa/archive/webpage"; // "/mnt/storage/wmsa/archive/webpage/" + + @SneakyThrows + public static void main(String... 
args) throws IOException { + + List pt = new ArrayList<>(); + for (int i = 0; i < 20; i++) { + pt.add(new Thread(TermFrequencyCounterMain::processorThread)); + } + pt.forEach(Thread::start); + + AtomicLong docsTotal = new AtomicLong(); + new ArchiveExtractor(Path.of(ARCHIVE_PATH)).forEach( + page -> { + if (page.contentType.contentType.contains("html") + && page.isAfter("2022-03-15T")) { + try { + long dt = docsTotal.incrementAndGet(); + if (dt == 0) { + System.out.println(docsTotal.get() + " - " + termFreq.size()); + } + if ((dt % 5) != 0) { + processQueue.put(page); + } + } catch (InterruptedException e) { + e.printStackTrace(); + } + } + }); + running = false; + + + System.out.println("Waiting for wrap-up"); + + Thread.sleep(36000); + + for (Thread thread : pt) { + thread.interrupt(); + } + for (Thread thread : pt) { + thread.join(); + } + System.out.println("Total documents = " + docsTotal.get()); + + System.out.println("Writing Frequencies"); + + try (var dos = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(OUTPUT_FILE))) + ) { + synchronized (termFreq) { + for (var entry : termFreq.entrySet()) { + + if (entry.getValue() > 5) { + dos.writeLong(entry.getKey()); + dos.writeLong(entry.getValue()); + } + } + } + } catch (IOException e) { + e.printStackTrace(); + } + + + System.out.println("All done!"); + } + + public static final ConcurrentHashMap termFreq = new ConcurrentHashMap<>(); + + public static final LanguageModels lm = new LanguageModels( + Path.of("/var/lib/wmsa/model/ngrams-generous-emstr.bin"), + Path.of("/var/lib/wmsa/model/tfreq-generous-emstr.bin"), + Path.of("/var/lib/wmsa/model/opennlp-sentence.bin"), + Path.of("/var/lib/wmsa/model/English.RDR"), + Path.of("/var/lib/wmsa/model/English.DICT"), + Path.of("/var/lib/wmsa/model/opennlp-tok.bin") + ); + public static volatile boolean running = true; + + public static void processorThread() { + var ke = new KeywordExtractor(); + var se = new SentenceExtractor(lm); + var ps = new 
PorterStemmer(); + try { + TLongHashSet words = new TLongHashSet(10000); + while (running || !processQueue.isEmpty()) { + var job = processQueue.take(); + var sentence = se.extractSentences(Jsoup.parse(job.data)); + + for (var sent : sentence.sentences) { + var keywords = ke.getKeywordsFromSentence(sent); + for (int i = 0; i < keywords.length; i++) { + if (keywords[i].size() > 1) { + words.add(NGramDict.longHash(sent.constructStemmedWordFromSpan(keywords[i]).getBytes())); + } + } + + for (String word : sent.wordsLowerCase) { + words.add(NGramDict.longHash(ps.stem(word).getBytes())); + } + + words.forEach(l -> { + termFreq.merge(l, 1, Integer::sum); + return true; + }); + words.clear(); + } + } + } + catch (InterruptedException ex) { + ex.printStackTrace(); + } + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/tools/ZimConverterMain.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/tools/ZimConverterMain.java new file mode 100644 index 00000000..10ac9042 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/tools/ZimConverterMain.java @@ -0,0 +1,211 @@ +package nu.marginalia.wmsa.edge.tools; + +import lombok.AllArgsConstructor; +import lombok.SneakyThrows; +import nu.marginalia.wmsa.edge.archive.client.ArchiveClient; +import nu.marginalia.wmsa.edge.assistant.dict.NGramDict; +import nu.marginalia.wmsa.edge.assistant.dict.WikiCleaner; +import nu.marginalia.wmsa.edge.crawler.domain.language.conf.LanguageModels; +import org.jsoup.Jsoup; +import org.openzim.ZIMTypes.ZIMFile; +import org.openzim.ZIMTypes.ZIMReader; + +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.PrintWriter; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Map; +import java.util.Objects; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.LinkedBlockingQueue; + +public class ZimConverterMain { + + static LinkedBlockingQueue jobQueue = new LinkedBlockingQueue<>(100); + 
static LinkedBlockingQueue analysisQueue = new LinkedBlockingQueue<>(100); + static boolean hasData = true; + static ArchiveClient archiveClient = new ArchiveClient(); + static NGramDict dict = new NGramDict(new LanguageModels( + Path.of("/var/lib/wmsa/model/ngrams-generous-emstr.bin"), + Path.of("/var/lib/wmsa/model/tfreq-generous-emstr.bin"), + Path.of("/var/lib/wmsa/model/opennlp-sentence.bin"), + Path.of("/var/lib/wmsa/model/English.RDR"), + Path.of("/var/lib/wmsa/model/English.DICT"), + Path.of("/var/lib/wmsa/model/opennlp-tok.bin") + ) + ); + public void extractUrlList() throws IOException { + var zr = new ZIMReader(new ZIMFile("/home/vlofgren/Work/wikipedia_en_all_nopic_2021-01.zim")); + + var urlList = zr.getURLListByURL(); + + try (PrintWriter pw = new PrintWriter(new FileOutputStream("/home/vlofgren/Work/wikiTitlesAndRedirects.sql"))) { + zr.forEachTitles( + ae -> { + pw.printf("INSERT INTO REF_WIKI_TITLE(NAME) VALUES (\"%s\");\n", ae.getUrl().replace("\\", "\\\\").replace("\"", "\\\"")); + }, + re -> { + pw.printf("INSERT INTO REF_WIKI_TITLE(NAME, REF_NAME) VALUES (\"%s\",\"%s\");\n", re.getUrl().replace("\\", "\\\\").replace("\"", "\\\""), urlList.get(re.getRedirectIndex()).replace("\\", "\\\\").replace("\"", "\\\"")); + } + ); + } + } + + public static void main(String[] args) throws IOException { +// convertJust("Aleph_number"); +// convertJust("Floyd–Steinberg_dithering"); +// convertJust("Laplace's_equation"); +// convertJust("John_Fahey"); +// convertJust("Plotinus"); +// convertJust("C++"); + convertAll(args); + archiveClient.close(); + } + + @SneakyThrows + private static void convertJust(String url) { + String newData = new WikiCleaner().cleanWikiJunk("https://en.wikipedia.org/wiki/" + url, + Files.readString(Path.of("/home/vlofgren/Work/wiki-convert/", "in-" + url + ".html"))); + Files.writeString(Path.of("/home/vlofgren/Work/wiki-convert/", "out-" + url + ".html"), newData); + } + + private static void extractOne(String which, int clusterId) 
throws IOException { +// var zr = new ZIMReader(new ZIMFile(args[1])); + var zr = new ZIMReader(new ZIMFile("/home/vlofgren/Work/wikipedia_en_all_nopic_2021-01.zim")); + + int[] cluster = new int[] { clusterId }; + if (clusterId == -1) { + zr.forEachTitles(ae -> { + if (ae.getUrl().equals(which)) { + System.err.print(ae.getUrl() + " " + ae.getClusterNumber()); + cluster[0] = ae.getClusterNumber(); + } + }, re -> { + }); + } + + System.err.println("Extracting cluster " + cluster[0] ); + if (cluster[0] == -1) { + return; + } + zr.forEachArticles((url, art) -> { + if (art != null) { + if (which.equals(url)) { + try { + Files.writeString(Path.of("/home/vlofgren/Work/wiki-convert/","in-" + url + ".html"), art); + String newData = new WikiCleaner().cleanWikiJunk("https://en.wikipedia.org/wiki/" + url, art); + Files.writeString(Path.of("/home/vlofgren/Work/wiki-convert/", "out-" + url + ".html"), newData); + } catch (IOException e) { + e.printStackTrace(); + } + + } + scheduleJob(url, art); + } + }, p -> p == cluster[0]); + + } + + private static void convertAll(String[] args) throws IOException { + archiveClient.setServiceRoute("127.0.0.1", Integer.parseInt(args[0])); + var zr = new ZIMReader(new ZIMFile(args[1])); +// var zr = new ZIMReader(new ZIMFile("/home/vlofgren/Work/wikipedia_en_all_nopic_2021-01.zim")); + + for (int i = 0; i < 8; i++) { + Thread t = new Thread(ZimConverterMain::jobExecutor); + t.setName("Converter"); + t.start(); + + Thread t2 = new Thread(() -> { + for (; ; ) { + String pt; + try { + pt = analysisQueue.take(); + } catch (InterruptedException e) { + e.printStackTrace(); + return; + } +// var topic = new TopicWordExtractor().extractWords(pt); +// var words = new NGramTextRankExtractor(dict, topic).extractWords(Collections.emptyList(), pt); +// System.out.println(Strings.join(words, ',')); + } + }); + t2.setName("Analysis"); + t2.start(); + } + + zr.forEachArticles((url, art) -> { + if (art != null) { + scheduleJob(url, art); + } + }, p -> true); 
+ + hasData = false; + archiveClient.close(); + } + + @SneakyThrows + private static void jobExecutor() { + while (hasData || !jobQueue.isEmpty()) { + var job = jobQueue.take(); + try { + job.convert(); + } + catch (Exception ex) { + System.err.println("Error in " + job.url); + ex.printStackTrace(); + } + } + } + + @SneakyThrows + private static void scheduleJob(String url, String art) { + jobQueue.put(new ConversionJob(art, url)); + } + + static Map wordCount = new ConcurrentHashMap<>(); + static boolean isKeyword(String word) { + + int limit = 100_000; + long n = word.chars().filter(c -> c=='_').count(); + if (n == 0) limit = 2; + if (n == 1) limit = 1; + if (n == 2) limit = 1; + if (n >= 3) limit = 1; + + long c = word.chars().filter(ch -> ch >= 'a' && ch <= 'z').count(); + if (c-2 <= n) { + return false; + } + int hashA = word.hashCode(); + int hashB = Objects.hash(n, c, word.length(), word.charAt(0)); + long hash = (long) hashA + ((long) hashB << 32); + + return wordCount.compute(hash, (k, v) -> v == null ? 
1 : v+1) == limit; + } + @AllArgsConstructor + private static class ConversionJob { + private final String data; + private final String url; + + + public void convert() throws IOException, InterruptedException { + var page = new WikiCleaner().cleanWikiJunk("https://en.wikipedia.org/wiki/" + url, data); + String pt = Jsoup.parse(page).text(); + analysisQueue.put(pt); + + /* + + String newData = new WikiCleaner().cleanWikiJunk("https://en.wikipedia.org/wiki/" + url, data); + + + if (null != newData) { + archiveClient.submitWiki(Context.internal(), url, newData) + .retry(5) + .blockingSubscribe(); + + }*/ + } + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/Memex.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/Memex.java new file mode 100644 index 00000000..41959971 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/Memex.java @@ -0,0 +1,244 @@ +package nu.marginalia.wmsa.memex; + +import com.google.inject.Inject; +import com.google.inject.Singleton; +import com.google.inject.name.Named; +import io.reactivex.rxjava3.schedulers.Schedulers; +import nu.marginalia.gemini.GeminiService; +import nu.marginalia.gemini.gmi.GemtextDatabase; +import nu.marginalia.util.graphics.dithering.FloydSteinbergDither; +import nu.marginalia.util.graphics.dithering.Palettes; +import nu.marginalia.gemini.gmi.GemtextDocument; +import nu.marginalia.wmsa.memex.change.GemtextTombstoneUpdateCaclulator; +import nu.marginalia.wmsa.memex.model.MemexImage; +import nu.marginalia.wmsa.memex.model.MemexNode; +import nu.marginalia.wmsa.memex.model.MemexNodeUrl; +import nu.marginalia.wmsa.memex.renderer.MemexRendererers; +import nu.marginalia.wmsa.memex.system.MemexFileSystemMonitor; +import nu.marginalia.wmsa.memex.system.MemexFileWriter; +import nu.marginalia.wmsa.memex.system.MemexGitRepo; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.annotation.Nullable; +import javax.imageio.ImageIO; +import 
java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.util.List; +import java.util.Objects; +import java.util.concurrent.TimeUnit; + +@Singleton +public class Memex { + + private final MemexData data; + private final MemexFileSystemMonitor monitor; + private final MemexGitRepo gitRepo; + private final MemexLoader loader; + + private final MemexFileWriter resources; + private final GemtextTombstoneUpdateCaclulator tombstoneUpdateCaclulator; + + private final FloydSteinbergDither ditherer = new FloydSteinbergDither(Palettes.MARGINALIA_PALETTE, 640, 480); + private final MemexRendererers renderers; + + private static final Logger logger = LoggerFactory.getLogger(Memex.class); + + @Inject + public Memex(MemexData data, + @Nullable MemexFileSystemMonitor monitor, + MemexGitRepo gitRepo, MemexLoader loader, + @Named("html") MemexFileWriter htmlFiles, + GemtextTombstoneUpdateCaclulator tombstoneUpdateCaclulator, + MemexRendererers renderers, + GeminiService geminiService) throws IOException { + this.data = data; + this.monitor = monitor; + this.gitRepo = gitRepo; + this.loader = loader; + this.resources = htmlFiles; + this.tombstoneUpdateCaclulator = tombstoneUpdateCaclulator; + this.renderers = renderers; + + Schedulers.io().scheduleDirect(this::load); + if (monitor != null) { + Schedulers.io().schedulePeriodicallyDirect(this::refreshUpdatedUrls, 1, 1, TimeUnit.SECONDS); + } + + Schedulers.newThread().scheduleDirect(geminiService::run); + } + + private void refreshUpdatedUrls() { + var updatedUrls = monitor.getUpdatedUrls(); + for (var url : updatedUrls) { + try { + if (url.toString().endsWith(".gmi")) { + var updates = loader.reloadNode(url); + updates.forEach(renderers::render); + + if (!updates.isEmpty()) { + renderers.render(url.getParentUrl()); + } + } else if (url.toString().endsWith(".png")) { + var updates = loader.reloadImage(url); + renderers.render(url); + + if (!updates.isEmpty()) { + 
renderers.render(url.getParentUrl()); + } + } + + if (tombstoneUpdateCaclulator.isTombstoneFile(url)) { + loader.loadTombstones().forEach(renderers::render); + } + if (tombstoneUpdateCaclulator.isRedirectFile(url)) { + loader.loadRedirects().forEach(renderers::render); + } + } + catch (Exception ex) { + logger.error("Failed to refresh URL " + url, ex); + } + } + } + + private void load() { + copyStylesheet(); + + try { + loader.load(); + renderAll(); + } + catch (IOException ex) { + logger.error("Failed to load", ex); + } + } + + private void copyStylesheet() { + try (var resource = Objects.requireNonNull( + ClassLoader.getSystemResourceAsStream("static/memex/style-new.css"), "Could not load stylesheet")) { + resources.write(new MemexNodeUrl("/style-new.css"), resource.readAllBytes()); + } + catch (Exception ex) { + logger.error("Failed to copy stylesheet", ex); + } + + try (var resource = Objects.requireNonNull( + ClassLoader.getSystemResourceAsStream("static/memex/ico/dir.png"), "Could not copy file")) { + resources.write(new MemexNodeUrl("/ico/dir.png"), resource.readAllBytes()); + } + catch (Exception ex) { + logger.error("Failed to copy file", ex); + } + + + try (var resource = Objects.requireNonNull( + ClassLoader.getSystemResourceAsStream("static/memex/ico/file.png"), "Could not copy file")) { + resources.write(new MemexNodeUrl("/ico/file.png"), resource.readAllBytes()); + } + catch (Exception ex) { + logger.error("Failed to copy file", ex); + } + + + try (var resource = Objects.requireNonNull( + ClassLoader.getSystemResourceAsStream("static/memex/ico/root.png"), "Could not copy file")) { + resources.write(new MemexNodeUrl("/ico/root.png"), resource.readAllBytes()); + } + catch (Exception ex) { + logger.error("Failed to copy file", ex); + } + + try (var resource = Objects.requireNonNull( + ClassLoader.getSystemResourceAsStream("static/memex/ico/pic16.png"), "Could not copy file")) { + resources.write(new MemexNodeUrl("/ico/pic16.png"), 
resource.readAllBytes()); + } + catch (Exception ex) { + logger.error("Failed to copy file", ex); + } + } + + private void renderAll() { + data.forEach((url, doc) -> { + renderers.render(url); + }); + data.getDirectories().forEach(renderers::render); + data.getImages().forEach(img -> renderers.render(img.path)); + + data.getTombstones().ifPresent(this::renderTombstoneFromGemtextDb); + data.getRedirects().ifPresent(this::renderTombstoneFromGemtextDb); + } + + + private void renderTombstoneFromGemtextDb(GemtextDatabase db) { + db.keys() + .stream() + .map(MemexNodeUrl::new) + .filter(url -> getDocument(url) == null) + .forEach(renderers::render); + } + + public void updateNode(MemexNodeUrl node, String text) throws IOException { + var nodes = loader.updateNode(node, text); + + nodes.forEach(renderers::render); + + renderers.render(node.getParentUrl()); + } + + public GemtextDocument getDocument(MemexNodeUrl url) { + return data.getDocument(url); + } + public MemexImage getImage(MemexNodeUrl url) { + return data.getImage(url); + } + + + public void createNode(MemexNodeUrl node, String text) throws IOException { + var nodes = loader.createNode(node, text); + + nodes.forEach(renderers::render); + + renderers.render(node.getParentUrl()); + } + + + public void uploadImage(MemexNodeUrl url, byte[] bytes) throws IOException { + + var image = ImageIO.read(new ByteArrayInputStream(bytes)); + var convertedImage = ditherer.convert(image); + var baosOut = new ByteArrayOutputStream(); + ImageIO.write(convertedImage, "png", baosOut); + + loader.uploadImage(url, baosOut.toByteArray()); + + renderers.render(url); + renderers.render(url.getParentUrl()); + } + + public void delete(MemexNode node, String message) throws IOException { + tombstoneUpdateCaclulator.addTombstone(node.getUrl(), message) + .visit(this); + loader.loadTombstones(); + loader.delete(node).forEach(renderers::render); + } + + public List getDocumentsByPath(MemexNodeUrl url) { + return data.getDocumentsByPath(url); 
+ } + + public void gitPull() { + gitRepo.pull(); + } + + public void rename(MemexNode src, MemexNodeUrl dst) throws IOException { + tombstoneUpdateCaclulator.addRedirect(src.getUrl(), dst.toString()) + .visit(this); + loader.loadRedirects(); + loader.rename(src, dst).forEach(renderers::render); + } + + public byte[] getRaw(MemexNodeUrl url) throws IOException { + return loader.getRaw(url); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/MemexConfigurationModule.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/MemexConfigurationModule.java new file mode 100644 index 00000000..676ebc05 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/MemexConfigurationModule.java @@ -0,0 +1,51 @@ +package nu.marginalia.wmsa.memex; + +import com.google.inject.AbstractModule; +import com.google.inject.Inject; +import com.google.inject.Provider; +import com.google.inject.name.Named; +import com.google.inject.name.Names; +import nu.marginalia.wmsa.memex.system.MemexFileWriter; + +import java.nio.file.Path; + +public class MemexConfigurationModule extends AbstractModule { + public void configure() { + bind(Path.class).annotatedWith(Names.named("memex-root")).toInstance(Path.of("/var/lib/wmsa/memex")); + bind(Path.class).annotatedWith(Names.named("memex-html-resources")).toInstance(Path.of("/var/lib/wmsa/memex-html")); + bind(Path.class).annotatedWith(Names.named("memex-gmi-resources")).toInstance(Path.of("/var/lib/wmsa/memex-gmi")); + bind(String.class).annotatedWith(Names.named("tombestone-special-file")).toInstance("/special/tombstone.gmi"); + bind(String.class).annotatedWith(Names.named("redirects-special-file")).toInstance("/special/redirect.gmi"); + + bind(MemexFileWriter.class).annotatedWith(Names.named("html")).toProvider(MemexHtmlWriterProvider.class); + bind(MemexFileWriter.class).annotatedWith(Names.named("gmi")).toProvider(MemexGmiWriterProvider.class); + } + + + + public static class MemexHtmlWriterProvider 
implements Provider { + private final Path path; + + @Inject + public MemexHtmlWriterProvider(@Named("memex-html-resources") Path resources) { + this.path = resources; + } + @Override + public MemexFileWriter get() { + return new MemexFileWriter(path); + } + } + + public static class MemexGmiWriterProvider implements Provider { + private final Path path; + + @Inject + public MemexGmiWriterProvider(@Named("memex-gmi-resources") Path resources) { + this.path = resources; + } + @Override + public MemexFileWriter get() { + return new MemexFileWriter(path); + } + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/MemexData.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/MemexData.java new file mode 100644 index 00000000..22c20f8f --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/MemexData.java @@ -0,0 +1,150 @@ +package nu.marginalia.wmsa.memex; + +import com.google.inject.Singleton; +import nu.marginalia.gemini.gmi.GemtextDatabase; +import nu.marginalia.gemini.gmi.GemtextDocument; +import nu.marginalia.wmsa.memex.model.MemexLink; +import nu.marginalia.wmsa.memex.model.MemexImage; +import nu.marginalia.wmsa.memex.model.MemexNodeUrl; +import nu.marginalia.wmsa.memex.model.fs.MemexFileSystem; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.*; +import java.util.function.BiConsumer; + +@Singleton +public class MemexData { + private final MemexLinks links = new MemexLinks(); + private final Map documents = new HashMap<>(); + + private final Map images = new HashMap<>(); + private final MemexFileSystem fileSystem = new MemexFileSystem(); + private final Logger logger = LoggerFactory.getLogger(getClass()); + private GemtextDatabase tombstones = null; + private GemtextDatabase redirects = null; + + public synchronized Collection getImages() { + return new ArrayList<>(images.values()); + } + public synchronized Collection getDocuments() { return new ArrayList<>(documents.values()); } + + public 
synchronized void setTombstones(GemtextDatabase tombstones) { + this.tombstones = tombstones; + } + public synchronized void setRedirects(GemtextDatabase redirects) { + this.redirects = redirects; + } + + public synchronized void addDocument(MemexNodeUrl url, GemtextDocument doc) { + logger.debug("addDocument({})", url); + documents.put(url, doc); + fileSystem.register(doc); + } + + public synchronized void addImage(MemexNodeUrl url, MemexImage img) { + images.put(url, img); + fileSystem.register(img); + } + + public Optional getTombstones() { + return Optional.ofNullable(tombstones); + } + public Optional getRedirects() { + return Optional.ofNullable(redirects); + } + + public synchronized void updateOutlinks(MemexNodeUrl url, GemtextDocument doc) { + + var linksForNode = new TreeSet<>(Comparator.comparing(MemexLink::getDest)); + + MemexNodeUrl srcUrl = "index.gmi".equals(url.getFilename()) ? url.getParentUrl() : url; + + for (var link : doc.getLinks()) { + link.getUrl().visitNodeUrl(nodeUrl -> + linksForNode.add(new MemexLink(nodeUrl, srcUrl, doc.getTitle(), doc.getHeadingForElement(link), link.getHeading())) + ); + } + + links.setOutlinks(srcUrl, linksForNode); + } + + public synchronized Set getNeighbors(MemexNodeUrl url) { + return links.getNeighbors(url); + } + + public synchronized void forEach(BiConsumer consumer) { + documents.forEach(consumer); + } + + public synchronized GemtextDocument getDocument(MemexNodeUrl url) { + return documents.get(url); + } + + public synchronized MemexImage getImage(MemexNodeUrl url) { + return images.get(url); + } + public synchronized List getBacklinks(MemexNodeUrl... 
urls) { + return links.getBacklinks(urls); + } + + public synchronized List getDocumentsByPath(MemexNodeUrl url) { + return fileSystem.getDocuments(url); + } + public synchronized List getImagesByPath(MemexNodeUrl url) { + return fileSystem.getImages(url); + } + public synchronized List getSubdirsByPath(MemexNodeUrl url) { + return fileSystem.getSubdirs(url); + } + + public MemexFileSystem getFilesystem() { + return fileSystem; + } + + public List getDirectories() { + return fileSystem.getAllDirectories(); + } + public boolean isDirectory(MemexNodeUrl url) { + return fileSystem.isDirectory(url); + } + + public synchronized Set deleteImage(MemexNodeUrl url) { + images.remove(url); + fileSystem.remove(url); + + Set affectedUrls = new HashSet<>(); + + affectedUrls.add(url); + affectedUrls.add(url.getParentUrl()); + + return affectedUrls; + } + + public synchronized Set deleteDocument(MemexNodeUrl url) { + Set affectedUrls = new HashSet<>(); + + affectedUrls.add(url); + affectedUrls.add(url.getParentUrl()); + + links.getOutlinks(url) + .stream() + .map(MemexLink::getDest) + .forEach(affectedUrls::add); + + documents.remove(url); + fileSystem.remove(url); + + links.remove(url); + + return affectedUrls; + } + + public boolean hasTombstone(MemexNodeUrl url) { + if (tombstones != null && tombstones.getLinkData(url).isPresent()) + return true; + if (redirects != null && redirects.getLinkData(url).isPresent()) + return true; + return false; + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/MemexLinks.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/MemexLinks.java new file mode 100644 index 00000000..8d491494 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/MemexLinks.java @@ -0,0 +1,54 @@ +package nu.marginalia.wmsa.memex; + +import nu.marginalia.wmsa.memex.model.MemexLink; +import nu.marginalia.wmsa.memex.model.MemexNodeUrl; + +import java.util.*; +import java.util.stream.Collectors; + +public class MemexLinks { + 
private Map> backLinks = new HashMap<>(); + private final Map> links = new HashMap<>(); + + public void updateBacklinks() { + backLinks.clear(); + backLinks = links.values().stream() + .flatMap(Set::stream) + .collect(Collectors.groupingBy(MemexLink::getDest)); + } + + public Set getNeighbors(MemexNodeUrl url) { + final Set neighbors = new HashSet<>(); + + links.getOrDefault(url, Collections.emptySet()).stream().map(MemexLink::getDest) + .forEach(neighbors::add); + backLinks.getOrDefault(url, Collections.emptyList()).stream() + .map(MemexLink::getSrc) + .forEach(neighbors::add); + + return neighbors; + } + + public void setOutlinks(MemexNodeUrl url, TreeSet linksForNode) { + links.put(url, linksForNode); + updateBacklinks(); + } + + public List getBacklinks(MemexNodeUrl... urls) { + return Arrays.stream(urls) + .map(backLinks::get) + .filter(Objects::nonNull) + .flatMap(List::stream) + .sorted(Comparator.comparing(MemexLink::getSrc)) + .collect(Collectors.toList()); + } + + public Set getOutlinks(MemexNodeUrl url) { + return links.getOrDefault(url, Collections.emptySet()); + } + + public void remove(MemexNodeUrl url) { + links.remove(url); + updateBacklinks(); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/MemexLoader.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/MemexLoader.java new file mode 100644 index 00000000..f5f6b29b --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/MemexLoader.java @@ -0,0 +1,265 @@ +package nu.marginalia.wmsa.memex; + +import com.google.common.collect.Sets; +import com.google.inject.Inject; +import com.google.inject.name.Named; +import nu.marginalia.gemini.gmi.GemtextDatabase; +import nu.marginalia.gemini.gmi.GemtextDocument; +import nu.marginalia.wmsa.memex.model.MemexImage; +import nu.marginalia.wmsa.memex.model.MemexNode; +import nu.marginalia.wmsa.memex.model.MemexNodeUrl; +import nu.marginalia.wmsa.memex.system.MemexFileSystemModifiedTimes; +import 
nu.marginalia.wmsa.memex.system.MemexSourceFileSystem; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.annotation.CheckReturnValue; +import java.io.File; +import java.io.IOException; +import java.nio.file.*; +import java.util.*; + +public class MemexLoader { + private final MemexData data; + private final MemexFileSystemModifiedTimes modifiedTimes; + private final Path root; + private final MemexSourceFileSystem sourceFileSystem; + + private final String tombstonePath; + private final String redirectsPath; + + private static final Logger logger = LoggerFactory.getLogger(MemexLoader.class); + + @Inject + public MemexLoader(MemexData data, + MemexFileSystemModifiedTimes modifiedTimes, + MemexSourceFileSystem sourceFileSystem, + @Named("memex-root") Path root, + @Named("tombestone-special-file") String tombstonePath, + @Named("redirects-special-file") String redirectsPath) { + + this.data = data; + this.modifiedTimes = modifiedTimes; + this.sourceFileSystem = sourceFileSystem; + this.root = root; + this.tombstonePath = tombstonePath; + this.redirectsPath = redirectsPath; + } + + + public void load() throws IOException { + + loadTombstones(); + loadRedirects(); + + try (var files = Files.walk(root)) { + files.forEach(this::loadFile); + } + + data.getFilesystem().recalculateDirectories(); + + } + + private void loadFile(Path p) { + var file = p.toFile(); + + try { + if (p.toString().contains(".git")) { + return; + } + if (file.isDirectory() && !file.getName().startsWith(".")) { + data.getFilesystem().registerDir(MemexNodeUrl.ofRelativePath(root, p)); + } else if (isGemtext(file)) { + loadNode(p); + } else if (isImage(file)) { + loadImage(p); + } + } + catch (IOException ex) { + logger.error("Failed to load file " + p, ex); + } + } + + public void loadImage(Path p) throws IOException { + if (!modifiedTimes.isFreshUpdate(p)) { + return; + } + + var url = MemexNodeUrl.ofRelativePath(root, p); + data.addImage(url, new MemexImage(url, p)); + 
logger.info("Loading {}", p); + } + + public Set loadTombstones() { + var oldValues = data.getTombstones(); + var newValues = loadGemtextDb(Path.of(root + tombstonePath)); + + newValues.ifPresent(data::setTombstones); + + + if (newValues.isPresent()) { + if (oldValues.isPresent()) { + var oldTs = oldValues.get(); + var newTs = newValues.get(); + return oldTs.difference(newTs); + } + } + + return Collections.emptySet(); + } + + public Set loadRedirects() { + var oldValues = data.getTombstones(); + var newValues = loadGemtextDb(Path.of(root + redirectsPath)); + + newValues.ifPresent(data::setRedirects); + + if (newValues.isPresent()) { + if (oldValues.isPresent()) { + var oldTs = oldValues.get(); + var newTs = newValues.get(); + return oldTs.difference(newTs); + } + } + + return Collections.emptySet(); + } + + private Optional loadGemtextDb(Path p) { + if (Files.exists(p)) { + try { + return Optional.of(GemtextDatabase.of(MemexNodeUrl.ofRelativePath(root, p), p)); + } catch (IOException e) { + logger.error("Failed to load database " + p, e); + } + } + return Optional.empty(); + } + + private boolean isGemtext(File f) { + return f.isFile() && f.getName().endsWith(".gmi"); + } + + private boolean isImage(File f) { + return f.isFile() && f.getName().endsWith(".png"); + } + + @CheckReturnValue + public Collection updateNode(MemexNodeUrl url, String contents) throws IOException { + sourceFileSystem.replaceFile(url, contents); + return loadNode(url); + } + + @CheckReturnValue + public Collection createNode(MemexNodeUrl url, String contents) throws IOException { + sourceFileSystem.createFile(url, contents); + return loadNode(url); + } + + + public MemexImage uploadImage(MemexNodeUrl url, byte[] bytes) throws IOException { + sourceFileSystem.createFile(url, bytes); + + var img = new MemexImage(url, url.asAbsolutePath(root)); + data.addImage(url, img); + return img; + } + + + public Set reloadImage(MemexNodeUrl url) throws IOException { + var path = url.asAbsolutePath(root); 
+ if (!Files.exists(path)) { + return data.deleteImage(url); + } + else { + loadImage(path); + Set affectedUrls = new HashSet<>(); + affectedUrls.add(url); + + for (var u = url.getParentUrl(); u != null; u = u.getParentUrl()) { + affectedUrls.add(u); + } + + return affectedUrls; + } + } + + public Set reloadNode(MemexNodeUrl url) throws IOException { + var path = url.asAbsolutePath(root); + if (!Files.exists(path)) { + return data.deleteDocument(url); + } + else { + return loadNode(path); + } + } + + public Set loadNode(Path path) throws IOException { + + if (!modifiedTimes.isFreshUpdate(path)) { + return Set.of(MemexNodeUrl.ofRelativePath(root, path)); + } + + logger.info("Loading {}", path); + + return loadNode(MemexNodeUrl.ofRelativePath(root, path)); + } + + public Set loadNode(MemexNodeUrl url) throws IOException { + + var doc = GemtextDocument.of(url, url.asAbsolutePath(root)); + + data.addDocument(url, doc); + + Set urlsAffected = data.getNeighbors(url); + + data.updateOutlinks(url, doc); + + urlsAffected.addAll(data.getNeighbors(url)); + urlsAffected.add(url); + urlsAffected.removeIf(u -> null == data.getDocument(u)); + + for (var u = url.getParentUrl(); u != null; u = u.getParentUrl()) { + urlsAffected.add(u); + } + + return urlsAffected; + } + + public Set delete(MemexNode node) throws IOException { + sourceFileSystem.delete(node.getUrl()); + return node.visit(new MemexNode.MemexNodeVisitor<>() { + @Override + public Set onDocument(MemexNodeUrl url) { + return data.deleteDocument(url); + } + + @Override + public Set onImage(MemexNodeUrl url) { + return data.deleteImage(url); + } + }); + } + + public Set rename(MemexNode src, MemexNodeUrl dst) throws IOException { + sourceFileSystem.renameFile(src.getUrl(), dst); + return src.visit(new MemexNode.MemexNodeVisitor>() { + @Override + public Set onDocument(MemexNodeUrl url) throws IOException { + var changes = data.deleteDocument(url); + return Sets.union(changes, reloadNode(dst)); + } + + @Override + public 
Set onImage(MemexNodeUrl url) throws IOException { + var changes = data.deleteImage(url); + return Sets.union(changes, reloadImage(dst)); + } + }); + + } + + public byte[] getRaw(MemexNodeUrl url) throws IOException { + return sourceFileSystem.getRaw(url); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/MemexMain.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/MemexMain.java new file mode 100644 index 00000000..9ea88d2e --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/MemexMain.java @@ -0,0 +1,32 @@ +package nu.marginalia.wmsa.memex; + +import com.google.inject.Guice; +import com.google.inject.Inject; +import com.google.inject.Injector; +import nu.marginalia.gemini.GeminiConfigurationModule; +import nu.marginalia.wmsa.configuration.MainClass; +import nu.marginalia.wmsa.configuration.ServiceDescriptor; +import nu.marginalia.wmsa.configuration.module.ConfigurationModule; +import nu.marginalia.wmsa.configuration.server.Initialization; + +import java.io.IOException; + +public class MemexMain extends MainClass { + private MemexService service; + + @Inject + public MemexMain(MemexService service) { + this.service = service; + } + + public static void main(String... 
args) { + init(ServiceDescriptor.EDGE_MEMEX, args); + + Injector injector = Guice.createInjector( + new MemexConfigurationModule(), + new GeminiConfigurationModule(), + new ConfigurationModule()); + injector.getInstance(MemexMain.class); + injector.getInstance(Initialization.class).setReady(); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/MemexService.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/MemexService.java new file mode 100644 index 00000000..4d22f1af --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/MemexService.java @@ -0,0 +1,280 @@ +package nu.marginalia.wmsa.memex; + +import com.google.inject.Inject; +import com.google.inject.name.Named; +import lombok.SneakyThrows; +import nu.marginalia.gemini.gmi.renderer.GemtextRendererFactory; +import nu.marginalia.wmsa.auth.client.AuthClient; +import nu.marginalia.wmsa.configuration.server.Context; +import nu.marginalia.wmsa.configuration.server.Initialization; +import nu.marginalia.wmsa.configuration.server.MetricsServer; +import nu.marginalia.wmsa.configuration.server.Service; +import nu.marginalia.wmsa.memex.change.GemtextMutation; +import nu.marginalia.gemini.gmi.GemtextDocument; +import nu.marginalia.wmsa.memex.change.update.GemtextDocumentUpdateCalculator; +import nu.marginalia.wmsa.memex.renderer.MemexHtmlRenderer; +import nu.marginalia.wmsa.memex.model.MemexNodeHeadingId; +import nu.marginalia.wmsa.memex.model.MemexNodeUrl; +import nu.marginalia.wmsa.memex.model.render.*; +import org.apache.http.HttpStatus; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import spark.Request; +import spark.Response; +import spark.Spark; + +import javax.servlet.MultipartConfigElement; +import java.io.IOException; +import java.io.InputStream; +import java.nio.file.Path; +import java.util.Arrays; +import java.util.Collections; +import java.util.Objects; + +import static spark.Spark.*; + +public class MemexService extends Service { + private final 
GemtextDocumentUpdateCalculator updateCalculator; + private final Memex memex; + private final MemexHtmlRenderer renderer; + private final AuthClient authClient; + + private final Logger logger = LoggerFactory.getLogger(getClass()); + + @Inject + public MemexService(@Named("service-host") String ip, + @Named("service-port") Integer port, + GemtextDocumentUpdateCalculator updateCalculator, + Memex memex, + MemexHtmlRenderer renderer, + AuthClient authClient, + Initialization initialization, + MetricsServer metricsServer) { + + super(ip, port, initialization, metricsServer); + + this.updateCalculator = updateCalculator; + this.memex = memex; + this.renderer = renderer; + this.authClient = authClient; + + Spark.get("git-pull", this::gitPull); + + Spark.path("public/api", () -> { + before((req, rsp) -> { + logger.info("{} {}", req.requestMethod(), req.pathInfo()); + }); + + post("/create", this::create); + get("/create", this::createForm, this::renderModel); + post("/upload", this::upload); + get("/upload", this::uploadForm, this::renderModel); + post("/update", this::update); + get("/update", this::updateForm, this::renderModel); + post("/rename", this::rename); + get("/rename", this::renameForm, this::renderModel); + post("/delete", this::delete); + get("/delete", this::deleteForm, this::renderModel); + + get("/raw", this::raw); + }); + } + + private Object raw(Request request, Response response) throws IOException { + final MemexNodeUrl url = new MemexNodeUrl(Objects.requireNonNull(request.queryParams("url"))); + + response.type(url.toNode().getType().mime); + response.header("Content-Disposition", "attachment; filename=" + url.getFilename()); + response.raw().getOutputStream().write(memex.getRaw(url)); + + return ""; + } + + private Object renameForm(Request request, Response response) { + final String type = Objects.requireNonNull(request.queryParams("type")); + final MemexNodeUrl url = new MemexNodeUrl(Objects.requireNonNull(request.queryParams("url"))); + + 
authClient.redirectToLoginIfUnauthenticated("MEMEX", request, response); + + if ("gmi".equals(type)) { + var doc = memex.getDocument(url); + if (null == doc) { + Spark.halt(404); + } + + final String docHtml = doc.render(new GemtextRendererFactory("", url.toString()).htmlRendererEditable()); + return new MemexRendererRenameFormModel(docHtml, + null, url, "gmi"); + } + else if ("img".equals(type)) { + var img = memex.getImage(url); + if (null == img) { + Spark.halt(404); + } + return new MemexRendererRenameFormModel(null, + new MemexRendererImageModel(img, Collections.emptyList(), null), + url, "img"); + } + + Spark.halt(HttpStatus.SC_BAD_REQUEST); + return null; + } + + private Object rename(Request request, Response response) throws IOException { + authClient.redirectToLoginIfUnauthenticated("MEMEX", request, response); + + var url = Objects.requireNonNull(request.queryParams("url")); + var name = Objects.requireNonNull(request.queryParams("name")); + var type = Objects.requireNonNull(request.queryParams("type")); + var confirm = Objects.requireNonNull(request.queryParams("confirm")); + + if (!"on".equals(confirm)) { + logger.error("Confirm dialog not checked, was {}", confirm); + Spark.halt(HttpStatus.SC_BAD_REQUEST, "Confirm was not checked"); + } + + memex.rename(new MemexNodeUrl(url).toNode(), new MemexNodeUrl(name)); + + response.redirect("https://memex.marginalia.nu/"+name); + return null; + + } + + private Object gitPull(Request request, Response response) { + logger.info("Git pull by request"); + memex.gitPull(); + return "Ok"; + } + + private String renderModel(Object model) { + return ((MemexRendererableDirect)model).render(renderer); + } + + private MemexRendererDeleteFormModel deleteForm(Request request, Response response) { + final String type = Objects.requireNonNull(request.queryParams("type")); + final MemexNodeUrl url = new MemexNodeUrl(Objects.requireNonNull(request.queryParams("url"))); + + authClient.redirectToLoginIfUnauthenticated("MEMEX", 
request, response); + + if ("gmi".equals(type)) { + var doc = memex.getDocument(url); + if (null == doc) { + Spark.halt(404); + } + + final String docHtml = doc.render(new GemtextRendererFactory("", url.toString()).htmlRendererEditable()); + return new MemexRendererDeleteFormModel(docHtml, + null, url, "gmi"); + } + else if ("img".equals(type)) { + var img = memex.getImage(url); + if (null == img) { + Spark.halt(404); + } + return new MemexRendererDeleteFormModel(null, + new MemexRendererImageModel(img, Collections.emptyList(), null), + url, "img"); + } + + Spark.halt(HttpStatus.SC_BAD_REQUEST); + return null; + } + + private Object delete(Request request, Response response) throws IOException { + authClient.requireLogIn(Context.fromRequest(request)); + + var url = Objects.requireNonNull(request.queryParams("url")); + var message = Objects.requireNonNull(request.queryParams("note")); + var type = Objects.requireNonNull(request.queryParams("type")); + var confirm = Objects.requireNonNull(request.queryParams("confirm")); + + if (!"on".equals(confirm)) { + logger.error("Confirm dialog not checked, was {}", confirm); + Spark.halt(HttpStatus.SC_BAD_REQUEST, "Confirm was not checked"); + } + + memex.delete(new MemexNodeUrl(url).toNode(), message); + + response.redirect("https://memex.marginalia.nu/"+url); + return null; + } + + private Object update(Request request, Response response) throws IOException { + authClient.requireLogIn(Context.fromRequest(request)); + + String extUrl = Objects.requireNonNull(request.queryParams("url")); + String extSection = Objects.requireNonNull(request.queryParams("section")); + String newSectionText = Objects.requireNonNull(request.queryParams("text")); + + var url = new MemexNodeUrl(extUrl); + var section = MemexNodeHeadingId.parse(extSection); + var lines = Arrays.asList(newSectionText.split("\r?\n")).toArray(String[]:: new); + + var sectionGemtext = new GemtextDocument(url, lines, section); + var updates = 
updateCalculator.calculateUpdates(memex.getDocument(url), section, sectionGemtext); + + for (GemtextMutation mutation : updates) { + mutation.visit(memex); + } + + response.redirect("https://memex.marginalia.nu/"+extUrl); + return ""; + } + + private Object create(Request request, Response response) throws IOException { + authClient.requireLogIn(Context.fromRequest(request)); + + String directory = Objects.requireNonNull(request.queryParams("directory")); + String filename = Objects.requireNonNull(request.queryParams("filename")); + String text = Objects.requireNonNull(request.queryParams("text")); + var url = new MemexNodeUrl(Path.of(directory).resolve(filename).toString()); + + memex.createNode(url, text); + + response.redirect("https://memex.marginalia.nu/"+directory + "/" + filename); + return ""; + } + + private Object createForm(Request request, Response response) { + final MemexNodeUrl url = new MemexNodeUrl(Objects.requireNonNull(request.queryParams("url"))); + authClient.redirectToLoginIfUnauthenticated("MEMEX", request, response); + + return new MemexRenderCreateFormModel(url, memex.getDocumentsByPath(url)); + } + + private Object uploadForm(Request request, Response response) { + final MemexNodeUrl url = new MemexNodeUrl(Objects.requireNonNull(request.queryParams("url"))); + authClient.redirectToLoginIfUnauthenticated("MEMEX", request, response); + + return new MemexRenderUploadFormModel(url, memex.getDocumentsByPath(url)); + } + + private Object updateForm(Request request, Response response) { + final MemexNodeUrl url = new MemexNodeUrl(Objects.requireNonNull(request.queryParams("url"))); + authClient.redirectToLoginIfUnauthenticated("MEMEX", request, response); + + var doc = memex.getDocument(url); + + return new MemexRenderUpdateFormModel(url, doc.getTitle(), "0", doc.getSectionGemtext(MemexNodeHeadingId.ROOT)); + } + + + @SneakyThrows + private Object upload(Request request, Response response) { + 
authClient.requireLogIn(Context.fromRequest(request)); + + request.attribute("org.eclipse.jetty.multipartConfig", new MultipartConfigElement("/temp", 50*1024*1024, 50*1024*1024, 25*1024*1024)); + + String directory = Objects.requireNonNull(request.queryParams("directory")); + String filename = Objects.requireNonNull(request.queryParams("filename")); + var url = new MemexNodeUrl(Path.of(directory).resolve(filename).toString()); + try (InputStream input = request.raw().getPart("file").getInputStream()) { + byte[] data = input.readAllBytes(); + memex.uploadImage(url, data); + } + + response.redirect("https://memex.marginalia.nu/"+directory + "/" + filename); + return ""; + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/change/GemtextAppend.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/change/GemtextAppend.java new file mode 100644 index 00000000..be9c34dd --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/change/GemtextAppend.java @@ -0,0 +1,70 @@ +package nu.marginalia.wmsa.memex.change; + +import lombok.AllArgsConstructor; +import lombok.ToString; +import nu.marginalia.wmsa.memex.Memex; +import nu.marginalia.gemini.gmi.GemtextDocument; +import nu.marginalia.gemini.gmi.renderer.GemtextRendererFactory; +import nu.marginalia.wmsa.memex.model.MemexNodeHeadingId; +import nu.marginalia.wmsa.memex.model.MemexNodeUrl; + +import java.io.IOException; + +@AllArgsConstructor @ToString +public class GemtextAppend implements GemtextMutation { + public final MemexNodeUrl doc; + public final MemexNodeHeadingId id; + public final String[] lines; + + @Override + public void visit(Memex memex) throws IOException { + memex.updateNode(doc, calculateAppend(memex.getDocument(doc))); + } + + public String calculateAppend(GemtextDocument document) { + + StringBuilder result = new StringBuilder(); + var renderer = new GemtextRendererFactory().gemtextRendererAsIs(); + + var lines = document.getLines(); + + int i = 0; + // Copy from 
before heading + for (; i < lines.length; i++) { + var item = lines[i]; + + if (item.getHeading().isChildOf(id)) { + break; + } + else { + result.append(item.visit(renderer)).append('\n'); + } + } + + // Copy contents of heading + for (; i < lines.length; i++) { + var item = lines[i]; + + if (!item.getHeading().isChildOf(id)) { + break; + } + else { + result.append(item.visit(renderer)).append('\n'); + } + } + + // Insert new lines + for (String newLine : this.lines) { + result.append(newLine).append('\n'); + } + + // Copy contents from after heading + for (;i < lines.length; i++) { + var item = lines[i]; + result.append(item.visit(renderer)).append('\n'); + } + + return result.toString(); + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/change/GemtextCreate.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/change/GemtextCreate.java new file mode 100644 index 00000000..9e479376 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/change/GemtextCreate.java @@ -0,0 +1,20 @@ +package nu.marginalia.wmsa.memex.change; + +import lombok.AllArgsConstructor; +import lombok.ToString; +import nu.marginalia.wmsa.configuration.server.Context; +import nu.marginalia.wmsa.memex.Memex; +import nu.marginalia.wmsa.memex.model.MemexNodeUrl; + +import java.io.IOException; + +@AllArgsConstructor @ToString +public class GemtextCreate implements GemtextMutation { + public final MemexNodeUrl doc; + public final String text; + + @Override + public void visit(Memex memex) throws IOException { + memex.createNode(doc, text); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/change/GemtextCreateOrMutate.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/change/GemtextCreateOrMutate.java new file mode 100644 index 00000000..1d6498c2 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/change/GemtextCreateOrMutate.java @@ -0,0 +1,27 @@ +package nu.marginalia.wmsa.memex.change; + +import 
lombok.AllArgsConstructor; +import lombok.ToString; +import nu.marginalia.wmsa.configuration.server.Context; +import nu.marginalia.wmsa.memex.Memex; +import nu.marginalia.wmsa.memex.model.MemexNodeUrl; + +import java.io.IOException; + +@AllArgsConstructor @ToString +public class GemtextCreateOrMutate implements GemtextMutation { + public final MemexNodeUrl doc; + public final String text; + public final GemtextMutation mutation; + + @Override + public void visit(Memex memex) throws IOException { + if (memex.getDocument(doc) == null) { + memex.createNode(doc, text); + } + if (memex.getDocument(doc) == null) + throw new IllegalStateException(); + + mutation.visit(memex); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/change/GemtextMutation.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/change/GemtextMutation.java new file mode 100644 index 00000000..eab2e7b1 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/change/GemtextMutation.java @@ -0,0 +1,20 @@ +package nu.marginalia.wmsa.memex.change; + +import nu.marginalia.wmsa.configuration.server.Context; +import nu.marginalia.wmsa.memex.Memex; +import nu.marginalia.wmsa.memex.model.MemexNodeHeadingId; +import nu.marginalia.wmsa.memex.model.MemexNodeUrl; +import nu.marginalia.wmsa.memex.model.MemexUrl; + +import java.io.IOException; + +public interface GemtextMutation { + void visit(Memex memex) throws IOException; + + static GemtextMutation createOrAppend(MemexNodeUrl url, String template, MemexNodeHeadingId heading, String... lines) { + return new GemtextCreateOrMutate(url, template, new GemtextAppend(url, heading, lines)); + } + static GemtextMutation createOrPrepend(MemexNodeUrl url, String template, MemexNodeHeadingId heading, String... 
lines) { + return new GemtextCreateOrMutate(url, template, new GemtextPrepend(url, heading, lines)); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/change/GemtextPrepend.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/change/GemtextPrepend.java new file mode 100644 index 00000000..873348d3 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/change/GemtextPrepend.java @@ -0,0 +1,64 @@ +package nu.marginalia.wmsa.memex.change; + +import lombok.AllArgsConstructor; +import lombok.ToString; +import nu.marginalia.wmsa.configuration.server.Context; +import nu.marginalia.wmsa.memex.Memex; +import nu.marginalia.gemini.gmi.GemtextDocument; +import nu.marginalia.gemini.gmi.renderer.GemtextRendererFactory; +import nu.marginalia.wmsa.memex.model.MemexNodeHeadingId; +import nu.marginalia.wmsa.memex.model.MemexNodeUrl; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; + +@AllArgsConstructor @ToString +public class GemtextPrepend implements GemtextMutation { + public final MemexNodeUrl doc; + public final MemexNodeHeadingId id; + public final String[] lines; + + private static final Logger logger = LoggerFactory.getLogger(GemtextPrepend.class); + + @Override + public void visit(Memex memex) throws IOException { + memex.updateNode(doc, calculatePrepend(memex.getDocument(doc))); + } + + public String calculatePrepend(GemtextDocument document) { + StringBuilder result = new StringBuilder(); + var renderer = new GemtextRendererFactory().gemtextRendererAsIs(); + var lines = document.getLines(); + int i = 0; + for (; i < lines.length; i++) { + var item = lines[i]; + + if (item.getHeading().isChildOf(id)) { + if (!id.equals(MemexNodeHeadingId.ROOT)) { + result.append(item.visit(renderer)).append('\n'); + i++; + } + break; + } + else { + result.append(item.visit(renderer)).append('\n'); + } + } + + if (i == lines.length) { + logger.warn("Heading not found in prepending heading {} of {}, 
falling back to append-like behavior", + id, document.getUrl()); + } + for (String newLine : this.lines) { + result.append(newLine).append('\n'); + } + + for (;i < lines.length; i++) { + var item = lines[i]; + result.append(item.visit(renderer)).append('\n'); + } + + return result.toString(); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/change/GemtextReplace.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/change/GemtextReplace.java new file mode 100644 index 00000000..a4caf685 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/change/GemtextReplace.java @@ -0,0 +1,66 @@ +package nu.marginalia.wmsa.memex.change; + +import lombok.AllArgsConstructor; +import lombok.ToString; +import nu.marginalia.wmsa.configuration.server.Context; +import nu.marginalia.wmsa.memex.Memex; +import nu.marginalia.gemini.gmi.GemtextDocument; +import nu.marginalia.gemini.gmi.renderer.GemtextRendererFactory; +import nu.marginalia.wmsa.memex.model.MemexNodeHeadingId; +import nu.marginalia.wmsa.memex.model.MemexNodeUrl; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; + +@AllArgsConstructor @ToString +public class GemtextReplace implements GemtextMutation { + public final MemexNodeUrl doc; + public final MemexNodeHeadingId id; + public final String[] lines; + + private static final Logger logger = LoggerFactory.getLogger(GemtextReplace.class); // log under this class, not GemtextPrepend + + @Override + public void visit(Memex memex) throws IOException { + memex.updateNode(doc, calculateReplace(memex.getDocument(doc))); + } + + public String calculateReplace(GemtextDocument document) { + StringBuilder result = new StringBuilder(); + var renderer = new GemtextRendererFactory().gemtextRendererAsIs(); + + var lines = document.getLines(); + int i = 0; + for (; i < lines.length; i++) { + var item = lines[i]; + + if (item.getHeading().isChildOf(id)) { + break; + } + else { + result.append(item.visit(renderer)).append('\n'); + } + } + + if (i
== lines.length) { + logger.error("Heading not found in replacing heading {} of {}, writing change-data to file", + id, document.getUrl()); + result.append("# Error! Replace failed!\n"); + } + + for (;i < lines.length && lines[i].getHeading().isChildOf(id); i++) { + } + + for (String newLine : this.lines) { + result.append(newLine).append('\n'); + } + + for (;i < lines.length; i++) { + var item = lines[i]; + result.append(item.visit(renderer)).append('\n'); + } + + return result.toString(); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/change/GemtextTombstoneUpdateCaclulator.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/change/GemtextTombstoneUpdateCaclulator.java new file mode 100644 index 00000000..711e1f55 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/change/GemtextTombstoneUpdateCaclulator.java @@ -0,0 +1,48 @@ +package nu.marginalia.wmsa.memex.change; + +import com.google.inject.Inject; +import com.google.inject.Singleton; +import nu.marginalia.wmsa.memex.model.MemexNodeHeadingId; +import nu.marginalia.wmsa.memex.model.MemexNodeUrl; + +import com.google.inject.name.Named; + +import java.util.Objects; + +@Singleton +public class GemtextTombstoneUpdateCaclulator { + private final String tombstonePath; + private final String redirectsPath; + + @Inject + public GemtextTombstoneUpdateCaclulator(@Named("tombestone-special-file") String tombstonePath, + @Named("redirects-special-file") String redirectsPath) { + + this.tombstonePath = tombstonePath; + this.redirectsPath = redirectsPath; + } + + public boolean isTombstoneFile(MemexNodeUrl url) { + return Objects.equals(url, new MemexNodeUrl(tombstonePath)); + } + public boolean isRedirectFile(MemexNodeUrl url) { + return Objects.equals(url, new MemexNodeUrl(redirectsPath)); + } + + public GemtextMutation addTombstone(MemexNodeUrl url, String message) { + var tombstoneUrl = new MemexNodeUrl(tombstonePath); + + return new 
GemtextCreateOrMutate(tombstoneUrl, "# Tombstones", + new GemtextAppend(tombstoneUrl, new MemexNodeHeadingId(0), + new String[] { String.format("=> %s\t%s", url, message)})); + } + + public GemtextMutation addRedirect(MemexNodeUrl url, String message) { + var redirectsUrl = new MemexNodeUrl(redirectsPath); + + return new GemtextCreateOrMutate(redirectsUrl, "# Redirects", + new GemtextAppend(redirectsUrl, new MemexNodeHeadingId(0), + new String[] { String.format("=> %s\t%s", url, message)})); + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/change/update/GemtextDocumentUpdateCalculator.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/change/update/GemtextDocumentUpdateCalculator.java new file mode 100644 index 00000000..51142ed0 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/change/update/GemtextDocumentUpdateCalculator.java @@ -0,0 +1,109 @@ +package nu.marginalia.wmsa.memex.change.update; + +import com.google.inject.Inject; +import com.google.inject.Singleton; +import nu.marginalia.gemini.gmi.line.GemtextText; +import nu.marginalia.wmsa.memex.Memex; +import nu.marginalia.gemini.gmi.GemtextDocument; +import nu.marginalia.gemini.gmi.line.AbstractGemtextLine; +import nu.marginalia.gemini.gmi.line.GemtextHeading; +import nu.marginalia.gemini.gmi.renderer.GemtextRenderer; +import nu.marginalia.gemini.gmi.renderer.GemtextRendererFactory; +import nu.marginalia.wmsa.memex.change.*; +import nu.marginalia.wmsa.memex.model.MemexNodeHeadingId; +import nu.marginalia.wmsa.memex.model.MemexNodeUrl; +import org.jetbrains.annotations.NotNull; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.*; + +@Singleton +public class GemtextDocumentUpdateCalculator { + private final GemtextRenderer rawRenderer = new GemtextRendererFactory().gemtextRendererAsIs(); + private final Logger logger = LoggerFactory.getLogger(getClass()); + private final Memex memex; + + @Inject + public 
GemtextDocumentUpdateCalculator(Memex memex) { + this.memex = memex; + } + + public List calculateUpdates(GemtextDocument original, + MemexNodeHeadingId destId, + GemtextDocument newSection) + { + + var rewrite = new GemtextTasksRewrite(memex, original, destId, newSection); + var lines = newSection.getLines(); + + for (int i = 0; i < lines.length; i = Math.max(i+1, rewrite.processLine(lines, i))); + + List updates = new ArrayList<>(); + + updates.addAll(createKeepUpdates(original, destId, rewrite)); + updates.addAll(createDoneUpdates(original, destId, rewrite)); + updates.addAll(createTodoUpdates(original, rewrite)); + + return updates; + } + + private Collection createTodoUpdates(GemtextDocument original, GemtextTasksRewrite rewrite) { + if (!rewrite.getPushToTodo().isEmpty()) { + var doneDoc = original.getUrl().sibling("todo.gmi"); + + var update = createTodoAction(rewrite.getPushToTodo(), doneDoc); + return List.of(update); + } + return Collections.emptyList(); + } + + private Collection createDoneUpdates(GemtextDocument original, MemexNodeHeadingId destId, GemtextTasksRewrite rewrite) { + if (!rewrite.getPushToDone().isEmpty()) { + + var doneDocUrl = original.getUrl().sibling("done.gmi"); + final String doneHeadingName = rewrite.getTodaysDoneHeadingName(); + + var newDestId = + Optional.ofNullable(memex.getDocument(doneDocUrl)) + .flatMap(dest -> dest.getHeadingByName(MemexNodeHeadingId.ROOT, doneHeadingName)); + + if (newDestId.isEmpty()) { + rewrite.getPushToDone().addAll(0, + List.of(new GemtextText("", MemexNodeHeadingId.ROOT), + new GemtextHeading(new MemexNodeHeadingId(1,1), doneHeadingName, destId)) + ); + } + + var update = createDoneAction(rewrite.getPushToDone(), doneDocUrl, newDestId.orElse(new MemexNodeHeadingId(1))); + return List.of(update); + } + return Collections.emptyList(); + } + + private Collection createKeepUpdates(GemtextDocument original, MemexNodeHeadingId destId, GemtextTasksRewrite rewrite) { + if (!rewrite.getKeep().isEmpty()) { + 
return List.of(new GemtextReplace(original.getUrl(), destId, rewrite.getKeep().stream().map(rawRenderer::renderLine).toArray(String[]::new))); + } + return Collections.emptyList(); + } + + + @NotNull + private GemtextCreateOrMutate createDoneAction(List pushToDone, MemexNodeUrl doneDoc, MemexNodeHeadingId newDestId) { + return new GemtextCreateOrMutate( + doneDoc, "%%% TASKS\n# Done", + new GemtextPrepend(doneDoc, newDestId, pushToDone.stream().map(rawRenderer::renderLine).toArray(String[]::new)) + ); + } + + @NotNull + private GemtextCreateOrMutate createTodoAction(List pushToTodo, MemexNodeUrl doneDoc) { + return new GemtextCreateOrMutate( + doneDoc, "%%% TASKS\n# Todo", + new GemtextAppend(doneDoc, new MemexNodeHeadingId(1), pushToTodo.stream().map(rawRenderer::renderLine).toArray(String[]::new)) + ); + } + +} + diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/change/update/GemtextTaskExtractor.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/change/update/GemtextTaskExtractor.java new file mode 100644 index 00000000..4bdc1ce4 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/change/update/GemtextTaskExtractor.java @@ -0,0 +1,31 @@ +package nu.marginalia.wmsa.memex.change.update; + +import nu.marginalia.gemini.gmi.line.AbstractGemtextLine; +import nu.marginalia.gemini.gmi.line.GemtextTask; + +import java.util.List; + +class GemtextTaskExtractor { + + public static int extractTask(List dest, AbstractGemtextLine[] lines, int i) { + var taskId = ((GemtextTask) lines[i]).getId(); + + int j; + for (j = i; j < lines.length; j++) { + var item = lines[j]; + if (item.mapTask(GemtextTask::getId).map(id -> id.isChildOf(taskId)).orElse(false)) { + dest.add(item); + } + else if (!item.breaksTask()) { + dest.add(item); + } + else { + break; + } + } + if (j < lines.length) { + return Math.max(i+1, j-1); + } + return lines.length; + } +} diff --git 
a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/change/update/GemtextTasksRewrite.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/change/update/GemtextTasksRewrite.java new file mode 100644 index 00000000..c2266f86 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/change/update/GemtextTasksRewrite.java @@ -0,0 +1,100 @@ +package nu.marginalia.wmsa.memex.change.update; + +import lombok.Getter; +import nu.marginalia.gemini.gmi.GemtextDocument; +import nu.marginalia.gemini.gmi.line.AbstractGemtextLine; +import nu.marginalia.gemini.gmi.line.GemtextTask; +import nu.marginalia.wmsa.memex.Memex; +import nu.marginalia.wmsa.memex.model.MemexNodeHeadingId; +import org.jetbrains.annotations.NotNull; + +import java.time.LocalDate; +import java.time.format.DateTimeFormatter; +import java.util.ArrayList; +import java.util.List; +import java.util.function.Predicate; + +@Getter +class GemtextTasksRewrite { + private final List keep = new ArrayList<>(); + private final List pushToTodo = new ArrayList<>(); + private final List pushToDone = new ArrayList<>(); + + private final String rootHeadingName = "Done"; + private final String todoHeadingName = "Todo"; + private final String backlogHeadingName = "Backlog"; + + private final boolean isDestTodo; + private final boolean isDestDone; + private final Memex memex; + private final GemtextDocument original; + private final MemexNodeHeadingId destId; + private final GemtextDocument newSection; + + GemtextTasksRewrite(Memex memex, GemtextDocument original, MemexNodeHeadingId destId, GemtextDocument newSection) { + this.memex = memex; + this.original = original; + this.destId = destId; + this.newSection = newSection; + + + isDestTodo = isDestTodo(original, destId); + isDestDone = isDestDone(original, destId); + } + + public int processLine(AbstractGemtextLine[] lines, int i) { + var line = lines[i]; + + if (!line.mapTask(GemtextTask::getLevel).map(level -> 1 == level).orElse(false)) { + 
keep.add(line); + return i + 1; + } + + // It's a task + + boolean isTaskDone = line.mapTask(GemtextTask::getState).map(state -> state.done).orElse(false); + boolean isChangeDestDone = matchHeadingHierarchy(newSection, line.getHeading(), heading -> heading.contains(rootHeadingName)); + boolean isChangeDestTodo = isDestTodo(newSection, line.getHeading()); + + if (isTaskDone && !isDestDone && !isChangeDestDone) { + return GemtextTaskExtractor.extractTask(pushToDone, lines, i); + } else if (!isTaskDone && !isDestTodo && !isChangeDestTodo) { + return GemtextTaskExtractor.extractTask(pushToTodo, lines, i); + } + + keep.add(line); + return i + 1; + } + + private boolean isDestDone(GemtextDocument original, MemexNodeHeadingId destId) { + + final String currentHeadingName = getTodaysDoneHeadingName(); + + return matchHeadingHierarchy(original, destId, heading -> heading.contains(currentHeadingName)) + || matchHeadingHierarchy(original, destId, heading -> heading.contains(rootHeadingName)); + } + + @NotNull + public String getTodaysDoneHeadingName() { + return "Done " + LocalDate.now().format(DateTimeFormatter.ISO_LOCAL_DATE); + } + + private boolean isDestTodo(GemtextDocument original, MemexNodeHeadingId destId) { + return matchHeadingHierarchy(original, destId, heading -> heading.contains(todoHeadingName)) + || matchHeadingHierarchy(original, destId, heading -> heading.contains(backlogHeadingName)); + } + + + boolean matchHeadingHierarchy(GemtextDocument doc, MemexNodeHeadingId heading, Predicate p) { + + for (; !heading.equals(MemexNodeHeadingId.ROOT); heading = heading.parent()) { + var maybeTitle = doc.getHeading(heading); + if (maybeTitle.map(p::test).orElse(false)) { + return true; + } + + } + return false; + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/client/MemexApiClient.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/client/MemexApiClient.java new file mode 100644 index 00000000..96ca5239 --- /dev/null +++ 
b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/client/MemexApiClient.java @@ -0,0 +1,16 @@ +package nu.marginalia.wmsa.memex.client; + +import com.google.inject.Inject; +import io.reactivex.rxjava3.core.Observable; +import nu.marginalia.wmsa.client.AbstractDynamicClient; +import nu.marginalia.wmsa.configuration.ServiceDescriptor; +import nu.marginalia.wmsa.configuration.server.Context; + + +public class MemexApiClient extends AbstractDynamicClient { + @Inject + public MemexApiClient() { + super(ServiceDescriptor.EDGE_MEMEX); + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/model/GemtextSection.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/model/GemtextSection.java new file mode 100644 index 00000000..5fc3cc73 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/model/GemtextSection.java @@ -0,0 +1,11 @@ +package nu.marginalia.wmsa.memex.model; + +import lombok.AllArgsConstructor; +import lombok.Getter; + +@AllArgsConstructor @Getter +public class GemtextSection { + public final MemexNodeHeadingId id; + public final GemtextSectionAction action; + public final String[] lines; +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/model/GemtextSectionAction.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/model/GemtextSectionAction.java new file mode 100644 index 00000000..16cb8158 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/model/GemtextSectionAction.java @@ -0,0 +1,6 @@ +package nu.marginalia.wmsa.memex.model; + +public enum GemtextSectionAction { + REPLACE, + APPEND +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/model/MemexExternalUrl.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/model/MemexExternalUrl.java new file mode 100644 index 00000000..38775753 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/model/MemexExternalUrl.java @@ -0,0 +1,18 @@ +package nu.marginalia.wmsa.memex.model; + 
+import lombok.AllArgsConstructor; +import lombok.EqualsAndHashCode; +import lombok.Getter; + +import java.util.Optional; + +@AllArgsConstructor @Getter @EqualsAndHashCode +public class MemexExternalUrl implements MemexUrl { + public final String url; + + public String toString() { + return url; + } + @Override + public Optional getExternUrl() { return Optional.of(this); } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/model/MemexImage.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/model/MemexImage.java new file mode 100644 index 00000000..e0184d03 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/model/MemexImage.java @@ -0,0 +1,13 @@ +package nu.marginalia.wmsa.memex.model; + +import lombok.AllArgsConstructor; +import lombok.Getter; + +import java.nio.file.Path; + +@AllArgsConstructor @Getter +public class MemexImage { + public final MemexNodeUrl path; + public final Path realPath; + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/model/MemexIndexTask.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/model/MemexIndexTask.java new file mode 100644 index 00000000..38c79410 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/model/MemexIndexTask.java @@ -0,0 +1,13 @@ +package nu.marginalia.wmsa.memex.model; + +import lombok.AllArgsConstructor; +import lombok.Getter; +import lombok.ToString; + +@AllArgsConstructor @ToString @Getter +public class MemexIndexTask { + public final String task; + public final String taskId; + public final String url; + public final String type; +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/model/MemexLink.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/model/MemexLink.java new file mode 100644 index 00000000..c44eaa26 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/model/MemexLink.java @@ -0,0 +1,26 @@ +package nu.marginalia.wmsa.memex.model; + +import 
lombok.AllArgsConstructor; +import lombok.Getter; + +import java.util.Objects; + +@Getter @AllArgsConstructor +public class MemexLink { + public final MemexNodeUrl dest; + public final MemexNodeUrl src; + public final String title; + public final String section; + public final MemexNodeHeadingId sectionId; + + public final MemexNodeUrl getUrl() { + return src; + } + + public String getDescription() { + if (Objects.equals(title, section)) { + return title; + } + return title + " - " + section; + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/model/MemexNode.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/model/MemexNode.java new file mode 100644 index 00000000..ddd01f82 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/model/MemexNode.java @@ -0,0 +1,40 @@ +package nu.marginalia.wmsa.memex.model; + +import lombok.AllArgsConstructor; +import lombok.Getter; +import lombok.SneakyThrows; + +@AllArgsConstructor @Getter +public class MemexNode { + private final MemexNodeUrl url; + + public MemexNodeType getType() { + var fn = url.getFilename(); + if (fn.endsWith(".gmi")) { + return MemexNodeType.DOCUMENT; + } + else if (fn.endsWith(".png")) { + return MemexNodeType.IMAGE; + } + else if (fn.endsWith(".txt")) { + return MemexNodeType.TEXT; + } + else if (fn.contains(".")) { + return MemexNodeType.OTHER; + } + return MemexNodeType.DIRECTORY; + } + + @SneakyThrows + public T visit(MemexNodeVisitor visitor) { + return switch (getType()) { + case DOCUMENT -> visitor.onDocument(url); + case IMAGE -> visitor.onImage(url); + default -> null; + }; + } + public interface MemexNodeVisitor { + T onDocument(MemexNodeUrl url) throws Exception; + T onImage(MemexNodeUrl url) throws Exception; + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/model/MemexNodeHeadingId.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/model/MemexNodeHeadingId.java new file mode 100644 index 00000000..084e22fe --- 
/dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/model/MemexNodeHeadingId.java @@ -0,0 +1,74 @@ +package nu.marginalia.wmsa.memex.model; + +import lombok.EqualsAndHashCode; + +import java.util.Arrays; +import java.util.stream.Collectors; + +@EqualsAndHashCode +public class MemexNodeHeadingId implements Comparable { + private final int[] ids; + + public static final MemexNodeHeadingId ROOT = new MemexNodeHeadingId(0); + + public MemexNodeHeadingId(int... ids) { + this.ids = ids; + } + + public static MemexNodeHeadingId parse(String section) { + return new MemexNodeHeadingId(Arrays.stream(section.split("\\.")).mapToInt(Integer::parseInt).toArray()); + } + + public int getLevel() { + return ids.length; + } + + public int[] getIds() { + return ids; + } + public boolean isChildOf(MemexNodeHeadingId other) { + if (other.equals(ROOT)) { + return true; + } + if (other.ids.length > ids.length) { + return false; + } + + for (int i = 0; i < other.ids.length; i++) { + if (other.ids[i] != ids[i]) { + return false; + } + } + + return true; + } + + // This does not have the same semantics as Arrays$compare + + public int compareTo(MemexNodeHeadingId other) { + for (int i = 0; i < Math.min(ids.length, other.ids.length); i++) { + if (other.ids[i] != ids[i]) { + return ids[i] - other.ids[i]; + } + } + + return other.ids.length - ids.length; + } + + public MemexNodeHeadingId parent() { + if (ids.length <= 1) + return ROOT; + else return new MemexNodeHeadingId(Arrays.copyOfRange(ids, 0, ids.length-1)); + + } + public MemexNodeHeadingId next(int level) { + int[] newIds = Arrays.copyOf(ids, level+1); + newIds[level]++; + return new MemexNodeHeadingId(newIds); + } + + @Override + public String toString() { + return Arrays.stream(ids).mapToObj(Integer::toString).collect(Collectors.joining(".")); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/model/MemexNodeTaskId.java 
b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/model/MemexNodeTaskId.java new file mode 100644 index 00000000..e40dccbf --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/model/MemexNodeTaskId.java @@ -0,0 +1,66 @@ +package nu.marginalia.wmsa.memex.model; + +import lombok.EqualsAndHashCode; + +import java.util.Arrays; +import java.util.stream.Collectors; + +@EqualsAndHashCode +public class MemexNodeTaskId implements Comparable { + private final int[] ids; + + public MemexNodeTaskId(int... ids) { + this.ids = ids; + } + + public static MemexNodeTaskId parse(String section) { + return new MemexNodeTaskId(Arrays.stream(section.split("\\.")).mapToInt(Integer::parseInt).toArray()); + } + + public int level() { + return ids.length; + } + + public boolean isChildOf(MemexNodeTaskId other) { + if (other.ids.length > ids.length) { + return false; + } + + for (int i = 0; i < other.ids.length; i++) { + if (other.ids[i] != ids[i]) { + return false; + } + } + + return true; + } + + // This does not have the same semantics as Arrays$compare + + public int compareTo(MemexNodeTaskId other) { + for (int i = 0; i < Math.min(ids.length, other.ids.length); i++) { + if (other.ids[i] != ids[i]) { + return ids[i] - other.ids[i]; + } + } + + return other.ids.length - ids.length; + } + + public MemexNodeTaskId parent() { + if (ids.length <= 1) + return new MemexNodeTaskId(0); + else return new MemexNodeTaskId(Arrays.copyOfRange(ids, 0, ids.length-1)); + + } + public MemexNodeTaskId next(int level) { + int[] newIds = Arrays.copyOf(ids, level+1); + newIds[level]++; + return new MemexNodeTaskId(newIds); + } + + @Override + public String toString() { + return Arrays.stream(ids).mapToObj(Integer::toString).collect(Collectors.joining(".")); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/model/MemexNodeType.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/model/MemexNodeType.java new file mode 100644 index 00000000..6c645921 --- 
/dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/model/MemexNodeType.java @@ -0,0 +1,14 @@ +package nu.marginalia.wmsa.memex.model; + +import lombok.AllArgsConstructor; + +@AllArgsConstructor +public enum MemexNodeType { + DOCUMENT("text/gemini"), + IMAGE("image/png"), + DIRECTORY("other/directory"), + TEXT("text/plain"), + OTHER("application/binary"); + + public String mime; +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/model/MemexNodeUrl.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/model/MemexNodeUrl.java new file mode 100644 index 00000000..96d91bd0 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/model/MemexNodeUrl.java @@ -0,0 +1,98 @@ +package nu.marginalia.wmsa.memex.model; + +import lombok.EqualsAndHashCode; +import lombok.Getter; +import org.jetbrains.annotations.NotNull; + +import java.io.File; +import java.nio.file.Path; +import java.util.Optional; +import java.util.function.Consumer; + +@Getter @EqualsAndHashCode +public class MemexNodeUrl implements MemexUrl, Comparable { + private final String url; + + public MemexNodeUrl(String url) { + if (url.startsWith("//")) { + this.url = url.substring(1); + } else { + this.url = url; + } + } + public static MemexNodeUrl ofRelativePath(Path root, Path relative) { + Path path; + + if (relative.startsWith("/")) { + path = root.relativize(relative); + } + else { + path = relative; + } + + if (File.separatorChar == '\\') + return new MemexNodeUrl("/" + path.toString().replace('\\', '/')); + return new MemexNodeUrl("/" + path); + } + + public String toString() { + return url; + } + + public String getParentStr() { + var path = asRelativePath().getParent(); + if (path == null) { + return null; + } + return path.toString(); + } + public MemexNodeUrl getParentUrl() { + var str = getParentStr(); + if (str == null) { + return null; + } + return new MemexNodeUrl(str); + } + public MemexNodeUrl sibling(String name) { + return new 
MemexNodeUrl(asRelativePath().resolveSibling(name).toString()); + } + public MemexNodeUrl child(String name) { + return new MemexNodeUrl(asRelativePath().resolve(name).toString()); + } + + public Path asRelativePath() { + return Path.of(url); + } + + public Path asAbsolutePath(Path root) { + Path p = Path.of(root + url); + if (p.toString().contains(".git")) { + throw new IllegalStateException(url + " touched .git"); + } + if (!p.normalize().startsWith(root)) { + throw new IllegalStateException(url + " escaped Memex root as " + p); + } + return p; + } + + + public String getFilename() { return asRelativePath().toFile().getName(); } + + @Override + public void visitNodeUrl(Consumer fn) { + fn.accept(this); + } + + @Override + public Optional getNodeUrl() { + return Optional.of(this); + } + @Override + public int compareTo(@NotNull MemexNodeUrl o) { + return url.compareTo(o.getUrl()); + } + + public MemexNode toNode() { + return new MemexNode(this); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/model/MemexTaskState.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/model/MemexTaskState.java new file mode 100644 index 00000000..b110f878 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/model/MemexTaskState.java @@ -0,0 +1,30 @@ +package nu.marginalia.wmsa.memex.model; + +public enum MemexTaskState { + DONE('/', true,"done"), + SKIP('x', true,"skip"), + SKIP2('-', true,"skip"), + UNKNOWN('?', false, "unknown"), + URGENT('!', false, "urgent"), + TODO(0, false, "todo"); + + public int key; + public String style; + public boolean done; + + MemexTaskState(int key, boolean done, String style) { + this.key = key; + this.style = style; + this.done = done; + } + + public static MemexTaskState of(MemexTaskTags tags) { + for (MemexTaskState state : values()) { + if (tags.hasTag(state.key)) { + return state; + } + } + return TODO; + } + +} diff --git 
a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/model/MemexTaskTags.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/model/MemexTaskTags.java new file mode 100644 index 00000000..e19819bc --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/model/MemexTaskTags.java @@ -0,0 +1,40 @@ +package nu.marginalia.wmsa.memex.model; + +import lombok.Getter; + +import java.util.stream.Collectors; + +@Getter +public class MemexTaskTags { + public final String tagsCondensed; + + private static final int TAG_START = '('; + private static final int TAG_END = ')'; + + public MemexTaskTags(String text) { + tagsCondensed = getTags(text); + } + + public boolean hasTag(int tag) { + return tagsCondensed.indexOf(tag) >= 0; + } + + @Override + public String toString() { + return tagsCondensed.chars().mapToObj(c -> '(' + Character.toString(c) + ')') + .collect(Collectors.joining(" ")); + } + + private static String getTags(String task) { + StringBuilder sb = new StringBuilder(); + for (int i = task.indexOf(TAG_START); + i >= 0 && i+2 < task.length(); + i = task.indexOf(TAG_START, i+1)) + { + if (task.charAt(i+2) == TAG_END) { + sb.append(task.charAt(i+1)); + } + } + return sb.toString(); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/model/MemexUrl.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/model/MemexUrl.java new file mode 100644 index 00000000..14cff995 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/model/MemexUrl.java @@ -0,0 +1,13 @@ +package nu.marginalia.wmsa.memex.model; + +import java.util.Optional; +import java.util.function.Consumer; + +public interface MemexUrl { + String getUrl(); + + default void visitNodeUrl(Consumer fn) {} + default void visitExternalUrl(Consumer fn) {} + default Optional getNodeUrl() { return Optional.empty(); } + default Optional getExternUrl() { return Optional.empty(); } +} diff --git 
a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/model/fs/MemexDirectory.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/model/fs/MemexDirectory.java new file mode 100644 index 00000000..deacc639 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/model/fs/MemexDirectory.java @@ -0,0 +1,30 @@ +package nu.marginalia.wmsa.memex.model.fs; + +import lombok.Getter; +import nu.marginalia.gemini.gmi.GemtextDocument; +import nu.marginalia.wmsa.memex.model.MemexImage; +import nu.marginalia.wmsa.memex.model.MemexNodeUrl; + +import java.util.HashMap; +import java.util.Map; + +@Getter +public class MemexDirectory { + private final Map documents; + private final Map images; + private final Map subdirs; + + public MemexDirectory() { + documents = new HashMap<>(); + images = new HashMap<>(); + subdirs = new HashMap<>(); + } + + public void removeDocument(MemexNodeUrl url) { + documents.remove(url); + } + + public void removeImage(MemexNodeUrl url) { + images.remove(url); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/model/fs/MemexFileSystem.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/model/fs/MemexFileSystem.java new file mode 100644 index 00000000..ffd31508 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/model/fs/MemexFileSystem.java @@ -0,0 +1,88 @@ +package nu.marginalia.wmsa.memex.model.fs; + +import nu.marginalia.gemini.gmi.GemtextDocument; +import nu.marginalia.wmsa.memex.model.MemexImage; +import nu.marginalia.wmsa.memex.model.MemexNodeUrl; + +import java.util.*; +import java.util.concurrent.ConcurrentHashMap; + +public class MemexFileSystem { + private final Map fileSystemContentsByDir = new ConcurrentHashMap<>(); + + public MemexFileSystem() { + } + + public Optional get(MemexNodeUrl url) { + return Optional.ofNullable(fileSystemContentsByDir.get(url)); + } + public List getDocuments(MemexNodeUrl url) { + var contents = fileSystemContentsByDir.get(url); + if 
(contents == null) { + return Collections.emptyList(); + } + var list = new ArrayList<>(contents.getDocuments().values()); + list.sort(Comparator.comparing(GemtextDocument::getUrl)); + return list; + } + + public List getImages(MemexNodeUrl url) { + var contents = fileSystemContentsByDir.get(url); + if (contents == null) { + return Collections.emptyList(); + } + var list = new ArrayList<>(contents.getImages().values()); + list.sort(Comparator.comparing(MemexImage::getPath)); + return list; + } + + public List getSubdirs(MemexNodeUrl url) { + var contents = fileSystemContentsByDir.get(url); + if (contents == null) { + return Collections.emptyList(); + } + var list = new ArrayList<>(contents.getSubdirs().keySet()); + list.sort(Comparator.naturalOrder()); + return list; + } + + public void recalculateDirectories() { + fileSystemContentsByDir.forEach((k, v) -> { + var parent = k.getParentUrl(); + if (parent != null) { + registerDir(k.getParentUrl()).getSubdirs().put(k, v); + } + }); + } + + public MemexDirectory registerDir(MemexNodeUrl url) { + return fileSystemContentsByDir + .computeIfAbsent(url, p -> new MemexDirectory()); + } + + public void register(MemexImage image) { + registerDir(image.path.getParentUrl()) + .getImages() + .put(image.path, image); + } + + public void register(GemtextDocument document) { + registerDir(document.getUrl().getParentUrl()) + .getDocuments() + .put(document.getUrl(), document); + } + + public void remove(MemexNodeUrl url) { + var contents = fileSystemContentsByDir.get(url.getParentUrl()); + contents.removeDocument(url); + contents.removeImage(url); + } + + public List getAllDirectories() { + return new ArrayList<>(fileSystemContentsByDir.keySet()); + } + + public boolean isDirectory(MemexNodeUrl url) { + return fileSystemContentsByDir.containsKey(url); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/model/render/MemexRenderCreateFormModel.java 
b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/model/render/MemexRenderCreateFormModel.java new file mode 100644 index 00000000..eb59bcc1 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/model/render/MemexRenderCreateFormModel.java @@ -0,0 +1,32 @@ +package nu.marginalia.wmsa.memex.model.render; + +import lombok.Getter; +import lombok.RequiredArgsConstructor; +import nu.marginalia.gemini.gmi.GemtextDocument; +import nu.marginalia.wmsa.memex.renderer.MemexHtmlRenderer; +import nu.marginalia.wmsa.memex.model.MemexNodeUrl; + +import java.util.Comparator; +import java.util.List; +import java.util.stream.Collectors; + +@RequiredArgsConstructor +@Getter +public class MemexRenderCreateFormModel implements MemexRendererableDirect { + public final MemexNodeUrl url; + public final List docs; + + public String getFilename() { + return url.getFilename(); + } + + public List getDocs() { + return docs.stream().sorted(Comparator.comparing(GemtextDocument::getUrl).reversed()).collect(Collectors.toList()); + } + + @Override + public String render(MemexHtmlRenderer renderer) { + return renderer.renderModel(this); + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/model/render/MemexRenderUpdateFormModel.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/model/render/MemexRenderUpdateFormModel.java new file mode 100644 index 00000000..9e32ed95 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/model/render/MemexRenderUpdateFormModel.java @@ -0,0 +1,19 @@ +package nu.marginalia.wmsa.memex.model.render; + +import lombok.AllArgsConstructor; +import lombok.Getter; +import nu.marginalia.wmsa.memex.renderer.MemexHtmlRenderer; +import nu.marginalia.wmsa.memex.model.MemexNodeUrl; + +@AllArgsConstructor @Getter +public class MemexRenderUpdateFormModel implements MemexRendererableDirect { + public final MemexNodeUrl url; + public final String title; + public final String section; + public final String text; + 
+ @Override + public String render(MemexHtmlRenderer renderer) { + return renderer.renderModel(this); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/model/render/MemexRenderUploadFormModel.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/model/render/MemexRenderUploadFormModel.java new file mode 100644 index 00000000..e098af52 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/model/render/MemexRenderUploadFormModel.java @@ -0,0 +1,32 @@ +package nu.marginalia.wmsa.memex.model.render; + +import lombok.Getter; +import lombok.RequiredArgsConstructor; +import nu.marginalia.gemini.gmi.GemtextDocument; +import nu.marginalia.wmsa.memex.renderer.MemexHtmlRenderer; +import nu.marginalia.wmsa.memex.model.MemexNodeUrl; + +import java.util.Comparator; +import java.util.List; +import java.util.stream.Collectors; + +@RequiredArgsConstructor +@Getter +public class MemexRenderUploadFormModel implements MemexRendererableDirect { + public final MemexNodeUrl url; + public final List docs; + + public String getFilename() { + return url.getFilename(); + } + + public List getDocs() { + return docs.stream().sorted(Comparator.comparing(GemtextDocument::getUrl).reversed()).collect(Collectors.toList()); + } + + @Override + public String render(MemexHtmlRenderer renderer) { + return renderer.renderModel(this); + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/model/render/MemexRendererDeleteFormModel.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/model/render/MemexRendererDeleteFormModel.java new file mode 100644 index 00000000..84760fc2 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/model/render/MemexRendererDeleteFormModel.java @@ -0,0 +1,19 @@ +package nu.marginalia.wmsa.memex.model.render; + +import lombok.AllArgsConstructor; +import lombok.Getter; +import nu.marginalia.wmsa.memex.renderer.MemexHtmlRenderer; +import nu.marginalia.wmsa.memex.model.MemexNodeUrl; + 
+@AllArgsConstructor @Getter +public class MemexRendererDeleteFormModel implements MemexRendererableDirect { + private final String doc; + private final MemexRendererImageModel image; + private final MemexNodeUrl url; + private final String type; + + @Override + public String render(MemexHtmlRenderer renderer) { + return renderer.renderModel(this); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/model/render/MemexRendererImageModel.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/model/render/MemexRendererImageModel.java new file mode 100644 index 00000000..d5534117 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/model/render/MemexRendererImageModel.java @@ -0,0 +1,36 @@ +package nu.marginalia.wmsa.memex.model.render; + +import lombok.AllArgsConstructor; +import lombok.Getter; +import lombok.SneakyThrows; +import nu.marginalia.wmsa.memex.model.MemexLink; +import nu.marginalia.wmsa.memex.model.MemexImage; +import nu.marginalia.wmsa.memex.model.MemexNodeUrl; + +import java.nio.file.Files; +import java.util.Base64; +import java.util.List; + +@AllArgsConstructor @Getter +public class MemexRendererImageModel { + public final MemexImage image; + public final List backlinks; + + public final String parent; + + public String getParent() { + if ("/".equals(parent) || parent.isBlank()) { + return null; + } + return parent; + } + + public MemexNodeUrl getPath() { + return image.path; + } + + @SneakyThrows + public String getData() { + return Base64.getEncoder().encodeToString(Files.readAllBytes(image.realPath)); + } +} \ No newline at end of file diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/model/render/MemexRendererIndexModel.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/model/render/MemexRendererIndexModel.java new file mode 100644 index 00000000..b26c119b --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/model/render/MemexRendererIndexModel.java @@ -0,0 
+1,69 @@ +package nu.marginalia.wmsa.memex.model.render; + +import lombok.Getter; +import lombok.RequiredArgsConstructor; +import nu.marginalia.gemini.gmi.GemtextDocument; +import nu.marginalia.gemini.gmi.renderer.GemtextRendererFactory; +import nu.marginalia.wmsa.memex.model.*; + +import java.nio.file.Path; +import java.util.Comparator; +import java.util.List; +import java.util.Optional; +import java.util.stream.Collectors; + +@RequiredArgsConstructor +@Getter +public class MemexRendererIndexModel { + public final MemexNodeUrl url; + public final List docs; + public final List images; + public final List directories; + public final List tasks; + public final List backlinks; + + public String getFilename() { + return url.getFilename(); + } + + public MemexNodeUrl getParent() { + return url.getParentUrl(); + } + + public List getDocs() { + return docs.stream() + .filter(doc -> !doc.isIndex()) + .sorted(Comparator.comparing(GemtextDocument::getUrl).reversed()) + .collect(Collectors.toList()); + } + + public final String getTitle() { + return Optional.ofNullable(getIndexDocument()).map(GemtextDocument::getTitle).orElse(url.toString()); + } + + public GemtextDocument getDocument(String filename) { + return docs.stream().filter(doc -> doc.getUrl().getFilename().endsWith(filename)).findFirst().orElse(null); + } + + private GemtextDocument getIndexDocument() { + return getDocument("index.gmi"); + } + + public String getIndexData() { + var indexDoc = getIndexDocument(); + if (indexDoc == null) { + return null; + } + var htmlRenderer = new GemtextRendererFactory("").htmlRendererReadOnly(); + return indexDoc.render(htmlRenderer); + } + + public boolean hasPragma(String value) { + var doc = getIndexDocument(); + if (doc == null) { + return false; + } + return doc.getPragmas().contains(value); + + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/model/render/MemexRendererRenameFormModel.java 
b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/model/render/MemexRendererRenameFormModel.java new file mode 100644 index 00000000..6dcb62c4 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/model/render/MemexRendererRenameFormModel.java @@ -0,0 +1,19 @@ +package nu.marginalia.wmsa.memex.model.render; + +import lombok.AllArgsConstructor; +import lombok.Getter; +import nu.marginalia.wmsa.memex.renderer.MemexHtmlRenderer; +import nu.marginalia.wmsa.memex.model.MemexNodeUrl; + +@AllArgsConstructor @Getter +public class MemexRendererRenameFormModel implements MemexRendererableDirect { + private final String doc; + private final MemexRendererImageModel image; + private final MemexNodeUrl url; + private final String type; + + @Override + public String render(MemexHtmlRenderer renderer) { + return renderer.renderModel(this); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/model/render/MemexRendererTombstoneModel.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/model/render/MemexRendererTombstoneModel.java new file mode 100644 index 00000000..60907952 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/model/render/MemexRendererTombstoneModel.java @@ -0,0 +1,16 @@ +package nu.marginalia.wmsa.memex.model.render; + +import lombok.AllArgsConstructor; +import lombok.Getter; +import nu.marginalia.wmsa.memex.model.MemexLink; +import nu.marginalia.wmsa.memex.model.MemexNodeUrl; + +import java.util.List; + +@AllArgsConstructor @Getter +public class MemexRendererTombstoneModel { + private final MemexNodeUrl url; + private final String message; + private final String redirect; + public final List backlinks; +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/model/render/MemexRendererViewModel.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/model/render/MemexRendererViewModel.java new file mode 100644 index 00000000..f77e4bfd --- /dev/null +++ 
b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/model/render/MemexRendererViewModel.java @@ -0,0 +1,24 @@ +package nu.marginalia.wmsa.memex.model.render; + +import lombok.AllArgsConstructor; +import lombok.Getter; +import nu.marginalia.gemini.gmi.GemtextDocument; +import nu.marginalia.wmsa.memex.model.MemexLink; + +import java.util.List; + +@AllArgsConstructor @Getter +public class MemexRendererViewModel { + public final GemtextDocument baseDoc; + public final String title; + public final List backlinks; + public final String doc; + public final String parent; + + public String getParent() { + if ("/".equals(parent) || parent.isBlank()) { + return null; + } + return parent; + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/model/render/MemexRendererableDirect.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/model/render/MemexRendererableDirect.java new file mode 100644 index 00000000..571d5f56 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/model/render/MemexRendererableDirect.java @@ -0,0 +1,7 @@ +package nu.marginalia.wmsa.memex.model.render; + +import nu.marginalia.wmsa.memex.renderer.MemexHtmlRenderer; + +public interface MemexRendererableDirect { + String render(MemexHtmlRenderer renderer); +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/renderer/MemexGmiRenderer.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/renderer/MemexGmiRenderer.java new file mode 100644 index 00000000..e613d746 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/renderer/MemexGmiRenderer.java @@ -0,0 +1,228 @@ +package nu.marginalia.wmsa.memex.renderer; + +import com.google.inject.Inject; +import com.google.inject.name.Named; +import nu.marginalia.gemini.gmi.GemtextDocument; +import nu.marginalia.gemini.gmi.renderer.GemtextRendererFactory; +import nu.marginalia.wmsa.memex.MemexData; +import nu.marginalia.wmsa.memex.model.MemexNodeUrl; +import 
nu.marginalia.wmsa.memex.system.MemexFileWriter; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.io.PrintWriter; +import java.util.Comparator; + +public class MemexGmiRenderer { + private final MemexFileWriter renderedResources; + private final MemexData data; + private final Logger logger = LoggerFactory.getLogger(getClass()); + + @Inject + public MemexGmiRenderer(@Named("gmi") MemexFileWriter renderedResources, + MemexData data) + { + this.renderedResources = renderedResources; + this.data = data; + } + + + public void render(MemexNodeUrl url) { + if (data.getDocument(url) != null) { + renderDocument(url); + } + else if(data.getImage(url) != null) { + renderImage(url); + } + else if(data.isDirectory(url)) { + renderIndex(url); + } + else if(data.hasTombstone(url)) { + renderTombstone(url); + } + else { + logger.warn("I don't know how to render {}", url); + } + } + + private void renderDocument(MemexNodeUrl url) { + if ("index.gmi".equals(url.getFilename())) { + return; + } + + var doc = data.getDocument(url); + var renderer = new GemtextRendererFactory().gemtextRendererPublic(); + + try { + renderedResources.write(url, (w) -> { + doc.render(renderer, w); + w.println(); + backlinks(w, url); + w.println("# Navigation\n"); + w.printf("=> %s Back to Index\n", url.getParentUrl()); + w.println("\nReach me at kontakt@marginalia.nu"); + }); + } catch (IOException e) { + logger.error("Failed to render document " + url, e); + } + + } + + private void renderImage(MemexNodeUrl url) { + try { + renderedResources.write(url, data.getImage(url).realPath); + } catch (IOException e) { + logger.error("Failed to image document " + url, e); + } + } + + private void renderIndex(MemexNodeUrl url) { + + var renderer = new GemtextRendererFactory().gemtextRendererPublic(); + + var doc = data.getDocument(url.child("index.gmi")); + boolean feed = doc != null && doc.getPragmas().contains("FEED"); + boolean listing = doc != null && 
doc.getPragmas().contains("LISTING"); + + try { + renderedResources.write(url.child("index.gmi"), (w) -> { + + if (null != doc) doc.render(renderer, w); + else w.printf("# %s\n", url); + + + if (listing) { + documentsInUrlListing(url, w); + } + + if (feed) { + w.printf("\n=> %s/feed.gmi Clean gemsub feed\n", url); + w.printf("=> %s/feed.xml Atom feed\n", url); + } + + w.println("\n# Directory Contents\n"); + directoriesInUrl(url, w); + if (!listing) { + documentsInUrl(url, w); + } + imagesInUrl(url, w); + backlinks(w, url, url.child("index.gmi")); + w.println("\nReach me at kontakt@marginalia.nu"); + + }); + + if (feed) { + renderedResources.write(url.child("feed.gmi"), (w) -> { + w.printf("# marginalia.nu%s\n", url); + w.println(); + var docs = data.getDocumentsByPath(url); + docs.sort(Comparator.comparing(GemtextDocument::getUrl).reversed()); + for (var d : docs) { + if (d.getUrl().getFilename().equals("index.gmi")) { + continue; + } + if (d.getPragmas().contains("DRAFT")) { + continue; + } + w.printf("=> gemini://marginalia.nu%s\t%s %s\n", d.getUrl(), d.getDate(), d.getTitle().replaceAll("\\[[^\\]]+\\]", "")); + } + }); + } + } catch (IOException e) { + logger.error("Failed to render document " + url, e); + } + } + + private void backlinks(PrintWriter w, MemexNodeUrl... urls) { + var bls = data.getBacklinks(urls); + if (!bls.isEmpty()) { + w.println("\n# Backlinks\n"); + for (var bl : bls) { + w.printf("=> %s\n", bl.src); + } + w.println(); + } + } + + private void documentsInUrl(MemexNodeUrl url, PrintWriter w) { + var docs = data.getDocumentsByPath(url); + if (docs.size() > (data.getDocument(url.child("index.gmi")) == null ? 
0 : 1)) { + for (var d : docs) { + if (d.getUrl().getFilename().equals("index.gmi")) { + continue; + } + w.printf("=> %s\t\uD83D\uDDD2 ️️️%s\n", d.getUrl(), d.getTitle()); + } + w.println(); + } + } + + private void documentsInUrlListing(MemexNodeUrl url, PrintWriter w) { + var docs = data.getDocumentsByPath(url); + + docs.sort(Comparator.comparing(GemtextDocument::getUrl).reversed()); + + if (!docs.isEmpty()) { + for (var d : docs) { + if (d.getUrl().getFilename().equals("index.gmi")) { + continue; + } + w.printf("=> %s\t%s\n", d.getUrl(), d.getTitle()); + } + w.println(); + } + } + + private void imagesInUrl(MemexNodeUrl url, PrintWriter w) { + var images = data.getImagesByPath(url); + if (!images.isEmpty()) { + for (var i : images) { + w.printf("=> %s \uD83D\uDDBC️ %s\n", i.path, i.path.getFilename()); + } + w.println(); + } + } + + private void directoriesInUrl(MemexNodeUrl url, PrintWriter w) { + var dirs = data.getSubdirsByPath(url); + final boolean isRoot = url.getParentUrl() == null; + if (isRoot && dirs.isEmpty()) { + return; + } + + if (!isRoot) { + w.println("=> ../ ⬆ ../ "); + + } + if (dirs.isEmpty()) { + w.println(); + } + for (var d : dirs) { + w.printf("=> %s \uD83D\uDDC2️ %s/\n", d, d.getFilename()); + } + } + + + private void renderTombstone(MemexNodeUrl url) { + String message = data.getTombstones().flatMap(tombstones -> tombstones.getLinkData(url)).orElse(null); + String redir = data.getRedirects().flatMap(redirects -> redirects.getLinkData(url)).orElse(null); + + try { + renderedResources.write(url, w -> { + w.printf("# %s is gone\n\n", url); + if (message != null) { + w.printf("%s\n", message); + } + if (redir != null) { + w.println("Please see"); + w.printf("=> %s\n", redir); + } + backlinks(w, url); + w.println("\nReach me at kontakt@marginalia.nu"); + }); + } catch (IOException e) { + logger.error("Failed to render tombstone " + url, e); + } + } +} diff --git 
a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/renderer/MemexHtmlRenderer.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/renderer/MemexHtmlRenderer.java new file mode 100644 index 00000000..a39e5763 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/renderer/MemexHtmlRenderer.java @@ -0,0 +1,197 @@ +package nu.marginalia.wmsa.memex.renderer; + +import com.google.inject.Inject; +import com.google.inject.name.Named; +import lombok.SneakyThrows; +import nu.marginalia.gemini.gmi.renderer.GemtextRendererFactory; +import nu.marginalia.wmsa.memex.MemexData; +import nu.marginalia.wmsa.memex.model.*; +import nu.marginalia.wmsa.memex.model.render.*; +import nu.marginalia.wmsa.memex.system.MemexFileWriter; +import nu.marginalia.wmsa.renderer.mustache.MustacheRenderer; +import nu.marginalia.wmsa.renderer.mustache.RendererFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.time.OffsetDateTime; +import java.time.format.DateTimeFormatter; +import java.time.temporal.ChronoField; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.stream.Collectors; + +public class MemexHtmlRenderer { + + private final MemexFileWriter htmlRenderedResources; + private final MemexFileWriter gmiRenderedResources; + + private final MemexData data; + + private final MustacheRenderer viewRenderer; + private final MustacheRenderer indexRenderer; + private final MustacheRenderer indexFeedRenderer; + private final MustacheRenderer imageRenderer; + private final MustacheRenderer tombstoneRenderer; + + private final MustacheRenderer updateFormRenderer; + private final MustacheRenderer uploadFormRenderer; + private final MustacheRenderer createFormRenderer; + private final MustacheRenderer deleteFormRenderer; + private final MustacheRenderer renameFormRenderer; + + private final Logger logger = LoggerFactory.getLogger(getClass()); + + 
@Inject + public MemexHtmlRenderer( + @Named("html") MemexFileWriter htmlRenderedResources, + @Named("gmi") MemexFileWriter gmiRenderedResources, + MemexData data) throws IOException { + this.htmlRenderedResources = htmlRenderedResources; + this.gmiRenderedResources = gmiRenderedResources; + this.data = data; + + final var rendererFactory = new RendererFactory(); + + viewRenderer = rendererFactory.renderer("memex/memex-view"); + indexRenderer = rendererFactory.renderer("memex/memex-index"); + indexFeedRenderer = rendererFactory.renderer("memex/memex-index-feed"); + imageRenderer = rendererFactory.renderer("memex/memex-image"); + + tombstoneRenderer = rendererFactory.renderer("memex/memex-tombstone"); + + updateFormRenderer = rendererFactory.renderer("memex/memex-update-form"); + uploadFormRenderer = rendererFactory.renderer("memex/memex-upload-form"); + deleteFormRenderer = rendererFactory.renderer("memex/memex-delete-form"); + renameFormRenderer = rendererFactory.renderer("memex/memex-rename-form"); + createFormRenderer = rendererFactory.renderer("memex/memex-create-form"); + + } + + public void render(MemexNodeUrl url) { + if (data.getDocument(url) != null) { + renderDocument(url); + } + else if(data.getImage(url) != null) { + renderImage(url); + } + else if(data.isDirectory(url)) { + renderIndex(url); + } + else if(data.hasTombstone(url)) { + renderTombstone(url); + } + else { + logger.warn("I don't know how to render {}", url); + } + } + + public void renderDocument(MemexNodeUrl url) { + var doc = Objects.requireNonNull(data.getDocument(url), "could not get document " + url); + var htmlRenderer = new GemtextRendererFactory("", url.toString()).htmlRendererEditable(); + var model = new MemexRendererViewModel(doc, + doc.getTitle(), + data.getBacklinks(url), + doc.render(htmlRenderer), + url.getParentStr() + ); + + try { + htmlRenderedResources.write(url, viewRenderer.render(model, Map.of("urlRoot", ""))); + } catch (IOException e) { + logger.error("Failed to 
render document " + url, e); + } + + } + + public void renderIndex(MemexNodeUrl url) { + + var docs = data.getDocumentsByPath(url); + var images = data.getImagesByPath(url); + var dirs = data.getSubdirsByPath(url); + + var tasks = docs.stream().flatMap(doc -> doc.getOpenTopTasks().entrySet() + .stream() + .sorted(Map.Entry.comparingByKey()) + .map(entry -> new MemexIndexTask(entry.getValue().getLeft(), + entry.getKey().toString(), + doc.getUrl().toString(), + entry.getValue().getRight().style)) + ).collect(Collectors.toList()); + + List backlinks = data.getBacklinks(url, url.child("index.gmi")); + var model = new MemexRendererIndexModel(url, docs, images, new ArrayList<>(dirs), tasks, backlinks); + + try { + htmlRenderedResources.write(url.child("index.html"), indexRenderer.render(model, Map.of("urlRoot", ""))); + if (model.hasPragma("FEED")) { + String nowStr = OffsetDateTime.now().with(ChronoField.MILLI_OF_SECOND, 0).format(DateTimeFormatter.ISO_DATE_TIME); + htmlRenderedResources.write(url.child("feed.xml"), indexFeedRenderer.render(model, + Map.of("domain", "https://memex.marginalia.nu", "now", nowStr))); + gmiRenderedResources.write(url.child("feed.xml"), indexFeedRenderer.render(model, + Map.of("domain", "gemini://marginalia.nu", "now", nowStr))); + } + } catch (IOException e) { + logger.error("Failed to render index model " + url, e); + } + } + + public void renderImage(MemexNodeUrl url) { + var img = data.getImage(url); + var backlinks = data.getBacklinks(img.path); + var parent = img.path.getParentStr(); + var model = new MemexRendererImageModel(img, backlinks, parent); + + try { + htmlRenderedResources.write(img.path, imageRenderer.render(model, Map.of("urlRoot", ""))); + } catch (IOException e) { + logger.error("Failed to render image model " + img.path, e); + } + } + + public void renderTombstone(MemexNodeUrl url) { + + String message = data.getTombstones().flatMap(tombstones -> tombstones.getLinkData(url)).orElse(null); + String redir = 
data.getRedirects().flatMap(redirects -> redirects.getLinkData(url)).orElse(null); + + var model = new MemexRendererTombstoneModel(url, + message, + redir, + data.getBacklinks(url)); + + try { + htmlRenderedResources.write(url, tombstoneRenderer.render(model, Map.of("urlRoot", ""))); + } catch (IOException e) { + logger.error("Failed to render tombstone model " + url, e); + } + } + + @SneakyThrows + public String renderModel(MemexRendererDeleteFormModel model) { + return deleteFormRenderer.render(model, Map.of("urlRoot", "")); + } + + + @SneakyThrows + public String renderModel(MemexRendererRenameFormModel model) { + return renameFormRenderer.render(model, Map.of("urlRoot", "")); + } + + @SneakyThrows + public String renderModel(MemexRenderCreateFormModel model) { + return createFormRenderer.render(model, Map.of("urlRoot", "")); + } + + @SneakyThrows + public String renderModel(MemexRenderUploadFormModel model) { + return uploadFormRenderer.render(model, Map.of("urlRoot", "")); + } + + @SneakyThrows + public String renderModel(MemexRenderUpdateFormModel model) { + return updateFormRenderer.render(model, Map.of("urlRoot", "")); + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/renderer/MemexRendererers.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/renderer/MemexRendererers.java new file mode 100644 index 00000000..b033eabf --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/renderer/MemexRendererers.java @@ -0,0 +1,22 @@ +package nu.marginalia.wmsa.memex.renderer; + +import com.google.inject.Inject; +import com.google.inject.Singleton; +import nu.marginalia.wmsa.memex.model.MemexNodeUrl; + +@Singleton +public class MemexRendererers { + private final MemexGmiRenderer gmiRenderer; + private final MemexHtmlRenderer htmlRenderer; + + @Inject + public MemexRendererers(MemexGmiRenderer gmiRenderer, MemexHtmlRenderer htmlRenderer) { + this.gmiRenderer = gmiRenderer; + this.htmlRenderer = htmlRenderer; + } + + 
public void render(MemexNodeUrl url) { + gmiRenderer.render(url); + htmlRenderer.render(url); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/system/MemexFileSystemModifiedTimes.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/system/MemexFileSystemModifiedTimes.java new file mode 100644 index 00000000..e0f3428e --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/system/MemexFileSystemModifiedTimes.java @@ -0,0 +1,23 @@ +package nu.marginalia.wmsa.memex.system; + +import com.google.inject.Singleton; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Map; +import java.util.Objects; +import java.util.concurrent.ConcurrentHashMap; + +@Singleton +public class MemexFileSystemModifiedTimes { + + private final Map modifiedTimes = new ConcurrentHashMap<>(); + + public boolean isFreshUpdate(Path node) throws IOException { + long mtime = Files.getLastModifiedTime(node).toMillis(); + + return !Objects.equals(modifiedTimes.put(node, mtime), mtime); + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/system/MemexFileSystemMonitor.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/system/MemexFileSystemMonitor.java new file mode 100644 index 00000000..bbcc2b4b --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/system/MemexFileSystemMonitor.java @@ -0,0 +1,115 @@ +package nu.marginalia.wmsa.memex.system; + +import com.google.inject.Inject; +import com.google.inject.name.Named; +import lombok.SneakyThrows; +import nu.marginalia.wmsa.memex.model.MemexNodeUrl; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.nio.file.*; +import java.util.*; +import java.util.concurrent.ConcurrentHashMap; + +import static java.nio.file.StandardWatchEventKinds.*; + +public class MemexFileSystemMonitor { + private final WatchService watchService; + private final Set updatedUrls = new 
HashSet<>(); + private final Logger logger = LoggerFactory.getLogger(getClass()); + + private final Map roots = new ConcurrentHashMap<>(); + private final Path memexRoot; + + @Inject + public MemexFileSystemMonitor(@Named("memex-root") Path monitorPath) throws IOException { + this.memexRoot = monitorPath; + this.watchService = FileSystems.getDefault().newWatchService(); + + registerWatcher(monitorPath); + + try (var files = Files.walk(monitorPath)) { + files.filter(Files::isDirectory).forEach(this::registerWatcher); + } + + var monitorThread = new Thread(this::monitorWatch, getClass().getSimpleName()); + monitorThread.setDaemon(true); + monitorThread.start(); + } + + private void registerWatcher(Path path) { + if (path.toString().contains(".git")) { + return; + } + + try { + logger.info("Watching " + path); + var key = path.register(watchService, + StandardWatchEventKinds.ENTRY_CREATE, + StandardWatchEventKinds.ENTRY_MODIFY, + StandardWatchEventKinds.ENTRY_DELETE); + roots.put(key, path); + + } catch (IOException e) { + logger.error("Failed to register directory watcher on " + path, e); + } + } + + public List getUpdatedUrls() { + synchronized (updatedUrls) { + if (updatedUrls.isEmpty()) { + return Collections.emptyList(); + } + var ret = new ArrayList<>(updatedUrls); + updatedUrls.clear(); + return ret; + } + } + + + @SneakyThrows + private void monitorWatch() { + for (;;) { + var key = watchService.take(); + + for (var evt : key.pollEvents()) { + var kind = evt.kind(); + + if (kind == OVERFLOW) { + var root = roots.get(key); + + try (var files = Files.list(root)) { + files.forEach(file -> + updatedUrls.add(MemexNodeUrl.ofRelativePath(memexRoot, file)) + ); + } + + continue; + } + + WatchEvent ev = (WatchEvent)evt; + Path root = roots.get(key); + Path filename = ev.context(); + Path absPath = root.resolve(filename); + + + if (kind == ENTRY_CREATE && Files.isDirectory(absPath)) { + registerWatcher(absPath); + } + + MemexNodeUrl url = 
MemexNodeUrl.ofRelativePath(memexRoot, absPath); + synchronized (updatedUrls) { + updatedUrls.add(url); + } + + } + + boolean valid = key.reset(); + if (!valid) { + logger.info("Deregistering key for " + roots.get(key)); + roots.remove(key); + } + } + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/system/MemexFileWriter.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/system/MemexFileWriter.java new file mode 100644 index 00000000..577da3d9 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/system/MemexFileWriter.java @@ -0,0 +1,120 @@ +package nu.marginalia.wmsa.memex.system; + +import nu.marginalia.util.FileSizeUtil; +import nu.marginalia.wmsa.memex.model.MemexNodeUrl; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.io.PrintWriter; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.StandardCopyOption; +import java.nio.file.attribute.PosixFilePermission; +import java.util.Set; + +public class MemexFileWriter { + private final Logger logger = LoggerFactory.getLogger(getClass()); + private final Path renderedResourcesRoot; + + + private final Set filePermission = Set.of(PosixFilePermission.OWNER_READ, + PosixFilePermission.OWNER_WRITE, + PosixFilePermission.GROUP_READ, + PosixFilePermission.OTHERS_READ); + + private final Set dirPermission = Set.of(PosixFilePermission.OWNER_READ, + PosixFilePermission.OWNER_WRITE, + PosixFilePermission.OWNER_EXECUTE, + PosixFilePermission.GROUP_READ, + PosixFilePermission.GROUP_EXECUTE, + PosixFilePermission.OTHERS_READ, + PosixFilePermission.OTHERS_EXECUTE); + + public MemexFileWriter(Path renderedResourcesRoot) { + this.renderedResourcesRoot = renderedResourcesRoot; + } + + public boolean exists(MemexNodeUrl url) { + return Files.exists(getPath(url)); + } + + public void write(MemexNodeUrl url, String contents) throws IOException { + logger.info("write({},{})", url, 
FileSizeUtil.readableSize(contents.length())); + var destPath = getPath(url); + var tempFile = Files.createTempFile(renderedResourcesRoot, url.getFilename(), ".tmp"); + ensureDirectoryExists(destPath.getParent()); + + Files.createDirectories(destPath.getParent()); + Files.writeString(tempFile, contents); + Files.move(tempFile, destPath, StandardCopyOption.ATOMIC_MOVE, StandardCopyOption.REPLACE_EXISTING); + setFilePermissions(destPath); + } + + private void ensureDirectoryExists(Path dir) throws IOException { + Files.createDirectories(dir); + setDirPermissions(dir); + } + + private void setDirPermissions(Path dir) throws IOException { + for (var rel = renderedResourcesRoot.relativize(dir); rel != null; rel = rel.getParent()) { + Files.setPosixFilePermissions(renderedResourcesRoot.resolve(rel), dirPermission); + } + } + + public void write(MemexNodeUrl url, byte[] contents) throws IOException { + logger.info("write({}, {})", url, FileSizeUtil.readableSize(contents.length)); + + var destPath = getPath(url); + var tempFile = Files.createTempFile(renderedResourcesRoot, url.getFilename(), ".tmp"); + ensureDirectoryExists(destPath.getParent()); + Files.write(tempFile, contents); + Files.move(tempFile, destPath, StandardCopyOption.ATOMIC_MOVE, StandardCopyOption.REPLACE_EXISTING); + setFilePermissions(destPath); + } + + public void write(MemexNodeUrl url, Path realPath) throws IOException { + logger.info("copy({} from {})", url, realPath); + + var destPath = getPath(url); + var tempFile = Files.createTempFile(renderedResourcesRoot, url.getFilename(), ".tmp"); + ensureDirectoryExists(destPath.getParent()); + Files.copy(realPath, tempFile, StandardCopyOption.REPLACE_EXISTING); + Files.move(tempFile, destPath, StandardCopyOption.ATOMIC_MOVE, StandardCopyOption.REPLACE_EXISTING); + setFilePermissions(destPath); + } + + public void write(MemexNodeUrl url, WriteOperation wo) throws IOException { + logger.info("write({}, streamed)", url); + + var destPath = getPath(url); + var 
tempFile = Files.createTempFile(renderedResourcesRoot, url.getFilename(), ".tmp"); + ensureDirectoryExists(destPath.getParent()); + + try (var os = new PrintWriter(Files.newOutputStream(tempFile))) { + wo.write(os); + } + + Files.move(tempFile, destPath, StandardCopyOption.ATOMIC_MOVE, StandardCopyOption.REPLACE_EXISTING); + setFilePermissions(destPath); + } + + + private void setFilePermissions(Path destPath) throws IOException { + Files.setPosixFilePermissions(destPath, filePermission); + } + + private Path getPath(MemexNodeUrl url) { + final Path path = Path.of(renderedResourcesRoot + url.toString()).normalize(); + + if (!path.startsWith(renderedResourcesRoot)) { + throw new IllegalStateException("URL " + url + " resulted in a path outside of root " + renderedResourcesRoot); + } + + return path; + } + + public interface WriteOperation { + void write(PrintWriter w) throws IOException; + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/system/MemexGitRepo.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/system/MemexGitRepo.java new file mode 100644 index 00000000..05ca6603 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/system/MemexGitRepo.java @@ -0,0 +1,135 @@ +package nu.marginalia.wmsa.memex.system; + +import com.google.inject.Inject; +import com.google.inject.Singleton; +import com.google.inject.name.Named; +import com.jcraft.jsch.JSch; +import com.jcraft.jsch.JSchException; +import nu.marginalia.wmsa.memex.model.MemexNodeUrl; +import org.eclipse.jgit.api.Git; +import org.eclipse.jgit.api.errors.GitAPIException; +import org.eclipse.jgit.lib.Repository; +import org.eclipse.jgit.storage.file.FileRepositoryBuilder; +import org.eclipse.jgit.transport.*; +import org.eclipse.jgit.util.FS; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.nio.file.Path; + +@Singleton +public class MemexGitRepo { + + private final Git git; + private final Logger logger = 
LoggerFactory.getLogger(MemexGitRepo.class); + + @Inject + public MemexGitRepo(@Named("memex-root") Path root) throws IOException { + + FileRepositoryBuilder repositoryBuilder = new FileRepositoryBuilder(); + + SshSessionFactory.setInstance(new JschConfigSessionFactory() { + @Override + protected JSch createDefaultJSch(FS fs) throws JSchException { + JSch defaultJSch = super.createDefaultJSch(fs); + defaultJSch.addIdentity("/var/lib/wmsa/.ssh/id_rsa"); + return defaultJSch; + } + }); + + Repository repository = repositoryBuilder.setGitDir(root.resolve(".git").toFile()) + .readEnvironment() + .findGitDir() + .setMustExist(true) + .build(); + + git = new Git(repository); + + pull(); + } + + public void pull() { + try { + git.pull().call(); + } + catch (GitAPIException ex) { + logger.error("Git operation failed", ex); + } + } + + public void remove(MemexNodeUrl url) { + try { + git.rm() + .addFilepattern(filePattern(url)) + .call(); + + commit("Removing " + url); + push(); + } + catch (GitAPIException ex) { + logger.error("Git operation failed", ex); + } + } + + public void add(MemexNodeUrl url) { + try { + git.add() + .addFilepattern(filePattern(url)) + .call(); + + commit("Adding " + url); + push(); + + + } + catch (GitAPIException ex) { + logger.error("Git operation failed", ex); + } + } + public void update(MemexNodeUrl url) { + try { + git.add() + .setUpdate(true) + .addFilepattern(filePattern(url)) + .call(); + + commit("Update " + url); + push(); + + + } + catch (GitAPIException ex) { + logger.error("Git operation failed", ex); + } + } + + + public void rename(MemexNodeUrl src, MemexNodeUrl dst) { + try { + git.rm().addFilepattern(filePattern(src)).call(); + git.add().addFilepattern(filePattern(dst)).call(); + commit("Renaming " + src + " into " + dst); + push(); + } + catch (GitAPIException ex) { + logger.error("Git operation failed", ex); + } + } + + private void push() throws GitAPIException { + git.push().call(); + } + + private void commit(String message) 
throws GitAPIException { + git.commit() + .setCommitter("marginalia", "system@marginalia.nu") + .setMessage("Changes from web gui: " + message) + .call(); + } + + private String filePattern(MemexNodeUrl url) { + return url.asRelativePath().toString().replaceAll("^/+", ""); + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/system/MemexSourceFileSystem.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/system/MemexSourceFileSystem.java new file mode 100644 index 00000000..c72e2383 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/system/MemexSourceFileSystem.java @@ -0,0 +1,83 @@ +package nu.marginalia.wmsa.memex.system; + +import com.google.inject.Inject; +import com.google.inject.Singleton; +import com.google.inject.name.Named; +import nu.marginalia.wmsa.memex.model.MemexNodeUrl; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.nio.file.*; + +@Singleton +public class MemexSourceFileSystem { + + private final Path root; + private final MemexGitRepo gitRepo; + + private static final Logger logger = LoggerFactory.getLogger(MemexSourceFileSystem.class); + + @Inject + public MemexSourceFileSystem(@Named("memex-root") Path root, + MemexGitRepo gitRepo) { + this.root = root; + this.gitRepo = gitRepo; + } + + public void pullChanges() { + gitRepo.pull(); + } + + public void replaceFile(MemexNodeUrl url, String text) throws IOException { + var path = url.asAbsolutePath(root); + Files.writeString(path, text, StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING); + + gitRepo.update(url); + } + + public void createFile(MemexNodeUrl url, String text) throws IOException { + var path = url.asAbsolutePath(root); + logger.info("Writing {} ({}b)", path, text.length()); + + Files.writeString(path, text, StandardOpenOption.CREATE_NEW); + + gitRepo.add(url); + } + + public void createFile(MemexNodeUrl url, byte[] bytes) throws IOException { + var path = 
url.asAbsolutePath(root); + logger.info("Writing {} ({}b)", path, bytes.length); + + Files.write(path, bytes, StandardOpenOption.CREATE_NEW); + + gitRepo.add(url); + } + + public void delete(MemexNodeUrl url) throws IOException { + var path = url.asAbsolutePath(root); + + logger.info("Delete {}", path); + Files.delete(path); + + gitRepo.remove(url); + } + + public void renameFile(MemexNodeUrl src, MemexNodeUrl dst) throws IOException { + var srcPath = src.asAbsolutePath(root); + var dstPath = dst.asAbsolutePath(root); + + if (!Files.exists(srcPath) || Files.exists(dstPath)) { + throw new IOException("Could not rename " + src + " into " + dst); + } + + Files.move(srcPath, dstPath, StandardCopyOption.ATOMIC_MOVE); + gitRepo.rename(src, dst); + } + + public byte[] getRaw(MemexNodeUrl url) throws IOException { + logger.info("Getting raw file contents of {}", url); + + return Files.readAllBytes(url.asAbsolutePath(root)); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/podcasts/PodcastFetcher.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/podcasts/PodcastFetcher.java new file mode 100644 index 00000000..58fad4c9 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/podcasts/PodcastFetcher.java @@ -0,0 +1,112 @@ +package nu.marginalia.wmsa.podcasts; + +import com.google.common.escape.Escaper; +import com.google.common.net.PercentEscaper; +import lombok.Getter; +import nu.marginalia.wmsa.podcasts.model.*; +import org.jetbrains.annotations.NotNull; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Element; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.net.URL; +import java.time.ZonedDateTime; +import java.time.format.DateTimeFormatter; +import java.time.format.DateTimeFormatterBuilder; +import java.util.ArrayList; +import java.util.Comparator; +import java.util.List; +import java.util.Optional; +import java.util.stream.Collectors; + +@Getter +public class PodcastFetcher { + + 
private final List allEpisodes = new ArrayList<>(); + private final List allPodcasts = new ArrayList<>(); + private final Escaper urlEscaper = new PercentEscaper("", true); + + private final static Logger logger = LoggerFactory.getLogger(PodcastFetcher.class); + + private final DateTimeFormatter readableIsoDate = + (new DateTimeFormatterBuilder()).parseCaseInsensitive().append(DateTimeFormatter.ISO_LOCAL_DATE).appendLiteral(' ').append( + DateTimeFormatter.ISO_LOCAL_TIME).toFormatter(); + + public Optional fetchPodcast(String name, String url) { /* Fetches and parses one feed; logs and returns Optional.empty() on any failure. */ + try { + logger.info("Fetching podcast {} : {}", name, url); + + var doc = Jsoup.parse(new URL(url), 10_000); + + String title = doc.selectFirst("channel > title").text(); + String description = doc.selectFirst("channel > description").text(); + String link = doc.selectFirst("channel > link").text(); + + var podcast = new Podcast(new PodcastMetadata(title, description, name, link)); + doc.getElementsByTag("item").forEach(item -> { + try { + PodcastEpisode episode = fetchEpisode(name, title, item); + podcast.episodes.add(episode); + allEpisodes.add(episode); + } + catch (Exception ex) { + logger.error("Failed to fetch podcast episode", ex); + } + }); + + allPodcasts.add(podcast); + return Optional.of(podcast); + } catch (IOException | RuntimeException e) { /* selectFirst() yields null for a feed missing channel metadata; that NPE must not escape and abort the other feeds */ + logger.error("Failed to fetch podcast", e); + return Optional.empty(); + } + + } + + @NotNull + private PodcastEpisode fetchEpisode(String name, String title, org.jsoup.nodes.Element item) { + String epTitle = item.getElementsByTag("title").text(); + String epGuid = name+":"+escapeUrlString(item.getElementsByTag("guid").text()); + String epDescription = item.getElementsByTag("description").text(); + String epPubDate = getPubDate(item); + String epUrl = item.getElementsByTag("enclosure").attr("url"); + + return new PodcastEpisode(name, title, epGuid, epTitle, epDescription, epPubDate, epUrl); + } + + @NotNull + private String getPubDate(Element item) { + try { + return
ZonedDateTime.parse(item.getElementsByTag("pubDate").text(), + DateTimeFormatter.RFC_1123_DATE_TIME) + .format(readableIsoDate); + } + catch (Exception ex) { + logger.error("Failed to parse date", ex); + return item.getElementsByTag("pubDate").text(); + } + } + + private String escapeUrlString(String s) { + return urlEscaper.escape(s).replace("%", "_"); + } + + public PodcastNewEpisodes getNewEpisodes() { + return new PodcastNewEpisodes(allEpisodes + .stream() + .sorted(Comparator.comparing(PodcastEpisode::getDateUploaded).reversed()).limit(10) + .collect(Collectors.toList())); + } + + public PodcastListing getListing() { + final var metadatas = allPodcasts.stream() + .map(Podcast::getMetadata) + .sorted(Comparator.comparing(PodcastMetadata::getTitle)) + .collect(Collectors.toList()); + + return + new PodcastListing(metadatas); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/podcasts/PodcastScraperMain.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/podcasts/PodcastScraperMain.java new file mode 100644 index 00000000..f3850679 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/podcasts/PodcastScraperMain.java @@ -0,0 +1,31 @@ +package nu.marginalia.wmsa.podcasts; + +import com.google.inject.Guice; +import com.google.inject.Inject; +import com.google.inject.Injector; +import nu.marginalia.wmsa.configuration.MainClass; +import nu.marginalia.wmsa.configuration.ServiceDescriptor; +import nu.marginalia.wmsa.configuration.module.ConfigurationModule; +import nu.marginalia.wmsa.configuration.server.Initialization; + +import java.io.IOException; + +public class PodcastScraperMain extends MainClass { + + private final PodcastScraperService service; + + @Inject + public PodcastScraperMain(PodcastScraperService service) throws IOException { + this.service = service; + } + + public static void main(String... 
args) { + init(ServiceDescriptor.PODCST_SCRAPER, args); + + Injector injector = Guice.createInjector( + new ConfigurationModule()); + injector.getInstance(PodcastScraperMain.class); + injector.getInstance(Initialization.class).setReady(); + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/podcasts/PodcastScraperService.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/podcasts/PodcastScraperService.java new file mode 100644 index 00000000..a36ec3ce --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/podcasts/PodcastScraperService.java @@ -0,0 +1,78 @@ +package nu.marginalia.wmsa.podcasts; + +import com.google.inject.Inject; +import com.google.inject.name.Named; +import io.reactivex.rxjava3.schedulers.Schedulers; +import nu.marginalia.wmsa.configuration.server.Context; +import nu.marginalia.wmsa.configuration.server.Initialization; +import nu.marginalia.wmsa.configuration.server.MetricsServer; +import nu.marginalia.wmsa.configuration.server.Service; +import nu.marginalia.wmsa.podcasts.model.Podcast; +import nu.marginalia.wmsa.podcasts.model.PodcastEpisode; +import nu.marginalia.wmsa.renderer.client.RendererClient; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import spark.Spark; + +import java.util.Map; +import java.util.concurrent.TimeUnit; + +public class PodcastScraperService extends Service { + + private final Map podcastUrls = Map.of( + "SBS", "https://feeds.simplecast.com/Fxu1mrhe", + "hopwag", "https://feed.podbean.com/hopwag/feed.xml", + "philosophizethis", "https://philosophizethis.libsyn.com/rss", + "PEL", "https://partiallyexaminedlife.libsyn.com/rss", + "IOT", "https://podcasts.files.bbci.co.uk/b006qykl.rss", + "SaturaLanx", "https://anchor.fm/s/2c536214/podcast/rss", + "ControversiesInChurchHistory", "https://anchor.fm/s/9b43760/podcast/rss", + "readmeapoem", "https://rss.acast.com/readmeapoem", + "HoL", "https://feeds.megaphone.fm/history-of-literature", + "Revolutions", 
"https://revolutionspodcast.libsyn.com/rss" + ); + + private final RendererClient rendererClient; + private final Logger logger = LoggerFactory.getLogger(getClass()); + private final Initialization initialization; + + @Inject + public PodcastScraperService(@Named("service-host") String ip, + @Named("service-port") Integer port, + RendererClient rendererClient, + Initialization initialization, + MetricsServer metricsServer) { + super(ip, port, initialization, metricsServer); + this.rendererClient = rendererClient; + this.initialization = initialization; + + Spark.awaitInitialization(); + + Schedulers.io().schedulePeriodicallyDirect(this::fetchPods, 0, 1, TimeUnit.HOURS); + } + + private void fetchPods() { + try { + PodcastFetcher fetcher = new PodcastFetcher(); + + podcastUrls.forEach(fetcher::fetchPodcast); + + rendererClient.render(Context.internal("podcast"), fetcher.getNewEpisodes()).blockingSubscribe(); + rendererClient.render(Context.internal("podcast"), fetcher.getListing()).blockingSubscribe(); + + for (Podcast podcast : fetcher.getAllPodcasts()) { + rendererClient.render(Context.internal("podcast"), podcast).blockingSubscribe(); + } + for (PodcastEpisode episode : fetcher.getAllEpisodes()) { + rendererClient.render(Context.internal("podcast"), episode).blockingSubscribe(); + } + } + catch (RuntimeException ex) { + logger.error("Uncaught exception", ex); + } + } + + public void start() { + logger.info("Started"); + } +} \ No newline at end of file diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/podcasts/model/Podcast.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/podcasts/model/Podcast.java new file mode 100644 index 00000000..7c3ae0ae --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/podcasts/model/Podcast.java @@ -0,0 +1,16 @@ +package nu.marginalia.wmsa.podcasts.model; + +import lombok.AllArgsConstructor; +import lombok.Getter; +import lombok.Setter; +import lombok.ToString; + +import java.util.ArrayList; +import 
java.util.List; + +@AllArgsConstructor @Getter @Setter @ToString +public class Podcast { + public final PodcastMetadata metadata; + + public final List episodes = new ArrayList<>(); +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/podcasts/model/PodcastEpisode.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/podcasts/model/PodcastEpisode.java new file mode 100644 index 00000000..255de6a0 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/podcasts/model/PodcastEpisode.java @@ -0,0 +1,16 @@ +package nu.marginalia.wmsa.podcasts.model; + +import lombok.AllArgsConstructor; +import lombok.Getter; +import lombok.ToString; + +@AllArgsConstructor @Getter @ToString +public class PodcastEpisode { + public final String podcastId; + public final String podcastName; + public final String guid; + public final String title; + public final String description; + public final String dateUploaded; + public final String mp3url; +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/podcasts/model/PodcastListing.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/podcasts/model/PodcastListing.java new file mode 100644 index 00000000..3a3e917a --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/podcasts/model/PodcastListing.java @@ -0,0 +1,16 @@ +package nu.marginalia.wmsa.podcasts.model; + +import lombok.AllArgsConstructor; +import lombok.Getter; +import lombok.Setter; +import lombok.ToString; + +import java.util.List; + +@AllArgsConstructor +@Getter +@Setter +@ToString +public class PodcastListing { + public final List podcasts; +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/podcasts/model/PodcastMetadata.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/podcasts/model/PodcastMetadata.java new file mode 100644 index 00000000..8f6aad6c --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/podcasts/model/PodcastMetadata.java @@ -0,0 +1,17 @@ +package nu.marginalia.wmsa.podcasts.model; + +import 
lombok.AllArgsConstructor; +import lombok.Getter; +import lombok.Setter; +import lombok.ToString; + +@AllArgsConstructor +@Getter +@Setter +@ToString +public class PodcastMetadata { + public final String title; + public final String description; + public final String id; + public final String extLink; +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/podcasts/model/PodcastNewEpisodes.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/podcasts/model/PodcastNewEpisodes.java new file mode 100644 index 00000000..c7562e60 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/podcasts/model/PodcastNewEpisodes.java @@ -0,0 +1,11 @@ +package nu.marginalia.wmsa.podcasts.model; + +import lombok.AllArgsConstructor; +import lombok.Getter; + +import java.util.List; + +@AllArgsConstructor @Getter +public class PodcastNewEpisodes { + public final List episodes; +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/renderer/PodcastRendererService.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/renderer/PodcastRendererService.java new file mode 100644 index 00000000..b22386a1 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/renderer/PodcastRendererService.java @@ -0,0 +1,130 @@ +package nu.marginalia.wmsa.renderer; + +import com.google.gson.Gson; +import com.google.gson.GsonBuilder; +import com.google.inject.Inject; +import lombok.SneakyThrows; +import nu.marginalia.wmsa.configuration.server.Context; +import nu.marginalia.wmsa.podcasts.model.Podcast; +import nu.marginalia.wmsa.podcasts.model.PodcastEpisode; +import nu.marginalia.wmsa.podcasts.model.PodcastListing; +import nu.marginalia.wmsa.podcasts.model.PodcastNewEpisodes; +import nu.marginalia.wmsa.renderer.mustache.MustacheRenderer; +import nu.marginalia.wmsa.renderer.mustache.RendererFactory; +import nu.marginalia.wmsa.resource_store.ResourceStoreClient; +import nu.marginalia.wmsa.resource_store.model.RenderedResource; +import org.slf4j.Logger; +import 
org.slf4j.LoggerFactory; +import spark.Request; +import spark.Response; +import spark.Spark; + +import java.io.IOException; +import java.time.LocalDateTime; +import java.time.temporal.ChronoUnit; +import java.util.concurrent.TimeUnit; + +public class PodcastRendererService { + private final Logger logger = LoggerFactory.getLogger(getClass()); + private final Gson gson = new GsonBuilder().create(); + + private final RendererFactory rendererFactory = new RendererFactory(); + + private final MustacheRenderer newsRenderer; + private final MustacheRenderer episodeRenderer; + private final MustacheRenderer listingRenderer; + private final MustacheRenderer podcastRenderer; + + private ResourceStoreClient resourceStoreClient; + + + @Inject @SneakyThrows + public PodcastRendererService(ResourceStoreClient resourceStoreClient) { + this.resourceStoreClient = resourceStoreClient; + newsRenderer = rendererFactory.renderer( "podcast/new"); + episodeRenderer = rendererFactory.renderer( "podcast/episode"); + listingRenderer = rendererFactory.renderer( "podcast/listing"); + podcastRenderer = rendererFactory.renderer( "podcast/podcast"); + } + + public void start() { + Spark.post("/render/podcast", this::renderPodcast); + Spark.post("/render/podcast/episode", this::renderPodcastEpisode); + Spark.post("/render/podcast/new", this::renderPodcastNew); + Spark.post("/render/podcast/listing", this::renderPodcastListing); + } + + private Object renderPodcastListing(Request request, Response response) throws IOException { + var requestText = request.body(); + var req = gson.fromJson(requestText, PodcastListing.class); + + logger.info("renderPodcastListing()"); + + var resource = new RenderedResource("list.html", + getRetentionTime(), + listingRenderer.render(req)); + + storeResource(request, resource); + + return ""; + } + + + private Object renderPodcast(Request request, Response response) throws IOException { + var requestText = request.body(); + var req = gson.fromJson(requestText, 
Podcast.class); + + logger.info("renderPodcast({})", req.metadata.id); + + var resource = new RenderedResource(req.metadata.id+".html", + getRetentionTime(), + podcastRenderer.render(req)); + + storeResource(request, resource); + + return ""; + } + + private Object renderPodcastEpisode(Request request, Response response) throws IOException { + var requestText = request.body(); + var req = gson.fromJson(requestText, PodcastEpisode.class); + Context.fromRequest(request); + + logger.info("renderPodcastEpisode({}/{})", req.podcastName, req.guid); + var resource = new RenderedResource(req.guid+".html", + getRetentionTime(), + episodeRenderer.render(req)); + + storeResource(request, resource); + + return ""; + } + + private Object renderPodcastNew(Request request, Response response) throws IOException { + var requestText = request.body(); + var req = gson.fromJson(requestText, PodcastNewEpisodes.class); + + logger.info("renderPodcastNew()"); + + var resource = new RenderedResource("new.html", + getRetentionTime(), + newsRenderer.render(req)); + + storeResource(request, resource); + + return ""; + } + + + private LocalDateTime getRetentionTime() { + return LocalDateTime.now().plus(24, ChronoUnit.HOURS); + } + + private void storeResource(Request request, RenderedResource resource) { + resourceStoreClient.putResource(Context.fromRequest(request), "podcast", resource) + .timeout(10, TimeUnit.SECONDS) + .blockingSubscribe(); + } + + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/renderer/RendererMain.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/renderer/RendererMain.java new file mode 100644 index 00000000..3425416b --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/renderer/RendererMain.java @@ -0,0 +1,31 @@ +package nu.marginalia.wmsa.renderer; + +import com.google.inject.Guice; +import com.google.inject.Inject; +import com.google.inject.Injector; +import nu.marginalia.wmsa.configuration.MainClass; +import 
nu.marginalia.wmsa.configuration.ServiceDescriptor; +import nu.marginalia.wmsa.configuration.module.ConfigurationModule; +import nu.marginalia.wmsa.configuration.server.Initialization; + +import java.io.IOException; + +public class RendererMain extends MainClass { + private RendererService service; + + @Inject + public RendererMain(RendererService service + ) throws IOException { + this.service = service; + } + + public static void main(String... args) { + init(ServiceDescriptor.RENDERER, args); + + Injector injector = Guice.createInjector( + new RendererModule(), + new ConfigurationModule()); + injector.getInstance(RendererMain.class); + injector.getInstance(Initialization.class).setReady(); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/renderer/RendererModule.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/renderer/RendererModule.java new file mode 100644 index 00000000..99422709 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/renderer/RendererModule.java @@ -0,0 +1,8 @@ +package nu.marginalia.wmsa.renderer; + +import com.google.inject.AbstractModule; + +public class RendererModule extends AbstractModule { + public void configure() { + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/renderer/RendererService.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/renderer/RendererService.java new file mode 100644 index 00000000..b1606b6d --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/renderer/RendererService.java @@ -0,0 +1,46 @@ +package nu.marginalia.wmsa.renderer; + + +import com.google.gson.Gson; +import com.google.gson.GsonBuilder; +import com.google.inject.Inject; +import com.google.inject.name.Named; +import nu.marginalia.wmsa.configuration.server.Initialization; +import nu.marginalia.wmsa.configuration.server.MetricsServer; +import nu.marginalia.wmsa.configuration.server.Service; +import nu.marginalia.wmsa.resource_store.ResourceStoreClient; +import org.slf4j.Logger; +import 
org.slf4j.LoggerFactory; + + +public class RendererService extends Service { + private final Logger logger = LoggerFactory.getLogger(getClass()); + private final Gson gson = new GsonBuilder().create(); + + private final ResourceStoreClient resourceStoreClient; + + + @Inject + public RendererService(ResourceStoreClient resourceStoreClient, + @Named("service-host") String ip, + @Named("service-port") Integer port, + SmhiRendererService smhiRendererService, + PodcastRendererService podcastRendererService, + StatusRendererService statusRendererService, + Initialization initialization, + MetricsServer metricsServer + ) { + super(ip, port, initialization, metricsServer); + + this.resourceStoreClient = resourceStoreClient; + + smhiRendererService.start(); + podcastRendererService.start(); + statusRendererService.start(); + } + + public boolean isReady() { + return resourceStoreClient.isAccepting(); + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/renderer/ServerStatusModel.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/renderer/ServerStatusModel.java new file mode 100644 index 00000000..fea3ee41 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/renderer/ServerStatusModel.java @@ -0,0 +1,10 @@ +package nu.marginalia.wmsa.renderer; + +import lombok.AllArgsConstructor; +import lombok.Getter; + +@AllArgsConstructor @Getter +public class ServerStatusModel { + public final String server; + public final String status; +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/renderer/SmhiRendererService.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/renderer/SmhiRendererService.java new file mode 100644 index 00000000..ba270308 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/renderer/SmhiRendererService.java @@ -0,0 +1,83 @@ +package nu.marginalia.wmsa.renderer; + +import com.google.gson.Gson; +import com.google.gson.GsonBuilder; +import com.google.inject.Inject; +import lombok.SneakyThrows; +import 
nu.marginalia.wmsa.configuration.server.Context; +import nu.marginalia.wmsa.renderer.mustache.MustacheRenderer; +import nu.marginalia.wmsa.renderer.mustache.RendererFactory; +import nu.marginalia.wmsa.renderer.request.smhi.RenderSmhiIndexReq; +import nu.marginalia.wmsa.renderer.request.smhi.RenderSmhiPrognosReq; +import nu.marginalia.wmsa.resource_store.ResourceStoreClient; +import nu.marginalia.wmsa.resource_store.model.RenderedResource; +import nu.marginalia.wmsa.smhi.model.PrognosData; +import nu.marginalia.wmsa.smhi.model.index.IndexPlatser; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import spark.Request; +import spark.Response; +import spark.Spark; + +import java.io.IOException; +import java.time.LocalDateTime; +import java.util.concurrent.TimeUnit; + +public class SmhiRendererService { + private final Logger logger = LoggerFactory.getLogger(getClass()); + private final Gson gson = new GsonBuilder().create(); + + private final RendererFactory rendererFactory = new RendererFactory(); + + private final MustacheRenderer indexRenderer; + private final MustacheRenderer prognosRenderer; + + private ResourceStoreClient resourceStoreClient; + + + @Inject @SneakyThrows + public SmhiRendererService(ResourceStoreClient resourceStoreClient) { + this.resourceStoreClient = resourceStoreClient; + indexRenderer = rendererFactory.renderer( "smhi/index"); + prognosRenderer = rendererFactory.renderer( "smhi/prognos"); + } + + public void start() { + Spark.post("/render/smhi/index", this::renderSmhiIndex); + Spark.post("/render/smhi/prognos", this::renderSmhiPrognos); + } + + + private Object renderSmhiIndex(Request request, Response response) throws IOException { + var requestText = request.body(); + var req = gson.fromJson(requestText, RenderSmhiIndexReq.class); + + logger.info("renderSmhiIndex()"); + var resource = new RenderedResource("index.html", + LocalDateTime.MAX, + indexRenderer.render(new IndexPlatser(req.platser))); + + 
resourceStoreClient.putResource(Context.fromRequest(request), "smhi", resource) + .timeout(10, TimeUnit.SECONDS) + .blockingSubscribe(); + + return ""; + } + + private Object renderSmhiPrognos(Request request, Response response) throws IOException { + var requestText = request.body(); + var req = gson.fromJson(requestText, RenderSmhiPrognosReq.class); + + logger.info("renderSmhiPrognos({})", req.data.plats.namn); + var resource = new RenderedResource(req.data.plats.getUrl(), + LocalDateTime.now().plusHours(3), + prognosRenderer.render(req.data)); + + resourceStoreClient.putResource(Context.fromRequest(request), "smhi", resource) + .timeout(10, TimeUnit.SECONDS) + .blockingSubscribe(); + + return ""; + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/renderer/StatusRendererService.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/renderer/StatusRendererService.java new file mode 100644 index 00000000..1b6d5592 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/renderer/StatusRendererService.java @@ -0,0 +1,82 @@ +package nu.marginalia.wmsa.renderer; + +import com.google.inject.Inject; +import io.reactivex.rxjava3.schedulers.Schedulers; +import lombok.SneakyThrows; +import nu.marginalia.wmsa.configuration.ServiceDescriptor; +import nu.marginalia.wmsa.configuration.server.Context; +import nu.marginalia.wmsa.renderer.mustache.MustacheRenderer; +import nu.marginalia.wmsa.renderer.mustache.RendererFactory; +import nu.marginalia.wmsa.resource_store.ResourceStoreClient; +import nu.marginalia.wmsa.resource_store.model.RenderedResource; +import okhttp3.OkHttpClient; +import okhttp3.Request; + +import java.io.IOException; +import java.time.LocalDateTime; +import java.time.temporal.ChronoUnit; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.concurrent.TimeUnit; + +public class StatusRendererService { + private final MustacheRenderer statusRenderer; + private ResourceStoreClient 
resourceStoreClient; + + private final OkHttpClient client; + + private final RendererFactory rendererFactory = new RendererFactory(); + + @Inject + @SneakyThrows + public StatusRendererService(ResourceStoreClient resourceStoreClient) { + this.resourceStoreClient = resourceStoreClient; + + client = new OkHttpClient.Builder() + .connectTimeout(50, TimeUnit.MILLISECONDS) + .readTimeout(1, TimeUnit.SECONDS) + .retryOnConnectionFailure(false) + .followRedirects(false) + .build(); + statusRenderer = rendererFactory.renderer( "status/server-status"); + } + + public void start() { + Schedulers.io().schedulePeriodicallyDirect(this::renderStatusPage, 1, 60, TimeUnit.SECONDS); + } + public void renderStatusPage() { + try { + var status = getStatus(); + var page = statusRenderer.render(Map.of("status", status)); + resourceStoreClient + .putResource(Context.internal(), "status", + new RenderedResource("index.html", LocalDateTime.now().plus(2, ChronoUnit.MINUTES), page)) + .blockingSubscribe(); + } catch (Exception e) { + e.printStackTrace(); + } + } + + private List getStatus() { + List status = new ArrayList<>(ServiceDescriptor.values().length); + + for (ServiceDescriptor sd : ServiceDescriptor.values()) { + if (sd.port == 0) { + continue; + } + try { + var req = new Request.Builder().url("http://127.0.0.1:" + sd.port + "/internal/ping").get().build(); + var call = client.newCall(req); + + call.execute().close(); + status.add(new ServerStatusModel(sd.name, "UP")); + + } catch (Exception e) { + status.add(new ServerStatusModel(sd.name, "DOWN")); + } + } + return status; + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/renderer/client/RendererClient.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/renderer/client/RendererClient.java new file mode 100644 index 00000000..e398f8b7 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/renderer/client/RendererClient.java @@ -0,0 +1,65 @@ +package nu.marginalia.wmsa.renderer.client; + +import 
io.reactivex.rxjava3.core.Observable; +import lombok.SneakyThrows; +import nu.marginalia.wmsa.client.AbstractDynamicClient; +import nu.marginalia.wmsa.client.HttpStatusCode; +import nu.marginalia.wmsa.client.exception.TimeoutException; +import nu.marginalia.wmsa.configuration.ServiceDescriptor; +import nu.marginalia.wmsa.configuration.server.Context; +import nu.marginalia.wmsa.podcasts.model.Podcast; +import nu.marginalia.wmsa.podcasts.model.PodcastEpisode; +import nu.marginalia.wmsa.podcasts.model.PodcastListing; +import nu.marginalia.wmsa.podcasts.model.PodcastNewEpisodes; +import nu.marginalia.wmsa.renderer.request.smhi.RenderSmhiIndexReq; +import nu.marginalia.wmsa.renderer.request.smhi.RenderSmhiPrognosReq; + +import javax.inject.Inject; +import java.util.concurrent.TimeUnit; + + +public class RendererClient extends AbstractDynamicClient{ + @Inject + public RendererClient() { + super(ServiceDescriptor.RENDERER); + } + + @SneakyThrows + public Observable render(Context ctx, RenderSmhiPrognosReq req) { + return post(ctx, "/render/smhi/prognos", req) + .timeout(5, TimeUnit.SECONDS, Observable.error(new TimeoutException("RendererClient.renderSmhiPrognos()"))); + } + + + @SneakyThrows + public Observable render(Context ctx, RenderSmhiIndexReq req) { + return post(ctx, "/render/smhi/index", req) + .timeout(5, TimeUnit.SECONDS, Observable.error(new TimeoutException("RendererClient.renderSmhiIndex()"))); + } + + @SneakyThrows + public Observable render(Context ctx, PodcastNewEpisodes req) { + return post(ctx, "/render/podcast/new", req) + .timeout(5, TimeUnit.SECONDS, Observable.error(new TimeoutException("RendererClient.renderPodcastNew()"))); + } + + + @SneakyThrows + public Observable render(Context ctx, PodcastEpisode req) { + return post(ctx, "/render/podcast/episode", req) + .timeout(5, TimeUnit.SECONDS, Observable.error(new TimeoutException("RendererClient.renderPodcastEpisode()"))); + } + + @SneakyThrows + public Observable render(Context ctx, PodcastListing 
req) { + return post(ctx, "/render/podcast/listing", req) + .timeout(5, TimeUnit.SECONDS, Observable.error(new TimeoutException("RendererClient.renderPodcastListing()"))); + } + + + @SneakyThrows /* Posts a Podcast to /render/podcast; errors with a TimeoutException if nothing arrives within 5 seconds. */ + public Observable render(Context ctx, Podcast req) { + return post(ctx, "/render/podcast", req) + .timeout(5, TimeUnit.SECONDS, Observable.error(new TimeoutException("RendererClient.renderPodcast()"))); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/renderer/mustache/MustacheRenderer.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/renderer/mustache/MustacheRenderer.java new file mode 100644 index 00000000..ae3eadb0 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/renderer/mustache/MustacheRenderer.java @@ -0,0 +1,166 @@ +package nu.marginalia.wmsa.renderer.mustache; + +import com.github.jknack.handlebars.*; +import com.github.jknack.handlebars.helper.ConditionalHelpers; +import com.github.jknack.handlebars.io.ClassPathTemplateLoader; +import com.github.jknack.handlebars.io.TemplateLoader; +import lombok.SneakyThrows; +import nu.marginalia.gemini.gmi.GemtextDocument; +import nu.marginalia.wmsa.memex.model.MemexNodeUrl; +import nu.marginalia.wmsa.memex.model.render.MemexRendererIndexModel; +import nu.marginalia.wmsa.memex.model.render.MemexRendererViewModel; +import org.apache.logging.log4j.util.Strings; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; + +public class MustacheRenderer { + Template template; + private final Logger logger = LoggerFactory.getLogger(getClass()); + + MustacheRenderer(String templateFile) throws IOException { + + TemplateLoader loader = new ClassPathTemplateLoader(); + loader.setPrefix("/templates"); + loader.setSuffix(".hdb"); + + var handlebars = new Handlebars(loader); +
handlebars.registerHelpers(ConditionalHelpers.class); + handlebars.registerHelpers(new GeminiHelpers()); + handlebars.registerHelper("md", new MarkdownHelper()); + handlebars.registerHelper("gen-url", this::genereraUrl); + handlebars.registerHelper("gen-thread-url", this::genereraTradUrl); + handlebars.registerHelper("gen-author-url", this::generereraAuthorUrl); + + + + try { + template = handlebars.compile(templateFile); + } + catch (FileNotFoundException ex) { + logger.error("Kunde inte ladda template " + templateFile, ex); + System.exit(2); + } + catch (HandlebarsException ex) { + logger.error("Kunde inte instantiera mall " + templateFile, ex); + System.exit(2); + } + } + + public static final class GeminiHelpers { + + public CharSequence pragma(Options options) throws IOException { + var model = options.context.model(); + GemtextDocument doc; + if (model instanceof MemexRendererIndexModel) { + doc = ((MemexRendererIndexModel) model).getDocument("index.gmi"); + } + else if (model instanceof MemexRendererViewModel) { + doc = ((MemexRendererViewModel)model).baseDoc; + } + else { + doc = null; + } + + if (doc != null && doc.getPragmas().contains((String) options.param(0))) { + return options.fn(options.context); + } + return null; + } + public CharSequence amgarp(Options options) throws IOException { + var model = options.context.model(); + GemtextDocument doc; + if (model instanceof MemexRendererIndexModel) { + doc = ((MemexRendererIndexModel) model).getDocument("index.gmi"); + } + else if (model instanceof MemexRendererViewModel) { + doc = ((MemexRendererViewModel)model).baseDoc; + } + else { + doc = null; + } + + if (doc == null || !doc.getPragmas().contains((String) options.param(0))) { + return options.fn(options.context); + } + return null; + } + + public CharSequence topbar(MemexNodeUrl url, Options options) throws IOException { + var path = url.asRelativePath(); + LinkedList nodes = new LinkedList<>(); + + for (Path p = path; p != null; p = p.getParent()) { 
+ nodes.addFirst(p); + } + StringBuilder sb = new StringBuilder(); + for (var p : nodes) { + String name = p.toFile().getName(); + String type = "dir"; + if ("".equals(name)) { + name = "marginalia"; + type = "root"; + } + if (p.equals(path) && name.contains(".")) { + type = "file"; + } + Context newCtx = Context.newBlockParamContext(options.context, + List.of("url", "name", "type"), + List.of(p, name, type) + ); + sb.append(options.fn(newCtx)); + }; + return sb.toString(); + } + } + + @SneakyThrows + public String render(T model) { + return template.apply(model); + } + + @SneakyThrows + public String render(T model, String name, List children) { + Context ctx = Context.newBuilder(model).combine(name, children).build(); + + return template.apply(ctx); + } + + @SneakyThrows + public String render(T model, Map children) { + Context ctx = Context.newBuilder(model).combine(children).build(); + return template.apply(ctx); + } + + private Object genereraUrl(Object context, Options options) { + if (null != context) { + return context.toString().toLowerCase() + ".html"; + } else { + logger.error("Kunde inte generera URL, blockParams {}", options.blockParams); + return ""; + } + } + private Object genereraTradUrl(Object context, Options options) { + if (null != context) { + return context.toString().toLowerCase() + "/view.html"; + } else { + logger.error("Kunde inte generera URL, blockParams {}", options.blockParams); + return ""; + } + } + private Object generereraAuthorUrl(Object context, Options options) { + if (null != context) { + return "u_" + context.toString().toLowerCase() + ".html"; + } else { + logger.error("Kunde inte generera URL, blockParams {}", options.blockParams); + return ""; + } + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/renderer/mustache/RendererFactory.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/renderer/mustache/RendererFactory.java new file mode 100644 index 00000000..0fe81abf --- /dev/null +++ 
b/marginalia_nu/src/main/java/nu/marginalia/wmsa/renderer/mustache/RendererFactory.java @@ -0,0 +1,13 @@ +package nu.marginalia.wmsa.renderer.mustache; + +import java.io.IOException; + +public class RendererFactory { + + public RendererFactory() { + } + + public MustacheRenderer renderer(String template) throws IOException { + return new MustacheRenderer<>(template); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/renderer/request/smhi/RenderSmhiIndexReq.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/renderer/request/smhi/RenderSmhiIndexReq.java new file mode 100644 index 00000000..d585d56f --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/renderer/request/smhi/RenderSmhiIndexReq.java @@ -0,0 +1,13 @@ +package nu.marginalia.wmsa.renderer.request.smhi; + +import lombok.AllArgsConstructor; +import lombok.Getter; +import lombok.NoArgsConstructor; +import nu.marginalia.wmsa.smhi.model.Plats; + +import java.util.List; + +@NoArgsConstructor @AllArgsConstructor @Getter +public class RenderSmhiIndexReq { + public List platser; +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/renderer/request/smhi/RenderSmhiPrognosReq.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/renderer/request/smhi/RenderSmhiPrognosReq.java new file mode 100644 index 00000000..ba1746db --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/renderer/request/smhi/RenderSmhiPrognosReq.java @@ -0,0 +1,11 @@ +package nu.marginalia.wmsa.renderer.request.smhi; + +import lombok.AllArgsConstructor; +import lombok.Getter; +import lombok.NoArgsConstructor; +import nu.marginalia.wmsa.smhi.model.PrognosData; + +@NoArgsConstructor @AllArgsConstructor @Getter +public class RenderSmhiPrognosReq { + public PrognosData data; +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/resource_store/ResourceEntityStore.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/resource_store/ResourceEntityStore.java new file mode 100644 index 
00000000..024cec46 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/resource_store/ResourceEntityStore.java @@ -0,0 +1,248 @@ +package nu.marginalia.wmsa.resource_store; + +import com.google.gson.Gson; +import com.google.gson.GsonBuilder; +import com.google.inject.name.Named; +import io.prometheus.client.Counter; +import io.reactivex.rxjava3.schedulers.Schedulers; +import nu.marginalia.wmsa.resource_store.model.RenderedResource; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.annotation.Nullable; +import javax.inject.Inject; +import java.io.File; +import java.io.IOException; +import java.nio.charset.Charset; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Base64; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReadWriteLock; +import java.util.concurrent.locks.ReentrantReadWriteLock; +import java.util.stream.Collectors; + + +public class ResourceEntityStore { + private final Map resources = new HashMap<>(); + private final ReadWriteLock lock = new ReentrantReadWriteLock(); + + private final Logger logger = LoggerFactory.getLogger(getClass()); + private final Path dataPath; + private final Gson gson = new GsonBuilder().create(); + private final Base64.Encoder b64encoder = Base64.getEncoder(); + + private final static Counter wmsa_resource_store_count + = Counter.build("wmsa_resource_store_count", "number of items in the resource store") + .register(); + private final static Counter wmsa_resource_store_eviction_count + = Counter.build("wmsa_resource_store_eviction_count", "evicted items") + .register(); + + @Inject + public ResourceEntityStore(@Named("data-path") Path dataPath) { + this.dataPath = dataPath; + + Schedulers.io().scheduleDirect(() -> loadResourcesFromDisk(dataPath)); + Schedulers.io().schedulePeriodicallyDirect(() -> purgeFileSystem(dataPath), 
1, 1, TimeUnit.HOURS); + } + + public ResourceEntityStore(@Named("data-path") Path dataPath, boolean immediate) { + this.dataPath = dataPath; + + loadResourcesFromDisk(dataPath); + } + + public ResourceEntityStore() { + this.dataPath = null; + } + + public RenderedResource getResource(String domain, String resource) { + Lock readLock = lock.readLock(); + try { + readLock.lock(); + return resources.get(getKey(domain, resource)); + } + finally { + readLock.unlock(); + } + } + + public void putResource(String domain, String resource, RenderedResource data) { + RenderedResource oldResource = loadResource(domain, resource, data); + + wmsa_resource_store_count.inc(); + if (dataPath != null) { + Path domainPath = dataPath.resolve(domain); + if (!domainPath.toFile().isDirectory()) { + domainPath.toFile().mkdir(); + } + + if (oldResource != null) { + try { + Path oldResourcePath = domainPath.resolve(oldResource.diskFileName()); + oldResourcePath.toFile().delete(); + } + catch (Exception ex) { + logger.error("Failed to remove old resource {}/{}", domain, oldResource.diskFileName()); + } + } + + Path resourcePath = domainPath.resolve(data.diskFileName()); + try { + Files.writeString(resourcePath, gson.toJson(data)); + } catch (IOException e) { + logger.error("Failed to write resource {}/{}", domain, resource); + logger.error("Exception", e); + } + + + } + } + + @Nullable + private RenderedResource loadResource(String domain, String resource, RenderedResource data) { + Lock writeLock = lock.writeLock(); + RenderedResource oldResource; + try { + writeLock.lock(); + oldResource = resources.put(getKey(domain, resource), data); + } + finally { + writeLock.unlock(); + } + return oldResource; + } + + private String getKey(String domain, String resource) { + return domain + "/" + resource; + } + + + public void reapStaleResources() { + Lock writeLock = lock.writeLock(); + try { + writeLock.lock(); + List expiredResources = resources.entrySet().stream() + .filter(entry -> 
entry.getValue().isExpired()) + .map(Map.Entry::getKey) + .collect(Collectors.toList()); + + for (String resource : expiredResources) { + logger.info("Reaping expired resource \"{}\"", resource); + var res = resources.remove(resource); + wmsa_resource_store_eviction_count.inc(); + + if (dataPath != null) { + File resourceFile = dataPath.resolve(res.diskFileName()).toFile(); + if (resourceFile.exists()) { + resourceFile.delete(); + } + } + } + } + finally { + writeLock.unlock(); + } + } + + public int numResources() { + Lock readLock = lock.readLock(); + try { + readLock.lock(); + return resources.size(); + } + finally { + readLock.unlock(); + } + } + + public long resourceSize() { + Lock readLock = lock.readLock(); + try { + readLock.lock(); + return resources.values().stream().mapToLong(RenderedResource::size).sum(); + } + finally { + readLock.unlock(); + } + } + + public void loadResourcesFromDisk(Path dataPath) { + File dataDir = dataPath.toFile(); + + for (var dir : dataDir.listFiles()) { + if (!dir.isDirectory()) { + logger.warn("Junk file {} in data directory", dir); + } + else { + for (var file : dir.listFiles()) { + try { + loadFromFile(dir.getName(), file); + } + catch (Exception ex) { + logger.error("Failed to load file {}", file); + logger.error("Failed to load resource from disk", ex); + } + } + } + } + } + + public void purgeFileSystem(Path dataPath) { + File dataDir = dataPath.toFile(); + + for (var dir : dataDir.listFiles()) { + if (!dir.isDirectory()) { + logger.warn("Junk file {} in data directory", dir); + } + else { + for (var file : dir.listFiles()) { + try { + purgeFile(file); + } + catch (Exception ex) { + logger.error("Failed to purge resource from disk", ex); + } + } + } + } + } + + private void purgeFile(File file) throws IOException { + String json = Files.readString(file.toPath(), Charset.defaultCharset()); + var resource = gson.fromJson(json, RenderedResource.class); + + if (resource.isExpired()) { + logger.info("Deleting expired 
resource {}", file); + + file.delete(); + } + + } + + + private void loadFromFile(String domain, File file) { + try { + String json = Files.readString(file.toPath(), Charset.defaultCharset()); + var resource = gson.fromJson(json, RenderedResource.class); + + if (resource.isExpired() || resources.containsKey(getKey(domain, resource.getFilename()))) { + logger.info("Deleting expired resource {}", file); + + file.delete(); + } + else { + logger.info("Re-loading resource {}", file); + loadResource(domain, resource.getFilename(), resource); + } + } catch (IOException e) { + logger.error("Could not read file {}", file.toString()); + } + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/resource_store/ResourceStoreClient.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/resource_store/ResourceStoreClient.java new file mode 100644 index 00000000..b057d450 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/resource_store/ResourceStoreClient.java @@ -0,0 +1,46 @@ +package nu.marginalia.wmsa.resource_store; + +import io.reactivex.rxjava3.core.Observable; +import io.reactivex.rxjava3.schedulers.Schedulers; +import nu.marginalia.wmsa.client.AbstractDynamicClient; +import nu.marginalia.wmsa.client.HttpStatusCode; +import nu.marginalia.wmsa.client.exception.TimeoutException; +import nu.marginalia.wmsa.configuration.ServiceDescriptor; +import nu.marginalia.wmsa.configuration.server.Context; +import nu.marginalia.wmsa.resource_store.model.RenderedResource; + +import javax.inject.Inject; +import javax.inject.Singleton; +import java.time.LocalDateTime; +import java.util.concurrent.TimeUnit; +import java.util.function.Supplier; + +@Singleton +public class ResourceStoreClient extends AbstractDynamicClient{ + + @Inject + public ResourceStoreClient() { + super(ServiceDescriptor.RESOURCE_STORE); + } + + public Observable getResource(Context ctx, String domain, String resource) { + return get(ctx, "/"+domain+"/"+resource) + .timeout(5, TimeUnit.SECONDS, 
Observable.error(new TimeoutException("ResourceStoreClient.getResource()"))) + ; + } + + public Observable putResource(Context ctx, String domain, RenderedResource data) { + return post(ctx, "/"+domain, data) + .timeout(5, TimeUnit.SECONDS, Observable.error(new TimeoutException("ResourceStoreClient.putResource()"))); + + } + + public Observable cacheResource(Context ctx, String domain, String resource, Supplier generator, LocalDateTime expiry) { + return getResource(ctx, domain, resource) + .onErrorReturn(e -> { + var renderedResource = new RenderedResource(resource, expiry, generator.get()); + putResource(ctx, "wiki", renderedResource).subscribeOn(Schedulers.io()).blockingSubscribe(); + return renderedResource.data; + }); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/resource_store/ResourceStoreMain.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/resource_store/ResourceStoreMain.java new file mode 100644 index 00000000..ddcb8c4d --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/resource_store/ResourceStoreMain.java @@ -0,0 +1,32 @@ +package nu.marginalia.wmsa.resource_store; + +import com.google.inject.Guice; +import com.google.inject.Inject; +import com.google.inject.Injector; +import nu.marginalia.wmsa.configuration.MainClass; +import nu.marginalia.wmsa.configuration.ServiceDescriptor; +import nu.marginalia.wmsa.configuration.module.ConfigurationModule; +import nu.marginalia.wmsa.configuration.server.Initialization; + +import java.io.IOException; + +public class ResourceStoreMain extends MainClass { + private ResourceStoreService service; + + @Inject + public ResourceStoreMain(ResourceStoreService service) throws IOException { + this.service = service; + + } + + public static void main(String... 
args) { + init(ServiceDescriptor.RESOURCE_STORE, args); + + Injector injector = Guice.createInjector( + new ResourceStoreModule(), + new ConfigurationModule() + ); + injector.getInstance(ResourceStoreMain.class); + injector.getInstance(Initialization.class).setReady(); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/resource_store/ResourceStoreModule.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/resource_store/ResourceStoreModule.java new file mode 100644 index 00000000..2de9e931 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/resource_store/ResourceStoreModule.java @@ -0,0 +1,15 @@ +package nu.marginalia.wmsa.resource_store; + +import com.google.inject.AbstractModule; +import com.google.inject.name.Names; + +import java.nio.file.Path; + +public class ResourceStoreModule extends AbstractModule { + public void configure() { + bind(String.class).annotatedWith(Names.named("external-url")).toInstance("https://reddit.marginalia.nu/"); + bind(Path.class).annotatedWith(Names.named("data-path")).toInstance(Path.of("/var/lib/wmsa/archive.fast/resources")); + } + + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/resource_store/ResourceStoreService.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/resource_store/ResourceStoreService.java new file mode 100644 index 00000000..2870c4d6 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/resource_store/ResourceStoreService.java @@ -0,0 +1,191 @@ +package nu.marginalia.wmsa.resource_store; + +import com.google.gson.Gson; +import com.google.gson.GsonBuilder; +import com.google.inject.Inject; +import com.google.inject.name.Named; +import io.reactivex.rxjava3.schedulers.Schedulers; +import kotlin.text.Charsets; +import lombok.SneakyThrows; +import nu.marginalia.wmsa.auth.client.AuthClient; +import nu.marginalia.wmsa.configuration.server.Context; +import nu.marginalia.wmsa.configuration.server.Initialization; +import 
nu.marginalia.wmsa.configuration.server.MetricsServer; +import nu.marginalia.wmsa.configuration.server.Service; +import nu.marginalia.wmsa.resource_store.model.RenderedResource; +import org.apache.http.HttpStatus; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import spark.Request; +import spark.Response; +import spark.Spark; +import spark.resource.ClassPathResource; +import spark.staticfiles.MimeType; + +import java.io.FileNotFoundException; +import java.net.URLEncoder; +import java.time.LocalDateTime; +import java.time.ZoneOffset; +import java.util.concurrent.TimeUnit; + +public class ResourceStoreService extends Service { + private Gson gson = new GsonBuilder().create(); + private Logger logger = LoggerFactory.getLogger(getClass()); + private final long startTime = LocalDateTime.now().toEpochSecond(ZoneOffset.UTC); + + private AuthClient authClient; + private final ResourceEntityStore resourceStore; + + @Inject + public ResourceStoreService(@Named("service-host") String ip, + @Named("service-port") Integer port, + AuthClient authClient, + ResourceEntityStore resourceStore, + Initialization initialization, + MetricsServer metricsServer + ) { + super(ip, port, initialization, metricsServer); + this.authClient = authClient; + this.resourceStore = resourceStore; + + Schedulers.io().schedulePeriodicallyDirect(resourceStore::reapStaleResources, + 5, 5, TimeUnit.MINUTES); + + Spark.get("/public/*", this::getDefaultResource); + + Spark.get("/:domain/*", this::getResource); + Spark.post("/:domain", this::storeResource); + + } + + private Object getDefaultResource(Request request, Response response) { + String headerDomain = request.headers("X-Domain"); + + if (headerDomain == null) { + Spark.halt(404); + } + + var splat = request.splat(); + var resource = splat.length == 0 ? 
"index.html" : splat[0]; + + return getResource(request, response, headerDomain, resource); + + } + + private Object storeResource(Request request, Response response) { + var domain = request.params("domain"); + var data = gson.fromJson(request.body(), RenderedResource.class); + + logger.info("storeResource({}/{}, {})", domain, data.filename, data.etag()); + + resourceStore.putResource(domain, data.filename, data); + + Spark.halt(HttpStatus.SC_ACCEPTED); + return null; + } + + private Object getResource(Request request, Response response) { + String headerDomain = request.headers("X-Domain"); + var domain = request.params("domain"); + + if (headerDomain != null && !domain.equals(headerDomain)) { + logger.warn("{} - domain mismatch: Header = {}, request = {}", Context.fromRequest(request), headerDomain, domain); + Spark.halt(403); + } + + var splat = request.splat(); + var resource = splat.length == 0 ? "index.html" : splat[0]; + + return getResource(request, response, domain, resource); + } + + private String getResource(Request request, Response response, String domain, String resource) { + + var data = resourceStore.getResource(domain, resource); + + if (data != null) { + logger.info("getResource({}/{}, {})", domain, resource, data.etag()); + validatePermission(Context.fromRequest(request), request, response, domain, data); + + return serveDynamic(data, request, response); + } + else if (serveStatic(domain + "/" + resource, request, response)) { + logger.info("getResource({}/{}, static)", domain, resource); + } + else { + logger.info("Could not serve {}/{}", domain, resource); + Spark.halt(404, "Not Found"); + } + return ""; + } + + + private void validatePermission(Context ctx, Request req, Response rsp, String domain, RenderedResource resource) { + if ("memex".equals(domain)) { + if (resource.requireLogin && !memexIsLoggedIn(ctx)) { + rsp.redirect("https://www.marginalia.nu/auth/login?service=MEMEX&redirect="+ URLEncoder.encode(req.headers("X-Extern-Url"), 
Charsets.UTF_8)); + Spark.halt(); + } + } + } + + private boolean memexIsLoggedIn(Context ctx) { + return authClient.isLoggedIn(ctx).timeout(1, TimeUnit.SECONDS).blockingFirst(); + } + private String serveDynamic(RenderedResource data, Request request, Response response) { + handleEtag(data, request, response); + + return data.data; + } + + @SneakyThrows + private boolean serveStatic(String path, Request req, Response rsp) { + try { + ClassPathResource resource = new ClassPathResource("static/" + path); + handleEtagStatic(resource, req, rsp); + resource.getInputStream().transferTo(rsp.raw().getOutputStream()); + } + catch (IllegalArgumentException|FileNotFoundException ex) { + return false; + } + + return true; + } + + @SneakyThrows + private void handleEtag(RenderedResource page, Request req, Response rsp) { + rsp.header("Cache-Control", "private, must-revalidate"); + + if (!page.filename.endsWith(".txt")) { + rsp.type("text/html"); + } + else { + rsp.type(MimeType.fromResource(new ClassPathResource(page.filename))); + } + final String etag = page.etag(); + + if (etag.equals(req.headers("If-None-Match"))) { + Spark.halt(304); + } + + rsp.header("ETag", etag); + } + + @SneakyThrows + private void handleEtagStatic(ClassPathResource resource, Request req, Response rsp) { + rsp.header("Cache-Control", "public,max-age=3600"); + rsp.type(MimeType.fromResource(resource)); + + final String etag = staticResourceEtag(resource.getFilename()); + + if (etag.equals(req.headers("If-None-Match"))) { + Spark.halt(304); + } + + rsp.header("ETag", etag); + } + + private String staticResourceEtag(String resource) { + return "\"" + resource.hashCode() + "-" + startTime + "\""; + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/resource_store/model/RenderedResource.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/resource_store/model/RenderedResource.java new file mode 100644 index 00000000..f445a6c7 --- /dev/null +++ 
b/marginalia_nu/src/main/java/nu/marginalia/wmsa/resource_store/model/RenderedResource.java @@ -0,0 +1,46 @@ +package nu.marginalia.wmsa.resource_store.model; + +import lombok.Getter; + +import java.time.LocalDateTime; +import java.time.format.DateTimeFormatter; + +@Getter +public class RenderedResource { + public final String filename; + public final String data; + public final String genTime = LocalDateTime.now().format(DateTimeFormatter.ISO_LOCAL_DATE_TIME); + public final long genTimeMillis = System.currentTimeMillis(); + public final String expiry; + public final boolean requireLogin; + + public RenderedResource(String filename, LocalDateTime expiryDate, String data) { + this.filename = filename; + this.data = data; + this.expiry = expiryDate.format(DateTimeFormatter.ISO_LOCAL_DATE_TIME); + this.requireLogin = false; + } + public RenderedResource(String filename, LocalDateTime expiryDate, String data, boolean requireLogin) { + this.filename = filename; + this.data = data; + this.expiry = expiryDate.format(DateTimeFormatter.ISO_LOCAL_DATE_TIME); + this.requireLogin = requireLogin; + } + public boolean isExpired() { + var expiryDate = LocalDateTime.parse(expiry, DateTimeFormatter.ISO_LOCAL_DATE_TIME); + return expiryDate.isBefore(LocalDateTime.now()); + + } + + public String etag() { + return "\"" + genTime.hashCode() + "-" + data.hashCode() + "\""; + } + + public String diskFileName() { + return filename.hashCode() + "-" + data.hashCode() + ".html"; + } + + public long size() { + return 2L*(data.length()+filename.length()); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/smhi/SmhiScraperService.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/smhi/SmhiScraperService.java new file mode 100644 index 00000000..2b074efb --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/smhi/SmhiScraperService.java @@ -0,0 +1,79 @@ +package nu.marginalia.wmsa.smhi; + +import com.google.inject.Inject; +import com.google.inject.name.Named; 
+import io.reactivex.rxjava3.schedulers.Schedulers; +import nu.marginalia.wmsa.configuration.server.Context; +import nu.marginalia.wmsa.configuration.server.Initialization; +import nu.marginalia.wmsa.configuration.server.MetricsServer; +import nu.marginalia.wmsa.configuration.server.Service; +import nu.marginalia.wmsa.renderer.client.RendererClient; +import nu.marginalia.wmsa.renderer.request.smhi.RenderSmhiIndexReq; +import nu.marginalia.wmsa.renderer.request.smhi.RenderSmhiPrognosReq; +import nu.marginalia.wmsa.smhi.model.Plats; +import nu.marginalia.wmsa.smhi.model.PrognosData; +import nu.marginalia.wmsa.smhi.scraper.crawler.SmhiCrawler; +import nu.marginalia.wmsa.smhi.scraper.crawler.entity.SmhiEntityStore; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import spark.Spark; + +import java.util.Comparator; +import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; + +public class SmhiScraperService extends Service { + + private final SmhiCrawler crawler; + private final SmhiEntityStore entityStore; + private final RendererClient rendererClient; + private final Logger logger = LoggerFactory.getLogger(getClass()); + private final Initialization initialization; + @Inject + public SmhiScraperService(@Named("service-host") String ip, + @Named("service-port") Integer port, + SmhiCrawler crawler, + SmhiEntityStore entityStore, + RendererClient rendererClient, + Initialization initialization, + MetricsServer metricsServer) { + super(ip, port, initialization, metricsServer); + this.crawler = crawler; + this.entityStore = entityStore; + this.rendererClient = rendererClient; + this.initialization = initialization; + + Spark.awaitInitialization(); + + Schedulers.newThread().scheduleDirect(this::start); + } + + private void start() { + initialization.waitReady(); + rendererClient.waitReady(); + + entityStore.platser.debounce(6, TimeUnit.SECONDS) + .subscribe(this::updateIndex); + entityStore.prognosdata.subscribe(this::updatePrognos); + + 
crawler.start(); + } + + private void updatePrognos(PrognosData prognosData) { + rendererClient + .render(Context.internal(), new RenderSmhiPrognosReq(prognosData)) + .timeout(30, TimeUnit.SECONDS) + .blockingSubscribe(); + } + + private void updateIndex(Plats unused) { + var platser = entityStore.platser().stream() + .sorted(Comparator.comparing(plats -> plats.namn)) + .collect(Collectors.toList()); + + rendererClient + .render(Context.internal(), new RenderSmhiIndexReq(platser)) + .timeout(30, TimeUnit.SECONDS) + .blockingSubscribe(); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/smhi/model/Parameter.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/smhi/model/Parameter.java new file mode 100644 index 00000000..012e9c24 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/smhi/model/Parameter.java @@ -0,0 +1,9 @@ +package nu.marginalia.wmsa.smhi.model; + +public class Parameter { + public String name; + public String levelType; + public String level; + public String unit; + public String[] values; +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/smhi/model/Plats.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/smhi/model/Plats.java new file mode 100644 index 00000000..7ae39675 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/smhi/model/Plats.java @@ -0,0 +1,42 @@ +package nu.marginalia.wmsa.smhi.model; + +import lombok.Getter; +import org.apache.commons.lang3.builder.EqualsBuilder; +import org.apache.commons.lang3.builder.HashCodeBuilder; + +@Getter +public class Plats { + public final String namn; + public final double latitud; + public final double longitud; + + public String getUrl() { + return namn.toLowerCase()+".html"; + } + + public Plats(String namn, String latitud, String longitud) { + this.namn = namn; + this.longitud = Double.parseDouble(longitud); + this.latitud = Double.parseDouble(latitud); + } + + public String toString() { + return String.format("Plats[%s %s %s]", namn, 
/**
 * Thin wrapper around a list of places.
 * NOTE(review): the element type is presumably {@code Plats}; type arguments
 * are not visible in this view of the file, so confirm before parameterising.
 */
public class Platser {

    // Held by reference: the caller's list is neither copied nor wrapped.
    private final List platser;

    /** @param platser the list to expose through {@link #getPlatser()} */
    public Platser(List platser) {
        this.platser = platser;
    }

    /** @return the same list instance that was passed to the constructor */
    public List getPlatser() {
        return this.platser;
    }
}
LocalDateTime.parse(crawlTime).atZone(ZoneId.of("Europe/Stockholm")) + .plusHours(3) + .format(DateTimeFormatter.ISO_TIME); + } + public Plats getPlats() { + return plats; + } + + public List getTidpunkter() { + return timeSeries; + } + public List getDygn() { + return timeSeries.stream().map(Tidpunkt::getDate).distinct() + .map(datum -> new Dygnsdata(datum, this)) + .collect(Collectors.toList()); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/smhi/model/Tidpunkt.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/smhi/model/Tidpunkt.java new file mode 100644 index 00000000..d83ee7c4 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/smhi/model/Tidpunkt.java @@ -0,0 +1,75 @@ +package nu.marginalia.wmsa.smhi.model; + +import java.time.ZoneId; +import java.time.ZonedDateTime; +import java.time.format.DateTimeFormatter; +import java.time.format.DateTimeFormatterBuilder; +import java.time.temporal.ChronoField; +import java.util.ArrayList; +import java.util.List; + +public class Tidpunkt { + + private static final ZoneId serverZoneId = ZoneId.of("GMT"); + private static final ZoneId localZoneId = ZoneId.of("Europe/Stockholm"); + private static DateTimeFormatter timeFormatter = (new DateTimeFormatterBuilder()) + .appendValue(ChronoField.HOUR_OF_DAY, 2) + .appendLiteral(':') + .appendValue(ChronoField.MINUTE_OF_HOUR, 2) + .toFormatter(); + + public String validTime; + + public List parameters = new ArrayList<>(); + + + private String getParam(String name) { + var data = parameters.stream().filter(p -> name.equals(p.name)).map(p->p.values).findFirst().orElseGet(() -> new String[0]); + if (data.length > 0) { + return data[0]; + } + return null; + } + public String getDate() { + return ZonedDateTime.parse(validTime).toLocalDateTime().atZone(serverZoneId).toOffsetDateTime().atZoneSameInstant(localZoneId).format(DateTimeFormatter.ISO_LOCAL_DATE); + } + + public String getTime() { + return 
ZonedDateTime.parse(validTime).toLocalDateTime().atZone(serverZoneId).toOffsetDateTime().atZoneSameInstant(localZoneId).format(timeFormatter); + } + + public String getTemp() { + return getParam("t"); + } + public String getMoln() { + return getParam("tcc_mean"); + } + public String getVind() { + return getParam("ws"); + } + public String getByvind() { + return getParam("gust"); + } + public String getNederbord() { + return getParam("pmedian"); + } + public String getNederbordTyp() { + switch(getParam("pcat")) { + case "1": return "S"; + case "2": return "SB"; + case "3": return "R"; + case "4": return "D"; + case "5": return "UKR"; + case "6": return "UKD"; + default: + return ""; + + } + } + public String getVindRiktning() { + return getParam("wd"); + } + public String toString() { + return String.format("Tidpunkt[%s %s]", validTime, getTemp()); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/smhi/model/dyn/Dygnsdata.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/smhi/model/dyn/Dygnsdata.java new file mode 100644 index 00000000..05f2246a --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/smhi/model/dyn/Dygnsdata.java @@ -0,0 +1,40 @@ +package nu.marginalia.wmsa.smhi.model.dyn; + +import nu.marginalia.wmsa.smhi.model.PrognosData; +import nu.marginalia.wmsa.smhi.model.Tidpunkt; + +import java.time.LocalDate; +import java.time.format.DateTimeFormatter; +import java.util.List; +import java.util.stream.Collectors; + +public class Dygnsdata { + public final String date; + private final PrognosData data; + + public Dygnsdata(String date, PrognosData data) { + this.date = date; + this.data = data; + } + + public String getDate() { + return date; + } + public List getData() { + String d = getDate(); + return data.timeSeries.stream().filter(p -> d.equals(p.getDate())).collect(Collectors.toList()); + } + + public String getVeckodag() { + switch (LocalDate.parse(date, DateTimeFormatter.ISO_LOCAL_DATE).getDayOfWeek()) { + case MONDAY: 
return "Måndag"; + case TUESDAY: return "Tisdag"; + case WEDNESDAY: return "Onsdag"; + case THURSDAY: return "Torsdag"; + case FRIDAY: return "Fredag"; + case SATURDAY: return "Lördag"; + case SUNDAY: return "Söndag"; + } + return "Annandag"; + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/smhi/model/index/IndexPlats.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/smhi/model/index/IndexPlats.java new file mode 100644 index 00000000..5e3f3a19 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/smhi/model/index/IndexPlats.java @@ -0,0 +1,13 @@ +package nu.marginalia.wmsa.smhi.model.index; + +import lombok.AllArgsConstructor; +import lombok.Getter; +import nu.marginalia.wmsa.smhi.model.Plats; + +import java.util.List; + +@Getter @AllArgsConstructor +public class IndexPlats { + String nyckel; + List platser; +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/smhi/model/index/IndexPlatser.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/smhi/model/index/IndexPlatser.java new file mode 100644 index 00000000..b67e7817 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/smhi/model/index/IndexPlatser.java @@ -0,0 +1,28 @@ +package nu.marginalia.wmsa.smhi.model.index; + +import lombok.Getter; +import nu.marginalia.wmsa.smhi.model.Plats; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +@Getter +public class IndexPlatser { + List platserPerNyckel = new ArrayList<>(); + + public IndexPlatser(List platser) { + var platsMap = kategoriseraEfterNyckel(platser); + + platsMap.keySet().stream().sorted() + .forEach(p -> platserPerNyckel.add(new IndexPlats(p, platsMap.get(p)))); + } + + private Map> kategoriseraEfterNyckel(List platser) { + return platser.stream().collect( + Collectors.groupingBy(p -> + p.namn.substring(0, 1) + .toUpperCase())); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/smhi/scraper/PlatsReader.java 
b/marginalia_nu/src/main/java/nu/marginalia/wmsa/smhi/scraper/PlatsReader.java new file mode 100644 index 00000000..3ea0d8cc --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/smhi/scraper/PlatsReader.java @@ -0,0 +1,44 @@ +package nu.marginalia.wmsa.smhi.scraper; + +import com.google.inject.Inject; +import com.google.inject.Singleton; +import com.google.inject.name.Named; +import com.opencsv.CSVReader; +import nu.marginalia.wmsa.smhi.model.Plats; + +import java.io.InputStreamReader; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.List; +import java.util.Objects; + +@Singleton +public class PlatsReader { + private final String fileName; + + @Inject + public PlatsReader(@Named("plats-csv-file") String fileName) { + this.fileName = fileName; + } + + public List readPlatser() throws Exception { + List platser = new ArrayList<>(); + + var resource = Objects.requireNonNull(ClassLoader.getSystemResourceAsStream(fileName), + "Kunde inte ladda " + fileName); + try (var reader = new CSVReader(new InputStreamReader(resource, StandardCharsets.UTF_8))) { + for (;;) { + String[] strings = reader.readNext(); + if (strings == null) { + return platser; + } + platser.add(skapaPlats(strings)); + } + } + + } + + private Plats skapaPlats(String[] strings) { + return new Plats(strings[0], strings[1], strings[2]); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/smhi/scraper/SmhiScraperMain.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/smhi/scraper/SmhiScraperMain.java new file mode 100644 index 00000000..8898247e --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/smhi/scraper/SmhiScraperMain.java @@ -0,0 +1,32 @@ +package nu.marginalia.wmsa.smhi.scraper; + +import com.google.inject.Guice; +import com.google.inject.Inject; +import com.google.inject.Injector; +import nu.marginalia.wmsa.configuration.MainClass; +import nu.marginalia.wmsa.configuration.ServiceDescriptor; +import 
nu.marginalia.wmsa.configuration.module.ConfigurationModule; +import nu.marginalia.wmsa.configuration.server.Initialization; +import nu.marginalia.wmsa.smhi.SmhiScraperService; + +import java.io.IOException; + +public class SmhiScraperMain extends MainClass { + private final SmhiScraperService service; + + @Inject + public SmhiScraperMain(SmhiScraperService service) throws IOException { + this.service = service; + } + + public static void main(String... args) { + init(ServiceDescriptor.SMHI_SCRAPER, args); + + Injector injector = Guice.createInjector( + new SmhiScraperModule(), + new ConfigurationModule()); + injector.getInstance(SmhiScraperMain.class); + injector.getInstance(Initialization.class).setReady(); + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/smhi/scraper/SmhiScraperModule.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/smhi/scraper/SmhiScraperModule.java new file mode 100644 index 00000000..ffb1793a --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/smhi/scraper/SmhiScraperModule.java @@ -0,0 +1,12 @@ +package nu.marginalia.wmsa.smhi.scraper; + +import com.google.inject.AbstractModule; +import com.google.inject.name.Names; + +public class SmhiScraperModule extends AbstractModule { + public void configure() { + bind(String.class).annotatedWith(Names.named("plats-csv-file")).toInstance("data/smhi/stader.csv"); + bind(String.class).annotatedWith(Names.named("smhi-user-agent")).toInstance("kontakt@marginalia.nu"); + } + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/smhi/scraper/crawler/SmhiBackendApi.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/smhi/scraper/crawler/SmhiBackendApi.java new file mode 100644 index 00000000..9880e317 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/smhi/scraper/crawler/SmhiBackendApi.java @@ -0,0 +1,88 @@ +package nu.marginalia.wmsa.smhi.scraper.crawler; + +import com.google.inject.Inject; +import com.google.inject.Singleton; +import 
com.google.inject.name.Named; +import nu.marginalia.wmsa.smhi.model.Plats; +import org.apache.http.Header; +import org.apache.http.HttpHost; +import org.apache.http.client.methods.HttpGet; +import org.apache.http.conn.routing.HttpRoute; +import org.apache.http.impl.client.HttpClients; +import org.apache.http.impl.conn.PoolingHttpClientConnectionManager; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.time.LocalDateTime; +import java.time.format.DateTimeFormatter; +import java.util.Arrays; +import java.util.Locale; + +@Singleton +public class SmhiBackendApi { + private final Logger logger = LoggerFactory.getLogger(getClass()); + + private final String server = "https://opendata-download-metfcst.smhi.se/api"; + private final PoolingHttpClientConnectionManager connectionManager; + private final String userAgent; + + @Inject + public SmhiBackendApi(@Named("smhi-user-agent") String userAgent) { + this.userAgent = userAgent; + + connectionManager = new PoolingHttpClientConnectionManager(); + connectionManager.setMaxTotal(200); + connectionManager.setDefaultMaxPerRoute(20); + HttpHost host = new HttpHost("https://opendata-download-metfcst.smhi.se"); + connectionManager.setMaxPerRoute(new HttpRoute(host), 50); + } + + public SmhiApiRespons hamtaData(Plats plats) throws Exception { + var client = HttpClients.custom() + .setConnectionManager(connectionManager) + .build(); + + String url = String.format(Locale.US, "%s/category/pmp3g/version/2/geotype/point/lon/%f/lat/%f/data.json", + server, plats.longitud, plats.latitud); + + Thread.sleep(100); + + logger.info("Fetching {} - {}", plats, url); + + HttpGet get = new HttpGet(url); + get.addHeader("User-Agent", userAgent); + + try (var rsp = client.execute(get)) { + var entity = rsp.getEntity(); + String content = new String(entity.getContent().readAllBytes()); + int statusCode = rsp.getStatusLine().getStatusCode(); + + var expires = + Arrays.stream(rsp.getHeaders("Expires")) + .map(Header::getValue) + 
.map(DateTimeFormatter.RFC_1123_DATE_TIME::parse) + .map(LocalDateTime::from) + .findFirst().map(Object::toString).orElse(""); + + + if (statusCode == 200) { + return new SmhiApiRespons(content, expires, plats); + } + throw new IllegalStateException("Fel i backend " + statusCode + " " + content); + } + + } + +} + +class SmhiApiRespons { + public final String jsonContent; + public final String expiryDate; + public final Plats plats; + + SmhiApiRespons(String jsonContent, String expiryDate, Plats plats) { + this.jsonContent = jsonContent; + this.expiryDate = expiryDate; + this.plats = plats; + } +} \ No newline at end of file diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/smhi/scraper/crawler/SmhiCrawler.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/smhi/scraper/crawler/SmhiCrawler.java new file mode 100644 index 00000000..b9b97fb5 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/smhi/scraper/crawler/SmhiCrawler.java @@ -0,0 +1,106 @@ +package nu.marginalia.wmsa.smhi.scraper.crawler; + +import com.google.gson.*; +import com.google.inject.Inject; +import io.reactivex.rxjava3.core.Maybe; +import io.reactivex.rxjava3.core.Observable; +import io.reactivex.rxjava3.disposables.Disposable; +import io.reactivex.rxjava3.schedulers.Schedulers; +import lombok.SneakyThrows; +import nu.marginalia.wmsa.smhi.model.Plats; +import nu.marginalia.wmsa.smhi.model.PrognosData; +import nu.marginalia.wmsa.smhi.scraper.PlatsReader; +import nu.marginalia.wmsa.smhi.scraper.crawler.entity.SmhiEntityStore; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.lang.reflect.Type; +import java.time.LocalDateTime; +import java.time.format.DateTimeFormatter; +import java.util.ArrayList; +import java.util.List; +import java.util.Optional; +import java.util.concurrent.TimeUnit; + +public class SmhiCrawler { + + private final Logger logger = LoggerFactory.getLogger(getClass()); + + private final Gson gson; + private SmhiBackendApi api; + 
private SmhiEntityStore store; + private final List platser; + private Disposable job; + + @Inject @SneakyThrows + public SmhiCrawler(SmhiBackendApi backendApi, SmhiEntityStore store, PlatsReader platsReader) { + this.api = backendApi; + this.store = store; + this.platser = platsReader.readPlatser(); + + class LocalDateAdapter implements JsonDeserializer { + @Override + public LocalDateTime deserialize(JsonElement json, Type typeOfT, JsonDeserializationContext context) throws JsonParseException { + return LocalDateTime + .parse(json.getAsString(), DateTimeFormatter.ISO_ZONED_DATE_TIME); + } + } + + gson = new GsonBuilder() + .registerTypeAdapter(LocalDateTime.class, new LocalDateAdapter()) + .create(); + } + + public void start() { + job = Observable + .fromIterable(new ArrayList<>(platser)) + .subscribeOn(Schedulers.io()) + .filter(this::isNeedsUpdate) + .take(5) + .flatMapMaybe(this::hamtaData) + .repeatWhen(this::repeatDelay) + .doOnError(this::handleError) + .subscribe(store::offer); + } + public void stop() { + Optional.ofNullable(job).ifPresent(Disposable::dispose); + } + + private Observable repeatDelay(Observable completed) { + return completed.delay(1, TimeUnit.SECONDS); + } + + protected void handleError(Throwable throwable) { + logger.error("Caught error", throwable); + } + + public Maybe hamtaData(Plats plats) { + try { + var data = api.hamtaData(plats); + + PrognosData model = gson.fromJson(data.jsonContent, PrognosData.class); + + model.expires = data.expiryDate; + model.plats = plats; + + return Maybe.just(model); + } + catch (Exception ex) { + logger.error("Failed to fetch data", ex); + return Maybe.empty(); + } + } + + + boolean isNeedsUpdate(Plats plats) { + var prognos = store.prognos(plats); + + if (null == prognos) { + return true; + } + + LocalDateTime crawlTime = LocalDateTime.parse(prognos.crawlTime); + return crawlTime.plusHours(1).isBefore(LocalDateTime.now()); + } + +} diff --git 
a/marginalia_nu/src/main/java/nu/marginalia/wmsa/smhi/scraper/crawler/entity/SmhiEntityStore.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/smhi/scraper/crawler/entity/SmhiEntityStore.java new file mode 100644 index 00000000..d2f608aa --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/smhi/scraper/crawler/entity/SmhiEntityStore.java @@ -0,0 +1,62 @@ +package nu.marginalia.wmsa.smhi.scraper.crawler.entity; + +import com.google.inject.Singleton; +import io.reactivex.rxjava3.subjects.PublishSubject; +import nu.marginalia.wmsa.smhi.model.Plats; +import nu.marginalia.wmsa.smhi.model.PrognosData; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReadWriteLock; +import java.util.concurrent.locks.ReentrantReadWriteLock; + +@Singleton +public class SmhiEntityStore { + private final ReadWriteLock rwl = new ReentrantReadWriteLock(); + private final Map data = new HashMap<>(); + + public final PublishSubject platser = PublishSubject.create(); + public final PublishSubject prognosdata = PublishSubject.create(); + Logger logger = LoggerFactory.getLogger(getClass()); + public boolean offer(PrognosData modell) { + Lock lock = this.rwl.writeLock(); + try { + lock.lock(); + if (data.put(modell.plats, modell) == null) { + platser.onNext(modell.plats); + } + prognosdata.onNext(modell); + } + finally { + lock.unlock(); + } + return true; + } + + public List platser() { + Lock lock = this.rwl.readLock(); + try { + lock.lock(); + return new ArrayList<>(data.keySet()); + } + finally { + lock.unlock(); + } + } + + public PrognosData prognos(Plats plats) { + Lock lock = this.rwl.readLock(); + try { + lock.lock(); + return data.get(plats); + } + finally { + lock.unlock(); + } + } +} diff --git a/marginalia_nu/src/main/nlp-models/README.md 
b/marginalia_nu/src/main/nlp-models/README.md new file mode 100644 index 00000000..3f46af3a --- /dev/null +++ b/marginalia_nu/src/main/nlp-models/README.md @@ -0,0 +1,3 @@ +# OpenNLP models + +[https://opennlp.apache.org/](https://opennlp.apache.org/) \ No newline at end of file diff --git a/marginalia_nu/src/main/nlp-models/en-token.bin b/marginalia_nu/src/main/nlp-models/en-token.bin new file mode 100644 index 00000000..c417277c Binary files /dev/null and b/marginalia_nu/src/main/nlp-models/en-token.bin differ diff --git a/marginalia_nu/src/main/nlp-models/se-token.bin b/marginalia_nu/src/main/nlp-models/se-token.bin new file mode 100644 index 00000000..d66c8709 Binary files /dev/null and b/marginalia_nu/src/main/nlp-models/se-token.bin differ diff --git a/marginalia_nu/src/main/resources/data/smhi/stader.csv b/marginalia_nu/src/main/resources/data/smhi/stader.csv new file mode 100644 index 00000000..0dc649c4 --- /dev/null +++ b/marginalia_nu/src/main/resources/data/smhi/stader.csv @@ -0,0 +1,134 @@ +"Åkersberga",59.47944,18.29967 +"Alby",59.2335,17.8538 +"Alingsås",57.93033,12.53345 +"Ängelholm",56.2428,12.86219 +"Arboga",59.39387,15.83882 +"Årsta",59.2978,18.0514 +"Arvika",59.65528,12.58518 +"Avesta",60.14274,16.16295 +"Bålsta",59.5671,17.52781 +"Boden",65.82518,21.68864 +"Bollnäs",61.34817,16.39464 +"Boo",59.33333,18.28333 +"Borås",57.72101,12.9401 +"Borlänge",60.4858,15.43714 +"Bromma",59.34,17.94 +"Enköping",59.63607,17.07768 +"Eskilstuna",59.36661,16.5077 +"Eslöv",55.83928,13.30393 +"Fagersta",60.00418,15.79316 +"Falkenberg",56.90552,12.49118 +"Falköping",58.17347,13.55068 +"Falun",60.60357,15.62597 +"Finspång",58.70578,15.76739 +"Gävle",60.67452,17.14174 +"Gislaved",57.3044,13.54078 +"Göteborg",57.70716,11.96679 +"Hallstahammar",59.61395,16.22846 +"Halmstad",56.67446,12.85676 +"Handen",59.16809,18.13796 +"Haninge",59.16775,18.14478 +"Härnösand",62.63228,17.93794 +"Hässleholm",56.15905,13.76638 +"Helsingborg",56.04673,12.69437 +"Höganäs",56.19971,12.55795 
+"Höllviken",55.40982,12.9558 +"Huddinge",59.23705,17.98192 +"Hudiksvall",61.72897,17.10358 +"Huskvarna",57.78596,14.30214 +"Jakobsberg",59.42268,17.83508 +"Jönköping",57.78145,14.15618 +"Kalmar",56.66157,16.36163 +"Karlshamn",56.1706,14.86188 +"Karlskoga",59.32667,14.52386 +"Karlskrona",56.16156,15.58661 +"Karlstad",59.3793,13.50357 +"Katrineholm",58.99587,16.20721 +"Kävlinge",55.79188,13.11021 +"Kinna",57.50728,12.69463 +"Kiruna",67.85572,20.22513 +"Kista",59.40316,17.94479 +"Köping",59.51404,15.99255 +"Kristianstad",56.03129,14.15242 +"Kristinehamn",59.30978,14.10808 +"Kumla",59.1277,15.14341 +"Kungälv",57.87096,11.98054 +"Kungsbacka",57.48719,12.07612 +"Landskrona",55.8708,12.83016 +"Lerum",57.77051,12.26904 +"Lidingö",59.36667,18.13333 +"Lidköping",58.50517,13.15765 +"Lindome",57.56667,12.08333 +"Linköping",58.41086,15.62157 +"Ljungby",56.83324,13.94082 +"Ludvika",60.14959,15.18776 +"Luleå",65.58415,22.15465 +"Lund",55.70584,13.19321 +"Majorna",57.69195,11.91605 +"Malmö",55.60587,13.00073 +"Mariestad",58.70971,13.82367 +"Märsta",59.62157,17.85476 +"Mjölby",58.32595,15.12365 +"Mölndal",57.6554,12.01378 +"Mölnlycke",57.65893,12.11792 +"Mora",61.00704,14.54316 +"Motala",58.53706,15.03649 +"Nacka",59.31053,18.16372 +"Nässjö",57.65307,14.69676 +"Norrköping",58.59419,16.1826 +"Norrtälje",59.75799,18.70496 +"Nybro",56.74461,15.90714 +"Nyköping",58.753,17.00788 +"Nynäshamn",58.90337,17.94793 +"Onsala",57.42531,12.02903 +"Örebro",59.27412,15.2066 +"Örnsköldsvik",63.29091,18.71525 +"Oskarshamn",57.26455,16.44837 +"Östermalm",59.33879,18.08487 +"Östersund",63.1792,14.63566 +"Oxelösund",58.67057,17.10152 +"Partille",57.7395,12.10642 +"Piteå",65.31717,21.47944 +"Råsunda",59.36667,17.98333 +"Ronneby",56.20999,15.27602 +"Sala",59.91993,16.60655 +"Salem",59.20186,17.76646 +"Sandviken",60.61667,16.76667 +"Segeltorp",59.27597,17.93072 +"Skara",58.38659,13.43836 +"Skellefteå",64.75067,20.95279 +"Skoghall",59.32324,13.46552 +"Skövde",58.39118,13.84506 
+"Söderhamn",61.30373,17.05921 +"Södertälje",59.19554,17.62525 +"Sollentuna",59.42804,17.95093 +"Solna",59.36004,18.00086 +"Staffanstorp",55.64277,13.20638 +"Stenungsund",58.07046,11.8181 +"Stockholm",59.33258,18.0649 +"Strängnäs",59.37741,17.03119 +"Sundbyberg",59.36128,17.97114 +"Sundsvall",62.39129,17.3063 +"Täby",59.4439,18.06872 +"Timrå",62.48703,17.3257 +"Torslanda",57.72432,11.77013 +"Tranås",58.03717,14.9782 +"Trelleborg",55.37514,13.15691 +"Trollhättan",58.28365,12.28864 +"Tullinge",59.2,17.9 +"Tumba",59.19858,17.83317 +"Uddevalla",58.34784,11.9424 +"Umeå",63.82842,20.25972 +"Upplands Väsby",59.51839,17.91128 +"Uppsala",59.85882,17.63889 +"Vallentuna",59.53436,18.07758 +"Vänersborg",58.38075,12.3234 +"Varberg",57.10557,12.25078 +"Värnamo",57.18604,14.04001 +"Västerås",59.61617,16.55276 +"Västerhaninge",59.11667,18.1 +"Västervik",57.7584,16.63733 +"Växjö",56.87767,14.80906 +"Vetlanda",57.42887,15.07762 +"Visby",57.64089,18.29602 +"Ystad",55.42966,13.82041 diff --git a/marginalia_nu/src/main/resources/dictionary/en-1000 b/marginalia_nu/src/main/resources/dictionary/en-1000 new file mode 100644 index 00000000..f5f8eda9 --- /dev/null +++ b/marginalia_nu/src/main/resources/dictionary/en-1000 @@ -0,0 +1,1003 @@ +the +of +and +in +to +was +is +for +on +as +with +by +he +that +at +from +his +it +an +were +which +are +this +also +be +had +or +has +first +their +after +its +one +new +but +who +her +not +she +they +have +two +been +other +when +during +all +into +there +time +may +more +school +years +over +only +would +later +most +where +between +some +up +world +city +national +about +such +him +then +made +out +state +three +while +used +university +can +united +under +known +season +many +year +part +became +born +film +these +than +team +no +second +including +states +being +through +before +both +american +south +early +war +history +against +however +family +until +well +since +them +work +life +following +area +people +series +north +name +career +album 
+music +played +group +district +number +several +high +released +county +de +company +called +will +league +won +four +house +government +each +march +same +game +international +september +january +club +found +june +october +began +located +july +so +west +use +august +now +college +john +station +population +april +public +home +end +november +member +place +general +town +former +december +church +if +age +held +named +system +because +york +took +day +river +around +football +british +line +east +local +any +song +due +along +service +party +best +february +served +did +back +another +based +could +within +received +century +village +built +like +members +building +major +final +show +games +although +include +species +death +band +small +main +left +president +said +published +died +large +last +five +couldn't +what +me +order +st +single +set +third +own +those +education +according +included +long +very +park +still +road +army +division +book +development +among +law +often +french +moved +times +what +community +central +led +english +original +old +son +children +million +different +near +just +top +late +again +water +air +great +center +form +much +research +side +us +art +court +play +down +country +off +even +council +german +street +record +power +established +ii +london +land +cup +having +title +started +support +political +students +award +military +period +came +went +production +white +way +given +island +make +next +role +television +king +region +works +total +championship +using +various +head +office +six +do +player +become +father +list +business +western +produced +director +married +program +association +england +field +worked +election +black +department +joined +announced +created +point +returned +professional +union +written +few +you +young +without +take +described +site +royal +services +radio +together +social +force +northern +per +founded +act +though +society +wrote +further +women +days +lost +continued +design +william 
+every +version +project +summer +live +men +man +european +we +southern +position +board +india +france +round +railway +open +level +considered +control +opened +run +australia +recorded +important +san +once +video +california +special +win +popular +appeared +match +release +common +battle +areas +hall +event +working +records +james +formed +right +playing +see +average +others +short +similar +teams +elected +george +currently +making +example +awards +construction +story +living +red +originally +debut +race +language +forces +lead +la +signed +developed +modern +appointed +case +addition +police +wife +result +minister +schools +events +america +route +little +lake +canada +himself +songs +current +upon +how +points +rock +present +never +free +science +information +health +training +class +throughout +track +good +media +museum +across +australian +human +census +indian +style +personal +love +germany +available +province +tour +away +eventually +body +despite +eastern +sold +committee +performance +players +features +festival +coach +should +return +taken +sea +seven +centre +followed +designed +performed +official +david +less +gave +months +finished +daughter +process +refer +study +europe +institute +stage +term +range +chief +fire +does +rights +completed +arts +half +remained +largest +mother +character +includes +civil +private +light +leading +reported +network +help +usually +seen +groups +studies +featured +federal +full +episode +thus +academy +night +competition +women's +space +get +instead +china +must +robert +japanese +go +washington +front +uk +directed +tournament +my +thomas +news +books +brother +involved +campaign +independent +either +model +countries +awarded +able +japan +sports +charles +gold +section +capital +kingdom +close +middle +added +fourth +sent +movement +eight +studio +previous +provided +conference +above +soon +today +grand +magazine +canadian +replaced +aircraft +change +films +ten +medical +organization +bank 
+historic +coast +killed +management +degree +rather +industry +russian +professor +chinese +action +car +senior +systems +green +bridge +technology +almost +shows +big +lower +week +success +writing +base +data +families +post +least +market +primary +female +reached +beginning +valley +ground +type +stated +tv +operations +attack +hospital +saw +approximately +paul +culture +republic +size +previously +decided +introduced +hill +buildings +championships +provide +native +successful +outside +parts +via +theatre +placed +behind +bay +sometimes +los +prior +whose +natural +active +future +scored +italian +africa +spanish +attended +put +listed +brought +regional +structure +units +michael +possible +henry +municipality +higher +start +collection +regular +star +results +square +interest +leader +economic +especially +contract +too +trade +texas +goal +below +winning +officer +foreign +generally +operation +runs +medal +changed +taking +novel +staff +significant +real +standard +far +limited +traditional +african +come +initially +itself +location +commission +roman +me +artist +christian +plays +money +parliament +food +hit +governor +low +defeated +energy +student +strong +towards +notable +child +assembly +owned +catholic +course +commercial +ship +foundation +channel +allowed +represented +property +places +navy +unit +ended +annual +command +paris +km +library +companies +whom +met +ever +activities +spent +plan +numerous +blue +earlier +means +highway +dr +required +musical +additional +practice +bill +noted +mountain +airport +ireland +plant +security +income +issues +associated +manager +artists +related +access +brown +running +peter +individual +richard +older +victory +opening +programs +past +report +sound +press +woman +finally +find +background +policy +our +youth +financial +date +executive +launched +soviet +administration +historical +closed +here +mark +captain +basketball +lived +ran +better +edition +famous +contains +chicago +subsequently +move 
+selected +already +legal +rural +religious +studied +entered +cultural +lines +person +test +appearance +complete +increased +rest +men's +zealand +secretary +complex +seat +changes +matches +majority +room +loss +terms +review +empire +mission +virginia +angeles +olympics +italy +highest +stadium +becoming +goals +starting +wide +characters +writer +particularly +fact +mostly +mexico +thought +hours +stone +retired +recording +going +give +feature +cross +smith +author +operated +sir +recent +status +chart +theory +greek +islands +caused +entire +got +remains +nine +engine +source +genus +forced +issue +singles +evidence +meeting +congress +port +variety +pennsylvania +forest +passed +lord +uses +particular +key +supported +word +create +relationship +overall +hand +democratic +certain +castle +biography +nature +mary +names +fort +parish +decision +serving +score +cover +wales +singer +need +material +shown +florida +upper +referred +larger +marriage +length +leaving +weeks +movie +raised +rate +justice +fall +always +minutes +junior +competed +stations +turn +irish +temple +cases +era +individuals +township +claimed +friends +van \ No newline at end of file diff --git a/marginalia_nu/src/main/resources/dictionary/en-stopwords b/marginalia_nu/src/main/resources/dictionary/en-stopwords new file mode 100644 index 00000000..cdcd342d --- /dev/null +++ b/marginalia_nu/src/main/resources/dictionary/en-stopwords @@ -0,0 +1,181 @@ +i +a +e.g +i.e +the +of +and +in +to +was +is +for +on +as +with +by +he +that +at +from +his +it +an +were +we've +we're +which +are +this +also +be +had +or +has +first +their +after +its +new +but +who +her +not +she +she's +they +have +been +other +when +during +all +into +there +time +may +more +school +years +over +only +would +later +most +where +between +some +up +city +about +such +him +then +made +out +state +three +while +used +can +under +known +many +year +part +became +these +than +team +no +second +including +being +through 
+before +both +however +how +until +well +since +them +de +each +same +found +so +use +now +end +if +age +day +any +due +did +own +led +off +do +you +you're +young +without +take +described +site +royal +services +radio +together +social +force +northern +per +we +my +want +your +seem +else's +don't +me +couldn't +what +me +doesn't +can't +isn't +i've +it's +it +i'm +1 +2 +3 +4 +5 +6 +7 +8 +9 +. +.. +... +.... +..... +...... +....... +........ +......... +.......... +will +us +much +our +what +what's +often +few +lot \ No newline at end of file diff --git a/marginalia_nu/src/main/resources/dictionary/en-words b/marginalia_nu/src/main/resources/dictionary/en-words new file mode 100644 index 00000000..01807612 --- /dev/null +++ b/marginalia_nu/src/main/resources/dictionary/en-words @@ -0,0 +1,102401 @@ +A +A's +AMD +AMD's +AOL +AOL's +AWS +AWS's +Aachen +Aachen's +Aaliyah +Aaliyah's +Aaron +Aaron's +Abbas +Abbas's +Abbasid +Abbasid's +Abbott +Abbott's +Abby +Abby's +Abdul +Abdul's +Abe +Abe's +Abel +Abel's +Abelard +Abelard's +Abelson +Abelson's +Aberdeen +Aberdeen's +Abernathy +Abernathy's +Abidjan +Abidjan's +Abigail +Abigail's +Abilene +Abilene's +Abner +Abner's +Abraham +Abraham's +Abram +Abram's +Abrams +Abrams's +Absalom +Absalom's +Abuja +Abuja's +Abyssinia +Abyssinia's +Abyssinian +Abyssinian's +Ac +Ac's +Acadia +Acadia's +Acapulco +Acapulco's +Accenture +Accenture's +Accra +Accra's +Acevedo +Acevedo's +Achaean +Achaean's +Achebe +Achebe's +Achernar +Achernar's +Acheson +Acheson's +Achilles +Achilles's +Aconcagua +Aconcagua's +Acosta +Acosta's +Acropolis +Acrux +Acrux's +Actaeon +Actaeon's +Acton +Acton's +Acts +Acts's +Acuff +Acuff's +Ada +Ada's +Adam +Adam's +Adams +Adams's +Adan +Adan's +Adana +Adana's +Adar +Adar's +Addams +Addams's +Adderley +Adderley's +Addie +Addie's +Addison +Addison's +Adela +Adela's +Adelaide +Adelaide's +Adele +Adele's +Adeline +Adeline's +Aden +Aden's +Adenauer +Adenauer's +Adhara +Adhara's +Adidas +Adidas's +Adirondack 
+Adirondack's +Adirondacks +Adirondacks's +Adkins +Adkins's +Adler +Adler's +Adolf +Adolf's +Adolfo +Adolfo's +Adolph +Adolph's +Adonis +Adonis's +Adonises +Adrian +Adrian's +Adriana +Adriana's +Adriatic +Adriatic's +Adrienne +Adrienne's +Advent +Advent's +Adventist +Adventist's +Advents +Advil +Advil's +Aegean +Aegean's +Aelfric +Aelfric's +Aeneas +Aeneas's +Aeneid +Aeneid's +Aeolus +Aeolus's +Aeroflot +Aeroflot's +Aeschylus +Aeschylus's +Aesculapius +Aesculapius's +Aesop +Aesop's +Afghan +Afghan's +Afghani +Afghani's +Afghanistan +Afghanistan's +Afghans +Africa +Africa's +African +African's +Africans +Afrikaans +Afrikaans's +Afrikaner +Afrikaner's +Afrikaners +Afro +Afro's +Afrocentrism +Afrocentrism's +Afros +Ag +Ag's +Agamemnon +Agamemnon's +Agassi +Agassi's +Agassiz +Agassiz's +Agatha +Agatha's +Aggie +Aggie's +Aglaia +Aglaia's +Agnes +Agnes's +Agnew +Agnew's +Agni +Agni's +Agra +Agra's +Agricola +Agricola's +Agrippa +Agrippa's +Agrippina +Agrippina's +Aguilar +Aguilar's +Aguinaldo +Aguinaldo's +Aguirre +Aguirre's +Agustin +Agustin's +Ahab +Ahab's +Ahmad +Ahmad's +Ahmadabad +Ahmadabad's +Ahmadinejad +Ahmadinejad's +Ahmed +Ahmed's +Ahriman +Ahriman's +Aida +Aida's +Aiken +Aiken's +Aileen +Aileen's +Aimee +Aimee's +Ainu +Ainu's +Airedale +Airedale's +Airedales +Aires +Aires's +Aisha +Aisha's +Ajax +Ajax's +Akbar +Akbar's +Akhmatova +Akhmatova's +Akihito +Akihito's +Akita +Akita's +Akiva +Akiva's +Akkad +Akkad's +Akron +Akron's +Al +Al's +Alabama +Alabama's +Alabaman +Alabaman's +Alabamans +Alabamian +Alabamian's +Alabamians +Aladdin +Aladdin's +Alamo +Alamo's +Alamogordo +Alamogordo's +Alan +Alan's +Alana +Alana's +Alar +Alar's +Alaric +Alaric's +Alaska +Alaska's +Alaskan +Alaskan's +Alaskans +Alba +Alba's +Albania +Albania's +Albanian +Albanian's +Albanians +Albany +Albany's +Albee +Albee's +Alberio +Alberio's +Albert +Albert's +Alberta +Alberta's +Alberto +Alberto's +Albigensian +Albigensian's +Albion +Albion's +Albireo +Albireo's +Albuquerque +Albuquerque's 
+Alcatraz +Alcatraz's +Alcestis +Alcestis's +Alcibiades +Alcibiades's +Alcindor +Alcindor's +Alcmena +Alcmena's +Alcoa +Alcoa's +Alcott +Alcott's +Alcuin +Alcuin's +Alcyone +Alcyone's +Aldan +Aldan's +Aldebaran +Aldebaran's +Alden +Alden's +Alderamin +Alderamin's +Aldo +Aldo's +Aldrin +Aldrin's +Alec +Alec's +Aleichem +Aleichem's +Alejandra +Alejandra's +Alejandro +Alejandro's +Alembert +Alembert's +Aleppo +Aleppo's +Aleut +Aleut's +Aleutian +Aleutian's +Alex +Alex's +Alexander +Alexander's +Alexandra +Alexandra's +Alexandria +Alexandria's +Alexei +Alexei's +Alexis +Alexis's +Alfonso +Alfonso's +Alfonzo +Alfonzo's +Alford +Alford's +Alfred +Alfred's +Alfreda +Alfreda's +Alfredo +Alfredo's +Algenib +Algenib's +Alger +Alger's +Algeria +Algeria's +Algerian +Algerian's +Algerians +Algieba +Algieba's +Algiers +Algiers's +Algol +Algol's +Algonquian +Algonquian's +Algonquians +Algonquin +Algonquin's +Alhambra +Alhambra's +Alhena +Alhena's +Ali +Ali's +Alice +Alice's +Alicia +Alicia's +Alighieri +Alighieri's +Aline +Aline's +Alioth +Alioth's +Alisa +Alisa's +Alisha +Alisha's +Alison +Alison's +Alissa +Alissa's +Alistair +Alistair's +Alkaid +Alkaid's +Allah +Allah's +Allahabad +Allahabad's +Allan +Allan's +Alleghenies +Alleghenies's +Allegheny +Allegheny's +Allegra +Allegra's +Allen +Allen's +Allende +Allende's +Allentown +Allentown's +Allie +Allie's +Allison +Allison's +Allstate +Allstate's +Allyson +Allyson's +Alma +Alma's +Almach +Almach's +Almaty +Almaty's +Almighty +Almighty's +Almohad +Almohad's +Almoravid +Almoravid's +Alnilam +Alnilam's +Alnitak +Alnitak's +Alonzo +Alonzo's +Alpert +Alpert's +Alphard +Alphard's +Alphecca +Alphecca's +Alpheratz +Alpheratz's +Alphonse +Alphonse's +Alphonso +Alphonso's +Alpine +Alpine's +Alpo +Alpo's +Alps +Alps's +Alsace +Alsace's +Alsatian +Alsatian's +Alsop +Alsop's +Alston +Alston's +Altaba +Altaba's +Altai +Altai's +Altaic +Altaic's +Altair +Altair's +Altamira +Altamira's +Althea +Althea's +Altiplano +Altiplano's +Altman +Altman's 
+Altoids +Altoids's +Alton +Alton's +Aludra +Aludra's +Alva +Alva's +Alvarado +Alvarado's +Alvarez +Alvarez's +Alvaro +Alvaro's +Alvin +Alvin's +Alyce +Alyce's +Alyson +Alyson's +Alyssa +Alyssa's +Alzheimer +Alzheimer's +Am +Am's +Amadeus +Amadeus's +Amado +Amado's +Amalia +Amalia's +Amanda +Amanda's +Amarillo +Amarillo's +Amaru +Amaru's +Amaterasu +Amaterasu's +Amati +Amati's +Amazon +Amazon's +Amazons +Amber +Amber's +Amelia +Amelia's +Amenhotep +Amenhotep's +Amerasian +Amerasian's +America +America's +American +American's +Americana +Americana's +Americanism +Americanism's +Americanisms +Americanization +Americanization's +Americanizations +Americanize +Americanized +Americanizes +Americanizing +Americans +Americas +Amerind +Amerind's +Amerindian +Amerindian's +Amerindians +Amerinds +Ameslan +Ameslan's +Amgen +Amgen's +Amharic +Amharic's +Amherst +Amherst's +Amie +Amie's +Amiga +Amiga's +Amish +Amish's +Amman +Amman's +Amoco +Amoco's +Amos +Amos's +Amparo +Amparo's +Ampere +Ampere's +Amritsar +Amritsar's +Amsterdam +Amsterdam's +Amtrak +Amtrak's +Amundsen +Amundsen's +Amur +Amur's +Amway +Amway's +Amy +Amy's +Ana +Ana's +Anabaptist +Anabaptist's +Anabel +Anabel's +Anacin +Anacin's +Anacreon +Anacreon's +Anaheim +Anaheim's +Analects +Analects's +Ananias +Ananias's +Anasazi +Anasazi's +Anastasia +Anastasia's +Anatole +Anatole's +Anatolia +Anatolia's +Anatolian +Anatolian's +Anaxagoras +Anaxagoras's +Anchorage +Anchorage's +Andalusia +Andalusia's +Andalusian +Andalusian's +Andaman +Andaman's +Andean +Andean's +Andersen +Andersen's +Anderson +Anderson's +Andes +Andes's +Andorra +Andorra's +Andre +Andre's +Andrea +Andrea's +Andrei +Andrei's +Andres +Andres's +Andretti +Andretti's +Andrew +Andrew's +Andrews +Andrews's +Andrianampoinimerina +Andrianampoinimerina's +Android +Android's +Andromache +Andromache's +Andromeda +Andromeda's +Andropov +Andropov's +Andy +Andy's +Angara +Angara's +Angel +Angel's +Angela +Angela's +Angeles +Angeles's +Angelia +Angelia's +Angelica 
+Angelica's +Angelico +Angelico's +Angelina +Angelina's +Angeline +Angeline's +Angelique +Angelique's +Angelita +Angelita's +Angelo +Angelo's +Angelou +Angelou's +Angevin +Angevin's +Angie +Angie's +Angkor +Angkor's +Anglia +Anglia's +Anglican +Anglican's +Anglicanism +Anglicanism's +Anglicanisms +Anglicans +Anglicize +Anglo +Anglo's +Anglophile +Anglophile's +Angola +Angola's +Angolan +Angolan's +Angolans +Angora +Angora's +Angoras +Anguilla +Anguilla's +Angus +Angus's +Aniakchak +Aniakchak's +Anibal +Anibal's +Anita +Anita's +Ankara +Ankara's +Ann +Ann's +Anna +Anna's +Annabel +Annabel's +Annabelle +Annabelle's +Annam +Annam's +Annapolis +Annapolis's +Annapurna +Annapurna's +Anne +Anne's +Annette +Annette's +Annie +Annie's +Annmarie +Annmarie's +Anouilh +Anouilh's +Anselm +Anselm's +Anselmo +Anselmo's +Anshan +Anshan's +Antaeus +Antaeus's +Antananarivo +Antananarivo's +Antarctic +Antarctic's +Antarctica +Antarctica's +Antares +Antares's +Anthony +Anthony's +Anthropocene +Antichrist +Antichrist's +Antichrists +Antietam +Antietam's +Antigone +Antigone's +Antigua +Antigua's +Antilles +Antilles's +Antioch +Antioch's +Antipas +Antipas's +Antofagasta +Antofagasta's +Antoine +Antoine's +Antoinette +Antoinette's +Anton +Anton's +Antone +Antone's +Antonia +Antonia's +Antoninus +Antoninus's +Antonio +Antonio's +Antonius +Antonius's +Antony +Antony's +Antwan +Antwan's +Antwerp +Antwerp's +Anubis +Anubis's +Anzac +Anzac's +Apache +Apache's +Apaches +Apalachicola +Apalachicola's +Apatosaurus +Apennines +Apennines's +Aphrodite +Aphrodite's +Apia +Apia's +Apocrypha +Apocrypha's +Apollinaire +Apollinaire's +Apollo +Apollo's +Apollonian +Apollonian's +Apollos +Appalachia +Appalachia's +Appalachian +Appalachian's +Appalachians +Appalachians's +Appaloosa +Appaloosa's +Apple +Apple's +Appleseed +Appleseed's +Appleton +Appleton's +Appomattox +Appomattox's +Apr +Apr's +April +April's +Aprils +Apuleius +Apuleius's +Aquafresh +Aquafresh's +Aquarius +Aquarius's +Aquariuses +Aquila 
+Aquila's +Aquinas +Aquinas's +Aquino +Aquino's +Aquitaine +Aquitaine's +Ara +Ara's +Arab +Arab's +Arabia +Arabia's +Arabian +Arabian's +Arabians +Arabic +Arabic's +Arabs +Araby +Araby's +Araceli +Araceli's +Arafat +Arafat's +Araguaya +Araguaya's +Aral +Aral's +Aramaic +Aramaic's +Aramco +Aramco's +Arapaho +Arapaho's +Ararat +Ararat's +Araucanian +Araucanian's +Arawak +Arawak's +Arawakan +Arawakan's +Arbitron +Arbitron's +Arcadia +Arcadia's +Arcadian +Arcadian's +Archean +Archean's +Archibald +Archibald's +Archie +Archie's +Archimedes +Archimedes's +Arctic +Arctic's +Arcturus +Arcturus's +Arden +Arden's +Arduino +Arduino's +Arequipa +Arequipa's +Ares +Ares's +Argentina +Argentina's +Argentine +Argentine's +Argentinian +Argentinian's +Argentinians +Argo +Argo's +Argonaut +Argonaut's +Argonne +Argonne's +Argos +Argos's +Argus +Argus's +Ariadne +Ariadne's +Arianism +Arianism's +Ariel +Ariel's +Aries +Aries's +Arieses +Ariosto +Ariosto's +Aristarchus +Aristarchus's +Aristides +Aristides's +Aristophanes +Aristophanes's +Aristotelian +Aristotelian's +Aristotle +Aristotle's +Arius +Arius's +Arizona +Arizona's +Arizonan +Arizonan's +Arizonans +Arizonian +Arizonian's +Arizonians +Arjuna +Arjuna's +Arkansan +Arkansan's +Arkansas +Arkansas's +Arkhangelsk +Arkhangelsk's +Arkwright +Arkwright's +Arlene +Arlene's +Arline +Arline's +Arlington +Arlington's +Armageddon +Armageddon's +Armageddons +Armagnac +Armagnac's +Armand +Armand's +Armando +Armando's +Armani +Armani's +Armenia +Armenia's +Armenian +Armenian's +Armenians +Arminius +Arminius's +Armonk +Armonk's +Armour +Armour's +Armstrong +Armstrong's +Arneb +Arneb's +Arnhem +Arnhem's +Arno +Arno's +Arnold +Arnold's +Arnulfo +Arnulfo's +Aron +Aron's +Arrhenius +Arrhenius's +Arron +Arron's +Art +Art's +Artaxerxes +Artaxerxes's +Artemis +Artemis's +Arthur +Arthur's +Arthurian +Arthurian's +Artie +Artie's +Arturo +Arturo's +Aruba +Aruba's +Aryan +Aryan's +Aryans +As +As's +Asama +Asama's +Ascella +Ascella's +Asgard +Asgard's 
+Ashanti +Ashanti's +Ashcroft +Ashcroft's +Ashe +Ashe's +Ashikaga +Ashikaga's +Ashkenazim +Ashkenazim's +Ashkhabad +Ashkhabad's +Ashlee +Ashlee's +Ashley +Ashley's +Ashmolean +Ashmolean's +Ashurbanipal +Ashurbanipal's +Asia +Asia's +Asiago +Asian +Asian's +Asians +Asiatic +Asiatic's +Asiatics +Asimov +Asimov's +Asmara +Asmara's +Asoka +Asoka's +Aspell +Aspell's +Aspen +Aspen's +Asperger +Asperger's +Aspidiske +Aspidiske's +Asquith +Asquith's +Assad +Assad's +Assam +Assam's +Assamese +Assamese's +Assisi +Assisi's +Assyria +Assyria's +Assyrian +Assyrian's +Assyrians +Astaire +Astaire's +Astana +Astana's +Astarte +Astarte's +Aston +Aston's +Astor +Astor's +Astoria +Astoria's +Astrakhan +Astrakhan's +AstroTurf +AstroTurf's +Asturias +Asturias's +Asunción +Asunción's +Aswan +Aswan's +Atacama +Atacama's +Atahualpa +Atahualpa's +Atalanta +Atalanta's +Atari +Atari's +Atatürk +Atatürk's +Athabasca +Athabasca's +Athabascan +Athabascan's +Athena +Athena's +Athenian +Athenian's +Athenians +Athens +Athens's +Atkins +Atkins's +Atkinson +Atkinson's +Atlanta +Atlanta's +Atlantes +Atlantic +Atlantic's +Atlantis +Atlantis's +Atlas +Atlas's +Atlases +Atman +Atman's +Atreus +Atreus's +Atria +Atria's +Atropos +Atropos's +Ats +Attic +Attic's +Attica +Attica's +Attila +Attila's +Attlee +Attlee's +Attucks +Attucks's +Atwood +Atwood's +Au +Au's +Aubrey +Aubrey's +Auckland +Auckland's +Auden +Auden's +Audi +Audi's +Audion +Audion's +Audra +Audra's +Audrey +Audrey's +Audubon +Audubon's +Aug +Aug's +Augean +Augean's +Augsburg +Augsburg's +August +August's +Augusta +Augusta's +Augustan +Augustan's +Augustine +Augustine's +Augusts +Augustus +Augustus's +Aurangzeb +Aurangzeb's +Aurelia +Aurelia's +Aurelio +Aurelio's +Aurelius +Aurelius's +Aureomycin +Aureomycin's +Auriga +Auriga's +Aurora +Aurora's +Auschwitz +Auschwitz's +Aussie +Aussie's +Aussies +Austen +Austen's +Austerlitz +Austerlitz's +Austin +Austin's +Austins +Australasia +Australasia's +Australia +Australia's +Australian +Australian's 
+Australians +Australoid +Australoid's +Australopithecus +Australopithecus's +Austria +Austria's +Austrian +Austrian's +Austrians +Austronesian +Austronesian's +Autumn +Autumn's +Ava +Ava's +Avalon +Avalon's +Aventine +Aventine's +Avernus +Avernus's +Averroes +Averroes's +Avery +Avery's +Avesta +Avesta's +Avicenna +Avicenna's +Avignon +Avignon's +Avila +Avila's +Avior +Avior's +Avis +Avis's +Avogadro +Avogadro's +Avon +Avon's +Axum +Axum's +Ayala +Ayala's +Ayers +Ayers's +Aymara +Aymara's +Ayrshire +Ayrshire's +Ayurveda +Ayurveda's +Ayyubid +Ayyubid's +Azana +Azana's +Azania +Azania's +Azazel +Azazel's +Azerbaijan +Azerbaijan's +Azerbaijani +Azerbaijani's +Azores +Azores's +Azov +Azov's +Aztec +Aztec's +Aztecan +Aztecan's +Aztecs +Aztlan +Aztlan's +B +B's +BBB +BBB's +BMW +BMW's +BP +BP's +BSD +BSD's +Ba +Ba's +Baal +Baal's +Baath +Baath's +Baathist +Baathist's +Babar +Babar's +Babbage +Babbage's +Babbitt +Babbitt's +Babel +Babel's +Babels +Babur +Babur's +Babylon +Babylon's +Babylonian +Babylonian's +Babylons +Bacall +Bacall's +Bacardi +Bacardi's +Bacchanalia +Bacchanalia's +Bacchus +Bacchus's +Bach +Bach's +Backus +Backus's +Bacon +Bacon's +Bactria +Bactria's +Baden +Baden's +Badlands +Badlands's +Baedeker +Baedeker's +Baez +Baez's +Baffin +Baffin's +Baggies +Baggies's +Baghdad +Baghdad's +Baguio +Baguio's +Baha'i +Baha'i's +Baha'ullah +Baha'ullah's +Bahama +Bahama's +Bahamas +Bahamas's +Bahamian +Bahamian's +Bahamians +Bahia +Bahia's +Bahrain +Bahrain's +Baidu +Baidu's +Baikal +Baikal's +Bailey +Bailey's +Baird +Baird's +Bakelite +Bakelite's +Baker +Baker's +Bakersfield +Bakersfield's +Baku +Baku's +Bakunin +Bakunin's +Balanchine +Balanchine's +Balaton +Balaton's +Balboa +Balboa's +Balder +Balder's +Baldwin +Baldwin's +Balearic +Balearic's +Balfour +Balfour's +Bali +Bali's +Balinese +Balinese's +Balkan +Balkan's +Balkans +Balkans's +Balkhash +Balkhash's +Ball +Ball's +Ballard +Ballard's +Balthazar +Balthazar's +Baltic +Baltic's +Baltimore +Baltimore's 
+Baluchistan +Baluchistan's +Balzac +Balzac's +Bamako +Bamako's +Bambi +Bambi's +Banach +Banach's +Bancroft +Bancroft's +Bandung +Bandung's +Bangalore +Bangalore's +Bangkok +Bangkok's +Bangladesh +Bangladesh's +Bangladeshi +Bangladeshi's +Bangladeshis +Bangor +Bangor's +Bangui +Bangui's +Banjarmasin +Banjarmasin's +Banjul +Banjul's +Banks +Banks's +Banneker +Banneker's +Bannister +Bannister's +Banting +Banting's +Bantu +Bantu's +Bantus +Baotou +Baotou's +Baptist +Baptist's +Baptiste +Baptiste's +Baptists +Barabbas +Barabbas's +Barack +Barack's +Barbadian +Barbadian's +Barbadians +Barbados +Barbados's +Barbara +Barbara's +Barbarella +Barbarella's +Barbarossa +Barbarossa's +Barbary +Barbary's +Barber +Barber's +Barbie +Barbie's +Barbour +Barbour's +Barbra +Barbra's +Barbuda +Barbuda's +Barcelona +Barcelona's +Barclay +Barclay's +Bardeen +Bardeen's +Barents +Barents's +Barker +Barker's +Barkley +Barkley's +Barlow +Barlow's +Barnabas +Barnabas's +Barnaby +Barnaby's +Barnard +Barnard's +Barnaul +Barnaul's +Barnes +Barnes's +Barnett +Barnett's +Barney +Barney's +Barnum +Barnum's +Baroda +Baroda's +Barquisimeto +Barquisimeto's +Barr +Barr's +Barranquilla +Barranquilla's +Barrera +Barrera's +Barrett +Barrett's +Barrie +Barrie's +Barron +Barron's +Barry +Barry's +Barrymore +Barrymore's +Barth +Barth's +Bartholdi +Bartholdi's +Bartholomew +Bartholomew's +Bartlett +Bartlett's +Barton +Barton's +Bartók +Bartók's +Baruch +Baruch's +Baryshnikov +Baryshnikov's +Basel +Basel's +Basho +Basho's +Basie +Basie's +Basil +Basil's +Basque +Basque's +Basques +Basra +Basra's +Bass +Bass's +Basseterre +Basseterre's +Bastille +Bastille's +Bataan +Bataan's +Bates +Bates's +Bathsheba +Bathsheba's +Batista +Batista's +Batman +Batman's +Battle +Battle's +Batu +Batu's +Baudelaire +Baudelaire's +Baudouin +Baudouin's +Bauer +Bauer's +Bauhaus +Bauhaus's +Baum +Baum's +Bavaria +Bavaria's +Bavarian +Bavarian's +Baxter +Baxter's +Bayer +Bayer's +Bayes +Bayes's +Bayesian +Bayesian's +Bayeux +Bayeux's 
+Baylor +Baylor's +Bayonne +Bayonne's +Bayreuth +Bayreuth's +Baywatch +Baywatch's +Beach +Beach's +Beadle +Beadle's +Bean +Bean's +Beard +Beard's +Beardmore +Beardmore's +Beardsley +Beardsley's +Bearnaise +Bearnaise's +Beasley +Beasley's +Beatlemania +Beatlemania's +Beatles +Beatles's +Beatrice +Beatrice's +Beatrix +Beatrix's +Beatriz +Beatriz's +Beau +Beau's +Beaufort +Beaufort's +Beaujolais +Beaujolais's +Beaumarchais +Beaumarchais's +Beaumont +Beaumont's +Beauregard +Beauregard's +Beauvoir +Beauvoir's +Bechtel +Bechtel's +Beck +Beck's +Becker +Becker's +Becket +Becket's +Beckett +Beckett's +Becky +Becky's +Becquerel +Becquerel's +Bede +Bede's +Bedouin +Bedouin's +Bedouins +Beebe +Beebe's +Beecher +Beecher's +Beefaroni +Beefaroni's +Beelzebub +Beelzebub's +Beerbohm +Beerbohm's +Beethoven +Beethoven's +Beeton +Beeton's +Begin +Begin's +Behan +Behan's +Behring +Behring's +Beiderbecke +Beiderbecke's +Beijing +Beijing's +Beirut +Beirut's +Bekesy +Bekesy's +Bela +Bela's +Belarus +Belarus's +Belau +Belau's +Belem +Belem's +Belfast +Belfast's +Belgian +Belgian's +Belgians +Belgium +Belgium's +Belgrade +Belgrade's +Belinda +Belinda's +Belize +Belize's +Bell +Bell's +Bella +Bella's +Bellamy +Bellamy's +Bellatrix +Bellatrix's +Belleek +Belleek's +Bellini +Bellini's +Bellow +Bellow's +Belmont +Belmont's +Belmopan +Belmopan's +Belshazzar +Belshazzar's +Beltane +Beltane's +Belushi +Belushi's +Ben +Ben's +Benacerraf +Benacerraf's +Benares +Benares's +Benchley +Benchley's +Bender +Bender's +Bendix +Bendix's +Benedict +Benedict's +Benedictine +Benedictine's +Benelux +Benelux's +Benet +Benet's +Benetton +Benetton's +Bengal +Bengal's +Bengali +Bengali's +Benghazi +Benghazi's +Benin +Benin's +Benita +Benita's +Benito +Benito's +Benjamin +Benjamin's +Bennett +Bennett's +Bennie +Bennie's +Benny +Benny's +Benson +Benson's +Bentham +Bentham's +Bentley +Bentley's +Benton +Benton's +Benz +Benz's +Benzedrine +Benzedrine's +Beowulf +Beowulf's +Berber +Berber's +Berbers +Berenice 
+Berenice's +Beretta +Beretta's +Berg +Berg's +Bergen +Bergen's +Berger +Berger's +Bergerac +Bergerac's +Bergman +Bergman's +Bergson +Bergson's +Beria +Beria's +Bering +Bering's +Berkeley +Berkeley's +Berkshire +Berkshire's +Berkshires +Berkshires's +Berle +Berle's +Berlin +Berlin's +Berliner +Berliner's +Berlins +Berlioz +Berlioz's +Berlitz +Berlitz's +Bermuda +Bermuda's +Bermudas +Bern +Bern's +Bernadette +Bernadette's +Bernadine +Bernadine's +Bernanke +Bernanke's +Bernard +Bernard's +Bernardo +Bernardo's +Bernays +Bernays's +Bernbach +Bernbach's +Berne +Berne's +Bernhardt +Bernhardt's +Bernice +Bernice's +Bernie +Bernie's +Bernini +Bernini's +Bernoulli +Bernoulli's +Bernstein +Bernstein's +Berra +Berra's +Berry +Berry's +Bert +Bert's +Berta +Berta's +Bertelsmann +Bertelsmann's +Bertha +Bertha's +Bertie +Bertie's +Bertillon +Bertillon's +Bertram +Bertram's +Bertrand +Bertrand's +Beryl +Beryl's +Berzelius +Berzelius's +Bess +Bess's +Bessel +Bessel's +Bessemer +Bessemer's +Bessie +Bessie's +Best +Best's +Betelgeuse +Betelgeuse's +Beth +Beth's +Bethany +Bethany's +Bethe +Bethe's +Bethesda +Bethesda's +Bethlehem +Bethlehem's +Bethune +Bethune's +Betsy +Betsy's +Bette +Bette's +Bettie +Bettie's +Betty +Betty's +Bettye +Bettye's +Beulah +Beulah's +Beverley +Beverley's +Beverly +Beverly's +Beyer +Beyer's +Bhopal +Bhopal's +Bhutan +Bhutan's +Bhutto +Bhutto's +Bialystok +Bialystok's +Bianca +Bianca's +Bible +Bible's +Bibles +Biblical +Biblical's +Bic +Bic's +Biddle +Biddle's +Biden +Biden's +Bierce +Bierce's +BigQuery +BigQuery's +Bigfoot +Bigfoot's +Biggles +Biggles's +Biko +Biko's +Bilbao +Bilbao's +Bilbo +Bilbo's +Bill +Bill's +Billie +Billie's +Billings +Billings's +Billy +Billy's +Bimini +Bimini's +Biogen +Biogen's +Bioko +Bioko's +Bird +Bird's +Birdseye +Birdseye's +Birkenstock +Birkenstock's +Birmingham +Birmingham's +Biro +Biro's +Biscay +Biscay's +Biscayne +Biscayne's +Bishkek +Bishkek's +Bishop +Bishop's +Bismarck +Bismarck's +Bismark +Bismark's +Bisquick 
+Bisquick's +Bissau +Bissau's +BitTorrent +BitTorrent's +Bizet +Bizet's +Bjerknes +Bjerknes's +Bjork +Bjork's +Black +Black's +Blackbeard +Blackbeard's +Blackburn +Blackburn's +Blackfoot +Blackfoot's +Blacks +Blackshirt +Blackshirt's +Blackstone +Blackstone's +Blackwell +Blackwell's +Blaine +Blaine's +Blair +Blair's +Blake +Blake's +Blanca +Blanca's +Blanchard +Blanchard's +Blanche +Blanche's +Blankenship +Blankenship's +Blantyre +Blantyre's +Blatz +Blatz's +Blavatsky +Blavatsky's +Blenheim +Blenheim's +Blevins +Blevins's +Bligh +Bligh's +Bloch +Bloch's +Blockbuster +Blockbuster's +Bloemfontein +Bloemfontein's +Blondel +Blondel's +Blondie +Blondie's +Bloom +Bloom's +Bloomer +Bloomer's +Bloomfield +Bloomfield's +Bloomingdale +Bloomingdale's +Bloomsbury +Bloomsbury's +Blu +Blucher +Blucher's +Bluebeard +Bluebeard's +Bluetooth +Bluetooth's +Blythe +Blythe's +Boas +Boas's +Bob +Bob's +Bobbi +Bobbi's +Bobbie +Bobbie's +Bobbitt +Bobbitt's +Bobby +Bobby's +Boccaccio +Boccaccio's +Bodhidharma +Bodhidharma's +Bodhisattva +Bodhisattva's +Boeing +Boeing's +Boeotia +Boeotia's +Boeotian +Boeotian's +Boer +Boer's +Boers +Boethius +Boethius's +Bogart +Bogart's +Bogotá +Bogotá's +Bohemia +Bohemia's +Bohemian +Bohemian's +Bohemians +Bohr +Bohr's +Boise +Boise's +Bojangles +Bojangles's +Boleyn +Boleyn's +Bolivar +Bolivar's +Bolivia +Bolivia's +Bolivian +Bolivian's +Bolivians +Bollywood +Bollywood's +Bologna +Bologna's +Bolshevik +Bolshevik's +Bolsheviks +Bolshevism +Bolshevism's +Bolshevist +Bolshevist's +Bolshoi +Bolshoi's +Bolton +Bolton's +Boltzmann +Boltzmann's +Bombay +Bombay's +Bonaparte +Bonaparte's +Bonaventure +Bonaventure's +Bond +Bond's +Bonhoeffer +Bonhoeffer's +Boniface +Boniface's +Bonita +Bonita's +Bonn +Bonn's +Bonner +Bonner's +Bonneville +Bonneville's +Bonnie +Bonnie's +Bono +Bono's +Booker +Booker's +Boole +Boole's +Boolean +Boolean's +Boone +Boone's +Booth +Booth's +Bordeaux +Bordeaux's +Borden +Borden's +Bordon +Bordon's +Boreas +Boreas's +Borg +Borg's +Borges 
+Borges's +Borgia +Borgia's +Borglum +Borglum's +Boris +Boris's +Bork +Bork's +Borlaug +Borlaug's +Born +Born's +Borneo +Borneo's +Borobudur +Borobudur's +Borodin +Borodin's +Boru +Boru's +Bosch +Bosch's +Bose +Bose's +Bosnia +Bosnia's +Bosporus +Bosporus's +Boston +Boston's +Bostonian +Bostonian's +Bostons +Boswell +Boswell's +Botox +Botswana +Botswana's +Botticelli +Botticelli's +Boulder +Boulder's +Boulez +Boulez's +Bourbaki +Bourbaki's +Bourbon +Bourbon's +Bournemouth +Bournemouth's +Bovary +Bovary's +Bowditch +Bowditch's +Bowell +Bowell's +Bowen +Bowen's +Bowers +Bowers's +Bowery +Bowery's +Bowie +Bowie's +Bowman +Bowman's +Boyd +Boyd's +Boyer +Boyer's +Boyle +Boyle's +Boötes +Boötes's +Brad +Brad's +Bradbury +Bradbury's +Braddock +Braddock's +Bradford +Bradford's +Bradley +Bradley's +Bradly +Bradly's +Bradshaw +Bradshaw's +Bradstreet +Bradstreet's +Brady +Brady's +Bragg +Bragg's +Brahe +Brahe's +Brahma +Brahma's +Brahmagupta +Brahmagupta's +Brahman +Brahman's +Brahmanism +Brahmanism's +Brahmanisms +Brahmans +Brahmaputra +Brahmaputra's +Brahmas +Brahmin +Brahmin's +Brahmins +Brahms +Brahms's +Braille +Braille's +Brailles +Brain +Brain's +Brampton +Brampton's +Bran +Bran's +Branch +Branch's +Brandeis +Brandeis's +Branden +Branden's +Brandenburg +Brandenburg's +Brandi +Brandi's +Brandie +Brandie's +Brando +Brando's +Brandon +Brandon's +Brandt +Brandt's +Brandy +Brandy's +Brant +Brant's +Braque +Braque's +Brasilia +Brasilia's +Bratislava +Bratislava's +Brattain +Brattain's +Bray +Bray's +Brazil +Brazil's +Brazilian +Brazilian's +Brazilians +Brazos +Brazos's +Brazzaville +Brazzaville's +Breakspear +Breakspear's +Brecht +Brecht's +Breckenridge +Breckenridge's +Bremen +Bremen's +Brenda +Brenda's +Brendan +Brendan's +Brennan +Brennan's +Brenner +Brenner's +Brent +Brent's +Brenton +Brenton's +Brest +Brest's +Bret +Bret's +Breton +Breton's +Brett +Brett's +Brewer +Brewer's +Brewster +Brewster's +Brexit +Brezhnev +Brezhnev's +Brian +Brian's +Briana +Briana's +Brianna 
+Brianna's +Brice +Brice's +Bridalveil +Bridalveil's +Bridgeport +Bridgeport's +Bridger +Bridger's +Bridges +Bridges's +Bridget +Bridget's +Bridgetown +Bridgetown's +Bridgett +Bridgett's +Bridgette +Bridgette's +Bridgman +Bridgman's +Brie +Brie's +Brigadoon +Brigadoon's +Briggs +Briggs's +Brigham +Brigham's +Bright +Bright's +Brighton +Brighton's +Brigid +Brigid's +Brigitte +Brigitte's +Brillo +Brillo's +Brinkley +Brinkley's +Brisbane +Brisbane's +Bristol +Bristol's +Brit +Brit's +Britain +Britain's +Britannia +Britannia's +Britannic +Britannic's +Britannica +Britannica's +British +British's +Britisher +Britney +Britney's +Briton +Briton's +Britons +Brits +Britt +Britt's +Brittany +Brittany's +Britten +Britten's +Brittney +Brittney's +Brno +Brno's +Broadway +Broadway's +Broadways +Brobdingnag +Brobdingnag's +Brobdingnagian +Brobdingnagian's +Brock +Brock's +Brokaw +Brokaw's +Bronson +Bronson's +Bronte +Bronte's +Brontosaurus +Bronx +Bronx's +Brooke +Brooke's +Brooklyn +Brooklyn's +Brooks +Brooks's +Brown +Brown's +Browne +Browne's +Brownian +Brownian's +Brownie +Brownies +Browning +Browning's +Brownshirt +Brownshirt's +Brownsville +Brownsville's +Brubeck +Brubeck's +Bruce +Bruce's +Bruckner +Bruckner's +Brueghel +Brueghel's +Brummel +Brummel's +Brunei +Brunei's +Brunelleschi +Brunelleschi's +Brunhilde +Brunhilde's +Bruno +Bruno's +Brunswick +Brunswick's +Brussels +Brussels's +Brut +Brut's +Brutus +Brutus's +Bryan +Bryan's +Bryant +Bryant's +Bryce +Bryce's +Brynner +Brynner's +Bryon +Bryon's +Brzezinski +Brzezinski's +Btu +Btu's +Buber +Buber's +Buchanan +Buchanan's +Bucharest +Bucharest's +Buchenwald +Buchenwald's +Buchwald +Buchwald's +Buck +Buck's +Buckingham +Buckingham's +Buckley +Buckley's +Buckner +Buckner's +Bud +Bud's +Budapest +Budapest's +Buddha +Buddha's +Buddhas +Buddhism +Buddhism's +Buddhisms +Buddhist +Buddhist's +Buddhists +Buddy +Buddy's +Budweiser +Budweiser's +Buffalo +Buffalo's +Buffy +Buffy's +Buford +Buford's +Bugatti +Bugatti's +Bugzilla 
+Bugzilla's +Buick +Buick's +Bujumbura +Bujumbura's +Bukhara +Bukhara's +Bukharin +Bukharin's +Bulawayo +Bulawayo's +Bulfinch +Bulfinch's +Bulganin +Bulganin's +Bulgar +Bulgar's +Bulgari +Bulgari's +Bulgaria +Bulgaria's +Bulgarian +Bulgarian's +Bulgarians +Bullock +Bullock's +Bullwinkle +Bullwinkle's +Bultmann +Bultmann's +Bumppo +Bumppo's +Bunche +Bunche's +Bundesbank +Bundesbank's +Bundestag +Bundestag's +Bunin +Bunin's +Bunker +Bunker's +Bunsen +Bunsen's +Bunyan +Bunyan's +Burbank +Burbank's +Burberry +Burberry's +Burch +Burch's +Burger +Burger's +Burgess +Burgess's +Burgoyne +Burgoyne's +Burgundian +Burgundian's +Burgundies +Burgundy +Burgundy's +Burke +Burke's +Burks +Burks's +Burl +Burl's +Burma +Burma's +Burmese +Burmese's +Burnett +Burnett's +Burns +Burns's +Burnside +Burnside's +Burr +Burr's +Burris +Burris's +Burroughs +Burroughs's +Bursa +Bursa's +Burt +Burt's +Burton +Burton's +Burundi +Burundi's +Busch +Busch's +Bush +Bush's +Bushido +Bushido's +Bushnell +Bushnell's +Butler +Butler's +Butterfingers +Butterfingers's +Buxtehude +Buxtehude's +Buñuel +Buñuel's +Byblos +Byblos's +Byelorussia +Byelorussia's +Byers +Byers's +Byrd +Byrd's +Byron +Byron's +Byronic +Byronic's +Byzantine +Byzantine's +Byzantines +Byzantium +Byzantium's +C +C's +CSS +CSS's +CVS +CVS's +Ca +Ca's +Cabernet +Cabernet's +Cabinet +Cabot +Cabot's +Cabral +Cabral's +Cabrera +Cabrera's +Cabrini +Cabrini's +Cadillac +Cadillac's +Cadiz +Cadiz's +Caedmon +Caedmon's +Caerphilly +Caerphilly's +Caesar +Caesar's +Caesarean +Caesars +Cage +Cage's +Cagney +Cagney's +Cahokia +Cahokia's +Caiaphas +Caiaphas's +Cain +Cain's +Cains +Cairo +Cairo's +Caitlin +Caitlin's +Cajun +Cajun's +Cajuns +Calais +Calais's +Calcutta +Calcutta's +Calder +Calder's +Calderon +Calderon's +Caldwell +Caldwell's +Caleb +Caleb's +Caledonia +Caledonia's +Calgary +Calgary's +Calhoun +Calhoun's +Cali +Cali's +Caliban +Caliban's +California +California's +Californian +Californian's +Californians +Caligula +Caligula's +Callaghan 
+Callaghan's +Callahan +Callahan's +Callao +Callao's +Callas +Callas's +Callie +Callie's +Calliope +Calliope's +Callisto +Callisto's +Caloocan +Caloocan's +Calvary +Calvary's +Calvert +Calvert's +Calvin +Calvin's +Calvinism +Calvinism's +Calvinisms +Calvinist +Calvinist's +Calvinistic +Calvinists +Camacho +Camacho's +Cambodia +Cambodia's +Cambodian +Cambodian's +Cambodians +Cambrian +Cambrian's +Cambridge +Cambridge's +Camel +Camel's +Camelopardalis +Camelopardalis's +Camelot +Camelot's +Camembert +Camembert's +Camemberts +Cameron +Cameron's +Cameroon +Cameroon's +Cameroons +Camilla +Camilla's +Camille +Camille's +Camoens +Camoens's +Campanella +Campanella's +Campbell +Campbell's +Campinas +Campinas's +Campos +Campos's +Camry +Camry's +Camus +Camus's +Canaan +Canaan's +Canada +Canada's +Canadian +Canadian's +Canadians +Canaletto +Canaletto's +Canaries +Canaries's +Canaveral +Canaveral's +Canberra +Canberra's +Cancer +Cancer's +Cancers +Cancun +Cancun's +Candace +Candace's +Candice +Candice's +Candide +Candide's +Candy +Candy's +Cannes +Cannes's +Cannon +Cannon's +Canon +Canon's +Canopus +Canopus's +Cantabrigian +Cantabrigian's +Canterbury +Canterbury's +Canton +Canton's +Cantonese +Cantonese's +Cantor +Cantor's +Cantrell +Cantrell's +Cantu +Cantu's +Canute +Canute's +Capablanca +Capablanca's +Capek +Capek's +Capella +Capella's +Capet +Capet's +Capetian +Capetian's +Capetown +Capetown's +Caph +Caph's +Capistrano +Capistrano's +Capitol +Capitol's +Capitoline +Capitoline's +Capitols +Capone +Capone's +Capote +Capote's +Capra +Capra's +Capri +Capri's +Capricorn +Capricorn's +Capricorns +Capuchin +Capuchin's +Capulet +Capulet's +Cara +Cara's +Caracalla +Caracalla's +Caracas +Caracas's +Caravaggio +Caravaggio's +Carboloy +Carboloy's +Carboniferous +Carboniferous's +Carborundum +Carborundum's +Cardenas +Cardenas's +Cardiff +Cardiff's +Cardin +Cardin's +Cardozo +Cardozo's +Carey +Carey's +Carib +Carib's +Caribbean +Caribbean's +Caribbeans +Carina +Carina's +Carissa 
+Carissa's +Carl +Carl's +Carla +Carla's +Carlene +Carlene's +Carlin +Carlin's +Carlo +Carlo's +Carlos +Carlos's +Carlsbad +Carlsbad's +Carlson +Carlson's +Carlton +Carlton's +Carly +Carly's +Carlyle +Carlyle's +Carmela +Carmela's +Carmella +Carmella's +Carmelo +Carmelo's +Carmen +Carmen's +Carmichael +Carmichael's +Carmine +Carmine's +Carnap +Carnap's +Carnation +Carnation's +Carnegie +Carnegie's +Carney +Carney's +Carnot +Carnot's +Carol +Carol's +Carole +Carole's +Carolina +Carolina's +Caroline +Caroline's +Carolingian +Carolingian's +Carolinian +Carolinian's +Carolyn +Carolyn's +Carpathian +Carpathian's +Carpathians +Carpathians's +Carpenter +Carpenter's +Carr +Carr's +Carranza +Carranza's +Carrie +Carrie's +Carrier +Carrier's +Carrillo +Carrillo's +Carroll +Carroll's +Carson +Carson's +Carter +Carter's +Cartesian +Cartesian's +Carthage +Carthage's +Carthaginian +Carthaginian's +Cartier +Cartier's +Cartwright +Cartwright's +Caruso +Caruso's +Carver +Carver's +Cary +Cary's +Casablanca +Casablanca's +Casals +Casals's +Casandra +Casandra's +Casanova +Casanova's +Casanovas +Cascades +Cascades's +Case +Case's +Casey +Casey's +Cash +Cash's +Casio +Casio's +Caspar +Caspar's +Caspian +Caspian's +Cassandra +Cassandra's +Cassatt +Cassatt's +Cassie +Cassie's +Cassiopeia +Cassiopeia's +Cassius +Cassius's +Castaneda +Castaneda's +Castillo +Castillo's +Castlereagh +Castlereagh's +Castor +Castor's +Castries +Castries's +Castro +Castro's +Catalan +Catalan's +Catalina +Catalina's +Catalonia +Catalonia's +Catawba +Catawba's +Caterpillar +Caterpillar's +Cathay +Cathay's +Cather +Cather's +Catherine +Catherine's +Cathleen +Cathleen's +Catholic +Catholic's +Catholicism +Catholicism's +Catholicisms +Catholics +Cathryn +Cathryn's +Cathy +Cathy's +Catiline +Catiline's +Cato +Cato's +Catskill +Catskill's +Catskills +Catskills's +Catt +Catt's +Catullus +Catullus's +Caucasian +Caucasian's +Caucasians +Caucasoid +Caucasus +Caucasus's +Cauchy +Cauchy's +Cavendish +Cavendish's +Cavour 
+Cavour's +Caxton +Caxton's +Cayenne +Cayenne's +Cayman +Cayman's +Cayuga +Cayuga's +Cd +Cd's +Ceausescu +Ceausescu's +Cebu +Cebu's +Cebuano +Cebuano's +Cecelia +Cecelia's +Cecil +Cecil's +Cecile +Cecile's +Cecilia +Cecilia's +Cecily +Cecily's +Cedric +Cedric's +Celebes +Celebes's +Celeste +Celeste's +Celgene +Celgene's +Celia +Celia's +Celina +Celina's +Cellini +Cellini's +Celsius +Celsius's +Celt +Celt's +Celtic +Celtic's +Celtics +Celts +Cenozoic +Cenozoic's +Centaurus +Centaurus's +Centigrade +Cepheid +Cepheid's +Cepheus +Cepheus's +Cerberus +Cerberus's +Cerenkov +Cerenkov's +Ceres +Ceres's +Cerf +Cerf's +Cervantes +Cervantes's +Cesar +Cesar's +Cesarean +Cesarean's +Cessna +Cessna's +Cetus +Cetus's +Ceylon +Ceylon's +Cezanne +Cezanne's +Ch'in +Ch'in's +Chablis +Chablis's +Chad +Chad's +Chadwick +Chadwick's +Chagall +Chagall's +Chaitanya +Chaitanya's +Chaitin +Chaitin's +Chaldean +Chaldean's +Challenger +Challenger's +Chamberlain +Chamberlain's +Chambers +Chambers's +Champlain +Champlain's +Champollion +Champollion's +Chan +Chan's +Chance +Chance's +Chancellorsville +Chancellorsville's +Chandigarh +Chandigarh's +Chandler +Chandler's +Chandon +Chandon's +Chandra +Chandra's +Chandragupta +Chandragupta's +Chandrasekhar +Chandrasekhar's +Chanel +Chanel's +Chaney +Chaney's +Chang +Chang's +Changchun +Changchun's +Changsha +Changsha's +Chantilly +Chantilly's +Chanukah +Chanukah's +Chanukahs +Chaplin +Chaplin's +Chapman +Chapman's +Chappaquiddick +Chappaquiddick's +Chapultepec +Chapultepec's +Charbray +Charbray's +Chardonnay +Chardonnay's +Charity +Charity's +Charlemagne +Charlemagne's +Charlene +Charlene's +Charles +Charles's +Charleston +Charleston's +Charlestons +Charley +Charley's +Charlie +Charlie's +Charlotte +Charlotte's +Charlottetown +Charlottetown's +Charmaine +Charmaine's +Charmin +Charmin's +Charolais +Charolais's +Charon +Charon's +Chartism +Chartism's +Chartres +Chartres's +Charybdis +Charybdis's +Chase +Chase's +Chasity +Chasity's +Chateaubriand 
+Chateaubriand's +Chattahoochee +Chattahoochee's +Chattanooga +Chattanooga's +Chatterley +Chatterley's +Chatterton +Chatterton's +Chaucer +Chaucer's +Chauncey +Chauncey's +Chautauqua +Chautauqua's +Chavez +Chavez's +Chayefsky +Chayefsky's +Che +Che's +Chechen +Chechen's +Chechnya +Chechnya's +Cheddar +Cheddar's +Cheer +Cheer's +Cheerios +Cheerios's +Cheetos +Cheetos's +Cheever +Cheever's +Chekhov +Chekhov's +Chelsea +Chelsea's +Chelyabinsk +Chelyabinsk's +Chen +Chen's +Cheney +Cheney's +Chengdu +Chengdu's +Chennai +Chennai's +Cheops +Cheops's +Cheri +Cheri's +Cherie +Cherie's +Chernenko +Chernenko's +Chernobyl +Chernobyl's +Chernomyrdin +Chernomyrdin's +Cherokee +Cherokee's +Cherokees +Cherry +Cherry's +Cheryl +Cheryl's +Chesapeake +Chesapeake's +Cheshire +Cheshire's +Chester +Chester's +Chesterfield +Chesterfield's +Chesterton +Chesterton's +Chevalier +Chevalier's +Cheviot +Cheviot's +Chevrolet +Chevrolet's +Chevron +Chevron's +Chevy +Chevy's +Cheyenne +Cheyenne's +Cheyennes +Chi +Chi's +Chianti +Chianti's +Chiantis +Chiba +Chiba's +Chibcha +Chibcha's +Chicago +Chicago's +Chicagoan +Chicagoan's +Chicana +Chicana's +Chicano +Chicano's +Chickasaw +Chickasaw's +Chiclets +Chiclets's +Chihuahua +Chihuahua's +Chihuahuas +Chile +Chile's +Chilean +Chilean's +Chileans +Chimborazo +Chimborazo's +Chimera +Chimera's +Chimu +Chimu's +China +China's +Chinatown +Chinatown's +Chinese +Chinese's +Chinook +Chinook's +Chinooks +Chipewyan +Chipewyan's +Chippendale +Chippendale's +Chippewa +Chippewa's +Chiquita +Chiquita's +Chirico +Chirico's +Chisholm +Chisholm's +Chisinau +Chisinau's +Chittagong +Chittagong's +Chivas +Chivas's +Chloe +Chloe's +Choctaw +Choctaw's +Chomsky +Chomsky's +Chongqing +Chongqing's +Chopin +Chopin's +Chopra +Chopra's +Chou +Chou's +Chretien +Chretien's +Chris +Chris's +Christ +Christ's +Christa +Christa's +Christchurch +Christchurch's +Christendom +Christendom's +Christendoms +Christensen +Christensen's +Christi +Christi's +Christian +Christian's 
+Christianities +Christianity +Christianity's +Christians +Christie +Christie's +Christina +Christina's +Christine +Christine's +Christmas +Christmas's +Christmases +Christoper +Christoper's +Christopher +Christopher's +Christs +Christy +Christy's +Chrysler +Chrysler's +Chrysostom +Chrysostom's +Chrystal +Chrystal's +Chuck +Chuck's +Chukchi +Chukchi's +Chumash +Chumash's +Chung +Chung's +Chungking +Chungking's +Church +Church's +Churchill +Churchill's +Churriguera +Churriguera's +Chuvash +Chuvash's +Ci +Ci's +Cicero +Cicero's +Cid +Cid's +Cimabue +Cimabue's +Cincinnati +Cincinnati's +Cinderella +Cinderella's +Cinderellas +Cindy +Cindy's +CinemaScope +CinemaScope's +Cinerama +Cinerama's +Cipro +Cipro's +Circe +Circe's +Cisco +Cisco's +Citibank +Citibank's +Citigroup +Citigroup's +Citroen +Citroen's +Cl +Cl's +Claiborne +Claiborne's +Clair +Clair's +Claire +Claire's +Clairol +Clairol's +Clancy +Clancy's +Clapeyron +Clapeyron's +Clapton +Clapton's +Clara +Clara's +Clare +Clare's +Clarence +Clarence's +Clarendon +Clarendon's +Clarice +Clarice's +Clarissa +Clarissa's +Clark +Clark's +Clarke +Clarke's +Claude +Claude's +Claudette +Claudette's +Claudia +Claudia's +Claudine +Claudine's +Claudio +Claudio's +Claudius +Claudius's +Claus +Claus's +Clausewitz +Clausewitz's +Clausius +Clausius's +Clay +Clay's +Clayton +Clayton's +Clearasil +Clearasil's +Clem +Clem's +Clemenceau +Clemenceau's +Clemens +Clemens's +Clement +Clement's +Clementine +Clementine's +Clements +Clements's +Clemons +Clemons's +Clemson +Clemson's +Cleo +Cleo's +Cleopatra +Cleopatra's +Cleveland +Cleveland's +Cliburn +Cliburn's +Cliff +Cliff's +Clifford +Clifford's +Clifton +Clifton's +Cline +Cline's +Clint +Clint's +Clinton +Clinton's +Clio +Clio's +Clive +Clive's +Clojure +Clojure's +Clorets +Clorets's +Clorox +Clorox's +Closure +Closure's +Clotho +Clotho's +Clouseau +Clouseau's +Clovis +Clovis's +Clyde +Clyde's +Clydesdale +Clydesdale's +Clytemnestra +Clytemnestra's +Cobain +Cobain's +Cobb +Cobb's 
+Cochabamba +Cochabamba's +Cochin +Cochin's +Cochise +Cochise's +Cochran +Cochran's +Cockney +Cockney's +Cocteau +Cocteau's +Cody +Cody's +Coffey +Coffey's +Cognac +Cognac's +Cohan +Cohan's +Cohen +Cohen's +Coimbatore +Coimbatore's +Cointreau +Cointreau's +Coke +Coke's +Cokes +Colbert +Colbert's +Colby +Colby's +Cole +Cole's +Coleen +Coleen's +Coleman +Coleman's +Coleridge +Coleridge's +Colette +Colette's +Colfax +Colfax's +Colgate +Colgate's +Colin +Colin's +Colleen +Colleen's +Collier +Collier's +Collin +Collin's +Collins +Collins's +Cologne +Cologne's +Colombia +Colombia's +Colombian +Colombian's +Colombians +Colombo +Colombo's +Colon +Colon's +Colonial +Colorado +Colorado's +Colosseum +Colosseum's +Colt +Colt's +Coltrane +Coltrane's +Columbia +Columbia's +Columbine +Columbine's +Columbus +Columbus's +Comanche +Comanche's +Comanches +Combs +Combs's +Comintern +Comintern's +Commons +Commons's +Commonwealth +Communion +Communion's +Communions +Communism +Communist +Communist's +Communists +Como +Como's +Comoros +Comoros's +Compaq +Compaq's +Compton +Compton's +CompuServe +CompuServe's +Comte +Comte's +Conakry +Conakry's +Conan +Conan's +Concepción +Concepción's +Concetta +Concetta's +Concord +Concord's +Concorde +Concorde's +Concords +Condillac +Condillac's +Condorcet +Condorcet's +Conestoga +Conestoga's +Confederacy +Confederacy's +Confederate +Confederate's +Confederates +Confucian +Confucian's +Confucianism +Confucianism's +Confucianisms +Confucians +Confucius +Confucius's +Congo +Congo's +Congolese +Congolese's +Congregationalist +Congregationalist's +Congregationalists +Congress +Congress's +Congresses +Congreve +Congreve's +Conley +Conley's +Connecticut +Connecticut's +Connemara +Connemara's +Conner +Conner's +Connery +Connery's +Connie +Connie's +Connolly +Connolly's +Connors +Connors's +Conrad +Conrad's +Conrail +Conrail's +Constable +Constable's +Constance +Constance's +Constantine +Constantine's +Constantinople +Constantinople's +Constitution +Consuelo 
+Consuelo's +Continent +Continent's +Continental +Continental's +Contreras +Contreras's +Conway +Conway's +Cook +Cook's +Cooke +Cooke's +Cooley +Cooley's +Coolidge +Coolidge's +Cooper +Cooper's +Cooperstown +Cooperstown's +Coors +Coors's +Copacabana +Copacabana's +Copeland +Copeland's +Copenhagen +Copenhagen's +Copernican +Copernican's +Copernicus +Copernicus's +Copland +Copland's +Copley +Copley's +Copperfield +Copperfield's +Coppertone +Coppertone's +Coppola +Coppola's +Coptic +Coptic's +Cora +Cora's +Cordelia +Cordelia's +Cordilleras +Cordilleras's +Cordoba +Cordoba's +Cordova +Cordova's +Corey +Corey's +Corfu +Corfu's +Corina +Corina's +Corine +Corine's +Corinne +Corinne's +Corinth +Corinth's +Corinthian +Corinthian's +Corinthians +Corinthians's +Coriolanus +Coriolanus's +Coriolis +Coriolis's +Corleone +Corleone's +Cormack +Cormack's +Corneille +Corneille's +Cornelia +Cornelia's +Cornelius +Cornelius's +Cornell +Cornell's +Corning +Corning's +Cornish +Cornish's +Cornwall +Cornwall's +Cornwallis +Cornwallis's +Coronado +Coronado's +Corot +Corot's +Correggio +Correggio's +Corrine +Corrine's +Corsica +Corsica's +Corsican +Corsican's +Cortes +Cortes's +Corteses +Cortez +Cortez's +Cortland +Cortland's +Corvallis +Corvallis's +Corvette +Corvette's +Corvus +Corvus's +Cory +Cory's +Cosby +Cosby's +CosmosDB +CosmosDB's +Cossack +Cossack's +Costco +Costco's +Costello +Costello's +Costner +Costner's +Cote +Cote's +Cotonou +Cotonou's +Cotopaxi +Cotopaxi's +Cotswold +Cotswold's +Cotton +Cotton's +Coulomb +Coulomb's +Coulter +Coulter's +Couperin +Couperin's +Courbet +Courbet's +Courtney +Courtney's +Cousteau +Cousteau's +Coventries +Coventry +Coventry's +Coward +Coward's +Cowley +Cowley's +Cowper +Cowper's +Cox +Cox's +Coy +Coy's +Cozumel +Cozumel's +Cr +Cr's +Crabbe +Crabbe's +Craft +Craft's +Craig +Craig's +Cranach +Cranach's +Crane +Crane's +Cranmer +Cranmer's +Crater +Crater's +Crawford +Crawford's +Cray +Cray's +Crayola +Crayola's +Creation +Creation's +Creator 
+Creator's +Crecy +Crecy's +Cree +Cree's +Creek +Creek's +Creighton +Creighton's +Creole +Creole's +Creoles +Creon +Creon's +Cressida +Cressida's +Crest +Crest's +Cretaceous +Cretaceous's +Cretan +Cretan's +Crete +Crete's +Crichton +Crichton's +Crick +Crick's +Crimea +Crimea's +Crimean +Crimean's +Criollo +Criollo's +Crisco +Crisco's +Cristina +Cristina's +Croat +Croat's +Croatia +Croatia's +Croatian +Croatian's +Croatians +Croats +Croce +Croce's +Crockett +Crockett's +Croesus +Croesus's +Cromwell +Cromwell's +Cromwellian +Cromwellian's +Cronin +Cronin's +Cronkite +Cronkite's +Cronus +Cronus's +Crookes +Crookes's +Crosby +Crosby's +Cross +Cross's +Crowley +Crowley's +Cruikshank +Cruikshank's +Cruise +Cruise's +Crusades +Crusades's +Crusoe +Crusoe's +Crux +Crux's +Cruz +Cruz's +Cryptozoic +Cryptozoic's +Crystal +Crystal's +Cs +Csonka +Csonka's +Ctesiphon +Ctesiphon's +Cthulhu +Cthulhu's +Cu +Cu's +Cuba +Cuba's +Cuban +Cuban's +Cubans +Cuchulain +Cuchulain's +Cuisinart +Cuisinart's +Culbertson +Culbertson's +Cullen +Cullen's +Cumberland +Cumberland's +Cummings +Cummings's +Cunard +Cunard's +Cunningham +Cunningham's +Cupid +Cupid's +Curacao +Curacao's +Curie +Curie's +Curitiba +Curitiba's +Currier +Currier's +Curry +Curry's +Curt +Curt's +Curtis +Curtis's +Custer +Custer's +Cuvier +Cuvier's +Cuzco +Cuzco's +Cybele +Cybele's +Cyclades +Cyclades's +Cyclops +Cyclops's +Cygnus +Cygnus's +Cymbeline +Cymbeline's +Cynthia +Cynthia's +Cyprian +Cyprian's +Cypriot +Cypriot's +Cypriots +Cyprus +Cyprus's +Cyrano +Cyrano's +Cyril +Cyril's +Cyrillic +Cyrillic's +Cyrus +Cyrus's +Czech +Czech's +Czechia +Czechia's +Czechoslovakia +Czechoslovakia's +Czechoslovakian +Czechoslovakian's +Czechoslovakians +Czechs +Czerny +Czerny's +D +D's +Dacca +Dacca's +Dachau +Dachau's +Dacron +Dacron's +Dacrons +Dada +Dada's +Dadaism +Dadaism's +Daedalus +Daedalus's +Daguerre +Daguerre's +Dagwood +Dagwood's +Dahomey +Dahomey's +Daimler +Daimler's +Daisy +Daisy's +Dakar +Dakar's +Dakota +Dakota's 
+Dakotan +Dakotan's +Dakotas +Dalai +Dale +Dale's +Daley +Daley's +Dali +Dali's +Dalian +Dalian's +Dallas +Dallas's +Dalmatian +Dalmatian's +Dalmatians +Dalton +Dalton's +Damascus +Damascus's +Damian +Damian's +Damien +Damien's +Damion +Damion's +Damocles +Damocles's +Damon +Damon's +Dana +Dana's +Dane +Dane's +Danelaw +Danelaw's +Danes +Dangerfield +Dangerfield's +Danial +Danial's +Daniel +Daniel's +Danielle +Danielle's +Daniels +Daniels's +Danish +Danish's +Dannie +Dannie's +Danny +Danny's +Danone +Danone's +Dante +Dante's +Danton +Danton's +Danube +Danube's +Danubian +Danubian's +Daphne +Daphne's +Darby +Darby's +Darcy +Darcy's +Dardanelles +Dardanelles's +Dare +Dare's +Daren +Daren's +Darfur +Darfur's +Darin +Darin's +Dario +Dario's +Darius +Darius's +Darjeeling +Darjeeling's +Darla +Darla's +Darlene +Darlene's +Darling +Darling's +Darnell +Darnell's +Darrel +Darrel's +Darrell +Darrell's +Darren +Darren's +Darrin +Darrin's +Darrow +Darrow's +Darryl +Darryl's +Darth +Darth's +Dartmoor +Dartmoor's +Dartmouth +Dartmouth's +Darvon +Darvon's +Darwin +Darwin's +Darwinian +Darwinian's +Darwinism +Darwinism's +Daryl +Daryl's +Daugherty +Daugherty's +Daumier +Daumier's +Davao +Davao's +Dave +Dave's +Davenport +Davenport's +David +David's +Davids +Davidson +Davidson's +Davies +Davies's +Davis +Davis's +Davy +Davy's +Dawes +Dawes's +Dawn +Dawn's +Dawson +Dawson's +Day +Day's +Dayton +Dayton's +DeGeneres +DeGeneres's +Deadhead +Deadhead's +Dean +Dean's +Deana +Deana's +Deandre +Deandre's +Deann +Deann's +Deanna +Deanna's +Deanne +Deanne's +Debbie +Debbie's +Debby +Debby's +Debian +Debian's +Debora +Debora's +Deborah +Deborah's +Debouillet +Debouillet's +Debra +Debra's +Debs +Debs's +Debussy +Debussy's +Dec +Dec's +Decalogue +Decalogue's +Decatur +Decatur's +Decca +Decca's +Deccan +Deccan's +December +December's +Decembers +Decker +Decker's +Dedekind +Dedekind's +Dee +Dee's +Deena +Deena's +Deere +Deere's +Defoe +Defoe's +Degas +Degas's +Deidre +Deidre's +Deimos +Deimos's 
+Deirdre +Deirdre's +Deity +Dejesus +Dejesus's +Delacroix +Delacroix's +Delacruz +Delacruz's +Delaney +Delaney's +Delano +Delano's +Delaware +Delaware's +Delawarean +Delawarean's +Delawareans +Delawares +Delbert +Delbert's +Deleon +Deleon's +Delgado +Delgado's +Delhi +Delhi's +Delia +Delia's +Delibes +Delibes's +Delicious +Delicious's +Delilah +Delilah's +Delius +Delius's +Dell +Dell's +Della +Della's +Delmar +Delmar's +Delmarva +Delmarva's +Delmer +Delmer's +Delmonico +Delmonico's +Delores +Delores's +Deloris +Deloris's +Delphi +Delphi's +Delphic +Delphic's +Delphinus +Delphinus's +Delta +Delta's +Demavend +Demavend's +Demerol +Demerol's +Demeter +Demeter's +Demetrius +Demetrius's +Deming +Deming's +Democrat +Democrat's +Democratic +Democrats +Democritus +Democritus's +Demosthenes +Demosthenes's +Dempsey +Dempsey's +Dena +Dena's +Deneb +Deneb's +Denebola +Denebola's +Deng +Deng's +Denis +Denis's +Denise +Denise's +Denmark +Denmark's +Dennis +Dennis's +Denny +Denny's +Denver +Denver's +Deon +Deon's +Depp +Depp's +Derby +Derby's +Derek +Derek's +Derick +Derick's +Derrick +Derrick's +Derrida +Derrida's +Descartes +Descartes's +Desdemona +Desdemona's +Desiree +Desiree's +Desmond +Desmond's +Detroit +Detroit's +Deuteronomy +Deuteronomy's +Devanagari +Devanagari's +Devi +Devi's +Devin +Devin's +Devon +Devon's +Devonian +Devonian's +Dewar +Dewar's +Dewayne +Dewayne's +Dewey +Dewey's +Dewitt +Dewitt's +Dexedrine +Dexedrine's +Dexter +Dexter's +Dhaka +Dhaka's +Dhaulagiri +Dhaulagiri's +Di +Di's +DiCaprio +DiCaprio's +DiMaggio +DiMaggio's +Diaghilev +Diaghilev's +Dial +Dial's +Diana +Diana's +Diane +Diane's +Diann +Diann's +Dianna +Dianna's +Dianne +Dianne's +Diaspora +Diaspora's +Diaz +Diaz's +Dick +Dick's +Dickens +Dickens's +Dickerson +Dickerson's +Dickinson +Dickinson's +Dickson +Dickson's +Dictaphone +Dictaphone's +Diderot +Diderot's +Dido +Dido's +Didrikson +Didrikson's +Diefenbaker +Diefenbaker's +Diego +Diego's +Diem +Diem's +Diesel +Diesel's +Dietrich +Dietrich's 
+Dijkstra +Dijkstra's +Dijon +Dijon's +Dilbert +Dilbert's +Dillard +Dillard's +Dillinger +Dillinger's +Dillon +Dillon's +Dina +Dina's +Dinah +Dinah's +Dino +Dino's +Diocletian +Diocletian's +Diogenes +Diogenes's +Dion +Dion's +Dionne +Dionne's +Dionysian +Dionysian's +Dionysus +Dionysus's +Diophantine +Diophantine's +Dior +Dior's +Dipper +Dipper's +Dirac +Dirac's +Dirichlet +Dirichlet's +Dirk +Dirk's +Dis +Dis's +Disney +Disney's +Disneyland +Disneyland's +Disraeli +Disraeli's +Diwali +Diwali's +Dix +Dix's +Dixie +Dixie's +Dixiecrat +Dixiecrat's +Dixieland +Dixieland's +Dixielands +Dixon +Dixon's +Djakarta +Djakarta's +Django +Django's +Djibouti +Djibouti's +Dmitri +Dmitri's +Dnepropetrovsk +Dnepropetrovsk's +Dnieper +Dnieper's +Dniester +Dniester's +Dobbin +Dobbin's +Doberman +Doberman's +Dobro +Dobro's +Doctor +Doctorow +Doctorow's +Dodge +Dodge's +Dodgson +Dodgson's +Dodoma +Dodoma's +Dodson +Dodson's +Doe +Doe's +Doha +Doha's +Dolby +Dolby's +Dole +Dole's +Dollie +Dollie's +Dolly +Dolly's +Dolores +Dolores's +Domesday +Domesday's +Domingo +Domingo's +Dominguez +Dominguez's +Dominic +Dominic's +Dominica +Dominica's +Dominican +Dominican's +Dominicans +Dominick +Dominick's +Dominique +Dominique's +Domitian +Domitian's +Don +Don's +Dona +Dona's +Donahue +Donahue's +Donald +Donald's +Donaldson +Donaldson's +Donatello +Donatello's +Donetsk +Donetsk's +Donizetti +Donizetti's +Donn +Donn's +Donna +Donna's +Donne +Donne's +Donnell +Donnell's +Donner +Donner's +Donnie +Donnie's +Donny +Donny's +Donovan +Donovan's +Dooley +Dooley's +Doolittle +Doolittle's +Doonesbury +Doonesbury's +Doppler +Doppler's +Dora +Dora's +Dorcas +Dorcas's +Doreen +Doreen's +Dorian +Dorian's +Doric +Doric's +Doris +Doris's +Doritos +Doritos's +Dorothea +Dorothea's +Dorothy +Dorothy's +Dorset +Dorset's +Dorsey +Dorsey's +Dorthy +Dorthy's +Dortmund +Dortmund's +Dostoevsky +Dostoevsky's +Dot +Dot's +Dotson +Dotson's +Douala +Douala's +Douay +Douay's +Doubleday +Doubleday's +Doug +Doug's +Douglas 
+Douglas's +Douglass +Douglass's +Douro +Douro's +Dover +Dover's +Dow +Dow's +Downs +Downs's +Downy +Downy's +Doyle +Doyle's +Draco +Draco's +Draconian +Draconian's +Dracula +Dracula's +Drake +Drake's +Dramamine +Dramamine's +Drambuie +Drambuie's +Drano +Drano's +Dravidian +Dravidian's +Dreiser +Dreiser's +Dresden +Dresden's +Drew +Drew's +Dreyfus +Dreyfus's +Dristan +Dristan's +Dropbox +Dropbox's +Drudge +Drudge's +Druid +Druid's +Drupal +Drupal's +Dryden +Dryden's +Dschubba +Dschubba's +DuPont +DuPont's +Duane +Duane's +Dubai +Dubai's +Dubcek +Dubcek's +Dubhe +Dubhe's +Dublin +Dublin's +Dubrovnik +Dubrovnik's +Duchamp +Duchamp's +Dudley +Dudley's +Duffy +Duffy's +Duisburg +Duisburg's +Duke +Duke's +Dulles +Dulles's +Duluth +Duluth's +Dumas +Dumas's +Dumbledore +Dumbledore's +Dumbo +Dumbo's +Dumpster +Dumpster's +Dunant +Dunant's +Dunbar +Dunbar's +Duncan +Duncan's +Dunedin +Dunedin's +Dunkirk +Dunkirk's +Dunlap +Dunlap's +Dunn +Dunn's +Dunne +Dunne's +Duracell +Duracell's +Duran +Duran's +Durant +Durant's +Durante +Durante's +Durban +Durban's +Durex +Durex's +Durham +Durham's +Durhams +Durkheim +Durkheim's +Duroc +Duroc's +Durocher +Durocher's +Duse +Duse's +Dushanbe +Dushanbe's +Dustbuster +Dustbuster's +Dustin +Dustin's +Dusty +Dusty's +Dutch +Dutch's +Dutchman +Dutchman's +Dutchmen +Dutchmen's +Duvalier +Duvalier's +Dvina +Dvina's +Dvorák +Dvorák's +Dwayne +Dwayne's +Dwight +Dwight's +Dyer +Dyer's +Dylan +Dylan's +DynamoDB +DynamoDB's +Dyson +Dyson's +Dzerzhinsky +Dzerzhinsky's +Dzungaria +Dzungaria's +Dürer +Dürer's +Düsseldorf +Düsseldorf's +E +E's +ECMAScript +ECMAScript's +Eakins +Eakins's +Earhart +Earhart's +Earl +Earl's +Earle +Earle's +Earlene +Earlene's +Earline +Earline's +Earnest +Earnest's +Earnestine +Earnestine's +Earnhardt +Earnhardt's +Earp +Earp's +Earth +Earth's +East +East's +Easter +Easter's +Eastern +Easterner +Easters +Eastman +Eastman's +Easts +Eastwood +Eastwood's +Eaton +Eaton's +Eben +Eben's +Ebeneezer +Ebeneezer's +Ebert +Ebert's 
+Ebola +Ebola's +Ebonics +Ebonics's +Ebony +Ebony's +Ebro +Ebro's +Ecclesiastes +Ecclesiastes's +Eco +Eco's +Ecuador +Ecuador's +Ecuadoran +Ecuadoran's +Ecuadorans +Ecuadorian +Ecuadorian's +Ecuadorians +Ed +Ed's +Edam +Edam's +Edams +Edda +Edda's +Eddie +Eddie's +Eddington +Eddington's +Eddy +Eddy's +Eden +Eden's +Edens +Edgar +Edgar's +Edgardo +Edgardo's +Edinburgh +Edinburgh's +Edison +Edison's +Edith +Edith's +Edmond +Edmond's +Edmonton +Edmonton's +Edmund +Edmund's +Edna +Edna's +Edsel +Edsel's +Eduardo +Eduardo's +Edward +Edward's +Edwardian +Edwardian's +Edwardo +Edwardo's +Edwards +Edwards's +Edwin +Edwin's +Edwina +Edwina's +Eeyore +Eeyore's +Effie +Effie's +Efrain +Efrain's +Efren +Efren's +Eggo +Eggo's +Egypt +Egypt's +Egyptian +Egyptian's +Egyptians +Egyptology +Egyptology's +Ehrenberg +Ehrenberg's +Ehrlich +Ehrlich's +Eichmann +Eichmann's +Eiffel +Eiffel's +Eileen +Eileen's +Einstein +Einstein's +Einsteins +Eire +Eire's +Eisenhower +Eisenhower's +Eisenstein +Eisenstein's +Eisner +Eisner's +Elaine +Elaine's +Elam +Elam's +Elanor +Elanor's +Elasticsearch +Elasticsearch's +Elastoplast +Elastoplast's +Elba +Elba's +Elbe +Elbe's +Elbert +Elbert's +Elbrus +Elbrus's +Eldon +Eldon's +Eleanor +Eleanor's +Eleazar +Eleazar's +Electra +Electra's +Elena +Elena's +Elgar +Elgar's +Eli +Eli's +Elias +Elias's +Elijah +Elijah's +Elinor +Elinor's +Eliot +Eliot's +Elisa +Elisa's +Elisabeth +Elisabeth's +Elise +Elise's +Eliseo +Eliseo's +Elisha +Elisha's +Eliza +Eliza's +Elizabeth +Elizabeth's +Elizabethan +Elizabethan's +Elizabethans +Ella +Ella's +Ellen +Ellen's +Ellesmere +Ellesmere's +Ellie +Ellie's +Ellington +Ellington's +Elliot +Elliot's +Elliott +Elliott's +Ellis +Ellis's +Ellison +Ellison's +Elma +Elma's +Elmer +Elmer's +Elmo +Elmo's +Elnath +Elnath's +Elnora +Elnora's +Elohim +Elohim's +Eloise +Eloise's +Eloy +Eloy's +Elroy +Elroy's +Elsa +Elsa's +Elsie +Elsie's +Elsinore +Elsinore's +Eltanin +Eltanin's +Elton +Elton's +Elul +Elul's +Elva +Elva's +Elvia +Elvia's 
+Elvin +Elvin's +Elvira +Elvira's +Elvis +Elvis's +Elway +Elway's +Elwood +Elwood's +Elysian +Elysian's +Elysium +Elysium's +Elysiums +Elysée +Elysée's +Emacs +Emacs's +Emanuel +Emanuel's +Emerson +Emerson's +Emery +Emery's +Emil +Emil's +Emile +Emile's +Emilia +Emilia's +Emilio +Emilio's +Emily +Emily's +Eminem +Eminem's +Emma +Emma's +Emmanuel +Emmanuel's +Emmett +Emmett's +Emmy +Emmy's +Emory +Emory's +Encarta +Encarta's +Endymion +Endymion's +Engels +Engels's +England +England's +English +English's +Englisher +Englishes +Englishman +Englishman's +Englishmen +Englishmen's +Englishwoman +Englishwoman's +Englishwomen +Englishwomen's +Enid +Enid's +Enif +Enif's +Eniwetok +Eniwetok's +Enkidu +Enkidu's +Enoch +Enoch's +Enos +Enos's +Enrico +Enrico's +Enrique +Enrique's +Enron +Enron's +Enterprise +Enterprise's +Eocene +Eocene's +Epcot +Epcot's +Ephesian +Ephesian's +Ephesus +Ephesus's +Ephraim +Ephraim's +Epictetus +Epictetus's +Epicurean +Epicurean's +Epicurus +Epicurus's +Epimethius +Epimethius's +Epiphanies +Epiphany +Epiphany's +Episcopal +Episcopalian +Episcopalian's +Episcopalians +Epsom +Epsom's +Epson +Epson's +Epstein +Epstein's +Equuleus +Equuleus's +Erasmus +Erasmus's +Erato +Erato's +Eratosthenes +Eratosthenes's +Erebus +Erebus's +Erector +Erector's +Erewhon +Erewhon's +Erhard +Erhard's +Eric +Eric's +Erica +Erica's +Erich +Erich's +Erick +Erick's +Ericka +Ericka's +Erickson +Erickson's +Ericson +Ericson's +Ericsson +Ericsson's +Eridanus +Eridanus's +Erie +Erie's +Erik +Erik's +Erika +Erika's +Erin +Erin's +Eris +Eris's +Eritrea +Eritrea's +Erlang +Erlang's +Erlenmeyer +Erlenmeyer's +Erma +Erma's +Erna +Erna's +Ernest +Ernest's +Ernestine +Ernestine's +Ernesto +Ernesto's +Ernie +Ernie's +Ernst +Ernst's +Eros +Eros's +Eroses +Errol +Errol's +Erse +Erse's +ErvIn +ErvIn's +Erwin +Erwin's +Es +Esau +Esau's +Escher +Escher's +Escherichia +Escherichia's +Eskimo +Eskimo's +Eskimos +Esmeralda +Esmeralda's +Esperanto +Esperanto's +Esperanza +Esperanza's +Espinoza 
+Espinoza's +Essen +Essen's +Essene +Essene's +Essequibo +Essequibo's +Essex +Essex's +Essie +Essie's +Establishment +Esteban +Esteban's +Estela +Estela's +Estella +Estella's +Estelle +Estelle's +Ester +Ester's +Esterházy +Esterházy's +Estes +Estes's +Esther +Esther's +Estonia +Estonia's +Estonian +Estonian's +Estonians +Estrada +Estrada's +Ethan +Ethan's +Ethel +Ethel's +Ethelred +Ethelred's +Ethernet +Ethernet's +Ethiopia +Ethiopia's +Ethiopian +Ethiopian's +Ethiopians +Etna +Etna's +Eton +Eton's +Etruria +Etruria's +Etruscan +Etruscan's +Etta +Etta's +Eucharist +Eucharist's +Eucharistic +Eucharists +Euclid +Euclid's +Euclidean +Euclidean's +Eugene +Eugene's +Eugenia +Eugenia's +Eugenie +Eugenie's +Eugenio +Eugenio's +Eula +Eula's +Euler +Euler's +Eumenides +Eumenides's +Eunice +Eunice's +Euphrates +Euphrates's +Eurasia +Eurasia's +Eurasian +Eurasian's +Eurasians +Euripides +Euripides's +Eurodollar +Eurodollar's +Eurodollars +Europa +Europa's +Europe +Europe's +European +European's +Europeans +Eurydice +Eurydice's +Eustachian +Eustachian's +Euterpe +Euterpe's +Eva +Eva's +Evan +Evan's +Evangelina +Evangelina's +Evangeline +Evangeline's +Evans +Evans's +Evansville +Evansville's +Eve +Eve's +Evelyn +Evelyn's +Evenki +Evenki's +EverReady +EverReady's +Everest +Everest's +Everett +Everett's +Everette +Everette's +Everglades +Everglades's +Evert +Evert's +Evian +Evian's +Evita +Evita's +Ewing +Ewing's +Excalibur +Excalibur's +Excedrin +Excedrin's +Excellencies +Excellency +Excellency's +Exercycle +Exercycle's +Exocet +Exocet's +Exodus +Exodus's +Exxon +Exxon's +Eyck +Eyck's +Eyre +Eyre's +Eysenck +Eysenck's +Ezekiel +Ezekiel's +Ezra +Ezra's +F +F's +FDR +FDR's +FNMA +FNMA's +FSF +FSF's +Fabergé +Fabergé's +Fabian +Fabian's +Facebook +Facebook's +Faeroe +Faeroe's +Fafnir +Fafnir's +Fagin +Fagin's +Fahd +Fahd's +Fahrenheit +Fahrenheit's +Fairbanks +Fairbanks's +Faisal +Faisal's +Faisalabad +Faisalabad's +Faith +Faith's +Falasha +Falasha's +Falkland +Falkland's 
+Falklands +Falklands's +Fallopian +Fallopian's +Falstaff +Falstaff's +Falwell +Falwell's +Fannie +Fannie's +Fanny +Fanny's +Faraday +Faraday's +Fargo +Fargo's +Farley +Farley's +Farmer +Farmer's +Farragut +Farragut's +Farrakhan +Farrakhan's +Farrell +Farrell's +Farrow +Farrow's +Farsi +Farsi's +Fascism +Fassbinder +Fassbinder's +Fatah +Fatah's +Fates +Fates's +Father +Father's +Fathers +Fatima +Fatima's +Fatimid +Fatimid's +Faulkner +Faulkner's +Faulknerian +Faulknerian's +Fauntleroy +Fauntleroy's +Faust +Faust's +Faustian +Faustian's +Faustino +Faustino's +Faustus +Faustus's +Fawkes +Fawkes's +Fay +Fay's +Faye +Faye's +Fe +Fe's +Feb +Feb's +Februaries +February +February's +FedEx +FedEx's +Federalist +Federalist's +Federico +Federico's +Feds +Feds's +Felecia +Felecia's +Felice +Felice's +Felicia +Felicia's +Felicity +Felicity's +Felipe +Felipe's +Felix +Felix's +Fellini +Fellini's +Fenian +Fenian's +Ferber +Ferber's +Ferdinand +Ferdinand's +Fergus +Fergus's +Ferguson +Ferguson's +Ferlinghetti +Ferlinghetti's +Fermat +Fermat's +Fermi +Fermi's +Fern +Fern's +Fernandez +Fernandez's +Fernando +Fernando's +Ferrari +Ferrari's +Ferraro +Ferraro's +Ferrell +Ferrell's +Ferris +Ferris's +Feynman +Feynman's +Fez +Fez's +Fiat +Fiat's +Fiberglas +Fiberglas's +Fibonacci +Fibonacci's +Fichte +Fichte's +Fidel +Fidel's +Fido +Fido's +Fielding +Fielding's +Fields +Fields's +Figaro +Figaro's +Figueroa +Figueroa's +Fiji +Fiji's +Fijian +Fijian's +Fijians +Filipino +Filipino's +Filipinos +Fillmore +Fillmore's +Filofax +Filofax's +Finch +Finch's +Finland +Finland's +Finley +Finley's +Finn +Finn's +Finnbogadottir +Finnbogadottir's +Finnegan +Finnegan's +Finnish +Finnish's +Finns +Fiona +Fiona's +Firebase +Firebase's +Firefox +Firefox's +Firestone +Firestone's +Fischer +Fischer's +Fisher +Fisher's +Fisk +Fisk's +Fitch +Fitch's +Fitzgerald +Fitzgerald's +Fitzpatrick +Fitzpatrick's +Fitzroy +Fitzroy's +Fizeau +Fizeau's +Flanagan +Flanagan's +Flanders +Flanders's +Flatt +Flatt's +Flaubert 
+Flaubert's +Fleischer +Fleischer's +Fleming +Fleming's +Flemish +Flemish's +Fletcher +Fletcher's +Flint +Flint's +Flintstones +Flintstones's +Flo +Flo's +Flora +Flora's +Florence +Florence's +Florentine +Florentine's +Flores +Flores's +Florida +Florida's +Floridan +Floridan's +Florine +Florine's +Florsheim +Florsheim's +Flory +Flory's +Flossie +Flossie's +Flowers +Flowers's +Floyd +Floyd's +Flynn +Flynn's +Foch +Foch's +Fokker +Fokker's +Foley +Foley's +Folgers +Folgers's +Folsom +Folsom's +Fomalhaut +Fomalhaut's +Fonda +Fonda's +Foosball +Foosball's +Forbes +Forbes's +Ford +Ford's +Foreman +Foreman's +Forest +Forest's +Forester +Forester's +Formica +Formica's +Formicas +Formosa +Formosa's +Formosan +Formosan's +Forrest +Forrest's +Forster +Forster's +Fortaleza +Fortaleza's +Fosse +Fosse's +Foster +Foster's +Fotomat +Fotomat's +Foucault +Foucault's +Fourier +Fourier's +Fourneyron +Fourneyron's +Fowler +Fowler's +Fox +Fox's +Fr +Fr's +Fragonard +Fragonard's +Fran +Fran's +France +France's +Frances +Frances's +Francesca +Francesca's +Francine +Francine's +Francis +Francis's +Francisca +Francisca's +Franciscan +Franciscan's +Francisco +Francisco's +Franck +Franck's +Franco +Franco's +Francois +Francois's +Francoise +Francoise's +Franglais +Franglais's +Frank +Frank's +Frankel +Frankel's +Frankenstein +Frankenstein's +Frankfort +Frankfort's +Frankfurt +Frankfurt's +Frankfurter +Frankfurter's +Frankie +Frankie's +Franklin +Franklin's +Franks +Franks's +Franny +Franny's +Franz +Franz's +Fraser +Fraser's +Frazier +Frazier's +Fred +Fred's +Freda +Freda's +Freddie +Freddie's +Freddy +Freddy's +Frederic +Frederic's +Frederick +Frederick's +Fredericton +Fredericton's +Fredric +Fredric's +Fredrick +Fredrick's +Freeman +Freeman's +Freemason +Freemason's +Freemasonries +Freemasonry +Freemasonry's +Freemasons +Freetown +Freetown's +Freida +Freida's +Fremont +Fremont's +French +French's +Frenches +Frenchman +Frenchman's +Frenchmen +Frenchmen's +Frenchwoman +Frenchwoman's 
+Frenchwomen +Frenchwomen's +Freon +Freon's +Fresnel +Fresnel's +Fresno +Fresno's +Freud +Freud's +Freudian +Freudian's +Frey +Frey's +Freya +Freya's +Friday +Friday's +Fridays +Frieda +Frieda's +Friedan +Friedan's +Friedman +Friedman's +Frigga +Frigga's +Frigidaire +Frigidaire's +Frisbee +Frisbee's +Frisco +Frisco's +Frisian +Frisian's +Frito +Frito's +Fritz +Fritz's +Frobisher +Frobisher's +Froissart +Froissart's +Fromm +Fromm's +Fronde +Fronde's +Frontenac +Frontenac's +Frost +Frost's +Frostbelt +Frostbelt's +Fry +Fry's +Frye +Frye's +Fuchs +Fuchs's +Fuentes +Fuentes's +Fugger +Fugger's +Fuji +Fuji's +Fujitsu +Fujitsu's +Fujiwara +Fujiwara's +Fukuoka +Fukuoka's +Fukuyama +Fukuyama's +Fulani +Fulani's +Fulbright +Fulbright's +Fuller +Fuller's +Fulton +Fulton's +Funafuti +Funafuti's +Fundy +Fundy's +Furtwängler +Furtwängler's +Fushun +Fushun's +Fuzhou +Fuzhou's +Fuzzbuster +Fuzzbuster's +G +G's +GE +GE's +GNU +GNU's +GTE +GTE's +Gable +Gable's +Gabon +Gabon's +Gaborone +Gaborone's +Gabriel +Gabriel's +Gabriela +Gabriela's +Gabrielle +Gabrielle's +Gacrux +Gacrux's +Gadsden +Gadsden's +Gaea +Gaea's +Gael +Gael's +Gaelic +Gaelic's +Gagarin +Gagarin's +Gage +Gage's +Gaia +Gaia's +Gail +Gail's +Gaiman +Gaiman's +Gaines +Gaines's +Gainsborough +Gainsborough's +Galahad +Galahad's +Galahads +Galapagos +Galapagos's +Galatea +Galatea's +Galatia +Galatia's +Galatians +Galatians's +Galbraith +Galbraith's +Gale +Gale's +Galen +Galen's +Galibi +Galibi's +Galilean +Galilean's +Galilee +Galilee's +Galileo +Galileo's +Gall +Gall's +Gallagher +Gallagher's +Gallegos +Gallegos's +Gallic +Gallic's +Gallo +Gallo's +Galloway +Galloway's +Gallup +Gallup's +Galois +Galois's +Galsworthy +Galsworthy's +Galvani +Galvani's +Galveston +Galveston's +Gamay +Gamay's +Gambia +Gambia's +Gamble +Gamble's +Gamow +Gamow's +Gandhi +Gandhi's +Gandhian +Gandhian's +Ganesha +Ganesha's +Ganges +Ganges's +Gangtok +Gangtok's +Gantry +Gantry's +Ganymede +Ganymede's +Gap +Gap's +Garbo +Garbo's +Garcia 
+Garcia's +Gardner +Gardner's +Gareth +Gareth's +Garfield +Garfield's +Garfunkel +Garfunkel's +Gargantua +Gargantua's +Garibaldi +Garibaldi's +Garland +Garland's +Garner +Garner's +Garrett +Garrett's +Garrick +Garrick's +Garrison +Garrison's +Garry +Garry's +Garth +Garth's +Garvey +Garvey's +Gary +Gary's +Garza +Garza's +Gascony +Gascony's +Gasser +Gasser's +Gates +Gates's +Gatling +Gatling's +Gatorade +Gatorade's +Gatsby +Gatsby's +Gatun +Gatun's +Gauguin +Gauguin's +Gaul +Gaul's +Gauls +Gauss +Gauss's +Gaussian +Gaussian's +Gautama +Gautama's +Gautier +Gautier's +Gavin +Gavin's +Gawain +Gawain's +Gay +Gay's +Gayle +Gayle's +Gaza +Gaza's +Gaziantep +Gaziantep's +Gd +Gd's +Gdansk +Gdansk's +Ge +Ge's +Geffen +Geffen's +Gehenna +Gehenna's +Gehrig +Gehrig's +Geiger +Geiger's +Gelbvieh +Gelbvieh's +Geller +Geller's +Gemini +Gemini's +Geminis +Gena +Gena's +Genaro +Genaro's +Gene +Gene's +Genesis +Genesis's +Genet +Genet's +Geneva +Geneva's +Genevieve +Genevieve's +Genghis +Genghis's +Genoa +Genoa's +Genoas +Gentile +Gentile's +Gentiles +Gentoo +Gentoo's +Gentry +Gentry's +Geo +Geo's +Geoffrey +Geoffrey's +George +George's +Georges +Georgetown +Georgetown's +Georgette +Georgette's +Georgia +Georgia's +Georgian +Georgian's +Georgians +Georgina +Georgina's +Gerald +Gerald's +Geraldine +Geraldine's +Gerard +Gerard's +Gerardo +Gerardo's +Gerber +Gerber's +Gere +Gere's +Geritol +Geritol's +German +German's +Germanic +Germanic's +Germans +Germany +Germany's +Geronimo +Geronimo's +Gerry +Gerry's +Gershwin +Gershwin's +Gertrude +Gertrude's +Gestapo +Gestapo's +Gestapos +Gethsemane +Gethsemane's +Getty +Getty's +Gettysburg +Gettysburg's +Gewürztraminer +Gewürztraminer's +Ghana +Ghana's +Ghanaian +Ghanian +Ghanian's +Ghanians +Ghats +Ghats's +Ghazvanid +Ghazvanid's +Ghent +Ghent's +Ghibelline +Ghibelline's +Giacometti +Giacometti's +Giannini +Giannini's +Giauque +Giauque's +Gibbon +Gibbon's +Gibbs +Gibbs's +Gibraltar +Gibraltar's +Gibraltars +Gibson +Gibson's +Gide +Gide's 
+Gideon +Gideon's +Gielgud +Gielgud's +Gienah +Gienah's +Gil +Gil's +Gila +Gila's +Gilbert +Gilbert's +Gilberto +Gilberto's +Gilchrist +Gilchrist's +Gilda +Gilda's +Gilead +Gilead's +Giles +Giles's +Gilgamesh +Gilgamesh's +Gill +Gill's +Gillespie +Gillespie's +Gillette +Gillette's +Gilliam +Gilliam's +Gillian +Gillian's +Gilligan +Gilligan's +Gilmore +Gilmore's +Gina +Gina's +Ginger +Ginger's +Gingrich +Gingrich's +Ginny +Ginny's +Gino +Gino's +Ginsberg +Ginsberg's +Ginsburg +Ginsburg's +Ginsu +Ginsu's +Giorgione +Giorgione's +Giotto +Giotto's +Giovanni +Giovanni's +Gipsies +Gipsy +Gipsy's +Giraudoux +Giraudoux's +Giselle +Giselle's +Gish +Gish's +GitHub +GitHub's +Giuliani +Giuliani's +Giuseppe +Giuseppe's +Giza +Giza's +Gladstone +Gladstone's +Gladstones +Gladys +Gladys's +Glaser +Glaser's +Glasgow +Glasgow's +Glass +Glass's +Glastonbury +Glastonbury's +Glaswegian +Glaswegian's +Glaxo +Glaxo's +Gleason +Gleason's +Glen +Glen's +Glenda +Glenda's +Glendale +Glenlivet +Glenlivet's +Glenn +Glenn's +Glenna +Glenna's +Gloria +Gloria's +Gloucester +Gloucester's +Glover +Glover's +Gnostic +Gnostic's +Gnosticism +Gnosticism's +Goa +Goa's +Gobi +Gobi's +God +God's +Goddard +Goddard's +Godiva +Godiva's +Godot +Godot's +Godthaab +Godthaab's +Godunov +Godunov's +Godzilla +Godzilla's +Goebbels +Goebbels's +Goering +Goering's +Goethals +Goethals's +Goethe +Goethe's +Goff +Goff's +Gog +Gog's +Gogol +Gogol's +Goiania +Goiania's +Golan +Golan's +Golconda +Golconda's +Golda +Golda's +Goldberg +Goldberg's +Golden +Golden's +Goldie +Goldie's +Goldilocks +Goldilocks's +Golding +Golding's +Goldman +Goldman's +Goldsmith +Goldsmith's +Goldwater +Goldwater's +Goldwyn +Goldwyn's +Golgi +Golgi's +Golgotha +Golgotha's +Goliath +Goliath's +Gomez +Gomez's +Gomorrah +Gomorrah's +Gompers +Gompers's +Gomulka +Gomulka's +Gondwanaland +Gondwanaland's +Gonzales +Gonzales's +Gonzalez +Gonzalez's +Gonzalo +Gonzalo's +Good +Good's +Goodall +Goodall's +Goodman +Goodman's +Goodrich +Goodrich's +Goodwill 
+Goodwill's +Goodwin +Goodwin's +Goodyear +Goodyear's +Google +Google's +Goolagong +Goolagong's +Gopher +Gorbachev +Gorbachev's +Gordian +Gordian's +Gordimer +Gordimer's +Gordon +Gordon's +Gore +Gore's +Goren +Goren's +Gorey +Gorey's +Gorgas +Gorgas's +Gorgonzola +Gorgonzola's +Gorky +Gorky's +Gospel +Gospel's +Gospels +Goth +Goth's +Gotham +Gotham's +Gothic +Gothic's +Gothics +Goths +Gouda +Gouda's +Goudas +Gould +Gould's +Gounod +Gounod's +Goya +Goya's +Grable +Grable's +Gracchus +Gracchus's +Grace +Grace's +Graceland +Graceland's +Gracie +Gracie's +Graciela +Graciela's +Grady +Grady's +Graffias +Graffias's +Grafton +Grafton's +Graham +Graham's +Grahame +Grahame's +Grail +Grail's +Grammy +Grammy's +Grampians +Grampians's +Granada +Granada's +Grant +Grant's +Grass +Grass's +Graves +Graves's +Gray +Gray's +Grecian +Grecian's +Greece +Greece's +Greek +Greek's +Greeks +Greeley +Greeley's +Green +Green's +Greene +Greene's +Greenland +Greenland's +Greenpeace +Greenpeace's +Greensboro +Greensboro's +Greensleeves +Greensleeves's +Greenspan +Greenspan's +Greenwich +Greenwich's +Greer +Greer's +Greg +Greg's +Gregg +Gregg's +Gregorian +Gregorian's +Gregorio +Gregorio's +Gregory +Gregory's +Grenada +Grenada's +Grenadines +Grenadines's +Grendel +Grendel's +Grenoble +Grenoble's +Gresham +Gresham's +Greta +Greta's +Gretchen +Gretchen's +Gretel +Gretel's +Gretzky +Gretzky's +Grey +Grey's +Grieg +Grieg's +Griffin +Griffin's +Griffith +Griffith's +Grimes +Grimes's +Grimm +Grimm's +Grinch +Grinch's +Gris +Gris's +Gromyko +Gromyko's +Gropius +Gropius's +Gross +Gross's +Grosz +Grosz's +Grotius +Grotius's +Grover +Grover's +Grumman +Grumman's +Grundy +Grundy's +Grus +Grus's +Gruyeres +Gruyère +Gruyère's +Grünewald +Grünewald's +Guadalajara +Guadalajara's +Guadalcanal +Guadalcanal's +Guadalquivir +Guadalquivir's +Guadalupe +Guadalupe's +Guadeloupe +Guadeloupe's +Guallatiri +Guallatiri's +Guam +Guam's +Guangzhou +Guangzhou's +Guantanamo +Guantanamo's +Guarani +Guarani's +Guarnieri 
+Guarnieri's +Guatemala +Guatemala's +Guatemalan +Guatemalan's +Guatemalans +Guayaquil +Guayaquil's +Gucci +Gucci's +Guelph +Guelph's +Guernsey +Guernsey's +Guernseys +Guerra +Guerra's +Guerrero +Guerrero's +Guevara +Guevara's +Guggenheim +Guggenheim's +Guiana +Guiana's +Guillermo +Guillermo's +Guinea +Guinea's +Guinean +Guinean's +Guineans +Guinevere +Guinevere's +Guinness +Guinness's +Guiyang +Guiyang's +Guizot +Guizot's +Gujarat +Gujarat's +Gujarati +Gujarati's +Gujranwala +Gujranwala's +Gullah +Gullah's +Gulliver +Gulliver's +Gumbel +Gumbel's +Gunther +Gunther's +Guofeng +Guofeng's +Gupta +Gupta's +Gurkha +Gurkha's +Gus +Gus's +Gustav +Gustav's +Gustavo +Gustavo's +Gustavus +Gustavus's +Gutenberg +Gutenberg's +Guthrie +Guthrie's +Gutierrez +Gutierrez's +Guy +Guy's +Guyana +Guyana's +Guyanese +Guyanese's +Guzman +Guzman's +Gwalior +Gwalior's +Gwen +Gwen's +Gwendoline +Gwendoline's +Gwendolyn +Gwendolyn's +Gwyn +Gwyn's +Gypsies +Gypsy +Gypsy's +Gödel +Gödel's +Göteborg +Göteborg's +H +H's +HBO +HBO's +HBase +HBase's +HSBC +HSBC's +Haas +Haas's +Habakkuk +Habakkuk's +Haber +Haber's +Hadar +Hadar's +Hades +Hades's +Hadoop +Hadoop's +Hadrian +Hadrian's +Hafiz +Hafiz's +Hagar +Hagar's +Haggai +Haggai's +Hagiographa +Hagiographa's +Hague +Hague's +Hahn +Hahn's +Haifa +Haifa's +Haiphong +Haiphong's +Haiti +Haiti's +Haitian +Haitian's +Haitians +Hakka +Hakka's +Hakluyt +Hakluyt's +Hal +Hal's +Haldane +Haldane's +Hale +Hale's +Haleakala +Haleakala's +Haley +Haley's +Halifax +Halifax's +Hall +Hall's +Halley +Halley's +Halliburton +Halliburton's +Hallie +Hallie's +Hallmark +Hallmark's +Hallowe'en +Halloween +Halloween's +Halloweens +Hallstatt +Hallstatt's +Halon +Halon's +Hals +Hals's +Halsey +Halsey's +Ham +Ham's +Haman +Haman's +Hamburg +Hamburg's +Hamburgs +Hamhung +Hamhung's +Hamilcar +Hamilcar's +Hamill +Hamill's +Hamilton +Hamilton's +Hamiltonian +Hamiltonian's +Hamitic +Hamitic's +Hamlet +Hamlet's +Hamlin +Hamlin's +Hammarskjold +Hammarskjold's +Hammerstein 
+Hammerstein's +Hammett +Hammett's +Hammond +Hammond's +Hammurabi +Hammurabi's +Hampshire +Hampshire's +Hampton +Hampton's +Hamsun +Hamsun's +Han +Han's +Hancock +Hancock's +Handel +Handel's +Handy +Handy's +Haney +Haney's +Hangul +Hangul's +Hangzhou +Hangzhou's +Hank +Hank's +Hanna +Hanna's +Hannah +Hannah's +Hannibal +Hannibal's +Hanoi +Hanoi's +Hanover +Hanover's +Hanoverian +Hanoverian's +Hans +Hans's +Hansel +Hansel's +Hansen +Hansen's +Hanson +Hanson's +Hanukkah +Hanukkah's +Hanukkahs +Hapsburg +Hapsburg's +Harare +Harare's +Harbin +Harbin's +Hardin +Hardin's +Harding +Harding's +Hardy +Hardy's +Hargreaves +Hargreaves's +Harlan +Harlan's +Harlem +Harlem's +Harlequin +Harlequin's +Harley +Harley's +Harlow +Harlow's +Harmon +Harmon's +Harold +Harold's +Harper +Harper's +Harrell +Harrell's +Harriet +Harriet's +Harriett +Harriett's +Harrington +Harrington's +Harris +Harris's +Harrisburg +Harrisburg's +Harrison +Harrison's +Harrods +Harrods's +Harry +Harry's +Hart +Hart's +Harte +Harte's +Hartford +Hartford's +Hartline +Hartline's +Hartman +Hartman's +Harvard +Harvard's +Harvey +Harvey's +Hasbro +Hasbro's +Hasidim +Hasidim's +Hastings +Hastings's +Hatfield +Hatfield's +Hathaway +Hathaway's +Hatsheput +Hatsheput's +Hatteras +Hatteras's +Hattie +Hattie's +Hauptmann +Hauptmann's +Hausa +Hausa's +Hausdorff +Hausdorff's +Havana +Havana's +Havanas +Havarti +Havarti's +Havel +Havel's +Havoline +Havoline's +Hawaii +Hawaii's +Hawaiian +Hawaiian's +Hawaiians +Hawking +Hawking's +Hawkins +Hawkins's +Hawthorne +Hawthorne's +Hay +Hay's +Hayden +Hayden's +Haydn +Haydn's +Hayes +Hayes's +Haynes +Haynes's +Hays +Hays's +Haywood +Haywood's +Hayworth +Hayworth's +Hazel +Hazel's +Hazlitt +Hazlitt's +He +He's +Head +Head's +Hearst +Hearst's +Heath +Heath's +Heather +Heather's +Heaviside +Heaviside's +Hebe +Hebe's +Hebert +Hebert's +Hebraic +Hebraic's +Hebrew +Hebrew's +Hebrews +Hebrews's +Hebrides +Hebrides's +Hecate +Hecate's +Hector +Hector's +Hecuba +Hecuba's +Heep +Heep's +Hefner 
+Hefner's +Hegel +Hegel's +Hegelian +Hegelian's +Hegira +Hegira's +Heidegger +Heidegger's +Heidelberg +Heidelberg's +Heidi +Heidi's +Heifetz +Heifetz's +Heimlich +Heimlich's +Heine +Heine's +Heineken +Heineken's +Heinlein +Heinlein's +Heinrich +Heinrich's +Heinz +Heinz's +Heisenberg +Heisenberg's +Heisman +Heisman's +Helen +Helen's +Helena +Helena's +Helene +Helene's +Helga +Helga's +Helicon +Helicon's +Heliopolis +Heliopolis's +Helios +Helios's +Hell +Hell's +Hellenic +Hellenic's +Hellenism +Hellenism's +Hellenisms +Hellenistic +Hellenistic's +Hellenization +Hellenization's +Hellenize +Hellenize's +Heller +Heller's +Hellespont +Hellespont's +Hellman +Hellman's +Hells +Helmholtz +Helmholtz's +Helsinki +Helsinki's +Helvetius +Helvetius's +Hemingway +Hemingway's +Hench +Hench's +Henderson +Henderson's +Hendricks +Hendricks's +Hendrix +Hendrix's +Henley +Henley's +Hennessy +Hennessy's +Henri +Henri's +Henrietta +Henrietta's +Henry +Henry's +Hensley +Hensley's +Henson +Henson's +Hepburn +Hepburn's +Hephaestus +Hephaestus's +Hepplewhite +Hepplewhite's +Hera +Hera's +Heraclitus +Heraclitus's +Herbart +Herbart's +Herbert +Herbert's +Herculaneum +Herculaneum's +Hercules +Hercules's +Herder +Herder's +Hereford +Hereford's +Herero +Herero's +Heriberto +Heriberto's +Herman +Herman's +Hermaphroditus +Hermaphroditus's +Hermes +Hermes's +Herminia +Herminia's +Hermitage +Hermitage's +Hermite +Hermite's +Hermosillo +Hermosillo's +Hernandez +Hernandez's +Herod +Herod's +Herodotus +Herodotus's +Heroku +Heroku's +Herrera +Herrera's +Herrick +Herrick's +Herring +Herring's +Herschel +Herschel's +Hersey +Hersey's +Hershel +Hershel's +Hershey +Hershey's +Hertz +Hertz's +Hertzsprung +Hertzsprung's +Herzegovina +Herzegovina's +Herzl +Herzl's +Heshvan +Heshvan's +Hesiod +Hesiod's +Hesperus +Hesperus's +Hess +Hess's +Hesse +Hesse's +Hessian +Hessian's +Hester +Hester's +Heston +Heston's +Hettie +Hettie's +Hewitt +Hewitt's +Hewlett +Hewlett's +Heyerdahl +Heyerdahl's +Heywood +Heywood's 
+Hezbollah +Hezbollah's +Hezekiah +Hezekiah's +Hg +Hg's +Hialeah +Hialeah's +Hiawatha +Hiawatha's +Hibernia +Hibernia's +Hickman +Hickman's +Hickok +Hickok's +Hicks +Hicks's +Hieronymus +Hieronymus's +Higgins +Higgins's +Highlander +Highlander's +Highlanders +Highness +Highness's +Hilario +Hilario's +Hilary +Hilary's +Hilbert +Hilbert's +Hilda +Hilda's +Hildebrand +Hildebrand's +Hilfiger +Hilfiger's +Hill +Hill's +Hillary +Hillary's +Hillel +Hillel's +Hilton +Hilton's +Himalaya +Himalaya's +Himalayas +Himalayas's +Himmler +Himmler's +Hinayana +Hinayana's +Hindemith +Hindemith's +Hindenburg +Hindenburg's +Hindi +Hindi's +Hindu +Hindu's +Hinduism +Hinduism's +Hinduisms +Hindus +Hindustan +Hindustan's +Hindustani +Hindustani's +Hines +Hines's +Hinton +Hinton's +Hipparchus +Hipparchus's +Hippocrates +Hippocrates's +Hippocratic +Hippocratic's +Hiram +Hiram's +Hirobumi +Hirobumi's +Hirohito +Hirohito's +Hiroshima +Hiroshima's +Hispanic +Hispanic's +Hispanics +Hispaniola +Hispaniola's +Hiss +Hiss's +Hitachi +Hitachi's +Hitchcock +Hitchcock's +Hitler +Hitler's +Hitlers +Hittite +Hittite's +Hmong +Hmong's +Hobart +Hobart's +Hobbes +Hobbes's +Hobbs +Hobbs's +Hockney +Hockney's +Hodge +Hodge's +Hodges +Hodges's +Hodgkin +Hodgkin's +Hoff +Hoff's +Hoffa +Hoffa's +Hoffman +Hoffman's +Hofstadter +Hofstadter's +Hogan +Hogan's +Hogarth +Hogarth's +Hogwarts +Hogwarts's +Hohenlohe +Hohenlohe's +Hohenstaufen +Hohenstaufen's +Hohenzollern +Hohenzollern's +Hohhot +Hohhot's +Hohokam +Hohokam's +Hokkaido +Hokkaido's +Hokusai +Hokusai's +Holbein +Holbein's +Holcomb +Holcomb's +Holden +Holden's +Holder +Holder's +Holiday +Holiday's +Holland +Holland's +Hollands +Hollerith +Hollerith's +Holley +Holley's +Hollie +Hollie's +Hollis +Hollis's +Holloway +Holloway's +Holly +Holly's +Hollywood +Hollywood's +Holman +Holman's +Holmes +Holmes's +Holocaust +Holocaust's +Holocene +Holocene's +Holst +Holst's +Holstein +Holstein's +Holsteins +Holt +Holt's +Homer +Homer's +Homeric +Homeric's +Honda 
+Honda's +Honduran +Honduran's +Hondurans +Honduras +Honduras's +Honecker +Honecker's +Honeywell +Honeywell's +Hong +Honiara +Honiara's +Honolulu +Honolulu's +Honshu +Honshu's +Hood +Hood's +Hooke +Hooke's +Hooker +Hooker's +Hooper +Hooper's +Hoosier +Hoosier's +Hooters +Hooters's +Hoover +Hoover's +Hoovers +Hope +Hope's +Hopewell +Hopewell's +Hopi +Hopi's +Hopkins +Hopkins's +Hopper +Hopper's +Horace +Horace's +Horacio +Horacio's +Horatio +Horatio's +Hormel +Hormel's +Hormuz +Hormuz's +Horn +Horn's +Hornblower +Hornblower's +Horne +Horne's +Horowitz +Horowitz's +Horthy +Horthy's +Horton +Horton's +Horus +Horus's +Hosea +Hosea's +Hotpoint +Hotpoint's +Hottentot +Hottentot's +Houdini +Houdini's +House +House's +Housman +Housman's +Houston +Houston's +Houyhnhnm +Houyhnhnm's +Hovhaness +Hovhaness's +Howard +Howard's +Howe +Howe's +Howell +Howell's +Howells +Howells's +Hoyle +Hoyle's +Hrothgar +Hrothgar's +Huang +Huang's +Hubbard +Hubbard's +Hubble +Hubble's +Huber +Huber's +Hubert +Hubert's +Huck +Huck's +Hudson +Hudson's +Huerta +Huerta's +Huey +Huey's +Huff +Huff's +Huffman +Huffman's +Huggins +Huggins's +Hugh +Hugh's +Hughes +Hughes's +Hugo +Hugo's +Huguenot +Huguenot's +Huguenots +Hui +Hui's +Huitzilopotchli +Huitzilopotchli's +Hull +Hull's +Humberto +Humberto's +Humboldt +Humboldt's +Hume +Hume's +Hummer +Hummer's +Humphrey +Humphrey's +Humvee +Humvee's +Hun +Hun's +Hungarian +Hungarian's +Hungarians +Hungary +Hungary's +Huns +Hunspell +Hunspell's +Hunt +Hunt's +Hunter +Hunter's +Huntington +Huntington's +Huntley +Huntley's +Huntsville +Huntsville's +Hurd +Hurd's +Hurley +Hurley's +Huron +Huron's +Hurst +Hurst's +Hus +Hus's +Hussein +Hussein's +Husserl +Husserl's +Hussite +Hussite's +Huston +Huston's +Hutchinson +Hutchinson's +Hutton +Hutton's +Hutu +Hutu's +Huxley +Huxley's +Huygens +Huygens's +Hyades +Hyades's +Hyde +Hyde's +Hyderabad +Hyderabad's +Hydra +Hydra's +Hymen +Hymen's +Hyperion +Hyperion's +Hyundai +Hyundai's +Hz +Hz's +Héloise +Héloise's +I +I'd 
+I'll +I'm +I's +I've +IBM +IBM's +IKEA +IKEA's +ING +ING's +ISO +ISO's +Iaccoca +Iaccoca's +Iago +Iago's +Ian +Ian's +Iapetus +Iapetus's +Ibadan +Ibadan's +Iberia +Iberia's +Iberian +Iberian's +Ibiza +Ibiza's +Iblis +Iblis's +Ibo +Ibo's +Ibsen +Ibsen's +Icahn +Icahn's +Icarus +Icarus's +Iceland +Iceland's +Icelander +Icelander's +Icelanders +Icelandic +Icelandic's +Idaho +Idaho's +Idahoan +Idahoan's +Idahoans +Idahoes +Idahos +Ieyasu +Ieyasu's +Ignacio +Ignacio's +Ignatius +Ignatius's +Igor +Igor's +Iguassu +Iguassu's +Ijssel +Ijssel's +Ijsselmeer +Ijsselmeer's +Ike +Ike's +Ikhnaton +Ikhnaton's +Ila +Ila's +Ilene +Ilene's +Iliad +Iliad's +Illinois +Illinois's +Illuminati +Illuminati's +Ilyushin +Ilyushin's +Imelda +Imelda's +Imhotep +Imhotep's +Imodium +Imodium's +Imogene +Imogene's +Imus +Imus's +Ina +Ina's +Inca +Inca's +Incas +Inchon +Inchon's +Independence +Independence's +India +India's +Indian +Indian's +Indiana +Indiana's +Indianan +Indianan's +Indianans +Indianapolis +Indianapolis's +Indians +Indies +Indies's +Indira +Indira's +Indochina +Indochina's +Indochinese +Indochinese's +Indonesia +Indonesia's +Indonesian +Indonesian's +Indonesians +Indore +Indore's +Indra +Indra's +Indus +Indus's +Indy +Indy's +Ines +Ines's +Inez +Inez's +Inge +Inge's +Inglewood +Ingram +Ingram's +Ingres +Ingres's +Ingrid +Ingrid's +Innocent +Innocent's +Inonu +Inonu's +Inquisition +Inquisition's +Instagram +Instagram's +Instamatic +Instamatic's +Intel +Intel's +Intelsat +Intelsat's +Internationale +Internationale's +Internet +Internet's +Interpol +Interpol's +Inuit +Inuit's +Inuits +Inuktitut +Inuktitut's +Invar +Invar's +Ionesco +Ionesco's +Ionian +Ionian's +Ionic +Ionic's +Ionics +Iowa +Iowa's +Iowan +Iowan's +Iowans +Iowas +Iphigenia +Iphigenia's +Iqaluit +Iqaluit's +Iqbal +Iqbal's +Iquitos +Iquitos's +Ira +Ira's +Iran +Iran's +Iranian +Iranian's +Iranians +Iraq +Iraq's +Iraqi +Iraqi's +Iraqis +Ireland +Ireland's +Irene +Irene's +Iris +Iris's +Irish +Irish's +Irisher +Irishman 
+Irishman's +Irishmen +Irishmen's +Irishwoman +Irishwoman's +Irishwomen +Irishwomen's +Irkutsk +Irkutsk's +Irma +Irma's +Iroquoian +Iroquoian's +Iroquois +Iroquois's +Irrawaddy +Irrawaddy's +Irtish +Irtish's +Irvin +Irvin's +Irving +Irving's +Irwin +Irwin's +Isaac +Isaac's +Isabel +Isabel's +Isabella +Isabella's +Isabelle +Isabelle's +Isaiah +Isaiah's +Iscariot +Iscariot's +Isfahan +Isfahan's +Isherwood +Isherwood's +Ishim +Ishim's +Ishmael +Ishmael's +Ishtar +Ishtar's +Isiah +Isiah's +Isidro +Isidro's +Isis +Isis's +Islam +Islam's +Islamabad +Islamabad's +Islamic +Islamic's +Islamism +Islamism's +Islamist +Islamist's +Islams +Ismael +Ismael's +Ismail +Ismail's +Isolde +Isolde's +Ispell +Ispell's +Israel +Israel's +Israeli +Israeli's +Israelis +Israelite +Israelite's +Israels +Issac +Issac's +Issachar +Issachar's +Istanbul +Istanbul's +Isuzu +Isuzu's +Itaipu +Itaipu's +Italian +Italian's +Italians +Italy +Italy's +Itasca +Itasca's +Ithaca +Ithaca's +Ithacan +Ithacan's +Ito +Ito's +Iva +Iva's +Ivan +Ivan's +Ivanhoe +Ivanhoe's +Ives +Ives's +Ivory +Ivory's +Ivy +Ivy's +Iyar +Iyar's +Izaak +Izaak's +Izanagi +Izanagi's +Izanami +Izanami's +Izhevsk +Izhevsk's +Izmir +Izmir's +Izod +Izod's +Izvestia +Izvestia's +J +J's +JFK +JFK's +Jack +Jack's +Jackie +Jackie's +Jacklyn +Jacklyn's +Jackson +Jackson's +Jacksonian +Jacksonian's +Jacksonville +Jacksonville's +Jacky +Jacky's +Jaclyn +Jaclyn's +Jacob +Jacob's +Jacobean +Jacobean's +Jacobi +Jacobi's +Jacobin +Jacobin's +Jacobite +Jacobite's +Jacobs +Jacobs's +Jacobson +Jacobson's +Jacquard +Jacquard's +Jacqueline +Jacqueline's +Jacquelyn +Jacquelyn's +Jacques +Jacques's +Jacuzzi +Jacuzzi's +Jagger +Jagger's +Jagiellon +Jagiellon's +Jaguar +Jaguar's +Jahangir +Jahangir's +Jaime +Jaime's +Jain +Jain's +Jainism +Jainism's +Jaipur +Jaipur's +Jakarta +Jakarta's +Jake +Jake's +Jamaal +Jamaal's +Jamaica +Jamaica's +Jamaican +Jamaican's +Jamaicans +Jamal +Jamal's +Jamar +Jamar's +Jame +Jame's +Jamel +Jamel's +James +James's 
+Jamestown +Jamestown's +Jami +Jami's +Jamie +Jamie's +Jan +Jan's +Jana +Jana's +Janacek +Janacek's +Jane +Jane's +Janell +Janell's +Janelle +Janelle's +Janet +Janet's +Janette +Janette's +Janice +Janice's +Janie +Janie's +Janine +Janine's +Janis +Janis's +Janissary +Janissary's +Janjaweed +Janjaweed's +Janna +Janna's +Jannie +Jannie's +Jansen +Jansen's +Jansenist +Jansenist's +Januaries +January +January's +Janus +Janus's +Japan +Japan's +Japanese +Japanese's +Japaneses +Japura +Japura's +Jared +Jared's +Jarlsberg +Jarlsberg's +Jarred +Jarred's +Jarrett +Jarrett's +Jarrod +Jarrod's +Jarvis +Jarvis's +Jasmine +Jasmine's +Jason +Jason's +Jasper +Jasper's +Jataka +Jataka's +Java +Java's +JavaScript +JavaScript's +Javanese +Javanese's +Javas +Javier +Javier's +Jaxartes +Jaxartes's +Jay +Jay's +Jayapura +Jayapura's +Jayawardene +Jayawardene's +Jaycee +Jaycee's +Jaycees +Jaycees's +Jayne +Jayne's +Jayson +Jayson's +Jean +Jean's +Jeanette +Jeanette's +Jeanie +Jeanie's +Jeanine +Jeanine's +Jeanne +Jeanne's +Jeannette +Jeannette's +Jeannie +Jeannie's +Jeannine +Jeannine's +Jed +Jed's +Jedi +Jedi's +Jeep +Jeep's +Jeeves +Jeeves's +Jeff +Jeff's +Jefferey +Jefferey's +Jefferson +Jefferson's +Jeffersonian +Jeffersonian's +Jeffery +Jeffery's +Jeffrey +Jeffrey's +Jeffry +Jeffry's +Jehoshaphat +Jehoshaphat's +Jehovah +Jehovah's +Jekyll +Jekyll's +Jenifer +Jenifer's +Jenkins +Jenkins's +Jenna +Jenna's +Jenner +Jenner's +Jennie +Jennie's +Jennifer +Jennifer's +Jennings +Jennings's +Jenny +Jenny's +Jensen +Jensen's +Jephthah +Jephthah's +Jerald +Jerald's +Jeremiah +Jeremiah's +Jeremiahs +Jeremy +Jeremy's +Jeri +Jeri's +Jericho +Jericho's +Jermaine +Jermaine's +Jeroboam +Jeroboam's +Jerold +Jerold's +Jerome +Jerome's +Jerri +Jerri's +Jerrod +Jerrod's +Jerrold +Jerrold's +Jerry +Jerry's +Jersey +Jersey's +Jerseys +Jerusalem +Jerusalem's +Jess +Jess's +Jesse +Jesse's +Jessica +Jessica's +Jessie +Jessie's +Jesuit +Jesuit's +Jesuits +Jesus +Jesus's +Jetway +Jetway's +Jew +Jew's +Jewel 
+Jewel's +Jewell +Jewell's +Jewish +Jewish's +Jewishness +Jewry +Jewry's +Jews +Jezebel +Jezebel's +Jezebels +Jidda +Jidda's +Jilin +Jilin's +Jill +Jill's +Jillian +Jillian's +Jim +Jim's +Jimenez +Jimenez's +Jimmie +Jimmie's +Jimmy +Jimmy's +Jinan +Jinan's +Jinnah +Jinnah's +Jinny +Jinny's +Jivaro +Jivaro's +Jo +Jo's +Joan +Joan's +Joann +Joann's +Joanna +Joanna's +Joanne +Joanne's +Joaquin +Joaquin's +Job +Job's +Jobs +Jobs's +Jocasta +Jocasta's +Jocelyn +Jocelyn's +Jock +Jock's +Jockey +Jockey's +Jodi +Jodi's +Jodie +Jodie's +Jody +Jody's +Joe +Joe's +Joel +Joel's +Joey +Joey's +Jogjakarta +Jogjakarta's +Johann +Johann's +Johanna +Johanna's +Johannes +Johannes's +Johannesburg +Johannesburg's +John +John's +Johnathan +Johnathan's +Johnathon +Johnathon's +Johnie +Johnie's +Johnnie +Johnnie's +Johnny +Johnny's +Johns +Johns's +Johnson +Johnson's +Johnston +Johnston's +Jolene +Jolene's +Joliet +Joliet's +Jolson +Jolson's +Jon +Jon's +Jonah +Jonah's +Jonahs +Jonas +Jonas's +Jonathan +Jonathan's +Jonathon +Jonathon's +Jones +Jones's +Joni +Joni's +Jonson +Jonson's +Joplin +Joplin's +Jordan +Jordan's +Jordanian +Jordanian's +Jordanians +Jorge +Jorge's +Jose +Jose's +Josef +Josef's +Josefa +Josefa's +Josefina +Josefina's +Joseph +Joseph's +Josephine +Josephine's +Josephs +Josephson +Josephson's +Josephus +Josephus's +Joshua +Joshua's +Josiah +Josiah's +Josie +Josie's +Josue +Josue's +Joule +Joule's +Jove +Jove's +Jovian +Jovian's +Joy +Joy's +Joyce +Joyce's +Joycean +Joycean's +Joyner +Joyner's +Juan +Juan's +Juana +Juana's +Juanita +Juanita's +Juarez +Juarez's +Jubal +Jubal's +Judaeo +Judah +Judah's +Judaic +Judaism +Judaism's +Judaisms +Judas +Judas's +Judases +Judd +Judd's +Jude +Jude's +Judea +Judea's +Judith +Judith's +Judson +Judson's +Judy +Judy's +Juggernaut +Juggernaut's +Jules +Jules's +Julia +Julia's +Julian +Julian's +Juliana +Juliana's +Julianne +Julianne's +Julie +Julie's +Julies +Juliet +Juliet's +Juliette +Juliette's +Julio +Julio's +Julius +Julius's 
+Julliard +Julliard's +July +July's +June +June's +Juneau +Juneau's +Junes +Jung +Jung's +Jungfrau +Jungfrau's +Jungian +Jungian's +Junior +Junior's +Juniors +Juno +Juno's +Jupiter +Jupiter's +Jurassic +Jurassic's +Jurua +Jurua's +Justice +Justice's +Justin +Justin's +Justine +Justine's +Justinian +Justinian's +Jutland +Jutland's +Juvenal +Juvenal's +K +K's +KFC +KFC's +Kaaba +Kaaba's +Kabul +Kabul's +Kafka +Kafka's +Kafkaesque +Kafkaesque's +Kagoshima +Kagoshima's +Kahlua +Kahlua's +Kaifeng +Kaifeng's +Kaiser +Kaiser's +Kaitlin +Kaitlin's +Kalahari +Kalahari's +Kalamazoo +Kalamazoo's +Kalashnikov +Kalashnikov's +Kalb +Kalb's +Kalevala +Kalevala's +Kalgoorlie +Kalgoorlie's +Kali +Kali's +Kalmyk +Kalmyk's +Kama +Kama's +Kamchatka +Kamchatka's +Kamehameha +Kamehameha's +Kampala +Kampala's +Kampuchea +Kampuchea's +Kanchenjunga +Kanchenjunga's +Kandahar +Kandahar's +Kandinsky +Kandinsky's +Kane +Kane's +Kannada +Kannada's +Kano +Kano's +Kanpur +Kanpur's +Kansan +Kansan's +Kansans +Kansas +Kansas's +Kant +Kant's +Kantian +Kantian's +Kaohsiung +Kaohsiung's +Kaposi +Kaposi's +Kara +Kara's +Karachi +Karachi's +Karaganda +Karaganda's +Karakorum +Karakorum's +Karamazov +Karamazov's +Kareem +Kareem's +Karen +Karen's +Karenina +Karenina's +Kari +Kari's +Karin +Karin's +Karina +Karina's +Karl +Karl's +Karla +Karla's +Karloff +Karloff's +Karo +Karo's +Karol +Karol's +Karroo +Karroo's +Karyn +Karyn's +Kasai +Kasai's +Kasey +Kasey's +Kashmir +Kashmir's +Kasparov +Kasparov's +Kate +Kate's +Katelyn +Katelyn's +Katharine +Katharine's +Katherine +Katherine's +Katheryn +Katheryn's +Kathiawar +Kathiawar's +Kathie +Kathie's +Kathleen +Kathleen's +Kathrine +Kathrine's +Kathryn +Kathryn's +Kathy +Kathy's +Katie +Katie's +Katina +Katina's +Katmai +Katmai's +Katmandu +Katmandu's +Katowice +Katowice's +Katrina +Katrina's +Katy +Katy's +Kauai +Kauai's +Kaufman +Kaufman's +Kaunas +Kaunas's +Kaunda +Kaunda's +Kawabata +Kawabata's +Kawasaki +Kawasaki's +Kay +Kay's +Kaye +Kaye's +Kayla +Kayla's 
+Kazakh +Kazakh's +Kazakhstan +Kazakhstan's +Kazan +Kazan's +Kazantzakis +Kazantzakis's +Keaton +Keaton's +Keats +Keats's +Keck +Keck's +Keenan +Keenan's +Keewatin +Keewatin's +Keillor +Keillor's +Keisha +Keisha's +Keith +Keith's +Keller +Keller's +Kelley +Kelley's +Kelli +Kelli's +Kellie +Kellie's +Kellogg +Kellogg's +Kelly +Kelly's +Kelsey +Kelsey's +Kelvin +Kelvin's +Kemerovo +Kemerovo's +Kemp +Kemp's +Kempis +Kempis's +Kendall +Kendall's +Kendra +Kendra's +Kendrick +Kendrick's +Kenmore +Kenmore's +Kennan +Kennan's +Kennedy +Kennedy's +Kenneth +Kenneth's +Kennith +Kennith's +Kenny +Kenny's +Kent +Kent's +Kenton +Kenton's +Kentuckian +Kentuckian's +Kentuckians +Kentucky +Kentucky's +Kenya +Kenya's +Kenyan +Kenyan's +Kenyans +Kenyatta +Kenyatta's +Kenyon +Kenyon's +Keogh +Keogh's +Keokuk +Keokuk's +Kepler +Kepler's +Kerensky +Kerensky's +Keri +Keri's +Kermit +Kermit's +Kern +Kern's +Kerouac +Kerouac's +Kerr +Kerr's +Kerri +Kerri's +Kerry +Kerry's +Kettering +Kettering's +Keven +Keven's +Kevin +Kevin's +Kevlar +Kevlar's +Kevorkian +Kevorkian's +Kewpie +Kewpie's +Key +Key's +Keynes +Keynes's +Keynesian +Keynesian's +Khabarovsk +Khabarovsk's +Khachaturian +Khachaturian's +Khalid +Khalid's +Khan +Khan's +Kharkov +Kharkov's +Khartoum +Khartoum's +Khayyam +Khayyam's +Khazar +Khazar's +Khmer +Khmer's +Khoikhoi +Khoikhoi's +Khoisan +Khoisan's +Khomeini +Khomeini's +Khorana +Khorana's +Khrushchev +Khrushchev's +Khufu +Khufu's +Khulna +Khulna's +Khwarizmi +Khwarizmi's +Khyber +Khyber's +Kickapoo +Kickapoo's +Kidd +Kidd's +Kiel +Kiel's +Kierkegaard +Kierkegaard's +Kieth +Kieth's +Kiev +Kiev's +Kigali +Kigali's +Kikuyu +Kikuyu's +Kilauea +Kilauea's +Kilimanjaro +Kilimanjaro's +Kilroy +Kilroy's +Kim +Kim's +Kimberley +Kimberley's +Kimberly +Kimberly's +King +King's +Kingston +Kingston's +Kingstown +Kingstown's +Kinko's +Kinney +Kinney's +Kinsey +Kinsey's +Kinshasa +Kinshasa's +Kiowa +Kiowa's +Kip +Kip's +Kipling +Kipling's +Kirby +Kirby's +Kirchhoff +Kirchhoff's +Kirchner 
+Kirchner's +Kirghistan +Kirghistan's +Kirghiz +Kirghiz's +Kiribati +Kiribati's +Kirinyaga +Kirinyaga's +Kirk +Kirk's +Kirkland +Kirkland's +Kirkpatrick +Kirkpatrick's +Kirov +Kirov's +Kirsten +Kirsten's +Kisangani +Kisangani's +Kishinev +Kishinev's +Kislev +Kislev's +Kissinger +Kissinger's +Kit +Kit's +Kitakyushu +Kitakyushu's +Kitchener +Kitchener's +Kitty +Kitty's +Kiwanis +Kiwanis's +Klan +Klan's +Klansman +Klansman's +Klaus +Klaus's +Klee +Klee's +Kleenex +Kleenex's +Kleenexes +Klein +Klein's +Klimt +Klimt's +Kline +Kline's +Klingon +Klingon's +Klondike +Klondike's +Klondikes +Kmart +Kmart's +Knapp +Knapp's +Knesset +Knesset's +Kngwarreye +Kngwarreye's +Knickerbocker +Knickerbocker's +Knievel +Knievel's +Knight +Knight's +Knopf +Knopf's +Knossos +Knossos's +Knowles +Knowles's +Knox +Knox's +Knoxville +Knoxville's +Knudsen +Knudsen's +Knuth +Knuth's +Kobe +Kobe's +Koch +Koch's +Kochab +Kochab's +Kodachrome +Kodachrome's +Kodak +Kodak's +Kodaly +Kodaly's +Kodiak +Kodiak's +Koestler +Koestler's +Kohinoor +Kohinoor's +Kohl +Kohl's +Koizumi +Koizumi's +Kojak +Kojak's +Kolyma +Kolyma's +Kommunizma +Kommunizma's +Kong +Kong's +Kongo +Kongo's +Konrad +Konrad's +Koontz +Koontz's +Koppel +Koppel's +Koran +Koran's +Korans +Korea +Korea's +Korean +Korean's +Koreans +Kornberg +Kornberg's +Kory +Kory's +Korzybski +Korzybski's +Kosciusko +Kosciusko's +Kossuth +Kossuth's +Kosygin +Kosygin's +Kotlin +Kotlin's +Koufax +Koufax's +Kowloon +Kowloon's +Kr +Kr's +Kraft +Kraft's +Krakatoa +Krakatoa's +Krakow +Krakow's +Kramer +Kramer's +Krasnodar +Krasnodar's +Krasnoyarsk +Krasnoyarsk's +Krebs +Krebs's +Kremlin +Kremlin's +Kremlinologist +Kresge +Kresge's +Kringle +Kringle's +Kris +Kris's +Krishna +Krishna's +Krishnamurti +Krishnamurti's +Krista +Krista's +Kristen +Kristen's +Kristi +Kristi's +Kristie +Kristie's +Kristin +Kristin's +Kristina +Kristina's +Kristine +Kristine's +Kristopher +Kristopher's +Kristy +Kristy's +Kroc +Kroc's +Kroger +Kroger's +Kronecker +Kronecker's +Kropotkin 
+Kropotkin's +Kruger +Kruger's +Krugerrand +Krugerrand's +Krupp +Krupp's +Krystal +Krystal's +Kshatriya +Kshatriya's +Kublai +Kublai's +Kubrick +Kubrick's +Kuhn +Kuhn's +Kuibyshev +Kuibyshev's +Kulthumm +Kulthumm's +Kunming +Kunming's +Kuomintang +Kuomintang's +Kurd +Kurd's +Kurdish +Kurdish's +Kurdistan +Kurdistan's +Kurile +Kurile's +Kurosawa +Kurosawa's +Kurt +Kurt's +Kurtis +Kurtis's +Kusch +Kusch's +Kutuzov +Kutuzov's +Kuwait +Kuwait's +Kuwaiti +Kuwaiti's +Kuwaitis +Kuznets +Kuznets's +Kuznetsk +Kuznetsk's +Kwakiutl +Kwakiutl's +Kwan +Kwan's +Kwangju +Kwangju's +Kwanzaa +Kwanzaa's +Kwanzaas +Kyle +Kyle's +Kyoto +Kyoto's +Kyrgyzstan +Kyrgyzstan's +Kyushu +Kyushu's +Köln +Köln's +L +L'Amour +L'Amour's +L'Oreal +L'Oreal's +L'Ouverture +L'Ouverture's +L's +LBJ +LBJ's +La +La's +Laban +Laban's +Labrador +Labrador's +Labradors +Lacey +Lacey's +Lachesis +Lachesis's +Lacy +Lacy's +Ladoga +Ladoga's +Ladonna +Ladonna's +Lafayette +Lafayette's +Lafitte +Lafitte's +Lagos +Lagos's +Lagrange +Lagrange's +Lagrangian +Lagrangian's +Lahore +Lahore's +Laius +Laius's +Lajos +Lajos's +Lakeisha +Lakeisha's +Lakewood +Lakisha +Lakisha's +Lakota +Lakota's +Lakshmi +Lakshmi's +Lamar +Lamar's +Lamarck +Lamarck's +Lamaze +Lamaze's +Lamb +Lamb's +Lambert +Lambert's +Lamborghini +Lamborghini's +Lambrusco +Lambrusco's +Lamont +Lamont's +Lana +Lana's +Lanai +Lanai's +Lancashire +Lancashire's +Lancaster +Lancaster's +Lance +Lance's +Lancelot +Lancelot's +Land +Land's +Landon +Landon's +Landry +Landry's +Landsat +Landsat's +Landsteiner +Landsteiner's +Lane +Lane's +Lang +Lang's +Langerhans +Langerhans's +Langland +Langland's +Langley +Langley's +Langmuir +Langmuir's +Lanka +Lanka's +Lanny +Lanny's +Lansing +Lansing's +Lanzhou +Lanzhou's +Lao +Lao's +Laocoon +Laocoon's +Laos +Laos's +Laotian +Laotian's +Laotians +Laplace +Laplace's +Lapland +Lapland's +Lapp +Lapp's +Lapps +Lara +Lara's +Laramie +Laramie's +Lardner +Lardner's +Laredo +Laredo's +Larousse +Larousse's +Larry +Larry's +Lars 
+Lars's +Larsen +Larsen's +Larson +Larson's +Las +Lascaux +Lascaux's +Lassa +Lassa's +Lassen +Lassen's +Lassie +Lassie's +Latasha +Latasha's +Lateran +Lateran's +Latin +Latin's +Latina +Latiner +Latino +Latino's +Latinos +Latins +Latisha +Latisha's +Latonya +Latonya's +Latoya +Latoya's +Latrobe +Latrobe's +Latvia +Latvia's +Latvian +Latvian's +Latvians +Laud +Laud's +Lauder +Lauder's +Laue +Laue's +Laundromat +Laundromat's +Laura +Laura's +Laurasia +Laurasia's +Laurel +Laurel's +Lauren +Lauren's +Laurence +Laurence's +Laurent +Laurent's +Lauri +Lauri's +Laurie +Laurie's +Laval +Laval's +Lavern +Lavern's +Laverne +Laverne's +Lavoisier +Lavoisier's +Lavonne +Lavonne's +Lawanda +Lawanda's +Lawrence +Lawrence's +Lawson +Lawson's +Layamon +Layamon's +Layla +Layla's +Lazaro +Lazaro's +Lazarus +Lazarus's +Le +Le's +Lea +Lea's +Leach +Leach's +Leadbelly +Leadbelly's +Leah +Leah's +Leakey +Leakey's +Lean +Lean's +Leander +Leander's +Leann +Leann's +Leanna +Leanna's +Leanne +Leanne's +Lear +Lear's +Learjet +Learjet's +Leary +Leary's +Leavenworth +Leavenworth's +Lebanese +Lebanese's +Lebanon +Lebanon's +Lebesgue +Lebesgue's +Leblanc +Leblanc's +Leda +Leda's +Lederberg +Lederberg's +Lee +Lee's +Leeds +Leeds's +Leeuwenhoek +Leeuwenhoek's +Leeward +Leeward's +Left +Legendre +Legendre's +Leger +Leger's +Leghorn +Leghorn's +Lego +Lego's +Legree +Legree's +Lehman +Lehman's +Leibniz +Leibniz's +Leicester +Leicester's +Leiden +Leiden's +Leif +Leif's +Leigh +Leigh's +Leila +Leila's +Leipzig +Leipzig's +Lela +Lela's +Leland +Leland's +Lelia +Lelia's +Lemaitre +Lemaitre's +Lemuel +Lemuel's +Lemuria +Lemuria's +Len +Len's +Lena +Lena's +Lenard +Lenard's +Lenin +Lenin's +Leningrad +Leningrad's +Leninism +Leninism's +Leninist +Leninist's +Lennon +Lennon's +Lenny +Lenny's +Leno +Leno's +Lenoir +Lenoir's +Lenora +Lenora's +Lenore +Lenore's +Lent +Lent's +Lenten +Lenten's +Lents +Leo +Leo's +Leola +Leola's +Leon +Leon's +Leona +Leona's +Leonard +Leonard's +Leonardo +Leonardo's +Leoncavallo 
+Leoncavallo's +Leonel +Leonel's +Leonid +Leonid's +Leonidas +Leonidas's +Leonor +Leonor's +Leopold +Leopold's +Leopoldo +Leopoldo's +Leos +Lepidus +Lepidus's +Lepke +Lepke's +Lepus +Lepus's +Lerner +Lerner's +Leroy +Leroy's +Les +Les's +Lesa +Lesa's +Lesley +Lesley's +Leslie +Leslie's +Lesotho +Lesotho's +Lesseps +Lesseps's +Lessie +Lessie's +Lester +Lester's +Lestrade +Lestrade's +Leta +Leta's +Letha +Letha's +Lethe +Lethe's +Leticia +Leticia's +Letitia +Letitia's +Letterman +Letterman's +Levant +Levant's +Levesque +Levesque's +Levi +Levi's +Leviathan +Leviathan's +Levine +Levine's +Leviticus +Leviticus's +Levitt +Levitt's +Levy +Levy's +Lew +Lew's +Lewinsky +Lewinsky's +Lewis +Lewis's +Lexington +Lexington's +Lexus +Lexus's +Lhasa +Lhasa's +Lhotse +Lhotse's +Li +Li's +Libby +Libby's +Liberace +Liberace's +Liberia +Liberia's +Liberian +Liberian's +Liberians +Libra +Libra's +Libras +LibreOffice +LibreOffice's +Libreville +Libreville's +Librium +Librium's +Libya +Libya's +Libyan +Libyan's +Libyans +Lichtenstein +Lichtenstein's +Lidia +Lidia's +Lie +Lie's +Lieberman +Lieberman's +Liebfraumilch +Liebfraumilch's +Liechtenstein +Liechtenstein's +Liege +Liege's +Lila +Lila's +Lilia +Lilia's +Lilian +Lilian's +Liliana +Liliana's +Lilith +Lilith's +Liliuokalani +Liliuokalani's +Lille +Lille's +Lillian +Lillian's +Lillie +Lillie's +Lilliput +Lilliput's +Lilliputian +Lilliputian's +Lilliputians +Lilly +Lilly's +Lilongwe +Lilongwe's +Lily +Lily's +Lima +Lima's +Limbaugh +Limbaugh's +Limburger +Limburger's +Limoges +Limoges's +Limousin +Limousin's +Limpopo +Limpopo's +Lin +Lin's +Lina +Lina's +Lincoln +Lincoln's +Lincolns +Lind +Lind's +Linda +Linda's +Lindbergh +Lindbergh's +Lindsay +Lindsay's +Lindsey +Lindsey's +Lindy +Lindy's +Linnaeus +Linnaeus's +Linotype +Linotype's +Linton +Linton's +Linus +Linus's +Linux +Linux's +Linwood +Linwood's +Lionel +Lionel's +Lipizzaner +Lipizzaner's +Lippi +Lippi's +Lippmann +Lippmann's +Lipscomb +Lipscomb's +Lipton +Lipton's +Lisa +Lisa's 
+Lisbon +Lisbon's +Lissajous +Lissajous's +Lister +Lister's +Listerine +Listerine's +Liston +Liston's +Liszt +Liszt's +Lithuania +Lithuania's +Lithuanian +Lithuanian's +Lithuanians +Little +Little's +Litton +Litton's +Liverpool +Liverpool's +Liverpudlian +Liverpudlian's +Livia +Livia's +Livingston +Livingston's +Livingstone +Livingstone's +Livonia +Livonia's +Livy +Livy's +Liz +Liz's +Liza +Liza's +Lizzie +Lizzie's +Lizzy +Lizzy's +Ljubljana +Ljubljana's +Llewellyn +Llewellyn's +Lloyd +Lloyd's +Loafer +Loafer's +Loafers +Lobachevsky +Lobachevsky's +Lochinvar +Lochinvar's +Locke +Locke's +Lockean +Lockean's +Lockheed +Lockheed's +Lockwood +Lockwood's +Lodge +Lodge's +Lodz +Lodz's +Loewe +Loewe's +Loewi +Loewi's +Loews +Loews's +Logan +Logan's +Lohengrin +Lohengrin's +Loire +Loire's +Lois +Lois's +Loki +Loki's +Lola +Lola's +Lolita +Lolita's +Lollard +Lollard's +Lollobrigida +Lollobrigida's +Lombard +Lombard's +Lombardi +Lombardi's +Lombardy +Lombardy's +Lome +Lome's +Lon +Lon's +London +London's +Londoner +Londoner's +Long +Long's +Longfellow +Longfellow's +Longstreet +Longstreet's +Lonnie +Lonnie's +Lopez +Lopez's +Lora +Lora's +Loraine +Loraine's +Lord +Lord's +Lords +Lorelei +Lorelei's +Loren +Loren's +Lorena +Lorena's +Lorene +Lorene's +Lorentz +Lorentz's +Lorenz +Lorenz's +Lorenzo +Lorenzo's +Loretta +Loretta's +Lori +Lori's +Lorie +Lorie's +Lorna +Lorna's +Lorraine +Lorraine's +Lorre +Lorre's +Lorrie +Lorrie's +Los +Lot +Lot's +Lothario +Lothario's +Lott +Lott's +Lottie +Lottie's +Lou +Lou's +Louella +Louella's +Louie +Louie's +Louis +Louis's +Louisa +Louisa's +Louise +Louise's +Louisiana +Louisiana's +Louisianan +Louisianan's +Louisianans +Louisianian +Louisianian's +Louisianians +Louisville +Louisville's +Lourdes +Lourdes's +Louvre +Louvre's +Love +Love's +Lovecraft +Lovecraft's +Lovelace +Lovelace's +Lowe +Lowe's +Lowell +Lowell's +Lowenbrau +Lowenbrau's +Lowery +Lowery's +Loyang +Loyang's +Loyd +Loyd's +Loyola +Loyola's +Luanda +Luanda's +Luann +Luann's 
+Lubavitcher +Lubavitcher's +Lubbock +Lubbock's +Lubumbashi +Lubumbashi's +Lucas +Lucas's +Luce +Luce's +Lucia +Lucia's +Lucian +Lucian's +Luciano +Luciano's +Lucien +Lucien's +Lucifer +Lucifer's +Lucile +Lucile's +Lucille +Lucille's +Lucinda +Lucinda's +Lucio +Lucio's +Lucite +Lucite's +Lucius +Lucius's +Lucknow +Lucknow's +Lucretia +Lucretia's +Lucretius +Lucretius's +Lucy +Lucy's +Luddite +Luddite's +Ludhiana +Ludhiana's +Ludwig +Ludwig's +Luella +Luella's +Lufthansa +Lufthansa's +Luftwaffe +Luftwaffe's +Luger +Luger's +Lugosi +Lugosi's +Luigi +Luigi's +Luis +Luis's +Luisa +Luisa's +Luke +Luke's +Lula +Lula's +Lully +Lully's +Lulu +Lulu's +Lumière +Lumière's +Luna +Luna's +Lupe +Lupe's +Lupercalia +Lupercalia's +Lupus +Lupus's +Luria +Luria's +Lusaka +Lusaka's +Lusitania +Lusitania's +Luther +Luther's +Lutheran +Lutheran's +Lutheranism +Lutheranism's +Lutherans +Luvs +Luvs's +Luxembourg +Luxembourg's +Luxembourger +Luxembourger's +Luxembourgers +Luz +Luz's +Luzon +Luzon's +Lvov +Lvov's +LyX +LyX's +Lycra +Lycra's +Lycurgus +Lycurgus's +Lydia +Lydia's +Lyell +Lyell's +Lyle +Lyle's +Lyly +Lyly's +Lyman +Lyman's +Lyme +Lyme's +Lynch +Lynch's +Lynda +Lynda's +Lyndon +Lyndon's +Lynette +Lynette's +Lynn +Lynn's +Lynne +Lynne's +Lynnette +Lynnette's +Lyon +Lyon's +Lyons +Lyons's +Lyra +Lyra's +Lysenko +Lysenko's +Lysistrata +Lysistrata's +Lysol +Lysol's +M +M's +MCI +MCI's +MGM +MGM's +MHz +MIT +MIT's +Maalox +Maalox's +Mabel +Mabel's +Mable +Mable's +MacArthur +MacArthur's +MacBride +MacBride's +MacDonald +MacDonald's +MacLeish +MacLeish's +Macao +Macao's +Macaulay +Macaulay's +Macbeth +Macbeth's +Maccabeus +Maccabeus's +Mace +Mace's +Macedon +Macedon's +Macedonia +Macedonia's +Macedonian +Macedonian's +Macedonians +Mach +Mach's +Machiavelli +Machiavelli's +Machiavellian +Machiavellian's +Macias +Macias's +Macintosh +Macintosh's +Mack +Mack's +Mackenzie +Mackenzie's +Mackinac +Mackinac's +Mackinaw +Mackinaw's +Macmillan +Macmillan's +Macon +Macon's +Macumba +Macumba's 
+Macy +Macy's +Madagascan +Madagascan's +Madagascans +Madagascar +Madagascar's +Madden +Madden's +Maddox +Maddox's +Madeira +Madeira's +Madeiras +Madeleine +Madeleine's +Madeline +Madeline's +Madelyn +Madelyn's +Madge +Madge's +Madison +Madison's +Madonna +Madonna's +Madonnas +Madras +Madras's +Madrid +Madrid's +Madurai +Madurai's +Mae +Mae's +Maeterlinck +Maeterlinck's +Mafia +Mafia's +Mafias +Mafioso +Mafioso's +Magdalena +Magdalena's +Magdalene +Magdalene's +Magellan +Magellan's +Magellanic +Magellanic's +Maggie +Maggie's +Maghreb +Maghreb's +Magi +Maginot +Maginot's +Magnitogorsk +Magnitogorsk's +Magog +Magog's +Magoo +Magoo's +Magritte +Magritte's +Magsaysay +Magsaysay's +Magyar +Magyar's +Magyars +Mahabharata +Mahabharata's +Maharashtra +Maharashtra's +Mahavira +Mahavira's +Mahayana +Mahayana's +Mahayanist +Mahayanist's +Mahdi +Mahdi's +Mahfouz +Mahfouz's +Mahican +Mahican's +Mahicans +Mahler +Mahler's +Mai +Mai's +Maidenform +Maidenform's +Maigret +Maigret's +Mailer +Mailer's +Maillol +Maillol's +Maiman +Maiman's +Maimonides +Maimonides's +Maine +Maine's +Maisie +Maisie's +Maitreya +Maitreya's +Major +Major's +Majorca +Majorca's +Majuro +Majuro's +Makarios +Makarios's +Malabar +Malabar's +Malabo +Malabo's +Malacca +Malacca's +Malachi +Malachi's +Malagasy +Malagasy's +Malamud +Malamud's +Malaprop +Malaprop's +Malawi +Malawi's +Malay +Malay's +Malayalam +Malayalam's +Malayan +Malayan's +Malays +Malaysia +Malaysia's +Malaysian +Malaysian's +Malaysians +Malcolm +Malcolm's +Maldive +Maldive's +Maldives +Maldives's +Maldivian +Maldivian's +Maldivians +Maldonado +Maldonado's +Male +Male's +Mali +Mali's +Malian +Malian's +Malians +Malibu +Malibu's +Malinda +Malinda's +Malinowski +Malinowski's +Mallarmé +Mallarmé's +Mallomars +Mallomars's +Mallory +Mallory's +Malone +Malone's +Malory +Malory's +Malplaquet +Malplaquet's +Malraux +Malraux's +Malta +Malta's +Maltese +Maltese's +Malthus +Malthus's +Malthusian +Malthusian's +Mameluke +Mameluke's +Mamet +Mamet's +Mamie 
+Mamie's +Mammon +Mammon's +Mamore +Mamore's +Managua +Managua's +Manama +Manama's +Manasseh +Manasseh's +Manaus +Manaus's +Manchester +Manchester's +Manchu +Manchu's +Manchuria +Manchuria's +Manchurian +Manchurian's +Mancini +Mancini's +Mandalay +Mandalay's +Mandarin +Mandarin's +Mandela +Mandela's +Mandelbrot +Mandelbrot's +Mandingo +Mandingo's +Mandrell +Mandrell's +Mandy +Mandy's +Manet +Manet's +Manfred +Manfred's +Manhattan +Manhattan's +Manhattans +Mani +Mani's +Manichean +Manichean's +Manila +Manila's +Manilas +Manilla +Manilla's +Manitoba +Manitoba's +Manitoulin +Manitoulin's +Manley +Manley's +Mann +Mann's +Mannheim +Mannheim's +Manning +Manning's +Mansfield +Mansfield's +Manson +Manson's +Mantegna +Mantegna's +Mantle +Mantle's +Manuel +Manuel's +Manuela +Manuela's +Manx +Manx's +Mao +Mao's +Maoism +Maoism's +Maoisms +Maoist +Maoist's +Maoists +Maori +Maori's +Maoris +Mapplethorpe +Mapplethorpe's +Maputo +Maputo's +Mar +Mar's +Mara +Mara's +Maracaibo +Maracaibo's +Marat +Marat's +Maratha +Maratha's +Marathi +Marathi's +Marathon +Marathon's +Marc +Marc's +Marceau +Marceau's +Marcel +Marcel's +Marcelino +Marcelino's +Marcella +Marcella's +Marcelo +Marcelo's +March +March's +Marches +Marci +Marci's +Marcia +Marcia's +Marciano +Marciano's +Marcie +Marcie's +Marco +Marco's +Marconi +Marconi's +Marcos +Marcos's +Marcus +Marcus's +Marcy +Marcy's +Marduk +Marduk's +Margaret +Margaret's +Margarita +Margarita's +Margarito +Margarito's +Marge +Marge's +Margery +Margery's +Margie +Margie's +Margo +Margo's +Margret +Margret's +Margrethe +Margrethe's +Marguerite +Marguerite's +Mari +Mari's +Maria +Maria's +MariaDB +MariaDB's +Marian +Marian's +Mariana +Mariana's +Marianas +Marianas's +Marianne +Marianne's +Mariano +Mariano's +Maribel +Maribel's +Maricela +Maricela's +Marie +Marie's +Marietta +Marietta's +Marilyn +Marilyn's +Marin +Marin's +Marina +Marina's +Marine +Marine's +Marines +Mario +Mario's +Marion +Marion's +Maris +Maris's +Marisa +Marisa's +Marisol +Marisol's 
+Marissa +Marissa's +Maritain +Maritain's +Maritza +Maritza's +Marius +Marius's +Marjorie +Marjorie's +Marjory +Marjory's +Mark +Mark's +Markab +Markab's +Markham +Markham's +Markov +Markov's +Marks +Marks's +Marla +Marla's +Marlboro +Marlboro's +Marlborough +Marlborough's +Marlene +Marlene's +Marley +Marley's +Marlin +Marlin's +Marlon +Marlon's +Marlowe +Marlowe's +Marmara +Marmara's +Marne +Marne's +Maronite +Maronite's +Marple +Marple's +Marquesas +Marquesas's +Marquette +Marquette's +Marquez +Marquez's +Marquis +Marquis's +Marquita +Marquita's +Marrakesh +Marrakesh's +Marriott +Marriott's +Mars +Mars's +Marsala +Marsala's +Marseillaise +Marseillaise's +Marseilles +Marseilles's +Marsh +Marsh's +Marsha +Marsha's +Marshall +Marshall's +Marta +Marta's +Martel +Martel's +Martha +Martha's +Martial +Martial's +Martian +Martian's +Martians +Martin +Martin's +Martina +Martina's +Martinez +Martinez's +Martinique +Martinique's +Marty +Marty's +Marva +Marva's +Marvell +Marvell's +Marvin +Marvin's +Marx +Marx's +Marxism +Marxism's +Marxisms +Marxist +Marxist's +Marxists +Mary +Mary's +Maryann +Maryann's +Maryanne +Maryanne's +Maryellen +Maryellen's +Maryland +Maryland's +Marylander +Marylander's +Marylou +Marylou's +Masada +Masada's +Masai +Masai's +Masaryk +Masaryk's +Mascagni +Mascagni's +Masefield +Masefield's +Maserati +Maserati's +Maseru +Maseru's +Mashhad +Mashhad's +Mason +Mason's +Masonic +Masonic's +Masonite +Masonite's +Masons +Mass +Mass's +Massachusetts +Massachusetts's +Massasoit +Massasoit's +Massenet +Massenet's +Masses +Massey +Massey's +MasterCard +MasterCard's +Masters +Masters's +Mather +Mather's +Mathew +Mathew's +Mathews +Mathews's +Mathewson +Mathewson's +Mathias +Mathias's +Mathis +Mathis's +Matilda +Matilda's +Matisse +Matisse's +Matlab +Matlab's +Mattel +Mattel's +Matterhorn +Matterhorn's +Matthew +Matthew's +Matthews +Matthews's +Matthias +Matthias's +Mattie +Mattie's +Maud +Maud's +Maude +Maude's +Maugham +Maugham's +Maui +Maui's +Maupassant 
+Maupassant's +Maura +Maura's +Maureen +Maureen's +Mauriac +Mauriac's +Maurice +Maurice's +Mauricio +Mauricio's +Maurine +Maurine's +Mauritania +Mauritania's +Mauritius +Mauritius's +Mauro +Mauro's +Maurois +Maurois's +Mauryan +Mauryan's +Mauser +Mauser's +Mavis +Mavis's +Max +Max's +Maximilian +Maximilian's +Maxine +Maxine's +Maxwell +Maxwell's +May +May's +Maya +Maya's +Mayan +Mayan's +Mayans +Mayas +Mayer +Mayer's +Mayfair +Mayfair's +Mayflower +Mayflower's +Maynard +Maynard's +Mayo +Mayo's +Mayra +Mayra's +Mays +Mays's +Maytag +Maytag's +Mazama +Mazama's +Mazarin +Mazarin's +Mazatlan +Mazatlan's +Mazda +Mazda's +Mazola +Mazola's +Mazzini +Mazzini's +Mbabane +Mbabane's +Mbini +Mbini's +McAdam +McAdam's +McBride +McBride's +McCain +McCain's +McCall +McCall's +McCarthy +McCarthy's +McCarthyism +McCarthyism's +McCartney +McCartney's +McCarty +McCarty's +McClain +McClain's +McClellan +McClellan's +McClure +McClure's +McConnell +McConnell's +McCormick +McCormick's +McCoy +McCoy's +McCray +McCray's +McCullough +McCullough's +McDaniel +McDaniel's +McDonald +McDonald's +McDonnell +McDonnell's +McDowell +McDowell's +McEnroe +McEnroe's +McFadden +McFadden's +McFarland +McFarland's +McGee +McGee's +McGovern +McGovern's +McGowan +McGowan's +McGuffey +McGuffey's +McGuire +McGuire's +McIntosh +McIntosh's +McIntyre +McIntyre's +McKay +McKay's +McKee +McKee's +McKenzie +McKenzie's +McKinley +McKinley's +McKinney +McKinney's +McKnight +McKnight's +McLaughlin +McLaughlin's +McLean +McLean's +McLeod +McLeod's +McLuhan +McLuhan's +McMahon +McMahon's +McMillan +McMillan's +McNamara +McNamara's +McNaughton +McNaughton's +McNeil +McNeil's +McPherson +McPherson's +McQueen +McQueen's +McVeigh +McVeigh's +Md +Md's +Mead +Mead's +Meade +Meade's +Meadows +Meadows's +Meagan +Meagan's +Meany +Meany's +Mecca +Mecca's +Meccas +Medan +Medan's +Medea +Medea's +Medellin +Medellin's +Media +Media's +Medicaid +Medicaid's +Medicaids +Medicare +Medicare's +Medicares +Medici +Medici's +Medina 
+Medina's +Mediterranean +Mediterranean's +Mediterraneans +Medusa +Medusa's +Meg +Meg's +Megan +Megan's +Meghan +Meghan's +Meier +Meier's +Meighen +Meighen's +Meiji +Meiji's +Meir +Meir's +Mejia +Mejia's +Mekong +Mekong's +Mel +Mel's +Melanesia +Melanesia's +Melanesian +Melanesian's +Melanie +Melanie's +Melba +Melba's +Melbourne +Melbourne's +Melchior +Melchior's +Melchizedek +Melchizedek's +Melendez +Melendez's +Melinda +Melinda's +Melisa +Melisa's +Melisande +Melisande's +Melissa +Melissa's +Mellon +Mellon's +Melody +Melody's +Melpomene +Melpomene's +Melton +Melton's +Melva +Melva's +Melville +Melville's +Melvin +Melvin's +Memcached +Memcached's +Memling +Memling's +Memphis +Memphis's +Menander +Menander's +Mencius +Mencius's +Mencken +Mencken's +Mendel +Mendel's +Mendeleev +Mendeleev's +Mendelian +Mendelian's +Mendelssohn +Mendelssohn's +Mendez +Mendez's +Mendocino +Mendocino's +Mendoza +Mendoza's +Menelaus +Menelaus's +Menelik +Menelik's +Menes +Menes's +Menkalinan +Menkalinan's +Menkar +Menkar's +Menkent +Menkent's +Mennen +Mennen's +Mennonite +Mennonite's +Mennonites +Menominee +Menominee's +Menotti +Menotti's +Mensa +Mensa's +Mentholatum +Mentholatum's +Menuhin +Menuhin's +Menzies +Menzies's +Mephistopheles +Mephistopheles's +Merak +Merak's +Mercado +Mercado's +Mercator +Mercator's +Mercedes +Mercedes's +Mercer +Mercer's +Mercia +Mercia's +Merck +Merck's +Mercuries +Mercurochrome +Mercurochrome's +Mercury +Mercury's +Meredith +Meredith's +Merino +Merino's +Merle +Merle's +Merlin +Merlin's +Merlot +Merlot's +Merovingian +Merovingian's +Merriam +Merriam's +Merrick +Merrick's +Merrill +Merrill's +Merrimack +Merrimack's +Merritt +Merritt's +Merthiolate +Merthiolate's +Merton +Merton's +Mervin +Mervin's +Mesa +Mesa's +Mesabi +Mesabi's +Mesmer +Mesmer's +Mesolithic +Mesolithic's +Mesopotamia +Mesopotamia's +Mesozoic +Mesozoic's +Messerschmidt +Messerschmidt's +Messiaen +Messiaen's +Messiah +Messiah's +Messiahs +Messianic +Metallica +Metallica's +Metamucil 
+Metamucil's +Methodism +Methodism's +Methodisms +Methodist +Methodist's +Methodists +Methuselah +Methuselah's +Metternich +Metternich's +Meuse +Meuse's +Mexicali +Mexicali's +Mexican +Mexican's +Mexicans +Mexico +Mexico's +Meyer +Meyer's +Meyerbeer +Meyerbeer's +Meyers +Meyers's +Mfume +Mfume's +Mg +Mg's +MiG +MiG's +Mia +Mia's +Miami +Miami's +Miamis +Miaplacidus +Miaplacidus's +Micah +Micah's +Micawber +Micawber's +Michael +Michael's +Micheal +Micheal's +Michel +Michel's +Michelangelo +Michelangelo's +Michele +Michele's +Michelin +Michelin's +Michelle +Michelle's +Michelob +Michelob's +Michelson +Michelson's +Michigan +Michigan's +Michigander +Michigander's +Michiganders +Mick +Mick's +Mickey +Mickey's +Mickie +Mickie's +Micky +Micky's +Micmac +Micmac's +Micronesia +Micronesia's +Micronesian +Micronesian's +Microsoft +Microsoft's +Midas +Midas's +Middleton +Middleton's +Midland +Midland's +Midway +Midway's +Midwest +Midwest's +Midwestern +Midwestern's +Miguel +Miguel's +Mike +Mike's +Mikhail +Mikhail's +Mikoyan +Mikoyan's +Milagros +Milagros's +Milan +Milan's +Mildred +Mildred's +Miles +Miles's +Milford +Milford's +Milken +Milken's +Mill +Mill's +Millard +Millard's +Millay +Millay's +Miller +Miller's +Millet +Millet's +Millicent +Millicent's +Millie +Millie's +Millikan +Millikan's +Mills +Mills's +Milne +Milne's +Milo +Milo's +Milosevic +Milosevic's +Milquetoast +Milquetoast's +Miltiades +Miltiades's +Milton +Milton's +Miltonic +Miltonic's +Miltown +Miltown's +Milwaukee +Milwaukee's +Mimi +Mimi's +Mimosa +Mimosa's +Minamoto +Minamoto's +Mindanao +Mindanao's +Mindoro +Mindoro's +Mindy +Mindy's +Minerva +Minerva's +Ming +Ming's +Mingus +Mingus's +Minneapolis +Minneapolis's +Minnelli +Minnelli's +Minnesota +Minnesota's +Minnesotan +Minnesotan's +Minnesotans +Minnie +Minnie's +Minoan +Minoan's +Minoans +Minolta +Minolta's +Minos +Minos's +Minot +Minot's +Minotaur +Minotaur's +Minsk +Minsk's +Minsky +Minsky's +Mintaka +Mintaka's +Minuit +Minuit's +Miocene +Miocene's 
+Mir +Mir's +Mira +Mira's +Mirabeau +Mirabeau's +Mirach +Mirach's +Miranda +Miranda's +Mirfak +Mirfak's +Miriam +Miriam's +Miro +Miro's +Mirzam +Mirzam's +Miskito +Miskito's +Miss +Mississauga +Mississauga's +Mississippi +Mississippi's +Mississippian +Mississippian's +Mississippians +Missouri +Missouri's +Missourian +Missourian's +Missourians +Missy +Missy's +Mistassini +Mistassini's +Mister +Misty +Misty's +Mitch +Mitch's +Mitchel +Mitchel's +Mitchell +Mitchell's +Mitford +Mitford's +Mithra +Mithra's +Mithridates +Mithridates's +Mitsubishi +Mitsubishi's +Mitterrand +Mitterrand's +Mitty +Mitty's +Mitzi +Mitzi's +Mixtec +Mixtec's +Mizar +Mizar's +Mn +Mn's +Mnemosyne +Mnemosyne's +Mo +Mo's +Mobil +Mobil's +Mobile +Mobile's +Mobutu +Mobutu's +Modesto +Modesto's +Modigliani +Modigliani's +Moe +Moe's +Moet +Moet's +Mogadishu +Mogadishu's +Mohacs +Mohacs's +Mohamed +Mohamed's +Mohammad +Mohammad's +Mohammed +Mohammed's +Mohammedan +Mohammedan's +Mohammedanism +Mohammedanism's +Mohammedanisms +Mohammedans +Mohawk +Mohawk's +Mohawks +Mohican +Mohican's +Mohicans +Moho +Moho's +Mohorovicic +Mohorovicic's +Moira +Moira's +Moises +Moises's +Moiseyev +Moiseyev's +Mojave +Mojave's +Moldavia +Moldavia's +Moldova +Moldova's +Moliere +Moliere's +Molina +Molina's +Moll +Moll's +Mollie +Mollie's +Molly +Molly's +Molnar +Molnar's +Moloch +Moloch's +Molokai +Molokai's +Molotov +Molotov's +Moluccas +Moluccas's +Mombasa +Mombasa's +Mona +Mona's +Monaco +Monaco's +Mondale +Mondale's +Monday +Monday's +Mondays +Mondrian +Mondrian's +Monera +Monera's +Monet +Monet's +MongoDB +MongoDB's +Mongol +Mongol's +Mongolia +Mongolia's +Mongolian +Mongolian's +Mongolians +Mongoloid +Mongols +Monica +Monica's +Monique +Monique's +Monk +Monk's +Monmouth +Monmouth's +Monongahela +Monongahela's +Monroe +Monroe's +Monrovia +Monrovia's +Mons +Monsanto +Monsanto's +Montague +Montague's +Montaigne +Montaigne's +Montana +Montana's +Montanan +Montanan's +Montanans +Montcalm +Montcalm's +Monte +Monte's 
+Montenegrin +Montenegrin's +Montenegro +Montenegro's +Monterrey +Monterrey's +Montesquieu +Montesquieu's +Montessori +Montessori's +Monteverdi +Monteverdi's +Montevideo +Montevideo's +Montezuma +Montezuma's +Montgolfier +Montgolfier's +Montgomery +Montgomery's +Monticello +Monticello's +Montoya +Montoya's +Montpelier +Montpelier's +Montrachet +Montrachet's +Montreal +Montreal's +Montserrat +Montserrat's +Monty +Monty's +Moody +Moody's +Moog +Moog's +Moon +Moon's +Mooney +Mooney's +Moor +Moor's +Moore +Moore's +Moorish +Moorish's +Moors +Morales +Morales's +Moran +Moran's +Moravia +Moravia's +Moravian +Moravian's +Mordred +Mordred's +More +More's +Moreno +Moreno's +Morgan +Morgan's +Moriarty +Moriarty's +Morin +Morin's +Morison +Morison's +Morita +Morita's +Morley +Morley's +Mormon +Mormon's +Mormonism +Mormonism's +Mormonisms +Mormons +Moro +Moro's +Moroccan +Moroccan's +Moroccans +Morocco +Morocco's +Moroni +Moroni's +Morpheus +Morpheus's +Morphy +Morphy's +Morris +Morris's +Morrison +Morrison's +Morrow +Morrow's +Morse +Morse's +Mort +Mort's +Mortimer +Mortimer's +Morton +Morton's +Mosaic +Mosaic's +Moscow +Moscow's +Moseley +Moseley's +Moselle +Moselle's +Moses +Moses's +Moslem +Moslem's +Moslems +Mosley +Mosley's +Moss +Moss's +Mosul +Mosul's +Motorola +Motorola's +Motown +Motown's +Motrin +Motrin's +Mott +Mott's +Mount +Mount's +Mountbatten +Mountbatten's +Mountie +Mountie's +Mounties +Moussorgsky +Moussorgsky's +Mouthe +Mouthe's +Mouton +Mouton's +Mowgli +Mowgli's +Mozambican +Mozambican's +Mozambicans +Mozambique +Mozambique's +Mozart +Mozart's +Mozilla +Mozilla's +Ms +Muawiya +Muawiya's +Mubarak +Mubarak's +Mueller +Mueller's +Muenster +Muenster's +Mugabe +Mugabe's +Muhammad +Muhammad's +Muhammadan +Muhammadan's +Muhammadanism +Muhammadanism's +Muhammadanisms +Muhammadans +Muir +Muir's +Mujib +Mujib's +Mulder +Mulder's +Mullen +Mullen's +Muller +Muller's +Mulligan +Mulligan's +Mullikan +Mullikan's +Mullins +Mullins's +Mulroney +Mulroney's +Multan +Multan's 
+Mumbai +Mumbai's +Mumford +Mumford's +Munch +Munch's +Munich +Munich's +Munoz +Munoz's +Munro +Munro's +Muppet +Muppet's +Murasaki +Murasaki's +Murat +Murat's +Murchison +Murchison's +Murdoch +Murdoch's +Muriel +Muriel's +Murillo +Murillo's +Murine +Murine's +Murmansk +Murmansk's +Murphy +Murphy's +Murray +Murray's +Murrow +Murrow's +Murrumbidgee +Murrumbidgee's +Muscat +Muscat's +Muscovite +Muscovite's +Muscovy +Muscovy's +Muse +Muse's +Musharraf +Musharraf's +Musial +Musial's +Muskogee +Muskogee's +Muslim +Muslim's +Muslims +Mussolini +Mussolini's +Mussorgsky +Mussorgsky's +Mutsuhito +Mutsuhito's +Muzak +Muzak's +MySQL +MySQL's +MySpace +MySpace's +Myanmar +Myanmar's +Mycenae +Mycenae's +Mycenaean +Mycenaean's +Myers +Myers's +Mylar +Mylar's +Mylars +Myles +Myles's +Myra +Myra's +Myrdal +Myrdal's +Myrna +Myrna's +Myron +Myron's +Myrtle +Myrtle's +Mysore +Mysore's +Myst +Myst's +Münchhausen +Münchhausen's +N +N's +NASCAR +NASCAR's +NORAD +NORAD's +NSA +NSA's +NVIDIA +NVIDIA's +Na +Na's +Nabisco +Nabisco's +Nabokov +Nabokov's +Nader +Nader's +Nadia +Nadia's +Nadine +Nadine's +Nagasaki +Nagasaki's +Nagoya +Nagoya's +Nagpur +Nagpur's +Nagy +Nagy's +Nahuatl +Nahuatl's +Nahum +Nahum's +Naipaul +Naipaul's +Nair +Nair's +Nairobi +Nairobi's +Naismith +Naismith's +Nam +Nam's +Namath +Namath's +Namibia +Namibia's +Namibian +Namibian's +Namibians +Nan +Nan's +Nanak +Nanak's +Nanchang +Nanchang's +Nancy +Nancy's +Nanette +Nanette's +Nanjing +Nanjing's +Nanking +Nanking's +Nankings +Nannie +Nannie's +Nanook +Nanook's +Nansen +Nansen's +Nantes +Nantes's +Nantucket +Nantucket's +Naomi +Naomi's +Naphtali +Naphtali's +Napier +Napier's +Naples +Naples's +Napoleon +Napoleon's +Napoleonic +Napoleonic's +Napster +Napster's +Narcissus +Narcissus's +Narmada +Narmada's +Narnia +Narnia's +Narragansett +Narragansett's +Nash +Nash's +Nashua +Nashua's +Nashville +Nashville's +Nassau +Nassau's +Nasser +Nasser's +Nat +Nat's +Natalia +Natalia's +Natalie +Natalie's +Natasha +Natasha's +Natchez 
+Natchez's +Nate +Nate's +Nathan +Nathan's +Nathaniel +Nathaniel's +Nathans +Nathans's +Nation +Nation's +Nationwide +Nationwide's +Naugahyde +Naugahyde's +Nauru +Nauru's +Nautilus +Nautilus's +Navaho +Navaho's +Navahoes +Navahos +Navajo +Navajo's +Navajoes +Navajos +Navarre +Navarre's +Navarro +Navarro's +Navratilova +Navratilova's +Nazarene +Nazarene's +Nazareth +Nazareth's +Nazca +Nazca's +Nazi +Nazi's +Naziism +Naziism's +Naziisms +Nazis +Nazism +Nazism's +Nazisms +Nd +Nd's +Ndjamena +Ndjamena's +Ne +Ne's +Neal +Neal's +Neanderthal +Neanderthal's +Neanderthals +Neapolitan +Neapolitan's +Nebraska +Nebraska's +Nebraskan +Nebraskan's +Nebraskans +Nebuchadnezzar +Nebuchadnezzar's +Ned +Ned's +Nefertiti +Nefertiti's +Negev +Negev's +Negro +Negro's +Negroes +Negroid +Negroid's +Negroids +Negros +Negros's +Nehemiah +Nehemiah's +Nehru +Nehru's +Neil +Neil's +Nelda +Nelda's +Nell +Nell's +Nellie +Nellie's +Nelly +Nelly's +Nelsen +Nelsen's +Nelson +Nelson's +Nembutal +Nembutal's +Nemesis +Nemesis's +Neo +Neo's +Neogene +Neogene's +Neolithic +Nepal +Nepal's +Nepalese +Nepalese's +Nepali +Nepali's +Neptune +Neptune's +Nereid +Nereid's +Nerf +Nerf's +Nero +Nero's +Neruda +Neruda's +Nescafe +Nescafe's +Nesselrode +Nesselrode's +Nestle +Nestle's +Nestor +Nestor's +Nestorius +Nestorius's +Netflix +Netflix's +Netherlander +Netherlander's +Netherlanders +Netherlands +Netherlands's +Netscape +Netscape's +Nettie +Nettie's +Netzahualcoyotl +Netzahualcoyotl's +Neva +Neva's +Nevada +Nevada's +Nevadan +Nevadan's +Nevadans +Nevis +Nevis's +Nevsky +Nevsky's +Newark +Newark's +Newcastle +Newcastle's +Newfoundland +Newfoundland's +Newfoundlands +Newman +Newman's +Newport +Newport's +Newsweek +Newsweek's +Newton +Newton's +Newtonian +Newtonian's +Nexis +Nexis's +Ngaliema +Ngaliema's +Nguyen +Nguyen's +Ni +Ni's +Niagara +Niagara's +Niamey +Niamey's +Nibelung +Nibelung's +Nicaea +Nicaea's +Nicaragua +Nicaragua's +Nicaraguan +Nicaraguan's +Nicaraguans +Niccolo +Niccolo's +Nice +Nice's +Nicene 
+Nicene's +Nichiren +Nichiren's +Nicholas +Nicholas's +Nichole +Nichole's +Nichols +Nichols's +Nicholson +Nicholson's +Nick +Nick's +Nickelodeon +Nickelodeon's +Nicklaus +Nicklaus's +Nickolas +Nickolas's +Nicobar +Nicobar's +Nicodemus +Nicodemus's +Nicola +Nicola's +Nicolas +Nicolas's +Nicole +Nicole's +Nicosia +Nicosia's +Niebuhr +Niebuhr's +Nielsen +Nielsen's +Nietzsche +Nietzsche's +Nieves +Nieves's +Nigel +Nigel's +Niger +Niger's +Nigeria +Nigeria's +Nigerian +Nigerian's +Nigerians +Nightingale +Nightingale's +Nijinsky +Nijinsky's +Nike +Nike's +Nikita +Nikita's +Nikkei +Nikkei's +Nikki +Nikki's +Nikolai +Nikolai's +Nikolayev +Nikolayev's +Nikon +Nikon's +Nile +Nile's +Nimitz +Nimitz's +Nimrod +Nimrod's +Nina +Nina's +Nineveh +Nineveh's +Nintendo +Nintendo's +Niobe +Niobe's +Nippon +Nippon's +Nirenberg +Nirenberg's +Nirvana +Nirvana's +Nisan +Nisan's +Nisei +Nisei's +Nissan +Nissan's +Nita +Nita's +Nivea +Nivea's +Nixon +Nixon's +Nkrumah +Nkrumah's +NoDoz +NoDoz's +Noah +Noah's +Nobel +Nobel's +Nobelist +Nobelist's +Nobelists +Noble +Noble's +Noe +Noe's +Noel +Noel's +Noelle +Noelle's +Noels +Noemi +Noemi's +Noh +Noh's +Nokia +Nokia's +Nola +Nola's +Nolan +Nolan's +Nome +Nome's +Nona +Nona's +Nootka +Nootka's +Nora +Nora's +Norbert +Norbert's +Norberto +Norberto's +Nordic +Nordic's +Nordics +Noreen +Noreen's +Norfolk +Norfolk's +Noriega +Noriega's +Norma +Norma's +Norman +Norman's +Normand +Normand's +Normandy +Normandy's +Normans +Norplant +Norplant's +Norris +Norris's +Norse +Norse's +Norseman +Norseman's +Norsemen +Norsemen's +North +North's +Northampton +Northampton's +Northeast +Northeast's +Northeasts +Northerner +Northerner's +Northrop +Northrop's +Northrup +Northrup's +Norths +Northwest +Northwest's +Northwests +Norton +Norton's +Norway +Norway's +Norwegian +Norwegian's +Norwegians +Norwich +Norwich's +Nosferatu +Nosferatu's +Nostradamus +Nostradamus's +Nottingham +Nottingham's +Nouakchott +Nouakchott's +Noumea +Noumea's +Nova +Nova's +Novartis 
+Novartis's +November +November's +Novembers +Novgorod +Novgorod's +Novocain +Novocain's +Novocaine +Novokuznetsk +Novokuznetsk's +Novosibirsk +Novosibirsk's +Noxzema +Noxzema's +Noyce +Noyce's +Noyes +Noyes's +Np +Np's +Nubia +Nubia's +Nubian +Nubian's +Nukualofa +Nukualofa's +Numbers +Numbers's +Nunavut +Nunavut's +Nunez +Nunez's +Nunki +Nunki's +Nuremberg +Nuremberg's +Nureyev +Nureyev's +NutraSweet +NutraSweet's +NyQuil +NyQuil's +Nyasa +Nyasa's +Nyerere +Nyerere's +O +O'Brien +O'Brien's +O'Casey +O'Casey's +O'Connell +O'Connell's +O'Connor +O'Connor's +O'Donnell +O'Donnell's +O'Hara +O'Hara's +O'Higgins +O'Higgins's +O'Keeffe +O'Keeffe's +O'Neil +O'Neil's +O'Neill +O'Neill's +O'Rourke +O'Rourke's +O'Toole +O'Toole's +O's +OHSA +OHSA's +OK +OK's +OKed +OKing +OKs +Oahu +Oahu's +Oakland +Oakland's +Oakley +Oakley's +Oates +Oates's +Oaxaca +Oaxaca's +Ob +Ob's +Obadiah +Obadiah's +Obama +Obama's +Obamacare +Oberlin +Oberlin's +Oberon +Oberon's +Ocaml +Ocaml's +Occam +Occam's +Occident +Occidental +Occidental's +Occidentals +Oceania +Oceania's +Oceanus +Oceanus's +Ochoa +Ochoa's +Oct +Oct's +Octavia +Octavia's +Octavio +Octavio's +October +October's +Octobers +Odell +Odell's +Oder +Oder's +Odessa +Odessa's +Odets +Odets's +Odin +Odin's +Odis +Odis's +Odom +Odom's +Odysseus +Odysseus's +Odyssey +Odyssey's +Oedipal +Oedipal's +Oedipus +Oedipus's +Oersted +Oersted's +Ofelia +Ofelia's +Offenbach +Offenbach's +OfficeMax +OfficeMax's +Ogbomosho +Ogbomosho's +Ogden +Ogden's +Ogilvy +Ogilvy's +Oglethorpe +Oglethorpe's +Ohio +Ohio's +Ohioan +Ohioan's +Ohioans +Oise +Oise's +Ojibwa +Ojibwa's +Ojibwas +Okeechobee +Okeechobee's +Okefenokee +Okefenokee's +Okhotsk +Okhotsk's +Okinawa +Okinawa's +Oklahoma +Oklahoma's +Oklahoman +Oklahoman's +Oktoberfest +Oktoberfest's +Ola +Ola's +Olaf +Olaf's +Olajuwon +Olajuwon's +Olav +Olav's +Oldenburg +Oldenburg's +Oldfield +Oldfield's +Oldsmobile +Oldsmobile's +Olduvai +Olduvai's +Olen +Olen's +Olenek +Olenek's +Olga +Olga's +Oligocene 
+Oligocene's +Olin +Olin's +Olive +Olive's +Oliver +Oliver's +Olivetti +Olivetti's +Olivia +Olivia's +Olivier +Olivier's +Ollie +Ollie's +Olmec +Olmec's +Olmsted +Olmsted's +Olsen +Olsen's +Olson +Olson's +Olympia +Olympia's +Olympiad +Olympiad's +Olympiads +Olympian +Olympian's +Olympians +Olympias +Olympic +Olympic's +Olympics +Olympics's +Olympus +Olympus's +Omaha +Omaha's +Omahas +Oman +Oman's +Omar +Omar's +Omayyad +Omayyad's +Omdurman +Omdurman's +Omsk +Omsk's +Onassis +Onassis's +Oneal +Oneal's +Onega +Onega's +Onegin +Onegin's +Oneida +Oneida's +Onion +Onion's +Ono +Ono's +Onondaga +Onondaga's +Onsager +Onsager's +Ontario +Ontario's +Oort +Oort's +Opal +Opal's +Opel +Opel's +OpenOffice +OpenOffice's +Ophelia +Ophelia's +Ophiuchus +Ophiuchus's +Oppenheimer +Oppenheimer's +Oprah +Oprah's +Ora +Ora's +Oracle +Oracle's +Oran +Oran's +Orange +Orange's +Oranjestad +Oranjestad's +Orbison +Orbison's +Ordovician +Ordovician's +Oregon +Oregon's +Oregonian +Oregonian's +Oregonians +Oreo +Oreo's +Orestes +Orestes's +Orient +Orient's +Oriental +Oriental's +Orientals +Orin +Orin's +Orinoco +Orinoco's +Orion +Orion's +Oriya +Oriya's +Orizaba +Orizaba's +Orkney +Orkney's +Orlando +Orlando's +Orleans +Orleans's +Orlon +Orlon's +Orlons +Orly +Orly's +Orpheus +Orpheus's +Orphic +Orphic's +Orr +Orr's +Ortega +Ortega's +Ortiz +Ortiz's +Orval +Orval's +Orville +Orville's +Orwell +Orwell's +Orwellian +Orwellian's +Os +Os's +Osage +Osage's +Osaka +Osaka's +Osbert +Osbert's +Osborn +Osborn's +Osborne +Osborne's +Oscar +Oscar's +Oscars +Osceola +Osceola's +Osgood +Osgood's +Oshawa +Oshawa's +Oshkosh +Oshkosh's +Osiris +Osiris's +Oslo +Oslo's +Osman +Osman's +Ostrogoth +Ostrogoth's +Ostwald +Ostwald's +Osvaldo +Osvaldo's +Oswald +Oswald's +Othello +Othello's +Otis +Otis's +Ottawa +Ottawa's +Ottawas +Otto +Otto's +Ottoman +Ottoman's +Ouagadougou +Ouagadougou's +Ouija +Ouija's +Ovid +Ovid's +Owen +Owen's +Owens +Owens's +Oxford +Oxford's +Oxfords +Oxnard +Oxnard's +Oxonian +Oxonian's 
+Oxus +Oxus's +Oxycontin +Oxycontin's +Oz +Oz's +Ozark +Ozark's +Ozarks +Ozarks's +Ozymandias +Ozymandias's +Ozzie +Ozzie's +P +P's +PHP +PHP's +Pa +Pa's +Paar +Paar's +Pablo +Pablo's +Pablum +Pablum's +Pabst +Pabst's +Pace +Pace's +Pacheco +Pacheco's +Pacific +Pacific's +Pacino +Pacino's +Packard +Packard's +Paderewski +Paderewski's +Padilla +Padilla's +Paganini +Paganini's +Page +Page's +Paglia +Paglia's +Pahlavi +Pahlavi's +Paige +Paige's +Paine +Paine's +Pakistan +Pakistan's +Pakistani +Pakistani's +Pakistanis +Palau +Palau's +Palembang +Palembang's +Paleocene +Paleocene's +Paleogene +Paleogene's +Paleolithic +Paleolithic's +Paleozoic +Paleozoic's +Palermo +Palermo's +Palestine +Palestine's +Palestinian +Palestinian's +Palestinians +Palestrina +Palestrina's +Paley +Paley's +Palikir +Palikir's +Palisades +Palisades's +Palladio +Palladio's +Palmer +Palmer's +Palmerston +Palmerston's +Palmolive +Palmolive's +Palmyra +Palmyra's +Palomar +Palomar's +Pam +Pam's +Pamela +Pamela's +Pamirs +Pamirs's +Pampers +Pampers's +Pan +Pan's +Panama +Panama's +Panamanian +Panamanian's +Panamanians +Panamas +Panasonic +Panasonic's +Pandora +Pandora's +Pangaea +Pangaea's +Pankhurst +Pankhurst's +Panmunjom +Panmunjom's +Pansy +Pansy's +Pantagruel +Pantagruel's +Pantaloon +Pantaloon's +Pantheon +Pantheon's +Panza +Panza's +Paracelsus +Paracelsus's +Paraclete +Paraclete's +Paradise +Paraguay +Paraguay's +Paraguayan +Paraguayan's +Paraguayans +Paramaribo +Paramaribo's +Paramount +Paramount's +Paraná +Paraná's +Parcheesi +Parcheesi's +Pareto +Pareto's +Paris +Paris's +Parisian +Parisian's +Parisians +Park +Park's +Parker +Parker's +Parkinson +Parkinson's +Parkman +Parkman's +Parks +Parks's +Parliament +Parliament's +Parmesan +Parmesan's +Parmesans +Parnassus +Parnassus's +Parnell +Parnell's +Parr +Parr's +Parrish +Parrish's +Parsi +Parsi's +Parsifal +Parsifal's +Parsons +Parsons's +Parthenon +Parthenon's +Parthia +Parthia's +Pasadena +Pasadena's +Pascal +Pascal's +Pasquale +Pasquale's 
+Passion +Passion's +Passions +Passover +Passover's +Passovers +Pasternak +Pasternak's +Pasteur +Pasteur's +Pat +Pat's +Patagonia +Patagonia's +Patagonian +Patagonian's +Pate +Pate's +Patel +Patel's +Paterson +Paterson's +Patna +Patna's +Patrica +Patrica's +Patrice +Patrice's +Patricia +Patricia's +Patrick +Patrick's +Patsy +Patsy's +Patterson +Patterson's +Patti +Patti's +Patton +Patton's +Patty +Patty's +Paul +Paul's +Paula +Paula's +Paulette +Paulette's +Pauli +Pauli's +Pauline +Pauline's +Pauling +Pauling's +Pavarotti +Pavarotti's +Pavlov +Pavlov's +Pavlova +Pavlova's +Pavlovian +Pavlovian's +Pawnee +Pawnee's +PayPal +PayPal's +Payne +Payne's +Pb +Pb's +Pd +Pd's +Peabody +Peabody's +Peace +Peace's +Peale +Peale's +Pearl +Pearl's +Pearlie +Pearlie's +Pearson +Pearson's +Peary +Peary's +Pechora +Pechora's +Peck +Peck's +Peckinpah +Peckinpah's +Pecos +Pecos's +Pedro +Pedro's +Peel +Peel's +Peg +Peg's +Pegasus +Pegasus's +Pegasuses +Peggy +Peggy's +Pei +Pei's +Peiping +Peiping's +Pekinese +Pekinese's +Pekineses +Peking +Peking's +Pekingese +Pekingese's +Pekingeses +Pekings +Pele +Pele's +Pelee +Pelee's +Peloponnese +Peloponnese's +Pembroke +Pembroke's +Pena +Pena's +Penderecki +Penderecki's +Penelope +Penelope's +Penn +Penn's +Penney +Penney's +Pennington +Pennington's +Pennsylvania +Pennsylvania's +Pennsylvanian +Pennsylvanian's +Pennsylvanians +Penny +Penny's +Pennzoil +Pennzoil's +Pensacola +Pensacola's +Pentagon +Pentagon's +Pentateuch +Pentateuch's +Pentax +Pentax's +Pentecost +Pentecost's +Pentecostal +Pentecostal's +Pentecostals +Pentecosts +Pentium +Pentium's +Peoria +Peoria's +Pepin +Pepin's +Pepsi +Pepsi's +Pepys +Pepys's +Pequot +Pequot's +Percheron +Percheron's +Percival +Percival's +Percy +Percy's +Perelman +Perelman's +Perez +Perez's +Periclean +Periclean's +Pericles +Pericles's +Perkins +Perkins's +Perl +Perl's +Perm +Perm's +Permalloy +Permalloy's +Permian +Permian's +Pernod +Pernod's +Peron +Peron's +Perot +Perot's +Perrier +Perrier's +Perry 
+Perry's +Perseid +Perseid's +Persephone +Persephone's +Persepolis +Persepolis's +Perseus +Perseus's +Pershing +Pershing's +Persia +Persia's +Persian +Persian's +Persians +Perth +Perth's +Peru +Peru's +Peruvian +Peruvian's +Peruvians +Peshawar +Peshawar's +Pete +Pete's +Peter +Peter's +Peters +Peters's +Petersen +Petersen's +Peterson +Peterson's +Petra +Petra's +Petrarch +Petrarch's +Petty +Petty's +Peugeot +Peugeot's +Pfizer +Pfizer's +Phaedra +Phaedra's +Phaethon +Phaethon's +Phanerozoic +Phanerozoic's +Pharaoh +Pharaoh's +Pharaohs +Pharisee +Pharisee's +Pharisees +Phekda +Phekda's +Phelps +Phelps's +Phidias +Phidias's +Philadelphia +Philadelphia's +Philby +Philby's +Philip +Philip's +Philippe +Philippe's +Philippians +Philippians's +Philippine +Philippine's +Philippines +Philippines's +Philips +Philips's +Philistine +Philistine's +Phillip +Phillip's +Phillipa +Phillipa's +Phillips +Phillips's +Philly +Philly's +Phipps +Phipps's +Phobos +Phobos's +Phoebe +Phoebe's +Phoenicia +Phoenicia's +Phoenix +Phoenix's +Photostat +Photostat's +Photostats +Photostatted +Photostatting +Phrygia +Phrygia's +Phyllis +Phyllis's +Piaf +Piaf's +Piaget +Piaget's +Pianola +Pianola's +Picasso +Picasso's +Piccadilly +Piccadilly's +Pickering +Pickering's +Pickett +Pickett's +Pickford +Pickford's +Pickwick +Pickwick's +Pict +Pict's +Piedmont +Piedmont's +Pierce +Pierce's +Pierre +Pierre's +Pierrot +Pierrot's +Pigmies +Pigmy +Pigmy's +Pike +Pike's +Pilate +Pilate's +Pilates +Pilates's +Pilcomayo +Pilcomayo's +Pilgrim +Pilgrim's +Pillsbury +Pillsbury's +Pinatubo +Pinatubo's +Pincus +Pincus's +Pindar +Pindar's +Pinkerton +Pinkerton's +Pinocchio +Pinocchio's +Pinochet +Pinochet's +Pinter +Pinter's +Pippin +Pippin's +Piraeus +Piraeus's +Pirandello +Pirandello's +Pisa +Pisa's +Pisces +Pisces's +Pisistratus +Pisistratus's +Pissaro +Pissaro's +Pitcairn +Pitcairn's +Pitt +Pitt's +Pittman +Pittman's +Pitts +Pitts's +Pittsburgh +Pittsburgh's +Pius +Pius's +Pizarro +Pizarro's +Planck +Planck's 
+Plantagenet +Plantagenet's +Plasticine +Plasticine's +Plataea +Plataea's +Plath +Plath's +Plato +Plato's +Platonic +Platonism +Platonism's +Platonist +Platonist's +Platte +Platte's +Plautus +Plautus's +PlayStation +PlayStation's +Playboy +Playboy's +Playtex +Playtex's +Pleiades +Pleiades's +Pleistocene +Pleistocene's +Plexiglas +Plexiglas's +Plexiglases +Pliny +Pliny's +Pliocene +Pliocene's +Plutarch +Plutarch's +Pluto +Pluto's +Plymouth +Plymouth's +Po +Po's +Pocahontas +Pocahontas's +Pocono +Pocono's +Poconos +Poconos's +Podgorica +Podgorica's +Podhoretz +Podhoretz's +Podunk +Podunk's +Poe +Poe's +Pogo +Pogo's +Poincaré +Poincaré's +Poiret +Poiret's +Poirot +Poirot's +Poisson +Poisson's +Poitier +Poitier's +Pokémon +Pokémon's +Poland +Poland's +Polanski +Polanski's +Polaris +Polaris's +Polaroid +Polaroid's +Polaroids +Pole +Pole's +Poles +Polish +Polish's +Politburo +Politburo's +Polk +Polk's +Pollard +Pollard's +Pollock +Pollock's +Pollux +Pollux's +Polly +Polly's +Pollyanna +Pollyanna's +Polo +Polo's +Poltava +Poltava's +Polyhymnia +Polyhymnia's +Polynesia +Polynesia's +Polynesian +Polynesian's +Polynesians +Polyphemus +Polyphemus's +Pomerania +Pomerania's +Pomeranian +Pomeranian's +Pomona +Pomona's +Pompadour +Pompadour's +Pompeii +Pompeii's +Pompey +Pompey's +Ponce +Ponce's +Pontchartrain +Pontchartrain's +Pontiac +Pontiac's +Pontianak +Pontianak's +Pooh +Pooh's +Poole +Poole's +Poona +Poona's +Pope +Pope's +Popeye +Popeye's +Popocatepetl +Popocatepetl's +Popper +Popper's +Poppins +Poppins's +Popsicle +Popsicle's +Porfirio +Porfirio's +Porrima +Porrima's +Porsche +Porsche's +Porter +Porter's +Portia +Portia's +Portland +Portland's +Portsmouth +Portsmouth's +Portugal +Portugal's +Portuguese +Portuguese's +Poseidon +Poseidon's +Post +Post's +PostgreSQL +PostgreSQL's +Potemkin +Potemkin's +Potomac +Potomac's +Potsdam +Potsdam's +Pottawatomie +Pottawatomie's +Potter +Potter's +Potts +Potts's +Pound +Pound's +Poussin +Poussin's +Powell +Powell's +PowerPC 
+PowerPC's +PowerPoint +PowerPoint's +Powers +Powers's +Powhatan +Powhatan's +Poznan +Poznan's +Prada +Prada's +Prado +Prado's +Praetorian +Praetorian's +Prague +Prague's +Praia +Praia's +Prakrit +Prakrit's +Pratchett +Pratchett's +Pratt +Pratt's +Pravda +Pravda's +Praxiteles +Praxiteles's +Preakness +Preakness's +Precambrian +Precambrian's +Preminger +Preminger's +Premyslid +Premyslid's +Prensa +Prensa's +Prentice +Prentice's +Presbyterian +Presbyterian's +Presbyterianism +Presbyterianism's +Presbyterians +Prescott +Prescott's +President +President's +Presidents +Presley +Presley's +Preston +Preston's +Pretoria +Pretoria's +Priam +Priam's +Pribilof +Pribilof's +Price +Price's +Priceline +Priceline's +Priestley +Priestley's +Prince +Prince's +Princeton +Princeton's +Principe +Principe's +Priscilla +Priscilla's +Prius +Prius's +Procrustean +Procrustean's +Procrustes +Procrustes's +Procter +Procter's +Procyon +Procyon's +Prohibition +Prokofiev +Prokofiev's +Promethean +Promethean's +Prometheus +Prometheus's +Proserpine +Proserpine's +Protagoras +Protagoras's +Proterozoic +Proterozoic's +Protestant +Protestant's +Protestantism +Protestantism's +Protestantisms +Protestants +Proteus +Proteus's +Proudhon +Proudhon's +Proust +Proust's +Provencals +Provence +Provence's +Provençal +Provençal's +Proverbs +Providence +Providence's +Providences +Provo +Provo's +Prozac +Prozac's +Prudence +Prudence's +Prudential +Prudential's +Pruitt +Pruitt's +Prussia +Prussia's +Prussian +Prussian's +Prut +Prut's +Pryor +Pryor's +Psalms +Psalms's +Psalter +Psalter's +Psalters +Psyche +Psyche's +Pt +Pt's +Ptah +Ptah's +Ptolemaic +Ptolemaic's +Ptolemies +Ptolemy +Ptolemy's +Pu +Pu's +Puccini +Puccini's +Puck +Puck's +Puckett +Puckett's +Puebla +Puebla's +Pueblo +Pueblo's +Puerto +Puget +Puget's +Pugh +Pugh's +Pulaski +Pulaski's +Pulitzer +Pulitzer's +Pullman +Pullman's +Pullmans +Punch +Punch's +Punic +Punic's +Punjab +Punjab's +Punjabi +Punjabi's +Purana +Purana's +Purcell +Purcell's +Purdue 
+Purdue's +Purim +Purim's +Purims +Purina +Purina's +Puritan +Puritan's +Puritanism +Puritanism's +Puritanisms +Purus +Purus's +Pusan +Pusan's +Pusey +Pusey's +Pushkin +Pushkin's +Pushtu +Pushtu's +Putin +Putin's +Putnam +Putnam's +Puzo +Puzo's +PyTorch +PyTorch's +Pygmalion +Pygmalion's +Pygmies +Pygmy +Pygmy's +Pyle +Pyle's +Pym +Pym's +Pynchon +Pynchon's +Pyongyang +Pyongyang's +Pyotr +Pyotr's +Pyrenees +Pyrenees's +Pyrex +Pyrex's +Pyrexes +Pyrrhic +Pyrrhic's +Pythagoras +Pythagoras's +Pythagorean +Pythagorean's +Pythias +Pythias's +Python +Python's +Pétain +Pétain's +Pôrto +Pôrto's +Q +Qaddafi +Qaddafi's +Qantas +Qantas's +Qatar +Qatar's +Qingdao +Qingdao's +Qiqihar +Qiqihar's +Qom +Qom's +Quaalude +Quaalude's +Quaker +Quaker's +Quakers +Qualcomm +Qualcomm's +Quaoar +Quaoar's +Quasimodo +Quasimodo's +Quaternary +Quaternary's +Quayle +Quayle's +Quebec +Quebec's +Quechua +Quechua's +Queen +Queen's +Queens +Queens's +Queensland +Queensland's +Quentin +Quentin's +Quetzalcoatl +Quetzalcoatl's +Quezon +Quezon's +Quincy +Quincy's +Quinn +Quinn's +Quintilian +Quintilian's +Quinton +Quinton's +Quirinal +Quirinal's +Quisling +Quisling's +Quito +Quito's +Quixote +Quixote's +Quixotism +Quixotism's +Qumran +Qumran's +Quonset +Quonset's +Qur'an +Quran +Québecois +Québecois's +R +R's +RCA +RCA's +RDS +RDS's +Ra +Ra's +Rabat +Rabat's +Rabelais +Rabelais's +Rabelaisian +Rabelaisian's +Rabin +Rabin's +Rachael +Rachael's +Rachel +Rachel's +Rachelle +Rachelle's +Rachmaninoff +Rachmaninoff's +Racine +Racine's +Radcliffe +Radcliffe's +Rae +Rae's +Rafael +Rafael's +Raffles +Raffles's +Ragnarök +Ragnarök's +Rainier +Rainier's +Raleigh +Raleigh's +Ralph +Ralph's +Rama +Rama's +Ramada +Ramada's +Ramadan +Ramadan's +Ramadans +Ramakrishna +Ramakrishna's +Ramanujan +Ramanujan's +Ramayana +Ramayana's +Rambo +Rambo's +Ramirez +Ramirez's +Ramiro +Ramiro's +Ramon +Ramon's +Ramona +Ramona's +Ramos +Ramos's +Ramsay +Ramsay's +Ramses +Ramses's +Ramsey +Ramsey's +Rand +Rand's +Randal +Randal's 
+Randall +Randall's +Randell +Randell's +Randi +Randi's +Randolph +Randolph's +Randy +Randy's +Rangoon +Rangoon's +Rankin +Rankin's +Rankine +Rankine's +Raoul +Raoul's +Raphael +Raphael's +Rapunzel +Rapunzel's +Raquel +Raquel's +Rasalgethi +Rasalgethi's +Rasalhague +Rasalhague's +Rasmussen +Rasmussen's +Rasputin +Rasputin's +Rasta +Rastaban +Rastaban's +Rastafarian +Rastafarian's +Rastafarianism +Rather +Rather's +Ratliff +Ratliff's +Raul +Raul's +Ravel +Ravel's +Rawalpindi +Rawalpindi's +Ray +Ray's +RayBan +RayBan's +Rayburn +Rayburn's +Rayleigh +Rayleigh's +Raymond +Raymond's +Raymundo +Raymundo's +Reagan +Reagan's +Reaganomics +Reaganomics's +Realtor +Realtor's +Reasoner +Reasoner's +Reba +Reba's +Rebecca +Rebecca's +Rebekah +Rebekah's +Recife +Recife's +Red +Red's +Redford +Redford's +Redgrave +Redgrave's +Redis +Redis's +Redmond +Redmond's +Redshift +Redshift's +Reebok +Reebok's +Reed +Reed's +Reese +Reese's +Reeves +Reeves's +Refugio +Refugio's +Reggie +Reggie's +Regina +Regina's +Reginae +Reginae's +Reginald +Reginald's +Regor +Regor's +Regulus +Regulus's +Rehnquist +Rehnquist's +Reich +Reich's +Reichstag +Reichstag's +Reid +Reid's +Reilly +Reilly's +Reinaldo +Reinaldo's +Reinhardt +Reinhardt's +Reinhold +Reinhold's +Remarque +Remarque's +Rembrandt +Rembrandt's +Remington +Remington's +Remus +Remus's +Rena +Rena's +Renaissance +Renaissance's +Renaissances +Renault +Renault's +Rene +Rene's +Renee +Renee's +Reno +Reno's +Renoir +Renoir's +Representative +Republican +Republican's +Republicans +Resurrection +Reuben +Reuben's +Reunion +Reunion's +Reuters +Reuters's +Reuther +Reuther's +Reva +Reva's +Revelations +Revelations's +Revere +Revere's +Reverend +Reverend's +Revlon +Revlon's +Rex +Rex's +Reyes +Reyes's +Reykjavik +Reykjavik's +Reyna +Reyna's +Reynaldo +Reynaldo's +Reynolds +Reynolds's +Rhea +Rhea's +Rhee +Rhee's +Rheingau +Rheingau's +Rhenish +Rhenish's +Rhiannon +Rhiannon's +Rhine +Rhine's +Rhineland +Rhineland's +Rhoda +Rhoda's +Rhode +Rhodes +Rhodes's 
+Rhodesia +Rhodesia's +Rhonda +Rhonda's +Rhone +Rhone's +Ribbentrop +Ribbentrop's +Ricardo +Ricardo's +Rice +Rice's +Rich +Rich's +Richard +Richard's +Richards +Richards's +Richardson +Richardson's +Richelieu +Richelieu's +Richie +Richie's +Richmond +Richmond's +Richter +Richter's +Richthofen +Richthofen's +Rick +Rick's +Rickenbacker +Rickenbacker's +Rickey +Rickey's +Rickie +Rickie's +Rickover +Rickover's +Ricky +Ricky's +Rico +Rico's +Riddle +Riddle's +Ride +Ride's +Riefenstahl +Riefenstahl's +Riel +Riel's +Riemann +Riemann's +Riesling +Riesling's +Riga +Riga's +Rigel +Rigel's +Riggs +Riggs's +Rigoberto +Rigoberto's +Rigoletto +Rigoletto's +Riley +Riley's +Rilke +Rilke's +Rimbaud +Rimbaud's +Ringling +Ringling's +Ringo +Ringo's +Rio +Rio's +Rios +Rios's +Ripley +Ripley's +Risorgimento +Risorgimento's +Rita +Rita's +Ritalin +Ritalin's +Ritz +Ritz's +Rivas +Rivas's +Rivera +Rivera's +Rivers +Rivers's +Riverside +Riviera +Riviera's +Rivieras +Riyadh +Riyadh's +Rizal +Rizal's +Rn +Rn's +Roach +Roach's +Rob +Rob's +Robbie +Robbie's +Robbin +Robbin's +Robbins +Robbins's +Robby +Robby's +Roberson +Roberson's +Robert +Robert's +Roberta +Roberta's +Roberto +Roberto's +Roberts +Roberts's +Robertson +Robertson's +Robeson +Robeson's +Robespierre +Robespierre's +Robin +Robin's +Robinson +Robinson's +Robitussin +Robitussin's +Robles +Robles's +Robson +Robson's +Robt +Robt's +Robyn +Robyn's +Rocco +Rocco's +Rocha +Rocha's +Rochambeau +Rochambeau's +Roche +Roche's +Rochelle +Rochelle's +Rochester +Rochester's +Rock +Rock's +Rockefeller +Rockefeller's +Rockford +Rockford's +Rockies +Rockies's +Rockne +Rockne's +Rockwell +Rockwell's +Rocky +Rocky's +Rod +Rod's +Roddenberry +Roddenberry's +Roderick +Roderick's +Rodger +Rodger's +Rodgers +Rodgers's +Rodin +Rodin's +Rodney +Rodney's +Rodolfo +Rodolfo's +Rodrick +Rodrick's +Rodrigo +Rodrigo's +Rodriguez +Rodriguez's +Rodriquez +Rodriquez's +Roeg +Roeg's +Roentgen +Roentgen's +Rogelio +Rogelio's +Roger +Roger's +Rogers +Rogers's +Roget 
+Roget's +Rojas +Rojas's +Roku +Roku's +Rolaids +Rolaids's +Roland +Roland's +Rolando +Rolando's +Rolex +Rolex's +Rolland +Rolland's +Rollerblade +Rollerblade's +Rollins +Rollins's +Rolodex +Rolodex's +Rolvaag +Rolvaag's +Roman +Roman's +Romanesque +Romanesque's +Romania +Romania's +Romanian +Romanian's +Romanians +Romanies +Romano +Romano's +Romanov +Romanov's +Romans +Romans's +Romansh +Romansh's +Romanticism +Romany +Romany's +Rome +Rome's +Romeo +Romeo's +Romero +Romero's +Romes +Rommel +Rommel's +Romney +Romney's +Romulus +Romulus's +Ron +Ron's +Ronald +Ronald's +Ronda +Ronda's +Ronnie +Ronnie's +Ronny +Ronny's +Ronstadt +Ronstadt's +Rooney +Rooney's +Roosevelt +Roosevelt's +Root +Root's +Roquefort +Roquefort's +Roqueforts +Rorschach +Rorschach's +Rory +Rory's +Rosa +Rosa's +Rosales +Rosales's +Rosalie +Rosalie's +Rosalind +Rosalind's +Rosalinda +Rosalinda's +Rosalyn +Rosalyn's +Rosanna +Rosanna's +Rosanne +Rosanne's +Rosario +Rosario's +Roscoe +Roscoe's +Rose +Rose's +Roseann +Roseann's +Roseau +Roseau's +Rosecrans +Rosecrans's +Rosella +Rosella's +Rosemarie +Rosemarie's +Rosemary +Rosemary's +Rosenberg +Rosenberg's +Rosendo +Rosendo's +Rosenzweig +Rosenzweig's +Rosetta +Rosetta's +Rosicrucian +Rosicrucian's +Rosie +Rosie's +Roslyn +Roslyn's +Ross +Ross's +Rossetti +Rossetti's +Rossini +Rossini's +Rostand +Rostand's +Rostov +Rostov's +Rostropovich +Rostropovich's +Roswell +Roswell's +Rotarian +Rotarian's +Roth +Roth's +Rothko +Rothko's +Rothschild +Rothschild's +Rotterdam +Rotterdam's +Rottweiler +Rottweiler's +Rouault +Rouault's +Roumania +Roumania's +Rourke +Rourke's +Rousseau +Rousseau's +Rove +Rove's +Rover +Rover's +Rowe +Rowe's +Rowena +Rowena's +Rowland +Rowland's +Rowling +Rowling's +Roxanne +Roxanne's +Roxie +Roxie's +Roxy +Roxy's +Roy +Roy's +Royal +Royal's +Royce +Royce's +Rozelle +Rozelle's +Rubaiyat +Rubaiyat's +Rubbermaid +Rubbermaid's +Ruben +Ruben's +Rubens +Rubens's +Rubicon +Rubicon's +Rubik +Rubik's +Rubin +Rubin's +Rubinstein +Rubinstein's 
+Ruby +Ruby's +Ruchbah +Ruchbah's +Rudolf +Rudolf's +Rudolph +Rudolph's +Rudy +Rudy's +Rudyard +Rudyard's +Rufus +Rufus's +Ruhr +Ruhr's +Ruiz +Ruiz's +Rukeyser +Rukeyser's +Rumania +Rumania's +Rumpelstiltskin +Rumpelstiltskin's +Rumsfeld +Rumsfeld's +Runnymede +Runnymede's +Runyon +Runyon's +Rupert +Rupert's +Rush +Rush's +Rushdie +Rushdie's +Rushmore +Rushmore's +Ruskin +Ruskin's +Russel +Russel's +Russell +Russell's +Russia +Russia's +Russian +Russian's +Russians +Russo +Russo's +Rustbelt +Rustbelt's +Rusty +Rusty's +Rutan +Rutan's +Rutgers +Rutgers's +Ruth +Ruth's +Rutherford +Rutherford's +Ruthie +Ruthie's +Rutledge +Rutledge's +Rwanda +Rwanda's +Rwandan +Rwandan's +Rwandans +Rwandas +Ryan +Ryan's +Rydberg +Rydberg's +Ryder +Ryder's +Ryukyu +Ryukyu's +S +S's +SAP +SAP's +SARS +SARS's +SQLite +SQLite's +SUSE +SUSE's +SVN +SVN's +Saab +Saab's +Saar +Saar's +Saarinen +Saarinen's +Saatchi +Saatchi's +Sabbath +Sabbath's +Sabbaths +Sabik +Sabik's +Sabin +Sabin's +Sabina +Sabina's +Sabine +Sabine's +Sabre +Sabre's +Sabrina +Sabrina's +Sacajawea +Sacajawea's +Sacco +Sacco's +Sachs +Sachs's +Sacramento +Sacramento's +Sadat +Sadat's +Saddam +Saddam's +Sadducee +Sadducee's +Sade +Sade's +Sadie +Sadie's +Sadr +Sadr's +Safavid +Safavid's +Safeway +Safeway's +Sagan +Sagan's +Saginaw +Saginaw's +Sagittarius +Sagittarius's +Sagittariuses +Sahara +Sahara's +Sahel +Sahel's +Saigon +Saigon's +Saiph +Saiph's +Sakai +Sakai's +Sakha +Sakha's +Sakhalin +Sakhalin's +Sakharov +Sakharov's +Saki +Saki's +Saks +Saks's +Sal +Sal's +Saladin +Saladin's +Salado +Salado's +Salamis +Salamis's +Salas +Salas's +Salazar +Salazar's +Salem +Salem's +Salerno +Salerno's +Salesforce +Salesforce's +Salinas +Salinas's +Salinger +Salinger's +Salisbury +Salisbury's +Salish +Salish's +Salk +Salk's +Sallie +Sallie's +Sallust +Sallust's +Sally +Sally's +Salome +Salome's +Salton +Salton's +Salvador +Salvador's +Salvadoran +Salvadoran's +Salvadorans +Salvadorian +Salvadorian's +Salvadorians +Salvatore 
+Salvatore's +Salween +Salween's +Salyut +Salyut's +Samantha +Samantha's +Samar +Samar's +Samara +Samara's +Samaritan +Samaritan's +Samaritans +Samarkand +Samarkand's +Sammie +Sammie's +Sammy +Sammy's +Samoa +Samoa's +Samoan +Samoan's +Samoset +Samoset's +Samoyed +Samoyed's +Sampson +Sampson's +Samson +Samson's +Samsonite +Samsonite's +Samsung +Samsung's +Samuel +Samuel's +Samuelson +Samuelson's +San +San's +Sana +Sana's +Sanchez +Sanchez's +Sancho +Sancho's +Sand +Sand's +Sandburg +Sandburg's +Sanders +Sanders's +Sandinista +Sandinista's +Sandoval +Sandoval's +Sandra +Sandra's +Sandy +Sandy's +Sanford +Sanford's +Sanforized +Sanforized's +Sang +Sang's +Sanger +Sanger's +Sanhedrin +Sanhedrin's +Sanka +Sanka's +Sankara +Sankara's +Sanskrit +Sanskrit's +Santa +Santa's +Santana +Santana's +Santayana +Santayana's +Santeria +Santeria's +Santiago +Santiago's +Santos +Santos's +Sappho +Sappho's +Sapporo +Sapporo's +Sara +Sara's +Saracen +Saracen's +Saracens +Saragossa +Saragossa's +Sarah +Sarah's +Sarajevo +Sarajevo's +Saran +Saran's +Sarasota +Sarasota's +Saratov +Saratov's +Sarawak +Sarawak's +Sardinia +Sardinia's +Sargasso +Sargasso's +Sargent +Sargent's +Sargon +Sargon's +Sarnoff +Sarnoff's +Saroyan +Saroyan's +Sarto +Sarto's +Sartre +Sartre's +Sasha +Sasha's +Saskatchewan +Saskatchewan's +Saskatoon +Saskatoon's +Sasquatch +Sasquatch's +Sassanian +Sassanian's +Sassoon +Sassoon's +Satan +Satan's +Satanism +Satanism's +Satanist +Satanist's +Saturday +Saturday's +Saturdays +Saturn +Saturn's +Saturnalia +Saturnalia's +Saudi +Saudi's +Saudis +Saul +Saul's +Saunders +Saunders's +Saundra +Saundra's +Saussure +Saussure's +Sauterne +Sauterne's +Savage +Savage's +Savannah +Savannah's +Savior +Savior's +Savonarola +Savonarola's +Savoy +Savoy's +Savoyard +Savoyard's +Sawyer +Sawyer's +Saxon +Saxon's +Saxons +Saxony +Saxony's +Sayers +Sayers's +Sb +Sb's +Scala +Scala's +Scandinavia +Scandinavia's +Scandinavian +Scandinavian's +Scandinavians +Scaramouch +Scaramouch's +Scarborough 
+Scarborough's +Scarlatti +Scarlatti's +Scheat +Scheat's +Schedar +Schedar's +Scheherazade +Scheherazade's +Schelling +Schelling's +Schenectady +Schenectady's +Schiaparelli +Schiaparelli's +Schick +Schick's +Schiller +Schiller's +Schindler +Schindler's +Schlesinger +Schlesinger's +Schliemann +Schliemann's +Schlitz +Schlitz's +Schmidt +Schmidt's +Schnabel +Schnabel's +Schnauzer +Schnauzer's +Schneider +Schneider's +Schoenberg +Schoenberg's +Schopenhauer +Schopenhauer's +Schrieffer +Schrieffer's +Schroeder +Schroeder's +Schrödinger +Schrödinger's +Schubert +Schubert's +Schultz +Schultz's +Schulz +Schulz's +Schumann +Schumann's +Schumpeter +Schumpeter's +Schuyler +Schuyler's +Schuylkill +Schuylkill's +Schwartz +Schwartz's +Schwarzenegger +Schwarzenegger's +Schwarzkopf +Schwarzkopf's +Schweitzer +Schweitzer's +Schweppes +Schweppes's +Schwinger +Schwinger's +Schwinn +Schwinn's +Scientology +Scientology's +Scipio +Scipio's +Scopes +Scopes's +Scorpio +Scorpio's +Scorpios +Scorpius +Scorpius's +Scorsese +Scorsese's +Scot +Scot's +Scotch +Scotch's +Scotches +Scotchman +Scotchman's +Scotchmen +Scotchmen's +Scotia +Scotia's +Scotland +Scotland's +Scots +Scotsman +Scotsman's +Scotsmen +Scotsmen's +Scotswoman +Scotswoman's +Scotswomen +Scotswomen's +Scott +Scott's +Scottie +Scottie's +Scottish +Scottish's +Scottsdale +Scottsdale's +Scotty +Scotty's +Scout +Scrabble +Scrabble's +Scranton +Scranton's +Scriabin +Scriabin's +Scribner +Scribner's +Scripture +Scripture's +Scriptures +Scrooge +Scrooge's +Scruggs +Scruggs's +Scud +Scud's +Sculley +Sculley's +Scylla +Scylla's +Scythia +Scythia's +Scythian +Scythian's +Se +Se's +Seaborg +Seaborg's +Seagram +Seagram's +Sean +Sean's +Sears +Sears's +Seattle +Seattle's +Sebastian +Sebastian's +Seconal +Seconal's +Secretariat +Secretariat's +Secretary +Seder +Seder's +Seders +Sedna +Sedna's +Seebeck +Seebeck's +Seeger +Seeger's +Sega +Sega's +Segovia +Segovia's +Segre +Segre's +Segundo +Segundo's +Seiko +Seiko's +Seine +Seine's +Seinfeld 
+Seinfeld's +Sejong +Sejong's +Selassie +Selassie's +Selectric +Selectric's +Selena +Selena's +Seleucid +Seleucid's +Seleucus +Seleucus's +Selim +Selim's +Seljuk +Seljuk's +Selkirk +Selkirk's +Sellers +Sellers's +Selma +Selma's +Selznick +Selznick's +Semarang +Semarang's +Seminole +Seminole's +Seminoles +Semiramis +Semiramis's +Semite +Semite's +Semites +Semitic +Semitic's +Semitics +Semtex +Semtex's +Senate +Senate's +Senates +Senator +Sendai +Sendai's +Seneca +Seneca's +Senecas +Senegal +Senegal's +Senegalese +Senegalese's +Senghor +Senghor's +Senior +Senior's +Sennacherib +Sennacherib's +Sennett +Sennett's +Sensurround +Sensurround's +Seoul +Seoul's +Sephardi +Sephardi's +Sepoy +Sepoy's +September +September's +Septembers +Septuagint +Septuagint's +Septuagints +Sequoya +Sequoya's +Serb +Serb's +Serbia +Serbia's +Serbian +Serbian's +Serbians +Serbs +Serena +Serena's +Serengeti +Serengeti's +Sergei +Sergei's +Sergio +Sergio's +Serpens +Serpens's +Serra +Serra's +Serrano +Serrano's +Set +Set's +Seth +Seth's +Seton +Seton's +Seurat +Seurat's +Seuss +Seuss's +Sevastopol +Sevastopol's +Severn +Severn's +Severus +Severus's +Seville +Seville's +Seward +Seward's +Sextans +Sextans's +Sexton +Sexton's +Seychelles +Seychelles's +Seyfert +Seyfert's +Seymour +Seymour's +Shackleton +Shackleton's +Shaffer +Shaffer's +Shaka +Shaka's +Shakespeare +Shakespeare's +Shakespearean +Shakespearean's +Shana +Shana's +Shane +Shane's +Shanghai +Shanghai's +Shankara +Shankara's +Shanna +Shanna's +Shannon +Shannon's +Shantung +Shantung's +Shapiro +Shapiro's +SharePoint +SharePoint's +Shari +Shari'a +Shari'a's +Shari's +Sharif +Sharif's +Sharlene +Sharlene's +Sharon +Sharon's +Sharp +Sharp's +Sharpe +Sharpe's +Sharron +Sharron's +Shasta +Shasta's +Shaula +Shaula's +Shaun +Shaun's +Shauna +Shauna's +Shavian +Shavian's +Shavuot +Shavuot's +Shaw +Shaw's +Shawn +Shawn's +Shawna +Shawna's +Shawnee +Shawnee's +Shcharansky +Shcharansky's +Shea +Shea's +Sheba +Sheba's +Shebeli +Shebeli's +Sheena 
+Sheena's +Sheetrock +Sheetrock's +Sheffield +Sheffield's +Sheila +Sheila's +Shelby +Shelby's +Sheldon +Sheldon's +Shelia +Shelia's +Shell +Shell's +Shelley +Shelley's +Shelly +Shelly's +Shelton +Shelton's +Shenandoah +Shenandoah's +Shenyang +Shenyang's +Sheol +Sheol's +Shepard +Shepard's +Shepherd +Shepherd's +Sheppard +Sheppard's +Sheratan +Sheratan's +Sheraton +Sheraton's +Sheree +Sheree's +Sheri +Sheri's +Sheridan +Sheridan's +Sherlock +Sherlock's +Sherman +Sherman's +Sherpa +Sherpa's +Sherri +Sherri's +Sherrie +Sherrie's +Sherry +Sherry's +Sherwood +Sherwood's +Sheryl +Sheryl's +Shetland +Shetland's +Shetlands +Shetlands's +Shevardnadze +Shevardnadze's +Shevat +Shevat's +Shi'ite +Shi'ite's +Shields +Shields's +Shijiazhuang +Shijiazhuang's +Shikoku +Shikoku's +Shillong +Shillong's +Shiloh +Shiloh's +Shinto +Shinto's +Shintoism +Shintoism's +Shintoisms +Shintos +Shiraz +Shiraz's +Shirley +Shirley's +Shiva +Shiva's +Shockley +Shockley's +Short +Short's +Shorthorn +Shorthorn's +Shoshone +Shoshone's +Shostakovitch +Shostakovitch's +Shrek +Shrek's +Shreveport +Shreveport's +Shriner +Shriner's +Shropshire +Shropshire's +Shula +Shula's +Shylock +Shylock's +Shylockian +Shylockian's +Si +Si's +Siam +Siam's +Siamese +Siamese's +Sian +Sian's +Sibelius +Sibelius's +Siberia +Siberia's +Siberian +Siberian's +Sibyl +Sibyl's +Sicilian +Sicilian's +Sicilians +Sicily +Sicily's +Sid +Sid's +Siddhartha +Siddhartha's +Sidney +Sidney's +Siegfried +Siegfried's +Siemens +Siemens's +Sierpinski +Sierpinski's +Sigismund +Sigismund's +Sigmund +Sigmund's +Sigurd +Sigurd's +Sihanouk +Sihanouk's +Sikh +Sikh's +Sikhism +Sikhs +Sikkim +Sikkim's +Sikkimese +Sikkimese's +Sikorsky +Sikorsky's +Silas +Silas's +Silurian +Silurian's +Silva +Silva's +Silvia +Silvia's +Simenon +Simenon's +Simmental +Simmental's +Simmons +Simmons's +Simon +Simon's +Simone +Simone's +Simpson +Simpson's +Simpsons +Simpsons's +Sims +Sims's +Sinai +Sinai's +Sinatra +Sinatra's +Sinclair +Sinclair's +Sindbad +Sindbad's 
+Sindhi +Sindhi's +Singapore +Singapore's +Singer +Singer's +Singh +Singh's +Singleton +Singleton's +Sinhalese +Sinhalese's +Sinkiang +Sinkiang's +Sioux +Sioux's +Sirius +Sirius's +Sister +Sister's +Sisters +Sistine +Sistine's +Sisyphean +Sisyphean's +Sisyphus +Sisyphus's +Siva +Siva's +Sivan +Sivan's +Sjaelland +Sjaelland's +Skinner +Skinner's +Skippy +Skippy's +Skopje +Skopje's +Skye +Skye's +Skylab +Skylab's +Skype +Skype's +Slackware +Slackware's +Slashdot +Slashdot's +Slater +Slater's +Slav +Slav's +Slavic +Slavic's +Slavonic +Slavonic's +Slavs +Slinky +Slinky's +Sloan +Sloan's +Sloane +Sloane's +Slocum +Slocum's +Slovak +Slovak's +Slovakia +Slovakia's +Slovakian +Slovaks +Slovenia +Slovenia's +Slovenian +Slovenian's +Slovenians +Slurpee +Slurpee's +Small +Small's +Smetana +Smetana's +Smirnoff +Smirnoff's +Smith +Smith's +Smithson +Smithson's +Smithsonian +Smithsonian's +Smokey +Smokey's +Smolensk +Smolensk's +Smollett +Smollett's +Smuts +Smuts's +Sn +Sn's +Snake +Snake's +Snapple +Snapple's +Snead +Snead's +Snell +Snell's +Snickers +Snickers's +Snider +Snider's +Snoopy +Snoopy's +Snow +Snow's +Snowbelt +Snowbelt's +Snyder +Snyder's +Soave +Soave's +Socorro +Socorro's +Socrates +Socrates's +Socratic +Socratic's +Soddy +Soddy's +Sodom +Sodom's +Sofia +Sofia's +Soho +Soho's +Solis +Solis's +Solomon +Solomon's +Solon +Solon's +Solzhenitsyn +Solzhenitsyn's +Somali +Somali's +Somalia +Somalia's +Somalian +Somalian's +Somalians +Somalis +Somme +Somme's +Somoza +Somoza's +Son +Son's +Sondheim +Sondheim's +Sondra +Sondra's +Songhai +Songhai's +Songhua +Songhua's +Sonia +Sonia's +Sonja +Sonja's +Sonny +Sonny's +Sontag +Sontag's +Sony +Sony's +Sonya +Sonya's +Sophia +Sophia's +Sophie +Sophie's +Sophoclean +Sophoclean's +Sophocles +Sophocles's +Sopwith +Sopwith's +Sorbonne +Sorbonne's +Sosa +Sosa's +Soto +Soto's +Souphanouvong +Souphanouvong's +Sourceforge +Sourceforge's +Sousa +Sousa's +South +South's +Southampton +Southampton's +Southeast +Southeast's +Southeasts 
+Southerner +Southerner's +Southerners +Southey +Southey's +Souths +Southwest +Southwest's +Southwests +Soviet +Soviet's +Soweto +Soweto's +Soyinka +Soyinka's +Soyuz +Soyuz's +Spaatz +Spaatz's +Spackle +Spackle's +Spahn +Spahn's +Spain +Spain's +Spam +Spam's +Spaniard +Spaniard's +Spaniards +Spanish +Spanish's +Sparks +Sparks's +Sparta +Sparta's +Spartacus +Spartacus's +Spartan +Spartan's +Spartans +Spears +Spears's +Speer +Speer's +Spence +Spence's +Spencer +Spencer's +Spencerian +Spencerian's +Spengler +Spengler's +Spenglerian +Spenglerian's +Spenser +Spenser's +Spenserian +Spenserian's +Sperry +Sperry's +Sphinx +Sphinx's +Spica +Spica's +Spielberg +Spielberg's +Spillane +Spillane's +Spinoza +Spinoza's +Spinx +Spinx's +Spiro +Spiro's +Spirograph +Spirograph's +Spitsbergen +Spitsbergen's +Spitz +Spitz's +Spock +Spock's +Spokane +Spokane's +Springfield +Springfield's +Springsteen +Springsteen's +Sprint +Sprint's +Sprite +Sprite's +Sputnik +Sputnik's +Squanto +Squanto's +Squibb +Squibb's +Srinagar +Srinagar's +Srivijaya +Srivijaya's +Stacey +Stacey's +Staci +Staci's +Stacie +Stacie's +Stacy +Stacy's +Stael +Stael's +Stafford +Stafford's +StairMaster +StairMaster's +Stalin +Stalin's +Stalingrad +Stalingrad's +Stalinist +Stalinist's +Stallone +Stallone's +Stamford +Stamford's +Stan +Stan's +Standish +Standish's +Stanford +Stanford's +Stanislavsky +Stanislavsky's +Stanley +Stanley's +Stanton +Stanton's +Staples +Staples's +Starbucks +Starbucks's +Stark +Stark's +Starkey +Starkey's +Starr +Starr's +Staten +Staten's +Staubach +Staubach's +Steadicam +Steadicam's +Steele +Steele's +Stefan +Stefan's +Stefanie +Stefanie's +Stein +Stein's +Steinbeck +Steinbeck's +Steinem +Steinem's +Steiner +Steiner's +Steinmetz +Steinmetz's +Steinway +Steinway's +Stella +Stella's +Stendhal +Stendhal's +Stengel +Stengel's +Stephan +Stephan's +Stephanie +Stephanie's +Stephen +Stephen's +Stephens +Stephens's +Stephenson +Stephenson's +Sterling +Sterling's +Stern +Stern's +Sterne +Sterne's 
+Sterno +Sterno's +Stetson +Stetson's +Steuben +Steuben's +Steve +Steve's +Steven +Steven's +Stevens +Stevens's +Stevenson +Stevenson's +Stevie +Stevie's +Stewart +Stewart's +Stieglitz +Stieglitz's +Stilton +Stilton's +Stimson +Stimson's +Stine +Stine's +Stirling +Stirling's +Stockhausen +Stockhausen's +Stockholm +Stockholm's +Stockton +Stockton's +Stoic +Stoic's +Stoicism +Stoicism's +Stokes +Stokes's +Stolichnaya +Stolichnaya's +Stolypin +Stolypin's +Stone +Stone's +Stonehenge +Stonehenge's +Stoppard +Stoppard's +Stout +Stout's +Stowe +Stowe's +Strabo +Strabo's +Stradivarius +Stradivarius's +Strasbourg +Strasbourg's +Strauss +Strauss's +Stravinsky +Stravinsky's +Streisand +Streisand's +Strickland +Strickland's +Strindberg +Strindberg's +Stromboli +Stromboli's +Strong +Strong's +Stu +Stu's +Stuart +Stuart's +Stuarts +Studebaker +Studebaker's +Stuttgart +Stuttgart's +Stuyvesant +Stuyvesant's +Stygian +Stygian's +Styrofoam +Styrofoam's +Styrofoams +Styron +Styron's +Styx +Styx's +Suarez +Suarez's +Subaru +Subaru's +Sucre +Sucre's +Sucrets +Sucrets's +Sudan +Sudan's +Sudanese +Sudanese's +Sudetenland +Sudetenland's +Sudoku +Sudoku's +Sudra +Sudra's +Sue +Sue's +Suetonius +Suetonius's +Suez +Suez's +Suffolk +Suffolk's +Sufi +Sufi's +Sufism +Sufism's +Suharto +Suharto's +Sui +Sui's +Sukarno +Sukarno's +Sukkot +Sukkoth +Sukkoth's +Sukkoths +Sulawesi +Sulawesi's +Suleiman +Suleiman's +Sulla +Sulla's +Sullivan +Sullivan's +Sumatra +Sumatra's +Sumeria +Sumeria's +Sumerian +Sumerian's +Summer +Summer's +Summers +Summers's +Sumner +Sumner's +Sumter +Sumter's +Sunbeam +Sunbeam's +Sunbelt +Sunbelt's +Sundanese +Sundanese's +Sundas +Sundas's +Sunday +Sunday's +Sundays +Sung +Sung's +Sunkist +Sunkist's +Sunni +Sunni's +Sunnyvale +Sunnyvale's +Superbowl +Superbowl's +Superfund +Superfund's +Superglue +Superglue's +Superior +Superior's +Superman +Superman's +Surabaya +Surabaya's +Surat +Surat's +Surinam +Surinam's +Suriname +Suriname's +Surya +Surya's +Susan +Susan's +Susana 
+Susana's +Susanna +Susanna's +Susanne +Susanne's +Susie +Susie's +Susquehanna +Susquehanna's +Sussex +Sussex's +Sutherland +Sutherland's +Sutton +Sutton's +Suva +Suva's +Suwanee +Suwanee's +Suzanne +Suzanne's +Suzette +Suzette's +Suzhou +Suzhou's +Suzuki +Suzuki's +Suzy +Suzy's +Svalbard +Svalbard's +Sven +Sven's +Svengali +Svengali's +Swahili +Swahili's +Swahilis +Swammerdam +Swammerdam's +Swanee +Swanee's +Swansea +Swansea's +Swanson +Swanson's +Swazi +Swazi's +Swaziland +Swaziland's +Swede +Swede's +Sweden +Sweden's +Swedenborg +Swedenborg's +Swedes +Swedish +Swedish's +Sweeney +Sweeney's +Sweet +Sweet's +Swift +Swift's +Swinburne +Swinburne's +Swiss +Swiss's +Swissair +Swissair's +Swisses +Switzerland +Switzerland's +Sybil +Sybil's +Sydney +Sydney's +Sykes +Sykes's +Sylvester +Sylvester's +Sylvia +Sylvia's +Sylvie +Sylvie's +Synge +Synge's +Syracuse +Syracuse's +Syria +Syria's +Syriac +Syriac's +Syrian +Syrian's +Syrians +Szechuan +Szechuan's +Szilard +Szilard's +Szymborska +Szymborska's +Sèvres +Sèvres's +T +T'ang +T'ang's +T's +TWA +TWA's +Tabasco +Tabasco's +Tabatha +Tabatha's +Tabitha +Tabitha's +Tabriz +Tabriz's +Tacitus +Tacitus's +Tacoma +Tacoma's +Tad +Tad's +Tadzhik +Tadzhik's +Tadzhikistan +Tadzhikistan's +Taegu +Taegu's +Taejon +Taejon's +Taft +Taft's +Tagalog +Tagalog's +Tagore +Tagore's +Tagus +Tagus's +Tahiti +Tahiti's +Tahitian +Tahitian's +Tahitians +Tahoe +Tahoe's +Taichung +Taichung's +Taine +Taine's +Taipei +Taipei's +Taiping +Taiping's +Taiwan +Taiwan's +Taiwanese +Taiwanese's +Taiyuan +Taiyuan's +Tajikistan +Tajikistan's +Taklamakan +Taklamakan's +Talbot +Talbot's +Taliban +Taliban's +Taliesin +Taliesin's +Tallahassee +Tallahassee's +Tallchief +Tallchief's +Talley +Talley's +Talleyrand +Talleyrand's +Tallinn +Tallinn's +Talmud +Talmud's +Talmudic +Talmuds +Tamara +Tamara's +Tameka +Tameka's +Tamera +Tamera's +Tamerlane +Tamerlane's +Tami +Tami's +Tamika +Tamika's +Tamil +Tamil's +Tammany +Tammany's +Tammi +Tammi's +Tammie +Tammie's +Tammuz 
+Tammuz's +Tammy +Tammy's +Tampa +Tampa's +Tampax +Tampax's +Tamra +Tamra's +Tamworth +Tamworth's +Tancred +Tancred's +Taney +Taney's +Tanganyika +Tanganyika's +Tangiers +Tangiers's +Tangshan +Tangshan's +Tania +Tania's +Tanisha +Tanisha's +Tanner +Tanner's +Tannhäuser +Tannhäuser's +Tantalus +Tantalus's +Tanya +Tanya's +Tanzania +Tanzania's +Tanzanian +Tanzanian's +Tanzanians +Tao +Tao's +Taoism +Taoism's +Taoisms +Taoist +Taoist's +Taoists +Tara +Tara's +Tarantino +Tarantino's +Tarawa +Tarawa's +Tarazed +Tarazed's +Tarbell +Tarbell's +Target +Target's +Tarim +Tarim's +Tarkenton +Tarkenton's +Tarkington +Tarkington's +Tartar +Tartar's +Tartars +Tartary +Tartary's +Tartuffe +Tartuffe's +Tarzan +Tarzan's +Tasha +Tasha's +Tashkent +Tashkent's +Tasman +Tasman's +Tasmania +Tasmania's +Tasmanian +Tasmanian's +Tass +Tass's +Tatar +Tatar's +Tatars +Tate +Tate's +Tatum +Tatum's +Taurus +Taurus's +Tauruses +Tawney +Tawney's +Taylor +Taylor's +Tb +Tb's +Tbilisi +Tbilisi's +Tchaikovsky +Tchaikovsky's +Teasdale +Teasdale's +Technicolor +Technicolor's +Tecumseh +Tecumseh's +Ted +Ted's +Teddy +Teddy's +Teflon +Teflon's +Teflons +Tegucigalpa +Tegucigalpa's +Teheran +Teheran's +Tehran +TelePrompter +TelePrompter's +Telemachus +Telemachus's +Telemann +Telemann's +Teletype +Tell +Tell's +Teller +Teller's +Telugu +Telugu's +Tempe +Templar +Templar's +Tennessee +Tennessee's +Tennyson +Tennyson's +Tenochtitlan +Tenochtitlan's +TensorFlow +TensorFlow's +Teotihuacan +Teotihuacan's +Terence +Terence's +Teresa +Teresa's +Tereshkova +Tereshkova's +Teri +Teri's +Terkel +Terkel's +Terpsichore +Terpsichore's +Terr +Terr's +Terra +Terra's +Terran +Terran's +Terrance +Terrance's +Terrell +Terrell's +Terrence +Terrence's +Terri +Terri's +Terrie +Terrie's +Terry +Terry's +Tertiary +Tertiary's +Tesla +Tesla's +Tess +Tess's +Tessa +Tessa's +Tessie +Tessie's +Tet +Tet's +Tethys +Tethys's +Tetons +Tetons's +Teutonic +Teutonic's +Tevet +Tevet's +Texaco +Texaco's +Texan +Texan's +Texans +Texas +Texas's 
+Th +Th's +Thackeray +Thackeray's +Thad +Thad's +Thaddeus +Thaddeus's +Thai +Thai's +Thailand +Thailand's +Thais +Thales +Thales's +Thalia +Thalia's +Thames +Thames's +Thanh +Thanh's +Thanksgiving +Thanksgiving's +Thanksgivings +Thant +Thant's +Thar +Thar's +Tharp +Tharp's +Thatcher +Thatcher's +Thea +Thea's +Thebes +Thebes's +Theiler +Theiler's +Thelma +Thelma's +Themistocles +Themistocles's +Theocritus +Theocritus's +Theodora +Theodora's +Theodore +Theodore's +Theodoric +Theodoric's +Theodosius +Theodosius's +Theosophy +Theosophy's +Theravada +Theravada's +Theresa +Theresa's +Therese +Therese's +Thermopylae +Thermopylae's +Thermos +Theron +Theron's +Theseus +Theseus's +Thespian +Thespian's +Thespis +Thespis's +Thessalonian +Thessalonian's +Thessaloníki +Thessaloníki's +Thessaly +Thessaly's +Thieu +Thieu's +Thimbu +Thimbu's +Thomas +Thomas's +Thomism +Thomism's +Thomistic +Thomistic's +Thompson +Thompson's +Thomson +Thomson's +Thor +Thor's +Thorazine +Thorazine's +Thoreau +Thoreau's +Thornton +Thornton's +Thoroughbred +Thoroughbred's +Thorpe +Thorpe's +Thoth +Thoth's +Thrace +Thrace's +Thracian +Thracian's +Thucydides +Thucydides's +Thule +Thule's +Thunderbird +Thunderbird's +Thurber +Thurber's +Thurman +Thurman's +Thurmond +Thurmond's +Thursday +Thursday's +Thursdays +Thutmose +Thutmose's +Ti +Ti's +Tia +Tia's +Tianjin +Tianjin's +Tiber +Tiber's +Tiberius +Tiberius's +Tibet +Tibet's +Tibetan +Tibetan's +Tibetans +Ticketmaster +Ticketmaster's +Ticonderoga +Ticonderoga's +Tide +Tide's +Tienanmen +Tienanmen's +Tientsin +Tientsin's +Tiffany +Tiffany's +Tigris +Tigris's +Tijuana +Tijuana's +Tillich +Tillich's +Tillman +Tillman's +Tilsit +Tilsit's +Tim +Tim's +Timbuktu +Timbuktu's +Timex +Timex's +Timmy +Timmy's +Timon +Timon's +Timor +Timor's +Timothy +Timothy's +Timur +Timur's +Timurid +Timurid's +Tina +Tina's +Ting +Ting's +Tinkerbell +Tinkerbell's +Tinkertoy +Tinkertoy's +Tinseltown +Tinseltown's +Tintoretto +Tintoretto's +Tippecanoe +Tippecanoe's +Tipperary 
+Tipperary's +Tirana +Tirana's +Tiresias +Tiresias's +Tisha +Tisha's +Tishri +Tishri's +Titan +Titan's +Titania +Titania's +Titanic +Titanic's +Titian +Titian's +Titicaca +Titicaca's +Tito +Tito's +Titus +Titus's +Tlaloc +Tlaloc's +Tlingit +Tlingit's +Tobago +Tobago's +Toby +Toby's +Tocantins +Tocantins's +Tocqueville +Tocqueville's +Tod +Tod's +Todd +Todd's +Togo +Togo's +Tojo +Tojo's +Tokay +Tokay's +Tokugawa +Tokugawa's +Tokyo +Tokyo's +Toledo +Toledo's +Toledos +Tolkien +Tolkien's +Tolstoy +Tolstoy's +Toltec +Toltec's +Tolyatti +Tolyatti's +Tom +Tom's +Tomas +Tomas's +Tombaugh +Tombaugh's +Tomlin +Tomlin's +Tommie +Tommie's +Tommy +Tommy's +Tompkins +Tompkins's +Tomsk +Tomsk's +Tonga +Tonga's +Tongan +Tongan's +Tongans +Toni +Toni's +Tonia +Tonia's +Tonto +Tonto's +Tony +Tony's +Tonya +Tonya's +Topeka +Topeka's +Topsy +Topsy's +Torah +Torah's +Torahs +Tories +Toronto +Toronto's +Torquemada +Torquemada's +Torrance +Torrance's +Torrens +Torrens's +Torres +Torres's +Torricelli +Torricelli's +Tortola +Tortola's +Tortuga +Tortuga's +Torvalds +Torvalds's +Tory +Tory's +Tosca +Tosca's +Toscanini +Toscanini's +Toshiba +Toshiba's +Toto +Toto's +Toulouse +Toulouse's +Townes +Townes's +Townsend +Townsend's +Toynbee +Toynbee's +Toyoda +Toyoda's +Toyota +Toyota's +Tracey +Tracey's +Traci +Traci's +Tracie +Tracie's +Tracy +Tracy's +Trafalgar +Trafalgar's +Trailways +Trailways's +Trajan +Trajan's +Tran +Tran's +Transcaucasia +Transcaucasia's +Transvaal +Transvaal's +Transylvania +Transylvania's +Trappist +Trappist's +Travis +Travis's +Travolta +Travolta's +Treasuries +Treasury +Treasury's +Treblinka +Treblinka's +Trekkie +Trekkie's +Trent +Trent's +Trenton +Trenton's +Trevelyan +Trevelyan's +Trevino +Trevino's +Trevor +Trevor's +Trey +Trey's +Triangulum +Triangulum's +Triassic +Triassic's +Tricia +Tricia's +Trident +Trident's +Trieste +Trieste's +Trimurti +Trimurti's +Trina +Trina's +Trinidad +Trinidad's +Trinities +Trinity +Trinity's +Tripitaka +Tripitaka's +Tripoli 
+Tripoli's +Trippe +Trippe's +Trisha +Trisha's +Tristan +Tristan's +Triton +Triton's +Trobriand +Trobriand's +Troilus +Troilus's +Trojan +Trojan's +Trojans +Trollope +Trollope's +Trondheim +Trondheim's +Tropicana +Tropicana's +Trotsky +Trotsky's +Troy +Troy's +Troyes +Truckee +Truckee's +Trudeau +Trudeau's +Trudy +Trudy's +Truffaut +Truffaut's +Trujillo +Trujillo's +Truman +Truman's +Trumbull +Trumbull's +Trump +Trump's +Truth +Truth's +Tsimshian +Tsimshian's +Tsingtao +Tsingtao's +Tsiolkovsky +Tsiolkovsky's +Tsitsihar +Tsitsihar's +Tsongkhapa +Tsongkhapa's +Tswana +Tswana's +Tuamotu +Tuamotu's +Tuareg +Tuareg's +Tubman +Tubman's +Tucker +Tucker's +Tucson +Tucson's +Tucuman +Tucuman's +Tudor +Tudor's +Tuesday +Tuesday's +Tuesdays +Tulane +Tulane's +Tull +Tull's +Tulsa +Tulsa's +Tulsidas +Tulsidas's +Tums +Tums's +Tungus +Tungus's +Tunguska +Tunguska's +Tunis +Tunis's +Tunisia +Tunisia's +Tunisian +Tunisian's +Tunisians +Tunney +Tunney's +Tupi +Tupi's +Tupperware +Tupperware's +Tupungato +Tupungato's +Turgenev +Turgenev's +Turin +Turin's +Turing +Turing's +Turk +Turk's +Turkestan +Turkestan's +Turkey +Turkey's +Turkish +Turkish's +Turkmenistan +Turkmenistan's +Turks +Turner +Turner's +Turpin +Turpin's +Tuscaloosa +Tuscaloosa's +Tuscan +Tuscan's +Tuscany +Tuscany's +Tuscarora +Tuscarora's +Tuscon +Tuscon's +Tuskegee +Tuskegee's +Tussaud +Tussaud's +Tut +Tut's +Tutankhamen +Tutankhamen's +Tutsi +Tutsi's +Tutu +Tutu's +Tuvalu +Tuvalu's +Twain +Twain's +Tweed +Tweed's +Tweedledee +Tweedledee's +Tweedledum +Tweedledum's +Twila +Twila's +Twinkies +Twinkies's +Twitter +Twitter's +Twizzlers +Twizzlers's +Ty +Ty's +Tycho +Tycho's +Tylenol +Tylenol's +Tyler +Tyler's +Tyndale +Tyndale's +Tyndall +Tyndall's +Tyre +Tyre's +Tyree +Tyree's +Tyrone +Tyrone's +Tyson +Tyson's +U +U's +UBS +UBS's +UCLA +UCLA's +UPS +UPS's +Ubangi +Ubangi's +Ubuntu +Ubuntu's +Ucayali +Ucayali's +Uccello +Uccello's +Udall +Udall's +Ufa +Ufa's +Uganda +Uganda's +Ugandan +Ugandan's +Ugandans +Uighur 
+Uighur's +Ujungpandang +Ujungpandang's +Ukraine +Ukraine's +Ukrainian +Ukrainian's +Ukrainians +Ulster +Ulster's +Ultrasuede +Ultrasuede's +Ulyanovsk +Ulyanovsk's +Ulysses +Ulysses's +Umbriel +Umbriel's +Underwood +Underwood's +Ungava +Ungava's +Unicode +Unicode's +Unilever +Unilever's +Union +Union's +Unions +Uniroyal +Uniroyal's +Unitarian +Unitarian's +Unitarianism +Unitarianism's +Unitarianisms +Unitarians +Unitas +Unitas's +Unukalhai +Unukalhai's +Upanishads +Upanishads's +Updike +Updike's +Upjohn +Upjohn's +Upton +Upton's +Ur +Ur's +Ural +Ural's +Urals +Urals's +Urania +Urania's +Uranus +Uranus's +Urban +Urban's +Urdu +Urdu's +Urey +Urey's +Uriah +Uriah's +Uriel +Uriel's +Uris +Uris's +Urquhart +Urquhart's +Ursa +Ursa's +Ursula +Ursula's +Ursuline +Ursuline's +Uruguay +Uruguay's +Uruguayan +Uruguayan's +Uruguayans +Urumqi +Urumqi's +Usenet +Usenet's +Ustinov +Ustinov's +Utah +Utah's +Ute +Ute's +Utopia +Utopia's +Utopian +Utopian's +Utopians +Utopias +Utrecht +Utrecht's +Utrillo +Utrillo's +Uzbek +Uzbek's +Uzbekistan +Uzbekistan's +Uzi +Uzi's +V +V's +VBA +VBA's +Vader +Vader's +Vaduz +Vaduz's +Val +Val's +Valarie +Valarie's +Valdez +Valdez's +Valencia +Valencia's +Valenti +Valenti's +Valentin +Valentin's +Valentine +Valentine's +Valentino +Valentino's +Valenzuela +Valenzuela's +Valeria +Valeria's +Valerian +Valerian's +Valerie +Valerie's +Valhalla +Valhalla's +Valium +Valium's +Valiums +Valkyrie +Valkyrie's +Valkyries +Valletta +Valletta's +Valois +Valois's +Valparaiso +Valparaiso's +Valvoline +Valvoline's +Valéry +Valéry's +Van +Van's +Vance +Vance's +Vancouver +Vancouver's +Vandal +Vandal's +Vanderbilt +Vanderbilt's +Vandyke +Vandyke's +Vanessa +Vanessa's +Vang +Vang's +Vanuatu +Vanuatu's +Vanzetti +Vanzetti's +Varanasi +Varanasi's +Varese +Varese's +Vargas +Vargas's +Vaseline +Vaseline's +Vaselines +Vasquez +Vasquez's +Vassar +Vassar's +Vatican +Vatican's +Vauban +Vauban's +Vaughan +Vaughan's +Vaughn +Vaughn's +Vazquez +Vazquez's +Veblen +Veblen's +Veda 
+Veda's +Vedanta +Vedanta's +Vedas +Vega +Vega's +Vegas +Vegas's +Vegemite +Vegemite's +Vela +Vela's +Velcro +Velcro's +Velcros +Velez +Velez's +Velma +Velma's +Velveeta +Velveeta's +Velásquez +Velásquez's +Velázquez +Velázquez's +Venetian +Venetian's +Venetians +Venezuela +Venezuela's +Venezuelan +Venezuelan's +Venezuelans +Venice +Venice's +Venn +Venn's +Ventolin +Ventolin's +Venus +Venus's +Venuses +Venusian +Venusian's +Vera +Vera's +Veracruz +Veracruz's +Verde +Verde's +Verdi +Verdi's +Verdun +Verdun's +Vergil +Vergil's +Verizon +Verizon's +Verlaine +Verlaine's +Vermeer +Vermeer's +Vermont +Vermont's +Vermonter +Vermonter's +Vern +Vern's +Verna +Verna's +Verne +Verne's +Vernon +Vernon's +Verona +Verona's +Veronese +Veronese's +Veronica +Veronica's +Versailles +Versailles's +Vesalius +Vesalius's +Vespasian +Vespasian's +Vespucci +Vespucci's +Vesta +Vesta's +Vesuvius +Vesuvius's +Viacom +Viacom's +Viagra +Viagra's +Vicente +Vicente's +Vichy +Vichy's +Vicki +Vicki's +Vickie +Vickie's +Vicksburg +Vicksburg's +Vicky +Vicky's +Victor +Victor's +Victoria +Victoria's +Victorian +Victorian's +Victorians +Victrola +Victrola's +Vidal +Vidal's +Vienna +Vienna's +Viennese +Viennese's +Vientiane +Vientiane's +Vietcong +Vietcong's +Vietminh +Vietminh's +Vietnam +Vietnam's +Vietnamese +Vietnamese's +Vijayanagar +Vijayanagar's +Vijayawada +Vijayawada's +Viking +Viking's +Vikings +Vila +Vila's +Villa +Villa's +Villarreal +Villarreal's +Villon +Villon's +Vilma +Vilma's +Vilnius +Vilnius's +Vilyui +Vilyui's +Vince +Vince's +Vincent +Vincent's +Vindemiatrix +Vindemiatrix's +Vinson +Vinson's +Viola +Viola's +Violet +Violet's +Virgie +Virgie's +Virgil +Virgil's +Virginia +Virginia's +Virginian +Virginian's +Virginians +Virgo +Virgo's +Virgos +Visa +Visa's +Visakhapatnam +Visakhapatnam's +Visayans +Visayans's +Vishnu +Vishnu's +Visigoth +Visigoth's +Vistula +Vistula's +Vitim +Vitim's +Vito +Vito's +Vitus +Vitus's +Vivaldi +Vivaldi's +Vivekananda +Vivekananda's +Vivian +Vivian's 
+Vivienne +Vivienne's +Vlad +Vlad's +Vladimir +Vladimir's +Vladivostok +Vladivostok's +Vlaminck +Vlaminck's +Vlasic +Vlasic's +VoIP +Vogue +Vogue's +Volcker +Volcker's +Voldemort +Voldemort's +Volga +Volga's +Volgograd +Volgograd's +Volkswagen +Volkswagen's +Volstead +Volstead's +Volta +Volta's +Voltaire +Voltaire's +Volvo +Volvo's +Vonda +Vonda's +Vonnegut +Vonnegut's +Voronezh +Voronezh's +Vorster +Vorster's +Voyager +Voyager's +Vuitton +Vuitton's +Vulcan +Vulcan's +Vulgate +Vulgate's +Vulgates +W +W's +Wabash +Wabash's +Waco +Waco's +Wade +Wade's +Wagner +Wagner's +Wagnerian +Wagnerian's +Wahhabi +Wahhabi's +Waikiki +Waikiki's +Waite +Waite's +Wake +Wake's +Waksman +Waksman's +Wald +Wald's +Waldemar +Waldemar's +Walden +Walden's +Waldensian +Waldensian's +Waldheim +Waldheim's +Waldo +Waldo's +Waldorf +Waldorf's +Wales +Wales's +Walesa +Walesa's +Walgreen +Walgreen's +Walgreens +Walgreens's +Walker +Walker's +Walkman +Walkman's +Wall +Wall's +Wallace +Wallace's +Wallenstein +Wallenstein's +Waller +Waller's +Wallis +Wallis's +Walloon +Walloon's +Walls +Walls's +Walmart +Walmart's +Walpole +Walpole's +Walpurgisnacht +Walpurgisnacht's +Walsh +Walsh's +Walt +Walt's +Walter +Walter's +Walters +Walters's +Walton +Walton's +Wanamaker +Wanamaker's +Wanda +Wanda's +Wang +Wang's +Wankel +Wankel's +Ward +Ward's +Ware +Ware's +Warhol +Warhol's +Waring +Waring's +Warner +Warner's +Warren +Warren's +Warsaw +Warsaw's +Warwick +Warwick's +Wasatch +Wasatch's +Washington +Washington's +Washingtonian +Washingtonian's +Washingtonians +Wasp +Wassermann +Wassermann's +Waterbury +Waterbury's +Waterford +Waterford's +Watergate +Watergate's +Waterloo +Waterloo's +Waterloos +Waters +Waters's +Watkins +Watkins's +Watson +Watson's +Watt +Watt's +Watteau +Watteau's +Watts +Watts's +Watusi +Watusi's +Waugh +Waugh's +Wayne +Wayne's +Weaver +Weaver's +Webb +Webb's +Weber +Weber's +Webern +Webern's +Webster +Webster's +Websters +Weddell +Weddell's +Wedgwood +Wedgwood's +Wednesday +Wednesday's 
+Wednesdays +Weeks +Weeks's +Wehrmacht +Wehrmacht's +Wei +Wei's +Weierstrass +Weierstrass's +Weill +Weill's +Weinberg +Weinberg's +Weiss +Weiss's +Weissmuller +Weissmuller's +Weizmann +Weizmann's +Welch +Welch's +Weldon +Weldon's +Welland +Welland's +Weller +Weller's +Welles +Welles's +Wellington +Wellington's +Wellingtons +Wells +Wells's +Welsh +Welsh's +Welshman +Welshman's +Welshmen +Welshmen's +Wendell +Wendell's +Wendi +Wendi's +Wendy +Wendy's +Wesak +Wesak's +Wesley +Wesley's +Wesleyan +Wesleyan's +Wessex +Wessex's +Wesson +Wesson's +West +West's +Western +Western's +Westerner +Westerns +Westinghouse +Westinghouse's +Westminster +Westminster's +Weston +Weston's +Westphalia +Westphalia's +Wests +Weyden +Weyden's +Wezen +Wezen's +Wharton +Wharton's +Wheaties +Wheaties's +Wheatstone +Wheatstone's +Wheeler +Wheeler's +Wheeling +Wheeling's +Whig +Whig's +Whigs +Whipple +Whipple's +Whirlpool +Whirlpool's +Whistler +Whistler's +Whitaker +Whitaker's +White +White's +Whitefield +Whitefield's +Whitehall +Whitehall's +Whitehead +Whitehead's +Whitehorse +Whitehorse's +Whiteley +Whiteley's +Whites +Whitfield +Whitfield's +Whitley +Whitley's +Whitman +Whitman's +Whitney +Whitney's +Whitsunday +Whitsunday's +Whitsundays +Whittier +Whittier's +WiFi +Wicca +Wicca's +Wichita +Wichita's +Wiemar +Wiemar's +Wiesel +Wiesel's +Wiesenthal +Wiesenthal's +Wiggins +Wiggins's +Wigner +Wigner's +Wii +Wii's +Wikileaks +Wikipedia +Wikipedia's +Wilberforce +Wilberforce's +Wilbert +Wilbert's +Wilbur +Wilbur's +Wilburn +Wilburn's +Wilcox +Wilcox's +Wilda +Wilda's +Wilde +Wilde's +Wilder +Wilder's +Wiles +Wiles's +Wiley +Wiley's +Wilford +Wilford's +Wilfred +Wilfred's +Wilfredo +Wilfredo's +Wilhelm +Wilhelm's +Wilhelmina +Wilhelmina's +Wilkerson +Wilkerson's +Wilkes +Wilkes's +Wilkins +Wilkins's +Wilkinson +Wilkinson's +Will +Will's +Willa +Willa's +Willamette +Willamette's +Willard +Willard's +Willemstad +Willemstad's +William +William's +Williams +Williams's +Williamson +Williamson's +Willie 
+Willie's +Willis +Willis's +Willy +Willy's +Wilma +Wilma's +Wilmer +Wilmer's +Wilmington +Wilmington's +Wilson +Wilson's +Wilsonian +Wilsonian's +Wilton +Wilton's +Wimbledon +Wimbledon's +Wimsey +Wimsey's +Winchell +Winchell's +Winchester +Winchester's +Windbreaker +Windbreaker's +Windex +Windex's +Windhoek +Windhoek's +Windows +Windows's +Windsor +Windsor's +Windsors +Windward +Windward's +Winesap +Winesap's +Winfred +Winfred's +Winfrey +Winfrey's +Winifred +Winifred's +Winkle +Winkle's +Winnebago +Winnebago's +Winnie +Winnie's +Winnipeg +Winnipeg's +Winston +Winston's +Winters +Winters's +Winthrop +Winthrop's +Wisconsin +Wisconsin's +Wisconsinite +Wisconsinite's +Wisconsinites +Wise +Wise's +Witt +Witt's +Wittgenstein +Wittgenstein's +Witwatersrand +Witwatersrand's +Wm +Wm's +Wobegon +Wobegon's +Wodehouse +Wodehouse's +Wolf +Wolf's +Wolfe +Wolfe's +Wolff +Wolff's +Wolfgang +Wolfgang's +Wollongong +Wollongong's +Wollstonecraft +Wollstonecraft's +Wolsey +Wolsey's +Wonder +Wonder's +Wonderbra +Wonderbra's +Wong +Wong's +Wood +Wood's +Woodard +Woodard's +Woodhull +Woodhull's +Woodrow +Woodrow's +Woods +Woods's +Woodstock +Woodstock's +Woodward +Woodward's +Woolf +Woolf's +Woolite +Woolite's +Woolongong +Woolongong's +Woolworth +Woolworth's +Wooster +Wooster's +Wooten +Wooten's +Worcester +Worcester's +Worcesters +Worcestershire +Worcestershire's +WordPress +WordPress's +Wordsworth +Wordsworth's +Workman +Workman's +Worms +Worms's +Wotan +Wotan's +Wovoka +Wovoka's +Wozniak +Wozniak's +Wozzeck +Wozzeck's +Wrangell +Wrangell's +Wren +Wren's +Wright +Wright's +Wrigley +Wrigley's +Wroclaw +Wroclaw's +Wu +Wu's +Wuhan +Wuhan's +Wurlitzer +Wurlitzer's +Wyatt +Wyatt's +Wycherley +Wycherley's +Wycliffe +Wycliffe's +Wyeth +Wyeth's +Wylie +Wylie's +Wynn +Wynn's +Wyoming +Wyoming's +Wyomingite +Wyomingite's +Wyomingites +X +X's +XEmacs +XEmacs's +Xamarin +Xamarin's +Xanadu +Xanadu's +Xanthippe +Xanthippe's +Xavier +Xavier's +Xe +Xe's +Xenakis +Xenakis's +Xenia +Xenia's +Xenophon 
+Xenophon's +Xerox +Xerox's +Xeroxes +Xerxes +Xerxes's +Xhosa +Xhosa's +Xi'an +Xi'an's +Xiaoping +Xiaoping's +Ximenes +Ximenes's +Xingu +Xingu's +Xiongnu +Xiongnu's +Xmas +Xmas's +Xmases +Xochipilli +Xochipilli's +Xuzhou +Xuzhou's +Y +Y's +Yacc +Yacc's +Yahoo +Yahoo's +Yahtzee +Yahtzee's +Yahweh +Yahweh's +Yakima +Yakima's +Yakut +Yakut's +Yakutsk +Yakutsk's +Yale +Yale's +Yalow +Yalow's +Yalta +Yalta's +Yalu +Yalu's +Yamagata +Yamagata's +Yamaha +Yamaha's +Yamoussoukro +Yamoussoukro's +Yang +Yang's +Yangon +Yangon's +Yangtze +Yangtze's +Yank +Yank's +Yankee +Yankee's +Yankees +Yanks +Yaobang +Yaobang's +Yaounde +Yaounde's +Yaqui +Yaqui's +Yaroslavl +Yaroslavl's +Yataro +Yataro's +Yates +Yates's +Yeager +Yeager's +Yeats +Yeats's +Yekaterinburg +Yekaterinburg's +Yellowknife +Yellowknife's +Yellowstone +Yellowstone's +Yeltsin +Yeltsin's +Yemen +Yemen's +Yemeni +Yemeni's +Yemenis +Yenisei +Yenisei's +Yerevan +Yerevan's +Yerkes +Yerkes's +Yesenia +Yesenia's +Yevtushenko +Yevtushenko's +Yggdrasil +Yggdrasil's +Yiddish +Yiddish's +Ymir +Ymir's +Yoda +Yoda's +Yoknapatawpha +Yoknapatawpha's +Yoko +Yoko's +Yokohama +Yokohama's +Yolanda +Yolanda's +Yong +Yong's +Yonkers +Yonkers's +York +York's +Yorkie +Yorkie's +Yorkshire +Yorkshire's +Yorktown +Yorktown's +Yoruba +Yoruba's +Yosemite +Yosemite's +Yossarian +Yossarian's +YouTube +YouTube's +Young +Young's +Youngstown +Youngstown's +Ypres +Ypres's +Ypsilanti +Ypsilanti's +Yuan +Yuan's +Yucatan +Yucatan's +Yugoslav +Yugoslav's +Yugoslavia +Yugoslavia's +Yugoslavian +Yugoslavian's +Yugoslavians +Yukon +Yukon's +Yule +Yule's +Yules +Yuletide +Yuletide's +Yuletides +Yunnan +Yunnan's +Yuri +Yuri's +Yves +Yves's +Yvette +Yvette's +Yvonne +Yvonne's +Z +Z's +Zachariah +Zachariah's +Zachary +Zachary's +Zachery +Zachery's +Zagreb +Zagreb's +Zaire +Zaire's +Zairian +Zambezi +Zambezi's +Zambia +Zambia's +Zambian +Zambian's +Zambians +Zamboni +Zamboni's +Zamenhof +Zamenhof's +Zamora +Zamora's +Zane +Zane's +Zanuck +Zanuck's +Zanzibar 
+Zanzibar's +Zapata +Zapata's +Zaporozhye +Zaporozhye's +Zapotec +Zapotec's +Zappa +Zappa's +Zara +Zara's +Zealand +Zealand's +Zebedee +Zebedee's +Zechariah +Zechariah's +Zedekiah +Zedekiah's +Zedong +Zedong's +Zeffirelli +Zeffirelli's +Zeke +Zeke's +Zelig +Zelig's +Zelma +Zelma's +Zen +Zen's +Zenger +Zenger's +Zeno +Zeno's +Zens +Zephaniah +Zephaniah's +Zephyrus +Zephyrus's +Zeppelin +Zeppelin's +Zest +Zest's +Zeus +Zeus's +Zhengzhou +Zhengzhou's +Zhivago +Zhivago's +Zhukov +Zhukov's +Zibo +Zibo's +Ziegfeld +Ziegfeld's +Ziegler +Ziegler's +Ziggy +Ziggy's +Zimbabwe +Zimbabwe's +Zimbabwean +Zimbabwean's +Zimbabweans +Zimmerman +Zimmerman's +Zinfandel +Zinfandel's +Zion +Zion's +Zionism +Zionism's +Zionisms +Zionist +Zionist's +Zionists +Zions +Ziploc +Ziploc's +Zn +Zn's +Zoe +Zoe's +Zola +Zola's +Zollverein +Zollverein's +Zoloft +Zoloft's +Zomba +Zomba's +Zorn +Zorn's +Zoroaster +Zoroaster's +Zoroastrian +Zoroastrian's +Zoroastrianism +Zoroastrianism's +Zoroastrianisms +Zorro +Zorro's +Zosma +Zosma's +Zr +Zr's +Zsigmondy +Zsigmondy's +Zubenelgenubi +Zubenelgenubi's +Zubeneschamali +Zubeneschamali's +Zukor +Zukor's +Zulu +Zulu's +Zulus +Zuni +Zuni's +Zwingli +Zwingli's +Zworykin +Zworykin's +Zyrtec +Zyrtec's +Zyuganov +Zyuganov's +Zürich +Zürich's +a +aardvark +aardvark's +aardvarks +abaci +aback +abacus +abacus's +abacuses +abaft +abalone +abalone's +abalones +abandon +abandoned +abandoning +abandonment +abandonment's +abandons +abase +abased +abasement +abasement's +abases +abash +abashed +abashes +abashing +abasing +abate +abated +abatement +abatement's +abates +abating +abattoir +abattoir's +abattoirs +abbess +abbess's +abbesses +abbey +abbey's +abbeys +abbot +abbot's +abbots +abbreviate +abbreviated +abbreviates +abbreviating +abbreviation +abbreviation's +abbreviations +abbé +abbé's +abbés +abdicate +abdicated +abdicates +abdicating +abdication +abdication's +abdications +abdomen +abdomen's +abdomens +abdominal +abduct +abducted +abductee +abductee's +abductees 
+abducting +abduction +abduction's +abductions +abductor +abductor's +abductors +abducts +abeam +abed +aberrant +aberration +aberration's +aberrations +abet +abets +abetted +abetter +abetter's +abetters +abetting +abettor +abettor's +abettors +abeyance +abeyance's +abhor +abhorred +abhorrence +abhorrence's +abhorrent +abhorring +abhors +abide +abided +abides +abiding +abilities +ability +ability's +abject +abjectly +abjuration +abjuration's +abjurations +abjure +abjured +abjures +abjuring +ablative +ablative's +ablatives +ablaze +able +abler +ablest +abloom +ablution +ablution's +ablutions +ably +abnegate +abnegated +abnegates +abnegating +abnegation +abnegation's +abnormal +abnormalities +abnormality +abnormality's +abnormally +aboard +abode +abode's +abodes +abolish +abolished +abolishes +abolishing +abolition +abolition's +abolitionist +abolitionist's +abolitionists +abominable +abominably +abominate +abominated +abominates +abominating +abomination +abomination's +abominations +aboriginal +aboriginal's +aboriginals +aborigine +aborigine's +aborigines +abort +aborted +aborting +abortion +abortion's +abortionist +abortionist's +abortionists +abortions +abortive +aborts +abound +abounded +abounding +abounds +about +above +above's +aboveboard +abracadabra +abracadabra's +abrade +abraded +abrades +abrading +abrasion +abrasion's +abrasions +abrasive +abrasive's +abrasively +abrasiveness +abrasiveness's +abrasives +abreast +abridge +abridged +abridgement +abridgement's +abridgements +abridges +abridging +abridgment +abridgment's +abridgments +abroad +abrogate +abrogated +abrogates +abrogating +abrogation +abrogation's +abrogations +abrupt +abrupter +abruptest +abruptly +abruptness +abruptness's +abscess +abscess's +abscessed +abscesses +abscessing +abscissa +abscissa's +abscissae +abscissas +abscond +absconded +absconding +absconds +absence +absence's +absences +absent +absented +absentee +absentee's +absenteeism +absenteeism's +absentees +absenting +absently +absents 
+absinth +absinth's +absinthe +absinthe's +absolute +absolute's +absolutely +absolutes +absolutest +absolution +absolution's +absolutism +absolutism's +absolve +absolved +absolves +absolving +absorb +absorbed +absorbency +absorbency's +absorbent +absorbent's +absorbents +absorbing +absorbs +absorption +absorption's +abstain +abstained +abstainer +abstainer's +abstainers +abstaining +abstains +abstemious +abstention +abstention's +abstentions +abstinence +abstinence's +abstinent +abstract +abstract's +abstracted +abstractedly +abstracting +abstraction +abstraction's +abstractions +abstractly +abstractness +abstractness's +abstractnesses +abstracts +abstruse +abstrusely +abstruseness +abstruseness's +absurd +absurder +absurdest +absurdities +absurdity +absurdity's +absurdly +abundance +abundance's +abundances +abundant +abundantly +abuse +abuse's +abused +abuser +abuser's +abusers +abuses +abusing +abusive +abusively +abusiveness +abusiveness's +abut +abutment +abutment's +abutments +abuts +abutted +abutting +abuzz +abysmal +abysmally +abyss +abyss's +abysses +acacia +acacia's +acacias +academia +academia's +academic +academic's +academical +academically +academician +academician's +academicians +academics +academies +academy +academy's +acanthi +acanthus +acanthus's +acanthuses +accede +acceded +accedes +acceding +accelerate +accelerated +accelerates +accelerating +acceleration +acceleration's +accelerations +accelerator +accelerator's +accelerators +accent +accent's +accented +accenting +accents +accentuate +accentuated +accentuates +accentuating +accentuation +accentuation's +accept +acceptability +acceptability's +acceptable +acceptably +acceptance +acceptance's +acceptances +accepted +accepting +accepts +access +access's +accessed +accesses +accessibility +accessibility's +accessible +accessibly +accessing +accession +accession's +accessioned +accessioning +accessions +accessories +accessory +accessory's +accident +accident's +accidental +accidental's 
+accidentally +accidentals +accidents +acclaim +acclaim's +acclaimed +acclaiming +acclaims +acclamation +acclamation's +acclimate +acclimated +acclimates +acclimating +acclimation +acclimation's +acclimatization +acclimatization's +acclimatize +acclimatized +acclimatizes +acclimatizing +accolade +accolade's +accolades +accommodate +accommodated +accommodates +accommodating +accommodation +accommodation's +accommodations +accompanied +accompanies +accompaniment +accompaniment's +accompaniments +accompanist +accompanist's +accompanists +accompany +accompanying +accomplice +accomplice's +accomplices +accomplish +accomplished +accomplishes +accomplishing +accomplishment +accomplishment's +accomplishments +accord +accord's +accordance +accordance's +accorded +according +accordingly +accordion +accordion's +accordions +accords +accost +accost's +accosted +accosting +accosts +account +account's +accountability +accountability's +accountable +accountancy +accountancy's +accountant +accountant's +accountants +accounted +accounting +accounting's +accounts +accouterments +accouterments's +accoutrements +accredit +accreditation +accreditation's +accredited +accrediting +accredits +accretion +accretion's +accretions +accrual +accrual's +accruals +accrue +accrued +accrues +accruing +acculturation +acculturation's +accumulate +accumulated +accumulates +accumulating +accumulation +accumulation's +accumulations +accumulative +accumulator +accuracy +accuracy's +accurate +accurately +accurateness +accurateness's +accursed +accurst +accusation +accusation's +accusations +accusative +accusative's +accusatives +accusatory +accuse +accused +accuser +accuser's +accusers +accuses +accusing +accusingly +accustom +accustomed +accustoming +accustoms +ace +ace's +aced +acerbic +acerbity +acerbity's +aces +acetaminophen +acetaminophen's +acetate +acetate's +acetates +acetic +acetone +acetone's +acetylene +acetylene's +ache +ache's +ached +aches +achier +achiest +achievable +achieve +achieved 
+achievement +achievement's +achievements +achiever +achiever's +achievers +achieves +achieving +aching +achoo +achoo's +achromatic +achy +acid +acid's +acidic +acidified +acidifies +acidify +acidifying +acidity +acidity's +acidly +acids +acidulous +acing +acknowledge +acknowledged +acknowledgement +acknowledgement's +acknowledgements +acknowledges +acknowledging +acknowledgment +acknowledgment's +acknowledgments +acme +acme's +acmes +acne +acne's +acolyte +acolyte's +acolytes +aconite +aconite's +aconites +acorn +acorn's +acorns +acoustic +acoustical +acoustically +acoustics +acoustics's +acquaint +acquaintance +acquaintance's +acquaintances +acquainted +acquainting +acquaints +acquiesce +acquiesced +acquiescence +acquiescence's +acquiescent +acquiesces +acquiescing +acquirable +acquire +acquired +acquirement +acquirement's +acquires +acquiring +acquisition +acquisition's +acquisitions +acquisitive +acquisitiveness +acquisitiveness's +acquit +acquits +acquittal +acquittal's +acquittals +acquitted +acquitting +acre +acre's +acreage +acreage's +acreages +acres +acrid +acrider +acridest +acrimonious +acrimony +acrimony's +acrobat +acrobat's +acrobatic +acrobatics +acrobatics's +acrobats +acronym +acronym's +acronyms +across +acrostic +acrostic's +acrostics +acrylic +acrylic's +acrylics +act +act's +acted +acting +acting's +actinium +actinium's +action +action's +actionable +actions +activate +activated +activates +activating +activation +activation's +active +active's +actively +actives +activism +activism's +activist +activist's +activists +activities +activity +activity's +actor +actor's +actors +actress +actress's +actresses +acts +actual +actualities +actuality +actuality's +actualization +actualization's +actualize +actualized +actualizes +actualizing +actually +actuarial +actuaries +actuary +actuary's +actuate +actuated +actuates +actuating +actuator +actuator's +actuators +acuity +acuity's +acumen +acumen's +acupuncture +acupuncture's +acupuncturist 
+acupuncturist's +acupuncturists +acute +acute's +acutely +acuteness +acuteness's +acuter +acutes +acutest +ad +ad's +adage +adage's +adages +adagio +adagio's +adagios +adamant +adamant's +adamantly +adapt +adaptability +adaptability's +adaptable +adaptation +adaptation's +adaptations +adapted +adapter +adapter's +adapters +adapting +adaptive +adaptor +adaptor's +adaptors +adapts +add +added +addend +addend's +addenda +addends +addendum +addendum's +addendums +adder +adder's +adders +addict +addict's +addicted +addicting +addiction +addiction's +addictions +addictive +addicts +adding +addition +addition's +additional +additionally +additions +additive +additive's +additives +addle +addled +addles +addling +address +address's +addressable +addressed +addressee +addressee's +addressees +addresses +addressing +adds +adduce +adduced +adduces +adducing +adenoid +adenoid's +adenoidal +adenoids +adept +adept's +adeptly +adeptness +adeptness's +adepts +adequacy +adequacy's +adequate +adequately +adhere +adhered +adherence +adherence's +adherent +adherent's +adherents +adheres +adhering +adhesion +adhesion's +adhesive +adhesive's +adhesives +adiabatic +adieu +adieu's +adieus +adieux +adipose +adiós +adjacent +adjacently +adjectival +adjectivally +adjective +adjective's +adjectives +adjoin +adjoined +adjoining +adjoins +adjourn +adjourned +adjourning +adjournment +adjournment's +adjournments +adjourns +adjudge +adjudged +adjudges +adjudging +adjudicate +adjudicated +adjudicates +adjudicating +adjudication +adjudication's +adjudicator +adjudicator's +adjudicators +adjunct +adjunct's +adjuncts +adjuration +adjuration's +adjurations +adjure +adjured +adjures +adjuring +adjust +adjustable +adjusted +adjuster +adjuster's +adjusters +adjusting +adjustment +adjustment's +adjustments +adjustor +adjustor's +adjustors +adjusts +adjutant +adjutant's +adjutants +adman +adman's +admen +administer +administered +administering +administers +administrate +administrated +administrates 
+administrating +administration +administration's +administrations +administrative +administratively +administrator +administrator's +administrators +admirable +admirably +admiral +admiral's +admirals +admiralty +admiralty's +admiration +admiration's +admire +admired +admirer +admirer's +admirers +admires +admiring +admiringly +admissibility +admissibility's +admissible +admission +admission's +admissions +admit +admits +admittance +admittance's +admitted +admittedly +admitting +admixture +admixture's +admixtures +admonish +admonished +admonishes +admonishing +admonishment +admonishment's +admonishments +admonition +admonition's +admonitions +admonitory +ado +ado's +adobe +adobe's +adobes +adolescence +adolescence's +adolescences +adolescent +adolescent's +adolescents +adopt +adopted +adopting +adoption +adoption's +adoptions +adoptive +adopts +adorable +adorably +adoration +adoration's +adore +adored +adores +adoring +adoringly +adorn +adorned +adorning +adornment +adornment's +adornments +adorns +adrenal +adrenal's +adrenaline +adrenaline's +adrenals +adrift +adroit +adroitly +adroitness +adroitness's +ads +adulate +adulated +adulates +adulating +adulation +adulation's +adult +adult's +adulterant +adulterant's +adulterants +adulterate +adulterated +adulterates +adulterating +adulteration +adulteration's +adulterer +adulterer's +adulterers +adulteress +adulteress's +adulteresses +adulteries +adulterous +adultery +adultery's +adulthood +adulthood's +adults +adumbrate +adumbrated +adumbrates +adumbrating +adumbration +adumbration's +advance +advance's +advanced +advancement +advancement's +advancements +advances +advancing +advantage +advantage's +advantaged +advantageous +advantageously +advantages +advantaging +advent +advent's +adventitious +advents +adventure +adventure's +adventured +adventurer +adventurer's +adventurers +adventures +adventuresome +adventuress +adventuress's +adventuresses +adventuring +adventurous +adventurously +adverb +adverb's +adverbial 
+adverbial's +adverbials +adverbs +adversarial +adversaries +adversary +adversary's +adverse +adversely +adverser +adversest +adversities +adversity +adversity's +advert +advert's +adverted +adverting +advertise +advertised +advertisement +advertisement's +advertisements +advertiser +advertiser's +advertisers +advertises +advertising +advertising's +adverts +advice +advice's +advisability +advisability's +advisable +advise +advised +advisedly +advisement +advisement's +adviser +adviser's +advisers +advises +advising +advisor +advisor's +advisories +advisors +advisory +advisory's +advocacy +advocacy's +advocate +advocate's +advocated +advocates +advocating +adware +adz +adz's +adze +adze's +adzes +aegis +aegis's +aeon +aeon's +aeons +aerate +aerated +aerates +aerating +aeration +aeration's +aerator +aerator's +aerators +aerial +aerial's +aerialist +aerialist's +aerialists +aerials +aerie +aerie's +aeries +aerobatics +aerobatics's +aerobic +aerobics +aerobics's +aerodynamic +aerodynamically +aerodynamics +aerodynamics's +aeronautical +aeronautics +aeronautics's +aerosol +aerosol's +aerosols +aerospace +aerospace's +aery +aery's +aesthete +aesthete's +aesthetes +aesthetic +aesthetically +aesthetics +aesthetics's +afar +affability +affability's +affable +affably +affair +affair's +affairs +affect +affect's +affectation +affectation's +affectations +affected +affecting +affection +affection's +affectionate +affectionately +affections +affects +affidavit +affidavit's +affidavits +affiliate +affiliate's +affiliated +affiliates +affiliating +affiliation +affiliation's +affiliations +affinities +affinity +affinity's +affirm +affirmation +affirmation's +affirmations +affirmative +affirmative's +affirmatively +affirmatives +affirmed +affirming +affirms +affix +affix's +affixed +affixes +affixing +afflict +afflicted +afflicting +affliction +affliction's +afflictions +afflicts +affluence +affluence's +affluent +affluently +afford +affordable +afforded +affording +affords 
+afforest +afforestation +afforestation's +afforested +afforesting +afforests +affray +affray's +affrays +affront +affront's +affronted +affronting +affronts +afghan +afghan's +afghans +aficionado +aficionado's +aficionados +afield +afire +aflame +afloat +aflutter +afoot +aforementioned +aforesaid +aforethought +afoul +afraid +afresh +aft +after +afterbirth +afterbirth's +afterbirths +afterburner +afterburner's +afterburners +aftercare +aftercare's +aftereffect +aftereffect's +aftereffects +afterglow +afterglow's +afterglows +afterlife +afterlife's +afterlives +aftermath +aftermath's +aftermaths +afternoon +afternoon's +afternoons +aftershave +aftershave's +aftershaves +aftershock +aftershock's +aftershocks +aftertaste +aftertaste's +aftertastes +afterthought +afterthought's +afterthoughts +afterward +afterwards +afterword +afterword's +afterwords +again +against +agape +agape's +agar +agar's +agate +agate's +agates +agave +agave's +age +age's +aged +ageing +ageing's +ageings +ageism +ageism's +ageless +agencies +agency +agency's +agenda +agenda's +agendas +agent +agent's +agents +ages +agglomerate +agglomerate's +agglomerated +agglomerates +agglomerating +agglomeration +agglomeration's +agglomerations +agglutinate +agglutinated +agglutinates +agglutinating +agglutination +agglutination's +agglutinations +aggrandize +aggrandized +aggrandizement +aggrandizement's +aggrandizes +aggrandizing +aggravate +aggravated +aggravates +aggravating +aggravation +aggravation's +aggravations +aggregate +aggregate's +aggregated +aggregates +aggregating +aggregation +aggregation's +aggregations +aggression +aggression's +aggressive +aggressively +aggressiveness +aggressiveness's +aggressor +aggressor's +aggressors +aggrieve +aggrieved +aggrieves +aggrieving +aghast +agile +agilely +agility +agility's +aging +aging's +agings +agism +agitate +agitated +agitates +agitating +agitation +agitation's +agitations +agitator +agitator's +agitators +agleam +aglitter +aglow +agnostic 
+agnostic's +agnosticism +agnosticism's +agnostics +ago +agog +agonies +agonize +agonized +agonizes +agonizing +agonizingly +agony +agony's +agrarian +agrarian's +agrarians +agree +agreeable +agreeably +agreed +agreeing +agreement +agreement's +agreements +agrees +agribusiness +agribusiness's +agribusinesses +agricultural +agriculturalist +agriculturalist's +agriculturalists +agriculture +agriculture's +agronomist +agronomist's +agronomists +agronomy +agronomy's +aground +ague +ague's +ah +aha +ahead +ahem +ahoy +aid +aid's +aide +aide's +aided +aides +aiding +aids +ail +ailed +aileron +aileron's +ailerons +ailing +ailment +ailment's +ailments +ails +aim +aim's +aimed +aiming +aimless +aimlessly +aimlessness +aimlessness's +aims +ain't +air +air's +airborne +airbrush +airbrush's +airbrushed +airbrushes +airbrushing +aircraft +aircraft's +airdrop +airdrop's +airdropped +airdropping +airdrops +aired +airfare +airfare's +airfares +airfield +airfield's +airfields +airfoil +airfoil's +airfoils +airhead +airhead's +airheads +airier +airiest +airily +airiness +airiness's +airing +airing's +airings +airless +airlift +airlift's +airlifted +airlifting +airlifts +airline +airline's +airliner +airliner's +airliners +airlines +airmail +airmail's +airmailed +airmailing +airmails +airman +airman's +airmen +airplane +airplane's +airplanes +airport +airport's +airports +airs +airship +airship's +airships +airsick +airsickness +airsickness's +airspace +airspace's +airstrip +airstrip's +airstrips +airtight +airwaves +airwaves's +airway +airway's +airways +airworthy +airy +aisle +aisle's +aisles +ajar +akimbo +akin +alabaster +alabaster's +alacrity +alacrity's +alarm +alarm's +alarmed +alarming +alarmingly +alarmist +alarmist's +alarmists +alarms +alas +alb +alb's +albacore +albacore's +albacores +albatross +albatross's +albatrosses +albeit +albino +albino's +albinos +albs +album +album's +albumen +albumen's +albumin +albumin's +albums +alchemist +alchemist's +alchemists +alchemy 
+alchemy's +alcohol +alcohol's +alcoholic +alcoholic's +alcoholics +alcoholism +alcoholism's +alcohols +alcove +alcove's +alcoves +alder +alder's +alderman +alderman's +aldermen +alders +alderwoman +alderwoman's +alderwomen +ale +ale's +alert +alert's +alerted +alerting +alertly +alertness +alertness's +alerts +ales +alfalfa +alfalfa's +alfresco +alga +alga's +algae +algebra +algebra's +algebraic +algebraically +algebras +algorithm +algorithm's +algorithmic +algorithms +alias +alias's +aliased +aliases +aliasing +alibi +alibi's +alibied +alibiing +alibis +alien +alien's +alienable +alienate +alienated +alienates +alienating +alienation +alienation's +aliened +aliening +aliens +alight +alighted +alighting +alights +align +aligned +aligning +alignment +alignment's +alignments +aligns +alike +alimentary +alimony +alimony's +aline +alined +alinement +alinement's +alinements +alines +alining +alit +alive +alkali +alkali's +alkalies +alkaline +alkalinity +alkalinity's +alkalis +alkaloid +alkaloid's +alkaloids +all +all's +allay +allayed +allaying +allays +allegation +allegation's +allegations +allege +alleged +allegedly +alleges +allegiance +allegiance's +allegiances +alleging +allegorical +allegorically +allegories +allegory +allegory's +allegro +allegro's +allegros +alleluia +alleluia's +alleluias +allergen +allergen's +allergenic +allergens +allergic +allergies +allergist +allergist's +allergists +allergy +allergy's +alleviate +alleviated +alleviates +alleviating +alleviation +alleviation's +alley +alley's +alleys +alleyway +alleyway's +alleyways +alliance +alliance's +alliances +allied +allies +alligator +alligator's +alligators +alliteration +alliteration's +alliterations +alliterative +allocate +allocated +allocates +allocating +allocation +allocation's +allocations +allot +allotment +allotment's +allotments +allots +allotted +allotting +allover +allow +allowable +allowance +allowance's +allowances +allowed +allowing +allows +alloy +alloy's +alloyed +alloying 
+alloys +allspice +allspice's +allude +alluded +alludes +alluding +allure +allure's +allured +allures +alluring +allusion +allusion's +allusions +allusive +allusively +alluvia +alluvial +alluvial's +alluvium +alluvium's +alluviums +ally +ally's +allying +almanac +almanac's +almanacs +almighty +almond +almond's +almonds +almost +alms +alms's +aloe +aloe's +aloes +aloft +aloha +aloha's +alohas +alone +along +alongside +aloof +aloofness +aloofness's +aloud +alpaca +alpaca's +alpacas +alpha +alpha's +alphabet +alphabet's +alphabetic +alphabetical +alphabetically +alphabetize +alphabetized +alphabetizes +alphabetizing +alphabets +alphanumeric +alphas +alpine +already +alright +also +altar +altar's +altars +alter +alterable +alteration +alteration's +alterations +altercation +altercation's +altercations +altered +altering +alternate +alternate's +alternated +alternately +alternates +alternating +alternation +alternation's +alternations +alternative +alternative's +alternatively +alternatives +alternator +alternator's +alternators +alters +altho +although +altimeter +altimeter's +altimeters +altitude +altitude's +altitudes +alto +alto's +altogether +altos +altruism +altruism's +altruist +altruist's +altruistic +altruistically +altruists +alum +alum's +aluminum +aluminum's +alumna +alumna's +alumnae +alumni +alumnus +alumnus's +alums +always +am +amalgam +amalgam's +amalgamate +amalgamated +amalgamates +amalgamating +amalgamation +amalgamation's +amalgamations +amalgams +amanuenses +amanuensis +amanuensis's +amaranth +amaranth's +amaranths +amaryllis +amaryllis's +amaryllises +amass +amassed +amasses +amassing +amateur +amateur's +amateurish +amateurism +amateurism's +amateurs +amatory +amaze +amaze's +amazed +amazement +amazement's +amazes +amazing +amazingly +amazon +amazon's +amazons +ambassador +ambassador's +ambassadorial +ambassadors +ambassadorship +ambassadorship's +ambassadorships +amber +amber's +ambergris +ambergris's +ambiance +ambiance's +ambiances 
+ambidextrous +ambidextrously +ambience +ambience's +ambiences +ambient +ambiguities +ambiguity +ambiguity's +ambiguous +ambiguously +ambition +ambition's +ambitions +ambitious +ambitiously +ambitiousness +ambitiousness's +ambivalence +ambivalence's +ambivalent +ambivalently +amble +amble's +ambled +ambles +ambling +ambrosia +ambrosia's +ambulance +ambulance's +ambulances +ambulatories +ambulatory +ambulatory's +ambush +ambush's +ambushed +ambushes +ambushing +ameba +ameba's +amebae +amebas +amebic +ameer +ameer's +ameers +ameliorate +ameliorated +ameliorates +ameliorating +amelioration +amelioration's +amen +amenable +amend +amendable +amended +amending +amendment +amendment's +amendments +amends +amenities +amenity +amenity's +amethyst +amethyst's +amethysts +amiability +amiability's +amiable +amiably +amicability +amicability's +amicable +amicably +amid +amidships +amidst +amigo +amigo's +amigos +amino +amir +amir's +amirs +amiss +amity +amity's +ammeter +ammeter's +ammeters +ammo +ammo's +ammonia +ammonia's +ammunition +ammunition's +amnesia +amnesia's +amnesiac +amnesiac's +amnesiacs +amnestied +amnesties +amnesty +amnesty's +amnestying +amniocenteses +amniocentesis +amniocentesis's +amoeba +amoeba's +amoebae +amoebas +amoebic +amok +among +amongst +amoral +amorality +amorality's +amorally +amorous +amorously +amorousness +amorousness's +amorphous +amorphously +amorphousness +amorphousness's +amortization +amortization's +amortizations +amortize +amortized +amortizes +amortizing +amount +amount's +amounted +amounting +amounts +amour +amour's +amours +amp +amp's +amperage +amperage's +ampere +ampere's +amperes +ampersand +ampersand's +ampersands +amphetamine +amphetamine's +amphetamines +amphibian +amphibian's +amphibians +amphibious +amphitheater +amphitheater's +amphitheaters +amphitheatre +amphitheatre's +amphitheatres +ample +ampler +amplest +amplification +amplification's +amplifications +amplified +amplifier +amplifier's +amplifiers +amplifies +amplify 
+amplifying +amplitude +amplitude's +amplitudes +amply +ampoule +ampoule's +ampoules +amps +ampul +ampul's +ampule +ampule's +ampules +ampuls +amputate +amputated +amputates +amputating +amputation +amputation's +amputations +amputee +amputee's +amputees +amuck +amulet +amulet's +amulets +amuse +amused +amusement +amusement's +amusements +amuses +amusing +amusingly +an +anachronism +anachronism's +anachronisms +anachronistic +anaconda +anaconda's +anacondas +anaemia +anaemia's +anaemic +anaerobic +anaesthesia +anaesthesia's +anaesthetic +anaesthetic's +anaesthetics +anaesthetist +anaesthetist's +anaesthetists +anaesthetize +anaesthetized +anaesthetizes +anaesthetizing +anagram +anagram's +anagrams +anal +analgesia +analgesia's +analgesic +analgesic's +analgesics +analog +analog's +analogies +analogous +analogously +analogs +analogue +analogue's +analogues +analogy +analogy's +analyses +analysis +analysis's +analyst +analyst's +analysts +analytic +analytical +analyticalally +analytically +analyze +analyzed +analyzer +analyzer's +analyzers +analyzes +analyzing +anapest +anapest's +anapests +anarchic +anarchically +anarchism +anarchism's +anarchist +anarchist's +anarchistic +anarchists +anarchy +anarchy's +anathema +anathema's +anathemas +anatomic +anatomical +anatomically +anatomies +anatomist +anatomist's +anatomists +anatomy +anatomy's +ancestor +ancestor's +ancestors +ancestral +ancestress +ancestress's +ancestresses +ancestries +ancestry +ancestry's +anchor +anchor's +anchorage +anchorage's +anchorages +anchored +anchoring +anchorite +anchorite's +anchorites +anchorman +anchorman's +anchormen +anchorpeople +anchorperson +anchorperson's +anchorpersons +anchors +anchorwoman +anchorwoman's +anchorwomen +anchovies +anchovy +anchovy's +ancient +ancient's +ancienter +ancientest +ancients +ancillaries +ancillary +ancillary's +and +andante +andante's +andantes +andiron +andiron's +andirons +androgen +androgen's +androgynous +android +android's +androids +anecdota 
+anecdotal +anecdote +anecdote's +anecdotes +anemia +anemia's +anemic +anemometer +anemometer's +anemometers +anemone +anemone's +anemones +anesthesia +anesthesia's +anesthesiologist +anesthesiologist's +anesthesiologists +anesthesiology +anesthesiology's +anesthetic +anesthetic's +anesthetics +anesthetist +anesthetist's +anesthetists +anesthetize +anesthetized +anesthetizes +anesthetizing +aneurism +aneurism's +aneurisms +aneurysm +aneurysm's +aneurysms +anew +angel +angel's +angelic +angelically +angels +anger +anger's +angered +angering +angers +angina +angina's +angioplasties +angioplasty +angioplasty's +angiosperm +angiosperm's +angiosperms +angle +angle's +angled +angler +angler's +anglers +angles +angleworm +angleworm's +angleworms +angling +angling's +angora +angora's +angoras +angrier +angriest +angrily +angry +angst +angst's +angstrom +angstrom's +angstroms +anguish +anguish's +anguished +anguishes +anguishing +angular +angularities +angularity +angularity's +ani +animal +animal's +animals +animate +animated +animatedly +animates +animating +animation +animation's +animations +animator +animator's +animators +anime +anime's +animism +animism's +animist +animist's +animistic +animists +animosities +animosity +animosity's +animus +animus's +anion +anion's +anions +anise +anise's +aniseed +aniseed's +ankh +ankh's +ankhs +ankle +ankle's +ankles +anklet +anklet's +anklets +annals +annals's +anneal +annealed +annealing +anneals +annex +annex's +annexation +annexation's +annexations +annexed +annexes +annexing +annihilate +annihilated +annihilates +annihilating +annihilation +annihilation's +annihilator +annihilator's +annihilators +anniversaries +anniversary +anniversary's +annotate +annotated +annotates +annotating +annotation +annotation's +annotations +announce +announced +announcement +announcement's +announcements +announcer +announcer's +announcers +announces +announcing +annoy +annoyance +annoyance's +annoyances +annoyed +annoying +annoyingly +annoys 
+annual +annual's +annually +annuals +annuities +annuity +annuity's +annul +annular +annulled +annulling +annulment +annulment's +annulments +annuls +anode +anode's +anodes +anodyne +anodyne's +anodynes +anoint +anointed +anointing +anointment +anointment's +anoints +anomalies +anomalous +anomaly +anomaly's +anon +anons +anonymity +anonymity's +anonymous +anonymously +anopheles +anopheles's +anorak +anorak's +anoraks +anorexia +anorexia's +anorexic +anorexic's +anorexics +another +answer +answer's +answerable +answered +answering +answers +ant +ant's +antacid +antacid's +antacids +antagonism +antagonism's +antagonisms +antagonist +antagonist's +antagonistic +antagonistically +antagonists +antagonize +antagonized +antagonizes +antagonizing +antarctic +ante +ante's +anteater +anteater's +anteaters +antebellum +antecedent +antecedent's +antecedents +antechamber +antechamber's +antechambers +anted +antedate +antedated +antedates +antedating +antediluvian +anteed +anteing +antelope +antelope's +antelopes +antenna +antenna's +antennae +antennas +anterior +anteroom +anteroom's +anterooms +antes +anthem +anthem's +anthems +anther +anther's +anthers +anthill +anthill's +anthills +anthologies +anthologist +anthologist's +anthologists +anthologize +anthologized +anthologizes +anthologizing +anthology +anthology's +anthracite +anthracite's +anthrax +anthrax's +anthropocentric +anthropoid +anthropoid's +anthropoids +anthropological +anthropologist +anthropologist's +anthropologists +anthropology +anthropology's +anthropomorphic +anthropomorphism +anthropomorphism's +anti +anti's +antiabortion +antiaircraft +antibiotic +antibiotic's +antibiotics +antibodies +antibody +antibody's +antic +antic's +anticipate +anticipated +anticipates +anticipating +anticipation +anticipation's +anticipations +anticipatory +anticked +anticking +anticlimactic +anticlimax +anticlimax's +anticlimaxes +anticlockwise +antics +anticyclone +anticyclone's +anticyclones +antidepressant +antidepressant's 
+antidepressants +antidote +antidote's +antidotes +antifreeze +antifreeze's +antigen +antigen's +antigens +antihero +antihero's +antiheroes +antihistamine +antihistamine's +antihistamines +antiknock +antiknock's +antimatter +antimatter's +antimony +antimony's +antiparticle +antiparticle's +antiparticles +antipasti +antipasto +antipasto's +antipastos +antipathetic +antipathies +antipathy +antipathy's +antipersonnel +antiperspirant +antiperspirant's +antiperspirants +antiphonal +antiphonal's +antiphonals +antipodes +antipodes's +antiquarian +antiquarian's +antiquarians +antiquaries +antiquary +antiquary's +antiquate +antiquated +antiquates +antiquating +antique +antique's +antiqued +antiques +antiquing +antiquities +antiquity +antiquity's +antis +antiseptic +antiseptic's +antiseptically +antiseptics +antislavery +antisocial +antitheses +antithesis +antithesis's +antithetical +antithetically +antitoxin +antitoxin's +antitoxins +antitrust +antiviral +antiviral's +antivirals +antivirus +antiwar +antler +antler's +antlered +antlers +antonym +antonym's +antonyms +ants +anus +anus's +anuses +anvil +anvil's +anvils +anxieties +anxiety +anxiety's +anxious +anxiously +any +anybodies +anybody +anybody's +anyhow +anymore +anyone +anyone's +anyplace +anything +anything's +anythings +anytime +anyway +anywhere +aorta +aorta's +aortae +aortas +apace +apart +apartheid +apartheid's +apartment +apartment's +apartments +apathetic +apathetically +apathy +apathy's +ape +ape's +aped +aperitif +aperitif's +aperitifs +aperture +aperture's +apertures +apes +apex +apex's +apexes +aphasia +aphasia's +aphasic +aphasic's +aphasics +aphelia +aphelion +aphelion's +aphelions +aphid +aphid's +aphids +aphorism +aphorism's +aphorisms +aphoristic +aphrodisiac +aphrodisiac's +aphrodisiacs +apiaries +apiary +apiary's +apices +apiece +aping +aplenty +aplomb +aplomb's +apocalypse +apocalypse's +apocalypses +apocalyptic +apocryphal +apogee +apogee's +apogees +apolitical +apologetic +apologetically +apologia 
+apologia's +apologias +apologies +apologist +apologist's +apologists +apologize +apologized +apologizes +apologizing +apology +apology's +apoplectic +apoplexies +apoplexy +apoplexy's +apostasies +apostasy +apostasy's +apostate +apostate's +apostates +apostle +apostle's +apostles +apostolic +apostrophe +apostrophe's +apostrophes +apothecaries +apothecary +apothecary's +apotheoses +apotheosis +apotheosis's +appal +appall +appalled +appalling +appallingly +appalls +appals +apparatus +apparatus's +apparatuses +apparel +apparel's +appareled +appareling +apparelled +apparelling +apparels +apparent +apparently +apparition +apparition's +apparitions +appeal +appeal's +appealed +appealing +appeals +appear +appearance +appearance's +appearances +appeared +appearing +appears +appease +appeased +appeasement +appeasement's +appeasements +appeaser +appeaser's +appeasers +appeases +appeasing +appellant +appellant's +appellants +appellate +appellation +appellation's +appellations +append +appendage +appendage's +appendages +appendectomies +appendectomy +appendectomy's +appended +appendices +appendicitis +appendicitis's +appending +appendix +appendix's +appendixes +appends +appertain +appertained +appertaining +appertains +appetite +appetite's +appetites +appetizer +appetizer's +appetizers +appetizing +appetizingly +applaud +applauded +applauding +applauds +applause +applause's +apple +apple's +applejack +applejack's +apples +applesauce +applesauce's +appliance +appliance's +appliances +applicability +applicability's +applicable +applicant +applicant's +applicants +application +application's +applications +applicator +applicator's +applicators +applied +applies +appliqué +appliqué's +appliquéd +appliquéing +appliqués +apply +applying +appoint +appointed +appointee +appointee's +appointees +appointing +appointive +appointment +appointment's +appointments +appoints +apportion +apportioned +apportioning +apportionment +apportionment's +apportions +apposite +appositely +appositeness 
+appositeness's +apposition +apposition's +appositive +appositive's +appositives +appraisal +appraisal's +appraisals +appraise +appraised +appraiser +appraiser's +appraisers +appraises +appraising +appreciable +appreciably +appreciate +appreciated +appreciates +appreciating +appreciation +appreciation's +appreciations +appreciative +appreciatively +apprehend +apprehended +apprehending +apprehends +apprehension +apprehension's +apprehensions +apprehensive +apprehensively +apprehensiveness +apprehensiveness's +apprentice +apprentice's +apprenticed +apprentices +apprenticeship +apprenticeship's +apprenticeships +apprenticing +apprise +apprised +apprises +apprising +approach +approach's +approachable +approached +approaches +approaching +approbation +approbation's +approbations +appropriate +appropriated +appropriately +appropriateness +appropriateness's +appropriates +appropriating +appropriation +appropriation's +appropriations +approval +approval's +approvals +approve +approved +approves +approving +approvingly +approximate +approximated +approximately +approximates +approximating +approximation +approximation's +approximations +apps +appurtenance +appurtenance's +appurtenances +apricot +apricot's +apricots +apron +apron's +aprons +apropos +apse +apse's +apses +apt +apter +aptest +aptitude +aptitude's +aptitudes +aptly +aptness +aptness's +aqua +aqua's +aquaculture +aquaculture's +aquae +aquamarine +aquamarine's +aquamarines +aquanaut +aquanaut's +aquanauts +aquaplane +aquaplane's +aquaplaned +aquaplanes +aquaplaning +aquaria +aquarium +aquarium's +aquariums +aquas +aquatic +aquatic's +aquatics +aquavit +aquavit's +aqueduct +aqueduct's +aqueducts +aqueous +aquiculture +aquiculture's +aquifer +aquifer's +aquifers +aquiline +arabesque +arabesque's +arabesques +arable +arachnid +arachnid's +arachnids +arbiter +arbiter's +arbiters +arbitrarily +arbitrariness +arbitrariness's +arbitrary +arbitrate +arbitrated +arbitrates +arbitrating +arbitration +arbitration's 
+arbitrator +arbitrator's +arbitrators +arbor +arbor's +arboreal +arboreta +arboretum +arboretum's +arboretums +arbors +arborvitae +arborvitae's +arborvitaes +arbutus +arbutus's +arbutuses +arc +arc's +arcade +arcade's +arcades +arcane +arced +arch +arch's +archaeological +archaeologist +archaeologist's +archaeologists +archaeology +archaeology's +archaic +archaically +archaism +archaism's +archaisms +archangel +archangel's +archangels +archbishop +archbishop's +archbishopric +archbishopric's +archbishoprics +archbishops +archdeacon +archdeacon's +archdeacons +archdiocese +archdiocese's +archdioceses +archduke +archduke's +archdukes +arched +archenemies +archenemy +archenemy's +archeological +archeologist +archeologist's +archeologists +archeology +archeology's +archer +archer's +archers +archery +archery's +arches +archest +archetypal +archetype +archetype's +archetypes +arching +archipelago +archipelago's +archipelagoes +archipelagos +architect +architect's +architects +architectural +architecturally +architecture +architecture's +architectures +archive +archive's +archived +archives +archiving +archivist +archivist's +archivists +archly +archness +archness's +archway +archway's +archways +arcing +arcked +arcking +arcs +arctic +arctic's +arctics +ardent +ardently +ardor +ardor's +ardors +arduous +arduously +arduousness +arduousness's +are +are's +area +area's +areas +aren't +arena +arena's +arenas +ares +argon +argon's +argosies +argosy +argosy's +argot +argot's +argots +arguable +arguably +argue +argued +argues +arguing +argument +argument's +argumentation +argumentation's +argumentative +arguments +argyle +argyle's +argyles +aria +aria's +arias +arid +aridity +aridity's +aright +arise +arisen +arises +arising +aristocracies +aristocracy +aristocracy's +aristocrat +aristocrat's +aristocratic +aristocratically +aristocrats +arithmetic +arithmetic's +arithmetical +arithmetically +ark +ark's +arks +arm +arm's +armada +armada's +armadas +armadillo +armadillo's 
+armadillos +armament +armament's +armaments +armature +armature's +armatures +armband +armband's +armbands +armchair +armchair's +armchairs +armed +armful +armful's +armfuls +armhole +armhole's +armholes +armies +arming +armistice +armistice's +armistices +armlet +armlet's +armlets +armor +armor's +armored +armorer +armorer's +armorers +armories +armoring +armors +armory +armory's +armpit +armpit's +armpits +armrest +armrest's +armrests +arms +armsful +army +army's +aroma +aroma's +aromas +aromatherapy +aromatherapy's +aromatic +aromatic's +aromatics +arose +around +arousal +arousal's +arouse +aroused +arouses +arousing +arpeggio +arpeggio's +arpeggios +arraign +arraigned +arraigning +arraignment +arraignment's +arraignments +arraigns +arrange +arranged +arrangement +arrangement's +arrangements +arranger +arranger's +arrangers +arranges +arranging +arrant +array +array's +arrayed +arraying +arrays +arrears +arrears's +arrest +arrest's +arrested +arresting +arrests +arrival +arrival's +arrivals +arrive +arrived +arrives +arriving +arrogance +arrogance's +arrogant +arrogantly +arrogate +arrogated +arrogates +arrogating +arrow +arrow's +arrowhead +arrowhead's +arrowheads +arrowroot +arrowroot's +arrows +arroyo +arroyo's +arroyos +arsenal +arsenal's +arsenals +arsenic +arsenic's +arson +arson's +arsonist +arsonist's +arsonists +art +art's +artefact +artefact's +artefacts +arterial +arteries +arteriosclerosis +arteriosclerosis's +artery +artery's +artful +artfully +artfulness +artfulness's +arthritic +arthritic's +arthritics +arthritis +arthritis's +arthropod +arthropod's +arthropods +artichoke +artichoke's +artichokes +article +article's +articles +articulate +articulated +articulately +articulateness +articulateness's +articulates +articulating +articulation +articulation's +articulations +artier +artiest +artifact +artifact's +artifacts +artifice +artifice's +artificer +artificer's +artificers +artifices +artificial +artificiality +artificiality's +artificially 
+artillery +artillery's +artisan +artisan's +artisans +artist +artist's +artiste +artiste's +artistes +artistic +artistically +artistry +artistry's +artists +artless +artlessly +artlessness +artlessness's +arts +artsier +artsiest +artsy +artwork +artwork's +artworks +arty +as +asbestos +asbestos's +ascend +ascendancy +ascendancy's +ascendant +ascendant's +ascendants +ascended +ascendency +ascendency's +ascendent +ascendent's +ascendents +ascending +ascends +ascension +ascension's +ascensions +ascent +ascent's +ascents +ascertain +ascertainable +ascertained +ascertaining +ascertains +ascetic +ascetic's +asceticism +asceticism's +ascetics +ascot +ascot's +ascots +ascribable +ascribe +ascribed +ascribes +ascribing +ascription +ascription's +aseptic +asexual +asexually +ash +ash's +ashamed +ashamedly +ashcan +ashcan's +ashcans +ashed +ashen +ashes +ashier +ashiest +ashing +ashore +ashram +ashram's +ashrams +ashtray +ashtray's +ashtrays +ashy +aside +aside's +asides +asinine +asininities +asininity +asininity's +ask +askance +asked +askew +asking +asks +aslant +asleep +asocial +asp +asp's +asparagus +asparagus's +aspartame +aspartame's +aspect +aspect's +aspects +aspen +aspen's +aspens +asperities +asperity +asperity's +aspersion +aspersion's +aspersions +asphalt +asphalt's +asphalted +asphalting +asphalts +asphyxia +asphyxia's +asphyxiate +asphyxiated +asphyxiates +asphyxiating +asphyxiation +asphyxiation's +asphyxiations +aspic +aspic's +aspics +aspirant +aspirant's +aspirants +aspirate +aspirate's +aspirated +aspirates +aspirating +aspiration +aspiration's +aspirations +aspire +aspired +aspires +aspirin +aspirin's +aspiring +aspirins +asps +ass +ass's +assail +assailable +assailant +assailant's +assailants +assailed +assailing +assails +assassin +assassin's +assassinate +assassinated +assassinates +assassinating +assassination +assassination's +assassinations +assassins +assault +assault's +assaulted +assaulter +assaulting +assaults +assay +assay's +assayed +assaying 
+assays +assemblage +assemblage's +assemblages +assemble +assembled +assembler +assembler's +assemblers +assembles +assemblies +assembling +assembly +assembly's +assemblyman +assemblyman's +assemblymen +assemblywoman +assemblywoman's +assemblywomen +assent +assent's +assented +assenting +assents +assert +asserted +asserting +assertion +assertion's +assertions +assertive +assertively +assertiveness +assertiveness's +asserts +asses +assess +assessed +assesses +assessing +assessment +assessment's +assessments +assessor +assessor's +assessors +asset +asset's +assets +asseverate +asseverated +asseverates +asseverating +asshole +asshole's +assholes +assiduous +assiduously +assiduousness +assiduousness's +assign +assign's +assignable +assignation +assignation's +assignations +assigned +assigning +assignment +assignment's +assignments +assigns +assimilate +assimilated +assimilates +assimilating +assimilation +assimilation's +assist +assist's +assistance +assistance's +assistant +assistant's +assistants +assisted +assisting +assists +assize +assize's +assizes +associate +associate's +associated +associates +associating +association +association's +associations +associative +assonance +assonance's +assort +assorted +assorting +assortment +assortment's +assortments +assorts +assuage +assuaged +assuages +assuaging +assume +assumed +assumes +assuming +assumption +assumption's +assumptions +assurance +assurance's +assurances +assure +assured +assured's +assuredly +assureds +assures +assuring +aster +aster's +asterisk +asterisk's +asterisked +asterisking +asterisks +astern +asteroid +asteroid's +asteroids +asters +asthma +asthma's +asthmatic +asthmatic's +asthmatics +astigmatic +astigmatism +astigmatism's +astigmatisms +astir +astonish +astonished +astonishes +astonishing +astonishingly +astonishment +astonishment's +astound +astounded +astounding +astoundingly +astounds +astrakhan +astrakhan's +astral +astray +astride +astringency +astringency's +astringent +astringent's 
+astringents +astrologer +astrologer's +astrologers +astrological +astrology +astrology's +astronaut +astronaut's +astronautics +astronautics's +astronauts +astronomer +astronomer's +astronomers +astronomic +astronomical +astronomically +astronomy +astronomy's +astrophysicist +astrophysicist's +astrophysicists +astrophysics +astrophysics's +astute +astutely +astuteness +astuteness's +astuter +astutest +asunder +asylum +asylum's +asylums +asymmetric +asymmetrical +asymmetrically +asymmetry +asymmetry's +asymptotic +asymptotically +asynchronous +asynchronously +at +atavism +atavism's +atavistic +ate +atelier +atelier's +ateliers +atheism +atheism's +atheist +atheist's +atheistic +atheists +atherosclerosis +atherosclerosis's +athlete +athlete's +athletes +athletic +athletically +athletics +athletics's +atlas +atlas's +atlases +atmosphere +atmosphere's +atmospheres +atmospheric +atmospherically +atoll +atoll's +atolls +atom +atom's +atomic +atomizer +atomizer's +atomizers +atoms +atonal +atonality +atonality's +atone +atoned +atonement +atonement's +atones +atoning +atop +atria +atrium +atrium's +atriums +atrocious +atrociously +atrociousness +atrociousness's +atrocities +atrocity +atrocity's +atrophied +atrophies +atrophy +atrophy's +atrophying +attach +attached +attaching +attachment +attachment's +attachments +attaché +attaché's +attachés +attack +attack's +attacked +attacker +attacker's +attackers +attacking +attacks +attain +attainable +attained +attaining +attainment +attainment's +attainments +attains +attar +attar's +attempt +attempt's +attempted +attempting +attempts +attend +attendance +attendance's +attendances +attendant +attendant's +attendants +attended +attender +attending +attends +attention +attention's +attentions +attentive +attentively +attentiveness +attentiveness's +attenuate +attenuated +attenuates +attenuating +attenuation +attenuation's +attest +attestation +attestation's +attestations +attested +attesting +attests +attic +attic's +attics 
+attire +attire's +attired +attires +attiring +attitude +attitude's +attitudes +attitudinize +attitudinized +attitudinizes +attitudinizing +attorney +attorney's +attorneys +attract +attracted +attracting +attraction +attraction's +attractions +attractive +attractively +attractiveness +attractiveness's +attracts +attributable +attribute +attribute's +attributed +attributes +attributing +attribution +attribution's +attributions +attributive +attributive's +attributively +attributives +attrition +attrition's +attune +attuned +attunes +attuning +atwitter +atypical +atypically +auburn +auburn's +auction +auction's +auctioned +auctioneer +auctioneer's +auctioneers +auctioning +auctions +audacious +audaciously +audaciousness +audaciousness's +audacity +audacity's +audibility +audibility's +audible +audible's +audibles +audibly +audience +audience's +audiences +audio +audio's +audiophile +audiophile's +audiophiles +audios +audiovisual +audit +audit's +audited +auditing +audition +audition's +auditioned +auditioning +auditions +auditor +auditor's +auditoria +auditorium +auditorium's +auditoriums +auditors +auditory +audits +auger +auger's +augers +aught +aught's +aughts +augment +augmentation +augmentation's +augmentations +augmented +augmenting +augments +augur +augur's +augured +auguries +auguring +augurs +augury +augury's +august +auguster +augustest +auk +auk's +auks +aunt +aunt's +aunts +aura +aura's +aurae +aural +aurally +auras +aureola +aureola's +aureolas +aureole +aureole's +aureoles +auricle +auricle's +auricles +auspice +auspice's +auspices +auspicious +auspiciously +auspiciousness +auspiciousness's +austere +austerely +austerer +austerest +austerities +austerity +austerity's +authentic +authentically +authenticate +authenticated +authenticates +authenticating +authentication +authentication's +authentications +authenticity +authenticity's +author +author's +authored +authoring +authoritarian +authoritarian's +authoritarianism +authoritarianism's +authoritarians 
+authoritative +authoritatively +authoritativeness +authoritativeness's +authorities +authority +authority's +authorization +authorization's +authorizations +authorize +authorized +authorizes +authorizing +authors +authorship +authorship's +autism +autism's +autistic +auto +auto's +autobiographical +autobiographies +autobiography +autobiography's +autocracies +autocracy +autocracy's +autocrat +autocrat's +autocratic +autocratically +autocrats +autograph +autograph's +autographed +autographing +autographs +autoimmune +automata +automate +automated +automates +automatic +automatic's +automatically +automatics +automating +automation +automation's +automaton +automaton's +automatons +automobile +automobile's +automobiled +automobiles +automobiling +automotive +autonomous +autonomously +autonomy +autonomy's +autopilot +autopilot's +autopilots +autopsied +autopsies +autopsy +autopsy's +autopsying +autos +autoworker +autoworker's +autoworkers +autumn +autumn's +autumnal +autumns +auxiliaries +auxiliary +auxiliary's +avail +avail's +availability +availability's +available +availed +availing +avails +avalanche +avalanche's +avalanches +avarice +avarice's +avaricious +avariciously +avast +avatar +avatar's +avatars +avenge +avenged +avenger +avenger's +avengers +avenges +avenging +avenue +avenue's +avenues +aver +average +average's +averaged +averages +averaging +averred +averring +avers +averse +aversion +aversion's +aversions +avert +averted +averting +averts +avian +aviaries +aviary +aviary's +aviation +aviation's +aviator +aviator's +aviators +aviatrices +aviatrix +aviatrix's +aviatrixes +avid +avidity +avidity's +avidly +avionics +avionics's +avocado +avocado's +avocadoes +avocados +avocation +avocation's +avocations +avoid +avoidable +avoidably +avoidance +avoidance's +avoided +avoiding +avoids +avoirdupois +avoirdupois's +avow +avowal +avowal's +avowals +avowed +avowedly +avowing +avows +avuncular +await +awaited +awaiting +awaits +awake +awaked +awaken +awakened 
+awakening +awakening's +awakenings +awakens +awakes +awaking +award +award's +awarded +awarding +awards +aware +awareness +awareness's +awash +away +awe +awe's +awed +aweigh +awes +awesome +awesomely +awestricken +awestruck +awful +awfuller +awfullest +awfully +awfulness +awfulness's +awhile +awing +awkward +awkwarder +awkwardest +awkwardly +awkwardness +awkwardness's +awl +awl's +awls +awning +awning's +awnings +awoke +awoken +awol +awry +ax +ax's +axe +axe's +axed +axes +axial +axing +axiom +axiom's +axiomatic +axiomatically +axioms +axis +axis's +axle +axle's +axles +axon +axon's +axons +ay +ay's +ayatollah +ayatollah's +ayatollahs +aye +aye's +ayes +azalea +azalea's +azaleas +azimuth +azimuth's +azimuths +azure +azure's +azures +b +baa +baa's +baaed +baaing +baas +babble +babble's +babbled +babbler +babbler's +babblers +babbles +babbling +babe +babe's +babel +babel's +babels +babes +babied +babier +babies +babiest +baboon +baboon's +baboons +babushka +babushka's +babushkas +baby +baby's +babyhood +babyhood's +babying +babyish +babysat +babysit +babysits +babysitter +babysitter's +babysitters +babysitting +baccalaureate +baccalaureate's +baccalaureates +bacchanal +bacchanal's +bacchanalian +bacchanalian's +bacchanalians +bacchanals +bachelor +bachelor's +bachelors +bacilli +bacillus +bacillus's +back +back's +backache +backache's +backaches +backbit +backbite +backbiter +backbiter's +backbiters +backbites +backbiting +backbitten +backboard +backboard's +backboards +backbone +backbone's +backbones +backbreaking +backdate +backdated +backdates +backdating +backdrop +backdrop's +backdrops +backed +backer +backer's +backers +backfield +backfield's +backfields +backfire +backfire's +backfired +backfires +backfiring +backgammon +backgammon's +background +background's +backgrounds +backhand +backhand's +backhanded +backhanding +backhands +backhoe +backhoe's +backhoes +backing +backing's +backings +backlash +backlash's +backlashes +backless +backlog +backlog's 
+backlogged +backlogging +backlogs +backpack +backpack's +backpacked +backpacker +backpacker's +backpackers +backpacking +backpacks +backpedal +backpedaled +backpedaling +backpedalled +backpedalling +backpedals +backrest +backrest's +backrests +backs +backside +backside's +backsides +backslapper +backslapper's +backslappers +backslash +backslash's +backslashes +backslid +backslidden +backslide +backslider +backslider's +backsliders +backslides +backsliding +backspace +backspace's +backspaced +backspaces +backspacing +backspin +backspin's +backstabbing +backstage +backstage's +backstairs +backstop +backstop's +backstopped +backstopping +backstops +backstories +backstory +backstretch +backstretch's +backstretches +backstroke +backstroke's +backstroked +backstrokes +backstroking +backtrack +backtracked +backtracking +backtracks +backup +backup's +backups +backward +backwardness +backwardness's +backwards +backwash +backwash's +backwater +backwater's +backwaters +backwoods +backwoods's +backyard +backyard's +backyards +bacon +bacon's +bacteria +bacteria's +bacterial +bacterias +bacteriological +bacteriologist +bacteriologist's +bacteriologists +bacteriology +bacteriology's +bacterium +bacterium's +bad +bad's +badder +baddest +bade +badge +badge's +badger +badger's +badgered +badgering +badgers +badges +badinage +badinage's +badlands +badlands's +badly +badminton +badminton's +badmouth +badmouthed +badmouthing +badmouths +badness +badness's +baffle +baffle's +baffled +bafflement +bafflement's +baffles +baffling +bag +bag's +bagatelle +bagatelle's +bagatelles +bagel +bagel's +bagels +baggage +baggage's +bagged +baggier +baggiest +bagginess +bagginess's +bagging +baggy +bagpipe +bagpipe's +bagpipes +bags +bah +bail +bail's +bailed +bailiff +bailiffs +bailing +bailiwick +bailiwick's +bailiwicks +bailout +bailout's +bailouts +bails +bait +bait's +baited +baiting +baits +baize +baize's +bake +bake's +baked +baker +baker's +bakeries +bakers +bakery +bakery's +bakes +baking 
+balalaika +balalaika's +balalaikas +balance +balance's +balanced +balances +balancing +balconies +balcony +balcony's +bald +balded +balder +balderdash +balderdash's +baldest +balding +baldly +baldness +baldness's +balds +bale +bale's +baled +baleen +baleen's +baleful +balefully +bales +baling +balk +balk's +balked +balkier +balkiest +balking +balks +balky +ball +ball's +ballad +ballad's +balladeer +balladeer's +balladeers +ballads +ballast +ballast's +ballasted +ballasting +ballasts +balled +ballerina +ballerina's +ballerinas +ballet +ballet's +ballets +balling +ballistic +ballistics +ballistics's +balloon +balloon's +ballooned +ballooning +balloonist +balloonist's +balloonists +balloons +ballot +ballot's +balloted +balloting +ballots +ballpark +ballpark's +ballparks +ballplayer +ballplayer's +ballplayers +ballpoint +ballpoint's +ballpoints +ballroom +ballroom's +ballrooms +balls +ballsier +ballsiest +ballsy +ballyhoo +ballyhoo's +ballyhooed +ballyhooing +ballyhoos +balm +balm's +balmier +balmiest +balminess +balminess's +balms +balmy +baloney +baloney's +balsa +balsa's +balsam +balsam's +balsams +balsas +baluster +baluster's +balusters +balustrade +balustrade's +balustrades +bamboo +bamboo's +bamboos +bamboozle +bamboozled +bamboozles +bamboozling +ban +ban's +banal +banalities +banality +banality's +banana +banana's +bananas +band +band's +bandage +bandage's +bandaged +bandages +bandaging +bandana +bandana's +bandanas +bandanna +bandanna's +bandannas +banded +bandied +bandier +bandies +bandiest +banding +bandit +bandit's +banditry +banditry's +bandits +banditti +bandoleer +bandoleer's +bandoleers +bandolier +bandolier's +bandoliers +bands +bandstand +bandstand's +bandstands +bandwagon +bandwagon's +bandwagons +bandwidth +bandy +bandying +bane +bane's +baneful +banes +bang +bang's +banged +banging +bangle +bangle's +bangles +bangs +bani +banish +banished +banishes +banishing +banishment +banishment's +banister +banister's +banisters +banjo +banjo's +banjoes 
+banjoist +banjoist's +banjoists +banjos +bank +bank's +bankbook +bankbook's +bankbooks +banked +banker +banker's +bankers +banking +banking's +banknote +banknote's +banknotes +bankroll +bankroll's +bankrolled +bankrolling +bankrolls +bankrupt +bankrupt's +bankruptcies +bankruptcy +bankruptcy's +bankrupted +bankrupting +bankrupts +banks +banned +banner +banner's +banners +banning +bannister +bannister's +bannisters +banns +banns's +banquet +banquet's +banqueted +banqueting +banquets +bans +banshee +banshee's +banshees +bantam +bantam's +bantams +bantamweight +bantamweight's +bantamweights +banter +banter's +bantered +bantering +banters +banyan +banyan's +banyans +baobab +baobab's +baobabs +baptism +baptism's +baptismal +baptisms +baptist +baptisteries +baptistery +baptistery's +baptistries +baptistry +baptistry's +baptists +baptize +baptized +baptizes +baptizing +bar +bar's +barb +barb's +barbacoa +barbarian +barbarian's +barbarians +barbaric +barbarism +barbarism's +barbarisms +barbarities +barbarity +barbarity's +barbarous +barbarously +barbecue +barbecue's +barbecued +barbecues +barbecuing +barbed +barbell +barbell's +barbells +barbeque +barbeque's +barbequed +barbeques +barbequing +barber +barber's +barbered +barbering +barberries +barberry +barberry's +barbers +barbershop +barbershop's +barbershops +barbing +barbiturate +barbiturate's +barbiturates +barbs +bard +bard's +bards +bare +bareback +bared +barefaced +barefoot +barefooted +barehanded +bareheaded +barely +bareness +bareness's +barer +bares +barest +barf +barf's +barfed +barfing +barfs +bargain +bargain's +bargained +bargainer +bargaining +bargains +barge +barge's +barged +barges +barging +baring +barista +barista's +baristas +baritone +baritone's +baritones +barium +barium's +bark +bark's +barked +barker +barker's +barkers +barking +barks +barley +barley's +barmaid +barmaid's +barmaids +barman +barn +barn's +barnacle +barnacle's +barnacles +barns +barnstorm +barnstormed +barnstorming +barnstorms 
+barnyard +barnyard's +barnyards +barometer +barometer's +barometers +barometric +baron +baron's +baroness +baroness's +baronesses +baronet +baronet's +baronets +baronial +barons +baroque +baroque's +barrack +barrack's +barracks +barracuda +barracuda's +barracudas +barrage +barrage's +barraged +barrages +barraging +barred +barrel +barrel's +barreled +barreling +barrelled +barrelling +barrels +barren +barren's +barrener +barrenest +barrenness +barrenness's +barrens +barrette +barrette's +barrettes +barricade +barricade's +barricaded +barricades +barricading +barrier +barrier's +barriers +barring +barrings +barrio +barrio's +barrios +barrister +barrister's +barristers +barroom +barroom's +barrooms +barrow +barrow's +barrows +bars +bartender +bartender's +bartenders +barter +barter's +bartered +bartering +barters +basal +basalt +basalt's +base +base's +baseball +baseball's +baseballs +baseboard +baseboard's +baseboards +based +baseless +baseline +baseline's +baselines +basely +baseman +baseman's +basemen +basement +basement's +basements +baseness +baseness's +baser +bases +basest +bash +bash's +bashed +bashes +bashful +bashfully +bashfulness +bashfulness's +bashing +bashing's +basic +basic's +basically +basics +basil +basil's +basilica +basilica's +basilicas +basin +basin's +basing +basins +basis +basis's +bask +basked +basket +basket's +basketball +basketball's +basketballs +baskets +basking +basks +bass +bass's +basses +bassi +bassinet +bassinet's +bassinets +bassist +bassist's +bassists +basso +basso's +bassoon +bassoon's +bassoonist +bassoonist's +bassoonists +bassoons +bassos +bast +bast's +bastard +bastard's +bastardize +bastardized +bastardizes +bastardizing +bastards +baste +basted +bastes +basting +bastion +bastion's +bastions +bat +bat's +batch +batch's +batched +batches +batching +bate +bated +bates +bath +bath's +bathe +bathe's +bathed +bather +bather's +bathers +bathes +bathhouse +bathhouse's +bathhouses +bathing +bathmat +bathmat's +bathmats +bathos 
+bathos's +bathrobe +bathrobe's +bathrobes +bathroom +bathroom's +bathrooms +baths +bathtub +bathtub's +bathtubs +batik +batik's +batiks +bating +baton +baton's +batons +bats +batsman +batsman's +batsmen +battalion +battalion's +battalions +batted +batten +batten's +battened +battening +battens +batter +batter's +battered +batteries +battering +batters +battery +battery's +battier +battiest +batting +batting's +battle +battle's +battled +battlefield +battlefield's +battlefields +battleground +battleground's +battlegrounds +battlement +battlement's +battlements +battles +battleship +battleship's +battleships +battling +batty +bauble +bauble's +baubles +baud +baud's +bauds +bauxite +bauxite's +bawdier +bawdiest +bawdily +bawdiness +bawdiness's +bawdy +bawl +bawl's +bawled +bawling +bawls +bay +bay's +bayberries +bayberry +bayberry's +bayed +baying +bayonet +bayonet's +bayoneted +bayoneting +bayonets +bayonetted +bayonetting +bayou +bayou's +bayous +bays +bazaar +bazaar's +bazaars +bazillion +bazillions +bazooka +bazooka's +bazookas +be +beach +beach's +beachcomber +beachcomber's +beachcombers +beached +beaches +beachhead +beachhead's +beachheads +beaching +beacon +beacon's +beacons +bead +bead's +beaded +beadier +beadiest +beading +beads +beady +beagle +beagle's +beagles +beak +beak's +beaked +beaker +beaker's +beakers +beaks +beam +beam's +beamed +beaming +beams +bean +bean's +beanbag +beanbag's +beanbags +beaned +beaning +beans +bear +bear's +bearable +beard +beard's +bearded +bearding +beards +bearer +bearer's +bearers +bearing +bearing's +bearings +bearish +bears +bearskin +bearskin's +bearskins +beast +beast's +beastlier +beastliest +beastliness +beastliness's +beastly +beastly's +beasts +beat +beat's +beaten +beater +beater's +beaters +beatific +beatification +beatification's +beatifications +beatified +beatifies +beatify +beatifying +beating +beating's +beatings +beatitude +beatitude's +beatitudes +beatnik +beatnik's +beatniks +beats +beau +beau's +beaus 
+beauteous +beauteously +beautician +beautician's +beauticians +beauties +beautification +beautification's +beautified +beautifier +beautifier's +beautifiers +beautifies +beautiful +beautifully +beautify +beautifying +beauty +beauty's +beaux +beaver +beaver's +beavered +beavering +beavers +bebop +bebop's +bebops +becalm +becalmed +becalming +becalms +became +because +beck +beck's +beckon +beckoned +beckoning +beckons +becks +become +becomes +becoming +becomingly +bed +bed's +bedazzle +bedazzled +bedazzles +bedazzling +bedbug +bedbug's +bedbugs +bedclothes +bedclothes's +bedded +bedder +bedding +bedding's +bedeck +bedecked +bedecking +bedecks +bedevil +bedeviled +bedeviling +bedevilled +bedevilling +bedevilment +bedevilment's +bedevils +bedfellow +bedfellow's +bedfellows +bedlam +bedlam's +bedlams +bedpan +bedpan's +bedpans +bedraggle +bedraggled +bedraggles +bedraggling +bedridden +bedrock +bedrock's +bedrocks +bedroll +bedroll's +bedrolls +bedroom +bedroom's +bedrooms +beds +bedside +bedside's +bedsides +bedsore +bedsore's +bedsores +bedspread +bedspread's +bedspreads +bedstead +bedstead's +bedsteads +bedtime +bedtime's +bedtimes +bee +bee's +beech +beech's +beeches +beechnut +beechnut's +beechnuts +beef +beef's +beefburger +beefed +beefier +beefiest +beefing +beefs +beefsteak +beefsteak's +beefsteaks +beefy +beehive +beehive's +beehives +beekeeper +beekeeper's +beekeepers +beekeeping +beekeeping's +beeline +beeline's +beelines +been +beep +beep's +beeped +beeper +beeper's +beepers +beeping +beeps +beer +beer's +beers +bees +beeswax +beeswax's +beet +beet's +beetle +beetle's +beetled +beetles +beetling +beets +beeves +befall +befallen +befalling +befalls +befell +befit +befits +befitted +befitting +befog +befogged +befogging +befogs +before +beforehand +befoul +befouled +befouling +befouls +befriend +befriended +befriending +befriends +befuddle +befuddled +befuddles +befuddling +beg +began +begat +beget +begets +begetting +beggar +beggar's +beggared +beggaring 
+beggarly +beggars +begged +begging +begin +beginner +beginner's +beginners +beginning +beginning's +beginnings +begins +begone +begonia +begonia's +begonias +begot +begotten +begrudge +begrudged +begrudges +begrudging +begrudgingly +begs +beguile +beguiled +beguiles +beguiling +beguilingly +begun +behalf +behalf's +behalves +behave +behaved +behaves +behaving +behavior +behavior's +behavioral +behead +beheaded +beheading +beheads +beheld +behemoth +behemoth's +behemoths +behest +behest's +behests +behind +behind's +behinds +behold +beholden +beholder +beholder's +beholders +beholding +beholds +behoove +behooved +behooves +behooving +beige +beige's +being +being's +beings +belabor +belabored +belaboring +belabors +belated +belatedly +belay +belayed +belaying +belays +belch +belch's +belched +belches +belching +beleaguer +beleaguered +beleaguering +beleaguers +belfries +belfry +belfry's +belie +belied +belief +belief's +beliefs +belies +believable +believe +believed +believer +believer's +believers +believes +believing +belittle +belittled +belittles +belittling +bell +bell's +belladonna +belladonna's +bellboy +bellboy's +bellboys +belle +belle's +belled +belles +bellhop +bellhop's +bellhops +bellicose +bellicosity +bellicosity's +bellied +bellies +belligerence +belligerence's +belligerency +belligerency's +belligerent +belligerent's +belligerently +belligerents +belling +bellow +bellow's +bellowed +bellowing +bellows +bells +bellwether +bellwether's +bellwethers +belly +belly's +bellyache +bellyache's +bellyached +bellyaches +bellyaching +bellybutton +bellybutton's +bellybuttons +bellyful +bellyful's +bellyfuls +bellying +belong +belonged +belonging +belonging's +belongings +belongs +beloved +beloved's +beloveds +below +belt +belt's +belted +belting +belts +beltway +beltway's +beltways +belying +bemoan +bemoaned +bemoaning +bemoans +bemuse +bemused +bemuses +bemusing +bench +bench's +benched +benches +benching +benchmark +benchmark's +benchmarks +bend +bend's 
+bender +bending +bends +beneath +benediction +benediction's +benedictions +benefaction +benefaction's +benefactions +benefactor +benefactor's +benefactors +benefactress +benefactress's +benefactresses +benefice +benefice's +beneficence +beneficence's +beneficent +beneficently +benefices +beneficial +beneficially +beneficiaries +beneficiary +beneficiary's +benefit +benefit's +benefited +benefiting +benefits +benefitted +benefitting +benevolence +benevolence's +benevolences +benevolent +benevolently +benighted +benign +benignly +bent +bent's +bents +benumb +benumbed +benumbing +benumbs +benzene +benzene's +bequeath +bequeathed +bequeathing +bequeaths +bequest +bequest's +bequests +berate +berated +berates +berating +bereave +bereaved +bereavement +bereavement's +bereavements +bereaves +bereaving +bereft +beret +beret's +berets +berg +berg's +bergs +beriberi +beriberi's +berm +berm's +berms +berried +berries +berry +berry's +berrying +berserk +berth +berth's +berthed +berthing +berths +beryl +beryl's +beryllium +beryllium's +beryls +beseech +beseeched +beseeches +beseeching +beset +besets +besetting +beside +besides +besiege +besieged +besieger +besieger's +besiegers +besieges +besieging +besmirch +besmirched +besmirches +besmirching +besom +besom's +besoms +besot +besots +besotted +besotting +besought +bespeak +bespeaking +bespeaks +bespoke +bespoken +best +best's +bested +bestial +bestiality +bestiality's +bestiaries +bestiary +bestiary's +besting +bestir +bestirred +bestirring +bestirs +bestow +bestowal +bestowal's +bestowals +bestowed +bestowing +bestows +bestrid +bestridden +bestride +bestrides +bestriding +bestrode +bests +bestseller +bestseller's +bestsellers +bet +bet's +beta +beta's +betake +betaken +betakes +betaking +betas +betcha +bethink +bethinking +bethinks +bethought +betide +betided +betides +betiding +betoken +betokened +betokening +betokens +betook +betray +betrayal +betrayal's +betrayals +betrayed +betrayer +betrayer's +betrayers +betraying 
+betrays +betroth +betrothal +betrothal's +betrothals +betrothed +betrothed's +betrothing +betroths +bets +betted +better +better's +bettered +bettering +betterment +betterment's +betters +betting +bettor +bettor's +bettors +between +betwixt +bevel +bevel's +beveled +beveling +bevelled +bevelling +bevels +beverage +beverage's +beverages +bevies +bevy +bevy's +bewail +bewailed +bewailing +bewails +beware +bewared +bewares +bewaring +bewilder +bewildered +bewildering +bewilderment +bewilderment's +bewilders +bewitch +bewitched +bewitches +bewitching +beyond +biannual +biannually +bias +bias's +biased +biases +biasing +biassed +biassing +biathlon +biathlon's +biathlons +bib +bib's +bible +bible's +bibles +biblical +bibliographer +bibliographer's +bibliographers +bibliographic +bibliographical +bibliographies +bibliography +bibliography's +bibliophile +bibliophile's +bibliophiles +bibs +bibulous +bicameral +bicentennial +bicentennial's +bicentennials +bicep +bicep's +biceps +biceps's +bicepses +bicker +bicker's +bickered +bickering +bickers +bicuspid +bicuspid's +bicuspids +bicycle +bicycle's +bicycled +bicycles +bicycling +bicyclist +bicyclist's +bicyclists +bid +bid's +bidden +bidder +bidder's +bidders +biddies +bidding +bidding's +biddy +biddy's +bide +bided +bides +bidet +bidet's +bidets +biding +bidirectional +bids +biennial +biennial's +biennially +biennials +bier +bier's +biers +bifocal +bifocals +bifocals's +bifurcate +bifurcated +bifurcates +bifurcating +bifurcation +bifurcation's +bifurcations +big +bigamist +bigamist's +bigamists +bigamous +bigamy +bigamy's +bigger +biggest +biggie +biggie's +biggies +bighearted +bighorn +bighorn's +bighorns +bight +bight's +bights +bigmouth +bigmouth's +bigmouths +bigness +bigness's +bigot +bigot's +bigoted +bigotries +bigotry +bigotry's +bigots +bigwig +bigwig's +bigwigs +bike +bike's +biked +biker +biker's +bikers +bikes +biking +bikini +bikini's +bikinis +bilateral +bilaterally +bile +bile's +bilge +bilge's +bilges 
+bilingual +bilingual's +bilinguals +bilious +bilk +bilked +bilking +bilks +bill +bill's +billboard +billboard's +billboards +billed +billet +billet's +billeted +billeting +billets +billfold +billfold's +billfolds +billiards +billiards's +billies +billing +billing's +billings +billion +billion's +billionaire +billionaire's +billionaires +billions +billionth +billionth's +billionths +billow +billow's +billowed +billowing +billows +billowy +bills +billy +billy's +bimbo +bimbo's +bimboes +bimbos +bimonthlies +bimonthly +bimonthly's +bin +bin's +binaries +binary +binary's +bind +bind's +binder +binder's +binderies +binders +bindery +bindery's +binding +binding's +bindings +binds +binge +binge's +binged +bingeing +binges +binging +bingo +bingo's +binnacle +binnacle's +binnacles +binned +binning +binocular +binocular's +binoculars +binomial +binomial's +binomials +bins +biochemical +biochemical's +biochemicals +biochemist +biochemist's +biochemistry +biochemistry's +biochemists +biodegradable +biodiversity +biodiversity's +biofeedback +biofeedback's +biographer +biographer's +biographers +biographical +biographies +biography +biography's +biological +biologically +biologist +biologist's +biologists +biology +biology's +biomedical +bionic +biophysicist +biophysicist's +biophysicists +biophysics +biophysics's +biopsied +biopsies +biopsy +biopsy's +biopsying +biorhythm +biorhythm's +biorhythms +biosphere +biosphere's +biospheres +biotechnology +biotechnology's +bipartisan +bipartite +biped +biped's +bipedal +bipeds +biplane +biplane's +biplanes +bipolar +biracial +birch +birch's +birched +birches +birching +bird +bird's +birdbath +birdbath's +birdbaths +birdbrained +birdcage +birdcages +birded +birdhouse +birdhouse's +birdhouses +birdie +birdie's +birdied +birdieing +birdies +birding +birds +birdseed +birdseed's +birdwatcher +birdwatcher's +birdwatchers +biretta +biretta's +birettas +birth +birth's +birthday +birthday's +birthdays +birthed +birther +birther's +birthers 
+birthing +birthmark +birthmark's +birthmarks +birthplace +birthplace's +birthplaces +birthrate +birthrate's +birthrates +birthright +birthright's +birthrights +births +birthstone +birthstone's +birthstones +biscuit +biscuit's +biscuits +bisect +bisected +bisecting +bisection +bisection's +bisections +bisector +bisector's +bisectors +bisects +bisexual +bisexual's +bisexuality +bisexuality's +bisexuals +bishop +bishop's +bishopric +bishopric's +bishoprics +bishops +bismuth +bismuth's +bison +bison's +bisons +bisque +bisque's +bistro +bistro's +bistros +bit +bit's +bitch +bitch's +bitched +bitches +bitchier +bitchiest +bitching +bitchy +bitcoin +bitcoin's +bitcoins +bite +bite's +bites +biting +bitingly +bitmap +bits +bitten +bitter +bitter's +bitterer +bitterest +bitterly +bittern +bittern's +bitterness +bitterness's +bitterns +bitters +bitters's +bittersweet +bittersweet's +bittersweets +bitumen +bitumen's +bituminous +bivalve +bivalve's +bivalves +bivouac +bivouac's +bivouacked +bivouacking +bivouacs +biweeklies +biweekly +biweekly's +bizarre +bizarrely +blab +blab's +blabbed +blabbermouth +blabbermouth's +blabbermouths +blabbing +blabs +black +black's +blackball +blackball's +blackballed +blackballing +blackballs +blackberries +blackberry +blackberry's +blackberrying +blackbird +blackbird's +blackbirds +blackboard +blackboard's +blackboards +blackcurrant +blacked +blacken +blackened +blackening +blackens +blacker +blackest +blackguard +blackguard's +blackguards +blackhead +blackhead's +blackheads +blacking +blackish +blackjack +blackjack's +blackjacked +blackjacking +blackjacks +blacklist +blacklist's +blacklisted +blacklisting +blacklists +blackmail +blackmail's +blackmailed +blackmailer +blackmailer's +blackmailers +blackmailing +blackmails +blackness +blackness's +blackout +blackout's +blackouts +blacks +blacksmith +blacksmith's +blacksmiths +blackthorn +blackthorn's +blackthorns +blacktop +blacktop's +blacktopped +blacktopping +blacktops +bladder +bladder's 
+bladders +blade +blade's +blades +blah +blah's +blame +blame's +blamed +blameless +blamelessly +blamer +blames +blameworthy +blaming +blanch +blanched +blanches +blanching +blancmange +bland +blander +blandest +blandishment +blandishment's +blandishments +blandly +blandness +blandness's +blank +blank's +blanked +blanker +blankest +blanket +blanket's +blanketed +blanketing +blankets +blanking +blankly +blankness +blankness's +blanks +blare +blare's +blared +blares +blaring +blarney +blarney's +blarneyed +blarneying +blarneys +blaspheme +blasphemed +blasphemer +blasphemer's +blasphemers +blasphemes +blasphemies +blaspheming +blasphemous +blasphemously +blasphemy +blasphemy's +blast +blast's +blasted +blaster +blaster's +blasters +blasting +blastoff +blastoff's +blastoffs +blasts +blasé +blatant +blatantly +blaze +blaze's +blazed +blazer +blazer's +blazers +blazes +blazing +blazon +blazon's +blazoned +blazoning +blazons +bleach +bleach's +bleached +bleacher +bleacher's +bleachers +bleaches +bleaching +bleak +bleaker +bleakest +bleakly +bleakness +bleakness's +blearier +bleariest +blearily +bleary +bleat +bleat's +bleated +bleating +bleats +bled +bleed +bleeder +bleeder's +bleeders +bleeding +bleeding's +bleeds +bleep +bleep's +bleeped +bleeping +bleeps +blemish +blemish's +blemished +blemishes +blemishing +blench +blenched +blenches +blenching +blend +blend's +blended +blender +blender's +blenders +blending +blends +blent +bless +blessed +blessedly +blessedness +blessedness's +blesses +blessing +blessing's +blessings +blest +blew +blight +blight's +blighted +blighting +blights +blimp +blimp's +blimps +blind +blind's +blinded +blinder +blinder's +blinders +blindest +blindfold +blindfold's +blindfolded +blindfolding +blindfolds +blinding +blindingly +blindly +blindness +blindness's +blinds +blindside +blindsided +blindsides +blindsiding +bling +blink +blink's +blinked +blinker +blinker's +blinkered +blinkering +blinkers +blinking +blinks +blintz +blintz's +blintze 
+blintze's +blintzes +blip +blip's +blips +bliss +bliss's +blissful +blissfully +blissfulness +blissfulness's +blister +blister's +blistered +blistering +blisters +blithe +blithely +blither +blithest +blitz +blitz's +blitzed +blitzes +blitzing +blizzard +blizzard's +blizzards +bloat +bloated +bloating +bloats +blob +blob's +blobbed +blobbing +blobs +bloc +bloc's +block +block's +blockade +blockade's +blockaded +blockades +blockading +blockage +blockage's +blockages +blockbuster +blockbuster's +blockbusters +blocked +blockhead +blockhead's +blockheads +blockhouse +blockhouse's +blockhouses +blocking +blocks +blocs +blog +blog's +blogged +blogger +blogger's +bloggers +blogging +blogs +blond +blond's +blonde +blonde's +blonder +blondes +blondest +blondness +blondness's +blonds +blood +blood's +bloodbath +bloodbath's +bloodbaths +bloodcurdling +blooded +bloodhound +bloodhound's +bloodhounds +bloodied +bloodier +bloodies +bloodiest +blooding +bloodless +bloodlessly +bloodmobile +bloodmobile's +bloodmobiles +bloods +bloodshed +bloodshed's +bloodshot +bloodstain +bloodstain's +bloodstained +bloodstains +bloodstream +bloodstream's +bloodstreams +bloodsucker +bloodsucker's +bloodsuckers +bloodthirstier +bloodthirstiest +bloodthirstiness +bloodthirstiness's +bloodthirsty +bloody +bloodying +bloom +bloom's +bloomed +bloomer +bloomer's +bloomers +blooming +blooms +blooper +blooper's +bloopers +blossom +blossom's +blossomed +blossoming +blossoms +blot +blot's +blotch +blotch's +blotched +blotches +blotchier +blotchiest +blotching +blotchy +blots +blotted +blotter +blotter's +blotters +blotting +blouse +blouse's +bloused +blouses +blousing +blow +blow's +blower +blower's +blowers +blowgun +blowgun's +blowguns +blowing +blown +blowout +blowout's +blowouts +blows +blowsier +blowsiest +blowsy +blowtorch +blowtorch's +blowtorches +blowup +blowup's +blowups +blowzier +blowziest +blowzy +blubber +blubber's +blubbered +blubbering +blubbers +bludgeon +bludgeon's +bludgeoned +bludgeoning 
+bludgeons +blue +blue's +bluebell +bluebell's +bluebells +blueberries +blueberry +blueberry's +bluebird +bluebird's +bluebirds +bluebottle +bluebottle's +bluebottles +blued +bluefish +bluefish's +bluefishes +bluegrass +bluegrass's +blueing +blueing's +bluejacket +bluejacket's +bluejackets +bluejay +bluejay's +bluejays +bluenose +bluenose's +bluenoses +blueprint +blueprint's +blueprinted +blueprinting +blueprints +bluer +blues +bluest +bluestocking +bluestocking's +bluestockings +bluff +bluff's +bluffed +bluffer +bluffer's +bluffers +bluffest +bluffing +bluffs +bluing +bluing's +bluish +blunder +blunder's +blunderbuss +blunderbuss's +blunderbusses +blundered +blunderer +blunderer's +blunderers +blundering +blunders +blunt +blunted +blunter +bluntest +blunting +bluntly +bluntness +bluntness's +blunts +blur +blur's +blurb +blurb's +blurbs +blurred +blurrier +blurriest +blurring +blurry +blurs +blurt +blurted +blurting +blurts +blush +blush's +blushed +blusher +blusher's +blushers +blushes +blushing +bluster +bluster's +blustered +blustering +blusters +blustery +bo's'n +bo's'n's +bo's'ns +bo'sun +bo'sun's +bo'suns +boa +boa's +boar +boar's +board +board's +boarded +boarder +boarder's +boarders +boarding +boardinghouse +boardinghouse's +boardinghouses +boardroom +boardroom's +boardrooms +boards +boardwalk +boardwalk's +boardwalks +boars +boas +boast +boast's +boasted +boaster +boaster's +boasters +boastful +boastfully +boastfulness +boastfulness's +boasting +boasts +boat +boat's +boated +boater +boater's +boaters +boating +boatman +boatman's +boatmen +boats +boatswain +boatswain's +boatswains +bob +bob's +bobbed +bobbies +bobbin +bobbin's +bobbing +bobbins +bobble +bobble's +bobbled +bobbles +bobbling +bobby +bobby's +bobcat +bobcat's +bobcats +bobolink +bobolink's +bobolinks +bobs +bobsled +bobsled's +bobsledded +bobsledding +bobsleds +bobtail +bobtail's +bobtails +bobwhite +bobwhite's +bobwhites +bode +boded +bodega +bodega's +bodegas +bodes +bodice +bodice's 
+bodices +bodies +bodily +boding +bodkin +bodkin's +bodkins +body +body's +bodybuilding +bodybuilding's +bodyguard +bodyguard's +bodyguards +bodywork +bodywork's +bog +bog's +bogey +bogey's +bogeyed +bogeying +bogeyman +bogeyman's +bogeymen +bogeys +bogged +boggier +boggiest +bogging +boggle +boggled +boggles +boggling +boggy +bogie +bogie's +bogied +bogies +bogs +bogus +bogy +bogy's +bohemian +bohemian's +bohemians +boil +boil's +boiled +boiler +boiler's +boilerplate +boilerplate's +boilers +boiling +boilings +boils +boisterous +boisterously +boisterousness +boisterousness's +bola +bola's +bolas +bold +bolder +boldest +boldface +boldface's +boldly +boldness +boldness's +bole +bole's +bolero +bolero's +boleros +boles +boll +boll's +bolls +bologna +bologna's +boloney +boloney's +bolster +bolster's +bolstered +bolstering +bolsters +bolt +bolt's +bolted +bolting +bolts +bomb +bomb's +bombard +bombarded +bombardier +bombardier's +bombardiers +bombarding +bombardment +bombardment's +bombardments +bombards +bombast +bombast's +bombastic +bombed +bomber +bomber's +bombers +bombing +bombings +bombs +bombshell +bombshell's +bombshells +bonanza +bonanza's +bonanzas +bonbon +bonbon's +bonbons +bond +bond's +bondage +bondage's +bonded +bonding +bonding's +bonds +bondsman +bondsman's +bondsmen +bone +bone's +boned +bonehead +bonehead's +boneheads +boneless +boner +boner's +boners +bones +boney +boneyer +boneyest +bonfire +bonfire's +bonfires +bong +bong's +bonged +bonging +bongo +bongo's +bongoes +bongos +bongs +bonier +boniest +boning +bonito +bonito's +bonitoes +bonitos +bonkers +bonnet +bonnet's +bonnets +bonnie +bonnier +bonniest +bonny +bonsai +bonsai's +bonus +bonus's +bonuses +bony +boo +boo's +boob +boob's +boobed +boobies +boobing +boobs +booby +booby's +boodle +boodle's +boodles +booed +boogie +boogie's +boogied +boogieing +boogies +booing +book +book's +bookcase +bookcase's +bookcases +booked +bookend +bookend's +bookends +bookie +bookie's +bookies +booking 
+booking's +bookings +bookish +bookkeeper +bookkeeper's +bookkeepers +bookkeeping +bookkeeping's +booklet +booklet's +booklets +bookmaker +bookmaker's +bookmakers +bookmaking +bookmaking's +bookmark +bookmark's +bookmarked +bookmarking +bookmarks +bookmobile +bookmobile's +bookmobiles +books +bookseller +bookseller's +booksellers +bookshelf +bookshelf's +bookshelves +bookshop +bookshop's +bookshops +bookstore +bookstore's +bookstores +bookworm +bookworm's +bookworms +boom +boom's +boomed +boomerang +boomerang's +boomeranged +boomeranging +boomerangs +booming +booms +boon +boon's +boondocks +boondocks's +boondoggle +boondoggle's +boondoggled +boondoggles +boondoggling +boons +boor +boor's +boorish +boorishly +boors +boos +boost +boost's +boosted +booster +booster's +boosters +boosting +boosts +boot +boot's +bootblack +bootblack's +bootblacks +booted +bootee +bootee's +bootees +booth +booth's +booths +bootie +bootie's +booties +booting +bootleg +bootleg's +bootlegged +bootlegger +bootlegger's +bootleggers +bootlegging +bootlegs +bootless +boots +bootstrap +bootstrap's +bootstraps +booty +booty's +booze +booze's +boozed +boozer +boozer's +boozers +boozes +boozier +booziest +boozing +boozy +bop +bop's +bopped +bopping +bops +borax +borax's +bordello +bordello's +bordellos +border +border's +bordered +bordering +borderland +borderland's +borderlands +borderline +borderline's +borderlines +borders +bore +bore's +bored +boredom +boredom's +borer +borer's +borers +bores +boring +boringly +born +borne +boron +boron's +borough +borough's +boroughs +borrow +borrowed +borrower +borrower's +borrowers +borrowing +borrows +borsch +borsch's +borscht +borscht's +bos'n +bos'n's +bos'ns +bosh +bosh's +bosom +bosom's +bosoms +boss +boss's +bossed +bosses +bossier +bossiest +bossily +bossiness +bossiness's +bossing +bossy +bosun +bosun's +bosuns +botanical +botanist +botanist's +botanists +botany +botany's +botch +botch's +botched +botches +botching +both +bother +bother's +bothered 
+bothering +bothers +bothersome +botnet +botnet's +botnets +bottle +bottle's +bottled +bottleneck +bottleneck's +bottlenecks +bottles +bottling +bottom +bottom's +bottomed +bottoming +bottomless +bottoms +botulism +botulism's +boudoir +boudoir's +boudoirs +bouffant +bouffant's +bouffants +bough +bough's +boughs +bought +bouillabaisse +bouillabaisse's +bouillabaisses +bouillon +bouillon's +bouillons +boulder +boulder's +boulders +boulevard +boulevard's +boulevards +bounce +bounce's +bounced +bouncer +bouncer's +bouncers +bounces +bouncier +bounciest +bouncing +bouncy +bound +bound's +boundaries +boundary +boundary's +bounded +bounden +bounder +bounder's +bounders +bounding +boundless +bounds +bounteous +bounties +bountiful +bountifully +bounty +bounty's +bouquet +bouquet's +bouquets +bourbon +bourbon's +bourgeois +bourgeois's +bourgeoisie +bourgeoisie's +bout +bout's +boutique +boutique's +boutiques +boutonnière +boutonnière's +boutonnières +bouts +bovine +bovine's +bovines +bow +bow's +bowdlerize +bowdlerized +bowdlerizes +bowdlerizing +bowed +bowel +bowel's +bowels +bower +bower's +bowers +bowing +bowl +bowl's +bowlder +bowlder's +bowlders +bowled +bowlegged +bowler +bowler's +bowlers +bowling +bowling's +bowls +bowman +bowman's +bowmen +bows +bowsprit +bowsprit's +bowsprits +bowstring +bowstring's +bowstrings +box +box's +boxcar +boxcar's +boxcars +boxed +boxer +boxer's +boxers +boxes +boxing +boxing's +boxwood +boxwood's +boy +boy's +boycott +boycott's +boycotted +boycotting +boycotts +boyfriend +boyfriend's +boyfriends +boyhood +boyhood's +boyhoods +boyish +boyishly +boyishness +boyishness's +boys +boysenberries +boysenberry +boysenberry's +bozo +bozo's +bozos +bra +bra's +brace +brace's +braced +bracelet +bracelet's +bracelets +braces +bracing +bracken +bracken's +bracket +bracket's +bracketed +bracketing +brackets +brackish +bract +bract's +bracts +brad +brad's +brads +brag +brag's +braggart +braggart's +braggarts +bragged +bragger +bragger's +braggers 
+bragging +brags +braid +braid's +braided +braiding +braids +braille +braille's +brain +brain's +brainchild +brainchild's +brainchildren +brainchildren's +brained +brainier +brainiest +braining +brainless +brains +brainstorm +brainstorm's +brainstormed +brainstorming +brainstorming's +brainstorms +brainteaser +brainteaser's +brainteasers +brainwash +brainwashed +brainwashes +brainwashing +brainwashing's +brainy +braise +braised +braises +braising +brake +brake's +braked +brakeman +brakeman's +brakemen +brakes +braking +bramble +bramble's +brambles +bran +bran's +branch +branch's +branched +branches +branching +brand +brand's +branded +brandied +brandies +branding +brandish +brandished +brandishes +brandishing +brands +brandy +brandy's +brandying +bras +brash +brasher +brashest +brashly +brashness +brashness's +brass +brass's +brasses +brassier +brassiere +brassiere's +brassieres +brassiest +brassy +brat +brat's +brats +brattier +brattiest +bratty +bravado +bravado's +brave +brave's +braved +bravely +braver +bravery +bravery's +braves +bravest +braving +bravo +bravo's +bravos +bravura +bravura's +bravuras +brawl +brawl's +brawled +brawler +brawler's +brawlers +brawling +brawls +brawn +brawn's +brawnier +brawniest +brawniness +brawniness's +brawny +bray +bray's +brayed +braying +brays +brazen +brazened +brazening +brazenly +brazenness +brazenness's +brazens +brazier +brazier's +braziers +breach +breach's +breached +breaches +breaching +bread +bread's +breadbasket +breadbasket's +breadbaskets +breaded +breadfruit +breadfruit's +breadfruits +breading +breads +breadth +breadth's +breadths +breadwinner +breadwinner's +breadwinners +break +break's +breakable +breakable's +breakables +breakage +breakage's +breakages +breakdown +breakdown's +breakdowns +breaker +breaker's +breakers +breakfast +breakfast's +breakfasted +breakfasting +breakfasts +breaking +breakneck +breakpoints +breaks +breakthrough +breakthrough's +breakthroughs +breakup +breakup's +breakups +breakwater 
+breakwater's +breakwaters +breast +breast's +breastbone +breastbone's +breastbones +breasted +breasting +breastplate +breastplate's +breastplates +breasts +breaststroke +breaststroke's +breaststrokes +breastwork +breastwork's +breastworks +breath +breath's +breathable +breathe +breathed +breather +breather's +breathers +breathes +breathier +breathiest +breathing +breathing's +breathless +breathlessly +breathlessness +breathlessness's +breaths +breathtaking +breathtakingly +breathy +bred +breech +breech's +breeches +breed +breed's +breeder +breeder's +breeders +breeding +breeding's +breeds +breeze +breeze's +breezed +breezes +breezier +breeziest +breezily +breeziness +breeziness's +breezing +breezy +brethren +breviaries +breviary +breviary's +brevity +brevity's +brew +brew's +brewed +brewer +brewer's +breweries +brewers +brewery +brewery's +brewing +brews +briar +briar's +briars +bribe +bribe's +bribed +bribery +bribery's +bribes +bribing +brick +brick's +brickbat +brickbat's +brickbats +bricked +bricking +bricklayer +bricklayer's +bricklayers +bricklaying +bricklaying's +bricks +bridal +bridal's +bridals +bride +bride's +bridegroom +bridegroom's +bridegrooms +brides +bridesmaid +bridesmaid's +bridesmaids +bridge +bridge's +bridged +bridgehead +bridgehead's +bridgeheads +bridges +bridgework +bridgework's +bridging +bridle +bridle's +bridled +bridles +bridling +brief +brief's +briefcase +briefcase's +briefcases +briefed +briefer +briefest +briefing +briefing's +briefings +briefly +briefness +briefness's +briefs +brier +brier's +briers +brig +brig's +brigade +brigade's +brigades +brigand +brigand's +brigandage +brigandage's +brigands +brigantine +brigantine's +brigantines +bright +brighten +brightened +brightening +brightens +brighter +brightest +brightly +brightness +brightness's +brigs +brilliance +brilliance's +brilliancy +brilliancy's +brilliant +brilliant's +brilliantly +brilliants +brim +brim's +brimful +brimfull +brimmed +brimming +brims +brimstone 
+brimstone's +brindled +brine +brine's +bring +bringing +brings +brinier +briniest +brink +brink's +brinkmanship +brinkmanship's +brinks +brinksmanship +brinksmanship's +briny +briquet +briquet's +briquets +briquette +briquette's +briquettes +brisk +brisked +brisker +briskest +brisket +brisket's +briskets +brisking +briskly +briskness +briskness's +brisks +bristle +bristle's +bristled +bristles +bristlier +bristliest +bristling +bristly +britches +britches's +brittle +brittle's +brittleness +brittleness's +brittler +brittlest +broach +broach's +broached +broaches +broaching +broad +broad's +broadband +broadband's +broadcast +broadcast's +broadcasted +broadcaster +broadcaster's +broadcasters +broadcasting +broadcasts +broadcloth +broadcloth's +broaden +broadened +broadening +broadens +broader +broadest +broadloom +broadloom's +broadly +broadness +broadness's +broads +broadside +broadside's +broadsided +broadsides +broadsiding +broadsword +broadsword's +broadswords +brocade +brocade's +brocaded +brocades +brocading +broccoli +broccoli's +brochure +brochure's +brochures +brogan +brogan's +brogans +brogue +brogue's +brogues +broil +broil's +broiled +broiler +broiler's +broilers +broiling +broils +broke +broken +brokenhearted +broker +broker's +brokerage +brokerage's +brokerages +brokered +brokering +brokers +bromide +bromide's +bromides +bromine +bromine's +bronchi +bronchial +bronchitis +bronchitis's +broncho +broncho's +bronchos +bronchus +bronchus's +bronco +bronco's +broncos +brontosaur +brontosaur's +brontosauri +brontosaurs +brontosaurus +brontosaurus's +brontosauruses +bronze +bronze's +bronzed +bronzes +bronzing +brooch +brooch's +brooches +brood +brood's +brooded +brooder +brooder's +brooders +brooding +broods +brook +brook's +brooked +brooking +brooks +broom +broom's +brooms +broomstick +broomstick's +broomsticks +broth +broth's +brothel +brothel's +brothels +brother +brother's +brotherhood +brotherhood's +brotherhoods +brotherliness +brotherliness's 
+brotherly +brothers +broths +brought +brouhaha +brouhaha's +brouhahas +brow +brow's +browbeat +browbeaten +browbeating +browbeats +brown +brown's +browned +browner +brownest +brownie +brownie's +brownies +browning +brownish +brownout +brownout's +brownouts +browns +brownstone +brownstone's +brownstones +brows +browse +browse's +browsed +browser +browser's +browsers +browses +browsing +brr +bruin +bruin's +bruins +bruise +bruise's +bruised +bruiser +bruiser's +bruisers +bruises +bruising +brunch +brunch's +brunched +brunches +brunching +brunet +brunet's +brunets +brunette +brunette's +brunettes +brunt +brunt's +brush +brush's +brushed +brushes +brushing +brushwood +brushwood's +brusk +brusker +bruskest +bruskly +bruskness +bruskness's +brusque +brusquely +brusqueness +brusqueness's +brusquer +brusquest +brutal +brutalities +brutality +brutality's +brutalize +brutalized +brutalizes +brutalizing +brutally +brute +brute's +brutes +brutish +brutishly +bubble +bubble's +bubbled +bubbles +bubblier +bubbliest +bubbling +bubbly +bubbly's +buccaneer +buccaneer's +buccaneered +buccaneering +buccaneers +buck +buck's +buckboard +buckboard's +buckboards +bucked +bucket +bucket's +bucketed +bucketful +bucketful's +bucketfuls +bucketing +buckets +buckeye +buckeye's +buckeyes +bucking +buckle +buckle's +buckled +buckler +buckler's +bucklers +buckles +buckling +buckram +buckram's +bucks +bucksaw +bucksaw's +bucksaws +buckshot +buckshot's +buckskin +buckskin's +buckskins +buckteeth +bucktooth +bucktooth's +bucktoothed +buckwheat +buckwheat's +buckyball +buckyball's +buckyballs +bucolic +bucolic's +bucolics +bud +bud's +budded +buddies +budding +buddings +buddy +buddy's +budge +budged +budgerigar +budgerigar's +budgerigars +budges +budget +budget's +budgetary +budgeted +budgeting +budgets +budgie +budgie's +budgies +budging +buds +buff +buff's +buffalo +buffalo's +buffaloed +buffaloes +buffaloing +buffalos +buffed +buffer +buffer's +buffered +buffering +buffers +buffet +buffet's 
+buffeted +buffeting +buffets +buffing +buffoon +buffoon's +buffoonery +buffoonery's +buffoons +buffs +bug +bug's +bugaboo +bugaboo's +bugaboos +bugbear +bugbear's +bugbears +bugged +bugger +bugger's +buggers +buggier +buggies +buggiest +bugging +buggy +buggy's +bugle +bugle's +bugled +bugler +bugler's +buglers +bugles +bugling +bugs +build +build's +builder +builder's +builders +building +building's +buildings +builds +buildup +buildup's +buildups +built +builtin +bulb +bulb's +bulbous +bulbs +bulge +bulge's +bulged +bulges +bulgier +bulgiest +bulging +bulgy +bulimia +bulimia's +bulimic +bulimic's +bulimics +bulk +bulk's +bulked +bulkhead +bulkhead's +bulkheads +bulkier +bulkiest +bulkiness +bulkiness's +bulking +bulks +bulky +bull +bull's +bulldog +bulldog's +bulldogged +bulldogging +bulldogs +bulldoze +bulldozed +bulldozer +bulldozer's +bulldozers +bulldozes +bulldozing +bulled +bullet +bullet's +bulletin +bulletin's +bulletined +bulletining +bulletins +bulletproof +bulletproofed +bulletproofing +bulletproofs +bullets +bullfight +bullfight's +bullfighter +bullfighter's +bullfighters +bullfighting +bullfighting's +bullfights +bullfinch +bullfinch's +bullfinches +bullfrog +bullfrog's +bullfrogs +bullheaded +bullhorn +bullhorn's +bullhorns +bullied +bullies +bulling +bullion +bullion's +bullish +bullock +bullock's +bullocks +bullpen +bullpen's +bullpens +bullring +bullring's +bullrings +bulls +bullshit +bullshit's +bullshits +bullshitted +bullshitting +bully +bully's +bullying +bulrush +bulrush's +bulrushes +bulwark +bulwark's +bulwarks +bum +bum's +bumble +bumblebee +bumblebee's +bumblebees +bumbled +bumbler +bumbler's +bumblers +bumbles +bumbling +bummed +bummer +bummer's +bummers +bummest +bumming +bump +bump's +bumped +bumper +bumper's +bumpers +bumpier +bumpiest +bumping +bumpkin +bumpkin's +bumpkins +bumps +bumptious +bumpy +bums +bun +bun's +bunch +bunch's +bunched +bunches +bunching +buncombe +buncombe's +bundle +bundle's +bundled +bundles +bundling +bung 
+bung's +bungalow +bungalow's +bungalows +bunged +bunghole +bunghole's +bungholes +bunging +bungle +bungle's +bungled +bungler +bungler's +bunglers +bungles +bungling +bungs +bunion +bunion's +bunions +bunk +bunk's +bunked +bunker +bunker's +bunkers +bunkhouse +bunkhouse's +bunkhouses +bunking +bunks +bunkum +bunkum's +bunnies +bunny +bunny's +buns +bunt +bunt's +bunted +bunting +bunting's +buntings +bunts +buoy +buoy's +buoyancy +buoyancy's +buoyant +buoyantly +buoyed +buoying +buoys +bur +bur's +burble +burbled +burbles +burbling +burden +burden's +burdened +burdening +burdens +burdensome +burdock +burdock's +bureau +bureau's +bureaucracies +bureaucracy +bureaucracy's +bureaucrat +bureaucrat's +bureaucratic +bureaucratically +bureaucrats +bureaus +bureaux +burg +burg's +burgeon +burgeoned +burgeoning +burgeons +burger +burger's +burgers +burgher +burgher's +burghers +burglar +burglar's +burglaries +burglarize +burglarized +burglarizes +burglarizing +burglars +burglary +burglary's +burgle +burgled +burgles +burgling +burgs +burial +burial's +burials +buried +buries +burka +burka's +burkas +burlap +burlap's +burlesque +burlesque's +burlesqued +burlesques +burlesquing +burlier +burliest +burliness +burliness's +burly +burn +burn's +burned +burner +burner's +burners +burning +burnish +burnish's +burnished +burnishes +burnishing +burnoose +burnoose's +burnooses +burnous +burnous's +burnouses +burnout +burnout's +burnouts +burns +burnt +burp +burp's +burped +burping +burps +burr +burr's +burred +burring +burrito +burrito's +burritos +burro +burro's +burros +burrow +burrow's +burrowed +burrowing +burrows +burrs +burs +bursar +bursar's +bursars +bursitis +bursitis's +burst +burst's +bursted +bursting +bursts +bury +burying +bus +bus's +busbies +busboy +busboy's +busboys +busby +busby's +bused +buses +bush +bush's +bushed +bushel +bushel's +busheled +busheling +bushelled +bushelling +bushels +bushes +bushier +bushiest +bushiness +bushiness's +bushing +bushing's +bushings 
+bushman +bushman's +bushmen +bushwhack +bushwhacked +bushwhacker +bushwhacker's +bushwhackers +bushwhacking +bushwhacks +bushy +busied +busier +busies +busiest +busily +business +business's +businesses +businesslike +businessman +businessman's +businessmen +businesswoman +businesswoman's +businesswomen +busing +busing's +buss +buss's +bussed +busses +bussing +bussing's +bust +bust's +busted +buster +buster's +busters +busting +bustle +bustle's +bustled +bustles +bustling +busts +busy +busybodies +busybody +busybody's +busying +busyness +busyness's +busywork +busywork's +but +butane +butane's +butch +butch's +butcher +butcher's +butchered +butcheries +butchering +butchers +butchery +butchery's +butches +butler +butler's +butlers +buts +butt +butt's +butte +butte's +butted +butter +butter's +buttercup +buttercup's +buttercups +buttered +butterfat +butterfat's +butterfingers +butterfingers's +butterflied +butterflies +butterfly +butterfly's +butterflying +butterier +butteries +butteriest +buttering +buttermilk +buttermilk's +butternut +butternut's +butternuts +butters +butterscotch +butterscotch's +buttery +buttery's +buttes +butting +buttock +buttock's +buttocks +button +button's +buttoned +buttonhole +buttonhole's +buttonholed +buttonholes +buttonholing +buttoning +buttons +buttress +buttress's +buttressed +buttresses +buttressing +butts +buxom +buy +buy's +buyer +buyer's +buyers +buying +buyout +buyout's +buyouts +buys +buzz +buzz's +buzzard +buzzard's +buzzards +buzzed +buzzer +buzzer's +buzzers +buzzes +buzzing +buzzkill +buzzkill's +buzzkills +buzzword +buzzword's +buzzwords +by +by's +bye +bye's +byelaw +byelaw's +byelaws +byes +bygone +bygone's +bygones +bylaw +bylaw's +bylaws +byline +byline's +bylines +bypass +bypass's +bypassed +bypasses +bypassing +bypast +byplay +byplay's +byproduct +byproduct's +byproducts +bystander +bystander's +bystanders +byte +byte's +bytes +byway +byway's +byways +byword +byword's +bywords +c +cab +cab's +cabal +cabal's +cabals 
+cabana +cabana's +cabanas +cabaret +cabaret's +cabarets +cabbage +cabbage's +cabbages +cabbed +cabbie +cabbie's +cabbies +cabbing +cabby +cabby's +cabin +cabin's +cabinet +cabinet's +cabinetmaker +cabinetmaker's +cabinetmakers +cabinets +cabins +cable +cable's +cablecast +cablecast's +cablecasted +cablecasting +cablecasts +cabled +cablegram +cablegram's +cablegrams +cables +cabling +caboodle +caboodle's +caboose +caboose's +cabooses +cabs +cacao +cacao's +cacaos +cache +cache's +cached +caches +cachet +cachet's +cachets +caching +cackle +cackle's +cackled +cackles +cackling +cacophonies +cacophonous +cacophony +cacophony's +cacti +cactus +cactus's +cactuses +cad +cad's +cadaver +cadaver's +cadaverous +cadavers +caddie +caddie's +caddied +caddies +caddish +caddy +caddy's +caddying +cadence +cadence's +cadences +cadenza +cadenza's +cadenzas +cadet +cadet's +cadets +cadge +cadged +cadger +cadger's +cadgers +cadges +cadging +cadmium +cadmium's +cadre +cadre's +cadres +cads +caducei +caduceus +caduceus's +caesarean +caesarean's +caesareans +caesarian +caesarian's +caesarians +caesura +caesura's +caesurae +caesuras +cafeteria +cafeteria's +cafeterias +caffeinated +caffeine +caffeine's +caftan +caftan's +caftans +café +café's +cafés +cage +cage's +caged +cages +cagey +cageyness +cageyness's +cagier +cagiest +cagily +caginess +caginess's +caging +cagy +cahoot +cahoot's +cahoots +cairn +cairn's +cairns +caisson +caisson's +caissons +cajole +cajoled +cajolery +cajolery's +cajoles +cajoling +cake +cake's +caked +cakes +caking +calabash +calabash's +calabashes +calamine +calamine's +calamities +calamitous +calamity +calamity's +calcified +calcifies +calcify +calcifying +calcine +calcined +calcines +calcining +calcite +calcite's +calcium +calcium's +calculable +calculate +calculated +calculates +calculating +calculation +calculation's +calculations +calculator +calculator's +calculators +calculi +calculus +calculus's +calculuses +caldron +caldron's +caldrons +calendar 
+calendar's +calendared +calendaring +calendars +calf +calf's +calfs +calfskin +calfskin's +caliber +caliber's +calibers +calibrate +calibrated +calibrates +calibrating +calibration +calibration's +calibrations +calibrator +calibrator's +calibrators +calico +calico's +calicoes +calicos +calif +calif's +califs +caliper +caliper's +calipered +calipering +calipers +caliph +caliph's +caliphate +caliphate's +caliphates +caliphs +calisthenic +calisthenics +calisthenics's +calk +calk's +calked +calking +calking's +calkings +calks +call +call's +callable +called +caller +caller's +callers +calligrapher +calligrapher's +calligraphers +calligraphy +calligraphy's +calling +calling's +callings +calliope +calliope's +calliopes +calliper +calliper's +callipered +callipering +callipers +callisthenics +callous +calloused +callouses +callousing +callously +callousness +callousness's +callow +callower +callowest +calls +callus +callus's +callused +calluses +callusing +calm +calm's +calmed +calmer +calmest +calming +calmly +calmness +calmness's +calms +caloric +calorie +calorie's +calories +calorific +calumniate +calumniated +calumniates +calumniating +calumnies +calumny +calumny's +calve +calved +calves +calving +calyces +calypso +calypso's +calypsos +calyx +calyx's +calyxes +cam +cam's +camaraderie +camaraderie's +camber +camber's +cambered +cambering +cambers +cambia +cambium +cambium's +cambiums +cambric +cambric's +camcorder +camcorder's +camcorders +came +camel +camel's +camellia +camellia's +camellias +camels +cameo +cameo's +cameos +camera +camera's +cameraman +cameraman's +cameramen +cameras +camerawoman +camerawoman's +camerawomen +camisole +camisole's +camisoles +camomile +camomile's +camomiles +camouflage +camouflage's +camouflaged +camouflages +camouflaging +camp +camp's +campaign +campaign's +campaigned +campaigner +campaigner's +campaigners +campaigning +campaigns +campanile +campanile's +campaniles +campanili +camped +camper +camper's +campers +campfire +campfire's 
+campfires +campground +campground's +campgrounds +camphor +camphor's +campier +campiest +camping +camping's +camps +campsite +campsite's +campsites +campus +campus's +campuses +campy +cams +camshaft +camshaft's +camshafts +can +can's +can't +canal +canal's +canals +canapé +canapé's +canapés +canard +canard's +canards +canaries +canary +canary's +canasta +canasta's +cancan +cancan's +cancans +cancel +cancelation +canceled +canceling +cancellation +cancellation's +cancellations +cancelled +cancelling +cancels +cancer +cancer's +cancerous +cancers +candelabra +candelabra's +candelabras +candelabrum +candelabrum's +candelabrums +candid +candidacies +candidacy +candidacy's +candidate +candidate's +candidates +candidly +candidness +candidness's +candied +candies +candle +candle's +candled +candlelight +candlelight's +candles +candlestick +candlestick's +candlesticks +candling +candor +candor's +candy +candy's +candying +cane +cane's +caned +canes +canine +canine's +canines +caning +canister +canister's +canisters +canker +canker's +cankered +cankering +cankerous +cankers +cannabis +cannabis's +cannabises +canned +canneries +cannery +cannery's +cannibal +cannibal's +cannibalism +cannibalism's +cannibalistic +cannibalize +cannibalized +cannibalizes +cannibalizing +cannibals +cannier +canniest +cannily +canniness +canniness's +canning +cannon +cannon's +cannonade +cannonade's +cannonaded +cannonades +cannonading +cannonball +cannonball's +cannonballs +cannoned +cannoning +cannons +cannot +canny +canoe +canoe's +canoed +canoeing +canoeist +canoeist's +canoeists +canoes +canon +canon's +canonical +canonization +canonization's +canonizations +canonize +canonized +canonizes +canonizing +canons +canopied +canopies +canopy +canopy's +canopying +cans +cant +cant's +cantaloup +cantaloup's +cantaloupe +cantaloupe's +cantaloupes +cantaloups +cantankerous +cantankerously +cantankerousness +cantankerousness's +cantata +cantata's +cantatas +canted +canteen +canteen's +canteens +canter 
+canter's +cantered +cantering +canters +canticle +canticle's +canticles +cantilever +cantilever's +cantilevered +cantilevering +cantilevers +canting +canto +canto's +canton +canton's +cantons +cantor +cantor's +cantors +cantos +cants +canvas +canvas's +canvasback +canvasback's +canvasbacks +canvased +canvases +canvasing +canvass +canvass's +canvassed +canvasser +canvasser's +canvassers +canvasses +canvassing +canyon +canyon's +canyons +cap +cap's +capabilities +capability +capability's +capable +capably +capacious +capaciously +capaciousness +capaciousness's +capacitance +capacities +capacitor +capacitor's +capacitors +capacity +capacity's +caparison +caparison's +caparisoned +caparisoning +caparisons +cape +cape's +caped +caper +caper's +capered +capering +capers +capes +capillaries +capillary +capillary's +capital +capital's +capitalism +capitalism's +capitalist +capitalist's +capitalistic +capitalists +capitalization +capitalization's +capitalize +capitalized +capitalizes +capitalizing +capitals +capitol +capitol's +capitols +capitulate +capitulated +capitulates +capitulating +capitulation +capitulation's +capitulations +caplet +caplet's +caplets +capon +capon's +capons +capped +capping +cappuccino +cappuccino's +cappuccinos +caprice +caprice's +caprices +capricious +capriciously +capriciousness +capriciousness's +caps +capsize +capsized +capsizes +capsizing +capstan +capstan's +capstans +capsule +capsule's +capsuled +capsules +capsuling +captain +captain's +captaincies +captaincy +captaincy's +captained +captaining +captains +caption +caption's +captioned +captioning +captions +captious +captivate +captivated +captivates +captivating +captivation +captivation's +captive +captive's +captives +captivities +captivity +captivity's +captor +captor's +captors +capture +capture's +captured +captures +capturing +car +car's +caracul +caracul's +carafe +carafe's +carafes +caramel +caramel's +caramels +carapace +carapace's +carapaces +carat +carat's +carats +caravan 
+caravan's +caravans +caraway +caraway's +caraways +carbide +carbide's +carbides +carbine +carbine's +carbines +carbohydrate +carbohydrate's +carbohydrates +carbon +carbon's +carbonate +carbonate's +carbonated +carbonates +carbonating +carbonation +carbonation's +carbons +carboy +carboy's +carboys +carbs +carbuncle +carbuncle's +carbuncles +carburetor +carburetor's +carburetors +carcass +carcass's +carcasses +carcinogen +carcinogen's +carcinogenic +carcinogenic's +carcinogenics +carcinogens +carcinoma +carcinoma's +carcinomas +carcinomata +card +card's +cardboard +cardboard's +carded +cardiac +cardigan +cardigan's +cardigans +cardinal +cardinal's +cardinals +carding +cardio +cardiogram +cardiogram's +cardiograms +cardiologist +cardiologist's +cardiologists +cardiology +cardiology's +cardiopulmonary +cardiovascular +cards +cardsharp +cardsharp's +cardsharps +care +care's +cared +careen +careened +careening +careens +career +career's +careered +careering +careers +carefree +careful +carefuller +carefullest +carefully +carefulness +carefulness's +caregiver +caregiver's +caregivers +careless +carelessly +carelessness +carelessness's +cares +caress +caress's +caressed +caresses +caressing +caret +caret's +caretaker +caretaker's +caretakers +carets +careworn +carfare +carfare's +cargo +cargo's +cargoes +cargos +caribou +caribou's +caribous +caricature +caricature's +caricatured +caricatures +caricaturing +caricaturist +caricaturist's +caricaturists +caries +caries's +carillon +carillon's +carillons +caring +caring's +carjack +carjacked +carjacker +carjacker's +carjackers +carjacking +carjacking's +carjackings +carjacks +carmine +carmine's +carmines +carnage +carnage's +carnal +carnally +carnation +carnation's +carnations +carnelian +carnelian's +carnelians +carnival +carnival's +carnivals +carnivore +carnivore's +carnivores +carnivorous +carol +carol's +caroled +caroler +caroler's +carolers +caroling +carolled +caroller +caroller's +carollers +carolling +carols +carom 
+carom's +caromed +caroming +caroms +carotid +carotid's +carotids +carousal +carousal's +carousals +carouse +carouse's +caroused +carousel +carousel's +carousels +carouser +carouser's +carousers +carouses +carousing +carp +carp's +carpal +carpal's +carpals +carped +carpel +carpel's +carpels +carpenter +carpenter's +carpentered +carpentering +carpenters +carpentry +carpentry's +carpet +carpet's +carpetbag +carpetbag's +carpetbagged +carpetbagger +carpetbagger's +carpetbaggers +carpetbagging +carpetbags +carpeted +carpeting +carpeting's +carpets +carpi +carping +carport +carport's +carports +carps +carpus +carpus's +carrel +carrel's +carrels +carriage +carriage's +carriages +carriageway +carried +carrier +carrier's +carriers +carries +carrion +carrion's +carrot +carrot's +carrots +carrousel +carrousel's +carrousels +carry +carry's +carryall +carryall's +carryalls +carrying +carryout +cars +carsick +carsickness +carsickness's +cart +cart's +carted +cartel +cartel's +cartels +cartilage +cartilage's +cartilages +cartilaginous +carting +cartographer +cartographer's +cartographers +cartography +cartography's +carton +carton's +cartons +cartoon +cartoon's +cartooned +cartooning +cartoonist +cartoonist's +cartoonists +cartoons +cartridge +cartridge's +cartridges +carts +cartwheel +cartwheel's +cartwheeled +cartwheeling +cartwheels +carve +carved +carver +carver's +carvers +carves +carving +carving's +carvings +caryatid +caryatid's +caryatides +caryatids +cascade +cascade's +cascaded +cascades +cascading +case +case's +cased +casein +casein's +caseload +caseload's +caseloads +casement +casement's +casements +cases +casework +casework's +caseworker +caseworker's +caseworkers +cash +cash's +cashback +cashback's +cashed +cashes +cashew +cashew's +cashews +cashier +cashier's +cashiered +cashiering +cashiers +cashing +cashmere +cashmere's +casing +casing's +casings +casino +casino's +casinos +cask +cask's +casket +casket's +caskets +casks +cassava +cassava's +cassavas +casserole 
+casserole's +casseroled +casseroles +casseroling +cassette +cassette's +cassettes +cassia +cassia's +cassias +cassino +cassino's +cassinos +cassock +cassock's +cassocks +cast +cast's +castanet +castanet's +castanets +castaway +castaway's +castaways +caste +caste's +caster +caster's +casters +castes +castigate +castigated +castigates +castigating +castigation +castigation's +castigator +castigator's +castigators +casting +casting's +castings +castle +castle's +castled +castles +castling +castoff +castoff's +castoffs +castor +castor's +castors +castrate +castrated +castrates +castrating +castration +castration's +castrations +casts +casual +casual's +casually +casualness +casualness's +casuals +casualties +casualty +casualty's +casuist +casuist's +casuistry +casuistry's +casuists +cat +cat's +cataclysm +cataclysm's +cataclysmic +cataclysms +catacomb +catacomb's +catacombs +catafalque +catafalque's +catafalques +catalepsy +catalepsy's +cataleptic +cataleptic's +cataleptics +catalog +catalog's +cataloged +cataloger +cataloger's +catalogers +cataloging +catalogs +catalogue +catalogue's +catalogued +cataloguer +cataloguer's +cataloguers +catalogues +cataloguing +catalpa +catalpa's +catalpas +catalysis +catalysis's +catalyst +catalyst's +catalysts +catalytic +catalytic's +catalyze +catalyzed +catalyzes +catalyzing +catamaran +catamaran's +catamarans +catapult +catapult's +catapulted +catapulting +catapults +cataract +cataract's +cataracts +catarrh +catarrh's +catastrophe +catastrophe's +catastrophes +catastrophic +catastrophically +catatonic +catatonic's +catatonics +catbird +catbird's +catbirds +catboat +catboat's +catboats +catcall +catcall's +catcalled +catcalling +catcalls +catch +catch's +catchall +catchall's +catchalls +catcher +catcher's +catchers +catches +catchier +catchiest +catching +catchings +catchment +catchphrase +catchup +catchup's +catchword +catchword's +catchwords +catchy +catechise +catechised +catechises +catechising +catechism +catechism's 
+catechisms +catechize +catechized +catechizes +catechizing +categorical +categorically +categories +categorization +categorization's +categorizations +categorize +categorized +categorizes +categorizing +category +category's +cater +catered +caterer +caterer's +caterers +catering +caterings +caterpillar +caterpillar's +caterpillars +caters +caterwaul +caterwaul's +caterwauled +caterwauling +caterwauls +catfish +catfish's +catfishes +catgut +catgut's +catharses +catharsis +catharsis's +cathartic +cathartic's +cathartics +cathedral +cathedral's +cathedrals +catheter +catheter's +catheters +cathode +cathode's +cathodes +catholic +catholicity +catholicity's +cation +cation's +cations +catkin +catkin's +catkins +catnap +catnap's +catnapped +catnapping +catnaps +catnip +catnip's +cats +catsup +catsup's +cattail +cattail's +cattails +cattier +cattiest +cattily +cattiness +cattiness's +cattle +cattle's +cattleman +cattleman's +cattlemen +catty +catwalk +catwalk's +catwalks +caucus +caucus's +caucused +caucuses +caucusing +caucussed +caucussing +caudal +caught +cauldron +cauldron's +cauldrons +cauliflower +cauliflower's +cauliflowers +caulk +caulk's +caulked +caulking +caulking's +caulkings +caulks +causal +causalities +causality +causality's +causally +causation +causation's +causative +cause +cause's +caused +causeless +causes +causeway +causeway's +causeways +causing +caustic +caustic's +caustically +caustics +cauterize +cauterized +cauterizes +cauterizing +caution +caution's +cautionary +cautioned +cautioning +cautions +cautious +cautiously +cautiousness +cautiousness's +cavalcade +cavalcade's +cavalcades +cavalier +cavalier's +cavaliers +cavalries +cavalry +cavalry's +cavalryman +cavalryman's +cavalrymen +cave +cave's +caveat +caveat's +caveats +caved +caveman +caveman's +cavemen +cavern +cavern's +cavernous +caverns +caves +caviar +caviar's +caviare +caviare's +cavil +cavil's +caviled +caviling +cavilled +cavilling +cavils +caving +cavities +cavity +cavity's +cavort 
+cavorted +cavorting +cavorts +caw +caw's +cawed +cawing +caws +cayenne +cayenne's +cease +cease's +ceased +ceasefire +ceaseless +ceaselessly +ceases +ceasing +cedar +cedar's +cedars +cede +ceded +cedes +cedilla +cedilla's +cedillas +ceding +ceiling +ceiling's +ceilings +celebrant +celebrant's +celebrants +celebrate +celebrated +celebrates +celebrating +celebration +celebration's +celebrations +celebratory +celebrities +celebrity +celebrity's +celerity +celerity's +celery +celery's +celesta +celesta's +celestas +celestial +celibacy +celibacy's +celibate +celibate's +celibates +cell +cell's +cellar +cellar's +cellars +celli +cellist +cellist's +cellists +cello +cello's +cellophane +cellophane's +cellos +cells +cellular +cellular's +cellulars +cellulite +cellulite's +celluloid +celluloid's +cellulose +cellulose's +cement +cement's +cemented +cementing +cements +cemeteries +cemetery +cemetery's +cenotaph +cenotaph's +cenotaphs +censer +censer's +censers +censor +censor's +censored +censoring +censorious +censoriously +censors +censorship +censorship's +censure +censure's +censured +censures +censuring +census +census's +censused +censuses +censusing +cent +cent's +centaur +centaur's +centaurs +centenarian +centenarian's +centenarians +centenaries +centenary +centenary's +centennial +centennial's +centennials +center +center's +centered +centerfold +centerfold's +centerfolds +centering +centerpiece +centerpiece's +centerpieces +centers +centigrade +centigram +centigram's +centigramme +centigramme's +centigrammes +centigrams +centiliter +centiliter's +centiliters +centime +centime's +centimes +centimeter +centimeter's +centimeters +centipede +centipede's +centipedes +central +central's +centralization +centralization's +centralize +centralized +centralizes +centralizing +centrally +centrals +centrifugal +centrifuge +centrifuge's +centrifuged +centrifuges +centrifuging +centripetal +centrist +centrist's +centrists +cents +centuries +centurion +centurion's +centurions 
+century +century's +cephalic +ceramic +ceramic's +ceramics +ceramics's +cereal +cereal's +cereals +cerebella +cerebellum +cerebellum's +cerebellums +cerebra +cerebral +cerebrum +cerebrum's +cerebrums +ceremonial +ceremonial's +ceremonially +ceremonials +ceremonies +ceremonious +ceremoniously +ceremony +ceremony's +cerise +cerise's +certain +certainly +certainties +certainty +certainty's +certifiable +certificate +certificate's +certificated +certificates +certificating +certification +certification's +certifications +certified +certifies +certify +certifying +certitude +certitude's +cerulean +cerulean's +cervical +cervices +cervix +cervix's +cervixes +cesarean +cesarean's +cesareans +cesarian +cesarian's +cesarians +cesium +cesium's +cessation +cessation's +cessations +cession +cession's +cessions +cesspool +cesspool's +cesspools +cetacean +cetacean's +cetaceans +chafe +chafed +chafes +chaff +chaff's +chaffed +chaffinch +chaffinch's +chaffinches +chaffing +chaffs +chafing +chagrin +chagrin's +chagrined +chagrining +chagrinned +chagrinning +chagrins +chain +chain's +chained +chaining +chains +chainsaw +chainsaw's +chainsawed +chainsawing +chainsaws +chair +chair's +chaired +chairing +chairlift +chairlift's +chairlifts +chairman +chairman's +chairmanship +chairmanship's +chairmen +chairperson +chairperson's +chairpersons +chairs +chairwoman +chairwoman's +chairwomen +chaise +chaise's +chaises +chalet +chalet's +chalets +chalice +chalice's +chalices +chalk +chalk's +chalkboard +chalkboard's +chalkboards +chalked +chalkier +chalkiest +chalking +chalks +chalky +challenge +challenge's +challenged +challenger +challenger's +challengers +challenges +challenging +chamber +chamber's +chamberlain +chamberlain's +chamberlains +chambermaid +chambermaid's +chambermaids +chambers +chambray +chambray's +chameleon +chameleon's +chameleons +chammies +chammy +chammy's +chamois +chamois's +chamoix +chamomile +chamomile's +chamomiles +champ +champ's +champagne +champagne's +champagnes 
+champed +champing +champion +champion's +championed +championing +champions +championship +championship's +championships +champs +chance +chance's +chanced +chancel +chancel's +chancelleries +chancellery +chancellery's +chancellor +chancellor's +chancellors +chancels +chanceries +chancery +chancery's +chances +chancier +chanciest +chancing +chancy +chandelier +chandelier's +chandeliers +chandler +chandler's +chandlers +change +change's +changeable +changed +changeling +changeling's +changelings +changeover +changeover's +changeovers +changes +changing +channel +channel's +channeled +channeling +channelled +channelling +channels +chant +chant's +chanted +chanter +chanter's +chanters +chantey +chantey's +chanteys +chanticleer +chanticleer's +chanticleers +chanties +chanting +chants +chanty +chanty's +chaos +chaos's +chaotic +chaotically +chap +chap's +chaparral +chaparral's +chaparrals +chapel +chapel's +chapels +chaperon +chaperon's +chaperone +chaperone's +chaperoned +chaperones +chaperoning +chaperons +chaplain +chaplain's +chaplaincies +chaplaincy +chaplaincy's +chaplains +chaplet +chaplet's +chaplets +chapped +chapping +chaps +chapt +chapter +chapter's +chapters +char +char's +character +character's +characteristic +characteristic's +characteristically +characteristics +characterization +characterization's +characterizations +characterize +characterized +characterizes +characterizing +characters +charade +charade's +charades +charbroil +charbroiled +charbroiling +charbroils +charcoal +charcoal's +charcoals +charge +charge's +chargeable +charged +charger +charger's +chargers +charges +charging +charier +chariest +charily +chariot +chariot's +charioteer +charioteer's +charioteers +chariots +charisma +charisma's +charismatic +charismatic's +charismatics +charitable +charitably +charities +charity +charity's +charlatan +charlatan's +charlatans +charm +charm's +charmed +charmer +charmer's +charmers +charming +charmingly +charms +charred +charring +chars +chart 
+chart's +charted +charter +charter's +chartered +chartering +charters +charting +chartreuse +chartreuse's +charts +charwoman +charwoman's +charwomen +chary +chase +chase's +chased +chaser +chaser's +chasers +chases +chasing +chasm +chasm's +chasms +chassis +chassis's +chaste +chastely +chasten +chastened +chastening +chastens +chaster +chastest +chastise +chastised +chastisement +chastisement's +chastisements +chastises +chastising +chastity +chastity's +chasuble +chasuble's +chasubles +chat +chat's +chateaus +chats +chatted +chattel +chattel's +chattels +chatter +chatter's +chatterbox +chatterbox's +chatterboxes +chattered +chatterer +chatterer's +chatterers +chattering +chatters +chattier +chattiest +chattily +chattiness +chattiness's +chatting +chatty +chauffeur +chauffeur's +chauffeured +chauffeuring +chauffeurs +chauvinism +chauvinism's +chauvinist +chauvinist's +chauvinistic +chauvinists +cheap +cheapen +cheapened +cheapening +cheapens +cheaper +cheapest +cheaply +cheapness +cheapness's +cheapskate +cheapskate's +cheapskates +cheat +cheat's +cheated +cheater +cheater's +cheaters +cheating +cheats +check +check's +checkbook +checkbook's +checkbooks +checked +checker +checker's +checkerboard +checkerboard's +checkerboards +checkered +checkering +checkers +checkers's +checking +checklist +checklist's +checklists +checkmate +checkmate's +checkmated +checkmates +checkmating +checkout +checkout's +checkouts +checkpoint +checkpoint's +checkpoints +checkroom +checkroom's +checkrooms +checks +checkup +checkup's +checkups +cheddar +cheddar's +cheek +cheek's +cheekbone +cheekbone's +cheekbones +cheeked +cheekier +cheekiest +cheekily +cheekiness +cheekiness's +cheeking +cheeks +cheeky +cheep +cheep's +cheeped +cheeping +cheeps +cheer +cheer's +cheered +cheerful +cheerfuller +cheerfullest +cheerfully +cheerfulness +cheerfulness's +cheerier +cheeriest +cheerily +cheeriness +cheeriness's +cheering +cheerleader +cheerleader's +cheerleaders +cheerless +cheerlessly 
+cheerlessness +cheerlessness's +cheers +cheery +cheese +cheese's +cheeseburger +cheeseburger's +cheeseburgers +cheesecake +cheesecake's +cheesecakes +cheesecloth +cheesecloth's +cheesed +cheeses +cheesier +cheesiest +cheesing +cheesy +cheetah +cheetah's +cheetahs +chef +chef's +chefs +chemical +chemical's +chemically +chemicals +chemise +chemise's +chemises +chemist +chemist's +chemistry +chemistry's +chemists +chemotherapy +chemotherapy's +chenille +chenille's +cherish +cherished +cherishes +cherishing +cheroot +cheroot's +cheroots +cherries +cherry +cherry's +cherub +cherub's +cherubic +cherubim +cherubims +cherubs +chervil +chervil's +chess +chess's +chessboard +chessboard's +chessboards +chessman +chessman's +chessmen +chest +chest's +chestnut +chestnut's +chestnuts +chests +chevron +chevron's +chevrons +chew +chew's +chewed +chewer +chewer's +chewers +chewier +chewiest +chewing +chews +chewy +chi +chiaroscuro +chiaroscuro's +chic +chic's +chicaneries +chicanery +chicanery's +chicer +chicest +chichi +chichi's +chichis +chick +chick's +chickadee +chickadee's +chickadees +chicken +chicken's +chickened +chickening +chickenpox +chickenpox's +chickens +chickpea +chickpea's +chickpeas +chicks +chickweed +chickweed's +chicle +chicle's +chicories +chicory +chicory's +chid +chidden +chide +chided +chides +chiding +chief +chief's +chiefer +chiefest +chiefly +chiefs +chieftain +chieftain's +chieftains +chiffon +chiffon's +chigger +chigger's +chiggers +chignon +chignon's +chignons +chilblain +chilblain's +chilblains +child +child's +childbearing +childbearing's +childbirth +childbirth's +childbirths +childcare +childcare's +childhood +childhood's +childhoods +childish +childishly +childishness +childishness's +childless +childlessness +childlessness's +childlike +childproof +childproofed +childproofing +childproofs +children +children's +chile +chile's +chiles +chili +chili's +chilies +chilis +chill +chill's +chilled +chiller +chiller's +chillers +chillest +chilli 
+chilli's +chillier +chillies +chilliest +chilliness +chilliness's +chilling +chillings +chills +chilly +chimaera +chimaera's +chimaeras +chime +chime's +chimed +chimera +chimera's +chimeras +chimerical +chimes +chiming +chimney +chimney's +chimneys +chimp +chimp's +chimpanzee +chimpanzee's +chimpanzees +chimps +chin +chin's +china +china's +chinchilla +chinchilla's +chinchillas +chink +chink's +chinked +chinking +chinks +chinned +chinning +chino +chino's +chinos +chins +chinstrap +chinstrap's +chinstraps +chintz +chintz's +chintzier +chintziest +chintzy +chip +chip's +chipmunk +chipmunk's +chipmunks +chipped +chipper +chipper's +chippers +chipping +chips +chiropodist +chiropodist's +chiropodists +chiropody +chiropody's +chiropractic +chiropractic's +chiropractics +chiropractor +chiropractor's +chiropractors +chirp +chirp's +chirped +chirping +chirps +chirrup +chirrup's +chirruped +chirruping +chirrupped +chirrupping +chirrups +chisel +chisel's +chiseled +chiseler +chiseler's +chiselers +chiseling +chiselled +chiseller +chiseller's +chisellers +chiselling +chisels +chit +chit's +chitchat +chitchat's +chitchats +chitchatted +chitchatting +chitin +chitin's +chitlings +chitlings's +chitlins +chitlins's +chits +chitterlings +chitterlings's +chivalrous +chivalrously +chivalry +chivalry's +chive +chive's +chives +chloride +chloride's +chlorides +chlorinate +chlorinated +chlorinates +chlorinating +chlorination +chlorination's +chlorine +chlorine's +chlorofluorocarbon +chlorofluorocarbon's +chlorofluorocarbons +chloroform +chloroform's +chloroformed +chloroforming +chloroforms +chlorophyll +chlorophyll's +chock +chock's +chocked +chocking +chocks +chocolate +chocolate's +chocolates +choice +choice's +choicer +choices +choicest +choir +choir's +choirs +choke +choke's +choked +choker +choker's +chokers +chokes +choking +choler +choler's +cholera +cholera's +choleric +cholesterol +cholesterol's +chomp +chomp's +chomped +chomping +chomps +choose +chooses +choosey +choosier 
+choosiest +choosing +choosy +chop +chop's +chopped +chopper +chopper's +choppered +choppering +choppers +choppier +choppiest +choppily +choppiness +choppiness's +chopping +choppy +chops +chopstick +chopstick's +chopsticks +choral +choral's +chorale +chorale's +chorales +chorals +chord +chord's +chords +chore +chore's +choreograph +choreographed +choreographer +choreographer's +choreographers +choreographic +choreographing +choreographs +choreography +choreography's +chores +chorister +chorister's +choristers +chortle +chortle's +chortled +chortles +chortling +chorus +chorus's +chorused +choruses +chorusing +chorussed +chorussing +chose +chosen +chow +chow's +chowder +chowder's +chowders +chowed +chowing +chows +christen +christened +christening +christening's +christenings +christens +chromatic +chrome +chrome's +chromed +chromes +chroming +chromium +chromium's +chromosome +chromosome's +chromosomes +chronic +chronically +chronicle +chronicle's +chronicled +chronicler +chronicler's +chroniclers +chronicles +chronicling +chronological +chronologically +chronologies +chronology +chronology's +chronometer +chronometer's +chronometers +chrysalides +chrysalis +chrysalis's +chrysalises +chrysanthemum +chrysanthemum's +chrysanthemums +chubbier +chubbiest +chubbiness +chubbiness's +chubby +chuck +chuck's +chucked +chuckhole +chuckhole's +chuckholes +chucking +chuckle +chuckle's +chuckled +chuckles +chuckling +chucks +chug +chug's +chugged +chugging +chugs +chum +chum's +chummed +chummier +chummiest +chumminess +chumminess's +chumming +chummy +chump +chump's +chumps +chums +chunk +chunk's +chunkier +chunkiest +chunkiness +chunkiness's +chunks +chunky +church +church's +churches +churchgoer +churchgoer's +churchgoers +churchman +churchman's +churchmen +churchyard +churchyard's +churchyards +churl +churl's +churlish +churlishly +churlishness +churlishness's +churls +churn +churn's +churned +churning +churns +chute +chute's +chutes +chutney +chutney's +chutzpa +chutzpa's 
+chutzpah +chutzpah's +château +château's +châteaux +châtelaine +châtelaine's +châtelaines +ciabatta +ciabatta's +ciabattas +cicada +cicada's +cicadae +cicadas +cicatrice +cicatrice's +cicatrices +cicatrix +cicatrix's +cider +cider's +ciders +cigar +cigar's +cigaret +cigaret's +cigarets +cigarette +cigarette's +cigarettes +cigarillo +cigarillo's +cigarillos +cigars +cilantro +cilantro's +cilia +cilium +cilium's +cinch +cinch's +cinched +cinches +cinching +cinchona +cinchona's +cinchonas +cincture +cincture's +cinctures +cinder +cinder's +cindered +cindering +cinders +cinema +cinema's +cinemas +cinematic +cinematographer +cinematographer's +cinematographers +cinematography +cinematography's +cinnabar +cinnabar's +cinnamon +cinnamon's +cipher +cipher's +ciphered +ciphering +ciphers +circa +circadian +circle +circle's +circled +circles +circlet +circlet's +circlets +circling +circuit +circuit's +circuited +circuiting +circuitous +circuitously +circuitry +circuitry's +circuits +circular +circular's +circularity +circularity's +circularize +circularized +circularizes +circularizing +circulars +circulate +circulated +circulates +circulating +circulation +circulation's +circulations +circulatory +circumcise +circumcised +circumcises +circumcising +circumcision +circumcision's +circumcisions +circumference +circumference's +circumferences +circumflex +circumflex's +circumflexes +circumlocution +circumlocution's +circumlocutions +circumnavigate +circumnavigated +circumnavigates +circumnavigating +circumnavigation +circumnavigation's +circumnavigations +circumscribe +circumscribed +circumscribes +circumscribing +circumscription +circumscription's +circumscriptions +circumspect +circumspection +circumspection's +circumstance +circumstance's +circumstanced +circumstances +circumstancing +circumstantial +circumstantially +circumvent +circumvented +circumventing +circumvention +circumvention's +circumvents +circus +circus's +circuses +cirrhosis +cirrhosis's +cirrus +cirrus's 
+cistern +cistern's +cisterns +citadel +citadel's +citadels +citation +citation's +citations +cite +cite's +cited +cites +cities +citing +citizen +citizen's +citizenry +citizenry's +citizens +citizenship +citizenship's +citric +citron +citron's +citronella +citronella's +citrons +citrous +citrus +citrus's +citruses +city +city's +civet +civet's +civets +civic +civics +civics's +civies +civil +civilian +civilian's +civilians +civilities +civility +civility's +civilization +civilization's +civilizations +civilize +civilized +civilizes +civilizing +civilly +civvies +civvies's +clack +clack's +clacked +clacking +clacks +clad +claim +claim's +claimant +claimant's +claimants +claimed +claiming +claims +clairvoyance +clairvoyance's +clairvoyant +clairvoyant's +clairvoyants +clam +clam's +clambake +clambake's +clambakes +clamber +clamber's +clambered +clambering +clambers +clammed +clammier +clammiest +clamminess +clamminess's +clamming +clammy +clamor +clamor's +clamored +clamoring +clamorous +clamors +clamp +clamp's +clampdown +clampdown's +clampdowns +clamped +clamping +clamps +clams +clan +clan's +clandestine +clandestinely +clang +clang's +clanged +clanging +clangor +clangor's +clangs +clank +clank's +clanked +clanking +clanks +clannish +clans +clap +clap's +clapboard +clapboard's +clapboarded +clapboarding +clapboards +clapped +clapper +clapper's +clappers +clapping +claps +claptrap +claptrap's +claret +claret's +clarets +clarification +clarification's +clarifications +clarified +clarifies +clarify +clarifying +clarinet +clarinet's +clarinetist +clarinetist's +clarinetists +clarinets +clarinettist +clarinettist's +clarinettists +clarion +clarion's +clarioned +clarioning +clarions +clarity +clarity's +clash +clash's +clashed +clashes +clashing +clasp +clasp's +clasped +clasping +clasps +class +class's +classed +classes +classic +classic's +classical +classical's +classically +classicism +classicism's +classicist +classicist's +classicists +classics +classier 
+classiest +classifiable +classification +classification's +classifications +classified +classified's +classifieds +classifies +classify +classifying +classiness +classiness's +classing +classless +classmate +classmate's +classmates +classroom +classroom's +classrooms +classy +clatter +clatter's +clattered +clattering +clatters +clause +clause's +clauses +claustrophobia +claustrophobia's +claustrophobic +clavichord +clavichord's +clavichords +clavicle +clavicle's +clavicles +claw +claw's +clawed +clawing +claws +clay +clay's +clayey +clayier +clayiest +clean +cleaned +cleaner +cleaner's +cleaners +cleanest +cleaning +cleaning's +cleanings +cleanlier +cleanliest +cleanliness +cleanliness's +cleanly +cleanness +cleanness's +cleans +cleanse +cleansed +cleanser +cleanser's +cleansers +cleanses +cleansing +cleanup +cleanup's +cleanups +clear +clear's +clearance +clearance's +clearances +cleared +clearer +clearest +clearing +clearing's +clearinghouse +clearinghouse's +clearinghouses +clearings +clearly +clearness +clearness's +clears +cleat +cleat's +cleats +cleavage +cleavage's +cleavages +cleave +cleaved +cleaver +cleaver's +cleavers +cleaves +cleaving +clef +clef's +clefs +cleft +cleft's +clefts +clematis +clematis's +clematises +clemency +clemency's +clement +clench +clench's +clenched +clenches +clenching +clerestories +clerestory +clerestory's +clergies +clergy +clergy's +clergyman +clergyman's +clergymen +clergywoman +clergywoman's +clergywomen +cleric +cleric's +clerical +clerics +clerk +clerk's +clerked +clerking +clerks +clever +cleverer +cleverest +cleverly +cleverness +cleverness's +clew +clew's +clewed +clewing +clews +cliché +cliché's +clichéd +clichés +click +click's +clickable +clicked +clicking +clicks +client +client's +clients +clientèle +clientèle's +clientèles +cliff +cliff's +cliffhanger +cliffhanger's +cliffhangers +cliffs +climactic +climate +climate's +climates +climatic +climax +climax's +climaxed +climaxes +climaxing +climb +climb's +climbed 
+climber +climber's +climbers +climbing +climbs +clime +clime's +climes +clinch +clinch's +clinched +clincher +clincher's +clinchers +clinches +clinching +cling +cling's +clingier +clingiest +clinging +clings +clingy +clinic +clinic's +clinical +clinically +clinician +clinician's +clinicians +clinics +clink +clink's +clinked +clinker +clinker's +clinkers +clinking +clinks +clip +clip's +clipboard +clipboard's +clipboards +clipped +clipper +clipper's +clippers +clipping +clipping's +clippings +clips +clipt +clique +clique's +cliques +cliquish +clit +clit's +clitoral +clitoris +clitoris's +clitorises +clits +cloak +cloak's +cloaked +cloaking +cloakroom +cloakroom's +cloakrooms +cloaks +clobber +clobber's +clobbered +clobbering +clobbers +cloche +cloche's +cloches +clock +clock's +clocked +clocking +clocks +clockwise +clockwork +clockwork's +clockworks +clod +clod's +clodhopper +clodhopper's +clodhoppers +clods +clog +clog's +clogged +clogging +clogs +cloister +cloister's +cloistered +cloistering +cloisters +clomp +clomped +clomping +clomps +clone +clone's +cloned +clones +cloning +clop +clop's +clopped +clopping +clops +close +close's +closed +closefisted +closely +closemouthed +closeness +closeness's +closeout +closeout's +closeouts +closer +closes +closest +closet +closet's +closeted +closeting +closets +closing +closure +closure's +closures +clot +clot's +cloth +cloth's +clothe +clothed +clothes +clothesline +clothesline's +clotheslines +clothespin +clothespin's +clothespins +clothier +clothier's +clothiers +clothing +clothing's +cloths +clots +clotted +clotting +cloture +cloture's +clotures +cloud +cloud's +cloudburst +cloudburst's +cloudbursts +clouded +cloudier +cloudiest +cloudiness +cloudiness's +clouding +cloudless +clouds +cloudy +clout +clout's +clouted +clouting +clouts +clove +clove's +cloven +clover +clover's +cloverleaf +cloverleaf's +cloverleafs +cloverleaves +clovers +cloves +clown +clown's +clowned +clowning +clownish +clownishly +clownishness 
+clownishness's +clowns +cloy +cloyed +cloying +cloys +club +club's +clubbed +clubbing +clubfeet +clubfoot +clubfoot's +clubhouse +clubhouse's +clubhouses +clubs +cluck +cluck's +clucked +clucking +clucks +clue +clue's +clued +clueing +clueless +clues +cluing +clump +clump's +clumped +clumping +clumps +clumsier +clumsiest +clumsily +clumsiness +clumsiness's +clumsy +clung +clunk +clunk's +clunked +clunker +clunker's +clunkers +clunkier +clunkiest +clunking +clunks +clunky +cluster +cluster's +clustered +clustering +clusters +clutch +clutch's +clutched +clutches +clutching +clutter +clutter's +cluttered +cluttering +clutters +coach +coach's +coached +coaches +coaching +coachman +coachman's +coachmen +coagulant +coagulant's +coagulants +coagulate +coagulated +coagulates +coagulating +coagulation +coagulation's +coal +coal's +coaled +coalesce +coalesced +coalescence +coalescence's +coalesces +coalescing +coaling +coalition +coalition's +coalitions +coals +coarse +coarsely +coarsen +coarsened +coarseness +coarseness's +coarsening +coarsens +coarser +coarsest +coast +coast's +coastal +coasted +coaster +coaster's +coasters +coasting +coastline +coastline's +coastlines +coasts +coat +coat's +coated +coating +coating's +coatings +coats +coauthor +coauthor's +coauthored +coauthoring +coauthors +coax +coaxed +coaxes +coaxing +cob +cob's +cobalt +cobalt's +cobble +cobble's +cobbled +cobbler +cobbler's +cobblers +cobbles +cobblestone +cobblestone's +cobblestones +cobbling +cobra +cobra's +cobras +cobs +cobweb +cobweb's +cobwebs +cocaine +cocaine's +cocci +coccis +coccus +coccus's +coccyges +coccyx +coccyx's +coccyxes +cochlea +cochlea's +cochleae +cochleas +cock +cock's +cockade +cockade's +cockades +cockamamie +cockatoo +cockatoo's +cockatoos +cocked +cockerel +cockerel's +cockerels +cockeyed +cockfight +cockfight's +cockfights +cockier +cockiest +cockily +cockiness +cockiness's +cocking +cockle +cockle's +cockles +cockleshell +cockleshell's +cockleshells +cockney +cockney's 
+cockneys +cockpit +cockpit's +cockpits +cockroach +cockroach's +cockroaches +cocks +cockscomb +cockscomb's +cockscombs +cocksucker +cocksucker's +cocksuckers +cocksure +cocktail +cocktail's +cocktails +cocky +cocoa +cocoa's +cocoanut +cocoanut's +cocoanuts +cocoas +coconut +coconut's +coconuts +cocoon +cocoon's +cocooned +cocooning +cocoons +cod +cod's +coda +coda's +codas +codded +codding +coddle +coddled +coddles +coddling +code +code's +coded +codeine +codeine's +codependency +codependency's +codependent +codependent's +codependents +codes +codex +codex's +codfish +codfish's +codfishes +codger +codger's +codgers +codices +codicil +codicil's +codicils +codification +codification's +codifications +codified +codifies +codify +codifying +coding +cods +coed +coed's +coeds +coeducation +coeducation's +coeducational +coefficient +coefficient's +coefficients +coequal +coequal's +coequals +coerce +coerced +coerces +coercing +coercion +coercion's +coercive +coeval +coeval's +coevals +coexist +coexisted +coexistence +coexistence's +coexisting +coexists +coffee +coffee's +coffeecake +coffeecake's +coffeecakes +coffeehouse +coffeehouse's +coffeehouses +coffeepot +coffeepot's +coffeepots +coffees +coffer +coffer's +coffers +coffin +coffin's +coffined +coffining +coffins +cog +cog's +cogency +cogency's +cogent +cogently +cogitate +cogitated +cogitates +cogitating +cogitation +cogitation's +cognac +cognac's +cognacs +cognate +cognate's +cognates +cognition +cognition's +cognitive +cognizance +cognizance's +cognizant +cognomen +cognomen's +cognomens +cognomina +cogs +cogwheel +cogwheel's +cogwheels +cohabit +cohabitation +cohabitation's +cohabited +cohabiting +cohabits +cohere +cohered +coherence +coherence's +coherent +coherently +coheres +cohering +cohesion +cohesion's +cohesive +cohesively +cohesiveness +cohesiveness's +cohort +cohort's +cohorts +coif +coif's +coifed +coiffed +coiffing +coiffure +coiffure's +coiffured +coiffures +coiffuring +coifing +coifs +coil +coil's 
+coiled +coiling +coils +coin +coin's +coinage +coinage's +coinages +coincide +coincided +coincidence +coincidence's +coincidences +coincident +coincidental +coincidentally +coincides +coinciding +coined +coining +coins +coital +coitus +coitus's +coke +coke's +coked +cokes +coking +cola +cola's +colander +colander's +colanders +colas +cold +cold's +colder +coldest +coldly +coldness +coldness's +colds +coleslaw +coleslaw's +colic +colic's +colicky +coliseum +coliseum's +coliseums +colitis +colitis's +collaborate +collaborated +collaborates +collaborating +collaboration +collaboration's +collaborations +collaborative +collaborator +collaborator's +collaborators +collage +collage's +collages +collapse +collapse's +collapsed +collapses +collapsible +collapsing +collar +collar's +collarbone +collarbone's +collarbones +collared +collaring +collars +collate +collated +collateral +collateral's +collates +collating +collation +collation's +collations +colleague +colleague's +colleagues +collect +collect's +collectable +collectable's +collectables +collected +collectible +collectible's +collectibles +collecting +collection +collection's +collections +collective +collective's +collectively +collectives +collectivism +collectivism's +collectivist +collectivist's +collectivists +collectivize +collectivized +collectivizes +collectivizing +collector +collector's +collectors +collects +colleen +colleen's +colleens +college +college's +colleges +collegian +collegian's +collegians +collegiate +collide +collided +collides +colliding +collie +collie's +collier +collier's +collieries +colliers +colliery +colliery's +collies +collision +collision's +collisions +collocate +collocate's +collocated +collocates +collocating +collocation +collocation's +collocations +colloid +colloid's +colloids +colloquia +colloquial +colloquialism +colloquialism's +colloquialisms +colloquially +colloquies +colloquium +colloquium's +colloquiums +colloquy +colloquy's +collude +colluded +colludes +colluding 
+collusion +collusion's +collusive +cologne +cologne's +colognes +colon +colon's +colonel +colonel's +colonels +colones +colonial +colonial's +colonialism +colonialism's +colonialist +colonialist's +colonialists +colonials +colonies +colonist +colonist's +colonists +colonization +colonization's +colonize +colonized +colonizer +colonizer's +colonizers +colonizes +colonizing +colonnade +colonnade's +colonnades +colonoscopies +colonoscopy +colonoscopy's +colons +colony +colony's +color +color's +coloration +coloration's +coloratura +coloratura's +coloraturas +colorblind +colored +colored's +coloreds +colorfast +colorful +colorfully +coloring +coloring's +colorless +colors +colossal +colossally +colossi +colossus +colossus's +colossuses +cols +colt +colt's +coltish +colts +columbine +columbine's +columbines +column +column's +columned +columnist +columnist's +columnists +columns +coma +coma's +comas +comatose +comb +comb's +combat +combat's +combatant +combatant's +combatants +combated +combating +combative +combats +combatted +combatting +combed +combination +combination's +combinations +combine +combine's +combined +combines +combing +combining +combo +combo's +combos +combs +combustibility +combustibility's +combustible +combustible's +combustibles +combustion +combustion's +come +come's +comeback +comeback's +comebacks +comedian +comedian's +comedians +comedic +comedienne +comedienne's +comediennes +comedies +comedown +comedown's +comedowns +comedy +comedy's +comelier +comeliest +comeliness +comeliness's +comely +comer +comer's +comers +comes +comestible +comestible's +comestibles +comet +comet's +comets +comeuppance +comeuppance's +comeuppances +comfier +comfiest +comfort +comfort's +comfortable +comfortably +comforted +comforter +comforter's +comforters +comforting +comfortingly +comforts +comfy +comic +comic's +comical +comically +comics +coming +coming's +comings +comity +comity's +comma +comma's +command +command's +commandant +commandant's +commandants 
+commanded +commandeer +commandeered +commandeering +commandeers +commander +commander's +commanders +commanding +commandment +commandment's +commandments +commando +commando's +commandoes +commandos +commands +commas +commemorate +commemorated +commemorates +commemorating +commemoration +commemoration's +commemorations +commemorative +commence +commenced +commencement +commencement's +commencements +commences +commencing +commend +commendable +commendably +commendation +commendation's +commendations +commended +commending +commends +commensurable +commensurate +comment +comment's +commentaries +commentary +commentary's +commentate +commentated +commentates +commentating +commentator +commentator's +commentators +commented +commenting +comments +commerce +commerce's +commercial +commercial's +commercialism +commercialism's +commercialization +commercialization's +commercialize +commercialized +commercializes +commercializing +commercially +commercials +commingle +commingled +commingles +commingling +commiserate +commiserated +commiserates +commiserating +commiseration +commiseration's +commiserations +commissar +commissar's +commissariat +commissariat's +commissariats +commissaries +commissars +commissary +commissary's +commission +commission's +commissioned +commissioner +commissioner's +commissioners +commissioning +commissions +commit +commitment +commitment's +commitments +commits +committal +committal's +committals +committed +committee +committee's +committees +committing +commode +commode's +commodes +commodious +commodities +commodity +commodity's +commodore +commodore's +commodores +common +common's +commoner +commoner's +commoners +commonest +commonly +commonplace +commonplace's +commonplaces +commons +commonwealth +commonwealth's +commonwealths +commotion +commotion's +commotions +communal +communally +commune +commune's +communed +communes +communicable +communicant +communicant's +communicants +communicate +communicated +communicates +communicating 
+communication +communication's +communications +communicative +communicator +communicator's +communicators +communing +communion +communion's +communions +communique +communique's +communiques +communism +communism's +communist +communist's +communistic +communists +communities +community +community's +commutation +commutation's +commutations +commutative +commute +commute's +commuted +commuter +commuter's +commuters +commutes +commuting +compact +compact's +compacted +compacter +compactest +compacting +compaction +compactly +compactness +compactness's +compactor +compactor's +compactors +compacts +companies +companion +companion's +companionable +companions +companionship +companionship's +companionway +companionway's +companionways +company +company's +comparability +comparability's +comparable +comparably +comparative +comparative's +comparatively +comparatives +compare +compare's +compared +compares +comparing +comparison +comparison's +comparisons +compartment +compartment's +compartmentalize +compartmentalized +compartmentalizes +compartmentalizing +compartments +compass +compass's +compassed +compasses +compassing +compassion +compassion's +compassionate +compassionately +compatibility +compatibility's +compatible +compatible's +compatibles +compatibly +compatriot +compatriot's +compatriots +compel +compelled +compelling +compellingly +compels +compendia +compendium +compendium's +compendiums +compensate +compensated +compensates +compensating +compensation +compensation's +compensations +compensatory +compete +competed +competence +competence's +competences +competencies +competency +competency's +competent +competently +competes +competing +competition +competition's +competitions +competitive +competitively +competitiveness +competitiveness's +competitor +competitor's +competitors +compilation +compilation's +compilations +compile +compiled +compiler +compiler's +compilers +compiles +compiling +complacence +complacence's +complacency +complacency's 
+complacent +complacently +complain +complainant +complainant's +complainants +complained +complainer +complainer's +complainers +complaining +complains +complaint +complaint's +complaints +complaisance +complaisance's +complaisant +complaisantly +complected +complement +complement's +complementary +complemented +complementing +complements +complete +completed +completely +completeness +completeness's +completer +completes +completest +completing +completion +completion's +complex +complex's +complexes +complexion +complexion's +complexioned +complexions +complexities +complexity +complexity's +compliance +compliance's +compliant +complicate +complicated +complicates +complicating +complication +complication's +complications +complicity +complicity's +complied +complies +compliment +compliment's +complimentary +complimented +complimenting +compliments +comply +complying +component +component's +components +comport +comported +comporting +comportment +comportment's +comports +compose +composed +composer +composer's +composers +composes +composing +composite +composite's +composites +composition +composition's +compositions +compositor +compositor's +compositors +compost +compost's +composted +composting +composts +composure +composure's +compote +compote's +compotes +compound +compound's +compounded +compounding +compounds +comprehend +comprehended +comprehending +comprehends +comprehensibility +comprehensibility's +comprehensible +comprehension +comprehension's +comprehensions +comprehensive +comprehensive's +comprehensively +comprehensiveness +comprehensiveness's +comprehensives +compress +compress's +compressed +compresses +compressing +compression +compression's +compressor +compressor's +compressors +comprise +comprised +comprises +comprising +compromise +compromise's +compromised +compromises +compromising +comptroller +comptroller's +comptrollers +compulsion +compulsion's +compulsions +compulsive +compulsively +compulsiveness +compulsiveness's +compulsories 
+compulsorily +compulsory +compulsory's +compunction +compunction's +compunctions +computation +computation's +computational +computationally +computations +compute +computed +computer +computer's +computerization +computerization's +computerize +computerized +computerizes +computerizing +computers +computes +computing +computing's +comrade +comrade's +comrades +comradeship +comradeship's +con +con's +concatenate +concatenated +concatenates +concatenating +concatenation +concatenation's +concatenations +concave +concavities +concavity +concavity's +conceal +concealed +concealing +concealment +concealment's +conceals +concede +conceded +concedes +conceding +conceit +conceit's +conceited +conceits +conceivable +conceivably +conceive +conceived +conceives +conceiving +concentrate +concentrate's +concentrated +concentrates +concentrating +concentration +concentration's +concentrations +concentric +concentrically +concept +concept's +conception +conception's +conceptions +concepts +conceptual +conceptualization +conceptualization's +conceptualizations +conceptualize +conceptualized +conceptualizes +conceptualizing +conceptually +concern +concern's +concerned +concerning +concerns +concert +concert's +concerted +concerti +concertina +concertina's +concertinaed +concertinaing +concertinas +concerting +concertmaster +concertmaster's +concertmasters +concerto +concerto's +concertos +concerts +concession +concession's +concessionaire +concessionaire's +concessionaires +concessions +conch +conch's +conches +conchs +concierge +concierge's +concierges +conciliate +conciliated +conciliates +conciliating +conciliation +conciliation's +conciliator +conciliator's +conciliators +conciliatory +concise +concisely +conciseness +conciseness's +conciser +concisest +conclave +conclave's +conclaves +conclude +concluded +concludes +concluding +conclusion +conclusion's +conclusions +conclusive +conclusively +concoct +concocted +concocting +concoction +concoction's +concoctions +concocts 
+concomitant +concomitant's +concomitants +concord +concord's +concordance +concordance's +concordances +concordant +concourse +concourse's +concourses +concrete +concrete's +concreted +concretely +concretes +concreting +concubine +concubine's +concubines +concur +concurred +concurrence +concurrence's +concurrences +concurrency +concurrent +concurrently +concurring +concurs +concussion +concussion's +concussions +condemn +condemnation +condemnation's +condemnations +condemnatory +condemned +condemning +condemns +condensation +condensation's +condensations +condense +condensed +condenser +condenser's +condensers +condenses +condensing +condescend +condescended +condescending +condescendingly +condescends +condescension +condescension's +condiment +condiment's +condiments +condition +condition's +conditional +conditional's +conditionally +conditionals +conditioned +conditioner +conditioner's +conditioners +conditioning +conditions +condo +condo's +condoes +condole +condoled +condolence +condolence's +condolences +condoles +condoling +condom +condom's +condominium +condominium's +condominiums +condoms +condone +condoned +condones +condoning +condor +condor's +condors +condos +conduce +conduced +conduces +conducing +conducive +conduct +conduct's +conducted +conducting +conduction +conduction's +conductive +conductivity +conductivity's +conductor +conductor's +conductors +conducts +conduit +conduit's +conduits +cone +cone's +cones +confab +confab's +confabbed +confabbing +confabs +confection +confection's +confectioner +confectioner's +confectioneries +confectioners +confectionery +confectionery's +confections +confederacies +confederacy +confederacy's +confederate +confederate's +confederated +confederates +confederating +confederation +confederation's +confederations +confer +conference +conference's +conferences +conferencing +conferment +conferment's +conferments +conferred +conferrer +conferring +confers +confess +confessed +confessedly +confesses +confessing 
+confession +confession's +confessional +confessional's +confessionals +confessions +confessor +confessor's +confessors +confetti +confetti's +confidant +confidant's +confidante +confidante's +confidantes +confidants +confide +confided +confidence +confidence's +confidences +confident +confidential +confidentiality +confidentiality's +confidentially +confidently +confides +confiding +configurable +configuration +configuration's +configurations +configure +configured +configures +configuring +confine +confine's +confined +confinement +confinement's +confinements +confines +confining +confirm +confirmation +confirmation's +confirmations +confirmatory +confirmed +confirming +confirms +confiscate +confiscated +confiscates +confiscating +confiscation +confiscation's +confiscations +conflagration +conflagration's +conflagrations +conflict +conflict's +conflicted +conflicting +conflicts +confluence +confluence's +confluences +confluent +conform +conformance +conformation +conformation's +conformations +conformed +conforming +conformist +conformist's +conformists +conformity +conformity's +conforms +confound +confounded +confounding +confounds +confront +confrontation +confrontation's +confrontational +confrontations +confronted +confronting +confronts +confrère +confrère's +confrères +confuse +confused +confusedly +confuses +confusing +confusingly +confusion +confusion's +confusions +confute +confuted +confutes +confuting +conga +conga's +congaed +congaing +congas +congeal +congealed +congealing +congeals +congenial +congeniality +congeniality's +congenially +congenital +congenitally +congest +congested +congesting +congestion +congestion's +congestive +congests +conglomerate +conglomerate's +conglomerated +conglomerates +conglomerating +conglomeration +conglomeration's +conglomerations +congratulate +congratulated +congratulates +congratulating +congratulation +congratulation's +congratulations +congratulatory +congregate +congregated +congregates +congregating 
+congregation +congregation's +congregational +congregations +congress +congress's +congresses +congressional +congressman +congressman's +congressmen +congresswoman +congresswoman's +congresswomen +congruence +congruence's +congruent +congruities +congruity +congruity's +congruous +conic +conic's +conical +conics +conifer +conifer's +coniferous +conifers +conjectural +conjecture +conjecture's +conjectured +conjectures +conjecturing +conjoin +conjoined +conjoining +conjoins +conjoint +conjugal +conjugate +conjugated +conjugates +conjugating +conjugation +conjugation's +conjugations +conjunction +conjunction's +conjunctions +conjunctive +conjunctive's +conjunctives +conjunctivitis +conjunctivitis's +conjuncture +conjuncture's +conjunctures +conjure +conjured +conjurer +conjurer's +conjurers +conjures +conjuring +conjuror +conjuror's +conjurors +conk +conk's +conked +conking +conks +connect +connected +connecter +connecter's +connecters +connecting +connection +connection's +connections +connective +connective's +connectives +connectivity +connector +connector's +connectors +connects +conned +conning +connivance +connivance's +connive +connived +conniver +conniver's +connivers +connives +conniving +connoisseur +connoisseur's +connoisseurs +connotation +connotation's +connotations +connotative +connote +connoted +connotes +connoting +connubial +conquer +conquered +conquering +conqueror +conqueror's +conquerors +conquers +conquest +conquest's +conquests +conquistador +conquistador's +conquistadores +conquistadors +cons +consanguinity +consanguinity's +conscience +conscience's +consciences +conscientious +conscientiously +conscientiousness +conscientiousness's +conscious +consciously +consciousness +consciousness's +consciousnesses +conscript +conscript's +conscripted +conscripting +conscription +conscription's +conscripts +consecrate +consecrated +consecrates +consecrating +consecration +consecration's +consecrations +consecutive +consecutively +consensual +consensus 
+consensus's +consensuses +consent +consent's +consented +consenting +consents +consequence +consequence's +consequences +consequent +consequential +consequently +conservation +conservation's +conservationist +conservationist's +conservationists +conservatism +conservatism's +conservative +conservative's +conservatively +conservatives +conservator +conservator's +conservatories +conservators +conservatory +conservatory's +conserve +conserve's +conserved +conserves +conserving +consider +considerable +considerably +considerate +considerately +consideration +consideration's +considerations +considered +considering +considers +consign +consigned +consigning +consignment +consignment's +consignments +consigns +consist +consisted +consistencies +consistency +consistency's +consistent +consistently +consisting +consists +consolation +consolation's +consolations +console +console's +consoled +consoles +consolidate +consolidated +consolidates +consolidating +consolidation +consolidation's +consolidations +consoling +consommé +consommé's +consonance +consonance's +consonances +consonant +consonant's +consonants +consort +consort's +consorted +consortia +consorting +consortium +consortium's +consortiums +consorts +conspicuous +conspicuously +conspiracies +conspiracy +conspiracy's +conspirator +conspirator's +conspiratorial +conspirators +conspire +conspired +conspires +conspiring +constable +constable's +constables +constabularies +constabulary +constabulary's +constancy +constancy's +constant +constant's +constantly +constants +constellation +constellation's +constellations +consternation +consternation's +constipate +constipated +constipates +constipating +constipation +constipation's +constituencies +constituency +constituency's +constituent +constituent's +constituents +constitute +constituted +constitutes +constituting +constitution +constitution's +constitutional +constitutional's +constitutionality +constitutionality's +constitutionally +constitutionals +constitutions 
+constrain +constrained +constraining +constrains +constraint +constraint's +constraints +constrict +constricted +constricting +constriction +constriction's +constrictions +constrictive +constrictor +constrictor's +constrictors +constricts +construct +construct's +constructed +constructing +construction +construction's +constructions +constructive +constructively +constructor +constructor's +constructors +constructs +construe +construed +construes +construing +consul +consul's +consular +consulate +consulate's +consulates +consuls +consult +consultancies +consultancy +consultancy's +consultant +consultant's +consultants +consultation +consultation's +consultations +consultative +consulted +consulting +consults +consumable +consumable's +consumables +consume +consumed +consumer +consumer's +consumerism +consumerism's +consumers +consumes +consuming +consummate +consummated +consummates +consummating +consummation +consummation's +consummations +consumption +consumption's +consumptive +consumptive's +consumptives +contact +contact's +contactable +contacted +contacting +contacts +contagion +contagion's +contagions +contagious +contain +contained +container +container's +containers +containing +containment +containment's +contains +contaminant +contaminant's +contaminants +contaminate +contaminated +contaminates +contaminating +contamination +contamination's +contemplate +contemplated +contemplates +contemplating +contemplation +contemplation's +contemplative +contemplative's +contemplatives +contemporaneous +contemporaneously +contemporaries +contemporary +contemporary's +contempt +contempt's +contemptible +contemptibly +contemptuous +contemptuously +contend +contended +contender +contender's +contenders +contending +contends +content +content's +contented +contentedly +contentedness +contentedness's +contenting +contention +contention's +contentions +contentious +contentiously +contentment +contentment's +contents +contest +contest's +contestant +contestant's 
+contestants +contested +contesting +contests +context +context's +contexts +contextual +contiguity +contiguity's +contiguous +continence +continence's +continent +continent's +continental +continental's +continentals +continents +contingencies +contingency +contingency's +contingent +contingent's +contingents +continua +continual +continually +continuance +continuance's +continuances +continuation +continuation's +continuations +continue +continued +continues +continuing +continuity +continuity's +continuous +continuously +continuum +continuum's +continuums +contort +contorted +contorting +contortion +contortion's +contortionist +contortionist's +contortionists +contortions +contorts +contour +contour's +contoured +contouring +contours +contraband +contraband's +contraception +contraception's +contraceptive +contraceptive's +contraceptives +contract +contract's +contracted +contractile +contracting +contraction +contraction's +contractions +contractor +contractor's +contractors +contracts +contractual +contractually +contradict +contradicted +contradicting +contradiction +contradiction's +contradictions +contradictory +contradicts +contradistinction +contradistinction's +contradistinctions +contrail +contrail's +contrails +contralto +contralto's +contraltos +contraption +contraption's +contraptions +contrapuntal +contraries +contrarily +contrariness +contrariness's +contrariwise +contrary +contrary's +contrast +contrast's +contrasted +contrasting +contrasts +contravene +contravened +contravenes +contravening +contravention +contravention's +contraventions +contretemps +contretemps's +contribute +contributed +contributes +contributing +contribution +contribution's +contributions +contributor +contributor's +contributors +contributory +contrite +contritely +contrition +contrition's +contrivance +contrivance's +contrivances +contrive +contrived +contrives +contriving +control +control's +controllable +controlled +controller +controller's +controllers +controlling 
+controls +controversial +controversially +controversies +controversy +controversy's +controvert +controverted +controverting +controverts +contumacious +contumelies +contumely +contumely's +contuse +contused +contuses +contusing +contusion +contusion's +contusions +conundrum +conundrum's +conundrums +conurbation +conurbation's +conurbations +convalesce +convalesced +convalescence +convalescence's +convalescences +convalescent +convalescent's +convalescents +convalesces +convalescing +convection +convection's +convene +convened +convenes +convenience +convenience's +conveniences +convenient +conveniently +convening +convent +convent's +convention +convention's +conventional +conventionality +conventionality's +conventionally +conventions +convents +converge +converged +convergence +convergence's +convergences +convergent +converges +converging +conversant +conversation +conversation's +conversational +conversationalist +conversationalist's +conversationalists +conversationally +conversations +converse +converse's +conversed +conversely +converses +conversing +conversion +conversion's +conversions +convert +convert's +converted +converter +converter's +converters +convertible +convertible's +convertibles +converting +convertor +convertor's +convertors +converts +convex +convexity +convexity's +convey +conveyance +conveyance's +conveyances +conveyed +conveyer +conveyer's +conveyers +conveying +conveyor +conveyor's +conveyors +conveys +convict +convict's +convicted +convicting +conviction +conviction's +convictions +convicts +convince +convinced +convinces +convincing +convincingly +convivial +conviviality +conviviality's +convocation +convocation's +convocations +convoke +convoked +convokes +convoking +convoluted +convolution +convolution's +convolutions +convoy +convoy's +convoyed +convoying +convoys +convulse +convulsed +convulses +convulsing +convulsion +convulsion's +convulsions +convulsive +convulsively +coo +coo's +cooed +cooing +cook +cook's +cookbook 
+cookbook's +cookbooks +cooked +cooker +cooker's +cookeries +cookers +cookery +cookery's +cookie +cookie's +cookies +cooking +cooking's +cookout +cookout's +cookouts +cooks +cooky +cooky's +cool +cool's +coolant +coolant's +coolants +cooled +cooler +cooler's +coolers +coolest +coolie +coolie's +coolies +cooling +coolly +coolness +coolness's +cools +coon +coon's +coons +coop +coop's +cooped +cooper +cooper's +cooperate +cooperated +cooperates +cooperating +cooperation +cooperation's +cooperative +cooperative's +cooperatively +cooperatives +coopered +coopering +coopers +cooping +coops +coordinate +coordinate's +coordinated +coordinates +coordinating +coordination +coordination's +coordinator +coordinator's +coordinators +coos +coot +coot's +cootie +cootie's +cooties +coots +cop +cop's +cope +cope's +copeck +copeck's +copecks +coped +copes +copied +copier +copier's +copiers +copies +copilot +copilot's +copilots +coping +coping's +copings +copious +copiously +copped +copper +copper's +copperhead +copperhead's +copperheads +coppers +coppery +coppice +coppice's +coppices +copping +copra +copra's +cops +copse +copse's +copses +copter +copter's +copters +copula +copula's +copulae +copulas +copulate +copulated +copulates +copulating +copulation +copulation's +copy +copy's +copycat +copycat's +copycats +copycatted +copycatting +copying +copyright +copyright's +copyrighted +copyrighting +copyrights +copywriter +copywriter's +copywriters +coquette +coquette's +coquetted +coquettes +coquetting +coquettish +coral +coral's +corals +cord +cord's +corded +cordial +cordial's +cordiality +cordiality's +cordially +cordials +cording +cordite +cordite's +cordless +cordon +cordon's +cordoned +cordoning +cordons +cords +corduroy +corduroy's +corduroys +corduroys's +core +core's +cored +cores +corespondent +corespondent's +corespondents +coriander +coriander's +coring +cork +cork's +corked +corking +corks +corkscrew +corkscrew's +corkscrewed +corkscrewing +corkscrews +corm +corm's 
+cormorant +cormorant's +cormorants +corms +corn +corn's +cornball +cornball's +cornballs +cornbread +cornbread's +corncob +corncob's +corncobs +cornea +cornea's +corneal +corneas +corned +corner +corner's +cornered +cornering +corners +cornerstone +cornerstone's +cornerstones +cornet +cornet's +cornets +cornflakes +cornflakes's +cornflower +cornflower's +cornflowers +cornice +cornice's +cornices +cornier +corniest +corning +cornmeal +cornmeal's +cornrow +cornrow's +cornrowed +cornrowing +cornrows +corns +cornstalk +cornstalk's +cornstalks +cornstarch +cornstarch's +cornucopia +cornucopia's +cornucopias +corny +corolla +corolla's +corollaries +corollary +corollary's +corollas +corona +corona's +coronae +coronaries +coronary +coronary's +coronas +coronation +coronation's +coronations +coroner +coroner's +coroners +coronet +coronet's +coronets +corpora +corporal +corporal's +corporals +corporate +corporation +corporation's +corporations +corporeal +corps +corps's +corpse +corpse's +corpses +corpulence +corpulence's +corpulent +corpus +corpus's +corpuscle +corpuscle's +corpuscles +corpuses +corral +corral's +corralled +corralling +corrals +correct +correctable +corrected +correcter +correctest +correcting +correction +correction's +correctional +corrections +corrective +corrective's +correctives +correctly +correctness +correctness's +corrector +corrects +correlate +correlate's +correlated +correlates +correlating +correlation +correlation's +correlations +correlative +correlative's +correlatives +correspond +corresponded +correspondence +correspondence's +correspondences +correspondent +correspondent's +correspondents +corresponding +correspondingly +corresponds +corridor +corridor's +corridors +corroborate +corroborated +corroborates +corroborating +corroboration +corroboration's +corroborations +corroborative +corrode +corroded +corrodes +corroding +corrosion +corrosion's +corrosive +corrosive's +corrosives +corrugate +corrugated +corrugates +corrugating 
+corrugation +corrugation's +corrugations +corrupt +corrupted +corrupter +corruptest +corruptible +corrupting +corruption +corruption's +corruptions +corruptly +corruptness +corruptness's +corrupts +corsage +corsage's +corsages +corsair +corsair's +corsairs +corset +corset's +corseted +corseting +corsets +cortex +cortex's +cortexes +cortical +cortices +cortisone +cortisone's +cortège +cortège's +cortèges +coruscate +coruscated +coruscates +coruscating +cosier +cosies +cosiest +cosign +cosignatories +cosignatory +cosignatory's +cosigned +cosigner +cosigner's +cosigners +cosigning +cosigns +cosine +cosmetic +cosmetic's +cosmetically +cosmetics +cosmetologist +cosmetologist's +cosmetologists +cosmetology +cosmetology's +cosmic +cosmically +cosmogonies +cosmogony +cosmogony's +cosmological +cosmologies +cosmologist +cosmologist's +cosmologists +cosmology +cosmology's +cosmonaut +cosmonaut's +cosmonauts +cosmopolitan +cosmopolitan's +cosmopolitans +cosmos +cosmos's +cosmoses +cosplay +cosponsor +cosponsor's +cosponsored +cosponsoring +cosponsors +cost +cost's +costar +costar's +costarred +costarring +costars +costed +costing +costings +costlier +costliest +costliness +costliness's +costly +costs +costume +costume's +costumed +costumes +costuming +cosy +cosy's +cot +cot's +cote +cote's +coterie +coterie's +coteries +cotes +cotillion +cotillion's +cotillions +cots +cottage +cottage's +cottages +cotter +cotter's +cotters +cotton +cotton's +cottoned +cottoning +cottonmouth +cottonmouth's +cottonmouths +cottons +cottonseed +cottonseed's +cottonseeds +cottontail +cottontail's +cottontails +cottonwood +cottonwood's +cottonwoods +cotyledon +cotyledon's +cotyledons +couch +couch's +couched +couches +couching +cougar +cougar's +cougars +cough +cough's +coughed +coughing +coughs +could +could've +couldn't +council +council's +councillor +councillor's +councillors +councilman +councilman's +councilmen +councilor +councilor's +councilors +councils +councilwoman +councilwoman's 
+councilwomen +counsel +counsel's +counseled +counseling +counselings +counselled +counsellor +counsellor's +counsellors +counselor +counselor's +counselors +counsels +count +count's +countable +countably +countdown +countdown's +countdowns +counted +countenance +countenance's +countenanced +countenances +countenancing +counter +counter's +counteract +counteracted +counteracting +counteraction +counteraction's +counteractions +counteracts +counterattack +counterattack's +counterattacked +counterattacking +counterattacks +counterbalance +counterbalance's +counterbalanced +counterbalances +counterbalancing +counterclaim +counterclaim's +counterclaimed +counterclaiming +counterclaims +counterclockwise +counterculture +counterculture's +countered +counterespionage +counterespionage's +counterexample +counterexamples +counterfeit +counterfeit's +counterfeited +counterfeiter +counterfeiter's +counterfeiters +counterfeiting +counterfeits +countering +counterintelligence +counterintelligence's +countermand +countermand's +countermanded +countermanding +countermands +counteroffer +counteroffer's +counteroffers +counterpane +counterpane's +counterpanes +counterpart +counterpart's +counterparts +counterpoint +counterpoint's +counterpoints +counterproductive +counterrevolution +counterrevolution's +counterrevolutionaries +counterrevolutionary +counterrevolutionary's +counterrevolutions +counters +countersank +countersign +countersign's +countersigned +countersigning +countersigns +countersink +countersink's +countersinking +countersinks +countersunk +countertenor +countertenor's +countertenors +counterweight +counterweight's +counterweights +countess +countess's +countesses +counties +counting +countless +countries +countrified +country +country's +countryman +countryman's +countrymen +countryside +countryside's +countrysides +countrywoman +countrywoman's +countrywomen +counts +county +county's +coup +coup's +coupe +coupe's +coupes +couple +couple's +coupled +couples +couplet 
+couplet's +couplets +coupling +coupling's +couplings +coupon +coupon's +coupons +coups +courage +courage's +courageous +courageously +courier +courier's +couriers +course +course's +coursed +courser +courses +coursing +court +court's +courted +courteous +courteously +courteousness +courteousness's +courtesan +courtesan's +courtesans +courtesies +courtesy +courtesy's +courthouse +courthouse's +courthouses +courtier +courtier's +courtiers +courting +courtlier +courtliest +courtliness +courtliness's +courtly +courtroom +courtroom's +courtrooms +courts +courtship +courtship's +courtships +courtyard +courtyard's +courtyards +cousin +cousin's +cousins +cove +cove's +coven +coven's +covenant +covenant's +covenanted +covenanting +covenants +covens +cover +cover's +coverage +coverage's +coverall +coverall's +coveralls +covered +covering +covering's +coverings +coverlet +coverlet's +coverlets +covers +covert +covert's +covertly +coverts +coves +covet +coveted +coveting +covetous +covetously +covetousness +covetousness's +covets +covey +covey's +coveys +cow +cow's +coward +coward's +cowardice +cowardice's +cowardliness +cowardliness's +cowardly +cowards +cowbird +cowbird's +cowbirds +cowboy +cowboy's +cowboys +cowed +cower +cowered +cowering +cowers +cowgirl +cowgirl's +cowgirls +cowhand +cowhand's +cowhands +cowhide +cowhide's +cowhides +cowing +cowl +cowl's +cowlick +cowlick's +cowlicks +cowling +cowling's +cowlings +cowls +coworker +coworker's +coworkers +cowpoke +cowpoke's +cowpokes +cowpox +cowpox's +cowpuncher +cowpuncher's +cowpunchers +cows +cowslip +cowslip's +cowslips +cox +coxcomb +coxcomb's +coxcombs +coxswain +coxswain's +coxswains +coy +coyer +coyest +coyly +coyness +coyness's +coyote +coyote's +coyotes +cozen +cozened +cozening +cozens +cozier +cozies +coziest +cozily +coziness +coziness's +cozy +cozy's +crab +crab's +crabbed +crabbier +crabbiest +crabbily +crabbiness +crabbiness's +crabbing +crabby +crabs +crack +crack's +crackdown +crackdown's +crackdowns 
+cracked +cracker +cracker's +crackerjack +crackerjack's +crackerjacks +crackers +cracking +crackle +crackle's +crackled +crackles +crackling +crackly +crackpot +crackpot's +crackpots +cracks +crackup +crackup's +crackups +cradle +cradle's +cradled +cradles +cradling +craft +craft's +crafted +craftier +craftiest +craftily +craftiness +craftiness's +crafting +crafts +craftsman +craftsman's +craftsmanship +craftsmanship's +craftsmen +crafty +crag +crag's +craggier +craggiest +craggy +crags +cram +crammed +cramming +cramp +cramp's +cramped +cramping +cramps +crams +cranberries +cranberry +cranberry's +crane +crane's +craned +cranes +crania +cranial +craning +cranium +cranium's +craniums +crank +crank's +crankcase +crankcase's +crankcases +cranked +crankier +crankiest +crankiness +crankiness's +cranking +cranks +crankshaft +crankshaft's +crankshafts +cranky +crannies +cranny +cranny's +crap +crap's +crape +crape's +crapes +crapped +crappier +crappiest +crapping +crappy +craps +craps's +crash +crash's +crashed +crashes +crashing +crass +crasser +crassest +crassly +crassness +crassness's +crate +crate's +crated +crater +crater's +cratered +cratering +craters +crates +crating +cravat +cravat's +cravats +crave +craved +craven +craven's +cravenly +cravens +craves +craving +craving's +cravings +craw +craw's +crawfish +crawfish's +crawfishes +crawl +crawl's +crawled +crawling +crawls +crawlspace +crawlspace's +crawlspaces +craws +crayfish +crayfish's +crayfishes +crayon +crayon's +crayoned +crayoning +crayons +craze +craze's +crazed +crazes +crazier +crazies +craziest +crazily +craziness +craziness's +crazing +crazy +crazy's +creak +creak's +creaked +creakier +creakiest +creaking +creaks +creaky +cream +cream's +creamed +creamer +creamer's +creameries +creamers +creamery +creamery's +creamier +creamiest +creaminess +creaminess's +creaming +creams +creamy +crease +crease's +creased +creases +creasing +create +created +creates +creating +creation +creation's +creationism 
+creationism's +creations +creative +creative's +creatively +creativeness +creativeness's +creatives +creativity +creativity's +creator +creator's +creators +creature +creature's +creatures +credence +credence's +credential +credential's +credentials +credenza +credenza's +credenzas +credibility +credibility's +credible +credibly +credit +credit's +creditable +creditably +credited +crediting +creditor +creditor's +creditors +credits +credo +credo's +credos +credulity +credulity's +credulous +credulously +creed +creed's +creeds +creek +creek's +creeks +creel +creel's +creels +creep +creep's +creeper +creeper's +creepers +creepier +creepiest +creepily +creepiness +creepiness's +creeping +creeps +creepy +cremate +cremated +cremates +cremating +cremation +cremation's +cremations +crematoria +crematories +crematorium +crematorium's +crematoriums +crematory +crematory's +creole +creole's +creoles +creosote +creosote's +creosoted +creosotes +creosoting +crepe +crepe's +crepes +crept +crescendi +crescendo +crescendo's +crescendos +crescent +crescent's +crescents +cress +cress's +crest +crest's +crested +crestfallen +cresting +crests +cretin +cretin's +cretinous +cretins +crevasse +crevasse's +crevasses +crevice +crevice's +crevices +crew +crew's +crewed +crewing +crewman +crewman's +crewmen +crews +crib +crib's +cribbage +cribbage's +cribbed +cribbing +cribs +crick +crick's +cricked +cricket +cricket's +cricketer +cricketer's +cricketers +crickets +cricking +cricks +cried +crier +crier's +criers +cries +crime +crime's +crimes +criminal +criminal's +criminally +criminals +criminologist +criminologist's +criminologists +criminology +criminology's +crimp +crimp's +crimped +crimping +crimps +crimson +crimson's +crimsoned +crimsoning +crimsons +cringe +cringe's +cringed +cringes +cringing +crinkle +crinkle's +crinkled +crinkles +crinklier +crinkliest +crinkling +crinkly +crinoline +crinoline's +crinolines +cripple +cripple's +crippled +cripples +crippling +crises +crisis 
+crisis's +crisp +crisp's +crisped +crisper +crispest +crispier +crispiest +crisping +crisply +crispness +crispness's +crisps +crispy +crisscross +crisscross's +crisscrossed +crisscrosses +crisscrossing +criteria +criterion +criterion's +criterions +critic +critic's +critical +critically +criticism +criticism's +criticisms +criticize +criticized +criticizes +criticizing +critics +critique +critique's +critiqued +critiques +critiquing +critter +critter's +critters +croak +croak's +croaked +croaking +croaks +crochet +crochet's +crocheted +crocheting +crochets +croci +crock +crock's +crocked +crockery +crockery's +crocks +crocodile +crocodile's +crocodiles +crocus +crocus's +crocuses +crofts +croissant +croissant's +croissants +crone +crone's +crones +cronies +crony +crony's +crook +crook's +crooked +crookeder +crookedest +crookedly +crookedness +crookedness's +crooking +crooks +croon +croon's +crooned +crooner +crooner's +crooners +crooning +croons +crop +crop's +cropped +cropper +cropper's +croppers +cropping +crops +croquet +croquet's +croquette +croquette's +croquettes +crosier +crosier's +crosiers +cross +cross's +crossbar +crossbar's +crossbars +crossbeam +crossbeam's +crossbeams +crossbones +crossbones's +crossbow +crossbow's +crossbows +crossbred +crossbreed +crossbreed's +crossbreeding +crossbreeds +crosscheck +crosscheck's +crosschecked +crosschecking +crosschecks +crossed +crosser +crosses +crossest +crossfire +crossfire's +crossfires +crossing +crossing's +crossings +crossly +crossness +crossness's +crossover +crossover's +crossovers +crosspiece +crosspiece's +crosspieces +crossroad +crossroad's +crossroads +crossroads's +crosstown +crosswalk +crosswalk's +crosswalks +crossways +crosswise +crossword +crossword's +crosswords +crotch +crotch's +crotches +crotchet +crotchet's +crotchets +crotchety +crouch +crouch's +crouched +crouches +crouching +croup +croup's +croupier +croupier's +croupiers +croupiest +croupy +crow +crow's +crowbar +crowbar's +crowbars 
+crowd +crowd's +crowded +crowdfund +crowdfunded +crowdfunding +crowdfunds +crowding +crowds +crowed +crowing +crown +crown's +crowned +crowning +crowns +crows +crozier +crozier's +croziers +croûton +croûton's +croûtons +crucial +crucially +crucible +crucible's +crucibles +crucified +crucifies +crucifix +crucifix's +crucifixes +crucifixion +crucifixion's +crucifixions +cruciform +cruciform's +cruciforms +crucify +crucifying +crud +crud's +cruddier +cruddiest +cruddy +crude +crude's +crudely +crudeness +crudeness's +cruder +crudest +crudities +crudity +crudity's +crudités +crudités's +cruel +crueler +cruelest +crueller +cruellest +cruelly +cruelties +cruelty +cruelty's +cruet +cruet's +cruets +cruise +cruise's +cruised +cruiser +cruiser's +cruisers +cruises +cruising +cruller +cruller's +crullers +crumb +crumb's +crumbed +crumbier +crumbiest +crumbing +crumble +crumble's +crumbled +crumbles +crumblier +crumbliest +crumbling +crumbly +crumbs +crumby +crummier +crummiest +crummy +crumpet +crumpet's +crumpets +crumple +crumple's +crumpled +crumples +crumpling +crunch +crunch's +crunched +cruncher +crunches +crunchier +crunchiest +crunching +crunchy +crusade +crusade's +crusaded +crusader +crusader's +crusaders +crusades +crusading +crush +crush's +crushed +crushes +crushing +crust +crust's +crustacean +crustacean's +crustaceans +crusted +crustier +crustiest +crusting +crusts +crusty +crutch +crutch's +crutches +crux +crux's +cruxes +cry +cry's +crybabies +crybaby +crybaby's +crying +cryings +cryogenics +cryogenics's +crypt +crypt's +cryptic +cryptically +cryptogram +cryptogram's +cryptograms +cryptographer +cryptographer's +cryptographers +cryptography +cryptography's +crypts +crystal +crystal's +crystalize +crystalized +crystalizes +crystalizing +crystalline +crystallization +crystallization's +crystallize +crystallized +crystallizes +crystallizing +crystallographic +crystallography +crystals +crèche +crèche's +crèches +cs +cub +cub's +cubbyhole +cubbyhole's 
+cubbyholes +cube +cube's +cubed +cubes +cubic +cubical +cubicle +cubicle's +cubicles +cubing +cubism +cubism's +cubist +cubist's +cubists +cubit +cubit's +cubits +cubs +cuckold +cuckold's +cuckolded +cuckolding +cuckolds +cuckoo +cuckoo's +cuckoos +cucumber +cucumber's +cucumbers +cud +cud's +cuddle +cuddle's +cuddled +cuddles +cuddlier +cuddliest +cuddling +cuddly +cudgel +cudgel's +cudgeled +cudgeling +cudgelled +cudgelling +cudgels +cuds +cue +cue's +cued +cueing +cues +cuff +cuff's +cuffed +cuffing +cuffs +cuing +cuisine +cuisine's +cuisines +culinary +cull +cull's +culled +cullender +cullender's +cullenders +culling +culls +culminate +culminated +culminates +culminating +culmination +culmination's +culminations +culotte +culotte's +culottes +culpability +culpability's +culpable +culprit +culprit's +culprits +cult +cult's +cultivate +cultivated +cultivates +cultivating +cultivation +cultivation's +cultivator +cultivator's +cultivators +cults +cultural +culturally +culture +culture's +cultured +cultures +culturing +culvert +culvert's +culverts +cumbersome +cumin +cumin's +cummerbund +cummerbund's +cummerbunds +cumming +cumquat +cumquat's +cumquats +cums +cumulative +cumulatively +cumuli +cumulus +cumulus's +cuneiform +cuneiform's +cunnilingus +cunnilingus's +cunning +cunning's +cunninger +cunningest +cunningly +cunt +cunt's +cunts +cup +cup's +cupboard +cupboard's +cupboards +cupcake +cupcake's +cupcakes +cupful +cupful's +cupfuls +cupid +cupid's +cupidity +cupidity's +cupids +cupola +cupola's +cupolas +cupped +cupping +cups +cupsful +cur +cur's +curable +curacies +curacy +curacy's +curate +curate's +curates +curative +curative's +curatives +curator +curator's +curators +curb +curb's +curbed +curbing +curbs +curd +curd's +curdle +curdled +curdles +curdling +curds +cure +cure's +cured +curer +cures +curfew +curfew's +curfews +curie +curie's +curies +curing +curio +curio's +curios +curiosities +curiosity +curiosity's +curious +curiously +curl +curl's +curled 
+curler +curler's +curlers +curlew +curlew's +curlews +curlicue +curlicue's +curlicued +curlicues +curlicuing +curlier +curliest +curliness +curliness's +curling +curls +curly +curlycue +curlycue's +curlycues +curmudgeon +curmudgeon's +curmudgeons +currant +currant's +currants +currencies +currency +currency's +current +current's +currently +currents +curricula +curriculum +curriculum's +curriculums +curried +curries +curry +curry's +currycomb +currycomb's +currycombed +currycombing +currycombs +currying +curs +curse +curse's +cursed +curses +cursing +cursive +cursive's +cursor +cursor's +cursorily +cursors +cursory +curst +curt +curtail +curtailed +curtailing +curtailment +curtailment's +curtailments +curtails +curtain +curtain's +curtained +curtaining +curtains +curter +curtest +curtly +curtness +curtness's +curtsey +curtsey's +curtseyed +curtseying +curtseys +curtsied +curtsies +curtsy +curtsy's +curtsying +curvaceous +curvacious +curvature +curvature's +curvatures +curve +curve's +curved +curves +curvier +curviest +curving +curvy +cushier +cushiest +cushion +cushion's +cushioned +cushioning +cushions +cushy +cusp +cusp's +cuspid +cuspid's +cuspids +cusps +cuss +cuss's +cussed +cusses +cussing +custard +custard's +custards +custodial +custodian +custodian's +custodians +custody +custody's +custom +custom's +customarily +customary +customer +customer's +customers +customization +customize +customized +customizes +customizing +customs +cut +cut's +cutback +cutback's +cutbacks +cute +cutely +cuteness +cuteness's +cuter +cutesier +cutesiest +cutest +cutesy +cuticle +cuticle's +cuticles +cutlass +cutlass's +cutlasses +cutlery +cutlery's +cutlet +cutlet's +cutlets +cutoff +cutoff's +cutoffs +cutout +cutout's +cutouts +cuts +cutter +cutter's +cutters +cutthroat +cutthroat's +cutthroats +cutting +cutting's +cuttings +cuttlefish +cuttlefish's +cuttlefishes +cutup +cutup's +cutups +cyanide +cyanide's +cyberbullies +cyberbully +cyberbully's +cybernetic +cybernetics 
+cybernetics's +cyberpunk +cyberpunk's +cyberpunks +cybersex +cyberspace +cyberspace's +cyclamen +cyclamen's +cyclamens +cycle +cycle's +cycled +cycles +cyclic +cyclical +cyclically +cycling +cyclist +cyclist's +cyclists +cyclone +cyclone's +cyclones +cyclonic +cyclotron +cyclotron's +cyclotrons +cygnet +cygnet's +cygnets +cylinder +cylinder's +cylinders +cylindrical +cymbal +cymbal's +cymbals +cynic +cynic's +cynical +cynically +cynicism +cynicism's +cynics +cynosure +cynosure's +cynosures +cypher +cypher's +cypress +cypress's +cypresses +cyst +cyst's +cystic +cysts +cytology +cytology's +cytoplasm +cytoplasm's +czar +czar's +czarina +czarina's +czarinas +czars +d +d'Arezzo +d'Arezzo's +d'Estaing +d'Estaing's +dB +dab +dab's +dabbed +dabbing +dabble +dabbled +dabbler +dabbler's +dabblers +dabbles +dabbling +dabs +dacha +dacha's +dachas +dachshund +dachshund's +dachshunds +dactyl +dactyl's +dactylic +dactylic's +dactylics +dactyls +dad +dad's +daddies +daddy +daddy's +dado +dado's +dadoes +dados +dads +daemon +daemon's +daemons +daffier +daffiest +daffodil +daffodil's +daffodils +daffy +daft +dafter +daftest +dagger +dagger's +daggers +daguerreotype +daguerreotype's +daguerreotyped +daguerreotypes +daguerreotyping +dahlia +dahlia's +dahlias +dailies +daily +daily's +daintier +dainties +daintiest +daintily +daintiness +daintiness's +dainty +dainty's +daiquiri +daiquiri's +daiquiris +dairies +dairy +dairy's +dairying +dairying's +dairymaid +dairymaid's +dairymaids +dairyman +dairyman's +dairymen +dais +dais's +daises +daisies +daisy +daisy's +dale +dale's +dales +dalliance +dalliance's +dalliances +dallied +dallies +dally +dallying +dalmatian +dalmatian's +dalmatians +dam +dam's +damage +damage's +damaged +damages +damages's +damaging +damask +damask's +damasked +damasking +damasks +dame +dame's +dames +dammed +damming +damn +damn's +damnable +damnably +damnation +damnation's +damndest +damned +damnedest +damning +damns +damp +damp's +damped +dampen +dampened 
+dampening +dampens +damper +damper's +dampers +dampest +damping +damply +dampness +dampness's +damps +dams +damsel +damsel's +damsels +damson +damson's +damsons +dance +dance's +danced +dancer +dancer's +dancers +dances +dancing +dancing's +dandelion +dandelion's +dandelions +dander +dander's +dandier +dandies +dandiest +dandle +dandled +dandles +dandling +dandruff +dandruff's +dandy +dandy's +danger +danger's +dangerous +dangerously +dangers +dangle +dangled +dangles +dangling +dank +danker +dankest +dankly +dankness +dankness's +dapper +dapperer +dapperest +dapple +dapple's +dappled +dapples +dappling +dare +dare's +dared +daredevil +daredevil's +daredevils +dares +daring +daring's +daringly +dark +dark's +darken +darkened +darkening +darkens +darker +darkest +darkly +darkness +darkness's +darkroom +darkroom's +darkrooms +darling +darling's +darlings +darn +darn's +darned +darneder +darnedest +darning +darns +dart +dart's +dartboard +dartboard's +dartboards +darted +darting +darts +dash +dash's +dashboard +dashboard's +dashboards +dashed +dashes +dashiki +dashiki's +dashikis +dashing +dashingly +dastardly +data +database +database's +databases +datatype +date +date's +dated +dateline +dateline's +datelined +datelines +datelining +dates +dating +dative +dative's +datives +datum +datum's +daub +daub's +daubed +dauber +dauber's +daubers +daubing +daubs +daughter +daughter's +daughters +daunt +daunted +daunting +dauntless +dauntlessly +dauntlessness +dauntlessness's +daunts +dauphin +dauphin's +dauphins +davenport +davenport's +davenports +davit +davit's +davits +dawdle +dawdled +dawdler +dawdler's +dawdlers +dawdles +dawdling +dawn +dawn's +dawned +dawning +dawns +day +day's +daybed +daybed's +daybeds +daybreak +daybreak's +daydream +daydream's +daydreamed +daydreamer +daydreamer's +daydreamers +daydreaming +daydreams +daydreamt +daylight +daylight's +daylights +days +daytime +daytime's +daze +daze's +dazed +dazes +dazing +dazzle +dazzle's +dazzled +dazzles 
+dazzling +deacon +deacon's +deaconess +deaconess's +deaconesses +deacons +deactivate +deactivated +deactivates +deactivating +dead +dead's +deadbeat +deadbeat's +deadbeats +deadbolt +deadbolt's +deadbolts +deaden +deadened +deadening +deadens +deader +deadest +deadlier +deadliest +deadline +deadline's +deadlines +deadliness +deadliness's +deadlock +deadlock's +deadlocked +deadlocking +deadlocks +deadly +deadpan +deadpan's +deadpanned +deadpanning +deadpans +deadwood +deadwood's +deaf +deafen +deafened +deafening +deafens +deafer +deafest +deafness +deafness's +deal +deal's +dealer +dealer's +dealers +dealership +dealership's +dealerships +dealing +dealing's +dealings +deals +dealt +dean +dean's +deans +dear +dear's +dearer +dearest +dearly +dearness +dearness's +dears +dearth +dearth's +dearths +death +death's +deathbed +deathbed's +deathbeds +deathblow +deathblow's +deathblows +deathless +deathlike +deathly +deaths +deathtrap +deathtrap's +deathtraps +deaves +deb +deb's +debacle +debacle's +debacles +debar +debark +debarkation +debarkation's +debarked +debarking +debarks +debarment +debarment's +debarred +debarring +debars +debase +debased +debasement +debasement's +debasements +debases +debasing +debatable +debate +debate's +debated +debater +debater's +debaters +debates +debating +debauch +debauch's +debauched +debaucheries +debauchery +debauchery's +debauches +debauching +debenture +debenture's +debentures +debilitate +debilitated +debilitates +debilitating +debilitation +debilitation's +debilities +debility +debility's +debit +debit's +debited +debiting +debits +debonair +debonairly +debrief +debriefed +debriefing +debriefing's +debriefings +debriefs +debris +debris's +debs +debt +debt's +debtor +debtor's +debtors +debts +debug +debugged +debugger +debuggers +debugging +debugs +debunk +debunked +debunking +debunks +debut +debut's +debuted +debuting +debuts +decade +decade's +decadence +decadence's +decadent +decadent's +decadently +decadents +decades +decaf 
+decaf's +decaffeinate +decaffeinated +decaffeinates +decaffeinating +decal +decal's +decals +decamp +decamped +decamping +decamps +decant +decanted +decanter +decanter's +decanters +decanting +decants +decapitate +decapitated +decapitates +decapitating +decapitation +decapitation's +decapitations +decathlon +decathlon's +decathlons +decay +decay's +decayed +decaying +decays +decease +decease's +deceased +deceased's +deceases +deceasing +decedent +decedent's +decedents +deceit +deceit's +deceitful +deceitfully +deceitfulness +deceitfulness's +deceits +deceive +deceived +deceiver +deceiver's +deceivers +deceives +deceiving +decelerate +decelerated +decelerates +decelerating +deceleration +deceleration's +decencies +decency +decency's +decent +decently +decentralization +decentralization's +decentralize +decentralized +decentralizes +decentralizing +deception +deception's +deceptions +deceptive +deceptively +deceptiveness +deceptiveness's +decibel +decibel's +decibels +decide +decided +decidedly +decides +deciding +deciduous +decimal +decimal's +decimals +decimate +decimated +decimates +decimating +decimation +decimation's +decipher +decipherable +deciphered +deciphering +deciphers +decision +decision's +decisions +decisive +decisively +decisiveness +decisiveness's +deck +deck's +decked +deckhand +deckhand's +deckhands +decking +decks +declaim +declaimed +declaiming +declaims +declamation +declamation's +declamations +declamatory +declaration +declaration's +declarations +declarative +declare +declared +declares +declaring +declassified +declassifies +declassify +declassifying +declension +declension's +declensions +declination +declination's +decline +decline's +declined +declines +declining +declivities +declivity +declivity's +decode +decoded +decoder +decodes +decoding +decolonization +decolonization's +decolonize +decolonized +decolonizes +decolonizing +decommission +decommissioned +decommissioning +decommissions +decompose +decomposed +decomposes +decomposing 
+decomposition +decomposition's +decompress +decompressed +decompresses +decompressing +decompression +decompression's +decongestant +decongestant's +decongestants +deconstruction +deconstruction's +deconstructions +decontaminate +decontaminated +decontaminates +decontaminating +decontamination +decontamination's +decor +decor's +decorate +decorated +decorates +decorating +decoration +decoration's +decorations +decorative +decorator +decorator's +decorators +decorous +decorously +decors +decorum +decorum's +decoy +decoy's +decoyed +decoying +decoys +decrease +decrease's +decreased +decreases +decreasing +decree +decree's +decreed +decreeing +decrees +decremented +decrements +decrepit +decrepitude +decrepitude's +decrescendi +decrescendo +decrescendo's +decrescendos +decried +decries +decriminalization +decriminalization's +decriminalize +decriminalized +decriminalizes +decriminalizing +decry +decrying +decryption +dedicate +dedicated +dedicates +dedicating +dedication +dedication's +dedications +deduce +deduced +deduces +deducible +deducing +deduct +deducted +deductible +deductible's +deductibles +deducting +deduction +deduction's +deductions +deductive +deducts +deed +deed's +deeded +deeding +deeds +deejay +deejay's +deejays +deem +deemed +deeming +deems +deep +deep's +deepen +deepened +deepening +deepens +deeper +deepest +deeply +deepness +deepness's +deeps +deer +deer's +deers +deerskin +deerskin's +deescalate +deescalated +deescalates +deescalating +deface +defaced +defacement +defacement's +defaces +defacing +defamation +defamation's +defamatory +defame +defamed +defames +defaming +default +default's +defaulted +defaulter +defaulter's +defaulters +defaulting +defaults +defeat +defeat's +defeated +defeating +defeatism +defeatism's +defeatist +defeatist's +defeatists +defeats +defecate +defecated +defecates +defecating +defecation +defecation's +defect +defect's +defected +defecting +defection +defection's +defections +defective +defective's +defectives 
+defector +defector's +defectors +defects +defend +defendant +defendant's +defendants +defended +defender +defender's +defenders +defending +defends +defense +defense's +defensed +defenseless +defenses +defensible +defensing +defensive +defensive's +defensively +defensiveness +defensiveness's +defer +deference +deference's +deferential +deferentially +deferment +deferment's +deferments +deferred +deferring +defers +defiance +defiance's +defiant +defiantly +deficiencies +deficiency +deficiency's +deficient +deficit +deficit's +deficits +defied +defies +defile +defile's +defiled +defilement +defilement's +defiles +defiling +definable +define +defined +definer +definer's +definers +defines +defining +definite +definitely +definiteness +definiteness's +definition +definition's +definitions +definitive +definitively +deflate +deflated +deflates +deflating +deflation +deflation's +deflect +deflected +deflecting +deflection +deflection's +deflections +deflector +deflector's +deflectors +deflects +defogger +defogger's +defoggers +defoliant +defoliant's +defoliants +defoliate +defoliated +defoliates +defoliating +defoliation +defoliation's +deforest +deforestation +deforestation's +deforested +deforesting +deforests +deform +deformation +deformation's +deformations +deformed +deforming +deformities +deformity +deformity's +deforms +defraud +defrauded +defrauding +defrauds +defray +defrayal +defrayal's +defrayed +defraying +defrays +defrost +defrosted +defroster +defroster's +defrosters +defrosting +defrosts +deft +defter +deftest +deftly +deftness +deftness's +defunct +defuse +defused +defuses +defusing +defy +defying +degeneracy +degeneracy's +degenerate +degenerate's +degenerated +degenerates +degenerating +degeneration +degeneration's +degenerative +degradation +degradation's +degrade +degraded +degrades +degrading +degree +degree's +degrees +dehumanization +dehumanization's +dehumanize +dehumanized +dehumanizes +dehumanizing +dehumidified +dehumidifier +dehumidifier's 
+dehumidifiers +dehumidifies +dehumidify +dehumidifying +dehydrate +dehydrated +dehydrates +dehydrating +dehydration +dehydration's +deice +deiced +deicer +deicer's +deicers +deices +deicing +deification +deification's +deified +deifies +deify +deifying +deign +deigned +deigning +deigns +deism +deism's +deities +deity +deity's +deject +dejected +dejectedly +dejecting +dejection +dejection's +dejects +delay +delay's +delayed +delaying +delays +delectable +delectation +delectation's +delegate +delegate's +delegated +delegates +delegating +delegation +delegation's +delegations +delete +deleted +deleterious +deletes +deleting +deletion +deletion's +deletions +deleverage +deleveraged +deleverages +deleveraging +deli +deli's +deliberate +deliberated +deliberately +deliberates +deliberating +deliberation +deliberation's +deliberations +delicacies +delicacy +delicacy's +delicate +delicately +delicatessen +delicatessen's +delicatessens +delicious +deliciously +deliciousness +deliciousness's +delight +delight's +delighted +delightful +delightfully +delighting +delights +delimit +delimited +delimiter +delimiters +delimiting +delimits +delineate +delineated +delineates +delineating +delineation +delineation's +delineations +delinquencies +delinquency +delinquency's +delinquent +delinquent's +delinquently +delinquents +deliquescent +deliria +delirious +deliriously +delirium +delirium's +deliriums +delis +deliver +deliverance +deliverance's +delivered +deliverer +deliverer's +deliverers +deliveries +delivering +delivers +delivery +delivery's +dell +dell's +dells +delphinia +delphinium +delphinium's +delphiniums +delta +delta's +deltas +delude +deluded +deludes +deluding +deluge +deluge's +deluged +deluges +deluging +delusion +delusion's +delusions +delusive +deluxe +delve +delved +delves +delving +demagnetization +demagnetization's +demagnetize +demagnetized +demagnetizes +demagnetizing +demagog +demagog's +demagogic +demagogry +demagogs +demagogue +demagogue's +demagoguery 
+demagoguery's +demagogues +demagogy +demagogy's +demand +demand's +demanded +demanding +demands +demarcate +demarcated +demarcates +demarcating +demarcation +demarcation's +demean +demeaned +demeaning +demeanor +demeanor's +demeans +demented +dementedly +dementia +dementia's +demerit +demerit's +demerits +demesne +demesne's +demesnes +demigod +demigod's +demigods +demijohn +demijohn's +demijohns +demilitarization +demilitarization's +demilitarize +demilitarized +demilitarizes +demilitarizing +demise +demise's +demised +demises +demising +demitasse +demitasse's +demitasses +demo +demo's +demobilization +demobilization's +demobilize +demobilized +demobilizes +demobilizing +democracies +democracy +democracy's +democrat +democrat's +democratic +democratically +democratization +democratization's +democratize +democratized +democratizes +democratizing +democrats +demoed +demographer +demographer's +demographers +demographic +demographic's +demographically +demographics +demographics's +demography +demography's +demoing +demolish +demolished +demolishes +demolishing +demolition +demolition's +demolitions +demon +demon's +demoniac +demoniacal +demonic +demons +demonstrable +demonstrably +demonstrate +demonstrated +demonstrates +demonstrating +demonstration +demonstration's +demonstrations +demonstrative +demonstrative's +demonstratively +demonstratives +demonstrator +demonstrator's +demonstrators +demoralization +demoralization's +demoralize +demoralized +demoralizes +demoralizing +demos +demote +demoted +demotes +demoting +demotion +demotion's +demotions +demount +demur +demur's +demure +demurely +demurer +demurest +demurred +demurring +demurs +den +den's +denature +denatured +denatures +denaturing +dendrite +dendrite's +dendrites +deniability +denial +denial's +denials +denied +denier +denier's +deniers +denies +denigrate +denigrated +denigrates +denigrating +denigration +denigration's +denim +denim's +denims +denizen +denizen's +denizens +denominate +denominated 
+denominates +denominating +denomination +denomination's +denominational +denominations +denominator +denominator's +denominators +denotation +denotation's +denotations +denote +denoted +denotes +denoting +denouement +denouement's +denouements +denounce +denounced +denouncement +denouncement's +denouncements +denounces +denouncing +dens +dense +densely +denseness +denseness's +denser +densest +densities +density +density's +dent +dent's +dental +dented +dentifrice +dentifrice's +dentifrices +dentin +dentin's +dentine +dentine's +denting +dentist +dentist's +dentistry +dentistry's +dentists +dents +denture +denture's +dentures +denude +denuded +denudes +denuding +denunciation +denunciation's +denunciations +deny +denying +deodorant +deodorant's +deodorants +deodorize +deodorized +deodorizer +deodorizer's +deodorizers +deodorizes +deodorizing +depart +departed +departed's +departing +department +department's +departmental +departmentalize +departmentalized +departmentalizes +departmentalizing +departments +departs +departure +departure's +departures +depend +dependability +dependability's +dependable +dependably +dependance +dependance's +dependant +dependant's +dependants +depended +dependence +dependence's +dependencies +dependency +dependency's +dependent +dependent's +dependents +depending +depends +depict +depicted +depicting +depiction +depiction's +depictions +depicts +depilatories +depilatory +depilatory's +deplane +deplaned +deplanes +deplaning +deplete +depleted +depletes +depleting +depletion +depletion's +deplorable +deplorably +deplore +deplored +deplores +deploring +deploy +deployed +deploying +deployment +deployment's +deployments +deploys +depoliticize +depoliticized +depoliticizes +depoliticizing +depopulate +depopulated +depopulates +depopulating +depopulation +depopulation's +deport +deportation +deportation's +deportations +deported +deporting +deportment +deportment's +deports +depose +deposed +deposes +deposing +deposit +deposit's +deposited 
+depositing +deposition +deposition's +depositions +depositor +depositor's +depositories +depositors +depository +depository's +deposits +depot +depot's +depots +deprave +depraved +depraves +depraving +depravities +depravity +depravity's +deprecate +deprecated +deprecates +deprecating +deprecation +deprecation's +deprecatory +depreciate +depreciated +depreciates +depreciating +depreciation +depreciation's +depredation +depredation's +depredations +depress +depressant +depressant's +depressants +depressed +depresses +depressing +depressingly +depression +depression's +depressions +depressive +depressive's +depressives +deprivation +deprivation's +deprivations +deprive +deprived +deprives +depriving +deprogram +deprogramed +deprograming +deprogrammed +deprogramming +deprograms +depth +depth's +depths +deputation +deputation's +deputations +depute +deputed +deputes +deputies +deputing +deputize +deputized +deputizes +deputizing +deputy +deputy's +derail +derailed +derailing +derailment +derailment's +derailments +derails +derange +deranged +derangement +derangement's +deranges +deranging +derbies +derby +derby's +deregulate +deregulated +deregulates +deregulating +deregulation +deregulation's +derelict +derelict's +dereliction +dereliction's +derelicts +deride +derided +derides +deriding +derision +derision's +derisive +derisively +derisory +derivable +derivation +derivation's +derivations +derivative +derivative's +derivatives +derive +derived +derives +deriving +dermatitis +dermatitis's +dermatologist +dermatologist's +dermatologists +dermatology +dermatology's +dermis +dermis's +derogate +derogated +derogates +derogating +derogation +derogation's +derogatory +derrick +derrick's +derricks +derringer +derringer's +derringers +derrière +derrière's +derrières +dervish +dervish's +dervishes +desalinate +desalinated +desalinates +desalinating +desalination +desalination's +descant +descant's +descanted +descanting +descants +descend +descendant +descendant's +descendants 
+descended +descendent +descendent's +descendents +descender +descending +descends +descent +descent's +descents +describable +describe +described +describes +describing +descried +descries +description +description's +descriptions +descriptive +descriptively +descriptor +descriptors +descry +descrying +desecrate +desecrated +desecrates +desecrating +desecration +desecration's +desegregate +desegregated +desegregates +desegregating +desegregation +desegregation's +desensitization +desensitization's +desensitize +desensitized +desensitizes +desensitizing +desert +desert's +deserted +deserter +deserter's +deserters +deserting +desertion +desertion's +desertions +deserts +deserve +deserved +deservedly +deserves +deserving +desiccate +desiccated +desiccates +desiccating +desiccation +desiccation's +desiderata +desideratum +desideratum's +design +design's +designate +designated +designates +designating +designation +designation's +designations +designed +designer +designer's +designers +designing +designing's +designs +desirability +desirability's +desirable +desirably +desire +desire's +desired +desires +desiring +desirous +desist +desisted +desisting +desists +desk +desk's +desks +desktop +desktop's +desktops +desolate +desolated +desolately +desolateness +desolateness's +desolates +desolating +desolation +desolation's +despair +despair's +despaired +despairing +despairingly +despairs +despatch +despatch's +despatched +despatches +despatching +desperado +desperado's +desperadoes +desperados +desperate +desperately +desperation +desperation's +despicable +despicably +despise +despised +despises +despising +despite +despoil +despoiled +despoiling +despoils +despondency +despondency's +despondent +despondently +despot +despot's +despotic +despotism +despotism's +despots +dessert +dessert's +desserts +destabilize +destination +destination's +destinations +destine +destined +destines +destinies +destining +destiny +destiny's +destitute +destitution +destitution's +destroy 
+destroyed +destroyer +destroyer's +destroyers +destroying +destroys +destruct +destruct's +destructed +destructible +destructing +destruction +destruction's +destructive +destructively +destructiveness +destructiveness's +destructs +desultory +detach +detachable +detached +detaches +detaching +detachment +detachment's +detachments +detail +detail's +detailed +detailing +details +detain +detained +detainee +detainee's +detainees +detaining +detainment +detainment's +detains +detect +detectable +detected +detecting +detection +detection's +detective +detective's +detectives +detector +detector's +detectors +detects +detentes +detention +detention's +detentions +deter +detergent +detergent's +detergents +deteriorate +deteriorated +deteriorates +deteriorating +deterioration +deterioration's +determinable +determinant +determinant's +determinants +determinate +determination +determination's +determinations +determine +determined +determiner +determiner's +determiners +determines +determining +determinism +deterministic +deterred +deterrence +deterrence's +deterrent +deterrent's +deterrents +deterring +deters +detest +detestable +detestation +detestation's +detested +detesting +detests +dethrone +dethroned +dethronement +dethronement's +dethrones +dethroning +detonate +detonated +detonates +detonating +detonation +detonation's +detonations +detonator +detonator's +detonators +detour +detour's +detoured +detouring +detours +detox +detox's +detoxed +detoxes +detoxification +detoxification's +detoxified +detoxifies +detoxify +detoxifying +detoxing +detract +detracted +detracting +detraction +detraction's +detractor +detractor's +detractors +detracts +detriment +detriment's +detrimental +detriments +detritus +detritus's +deuce +deuce's +deuces +deuterium +deuterium's +devaluation +devaluation's +devaluations +devalue +devalued +devalues +devaluing +devastate +devastated +devastates +devastating +devastation +devastation's +develop +developed +developer +developer's 
+developers +developing +development +development's +developmental +developments +develops +deviance +deviance's +deviant +deviant's +deviants +deviate +deviate's +deviated +deviates +deviating +deviation +deviation's +deviations +device +device's +devices +devil +devil's +deviled +deviling +devilish +devilishly +devilled +devilling +devilment +devilment's +devilries +devilry +devilry's +devils +deviltries +deviltry +deviltry's +devious +deviously +deviousness +deviousness's +devise +devise's +devised +devises +devising +devoid +devolution +devolve +devolved +devolves +devolving +devote +devoted +devotedly +devotee +devotee's +devotees +devotes +devoting +devotion +devotion's +devotional +devotional's +devotionals +devotions +devour +devoured +devouring +devours +devout +devouter +devoutest +devoutly +devoutness +devoutness's +dew +dew's +dewberries +dewberry +dewberry's +dewdrop +dewdrop's +dewdrops +dewier +dewiest +dewlap +dewlap's +dewlaps +dewy +dexterity +dexterity's +dexterous +dexterously +dextrose +dextrose's +dextrous +dextrously +dharma +dhoti +dhoti's +dhotis +diabetes +diabetes's +diabetic +diabetic's +diabetics +diabolic +diabolical +diabolically +diacritic +diacritic's +diacritical +diacritics +diadem +diadem's +diadems +diagnose +diagnosed +diagnoses +diagnosing +diagnosis +diagnosis's +diagnostic +diagnostician +diagnostician's +diagnosticians +diagnostics +diagonal +diagonal's +diagonally +diagonals +diagram +diagram's +diagramed +diagraming +diagrammatic +diagrammed +diagramming +diagrams +dial +dial's +dialect +dialect's +dialectal +dialectic +dialectic's +dialects +dialed +dialing +dialings +dialog +dialog's +dialogs +dialogue +dialogue's +dialogues +dials +dialyses +dialysis +dialysis's +dialyzes +diameter +diameter's +diameters +diametrical +diametrically +diamond +diamond's +diamonds +diaper +diaper's +diapered +diapering +diapers +diaphanous +diaphragm +diaphragm's +diaphragms +diaries +diarist +diarist's +diarists +diarrhea +diarrhea's 
+diarrhoea +diarrhoea's +diary +diary's +diastolic +diatom +diatom's +diatoms +diatribe +diatribe's +diatribes +dibble +dibble's +dibbled +dibbles +dibbling +dice +diced +dices +dicey +dichotomies +dichotomy +dichotomy's +dicier +diciest +dicing +dick +dick's +dicker +dickered +dickering +dickers +dickey +dickey's +dickeys +dickie +dickie's +dickies +dicks +dicky +dicky's +dicta +dictate +dictate's +dictated +dictates +dictating +dictation +dictation's +dictations +dictator +dictator's +dictatorial +dictators +dictatorship +dictatorship's +dictatorships +diction +diction's +dictionaries +dictionary +dictionary's +dictum +dictum's +dictums +did +didactic +diddle +diddled +diddles +diddling +didn't +die +die's +died +diehard +diehard's +diehards +diereses +dieresis +dieresis's +dies +diesel +diesel's +dieseled +dieseling +diesels +diet +diet's +dietaries +dietary +dietary's +dieted +dieter +dieter's +dieters +dietetic +dietetics +dietetics's +dietician +dietician's +dieticians +dieting +dietitian +dietitian's +dietitians +diets +differ +differed +difference +difference's +differences +different +differential +differential's +differentials +differentiate +differentiated +differentiates +differentiating +differentiation +differentiation's +differently +differing +differs +difficult +difficulties +difficulty +difficulty's +diffidence +diffidence's +diffident +diffidently +diffraction +diffraction's +diffuse +diffused +diffusely +diffuseness +diffuseness's +diffuses +diffusing +diffusion +diffusion's +dig +dig's +digest +digest's +digested +digestible +digesting +digestion +digestion's +digestions +digestive +digests +digger +digger's +diggers +digging +digit +digit's +digital +digitalis +digitalis's +digitally +digitization +digitize +digitized +digitizes +digitizing +digits +dignified +dignifies +dignify +dignifying +dignitaries +dignitary +dignitary's +dignities +dignity +dignity's +digraph +digraph's +digraphs +digress +digressed +digresses +digressing +digression 
+digression's +digressions +digressive +digs +dike +dike's +diked +dikes +diking +dilapidated +dilapidation +dilapidation's +dilate +dilated +dilates +dilating +dilation +dilation's +dilatory +dilemma +dilemma's +dilemmas +dilettante +dilettante's +dilettantes +dilettanti +dilettantism +dilettantism's +diligence +diligence's +diligent +diligently +dill +dill's +dillies +dills +dilly +dilly's +dillydallied +dillydallies +dillydally +dillydallying +dilute +diluted +dilutes +diluting +dilution +dilution's +dim +dime +dime's +dimension +dimension's +dimensional +dimensionless +dimensions +dimer +dimes +diminish +diminished +diminishes +diminishing +diminuendo +diminuendo's +diminuendoes +diminuendos +diminution +diminution's +diminutions +diminutive +diminutive's +diminutives +dimly +dimmed +dimmer +dimmer's +dimmers +dimmest +dimming +dimness +dimness's +dimple +dimple's +dimpled +dimples +dimpling +dims +dimwit +dimwit's +dimwits +dimwitted +din +din's +dine +dined +diner +diner's +diners +dines +dinette +dinette's +dinettes +ding +ding's +dinged +dinghies +dinghy +dinghy's +dingier +dingiest +dinginess +dinginess's +dinging +dingo +dingo's +dingoes +dings +dingy +dining +dinkier +dinkies +dinkiest +dinky +dinky's +dinned +dinner +dinner's +dinnered +dinnering +dinners +dinning +dinosaur +dinosaur's +dinosaurs +dins +dint +dint's +diocesan +diocesan's +diocesans +diocese +diocese's +dioceses +diode +diode's +diodes +diorama +diorama's +dioramas +dioxide +dioxin +dioxin's +dioxins +dip +dip's +diphtheria +diphtheria's +diphthong +diphthong's +diphthongs +diploma +diploma's +diplomacy +diplomacy's +diplomas +diplomat +diplomat's +diplomata +diplomatic +diplomatically +diplomats +dipole +dipped +dipper +dipper's +dippers +dipping +dips +dipsomania +dipsomania's +dipsomaniac +dipsomaniac's +dipsomaniacs +dipstick +dipstick's +dipsticks +dire +direct +directed +directer +directest +directing +direction +direction's +directional +directions +directive +directive's 
+directives +directly +directness +directness's +director +director's +directorate +directorate's +directorates +directorial +directories +directors +directorship +directorship's +directorships +directory +directory's +directs +direr +direst +dirge +dirge's +dirges +dirigible +dirigible's +dirigibles +dirk +dirk's +dirks +dirt +dirt's +dirtied +dirtier +dirties +dirtiest +dirtiness +dirtiness's +dirty +dirtying +dis +dis's +disabilities +disability +disability's +disable +disabled +disablement +disablement's +disables +disabling +disabuse +disabused +disabuses +disabusing +disadvantage +disadvantage's +disadvantaged +disadvantageous +disadvantageously +disadvantages +disadvantaging +disaffect +disaffected +disaffecting +disaffection +disaffection's +disaffects +disagree +disagreeable +disagreeably +disagreed +disagreeing +disagreement +disagreement's +disagreements +disagrees +disallow +disallowed +disallowing +disallows +disambiguate +disambiguation +disappear +disappearance +disappearance's +disappearances +disappeared +disappearing +disappears +disappoint +disappointed +disappointing +disappointingly +disappointment +disappointment's +disappointments +disappoints +disapprobation +disapprobation's +disapproval +disapproval's +disapprove +disapproved +disapproves +disapproving +disapprovingly +disarm +disarmament +disarmament's +disarmed +disarming +disarms +disarrange +disarranged +disarrangement +disarrangement's +disarranges +disarranging +disarray +disarray's +disarrayed +disarraying +disarrays +disassemble +disassembled +disassembles +disassembling +disassociate +disassociated +disassociates +disassociating +disaster +disaster's +disasters +disastrous +disastrously +disavow +disavowal +disavowal's +disavowals +disavowed +disavowing +disavows +disband +disbanded +disbanding +disbands +disbar +disbarment +disbarment's +disbarred +disbarring +disbars +disbelief +disbelief's +disbelieve +disbelieved +disbelieves +disbelieving +disburse +disbursed +disbursement 
+disbursement's +disbursements +disburses +disbursing +disc +disc's +discard +discard's +discarded +discarding +discards +discern +discerned +discernible +discerning +discernment +discernment's +discerns +discharge +discharge's +discharged +discharges +discharging +disciple +disciple's +disciples +disciplinarian +disciplinarian's +disciplinarians +disciplinary +discipline +discipline's +disciplined +disciplines +disciplining +disclaim +disclaimed +disclaimer +disclaimer's +disclaimers +disclaiming +disclaims +disclose +disclosed +discloses +disclosing +disclosure +disclosure's +disclosures +disco +disco's +discoed +discoing +discolor +discoloration +discoloration's +discolorations +discolored +discoloring +discolors +discombobulate +discombobulated +discombobulates +discombobulating +discomfit +discomfited +discomfiting +discomfits +discomfiture +discomfiture's +discomfort +discomfort's +discomforted +discomforting +discomforts +discommode +discommoded +discommodes +discommoding +discompose +discomposed +discomposes +discomposing +discomposure +discomposure's +disconcert +disconcerted +disconcerting +disconcerts +disconnect +disconnected +disconnectedly +disconnecting +disconnection +disconnection's +disconnections +disconnects +disconsolate +disconsolately +discontent +discontent's +discontented +discontentedly +discontenting +discontentment +discontentment's +discontents +discontinuance +discontinuance's +discontinuances +discontinuation +discontinuation's +discontinuations +discontinue +discontinued +discontinues +discontinuing +discontinuities +discontinuity +discontinuity's +discontinuous +discord +discord's +discordant +discorded +discording +discords +discos +discotheque +discotheque's +discotheques +discount +discount's +discounted +discountenance +discountenanced +discountenances +discountenancing +discounting +discounts +discourage +discouraged +discouragement +discouragement's +discouragements +discourages +discouraging +discouragingly +discourse 
+discourse's +discoursed +discourses +discoursing +discourteous +discourteously +discourtesies +discourtesy +discourtesy's +discover +discovered +discoverer +discoverer's +discoverers +discoveries +discovering +discovers +discovery +discovery's +discredit +discredit's +discreditable +discredited +discrediting +discredits +discreet +discreeter +discreetest +discreetly +discrepancies +discrepancy +discrepancy's +discrete +discretion +discretion's +discretionary +discriminant +discriminate +discriminated +discriminates +discriminating +discrimination +discrimination's +discriminatory +discs +discursive +discus +discus's +discuses +discuss +discussant +discussant's +discussants +discussed +discusses +discussing +discussion +discussion's +discussions +disdain +disdain's +disdained +disdainful +disdainfully +disdaining +disdains +disease +disease's +diseased +diseases +disembark +disembarkation +disembarkation's +disembarked +disembarking +disembarks +disembodied +disembodies +disembody +disembodying +disembowel +disemboweled +disemboweling +disembowelled +disembowelling +disembowels +disenchant +disenchanted +disenchanting +disenchantment +disenchantment's +disenchants +disencumber +disencumbered +disencumbering +disencumbers +disenfranchise +disenfranchised +disenfranchisement +disenfranchisement's +disenfranchises +disenfranchising +disengage +disengaged +disengagement +disengagement's +disengagements +disengages +disengaging +disentangle +disentangled +disentanglement +disentanglement's +disentangles +disentangling +disestablish +disestablished +disestablishes +disestablishing +disfavor +disfavor's +disfavored +disfavoring +disfavors +disfigure +disfigured +disfigurement +disfigurement's +disfigurements +disfigures +disfiguring +disfranchise +disfranchised +disfranchisement +disfranchisement's +disfranchises +disfranchising +disgorge +disgorged +disgorges +disgorging +disgrace +disgrace's +disgraced +disgraceful +disgracefully +disgraces +disgracing +disgruntle 
+disgruntled +disgruntles +disgruntling +disguise +disguise's +disguised +disguises +disguising +disgust +disgust's +disgusted +disgustedly +disgusting +disgustingly +disgusts +dish +dish's +disharmonious +disharmony +disharmony's +dishcloth +dishcloth's +dishcloths +dishearten +disheartened +disheartening +disheartens +dished +dishes +dishevel +disheveled +disheveling +dishevelled +dishevelling +dishevels +dishing +dishonest +dishonestly +dishonesty +dishonesty's +dishonor +dishonor's +dishonorable +dishonorably +dishonored +dishonoring +dishonors +dishpan +dishpan's +dishpans +dishrag +dishrag's +dishrags +dishtowel +dishtowel's +dishtowels +dishwasher +dishwasher's +dishwashers +dishwater +dishwater's +disillusion +disillusion's +disillusioned +disillusioning +disillusionment +disillusionment's +disillusions +disincentive +disinclination +disinclination's +disincline +disinclined +disinclines +disinclining +disinfect +disinfectant +disinfectant's +disinfectants +disinfected +disinfecting +disinfects +disinformation +disinformation's +disingenuous +disinherit +disinherited +disinheriting +disinherits +disintegrate +disintegrated +disintegrates +disintegrating +disintegration +disintegration's +disinter +disinterest +disinterest's +disinterested +disinterestedly +disinterests +disinterment +disinterment's +disinterred +disinterring +disinters +disjoint +disjointed +disjointedly +disjointing +disjoints +disk +disk's +diskette +diskette's +diskettes +disks +dislike +dislike's +disliked +dislikes +disliking +dislocate +dislocated +dislocates +dislocating +dislocation +dislocation's +dislocations +dislodge +dislodged +dislodges +dislodging +disloyal +disloyally +disloyalty +disloyalty's +dismal +dismally +dismantle +dismantled +dismantles +dismantling +dismay +dismay's +dismayed +dismaying +dismays +dismember +dismembered +dismembering +dismemberment +dismemberment's +dismembers +dismiss +dismissal +dismissal's +dismissals +dismissed +dismisses +dismissing +dismissive 
+dismount +dismount's +dismounted +dismounting +dismounts +disobedience +disobedience's +disobedient +disobediently +disobey +disobeyed +disobeying +disobeys +disoblige +disobliged +disobliges +disobliging +disorder +disorder's +disordered +disordering +disorderliness +disorderliness's +disorderly +disorders +disorganization +disorganization's +disorganize +disorganized +disorganizes +disorganizing +disorient +disorientation +disorientation's +disoriented +disorienting +disorients +disown +disowned +disowning +disowns +disparage +disparaged +disparagement +disparagement's +disparages +disparaging +disparate +disparities +disparity +disparity's +dispassionate +dispassionately +dispatch +dispatch's +dispatched +dispatcher +dispatcher's +dispatchers +dispatches +dispatching +dispel +dispelled +dispelling +dispels +dispensable +dispensaries +dispensary +dispensary's +dispensation +dispensation's +dispensations +dispense +dispensed +dispenser +dispenser's +dispensers +dispenses +dispensing +dispersal +dispersal's +disperse +dispersed +disperses +dispersing +dispersion +dispersion's +dispirit +dispirited +dispiriting +dispirits +displace +displaced +displacement +displacement's +displacements +displaces +displacing +display +display's +displayable +displayed +displaying +displays +displease +displeased +displeases +displeasing +displeasure +displeasure's +disport +disported +disporting +disports +disposable +disposable's +disposables +disposal +disposal's +disposals +dispose +disposed +disposes +disposing +disposition +disposition's +dispositions +dispossess +dispossessed +dispossesses +dispossessing +dispossession +dispossession's +disproof +disproportion +disproportion's +disproportionate +disproportionately +disproportions +disprove +disproved +disproven +disproves +disproving +disputable +disputant +disputant's +disputants +disputation +disputation's +disputations +disputatious +dispute +dispute's +disputed +disputes +disputing +disqualification +disqualification's 
+disqualifications +disqualified +disqualifies +disqualify +disqualifying +disquiet +disquiet's +disquieted +disquieting +disquiets +disquisition +disquisition's +disquisitions +disregard +disregard's +disregarded +disregarding +disregards +disrepair +disrepair's +disreputable +disreputably +disrepute +disrepute's +disrespect +disrespect's +disrespected +disrespectful +disrespectfully +disrespecting +disrespects +disrobe +disrobed +disrobes +disrobing +disrupt +disrupted +disrupting +disruption +disruption's +disruptions +disruptive +disrupts +diss +diss's +dissatisfaction +dissatisfaction's +dissatisfied +dissatisfies +dissatisfy +dissatisfying +dissect +dissected +dissecting +dissection +dissection's +dissections +dissects +dissed +dissemble +dissembled +dissembles +dissembling +disseminate +disseminated +disseminates +disseminating +dissemination +dissemination's +dissension +dissension's +dissensions +dissent +dissent's +dissented +dissenter +dissenter's +dissenters +dissenting +dissents +dissertation +dissertation's +dissertations +disservice +disservice's +disservices +disses +dissidence +dissidence's +dissident +dissident's +dissidents +dissimilar +dissimilarities +dissimilarity +dissimilarity's +dissimulate +dissimulated +dissimulates +dissimulating +dissimulation +dissimulation's +dissing +dissipate +dissipated +dissipates +dissipating +dissipation +dissipation's +dissociate +dissociated +dissociates +dissociating +dissociation +dissociation's +dissolute +dissolutely +dissoluteness +dissoluteness's +dissolution +dissolution's +dissolve +dissolved +dissolves +dissolving +dissonance +dissonance's +dissonances +dissonant +dissuade +dissuaded +dissuades +dissuading +dissuasion +dissuasion's +distaff +distaff's +distaffs +distance +distance's +distanced +distances +distancing +distant +distantly +distaste +distaste's +distasteful +distastefully +distastes +distemper +distemper's +distend +distended +distending +distends +distension +distension's +distensions 
+distention +distention's +distentions +distil +distill +distillate +distillate's +distillates +distillation +distillation's +distillations +distilled +distiller +distiller's +distilleries +distillers +distillery +distillery's +distilling +distills +distils +distinct +distincter +distinctest +distinction +distinction's +distinctions +distinctive +distinctively +distinctiveness +distinctiveness's +distinctly +distinguish +distinguishable +distinguished +distinguishes +distinguishing +distort +distorted +distorter +distorting +distortion +distortion's +distortions +distorts +distract +distracted +distracting +distraction +distraction's +distractions +distracts +distrait +distraught +distress +distress's +distressed +distresses +distressful +distressing +distressingly +distribute +distributed +distributes +distributing +distribution +distribution's +distributions +distributive +distributor +distributor's +distributors +district +district's +districts +distrust +distrust's +distrusted +distrustful +distrustfully +distrusting +distrusts +disturb +disturbance +disturbance's +disturbances +disturbed +disturbing +disturbingly +disturbs +disunite +disunited +disunites +disuniting +disunity +disunity's +disuse +disuse's +disused +disuses +disusing +ditch +ditch's +ditched +ditches +ditching +dither +dither's +dithered +dithering +dithers +ditties +ditto +ditto's +dittoed +dittoes +dittoing +dittos +ditty +ditty's +diuretic +diuretic's +diuretics +diurnal +diurnally +diva +diva's +divan +divan's +divans +divas +dive +dive's +dived +diver +diver's +diverge +diverged +divergence +divergence's +divergences +divergent +diverges +diverging +divers +diverse +diversely +diversification +diversification's +diversified +diversifies +diversify +diversifying +diversion +diversion's +diversionary +diversions +diversities +diversity +diversity's +divert +diverted +diverting +diverts +dives +divest +divested +divesting +divests +divide +divide's +divided +dividend +dividend's +dividends 
+divider +divider's +dividers +divides +dividing +divination +divination's +divine +divine's +divined +divinely +diviner +diviner's +diviners +divines +divinest +diving +diving's +divining +divinities +divinity +divinity's +divisibility +divisibility's +divisible +division +division's +divisional +divisions +divisive +divisively +divisiveness +divisiveness's +divisor +divisor's +divisors +divorce +divorce's +divorced +divorces +divorcing +divorcée +divorcée's +divorcées +divot +divot's +divots +divulge +divulged +divulges +divulging +divvied +divvies +divvy +divvy's +divvying +dizzied +dizzier +dizzies +dizziest +dizzily +dizziness +dizziness's +dizzy +dizzying +djinn +djinn's +djinni +djinni's +djinns +do +do's +doable +doc +doc's +docent +docent's +docents +docile +docilely +docility +docility's +dock +dock's +docked +docket +docket's +docketed +docketing +dockets +docking +docks +dockyard +dockyard's +dockyards +docs +doctor +doctor's +doctoral +doctorate +doctorate's +doctorates +doctored +doctoring +doctors +doctrinaire +doctrinaire's +doctrinaires +doctrinal +doctrine +doctrine's +doctrines +docudrama +docudrama's +docudramas +document +document's +documentaries +documentary +documentary's +documentation +documentation's +documented +documenting +documents +dodder +dodder's +doddered +doddering +dodders +dodge +dodge's +dodged +dodger +dodger's +dodgers +dodges +dodging +dodo +dodo's +dodoes +dodos +doe +doe's +doer +doer's +doers +does +doesn't +doff +doffed +doffing +doffs +dog +dog's +dogcatcher +dogcatcher's +dogcatchers +dogfight +dogfight's +dogfights +dogfish +dogfish's +dogfishes +dogged +doggedly +doggedness +doggedness's +doggerel +doggerel's +doggie +doggie's +doggier +doggies +doggiest +dogging +doggone +doggoned +doggoneder +doggonedest +doggoner +doggones +doggonest +doggoning +doggy +doggy's +doghouse +doghouse's +doghouses +dogie +dogie's +dogies +dogma +dogma's +dogmas +dogmata +dogmatic +dogmatically +dogmatism +dogmatism's +dogmatist 
+dogmatist's +dogmatists +dogs +dogtrot +dogtrot's +dogtrots +dogtrotted +dogtrotting +dogwood +dogwood's +dogwoods +doilies +doily +doily's +doing +doing's +doings +doldrums +doldrums's +dole +dole's +doled +doleful +dolefully +doles +doling +doll +doll's +dollar +dollar's +dollars +dolled +dollhouse +dollhouse's +dollhouses +dollies +dolling +dollop +dollop's +dolloped +dolloping +dollops +dolls +dolly +dolly's +dolmen +dolmen's +dolmens +dolorous +dolphin +dolphin's +dolphins +dolt +dolt's +doltish +dolts +domain +domain's +domains +dome +dome's +domed +domes +domestic +domestic's +domestically +domesticate +domesticated +domesticates +domesticating +domestication +domestication's +domesticity +domesticity's +domestics +domicile +domicile's +domiciled +domiciles +domiciling +dominance +dominance's +dominant +dominant's +dominantly +dominants +dominate +dominated +dominates +dominating +domination +domination's +domineer +domineered +domineering +domineers +doming +dominion +dominion's +dominions +domino +domino's +dominoes +dominos +don +don's +don't +donate +donated +donates +donating +donation +donation's +donations +done +donkey +donkey's +donkeys +donned +donning +donor +donor's +donors +dons +donut +donut's +donuts +doodad +doodad's +doodads +doodle +doodle's +doodled +doodler +doodler's +doodlers +doodles +doodling +doohickey +doohickey's +doohickeys +doom +doom's +doomed +dooming +dooms +doomsday +doomsday's +door +door's +doorbell +doorbell's +doorbells +doorknob +doorknob's +doorknobs +doorman +doorman's +doormat +doormat's +doormats +doormen +doors +doorstep +doorstep's +doorsteps +doorway +doorway's +doorways +dope +dope's +doped +dopes +dopey +dopier +dopiest +doping +dopy +dories +dork +dork's +dorkier +dorkiest +dorks +dorky +dorm +dorm's +dormancy +dormancy's +dormant +dormer +dormer's +dormers +dormice +dormitories +dormitory +dormitory's +dormouse +dormouse's +dorms +dorsal +dory +dory's +dos +dosage +dosage's +dosages +dose +dose's +dosed 
+doses +dosing +dossier +dossier's +dossiers +dot +dot's +dotage +dotage's +dotcom +dotcom's +dotcoms +dote +doted +dotes +doth +doting +dotingly +dots +dotted +dotting +dotty +double +double's +doubled +doubles +doublet +doublet's +doublets +doubling +doubloon +doubloon's +doubloons +doubly +doubt +doubt's +doubted +doubter +doubter's +doubters +doubtful +doubtfully +doubting +doubtless +doubtlessly +doubts +douche +douche's +douched +douches +douching +dough +dough's +doughier +doughiest +doughnut +doughnut's +doughnuts +doughtier +doughtiest +doughty +doughy +dour +dourer +dourest +dourly +douse +doused +douses +dousing +dove +dove's +doves +dovetail +dovetail's +dovetailed +dovetailing +dovetails +dowager +dowager's +dowagers +dowdier +dowdies +dowdiest +dowdily +dowdiness +dowdiness's +dowdy +dowel +dowel's +doweled +doweling +dowelled +dowelling +dowels +down +down's +downbeat +downbeat's +downbeats +downcast +downed +downer +downer's +downers +downfall +downfall's +downfalls +downgrade +downgrade's +downgraded +downgrades +downgrading +downhearted +downhill +downhill's +downhills +downier +downiest +downing +download +download's +downloadable +downloaded +downloading +downloads +downplay +downplayed +downplaying +downplays +downpour +downpour's +downpours +downright +downs +downscale +downsize +downsized +downsizes +downsizing +downsizing's +downstage +downstairs +downstairs's +downstate +downstate's +downstream +downswing +downswing's +downswings +downtime +downtime's +downtown +downtown's +downtrodden +downturn +downturn's +downturns +downward +downwards +downwind +downy +dowries +dowry +dowry's +dowse +dowsed +dowses +dowsing +doxologies +doxology +doxology's +doyen +doyen's +doyens +doze +doze's +dozed +dozen +dozen's +dozens +dozes +dozing +drab +drab's +drabber +drabbest +drably +drabness +drabness's +drabs +drachma +drachma's +drachmae +drachmai +drachmas +draconian +draft +draft's +drafted +draftee +draftee's +draftees +draftier +draftiest 
+draftiness +draftiness's +drafting +drafts +draftsman +draftsman's +draftsmanship +draftsmanship's +draftsmen +drafty +drag +drag's +dragged +dragging +dragnet +dragnet's +dragnets +dragon +dragon's +dragonflies +dragonfly +dragonfly's +dragons +dragoon +dragoon's +dragooned +dragooning +dragoons +drags +drain +drain's +drainage +drainage's +drained +drainer +drainer's +drainers +draining +drainpipe +drainpipe's +drainpipes +drains +drake +drake's +drakes +dram +dram's +drama +drama's +dramas +dramatic +dramatically +dramatics +dramatics's +dramatist +dramatist's +dramatists +dramatization +dramatization's +dramatizations +dramatize +dramatized +dramatizes +dramatizing +drams +drank +drape +drape's +draped +draperies +drapery +drapery's +drapes +draping +drastic +drastically +draw +draw's +drawback +drawback's +drawbacks +drawbridge +drawbridge's +drawbridges +drawer +drawer's +drawers +drawing +drawing's +drawings +drawl +drawl's +drawled +drawling +drawls +drawn +draws +drawstring +drawstring's +drawstrings +dray +dray's +drays +dread +dread's +dreaded +dreadful +dreadfully +dreading +dreadlocks +dreadlocks's +dreadnought +dreadnought's +dreadnoughts +dreads +dream +dream's +dreamed +dreamer +dreamer's +dreamers +dreamier +dreamiest +dreamily +dreaming +dreamland +dreamland's +dreamless +dreamlike +dreams +dreamy +drearier +dreariest +drearily +dreariness +dreariness's +dreary +dredge +dredge's +dredged +dredger +dredger's +dredgers +dredges +dredging +dregs +dregs's +drench +drenched +drenches +drenching +dress +dress's +dressage +dressage's +dressed +dresser +dresser's +dressers +dresses +dressier +dressiest +dressiness +dressiness's +dressing +dressing's +dressings +dressmaker +dressmaker's +dressmakers +dressmaking +dressmaking's +dressy +drew +dribble +dribble's +dribbled +dribbler +dribbler's +dribblers +dribbles +dribbling +driblet +driblet's +driblets +dried +drier +drier's +driers +dries +driest +drift +drift's +drifted +drifter +drifter's +drifters 
+drifting +drifts +driftwood +driftwood's +drill +drill's +drilled +drilling +drills +drily +drink +drink's +drinkable +drinker +drinker's +drinkers +drinking +drinkings +drinks +drip +drip's +dripped +dripping +dripping's +drippings +drips +drive +drive's +drivel +drivel's +driveled +driveling +drivelled +drivelling +drivels +driven +driver +driver's +drivers +drives +driveway +driveway's +driveways +driving +drivings +drizzle +drizzle's +drizzled +drizzles +drizzling +drizzly +droll +droller +drolleries +drollery +drollery's +drollest +drollness +drollness's +drolly +dromedaries +dromedary +dromedary's +drone +drone's +droned +drones +droning +drool +drool's +drooled +drooling +drools +droop +droop's +drooped +droopier +droopiest +drooping +droops +droopy +drop +drop's +droplet +droplet's +droplets +dropout +dropout's +dropouts +dropped +dropper +dropper's +droppers +dropping +droppings +droppings's +drops +dropsy +dropsy's +dross +dross's +drought +drought's +droughts +drouth +drouth's +drouthes +drouths +drove +drove's +drover +drover's +drovers +droves +drown +drowned +drowning +drowning's +drownings +drowns +drowse +drowse's +drowsed +drowses +drowsier +drowsiest +drowsily +drowsiness +drowsiness's +drowsing +drowsy +drub +drubbed +drubbing +drubbing's +drubbings +drubs +drudge +drudge's +drudged +drudgery +drudgery's +drudges +drudging +drug +drug's +drugged +drugging +druggist +druggist's +druggists +drugs +drugstore +drugstore's +drugstores +druid +druid's +druids +drum +drum's +drummed +drummer +drummer's +drummers +drumming +drums +drumstick +drumstick's +drumsticks +drunk +drunk's +drunkard +drunkard's +drunkards +drunken +drunkenly +drunkenness +drunkenness's +drunker +drunkest +drunks +dry +dry's +dryad +dryad's +dryads +dryer +dryer's +dryers +dryest +drying +dryly +dryness +dryness's +drys +drywall +drywall's +dual +dualism +duality +duality's +dub +dub's +dubbed +dubbing +dubiety +dubiety's +dubious +dubiously +dubiousness +dubiousness's +dubs 
+ducal +ducat +ducat's +ducats +duchess +duchess's +duchesses +duchies +duchy +duchy's +duck +duck's +duckbill +duckbill's +duckbills +ducked +ducking +duckling +duckling's +ducklings +ducks +duct +duct's +ductile +ductility +ductility's +ducting +ductless +ducts +dud +dud's +dude +dude's +duded +dudes +dudgeon +dudgeon's +duding +duds +due +due's +duel +duel's +dueled +dueling +duelist +duelist's +duelists +duelled +duelling +duellist +duellist's +duellists +duels +dues +duet +duet's +duets +duff +duffer +duffer's +duffers +dug +dugout +dugout's +dugouts +duh +duke +duke's +dukedom +dukedom's +dukedoms +dukes +dulcet +dulcimer +dulcimer's +dulcimers +dull +dullard +dullard's +dullards +dulled +duller +dullest +dulling +dullness +dullness's +dulls +dully +dulness +dulness's +duly +dumb +dumbbell +dumbbell's +dumbbells +dumber +dumbest +dumbfound +dumbfounded +dumbfounding +dumbfounds +dumbly +dumbness +dumbness's +dumbwaiter +dumbwaiter's +dumbwaiters +dumfound +dumfounded +dumfounding +dumfounds +dummies +dummy +dummy's +dump +dump's +dumped +dumpier +dumpiest +dumping +dumpling +dumpling's +dumplings +dumps +dumpster +dumpy +dun +dun's +dunce +dunce's +dunces +dune +dune's +dunes +dung +dung's +dungaree +dungaree's +dungarees +dunged +dungeon +dungeon's +dungeons +dunging +dungs +dunk +dunk's +dunked +dunking +dunks +dunned +dunner +dunnest +dunning +dunno +duns +duo +duo's +duodena +duodenal +duodenum +duodenum's +duodenums +duos +dupe +dupe's +duped +dupes +duping +duplex +duplex's +duplexes +duplicate +duplicate's +duplicated +duplicates +duplicating +duplication +duplication's +duplicator +duplicator's +duplicators +duplicity +duplicity's +durability +durability's +durable +durably +duration +duration's +duress +duress's +during +dusk +dusk's +duskier +duskiest +dusky +dust +dust's +dustbin +dustbin's +dustbins +dusted +duster +duster's +dusters +dustier +dustiest +dustiness +dustiness's +dusting +dustless +dustman +dustmen +dustpan +dustpan's +dustpans 
+dusts +dusty +duteous +dutiable +duties +dutiful +dutifully +duty +duty's +duvet +dwarf +dwarf's +dwarfed +dwarfing +dwarfish +dwarfism +dwarfism's +dwarfs +dwarves +dweeb +dweeb's +dweebs +dwell +dwelled +dweller +dweller's +dwellers +dwelling +dwelling's +dwellings +dwells +dwelt +dwindle +dwindled +dwindles +dwindling +dyadic +dye +dye's +dyed +dyeing +dyer +dyer's +dyers +dyes +dyestuff +dyestuff's +dying +dying's +dyke +dyke's +dykes +dynamic +dynamic's +dynamical +dynamically +dynamics +dynamics's +dynamism +dynamism's +dynamite +dynamite's +dynamited +dynamites +dynamiting +dynamo +dynamo's +dynamos +dynastic +dynasties +dynasty +dynasty's +dysentery +dysentery's +dysfunction +dysfunction's +dysfunctional +dysfunctions +dyslexia +dyslexia's +dyslexic +dyslexic's +dyslexics +dyspepsia +dyspepsia's +dyspeptic +dyspeptic's +dyspeptics +débutante +débutante's +débutantes +décolleté +dérailleur +dérailleur's +dérailleurs +détente +détente's +e +e'er +eBay +eBay's +eMusic +eMusic's +each +eager +eagerer +eagerest +eagerly +eagerness +eagerness's +eagle +eagle's +eagles +eaglet +eaglet's +eaglets +ear +ear's +earache +earache's +earaches +earbud +earbud's +earbuds +eardrum +eardrum's +eardrums +earful +earful's +earfuls +earl +earl's +earldom +earldom's +earldoms +earlier +earliest +earliness +earliness's +earlobe +earlobe's +earlobes +earls +early +earmark +earmark's +earmarked +earmarking +earmarks +earmuff +earmuff's +earmuffs +earn +earned +earner +earner's +earners +earnest +earnest's +earnestly +earnestness +earnestness's +earnests +earning +earnings +earnings's +earns +earphone +earphone's +earphones +earplug +earplug's +earplugs +earring +earring's +earrings +ears +earshot +earshot's +earsplitting +earth +earth's +earthed +earthen +earthenware +earthenware's +earthier +earthiest +earthiness +earthiness's +earthing +earthlier +earthliest +earthling +earthling's +earthlings +earthly +earthquake +earthquake's +earthquakes +earths +earthshaking +earthward 
+earthwork +earthwork's +earthworks +earthworm +earthworm's +earthworms +earthy +earwax +earwax's +earwig +earwig's +earwigs +ease +ease's +eased +easel +easel's +easels +eases +easier +easiest +easily +easiness +easiness's +easing +east +east's +eastbound +easterlies +easterly +easterly's +eastern +easterner +easterner's +easterners +easternmost +eastward +eastwards +easy +easygoing +eat +eatable +eatable's +eatables +eaten +eater +eater's +eateries +eaters +eatery +eatery's +eating +eats +eave +eave's +eaves +eavesdrop +eavesdropped +eavesdropper +eavesdropper's +eavesdroppers +eavesdropping +eavesdrops +ebb +ebb's +ebbed +ebbing +ebbs +ebonies +ebony +ebony's +ebullience +ebullience's +ebullient +eccentric +eccentric's +eccentrically +eccentricities +eccentricity +eccentricity's +eccentrics +ecclesiastic +ecclesiastic's +ecclesiastical +ecclesiastics +echelon +echelon's +echelons +echo +echo's +echoed +echoes +echoing +echos +eclectic +eclectic's +eclectically +eclecticism +eclecticism's +eclectics +eclipse +eclipse's +eclipsed +eclipses +eclipsing +ecliptic +ecliptic's +ecological +ecologically +ecologist +ecologist's +ecologists +ecology +ecology's +econometric +economic +economical +economically +economics +economics's +economies +economist +economist's +economists +economize +economized +economizes +economizing +economy +economy's +ecosystem +ecosystem's +ecosystems +ecotourism +ecotourism's +ecru +ecru's +ecstasies +ecstasy +ecstasy's +ecstatic +ecstatically +ecumenical +ecumenically +eczema +eczema's +edamame +eddied +eddies +eddy +eddy's +eddying +edelweiss +edelweiss's +edema +edema's +edge +edge's +edged +edger +edges +edgeways +edgewise +edgier +edgiest +edginess +edginess's +edging +edging's +edgings +edgy +edibility +edibility's +edible +edible's +edibles +edict +edict's +edicts +edification +edification's +edifice +edifice's +edifices +edified +edifies +edify +edifying +edit +edit's +editable +edited +editing +edition +edition's +editions +editor 
+editor's +editorial +editorial's +editorialize +editorialized +editorializes +editorializing +editorially +editorials +editors +editorship +edits +educable +educate +educated +educates +educating +education +education's +educational +educationally +educations +educator +educator's +educators +eel +eel's +eels +eerie +eerier +eeriest +eerily +eeriness +eeriness's +eery +efface +effaced +effacement +effacement's +effaces +effacing +effect +effect's +effected +effecting +effective +effectively +effectiveness +effectiveness's +effects +effectual +effectually +effectuate +effectuated +effectuates +effectuating +effeminacy +effeminacy's +effeminate +effervesce +effervesced +effervescence +effervescence's +effervescent +effervesces +effervescing +effete +efficacious +efficaciously +efficacy +efficacy's +efficiencies +efficiency +efficiency's +efficient +efficiently +effigies +effigy +effigy's +effluent +effluent's +effluents +effort +effort's +effortless +effortlessly +efforts +effrontery +effrontery's +effulgence +effulgence's +effulgent +effusion +effusion's +effusions +effusive +effusively +effusiveness +effusiveness's +egalitarian +egalitarian's +egalitarianism +egalitarianism's +egalitarians +egg +egg's +eggbeater +eggbeater's +eggbeaters +egged +egghead +egghead's +eggheads +egging +eggnog +eggnog's +eggplant +eggplant's +eggplants +eggs +eggshell +eggshell's +eggshells +egis +egis's +eglantine +eglantine's +eglantines +ego +ego's +egocentric +egocentric's +egocentrics +egoism +egoism's +egoist +egoist's +egoistic +egoists +egos +egotism +egotism's +egotist +egotist's +egotistic +egotistical +egotistically +egotists +egregious +egregiously +egress +egress's +egresses +egret +egret's +egrets +eh +eider +eider's +eiderdown +eiderdown's +eiderdowns +eiders +eigenvalue +eigenvalues +eight +eight's +eighteen +eighteen's +eighteens +eighteenth +eighteenth's +eighteenths +eighth +eighth's +eighths +eighties +eightieth +eightieth's +eightieths +eights +eighty +eighty's 
+either +ejaculate +ejaculated +ejaculates +ejaculating +ejaculation +ejaculation's +ejaculations +eject +ejected +ejecting +ejection +ejection's +ejections +ejects +eke +eked +ekes +eking +elaborate +elaborated +elaborately +elaborateness +elaborateness's +elaborates +elaborating +elaboration +elaboration's +elaborations +elapse +elapsed +elapses +elapsing +elastic +elastic's +elasticity +elasticity's +elastics +elate +elated +elates +elating +elation +elation's +elbow +elbow's +elbowed +elbowing +elbowroom +elbowroom's +elbows +elder +elder's +elderberries +elderberry +elderberry's +eldercare +eldercare's +elderly +elders +eldest +elect +elect's +elected +electing +election +election's +electioneer +electioneered +electioneering +electioneers +elections +elective +elective's +electives +elector +elector's +electoral +electorate +electorate's +electorates +electors +electric +electrical +electrically +electrician +electrician's +electricians +electricity +electricity's +electrification +electrification's +electrified +electrifies +electrify +electrifying +electrocardiogram +electrocardiogram's +electrocardiograms +electrocardiograph +electrocardiograph's +electrocardiographs +electrocute +electrocuted +electrocutes +electrocuting +electrocution +electrocution's +electrocutions +electrode +electrode's +electrodes +electrodynamics +electroencephalogram +electroencephalogram's +electroencephalograms +electroencephalograph +electroencephalograph's +electroencephalographs +electrolysis +electrolysis's +electrolyte +electrolyte's +electrolytes +electrolytic +electromagnet +electromagnet's +electromagnetic +electromagnetism +electromagnetism's +electromagnets +electron +electron's +electronic +electronica +electronica's +electronically +electronics +electronics's +electrons +electroplate +electroplated +electroplates +electroplating +electrostatic +elects +elegance +elegance's +elegant +elegantly +elegiac +elegiac's +elegiacs +elegies +elegy +elegy's +element +element's 
+elemental +elementary +elements +elephant +elephant's +elephantine +elephants +elevate +elevated +elevates +elevating +elevation +elevation's +elevations +elevator +elevator's +elevators +eleven +eleven's +elevens +eleventh +eleventh's +elevenths +elf +elf's +elfin +elfish +elicit +elicited +eliciting +elicits +elide +elided +elides +eliding +eligibility +eligibility's +eligible +eliminate +eliminated +eliminates +eliminating +elimination +elimination's +eliminations +elision +elision's +elisions +elite +elite's +elites +elitism +elitism's +elitist +elitist's +elitists +elixir +elixir's +elixirs +elk +elk's +elks +ell +ell's +ellipse +ellipse's +ellipses +ellipsis +ellipsis's +elliptic +elliptical +elliptically +ells +elm +elm's +elms +elocution +elocution's +elocutionist +elocutionist's +elocutionists +elongate +elongated +elongates +elongating +elongation +elongation's +elongations +elope +eloped +elopement +elopement's +elopements +elopes +eloping +eloquence +eloquence's +eloquent +eloquently +else +elsewhere +elucidate +elucidated +elucidates +elucidating +elucidation +elucidation's +elucidations +elude +eluded +eludes +eluding +elusive +elusively +elusiveness +elusiveness's +elves +em +em's +emaciate +emaciated +emaciates +emaciating +emaciation +emaciation's +email +email's +emailed +emailing +emails +emanate +emanated +emanates +emanating +emanation +emanation's +emanations +emancipate +emancipated +emancipates +emancipating +emancipation +emancipation's +emancipator +emancipator's +emancipators +emasculate +emasculated +emasculates +emasculating +emasculation +emasculation's +embalm +embalmed +embalmer +embalmer's +embalmers +embalming +embalms +embankment +embankment's +embankments +embargo +embargo's +embargoed +embargoes +embargoing +embark +embarkation +embarkation's +embarkations +embarked +embarking +embarks +embarrass +embarrassed +embarrasses +embarrassing +embarrassingly +embarrassment +embarrassment's +embarrassments +embassies +embassy 
+embassy's +embattled +embed +embedded +embedding +embeds +embellish +embellished +embellishes +embellishing +embellishment +embellishment's +embellishments +ember +ember's +embers +embezzle +embezzled +embezzlement +embezzlement's +embezzler +embezzler's +embezzlers +embezzles +embezzling +embitter +embittered +embittering +embitters +emblazon +emblazoned +emblazoning +emblazons +emblem +emblem's +emblematic +emblems +embodied +embodies +embodiment +embodiment's +embody +embodying +embolden +emboldened +emboldening +emboldens +embolism +embolism's +embolisms +emboss +embossed +embosses +embossing +embrace +embrace's +embraced +embraces +embracing +embroider +embroidered +embroideries +embroidering +embroiders +embroidery +embroidery's +embroil +embroiled +embroiling +embroils +embryo +embryo's +embryologist +embryologist's +embryologists +embryology +embryology's +embryonic +embryos +emcee +emcee's +emceed +emceeing +emcees +emend +emendation +emendation's +emendations +emended +emending +emends +emerald +emerald's +emeralds +emerge +emerged +emergence +emergence's +emergencies +emergency +emergency's +emergent +emerges +emerging +emeritus +emery +emery's +emetic +emetic's +emetics +emigrant +emigrant's +emigrants +emigrate +emigrated +emigrates +emigrating +emigration +emigration's +emigrations +eminence +eminence's +eminences +eminent +eminently +emir +emir's +emirate +emirate's +emirates +emirs +emissaries +emissary +emissary's +emission +emission's +emissions +emit +emits +emitted +emitting +emo +emo's +emoji +emoji's +emojis +emollient +emollient's +emollients +emolument +emolument's +emoluments +emos +emote +emoted +emotes +emoting +emotion +emotion's +emotional +emotionalism +emotionalism's +emotionally +emotions +emotive +empanel +empaneled +empaneling +empanels +empathetic +empathize +empathized +empathizes +empathizing +empathy +empathy's +emperor +emperor's +emperors +emphases +emphasis +emphasis's +emphasize +emphasized +emphasizes +emphasizing 
+emphatic +emphatically +emphysema +emphysema's +empire +empire's +empires +empirical +empirically +empiricism +empiricism's +emplacement +emplacement's +emplacements +employ +employ's +employable +employe +employe's +employed +employee +employee's +employees +employer +employer's +employers +employes +employing +employment +employment's +employments +employs +emporia +emporium +emporium's +emporiums +empower +empowered +empowering +empowerment +empowerment's +empowers +empress +empress's +empresses +emptied +emptier +empties +emptiest +emptily +emptiness +emptiness's +empty +empty's +emptying +ems +emu +emu's +emulate +emulated +emulates +emulating +emulation +emulation's +emulations +emulator +emulator's +emulators +emulsification +emulsification's +emulsified +emulsifies +emulsify +emulsifying +emulsion +emulsion's +emulsions +emus +enable +enabled +enables +enabling +enact +enacted +enacting +enactment +enactment's +enactments +enacts +enamel +enamel's +enameled +enameling +enamelled +enamelling +enamels +enamor +enamored +enamoring +enamors +encamp +encamped +encamping +encampment +encampment's +encampments +encamps +encapsulate +encapsulated +encapsulates +encapsulating +encapsulation +encapsulation's +encapsulations +encase +encased +encases +encasing +encephalitis +encephalitis's +enchant +enchanted +enchanter +enchanter's +enchanters +enchanting +enchantingly +enchantment +enchantment's +enchantments +enchantress +enchantress's +enchantresses +enchants +enchilada +enchilada's +enchiladas +encircle +encircled +encirclement +encirclement's +encircles +encircling +enclave +enclave's +enclaves +enclose +enclosed +encloses +enclosing +enclosure +enclosure's +enclosures +encode +encoded +encoder +encoder's +encoders +encodes +encoding +encompass +encompassed +encompasses +encompassing +encore +encore's +encored +encores +encoring +encounter +encounter's +encountered +encountering +encounters +encourage +encouraged +encouragement +encouragement's +encouragements 
+encourages +encouraging +encouragingly +encroach +encroached +encroaches +encroaching +encroachment +encroachment's +encroachments +encrust +encrustation +encrustation's +encrustations +encrusted +encrusting +encrusts +encrypt +encrypted +encryption +encrypts +encumber +encumbered +encumbering +encumbers +encumbrance +encumbrance's +encumbrances +encyclical +encyclical's +encyclicals +encyclopaedia +encyclopaedia's +encyclopaedias +encyclopaedic +encyclopedia +encyclopedia's +encyclopedias +encyclopedic +end +end's +endanger +endangered +endangering +endangers +endear +endeared +endearing +endearingly +endearment +endearment's +endearments +endears +endeavor +endeavor's +endeavored +endeavoring +endeavors +ended +endemic +endemic's +endemics +ending +ending's +endings +endive +endive's +endives +endless +endlessly +endlessness +endlessness's +endocrine +endocrine's +endocrines +endorse +endorsed +endorsement +endorsement's +endorsements +endorser +endorser's +endorsers +endorses +endorsing +endow +endowed +endowing +endowment +endowment's +endowments +endows +ends +endue +endued +endues +enduing +endurable +endurance +endurance's +endure +endured +endures +enduring +endways +endwise +enema +enema's +enemas +enemata +enemies +enemy +enemy's +energetic +energetically +energies +energize +energized +energizer +energizer's +energizers +energizes +energizing +energy +energy's +enervate +enervated +enervates +enervating +enervation +enervation's +enfeeble +enfeebled +enfeebles +enfeebling +enfold +enfolded +enfolding +enfolds +enforce +enforceable +enforced +enforcement +enforcement's +enforcer +enforcer's +enforcers +enforces +enforcing +enfranchise +enfranchised +enfranchisement +enfranchisement's +enfranchises +enfranchising +engage +engaged +engagement +engagement's +engagements +engages +engaging +engagingly +engender +engendered +engendering +engenders +engine +engine's +engineer +engineer's +engineered +engineering +engineering's +engineers +engines +engorge 
+engorged +engorges +engorging +engrave +engraved +engraver +engraver's +engravers +engraves +engraving +engraving's +engravings +engross +engrossed +engrosses +engrossing +engulf +engulfed +engulfing +engulfs +enhance +enhanced +enhancement +enhancement's +enhancements +enhancer +enhances +enhancing +enigma +enigma's +enigmas +enigmatic +enigmatically +enjoin +enjoined +enjoining +enjoins +enjoy +enjoyable +enjoyed +enjoying +enjoyment +enjoyment's +enjoyments +enjoys +enlarge +enlarged +enlargement +enlargement's +enlargements +enlarger +enlarger's +enlargers +enlarges +enlarging +enlighten +enlightened +enlightening +enlightenment +enlightenment's +enlightens +enlist +enlisted +enlistee +enlistee's +enlistees +enlisting +enlistment +enlistment's +enlistments +enlists +enliven +enlivened +enlivening +enlivens +enmesh +enmeshed +enmeshes +enmeshing +enmities +enmity +enmity's +ennoble +ennobled +ennoblement +ennoblement's +ennobles +ennobling +ennui +ennui's +enormities +enormity +enormity's +enormous +enormously +enormousness +enormousness's +enough +enough's +enquire +enquired +enquires +enquiries +enquiring +enquiry +enquiry's +enrage +enraged +enrages +enraging +enrapture +enraptured +enraptures +enrapturing +enrich +enriched +enriches +enriching +enrichment +enrichment's +enrol +enroll +enrolled +enrolling +enrollment +enrollment's +enrollments +enrolls +enrolment +enrolment's +enrolments +enrols +ensconce +ensconced +ensconces +ensconcing +ensemble +ensemble's +ensembles +enshrine +enshrined +enshrines +enshrining +enshroud +enshrouded +enshrouding +enshrouds +ensign +ensign's +ensigns +enslave +enslaved +enslavement +enslavement's +enslaves +enslaving +ensnare +ensnared +ensnares +ensnaring +ensue +ensued +ensues +ensuing +ensure +ensured +ensures +ensuring +entail +entailed +entailing +entails +entangle +entangled +entanglement +entanglement's +entanglements +entangles +entangling +entente +entente's +ententes +enter +entered +entering +enterprise 
+enterprise's +enterprises +enterprising +enters +entertain +entertained +entertainer +entertainer's +entertainers +entertaining +entertaining's +entertainingly +entertainment +entertainment's +entertainments +entertains +enthral +enthrall +enthralled +enthralling +enthralls +enthrals +enthrone +enthroned +enthronement +enthronement's +enthronements +enthrones +enthroning +enthuse +enthused +enthuses +enthusiasm +enthusiasm's +enthusiasms +enthusiast +enthusiast's +enthusiastic +enthusiastically +enthusiasts +enthusing +entice +enticed +enticement +enticement's +enticements +entices +enticing +entire +entirely +entirety +entirety's +entities +entitle +entitled +entitlement +entitlement's +entitlements +entitles +entitling +entity +entity's +entomb +entombed +entombing +entombment +entombment's +entombs +entomological +entomologist +entomologist's +entomologists +entomology +entomology's +entourage +entourage's +entourages +entrails +entrails's +entrance +entrance's +entranced +entrances +entrancing +entrant +entrant's +entrants +entrap +entrapment +entrapment's +entrapped +entrapping +entraps +entreat +entreated +entreaties +entreating +entreats +entreaty +entreaty's +entrench +entrenched +entrenches +entrenching +entrenchment +entrenchment's +entrenchments +entrepreneur +entrepreneur's +entrepreneurial +entrepreneurs +entries +entropy +entropy's +entrust +entrusted +entrusting +entrusts +entry +entry's +entryway +entryway's +entryways +entrée +entrée's +entrées +entwine +entwined +entwines +entwining +enumerable +enumerate +enumerated +enumerates +enumerating +enumeration +enumeration's +enumerations +enunciate +enunciated +enunciates +enunciating +enunciation +enunciation's +enure +enured +enures +enuring +envelop +envelope +envelope's +enveloped +envelopes +enveloping +envelopment +envelopment's +envelops +enviable +enviably +envied +envies +envious +enviously +enviousness +enviousness's +environment +environment's +environmental +environmentalism 
+environmentalism's +environmentalist +environmentalist's +environmentalists +environmentally +environments +environs +environs's +envisage +envisaged +envisages +envisaging +envision +envisioned +envisioning +envisions +envoy +envoy's +envoys +envy +envy's +envying +enzyme +enzyme's +enzymes +eon +eon's +eons +epaulet +epaulet's +epaulets +epaulette +epaulette's +epaulettes +ephemeral +epic +epic's +epicenter +epicenter's +epicenters +epics +epicure +epicure's +epicurean +epicurean's +epicureans +epicures +epidemic +epidemic's +epidemics +epidemiology +epidemiology's +epidermal +epidermis +epidermis's +epidermises +epiglottides +epiglottis +epiglottis's +epiglottises +epigram +epigram's +epigrammatic +epigrams +epilepsy +epilepsy's +epileptic +epileptic's +epileptics +epilog +epilog's +epilogs +epilogue +epilogue's +epilogues +episcopacy +episcopacy's +episcopal +episcopate +episcopate's +episode +episode's +episodes +episodic +epistemology +epistle +epistle's +epistles +epistolary +epitaph +epitaph's +epitaphs +epithet +epithet's +epithets +epitome +epitome's +epitomes +epitomize +epitomized +epitomizes +epitomizing +epoch +epoch's +epochal +epochs +epoxied +epoxies +epoxy +epoxy's +epoxyed +epoxying +epsilon +equability +equability's +equable +equably +equal +equal's +equaled +equaling +equality +equality's +equalization +equalization's +equalize +equalized +equalizer +equalizer's +equalizers +equalizes +equalizing +equalled +equalling +equally +equals +equanimity +equanimity's +equate +equated +equates +equating +equation +equation's +equations +equator +equator's +equatorial +equators +equestrian +equestrian's +equestrians +equestrienne +equestrienne's +equestriennes +equidistant +equilateral +equilateral's +equilaterals +equilibrium +equilibrium's +equine +equine's +equines +equinoctial +equinox +equinox's +equinoxes +equip +equipage +equipage's +equipages +equipment +equipment's +equipoise +equipoise's +equipped +equipping +equips +equitable +equitably 
+equities +equity +equity's +equivalence +equivalence's +equivalences +equivalent +equivalent's +equivalently +equivalents +equivocal +equivocally +equivocate +equivocated +equivocates +equivocating +equivocation +equivocation's +equivocations +era +era's +eradicate +eradicated +eradicates +eradicating +eradication +eradication's +eras +erase +erased +eraser +eraser's +erasers +erases +erasing +erasure +erasure's +erasures +ere +erect +erected +erectile +erecting +erection +erection's +erections +erectly +erectness +erectness's +erects +erg +erg's +ergo +ergonomic +ergonomics +ergonomics's +ergs +ermine +ermine's +ermines +erode +eroded +erodes +eroding +erogenous +erosion +erosion's +erosive +erotic +erotica +erotica's +erotically +eroticism +eroticism's +err +errand +errand's +errands +errant +errata +errata's +erratas +erratic +erratically +erratum +erratum's +erred +erring +erroneous +erroneously +error +error's +errors +errs +ersatz +ersatz's +ersatzes +erstwhile +erudite +eruditely +erudition +erudition's +erupt +erupted +erupting +eruption +eruption's +eruptions +erupts +erythrocyte +erythrocyte's +erythrocytes +es +escalate +escalated +escalates +escalating +escalation +escalation's +escalations +escalator +escalator's +escalators +escapade +escapade's +escapades +escape +escape's +escaped +escapee +escapee's +escapees +escapes +escaping +escapism +escapism's +escapist +escapist's +escapists +escarole +escarole's +escaroles +escarpment +escarpment's +escarpments +eschatology +eschew +eschewed +eschewing +eschews +escort +escort's +escorted +escorting +escorts +escrow +escrow's +escrows +escutcheon +escutcheon's +escutcheons +esophagi +esophagus +esophagus's +esophaguses +esoteric +esoterically +espadrille +espadrille's +espadrilles +especial +especially +espied +espies +espionage +espionage's +esplanade +esplanade's +esplanades +espousal +espousal's +espouse +espoused +espouses +espousing +espresso +espresso's +espressos +espy +espying +esquire +esquire's 
+esquires +essay +essay's +essayed +essaying +essayist +essayist's +essayists +essays +essence +essence's +essences +essential +essential's +essentially +essentials +establish +established +establishes +establishing +establishment +establishment's +establishments +estate +estate's +estates +esteem +esteem's +esteemed +esteeming +esteems +ester +ester's +esters +esthete +esthete's +esthetes +esthetic +esthetics +estimable +estimate +estimate's +estimated +estimates +estimating +estimation +estimation's +estimations +estimator +estimator's +estimators +estrange +estranged +estrangement +estrangement's +estrangements +estranges +estranging +estrogen +estrogen's +estuaries +estuary +estuary's +eta +etch +etched +etcher +etcher's +etchers +etches +etching +etching's +etchings +eternal +eternally +eternities +eternity +eternity's +ether +ether's +ethereal +ethereally +ethic +ethic's +ethical +ethically +ethics +ethics's +ethnic +ethnic's +ethnically +ethnicity +ethnicity's +ethnics +ethnological +ethnologist +ethnologist's +ethnologists +ethnology +ethnology's +ethos +ethos's +etiologies +etiology +etiology's +etiquette +etiquette's +etymological +etymologies +etymologist +etymologist's +etymologists +etymology +etymology's +eucalypti +eucalyptus +eucalyptus's +eucalyptuses +eugenics +eugenics's +eulogies +eulogistic +eulogize +eulogized +eulogizes +eulogizing +eulogy +eulogy's +eunuch +eunuch's +eunuchs +euphemism +euphemism's +euphemisms +euphemistic +euphemistically +euphony +euphony's +euphoria +euphoria's +euphoric +eureka +euro +euro's +euros +eutectic +euthanasia +euthanasia's +evacuate +evacuated +evacuates +evacuating +evacuation +evacuation's +evacuations +evacuee +evacuee's +evacuees +evade +evaded +evades +evading +evaluate +evaluated +evaluates +evaluating +evaluation +evaluation's +evaluations +evanescent +evangelical +evangelical's +evangelicals +evangelism +evangelism's +evangelist +evangelist's +evangelistic +evangelists +evangelize +evangelized 
+evangelizes +evangelizing +evaporate +evaporated +evaporates +evaporating +evaporation +evaporation's +evasion +evasion's +evasions +evasive +evasively +evasiveness +evasiveness's +eve +eve's +even +even's +evened +evener +evenest +evenhanded +evening +evening's +evenings +evenly +evenness +evenness's +evens +event +event's +eventful +eventfully +eventfulness +eventfulness's +eventide +eventide's +events +eventual +eventualities +eventuality +eventuality's +eventually +eventuate +eventuated +eventuates +eventuating +ever +everglade +everglade's +everglades +evergreen +evergreen's +evergreens +everlasting +everlasting's +everlastings +evermore +every +everybody +everybody's +everyday +everyone +everyone's +everyplace +everything +everything's +everywhere +eves +evict +evicted +evicting +eviction +eviction's +evictions +evicts +evidence +evidence's +evidenced +evidences +evidencing +evident +evidently +evil +evil's +evildoer +evildoer's +evildoers +eviler +evilest +eviller +evillest +evilly +evils +evince +evinced +evinces +evincing +eviscerate +eviscerated +eviscerates +eviscerating +evisceration +evisceration's +evocation +evocation's +evocations +evocative +evoke +evoked +evokes +evoking +evolution +evolution's +evolutionary +evolve +evolved +evolves +evolving +ewe +ewe's +ewer +ewer's +ewers +ewes +ex +ex's +exacerbate +exacerbated +exacerbates +exacerbating +exacerbation +exacerbation's +exact +exacted +exacter +exactest +exacting +exactingly +exactitude +exactitude's +exactly +exactness +exactness's +exacts +exaggerate +exaggerated +exaggerates +exaggerating +exaggeration +exaggeration's +exaggerations +exalt +exaltation +exaltation's +exalted +exalting +exalts +exam +exam's +examination +examination's +examinations +examine +examined +examiner +examiner's +examiners +examines +examining +example +example's +exampled +examples +exampling +exams +exasperate +exasperated +exasperates +exasperating +exasperation +exasperation's +excavate +excavated +excavates 
+excavating +excavation +excavation's +excavations +excavator +excavator's +excavators +exceed +exceeded +exceeding +exceedingly +exceeds +excel +excelled +excellence +excellence's +excellent +excellently +excelling +excels +except +excepted +excepting +exception +exception's +exceptionable +exceptional +exceptionally +exceptions +excepts +excerpt +excerpt's +excerpted +excerpting +excerpts +excess +excess's +excesses +excessive +excessively +exchange +exchange's +exchangeable +exchanged +exchanges +exchanging +exchequer +exchequer's +exchequers +excise +excise's +excised +excises +excising +excision +excision's +excisions +excitability +excitability's +excitable +excitation +excitation's +excite +excited +excitedly +excitement +excitement's +excitements +excites +exciting +excitingly +exclaim +exclaimed +exclaiming +exclaims +exclamation +exclamation's +exclamations +exclamatory +exclude +excluded +excludes +excluding +exclusion +exclusion's +exclusive +exclusive's +exclusively +exclusiveness +exclusiveness's +exclusives +exclusivity +exclusivity's +excommunicate +excommunicated +excommunicates +excommunicating +excommunication +excommunication's +excommunications +excoriate +excoriated +excoriates +excoriating +excoriation +excoriation's +excoriations +excrement +excrement's +excrescence +excrescence's +excrescences +excreta +excreta's +excrete +excreted +excretes +excreting +excretion +excretion's +excretions +excretory +excruciating +excruciatingly +exculpate +exculpated +exculpates +exculpating +excursion +excursion's +excursions +excusable +excuse +excuse's +excused +excuses +excusing +exec +exec's +execrable +execrate +execrated +execrates +execrating +execs +executable +execute +executed +executes +executing +execution +execution's +executioner +executioner's +executioners +executions +executive +executive's +executives +executor +executor's +executors +executrices +executrix +executrix's +executrixes +exegeses +exegesis +exegesis's +exemplar +exemplar's 
+exemplars +exemplary +exemplification +exemplification's +exemplifications +exemplified +exemplifies +exemplify +exemplifying +exempt +exempted +exempting +exemption +exemption's +exemptions +exempts +exercise +exercise's +exercised +exercises +exercising +exert +exerted +exerting +exertion +exertion's +exertions +exerts +exes +exhalation +exhalation's +exhalations +exhale +exhaled +exhales +exhaling +exhaust +exhaust's +exhausted +exhaustible +exhausting +exhaustion +exhaustion's +exhaustive +exhaustively +exhausts +exhibit +exhibit's +exhibited +exhibiting +exhibition +exhibition's +exhibitionism +exhibitionism's +exhibitionist +exhibitionist's +exhibitionists +exhibitions +exhibitor +exhibitor's +exhibitors +exhibits +exhilarate +exhilarated +exhilarates +exhilarating +exhilaration +exhilaration's +exhort +exhortation +exhortation's +exhortations +exhorted +exhorting +exhorts +exhumation +exhumation's +exhumations +exhume +exhumed +exhumes +exhuming +exigencies +exigency +exigency's +exigent +exiguous +exile +exile's +exiled +exiles +exiling +exist +existed +existence +existence's +existences +existent +existential +existentialism +existentialism's +existentialist +existentialist's +existentialists +existentially +existing +exists +exit +exit's +exited +exiting +exits +exodus +exodus's +exoduses +exonerate +exonerated +exonerates +exonerating +exoneration +exoneration's +exoplanet +exoplanet's +exoplanets +exorbitance +exorbitance's +exorbitant +exorbitantly +exorcise +exorcised +exorcises +exorcising +exorcism +exorcism's +exorcisms +exorcist +exorcist's +exorcists +exorcize +exorcized +exorcizes +exorcizing +exotic +exotic's +exotically +exotics +expand +expandable +expanded +expanding +expands +expanse +expanse's +expanses +expansion +expansion's +expansionist +expansionist's +expansionists +expansions +expansive +expansively +expansiveness +expansiveness's +expatiate +expatiated +expatiates +expatiating +expatriate +expatriate's +expatriated +expatriates 
+expatriating +expatriation +expatriation's +expect +expectancy +expectancy's +expectant +expectantly +expectation +expectation's +expectations +expected +expecting +expectorant +expectorant's +expectorants +expectorate +expectorated +expectorates +expectorating +expectoration +expectoration's +expects +expedience +expedience's +expediences +expediencies +expediency +expediency's +expedient +expedient's +expediently +expedients +expedite +expedited +expediter +expediter's +expediters +expedites +expediting +expedition +expedition's +expeditionary +expeditions +expeditious +expeditiously +expeditor +expeditor's +expeditors +expel +expelled +expelling +expels +expend +expendable +expendable's +expendables +expended +expending +expenditure +expenditure's +expenditures +expends +expense +expense's +expenses +expensive +expensively +experience +experience's +experienced +experiences +experiencing +experiment +experiment's +experimental +experimentally +experimentation +experimentation's +experimented +experimenter +experimenter's +experimenters +experimenting +experiments +expert +expert's +expertise +expertise's +expertly +expertness +expertness's +experts +expiate +expiated +expiates +expiating +expiation +expiation's +expiration +expiration's +expire +expired +expires +expiring +expiry +explain +explained +explaining +explains +explanation +explanation's +explanations +explanatory +expletive +expletive's +expletives +explicable +explicate +explicated +explicates +explicating +explication +explication's +explications +explicit +explicitly +explicitness +explicitness's +explode +exploded +explodes +exploding +exploit +exploit's +exploitation +exploitation's +exploitative +exploited +exploiter +exploiter's +exploiters +exploiting +exploits +exploration +exploration's +explorations +exploratory +explore +explored +explorer +explorer's +explorers +explores +exploring +explosion +explosion's +explosions +explosive +explosive's +explosively +explosiveness +explosiveness's 
+explosives +expo +expo's +exponent +exponent's +exponential +exponentially +exponentiation +exponents +export +export's +exportation +exportation's +exported +exporter +exporter's +exporters +exporting +exports +expos +expose +expose's +exposed +exposes +exposing +exposition +exposition's +expositions +expository +expostulate +expostulated +expostulates +expostulating +expostulation +expostulation's +expostulations +exposure +exposure's +exposures +expound +expounded +expounding +expounds +express +express's +expressed +expresses +expressible +expressing +expression +expression's +expressionism +expressionism's +expressionist +expressionist's +expressionists +expressionless +expressions +expressive +expressively +expressiveness +expressiveness's +expressly +expressway +expressway's +expressways +expropriate +expropriated +expropriates +expropriating +expropriation +expropriation's +expropriations +expulsion +expulsion's +expulsions +expunge +expunged +expunges +expunging +expurgate +expurgated +expurgates +expurgating +expurgation +expurgation's +expurgations +exquisite +exquisitely +extant +extemporaneous +extemporaneously +extempore +extemporize +extemporized +extemporizes +extemporizing +extend +extendable +extended +extendible +extending +extends +extension +extension's +extensional +extensions +extensive +extensively +extensiveness +extensiveness's +extent +extent's +extents +extenuate +extenuated +extenuates +extenuating +extenuation +extenuation's +exterior +exterior's +exteriors +exterminate +exterminated +exterminates +exterminating +extermination +extermination's +exterminations +exterminator +exterminator's +exterminators +external +external's +externally +externals +extinct +extincted +extincting +extinction +extinction's +extinctions +extincts +extinguish +extinguishable +extinguished +extinguisher +extinguisher's +extinguishers +extinguishes +extinguishing +extirpate +extirpated +extirpates +extirpating +extirpation +extirpation's +extol +extoll 
+extolled +extolling +extolls +extols +extort +extorted +extorting +extortion +extortion's +extortionate +extortionist +extortionist's +extortionists +extorts +extra +extra's +extract +extract's +extracted +extracting +extraction +extraction's +extractions +extractor +extractor's +extractors +extracts +extracurricular +extradite +extradited +extradites +extraditing +extradition +extradition's +extraditions +extramarital +extraneous +extraneously +extraordinarily +extraordinary +extrapolate +extrapolated +extrapolates +extrapolating +extrapolation +extrapolation's +extrapolations +extras +extrasensory +extraterrestrial +extraterrestrial's +extraterrestrials +extravagance +extravagance's +extravagances +extravagant +extravagantly +extravaganza +extravaganza's +extravaganzas +extravert +extravert's +extraverted +extraverts +extreme +extreme's +extremely +extremer +extremes +extremest +extremism +extremism's +extremist +extremist's +extremists +extremities +extremity +extremity's +extricate +extricated +extricates +extricating +extrication +extrication's +extrinsic +extrinsically +extroversion +extroversion's +extrovert +extrovert's +extroverted +extroverts +extrude +extruded +extrudes +extruding +extrusion +extrusion's +extrusions +exuberance +exuberance's +exuberant +exuberantly +exude +exuded +exudes +exuding +exult +exultant +exultantly +exultation +exultation's +exulted +exulting +exults +eye +eye's +eyeball +eyeball's +eyeballed +eyeballing +eyeballs +eyebrow +eyebrow's +eyebrows +eyed +eyeful +eyeful's +eyefuls +eyeglass +eyeglass's +eyeglasses +eyeing +eyelash +eyelash's +eyelashes +eyelet +eyelet's +eyelets +eyelid +eyelid's +eyelids +eyeliner +eyeliner's +eyeliners +eyepiece +eyepiece's +eyepieces +eyes +eyesight +eyesight's +eyesore +eyesore's +eyesores +eyestrain +eyestrain's +eyeteeth +eyetooth +eyetooth's +eyewitness +eyewitness's +eyewitnesses +eying +eyrie +eyrie's +f +fa +fa's +fable +fable's +fabled +fables +fabric +fabric's +fabricate +fabricated 
+fabricates +fabricating +fabrication +fabrication's +fabrications +fabrics +fabulous +fabulously +facade +facade's +facades +face +face's +faced +faceless +facelift +facelift's +facelifts +faces +facet +facet's +faceted +faceting +facetious +facetiously +facetiousness +facetiousness's +facets +facetted +facetting +facial +facial's +facially +facials +facile +facilitate +facilitated +facilitates +facilitating +facilitation +facilitation's +facilities +facility +facility's +facing +facing's +facings +facsimile +facsimile's +facsimiled +facsimileing +facsimiles +fact +fact's +faction +faction's +factional +factionalism +factionalism's +factions +factitious +factor +factor's +factored +factorial +factories +factoring +factorization +factorize +factorizing +factors +factory +factory's +factotum +factotum's +factotums +facts +factual +factually +faculties +faculty +faculty's +fad +fad's +faddish +fade +fade's +faded +fades +fading +fads +faecal +faeces +faeces's +fag +fag's +fagged +fagging +faggot +faggot's +faggots +fagot +fagot's +fagots +fags +fail +fail's +failed +failing +failing's +failings +fails +failure +failure's +failures +fain +fainer +fainest +faint +faint's +fainted +fainter +faintest +fainthearted +fainting +faintly +faintness +faintness's +faints +fair +fair's +fairer +fairest +fairground +fairground's +fairgrounds +fairies +fairly +fairness +fairness's +fairs +fairway +fairway's +fairways +fairy +fairy's +fairyland +fairyland's +fairylands +faith +faith's +faithful +faithful's +faithfully +faithfulness +faithfulness's +faithfuls +faithless +faithlessly +faithlessness +faithlessness's +faiths +fake +fake's +faked +faker +faker's +fakers +fakes +faking +fakir +fakir's +fakirs +falcon +falcon's +falconer +falconer's +falconers +falconry +falconry's +falcons +fall +fall's +fallacies +fallacious +fallaciously +fallacy +fallacy's +fallen +fallibility +fallibility's +fallible +fallibly +falling +falloff +falloff's +falloffs +fallout +fallout's +fallow 
+fallow's +fallowed +fallowing +fallows +falls +false +falsehood +falsehood's +falsehoods +falsely +falseness +falseness's +falser +falsest +falsetto +falsetto's +falsettos +falsifiable +falsification +falsification's +falsifications +falsified +falsifies +falsify +falsifying +falsities +falsity +falsity's +falter +falter's +faltered +faltering +falteringly +falterings +falters +fame +fame's +famed +familial +familiar +familiar's +familiarity +familiarity's +familiarization +familiarization's +familiarize +familiarized +familiarizes +familiarizing +familiarly +familiars +families +family +family's +famine +famine's +famines +famish +famished +famishes +famishing +famous +famously +fan +fan's +fanatic +fanatic's +fanatical +fanatically +fanaticism +fanaticism's +fanatics +fanboy +fanboy's +fanboys +fancied +fancier +fancier's +fanciers +fancies +fanciest +fanciful +fancifully +fancily +fanciness +fanciness's +fancy +fancy's +fancying +fandom +fanfare +fanfare's +fanfares +fang +fang's +fangs +fanned +fannies +fanning +fanny +fanny's +fans +fantasied +fantasies +fantasize +fantasized +fantasizes +fantasizing +fantastic +fantastically +fantasy +fantasy's +fantasying +fanzine +far +faraway +farce +farce's +farces +farcical +fare +fare's +fared +fares +farewell +farewell's +farewells +farina +farina's +farinaceous +faring +farm +farm's +farmed +farmer +farmer's +farmers +farmhand +farmhand's +farmhands +farmhouse +farmhouse's +farmhouses +farming +farming's +farmland +farmland's +farms +farmyard +farmyard's +farmyards +farrow +farrow's +farrowed +farrowing +farrows +farsighted +farsightedness +farsightedness's +fart +fart's +farted +farther +farthest +farthing +farthing's +farthings +farting +farts +fascinate +fascinated +fascinates +fascinating +fascination +fascination's +fascinations +fascism +fascism's +fascist +fascist's +fascists +fashion +fashion's +fashionable +fashionably +fashioned +fashioning +fashionista +fashionista's +fashionistas +fashions +fast +fast's 
+fasted +fasten +fastened +fastener +fastener's +fasteners +fastening +fastening's +fastenings +fastens +faster +fastest +fastidious +fastidiously +fastidiousness +fastidiousness's +fasting +fastness +fastness's +fastnesses +fasts +fat +fat's +fatal +fatalism +fatalism's +fatalist +fatalist's +fatalistic +fatalists +fatalities +fatality +fatality's +fatally +fate +fate's +fated +fateful +fatefully +fates +fathead +fathead's +fatheads +father +father's +fathered +fatherhood +fatherhood's +fathering +fatherland +fatherland's +fatherlands +fatherless +fatherly +fathers +fathom +fathom's +fathomable +fathomed +fathoming +fathomless +fathoms +fatigue +fatigue's +fatigued +fatigues +fatigues's +fatiguing +fating +fatness +fatness's +fats +fatten +fattened +fattening +fattens +fatter +fattest +fattier +fatties +fattiest +fatty +fatty's +fatuous +fatuously +fatuousness +fatuousness's +faucet +faucet's +faucets +fault +fault's +faulted +faultfinding +faultfinding's +faultier +faultiest +faultily +faultiness +faultiness's +faulting +faultless +faultlessly +faults +faulty +faun +faun's +fauna +fauna's +faunae +faunas +fauns +favor +favor's +favorable +favorably +favored +favoring +favorite +favorite's +favorites +favoritism +favoritism's +favors +fawn +fawn's +fawned +fawning +fawns +fax +fax's +faxed +faxes +faxing +faze +fazed +fazes +fazing +fealty +fealty's +fear +fear's +feared +fearful +fearfully +fearfulness +fearfulness's +fearing +fearless +fearlessly +fearlessness +fearlessness's +fears +fearsome +feasibility +feasibility's +feasible +feasibly +feast +feast's +feasted +feasting +feasts +feat +feat's +feather +feather's +featherbedding +featherbedding's +feathered +featherier +featheriest +feathering +feathers +featherweight +featherweight's +featherweights +feathery +feats +feature +feature's +featured +featureless +features +featuring +febrile +fecal +feces +feces's +feckless +fecund +fecundity +fecundity's +fed +fed's +federal +federal's +federalism +federalism's 
+federalist +federalist's +federalists +federally +federals +federate +federated +federates +federating +federation +federation's +federations +fedora +fedora's +fedoras +feds +fee +fee's +feeble +feebleness +feebleness's +feebler +feeblest +feebly +feed +feed's +feedback +feedback's +feedbag +feedbag's +feedbags +feeder +feeder's +feeders +feeding +feeding's +feedings +feeds +feel +feel's +feeler +feeler's +feelers +feeling +feeling's +feelingly +feelings +feels +fees +feet +feign +feigned +feigning +feigns +feint +feint's +feinted +feinting +feints +feistier +feistiest +feisty +feldspar +feldspar's +felicities +felicitous +felicity +felicity's +feline +feline's +felines +fell +fell's +fellatio +fellatio's +felled +feller +fellest +felling +fellow +fellow's +fellows +fellowship +fellowship's +fellowships +fells +felon +felon's +felonies +felonious +felons +felony +felony's +felt +felt's +felted +felting +felts +female +female's +females +feminine +feminine's +feminines +femininity +femininity's +feminism +feminism's +feminist +feminist's +feminists +femora +femoral +femur +femur's +femurs +fen +fen's +fence +fence's +fenced +fencer +fencer's +fencers +fences +fencing +fencing's +fend +fended +fender +fender's +fenders +fending +fends +fennel +fennel's +fens +fer +feral +ferment +ferment's +fermentation +fermentation's +fermented +fermenting +ferments +fern +fern's +ferns +ferocious +ferociously +ferociousness +ferociousness's +ferocity +ferocity's +ferret +ferret's +ferreted +ferreting +ferrets +ferric +ferried +ferries +ferrous +ferrule +ferrule's +ferrules +ferry +ferry's +ferryboat +ferryboat's +ferryboats +ferrying +fertile +fertility +fertility's +fertilization +fertilization's +fertilize +fertilized +fertilizer +fertilizer's +fertilizers +fertilizes +fertilizing +fervency +fervency's +fervent +fervently +fervid +fervidly +fervor +fervor's +fest +fest's +festal +fester +fester's +festered +festering +festers +festival +festival's +festivals +festive 
+festively +festivities +festivity +festivity's +festoon +festoon's +festooned +festooning +festoons +fests +feta +feta's +fetal +fetch +fetched +fetches +fetching +fetchingly +feted +fetich +fetich's +fetiches +fetid +feting +fetish +fetish's +fetishes +fetishism +fetishism's +fetishist +fetishist's +fetishistic +fetishists +fetlock +fetlock's +fetlocks +fetter +fetter's +fettered +fettering +fetters +fettle +fettle's +fetus +fetus's +fetuses +feud +feud's +feudal +feudalism +feudalism's +feudalistic +feuded +feuding +feuds +fever +fever's +fevered +feverish +feverishly +fevers +few +few's +fewer +fewest +fey +fez +fez's +fezes +fezzes +fiancé +fiancé's +fiancée +fiancée's +fiancées +fiancés +fiasco +fiasco's +fiascoes +fiascos +fiat +fiat's +fiats +fib +fib's +fibbed +fibber +fibber's +fibbers +fibbing +fiber +fiber's +fiberboard +fiberboard's +fiberglass +fiberglass's +fibers +fibroid +fibrous +fibs +fibula +fibula's +fibulae +fibulas +fiche +fiche's +fiches +fickle +fickleness +fickleness's +fickler +ficklest +fiction +fiction's +fictional +fictionalize +fictionalized +fictionalizes +fictionalizing +fictions +fictitious +fiddle +fiddle's +fiddled +fiddler +fiddler's +fiddlers +fiddles +fiddlesticks +fiddling +fiddly +fidelity +fidelity's +fidget +fidget's +fidgeted +fidgeting +fidgets +fidgety +fiduciaries +fiduciary +fiduciary's +fie +fief +fief's +fiefs +field +field's +fielded +fielder +fielder's +fielders +fielding +fields +fieldwork +fieldwork's +fiend +fiend's +fiendish +fiendishly +fiends +fierce +fiercely +fierceness +fierceness's +fiercer +fiercest +fierier +fieriest +fieriness +fieriness's +fiery +fiesta +fiesta's +fiestas +fife +fife's +fifes +fifteen +fifteen's +fifteens +fifteenth +fifteenth's +fifteenths +fifth +fifth's +fifths +fifties +fiftieth +fiftieth's +fiftieths +fifty +fifty's +fig +fig's +fight +fight's +fighter +fighter's +fighters +fighting +fighting's +fights +figment +figment's +figments +figs +figurative +figuratively +figure 
+figure's +figured +figurehead +figurehead's +figureheads +figures +figurine +figurine's +figurines +figuring +filament +filament's +filamentous +filaments +filbert +filbert's +filberts +filch +filched +filches +filching +file +file's +filed +files +filet +filet's +filets +filial +filibuster +filibuster's +filibustered +filibustering +filibusters +filigree +filigree's +filigreed +filigreeing +filigrees +filing +filing's +filings +fill +fill's +filled +filler +filler's +fillers +fillet +fillet's +filleted +filleting +fillets +fillies +filling +filling's +fillings +fillip +fillip's +filliped +filliping +fillips +fills +filly +filly's +film +film's +filmed +filmier +filmiest +filming +filmmaker +filmmaker's +filmmakers +films +filmstrip +filmstrip's +filmstrips +filmy +filter +filter's +filterable +filtered +filtering +filters +filth +filth's +filthier +filthiest +filthiness +filthiness's +filthy +filtrable +filtrate +filtrate's +filtrated +filtrates +filtrating +filtration +filtration's +fin +fin's +finagle +finagled +finagler +finagler's +finaglers +finagles +finagling +final +final's +finale +finale's +finales +finalist +finalist's +finalists +finality +finality's +finalize +finalized +finalizes +finalizing +finally +finals +finance +finance's +financed +finances +financial +financially +financier +financier's +financiers +financing +financing's +finch +finch's +finches +find +find's +finder +finder's +finders +finding +finding's +findings +finds +fine +fine's +fined +finely +fineness +fineness's +finer +finery +finery's +fines +finesse +finesse's +finessed +finesses +finessing +finest +finger +finger's +fingerboard +fingerboard's +fingerboards +fingered +fingering +fingering's +fingerings +fingernail +fingernail's +fingernails +fingerprint +fingerprint's +fingerprinted +fingerprinting +fingerprints +fingers +fingertip +fingertip's +fingertips +finickier +finickiest +finicky +fining +finis +finis's +finises +finish +finish's +finished +finisher +finisher's 
+finishers +finishes +finishing +finite +finitely +fink +fink's +finked +finking +finks +finny +fins +fiord +fiord's +fiords +fir +fir's +fire +fire's +firearm +firearm's +firearms +fireball +fireball's +fireballs +firebomb +firebomb's +firebombed +firebombing +firebombs +firebrand +firebrand's +firebrands +firebreak +firebreak's +firebreaks +firebug +firebug's +firebugs +firecracker +firecracker's +firecrackers +fired +firefight +firefight's +firefighter +firefighter's +firefighters +firefighting +firefighting's +firefights +fireflies +firefly +firefly's +firehouse +firehouse's +firehouses +fireman +fireman's +firemen +fireplace +fireplace's +fireplaces +fireplug +fireplug's +fireplugs +firepower +firepower's +fireproof +fireproofed +fireproofing +fireproofs +fires +fireside +fireside's +firesides +firestorm +firestorm's +firestorms +firetrap +firetrap's +firetraps +firewall +firewall's +firewalls +firewater +firewater's +firewood +firewood's +firework +firework's +fireworks +firing +firm +firm's +firmament +firmament's +firmaments +firmed +firmer +firmest +firming +firmly +firmness +firmness's +firms +firmware +firs +first +first's +firstborn +firstborn's +firstborns +firsthand +firstly +firsts +firth +firth's +firths +fiscal +fiscal's +fiscally +fiscals +fish +fish's +fishbowl +fishbowl's +fishbowls +fished +fisher +fisher's +fisheries +fisherman +fisherman's +fishermen +fishers +fishery +fishery's +fishes +fishhook +fishhook's +fishhooks +fishier +fishiest +fishing +fishing's +fishnet +fishnet's +fishnets +fishtail +fishtailed +fishtailing +fishtails +fishwife +fishwife's +fishwives +fishy +fission +fission's +fissure +fissure's +fissures +fist +fist's +fistful +fistful's +fistfuls +fisticuffs +fisticuffs's +fists +fit +fit's +fitful +fitfully +fitly +fitness +fitness's +fits +fitted +fitter +fitter's +fitters +fittest +fitting +fitting's +fittingly +fittings +five +five's +fiver +fives +fix +fix's +fixable +fixate +fixated +fixates +fixating +fixation 
+fixation's +fixations +fixative +fixative's +fixatives +fixed +fixedly +fixer +fixer's +fixers +fixes +fixing +fixings +fixings's +fixity +fixity's +fixture +fixture's +fixtures +fizz +fizz's +fizzed +fizzes +fizzier +fizziest +fizzing +fizzle +fizzle's +fizzled +fizzles +fizzling +fizzy +fjord +fjord's +fjords +flab +flab's +flabbergast +flabbergasted +flabbergasting +flabbergasts +flabbier +flabbiest +flabbiness +flabbiness's +flabby +flaccid +flack +flack's +flacks +flag +flag's +flagella +flagellate +flagellated +flagellates +flagellating +flagellation +flagellation's +flagellum +flagellum's +flagellums +flagged +flagging +flagon +flagon's +flagons +flagpole +flagpole's +flagpoles +flagrant +flagrantly +flags +flagship +flagship's +flagships +flagstaff +flagstaff's +flagstaffs +flagstone +flagstone's +flagstones +flail +flail's +flailed +flailing +flails +flair +flair's +flairs +flak +flak's +flake +flake's +flaked +flakes +flakier +flakiest +flakiness +flakiness's +flaking +flaky +flambeing +flambes +flamboyance +flamboyance's +flamboyant +flamboyantly +flambé +flambé's +flambéed +flame +flame's +flamed +flamenco +flamenco's +flamencos +flames +flamethrower +flamethrower's +flamethrowers +flaming +flamingo +flamingo's +flamingoes +flamingos +flamings +flammability +flammability's +flammable +flammable's +flammables +flan +flange +flange's +flanges +flank +flank's +flanked +flanking +flanks +flannel +flannel's +flanneled +flannelet +flannelet's +flannelette +flannelette's +flanneling +flannelled +flannelling +flannels +flap +flap's +flapjack +flapjack's +flapjacks +flapped +flapper +flapper's +flappers +flapping +flaps +flare +flare's +flared +flares +flaring +flash +flash's +flashback +flashback's +flashbacks +flashbulb +flashbulb's +flashbulbs +flashed +flasher +flasher's +flashers +flashes +flashest +flashgun +flashgun's +flashguns +flashier +flashiest +flashily +flashiness +flashiness's +flashing +flashing's +flashlight +flashlight's +flashlights +flashy 
+flask +flask's +flasks +flat +flat's +flatbed +flatbed's +flatbeds +flatboat +flatboat's +flatboats +flatcar +flatcar's +flatcars +flatfeet +flatfish +flatfish's +flatfishes +flatfoot +flatfoot's +flatfooted +flatfoots +flatiron +flatiron's +flatirons +flatly +flatness +flatness's +flats +flatted +flatten +flattened +flattening +flattens +flatter +flattered +flatterer +flatterer's +flatterers +flattering +flatteringly +flatters +flattery +flattery's +flattest +flatting +flattop +flattop's +flattops +flatulence +flatulence's +flatulent +flatware +flatware's +flaunt +flaunt's +flaunted +flaunting +flaunts +flavor +flavor's +flavored +flavorful +flavoring +flavoring's +flavorings +flavorless +flavors +flaw +flaw's +flawed +flawing +flawless +flawlessly +flaws +flax +flax's +flaxen +flay +flayed +flaying +flays +flea +flea's +fleas +fleck +fleck's +flecked +flecking +flecks +fled +fledged +fledgeling +fledgeling's +fledgelings +fledgling +fledgling's +fledglings +flee +fleece +fleece's +fleeced +fleeces +fleecier +fleeciest +fleecing +fleecy +fleeing +flees +fleet +fleet's +fleeted +fleeter +fleetest +fleeting +fleetingly +fleetingly's +fleetness +fleetness's +fleets +flesh +flesh's +fleshed +fleshes +fleshier +fleshiest +fleshing +fleshlier +fleshliest +fleshly +fleshy +flew +flex +flex's +flexed +flexes +flexibility +flexibility's +flexible +flexibly +flexing +flexitime +flexitime's +flextime +flextime's +flibbertigibbet +flibbertigibbet's +flibbertigibbets +flick +flick's +flicked +flicker +flicker's +flickered +flickering +flickers +flicking +flicks +flied +flier +flier's +fliers +flies +fliest +flight +flight's +flightier +flightiest +flightiness +flightiness's +flightless +flights +flighty +flimflam +flimflam's +flimflammed +flimflamming +flimflams +flimsier +flimsiest +flimsily +flimsiness +flimsiness's +flimsy +flinch +flinch's +flinched +flinches +flinching +fling +fling's +flinging +flings +flint +flint's +flintier +flintiest +flintlock +flintlock's 
+flintlocks +flints +flinty +flip +flip's +flippancy +flippancy's +flippant +flippantly +flipped +flipper +flipper's +flippers +flippest +flipping +flips +flirt +flirt's +flirtation +flirtation's +flirtations +flirtatious +flirtatiously +flirted +flirting +flirts +flit +flit's +flits +flitted +flitting +float +float's +floatation +floatation's +floatations +floated +floater +floater's +floaters +floating +floats +flock +flock's +flocked +flocking +flocks +floe +floe's +floes +flog +flogged +flogging +flogging's +floggings +flogs +flood +flood's +flooded +flooder +floodgate +floodgate's +floodgates +flooding +floodlight +floodlight's +floodlighted +floodlighting +floodlights +floodlit +floods +floor +floor's +floorboard +floorboard's +floorboards +floored +flooring +flooring's +floors +floozie +floozie's +floozies +floozy +floozy's +flop +flop's +flophouse +flophouse's +flophouses +flopped +floppier +floppies +floppiest +floppiness +floppiness's +flopping +floppy +floppy's +flops +flora +flora's +florae +floral +floras +florid +floridly +florin +florin's +florins +florist +florist's +florists +floss +floss's +flossed +flosses +flossing +flotation +flotation's +flotations +flotilla +flotilla's +flotillas +flotsam +flotsam's +flounce +flounce's +flounced +flounces +flouncing +flounder +flounder's +floundered +floundering +flounders +flour +flour's +floured +flouring +flourish +flourish's +flourished +flourishes +flourishing +flours +floury +flout +flout's +flouted +flouting +flouts +flow +flow's +flowed +flower +flower's +flowerbed +flowerbed's +flowerbeds +flowered +flowerier +floweriest +floweriness +floweriness's +flowering +flowerpot +flowerpot's +flowerpots +flowers +flowery +flowing +flown +flows +flu +flu's +flub +flub's +flubbed +flubbing +flubs +fluctuate +fluctuated +fluctuates +fluctuating +fluctuation +fluctuation's +fluctuations +flue +flue's +fluency +fluency's +fluent +fluently +flues +fluff +fluff's +fluffed +fluffier +fluffiest +fluffiness 
+fluffiness's +fluffing +fluffs +fluffy +fluid +fluid's +fluidity +fluidity's +fluidly +fluids +fluke +fluke's +flukes +flukey +flukier +flukiest +fluky +flume +flume's +flumes +flummox +flummoxed +flummoxes +flummoxing +flung +flunk +flunk's +flunked +flunkey +flunkey's +flunkeys +flunkie +flunkie's +flunkies +flunking +flunks +flunky +flunky's +fluoresce +fluoresced +fluorescence +fluorescence's +fluorescent +fluoresces +fluorescing +fluoridate +fluoridated +fluoridates +fluoridating +fluoridation +fluoridation's +fluoride +fluoride's +fluorides +fluorine +fluorine's +fluorite +fluorite's +fluorocarbon +fluorocarbon's +fluorocarbons +fluoroscope +fluoroscope's +fluoroscopes +flurried +flurries +flurry +flurry's +flurrying +flush +flush's +flushed +flusher +flushes +flushest +flushing +fluster +fluster's +flustered +flustering +flusters +flute +flute's +fluted +flutes +fluting +fluting's +flutist +flutist's +flutists +flutter +flutter's +fluttered +fluttering +flutters +fluttery +flux +flux's +fluxed +fluxes +fluxing +fly +fly's +flyby +flyby's +flybys +flycatcher +flycatcher's +flycatchers +flyer +flyer's +flyers +flying +flying's +flyleaf +flyleaf's +flyleaves +flyover +flyover's +flyovers +flypaper +flypaper's +flypapers +flysheet +flyspeck +flyspeck's +flyspecked +flyspecking +flyspecks +flyswatter +flyswatter's +flyswatters +flyweight +flyweight's +flyweights +flywheel +flywheel's +flywheels +fo'c's'le +fo'c's'le's +fo'c's'les +fo'c'sle +fo'c'sle's +fo'c'sles +foal +foal's +foaled +foaling +foals +foam +foam's +foamed +foamier +foamiest +foaming +foams +foamy +fob +fob's +fobbed +fobbing +fobs +focal +foci +focus +focus's +focused +focuses +focusing +focussed +focusses +focussing +fodder +fodder's +fodders +foe +foe's +foes +foetal +foetus +foetus's +foetuses +fog +fog's +fogbound +fogey +fogey's +fogeys +fogged +foggier +foggiest +fogginess +fogginess's +fogging +foggy +foghorn +foghorn's +foghorns +fogies +fogs +fogy +fogy's +foible +foible's +foibles +foil 
+foil's +foiled +foiling +foils +foist +foisted +foisting +foists +fold +fold's +foldaway +folded +folder +folder's +folders +folding +folds +foliage +foliage's +folio +folio's +folios +folk +folk's +folklore +folklore's +folks +folksier +folksiest +folksy +follicle +follicle's +follicles +follies +follow +followed +follower +follower's +followers +following +following's +followings +follows +folly +folly's +foment +fomentation +fomentation's +fomented +fomenting +foments +fond +fondant +fondant's +fondants +fonder +fondest +fondle +fondled +fondles +fondling +fondly +fondness +fondness's +fondu +fondu's +fondue +fondue's +fondues +fondus +font +font's +fonts +food +food's +foods +foodstuff +foodstuff's +foodstuffs +fool +fool's +fooled +fooleries +foolery +foolery's +foolhardier +foolhardiest +foolhardiness +foolhardiness's +foolhardy +fooling +foolish +foolishly +foolishness +foolishness's +foolproof +fools +foolscap +foolscap's +foot +foot's +footage +footage's +football +football's +footballer +footballer's +footballers +footballs +footbridge +footbridge's +footbridges +footed +footfall +footfall's +footfalls +foothill +foothill's +foothills +foothold +foothold's +footholds +footing +footing's +footings +footlights +footlights's +footlocker +footlocker's +footlockers +footloose +footman +footman's +footmen +footnote +footnote's +footnoted +footnotes +footnoting +footpath +footpath's +footpaths +footprint +footprint's +footprints +footrest +footrest's +footrests +foots +footsie +footsie's +footsies +footsore +footstep +footstep's +footsteps +footstool +footstool's +footstools +footwear +footwear's +footwork +footwork's +fop +fop's +foppish +fops +for +fora +forage +forage's +foraged +forager +forager's +foragers +forages +foraging +foray +foray's +forayed +foraying +forays +forbad +forbade +forbear +forbear's +forbearance +forbearance's +forbearing +forbears +forbid +forbidden +forbidding +forbiddingly +forbiddings +forbids +forbore +forborne +force +force's 
+forced +forceful +forcefully +forcefulness +forcefulness's +forceps +forceps's +forces +forcible +forcibly +forcing +ford +ford's +forded +fording +fords +fore +fore's +forearm +forearm's +forearmed +forearming +forearms +forebear +forebear's +forebears +forebode +foreboded +forebodes +foreboding +foreboding's +forebodings +forecast +forecast's +forecasted +forecaster +forecaster's +forecasters +forecasting +forecastle +forecastle's +forecastles +forecasts +foreclose +foreclosed +forecloses +foreclosing +foreclosure +foreclosure's +foreclosures +forefather +forefather's +forefathers +forefeet +forefinger +forefinger's +forefingers +forefoot +forefoot's +forefront +forefront's +forefronts +foregather +foregathered +foregathering +foregathers +forego +foregoes +foregoing +foregone +foreground +foreground's +foregrounded +foregrounding +foregrounds +forehand +forehand's +forehands +forehead +forehead's +foreheads +foreign +foreigner +foreigner's +foreigners +foreknowledge +foreknowledge's +foreleg +foreleg's +forelegs +forelock +forelock's +forelocks +foreman +foreman's +foremast +foremast's +foremasts +foremen +foremost +forename +forename's +forenames +forenoon +forenoon's +forenoons +forensic +forensic's +forensics +foreordain +foreordained +foreordaining +foreordains +foreplay +foreplay's +forerunner +forerunner's +forerunners +fores +foresail +foresail's +foresails +foresaw +foresee +foreseeable +foreseeing +foreseen +foresees +foreshadow +foreshadowed +foreshadowing +foreshadows +foreshorten +foreshortened +foreshortening +foreshortens +foresight +foresight's +foreskin +foreskin's +foreskins +forest +forest's +forestall +forestalled +forestalling +forestalls +forestation +forestation's +forested +forester +forester's +foresters +foresting +forestry +forestry's +forests +foreswear +foreswearing +foreswears +foreswore +foresworn +foretaste +foretaste's +foretasted +foretastes +foretasting +foretell +foretelling +foretells +forethought +forethought's +foretold 
+forever +forever's +forevermore +forewarn +forewarned +forewarning +forewarns +forewent +forewoman +forewoman's +forewomen +foreword +foreword's +forewords +forfeit +forfeit's +forfeited +forfeiting +forfeits +forfeiture +forfeiture's +forgather +forgathered +forgathering +forgathers +forgave +forge +forge's +forged +forger +forger's +forgeries +forgers +forgery +forgery's +forges +forget +forgetful +forgetfully +forgetfulness +forgetfulness's +forgets +forgettable +forgetting +forging +forgivable +forgive +forgiven +forgiveness +forgiveness's +forgives +forgiving +forgo +forgoes +forgoing +forgone +forgot +forgotten +fork +fork's +forked +forking +forklift +forklift's +forklifts +forks +forlorn +forlornly +form +form's +formal +formal's +formaldehyde +formaldehyde's +formalism +formalism's +formalities +formality +formality's +formalization +formalization's +formalize +formalized +formalizes +formalizing +formally +formals +format +format's +formation +formation's +formations +formative +formats +formatted +formatting +formed +former +former's +formerly +formidable +formidably +forming +formless +formlessly +formlessness +formlessness's +forms +formula +formula's +formulae +formulaic +formulas +formulate +formulated +formulates +formulating +formulation +formulation's +formulations +fornicate +fornicated +fornicates +fornicating +fornication +fornication's +forsake +forsaken +forsakes +forsaking +forsook +forsooth +forswear +forswearing +forswears +forswore +forsworn +forsythia +forsythia's +forsythias +fort +fort's +forte +forte's +fortes +forth +forthcoming +forthcoming's +forthright +forthrightly +forthrightness +forthrightness's +forthwith +forties +fortieth +fortieth's +fortieths +fortification +fortification's +fortifications +fortified +fortifies +fortify +fortifying +fortissimo +fortitude +fortitude's +fortnight +fortnight's +fortnightly +fortnights +fortress +fortress's +fortresses +forts +fortuitous +fortuitously +fortunate +fortunately +fortune 
+fortune's +fortunes +forty +forty's +forum +forum's +forums +forward +forward's +forwarded +forwarder +forwardest +forwarding +forwardness +forwardness's +forwards +forwent +fossil +fossil's +fossilization +fossilization's +fossilize +fossilized +fossilizes +fossilizing +fossils +foster +fostered +fostering +fosters +fought +foul +foul's +fouled +fouler +foulest +fouling +foully +foulness +foulness's +fouls +found +foundation +foundation's +foundations +founded +founder +founder's +foundered +foundering +founders +founding +foundling +foundling's +foundlings +foundries +foundry +foundry's +founds +fount +fount's +fountain +fountain's +fountainhead +fountainhead's +fountainheads +fountains +founts +four +four's +fourfold +fours +fourscore +fourscore's +foursome +foursome's +foursomes +foursquare +fourteen +fourteen's +fourteens +fourteenth +fourteenth's +fourteenths +fourth +fourth's +fourthly +fourths +fowl +fowl's +fowled +fowling +fowls +fox +fox's +foxed +foxes +foxglove +foxglove's +foxgloves +foxhole +foxhole's +foxholes +foxhound +foxhound's +foxhounds +foxier +foxiest +foxing +foxtrot +foxtrot's +foxtrots +foxtrotted +foxtrotting +foxy +foyer +foyer's +foyers +fracas +fracas's +fracases +frack +fracked +fracking +fracks +fractal +fractal's +fractals +fraction +fraction's +fractional +fractionally +fractions +fractious +fractiously +fracture +fracture's +fractured +fractures +fracturing +fragile +fragility +fragility's +fragment +fragment's +fragmentary +fragmentary's +fragmentation +fragmentation's +fragmented +fragmenting +fragments +fragrance +fragrance's +fragrances +fragrant +fragrantly +frail +frailer +frailest +frailties +frailty +frailty's +frame +frame's +framed +framer +framer's +framers +frames +framework +framework's +frameworks +framing +franc +franc's +franchise +franchise's +franchised +franchisee +franchisee's +franchisees +franchiser +franchiser's +franchisers +franchises +franchising +francs +frank +frank's +franked +franker +frankest 
+frankfurter +frankfurter's +frankfurters +frankincense +frankincense's +franking +frankly +frankness +frankness's +franks +frantic +frantically +frappes +frappé +frappé's +frat +frat's +fraternal +fraternally +fraternities +fraternity +fraternity's +fraternization +fraternization's +fraternize +fraternized +fraternizes +fraternizing +fratricide +fratricide's +fratricides +frats +fraud +fraud's +frauds +fraudulence +fraudulence's +fraudulent +fraudulently +fraught +fray +fray's +frayed +fraying +frays +frazzle +frazzle's +frazzled +frazzles +frazzling +freak +freak's +freaked +freakier +freakiest +freaking +freakish +freaks +freaky +freckle +freckle's +freckled +freckles +freckling +free +freebase +freebase's +freebased +freebases +freebasing +freebee +freebee's +freebees +freebie +freebie's +freebies +freebooter +freebooter's +freebooters +freed +freedman +freedman's +freedmen +freedom +freedom's +freedoms +freehand +freehold +freehold's +freeholder +freeholder's +freeholders +freeholds +freeing +freelance +freelance's +freelanced +freelancer +freelancer's +freelancers +freelances +freelancing +freeload +freeloaded +freeloader +freeloader's +freeloaders +freeloading +freeloads +freely +freeman +freeman's +freemen +freer +frees +freest +freestanding +freestyle +freestyle's +freestyles +freethinker +freethinker's +freethinkers +freethinking +freethinking's +freeway +freeway's +freeways +freewheel +freewheeled +freewheeling +freewheels +freewill +freeze +freeze's +freezer +freezer's +freezers +freezes +freezing +freezing's +freight +freight's +freighted +freighter +freighter's +freighters +freighting +freights +french +frenetic +frenetically +frenzied +frenziedly +frenzies +frenzy +frenzy's +frequencies +frequency +frequency's +frequent +frequented +frequenter +frequentest +frequenting +frequently +frequents +fresco +fresco's +frescoes +frescos +fresh +freshen +freshened +freshening +freshens +fresher +freshest +freshet +freshet's +freshets +freshly +freshman 
+freshman's +freshmen +freshness +freshness's +freshwater +freshwater's +fret +fret's +fretful +fretfully +fretfulness +fretfulness's +frets +fretted +fretting +fretwork +fretwork's +friable +friar +friar's +friars +fricassee +fricassee's +fricasseed +fricasseeing +fricassees +friction +friction's +fridge +fridge's +fridges +fried +friend +friend's +friended +friending +friendless +friendlier +friendlies +friendliest +friendliness +friendliness's +friendly +friendly's +friends +friendship +friendship's +friendships +frier +frier's +friers +fries +frieze +frieze's +friezes +frigate +frigate's +frigates +fright +fright's +frighted +frighten +frightened +frightening +frighteningly +frightens +frightful +frightfully +frighting +frights +frigid +frigidity +frigidity's +frigidly +frill +frill's +frillier +frilliest +frills +frilly +fringe +fringe's +fringed +fringes +fringing +fripperies +frippery +frippery's +frisk +frisked +friskier +friskiest +friskily +friskiness +friskiness's +frisking +frisks +frisky +fritter +fritter's +frittered +frittering +fritters +frivolities +frivolity +frivolity's +frivolous +frivolously +frizz +frizz's +frizzed +frizzes +frizzier +frizziest +frizzing +frizzle +frizzle's +frizzled +frizzles +frizzling +frizzy +fro +frock +frock's +frocks +frog +frog's +frogman +frogman's +frogmen +frogs +frolic +frolic's +frolicked +frolicking +frolics +frolicsome +from +frond +frond's +fronds +front +front's +frontage +frontage's +frontages +frontal +frontally +fronted +frontier +frontier's +frontiers +frontiersman +frontiersman's +frontiersmen +fronting +frontispiece +frontispiece's +frontispieces +frontrunner +frontrunner's +frontrunners +fronts +frost +frost's +frostbit +frostbite +frostbite's +frostbites +frostbiting +frostbitten +frosted +frostier +frostiest +frostily +frostiness +frostiness's +frosting +frosting's +frostings +frosts +frosty +froth +froth's +frothed +frothier +frothiest +frothing +froths +frothy +frown +frown's +frowned +frowning 
+frowns +frowsier +frowsiest +frowsy +frowzier +frowziest +frowzy +froze +frozen +fructified +fructifies +fructify +fructifying +fructose +fructose's +frugal +frugality +frugality's +frugally +fruit +fruit's +fruitcake +fruitcake's +fruitcakes +fruited +fruitful +fruitfully +fruitfulness +fruitfulness's +fruitier +fruitiest +fruiting +fruition +fruition's +fruitless +fruitlessly +fruitlessness +fruitlessness's +fruits +fruity +frump +frump's +frumpier +frumpiest +frumps +frumpy +frustrate +frustrated +frustrates +frustrating +frustration +frustration's +frustrations +fry +fry's +fryer +fryer's +fryers +frying +fuchsia +fuchsia's +fuchsias +fuck +fuck's +fucked +fucker +fucker's +fuckers +fucking +fucks +fuddle +fuddle's +fuddled +fuddles +fuddling +fudge +fudge's +fudged +fudges +fudging +fuel +fuel's +fueled +fueling +fuelled +fuelling +fuels +fugitive +fugitive's +fugitives +fugue +fugue's +fugues +fulcra +fulcrum +fulcrum's +fulcrums +fulfil +fulfill +fulfilled +fulfilling +fulfillment +fulfillment's +fulfills +fulfilment +fulfilment's +fulfils +full +full's +fullback +fullback's +fullbacks +fulled +fuller +fullest +fulling +fullness +fullness's +fulls +fully +fulminate +fulminated +fulminates +fulminating +fulmination +fulmination's +fulminations +fulness +fulness's +fulsome +fumble +fumble's +fumbled +fumbler +fumbler's +fumblers +fumbles +fumbling +fume +fume's +fumed +fumes +fumigate +fumigated +fumigates +fumigating +fumigation +fumigation's +fumigator +fumigator's +fumigators +fuming +fun +fun's +function +function's +functional +functionality +functionally +functionaries +functionary +functionary's +functioned +functioning +functions +fund +fund's +fundamental +fundamental's +fundamentalism +fundamentalism's +fundamentalist +fundamentalist's +fundamentalists +fundamentally +fundamentals +funded +funding +funding's +funds +funeral +funeral's +funerals +funereal +funereally +fungal +fungi +fungicidal +fungicide +fungicide's +fungicides +fungous +fungus 
+fungus's +funguses +funicular +funicular's +funiculars +funk +funk's +funked +funkier +funkiest +funking +funks +funky +funnel +funnel's +funneled +funneling +funnelled +funnelling +funnels +funner +funnest +funnier +funnies +funniest +funnily +funniness +funniness's +funny +funny's +fur +fur's +furbelow +furbelow's +furbish +furbished +furbishes +furbishing +furies +furious +furiously +furl +furl's +furled +furling +furlong +furlong's +furlongs +furlough +furlough's +furloughed +furloughing +furloughs +furls +furnace +furnace's +furnaces +furnish +furnished +furnishes +furnishing +furnishings +furnishings's +furniture +furniture's +furor +furor's +furors +furred +furrier +furrier's +furriers +furriest +furring +furrow +furrow's +furrowed +furrowing +furrows +furry +furs +further +furtherance +furtherance's +furthered +furthering +furthermore +furthermost +furthers +furthest +furtive +furtively +furtiveness +furtiveness's +fury +fury's +furze +furze's +fuse +fuse's +fused +fuselage +fuselage's +fuselages +fuses +fusible +fusillade +fusillade's +fusillades +fusing +fusion +fusion's +fusions +fuss +fuss's +fussbudget +fussbudget's +fussbudgets +fussed +fusses +fussier +fussiest +fussily +fussiness +fussiness's +fussing +fussy +fustian +fustian's +fustier +fustiest +fusty +futile +futilely +futility +futility's +futon +futon's +futons +future +future's +futures +futuristic +futurities +futurity +futurity's +futz +futzed +futzes +futzing +fuze +fuze's +fuzed +fuzes +fuzing +fuzz +fuzz's +fuzzed +fuzzes +fuzzier +fuzziest +fuzzily +fuzziness +fuzziness's +fuzzing +fuzzy +fête +fête's +fêtes +g +gab +gab's +gabardine +gabardine's +gabardines +gabbed +gabbier +gabbiest +gabbing +gabble +gabble's +gabbled +gabbles +gabbling +gabby +gaberdine +gaberdine's +gaberdines +gable +gable's +gabled +gables +gabs +gad +gadabout +gadabout's +gadabouts +gadded +gadding +gadflies +gadfly +gadfly's +gadget +gadget's +gadgetry +gadgetry's +gadgets +gads +gaff +gaff's +gaffe +gaffe's 
+gaffed +gaffes +gaffing +gaffs +gag +gag's +gage +gage's +gaged +gages +gagged +gagging +gaggle +gaggle's +gaggles +gaging +gags +gaiety +gaiety's +gaily +gain +gain's +gained +gainful +gainfully +gaining +gains +gainsaid +gainsay +gainsaying +gainsays +gait +gait's +gaiter +gaiter's +gaiters +gaits +gal +gal's +gala +gala's +galactic +galas +galaxies +galaxy +galaxy's +gale +gale's +galena +galena's +gales +gall +gall's +gallant +gallant's +gallantly +gallantry +gallantry's +gallants +gallbladder +gallbladder's +gallbladders +galled +galleon +galleon's +galleons +galleries +gallery +gallery's +galley +galley's +galleys +galling +gallium +gallium's +gallivant +gallivanted +gallivanting +gallivants +gallon +gallon's +gallons +gallop +gallop's +galloped +galloping +gallops +gallows +gallows's +gallowses +galls +gallstone +gallstone's +gallstones +galore +galosh +galosh's +galoshes +gals +galvanic +galvanize +galvanized +galvanizes +galvanizing +galvanometer +galvanometer's +galvanometers +gambit +gambit's +gambits +gamble +gamble's +gambled +gambler +gambler's +gamblers +gambles +gambling +gambling's +gambol +gambol's +gamboled +gamboling +gambolled +gambolling +gambols +game +game's +gamecock +gamecock's +gamecocks +gamed +gamekeeper +gamekeeper's +gamekeepers +gamely +gameness +gameness's +gamer +games +gamesmanship +gamesmanship's +gamest +gamete +gamete's +gametes +gamey +gamier +gamiest +gamin +gamin's +gamine +gamine's +gamines +gaming +gamins +gamma +gamma's +gammas +gamut +gamut's +gamuts +gamy +gander +gander's +ganders +gang +gang's +ganged +ganging +gangland +gangland's +ganglia +ganglier +gangliest +gangling +ganglion +ganglion's +ganglions +gangly +gangplank +gangplank's +gangplanks +gangrene +gangrene's +gangrened +gangrenes +gangrening +gangrenous +gangs +gangster +gangster's +gangsters +gangway +gangway's +gangways +gannet +gannet's +gannets +gantlet +gantlet's +gantlets +gantries +gantry +gantry's +gap +gap's +gape +gape's +gaped +gapes +gaping 
+gaps +garage +garage's +garaged +garages +garaging +garb +garb's +garbage +garbage's +garbageman +garbanzo +garbanzo's +garbanzos +garbed +garbing +garble +garbled +garbles +garbling +garbs +garden +garden's +gardened +gardener +gardener's +gardeners +gardenia +gardenia's +gardenias +gardening +gardening's +gardens +gargantuan +gargle +gargle's +gargled +gargles +gargling +gargoyle +gargoyle's +gargoyles +garish +garishly +garishness +garishness's +garland +garland's +garlanded +garlanding +garlands +garlic +garlic's +garlicky +garment +garment's +garments +garner +garnered +garnering +garners +garnet +garnet's +garnets +garnish +garnish's +garnished +garnishee +garnishee's +garnisheed +garnisheeing +garnishees +garnishes +garnishing +garote +garote's +garoted +garotes +garoting +garotte +garotte's +garotted +garottes +garotting +garret +garret's +garrets +garrison +garrison's +garrisoned +garrisoning +garrisons +garrote +garrote's +garroted +garrotes +garroting +garrotte +garrotte's +garrotted +garrottes +garrotting +garrulity +garrulity's +garrulous +garrulously +garrulousness +garrulousness's +garter +garter's +garters +gas +gas's +gaseous +gases +gash +gash's +gashed +gashes +gashing +gasket +gasket's +gaskets +gaslight +gaslight's +gaslights +gasohol +gasohol's +gasolene +gasolene's +gasoline +gasoline's +gasp +gasp's +gasped +gasping +gasps +gassed +gasses +gassier +gassiest +gassing +gassy +gastric +gastritis +gastritis's +gastrointestinal +gastronomic +gastronomical +gastronomy +gastronomy's +gasworks +gasworks's +gate +gate's +gatecrasher +gatecrasher's +gatecrashers +gated +gatepost +gatepost's +gateposts +gates +gateway +gateway's +gateways +gather +gather's +gathered +gatherer +gatherer's +gatherers +gathering +gathering's +gatherings +gathers +gating +gauche +gaucher +gauchest +gaucho +gaucho's +gauchos +gaudier +gaudiest +gaudily +gaudiness +gaudiness's +gaudy +gauge +gauge's +gauged +gauges +gauging +gaunt +gaunter +gauntest +gauntlet +gauntlet's 
+gauntlets +gauntness +gauntness's +gauze +gauze's +gauzier +gauziest +gauzy +gave +gavel +gavel's +gavels +gavotte +gavotte's +gavottes +gawk +gawked +gawkier +gawkiest +gawkily +gawkiness +gawkiness's +gawking +gawks +gawky +gay +gay's +gayer +gayest +gayety +gayety's +gayly +gayness +gayness's +gays +gaze +gaze's +gazebo +gazebo's +gazeboes +gazebos +gazed +gazelle +gazelle's +gazelles +gazer +gazer's +gazers +gazes +gazette +gazette's +gazetted +gazetteer +gazetteer's +gazetteers +gazettes +gazetting +gazillion +gazillions +gazing +gazpacho +gazpacho's +gear +gear's +gearbox +gearbox's +gearboxes +geared +gearing +gears +gearshift +gearshift's +gearshifts +gearwheel +gearwheel's +gearwheels +gecko +gecko's +geckoes +geckos +gee +geed +geegaw +geegaw's +geegaws +geeing +geek +geek's +geekier +geekiest +geeks +geeky +gees +geese +geez +geezer +geezer's +geezers +geisha +geisha's +geishas +gel +gel's +gelatin +gelatin's +gelatine +gelatine's +gelatinous +geld +gelded +gelding +gelding's +geldings +gelds +gelid +gelled +gelling +gels +gelt +gem +gem's +gems +gemstone +gemstone's +gemstones +gendarme +gendarme's +gendarmes +gender +gender's +genders +gene +gene's +genealogical +genealogies +genealogist +genealogist's +genealogists +genealogy +genealogy's +genera +general +general's +generalissimo +generalissimo's +generalissimos +generalities +generality +generality's +generalization +generalization's +generalizations +generalize +generalized +generalizes +generalizing +generally +generals +generate +generated +generates +generating +generation +generation's +generations +generative +generator +generator's +generators +generic +generic's +generically +generics +generosities +generosity +generosity's +generous +generously +genes +geneses +genesis +genesis's +genetic +genetically +geneticist +geneticist's +geneticists +genetics +genetics's +genial +geniality +geniality's +genially +genie +genie's +genies +genii +genital +genitalia +genitalia's +genitals +genitals's 
+genitive +genitive's +genitives +genius +genius's +geniuses +genocide +genocide's +genome +genome's +genomes +genre +genre's +genres +gent +gent's +genteel +gentian +gentian's +gentians +gentile +gentile's +gentiles +gentility +gentility's +gentle +gentled +gentlefolk +gentlefolk's +gentleman +gentleman's +gentlemanly +gentlemen +gentleness +gentleness's +gentler +gentles +gentlest +gentlewoman +gentlewoman's +gentlewomen +gentling +gently +gentries +gentrification +gentrification's +gentrified +gentrifies +gentrify +gentrifying +gentry +gentry's +gents +genuflect +genuflected +genuflecting +genuflection +genuflection's +genuflections +genuflects +genuine +genuinely +genuineness +genuineness's +genus +genus's +genuses +geocache +geocached +geocaches +geocaching +geocentric +geode +geode's +geodes +geodesic +geodesic's +geodesics +geoengineering +geographer +geographer's +geographers +geographic +geographical +geographically +geographies +geography +geography's +geologic +geological +geologically +geologies +geologist +geologist's +geologists +geology +geology's +geometer +geometric +geometrical +geometrically +geometries +geometry +geometry's +geophysical +geophysics +geophysics's +geopolitical +geopolitics +geopolitics's +geostationary +geothermal +geranium +geranium's +geraniums +gerbil +gerbil's +gerbils +geriatric +geriatrics +geriatrics's +germ +germ's +germane +germanium +germanium's +germicidal +germicide +germicide's +germicides +germinal +germinal's +germinate +germinated +germinates +germinating +germination +germination's +germs +gerontologist +gerontologist's +gerontologists +gerontology +gerontology's +gerrymander +gerrymander's +gerrymandered +gerrymandering +gerrymandering's +gerrymanders +gerund +gerund's +gerunds +gestate +gestated +gestates +gestating +gestation +gestation's +gesticulate +gesticulated +gesticulates +gesticulating +gesticulation +gesticulation's +gesticulations +gesture +gesture's +gestured +gestures +gesturing +gesundheit +get 
+getaway +getaway's +getaways +gets +getting +getup +getup's +gewgaw +gewgaw's +gewgaws +geyser +geyser's +geysers +ghastlier +ghastliest +ghastliness +ghastliness's +ghastly +gherkin +gherkin's +gherkins +ghetto +ghetto's +ghettoes +ghettos +ghost +ghost's +ghosted +ghosting +ghostlier +ghostliest +ghostliness +ghostliness's +ghostly +ghosts +ghostwrite +ghostwriter +ghostwriter's +ghostwriters +ghostwrites +ghostwriting +ghostwritten +ghostwrote +ghoul +ghoul's +ghoulish +ghouls +giant +giant's +giantess +giantess's +giantesses +giants +gibber +gibbered +gibbering +gibberish +gibberish's +gibbers +gibbet +gibbet's +gibbeted +gibbeting +gibbets +gibbon +gibbon's +gibbons +gibe +gibe's +gibed +gibes +gibing +giblet +giblet's +giblets +giddier +giddiest +giddily +giddiness +giddiness's +giddy +gift +gift's +gifted +gifting +gifts +gig +gig's +gigabit +gigabit's +gigabits +gigabyte +gigabyte's +gigabytes +gigahertz +gigahertz's +gigantic +gigapixel +gigapixel's +gigapixels +gigged +gigging +giggle +giggle's +giggled +giggler +giggler's +gigglers +giggles +gigglier +giggliest +giggling +giggly +gigolo +gigolo's +gigolos +gigs +gild +gild's +gilded +gilding +gilds +gill +gill's +gills +gilt +gilt's +gilts +gimcrack +gimcrack's +gimcracks +gimlet +gimlet's +gimleted +gimleting +gimlets +gimme +gimmick +gimmick's +gimmickry +gimmickry's +gimmicks +gimmicky +gimpy +gin +gin's +ginger +ginger's +gingerbread +gingerbread's +gingerly +gingersnap +gingersnap's +gingersnaps +gingham +gingham's +gingivitis +gingivitis's +gingko +gingko's +gingkoes +gingkos +ginkgo +ginkgo's +ginkgoes +ginkgos +ginned +ginning +gins +ginseng +ginseng's +gipsies +gipsy +gipsy's +giraffe +giraffe's +giraffes +gird +girded +girder +girder's +girders +girding +girdle +girdle's +girdled +girdles +girdling +girds +girl +girl's +girlfriend +girlfriend's +girlfriends +girlhood +girlhood's +girlhoods +girlish +girlishly +girls +girt +girt's +girted +girth +girth's +girths +girting +girts +gismo +gismo's 
+gismos +gist +gist's +give +giveaway +giveaway's +giveaways +given +given's +givens +gives +giving +gizmo +gizmo's +gizmos +gizzard +gizzard's +gizzards +glacial +glacially +glacier +glacier's +glaciers +glad +glad's +gladden +gladdened +gladdening +gladdens +gladder +gladdest +glade +glade's +glades +gladiator +gladiator's +gladiatorial +gladiators +gladiola +gladiola's +gladiolas +gladioli +gladiolus +gladiolus's +gladioluses +gladly +gladness +gladness's +glads +glamor +glamor's +glamored +glamoring +glamorize +glamorized +glamorizes +glamorizing +glamorous +glamorously +glamors +glamour +glamour's +glamoured +glamouring +glamourize +glamourized +glamourizes +glamourizing +glamourous +glamours +glance +glance's +glanced +glances +glancing +gland +gland's +glands +glandular +glare +glare's +glared +glares +glaring +glaringly +glass +glass's +glassed +glasses +glassful +glassful's +glassfuls +glassier +glassiest +glassing +glassware +glassware's +glassy +glaucoma +glaucoma's +glaze +glaze's +glazed +glazes +glazier +glazier's +glaziers +glazing +gleam +gleam's +gleamed +gleaming +gleamings +gleams +glean +gleaned +gleaning +gleans +glee +glee's +gleeful +gleefully +glen +glen's +glens +glib +glibber +glibbest +glibly +glibness +glibness's +glide +glide's +glided +glider +glider's +gliders +glides +gliding +glimmer +glimmer's +glimmered +glimmering +glimmering's +glimmerings +glimmers +glimpse +glimpse's +glimpsed +glimpses +glimpsing +glint +glint's +glinted +glinting +glints +glissandi +glissando +glissando's +glissandos +glisten +glisten's +glistened +glistening +glistens +glitch +glitch's +glitches +glitter +glitter's +glittered +glittering +glitters +glittery +glitz +glitz's +glitzier +glitziest +glitzy +gloaming +gloaming's +gloamings +gloat +gloat's +gloated +gloating +gloats +glob +glob's +global +globalization +globally +globe +globe's +globes +globetrotter +globetrotter's +globetrotters +globs +globular +globule +globule's +globules +glockenspiel 
+glockenspiel's +glockenspiels +gloom +gloom's +gloomier +gloomiest +gloomily +gloominess +gloominess's +gloomy +glop +glop's +gloried +glories +glorification +glorification's +glorified +glorifies +glorify +glorifying +glorious +gloriously +glory +glory's +glorying +gloss +gloss's +glossaries +glossary +glossary's +glossed +glosses +glossier +glossies +glossiest +glossiness +glossiness's +glossing +glossy +glossy's +glottides +glottis +glottis's +glottises +glove +glove's +gloved +gloves +gloving +glow +glow's +glowed +glower +glower's +glowered +glowering +glowers +glowing +glowingly +glows +glowworm +glowworm's +glowworms +glucose +glucose's +glue +glue's +glued +glueing +glues +gluey +gluier +gluiest +gluing +glum +glumly +glummer +glummest +glumness +glumness's +glut +glut's +gluten +gluten's +glutinous +gluts +glutted +glutting +glutton +glutton's +gluttonous +gluttonously +gluttons +gluttony +gluttony's +glycerin +glycerin's +glycerine +glycerine's +glycerol +glycerol's +glycogen +glycogen's +glyph +gnarl +gnarled +gnarlier +gnarliest +gnarling +gnarls +gnarly +gnash +gnash's +gnashed +gnashes +gnashing +gnat +gnat's +gnats +gnaw +gnawed +gnawing +gnawn +gnaws +gneiss +gneiss's +gnome +gnome's +gnomes +gnomish +gnu +gnu's +gnus +go +go's +goad +goad's +goaded +goading +goads +goal +goal's +goalie +goalie's +goalies +goalkeeper +goalkeeper's +goalkeepers +goalpost +goalpost's +goalposts +goals +goaltender +goaltender's +goaltenders +goat +goat's +goatee +goatee's +goatees +goatherd +goatherd's +goatherds +goats +goatskin +goatskin's +goatskins +gob +gob's +gobbed +gobbing +gobble +gobble's +gobbled +gobbledegook +gobbledegook's +gobbledygook +gobbledygook's +gobbler +gobbler's +gobblers +gobbles +gobbling +goblet +goblet's +goblets +goblin +goblin's +goblins +gobs +god +god's +godchild +godchild's +godchildren +godchildren's +goddam +goddamed +goddamn +goddamned +goddaughter +goddaughter's +goddaughters +goddess +goddess's +goddesses +godfather +godfather's 
+godfathers +godforsaken +godhood +godhood's +godless +godlier +godliest +godlike +godliness +godliness's +godly +godmother +godmother's +godmothers +godparent +godparent's +godparents +gods +godsend +godsend's +godsends +godson +godson's +godsons +goes +gofer +gofer's +gofers +goggle +goggle's +goggled +goggles +goggles's +goggling +going +going's +goings +goiter +goiter's +goiters +goitre +goitre's +goitres +gold +gold's +goldbrick +goldbrick's +goldbricked +goldbricking +goldbricks +golden +goldener +goldenest +goldenrod +goldenrod's +goldfinch +goldfinch's +goldfinches +goldfish +goldfish's +goldfishes +golds +goldsmith +goldsmith's +goldsmiths +golf +golf's +golfed +golfer +golfer's +golfers +golfing +golfs +gollies +golly +golly's +gonad +gonad's +gonads +gondola +gondola's +gondolas +gondolier +gondolier's +gondoliers +gone +goner +goner's +goners +gong +gong's +gonged +gonging +gongs +gonna +gonorrhea +gonorrhea's +gonorrhoea +gonorrhoea's +goo +goo's +goober +goober's +goobers +good +good's +goodby +goodby's +goodbye +goodbye's +goodbyes +goodbys +goodie +goodie's +goodies +goodlier +goodliest +goodly +goodness +goodness's +goodnight +goods +goods's +goodwill +goodwill's +goody +goody's +gooey +goof +goof's +goofed +goofier +goofiest +goofing +goofs +goofy +google +google's +googled +googles +googling +gooier +gooiest +gook +gook's +gooks +goon +goon's +goons +goop +goop's +goose +goose's +gooseberries +gooseberry +gooseberry's +goosed +gooses +goosing +gopher +gopher's +gophers +gore +gore's +gored +gores +gorge +gorge's +gorged +gorgeous +gorgeously +gorges +gorging +gorier +goriest +gorilla +gorilla's +gorillas +goriness +goriness's +goring +gorse +gorse's +gory +gosh +gosling +gosling's +goslings +gospel +gospel's +gospels +gossamer +gossamer's +gossip +gossip's +gossiped +gossiping +gossipped +gossipping +gossips +gossipy +got +gotta +gotten +gouge +gouge's +gouged +gouger +gouger's +gougers +gouges +gouging +goulash +goulash's +goulashes +gourd 
+gourd's +gourds +gourmand +gourmand's +gourmands +gourmet +gourmet's +gourmets +gout +gout's +goutier +goutiest +gouty +govern +governable +governance +governance's +governed +governess +governess's +governesses +governing +government +government's +governmental +governments +governor +governor's +governors +governorship +governorship's +governs +gown +gown's +gowned +gowning +gowns +grab +grab's +grabbed +grabber +grabbing +grabs +grace +grace's +graced +graceful +gracefully +gracefulness +gracefulness's +graceless +gracelessly +gracelessness +gracelessness's +graces +gracing +gracious +graciously +graciousness +graciousness's +grackle +grackle's +grackles +grad +grad's +gradation +gradation's +gradations +grade +grade's +graded +grader +grader's +graders +grades +gradient +gradient's +gradients +grading +grads +gradual +gradually +graduate +graduate's +graduated +graduates +graduating +graduation +graduation's +graduations +graffiti +graffito +graffito's +graft +graft's +grafted +grafter +grafter's +grafters +grafting +grafts +grail +grain +grain's +grainier +grainiest +grains +grainy +gram +gram's +grammar +grammar's +grammarian +grammarian's +grammarians +grammars +grammatical +grammatically +gramophone +grams +granaries +granary +granary's +grand +grand's +grandad +grandad's +grandads +grandchild +grandchild's +grandchildren +grandchildren's +granddad +granddad's +granddads +granddaughter +granddaughter's +granddaughters +grandee +grandee's +grandees +grander +grandest +grandeur +grandeur's +grandfather +grandfather's +grandfathered +grandfathering +grandfathers +grandiloquence +grandiloquence's +grandiloquent +grandiose +grandly +grandma +grandma's +grandmas +grandmother +grandmother's +grandmothers +grandness +grandness's +grandpa +grandpa's +grandparent +grandparent's +grandparents +grandpas +grands +grandson +grandson's +grandsons +grandstand +grandstand's +grandstanded +grandstanding +grandstands +grange +grange's +granges +granite +granite's +grannie 
+grannie's +grannies +granny +granny's +granola +granola's +grant +grant's +granted +granting +grants +granular +granularity +granularity's +granulate +granulated +granulates +granulating +granulation +granulation's +granule +granule's +granules +grape +grape's +grapefruit +grapefruit's +grapefruits +grapes +grapevine +grapevine's +grapevines +graph +graph's +graphed +graphic +graphic's +graphical +graphically +graphics +graphing +graphite +graphite's +graphologist +graphologist's +graphologists +graphology +graphology's +graphs +grapnel +grapnel's +grapnels +grapple +grapple's +grappled +grapples +grappling +grasp +grasp's +grasped +grasping +grasps +grass +grass's +grassed +grasses +grasshopper +grasshopper's +grasshoppers +grassier +grassiest +grassing +grassland +grassland's +grassy +grate +grate's +grated +grateful +gratefully +gratefulness +gratefulness's +grater +grater's +graters +grates +gratification +gratification's +gratifications +gratified +gratifies +gratify +gratifying +grating +grating's +gratings +gratis +gratitude +gratitude's +gratuities +gratuitous +gratuitously +gratuity +gratuity's +grave +grave's +graved +gravel +gravel's +graveled +graveling +gravelled +gravelling +gravelly +gravels +gravely +graven +graver +graves +gravest +gravestone +gravestone's +gravestones +graveyard +graveyard's +graveyards +gravies +graving +gravitate +gravitated +gravitates +gravitating +gravitation +gravitation's +gravitational +gravity +gravity's +gravy +gravy's +gray +gray's +graybeard +graybeard's +graybeards +grayed +grayer +grayest +graying +grayish +grayness +grayness's +grays +graze +graze's +grazed +grazes +grazing +grease +grease's +greased +greasepaint +greasepaint's +greases +greasier +greasiest +greasiness +greasiness's +greasing +greasy +great +great's +greater +greatest +greatly +greatness +greatness's +greats +grebe +grebe's +grebes +greed +greed's +greedier +greediest +greedily +greediness +greediness's +greedy +green +green's +greenback 
+greenback's +greenbacks +greened +greener +greenery +greenery's +greenest +greengrocer +greengrocer's +greengrocers +greenhorn +greenhorn's +greenhorns +greenhouse +greenhouse's +greenhouses +greening +greenish +greenness +greenness's +greens +greensward +greensward's +greet +greeted +greeting +greeting's +greetings +greets +gregarious +gregariously +gregariousness +gregariousness's +gremlin +gremlin's +gremlins +grenade +grenade's +grenades +grenadier +grenadier's +grenadiers +grew +grey +grey's +greyed +greyer +greyest +greyhound +greyhound's +greyhounds +greying +greyish +greys +grid +grid's +griddle +griddle's +griddlecake +griddlecake's +griddlecakes +griddles +gridiron +gridiron's +gridirons +gridlock +gridlock's +gridlocks +grids +grief +grief's +griefs +grievance +grievance's +grievances +grieve +grieved +grieves +grieving +grievous +grievously +griffin +griffin's +griffins +grill +grill's +grille +grille's +grilled +grilles +grilling +grills +grim +grimace +grimace's +grimaced +grimaces +grimacing +grime +grime's +grimed +grimes +grimier +grimiest +griming +grimly +grimmer +grimmest +grimness +grimness's +grimy +grin +grin's +grind +grind's +grinder +grinder's +grinders +grinding +grinds +grindstone +grindstone's +grindstones +gringo +gringo's +gringos +grinned +grinning +grins +grip +grip's +gripe +gripe's +griped +gripes +griping +grippe +grippe's +gripped +gripping +grips +grislier +grisliest +grisly +grist +grist's +gristle +gristle's +gristly +grit +grit's +grits +grits's +gritted +grittier +grittiest +gritting +gritty +grizzled +grizzlier +grizzlies +grizzliest +grizzly +grizzly's +groan +groan's +groaned +groaning +groans +grocer +grocer's +groceries +grocers +grocery +grocery's +grog +grog's +groggier +groggiest +groggily +grogginess +grogginess's +groggy +groin +groin's +groins +grommet +grommet's +grommets +groom +groom's +groomed +grooming +grooming's +grooms +groove +groove's +grooved +grooves +groovier +grooviest +grooving +groovy +grope 
+grope's +groped +gropes +groping +grosbeak +grosbeak's +grosbeaks +gross +gross's +grossed +grosser +grosses +grossest +grossing +grossly +grossness +grossness's +grotesque +grotesque's +grotesquely +grotesques +grotto +grotto's +grottoes +grottos +grouch +grouch's +grouched +grouches +grouchier +grouchiest +grouchiness +grouchiness's +grouching +grouchy +ground +ground's +groundbreaking +groundbreaking's +groundbreakings +grounded +grounder +grounder's +grounders +groundhog +groundhog's +groundhogs +grounding +grounding's +groundings +groundless +groundlessly +grounds +groundswell +groundswell's +groundswells +groundwork +groundwork's +group +group's +grouped +grouper +grouper's +groupers +groupie +groupie's +groupies +grouping +grouping's +groupings +groups +grouse +grouse's +groused +grouses +grousing +grout +grout's +grouted +grouting +grouts +grove +grove's +grovel +groveled +groveler +groveler's +grovelers +groveling +grovelled +groveller +groveller's +grovellers +grovelling +grovels +groves +grow +grower +grower's +growers +growing +growl +growl's +growled +growling +growls +grown +grownup +grownup's +grownups +grows +growth +growth's +growths +grub +grub's +grubbed +grubbier +grubbiest +grubbiness +grubbiness's +grubbing +grubby +grubs +grubstake +grubstake's +grudge +grudge's +grudged +grudges +grudging +grudgingly +gruel +gruel's +grueling +gruelings +gruelling +gruellings +gruesome +gruesomely +gruesomer +gruesomest +gruff +gruffer +gruffest +gruffly +gruffness +gruffness's +grumble +grumble's +grumbled +grumbler +grumbler's +grumblers +grumbles +grumbling +grumpier +grumpiest +grumpily +grumpiness +grumpiness's +grumpy +grunge +grunge's +grungier +grungiest +grungy +grunt +grunt's +grunted +grunting +grunts +gryphon +gryphon's +gryphons +gs +guacamole +guacamole's +guano +guano's +guarantee +guarantee's +guaranteed +guaranteeing +guarantees +guarantied +guaranties +guarantor +guarantor's +guarantors +guaranty +guaranty's +guarantying +guard +guard's 
+guarded +guardedly +guardhouse +guardhouse's +guardhouses +guardian +guardian's +guardians +guardianship +guardianship's +guarding +guardrail +guardrail's +guardrails +guardroom +guardroom's +guardrooms +guards +guardsman +guardsman's +guardsmen +guava +guava's +guavas +gubernatorial +guerilla +guerilla's +guerillas +guerrilla +guerrilla's +guerrillas +guess +guess's +guessable +guessed +guesser +guesser's +guessers +guesses +guessing +guesstimate +guesstimate's +guesstimated +guesstimates +guesstimating +guesswork +guesswork's +guest +guest's +guested +guesting +guests +guff +guff's +guffaw +guffaw's +guffawed +guffawing +guffaws +guidance +guidance's +guide +guide's +guidebook +guidebook's +guidebooks +guided +guideline +guideline's +guidelines +guides +guiding +guild +guild's +guilder +guilder's +guilders +guilds +guile +guile's +guileful +guileless +guillotine +guillotine's +guillotined +guillotines +guillotining +guilt +guilt's +guiltier +guiltiest +guiltily +guiltiness +guiltiness's +guiltless +guilty +guinea +guinea's +guineas +guise +guise's +guises +guitar +guitar's +guitarist +guitarist's +guitarists +guitars +gulag +gulag's +gulags +gulch +gulch's +gulches +gulf +gulf's +gulfs +gull +gull's +gulled +gullet +gullet's +gullets +gulley +gulley's +gullibility +gullibility's +gullible +gullies +gulling +gulls +gully +gully's +gulp +gulp's +gulped +gulping +gulps +gum +gum's +gumbo +gumbo's +gumbos +gumdrop +gumdrop's +gumdrops +gummed +gummier +gummiest +gumming +gummy +gumption +gumption's +gums +gun +gun's +gunboat +gunboat's +gunboats +gunfight +gunfight's +gunfights +gunfire +gunfire's +gunk +gunk's +gunman +gunman's +gunmen +gunned +gunner +gunner's +gunners +gunnery +gunnery's +gunning +gunny +gunny's +gunnysack +gunnysack's +gunnysacks +gunpoint +gunpoint's +gunpowder +gunpowder's +gunrunner +gunrunner's +gunrunners +gunrunning +gunrunning's +guns +gunshot +gunshot's +gunshots +gunslinger +gunslinger's +gunslingers +gunsmith +gunsmith's +gunsmiths 
+gunwale +gunwale's +gunwales +guppies +guppy +guppy's +gurgle +gurgle's +gurgled +gurgles +gurgling +gurney +gurney's +gurneys +guru +guru's +gurus +gush +gush's +gushed +gusher +gusher's +gushers +gushes +gushier +gushiest +gushing +gushy +gusset +gusset's +gusseted +gusseting +gussets +gust +gust's +gustatory +gusted +gustier +gustiest +gusting +gusto +gusto's +gusts +gusty +gut +gut's +gutless +guts +gutsier +gutsiest +gutsy +gutted +gutter +gutter's +guttered +guttering +gutters +guttersnipe +guttersnipe's +guttersnipes +gutting +guttural +guttural's +gutturals +guy +guy's +guyed +guying +guys +guzzle +guzzled +guzzler +guzzler's +guzzlers +guzzles +guzzling +gybe +gybe's +gybed +gybes +gybing +gym +gym's +gymnasia +gymnasium +gymnasium's +gymnasiums +gymnast +gymnast's +gymnastic +gymnastics +gymnastics's +gymnasts +gymnosperm +gymnosperm's +gymnosperms +gyms +gynecological +gynecologist +gynecologist's +gynecologists +gynecology +gynecology's +gyp +gyp's +gypped +gypping +gyps +gypsies +gypsum +gypsum's +gypsy +gypsy's +gyrate +gyrated +gyrates +gyrating +gyration +gyration's +gyrations +gyro +gyro's +gyros +gyroscope +gyroscope's +gyroscopes +h +h'm +ha +haberdasher +haberdasher's +haberdasheries +haberdashers +haberdashery +haberdashery's +habit +habit's +habitability +habitability's +habitable +habitat +habitat's +habitation +habitation's +habitations +habitats +habits +habitual +habitually +habituate +habituated +habituates +habituating +habituation +habituation's +habitué +habitué's +habitués +hacienda +hacienda's +haciendas +hack +hack's +hacked +hacker +hacker's +hackers +hacking +hackle +hackle's +hackles +hackney +hackney's +hackneyed +hackneying +hackneys +hacks +hacksaw +hacksaw's +hacksaws +hacktivist +hacktivist's +hacktivists +had +haddock +haddock's +haddocks +hadn't +haemoglobin +haemoglobin's +haemophilia +haemophilia's +haemorrhage +haemorrhage's +haemorrhaged +haemorrhages +haemorrhaging +haemorrhoids +hafnium +hafnium's +haft +haft's 
+hafts +hag +hag's +haggard +haggle +haggle's +haggled +haggler +haggler's +hagglers +haggles +haggling +hags +hah +haiku +haiku's +hail +hail's +hailed +hailing +hails +hailstone +hailstone's +hailstones +hailstorm +hailstorm's +hailstorms +hair +hair's +hairbreadth +hairbreadth's +hairbreadths +hairbrush +hairbrush's +hairbrushes +haircut +haircut's +haircuts +hairdo +hairdo's +hairdos +hairdresser +hairdresser's +hairdressers +hairdressing +hairdressing's +haired +hairier +hairiest +hairiness +hairiness's +hairless +hairline +hairline's +hairlines +hairnet +hairnet's +hairnets +hairpiece +hairpiece's +hairpieces +hairpin +hairpin's +hairpins +hairs +hairsbreadth +hairsbreadth's +hairsbreadths +hairsplitting +hairsplitting's +hairspring +hairspring's +hairsprings +hairstyle +hairstyle's +hairstyles +hairstylist +hairstylist's +hairstylists +hairy +hake +hake's +hakes +halberd +halberd's +halberds +halcyon +hale +haled +haler +hales +halest +half +half's +halfback +halfback's +halfbacks +halfhearted +halfheartedly +halfheartedness +halfheartedness's +halfpence +halfpennies +halfpenny +halfpenny's +halftime +halftime's +halftimes +halfway +halibut +halibut's +halibuts +haling +halitosis +halitosis's +hall +hall's +halleluiah +halleluiah's +halleluiahs +hallelujah +hallelujah's +hallelujahs +hallmark +hallmark's +hallmarked +hallmarking +hallmarks +hallow +hallowed +hallowing +hallows +halls +hallucinate +hallucinated +hallucinates +hallucinating +hallucination +hallucination's +hallucinations +hallucinatory +hallucinogen +hallucinogen's +hallucinogenic +hallucinogenic's +hallucinogenics +hallucinogens +hallway +hallway's +hallways +halo +halo's +haloed +haloes +halogen +halogen's +halogens +haloing +halon +halos +halt +halt's +halted +halter +halter's +haltered +haltering +halters +halting +haltingly +halts +halve +halved +halves +halving +halyard +halyard's +halyards +ham +ham's +hamburger +hamburger's +hamburgers +hamlet +hamlet's +hamlets +hammed +hammer 
+hammer's +hammered +hammerhead +hammerhead's +hammerheads +hammering +hammerings +hammers +hamming +hammock +hammock's +hammocks +hamper +hamper's +hampered +hampering +hampers +hams +hamster +hamster's +hamsters +hamstring +hamstring's +hamstringing +hamstrings +hamstrung +hand +hand's +handbag +handbag's +handbags +handball +handball's +handballs +handbill +handbill's +handbills +handbook +handbook's +handbooks +handcar +handcar's +handcars +handcart +handcart's +handcarts +handcraft +handcraft's +handcrafted +handcrafting +handcrafts +handcuff +handcuff's +handcuffed +handcuffing +handcuffs +handed +handedness +handful +handful's +handfuls +handgun +handgun's +handguns +handheld +handheld's +handhelds +handicap +handicap's +handicapped +handicapper +handicapper's +handicappers +handicapping +handicaps +handicraft +handicraft's +handicrafts +handier +handiest +handily +handiness +handiness's +handing +handiwork +handiwork's +handkerchief +handkerchief's +handkerchiefs +handkerchieves +handle +handle's +handlebar +handlebar's +handlebars +handled +handler +handler's +handlers +handles +handling +handmade +handmaid +handmaid's +handmaiden +handmaiden's +handmaidens +handmaids +handout +handout's +handouts +handpick +handpicked +handpicking +handpicks +handrail +handrail's +handrails +hands +handset +handset's +handsets +handsful +handshake +handshake's +handshakes +handshaking +handsome +handsomely +handsomeness +handsomeness's +handsomer +handsomest +handspring +handspring's +handsprings +handstand +handstand's +handstands +handwork +handwork's +handwriting +handwriting's +handwritten +handy +handyman +handyman's +handymen +hang +hang's +hangar +hangar's +hangars +hangdog +hanged +hanger +hanger's +hangers +hanging +hanging's +hangings +hangman +hangman's +hangmen +hangnail +hangnail's +hangnails +hangout +hangout's +hangouts +hangover +hangover's +hangovers +hangs +hank +hank's +hanker +hankered +hankering +hankering's +hankerings +hankers +hankie +hankie's 
+hankies +hanks +hanky +hanky's +hansom +hansom's +hansoms +haphazard +haphazardly +hapless +happen +happened +happening +happening's +happenings +happens +happenstance +happenstance's +happenstances +happier +happiest +happily +happiness +happiness's +happy +harangue +harangue's +harangued +harangues +haranguing +harass +harassed +harasses +harassing +harassment +harassment's +harbinger +harbinger's +harbingers +harbor +harbor's +harbored +harboring +harbors +hard +hardback +hardback's +hardbacks +hardball +hardball's +hardcover +hardcover's +hardcovers +harden +hardened +hardener +hardener's +hardeners +hardening +hardens +harder +hardest +hardheaded +hardheadedly +hardheadedness +hardheadedness's +hardhearted +hardheartedly +hardheartedness +hardheartedness's +hardier +hardiest +hardily +hardiness +hardiness's +hardline +hardliner +hardliner's +hardliners +hardly +hardness +hardness's +hardship +hardship's +hardships +hardtack +hardtack's +hardtop +hardtop's +hardtops +hardware +hardware's +hardwood +hardwood's +hardwoods +hardy +hare +hare's +harebrained +hared +harelip +harelip's +harelips +harem +harem's +harems +hares +haring +hark +harked +harken +harkened +harkening +harkens +harking +harks +harlequin +harlequin's +harlequins +harlot +harlot's +harlots +harm +harm's +harmed +harmful +harmfully +harmfulness +harmfulness's +harming +harmless +harmlessly +harmlessness +harmlessness's +harmonic +harmonic's +harmonica +harmonica's +harmonically +harmonicas +harmonics +harmonies +harmonious +harmoniously +harmoniousness +harmoniousness's +harmonization +harmonization's +harmonize +harmonized +harmonizes +harmonizing +harmony +harmony's +harms +harness +harness's +harnessed +harnesses +harnessing +harp +harp's +harped +harpies +harping +harpist +harpist's +harpists +harpoon +harpoon's +harpooned +harpooning +harpoons +harps +harpsichord +harpsichord's +harpsichords +harpy +harpy's +harridan +harridan's +harridans +harried +harries +harrow +harrow's +harrowed 
+harrowing +harrows +harry +harrying +harsh +harsher +harshest +harshly +harshness +harshness's +hart +hart's +harts +harvest +harvest's +harvested +harvester +harvester's +harvesters +harvesting +harvests +has +hash +hash's +hashed +hasheesh +hasheesh's +hashes +hashing +hashish +hashish's +hashtag +hashtag's +hashtags +hasn't +hasp +hasp's +hasps +hassle +hassle's +hassled +hassles +hassling +hassock +hassock's +hassocks +haste +haste's +hasted +hasten +hastened +hastening +hastens +hastes +hastier +hastiest +hastily +hastiness +hastiness's +hasting +hasty +hat +hat's +hatch +hatch's +hatchback +hatchback's +hatchbacks +hatched +hatcheries +hatchery +hatchery's +hatches +hatchet +hatchet's +hatchets +hatching +hatching's +hatchway +hatchway's +hatchways +hate +hate's +hated +hateful +hatefully +hatefulness +hatefulness's +hater +hater's +haters +hates +hath +hating +hatred +hatred's +hatreds +hats +hatted +hatter +hatter's +hatters +hatting +haughtier +haughtiest +haughtily +haughtiness +haughtiness's +haughty +haul +haul's +hauled +hauler +hauler's +haulers +hauling +hauls +haunch +haunch's +haunches +haunt +haunt's +haunted +haunting +hauntingly +haunts +hauteur +hauteur's +have +have's +haven +haven's +haven't +havens +haversack +haversack's +haversacks +haves +having +havoc +havoc's +haw +haw's +hawed +hawing +hawk +hawk's +hawked +hawker +hawker's +hawkers +hawking +hawkish +hawks +haws +hawser +hawser's +hawsers +hawthorn +hawthorn's +hawthorns +hay +hay's +haycock +haycock's +haycocks +hayed +haying +hayloft +hayloft's +haylofts +haymow +haymow's +haymows +hays +hayseed +hayseed's +hayseeds +haystack +haystack's +haystacks +haywire +hazard +hazard's +hazarded +hazarding +hazardous +hazards +haze +haze's +hazed +hazel +hazel's +hazelnut +hazelnut's +hazelnuts +hazels +hazes +hazier +haziest +hazily +haziness +haziness's +hazing +hazing's +hazings +hazmat +hazy +he +he'd +he'll +he's +head +head's +headache +headache's +headaches +headband +headband's 
+headbands +headboard +headboard's +headboards +headdress +headdress's +headdresses +headed +header +header's +headers +headfirst +headgear +headgear's +headhunter +headhunter's +headhunters +headier +headiest +heading +heading's +headings +headland +headland's +headlands +headless +headlight +headlight's +headlights +headline +headline's +headlined +headlines +headlining +headlock +headlock's +headlocks +headlong +headmaster +headmaster's +headmasters +headmistress +headmistress's +headmistresses +headphone +headphone's +headphones +headquarter +headquarters +headquarters's +headrest +headrest's +headrests +headroom +headroom's +heads +headset +headset's +headsets +headstone +headstone's +headstones +headstrong +headwaiter +headwaiter's +headwaiters +headwaters +headwaters's +headway +headway's +headwind +headwind's +headwinds +headword +headword's +headwords +heady +heal +healed +healer +healer's +healers +healing +heals +health +health's +healthcare +healthful +healthfully +healthfulness +healthfulness's +healthier +healthiest +healthily +healthiness +healthiness's +healthy +heap +heap's +heaped +heaping +heaps +hear +heard +hearer +hearer's +hearers +hearing +hearing's +hearings +hearken +hearkened +hearkening +hearkens +hears +hearsay +hearsay's +hearse +hearse's +hearses +heart +heart's +heartache +heartache's +heartaches +heartbeat +heartbeat's +heartbeats +heartbreak +heartbreak's +heartbreaking +heartbreaks +heartbroken +heartburn +heartburn's +hearten +heartened +heartening +heartens +heartfelt +hearth +hearth's +hearths +heartier +hearties +heartiest +heartily +heartiness +heartiness's +heartland +heartland's +heartlands +heartless +heartlessly +heartlessness +heartlessness's +heartrending +hearts +heartsick +heartstrings +heartstrings's +heartthrob +heartthrob's +heartthrobs +heartwarming +hearty +hearty's +heat +heat's +heated +heatedly +heater +heater's +heaters +heath +heath's +heathen +heathen's +heathenish +heathens +heather +heather's +heaths 
+heating +heats +heatstroke +heatstroke's +heave +heave's +heaved +heaven +heaven's +heavenlier +heavenliest +heavenly +heavens +heavens's +heavenward +heavenwards +heaves +heavier +heavies +heaviest +heavily +heaviness +heaviness's +heaving +heavy +heavy's +heavyset +heavyweight +heavyweight's +heavyweights +heck +heck's +heckle +heckle's +heckled +heckler +heckler's +hecklers +heckles +heckling +heckling's +hectare +hectare's +hectares +hectic +hectically +hector +hector's +hectored +hectoring +hectors +hedge +hedge's +hedged +hedgehog +hedgehog's +hedgehogs +hedgerow +hedgerow's +hedgerows +hedges +hedging +hedonism +hedonism's +hedonist +hedonist's +hedonistic +hedonists +heed +heed's +heeded +heedful +heeding +heedless +heedlessly +heedlessness +heedlessness's +heeds +heehaw +heehaw's +heehawed +heehawing +heehaws +heel +heel's +heeled +heeling +heels +heft +heft's +hefted +heftier +heftiest +hefting +hefts +hefty +hegemony +hegemony's +heifer +heifer's +heifers +height +height's +heighten +heightened +heightening +heightens +heights +heinous +heinously +heinousness +heinousness's +heir +heir's +heiress +heiress's +heiresses +heirloom +heirloom's +heirlooms +heirs +heist +heist's +heisted +heisting +heists +held +helical +helices +helicopter +helicopter's +helicoptered +helicoptering +helicopters +heliotrope +heliotrope's +heliotropes +heliport +heliport's +heliports +helium +helium's +helix +helix's +helixes +hell +hell's +hellebore +hellebore's +hellhole +hellhole's +hellholes +hellion +hellion's +hellions +hellish +hellishly +hello +hello's +hellos +helm +helm's +helmet +helmet's +helmets +helms +helmsman +helmsman's +helmsmen +helot +helot's +helots +help +help's +helped +helper +helper's +helpers +helpful +helpfully +helpfulness +helpfulness's +helping +helping's +helpings +helpless +helplessly +helplessness +helplessness's +helpline +helpline's +helplines +helpmate +helpmate's +helpmates +helpmeet +helpmeet's +helpmeets +helps +hem +hem's +hematologist 
+hematologist's +hematologists +hematology +hematology's +hemisphere +hemisphere's +hemispheres +hemispheric +hemispherical +hemline +hemline's +hemlines +hemlock +hemlock's +hemlocks +hemmed +hemming +hemoglobin +hemoglobin's +hemophilia +hemophilia's +hemophiliac +hemophiliac's +hemophiliacs +hemorrhage +hemorrhage's +hemorrhaged +hemorrhages +hemorrhaging +hemorrhoid +hemorrhoid's +hemorrhoids +hemp +hemp's +hempen +hems +hemstitch +hemstitch's +hemstitched +hemstitches +hemstitching +hen +hen's +hence +henceforth +henceforward +henchman +henchman's +henchmen +henna +henna's +hennaed +hennaing +hennas +henpeck +henpecked +henpecking +henpecks +hens +hep +hepatic +hepatitis +hepatitis's +hepper +heppest +heptagon +heptagon's +heptagons +her +herald +herald's +heralded +heraldic +heralding +heraldry +heraldry's +heralds +herb +herb's +herbaceous +herbage +herbage's +herbal +herbalist +herbalist's +herbalists +herbicide +herbicide's +herbicides +herbivore +herbivore's +herbivores +herbivorous +herbs +herculean +herd +herd's +herded +herder +herder's +herders +herding +herds +herdsman +herdsman's +herdsmen +here +here's +hereabout +hereabouts +hereafter +hereafter's +hereafters +hereby +hereditary +heredity +heredity's +herein +hereof +heresies +heresy +heresy's +heretic +heretic's +heretical +heretics +hereto +heretofore +hereupon +herewith +heritage +heritage's +heritages +hermaphrodite +hermaphrodite's +hermaphrodites +hermaphroditic +hermetic +hermetically +hermit +hermit's +hermitage +hermitage's +hermitages +hermits +hernia +hernia's +herniae +hernias +hero +hero's +heroes +heroic +heroically +heroics +heroics's +heroin +heroin's +heroine +heroine's +heroins +heroism +heroism's +heron +heron's +herons +heros +herpes +herpes's +herring +herring's +herringbone +herringbone's +herrings +hers +herself +hertz +hertz's +hertzes +hes +hesitancy +hesitancy's +hesitant +hesitantly +hesitate +hesitated +hesitates +hesitating +hesitatingly +hesitation +hesitation's 
+hesitations +heterodox +heterodoxy +heterodoxy's +heterogeneity +heterogeneity's +heterogeneous +heterosexual +heterosexual's +heterosexuality +heterosexuality's +heterosexuals +heuristic +heuristic's +heuristics +hew +hewed +hewer +hewer's +hewers +hewing +hewn +hews +hex +hex's +hexadecimal +hexagon +hexagon's +hexagonal +hexagons +hexameter +hexameter's +hexameters +hexed +hexes +hexing +hey +heyday +heyday's +heydays +hi +hiatus +hiatus's +hiatuses +hibachi +hibachi's +hibachis +hibernate +hibernated +hibernates +hibernating +hibernation +hibernation's +hibiscus +hibiscus's +hibiscuses +hiccough +hiccough's +hiccoughed +hiccoughing +hiccoughs +hiccup +hiccup's +hiccuped +hiccuping +hiccups +hick +hick's +hickey +hickey's +hickeys +hickories +hickory +hickory's +hicks +hid +hidden +hide +hide's +hideaway +hideaway's +hideaways +hidebound +hided +hideous +hideously +hideousness +hideousness's +hideout +hideout's +hideouts +hides +hiding +hiding's +hie +hied +hieing +hierarchical +hierarchically +hierarchies +hierarchy +hierarchy's +hieroglyphic +hieroglyphic's +hieroglyphics +hies +hifalutin +high +high's +highball +highball's +highballs +highborn +highboy +highboy's +highboys +highbrow +highbrow's +highbrows +highchair +highchair's +highchairs +higher +highest +highfalutin +highfaluting +highjack +highjack's +highjacked +highjacker +highjacker's +highjackers +highjacking +highjacks +highland +highland's +highlands +highlight +highlight's +highlighted +highlighter +highlighter's +highlighters +highlighting +highlights +highly +highness +highness's +highs +hightail +hightailed +hightailing +hightails +highway +highway's +highwayman +highwayman's +highwaymen +highways +hijack +hijack's +hijacked +hijacker +hijacker's +hijackers +hijacking +hijacking's +hijackings +hijacks +hike +hike's +hiked +hiker +hiker's +hikers +hikes +hiking +hilarious +hilariously +hilarity +hilarity's +hill +hill's +hillbillies +hillbilly +hillbilly's +hillier +hilliest +hillock +hillock's 
+hillocks +hills +hillside +hillside's +hillsides +hilltop +hilltop's +hilltops +hilly +hilt +hilt's +hilts +him +hims +himself +hind +hind's +hinder +hindered +hindering +hinders +hindmost +hindquarter +hindquarter's +hindquarters +hindrance +hindrance's +hindrances +hinds +hindsight +hindsight's +hinge +hinge's +hinged +hinges +hinging +hint +hint's +hinted +hinterland +hinterland's +hinterlands +hinting +hints +hip +hip's +hipped +hipper +hippest +hippie +hippie's +hippies +hipping +hippo +hippo's +hippopotami +hippopotamus +hippopotamus's +hippopotamuses +hippos +hippy +hippy's +hips +hire +hire's +hired +hireling +hireling's +hirelings +hires +hiring +hirsute +his +hiss +hiss's +hissed +hisses +hissing +histamine +histamine's +histamines +histogram +histogram's +histograms +historian +historian's +historians +historic +historical +historically +histories +history +history's +histrionic +histrionics +histrionics's +hit +hit's +hitch +hitch's +hitched +hitches +hitchhike +hitchhike's +hitchhiked +hitchhiker +hitchhiker's +hitchhikers +hitchhikes +hitchhiking +hitching +hither +hitherto +hits +hitter +hitter's +hitters +hitting +hive +hive's +hived +hives +hiving +ho +ho's +hoagie +hoagie's +hoagies +hoagy +hoagy's +hoard +hoard's +hoarded +hoarder +hoarder's +hoarders +hoarding +hoards +hoarfrost +hoarfrost's +hoarier +hoariest +hoariness +hoariness's +hoarse +hoarsely +hoarseness +hoarseness's +hoarser +hoarsest +hoary +hoax +hoax's +hoaxed +hoaxer +hoaxer's +hoaxers +hoaxes +hoaxing +hob +hob's +hobbies +hobbit +hobble +hobble's +hobbled +hobbles +hobbling +hobby +hobby's +hobbyhorse +hobbyhorse's +hobbyhorses +hobbyist +hobbyist's +hobbyists +hobgoblin +hobgoblin's +hobgoblins +hobnail +hobnail's +hobnailed +hobnailing +hobnails +hobnob +hobnobbed +hobnobbing +hobnobs +hobo +hobo's +hoboes +hobos +hobs +hock +hock's +hocked +hockey +hockey's +hocking +hocks +hockshop +hockshop's +hockshops +hod +hod's +hodgepodge +hodgepodge's +hodgepodges +hods +hoe +hoe's 
+hoed +hoedown +hoedown's +hoedowns +hoeing +hoes +hog +hog's +hogan +hogan's +hogans +hogged +hogging +hoggish +hogs +hogshead +hogshead's +hogsheads +hogwash +hogwash's +hoist +hoist's +hoisted +hoisting +hoists +hokey +hokier +hokiest +hokum +hokum's +hold +hold's +holder +holder's +holders +holding +holding's +holdings +holdout +holdout's +holdouts +holdover +holdover's +holdovers +holds +holdup +holdup's +holdups +hole +hole's +holed +holes +holiday +holiday's +holidayed +holidaying +holidays +holier +holiest +holiness +holiness's +holing +holistic +holler +holler's +hollered +hollering +hollers +hollies +hollow +hollow's +hollowed +hollower +hollowest +hollowing +hollowly +hollowness +hollowness's +hollows +holly +holly's +hollyhock +hollyhock's +hollyhocks +holocaust +holocaust's +holocausts +hologram +hologram's +holograms +holograph +holograph's +holographic +holographs +holography +holography's +holster +holster's +holstered +holstering +holsters +holy +homage +homage's +homages +homburg +homburg's +homburgs +home +home's +homebodies +homebody +homebody's +homeboy +homeboy's +homeboys +homecoming +homecoming's +homecomings +homed +homegrown +homeland +homeland's +homelands +homeless +homeless's +homelessness +homelessness's +homelier +homeliest +homeliness +homeliness's +homely +homemade +homemaker +homemaker's +homemakers +homeopathic +homeopathy +homeopathy's +homeowner +homeowner's +homeowners +homepage +homepage's +homepages +homer +homer's +homered +homering +homeroom +homeroom's +homerooms +homers +homes +homesick +homesickness +homesickness's +homespun +homespun's +homestead +homestead's +homesteaded +homesteader +homesteader's +homesteaders +homesteading +homesteads +homestretch +homestretch's +homestretches +hometown +hometown's +hometowns +homeward +homewards +homework +homework's +homewrecker +homewrecker's +homewreckers +homey +homey's +homeyness +homeyness's +homeys +homicidal +homicide +homicide's +homicides +homie +homie's +homier +homies 
+homiest +homilies +homily +homily's +hominess +hominess's +homing +hominy +hominy's +homogeneity +homogeneity's +homogeneous +homogeneously +homogenization +homogenization's +homogenize +homogenized +homogenizes +homogenizing +homograph +homograph's +homographs +homonym +homonym's +homonyms +homophobia +homophobia's +homophobic +homophone +homophone's +homophones +homosexual +homosexual's +homosexuality +homosexuality's +homosexuals +homy +honcho +honcho's +honchos +hone +hone's +honed +hones +honest +honester +honestest +honestly +honesty +honesty's +honey +honey's +honeybee +honeybee's +honeybees +honeycomb +honeycomb's +honeycombed +honeycombing +honeycombs +honeydew +honeydew's +honeydews +honeyed +honeying +honeymoon +honeymoon's +honeymooned +honeymooner +honeymooner's +honeymooners +honeymooning +honeymoons +honeys +honeysuckle +honeysuckle's +honeysuckles +honied +honing +honk +honk's +honked +honking +honks +honor +honor's +honorable +honorably +honoraria +honorarium +honorarium's +honorariums +honorary +honored +honorific +honorific's +honorifics +honoring +honors +hooch +hooch's +hood +hood's +hooded +hoodie +hoodie's +hoodies +hooding +hoodlum +hoodlum's +hoodlums +hoodoo +hoodoo's +hoodooed +hoodooing +hoodoos +hoods +hoodwink +hoodwinked +hoodwinking +hoodwinks +hooey +hooey's +hoof +hoof's +hoofed +hoofing +hoofs +hook +hook's +hookah +hookah's +hookahs +hooked +hooker +hooker's +hookers +hookey +hookey's +hooking +hooks +hookup +hookup's +hookups +hookworm +hookworm's +hookworms +hooky +hooky's +hooligan +hooligan's +hooliganism +hooliganism's +hooligans +hoop +hoop's +hooped +hooping +hoopla +hoopla's +hoops +hoorah +hoorah's +hoorahs +hooray +hooray's +hoorayed +hooraying +hoorays +hoot +hoot's +hootch +hootch's +hooted +hooter +hooter's +hooters +hooting +hoots +hooves +hop +hop's +hope +hope's +hoped +hopeful +hopeful's +hopefully +hopefulness +hopefulness's +hopefuls +hopeless +hopelessly +hopelessness +hopelessness's +hopes +hoping +hopped 
+hopper +hopper's +hoppers +hopping +hops +hopscotch +hopscotch's +hopscotched +hopscotches +hopscotching +horde +horde's +horded +hordes +hording +horizon +horizon's +horizons +horizontal +horizontal's +horizontally +horizontals +hormonal +hormone +hormone's +hormones +horn +horn's +horned +hornet +hornet's +hornets +hornier +horniest +hornless +hornpipe +hornpipe's +hornpipes +horns +horny +horology +horology's +horoscope +horoscope's +horoscopes +horrendous +horrendously +horrible +horribly +horrid +horridly +horrific +horrified +horrifies +horrify +horrifying +horror +horror's +horrors +horse +horse's +horseback +horseback's +horsed +horseflies +horsefly +horsefly's +horsehair +horsehair's +horsehide +horsehide's +horseman +horseman's +horsemanship +horsemanship's +horsemen +horseplay +horseplay's +horsepower +horsepower's +horseradish +horseradish's +horseradishes +horses +horseshoe +horseshoe's +horseshoed +horseshoeing +horseshoes +horsetail +horsetail's +horsetails +horsewhip +horsewhip's +horsewhipped +horsewhipping +horsewhips +horsewoman +horsewoman's +horsewomen +horsey +horsier +horsiest +horsing +horsy +horticultural +horticulture +horticulture's +horticulturist +horticulturist's +horticulturists +hos +hosanna +hosanna's +hosannas +hose +hose's +hosed +hoses +hosiery +hosiery's +hosing +hospice +hospice's +hospices +hospitable +hospitably +hospital +hospital's +hospitality +hospitality's +hospitalization +hospitalization's +hospitalizations +hospitalize +hospitalized +hospitalizes +hospitalizing +hospitals +host +host's +hostage +hostage's +hostages +hosted +hostel +hostel's +hosteled +hosteler +hosteler's +hostelers +hosteling +hostelled +hostelling +hostelries +hostelry +hostelry's +hostels +hostess +hostess's +hostessed +hostesses +hostessing +hostile +hostile's +hostilely +hostiles +hostilities +hostilities's +hostility +hostility's +hosting +hostler +hostler's +hostlers +hosts +hot +hotbed +hotbed's +hotbeds +hotcake +hotcake's +hotcakes +hotel 
+hotel's +hotelier +hotelier's +hoteliers +hotels +hothead +hothead's +hotheaded +hotheadedly +hotheadedness +hotheadedness's +hotheads +hothouse +hothouse's +hothouses +hotkey +hotkeys +hotly +hotness +hotness's +hotshot +hotshot's +hotshots +hotter +hottest +hoummos +houmous +hound +hound's +hounded +hounding +hounds +hour +hour's +hourglass +hourglass's +hourglasses +hourly +hours +house +house's +houseboat +houseboat's +houseboats +housebound +housebreak +housebreaking +housebreaking's +housebreaks +housebroke +housebroken +houseclean +housecleaned +housecleaning +housecleaning's +housecleans +housecoat +housecoat's +housecoats +housed +houseflies +housefly +housefly's +household +household's +householder +householder's +householders +households +househusband +househusband's +househusbands +housekeeper +housekeeper's +housekeepers +housekeeping +housekeeping's +housemaid +housemaid's +housemaids +housemother +housemother's +housemothers +houseplant +houseplant's +houseplants +houses +housetop +housetop's +housetops +housewares +housewares's +housewarming +housewarming's +housewarmings +housewife +housewife's +housewives +housework +housework's +housing +housing's +housings +hove +hovel +hovel's +hovels +hover +hovercraft +hovercraft's +hovercrafts +hovered +hovering +hovers +how +how's +howdah +howdah's +howdahs +howdy +however +howitzer +howitzer's +howitzers +howl +howl's +howled +howler +howler's +howlers +howling +howls +hows +howsoever +hub +hub's +hubbies +hubbub +hubbub's +hubbubs +hubby +hubby's +hubcap +hubcap's +hubcaps +hubris +hubris's +hubs +huckleberries +huckleberry +huckleberry's +huckster +huckster's +huckstered +huckstering +hucksters +huddle +huddle's +huddled +huddles +huddling +hue +hue's +hued +hues +huff +huff's +huffed +huffier +huffiest +huffily +huffing +huffs +huffy +hug +hug's +huge +hugely +hugeness +hugeness's +huger +hugest +hugged +hugging +hugs +huh +hula +hula's +hulas +hulk +hulk's +hulking +hulks +hull +hull's +hullabaloo 
+hullabaloo's +hullabaloos +hulled +hulling +hulls +hum +hum's +human +human's +humane +humanely +humaneness +humaneness's +humaner +humanest +humanism +humanism's +humanist +humanist's +humanistic +humanists +humanitarian +humanitarian's +humanitarianism +humanitarianism's +humanitarians +humanities +humanities's +humanity +humanity's +humanization +humanization's +humanize +humanized +humanizer +humanizer's +humanizers +humanizes +humanizing +humankind +humankind's +humanly +humanness +humanness's +humanoid +humanoid's +humanoids +humans +humble +humbled +humbleness +humbleness's +humbler +humbles +humblest +humbling +humblings +humbly +humbug +humbug's +humbugged +humbugging +humbugs +humdinger +humdinger's +humdingers +humdrum +humdrum's +humeri +humerus +humerus's +humid +humidified +humidifier +humidifier's +humidifiers +humidifies +humidify +humidifying +humidity +humidity's +humidor +humidor's +humidors +humiliate +humiliated +humiliates +humiliating +humiliation +humiliation's +humiliations +humility +humility's +hummed +humming +hummingbird +hummingbird's +hummingbirds +hummock +hummock's +hummocks +hummus +humongous +humor +humor's +humored +humoring +humorist +humorist's +humorists +humorless +humorlessness +humorlessness's +humorous +humorously +humors +hump +hump's +humpback +humpback's +humpbacked +humpbacks +humped +humping +humps +hums +humungous +humus +humus's +hunch +hunch's +hunchback +hunchback's +hunchbacked +hunchbacks +hunched +hunches +hunching +hundred +hundred's +hundredfold +hundreds +hundredth +hundredth's +hundredths +hundredweight +hundredweight's +hundredweights +hung +hunger +hunger's +hungered +hungering +hungers +hungover +hungrier +hungriest +hungrily +hungry +hunk +hunk's +hunker +hunkered +hunkering +hunkers +hunks +hunt +hunt's +hunted +hunter +hunter's +hunters +hunting +hunting's +huntress +huntress's +huntresses +hunts +huntsman +huntsman's +huntsmen +hurdle +hurdle's +hurdled +hurdler +hurdler's +hurdlers +hurdles 
+hurdling +hurl +hurl's +hurled +hurler +hurler's +hurlers +hurling +hurls +hurrah +hurrah's +hurrahed +hurrahing +hurrahs +hurray +hurray's +hurrayed +hurraying +hurrays +hurricane +hurricane's +hurricanes +hurried +hurriedly +hurries +hurry +hurry's +hurrying +hurt +hurt's +hurtful +hurting +hurtle +hurtled +hurtles +hurtling +hurts +husband +husband's +husbanded +husbanding +husbandry +husbandry's +husbands +hush +hush's +hushed +hushes +hushing +husk +husk's +husked +husker +husker's +huskers +huskier +huskies +huskiest +huskily +huskiness +huskiness's +husking +husks +husky +husky's +hussar +hussar's +hussars +hussies +hussy +hussy's +hustings +hustings's +hustle +hustle's +hustled +hustler +hustler's +hustlers +hustles +hustling +hut +hut's +hutch +hutch's +hutches +huts +hutzpa +hutzpa's +hutzpah +hutzpah's +hyacinth +hyacinth's +hyacinths +hyaena +hyaena's +hyaenas +hybrid +hybrid's +hybridize +hybridized +hybridizes +hybridizing +hybrids +hydra +hydra's +hydrae +hydrangea +hydrangea's +hydrangeas +hydrant +hydrant's +hydrants +hydras +hydrate +hydrate's +hydrated +hydrates +hydrating +hydraulic +hydraulically +hydraulics +hydraulics's +hydrocarbon +hydrocarbon's +hydrocarbons +hydroelectric +hydroelectricity +hydroelectricity's +hydrofoil +hydrofoil's +hydrofoils +hydrogen +hydrogen's +hydrogenate +hydrogenated +hydrogenates +hydrogenating +hydrology +hydrology's +hydrolysis +hydrolysis's +hydrometer +hydrometer's +hydrometers +hydrophobia +hydrophobia's +hydroplane +hydroplane's +hydroplaned +hydroplanes +hydroplaning +hydroponic +hydroponics +hydroponics's +hydrosphere +hydrosphere's +hydrotherapy +hydrotherapy's +hyena +hyena's +hyenas +hygiene +hygiene's +hygienic +hygienically +hygienist +hygienist's +hygienists +hygrometer +hygrometer's +hygrometers +hying +hymen +hymen's +hymens +hymn +hymn's +hymnal +hymnal's +hymnals +hymned +hymning +hymns +hype +hype's +hyped +hyper +hyperactive +hyperactivity +hyperactivity's +hyperbola +hyperbola's +hyperbolae 
+hyperbolas +hyperbole +hyperbole's +hyperbolic +hypercritical +hypercritically +hyperlink +hyperlink's +hyperlinked +hyperlinking +hyperlinks +hypermarket +hypersensitive +hypersensitivities +hypersensitivity +hypersensitivity's +hyperspace +hypertension +hypertension's +hypertext +hypertext's +hyperventilate +hyperventilated +hyperventilates +hyperventilating +hyperventilation +hyperventilation's +hypes +hyphen +hyphen's +hyphenate +hyphenate's +hyphenated +hyphenates +hyphenating +hyphenation +hyphenation's +hyphenations +hyphened +hyphening +hyphens +hyping +hypnoses +hypnosis +hypnosis's +hypnotic +hypnotic's +hypnotically +hypnotics +hypnotism +hypnotism's +hypnotist +hypnotist's +hypnotists +hypnotize +hypnotized +hypnotizes +hypnotizing +hypo +hypo's +hypoallergenic +hypochondria +hypochondria's +hypochondriac +hypochondriac's +hypochondriacs +hypocrisies +hypocrisy +hypocrisy's +hypocrite +hypocrite's +hypocrites +hypocritical +hypocritically +hypodermic +hypodermic's +hypodermics +hypoglycemia +hypoglycemia's +hypoglycemic +hypoglycemic's +hypoglycemics +hypos +hypotenuse +hypotenuse's +hypotenuses +hypothalami +hypothalamus +hypothalamus's +hypothermia +hypothermia's +hypotheses +hypothesis +hypothesis's +hypothesize +hypothesized +hypothesizes +hypothesizing +hypothetical +hypothetically +hysterectomies +hysterectomy +hysterectomy's +hysteresis +hysteria +hysteria's +hysteric +hysteric's +hysterical +hysterically +hysterics +hysterics's +i +iOS +iOS's +iPad +iPad's +iPhone +iPhone's +iPod +iPod's +iTunes +iTunes's +iamb +iamb's +iambic +iambic's +iambics +iambs +ibex +ibex's +ibexes +ibices +ibis +ibis's +ibises +ibuprofen +ibuprofen's +ice +ice's +iceberg +iceberg's +icebergs +icebound +icebox +icebox's +iceboxes +icebreaker +icebreaker's +icebreakers +icecap +icecap's +icecaps +iced +ices +icicle +icicle's +icicles +icier +iciest +icily +iciness +iciness's +icing +icing's +icings +ickier +ickiest +icky +icon +icon's +iconoclast +iconoclast's 
+iconoclastic +iconoclasts +icons +icy +id +id's +idea +idea's +ideal +ideal's +idealism +idealism's +idealist +idealist's +idealistic +idealistically +idealists +idealization +idealization's +idealize +idealized +idealizes +idealizing +ideally +ideals +ideas +identical +identically +identifiable +identification +identification's +identified +identifier +identifiers +identifies +identify +identifying +identities +identity +identity's +ideogram +ideogram's +ideograms +ideograph +ideograph's +ideographs +ideological +ideologically +ideologies +ideologist +ideologist's +ideologists +ideology +ideology's +ides +ides's +idiocies +idiocy +idiocy's +idiom +idiom's +idiomatic +idiomatically +idioms +idiosyncrasies +idiosyncrasy +idiosyncrasy's +idiosyncratic +idiot +idiot's +idiotic +idiotically +idiots +idle +idle's +idled +idleness +idleness's +idler +idler's +idlers +idles +idlest +idling +idly +idol +idol's +idolater +idolater's +idolaters +idolatrous +idolatry +idolatry's +idolize +idolized +idolizes +idolizing +idols +ids +idyl +idyl's +idyll +idyll's +idyllic +idylls +idyls +if +if's +iffier +iffiest +iffy +ifs +igloo +igloo's +igloos +igneous +ignite +ignited +ignites +igniting +ignition +ignition's +ignitions +ignoble +ignobly +ignominies +ignominious +ignominiously +ignominy +ignominy's +ignoramus +ignoramus's +ignoramuses +ignorance +ignorance's +ignorant +ignorantly +ignore +ignored +ignores +ignoring +iguana +iguana's +iguanas +ikon +ikon's +ikons +ilk +ilk's +ilks +ill +ill's +illegal +illegal's +illegalities +illegality +illegality's +illegally +illegals +illegibility +illegibility's +illegible +illegibly +illegitimacy +illegitimacy's +illegitimate +illegitimately +illiberal +illicit +illicitly +illicitness +illicitness's +illiteracy +illiteracy's +illiterate +illiterate's +illiterates +illness +illness's +illnesses +illogical +illogically +ills +illuminate +illuminated +illuminates +illuminating +illumination +illumination's +illuminations +illumine 
+illumined +illumines +illumining +illusion +illusion's +illusions +illusive +illusory +illustrate +illustrated +illustrates +illustrating +illustration +illustration's +illustrations +illustrative +illustrator +illustrator's +illustrators +illustrious +image +image's +imaged +imagery +imagery's +images +imaginable +imaginably +imaginary +imagination +imagination's +imaginations +imaginative +imaginatively +imagine +imagined +imagines +imaging +imagining +imam +imam's +imams +imbalance +imbalance's +imbalanced +imbalances +imbecile +imbecile's +imbeciles +imbecilic +imbecilities +imbecility +imbecility's +imbed +imbedded +imbedding +imbeds +imbibe +imbibed +imbibes +imbibing +imbroglio +imbroglio's +imbroglios +imbue +imbued +imbues +imbuing +imitate +imitated +imitates +imitating +imitation +imitation's +imitations +imitative +imitator +imitator's +imitators +immaculate +immaculately +immaculateness +immaculateness's +immanence +immanence's +immanent +immaterial +immature +immaturely +immaturity +immaturity's +immeasurable +immeasurably +immediacy +immediacy's +immediate +immediately +immemorial +immense +immensely +immensities +immensity +immensity's +immerse +immersed +immerses +immersing +immersion +immersion's +immersions +immersive +immigrant +immigrant's +immigrants +immigrate +immigrated +immigrates +immigrating +immigration +immigration's +imminence +imminence's +imminent +imminently +immobile +immobility +immobility's +immobilization +immobilization's +immobilize +immobilized +immobilizes +immobilizing +immoderate +immoderately +immodest +immodestly +immodesty +immodesty's +immolate +immolated +immolates +immolating +immolation +immolation's +immoral +immoralities +immorality +immorality's +immorally +immortal +immortal's +immortality +immortality's +immortalize +immortalized +immortalizes +immortalizing +immortally +immortals +immovable +immovably +immoveable +immune +immunity +immunity's +immunization +immunization's +immunizations +immunize +immunized 
+immunizes +immunizing +immunology +immunology's +immure +immured +immures +immuring +immutability +immutability's +immutable +immutably +imp +imp's +impact +impact's +impacted +impacting +impacts +impair +impaired +impairing +impairment +impairment's +impairments +impairs +impala +impala's +impalas +impale +impaled +impalement +impalement's +impales +impaling +impalpable +impanel +impaneled +impaneling +impanels +impart +imparted +impartial +impartiality +impartiality's +impartially +imparting +imparts +impassable +impasse +impasse's +impasses +impassioned +impassive +impassively +impassivity +impassivity's +impatience +impatience's +impatiences +impatient +impatiently +impeach +impeached +impeaches +impeaching +impeachment +impeachment's +impeachments +impeccability +impeccability's +impeccable +impeccably +impecunious +impecuniousness +impecuniousness's +impedance +impedance's +impede +impeded +impedes +impediment +impediment's +impedimenta +impedimenta's +impediments +impeding +impel +impelled +impelling +impels +impend +impended +impending +impends +impenetrability +impenetrability's +impenetrable +impenetrably +impenitence +impenitence's +impenitent +imperative +imperative's +imperatively +imperatives +imperceptible +imperceptibly +imperfect +imperfect's +imperfection +imperfection's +imperfections +imperfectly +imperfects +imperial +imperial's +imperialism +imperialism's +imperialist +imperialist's +imperialistic +imperialists +imperially +imperials +imperil +imperiled +imperiling +imperilled +imperilling +imperils +imperious +imperiously +imperiousness +imperiousness's +imperishable +impermanence +impermanence's +impermanent +impermeable +impermissible +impersonal +impersonally +impersonate +impersonated +impersonates +impersonating +impersonation +impersonation's +impersonations +impersonator +impersonator's +impersonators +impertinence +impertinence's +impertinent +impertinently +imperturbability +imperturbability's +imperturbable +imperturbably 
+impervious +impetigo +impetigo's +impetuosity +impetuosity's +impetuous +impetuously +impetus +impetus's +impetuses +impieties +impiety +impiety's +impinge +impinged +impingement +impingement's +impinges +impinging +impious +impiously +impish +impishly +impishness +impishness's +implacability +implacability's +implacable +implacably +implant +implant's +implantation +implantation's +implanted +implanting +implants +implausibilities +implausibility +implausibility's +implausible +implausibly +implement +implement's +implementable +implementation +implementation's +implementations +implemented +implementer +implementing +implements +implicate +implicated +implicates +implicating +implication +implication's +implications +implicit +implicitly +implied +implies +implode +imploded +implodes +imploding +implore +implored +implores +imploring +implosion +implosion's +implosions +imply +implying +impolite +impolitely +impoliteness +impoliteness's +impolitenesses +impolitic +imponderable +imponderable's +imponderables +import +import's +importance +importance's +important +importantly +importation +importation's +importations +imported +importer +importer's +importers +importing +imports +importunate +importune +importuned +importunes +importuning +importunity +importunity's +impose +imposed +imposes +imposing +imposingly +imposition +imposition's +impositions +impossibilities +impossibility +impossibility's +impossible +impossibles +impossibly +imposter +imposter's +imposters +impostor +impostor's +impostors +imposture +imposture's +impostures +impotence +impotence's +impotent +impotently +impound +impounded +impounding +impounds +impoverish +impoverished +impoverishes +impoverishing +impoverishment +impoverishment's +impracticable +impracticably +impractical +impracticality +impracticality's +imprecation +imprecation's +imprecations +imprecise +imprecisely +imprecision +imprecision's +impregnability +impregnability's +impregnable +impregnably +impregnate +impregnated 
+impregnates +impregnating +impregnation +impregnation's +impresario +impresario's +impresarios +impress +impress's +impressed +impresses +impressing +impression +impression's +impressionable +impressionism +impressionism's +impressionist +impressionist's +impressionistic +impressionists +impressions +impressive +impressively +impressiveness +impressiveness's +imprimatur +imprimatur's +imprimaturs +imprint +imprint's +imprinted +imprinting +imprints +imprison +imprisoned +imprisoning +imprisonment +imprisonment's +imprisonments +imprisons +improbabilities +improbability +improbability's +improbable +improbably +impromptu +impromptu's +impromptus +improper +improperly +improprieties +impropriety +impropriety's +improvable +improve +improved +improvement +improvement's +improvements +improves +improvidence +improvidence's +improvident +improvidently +improving +improvisation +improvisation's +improvisations +improvise +improvised +improvises +improvising +imprudence +imprudence's +imprudent +imps +impudence +impudence's +impudent +impudently +impugn +impugned +impugning +impugns +impulse +impulse's +impulsed +impulses +impulsing +impulsion +impulsion's +impulsive +impulsively +impulsiveness +impulsiveness's +impunity +impunity's +impure +impurely +impurer +impurest +impurities +impurity +impurity's +imputation +imputation's +imputations +impute +imputed +imputes +imputing +in +in's +inabilities +inability +inability's +inaccessibility +inaccessibility's +inaccessible +inaccuracies +inaccuracy +inaccuracy's +inaccurate +inaccurately +inaction +inaction's +inactive +inactivity +inactivity's +inadequacies +inadequacy +inadequacy's +inadequate +inadequately +inadmissible +inadvertence +inadvertence's +inadvertent +inadvertently +inadvisable +inalienable +inamorata +inamorata's +inamoratas +inane +inanely +inaner +inanest +inanimate +inanities +inanity +inanity's +inapplicable +inappropriate +inappropriately +inapt +inarticulate +inarticulately +inasmuch +inattention 
+inattention's +inattentive +inaudible +inaudibly +inaugural +inaugural's +inaugurals +inaugurate +inaugurated +inaugurates +inaugurating +inauguration +inauguration's +inaugurations +inauspicious +inboard +inboard's +inboards +inborn +inbound +inbox +inbox's +inboxes +inbred +inbreed +inbreeding +inbreeding's +inbreeds +inbuilt +incalculable +incalculably +incandescence +incandescence's +incandescent +incantation +incantation's +incantations +incapability +incapability's +incapable +incapacitate +incapacitated +incapacitates +incapacitating +incapacity +incapacity's +incarcerate +incarcerated +incarcerates +incarcerating +incarceration +incarceration's +incarcerations +incarnate +incarnated +incarnates +incarnating +incarnation +incarnation's +incarnations +incautious +incendiaries +incendiary +incendiary's +incense +incense's +incensed +incenses +incensing +incentive +incentive's +incentives +inception +inception's +inceptions +incessant +incessantly +incest +incest's +incestuous +inch +inch's +inched +inches +inching +inchoate +incidence +incidence's +incidences +incident +incident's +incidental +incidental's +incidentally +incidentals +incidents +incinerate +incinerated +incinerates +incinerating +incineration +incineration's +incinerator +incinerator's +incinerators +incipient +incise +incised +incises +incising +incision +incision's +incisions +incisive +incisively +incisiveness +incisiveness's +incisor +incisor's +incisors +incite +incited +incitement +incitement's +incitements +incites +inciting +incivilities +incivility +incivility's +inclemency +inclemency's +inclement +inclination +inclination's +inclinations +incline +incline's +inclined +inclines +inclining +inclose +inclosed +incloses +inclosing +inclosure +inclosure's +inclosures +include +included +includes +including +inclusion +inclusion's +inclusions +inclusive +inclusively +incognito +incognito's +incognitos +incoherence +incoherence's +incoherent +incoherently +incombustible +income +income's 
+incomes +incoming +incommensurate +incommunicado +incomparable +incomparably +incompatibilities +incompatibility +incompatibility's +incompatible +incompatible's +incompatibles +incompatibly +incompetence +incompetence's +incompetent +incompetent's +incompetently +incompetents +incomplete +incompletely +incompleteness +incomprehensible +incomprehensibly +inconceivable +inconceivably +inconclusive +inconclusively +incongruities +incongruity +incongruity's +incongruous +incongruously +inconsequential +inconsequentially +inconsiderable +inconsiderate +inconsiderately +inconsiderateness +inconsiderateness's +inconsistencies +inconsistency +inconsistency's +inconsistent +inconsistently +inconsolable +inconspicuous +inconspicuously +inconspicuousness +inconspicuousness's +inconstancy +inconstancy's +inconstant +incontestable +incontestably +incontinence +incontinence's +incontinent +incontrovertible +incontrovertibly +inconvenience +inconvenience's +inconvenienced +inconveniences +inconveniencing +inconvenient +inconveniently +incorporate +incorporated +incorporates +incorporating +incorporation +incorporation's +incorporeal +incorrect +incorrectly +incorrectness +incorrectness's +incorrigibility +incorrigibility's +incorrigible +incorrigibly +incorruptibility +incorruptibility's +incorruptible +increase +increase's +increased +increases +increasing +increasingly +incredibility +incredibility's +incredible +incredibly +incredulity +incredulity's +incredulous +incredulously +increment +increment's +incremental +incremented +increments +incriminate +incriminated +incriminates +incriminating +incrimination +incrimination's +incriminatory +incrust +incrustation +incrustation's +incrustations +incrusted +incrusting +incrusts +incubate +incubated +incubates +incubating +incubation +incubation's +incubator +incubator's +incubators +incubi +incubus +incubus's +incubuses +inculcate +inculcated +inculcates +inculcating +inculcation +inculcation's +inculpate +inculpated 
+inculpates +inculpating +incumbencies +incumbency +incumbency's +incumbent +incumbent's +incumbents +incur +incurable +incurable's +incurables +incurably +incurious +incurred +incurring +incurs +incursion +incursion's +incursions +indebted +indebtedness +indebtedness's +indecencies +indecency +indecency's +indecent +indecently +indecipherable +indecision +indecision's +indecisive +indecisively +indecisiveness +indecisiveness's +indecorous +indeed +indefatigable +indefatigably +indefensible +indefensibly +indefinable +indefinably +indefinite +indefinitely +indelible +indelibly +indelicacies +indelicacy +indelicacy's +indelicate +indelicately +indemnification +indemnification's +indemnifications +indemnified +indemnifies +indemnify +indemnifying +indemnities +indemnity +indemnity's +indent +indent's +indentation +indentation's +indentations +indented +indenting +indents +indenture +indenture's +indentured +indentures +indenturing +independence +independence's +independent +independent's +independently +independents +indescribable +indescribably +indestructible +indestructibly +indeterminable +indeterminacy +indeterminacy's +indeterminate +indeterminately +index +index's +indexed +indexes +indexing +indicate +indicated +indicates +indicating +indication +indication's +indications +indicative +indicative's +indicatives +indicator +indicator's +indicators +indices +indict +indictable +indicted +indicting +indictment +indictment's +indictments +indicts +indifference +indifference's +indifferent +indifferently +indigence +indigence's +indigenous +indigent +indigent's +indigents +indigestible +indigestion +indigestion's +indignant +indignantly +indignation +indignation's +indignities +indignity +indignity's +indigo +indigo's +indirect +indirection +indirectly +indirectness +indirectness's +indiscernible +indiscreet +indiscreetly +indiscretion +indiscretion's +indiscretions +indiscriminate +indiscriminately +indispensable +indispensable's +indispensables +indispensably 
+indisposed +indisposition +indisposition's +indispositions +indisputable +indisputably +indissoluble +indistinct +indistinctly +indistinctness +indistinctness's +indistinguishable +individual +individual's +individualism +individualism's +individualist +individualist's +individualistic +individualists +individuality +individuality's +individualize +individualized +individualizes +individualizing +individually +individuals +indivisibility +indivisibility's +indivisible +indivisibly +indoctrinate +indoctrinated +indoctrinates +indoctrinating +indoctrination +indoctrination's +indolence +indolence's +indolent +indolently +indomitable +indomitably +indoor +indoors +indorse +indorsed +indorsement +indorsement's +indorsements +indorses +indorsing +indubitable +indubitably +induce +induced +inducement +inducement's +inducements +induces +inducing +induct +inductance +inductance's +inducted +inductee +inductee's +inductees +inducting +induction +induction's +inductions +inductive +inducts +indue +indued +indues +induing +indulge +indulged +indulgence +indulgence's +indulgences +indulgent +indulgently +indulges +indulging +industrial +industrialism +industrialism's +industrialist +industrialist's +industrialists +industrialization +industrialization's +industrialize +industrialized +industrializes +industrializing +industrially +industries +industrious +industriously +industriousness +industriousness's +industry +industry's +inebriate +inebriate's +inebriated +inebriates +inebriating +inebriation +inebriation's +inedible +ineducable +ineffable +ineffably +ineffective +ineffectively +ineffectiveness +ineffectiveness's +ineffectual +ineffectually +inefficiencies +inefficiency +inefficiency's +inefficient +inefficiently +inelastic +inelegance +inelegant +inelegantly +ineligibility +ineligibility's +ineligible +ineligible's +ineligibles +ineluctable +ineluctably +inept +ineptitude +ineptitude's +ineptly +ineptness +ineptness's +inequalities +inequality +inequality's 
+inequitable +inequities +inequity +inequity's +inert +inertia +inertia's +inertial +inertly +inertness +inertness's +inescapable +inescapably +inessential +inessential's +inessentials +inestimable +inestimably +inevitability +inevitability's +inevitable +inevitable's +inevitably +inexact +inexcusable +inexcusably +inexhaustible +inexhaustibly +inexorable +inexorably +inexpedient +inexpensive +inexpensively +inexperience +inexperience's +inexperienced +inexpert +inexplicable +inexplicably +inexpressible +inextinguishable +inextricable +inextricably +infallibility +infallibility's +infallible +infallibly +infamies +infamous +infamously +infamy +infamy's +infancy +infancy's +infant +infant's +infanticide +infanticide's +infanticides +infantile +infantries +infantry +infantry's +infantryman +infantryman's +infantrymen +infants +infarction +infarction's +infatuate +infatuated +infatuates +infatuating +infatuation +infatuation's +infatuations +infeasible +infect +infected +infecting +infection +infection's +infections +infectious +infectiously +infectiousness +infectiousness's +infects +infelicities +infelicitous +infelicity +infelicity's +infer +inference +inference's +inferences +inferential +inferior +inferior's +inferiority +inferiority's +inferiors +infernal +inferno +inferno's +infernos +inferred +inferring +infers +infertile +infertility +infertility's +infest +infestation +infestation's +infestations +infested +infesting +infests +infidel +infidel's +infidelities +infidelity +infidelity's +infidels +infield +infield's +infielder +infielder's +infielders +infields +infighting +infighting's +infiltrate +infiltrated +infiltrates +infiltrating +infiltration +infiltration's +infiltrator +infiltrator's +infiltrators +infinite +infinite's +infinitely +infinitesimal +infinitesimal's +infinitesimally +infinitesimals +infinities +infinitive +infinitive's +infinitives +infinitude +infinitude's +infinity +infinity's +infirm +infirmaries +infirmary +infirmary's +infirmities 
+infirmity +infirmity's +infix +inflame +inflamed +inflames +inflaming +inflammable +inflammation +inflammation's +inflammations +inflammatory +inflatable +inflatable's +inflatables +inflate +inflated +inflates +inflating +inflation +inflation's +inflationary +inflect +inflected +inflecting +inflection +inflection's +inflectional +inflections +inflects +inflexibility +inflexibility's +inflexible +inflexibly +inflict +inflicted +inflicting +infliction +infliction's +inflicts +inflorescence +inflorescence's +inflow +influence +influence's +influenced +influences +influencing +influential +influentially +influenza +influenza's +influx +influx's +influxes +info +info's +infomercial +infomercial's +infomercials +inform +informal +informality +informality's +informally +informant +informant's +informants +information +information's +informational +informative +informed +informer +informer's +informers +informing +informs +infotainment +infotainment's +infraction +infraction's +infractions +infrared +infrared's +infrastructure +infrastructure's +infrastructures +infrequency +infrequency's +infrequent +infrequently +infringe +infringed +infringement +infringement's +infringements +infringes +infringing +infuriate +infuriated +infuriates +infuriating +infuriatingly +infuse +infused +infuses +infusing +infusion +infusion's +infusions +ingenious +ingeniously +ingenuity +ingenuity's +ingenuous +ingenuously +ingenuousness +ingenuousness's +ingest +ingested +ingesting +ingestion +ingestion's +ingests +inglorious +ingot +ingot's +ingots +ingrain +ingrained +ingraining +ingrains +ingrate +ingrate's +ingrates +ingratiate +ingratiated +ingratiates +ingratiating +ingratiatingly +ingratitude +ingratitude's +ingredient +ingredient's +ingredients +ingress +ingress's +ingresses +ingrown +ingénue +ingénue's +ingénues +inhabit +inhabitable +inhabitant +inhabitant's +inhabitants +inhabited +inhabiting +inhabits +inhalant +inhalant's +inhalants +inhalation +inhalation's +inhalations 
+inhalator +inhalator's +inhalators +inhale +inhaled +inhaler +inhaler's +inhalers +inhales +inhaling +inhere +inhered +inherent +inherently +inheres +inhering +inherit +inheritance +inheritance's +inheritances +inherited +inheriting +inheritor +inheritor's +inheritors +inherits +inhibit +inhibited +inhibiting +inhibition +inhibition's +inhibitions +inhibits +inhospitable +inhuman +inhumane +inhumanely +inhumanities +inhumanity +inhumanity's +inhumanly +inimical +inimically +inimitable +inimitably +iniquities +iniquitous +iniquity +iniquity's +initial +initial's +initialed +initialing +initialization +initialize +initialized +initializes +initializing +initialled +initialling +initially +initials +initiate +initiate's +initiated +initiates +initiating +initiation +initiation's +initiations +initiative +initiative's +initiatives +initiator +initiator's +initiators +inject +injected +injecting +injection +injection's +injections +injector +injector's +injectors +injects +injudicious +injunction +injunction's +injunctions +injure +injured +injures +injuries +injuring +injurious +injury +injury's +injustice +injustice's +injustices +ink +ink's +inkblot +inkblot's +inkblots +inked +inkier +inkiest +inkiness +inkiness's +inking +inkling +inkling's +inklings +inks +inkwell +inkwell's +inkwells +inky +inlaid +inland +inland's +inlay +inlay's +inlaying +inlays +inlet +inlet's +inlets +inline +inmate +inmate's +inmates +inmost +inn +inn's +innards +innards's +innate +innately +inner +innermost +inning +inning's +innings +innkeeper +innkeeper's +innkeepers +innocence +innocence's +innocent +innocent's +innocently +innocents +innocuous +innocuously +innovate +innovated +innovates +innovating +innovation +innovation's +innovations +innovative +innovator +innovator's +innovators +inns +innuendo +innuendo's +innuendoes +innuendos +innumerable +inoculate +inoculated +inoculates +inoculating +inoculation +inoculation's +inoculations +inoffensive +inoffensively +inoperable 
+inoperative +inopportune +inordinate +inordinately +inorganic +inpatient +inpatient's +inpatients +input +input's +inputs +inputted +inputting +inquest +inquest's +inquests +inquietude +inquietude's +inquire +inquired +inquirer +inquirer's +inquirers +inquires +inquiries +inquiring +inquiringly +inquiry +inquiry's +inquisition +inquisition's +inquisitions +inquisitive +inquisitively +inquisitiveness +inquisitiveness's +inquisitor +inquisitor's +inquisitors +inroad +inroad's +inroads +ins +insane +insanely +insaner +insanest +insanity +insanity's +insatiable +insatiably +inscribe +inscribed +inscribes +inscribing +inscription +inscription's +inscriptions +inscrutable +inscrutably +inseam +inseam's +inseams +insect +insect's +insecticide +insecticide's +insecticides +insectivore +insectivore's +insectivores +insectivorous +insects +insecure +insecurely +insecurities +insecurity +insecurity's +inseminate +inseminated +inseminates +inseminating +insemination +insemination's +insensate +insensibility +insensibility's +insensible +insensibly +insensitive +insensitively +insensitivity +insensitivity's +insentience +insentience's +insentient +inseparability +inseparability's +inseparable +inseparable's +inseparables +inseparably +insert +insert's +inserted +inserting +insertion +insertion's +insertions +inserts +inset +inset's +insets +insetted +insetting +inshore +inside +inside's +insider +insider's +insiders +insides +insidious +insidiously +insidiousness +insidiousness's +insight +insight's +insightful +insights +insigne +insigne's +insignes +insignia +insignia's +insignias +insignificance +insignificance's +insignificant +insignificantly +insincere +insincerely +insincerity +insincerity's +insinuate +insinuated +insinuates +insinuating +insinuation +insinuation's +insinuations +insipid +insist +insisted +insistence +insistence's +insistent +insistently +insisting +insists +insofar +insole +insole's +insolence +insolence's +insolent +insolently +insoles +insolubility 
+insolubility's +insoluble +insolvable +insolvency +insolvency's +insolvent +insolvent's +insolvents +insomnia +insomnia's +insomniac +insomniac's +insomniacs +insouciance +insouciance's +insouciant +inspect +inspected +inspecting +inspection +inspection's +inspections +inspector +inspector's +inspectors +inspects +inspiration +inspiration's +inspirational +inspirations +inspire +inspired +inspires +inspiring +instability +instability's +instal +install +installation +installation's +installations +installed +installing +installment +installment's +installments +installs +instalment +instalment's +instalments +instals +instance +instance's +instanced +instances +instancing +instant +instant's +instantaneous +instantaneously +instantly +instants +instead +instep +instep's +insteps +instigate +instigated +instigates +instigating +instigation +instigation's +instigator +instigator's +instigators +instil +instill +instilled +instilling +instills +instils +instinct +instinct's +instinctive +instinctively +instincts +institute +institute's +instituted +institutes +instituting +institution +institution's +institutional +institutionalize +institutionalized +institutionalizes +institutionalizing +institutions +instruct +instructed +instructing +instruction +instruction's +instructional +instructions +instructive +instructively +instructor +instructor's +instructors +instructs +instrument +instrument's +instrumental +instrumental's +instrumentalist +instrumentalist's +instrumentalists +instrumentality +instrumentality's +instrumentals +instrumentation +instrumentation's +instrumented +instrumenting +instruments +insubordinate +insubordination +insubordination's +insubstantial +insufferable +insufferably +insufficiency +insufficiency's +insufficient +insufficiently +insular +insularity +insularity's +insulate +insulated +insulates +insulating +insulation +insulation's +insulator +insulator's +insulators +insulin +insulin's +insult +insult's +insulted +insulting +insults 
+insuperable +insupportable +insurance +insurance's +insurances +insure +insured +insured's +insureds +insurer +insurer's +insurers +insures +insurgence +insurgence's +insurgences +insurgencies +insurgency +insurgency's +insurgent +insurgent's +insurgents +insuring +insurmountable +insurrection +insurrection's +insurrectionist +insurrectionist's +insurrectionists +insurrections +intact +intagli +intaglio +intaglio's +intaglios +intake +intake's +intakes +intangible +intangible's +intangibles +intangibly +integer +integer's +integers +integral +integral's +integrals +integrate +integrated +integrates +integrating +integration +integration's +integrator +integrity +integrity's +integument +integument's +integuments +intellect +intellect's +intellects +intellectual +intellectual's +intellectualism +intellectualize +intellectualized +intellectualizes +intellectualizing +intellectually +intellectuals +intelligence +intelligence's +intelligent +intelligently +intelligentsia +intelligentsia's +intelligibility +intelligibility's +intelligible +intelligibly +intemperance +intemperance's +intemperate +intend +intended +intended's +intendeds +intending +intends +intense +intensely +intenser +intensest +intensification +intensification's +intensified +intensifier +intensifier's +intensifiers +intensifies +intensify +intensifying +intensities +intensity +intensity's +intensive +intensive's +intensively +intensives +intent +intent's +intention +intention's +intentional +intentionally +intentions +intently +intentness +intentness's +intents +inter +interact +interacted +interacting +interaction +interaction's +interactions +interactive +interactively +interacts +interbred +interbreed +interbreeding +interbreeds +intercede +interceded +intercedes +interceding +intercept +intercept's +intercepted +intercepting +interception +interception's +interceptions +interceptor +interceptor's +interceptors +intercepts +intercession +intercession's +intercessions +intercessor +intercessor's 
+intercessors +interchange +interchange's +interchangeable +interchangeably +interchanged +interchanges +interchanging +intercollegiate +intercom +intercom's +intercoms +interconnect +interconnected +interconnecting +interconnection +interconnection's +interconnections +interconnects +intercontinental +intercourse +intercourse's +interdenominational +interdepartmental +interdependence +interdependence's +interdependent +interdict +interdict's +interdicted +interdicting +interdiction +interdiction's +interdicts +interdisciplinary +interest +interest's +interested +interesting +interestingly +interests +interface +interface's +interfaced +interfaces +interfacing +interfaith +interfere +interfered +interference +interference's +interferes +interfering +interferon +interferon's +intergalactic +interim +interim's +interior +interior's +interiors +interject +interjected +interjecting +interjection +interjection's +interjections +interjects +interlace +interlaced +interlaces +interlacing +interlard +interlarded +interlarding +interlards +interleave +interleaved +interleaves +interleaving +interleukin +interleukin's +interlink +interlinked +interlinking +interlinks +interlock +interlock's +interlocked +interlocking +interlocks +interlocutory +interloper +interloper's +interlopers +interlude +interlude's +interluded +interludes +interluding +intermarriage +intermarriage's +intermarriages +intermarried +intermarries +intermarry +intermarrying +intermediaries +intermediary +intermediary's +intermediate +intermediate's +intermediates +interment +interment's +interments +intermezzi +intermezzo +intermezzo's +intermezzos +interminable +interminably +intermingle +intermingled +intermingles +intermingling +intermission +intermission's +intermissions +intermittent +intermittently +intern +intern's +internal +internalize +internalized +internalizes +internalizing +internally +internals +international +international's +internationalism +internationalism's +internationalize 
+internationalized +internationalizes +internationalizing +internationally +internationals +interne +interne's +internecine +interned +internee +internee's +internees +internement +internes +interneship +interneships +internet +interning +internist +internist's +internists +internment +internment's +interns +internship +internship's +internships +interoffice +interpersonal +interplanetary +interplay +interplay's +interpolate +interpolated +interpolates +interpolating +interpolation +interpolation's +interpolations +interpose +interposed +interposes +interposing +interposition +interposition's +interpret +interpretation +interpretation's +interpretations +interpretative +interpreted +interpreter +interpreter's +interpreters +interpreting +interpretive +interprets +interracial +interred +interrelate +interrelated +interrelates +interrelating +interrelation +interrelation's +interrelations +interrelationship +interrelationship's +interrelationships +interring +interrogate +interrogated +interrogates +interrogating +interrogation +interrogation's +interrogations +interrogative +interrogative's +interrogatives +interrogator +interrogator's +interrogatories +interrogators +interrogatory +interrogatory's +interrupt +interrupt's +interrupted +interrupting +interruption +interruption's +interruptions +interrupts +inters +interscholastic +intersect +intersected +intersecting +intersection +intersection's +intersections +intersects +intersperse +interspersed +intersperses +interspersing +interstate +interstate's +interstates +interstellar +interstice +interstice's +interstices +intertwine +intertwined +intertwines +intertwining +interurban +interval +interval's +intervals +intervene +intervened +intervenes +intervening +intervention +intervention's +interventions +interview +interview's +interviewed +interviewee +interviewee's +interviewees +interviewer +interviewer's +interviewers +interviewing +interviews +interweave +interweaved +interweaves +interweaving +interwove 
+interwoven +intestate +intestinal +intestine +intestine's +intestines +intimacies +intimacy +intimacy's +intimate +intimate's +intimated +intimately +intimates +intimating +intimation +intimation's +intimations +intimidate +intimidated +intimidates +intimidating +intimidation +intimidation's +into +intolerable +intolerably +intolerance +intolerance's +intolerant +intonation +intonation's +intonations +intone +intoned +intones +intoning +intoxicant +intoxicant's +intoxicants +intoxicate +intoxicated +intoxicates +intoxicating +intoxication +intoxication's +intractability +intractability's +intractable +intramural +intranet +intranet's +intranets +intransigence +intransigence's +intransigent +intransigent's +intransigents +intransitive +intransitive's +intransitively +intransitives +intravenous +intravenous's +intravenouses +intravenously +intrench +intrenched +intrenches +intrenching +intrenchment +intrenchment's +intrenchments +intrepid +intrepidly +intricacies +intricacy +intricacy's +intricate +intricately +intrigue +intrigue's +intrigued +intrigues +intriguing +intriguingly +intrinsic +intrinsically +introduce +introduced +introduces +introducing +introduction +introduction's +introductions +introductory +intros +introspection +introspection's +introspective +introversion +introversion's +introvert +introvert's +introverted +introverts +intrude +intruded +intruder +intruder's +intruders +intrudes +intruding +intrusion +intrusion's +intrusions +intrusive +intrust +intrusted +intrusting +intrusts +intuit +intuited +intuiting +intuition +intuition's +intuitions +intuitive +intuitively +intuits +inundate +inundated +inundates +inundating +inundation +inundation's +inundations +inure +inured +inures +inuring +invade +invaded +invader +invader's +invaders +invades +invading +invalid +invalid's +invalidate +invalidated +invalidates +invalidating +invalidation +invalidation's +invalided +invaliding +invalidity +invalidity's +invalids +invaluable +invariable 
+invariable's +invariables +invariably +invariant +invasion +invasion's +invasions +invasive +invective +invective's +inveigh +inveighed +inveighing +inveighs +inveigle +inveigled +inveigles +inveigling +invent +invented +inventing +invention +invention's +inventions +inventive +inventiveness +inventiveness's +inventor +inventor's +inventoried +inventories +inventors +inventory +inventory's +inventorying +invents +inverse +inverse's +inversely +inverses +inversion +inversion's +inversions +invert +invert's +invertebrate +invertebrate's +invertebrates +inverted +inverting +inverts +invest +invested +investigate +investigated +investigates +investigating +investigation +investigation's +investigations +investigative +investigator +investigator's +investigators +investing +investiture +investiture's +investitures +investment +investment's +investments +investor +investor's +investors +invests +inveterate +invidious +invidiously +invigorate +invigorated +invigorates +invigorating +invigoration +invigoration's +invincibility +invincibility's +invincible +invincibly +inviolability +inviolability's +inviolable +inviolate +invisibility +invisibility's +invisible +invisibly +invitation +invitation's +invitational +invitational's +invitationals +invitations +invite +invite's +invited +invites +inviting +invitingly +invocation +invocation's +invocations +invoice +invoice's +invoiced +invoices +invoicing +invoke +invoked +invokes +invoking +involuntarily +involuntary +involve +involved +involvement +involvement's +involvements +involves +involving +invulnerability +invulnerability's +invulnerable +invulnerably +inward +inwardly +inwards +iodine +iodine's +iodize +iodized +iodizes +iodizing +ion +ion's +ionization +ionization's +ionize +ionized +ionizer +ionizer's +ionizers +ionizes +ionizing +ionosphere +ionosphere's +ionospheres +ions +iota +iota's +iotas +ipecac +ipecac's +ipecacs +irascibility +irascibility's +irascible +irate +irately +irateness +irateness's +ire +ire's 
+iridescence +iridescence's +iridescent +iridium +iridium's +iris +iris's +irises +irk +irked +irking +irks +irksome +iron +iron's +ironclad +ironclad's +ironclads +ironed +ironic +ironical +ironically +ironies +ironing +ironing's +irons +ironware +ironware's +ironwork +ironwork's +irony +irony's +irradiate +irradiated +irradiates +irradiating +irradiation +irradiation's +irrational +irrational's +irrationality +irrationality's +irrationally +irrationals +irreconcilable +irrecoverable +irredeemable +irrefutable +irregardless +irregular +irregular's +irregularities +irregularity +irregularity's +irregularly +irregulars +irrelevance +irrelevance's +irrelevances +irrelevancies +irrelevancy +irrelevancy's +irrelevant +irrelevantly +irreligious +irremediable +irremediably +irreparable +irreparably +irreplaceable +irrepressible +irreproachable +irresistible +irresistibly +irresolute +irresolutely +irresolution +irresolution's +irrespective +irresponsibility +irresponsibility's +irresponsible +irresponsibly +irretrievable +irretrievably +irreverence +irreverence's +irreverent +irreverently +irreversible +irreversibly +irrevocable +irrevocably +irrigate +irrigated +irrigates +irrigating +irrigation +irrigation's +irritability +irritability's +irritable +irritably +irritant +irritant's +irritants +irritate +irritated +irritates +irritating +irritatingly +irritation +irritation's +irritations +irruption +irruption's +irruptions +is +isinglass +isinglass's +island +island's +islander +islander's +islanders +islands +isle +isle's +isles +islet +islet's +islets +ism +ism's +isms +isn't +isobar +isobar's +isobars +isolate +isolate's +isolated +isolates +isolating +isolation +isolation's +isolationism +isolationism's +isolationist +isolationist's +isolationists +isometric +isometrics +isometrics's +isomorphic +isosceles +isotope +isotope's +isotopes +isotopic +isotropic +issuance +issuance's +issue +issue's +issued +issues +issuing +isthmi +isthmus +isthmus's +isthmuses +it +it'd 
+it'll +it's +italic +italic's +italicize +italicized +italicizes +italicizing +italics +italics's +itch +itch's +itched +itches +itchier +itchiest +itchiness +itchiness's +itching +itchy +item +item's +itemization +itemization's +itemize +itemized +itemizes +itemizing +items +iterate +iterated +iterates +iterating +iteration +iteration's +iterations +iterative +iterator +iterators +itinerant +itinerant's +itinerants +itineraries +itinerary +itinerary's +its +itself +ivies +ivories +ivory +ivory's +ivy +ivy's +j +jab +jab's +jabbed +jabber +jabber's +jabbered +jabberer +jabberer's +jabberers +jabbering +jabbers +jabbing +jabot +jabot's +jabots +jabs +jack +jack's +jackal +jackal's +jackals +jackass +jackass's +jackasses +jackboot +jackboot's +jackboots +jackdaw +jackdaw's +jackdaws +jacked +jacket +jacket's +jackets +jackhammer +jackhammer's +jackhammers +jacking +jackknife +jackknife's +jackknifed +jackknifes +jackknifing +jackknives +jackpot +jackpot's +jackpots +jackrabbit +jackrabbit's +jackrabbits +jacks +jade +jade's +jaded +jades +jading +jag +jag's +jagged +jaggeder +jaggedest +jaggedly +jaggedness +jaggedness's +jags +jaguar +jaguar's +jaguars +jail +jail's +jailbreak +jailbreak's +jailbreaks +jailed +jailer +jailer's +jailers +jailing +jailor +jailor's +jailors +jails +jalapeño +jalapeño's +jalapeños +jalopies +jalopy +jalopy's +jalousie +jalousie's +jalousies +jam +jam's +jamb +jamb's +jamboree +jamboree's +jamborees +jambs +jammed +jamming +jams +jangle +jangle's +jangled +jangles +jangling +janitor +janitor's +janitorial +janitors +japan +japan's +japanned +japanning +japans +jape +jape's +japed +japes +japing +jar +jar's +jardinière +jardinière's +jardinières +jargon +jargon's +jarred +jarring +jars +jasmine +jasmine's +jasmines +jasper +jasper's +jaundice +jaundice's +jaundiced +jaundices +jaundicing +jaunt +jaunt's +jaunted +jauntier +jauntiest +jauntily +jauntiness +jauntiness's +jaunting +jaunts +jaunty +javelin +javelin's +javelins +jaw +jaw's 
+jawbone +jawbone's +jawboned +jawbones +jawboning +jawbreaker +jawbreaker's +jawbreakers +jawed +jawing +jaws +jay +jay's +jays +jaywalk +jaywalked +jaywalker +jaywalker's +jaywalkers +jaywalking +jaywalks +jazz +jazz's +jazzed +jazzes +jazzier +jazziest +jazzing +jazzy +jealous +jealousies +jealously +jealousy +jealousy's +jeans +jeans's +jeep +jeep's +jeeps +jeer +jeer's +jeered +jeering +jeeringly +jeers +jeez +jehad +jehad's +jehads +jejune +jell +jelled +jellied +jellies +jelling +jello +jello's +jells +jelly +jelly's +jellybean +jellybean's +jellybeans +jellyfish +jellyfish's +jellyfishes +jellying +jeopardize +jeopardized +jeopardizes +jeopardizing +jeopardy +jeopardy's +jeremiad +jeremiad's +jeremiads +jerk +jerk's +jerked +jerkier +jerkiest +jerkily +jerkin +jerkin's +jerking +jerkins +jerks +jerkwater +jerky +jerky's +jersey +jersey's +jerseys +jessamine +jessamine's +jessamines +jest +jest's +jested +jester +jester's +jesters +jesting +jests +jet +jet's +jets +jetsam +jetsam's +jetted +jetties +jetting +jettison +jettison's +jettisoned +jettisoning +jettisons +jetty +jetty's +jewel +jewel's +jeweled +jeweler +jeweler's +jewelers +jeweling +jewelled +jeweller +jeweller's +jewellers +jewelling +jewelries +jewelry +jewelry's +jewels +jib +jib's +jibbed +jibbing +jibe +jibe's +jibed +jibes +jibing +jibs +jiffies +jiffy +jiffy's +jig +jig's +jigged +jigger +jigger's +jiggered +jiggering +jiggers +jigging +jiggle +jiggle's +jiggled +jiggles +jiggling +jigs +jigsaw +jigsaw's +jigsawed +jigsawing +jigsawn +jigsaws +jihad +jihad's +jihadist +jihadist's +jihadists +jihads +jilt +jilt's +jilted +jilting +jilts +jimmied +jimmies +jimmy +jimmy's +jimmying +jingle +jingle's +jingled +jingles +jingling +jingoism +jingoism's +jingoist +jingoist's +jingoistic +jingoists +jinn +jinn's +jinni +jinni's +jinnis +jinns +jinricksha +jinricksha's +jinrickshas +jinrikisha +jinrikisha's +jinrikishas +jinx +jinx's +jinxed +jinxes +jinxing +jitney +jitney's +jitneys +jitterbug 
+jitterbug's +jitterbugged +jitterbugging +jitterbugs +jitterier +jitteriest +jitters +jitters's +jittery +jiujitsu +jiujitsu's +jive +jive's +jived +jives +jiving +job +job's +jobbed +jobber +jobber's +jobbers +jobbing +jobless +joblessness +joblessness's +jobs +jock +jock's +jockey +jockey's +jockeyed +jockeying +jockeys +jocks +jockstrap +jockstrap's +jockstraps +jocose +jocosely +jocosity +jocosity's +jocular +jocularity +jocularity's +jocularly +jocund +jocundity +jocundity's +jocundly +jodhpurs +jodhpurs's +jog +jog's +jogged +jogger +jogger's +joggers +jogging +jogging's +joggle +joggle's +joggled +joggles +joggling +jogs +john +john's +johns +join +join's +joined +joiner +joiner's +joiners +joining +joins +joint +joint's +jointed +jointing +jointly +joints +joist +joist's +joists +joke +joke's +joked +joker +joker's +jokers +jokes +joking +jokingly +jollied +jollier +jollies +jolliest +jolliness +jolliness's +jollity +jollity's +jolly +jolly's +jollying +jolt +jolt's +jolted +jolting +jolts +jonquil +jonquil's +jonquils +josh +josh's +joshed +joshes +joshing +jostle +jostle's +jostled +jostles +jostling +jot +jot's +jots +jotted +jotting +jotting's +jottings +joule +joule's +joules +jounce +jounce's +jounced +jounces +jouncing +journal +journal's +journalese +journalese's +journalism +journalism's +journalist +journalist's +journalistic +journalists +journals +journey +journey's +journeyed +journeying +journeyman +journeyman's +journeymen +journeys +joust +joust's +jousted +jousting +jousts +jovial +joviality +joviality's +jovially +jowl +jowl's +jowls +joy +joy's +joyed +joyful +joyfuller +joyfullest +joyfully +joyfulness +joyfulness's +joying +joyless +joyous +joyously +joyousness +joyousness's +joyridden +joyride +joyride's +joyrider +joyrider's +joyriders +joyrides +joyriding +joyriding's +joyrode +joys +joystick +joystick's +joysticks +jubilant +jubilantly +jubilation +jubilation's +jubilee +jubilee's +jubilees +judge +judge's +judged +judgement 
+judgement's +judgemental +judgements +judges +judgeship +judgeship's +judging +judgment +judgment's +judgmental +judgments +judicature +judicature's +judicial +judicially +judiciaries +judiciary +judiciary's +judicious +judiciously +judiciousness +judiciousness's +judo +judo's +jug +jug's +jugged +juggernaut +juggernaut's +juggernauts +jugging +juggle +juggle's +juggled +juggler +juggler's +jugglers +juggles +juggling +jugs +jugular +jugular's +jugulars +juice +juice's +juiced +juicer +juicer's +juicers +juices +juicier +juiciest +juicily +juiciness +juiciness's +juicing +juicy +jujitsu +jujitsu's +jujube +jujube's +jujubes +jujutsu +jujutsu's +jukebox +jukebox's +jukeboxes +julep +julep's +juleps +julienne +jumble +jumble's +jumbled +jumbles +jumbling +jumbo +jumbo's +jumbos +jump +jump's +jumped +jumper +jumper's +jumpers +jumpier +jumpiest +jumpiness +jumpiness's +jumping +jumps +jumpsuit +jumpsuit's +jumpsuits +jumpy +junco +junco's +juncoes +juncos +junction +junction's +junctions +juncture +juncture's +junctures +jungle +jungle's +jungles +junior +junior's +juniors +juniper +juniper's +junipers +junk +junk's +junked +junker +junker's +junkers +junket +junket's +junketed +junketing +junkets +junkie +junkie's +junkier +junkies +junkiest +junking +junks +junky +junky's +junkyard +junkyard's +junkyards +junta +junta's +juntas +juridical +juries +jurisdiction +jurisdiction's +jurisdictional +jurisprudence +jurisprudence's +jurist +jurist's +jurists +juror +juror's +jurors +jury +jury's +just +juster +justest +justice +justice's +justices +justifiable +justifiably +justification +justification's +justifications +justified +justifies +justify +justifying +justly +justness +justness's +jut +jut's +jute +jute's +juts +jutted +jutting +juvenile +juvenile's +juveniles +juxtapose +juxtaposed +juxtaposes +juxtaposing +juxtaposition +juxtaposition's +juxtapositions +k +kHz +kW +kabob +kabob's +kabobs +kaboom +kaftan +kaftan's +kaftans +kale +kale's +kaleidoscope 
+kaleidoscope's +kaleidoscopes +kaleidoscopic +kamikaze +kamikaze's +kamikazes +kangaroo +kangaroo's +kangaroos +kaolin +kaolin's +kapok +kapok's +kaput +karakul +karakul's +karaoke +karaoke's +karaokes +karat +karat's +karate +karate's +karats +karma +karma's +katydid +katydid's +katydids +kayak +kayak's +kayaked +kayaking +kayaks +kazoo +kazoo's +kazoos +kebab +kebab's +kebabs +kebob +kebob's +kebobs +keel +keel's +keeled +keeling +keels +keen +keen's +keened +keener +keenest +keening +keenly +keenness +keenness's +keens +keep +keep's +keeper +keeper's +keepers +keeping +keeping's +keeps +keepsake +keepsake's +keepsakes +keg +keg's +kegs +kelp +kelp's +ken +ken's +kenned +kennel +kennel's +kenneled +kenneling +kennelled +kennelling +kennels +kenning +kens +kept +keratin +keratin's +kerchief +kerchief's +kerchiefs +kerchieves +kernel +kernel's +kernels +kerosene +kerosene's +kerosine +kerosine's +kestrel +kestrel's +kestrels +ketch +ketch's +ketches +ketchup +ketchup's +kettle +kettle's +kettledrum +kettledrum's +kettledrums +kettles +key +key's +keybinding +keybindings +keyboard +keyboard's +keyboarded +keyboarder +keyboarder's +keyboarders +keyboarding +keyboards +keyed +keyhole +keyhole's +keyholes +keying +keynote +keynote's +keynoted +keynotes +keynoting +keypunch +keypunch's +keypunched +keypunches +keypunching +keys +keystone +keystone's +keystones +keystroke +keystroke's +keystrokes +keyword +keyword's +keywords +khaki +khaki's +khakis +khan +khan's +khans +kibbutz +kibbutz's +kibbutzim +kibitz +kibitzed +kibitzer +kibitzer's +kibitzers +kibitzes +kibitzing +kibosh +kibosh's +kick +kick's +kickback +kickback's +kickbacks +kicked +kicker +kicker's +kickers +kickier +kickiest +kicking +kickoff +kickoff's +kickoffs +kicks +kickstand +kickstand's +kickstands +kicky +kid +kid's +kidded +kidder +kidder's +kidders +kiddie +kiddie's +kiddies +kidding +kiddo +kiddo's +kiddoes +kiddos +kiddy +kiddy's +kidnap +kidnaped +kidnaper +kidnaper's +kidnapers +kidnaping 
+kidnapped +kidnapper +kidnapper's +kidnappers +kidnapping +kidnapping's +kidnappings +kidnaps +kidney +kidney's +kidneys +kids +kielbasa +kielbasa's +kielbasas +kielbasy +kill +kill's +killdeer +killdeer's +killdeers +killed +killer +killer's +killers +killing +killing's +killings +killjoy +killjoy's +killjoys +kills +kiln +kiln's +kilned +kilning +kilns +kilo +kilo's +kilobyte +kilobyte's +kilobytes +kilocycle +kilocycle's +kilocycles +kilogram +kilogram's +kilograms +kilohertz +kilohertz's +kilohertzes +kilometer +kilometer's +kilometers +kilos +kiloton +kiloton's +kilotons +kilowatt +kilowatt's +kilowatts +kilt +kilt's +kilter +kilter's +kilts +kimono +kimono's +kimonos +kin +kin's +kind +kind's +kinda +kinder +kindergarten +kindergarten's +kindergartener +kindergartener's +kindergarteners +kindergartens +kindergärtner +kindergärtner's +kindergärtners +kindest +kindhearted +kindle +kindled +kindles +kindlier +kindliest +kindliness +kindliness's +kindling +kindling's +kindly +kindness +kindness's +kindnesses +kindred +kindred's +kinds +kinematic +kinematics +kinetic +kinfolk +kinfolk's +kinfolks +kinfolks's +king +king's +kingdom +kingdom's +kingdoms +kingfisher +kingfisher's +kingfishers +kinglier +kingliest +kingly +kingpin +kingpin's +kingpins +kings +kingship +kingship's +kink +kink's +kinked +kinkier +kinkiest +kinking +kinks +kinky +kinship +kinship's +kinsman +kinsman's +kinsmen +kinswoman +kinswoman's +kinswomen +kiosk +kiosk's +kiosks +kipper +kipper's +kippered +kippering +kippers +kismet +kismet's +kiss +kiss's +kissed +kisser +kisser's +kissers +kisses +kissing +kit +kit's +kitchen +kitchen's +kitchenette +kitchenette's +kitchenettes +kitchens +kitchenware +kitchenware's +kite +kite's +kited +kites +kith +kith's +kiting +kits +kitsch +kitsch's +kitschy +kitten +kitten's +kittenish +kittens +kitties +kitty +kitty's +kiwi +kiwi's +kiwis +kleptomania +kleptomania's +kleptomaniac +kleptomaniac's +kleptomaniacs +klutz +klutz's +klutzes +klutzier 
+klutziest +klutzy +knack +knack's +knacker +knacks +knackwurst +knackwurst's +knackwursts +knapsack +knapsack's +knapsacks +knave +knave's +knavery +knavery's +knaves +knavish +knead +kneaded +kneader +kneader's +kneaders +kneading +kneads +knee +knee's +kneecap +kneecap's +kneecapped +kneecapping +kneecaps +kneed +kneeing +kneel +kneeled +kneeling +kneels +knees +knell +knell's +knelled +knelling +knells +knelt +knew +knickers +knickers's +knickknack +knickknack's +knickknacks +knife +knife's +knifed +knifes +knifing +knight +knight's +knighted +knighthood +knighthood's +knighthoods +knighting +knightly +knights +knit +knit's +knits +knitted +knitter +knitter's +knitters +knitting +knitting's +knitwear +knitwear's +knives +knob +knob's +knobbier +knobbiest +knobby +knobs +knock +knock's +knocked +knocker +knocker's +knockers +knocking +knockout +knockout's +knockouts +knocks +knockwurst +knockwurst's +knockwursts +knoll +knoll's +knolls +knot +knot's +knothole +knothole's +knotholes +knots +knotted +knottier +knottiest +knotting +knotty +know +knowable +knowing +knowingly +knowings +knowledge +knowledge's +knowledgeable +knowledgeably +known +knows +knuckle +knuckle's +knuckled +knucklehead +knucklehead's +knuckleheads +knuckles +knuckling +koala +koala's +koalas +kohlrabi +kohlrabi's +kohlrabies +kook +kook's +kookaburra +kookaburra's +kookaburras +kookie +kookier +kookiest +kookiness +kookiness's +kooks +kooky +kopeck +kopeck's +kopecks +kopek +kopek's +kopeks +kosher +koshered +koshering +koshers +kowtow +kowtow's +kowtowed +kowtowing +kowtows +krone +krone's +kroner +kronor +krypton +krypton's +króna +króna's +krónur +ks +kudos +kudos's +kudzu +kudzu's +kudzus +kumquat +kumquat's +kumquats +l +la +la's +lab +lab's +label +label's +labeled +labeling +labelled +labelling +labels +labia +labial +labial's +labials +labium +labium's +labor +labor's +laboratories +laboratory +laboratory's +labored +laborer +laborer's +laborers +laboring +laborious +laboriously 
+labors +labs +laburnum +laburnum's +laburnums +labyrinth +labyrinth's +labyrinthine +labyrinths +lace +lace's +laced +lacerate +lacerated +lacerates +lacerating +laceration +laceration's +lacerations +laces +lachrymal +lachrymose +lacier +laciest +lacing +lack +lack's +lackadaisical +lackadaisically +lacked +lackey +lackey's +lackeys +lacking +lackluster +lacks +laconic +laconically +lacquer +lacquer's +lacquered +lacquering +lacquers +lacrimal +lacrosse +lacrosse's +lactate +lactated +lactates +lactating +lactation +lactation's +lactic +lactose +lactose's +lacuna +lacuna's +lacunae +lacunas +lacy +lad +lad's +ladder +ladder's +laddered +laddering +ladders +laddie +laddie's +laddies +lade +laded +laden +lades +ladies +lading +lading's +ladings +ladle +ladle's +ladled +ladles +ladling +lads +lady +lady's +ladybird +ladybird's +ladybirds +ladybug +ladybug's +ladybugs +ladyfinger +ladyfinger's +ladyfingers +ladylike +ladyship +ladyship's +lag +lag's +lager +lager's +lagers +laggard +laggard's +laggards +lagged +lagging +lagniappe +lagniappe's +lagniappes +lagoon +lagoon's +lagoons +lags +laid +lain +lair +lair's +lairs +laity +laity's +lake +lake's +lakes +lallygag +lallygagged +lallygagging +lallygags +lam +lam's +lama +lama's +lamas +lamaseries +lamasery +lamasery's +lamb +lamb's +lambast +lambaste +lambasted +lambastes +lambasting +lambasts +lambda +lambed +lambent +lambing +lambkin +lambkin's +lambkins +lambs +lambskin +lambskin's +lambskins +lame +lame's +lamebrain +lamebrain's +lamebrains +lamed +lamely +lameness +lameness's +lament +lament's +lamentable +lamentably +lamentation +lamentation's +lamentations +lamented +lamenting +laments +lamer +lames +lamest +laminate +laminate's +laminated +laminates +laminating +lamination +lamination's +laming +lammed +lamming +lamp +lamp's +lampblack +lampblack's +lampoon +lampoon's +lampooned +lampooning +lampoons +lamppost +lamppost's +lampposts +lamprey +lamprey's +lampreys +lamps +lampshade +lampshade's +lampshades 
+lams +lance +lance's +lanced +lancer +lancer's +lancers +lances +lancet +lancet's +lancets +lancing +land +land's +landed +lander +landfall +landfall's +landfalls +landfill +landfill's +landfills +landholder +landholder's +landholders +landing +landing's +landings +landladies +landlady +landlady's +landline +landline's +landlines +landlocked +landlord +landlord's +landlords +landlubber +landlubber's +landlubbers +landmark +landmark's +landmarks +landmass +landmass's +landmasses +landowner +landowner's +landowners +lands +landscape +landscape's +landscaped +landscaper +landscaper's +landscapers +landscapes +landscaping +landslid +landslidden +landslide +landslide's +landslides +landsliding +landward +landwards +lane +lane's +lanes +language +language's +languages +languid +languidly +languish +languished +languishes +languishing +languor +languor's +languorous +languorously +languors +lank +lanker +lankest +lankier +lankiest +lankiness +lankiness's +lanky +lanolin +lanolin's +lantern +lantern's +lanterns +lanyard +lanyard's +lanyards +lap +lap's +lapel +lapel's +lapels +lapidaries +lapidary +lapidary's +lapped +lapping +laps +lapse +lapse's +lapsed +lapses +lapsing +laptop +laptop's +laptops +lapwing +lapwing's +lapwings +larboard +larboard's +larboards +larcenies +larcenous +larceny +larceny's +larch +larch's +larches +lard +lard's +larded +larder +larder's +larders +larding +lards +large +large's +largely +largeness +largeness's +larger +larges +largess +largess's +largesse +largesse's +largest +largo +largo's +largos +lariat +lariat's +lariats +lark +lark's +larked +larking +larks +larkspur +larkspur's +larkspurs +larva +larva's +larvae +larval +larvas +larynges +laryngitis +laryngitis's +larynx +larynx's +larynxes +lasagna +lasagna's +lasagnas +lasagne +lasagne's +lasagnes +lascivious +lasciviously +lasciviousness +lasciviousness's +laser +laser's +lasers +lash +lash's +lashed +lashes +lashing +lass +lass's +lasses +lassie +lassie's +lassies +lassitude 
+lassitude's +lasso +lasso's +lassoed +lassoes +lassoing +lassos +last +last's +lasted +lasting +lastingly +lastly +lasts +latch +latch's +latched +latches +latching +late +latecomer +latecomer's +latecomers +lately +latency +latency's +lateness +lateness's +latent +later +lateral +lateral's +lateraled +lateraling +lateralled +lateralling +laterally +laterals +latest +latest's +latex +latex's +lath +lath's +lathe +lathe's +lathed +lather +lather's +lathered +lathering +lathers +lathes +lathing +laths +latitude +latitude's +latitudes +latitudinal +latrine +latrine's +latrines +lats +latte +latte's +latter +latter's +latterly +lattes +lattice +lattice's +latticed +lattices +latticework +latticework's +latticeworks +laud +laud's +laudable +laudably +laudanum +laudanum's +laudatory +lauded +lauding +lauds +laugh +laugh's +laughable +laughably +laughed +laughing +laughingly +laughingstock +laughingstock's +laughingstocks +laughs +laughter +laughter's +launch +launch's +launched +launcher +launcher's +launchers +launches +launching +launder +laundered +launderer +launderer's +launderers +laundering +launders +laundress +laundress's +laundresses +laundries +laundry +laundry's +laundryman +laundryman's +laundrymen +laureate +laureate's +laureates +laurel +laurel's +laurels +lava +lava's +lavatories +lavatory +lavatory's +lavender +lavender's +lavenders +lavish +lavished +lavisher +lavishes +lavishest +lavishing +lavishly +lavishness +lavishness's +law +law's +lawbreaker +lawbreaker's +lawbreakers +lawful +lawfully +lawfulness +lawfulness's +lawgiver +lawgiver's +lawgivers +lawless +lawlessly +lawlessness +lawlessness's +lawmaker +lawmaker's +lawmakers +lawn +lawn's +lawns +lawrencium +lawrencium's +laws +lawsuit +lawsuit's +lawsuits +lawyer +lawyer's +lawyers +lax +laxative +laxative's +laxatives +laxer +laxest +laxity +laxity's +laxly +laxness +laxness's +lay +lay's +layaway +layaway's +layer +layer's +layered +layering +layers +layette +layette's +layettes +laying 
+layman +layman's +laymen +layoff +layoff's +layoffs +layout +layout's +layouts +layover +layover's +layovers +laypeople +layperson +layperson's +laypersons +lays +laywoman +laywoman's +laywomen +laze +laze's +lazed +lazes +lazied +lazier +lazies +laziest +lazily +laziness +laziness's +lazing +lazy +lazybones +lazybones's +lazying +lea +lea's +leach +leached +leaches +leaching +lead +lead's +leaded +leaden +leader +leader's +leaders +leadership +leadership's +leading +leading's +leads +leaf +leaf's +leafed +leafier +leafiest +leafing +leafless +leaflet +leaflet's +leafleted +leafleting +leaflets +leafletted +leafletting +leafs +leafy +league +league's +leagued +leagues +leaguing +leak +leak's +leakage +leakage's +leakages +leaked +leakier +leakiest +leaking +leaks +leaky +lean +lean's +leaned +leaner +leanest +leaning +leaning's +leanings +leanness +leanness's +leans +leap +leap's +leaped +leapfrog +leapfrog's +leapfrogged +leapfrogging +leapfrogs +leaping +leaps +leapt +learn +learned +learner +learner's +learners +learning +learning's +learns +learnt +leas +lease +lease's +leased +leasehold +leasehold's +leaseholder +leaseholder's +leaseholders +leaseholds +leases +leash +leash's +leashed +leashes +leashing +leasing +least +least's +leastwise +leather +leather's +leatherneck +leatherneck's +leathernecks +leathers +leathery +leave +leave's +leaved +leaven +leaven's +leavened +leavening +leavening's +leavens +leaves +leaving +leavings +leavings's +lecher +lecher's +lecherous +lecherously +lechers +lechery +lechery's +lecithin +lecithin's +lectern +lectern's +lecterns +lecture +lecture's +lectured +lecturer +lecturer's +lecturers +lectures +lecturing +led +ledge +ledge's +ledger +ledger's +ledgers +ledges +lee +lee's +leech +leech's +leeched +leeches +leeching +leek +leek's +leeks +leer +leer's +leered +leerier +leeriest +leering +leers +leery +lees +leeward +leeward's +leewards +leeway +leeway's +left +left's +lefter +leftest +leftie +leftie's +lefties +leftism 
+leftism's +leftist +leftist's +leftists +leftmost +leftover +leftover's +leftovers +lefts +leftwards +lefty +lefty's +leg +leg's +legacies +legacy +legacy's +legal +legal's +legalese +legalese's +legalism +legalism's +legalisms +legalistic +legality +legality's +legalization +legalization's +legalize +legalized +legalizes +legalizing +legally +legals +legate +legate's +legatee +legatee's +legatees +legates +legation +legation's +legations +legato +legato's +legatos +legend +legend's +legendary +legends +legerdemain +legerdemain's +legged +leggier +leggiest +leggin +leggin's +legging +legging's +leggings +leggins +leggy +legibility +legibility's +legible +legibly +legion +legion's +legionnaire +legionnaire's +legionnaires +legions +legislate +legislated +legislates +legislating +legislation +legislation's +legislative +legislator +legislator's +legislators +legislature +legislature's +legislatures +legit +legitimacy +legitimacy's +legitimate +legitimated +legitimately +legitimates +legitimating +legitimize +legitimized +legitimizes +legitimizing +legless +legman +legman's +legmen +legroom +legroom's +legrooms +legs +legume +legume's +legumes +leguminous +legwork +legwork's +lei +lei's +leis +leisure +leisure's +leisurely +leitmotif +leitmotif's +leitmotifs +lemma +lemmas +lemme +lemming +lemming's +lemmings +lemon +lemon's +lemonade +lemonade's +lemons +lemony +lemur +lemur's +lemurs +lend +lender +lender's +lenders +lending +lends +length +length's +lengthen +lengthened +lengthening +lengthens +lengthier +lengthiest +lengthily +lengths +lengthways +lengthwise +lengthy +leniency +leniency's +lenient +leniently +lens +lens's +lenses +lent +lentil +lentil's +lentils +leonine +leopard +leopard's +leopards +leotard +leotard's +leotards +leper +leper's +lepers +leprechaun +leprechaun's +leprechauns +leprosy +leprosy's +leprous +lept +lesbian +lesbian's +lesbianism +lesbianism's +lesbians +lesion +lesion's +lesions +less +less's +lessee +lessee's +lessees +lessen 
+lessened +lessening +lessens +lesser +lesson +lesson's +lessons +lessor +lessor's +lessors +lest +let +let's +letdown +letdown's +letdowns +lethal +lethally +lethargic +lethargically +lethargy +lethargy's +lets +letter +letter's +letterbox +lettered +letterhead +letterhead's +letterheads +lettering +lettering's +letters +letting +lettuce +lettuce's +lettuces +letup +letup's +letups +leukemia +leukemia's +leukocyte +leukocyte's +leukocytes +levee +levee's +levees +level +level's +leveled +leveler +leveler's +levelers +levelheaded +levelheadedness +levelheadedness's +leveling +levelled +leveller's +levellers +levelling +levelness +levelness's +levels +lever +lever's +leverage +leverage's +leveraged +leverages +leveraging +levered +levering +levers +leviathan +leviathan's +leviathans +levied +levies +levitate +levitated +levitates +levitating +levitation +levitation's +levity +levity's +levy +levy's +levying +lewd +lewder +lewdest +lewdly +lewdness +lewdness's +lexica +lexical +lexicographer +lexicographer's +lexicographers +lexicography +lexicography's +lexicon +lexicon's +lexicons +liabilities +liability +liability's +liable +liaise +liaised +liaises +liaising +liaison +liaison's +liaisons +liar +liar's +liars +lib +lib's +libation +libation's +libations +libel +libel's +libeled +libeler +libeler's +libelers +libeling +libelled +libeller +libeller's +libellers +libelling +libellous +libelous +libels +liberal +liberal's +liberalism +liberalism's +liberality +liberality's +liberalization +liberalization's +liberalizations +liberalize +liberalized +liberalizes +liberalizing +liberally +liberals +liberate +liberated +liberates +liberating +liberation +liberation's +liberator +liberator's +liberators +libertarian +libertarian's +libertarians +liberties +libertine +libertine's +libertines +liberty +liberty's +libidinous +libido +libido's +libidos +librarian +librarian's +librarians +libraries +library +library's +libretti +librettist +librettist's +librettists +libretto 
+libretto's +librettos +lice +licence +licence's +licenced +licences +licencing +license +license's +licensed +licensee +licensee's +licensees +licenses +licensing +licentiate +licentiate's +licentiates +licentious +licentiously +licentiousness +licentiousness's +lichee +lichee's +lichees +lichen +lichen's +lichens +licit +lick +lick's +licked +licking +licking's +lickings +licks +licorice +licorice's +licorices +lid +lid's +lidded +lids +lie +lie's +lied +lief +liefer +liefest +liege +liege's +lieges +lien +lien's +liens +lies +lieu +lieu's +lieutenancy +lieutenancy's +lieutenant +lieutenant's +lieutenants +life +life's +lifeblood +lifeblood's +lifeboat +lifeboat's +lifeboats +lifeforms +lifeguard +lifeguard's +lifeguards +lifeless +lifelike +lifeline +lifeline's +lifelines +lifelong +lifer +lifer's +lifers +lifesaver +lifesaver's +lifesavers +lifesaving +lifesaving's +lifespan +lifespans +lifestyle +lifestyle's +lifestyles +lifetime +lifetime's +lifetimes +lifework +lifework's +lifeworks +lift +lift's +lifted +lifting +liftoff +liftoff's +liftoffs +lifts +ligament +ligament's +ligaments +ligature +ligature's +ligatured +ligatures +ligaturing +light +light's +lighted +lighten +lightened +lightening +lightens +lighter +lighter's +lighters +lightest +lightheaded +lighthearted +lightheartedly +lightheartedness +lightheartedness's +lighthouse +lighthouse's +lighthouses +lighting +lighting's +lightly +lightness +lightness's +lightning +lightning's +lightninged +lightnings +lights +lightweight +lightweight's +lightweights +lignite +lignite's +likable +likableness +likableness's +like +like's +likeable +likeableness +likeableness's +liked +likelier +likeliest +likelihood +likelihood's +likelihoods +likely +liken +likened +likeness +likeness's +likenesses +likening +likens +liker +likes +likest +likewise +liking +liking's +lilac +lilac's +lilacs +lilies +lilt +lilt's +lilted +lilting +lilts +lily +lily's +limb +limb's +limber +limbered +limbering +limbers +limbless +limbo 
+limbo's +limbos +limbs +lime +lime's +limeade +limeade's +limeades +limed +limelight +limelight's +limerick +limerick's +limericks +limes +limestone +limestone's +limier +limiest +liming +limit +limit's +limitation +limitation's +limitations +limited +limiting +limitings +limitless +limits +limn +limned +limning +limns +limo +limo's +limos +limousine +limousine's +limousines +limp +limp's +limped +limper +limpest +limpet +limpet's +limpets +limpid +limpidity +limpidity's +limpidly +limping +limply +limpness +limpness's +limps +limy +linage +linage's +linchpin +linchpin's +linchpins +linden +linden's +lindens +line +line's +lineage +lineage's +lineages +lineal +lineally +lineament +lineament's +lineaments +linear +linearly +linebacker +linebacker's +linebackers +lined +linefeed +lineman +lineman's +linemen +linen +linen's +linens +linens's +liner +liner's +liners +lines +linesman +linesman's +linesmen +lineup +lineup's +lineups +linger +lingered +lingerer +lingerer's +lingerers +lingerie +lingerie's +lingering +lingeringly +lingerings +lingers +lingo +lingo's +lingoes +lingos +lingual +linguist +linguist's +linguistic +linguistics +linguistics's +linguists +liniment +liniment's +liniments +lining +lining's +linings +link +link's +linkage +linkage's +linkages +linked +linker +linking +links +linkup +linkup's +linkups +linnet +linnet's +linnets +linoleum +linoleum's +linseed +linseed's +lint +lint's +lintel +lintel's +lintels +lion +lion's +lioness +lioness's +lionesses +lionhearted +lionize +lionized +lionizes +lionizing +lions +lip +lip's +lipid +lipid's +lipids +liposuction +liposuction's +lipread +lipreading +lipreading's +lipreads +lips +lipstick +lipstick's +lipsticked +lipsticking +lipsticks +liquefaction +liquefaction's +liquefied +liquefies +liquefy +liquefying +liqueur +liqueur's +liqueurs +liquid +liquid's +liquidate +liquidated +liquidates +liquidating +liquidation +liquidation's +liquidations +liquidator +liquidator's +liquidators +liquidity +liquidity's 
+liquidize +liquidized +liquidizes +liquidizing +liquids +liquified +liquifies +liquify +liquifying +liquor +liquor's +liquored +liquoring +liquors +lira +lira's +liras +lire +lisle +lisle's +lisp +lisp's +lisped +lisping +lisps +lissom +lissome +list +list's +listed +listen +listen's +listened +listener +listener's +listeners +listening +listens +listing +listing's +listings +listless +listlessly +listlessness +listlessness's +lists +lit +litanies +litany +litany's +litchi +litchi's +litchis +lite +liter +liter's +literacy +literacy's +literal +literal's +literally +literals +literary +literate +literate's +literates +literati +literati's +literature +literature's +liters +lithe +lither +lithest +lithium +lithium's +lithograph +lithograph's +lithographed +lithographer +lithographer's +lithographers +lithographic +lithographing +lithographs +lithography +lithography's +lithosphere +lithosphere's +lithospheres +litigant +litigant's +litigants +litigate +litigated +litigates +litigating +litigation +litigation's +litigious +litigiousness +litigiousness's +litmus +litmus's +litter +litter's +litterbug +litterbug's +litterbugs +littered +littering +litters +little +little's +littleness +littleness's +littler +littlest +littoral +littoral's +littorals +liturgical +liturgies +liturgy +liturgy's +livability +livability's +livable +live +liveable +lived +livelier +liveliest +livelihood +livelihood's +livelihoods +liveliness +liveliness's +livelong +livelongs +lively +liven +livened +livening +livens +liver +liver's +liveried +liveries +livers +liverwurst +liverwurst's +livery +livery's +lives +livest +livestock +livestock's +livid +lividly +living +living's +livings +lizard +lizard's +lizards +llama +llama's +llamas +llano +llano's +llanos +lo +load +load's +loadable +loaded +loader +loader's +loaders +loading +loads +loadstar +loadstar's +loadstars +loadstone +loadstone's +loadstones +loaf +loaf's +loafed +loafer +loafer's +loafers +loafing +loafs +loam +loam's +loamier 
+loamiest +loamy +loan +loan's +loaned +loaner +loaner's +loaners +loaning +loans +loanword +loanword's +loanwords +loath +loathe +loathed +loathes +loathing +loathing's +loathings +loathsome +loathsomeness +loathsomeness's +loaves +lob +lob's +lobbed +lobbied +lobbies +lobbing +lobby +lobby's +lobbying +lobbyist +lobbyist's +lobbyists +lobe +lobe's +lobed +lobes +lobotomies +lobotomy +lobotomy's +lobs +lobster +lobster's +lobsters +local +local's +locale +locale's +locales +localities +locality +locality's +localization +localization's +localize +localized +localizes +localizing +locally +locals +locate +located +locates +locating +location +location's +locations +locavore +locavore's +locavores +loci +lock +lock's +lockable +locked +locker +locker's +lockers +locket +locket's +lockets +locking +lockjaw +lockjaw's +lockout +lockout's +lockouts +locks +locksmith +locksmith's +locksmiths +lockstep +lockstep's +lockup +lockup's +lockups +loco +locomotion +locomotion's +locomotive +locomotive's +locomotives +locoweed +locoweed's +locoweeds +locus +locus's +locust +locust's +locusts +locution +locution's +locutions +lode +lode's +lodes +lodestar +lodestar's +lodestars +lodestone +lodestone's +lodestones +lodge +lodge's +lodged +lodger +lodger's +lodgers +lodges +lodging +lodging's +lodgings +lodgings's +loft +loft's +lofted +loftier +loftiest +loftily +loftiness +loftiness's +lofting +lofts +lofty +log +log's +loganberries +loganberry +loganberry's +logarithm +logarithm's +logarithmic +logarithms +logbook +logbook's +logbooks +loge +loge's +loges +logged +logger +logger's +loggerhead +loggerhead's +loggerheads +loggers +logging +logging's +logic +logic's +logical +logically +logician +logician's +logicians +login +login's +logins +logistic +logistical +logistically +logistics +logistics's +logjam +logjam's +logjams +logo +logo's +logoff +logoff's +logoffs +logon +logon's +logons +logos +logotype +logotype's +logotypes +logout +logout's +logouts +logrolling 
+logrolling's +logs +loin +loin's +loincloth +loincloth's +loincloths +loins +loiter +loitered +loiterer +loiterer's +loiterers +loitering +loiters +lolcat +lolcat's +lolcats +loll +lolled +lolling +lollipop +lollipop's +lollipops +lolls +lollygag +lollygagged +lollygagging +lollygags +lollypop +lollypop's +lollypops +lone +lonelier +loneliest +loneliness +loneliness's +lonely +loner +loner's +loners +lonesome +long +long's +longboat +longboat's +longboats +longed +longer +longest +longevity +longevity's +longhair +longhair's +longhairs +longhand +longhand's +longhorn +longhorn's +longhorns +longing +longing's +longingly +longings +longish +longitude +longitude's +longitudes +longitudinal +longitudinally +longs +longshoreman +longshoreman's +longshoremen +longtime +loofah +look +look's +lookalike +lookalike's +lookalikes +looked +looking +lookout +lookout's +lookouts +looks +lookup +loom +loom's +loomed +looming +looms +loon +loon's +looney +looney's +looneyier +looneyies +looneys +loonie +loonie's +loonier +loonies +looniest +loons +loony +loony's +loop +loop's +looped +loophole +loophole's +loopholes +loopier +loopiest +looping +loops +loopy +loose +loosed +loosely +loosen +loosened +looseness +looseness's +loosening +loosens +looser +looses +loosest +loosing +loot +loot's +looted +looter +looter's +looters +looting +loots +lop +lope +lope's +loped +lopes +loping +lopped +lopping +lops +lopsided +lopsidedly +lopsidedness +lopsidedness's +loquacious +loquacity +loquacity's +lord +lord's +lorded +lording +lordlier +lordliest +lordly +lords +lordship +lordship's +lordships +lore +lore's +lorgnette +lorgnette's +lorgnettes +lorn +lorries +lorry +lorry's +lose +loser +loser's +losers +loses +losing +loss +loss's +losses +lost +lot +lot's +loth +lotion +lotion's +lotions +lots +lotteries +lottery +lottery's +lotto +lotto's +lotus +lotus's +lotuses +loud +louder +loudest +loudly +loudmouth +loudmouth's +loudmouthed +loudmouths +loudness +loudness's +loudspeaker 
+loudspeaker's +loudspeakers +lounge +lounge's +lounged +lounges +lounging +louse +louse's +louses +lousier +lousiest +lousiness +lousiness's +lousy +lout +lout's +loutish +louts +louver +louver's +louvered +louvers +louvred +lovable +love +love's +loveable +lovebird +lovebird's +lovebirds +loved +loveless +lovelier +lovelies +loveliest +loveliness +loveliness's +lovelorn +lovely +lovely's +lovemaking +lovemaking's +lover +lover's +lovers +loves +lovesick +loving +lovingly +low +low's +lowbrow +lowbrow's +lowbrows +lowdown +lowdown's +lowed +lower +lowercase +lowercase's +lowered +lowering +lowers +lowest +lowing +lowish +lowland +lowland's +lowlands +lowlier +lowliest +lowliness +lowliness's +lowly +lowness +lowness's +lows +lox +lox's +loxes +loyal +loyaler +loyalest +loyalist +loyalist's +loyalists +loyaller +loyallest +loyally +loyalties +loyalty +loyalty's +lozenge +lozenge's +lozenges +ls +luau +luau's +luaus +lubber +lubber's +lubbers +lube +lube's +lubed +lubes +lubing +lubricant +lubricant's +lubricants +lubricate +lubricated +lubricates +lubricating +lubrication +lubrication's +lubricator +lubricator's +lubricators +lucid +lucidity +lucidity's +lucidly +lucidness +lucidness's +luck +luck's +lucked +luckier +luckiest +luckily +luckiness +luckiness's +lucking +luckless +lucks +lucky +lucrative +lucratively +lucre +lucre's +ludicrous +ludicrously +ludicrousness +ludicrousness's +lug +lug's +luggage +luggage's +lugged +lugging +lugs +lugubrious +lugubriously +lugubriousness +lugubriousness's +lukewarm +lull +lull's +lullabies +lullaby +lullaby's +lulled +lulling +lulls +lumbago +lumbago's +lumbar +lumber +lumber's +lumbered +lumbering +lumbering's +lumberjack +lumberjack's +lumberjacks +lumberman +lumberman's +lumbermen +lumbers +lumberyard +lumberyard's +lumberyards +luminaries +luminary +luminary's +luminescence +luminescence's +luminescent +luminosity +luminosity's +luminous +luminously +lummox +lummox's +lummoxes +lump +lump's +lumped +lumpier +lumpiest 
+lumpiness +lumpiness's +lumping +lumpish +lumps +lumpy +lunacies +lunacy +lunacy's +lunar +lunatic +lunatic's +lunatics +lunch +lunch's +lunchbox +lunched +luncheon +luncheon's +luncheonette +luncheonette's +luncheonettes +luncheons +lunches +lunching +lunchroom +lunchroom's +lunchrooms +lunchtime +lunchtime's +lunchtimes +lung +lung's +lunge +lunge's +lunged +lunges +lunging +lungs +lupin +lupin's +lupine +lupine's +lupines +lupins +lupus +lupus's +lurch +lurch's +lurched +lurches +lurching +lure +lure's +lured +lures +lurid +luridly +luridness +luridness's +luring +lurk +lurked +lurking +lurks +luscious +lusciously +lusciousness +lusciousness's +lush +lush's +lusher +lushes +lushest +lushness +lushness's +lust +lust's +lusted +luster +luster's +lustful +lustfully +lustier +lustiest +lustily +lustiness +lustiness's +lusting +lustre +lustre's +lustrous +lusts +lusty +lute +lute's +lutes +luxuriance +luxuriance's +luxuriant +luxuriantly +luxuriate +luxuriated +luxuriates +luxuriating +luxuries +luxurious +luxuriously +luxuriousness +luxuriousness's +luxury +luxury's +lyceum +lyceum's +lyceums +lychee +lychee's +lychees +lye +lye's +lying +lying's +lymph +lymph's +lymphatic +lymphatic's +lymphatics +lymphoma +lymphoma's +lymphomas +lymphomata +lynch +lynched +lynches +lynching +lynching's +lynchings +lynchpin +lynchpin's +lynchpins +lynx +lynx's +lynxes +lyre +lyre's +lyres +lyric +lyric's +lyrical +lyrically +lyricist +lyricist's +lyricists +lyrics +m +ma +ma'am +ma's +macabre +macadam +macadam's +macaroni +macaroni's +macaronies +macaronis +macaroon +macaroon's +macaroons +macaw +macaw's +macaws +mace +mace's +maced +macerate +macerated +macerates +macerating +maceration +maceration's +maces +machete +machete's +machetes +machination +machination's +machinations +machine +machine's +machined +machinery +machinery's +machines +machining +machinist +machinist's +machinists +machismo +machismo's +macho +macho's +macing +macintosh +macintosh's +macintoshes +mackerel 
+mackerel's +mackerels +mackinaw +mackinaw's +mackinaws +mackintosh +mackintosh's +mackintoshes +macramé +macramé's +macro +macro's +macrobiotic +macrobiotics +macrobiotics's +macrocosm +macrocosm's +macrocosms +macron +macron's +macrons +macros +macroscopic +mad +mad's +madam +madam's +madame +madame's +madams +madcap +madcap's +madcaps +madden +maddened +maddening +maddeningly +maddens +madder +madder's +madders +maddest +made +mademoiselle +mademoiselle's +mademoiselles +madhouse +madhouse's +madhouses +madly +madman +madman's +madmen +madness +madness's +madras +madras's +madrasa +madrasa's +madrasah +madrasah's +madrasahs +madrasas +madrases +madrassa +madrassa's +madrassas +madrigal +madrigal's +madrigals +mads +madwoman +madwoman's +madwomen +maelstrom +maelstrom's +maelstroms +maestri +maestro +maestro's +maestros +magazine +magazine's +magazines +magenta +magenta's +maggot +maggot's +maggots +magic +magic's +magical +magically +magician +magician's +magicians +magisterial +magisterially +magistrate +magistrate's +magistrates +magma +magma's +magnanimity +magnanimity's +magnanimous +magnanimously +magnate +magnate's +magnates +magnesia +magnesia's +magnesium +magnesium's +magnet +magnet's +magnetic +magnetically +magnetism +magnetism's +magnetization +magnetization's +magnetize +magnetized +magnetizes +magnetizing +magneto +magneto's +magnetos +magnetosphere +magnets +magnification +magnification's +magnifications +magnificence +magnificence's +magnificent +magnificently +magnified +magnifier +magnifier's +magnifiers +magnifies +magnify +magnifying +magnitude +magnitude's +magnitudes +magnolia +magnolia's +magnolias +magnum +magnum's +magnums +magpie +magpie's +magpies +maharaja +maharaja's +maharajah +maharajah's +maharajahs +maharajas +maharanee +maharanee's +maharanees +maharani +maharani's +maharanis +maharishi +maharishi's +maharishis +mahatma +mahatma's +mahatmas +mahjong +mahjong's +mahoganies +mahogany +mahogany's +maid +maid's +maiden +maiden's 
+maidenhair +maidenhair's +maidenhead +maidenhead's +maidenheads +maidenhood +maidenhood's +maidenly +maidens +maids +maidservant +maidservant's +maidservants +mail +mail's +mailbox +mailbox's +mailboxes +mailed +mailer +mailer's +mailers +mailing +mailing's +mailings +mailman +mailman's +mailmen +mails +maim +maimed +maiming +maims +main +main's +mainframe +mainframe's +mainframes +mainland +mainland's +mainlands +mainline +mainline's +mainlined +mainlines +mainlining +mainly +mainmast +mainmast's +mainmasts +mains +mainsail +mainsail's +mainsails +mainspring +mainspring's +mainsprings +mainstay +mainstay's +mainstays +mainstream +mainstream's +mainstreamed +mainstreaming +mainstreams +maintain +maintainability +maintainable +maintained +maintainer +maintainers +maintaining +maintains +maintenance +maintenance's +maize +maize's +maizes +majestic +majestically +majesties +majesty +majesty's +major +major's +majored +majorette +majorette's +majorettes +majoring +majorities +majority +majority's +majorly +majors +make +make's +maker +maker's +makers +makes +makeshift +makeshift's +makeshifts +makeup +makeup's +makeups +making +making's +makings +maladies +maladjusted +maladjustment +maladjustment's +maladroit +malady +malady's +malaise +malaise's +malapropism +malapropism's +malapropisms +malaria +malaria's +malarial +malarkey +malarkey's +malcontent +malcontent's +malcontents +male +male's +malediction +malediction's +maledictions +malefactor +malefactor's +malefactors +maleness +maleness's +males +malevolence +malevolence's +malevolent +malevolently +malfeasance +malfeasance's +malformation +malformation's +malformations +malformed +malfunction +malfunction's +malfunctioned +malfunctioning +malfunctions +malice +malice's +malicious +maliciously +malign +malignancies +malignancy +malignancy's +malignant +malignantly +maligned +maligning +malignity +malignity's +maligns +malinger +malingered +malingerer +malingerer's +malingerers +malingering +malingers +mall +mall's 
+mallard +mallard's +mallards +malleability +malleability's +malleable +mallet +mallet's +mallets +mallow +mallow's +mallows +malls +malnourished +malnutrition +malnutrition's +malodorous +malpractice +malpractice's +malpractices +malt +malt's +malted +malted's +malteds +malting +maltreat +maltreated +maltreating +maltreatment +maltreatment's +maltreats +malts +malware +malware's +mama +mama's +mamas +mambo +mambo's +mamboed +mamboing +mambos +mamma +mamma's +mammal +mammal's +mammalian +mammalian's +mammalians +mammals +mammary +mammas +mammogram +mammogram's +mammograms +mammography +mammography's +mammon +mammon's +mammoth +mammoth's +mammoths +man +man's +manacle +manacle's +manacled +manacles +manacling +manage +manageability +manageability's +manageable +managed +management +management's +manager +manager's +managerial +managers +manages +managing +manatee +manatee's +manatees +mandarin +mandarin's +mandarins +mandate +mandate's +mandated +mandates +mandating +mandatory +mandible +mandible's +mandibles +mandolin +mandolin's +mandolins +mandrake +mandrake's +mandrakes +mandrill +mandrill's +mandrills +mane +mane's +manes +maneuver +maneuver's +maneuverability +maneuverability's +maneuverable +maneuvered +maneuvering +maneuvers +manful +manfully +manga +manga's +manganese +manganese's +mange +mange's +manger +manger's +mangers +mangier +mangiest +mangle +mangle's +mangled +mangles +mangling +mango +mango's +mangoes +mangos +mangrove +mangrove's +mangroves +mangy +manhandle +manhandled +manhandles +manhandling +manhole +manhole's +manholes +manhood +manhood's +manhunt +manhunt's +manhunts +mania +mania's +maniac +maniac's +maniacal +maniacs +manias +manic +manic's +manics +manicure +manicure's +manicured +manicures +manicuring +manicurist +manicurist's +manicurists +manifest +manifest's +manifestation +manifestation's +manifestations +manifested +manifesting +manifestly +manifesto +manifesto's +manifestoes +manifestos +manifests +manifold +manifold's +manifolded 
+manifolding +manifolds +manikin +manikin's +manikins +manipulate +manipulated +manipulates +manipulating +manipulation +manipulation's +manipulations +manipulative +manipulator +manipulator's +manipulators +mankind +mankind's +manlier +manliest +manliness +manliness's +manly +manna +manna's +manned +mannequin +mannequin's +mannequins +manner +manner's +mannered +mannerism +mannerism's +mannerisms +mannerly +manners +mannikin +mannikin's +mannikins +manning +mannish +mannishly +mannishness +mannishness's +manor +manor's +manorial +manors +manpower +manpower's +manqué +mans +mansard +mansard's +mansards +manse +manse's +manservant +manservant's +manses +mansion +mansion's +mansions +manslaughter +manslaughter's +mantel +mantel's +mantelpiece +mantelpiece's +mantelpieces +mantels +mantes +mantilla +mantilla's +mantillas +mantis +mantis's +mantises +mantissa +mantle +mantle's +mantled +mantlepiece +mantlepieces +mantles +mantling +mantra +mantra's +mantras +manual +manual's +manually +manuals +manufacture +manufacture's +manufactured +manufacturer +manufacturer's +manufacturers +manufactures +manufacturing +manufacturing's +manumit +manumits +manumitted +manumitting +manure +manure's +manured +manures +manuring +manuscript +manuscript's +manuscripts +many +many's +manège +manège's +map +map's +maple +maple's +maples +mapped +mapper +mapping +mappings +maps +mar +marabou +marabou's +marabous +maraca +maraca's +maracas +marathon +marathon's +marathoner +marathoner's +marathoners +marathons +maraud +marauded +marauder +marauder's +marauders +marauding +marauds +marble +marble's +marbled +marbles +marbling +marbling's +march +march's +marched +marcher +marcher's +marchers +marches +marching +marchioness +marchioness's +marchionesses +mare +mare's +mares +margarine +margarine's +margarita +margarita's +margaritas +margin +margin's +marginal +marginalia +marginalia's +marginally +margins +maria +maria's +mariachi +mariachi's +mariachis +marigold +marigold's +marigolds 
+marihuana +marihuana's +marijuana +marijuana's +marimba +marimba's +marimbas +marina +marina's +marinade +marinade's +marinaded +marinades +marinading +marinas +marinate +marinated +marinates +marinating +marine +marine's +mariner +mariner's +mariners +marines +marionette +marionette's +marionettes +marital +maritime +marjoram +marjoram's +mark +mark's +markdown +markdown's +markdowns +marked +markedly +marker +marker's +markers +market +market's +marketability +marketability's +marketable +marketed +marketer +marketer's +marketers +marketing +marketing's +marketplace +marketplace's +marketplaces +markets +marking +marking's +markings +marks +marksman +marksman's +marksmanship +marksmanship's +marksmen +markup +markup's +markups +marlin +marlin's +marlins +marmalade +marmalade's +marmoset +marmoset's +marmosets +marmot +marmot's +marmots +maroon +maroon's +marooned +marooning +maroons +marquee +marquee's +marquees +marquess +marquess's +marquesses +marquetry +marquetry's +marquis +marquis's +marquise +marquise's +marquises +marred +marriage +marriage's +marriageable +marriages +married +married's +marrieds +marries +marring +marrow +marrow's +marrows +marry +marrying +mars +marsh +marsh's +marshal +marshal's +marshaled +marshaling +marshalled +marshalling +marshals +marshes +marshier +marshiest +marshmallow +marshmallow's +marshmallows +marshy +marsupial +marsupial's +marsupials +mart +mart's +marten +marten's +martens +martial +martin +martin's +martinet +martinet's +martinets +martini +martini's +martinis +martins +marts +martyr +martyr's +martyrdom +martyrdom's +martyred +martyring +martyrs +marvel +marvel's +marveled +marveling +marvelled +marvelling +marvellously +marvelous +marvelously +marvels +marzipan +marzipan's +mas +mascara +mascara's +mascaraed +mascaraing +mascaras +mascot +mascot's +mascots +masculine +masculine's +masculines +masculinity +masculinity's +mash +mash's +mashed +masher +masher's +mashers +mashes +mashing +mashup +mashup's +mashups 
+mask +mask's +masked +masking +masks +masochism +masochism's +masochist +masochist's +masochistic +masochists +mason +mason's +masonic +masonry +masonry's +masons +masque +masque's +masquerade +masquerade's +masqueraded +masquerader +masquerader's +masqueraders +masquerades +masquerading +masques +mass +mass's +massacre +massacre's +massacred +massacres +massacring +massage +massage's +massaged +massages +massaging +massed +masses +masseur +masseur's +masseurs +masseuse +masseuse's +masseuses +massing +massive +massively +massiveness +massiveness's +mast +mast's +mastectomies +mastectomy +mastectomy's +master +master's +mastered +masterful +masterfully +mastering +masterly +mastermind +mastermind's +masterminded +masterminding +masterminds +masterpiece +masterpiece's +masterpieces +masters +masterstroke +masterstroke's +masterstrokes +masterwork +masterwork's +masterworks +mastery +mastery's +masthead +masthead's +mastheads +masticate +masticated +masticates +masticating +mastication +mastication's +mastiff +mastiff's +mastiffs +mastodon +mastodon's +mastodons +mastoid +mastoid's +mastoids +masts +masturbate +masturbated +masturbates +masturbating +masturbation +masturbation's +mat +mat's +matador +matador's +matadors +match +match's +matchbook +matchbook's +matchbooks +matchbox +matchbox's +matchboxes +matched +matches +matching +matchless +matchmaker +matchmaker's +matchmakers +matchmaking +matchmaking's +matchstick +matchstick's +matchsticks +mate +mate's +mated +material +material's +materialism +materialism's +materialist +materialist's +materialistic +materialistically +materialists +materialization +materialization's +materialize +materialized +materializes +materializing +materially +materials +maternal +maternally +maternity +maternity's +mates +math +mathematical +mathematically +mathematician +mathematician's +mathematicians +mathematics +mathematics's +mating +matins +matins's +matinée +matinée's +matinées +matriarch +matriarch's +matriarchal 
+matriarchies +matriarchs +matriarchy +matriarchy's +matrices +matricide +matricide's +matricides +matriculate +matriculated +matriculates +matriculating +matriculation +matriculation's +matrimonial +matrimony +matrimony's +matrix +matrix's +matrixes +matron +matron's +matronly +matrons +mats +matt +matte +matte's +matted +matter +matter's +mattered +mattering +matters +mattes +matting +matting's +mattock +mattock's +mattocks +mattress +mattress's +mattresses +matts +maturation +maturation's +mature +matured +maturely +maturer +matures +maturest +maturing +maturities +maturity +maturity's +matzo +matzo's +matzoh +matzoh's +matzohs +matzos +matzot +matzoth +matériel +matériel's +maudlin +maul +maul's +mauled +mauling +mauls +maunder +maundered +maundering +maunders +mausolea +mausoleum +mausoleum's +mausoleums +mauve +mauve's +maven +maven's +mavens +maverick +maverick's +mavericks +mavin +mavin's +mavins +maw +maw's +mawkish +mawkishly +maws +maxed +maxes +maxilla +maxilla's +maxillae +maxillary +maxillas +maxim +maxim's +maxima +maximal +maximally +maximization +maximization's +maximize +maximized +maximizes +maximizing +maxims +maximum +maximum's +maximums +maxing +may +may's +maybe +maybe's +maybes +mayday +mayday's +maydays +mayflies +mayflower +mayflower's +mayflowers +mayfly +mayfly's +mayhem +mayhem's +mayo +mayo's +mayonnaise +mayonnaise's +mayor +mayor's +mayoral +mayoralty +mayoralty's +mayors +maypole +maypole's +maypoles +maze +maze's +mazes +mazourka +mazourka's +mazourkas +mazurka +mazurka's +mazurkas +me +mead +mead's +meadow +meadow's +meadowlark +meadowlark's +meadowlarks +meadows +meager +meagerly +meagerness +meagerness's +meal +meal's +mealier +mealiest +meals +mealtime +mealtime's +mealtimes +mealy +mean +mean's +meander +meander's +meandered +meandering +meanders +meaner +meanest +meaning +meaning's +meaningful +meaningfully +meaningless +meanings +meanly +meanness +meanness's +means +meant +meantime +meantime's +meanwhile +meanwhile's 
+measles +measles's +measlier +measliest +measly +measurable +measurably +measure +measure's +measured +measureless +measurement +measurement's +measurements +measures +measuring +meat +meat's +meatball +meatball's +meatballs +meatier +meatiest +meatloaf +meatloaf's +meatloaves +meats +meaty +mecca +mecca's +meccas +mechanic +mechanic's +mechanical +mechanically +mechanics +mechanics's +mechanism +mechanism's +mechanisms +mechanistic +mechanization +mechanization's +mechanize +mechanized +mechanizes +mechanizing +medal +medal's +medalist +medalist's +medalists +medallion +medallion's +medallions +medals +meddle +meddled +meddler +meddler's +meddlers +meddles +meddlesome +meddling +media +media's +mediaeval +medial +median +median's +medians +medias +mediate +mediated +mediates +mediating +mediation +mediation's +mediator +mediator's +mediators +medic +medic's +medical +medical's +medically +medicals +medicate +medicated +medicates +medicating +medication +medication's +medications +medicinal +medicinally +medicine +medicine's +medicines +medics +medieval +mediocre +mediocrities +mediocrity +mediocrity's +meditate +meditated +meditates +meditating +meditation +meditation's +meditations +meditative +meditatively +medium +medium's +mediums +medley +medley's +medleys +medulla +medulla's +medullae +medullas +meek +meeker +meekest +meekly +meekness +meekness's +meet +meet's +meeting +meeting's +meetinghouse +meetinghouse's +meetinghouses +meetings +meets +meg +megabyte +megabyte's +megabytes +megachurch +megachurch's +megachurches +megacycle +megacycle's +megacycles +megahertz +megahertz's +megahertzes +megalith +megalith's +megaliths +megalomania +megalomania's +megalomaniac +megalomaniac's +megalomaniacs +megalopolis +megalopolis's +megalopolises +megaphone +megaphone's +megaphoned +megaphones +megaphoning +megapixel +megapixel's +megapixels +megaton +megaton's +megatons +megs +meh +melancholia +melancholia's +melancholic +melancholics +melancholy +melancholy's 
+melange +melange's +melanges +melanin +melanin's +melanoma +melanoma's +melanomas +melanomata +meld +meld's +melded +melding +melds +mellifluous +mellifluously +mellow +mellowed +mellower +mellowest +mellowing +mellowness +mellowness's +mellows +melodic +melodically +melodies +melodious +melodiously +melodiousness +melodiousness's +melodrama +melodrama's +melodramas +melodramatic +melodramatically +melody +melody's +melon +melon's +melons +melt +melt's +meltdown +meltdown's +meltdowns +melted +melting +melts +member +member's +members +membership +membership's +memberships +membrane +membrane's +membranes +membranous +meme +meme's +memento +memento's +mementoes +mementos +memes +memo +memo's +memoir +memoir's +memoirs +memorabilia +memorabilia's +memorable +memorably +memoranda +memorandum +memorandum's +memorandums +memorial +memorial's +memorialize +memorialized +memorializes +memorializing +memorials +memories +memorization +memorization's +memorize +memorized +memorizes +memorizing +memory +memory's +memos +men +men's +menace +menace's +menaced +menaces +menacing +menacingly +menage +menage's +menagerie +menagerie's +menageries +menages +mend +mend's +mendacious +mendacity +mendacity's +mended +mender +mender's +menders +mendicant +mendicant's +mendicants +mending +mends +menfolk +menfolk's +menhaden +menhaden's +menhadens +menial +menial's +menially +menials +meningitis +meningitis's +menopausal +menopause +menopause's +menorah +menorah's +menorahs +menservants +menses +menses's +menstrual +menstruate +menstruated +menstruates +menstruating +menstruation +menstruation's +menswear +menswear's +mental +mentalities +mentality +mentality's +mentally +menthol +menthol's +mentholated +mention +mention's +mentioned +mentioning +mentions +mentor +mentor's +mentored +mentoring +mentors +menu +menu's +menus +meow +meow's +meowed +meowing +meows +mercantile +mercenaries +mercenary +mercenary's +mercerize +mercerized +mercerizes +mercerizing +merchandise +merchandise's 
+merchandised +merchandises +merchandising +merchandize +merchandized +merchandizes +merchandizing +merchant +merchant's +merchantman +merchantman's +merchantmen +merchants +mercies +merciful +mercifully +merciless +mercilessly +mercurial +mercuric +mercury +mercury's +mercy +mercy's +mere +mere's +merely +meres +merest +meretricious +merganser +merganser's +mergansers +merge +merged +merger +merger's +mergers +merges +merging +meridian +meridian's +meridians +meringue +meringue's +meringues +merino +merino's +merinos +merit +merit's +merited +meriting +meritocracies +meritocracy +meritocracy's +meritorious +meritoriously +merits +mermaid +mermaid's +mermaids +merman +merman's +mermen +merrier +merriest +merrily +merriment +merriment's +merriness +merriness's +merry +merrymaker +merrymaker's +merrymakers +merrymaking +merrymaking's +mes +mesa +mesa's +mesas +mescal +mescal's +mescaline +mescaline's +mescals +mesdames +mesdemoiselles +mesh +mesh's +meshed +meshes +meshing +mesmerism +mesmerism's +mesmerize +mesmerized +mesmerizes +mesmerizing +mesquite +mesquite's +mesquites +mess +mess's +message +message's +messages +messed +messenger +messenger's +messengers +messes +messiah +messiah's +messiahs +messier +messiest +messieurs +messily +messiness +messiness's +messing +messy +mestizo +mestizo's +mestizoes +mestizos +met +metabolic +metabolism +metabolism's +metabolisms +metabolize +metabolized +metabolizes +metabolizing +metacarpal +metacarpal's +metacarpals +metacarpi +metacarpus +metacarpus's +metal +metal's +metallic +metallurgical +metallurgist +metallurgist's +metallurgists +metallurgy +metallurgy's +metals +metamorphic +metamorphism +metamorphism's +metamorphose +metamorphosed +metamorphoses +metamorphosing +metamorphosis +metamorphosis's +metaphor +metaphor's +metaphorical +metaphorically +metaphors +metaphysical +metaphysics +metaphysics's +metastases +metastasis +metastasis's +metastasize +metastasized +metastasizes +metastasizing +metatarsal +metatarsal's 
+metatarsals +mete +mete's +meted +meteor +meteor's +meteoric +meteorite +meteorite's +meteorites +meteoroid +meteoroid's +meteoroids +meteorological +meteorologist +meteorologist's +meteorologists +meteorology +meteorology's +meteors +meter +meter's +metered +metering +meters +metes +methadon +methadon's +methadone +methadone's +methane +methane's +methanol +methanol's +methinks +method +method's +methodical +methodically +methodological +methodologies +methodology +methodology's +methods +methought +meticulous +meticulously +meticulousness +meticulousness's +meting +metric +metrical +metrically +metrication +metrication's +metrics +metro +metro's +metronome +metronome's +metronomes +metropolis +metropolis's +metropolises +metropolitan +metros +mettle +mettle's +mettlesome +mew +mew's +mewed +mewing +mewl +mewled +mewling +mewls +mews +mews's +mezzanine +mezzanine's +mezzanines +mi +mi's +miaow +miaow's +miaowed +miaowing +miaows +miasma +miasma's +miasmas +miasmata +mica +mica's +mice +micra +microaggression +microaggression's +microaggressions +microbe +microbe's +microbes +microbiologist +microbiologist's +microbiologists +microbiology +microbiology's +microchip +microchip's +microchips +microcode +microcomputer +microcomputer's +microcomputers +microcosm +microcosm's +microcosms +microeconomics +microeconomics's +microfiche +microfiche's +microfiches +microfilm +microfilm's +microfilmed +microfilming +microfilms +microloan +microloan's +microloans +micrometer +micrometer's +micrometers +micron +micron's +microns +microorganism +microorganism's +microorganisms +microphone +microphone's +microphones +microprocessor +microprocessor's +microprocessors +microscope +microscope's +microscopes +microscopic +microscopically +microscopy +microscopy's +microsecond +microsecond's +microseconds +microsurgery +microsurgery's +microwave +microwave's +microwaved +microwaves +microwaving +mid +midair +midair's +midday +midday's +middies +middle +middle's +middlebrow 
+middlebrow's +middlebrows +middleman +middleman's +middlemen +middles +middleweight +middleweight's +middleweights +middling +middy +middy's +midge +midge's +midges +midget +midget's +midgets +midland +midland's +midlands +midmost +midnight +midnight's +midpoint +midpoint's +midpoints +midriff +midriff's +midriffs +midshipman +midshipman's +midshipmen +midst +midst's +midstream +midstream's +midsummer +midsummer's +midterm +midterm's +midterms +midtown +midtown's +midway +midway's +midways +midweek +midweek's +midweeks +midwife +midwife's +midwifed +midwiferies +midwifery +midwifery's +midwifes +midwifing +midwinter +midwinter's +midwived +midwives +midwiving +midyear +midyear's +midyears +mien +mien's +miens +miff +miffed +miffing +miffs +might +might's +mightier +mightiest +mightily +mightiness +mightiness's +mighty +migraine +migraine's +migraines +migrant +migrant's +migrants +migrate +migrated +migrates +migrating +migration +migration's +migrations +migratory +mike +mike's +miked +mikes +miking +mil +mil's +milch +mild +mild's +milder +mildest +mildew +mildew's +mildewed +mildewing +mildews +mildly +mildness +mildness's +mile +mile's +mileage +mileage's +mileages +milepost +milepost's +mileposts +miler +miler's +milers +miles +milestone +milestone's +milestones +milf +milf's +milfs +milieu +milieu's +milieus +milieux +militancy +militancy's +militant +militant's +militantly +militants +militaries +militarily +militarism +militarism's +militarist +militarist's +militaristic +militarists +militarization +militarization's +militarize +militarized +militarizes +militarizing +military +military's +militate +militated +militates +militating +militia +militia's +militiaman +militiaman's +militiamen +militias +milk +milk's +milked +milker +milkier +milkiest +milkiness +milkiness's +milking +milkmaid +milkmaid's +milkmaids +milkman +milkman's +milkmen +milks +milkshake +milkshake's +milkshakes +milksop +milksop's +milksops +milkweed +milkweed's +milkweeds +milky 
+mill +mill's +millage +millage's +milled +millennia +millennial +millennial's +millennium +millennium's +millenniums +millepede +millepede's +millepedes +miller +miller's +millers +millet +millet's +milligram +milligram's +milligrams +milliliter +milliliter's +milliliters +millimeter +millimeter's +millimeters +milliner +milliner's +milliners +millinery +millinery's +milling +million +million's +millionaire +millionaire's +millionaires +millions +millionth +millionth's +millionths +millipede +millipede's +millipedes +millisecond +millisecond's +milliseconds +millrace +millrace's +millraces +mills +millstone +millstone's +millstones +milquetoast +milquetoast's +milquetoasts +mils +mime +mime's +mimed +mimeograph +mimeograph's +mimeographed +mimeographing +mimeographs +mimes +mimetic +mimic +mimic's +mimicked +mimicking +mimicries +mimicry +mimicry's +mimics +miming +mimosa +mimosa's +mimosas +minaret +minaret's +minarets +minatory +mince +mince's +minced +mincemeat +mincemeat's +minces +mincing +mind +mind's +mindbogglingly +minded +mindedness +mindful +mindfully +mindfulness +mindfulness's +minding +mindless +mindlessly +mindlessness +mindlessness's +minds +mine +mine's +mined +minefield +minefield's +minefields +miner +miner's +mineral +mineral's +mineralogist +mineralogist's +mineralogists +mineralogy +mineralogy's +minerals +miners +mines +minestrone +minestrone's +minesweeper +minesweeper's +minesweepers +mingle +mingled +mingles +mingling +mini +mini's +miniature +miniature's +miniatures +miniaturist +miniaturist's +miniaturists +miniaturization +miniaturization's +miniaturize +miniaturized +miniaturizes +miniaturizing +minibike +minibike's +minibikes +minibus +minibus's +minibuses +minibusses +minicam +minicam's +minicams +minicomputer +minicomputer's +minicomputers +minim +minim's +minima +minimal +minimalism +minimalism's +minimalist +minimalist's +minimalists +minimally +minimization +minimize +minimized +minimizes +minimizing +minims +minimum +minimum's 
+minimums +mining +mining's +minion +minion's +minions +minis +miniscule +miniscule's +miniscules +miniseries +miniseries's +miniskirt +miniskirt's +miniskirts +minister +minister's +ministered +ministerial +ministering +ministers +ministrant +ministrant's +ministrants +ministration +ministration's +ministrations +ministries +ministry +ministry's +minivan +minivan's +minivans +mink +mink's +minks +minnow +minnow's +minnows +minor +minor's +minored +minoring +minorities +minority +minority's +minors +minster +minstrel +minstrel's +minstrels +mint +mint's +minted +mintier +mintiest +minting +mints +minty +minuend +minuend's +minuends +minuet +minuet's +minuets +minus +minus's +minuscule +minuscule's +minuscules +minuses +minute +minute's +minuted +minutely +minuteman +minuteman's +minutemen +minuteness +minuteness's +minuter +minutes +minutest +minutia +minutia's +minutiae +minuting +minx +minx's +minxes +miracle +miracle's +miracles +miraculous +miraculously +mirage +mirage's +mirages +mire +mire's +mired +mires +miring +mirror +mirror's +mirrored +mirroring +mirrors +mirth +mirth's +mirthful +mirthfully +mirthless +misadventure +misadventure's +misadventures +misalignment +misalliance +misalliance's +misalliances +misanthrope +misanthrope's +misanthropes +misanthropic +misanthropist +misanthropist's +misanthropists +misanthropy +misanthropy's +misapplication +misapplication's +misapplied +misapplies +misapply +misapplying +misapprehend +misapprehended +misapprehending +misapprehends +misapprehension +misapprehension's +misapprehensions +misappropriate +misappropriated +misappropriates +misappropriating +misappropriation +misappropriation's +misappropriations +misbegotten +misbehave +misbehaved +misbehaves +misbehaving +misbehavior +misbehavior's +miscalculate +miscalculated +miscalculates +miscalculating +miscalculation +miscalculation's +miscalculations +miscall +miscalled +miscalling +miscalls +miscarriage +miscarriage's +miscarriages +miscarried +miscarries 
+miscarry +miscarrying +miscast +miscasting +miscasts +miscegenation +miscegenation's +miscellaneous +miscellanies +miscellany +miscellany's +mischance +mischance's +mischances +mischief +mischief's +mischievous +mischievously +mischievousness +mischievousness's +miscommunication +misconceive +misconceived +misconceives +misconceiving +misconception +misconception's +misconceptions +misconduct +misconduct's +misconducted +misconducting +misconducts +misconstruction +misconstruction's +misconstructions +misconstrue +misconstrued +misconstrues +misconstruing +miscount +miscount's +miscounted +miscounting +miscounts +miscreant +miscreant's +miscreants +miscue +miscue's +miscued +miscues +miscuing +misdeal +misdeal's +misdealing +misdeals +misdealt +misdeed +misdeed's +misdeeds +misdemeanor +misdemeanor's +misdemeanors +misdiagnose +misdiagnosed +misdiagnoses +misdiagnosing +misdiagnosis +misdiagnosis's +misdid +misdirect +misdirected +misdirecting +misdirection +misdirection's +misdirects +misdo +misdoes +misdoing +misdoing's +misdoings +misdone +miser +miser's +miserable +miserably +miseries +miserliness +miserliness's +miserly +misers +misery +misery's +misfeasance +misfeasance's +misfire +misfire's +misfired +misfires +misfiring +misfit +misfit's +misfits +misfitted +misfitting +misfortune +misfortune's +misfortunes +misgiving +misgiving's +misgivings +misgovern +misgoverned +misgoverning +misgoverns +misguide +misguided +misguidedly +misguides +misguiding +mishandle +mishandled +mishandles +mishandling +mishap +mishap's +mishaps +mishmash +mishmash's +mishmashes +misidentified +misidentifies +misidentify +misidentifying +misinform +misinformation +misinformation's +misinformed +misinforming +misinforms +misinterpret +misinterpretation +misinterpretation's +misinterpretations +misinterpreted +misinterpreting +misinterprets +misjudge +misjudged +misjudgement +misjudgement's +misjudgements +misjudges +misjudging +misjudgment +misjudgment's +misjudgments +mislaid 
+mislay +mislaying +mislays +mislead +misleading +misleads +misled +mismanage +mismanaged +mismanagement +mismanagement's +mismanages +mismanaging +mismatch +mismatch's +mismatched +mismatches +mismatching +misnomer +misnomer's +misnomers +misogynist +misogynist's +misogynistic +misogynists +misogyny +misogyny's +misplace +misplaced +misplaces +misplacing +misplay +misplay's +misplayed +misplaying +misplays +misprint +misprint's +misprinted +misprinting +misprints +mispronounce +mispronounced +mispronounces +mispronouncing +mispronunciation +mispronunciation's +mispronunciations +misquotation +misquotation's +misquotations +misquote +misquote's +misquoted +misquotes +misquoting +misread +misreading +misreading's +misreadings +misreads +misrepresent +misrepresentation +misrepresentation's +misrepresentations +misrepresented +misrepresenting +misrepresents +misrule +misrule's +misruled +misrules +misruling +miss +miss's +missal +missal's +missals +missed +misses +misshapen +missile +missile's +missilery +missilery's +missiles +missing +mission +mission's +missionaries +missionary +missionary's +missions +missive +missive's +missives +misspell +misspelled +misspelling +misspelling's +misspellings +misspells +misspelt +misspend +misspending +misspends +misspent +misstate +misstated +misstatement +misstatement's +misstatements +misstates +misstating +misstep +misstep's +missteps +mist +mist's +mistake +mistake's +mistaken +mistakenly +mistakes +mistaking +misted +mister +mister's +misters +mistier +mistiest +mistily +mistime +mistimed +mistimes +mistiming +mistiness +mistiness's +misting +mistletoe +mistletoe's +mistook +mistranslated +mistreat +mistreated +mistreating +mistreatment +mistreatment's +mistreats +mistress +mistress's +mistresses +mistrial +mistrial's +mistrials +mistrust +mistrust's +mistrusted +mistrustful +mistrusting +mistrusts +mists +misty +mistype +mistypes +mistyping +misunderstand +misunderstanding +misunderstanding's +misunderstandings 
+misunderstands +misunderstood +misuse +misuse's +misused +misuses +misusing +mite +mite's +miter +miter's +mitered +mitering +miters +mites +mitigate +mitigated +mitigates +mitigating +mitigation +mitigation's +mitosis +mitosis's +mitt +mitt's +mitten +mitten's +mittens +mitts +mix +mix's +mixed +mixer +mixer's +mixers +mixes +mixing +mixture +mixture's +mixtures +mizzen +mizzen's +mizzenmast +mizzenmast's +mizzenmasts +mizzens +mkay +mnemonic +mnemonic's +mnemonics +moan +moan's +moaned +moaning +moans +moat +moat's +moats +mob +mob's +mobbed +mobbing +mobile +mobile's +mobiles +mobility +mobility's +mobilization +mobilization's +mobilizations +mobilize +mobilized +mobilizes +mobilizing +mobs +mobster +mobster's +mobsters +moccasin +moccasin's +moccasins +mocha +mocha's +mochas +mock +mocked +mocker +mocker's +mockeries +mockers +mockery +mockery's +mocking +mockingbird +mockingbird's +mockingbirds +mockingly +mocks +mod +mod's +modal +modal's +modals +mode +mode's +model +model's +modeled +modeling +modeling's +modelings +modelled +modelling +models +modem +modem's +modems +moderate +moderate's +moderated +moderately +moderates +moderating +moderation +moderation's +moderator +moderator's +moderators +modern +modern's +modernism +modernism's +modernist +modernist's +modernistic +modernists +modernity +modernity's +modernization +modernization's +modernize +modernized +modernizes +modernizing +moderns +modes +modest +modestly +modesty +modesty's +modicum +modicum's +modicums +modifiable +modification +modification's +modifications +modified +modifier +modifier's +modifiers +modifies +modify +modifying +modish +modishly +modishness +modishness's +mods +modular +modulate +modulated +modulates +modulating +modulation +modulation's +modulations +modulator +modulator's +modulators +module +module's +modules +modulus +mogul +mogul's +moguls +mohair +mohair's +moieties +moiety +moiety's +moire +moire's +moires +moist +moisten +moistened +moistening +moistens +moister 
+moistest +moistly +moistness +moistness's +moisture +moisture's +moisturize +moisturized +moisturizer +moisturizer's +moisturizers +moisturizes +moisturizing +molar +molar's +molars +molasses +molasses's +mold +mold's +molded +molder +molder's +moldered +moldering +molders +moldier +moldiest +moldiness +moldiness's +molding +molding's +moldings +molds +moldy +mole +mole's +molecular +molecule +molecule's +molecules +molehill +molehill's +molehills +moles +moleskin +moleskin's +molest +molestation +molestation's +molested +molester +molester's +molesters +molesting +molests +moll +moll's +mollification +mollification's +mollified +mollifies +mollify +mollifying +molls +mollusc +mollusc's +molluscs +mollusk +mollusk's +mollusks +mollycoddle +mollycoddle's +mollycoddled +mollycoddles +mollycoddling +molt +molt's +molted +molten +molting +molts +molybdenum +molybdenum's +mom +mom's +moment +moment's +momentarily +momentary +momentous +momentousness +momentousness's +moments +momentum +momentum's +momma +momma's +mommas +mommies +mommy +mommy's +moms +monarch +monarch's +monarchic +monarchical +monarchies +monarchism +monarchism's +monarchist +monarchist's +monarchists +monarchs +monarchy +monarchy's +monasteries +monastery +monastery's +monastic +monastic's +monasticism +monasticism's +monastics +monaural +monetarily +monetarism +monetary +monetize +monetized +monetizes +monetizing +money +money's +moneybag +moneybag's +moneybags +moneyed +moneymaker +moneymaker's +moneymakers +moneymaking +moneymaking's +mongeese +monger +monger's +mongered +mongering +mongers +mongolism +mongolism's +mongoose +mongoose's +mongooses +mongrel +mongrel's +mongrels +monicker +monicker's +monickers +monied +monies +moniker +moniker's +monikers +monitor +monitor's +monitored +monitoring +monitors +monk +monk's +monkey +monkey's +monkeyed +monkeying +monkeys +monkeyshine +monkeyshine's +monkeyshines +monks +mono +mono's +monochromatic +monochrome +monochrome's +monochromes +monocle 
+monocle's +monocles +monocotyledon +monocotyledon's +monocotyledons +monogamous +monogamy +monogamy's +monogram +monogram's +monogrammed +monogramming +monograms +monograph +monograph's +monographs +monolingual +monolingual's +monolinguals +monolith +monolith's +monolithic +monoliths +monolog +monolog's +monologs +monologue +monologue's +monologues +monomania +monomania's +monomaniac +monomaniac's +monomaniacs +mononucleosis +mononucleosis's +monophonic +monopolies +monopolist +monopolist's +monopolistic +monopolists +monopolization +monopolization's +monopolize +monopolized +monopolizes +monopolizing +monopoly +monopoly's +monorail +monorail's +monorails +monosyllabic +monosyllable +monosyllable's +monosyllables +monotheism +monotheism's +monotheist +monotheist's +monotheistic +monotheists +monotone +monotone's +monotones +monotonic +monotonically +monotonous +monotonously +monotony +monotony's +monoxide +monoxide's +monoxides +monsieur +monsieur's +monsignor +monsignor's +monsignori +monsignors +monsoon +monsoon's +monsoons +monster +monster's +monsters +monstrance +monstrance's +monstrances +monstrosities +monstrosity +monstrosity's +monstrous +monstrously +montage +montage's +montages +month +month's +monthlies +monthly +monthly's +months +monument +monument's +monumental +monumentally +monuments +moo +moo's +mooch +mooch's +mooched +moocher +moocher's +moochers +mooches +mooching +mood +mood's +moodier +moodiest +moodily +moodiness +moodiness's +moods +moody +mooed +mooing +moon +moon's +moonbeam +moonbeam's +moonbeams +mooned +mooning +moonlight +moonlight's +moonlighted +moonlighter +moonlighter's +moonlighters +moonlighting +moonlighting's +moonlights +moonlit +moons +moonscape +moonscape's +moonscapes +moonshine +moonshine's +moonshines +moonshot +moonshot's +moonshots +moonstone +moonstone's +moonstones +moonstruck +moor +moor's +moored +mooring +mooring's +moorings +moorland +moors +moos +moose +moose's +moot +mooted +mooting +moots +mop +mop's +mope 
+mope's +moped +moped's +mopeds +mopes +moping +mopped +moppet +moppet's +moppets +mopping +mops +moraine +moraine's +moraines +moral +moral's +morale +morale's +moralist +moralist's +moralistic +moralists +moralities +morality +morality's +moralize +moralized +moralizes +moralizing +morally +morals +morass +morass's +morasses +moratoria +moratorium +moratorium's +moratoriums +moray +moray's +morays +morbid +morbidity +morbidity's +morbidly +mordant +mordant's +mordants +more +more's +moreover +mores +mores's +morgue +morgue's +morgues +moribund +morn +morn's +morning +morning's +mornings +morns +morocco +morocco's +moron +moron's +moronic +morons +morose +morosely +moroseness +moroseness's +morpheme +morpheme's +morphemes +morphine +morphine's +morphological +morphology +morphology's +morrow +morrow's +morrows +morsel +morsel's +morsels +mortal +mortal's +mortality +mortality's +mortally +mortals +mortar +mortar's +mortarboard +mortarboard's +mortarboards +mortared +mortaring +mortars +mortgage +mortgage's +mortgaged +mortgagee +mortgagee's +mortgagees +mortgager +mortgager's +mortgagers +mortgages +mortgaging +mortgagor +mortgagor's +mortgagors +mortice +mortice's +morticed +mortices +mortician +mortician's +morticians +morticing +mortification +mortification's +mortified +mortifies +mortify +mortifying +mortise +mortise's +mortised +mortises +mortising +mortuaries +mortuary +mortuary's +mosaic +mosaic's +mosaics +mosey +moseyed +moseying +moseys +mosque +mosque's +mosques +mosquito +mosquito's +mosquitoes +mosquitos +moss +moss's +mosses +mossier +mossiest +mossy +most +most's +mostly +mote +mote's +motel +motel's +motels +motes +moth +moth's +mothball +mothball's +mothballed +mothballing +mothballs +mother +mother's +motherboard +motherboard's +motherboards +mothered +motherfucker +motherfucker's +motherfuckers +motherfucking +motherhood +motherhood's +mothering +motherland +motherland's +motherlands +motherless +motherliness +motherliness's +motherly +mothers 
+moths +motif +motif's +motifs +motile +motiles +motility +motility's +motion +motion's +motioned +motioning +motionless +motions +motivate +motivated +motivates +motivating +motivation +motivation's +motivational +motivations +motivator +motivator's +motivators +motive +motive's +motives +motley +motley's +motleys +motlier +motliest +motocross +motocross's +motocrosses +motor +motor's +motorbike +motorbike's +motorbiked +motorbikes +motorbiking +motorboat +motorboat's +motorboats +motorcade +motorcade's +motorcades +motorcar +motorcar's +motorcars +motorcycle +motorcycle's +motorcycled +motorcycles +motorcycling +motorcyclist +motorcyclist's +motorcyclists +motored +motoring +motorist +motorist's +motorists +motorize +motorized +motorizes +motorizing +motorman +motorman's +motormen +motormouth +motormouth's +motormouths +motors +motorway +motorway's +motorways +mottle +mottled +mottles +mottling +motto +motto's +mottoes +mottos +mound +mound's +mounded +mounding +mounds +mount +mount's +mountain +mountain's +mountaineer +mountaineer's +mountaineered +mountaineering +mountaineering's +mountaineers +mountainous +mountains +mountainside +mountainside's +mountainsides +mountaintop +mountaintop's +mountaintops +mountebank +mountebank's +mountebanks +mounted +mounting +mounting's +mountings +mounts +mourn +mourned +mourner +mourner's +mourners +mournful +mournfully +mournfulness +mournfulness's +mourning +mourning's +mourns +mouse +mouse's +moused +mouser +mouser's +mousers +mouses +mousetrap +mousetrap's +mousetrapped +mousetrapping +mousetraps +mousey +mousier +mousiest +mousiness +mousiness's +mousing +mousse +mousse's +moussed +mousses +moussing +moustache +moustache's +moustaches +mousy +mouth +mouth's +mouthed +mouthful +mouthful's +mouthfuls +mouthing +mouthpiece +mouthpiece's +mouthpieces +mouths +mouthwash +mouthwash's +mouthwashes +mouthwatering +movable +movable's +movables +move +move's +moveable +moveable's +moveables +moved +movement +movement's +movements 
+mover +mover's +movers +moves +movie +movie's +movies +moving +movingly +mow +mow's +mowed +mower +mower's +mowers +mowing +mown +mows +mozzarella +mozzarella's +ms +mu +much +much's +mucilage +mucilage's +muck +muck's +mucked +muckier +muckiest +mucking +muckrake +muckraked +muckraker +muckraker's +muckrakers +muckrakes +muckraking +mucks +mucky +mucous +mucus +mucus's +mud +mud's +muddied +muddier +muddies +muddiest +muddiness +muddiness's +muddle +muddle's +muddled +muddles +muddling +muddy +muddying +mudguard +mudguard's +mudguards +mudslide +mudslide's +mudslides +mudslinger +mudslinger's +mudslingers +mudslinging +mudslinging's +muesli +muezzin +muezzin's +muezzins +muff +muff's +muffed +muffin +muffin's +muffing +muffins +muffle +muffled +muffler +muffler's +mufflers +muffles +muffling +muffs +mufti +mufti's +muftis +mug +mug's +mugged +mugger +mugger's +muggers +muggier +muggiest +mugginess +mugginess's +mugging +mugging's +muggings +muggle +muggle's +muggles +muggy +mugs +mukluk +mukluk's +mukluks +mulatto +mulatto's +mulattoes +mulattos +mulberries +mulberry +mulberry's +mulch +mulch's +mulched +mulches +mulching +mule +mule's +mules +muleteer +muleteer's +muleteers +mulish +mulishly +mulishness +mulishness's +mull +mullah +mullah's +mullahs +mulled +mullet +mullet's +mullets +mulligatawny +mulligatawny's +mulling +mullion +mullion's +mullions +mulls +multi +multicolored +multicultural +multiculturalism +multiculturalism's +multidimensional +multifaceted +multifarious +multifariousness +multifariousness's +multilateral +multilingual +multimedia +multimedia's +multimillionaire +multimillionaire's +multimillionaires +multinational +multinational's +multinationals +multiplayer +multiplayer's +multiple +multiple's +multiples +multiplex +multiplex's +multiplexed +multiplexer +multiplexer's +multiplexers +multiplexes +multiplexing +multiplexor +multiplexor's +multiplexors +multiplicand +multiplicand's +multiplicands +multiplication +multiplication's 
+multiplications +multiplicative +multiplicities +multiplicity +multiplicity's +multiplied +multiplier +multiplier's +multipliers +multiplies +multiply +multiplying +multiprocessing +multipurpose +multiracial +multitasking +multitude +multitude's +multitudes +multitudinous +multivariate +multiverse +multiverse's +multiverses +multivitamin +multivitamin's +multivitamins +mum +mumble +mumble's +mumbled +mumbler +mumbler's +mumblers +mumbles +mumbling +mummer +mummer's +mummers +mummery +mummery's +mummies +mummification +mummification's +mummified +mummifies +mummify +mummifying +mummy +mummy's +mumps +mumps's +munch +munched +munches +munchies +munchies's +munching +mundane +mundanely +municipal +municipal's +municipalities +municipality +municipality's +municipally +municipals +munificence +munificence's +munificent +munition +munition's +munitions +mural +mural's +muralist +muralist's +muralists +murals +murder +murder's +murdered +murderer +murderer's +murderers +murderess +murderess's +murderesses +murdering +murderous +murderously +murders +murk +murk's +murkier +murkiest +murkily +murkiness +murkiness's +murks +murky +murmur +murmur's +murmured +murmuring +murmurs +muscat +muscatel +muscatel's +muscatels +muscle +muscle's +muscled +muscles +muscling +muscular +muscularity +muscularity's +musculature +musculature's +muse +muse's +mused +muses +museum +museum's +museums +mush +mush's +mushed +mushes +mushier +mushiest +mushiness +mushiness's +mushing +mushroom +mushroom's +mushroomed +mushrooming +mushrooms +mushy +music +music's +musical +musical's +musicale +musicale's +musicales +musically +musicals +musician +musician's +musicians +musicianship +musicianship's +musicologist +musicologist's +musicologists +musicology +musicology's +musing +musing's +musings +musk +musk's +muskellunge +muskellunge's +muskellunges +musket +musket's +musketeer +musketeer's +musketeers +musketry +musketry's +muskets +muskier +muskiest +muskiness +muskiness's +muskmelon 
+muskmelon's +muskmelons +muskrat +muskrat's +muskrats +musky +muslin +muslin's +muss +muss's +mussed +mussel +mussel's +mussels +musses +mussier +mussiest +mussing +mussy +must +must's +mustache +mustache's +mustaches +mustang +mustang's +mustangs +mustard +mustard's +muster +muster's +mustered +mustering +musters +mustier +mustiest +mustiness +mustiness's +mustn't +musts +musty +mutability +mutability's +mutable +mutant +mutant's +mutants +mutate +mutated +mutates +mutating +mutation +mutation's +mutations +mute +mute's +muted +mutely +muteness +muteness's +muter +mutes +mutest +mutilate +mutilated +mutilates +mutilating +mutilation +mutilation's +mutilations +mutineer +mutineer's +mutineers +muting +mutinied +mutinies +mutinous +mutinously +mutiny +mutiny's +mutinying +mutt +mutt's +mutter +mutter's +muttered +muttering +mutters +mutton +mutton's +mutts +mutual +mutuality +mutuality's +mutually +muumuu +muumuu's +muumuus +muzzle +muzzle's +muzzled +muzzles +muzzling +my +myna +myna's +mynah +mynah's +mynahes +mynahs +mynas +myopia +myopia's +myopic +myriad +myriad's +myriads +myrrh +myrrh's +myrtle +myrtle's +myrtles +myself +mysteries +mysterious +mysteriously +mysteriousness +mysteriousness's +mystery +mystery's +mystic +mystic's +mystical +mystically +mysticism +mysticism's +mystics +mystification +mystification's +mystified +mystifies +mystify +mystifying +mystique +mystique's +myth +myth's +mythic +mythical +mythological +mythologies +mythologist +mythologist's +mythologists +mythology +mythology's +myths +métier +métier's +métiers +mêlée +mêlée's +mêlées +n +nab +nabbed +nabbing +nabob +nabob's +nabobs +nabs +nacho +nacho's +nachos +nacre +nacre's +nadir +nadir's +nadirs +nag +nag's +nagged +nagging +nags +naiad +naiad's +naiades +naiads +nail +nail's +nailbrush +nailbrush's +nailbrushes +nailed +nailing +nails +naive +naively +naiver +naivest +naivety +naiveté +naiveté's +naked +nakedly +nakedness +nakedness's +name +name's +named +nameless +namely +names 
+namesake +namesake's +namesakes +naming +nannies +nanny +nanny's +nanosecond +nanosecond's +nanoseconds +nanotechnology +nanotechnology's +nap +nap's +napalm +napalm's +napalmed +napalming +napalms +nape +nape's +napes +naphtha +naphtha's +naphthalene +naphthalene's +napkin +napkin's +napkins +napped +nappier +nappies +nappiest +napping +nappy +nappy's +naps +narc +narc's +narcissi +narcissism +narcissism's +narcissist +narcissist's +narcissistic +narcissists +narcissus +narcissus's +narcissuses +narcosis +narcosis's +narcotic +narcotic's +narcotics +narcs +nark +nark's +narked +narking +narks +narrate +narrated +narrates +narrating +narration +narration's +narrations +narrative +narrative's +narratives +narrator +narrator's +narrators +narrow +narrow's +narrowed +narrower +narrowest +narrowing +narrowly +narrowness +narrowness's +narrows +narwhal +narwhal's +narwhals +nary +nasal +nasal's +nasalize +nasalized +nasalizes +nasalizing +nasally +nasals +nascent +nastier +nastiest +nastily +nastiness +nastiness's +nasturtium +nasturtium's +nasturtiums +nasty +natal +nation +nation's +national +national's +nationalism +nationalism's +nationalist +nationalist's +nationalistic +nationalists +nationalities +nationality +nationality's +nationalization +nationalization's +nationalizations +nationalize +nationalized +nationalizes +nationalizing +nationally +nationals +nations +nationwide +native +native's +natives +nativities +nativity +nativity's +nattier +nattiest +nattily +natty +natural +natural's +naturalism +naturalism's +naturalist +naturalist's +naturalistic +naturalists +naturalization +naturalization's +naturalize +naturalized +naturalizes +naturalizing +naturally +naturalness +naturalness's +naturals +nature +nature's +natures +naught +naught's +naughtier +naughtiest +naughtily +naughtiness +naughtiness's +naughts +naughty +nausea +nausea's +nauseate +nauseated +nauseates +nauseating +nauseatingly +nauseous +nautical +nautically +nautili +nautilus +nautilus's 
+nautiluses +naval +nave +nave's +navel +navel's +navels +naves +navies +navigability +navigability's +navigable +navigate +navigated +navigates +navigating +navigation +navigation's +navigational +navigator +navigator's +navigators +navy +navy's +nay +nay's +nays +naysayer +naysayer's +naysayers +ne'er +near +nearby +neared +nearer +nearest +nearing +nearly +nearness +nearness's +nears +nearsighted +nearsightedness +nearsightedness's +neat +neater +neatest +neath +neatly +neatness +neatness's +nebula +nebula's +nebulae +nebular +nebulas +nebulous +necessaries +necessarily +necessary +necessary's +necessitate +necessitated +necessitates +necessitating +necessities +necessity +necessity's +neck +neck's +necked +neckerchief +neckerchief's +neckerchiefs +neckerchieves +necking +necklace +necklace's +necklaces +neckline +neckline's +necklines +necks +necktie +necktie's +neckties +necromancer +necromancer's +necromancers +necromancy +necromancy's +necrophilia +necrosis +necrosis's +nectar +nectar's +nectarine +nectarine's +nectarines +need +need's +needed +needful +needier +neediest +neediness +neediness's +needing +needle +needle's +needled +needlepoint +needlepoint's +needles +needless +needlessly +needlework +needlework's +needling +needn't +needs +needy +nefarious +nefariously +nefariousness +nefariousness's +negate +negated +negates +negating +negation +negation's +negations +negative +negative's +negatived +negatively +negatives +negativing +negativity +negativity's +neglect +neglect's +neglected +neglectful +neglectfully +neglecting +neglects +neglig +neglig's +negligee +negligee's +negligees +negligence +negligence's +negligent +negligently +negligible +negligibly +negligs +negotiable +negotiate +negotiated +negotiates +negotiating +negotiation +negotiation's +negotiations +negotiator +negotiator's +negotiators +neigh +neigh's +neighbor +neighbor's +neighbored +neighborhood +neighborhood's +neighborhoods +neighboring +neighborliness +neighborliness's +neighborly 
+neighbors +neighed +neighing +neighs +neither +nematode +nematode's +nematodes +nemeses +nemesis +nemesis's +neoclassic +neoclassical +neoclassicism +neoclassicism's +neocolonialism +neocolonialism's +neocon +neocon's +neocons +neoconservative +neoconservative's +neoconservatives +neodymium +neodymium's +neologism +neologism's +neologisms +neon +neon's +neonatal +neonate +neonate's +neonates +neophyte +neophyte's +neophytes +neoprene +neoprene's +nephew +nephew's +nephews +nephritis +nephritis's +nepotism +nepotism's +neptunium +neptunium's +nerd +nerd's +nerdier +nerdiest +nerds +nerdy +nerve +nerve's +nerved +nerveless +nervelessly +nerves +nervier +nerviest +nerving +nervous +nervously +nervousness +nervousness's +nervy +nest +nest's +nested +nesting +nestle +nestled +nestles +nestling +nestling's +nestlings +nests +net +net's +netbook +netbook's +netbooks +nether +nethermost +nets +netted +netting +netting's +nettle +nettle's +nettled +nettles +nettlesome +nettling +network +network's +networked +networking +networking's +networks +neural +neuralgia +neuralgia's +neuralgic +neuritis +neuritis's +neurological +neurologist +neurologist's +neurologists +neurology +neurology's +neuron +neuron's +neurons +neuroses +neurosis +neurosis's +neurosurgery +neurosurgery's +neurotic +neurotic's +neurotically +neurotics +neurotransmitter +neurotransmitter's +neurotransmitters +neuter +neuter's +neutered +neutering +neuters +neutral +neutral's +neutrality +neutrality's +neutralization +neutralization's +neutralize +neutralized +neutralizer +neutralizer's +neutralizers +neutralizes +neutralizing +neutrally +neutrals +neutrino +neutrino's +neutrinos +neutron +neutron's +neutrons +never +nevermore +nevertheless +new +new's +newbie +newbie's +newbies +newborn +newborn's +newborns +newcomer +newcomer's +newcomers +newel +newel's +newels +newer +newest +newfangled +newly +newlywed +newlywed's +newlyweds +newness +newness's +news +news's +newsagents +newsboy +newsboy's +newsboys 
+newscast +newscast's +newscaster +newscaster's +newscasters +newscasts +newsflash +newsier +newsiest +newsletter +newsletter's +newsletters +newsman +newsman's +newsmen +newspaper +newspaper's +newspaperman +newspaperman's +newspapermen +newspapers +newspaperwoman +newspaperwoman's +newspaperwomen +newsprint +newsprint's +newsreel +newsreel's +newsreels +newsstand +newsstand's +newsstands +newsworthy +newsy +newt +newt's +newton +newton's +newtons +newts +next +next's +nexus +nexus's +nexuses +niacin +niacin's +nib +nib's +nibble +nibble's +nibbled +nibbler +nibbler's +nibblers +nibbles +nibbling +nibs +nice +nicely +niceness +niceness's +nicer +nicest +niceties +nicety +nicety's +niche +niche's +niches +nick +nick's +nicked +nickel +nickel's +nickelodeon +nickelodeon's +nickelodeons +nickels +nicking +nicknack +nicknack's +nicknacks +nickname +nickname's +nicknamed +nicknames +nicknaming +nicks +nicotine +nicotine's +niece +niece's +nieces +niftier +niftiest +nifty +nigga +nigga's +niggard +niggard's +niggardliness +niggardliness's +niggardly +niggards +niggas +niggaz +nigger +nigger's +niggers +niggle +niggle's +niggled +niggles +niggling +nigh +nigher +nighest +night +night's +nightcap +nightcap's +nightcaps +nightclothes +nightclothes's +nightclub +nightclub's +nightclubbed +nightclubbing +nightclubs +nightfall +nightfall's +nightgown +nightgown's +nightgowns +nighthawk +nighthawk's +nighthawks +nightie +nightie's +nighties +nightingale +nightingale's +nightingales +nightlife +nightlife's +nightly +nightmare +nightmare's +nightmares +nightmarish +nights +nightshade +nightshade's +nightshades +nightshirt +nightshirt's +nightshirts +nightstick +nightstick's +nightsticks +nighttime +nighttime's +nighty +nighty's +nihilism +nihilism's +nihilist +nihilist's +nihilistic +nihilists +nil +nil's +nimbi +nimble +nimbleness +nimbleness's +nimbler +nimblest +nimbly +nimbus +nimbus's +nimbuses +nincompoop +nincompoop's +nincompoops +nine +nine's +ninepin +ninepin's 
+ninepins +ninepins's +nines +nineteen +nineteen's +nineteens +nineteenth +nineteenth's +nineteenths +nineties +ninetieth +ninetieth's +ninetieths +ninety +ninety's +ninja +ninja's +ninjas +ninnies +ninny +ninny's +ninth +ninth's +ninths +nip +nip's +nipped +nipper +nipper's +nippers +nippier +nippiest +nipping +nipple +nipple's +nipples +nippy +nips +nirvana +nirvana's +nit +nit's +nite +nite's +niter +niter's +nites +nitpick +nitpicked +nitpicker +nitpicker's +nitpickers +nitpicking +nitpicks +nitrate +nitrate's +nitrated +nitrates +nitrating +nitrogen +nitrogen's +nitrogenous +nitroglycerin +nitroglycerin's +nitroglycerine +nitroglycerine's +nits +nitwit +nitwit's +nitwits +nix +nix's +nixed +nixes +nixing +no +no's +nobility +nobility's +noble +noble's +nobleman +nobleman's +noblemen +nobleness +nobleness's +nobler +nobles +noblest +noblewoman +noblewoman's +noblewomen +nobly +nobodies +nobody +nobody's +nocturnal +nocturnally +nocturne +nocturne's +nocturnes +nod +nod's +nodal +nodded +nodding +noddy +node +node's +nodes +nods +nodular +nodule +nodule's +nodules +noel +noel's +noels +noes +noggin +noggin's +noggins +noise +noise's +noised +noiseless +noiselessly +noiselessness +noiselessness's +noisemaker +noisemaker's +noisemakers +noises +noisier +noisiest +noisily +noisiness +noisiness's +noising +noisome +noisy +nomad +nomad's +nomadic +nomads +nomenclature +nomenclature's +nomenclatures +nominal +nominally +nominate +nominated +nominates +nominating +nomination +nomination's +nominations +nominative +nominative's +nominatives +nominee +nominee's +nominees +non +nonabrasive +nonabsorbent +nonabsorbent's +nonabsorbents +nonagenarian +nonagenarian's +nonagenarians +nonalcoholic +nonaligned +nonbeliever +nonbeliever's +nonbelievers +nonbreakable +nonce +nonce's +nonchalance +nonchalance's +nonchalant +nonchalantly +noncom +noncom's +noncombatant +noncombatant's +noncombatants +noncommercial +noncommercial's +noncommercials +noncommittal +noncommittally 
+noncompetitive +noncompliance +noncompliance's +noncoms +nonconductor +nonconductor's +nonconductors +nonconformist +nonconformist's +nonconformists +nonconformity +nonconformity's +noncontagious +noncooperation +noncooperation's +nondairy +nondeductible +nondeductible's +nondenominational +nondescript +nondrinker +nondrinker's +nondrinkers +none +nonempty +nonentities +nonentity +nonentity's +nonessential +nonesuch +nonesuch's +nonesuches +nonetheless +nonevent +nonevent's +nonevents +nonexempt +nonexempt's +nonexistence +nonexistence's +nonexistent +nonfat +nonfatal +nonfiction +nonfiction's +nonflammable +nongovernmental +nonhazardous +nonhuman +nonindustrial +noninterference +noninterference's +nonintervention +nonintervention's +nonjudgmental +nonliving +nonliving's +nonmalignant +nonmember +nonmember's +nonmembers +nonnegotiable +nonobjective +nonpareil +nonpareil's +nonpareils +nonpartisan +nonpartisan's +nonpartisans +nonpayment +nonpayment's +nonpayments +nonphysical +nonplus +nonplused +nonpluses +nonplusing +nonplussed +nonplusses +nonplussing +nonpoisonous +nonpolitical +nonpolluting +nonprescription +nonproductive +nonprofessional +nonprofessional's +nonprofessionals +nonprofit +nonprofit's +nonprofits +nonproliferation +nonproliferation's +nonrefillable +nonrefundable +nonrenewable +nonrepresentational +nonresident +nonresident's +nonresidents +nonrestrictive +nonreturnable +nonreturnable's +nonreturnables +nonrigid +nonscheduled +nonseasonal +nonsectarian +nonsense +nonsense's +nonsensical +nonsensically +nonsexist +nonskid +nonsmoker +nonsmoker's +nonsmokers +nonsmoking +nonstandard +nonstick +nonstop +nonsupport +nonsupport's +nontaxable +nontechnical +nontoxic +nontransferable +nontrivial +nonunion +nonuser +nonuser's +nonusers +nonverbal +nonviolence +nonviolence's +nonviolent +nonvoting +nonwhite +nonwhite's +nonwhites +nonzero +noodle +noodle's +noodled +noodles +noodling +nook +nook's +nooks +noon +noon's +noonday +noonday's +noontime 
+noontime's +noose +noose's +nooses +nope +nor +norm +norm's +normal +normal's +normalcy +normalcy's +normality +normality's +normalization +normalization's +normalize +normalized +normalizes +normalizing +normally +normative +norms +north +north's +northbound +northeast +northeast's +northeaster +northeaster's +northeasterly +northeastern +northeasters +northeastward +northerlies +northerly +northerly's +northern +northerner +northerner's +northerners +northernmost +northward +northwards +northwest +northwest's +northwesterly +northwestern +northwestward +nose +nose's +nosebleed +nosebleed's +nosebleeds +nosed +nosedive +nosedive's +nosedived +nosedives +nosediving +nosedove +nosegay +nosegay's +nosegays +noses +nosey +nosh +nosh's +noshed +noshes +noshing +nosier +nosiest +nosiness +nosiness's +nosing +nostalgia +nostalgia's +nostalgic +nostalgically +nostril +nostril's +nostrils +nostrum +nostrum's +nostrums +nosy +not +notable +notable's +notables +notably +notaries +notarize +notarized +notarizes +notarizing +notary +notary's +notation +notation's +notations +notch +notch's +notched +notches +notching +note +note's +notebook +notebook's +notebooks +noted +notepad +notepaper +notes +noteworthy +nothing +nothing's +nothingness +nothingness's +nothings +notice +notice's +noticeable +noticeably +noticeboard +noticeboards +noticed +notices +noticing +notification +notification's +notifications +notified +notifies +notify +notifying +noting +notion +notion's +notional +notionally +notions +notoriety +notoriety's +notorious +notoriously +notwithstanding +nougat +nougat's +nougats +nought +nought's +noughts +noun +noun's +nouns +nourish +nourished +nourishes +nourishing +nourishment +nourishment's +nous +nova +nova's +novae +novas +novel +novel's +novelette +novelette's +novelettes +novelist +novelist's +novelists +novella +novella's +novellas +novelle +novels +novelties +novelty +novelty's +novice +novice's +novices +novitiate +novitiate's +novitiates +now +now's 
+nowadays +nowadays's +noway +nowhere +nowhere's +nowise +noxious +nozzle +nozzle's +nozzles +nth +nu +nuance +nuance's +nuanced +nuances +nub +nub's +nubile +nubs +nuclear +nuclei +nucleic +nucleus +nucleus's +nucleuses +nude +nude's +nuder +nudes +nudest +nudge +nudge's +nudged +nudges +nudging +nudism +nudism's +nudist +nudist's +nudists +nudity +nudity's +nugget +nugget's +nuggets +nuisance +nuisance's +nuisances +nuke +nuke's +nuked +nukes +nuking +null +nullification +nullification's +nullified +nullifies +nullify +nullifying +nullity +nullity's +nulls +numb +numbed +number +number's +numbered +numbering +numberless +numbers +numbest +numbing +numbly +numbness +numbness's +numbs +numbskull +numbskull's +numbskulls +numeracy +numeral +numeral's +numerals +numerate +numerated +numerates +numerating +numeration +numeration's +numerations +numerator +numerator's +numerators +numeric +numerical +numerically +numerology +numerology's +numerous +numismatic +numismatics +numismatics's +numismatist +numismatist's +numismatists +numskull +numskull's +numskulls +nun +nun's +nuncio +nuncio's +nuncios +nunneries +nunnery +nunnery's +nuns +nuptial +nuptial's +nuptials +nurse +nurse's +nursed +nursemaid +nursemaid's +nursemaids +nurseries +nursery +nursery's +nurseryman +nurseryman's +nurserymen +nurses +nursing +nursing's +nurture +nurture's +nurtured +nurtures +nurturing +nut +nut's +nutcracker +nutcracker's +nutcrackers +nuthatch +nuthatch's +nuthatches +nutmeat +nutmeat's +nutmeats +nutmeg +nutmeg's +nutmegs +nutria +nutria's +nutrias +nutrient +nutrient's +nutrients +nutriment +nutriment's +nutriments +nutrition +nutrition's +nutritional +nutritionally +nutritionist +nutritionist's +nutritionists +nutritious +nutritive +nuts +nutshell +nutshell's +nutshells +nutted +nuttier +nuttiest +nuttiness +nuttiness's +nutting +nutty +nuzzle +nuzzle's +nuzzled +nuzzles +nuzzling +nylon +nylon's +nylons +nylons's +nymph +nymph's +nymphomania +nymphomania's +nymphomaniac 
+nymphomaniac's +nymphomaniacs +nymphs +née +o +o'clock +o'er +oaf +oaf's +oafish +oafs +oak +oak's +oaken +oaks +oakum +oakum's +oar +oar's +oared +oaring +oarlock +oarlock's +oarlocks +oars +oarsman +oarsman's +oarsmen +oases +oasis +oasis's +oat +oat's +oaten +oath +oath's +oaths +oatmeal +oatmeal's +oats +oats's +obduracy +obduracy's +obdurate +obdurately +obedience +obedience's +obedient +obediently +obeisance +obeisance's +obeisances +obeisant +obelisk +obelisk's +obelisks +obese +obesity +obesity's +obey +obeyed +obeying +obeys +obfuscate +obfuscated +obfuscates +obfuscating +obfuscation +obfuscation's +obit +obit's +obits +obituaries +obituary +obituary's +object +object's +objected +objecting +objection +objection's +objectionable +objectionably +objections +objective +objective's +objectively +objectiveness +objectiveness's +objectives +objectivity +objectivity's +objector +objector's +objectors +objects +oblate +oblation +oblation's +oblations +obligate +obligated +obligates +obligating +obligation +obligation's +obligations +obligatory +oblige +obliged +obliges +obliging +obligingly +oblique +oblique's +obliquely +obliqueness +obliqueness's +obliques +obliterate +obliterated +obliterates +obliterating +obliteration +obliteration's +oblivion +oblivion's +oblivious +obliviously +obliviousness +obliviousness's +oblong +oblong's +oblongs +obloquy +obloquy's +obnoxious +obnoxiously +oboe +oboe's +oboes +oboist +oboist's +oboists +obscene +obscenely +obscener +obscenest +obscenities +obscenity +obscenity's +obscure +obscured +obscurely +obscurer +obscures +obscurest +obscuring +obscurities +obscurity +obscurity's +obsequies +obsequious +obsequiously +obsequiousness +obsequiousness's +obsequy +obsequy's +observable +observably +observance +observance's +observances +observant +observantly +observation +observation's +observational +observations +observatories +observatory +observatory's +observe +observed +observer +observer's +observers +observes +observing 
+obsess +obsessed +obsesses +obsessing +obsession +obsession's +obsessions +obsessive +obsessive's +obsessively +obsessives +obsidian +obsidian's +obsolescence +obsolescence's +obsolescent +obsolete +obsoleted +obsoletes +obsoleting +obstacle +obstacle's +obstacles +obstetric +obstetrical +obstetrician +obstetrician's +obstetricians +obstetrics +obstetrics's +obstinacy +obstinacy's +obstinate +obstinately +obstreperous +obstruct +obstructed +obstructing +obstruction +obstruction's +obstructionist +obstructionist's +obstructionists +obstructions +obstructive +obstructively +obstructiveness +obstructiveness's +obstructs +obtain +obtainable +obtained +obtaining +obtains +obtrude +obtruded +obtrudes +obtruding +obtrusive +obtrusively +obtrusiveness +obtrusiveness's +obtuse +obtusely +obtuseness +obtuseness's +obtuser +obtusest +obverse +obverse's +obverses +obviate +obviated +obviates +obviating +obvious +obviously +obviousness +obviousness's +ocarina +ocarina's +ocarinas +occasion +occasion's +occasional +occasionally +occasioned +occasioning +occasions +occidental +occidental's +occidentals +occlude +occluded +occludes +occluding +occlusion +occlusion's +occlusions +occlusive +occult +occult's +occupancy +occupancy's +occupant +occupant's +occupants +occupation +occupation's +occupational +occupations +occupied +occupies +occupy +occupying +occur +occurred +occurrence +occurrence's +occurrences +occurring +occurs +ocean +ocean's +oceangoing +oceanic +oceanic's +oceanographer +oceanographer's +oceanographers +oceanographic +oceanography +oceanography's +oceans +ocelot +ocelot's +ocelots +ocher +ocher's +ochre +ochre's +octagon +octagon's +octagonal +octagons +octal +octane +octane's +octave +octave's +octaves +octet +octet's +octets +octette +octette's +octettes +octogenarian +octogenarian's +octogenarians +octopi +octopus +octopus's +octopuses +ocular +ocular's +oculars +oculist +oculist's +oculists +odd +oddball +oddball's +oddballs +odder +oddest +oddities +oddity 
+oddity's +oddly +oddness +oddness's +odds +odds's +ode +ode's +odes +odious +odiously +odium +odium's +odometer +odometer's +odometers +odor +odor's +odoriferous +odorless +odorous +odors +odyssey +odyssey's +odysseys +of +off +offal +offal's +offbeat +offbeat's +offbeats +offed +offend +offended +offender +offender's +offenders +offending +offends +offense +offense's +offenses +offensive +offensive's +offensively +offensiveness +offensiveness's +offensives +offer +offer's +offered +offering +offering's +offerings +offers +offertories +offertory +offertory's +offhand +offhandedly +office +office's +officeholder +officeholder's +officeholders +officer +officer's +officers +offices +official +official's +officialdom +officialdom's +officially +officials +officiate +officiated +officiates +officiating +officious +officiously +officiousness +officiousness's +offing +offing's +offings +offload +offloaded +offloading +offloads +offs +offset +offset's +offsets +offsetting +offshoot +offshoot's +offshoots +offshore +offshoring +offside +offspring +offspring's +offsprings +offstage +offstages +oft +often +oftener +oftenest +oftentimes +ogle +ogle's +ogled +ogles +ogling +ogre +ogre's +ogres +oh +oh's +ohm +ohm's +ohms +oho +ohs +oil +oil's +oilcloth +oilcloth's +oilcloths +oiled +oilfield +oilfields +oilier +oiliest +oiliness +oiliness's +oiling +oils +oilskin +oilskin's +oily +oink +oink's +oinked +oinking +oinks +ointment +ointment's +ointments +okay +okay's +okayed +okaying +okays +okra +okra's +okras +old +old's +olden +older +oldest +oldie +oldie's +oldies +oleaginous +oleander +oleander's +oleanders +oleo +oleo's +oleomargarine +oleomargarine's +olfactories +olfactory +olfactory's +oligarch +oligarch's +oligarchic +oligarchies +oligarchs +oligarchy +oligarchy's +olive +olive's +olives +ombudsman +ombudsman's +ombudsmen +omega +omega's +omegas +omelet +omelet's +omelets +omelette +omelette's +omelettes +omen +omen's +omens +ominous +ominously +omission +omission's 
+omissions +omit +omits +omitted +omitting +omnibus +omnibus's +omnibuses +omnibusses +omnipotence +omnipotence's +omnipotent +omnipresence +omnipresence's +omnipresent +omniscience +omniscience's +omniscient +omnivore +omnivore's +omnivores +omnivorous +on +once +once's +oncology +oncology's +oncoming +one +one's +oneness +oneness's +onerous +ones +oneself +onetime +ongoing +onion +onion's +onions +onionskin +onionskin's +online +onlooker +onlooker's +onlookers +only +onomatopoeia +onomatopoeia's +onomatopoeic +onrush +onrush's +onrushes +onrushing +onset +onset's +onsets +onshore +onslaught +onslaught's +onslaughts +onto +onus +onus's +onuses +onward +onwards +onyx +onyx's +onyxes +oodles +oodles's +oops +ooze +ooze's +oozed +oozes +oozing +opacity +opacity's +opal +opal's +opalescence +opalescence's +opalescent +opals +opaque +opaqued +opaquely +opaqueness +opaqueness's +opaquer +opaques +opaquest +opaquing +open +open's +opened +opener +opener's +openers +openest +openhanded +opening +opening's +openings +openly +openness +openness's +opens +openwork +openwork's +opera +opera's +operable +operand +operands +operas +operate +operated +operates +operatic +operating +operation +operation's +operational +operationally +operations +operative +operative's +operatives +operator +operator's +operators +operetta +operetta's +operettas +ophthalmic +ophthalmologist +ophthalmologist's +ophthalmologists +ophthalmology +ophthalmology's +opiate +opiate's +opiates +opine +opined +opines +opining +opinion +opinion's +opinionated +opinions +opium +opium's +opossum +opossum's +opossums +opponent +opponent's +opponents +opportune +opportunism +opportunism's +opportunist +opportunist's +opportunistic +opportunists +opportunities +opportunity +opportunity's +oppose +opposed +opposes +opposing +opposite +opposite's +opposites +opposition +opposition's +oppress +oppressed +oppresses +oppressing +oppression +oppression's +oppressive +oppressively +oppressor +oppressor's +oppressors 
+opprobrious +opprobrium +opprobrium's +opt +opted +optic +optic's +optical +optically +optician +optician's +opticians +optics +optics's +optima +optimal +optimism +optimism's +optimist +optimist's +optimistic +optimistically +optimists +optimization +optimizations +optimize +optimized +optimizer +optimizes +optimizing +optimum +optimum's +optimums +opting +option +option's +optional +optionally +optioned +optioning +options +optometrist +optometrist's +optometrists +optometry +optometry's +opts +opulence +opulence's +opulent +opus +opus's +opuses +or +oracle +oracle's +oracles +oracular +oral +oral's +orally +orals +orange +orange's +orangeade +orangeade's +orangeades +oranges +orangutan +orangutan's +orangutang +orangutang's +orangutangs +orangutans +orate +orated +orates +orating +oration +oration's +orations +orator +orator's +oratorical +oratories +oratorio +oratorio's +oratorios +orators +oratory +oratory's +orb +orb's +orbit +orbit's +orbital +orbital's +orbitals +orbited +orbiting +orbits +orbs +orc +orc's +orchard +orchard's +orchards +orchestra +orchestra's +orchestral +orchestras +orchestrate +orchestrated +orchestrates +orchestrating +orchestration +orchestration's +orchestrations +orchid +orchid's +orchids +orcs +ordain +ordained +ordaining +ordains +ordeal +ordeal's +ordeals +order +order's +ordered +ordering +orderings +orderlies +orderliness +orderliness's +orderly +orderly's +orders +ordinal +ordinal's +ordinals +ordinance +ordinance's +ordinances +ordinaries +ordinarily +ordinariness +ordinariness's +ordinary +ordinary's +ordination +ordination's +ordinations +ordnance +ordnance's +ordure +ordure's +ore +ore's +oregano +oregano's +ores +organ +organ's +organdie +organdie's +organdy +organdy's +organelle +organelle's +organelles +organic +organic's +organically +organics +organism +organism's +organisms +organist +organist's +organists +organization +organization's +organizational +organizations +organize +organized +organizer +organizer's 
+organizers +organizes +organizing +organs +orgasm +orgasm's +orgasmic +orgasms +orgiastic +orgies +orgy +orgy's +orient +orient's +oriental +oriental's +orientals +orientate +orientated +orientates +orientating +orientation +orientation's +orientations +oriented +orienting +orients +orifice +orifice's +orifices +origami +origami's +origin +origin's +original +original's +originality +originality's +originally +originals +originate +originated +originates +originating +origination +origination's +originator +originator's +originators +origins +oriole +oriole's +orioles +ormolu +ormolu's +ornament +ornament's +ornamental +ornamentation +ornamentation's +ornamented +ornamenting +ornaments +ornate +ornately +ornateness +ornateness's +ornerier +orneriest +ornery +ornithologist +ornithologist's +ornithologists +ornithology +ornithology's +orotund +orphan +orphan's +orphanage +orphanage's +orphanages +orphaned +orphaning +orphans +orthodontia +orthodontia's +orthodontic +orthodontics +orthodontics's +orthodontist +orthodontist's +orthodontists +orthodox +orthodoxies +orthodoxy +orthodoxy's +orthogonal +orthogonality +orthographic +orthographies +orthography +orthography's +orthopaedic +orthopaedics +orthopaedics's +orthopaedist +orthopaedist's +orthopaedists +orthopedic +orthopedics +orthopedics's +orthopedist +orthopedist's +orthopedists +oscillate +oscillated +oscillates +oscillating +oscillation +oscillation's +oscillations +oscillator +oscillator's +oscillators +oscilloscope +oscilloscope's +oscilloscopes +osier +osier's +osiers +osmosis +osmosis's +osmotic +osprey +osprey's +ospreys +ossification +ossification's +ossified +ossifies +ossify +ossifying +ostensible +ostensibly +ostentation +ostentation's +ostentatious +ostentatiously +osteopath +osteopath's +osteopaths +osteopathy +osteopathy's +osteoporosis +osteoporosis's +ostracism +ostracism's +ostracize +ostracized +ostracizes +ostracizing +ostrich +ostrich's +ostriches +other +other's +others +otherwise 
+otherworldly +otiose +otter +otter's +otters +ottoman +ottoman's +ottomans +ouch +ought +ounce +ounce's +ounces +our +ours +ourselves +oust +ousted +ouster +ouster's +ousters +ousting +ousts +out +out's +outage +outage's +outages +outback +outback's +outbacks +outbalance +outbalanced +outbalances +outbalancing +outbid +outbidding +outbids +outbound +outbreak +outbreak's +outbreaks +outbuilding +outbuilding's +outbuildings +outburst +outburst's +outbursts +outcast +outcast's +outcasts +outclass +outclassed +outclasses +outclassing +outcome +outcome's +outcomes +outcries +outcrop +outcrop's +outcropped +outcropping +outcropping's +outcroppings +outcrops +outcry +outcry's +outdated +outdid +outdistance +outdistanced +outdistances +outdistancing +outdo +outdoes +outdoing +outdone +outdoor +outdoors +outdoors's +outed +outer +outermost +outfield +outfield's +outfielder +outfielder's +outfielders +outfields +outfit +outfit's +outfits +outfitted +outfitter +outfitter's +outfitters +outfitting +outflank +outflanked +outflanking +outflanks +outfox +outfoxed +outfoxes +outfoxing +outgo +outgo's +outgoes +outgoing +outgrew +outgrow +outgrowing +outgrown +outgrows +outgrowth +outgrowth's +outgrowths +outhouse +outhouse's +outhouses +outing +outing's +outings +outlaid +outlandish +outlandishly +outlast +outlasted +outlasting +outlasts +outlaw +outlaw's +outlawed +outlawing +outlaws +outlay +outlay's +outlaying +outlays +outlet +outlet's +outlets +outline +outline's +outlined +outlines +outlining +outlive +outlived +outlives +outliving +outlook +outlook's +outlooks +outlying +outmaneuver +outmaneuvered +outmaneuvering +outmaneuvers +outmanoeuvre +outmanoeuvred +outmanoeuvres +outmanoeuvring +outmoded +outnumber +outnumbered +outnumbering +outnumbers +outpatient +outpatient's +outpatients +outperform +outperformed +outperforming +outperforms +outplacement +outplacement's +outplay +outplayed +outplaying +outplays +outpost +outpost's +outposts +outpouring +outpouring's 
+outpourings +output +output's +outputs +outputted +outputting +outrage +outrage's +outraged +outrageous +outrageously +outrages +outraging +outran +outrank +outranked +outranking +outranks +outreach +outreach's +outreached +outreaches +outreaching +outrider +outrider's +outriders +outrigger +outrigger's +outriggers +outright +outrun +outrunning +outruns +outré +outs +outsell +outselling +outsells +outset +outset's +outsets +outshine +outshined +outshines +outshining +outshone +outside +outside's +outsider +outsider's +outsiders +outsides +outsize +outsize's +outsized +outsizes +outskirt +outskirt's +outskirts +outsmart +outsmarted +outsmarting +outsmarts +outsold +outsource +outsourced +outsources +outsourcing +outsourcing's +outspoken +outspokenly +outspokenness +outspokenness's +outspread +outspreading +outspreads +outstanding +outstandingly +outstation +outstation's +outstations +outstay +outstayed +outstaying +outstays +outstretch +outstretched +outstretches +outstretching +outstrip +outstripped +outstripping +outstrips +outstript +outtake +outtake's +outtakes +outvote +outvoted +outvotes +outvoting +outward +outwardly +outwards +outwear +outwearing +outwears +outweigh +outweighed +outweighing +outweighs +outwit +outwits +outwitted +outwitting +outwore +outworn +ova +oval +oval's +ovals +ovarian +ovaries +ovary +ovary's +ovation +ovation's +ovations +oven +oven's +ovens +over +over's +overabundance +overabundance's +overabundant +overachieve +overachieved +overachiever +overachiever's +overachievers +overachieves +overachieving +overact +overacted +overacting +overactive +overacts +overage +overage's +overages +overall +overall's +overalls +overalls's +overambitious +overanxious +overate +overawe +overawed +overawes +overawing +overbalance +overbalance's +overbalanced +overbalances +overbalancing +overbear +overbearing +overbears +overbite +overbite's +overbites +overblown +overboard +overbook +overbooked +overbooking +overbooks +overbore +overborne 
+overburden +overburdened +overburdening +overburdens +overcame +overcast +overcast's +overcasting +overcasts +overcautious +overcharge +overcharge's +overcharged +overcharges +overcharging +overcoat +overcoat's +overcoats +overcome +overcomes +overcoming +overcompensate +overcompensated +overcompensates +overcompensating +overcompensation +overcompensation's +overconfident +overcook +overcooked +overcooking +overcooks +overcrowd +overcrowded +overcrowding +overcrowds +overdid +overdo +overdoes +overdoing +overdone +overdose +overdose's +overdosed +overdoses +overdosing +overdraft +overdraft's +overdrafts +overdraw +overdrawing +overdrawn +overdraws +overdress +overdress's +overdressed +overdresses +overdressing +overdrew +overdrive +overdrive's +overdue +overeager +overeat +overeaten +overeating +overeats +overemphasize +overemphasized +overemphasizes +overemphasizing +overenthusiastic +overestimate +overestimate's +overestimated +overestimates +overestimating +overexpose +overexposed +overexposes +overexposing +overexposure +overexposure's +overextend +overextended +overextending +overextends +overflow +overflow's +overflowed +overflowing +overflows +overfull +overgenerous +overgrew +overgrow +overgrowing +overgrown +overgrows +overgrowth +overgrowth's +overhand +overhand's +overhands +overhang +overhang's +overhanging +overhangs +overhaul +overhaul's +overhauled +overhauling +overhauls +overhead +overhead's +overheads +overhear +overheard +overhearing +overhears +overheat +overheated +overheating +overheats +overhung +overindulge +overindulged +overindulgence +overindulgence's +overindulges +overindulging +overjoy +overjoyed +overjoying +overjoys +overkill +overkill's +overlaid +overlain +overland +overlap +overlap's +overlapped +overlapping +overlaps +overlay +overlay's +overlaying +overlays +overlie +overlies +overload +overload's +overloaded +overloading +overloads +overlong +overlook +overlook's +overlooked +overlooking +overlooks +overlord +overlord's 
+overlords +overly +overlying +overmuch +overmuches +overnight +overnight's +overnights +overpaid +overpass +overpass's +overpasses +overpay +overpaying +overpays +overplay +overplayed +overplaying +overplays +overpopulate +overpopulated +overpopulates +overpopulating +overpopulation +overpopulation's +overpower +overpowered +overpowering +overpowers +overprice +overpriced +overprices +overpricing +overprint +overprinted +overprinting +overprints +overproduce +overproduced +overproduces +overproducing +overproduction +overproduction's +overprotective +overqualified +overran +overrate +overrated +overrates +overrating +overreach +overreached +overreaches +overreaching +overreact +overreacted +overreacting +overreaction +overreaction's +overreactions +overreacts +overridden +override +override's +overrides +overriding +overripe +overripe's +overrode +overrule +overruled +overrules +overruling +overrun +overrun's +overrunning +overruns +overs +oversampling +oversaw +overseas +oversee +overseeing +overseen +overseer +overseer's +overseers +oversees +oversell +overselling +oversells +oversensitive +oversexed +overshadow +overshadowed +overshadowing +overshadows +overshare +overshared +overshares +oversharing +overshoe +overshoe's +overshoes +overshoot +overshooting +overshoots +overshot +oversight +oversight's +oversights +oversimplification +oversimplification's +oversimplifications +oversimplified +oversimplifies +oversimplify +oversimplifying +oversize +oversized +oversleep +oversleeping +oversleeps +overslept +oversold +overspecialize +overspecialized +overspecializes +overspecializing +overspend +overspending +overspends +overspent +overspill +overspread +overspreading +overspreads +overstate +overstated +overstatement +overstatement's +overstatements +overstates +overstating +overstay +overstayed +overstaying +overstays +overstep +overstepped +overstepping +oversteps +overstock +overstocked +overstocking +overstocks +overstuffed +oversupplied +oversupplies 
+oversupply +oversupplying +overt +overtake +overtaken +overtakes +overtaking +overtax +overtaxed +overtaxes +overtaxing +overthink +overthinking +overthinks +overthought +overthrew +overthrow +overthrow's +overthrowing +overthrown +overthrows +overtime +overtime's +overtimes +overtly +overtone +overtone's +overtones +overtook +overture +overture's +overtures +overturn +overturned +overturning +overturns +overuse +overuse's +overused +overuses +overusing +overview +overview's +overviews +overweening +overweight +overweight's +overwhelm +overwhelmed +overwhelming +overwhelmingly +overwhelms +overwork +overwork's +overworked +overworking +overworks +overwrite +overwrites +overwriting +overwritten +overwrought +overzealous +oviduct +oviduct's +oviducts +oviparous +ovoid +ovoid's +ovoids +ovulate +ovulated +ovulates +ovulating +ovulation +ovulation's +ovule +ovule's +ovules +ovum +ovum's +ow +owe +owed +owes +owing +owl +owl's +owlet +owlet's +owlets +owlish +owls +own +owned +owner +owner's +owners +ownership +ownership's +owning +owns +ox +ox's +oxbow +oxbow's +oxbows +oxen +oxford +oxford's +oxfords +oxidation +oxidation's +oxide +oxide's +oxides +oxidize +oxidized +oxidizer +oxidizer's +oxidizers +oxidizes +oxidizing +oxyacetylene +oxyacetylene's +oxygen +oxygen's +oxygenate +oxygenated +oxygenates +oxygenating +oxygenation +oxygenation's +oxymora +oxymoron +oxymoron's +oxymorons +oyster +oyster's +oysters +ozone +ozone's +p +pH +pa +pa's +pace +pace's +paced +pacemaker +pacemaker's +pacemakers +paces +pacesetter +pacesetter's +pacesetters +pachyderm +pachyderm's +pachyderms +pacific +pacifically +pacification +pacification's +pacified +pacifier +pacifier's +pacifiers +pacifies +pacifism +pacifism's +pacifist +pacifist's +pacifists +pacify +pacifying +pacing +pack +pack's +package +package's +packaged +packages +packaging +packaging's +packed +packer +packer's +packers +packet +packet's +packets +packing +packing's +packs +pact +pact's +pacts +pad +pad's +padded 
+paddies +padding +padding's +paddle +paddle's +paddled +paddles +paddling +paddock +paddock's +paddocked +paddocking +paddocks +paddy +paddy's +padlock +padlock's +padlocked +padlocking +padlocks +padre +padre's +padres +pads +paean +paean's +paeans +pagan +pagan's +paganism +paganism's +pagans +page +page's +pageant +pageant's +pageantry +pageantry's +pageants +paged +pager +pager's +pagers +pages +paginate +paginated +paginates +paginating +pagination +pagination's +paging +pagoda +pagoda's +pagodas +paid +pail +pail's +pailful +pailful's +pailfuls +pails +pailsful +pain +pain's +pained +painful +painfuller +painfullest +painfully +paining +painkiller +painkiller's +painkillers +painless +painlessly +pains +painstaking +painstaking's +painstakingly +paint +paint's +paintbrush +paintbrush's +paintbrushes +painted +painter +painter's +painters +painting +painting's +paintings +paints +paintwork +pair +pair's +paired +pairing +pairs +pairwise +paisley +paisley's +paisleys +pajamas +pajamas's +pal +pal's +palace +palace's +palaces +palatable +palatal +palatal's +palatals +palate +palate's +palates +palatial +palaver +palaver's +palavered +palavering +palavers +palazzi +palazzo +pale +pale's +paled +paleface +paleface's +palefaces +paleness +paleness's +paleontologist +paleontologist's +paleontologists +paleontology +paleontology's +paler +pales +palest +palette +palette's +palettes +palimony +palimony's +palimpsest +palimpsest's +palimpsests +palindrome +palindrome's +palindromes +palindromic +paling +paling's +palings +palisade +palisade's +palisades +pall +pall's +palladium +palladium's +pallbearer +pallbearer's +pallbearers +palled +pallet +pallet's +pallets +palliate +palliated +palliates +palliating +palliation +palliation's +palliative +palliative's +palliatives +pallid +palling +pallor +pallor's +palls +palm +palm's +palmed +palmetto +palmetto's +palmettoes +palmettos +palmier +palmiest +palming +palmist +palmist's +palmistry +palmistry's +palmists +palms 
+palmy +palomino +palomino's +palominos +palpable +palpably +palpate +palpated +palpates +palpating +palpation +palpation's +palpitate +palpitated +palpitates +palpitating +palpitation +palpitation's +palpitations +pals +palsied +palsies +palsy +palsy's +palsying +paltrier +paltriest +paltriness +paltriness's +paltry +pampas +pampas's +pamper +pampered +pampering +pampers +pamphlet +pamphlet's +pamphleteer +pamphleteer's +pamphleteers +pamphlets +pan +pan's +panacea +panacea's +panaceas +panache +panache's +pancake +pancake's +pancaked +pancakes +pancaking +panchromatic +pancreas +pancreas's +pancreases +pancreatic +panda +panda's +pandas +pandemic +pandemic's +pandemics +pandemonium +pandemonium's +pander +pander's +pandered +panderer +panderer's +panderers +pandering +panders +pane +pane's +panegyric +panegyric's +panegyrics +panel +panel's +paneled +paneling +paneling's +panelings +panelist +panelist's +panelists +panelled +panelling +panelling's +panellings +panels +panes +pang +pang's +pangs +panhandle +panhandle's +panhandled +panhandler +panhandler's +panhandlers +panhandles +panhandling +panic +panic's +panicked +panicking +panicky +panics +panier +panier's +paniers +panned +pannier +pannier's +panniers +panning +panoplies +panoply +panoply's +panorama +panorama's +panoramas +panoramic +pans +pansies +pansy +pansy's +pant +pant's +pantaloons +pantaloons's +panted +pantheism +pantheism's +pantheist +pantheist's +pantheistic +pantheists +pantheon +pantheon's +pantheons +panther +panther's +panthers +pantie +pantie's +panties +panting +pantomime +pantomime's +pantomimed +pantomimes +pantomiming +pantries +pantry +pantry's +pants +pantsuit +pantsuit's +pantsuits +panty +panty's +pantyhose +pantyhose's +pap +pap's +papa +papa's +papacies +papacy +papacy's +papal +papas +papaw +papaw's +papaws +papaya +papaya's +papayas +paper +paper's +paperback +paperback's +paperbacks +paperboy +paperboy's +paperboys +papered +papergirl +papergirl's +papergirls +paperhanger 
+paperhanger's +paperhangers +papering +papers +paperweight +paperweight's +paperweights +paperwork +paperwork's +papery +papilla +papilla's +papillae +papoose +papoose's +papooses +paprika +paprika's +paps +papyri +papyrus +papyrus's +papyruses +par +par's +parable +parable's +parables +parabola +parabola's +parabolas +parabolic +parachute +parachute's +parachuted +parachutes +parachuting +parachutist +parachutist's +parachutists +parade +parade's +paraded +parades +paradigm +paradigm's +paradigmatic +paradigms +parading +paradise +paradise's +paradises +paradox +paradox's +paradoxes +paradoxical +paradoxically +paraffin +paraffin's +paragliding +paragon +paragon's +paragons +paragraph +paragraph's +paragraphed +paragraphing +paragraphs +parakeet +parakeet's +parakeets +paralegal +paralegal's +paralegals +parallax +parallax's +parallaxes +parallel +parallel's +paralleled +paralleling +parallelism +parallelism's +parallelisms +parallelled +parallelling +parallelogram +parallelogram's +parallelograms +parallels +paralyses +paralysis +paralysis's +paralytic +paralytic's +paralytics +paralyze +paralyzed +paralyzes +paralyzing +paramecia +paramecium +paramecium's +parameciums +paramedic +paramedic's +paramedical +paramedical's +paramedicals +paramedics +parameter +parameter's +parameters +paramilitaries +paramilitary +paramilitary's +paramount +paramour +paramour's +paramours +paranoia +paranoia's +paranoid +paranoid's +paranoids +paranormal +parapet +parapet's +parapets +paraphernalia +paraphernalia's +paraphrase +paraphrase's +paraphrased +paraphrases +paraphrasing +paraplegia +paraplegia's +paraplegic +paraplegic's +paraplegics +paraprofessional +paraprofessional's +paraprofessionals +parapsychology +parapsychology's +parasailing +parasite +parasite's +parasites +parasitic +parasol +parasol's +parasols +paratrooper +paratrooper's +paratroopers +paratroops +paratroops's +parboil +parboiled +parboiling +parboils +parcel +parcel's +parceled +parceling +parcelled 
+parcelling +parcels +parch +parched +parches +parching +parchment +parchment's +parchments +pardon +pardon's +pardonable +pardoned +pardoning +pardons +pare +pared +parent +parent's +parentage +parentage's +parental +parented +parentheses +parenthesis +parenthesis's +parenthesize +parenthesized +parenthesizes +parenthesizing +parenthetic +parenthetical +parenthetically +parenthood +parenthood's +parenting +parenting's +parents +pares +parfait +parfait's +parfaits +pariah +pariah's +pariahs +paring +paring's +parings +parish +parish's +parishes +parishioner +parishioner's +parishioners +parity +parity's +park +park's +parka +parka's +parkas +parked +parking +parking's +parkour +parks +parkway +parkway's +parkways +parlance +parlance's +parlay +parlay's +parlayed +parlaying +parlays +parley +parley's +parleyed +parleying +parleys +parliament +parliament's +parliamentarian +parliamentarian's +parliamentarians +parliamentary +parliaments +parlor +parlor's +parlors +parochial +parochialism +parochialism's +parodied +parodies +parody +parody's +parodying +parole +parole's +paroled +parolee +parolee's +parolees +paroles +paroling +paroxysm +paroxysm's +paroxysms +parquet +parquet's +parqueted +parqueting +parquetry +parquetry's +parquets +parrakeet +parrakeet's +parrakeets +parred +parricide +parricide's +parricides +parried +parries +parring +parrot +parrot's +parroted +parroting +parrots +parry +parry's +parrying +pars +parse +parsec +parsec's +parsecs +parsed +parser +parses +parsimonious +parsimony +parsimony's +parsing +parsley +parsley's +parsnip +parsnip's +parsnips +parson +parson's +parsonage +parsonage's +parsonages +parsons +part +part's +partake +partaken +partaker +partaker's +partakers +partakes +partaking +parted +parterre +parterre's +parterres +parthenogenesis +parthenogenesis's +partial +partial's +partiality +partiality's +partially +partials +participant +participant's +participants +participate +participated +participates +participating 
+participation +participation's +participator +participator's +participators +participatory +participial +participial's +participle +participle's +participles +particle +particle's +particles +particular +particular's +particularities +particularity +particularity's +particularization +particularization's +particularize +particularized +particularizes +particularizing +particularly +particulars +particulate +particulate's +particulates +partied +parties +parting +parting's +partings +partisan +partisan's +partisans +partisanship +partisanship's +partition +partition's +partitioned +partitioning +partitions +partizan +partizan's +partizans +partly +partner +partner's +partnered +partnering +partners +partnership +partnership's +partnerships +partook +partridge +partridge's +partridges +parts +parturition +parturition's +partway +party +party's +partying +parvenu +parvenu's +parvenus +pas +paschal +pasha +pasha's +pashas +pass +pass's +passable +passably +passage +passage's +passages +passageway +passageway's +passageways +passbook +passbook's +passbooks +passed +passel +passel's +passels +passenger +passenger's +passengers +passer +passerby +passerby's +passersby +passes +passing +passing's +passion +passion's +passionate +passionately +passionless +passions +passive +passive's +passively +passives +passivity +passivity's +passkey +passkey's +passkeys +passport +passport's +passports +password +password's +passwords +passé +past +past's +pasta +pasta's +pastas +paste +paste's +pasteboard +pasteboard's +pasted +pastel +pastel's +pastels +pastern +pastern's +pasterns +pastes +pasteurization +pasteurization's +pasteurize +pasteurized +pasteurizes +pasteurizing +pastiche +pastiche's +pastiches +pastier +pasties +pastiest +pastime +pastime's +pastimes +pasting +pastor +pastor's +pastoral +pastoral's +pastorals +pastorate +pastorate's +pastorates +pastors +pastrami +pastrami's +pastries +pastry +pastry's +pasts +pasturage +pasturage's +pasture +pasture's +pastured 
+pastures +pasturing +pasty +pasty's +pat +pat's +patch +patch's +patched +patches +patchier +patchiest +patchiness +patchiness's +patching +patchwork +patchwork's +patchworks +patchy +pate +pate's +patella +patella's +patellae +patellas +patent +patent's +patented +patenting +patently +patents +paternal +paternalism +paternalism's +paternalistic +paternally +paternity +paternity's +pates +path +path's +pathetic +pathetically +pathogen +pathogen's +pathogenic +pathogens +pathological +pathologically +pathologist +pathologist's +pathologists +pathology +pathology's +pathos +pathos's +paths +pathway +pathway's +pathways +patience +patience's +patient +patient's +patienter +patientest +patiently +patients +patina +patina's +patinae +patinas +patine +patio +patio's +patios +patois +patois's +patriarch +patriarch's +patriarchal +patriarchies +patriarchs +patriarchy +patriarchy's +patrician +patrician's +patricians +patricide +patricide's +patricides +patrimonial +patrimonies +patrimony +patrimony's +patriot +patriot's +patriotic +patriotically +patriotism +patriotism's +patriots +patrol +patrol's +patrolled +patrolling +patrolman +patrolman's +patrolmen +patrols +patrolwoman +patrolwoman's +patrolwomen +patron +patron's +patronage +patronage's +patronages +patronize +patronized +patronizes +patronizing +patronizingly +patrons +patronymic +patronymic's +patronymics +pats +patsies +patsy +patsy's +patted +patter +patter's +pattered +pattering +pattern +pattern's +patterned +patterning +patterns +patters +patties +patting +patty +patty's +paucity +paucity's +paunch +paunch's +paunches +paunchier +paunchiest +paunchy +pauper +pauper's +pauperism +pauperism's +pauperize +pauperized +pauperizes +pauperizing +paupers +pause +pause's +paused +pauses +pausing +pave +paved +pavement +pavement's +pavements +paves +pavilion +pavilion's +pavilions +paving +paving's +pavings +paw +paw's +pawed +pawing +pawl +pawl's +pawls +pawn +pawn's +pawnbroker +pawnbroker's +pawnbrokers +pawned 
+pawning +pawns +pawnshop +pawnshop's +pawnshops +pawpaw +pawpaw's +pawpaws +paws +pay +pay's +payable +paycheck +paycheck's +paychecks +payday +payday's +paydays +payed +payee +payee's +payees +payer +payer's +payers +paying +payload +payload's +payloads +paymaster +paymaster's +paymasters +payment +payment's +payments +payoff +payoff's +payoffs +payroll +payroll's +payrolls +pays +paywall +paywall's +paywalls +pea +pea's +peace +peace's +peaceable +peaceably +peaceful +peacefully +peacefulness +peacefulness's +peacekeeping +peacekeeping's +peacemaker +peacemaker's +peacemakers +peaces +peacetime +peacetime's +peach +peach's +peaches +peacock +peacock's +peacocks +peafowl +peafowl's +peafowls +peahen +peahen's +peahens +peak +peak's +peaked +peaking +peaks +peal +peal's +pealed +pealing +peals +peanut +peanut's +peanuts +pear +pear's +pearl +pearl's +pearled +pearlier +pearliest +pearling +pearls +pearly +pears +peas +peasant +peasant's +peasantry +peasantry's +peasants +pease +peat +peat's +pebble +pebble's +pebbled +pebbles +pebbling +pebbly +pecan +pecan's +pecans +peccadillo +peccadillo's +peccadilloes +peccadillos +peccaries +peccary +peccary's +peck +peck's +pecked +pecking +pecks +pecs +pectin +pectin's +pectoral +pectoral's +pectorals +peculiar +peculiarities +peculiarity +peculiarity's +peculiarly +pecuniary +pedagog +pedagog's +pedagogic +pedagogical +pedagogs +pedagogue +pedagogue's +pedagogues +pedagogy +pedagogy's +pedal +pedal's +pedaled +pedaling +pedalled +pedalling +pedals +pedant +pedant's +pedantic +pedantically +pedantry +pedantry's +pedants +peddle +peddled +peddler +peddler's +peddlers +peddles +peddling +pederast +pederast's +pederasts +pederasty +pederasty's +pedestal +pedestal's +pedestals +pedestrian +pedestrian's +pedestrianize +pedestrianized +pedestrianizes +pedestrianizing +pedestrians +pediatric +pediatrician +pediatrician's +pediatricians +pediatrics +pediatrics's +pediatrist +pediatrist's +pediatrists +pedicure +pedicure's 
+pedicured +pedicures +pedicuring +pedigree +pedigree's +pedigreed +pedigrees +pediment +pediment's +pediments +pedlar +pedlar's +pedlars +pedometer +pedometer's +pedometers +pee +pee's +peed +peeing +peek +peek's +peekaboo +peekaboo's +peeked +peeking +peeks +peel +peel's +peeled +peeling +peeling's +peelings +peels +peep +peep's +peeped +peeper +peeper's +peepers +peephole +peephole's +peepholes +peeping +peeps +peer +peer's +peerage +peerage's +peerages +peered +peering +peerless +peers +pees +peeve +peeve's +peeved +peeves +peeving +peevish +peevishly +peevishness +peevishness's +peewee +peewee's +peewees +peg +peg's +pegged +pegging +pegs +pejorative +pejorative's +pejoratives +pekoe +pekoe's +pelagic +pelican +pelican's +pelicans +pellagra +pellagra's +pellet +pellet's +pelleted +pelleting +pellets +pellucid +pelt +pelt's +pelted +pelting +pelts +pelves +pelvic +pelvis +pelvis's +pelvises +pen +pen's +penal +penalize +penalized +penalizes +penalizing +penalties +penalty +penalty's +penance +penance's +penances +pence +penchant +penchant's +penchants +pencil +pencil's +penciled +penciling +pencilled +pencilling +pencils +pendant +pendant's +pendants +pended +pendent +pendent's +pendents +pending +pends +pendulous +pendulum +pendulum's +pendulums +penes +penetrable +penetrate +penetrated +penetrates +penetrating +penetration +penetration's +penetrations +penetrative +penguin +penguin's +penguins +penicillin +penicillin's +penile +peninsula +peninsula's +peninsular +peninsulas +penis +penis's +penises +penitence +penitence's +penitent +penitent's +penitential +penitentiaries +penitentiary +penitentiary's +penitently +penitents +penknife +penknife's +penknives +penlight +penlight's +penlights +penlite +penlite's +penlites +penmanship +penmanship's +pennant +pennant's +pennants +penned +pennies +penniless +penning +pennon +pennon's +pennons +penny +penny's +pennyweight +pennyweight's +pennyweights +penologist +penologist's +penologists +penology +penology's +pens 
+pension +pension's +pensioned +pensioner +pensioner's +pensioners +pensioning +pensions +pensive +pensively +pensiveness +pensiveness's +pent +pentagon +pentagon's +pentagonal +pentagons +pentameter +pentameter's +pentameters +pentathlon +pentathlon's +pentathlons +penthouse +penthouse's +penthouses +penultimate +penultimate's +penultimates +penurious +penury +penury's +peon +peon's +peonage +peonage's +peonies +peons +peony +peony's +people +people's +peopled +peoples +peopling +pep +pep's +pepped +pepper +pepper's +peppercorn +peppercorn's +peppercorns +peppered +peppering +peppermint +peppermint's +peppermints +pepperoni +pepperoni's +pepperonis +peppers +peppery +peppier +peppiest +pepping +peppy +peps +pepsin +pepsin's +peptic +peptic's +peptics +per +perambulate +perambulated +perambulates +perambulating +perambulator +perambulator's +perambulators +percale +percale's +percales +perceivable +perceive +perceived +perceives +perceiving +percent +percent's +percentage +percentage's +percentages +percentile +percentile's +percentiles +percents +perceptible +perceptibly +perception +perception's +perceptions +perceptive +perceptively +perceptiveness +perceptiveness's +perceptual +perch +perch's +perchance +perched +perches +perching +percolate +percolated +percolates +percolating +percolation +percolation's +percolator +percolator's +percolators +percussion +percussion's +percussionist +percussionist's +percussionists +perdition +perdition's +peregrination +peregrination's +peregrinations +peremptorily +peremptory +perennial +perennial's +perennially +perennials +perfect +perfect's +perfected +perfecter +perfectest +perfectible +perfecting +perfection +perfection's +perfectionism +perfectionism's +perfectionist +perfectionist's +perfectionists +perfections +perfectly +perfects +perfidies +perfidious +perfidy +perfidy's +perforate +perforated +perforates +perforating +perforation +perforation's +perforations +perforce +perform +performance +performance's 
+performances +performed +performer +performer's +performers +performing +performs +perfume +perfume's +perfumed +perfumeries +perfumery +perfumery's +perfumes +perfuming +perfunctorily +perfunctory +perhaps +pericardia +pericardium +pericardium's +pericardiums +perigee +perigee's +perigees +perihelia +perihelion +perihelion's +perihelions +peril +peril's +periled +periling +perilled +perilling +perilous +perilously +perils +perimeter +perimeter's +perimeters +period +period's +periodic +periodical +periodical's +periodically +periodicals +periodicity +periodontal +periods +peripatetic +peripatetic's +peripatetics +peripheral +peripheral's +peripherals +peripheries +periphery +periphery's +periphrases +periphrasis +periphrasis's +periscope +periscope's +periscopes +perish +perishable +perishable's +perishables +perished +perishes +perishing +peritonea +peritoneum +peritoneum's +peritoneums +peritonitis +peritonitis's +periwig +periwig's +periwigs +periwinkle +periwinkle's +periwinkles +perjure +perjured +perjurer +perjurer's +perjurers +perjures +perjuries +perjuring +perjury +perjury's +perk +perk's +perked +perkier +perkiest +perkiness +perkiness's +perking +perks +perky +perm +perm's +permafrost +permafrost's +permanence +permanence's +permanent +permanent's +permanently +permanents +permeability +permeability's +permeable +permeate +permeated +permeates +permeating +permed +perming +permissible +permissibly +permission +permission's +permissions +permissive +permissively +permissiveness +permissiveness's +permit +permit's +permits +permitted +permitting +perms +permutation +permutation's +permutations +permute +permuted +permutes +permuting +pernicious +perniciously +peroration +peroration's +perorations +peroxide +peroxide's +peroxided +peroxides +peroxiding +perpendicular +perpendicular's +perpendiculars +perpetrate +perpetrated +perpetrates +perpetrating +perpetration +perpetration's +perpetrator +perpetrator's +perpetrators +perpetual +perpetual's 
+perpetually +perpetuals +perpetuate +perpetuated +perpetuates +perpetuating +perpetuation +perpetuation's +perpetuity +perpetuity's +perplex +perplexed +perplexes +perplexing +perplexities +perplexity +perplexity's +perquisite +perquisite's +perquisites +persecute +persecuted +persecutes +persecuting +persecution +persecution's +persecutions +persecutor +persecutor's +persecutors +perseverance +perseverance's +persevere +persevered +perseveres +persevering +persiflage +persiflage's +persimmon +persimmon's +persimmons +persist +persisted +persistence +persistence's +persistent +persistently +persisting +persists +persnickety +person +person's +persona +persona's +personable +personae +personage +personage's +personages +personal +personal's +personalities +personality +personality's +personalize +personalized +personalizes +personalizing +personally +personals +personification +personification's +personifications +personified +personifies +personify +personifying +personnel +personnel's +persons +perspective +perspective's +perspectives +perspicacious +perspicacity +perspicacity's +perspicuity +perspicuity's +perspicuous +perspiration +perspiration's +perspire +perspired +perspires +perspiring +persuade +persuaded +persuades +persuading +persuasion +persuasion's +persuasions +persuasive +persuasively +persuasiveness +persuasiveness's +pert +pertain +pertained +pertaining +pertains +perter +pertest +pertinacious +pertinacity +pertinacity's +pertinence +pertinence's +pertinent +pertly +pertness +pertness's +perturb +perturbation +perturbation's +perturbations +perturbed +perturbing +perturbs +perusal +perusal's +perusals +peruse +perused +peruses +perusing +pervade +pervaded +pervades +pervading +pervasive +perverse +perversely +perverseness +perverseness's +perversion +perversion's +perversions +perversity +perversity's +pervert +pervert's +perverted +perverting +perverts +peseta +peseta's +pesetas +peskier +peskiest +pesky +peso +peso's +pesos +pessimism 
+pessimism's +pessimist +pessimist's +pessimistic +pessimistically +pessimists +pest +pest's +pester +pestered +pestering +pesters +pesticide +pesticide's +pesticides +pestilence +pestilence's +pestilences +pestilent +pestle +pestle's +pestled +pestles +pestling +pests +pet +pet's +petal +petal's +petals +petard +petard's +petards +peter +peter's +petered +petering +peters +petiole +petiole's +petioles +petite +petite's +petites +petition +petition's +petitioned +petitioner +petitioner's +petitioners +petitioning +petitions +petrel +petrel's +petrels +petrifaction +petrifaction's +petrified +petrifies +petrify +petrifying +petrochemical +petrochemical's +petrochemicals +petrol +petrol's +petrolatum +petrolatum's +petroleum +petroleum's +pets +petted +petticoat +petticoat's +petticoats +pettier +pettiest +pettifog +pettifogged +pettifogger +pettifogger's +pettifoggers +pettifogging +pettifogs +pettily +pettiness +pettiness's +petting +petty +petulance +petulance's +petulant +petulantly +petunia +petunia's +petunias +pew +pew's +pewee +pewee's +pewees +pews +pewter +pewter's +pewters +peyote +peyote's +phalanges +phalanx +phalanx's +phalanxes +phalli +phallic +phallus +phallus's +phalluses +phantasied +phantasies +phantasm +phantasm's +phantasmagoria +phantasmagoria's +phantasmagorias +phantasms +phantasy +phantasy's +phantasying +phantom +phantom's +phantoms +pharaoh +pharaoh's +pharaohs +pharmaceutical +pharmaceutical's +pharmaceuticals +pharmacies +pharmacist +pharmacist's +pharmacists +pharmacologist +pharmacologist's +pharmacologists +pharmacology +pharmacology's +pharmacopeia +pharmacopeia's +pharmacopeias +pharmacopoeia +pharmacopoeia's +pharmacopoeias +pharmacy +pharmacy's +pharyngeal +pharynges +pharynx +pharynx's +pharynxes +phase +phase's +phased +phases +phasing +pheasant +pheasant's +pheasants +phenobarbital +phenobarbital's +phenomena +phenomenal +phenomenally +phenomenon +phenomenon's +phenomenons +phenotype +pheromone +pheromone's +pheromones +phial 
+phial's +phials +philander +philandered +philanderer +philanderer's +philanderers +philandering +philanders +philanthropic +philanthropically +philanthropies +philanthropist +philanthropist's +philanthropists +philanthropy +philanthropy's +philatelic +philatelist +philatelist's +philatelists +philately +philately's +philharmonic +philharmonic's +philharmonics +philippic +philippic's +philippics +philistine +philistine's +philistines +philodendra +philodendron +philodendron's +philodendrons +philological +philologist +philologist's +philologists +philology +philology's +philosopher +philosopher's +philosophers +philosophic +philosophical +philosophically +philosophies +philosophize +philosophized +philosophizes +philosophizing +philosophy +philosophy's +philter +philter's +philters +phish +phished +phisher +phisher's +phishers +phishing +phlebitis +phlebitis's +phlegm +phlegm's +phlegmatic +phlegmatically +phloem +phloem's +phlox +phlox's +phloxes +phobia +phobia's +phobias +phobic +phobic's +phobics +phoebe +phoebe's +phoebes +phoenix +phoenix's +phoenixes +phone +phone's +phoned +phoneme +phoneme's +phonemes +phonemic +phones +phonetic +phonetically +phonetician +phonetician's +phoneticians +phonetics +phonetics's +phoney +phoney's +phoneyed +phoneying +phoneys +phonic +phonically +phonics +phonics's +phonied +phonier +phonies +phoniest +phoniness +phoniness's +phoning +phonograph +phonograph's +phonographs +phonological +phonologist +phonologist's +phonologists +phonology +phonology's +phony +phony's +phonying +phooey +phosphate +phosphate's +phosphates +phosphor +phosphor's +phosphorescence +phosphorescence's +phosphorescent +phosphoric +phosphors +phosphorus +phosphorus's +photo +photo's +photocopied +photocopier +photocopier's +photocopiers +photocopies +photocopy +photocopy's +photocopying +photoed +photoelectric +photogenic +photograph +photograph's +photographed +photographer +photographer's +photographers +photographic +photographically +photographing 
+photographs +photography +photography's +photoing +photojournalism +photojournalism's +photojournalist +photojournalist's +photojournalists +photon +photon's +photons +photos +photosensitive +photosynthesis +photosynthesis's +phototypesetter +phototypesetting +phrasal +phrase +phrase's +phrased +phraseology +phraseology's +phrases +phrasing +phrasing's +phrasings +phrenology +phrenology's +phyla +phylum +phylum's +physic +physic's +physical +physical's +physically +physicals +physician +physician's +physicians +physicist +physicist's +physicists +physicked +physicking +physics +physics's +physiognomies +physiognomy +physiognomy's +physiological +physiologist +physiologist's +physiologists +physiology +physiology's +physiotherapist +physiotherapist's +physiotherapists +physiotherapy +physiotherapy's +physique +physique's +physiques +pi +pi's +pianissimi +pianissimo +pianissimo's +pianissimos +pianist +pianist's +pianists +piano +piano's +pianoforte +pianoforte's +pianofortes +pianos +piazza +piazza's +piazzas +piazze +pica +pica's +picante +picaresque +picayune +piccalilli +piccalilli's +piccolo +piccolo's +piccolos +pick +pick's +pickaback +pickaback's +pickabacked +pickabacking +pickabacks +pickax +pickax's +pickaxe +pickaxe's +pickaxed +pickaxes +pickaxing +picked +picker +picker's +pickerel +pickerel's +pickerels +pickers +picket +picket's +picketed +picketing +pickets +pickier +pickiest +picking +pickings +pickings's +pickle +pickle's +pickled +pickles +pickling +pickpocket +pickpocket's +pickpockets +picks +pickup +pickup's +pickups +picky +picnic +picnic's +picnicked +picnicker +picnicker's +picnickers +picnicking +picnics +pictograph +pictograph's +pictographs +pictorial +pictorial's +pictorially +pictorials +picture +picture's +pictured +pictures +picturesque +picturing +piddle +piddle's +piddled +piddles +piddling +pidgin +pidgin's +pidgins +pie +pie's +piebald +piebald's +piebalds +piece +piece's +pieced +piecemeal +pieces +piecework +piecework's 
+piecing +pied +pieing +pier +pier's +pierce +pierced +pierces +piercing +piercing's +piercingly +piercings +piers +pies +piety +piety's +piffle +piffle's +pig +pig's +pigeon +pigeon's +pigeonhole +pigeonhole's +pigeonholed +pigeonholes +pigeonholing +pigeons +pigged +piggier +piggies +piggiest +pigging +piggish +piggishness +piggishness's +piggy +piggy's +piggyback +piggyback's +piggybacked +piggybacking +piggybacks +pigheaded +piglet +piglet's +piglets +pigment +pigment's +pigmentation +pigmentation's +pigments +pigmies +pigmy +pigmy's +pigpen +pigpen's +pigpens +pigs +pigskin +pigskin's +pigskins +pigsties +pigsty +pigsty's +pigtail +pigtail's +pigtails +piing +pike +pike's +piked +piker +piker's +pikers +pikes +piking +pilaf +pilaf's +pilaff +pilaff's +pilaffs +pilafs +pilaster +pilaster's +pilasters +pilau +pilau's +pilaus +pilaw +pilaw's +pilaws +pilchard +pilchard's +pilchards +pile +pile's +piled +piles +pileup +pileup's +pileups +pilfer +pilfered +pilferer +pilferer's +pilferers +pilfering +pilfers +pilgrim +pilgrim's +pilgrimage +pilgrimage's +pilgrimages +pilgrims +piling +piling's +pilings +pill +pill's +pillage +pillage's +pillaged +pillages +pillaging +pillar +pillar's +pillars +pillbox +pillbox's +pillboxes +pilled +pilling +pillion +pillion's +pillions +pilloried +pillories +pillory +pillory's +pillorying +pillow +pillow's +pillowcase +pillowcase's +pillowcases +pillowed +pillowing +pillows +pills +pilot +pilot's +piloted +pilothouse +pilothouse's +pilothouses +piloting +pilots +pimento +pimento's +pimentos +pimiento +pimiento's +pimientos +pimp +pimp's +pimped +pimpernel +pimpernel's +pimpernels +pimping +pimple +pimple's +pimples +pimplier +pimpliest +pimply +pimps +pin +pin's +pinafore +pinafore's +pinafores +pinball +pinball's +pincer +pincer's +pincers +pinch +pinch's +pinched +pinches +pinching +pincushion +pincushion's +pincushions +pine +pine's +pineapple +pineapple's +pineapples +pined +pines +pinfeather +pinfeather's +pinfeathers +ping 
+ping's +pinged +pinging +pings +pinhead +pinhead's +pinheads +pinhole +pinhole's +pinholes +pining +pinion +pinion's +pinioned +pinioning +pinions +pink +pink's +pinked +pinker +pinkest +pinkeye +pinkeye's +pinkie +pinkie's +pinkies +pinking +pinkish +pinks +pinky +pinky's +pinnacle +pinnacle's +pinnacles +pinnate +pinned +pinning +pinochle +pinochle's +pinpoint +pinpoint's +pinpointed +pinpointing +pinpoints +pinprick +pinprick's +pinpricks +pins +pinstripe +pinstripe's +pinstriped +pinstripes +pint +pint's +pinto +pinto's +pintoes +pintos +pints +pinup +pinup's +pinups +pinwheel +pinwheel's +pinwheeled +pinwheeling +pinwheels +pioneer +pioneer's +pioneered +pioneering +pioneers +pious +piously +pip +pip's +pipe +pipe's +piped +pipeline +pipeline's +pipelines +piper +piper's +pipers +pipes +piping +piping's +pipit +pipit's +pipits +pipped +pippin +pippin's +pipping +pippins +pips +pipsqueak +pipsqueak's +pipsqueaks +piquancy +piquancy's +piquant +pique +pique's +piqued +piques +piquing +piracy +piracy's +piranha +piranha's +piranhas +pirate +pirate's +pirated +pirates +piratical +pirating +pirouette +pirouette's +pirouetted +pirouettes +pirouetting +pis +piscatorial +piss +piss's +pissed +pisses +pissing +pistachio +pistachio's +pistachios +pistil +pistil's +pistillate +pistils +pistol +pistol's +pistols +piston +piston's +pistons +pit +pit's +pita +pita's +pitch +pitch's +pitchblende +pitchblende's +pitched +pitcher +pitcher's +pitchers +pitches +pitchfork +pitchfork's +pitchforked +pitchforking +pitchforks +pitching +pitchman +pitchman's +pitchmen +piteous +piteously +pitfall +pitfall's +pitfalls +pith +pith's +pithier +pithiest +pithily +pithy +pitiable +pitiably +pitied +pities +pitiful +pitifully +pitiless +pitilessly +piton +piton's +pitons +pits +pittance +pittance's +pittances +pitted +pitting +pituitaries +pituitary +pituitary's +pity +pity's +pitying +pivot +pivot's +pivotal +pivoted +pivoting +pivots +pixel +pixel's +pixels +pixie +pixie's +pixies 
+pixy +pixy's +pizazz +pizazz's +pizza +pizza's +pizzas +pizzazz +pizzazz's +pizzeria +pizzeria's +pizzerias +pizzicati +pizzicato +pizzicato's +pizzicatos +pj's +placard +placard's +placarded +placarding +placards +placate +placated +placates +placating +placation +placation's +place +place's +placebo +placebo's +placebos +placed +placeholder +placement +placement's +placements +placenta +placenta's +placentae +placental +placentals +placentas +placer +placer's +placers +places +placid +placidity +placidity's +placidly +placing +placket +placket's +plackets +plagiarism +plagiarism's +plagiarisms +plagiarist +plagiarist's +plagiarists +plagiarize +plagiarized +plagiarizes +plagiarizing +plague +plague's +plagued +plagues +plaguing +plaice +plaid +plaid's +plaids +plain +plain's +plainclothes +plainclothesman +plainclothesman's +plainclothesmen +plainer +plainest +plainly +plainness +plainness's +plains +plaint +plaint's +plaintiff +plaintiff's +plaintiffs +plaintive +plaintively +plaints +plait +plait's +plaited +plaiting +plaits +plan +plan's +planar +plane +plane's +planed +planes +planet +planet's +planetaria +planetarium +planetarium's +planetariums +planetary +planets +plangent +planing +plank +plank's +planked +planking +planking's +planks +plankton +plankton's +planned +planner +planner's +planners +planning +plannings +plans +plant +plant's +plantain +plantain's +plantains +plantation +plantation's +plantations +planted +planter +planter's +planters +planting +planting's +plantings +plants +plaque +plaque's +plaques +plasma +plasma's +plaster +plaster's +plasterboard +plasterboard's +plastered +plasterer +plasterer's +plasterers +plastering +plasters +plastic +plastic's +plasticity +plasticity's +plastics +plastique +plate +plate's +plateau +plateau's +plateaued +plateauing +plateaus +plateaux +plated +plateful +plateful's +platefuls +platelet +platelet's +platelets +platen +platen's +platens +plates +platform +platform's +platformed +platforming +platforms 
+plating +plating's +platinum +platinum's +platitude +platitude's +platitudes +platitudinous +platonic +platoon +platoon's +platooned +platooning +platoons +platter +platter's +platters +platypi +platypus +platypus's +platypuses +plaudit +plaudit's +plaudits +plausibility +plausibility's +plausible +plausibly +play +play's +playable +playact +playacted +playacting +playacting's +playacts +playback +playback's +playbacks +playbill +playbill's +playbills +playboy +playboy's +playboys +played +player +player's +players +playful +playfully +playfulness +playfulness's +playgoer +playgoer's +playgoers +playground +playground's +playgrounds +playhouse +playhouse's +playhouses +playing +playlist +playlist's +playlists +playmate +playmate's +playmates +playoff +playoff's +playoffs +playpen +playpen's +playpens +playroom +playroom's +playrooms +plays +plaything +plaything's +playthings +playwright +playwright's +playwrights +plaza +plaza's +plazas +plea +plea's +plead +pleaded +pleader +pleader's +pleaders +pleading +pleads +pleas +pleasant +pleasanter +pleasantest +pleasantly +pleasantness +pleasantness's +pleasantries +pleasantry +pleasantry's +please +pleased +pleases +pleasing +pleasingly +pleasings +pleasurable +pleasurably +pleasure +pleasure's +pleasured +pleasures +pleasuring +pleat +pleat's +pleated +pleating +pleats +plebeian +plebeian's +plebeians +plebiscite +plebiscite's +plebiscites +plectra +plectrum +plectrum's +plectrums +pled +pledge +pledge's +pledged +pledges +pledging +plenaries +plenary +plenary's +plenipotentiaries +plenipotentiary +plenipotentiary's +plenitude +plenitude's +plenitudes +plenteous +plentiful +plentifully +plenty +plenty's +plethora +plethora's +pleurisy +pleurisy's +plexus +plexus's +plexuses +pliability +pliability's +pliable +pliancy +pliancy's +pliant +plied +pliers +pliers's +plies +plight +plight's +plighted +plighting +plights +plinth +plinth's +plinths +plod +plodded +plodder +plodder's +plodders +plodding +ploddings +plods +plop 
+plop's +plopped +plopping +plops +plot +plot's +plots +plotted +plotter +plotter's +plotters +plotting +plough +plough's +ploughed +ploughing +ploughs +ploughshare +ploughshare's +ploughshares +plover +plover's +plovers +plow +plow's +plowed +plowing +plowman +plowman's +plowmen +plows +plowshare +plowshare's +plowshares +ploy +ploy's +ploys +pluck +pluck's +plucked +pluckier +pluckiest +pluckiness +pluckiness's +plucking +plucks +plucky +plug +plug's +plugged +plugging +plugin +plugin's +plugins +plugs +plum +plum's +plumage +plumage's +plumb +plumb's +plumbed +plumber +plumber's +plumbers +plumbing +plumbing's +plumbs +plume +plume's +plumed +plumes +pluming +plummet +plummet's +plummeted +plummeting +plummets +plump +plump's +plumped +plumper +plumpest +plumping +plumpness +plumpness's +plumps +plums +plunder +plunder's +plundered +plunderer +plunderer's +plunderers +plundering +plunders +plunge +plunge's +plunged +plunger +plunger's +plungers +plunges +plunging +plunk +plunk's +plunked +plunking +plunks +pluperfect +pluperfect's +pluperfects +plural +plural's +pluralism +pluralism's +pluralistic +pluralities +plurality +plurality's +pluralize +pluralized +pluralizes +pluralizing +plurals +plus +plus's +pluses +plush +plush's +plusher +plushest +plushier +plushiest +plushy +plusses +plutocracies +plutocracy +plutocracy's +plutocrat +plutocrat's +plutocratic +plutocrats +plutonium +plutonium's +ply +ply's +plying +plywood +plywood's +pneumatic +pneumatically +pneumonia +pneumonia's +poach +poached +poacher +poacher's +poachers +poaches +poaching +pock +pock's +pocked +pocket +pocket's +pocketbook +pocketbook's +pocketbooks +pocketed +pocketful +pocketful's +pocketfuls +pocketing +pocketknife +pocketknife's +pocketknives +pockets +pocking +pockmark +pockmark's +pockmarked +pockmarking +pockmarks +pocks +pod +pod's +podcast +podcast's +podcasting +podcasts +podded +podding +podia +podiatrist +podiatrist's +podiatrists +podiatry +podiatry's +podium +podium's 
+podiums +pods +poem +poem's +poems +poesy +poesy's +poet +poet's +poetess +poetess's +poetesses +poetic +poetical +poetically +poetry +poetry's +poets +pogrom +pogrom's +pogroms +poi +poi's +poignancy +poignancy's +poignant +poignantly +poinsettia +poinsettia's +poinsettias +point +point's +pointed +pointedly +pointer +pointer's +pointers +pointier +pointiest +pointillism +pointillism's +pointillist +pointillist's +pointillists +pointing +pointless +pointlessly +pointlessness +pointlessness's +points +pointy +poise +poise's +poised +poises +poising +poison +poison's +poisoned +poisoner +poisoner's +poisoners +poisoning +poisoning's +poisonings +poisonous +poisonously +poisons +poke +poke's +poked +poker +poker's +pokers +pokes +pokey +pokey's +pokeys +pokier +pokiest +poking +poky +pol +pol's +polar +polarities +polarity +polarity's +polarization +polarization's +polarize +polarized +polarizes +polarizing +pole +pole's +polecat +polecat's +polecats +poled +polemic +polemic's +polemical +polemics +poles +polestar +polestar's +polestars +police +police's +policed +policeman +policeman's +policemen +polices +policewoman +policewoman's +policewomen +policies +policing +policy +policy's +policyholder +policyholder's +policyholders +poling +polio +polio's +poliomyelitis +poliomyelitis's +polios +polish +polish's +polished +polisher +polisher's +polishers +polishes +polishing +polite +politely +politeness +politeness's +politer +politesse +politesse's +politest +politic +political +politically +politician +politician's +politicians +politicize +politicized +politicizes +politicizing +politico +politico's +politicoes +politicos +politics +politics's +polities +polity +polity's +polka +polka's +polkaed +polkaing +polkas +poll +poll's +polled +pollen +pollen's +pollinate +pollinated +pollinates +pollinating +pollination +pollination's +polling +polliwog +polliwog's +polliwogs +polls +pollster +pollster's +pollsters +pollutant +pollutant's +pollutants +pollute +polluted 
+polluter +polluter's +polluters +pollutes +polluting +pollution +pollution's +pollywog +pollywog's +pollywogs +polo +polo's +polonaise +polonaise's +polonaises +polonium +polonium's +pols +poltergeist +poltergeist's +poltergeists +poltroon +poltroon's +poltroons +polyamories +polyamory +polyester +polyester's +polyesters +polyethylene +polyethylene's +polygamist +polygamist's +polygamists +polygamous +polygamy +polygamy's +polyglot +polyglot's +polyglots +polygon +polygon's +polygonal +polygons +polygraph +polygraph's +polygraphed +polygraphing +polygraphs +polyhedra +polyhedron +polyhedron's +polyhedrons +polymath +polymath's +polymaths +polymer +polymer's +polymeric +polymerization +polymerization's +polymers +polymorphic +polynomial +polynomial's +polynomials +polyp +polyp's +polyphonic +polyphony +polyphony's +polyps +polystyrene +polystyrene's +polysyllabic +polysyllable +polysyllable's +polysyllables +polytechnic +polytechnic's +polytechnics +polytheism +polytheism's +polytheist +polytheist's +polytheistic +polytheists +polythene +polyunsaturated +pomade +pomade's +pomaded +pomades +pomading +pomegranate +pomegranate's +pomegranates +pommel +pommel's +pommeled +pommeling +pommelled +pommelling +pommels +pomp +pomp's +pompadour +pompadour's +pompadoured +pompadours +pompom +pompom's +pompoms +pompon +pompon's +pompons +pomposity +pomposity's +pompous +pompously +pompousness +pompousness's +poncho +poncho's +ponchos +pond +pond's +ponder +pondered +pondering +ponderous +ponderously +ponders +ponds +pone +pone's +pones +poniard +poniard's +poniards +ponies +pontiff +pontiff's +pontiffs +pontifical +pontificate +pontificate's +pontificated +pontificates +pontificating +pontoon +pontoon's +pontoons +pony +pony's +ponytail +ponytail's +ponytails +pooch +pooch's +pooched +pooches +pooching +poodle +poodle's +poodles +pooh +pooh's +poohed +poohing +poohs +pool +pool's +pooled +pooling +pools +poop +poop's +pooped +pooping +poops +poor +poorer +poorest +poorhouse 
+poorhouse's +poorhouses +poorly +pop +pop's +popcorn +popcorn's +pope +pope's +popes +popgun +popgun's +popguns +popinjay +popinjay's +popinjays +poplar +poplar's +poplars +poplin +poplin's +popover +popover's +popovers +poppa +poppa's +poppas +popped +poppies +popping +poppy +poppy's +poppycock +poppycock's +pops +populace +populace's +populaces +popular +popularity +popularity's +popularization +popularization's +popularize +popularized +popularizes +popularizing +popularly +populate +populated +populates +populating +population +population's +populations +populism +populism's +populist +populist's +populists +populous +porcelain +porcelain's +porch +porch's +porches +porcine +porcupine +porcupine's +porcupines +pore +pore's +pored +pores +poring +pork +pork's +porn +porn's +porno +porno's +pornographer +pornographer's +pornographers +pornographic +pornography +pornography's +porosity +porosity's +porous +porphyry +porphyry's +porpoise +porpoise's +porpoised +porpoises +porpoising +porridge +porridge's +porringer +porringer's +porringers +port +port's +portability +portability's +portable +portable's +portables +portage +portage's +portaged +portages +portaging +portal +portal's +portals +portcullis +portcullis's +portcullises +ported +portend +portended +portending +portends +portent +portent's +portentous +portentously +portents +porter +porter's +porterhouse +porterhouse's +porterhouses +porters +portfolio +portfolio's +portfolios +porthole +porthole's +portholes +portico +portico's +porticoes +porticos +porting +portion +portion's +portioned +portioning +portions +portlier +portliest +portliness +portliness's +portly +portmanteau +portmanteau's +portmanteaus +portmanteaux +portrait +portrait's +portraitist +portraitist's +portraitists +portraits +portraiture +portraiture's +portray +portrayal +portrayal's +portrayals +portrayed +portraying +portrays +ports +pose +pose's +posed +poser +poser's +posers +poses +poseur +poseur's +poseurs +posh +posher +poshest 
+posies +posing +posit +posited +positing +position +position's +positional +positioned +positioning +positions +positive +positive's +positively +positives +positivism +positron +positron's +positrons +posits +posse +posse's +posses +possess +possessed +possesses +possessing +possession +possession's +possessions +possessive +possessive's +possessively +possessiveness +possessiveness's +possessives +possessor +possessor's +possessors +possibilities +possibility +possibility's +possible +possible's +possibles +possibly +possum +possum's +possums +post +post's +postage +postage's +postal +postbox +postcard +postcard's +postcards +postcode +postcodes +postdate +postdated +postdates +postdating +postdoc +postdoctoral +posted +poster +poster's +posterior +posterior's +posteriors +posterity +posterity's +posters +postgraduate +postgraduate's +postgraduates +posthaste +posthumous +posthumously +posting +postlude +postlude's +postludes +postman +postman's +postmark +postmark's +postmarked +postmarking +postmarks +postmaster +postmaster's +postmasters +postmen +postmistress +postmistress's +postmistresses +postmodern +postmortem +postmortem's +postmortems +postnatal +postoperative +postpaid +postpartum +postpone +postponed +postponement +postponement's +postponements +postpones +postponing +posts +postscript +postscript's +postscripts +postulate +postulate's +postulated +postulates +postulating +posture +posture's +postured +postures +posturing +postwar +posy +posy's +pot +pot's +potable +potable's +potables +potash +potash's +potassium +potassium's +potato +potato's +potatoes +potbellied +potbellies +potbelly +potbelly's +potboiler +potboiler's +potboilers +potency +potency's +potent +potentate +potentate's +potentates +potential +potential's +potentialities +potentiality +potentiality's +potentially +potentials +potful +potful's +potfuls +potholder +potholder's +potholders +pothole +pothole's +potholes +pothook +pothook's +pothooks +potion +potion's +potions +potluck 
+potluck's +potlucks +potpie +potpie's +potpies +potpourri +potpourri's +potpourris +pots +potsherd +potsherd's +potsherds +potshot +potshot's +potshots +pottage +pottage's +potted +potter +potter's +pottered +potteries +pottering +potters +pottery +pottery's +pottier +potties +pottiest +potting +potty +potty's +pouch +pouch's +pouched +pouches +pouching +poultice +poultice's +poulticed +poultices +poulticing +poultry +poultry's +pounce +pounce's +pounced +pounces +pouncing +pound +pound's +pounded +pounding +pounds +pour +poured +pouring +pours +pout +pout's +pouted +pouting +pouts +poverty +poverty's +powder +powder's +powdered +powdering +powders +powdery +power +power's +powerboat +powerboat's +powerboats +powered +powerful +powerfully +powerhouse +powerhouse's +powerhouses +powering +powerless +powerlessly +powerlessness +powerlessness's +powers +powwow +powwow's +powwowed +powwowing +powwows +pox +pox's +poxes +practicability +practicability's +practicable +practicably +practical +practical's +practicalities +practicality +practicality's +practically +practicals +practice +practice's +practiced +practices +practicing +practise +practise's +practised +practises +practising +practitioner +practitioner's +practitioners +pragmatic +pragmatic's +pragmatically +pragmatics +pragmatism +pragmatism's +pragmatist +pragmatist's +pragmatists +prairie +prairie's +prairies +praise +praise's +praised +praises +praiseworthiness +praiseworthiness's +praiseworthy +praising +praline +praline's +pralines +pram +prance +prance's +pranced +prancer +prancer's +prancers +prances +prancing +prank +prank's +pranks +prankster +prankster's +pranksters +prate +prate's +prated +prates +pratfall +pratfall's +pratfalls +prating +prattle +prattle's +prattled +prattles +prattling +prawn +prawn's +prawned +prawning +prawns +pray +prayed +prayer +prayer's +prayers +praying +prays +preach +preached +preacher +preacher's +preachers +preaches +preachier +preachiest +preaching +preachy +preamble 
+preamble's +preambled +preambles +preambling +prearrange +prearranged +prearrangement +prearrangement's +prearranges +prearranging +precarious +precariously +precaution +precaution's +precautionary +precautions +precede +preceded +precedence +precedence's +precedent +precedent's +precedents +precedes +preceding +precept +precept's +preceptor +preceptor's +preceptors +precepts +precinct +precinct's +precincts +preciosity +preciosity's +precious +preciously +preciousness +preciousness's +precipice +precipice's +precipices +precipitant +precipitant's +precipitants +precipitate +precipitate's +precipitated +precipitately +precipitates +precipitating +precipitation +precipitation's +precipitations +precipitous +precipitously +precise +precisely +preciseness +preciseness's +preciser +precises +precisest +precision +precision's +preclude +precluded +precludes +precluding +preclusion +preclusion's +precocious +precociously +precociousness +precociousness's +precocity +precocity's +precognition +preconceive +preconceived +preconceives +preconceiving +preconception +preconception's +preconceptions +precondition +precondition's +preconditioned +preconditioning +preconditions +precursor +precursor's +precursors +predate +predated +predates +predating +predator +predator's +predators +predatory +predecease +predeceased +predeceases +predeceasing +predecessor +predecessor's +predecessors +predefined +predestination +predestination's +predestine +predestined +predestines +predestining +predetermination +predetermination's +predetermine +predetermined +predetermines +predetermining +predicament +predicament's +predicaments +predicate +predicate's +predicated +predicates +predicating +predication +predication's +predicative +predict +predictability +predictable +predictably +predicted +predicting +prediction +prediction's +predictions +predictive +predictor +predicts +predilection +predilection's +predilections +predispose +predisposed +predisposes +predisposing +predisposition 
+predisposition's +predispositions +predominance +predominance's +predominant +predominantly +predominate +predominated +predominates +predominating +preeminence +preeminence's +preeminent +preeminently +preempt +preempted +preempting +preemption +preemption's +preemptive +preemptively +preempts +preen +preened +preening +preens +preexist +preexisted +preexisting +preexists +prefab +prefab's +prefabbed +prefabbing +prefabricate +prefabricated +prefabricates +prefabricating +prefabrication +prefabrication's +prefabs +preface +preface's +prefaced +prefaces +prefacing +prefatory +prefect +prefect's +prefects +prefecture +prefecture's +prefectures +prefer +preferable +preferably +preference +preference's +preferences +preferential +preferentially +preferment +preferment's +preferred +preferring +prefers +prefigure +prefigured +prefigures +prefiguring +prefix +prefix's +prefixed +prefixes +prefixing +pregnancies +pregnancy +pregnancy's +pregnant +preheat +preheated +preheating +preheats +prehensile +prehistoric +prehistory +prehistory's +prejudge +prejudged +prejudges +prejudging +prejudgment +prejudgment's +prejudgments +prejudice +prejudice's +prejudiced +prejudices +prejudicial +prejudicing +prelate +prelate's +prelates +preliminaries +preliminary +preliminary's +prelude +prelude's +preludes +premarital +premature +prematurely +premeditate +premeditated +premeditates +premeditating +premeditation +premeditation's +premenstrual +premier +premier's +premiere +premiere's +premiered +premieres +premiering +premiers +premise +premise's +premised +premises +premising +premiss +premiss's +premisses +premium +premium's +premiums +premonition +premonition's +premonitions +premonitory +prenatal +prenup +prenup's +prenups +preoccupation +preoccupation's +preoccupations +preoccupied +preoccupies +preoccupy +preoccupying +preordain +preordained +preordaining +preordains +prep +prep's +prepackage +prepackaged +prepackages +prepackaging +prepaid +preparation +preparation's 
+preparations +preparatory +prepare +prepared +preparedness +preparedness's +prepares +preparing +prepay +prepaying +prepayment +prepayment's +prepayments +prepays +preponderance +preponderance's +preponderances +preponderant +preponderate +preponderated +preponderates +preponderating +preposition +preposition's +prepositional +prepositions +prepossess +prepossessed +prepossesses +prepossessing +preposterous +preposterously +prepped +preppie +preppie's +preppier +preppies +preppiest +prepping +preppy +preppy's +preps +prequel +prequel's +prequels +prerecord +prerecorded +prerecording +prerecords +preregister +preregistered +preregistering +preregisters +preregistration +preregistration's +prerequisite +prerequisite's +prerequisites +prerogative +prerogative's +prerogatives +presage +presage's +presaged +presages +presaging +preschool +preschool's +preschooler +preschooler's +preschoolers +preschools +prescience +prescience's +prescient +prescribe +prescribed +prescribes +prescribing +prescription +prescription's +prescriptions +prescriptive +presence +presence's +presences +present +present's +presentable +presentation +presentation's +presentations +presented +presenter +presentiment +presentiment's +presentiments +presenting +presently +presents +preservation +preservation's +preservative +preservative's +preservatives +preserve +preserve's +preserved +preserver +preserver's +preservers +preserves +preserving +preset +presets +presetting +preshrank +preshrink +preshrinking +preshrinks +preshrunk +preshrunken +preside +presided +presidencies +presidency +presidency's +president +president's +presidential +presidents +presides +presiding +press +press's +pressed +presses +pressing +pressing's +pressings +pressman +pressman's +pressmen +pressure +pressure's +pressured +pressures +pressuring +pressurization +pressurization's +pressurize +pressurized +pressurizes +pressurizing +prestige +prestige's +prestigious +presto +presto's +prestos +presumable +presumably 
+presume +presumed +presumes +presuming +presumption +presumption's +presumptions +presumptive +presumptuous +presumptuously +presumptuousness +presumptuousness's +presuppose +presupposed +presupposes +presupposing +presupposition +presupposition's +presuppositions +preteen +preteen's +preteens +pretence +pretence's +pretences +pretend +pretended +pretender +pretender's +pretenders +pretending +pretends +pretense +pretense's +pretenses +pretension +pretension's +pretensions +pretentious +pretentiously +pretentiousness +pretentiousness's +preterit +preterit's +preterite +preterite's +preterites +preterits +preternatural +pretext +pretext's +pretexts +prettied +prettier +pretties +prettiest +prettified +prettifies +prettify +prettifying +prettily +prettiness +prettiness's +pretty +pretty's +prettying +pretzel +pretzel's +pretzels +prevail +prevailed +prevailing +prevails +prevalence +prevalence's +prevalent +prevaricate +prevaricated +prevaricates +prevaricating +prevarication +prevarication's +prevarications +prevaricator +prevaricator's +prevaricators +prevent +preventable +preventative +preventative's +preventatives +prevented +preventible +preventing +prevention +prevention's +preventive +preventive's +preventives +prevents +preview +preview's +previewed +previewer +previewers +previewing +previews +previous +previously +prevue +prevue's +prevues +prewar +prey +prey's +preyed +preying +preys +price +price's +priced +priceless +prices +pricey +pricier +priciest +pricing +prick +prick's +pricked +pricking +prickle +prickle's +prickled +prickles +pricklier +prickliest +prickling +prickly +pricks +pricy +pride +pride's +prided +prides +priding +pried +pries +priest +priest's +priestess +priestess's +priestesses +priesthood +priesthood's +priesthoods +priestlier +priestliest +priestly +priests +prig +prig's +priggish +prigs +prim +primacy +primacy's +primaeval +primal +primaries +primarily +primary +primary's +primate +primate's +primates +prime +prime's +primed 
+primer +primer's +primers +primes +primeval +priming +primitive +primitive's +primitively +primitives +primly +primmer +primmest +primness +primness's +primogeniture +primogeniture's +primordial +primp +primped +primping +primps +primrose +primrose's +primroses +prince +prince's +princelier +princeliest +princely +princes +princess +princess's +princesses +principal +principal's +principalities +principality +principality's +principally +principals +principle +principle's +principled +principles +print +print's +printable +printed +printer +printer's +printers +printing +printing's +printings +printout +printout's +printouts +prints +prior +prior's +prioress +prioress's +prioresses +priories +priorities +prioritize +prioritized +prioritizes +prioritizing +priority +priority's +priors +priory +priory's +prism +prism's +prismatic +prisms +prison +prison's +prisoner +prisoner's +prisoners +prisons +prissier +prissiest +prissiness +prissiness's +prissy +pristine +prithee +privacy +privacy's +private +private's +privateer +privateer's +privateers +privately +privater +privates +privatest +privation +privation's +privations +privatization +privatization's +privatizations +privatize +privatized +privatizes +privatizing +privet +privet's +privets +privier +privies +priviest +privilege +privilege's +privileged +privileges +privileging +privy +privy's +prize +prize's +prized +prizefight +prizefight's +prizefighter +prizefighter's +prizefighters +prizefighting +prizefights +prizes +prizing +pro +pro's +proactive +probabilistic +probabilities +probability +probability's +probable +probable's +probables +probably +probate +probate's +probated +probates +probating +probation +probation's +probationary +probationer +probationer's +probationers +probe +probe's +probed +probes +probing +probity +probity's +problem +problem's +problematic +problematical +problematically +problems +proboscides +proboscis +proboscis's +proboscises +procedural +procedure +procedure's +procedures 
+proceed +proceeded +proceeding +proceeding's +proceedings +proceeds +proceeds's +process +process's +processed +processes +processing +procession +procession's +processional +processional's +processionals +processioned +processioning +processions +processor +processor's +processors +proclaim +proclaimed +proclaiming +proclaims +proclamation +proclamation's +proclamations +proclivities +proclivity +proclivity's +procrastinate +procrastinated +procrastinates +procrastinating +procrastination +procrastination's +procrastinator +procrastinator's +procrastinators +procreate +procreated +procreates +procreating +procreation +procreation's +procreative +proctor +proctor's +proctored +proctoring +proctors +procurator +procurator's +procurators +procure +procured +procurement +procurement's +procurer +procurer's +procurers +procures +procuring +prod +prod's +prodded +prodding +prodigal +prodigal's +prodigality +prodigality's +prodigals +prodigies +prodigious +prodigiously +prodigy +prodigy's +prods +produce +produce's +produced +producer +producer's +producers +produces +producing +product +product's +production +production's +productions +productive +productively +productiveness +productiveness's +productivity +productivity's +products +prof +prof's +profanation +profanation's +profanations +profane +profaned +profanely +profanes +profaning +profanities +profanity +profanity's +profess +professed +professes +professing +profession +profession's +professional +professional's +professionalism +professionalism's +professionally +professionals +professions +professor +professor's +professorial +professors +professorship +professorship's +professorships +proffer +proffer's +proffered +proffering +proffers +proficiency +proficiency's +proficient +proficient's +proficiently +proficients +profile +profile's +profiled +profiles +profiling +profit +profit's +profitability +profitability's +profitable +profitably +profited +profiteer +profiteer's +profiteered +profiteering 
+profiteers +profiting +profits +profligacy +profligacy's +profligate +profligate's +profligates +proforma +profound +profounder +profoundest +profoundly +profs +profundities +profundity +profundity's +profuse +profusely +profusion +profusion's +profusions +progenitor +progenitor's +progenitors +progeny +progeny's +progesterone +progesterone's +prognoses +prognosis +prognosis's +prognostic +prognostic's +prognosticate +prognosticated +prognosticates +prognosticating +prognostication +prognostication's +prognostications +prognosticator +prognosticator's +prognosticators +prognostics +program +program's +programed +programer +programer's +programers +programing +programmable +programmable's +programmables +programme +programmed +programmer +programmer's +programmers +programmes +programming +programming's +programs +progress +progress's +progressed +progresses +progressing +progression +progression's +progressions +progressive +progressive's +progressively +progressives +prohibit +prohibited +prohibiting +prohibition +prohibition's +prohibitionist +prohibitionist's +prohibitionists +prohibitions +prohibitive +prohibitively +prohibitory +prohibits +project +project's +projected +projectile +projectile's +projectiles +projecting +projection +projection's +projectionist +projectionist's +projectionists +projections +projector +projector's +projectors +projects +proletarian +proletarian's +proletarians +proletariat +proletariat's +proliferate +proliferated +proliferates +proliferating +proliferation +proliferation's +prolific +prolifically +prolix +prolixity +prolixity's +prolog +prolog's +prologs +prologue +prologue's +prologues +prolong +prolongation +prolongation's +prolongations +prolonged +prolonging +prolongs +prom +prom's +promenade +promenade's +promenaded +promenades +promenading +prominence +prominence's +prominent +prominently +promiscuity +promiscuity's +promiscuous +promiscuously +promise +promise's +promised +promises +promising +promisingly +promissory 
+promo +promo's +promontories +promontory +promontory's +promos +promote +promoted +promoter +promoter's +promoters +promotes +promoting +promotion +promotion's +promotional +promotions +prompt +prompt's +prompted +prompter +prompter's +prompters +promptest +prompting +prompting's +promptings +promptly +promptness +promptness's +prompts +proms +promulgate +promulgated +promulgates +promulgating +promulgation +promulgation's +prone +proneness +proneness's +prong +prong's +pronged +pronghorn +pronghorn's +pronghorns +prongs +pronoun +pronoun's +pronounce +pronounceable +pronounced +pronouncement +pronouncement's +pronouncements +pronounces +pronouncing +pronouns +pronto +pronunciation +pronunciation's +pronunciations +proof +proof's +proofed +proofing +proofread +proofreader +proofreader's +proofreaders +proofreading +proofreads +proofs +prop +prop's +propaganda +propaganda's +propagandist +propagandist's +propagandists +propagandize +propagandized +propagandizes +propagandizing +propagate +propagated +propagates +propagating +propagation +propagation's +propane +propane's +propel +propellant +propellant's +propellants +propelled +propellent +propellent's +propellents +propeller +propeller's +propellers +propelling +propels +propensities +propensity +propensity's +proper +proper's +properer +properest +properly +propertied +properties +property +property's +prophecies +prophecy +prophecy's +prophesied +prophesies +prophesy +prophesy's +prophesying +prophet +prophet's +prophetess +prophetess's +prophetesses +prophetic +prophetically +prophets +prophylactic +prophylactic's +prophylactics +prophylaxis +prophylaxis's +propinquity +propinquity's +propitiate +propitiated +propitiates +propitiating +propitiation +propitiation's +propitiatory +propitious +proponent +proponent's +proponents +proportion +proportion's +proportional +proportionality +proportionally +proportionals +proportionate +proportionately +proportioned +proportioning +proportions +proposal +proposal's 
+proposals +propose +proposed +proposer +proposes +proposing +proposition +proposition's +propositional +propositioned +propositioning +propositions +propound +propounded +propounding +propounds +propped +propping +proprietaries +proprietary +proprietary's +proprietor +proprietor's +proprietors +proprietorship +proprietorship's +proprietress +proprietress's +proprietresses +propriety +propriety's +props +propulsion +propulsion's +propulsive +prorate +prorated +prorates +prorating +pros +prosaic +prosaically +proscenia +proscenium +proscenium's +prosceniums +proscribe +proscribed +proscribes +proscribing +proscription +proscription's +proscriptions +prose +prose's +prosecute +prosecuted +prosecutes +prosecuting +prosecution +prosecution's +prosecutions +prosecutor +prosecutor's +prosecutors +proselyte +proselyte's +proselyted +proselytes +proselyting +proselytize +proselytized +proselytizes +proselytizing +prosier +prosiest +prosodies +prosody +prosody's +prospect +prospect's +prospected +prospecting +prospective +prospector +prospector's +prospectors +prospects +prospectus +prospectus's +prospectuses +prosper +prospered +prospering +prosperity +prosperity's +prosperous +prosperously +prospers +prostate +prostate's +prostates +prostheses +prosthesis +prosthesis's +prosthetic +prostitute +prostitute's +prostituted +prostitutes +prostituting +prostitution +prostitution's +prostrate +prostrated +prostrates +prostrating +prostration +prostration's +prostrations +prosy +protagonist +protagonist's +protagonists +protean +protect +protected +protecting +protection +protection's +protections +protective +protectively +protectiveness +protectiveness's +protector +protector's +protectorate +protectorate's +protectorates +protectors +protects +protein +protein's +proteins +protest +protest's +protestant +protestants +protestation +protestation's +protestations +protested +protester +protester's +protesters +protesting +protestor +protestor's +protestors +protests +protocol 
+protocol's +protocols +proton +proton's +protons +protoplasm +protoplasm's +protoplasmic +prototype +prototype's +prototypes +prototyping +protozoa +protozoan +protozoan's +protozoans +protozoon +protozoon's +protract +protracted +protracting +protraction +protraction's +protractor +protractor's +protractors +protracts +protrude +protruded +protrudes +protruding +protrusion +protrusion's +protrusions +protuberance +protuberance's +protuberances +protuberant +protégé +protégé's +protégés +proud +prouder +proudest +proudly +provable +provably +prove +proved +proven +provenance +provenance's +provender +provender's +proverb +proverb's +proverbial +proverbially +proverbs +proves +provide +provided +providence +providence's +provident +providential +providentially +providently +provider +provider's +providers +provides +providing +province +province's +provinces +provincial +provincial's +provincialism +provincialism's +provincials +proving +provision +provision's +provisional +provisionally +provisioned +provisioning +provisions +proviso +proviso's +provisoes +provisos +provocation +provocation's +provocations +provocative +provocatively +provoke +provoked +provokes +provoking +provost +provost's +provosts +prow +prow's +prowess +prowess's +prowl +prowl's +prowled +prowler +prowler's +prowlers +prowling +prowls +prows +proxies +proximity +proximity's +proxy +proxy's +prude +prude's +prudence +prudence's +prudent +prudential +prudently +prudery +prudery's +prudes +prudish +prudishly +prune +prune's +pruned +prunes +pruning +prurience +prurience's +prurient +pry +pry's +prying +précis +précis's +précised +précising +psalm +psalm's +psalmist +psalmist's +psalmists +psalms +pseudo +pseudonym +pseudonym's +pseudonyms +pshaw +pshaw's +pshaws +psoriasis +psoriasis's +psst +psych +psych's +psyche +psyche's +psyched +psychedelic +psychedelic's +psychedelics +psyches +psychiatric +psychiatrist +psychiatrist's +psychiatrists +psychiatry +psychiatry's +psychic +psychic's 
+psychical +psychically +psychics +psyching +psycho +psycho's +psychoanalysis +psychoanalysis's +psychoanalyst +psychoanalyst's +psychoanalysts +psychoanalyze +psychoanalyzed +psychoanalyzes +psychoanalyzing +psychobabble +psychobabble's +psychogenic +psychokinesis +psychological +psychologically +psychologies +psychologist +psychologist's +psychologists +psychology +psychology's +psychopath +psychopath's +psychopathic +psychopaths +psychos +psychoses +psychosis +psychosis's +psychosomatic +psychotherapies +psychotherapist +psychotherapist's +psychotherapists +psychotherapy +psychotherapy's +psychotic +psychotic's +psychotics +psychs +ptarmigan +ptarmigan's +ptarmigans +pterodactyl +pterodactyl's +pterodactyls +ptomaine +ptomaine's +ptomaines +pub +pub's +puberty +puberty's +pubescence +pubescence's +pubescent +pubic +public +public's +publican +publican's +publicans +publication +publication's +publications +publicist +publicist's +publicists +publicity +publicity's +publicize +publicized +publicizes +publicizing +publicly +publish +publishable +published +publisher +publisher's +publishers +publishes +publishing +publishing's +pubs +puck +puck's +pucker +pucker's +puckered +puckering +puckers +puckish +pucks +pudding +pudding's +puddings +puddle +puddle's +puddled +puddles +puddling +pudgier +pudgiest +pudgy +pueblo +pueblo's +pueblos +puerile +puerility +puerility's +puff +puff's +puffball +puffball's +puffballs +puffed +puffer +puffier +puffiest +puffin +puffin's +puffiness +puffiness's +puffing +puffins +puffs +puffy +pug +pug's +pugilism +pugilism's +pugilist +pugilist's +pugilistic +pugilists +pugnacious +pugnaciously +pugnacity +pugnacity's +pugs +puke +puke's +puked +pukes +puking +pulchritude +pulchritude's +pull +pull's +pullback +pullback's +pullbacks +pulled +puller +puller's +pullers +pullet +pullet's +pullets +pulley +pulley's +pulleys +pulling +pullout +pullout's +pullouts +pullover +pullover's +pullovers +pulls +pulmonary +pulp +pulp's +pulped 
+pulpier +pulpiest +pulping +pulpit +pulpit's +pulpits +pulps +pulpy +pulsar +pulsar's +pulsars +pulsate +pulsated +pulsates +pulsating +pulsation +pulsation's +pulsations +pulse +pulse's +pulsed +pulses +pulsing +pulverization +pulverization's +pulverize +pulverized +pulverizes +pulverizing +puma +puma's +pumas +pumice +pumice's +pumices +pummel +pummeled +pummeling +pummelled +pummelling +pummels +pump +pump's +pumped +pumper +pumper's +pumpernickel +pumpernickel's +pumpers +pumping +pumpkin +pumpkin's +pumpkins +pumps +pun +pun's +punch +punch's +punched +punches +punchier +punchiest +punching +punchline +punchy +punctilious +punctiliously +punctual +punctuality +punctuality's +punctually +punctuate +punctuated +punctuates +punctuating +punctuation +punctuation's +puncture +puncture's +punctured +punctures +puncturing +pundit +pundit's +pundits +pungency +pungency's +pungent +pungently +punier +puniest +punish +punishable +punished +punishes +punishing +punishment +punishment's +punishments +punitive +punk +punk's +punker +punkest +punks +punned +punning +puns +punster +punster's +punsters +punt +punt's +punted +punter +punter's +punters +punting +punts +puny +pup +pup's +pupa +pupa's +pupae +pupal +pupas +pupil +pupil's +pupils +pupped +puppet +puppet's +puppeteer +puppeteer's +puppeteers +puppetry +puppetry's +puppets +puppies +pupping +puppy +puppy's +pups +purblind +purchasable +purchase +purchase's +purchased +purchaser +purchaser's +purchasers +purchases +purchasing +pure +purebred +purebred's +purebreds +puree +puree's +pureed +pureeing +purees +purely +pureness +pureness's +purer +purest +purgative +purgative's +purgatives +purgatorial +purgatories +purgatory +purgatory's +purge +purge's +purged +purges +purging +purification +purification's +purified +purifier +purifier's +purifiers +purifies +purify +purifying +purism +purism's +purist +purist's +purists +puritan +puritan's +puritanical +puritanically +puritanism +puritanism's +puritans +purity 
+purity's +purl +purl's +purled +purling +purloin +purloined +purloining +purloins +purls +purple +purple's +purpler +purples +purplest +purplish +purport +purport's +purported +purportedly +purporting +purports +purpose +purpose's +purposed +purposeful +purposefully +purposeless +purposely +purposes +purposing +purr +purr's +purred +purring +purrs +purse +purse's +pursed +purser +purser's +pursers +purses +pursing +pursuance +pursuance's +pursuant +pursue +pursued +pursuer +pursuer's +pursuers +pursues +pursuing +pursuit +pursuit's +pursuits +purulence +purulence's +purulent +purvey +purveyed +purveying +purveyor +purveyor's +purveyors +purveys +purview +purview's +pus +pus's +push +push's +pushcart +pushcart's +pushcarts +pushed +pusher +pusher's +pushers +pushes +pushier +pushiest +pushiness +pushiness's +pushing +pushover +pushover's +pushovers +pushup +pushup's +pushups +pushy +pusillanimity +pusillanimity's +pusillanimous +puss +puss's +pusses +pussier +pussies +pussiest +pussy +pussy's +pussycat +pussycat's +pussycats +pussyfoot +pussyfooted +pussyfooting +pussyfoots +pustule +pustule's +pustules +put +put's +putative +putrefaction +putrefaction's +putrefied +putrefies +putrefy +putrefying +putrescence +putrescence's +putrescent +putrid +puts +putsch +putsch's +putsches +putt +putt's +putted +putter +putter's +puttered +puttering +putters +puttied +putties +putting +putts +putty +putty's +puttying +puzzle +puzzle's +puzzled +puzzlement +puzzlement's +puzzler +puzzler's +puzzlers +puzzles +puzzling +pwn +pwned +pwning +pwns +pygmies +pygmy +pygmy's +pylon +pylon's +pylons +pyorrhea +pyorrhea's +pyramid +pyramid's +pyramidal +pyramided +pyramiding +pyramids +pyre +pyre's +pyres +pyrite +pyrite's +pyromania +pyromania's +pyromaniac +pyromaniac's +pyromaniacs +pyrotechnic +pyrotechnics +pyrotechnics's +python +python's +pythons +pyx +pyx's +pyxes +q +qua +quack +quack's +quacked +quackery +quackery's +quacking +quacks +quad +quad's +quadrangle +quadrangle's 
+quadrangles +quadrangular +quadrant +quadrant's +quadrants +quadraphonic +quadratic +quadrature +quadrennial +quadriceps +quadriceps's +quadricepses +quadrilateral +quadrilateral's +quadrilaterals +quadrille +quadrille's +quadrilles +quadriphonic +quadriplegia +quadriplegia's +quadriplegic +quadriplegic's +quadriplegics +quadruped +quadruped's +quadrupeds +quadruple +quadruple's +quadrupled +quadruples +quadruplet +quadruplet's +quadruplets +quadruplicate +quadruplicate's +quadruplicated +quadruplicates +quadruplicating +quadrupling +quads +quaff +quaff's +quaffed +quaffing +quaffs +quagmire +quagmire's +quagmires +quahaug +quahaug's +quahaugs +quahog +quahog's +quahogs +quail +quail's +quailed +quailing +quails +quaint +quainter +quaintest +quaintly +quaintness +quaintness's +quake +quake's +quaked +quakes +quaking +qualification +qualification's +qualifications +qualified +qualifier +qualifier's +qualifiers +qualifies +qualify +qualifying +qualitative +qualitatively +qualities +quality +quality's +qualm +qualm's +qualms +quandaries +quandary +quandary's +quanta +quantified +quantifier +quantifier's +quantifiers +quantifies +quantify +quantifying +quantitative +quantities +quantity +quantity's +quantum +quantum's +quarantine +quarantine's +quarantined +quarantines +quarantining +quark +quark's +quarks +quarrel +quarrel's +quarreled +quarreling +quarrelled +quarrelling +quarrels +quarrelsome +quarried +quarries +quarry +quarry's +quarrying +quart +quart's +quarter +quarter's +quarterback +quarterback's +quarterbacked +quarterbacking +quarterbacks +quarterdeck +quarterdeck's +quarterdecks +quartered +quarterfinal +quarterfinal's +quarterfinals +quartering +quarterlies +quarterly +quarterly's +quartermaster +quartermaster's +quartermasters +quarters +quartet +quartet's +quartets +quartette +quartette's +quartettes +quarto +quarto's +quartos +quarts +quartz +quartz's +quasar +quasar's +quasars +quash +quashed +quashes +quashing +quasi +quatrain +quatrain's +quatrains 
+quaver +quaver's +quavered +quavering +quavers +quavery +quay +quay's +quays +queasier +queasiest +queasily +queasiness +queasiness's +queasy +queen +queen's +queened +queening +queenlier +queenliest +queenly +queens +queer +queer's +queered +queerer +queerest +queering +queerly +queerness +queerness's +queers +quell +quelled +quelling +quells +quench +quenched +quenches +quenching +queried +queries +querulous +querulously +query +query's +querying +quesadilla +quesadilla's +quesadillas +quest +quest's +quested +questing +question +question's +questionable +questionably +questioned +questioner +questioner's +questioners +questioning +questioningly +questionnaire +questionnaire's +questionnaires +questions +quests +queue +queue's +queued +queues +queuing +quibble +quibble's +quibbled +quibbler +quibbler's +quibblers +quibbles +quibbling +quiche +quiche's +quiches +quick +quick's +quicken +quickened +quickening +quickens +quicker +quickest +quickie +quickie's +quickies +quicklime +quicklime's +quickly +quickness +quickness's +quicksand +quicksand's +quicksands +quicksilver +quicksilver's +quid +quid's +quids +quiescence +quiescence's +quiescent +quiet +quiet's +quieted +quieter +quietest +quieting +quietly +quietness +quietness's +quiets +quietude +quietude's +quietus +quietus's +quietuses +quill +quill's +quills +quilt +quilt's +quilted +quilter +quilter's +quilters +quilting +quilting's +quilts +quince +quince's +quinces +quinine +quinine's +quintessence +quintessence's +quintessences +quintessential +quintet +quintet's +quintets +quintuple +quintuple's +quintupled +quintuples +quintuplet +quintuplet's +quintuplets +quintupling +quip +quip's +quipped +quipping +quips +quire +quire's +quires +quirk +quirk's +quirked +quirkier +quirkiest +quirking +quirks +quirky +quisling +quisling's +quislings +quit +quite +quits +quitted +quitter +quitter's +quitters +quitting +quiver +quiver's +quivered +quivering +quivers +quixotic +quiz +quiz's +quizzed +quizzes +quizzical 
+quizzically +quizzing +quoit +quoit's +quoited +quoiting +quoits +quondam +quorum +quorum's +quorums +quota +quota's +quotable +quotas +quotation +quotation's +quotations +quote +quote's +quoted +quotes +quoth +quotidian +quotient +quotient's +quotients +quoting +r +rabbi +rabbi's +rabbinate +rabbinate's +rabbinical +rabbis +rabbit +rabbit's +rabbited +rabbiting +rabbits +rabble +rabble's +rabbles +rabid +rabies +rabies's +raccoon +raccoon's +raccoons +race +race's +racecourse +racecourse's +racecourses +raced +racehorse +racehorse's +racehorses +raceme +raceme's +racemes +racer +racer's +racers +races +racetrack +racetrack's +racetracks +raceway +raceway's +raceways +racial +racially +racier +raciest +racily +raciness +raciness's +racing +racing's +racism +racism's +racist +racist's +racists +rack +rack's +racked +racket +racket's +racketed +racketeer +racketeer's +racketeered +racketeering +racketeering's +racketeers +racketing +rackets +racking +racks +raconteur +raconteur's +raconteurs +racoon +racoon's +racoons +racquet +racquet's +racquetball +racquetball's +racquetballs +racquets +racy +radar +radar's +radars +radial +radial's +radially +radials +radiance +radiance's +radiant +radiantly +radiate +radiated +radiates +radiating +radiation +radiation's +radiations +radiator +radiator's +radiators +radical +radical's +radicalism +radicalism's +radically +radicals +radii +radio +radio's +radioactive +radioactivity +radioactivity's +radioed +radiogram +radiogram's +radiograms +radioing +radioisotope +radioisotope's +radioisotopes +radiologist +radiologist's +radiologists +radiology +radiology's +radios +radiotelephone +radiotelephone's +radiotelephones +radiotherapist +radiotherapist's +radiotherapists +radiotherapy +radiotherapy's +radish +radish's +radishes +radium +radium's +radius +radius's +radiuses +radon +radon's +raffia +raffia's +raffish +raffle +raffle's +raffled +raffles +raffling +raft +raft's +rafted +rafter +rafter's +rafters +rafting +rafts +rag 
+rag's +raga +raga's +ragamuffin +ragamuffin's +ragamuffins +ragas +rage +rage's +raged +rages +ragged +raggeder +raggedest +raggedier +raggediest +raggedly +raggedness +raggedness's +raggedy +ragging +raging +raglan +raglan's +raglans +ragout +ragout's +ragouts +rags +ragtag +ragtags +ragtime +ragtime's +ragweed +ragweed's +raid +raid's +raided +raider +raider's +raiders +raiding +raids +rail +rail's +railed +railing +railing's +railings +railleries +raillery +raillery's +railroad +railroad's +railroaded +railroading +railroads +rails +railway +railway's +railways +raiment +raiment's +rain +rain's +rainbow +rainbow's +rainbows +raincoat +raincoat's +raincoats +raindrop +raindrop's +raindrops +rained +rainfall +rainfall's +rainfalls +rainforest +rainier +rainiest +raining +rainmaker +rainmaker's +rainmakers +rains +rainstorm +rainstorm's +rainstorms +rainwater +rainwater's +rainy +raise +raise's +raised +raises +raisin +raisin's +raising +raisins +raja +raja's +rajah +rajah's +rajahs +rajas +rake +rake's +raked +rakes +raking +rakish +rakishly +rakishness +rakishness's +rallied +rallies +rally +rally's +rallying +ram +ram's +ramble +ramble's +rambled +rambler +rambler's +ramblers +rambles +rambling +rambunctious +rambunctiousness +rambunctiousness's +ramification +ramification's +ramifications +ramified +ramifies +ramify +ramifying +rammed +ramming +ramp +ramp's +rampage +rampage's +rampaged +rampages +rampaging +rampant +rampantly +rampart +rampart's +ramparts +ramps +ramrod +ramrod's +ramrodded +ramrodding +ramrods +rams +ramshackle +ran +ranch +ranch's +ranched +rancher +rancher's +ranchers +ranches +ranching +ranching's +rancid +rancidity +rancidity's +rancor +rancor's +rancorous +rancorously +randier +randiest +random +randomize +randomized +randomizes +randomizing +randomly +randomness +randomness's +randy +rang +range +range's +ranged +ranger +ranger's +rangers +ranges +rangier +rangiest +ranginess +ranginess's +ranging +rangy +rank +rank's +ranked +ranker 
+rankest +ranking +ranking's +rankings +rankle +rankled +rankles +rankling +rankness +rankness's +ranks +ransack +ransacked +ransacking +ransacks +ransom +ransom's +ransomed +ransoming +ransoms +rant +rant's +ranted +ranter +ranting +rants +rap +rap's +rapacious +rapaciously +rapaciousness +rapaciousness's +rapacity +rapacity's +rape +rape's +raped +rapes +rapid +rapid's +rapider +rapidest +rapidity +rapidity's +rapidly +rapids +rapier +rapier's +rapiers +rapine +rapine's +raping +rapist +rapist's +rapists +rapped +rapper +rapper's +rappers +rapping +rapport +rapport's +rapports +rapprochement +rapprochement's +rapprochements +raps +rapscallion +rapscallion's +rapscallions +rapt +rapture +rapture's +raptures +rapturous +rare +rared +rarefied +rarefies +rarefy +rarefying +rarely +rareness +rareness's +rarer +rares +rarest +raring +rarities +rarity +rarity's +rascal +rascal's +rascally +rascals +rash +rash's +rasher +rasher's +rashers +rashes +rashest +rashly +rashness +rashness's +rasp +rasp's +raspberries +raspberry +raspberry's +rasped +raspier +raspiest +rasping +rasps +raspy +raster +rat +rat's +ratchet +ratchet's +ratcheted +ratcheting +ratchets +rate +rate's +rated +rates +rather +rathskeller +rathskeller's +rathskellers +ratification +ratification's +ratified +ratifies +ratify +ratifying +rating +rating's +ratings +ratio +ratio's +ration +ration's +rational +rational's +rationale +rationale's +rationales +rationalism +rationalism's +rationalist +rationalist's +rationalistic +rationalists +rationality +rationality's +rationalization +rationalization's +rationalizations +rationalize +rationalized +rationalizes +rationalizing +rationally +rationals +rationed +rationing +rations +ratios +rats +rattan +rattan's +rattans +ratted +rattier +rattiest +ratting +rattle +rattle's +rattled +rattler +rattler's +rattlers +rattles +rattlesnake +rattlesnake's +rattlesnakes +rattletrap +rattletrap's +rattletraps +rattling +rattlings +rattrap +rattrap's +rattraps +ratty 
+raucous +raucously +raucousness +raucousness's +raunchier +raunchiest +raunchiness +raunchiness's +raunchy +ravage +ravage's +ravaged +ravages +ravaging +rave +rave's +raved +ravel +ravel's +raveled +raveling +ravelled +ravelling +ravels +raven +raven's +ravened +ravening +ravenous +ravenously +ravens +raves +ravine +ravine's +ravines +raving +raving's +ravings +ravioli +ravioli's +raviolis +ravish +ravished +ravishes +ravishing +ravishingly +ravishment +ravishment's +raw +raw's +rawboned +rawer +rawest +rawhide +rawhide's +rawness +rawness's +ray +ray's +rayon +rayon's +rays +raze +razed +razes +razing +razor +razor's +razors +razz +razz's +razzed +razzes +razzing +re +re's +reach +reach's +reachable +reached +reaches +reaching +react +reacted +reacting +reaction +reaction's +reactionaries +reactionary +reactionary's +reactions +reactivate +reactivated +reactivates +reactivating +reactivation +reactivation's +reactive +reactor +reactor's +reactors +reacts +read +read's +readabilities +readability +readability's +readable +reader +reader's +readers +readership +readership's +readerships +readied +readier +readies +readiest +readily +readiness +readiness's +reading +reading's +readings +readjust +readjusted +readjusting +readjustment +readjustment's +readjustments +readjusts +readmit +readmits +readmitted +readmitting +readout +readout's +readouts +reads +ready +readying +reaffirm +reaffirmed +reaffirming +reaffirms +reagent +reagent's +reagents +real +real's +realer +reales +realest +realign +realism +realism's +realist +realist's +realistic +realistically +realists +realities +reality +reality's +realizable +realization +realization's +realize +realized +realizes +realizing +reallocate +reallocated +reallocates +reallocating +reallocation +really +realm +realm's +realms +reals +realtor +realtor's +realtors +realty +realty's +ream +ream's +reamed +reamer +reamer's +reamers +reaming +reams +reanimate +reanimated +reanimates +reanimating +reap +reaped +reaper 
+reaper's +reapers +reaping +reappear +reappearance +reappearance's +reappearances +reappeared +reappearing +reappears +reapplied +reapplies +reapply +reapplying +reappoint +reappointed +reappointing +reappointment +reappointment's +reappoints +reapportion +reapportioned +reapportioning +reapportionment +reapportionment's +reapportions +reappraisal +reappraisal's +reappraisals +reappraise +reappraised +reappraises +reappraising +reaps +rear +rear's +reared +rearing +rearm +rearmament +rearmament's +rearmed +rearming +rearmost +rearms +rearrange +rearranged +rearrangement +rearrangement's +rearrangements +rearranges +rearranging +rears +rearward +rearwards +reason +reason's +reasonable +reasonableness +reasonableness's +reasonably +reasoned +reasoning +reasoning's +reasons +reassemble +reassembled +reassembles +reassembling +reassert +reasserted +reasserting +reasserts +reassess +reassessed +reassesses +reassessing +reassessment +reassessment's +reassessments +reassign +reassigned +reassigning +reassigns +reassurance +reassurance's +reassurances +reassure +reassured +reassures +reassuring +reassuringly +reawaken +reawakened +reawakening +reawakens +rebate +rebate's +rebated +rebates +rebating +rebel +rebel's +rebelled +rebelling +rebellion +rebellion's +rebellions +rebellious +rebelliously +rebelliousness +rebelliousness's +rebels +rebind +rebinding +rebinds +rebirth +rebirth's +rebirths +reborn +rebound +rebound's +rebounded +rebounding +rebounds +rebroadcast +rebroadcast's +rebroadcasted +rebroadcasting +rebroadcasts +rebuff +rebuff's +rebuffed +rebuffing +rebuffs +rebuild +rebuilding +rebuilds +rebuilt +rebuke +rebuke's +rebuked +rebukes +rebuking +rebus +rebus's +rebuses +rebut +rebuts +rebuttal +rebuttal's +rebuttals +rebutted +rebutting +recalcitrance +recalcitrance's +recalcitrant +recall +recall's +recalled +recalling +recalls +recant +recantation +recantation's +recantations +recanted +recanting +recants +recap +recap's +recapitulate +recapitulated 
+recapitulates +recapitulating +recapitulation +recapitulation's +recapitulations +recapped +recapping +recaps +recapture +recapture's +recaptured +recaptures +recapturing +recast +recast's +recasting +recasts +recede +receded +recedes +receding +receipt +receipt's +receipted +receipting +receipts +receivable +receive +received +receiver +receiver's +receivers +receivership +receivership's +receives +receiving +recent +recenter +recentest +recently +receptacle +receptacle's +receptacles +reception +reception's +receptionist +receptionist's +receptionists +receptions +receptive +receptively +receptiveness +receptiveness's +receptivity +receptivity's +receptor +receptor's +receptors +recess +recess's +recessed +recesses +recessing +recession +recession's +recessional +recessional's +recessionals +recessions +recessive +recessive's +recessives +recharge +recharge's +rechargeable +recharged +recharges +recharging +recheck +recheck's +rechecked +rechecking +rechecks +recherché +recidivism +recidivism's +recidivist +recidivist's +recidivists +recipe +recipe's +recipes +recipient +recipient's +recipients +reciprocal +reciprocal's +reciprocally +reciprocals +reciprocate +reciprocated +reciprocates +reciprocating +reciprocation +reciprocation's +reciprocity +reciprocity's +recital +recital's +recitals +recitation +recitation's +recitations +recitative +recitative's +recitatives +recite +recited +recites +reciting +reckless +recklessly +recklessness +recklessness's +reckon +reckoned +reckoning +reckoning's +reckonings +reckons +reclaim +reclaimed +reclaiming +reclaims +reclamation +reclamation's +reclassified +reclassifies +reclassify +reclassifying +recline +reclined +recliner +recliner's +recliners +reclines +reclining +recluse +recluse's +recluses +reclusive +recognition +recognition's +recognizable +recognizably +recognizance +recognizance's +recognize +recognized +recognizer +recognizes +recognizing +recoil +recoil's +recoiled +recoiling +recoils +recollect +recollected 
+recollecting +recollection +recollection's +recollections +recollects +recombination +recombine +recombined +recombines +recombining +recommence +recommenced +recommences +recommencing +recommend +recommendation +recommendation's +recommendations +recommended +recommending +recommends +recompense +recompense's +recompensed +recompenses +recompensing +recompilation +recompile +recompiled +recompiling +reconcilable +reconcile +reconciled +reconciles +reconciliation +reconciliation's +reconciliations +reconciling +recondite +recondition +reconditioned +reconditioning +reconditions +reconfiguration +reconfigure +reconfigured +reconnaissance +reconnaissance's +reconnaissances +reconnect +reconnected +reconnecting +reconnects +reconnoiter +reconnoitered +reconnoitering +reconnoiters +reconquer +reconquered +reconquering +reconquers +reconsider +reconsideration +reconsideration's +reconsidered +reconsidering +reconsiders +reconstitute +reconstituted +reconstitutes +reconstituting +reconstruct +reconstructed +reconstructing +reconstruction +reconstruction's +reconstructions +reconstructs +reconvene +reconvened +reconvenes +reconvening +recopied +recopies +recopy +recopying +record +record's +recorded +recorder +recorder's +recorders +recording +recording's +recordings +records +recount +recount's +recounted +recounting +recounts +recoup +recouped +recouping +recoups +recourse +recourse's +recover +recoverable +recovered +recoveries +recovering +recovers +recovery +recovery's +recreant +recreant's +recreants +recreate +recreated +recreates +recreating +recreation +recreation's +recreational +recreations +recriminate +recriminated +recriminates +recriminating +recrimination +recrimination's +recriminations +recrudescence +recrudescence's +recruit +recruit's +recruited +recruiter +recruiter's +recruiters +recruiting +recruitment +recruitment's +recruits +recta +rectal +rectangle +rectangle's +rectangles +rectangular +rectifiable +rectification +rectification's 
+rectifications +rectified +rectifier +rectifier's +rectifiers +rectifies +rectify +rectifying +rectilinear +rectitude +rectitude's +rector +rector's +rectories +rectors +rectory +rectory's +rectum +rectum's +rectums +recumbent +recuperate +recuperated +recuperates +recuperating +recuperation +recuperation's +recuperative +recur +recurred +recurrence +recurrence's +recurrences +recurrent +recurring +recurs +recursion +recursive +recursively +recyclable +recyclable's +recyclables +recycle +recycle's +recycled +recycles +recycling +recycling's +red +red's +redbreast +redbreast's +redbreasts +redcap +redcap's +redcaps +redcoat +redcoat's +redcoats +redden +reddened +reddening +reddens +redder +reddest +reddish +redecorate +redecorated +redecorates +redecorating +rededicate +rededicated +rededicates +rededicating +redeem +redeemable +redeemed +redeemer +redeemer's +redeemers +redeeming +redeems +redefine +redefined +redefines +redefining +redefinition +redemption +redemption's +redeploy +redeployed +redeploying +redeployment +redeployment's +redeploys +redesign +redesigned +redesigning +redesigns +redevelop +redeveloped +redeveloping +redevelopment +redevelopment's +redevelopments +redevelops +redhead +redhead's +redheaded +redheads +redid +redirect +redirected +redirecting +redirection +redirects +rediscover +rediscovered +rediscovering +rediscovers +rediscovery +rediscovery's +redistribute +redistributed +redistributes +redistributing +redistribution +redistribution's +redistributor +redistributors +redistrict +redistricted +redistricting +redistricts +redneck +redneck's +rednecks +redness +redness's +redo +redoes +redoing +redolence +redolence's +redolent +redone +redouble +redoubled +redoubles +redoubling +redoubt +redoubt's +redoubtable +redoubts +redound +redounded +redounding +redounds +redraft +redrafted +redrafting +redrafts +redraw +redrawing +redrawn +redraws +redress +redress's +redressed +redresses +redressing +redrew +reds +redskin +redskin's +redskins 
+reduce +reduced +reduces +reducing +reduction +reduction's +reductions +redundancies +redundancy +redundancy's +redundant +redundantly +redwood +redwood's +redwoods +reed +reed's +reedier +reediest +reeds +reeducate +reeducated +reeducates +reeducating +reeducation +reeducation's +reedy +reef +reef's +reefed +reefer +reefer's +reefers +reefing +reefs +reek +reek's +reeked +reeking +reeks +reel +reel's +reelect +reelected +reelecting +reelection +reelection's +reelections +reelects +reeled +reeling +reels +reemerge +reemerged +reemerges +reemerging +reemphasize +reemphasized +reemphasizes +reemphasizing +reenact +reenacted +reenacting +reenactment +reenactment's +reenactments +reenacts +reenforce +reenforced +reenforces +reenforcing +reenlist +reenlisted +reenlisting +reenlists +reenter +reentered +reentering +reenters +reentries +reentry +reentry's +reestablish +reestablished +reestablishes +reestablishing +reevaluate +reevaluated +reevaluates +reevaluating +reeve +reeved +reeves +reeving +reexamine +reexamined +reexamines +reexamining +ref +ref's +refashion +refashioned +refashioning +refashions +refectories +refectory +refectory's +refer +referee +referee's +refereed +refereeing +referees +reference +reference's +referenced +references +referencing +referenda +referendum +referendum's +referendums +referent +referential +referral +referral's +referrals +referred +referring +refers +reffed +reffing +refile +refiled +refiles +refiling +refill +refill's +refillable +refilled +refilling +refills +refinance +refinanced +refinances +refinancing +refine +refined +refinement +refinement's +refinements +refiner +refiner's +refineries +refiners +refinery +refinery's +refines +refining +refinish +refinished +refinishes +refinishing +refit +refit's +refits +refitted +refitting +reflect +reflected +reflecting +reflection +reflection's +reflections +reflective +reflector +reflector's +reflectors +reflects +reflex +reflex's +reflexes +reflexive +reflexive's +reflexively 
+reflexives +refocus +refocused +refocuses +refocusing +refocussed +refocusses +refocussing +reforest +reforestation +reforestation's +reforested +reforesting +reforests +reform +reform's +reformat +reformation +reformation's +reformations +reformatories +reformatory +reformatory's +reformatted +reformatting +reformed +reformer +reformer's +reformers +reforming +reforms +reformulate +reformulated +reformulates +reformulating +refract +refracted +refracting +refraction +refraction's +refractories +refractory +refractory's +refracts +refrain +refrain's +refrained +refraining +refrains +refresh +refreshed +refresher +refresher's +refreshers +refreshes +refreshing +refreshingly +refreshment +refreshment's +refreshments +refreshments's +refrigerant +refrigerant's +refrigerants +refrigerate +refrigerated +refrigerates +refrigerating +refrigeration +refrigeration's +refrigerator +refrigerator's +refrigerators +refs +refuel +refueled +refueling +refuelled +refuelling +refuels +refuge +refuge's +refugee +refugee's +refugees +refuges +refulgence +refulgence's +refulgent +refund +refund's +refundable +refunded +refunding +refunds +refurbish +refurbished +refurbishes +refurbishing +refurbishment +refurbishment's +refurbishments +refurnish +refurnished +refurnishes +refurnishing +refusal +refusal's +refusals +refuse +refuse's +refused +refuses +refusing +refutation +refutation's +refutations +refute +refuted +refutes +refuting +regain +regained +regaining +regains +regal +regale +regaled +regales +regalia +regalia's +regaling +regally +regard +regard's +regarded +regarding +regardless +regards +regards's +regatta +regatta's +regattas +regencies +regency +regency's +regenerate +regenerated +regenerates +regenerating +regeneration +regeneration's +regenerative +regent +regent's +regents +reggae +reggae's +regicide +regicide's +regicides +regime +regime's +regimen +regimen's +regimens +regiment +regiment's +regimental +regimentation +regimentation's +regimented +regimenting 
+regiments +regimes +region +region's +regional +regionalism +regionalism's +regionalisms +regionally +regions +register +register's +registered +registering +registers +registrant +registrant's +registrants +registrar +registrar's +registrars +registration +registration's +registrations +registries +registry +registry's +regress +regress's +regressed +regresses +regressing +regression +regression's +regressions +regressive +regret +regret's +regretful +regretfully +regrets +regrettable +regrettably +regretted +regretting +regroup +regrouped +regrouping +regroups +regular +regular's +regularity +regularity's +regularize +regularized +regularizes +regularizing +regularly +regulars +regulate +regulated +regulates +regulating +regulation +regulation's +regulations +regulator +regulator's +regulators +regulatory +regurgitate +regurgitated +regurgitates +regurgitating +regurgitation +regurgitation's +rehab +rehab's +rehabbed +rehabbing +rehabilitate +rehabilitated +rehabilitates +rehabilitating +rehabilitation +rehabilitation's +rehabs +rehash +rehash's +rehashed +rehashes +rehashing +rehearsal +rehearsal's +rehearsals +rehearse +rehearsed +rehearses +rehearsing +reheat +reheated +reheating +reheats +rehire +rehired +rehires +rehiring +reign +reign's +reigned +reigning +reigns +reimburse +reimbursed +reimbursement +reimbursement's +reimbursements +reimburses +reimbursing +reimpose +reimposed +reimposes +reimposing +rein +rein's +reincarnate +reincarnated +reincarnates +reincarnating +reincarnation +reincarnation's +reincarnations +reindeer +reindeer's +reindeers +reined +reinforce +reinforced +reinforcement +reinforcement's +reinforcements +reinforces +reinforcing +reining +reinitialize +reinitialized +reins +reinsert +reinserted +reinserting +reinserts +reinstall +reinstalled +reinstalling +reinstate +reinstated +reinstatement +reinstatement's +reinstates +reinstating +reinterpret +reinterpretation +reinterpretation's +reinterpretations +reinterpreted +reinterpreting 
+reinterprets +reinvent +reinvented +reinventing +reinvents +reinvest +reinvested +reinvesting +reinvests +reis +reissue +reissue's +reissued +reissues +reissuing +reiterate +reiterated +reiterates +reiterating +reiteration +reiteration's +reiterations +reject +reject's +rejected +rejecting +rejection +rejection's +rejections +rejects +rejoice +rejoiced +rejoices +rejoicing +rejoicing's +rejoicings +rejoin +rejoinder +rejoinder's +rejoinders +rejoined +rejoining +rejoins +rejuvenate +rejuvenated +rejuvenates +rejuvenating +rejuvenation +rejuvenation's +rekindle +rekindled +rekindles +rekindling +relabel +relabeled +relabeling +relabelled +relabelling +relabels +relaid +relapse +relapse's +relapsed +relapses +relapsing +relate +related +relates +relating +relation +relation's +relational +relations +relationship +relationship's +relationships +relative +relative's +relatively +relatives +relativistic +relativity +relativity's +relax +relaxant +relaxant's +relaxants +relaxation +relaxation's +relaxations +relaxed +relaxes +relaxing +relay +relay's +relayed +relaying +relays +relearn +relearned +relearning +relearns +releasable +release +release's +released +releases +releasing +relegate +relegated +relegates +relegating +relegation +relegation's +relent +relented +relenting +relentless +relentlessly +relentlessness +relentlessness's +relents +relevance +relevance's +relevancy +relevancy's +relevant +relevantly +reliability +reliability's +reliable +reliably +reliance +reliance's +reliant +relic +relic's +relics +relied +relief +relief's +reliefs +relies +relieve +relieved +relieves +relieving +religion +religion's +religions +religious +religious's +religiously +relinquish +relinquished +relinquishes +relinquishing +relinquishment +relinquishment's +relish +relish's +relished +relishes +relishing +relive +relived +relives +reliving +reload +reloaded +reloading +reloads +relocatable +relocate +relocated +relocates +relocating +relocation +relocation's +reluctance 
+reluctance's +reluctant +reluctantly +rely +relying +remade +remain +remainder +remainder's +remaindered +remainders +remained +remaining +remains +remake +remake's +remakes +remaking +remand +remanded +remanding +remands +remark +remark's +remarkable +remarkably +remarked +remarking +remarks +remarriage +remarriage's +remarriages +remarried +remarries +remarry +remarrying +rematch +rematch's +rematches +remediable +remedial +remedied +remedies +remedy +remedy's +remedying +remember +remembered +remembering +remembers +remembrance +remembrance's +remembrances +remind +reminded +reminder +reminder's +reminders +reminding +reminds +reminisce +reminisced +reminiscence +reminiscence's +reminiscences +reminiscent +reminisces +reminiscing +remiss +remission +remission's +remissions +remissness +remissness's +remit +remits +remittance +remittance's +remittances +remitted +remitting +remnant +remnant's +remnants +remodel +remodeled +remodeling +remodelled +remodelling +remodels +remonstrance +remonstrance's +remonstrances +remonstrate +remonstrated +remonstrates +remonstrating +remorse +remorse's +remorseful +remorsefully +remorseless +remorselessly +remortgage +remortgaged +remortgages +remortgaging +remote +remote's +remotely +remoteness +remoteness's +remoter +remotes +remotest +remount +remount's +remounted +remounting +remounts +removable +removal +removal's +removals +remove +remove's +removed +remover +remover's +removers +removes +removing +remunerate +remunerated +remunerates +remunerating +remuneration +remuneration's +remunerations +remunerative +renaissance +renaissance's +renaissances +renal +rename +renamed +renames +renaming +renascence +renascence's +renascences +renascent +rend +render +render's +rendered +rendering +rendering's +renderings +renders +rendezvous +rendezvous's +rendezvoused +rendezvouses +rendezvousing +rending +rendition +rendition's +renditions +rends +renegade +renegade's +renegaded +renegades +renegading +renege +reneged +reneges 
+reneging +renegotiate +renegotiated +renegotiates +renegotiating +renew +renewable +renewal +renewal's +renewals +renewed +renewing +renews +rennet +rennet's +renounce +renounced +renounces +renouncing +renovate +renovated +renovates +renovating +renovation +renovation's +renovations +renovator +renovator's +renovators +renown +renown's +renowned +rent +rent's +rental +rental's +rentals +rented +renter +renter's +renters +renting +rents +renumber +renumbered +renumbering +renumbers +renunciation +renunciation's +renunciations +reoccupied +reoccupies +reoccupy +reoccupying +reoccur +reoccurred +reoccurring +reoccurs +reopen +reopened +reopening +reopens +reorder +reorder's +reordered +reordering +reorders +reorg +reorg's +reorganization +reorganization's +reorganizations +reorganize +reorganized +reorganizes +reorganizing +reorged +reorging +reorgs +rep +rep's +repackage +repackaged +repackages +repackaging +repaid +repaint +repainted +repainting +repaints +repair +repair's +repairable +repaired +repairing +repairman +repairman's +repairmen +repairs +reparation +reparation's +reparations +reparations's +repartee +repartee's +repast +repast's +repasts +repatriate +repatriate's +repatriated +repatriates +repatriating +repatriation +repatriation's +repay +repayable +repaying +repayment +repayment's +repayments +repays +repeal +repeal's +repealed +repealing +repeals +repeat +repeat's +repeatable +repeatably +repeated +repeatedly +repeater +repeater's +repeaters +repeating +repeats +repel +repellant +repellant's +repellants +repelled +repellent +repellent's +repellents +repelling +repels +repent +repentance +repentance's +repentant +repented +repenting +repents +repercussion +repercussion's +repercussions +repertoire +repertoire's +repertoires +repertories +repertory +repertory's +repetition +repetition's +repetitions +repetitious +repetitive +rephrase +rephrased +rephrases +rephrasing +replace +replaceable +replaced +replacement +replacement's +replacements +replaces 
+replacing +replay +replay's +replayed +replaying +replays +replenish +replenished +replenishes +replenishing +replenishment +replenishment's +replete +repleted +repletes +repleting +repletion +repletion's +replica +replica's +replicas +replicate +replicated +replicates +replicating +replication +replication's +replications +replied +replies +reply +reply's +replying +report +report's +reportage +reportage's +reported +reportedly +reporter +reporter's +reporters +reporting +reports +repose +repose's +reposed +reposeful +reposes +reposing +repositories +repository +repository's +repossess +repossessed +repossesses +repossessing +repossession +repossession's +repossessions +reprehend +reprehended +reprehending +reprehends +reprehensible +reprehensibly +represent +representation +representation's +representational +representations +representative +representative's +representatives +represented +representing +represents +repress +repressed +represses +repressing +repression +repression's +repressions +repressive +reprieve +reprieve's +reprieved +reprieves +reprieving +reprimand +reprimand's +reprimanded +reprimanding +reprimands +reprint +reprint's +reprinted +reprinting +reprints +reprisal +reprisal's +reprisals +reprise +reprise's +reprises +reprising +reprized +reproach +reproach's +reproached +reproaches +reproachful +reproachfully +reproaching +reprobate +reprobate's +reprobates +reprocess +reprocessed +reprocesses +reprocessing +reproduce +reproduced +reproduces +reproducible +reproducing +reproduction +reproduction's +reproductions +reproductive +reprogram +reprogramed +reprograming +reprogrammed +reprogramming +reprograms +reproof +reproof's +reproofed +reproofing +reproofs +reprove +reproved +reproves +reproving +reps +reptile +reptile's +reptiles +reptilian +reptilian's +reptilians +republic +republic's +republican +republican's +republicanism +republicanism's +republicans +republics +republish +republished +republishes +republishing +repudiate +repudiated 
+repudiates +repudiating +repudiation +repudiation's +repudiations +repugnance +repugnance's +repugnant +repulse +repulse's +repulsed +repulses +repulsing +repulsion +repulsion's +repulsive +repulsively +repulsiveness +repulsiveness's +reputable +reputably +reputation +reputation's +reputations +repute +repute's +reputed +reputedly +reputes +reputing +request +request's +requested +requester +requesting +requests +requiem +requiem's +requiems +require +required +requirement +requirement's +requirements +requires +requiring +requisite +requisite's +requisites +requisition +requisition's +requisitioned +requisitioning +requisitions +requital +requital's +requite +requited +requites +requiting +reran +reread +rereading +rereads +reroute +rerouted +reroutes +rerouting +rerun +rerun's +rerunning +reruns +resale +resale's +resales +reschedule +rescheduled +reschedules +rescheduling +rescind +rescinded +rescinding +rescinds +rescission +rescission's +rescue +rescue's +rescued +rescuer +rescuer's +rescuers +rescues +rescuing +research +research's +researched +researcher +researcher's +researchers +researches +researching +resell +reselling +resells +resemblance +resemblance's +resemblances +resemble +resembled +resembles +resembling +resend +resent +resented +resentful +resentfully +resenting +resentment +resentment's +resentments +resents +reservation +reservation's +reservations +reserve +reserve's +reserved +reservedly +reserves +reserving +reservist +reservist's +reservists +reservoir +reservoir's +reservoirs +reset +reset's +resets +resetting +resettle +resettled +resettles +resettling +reshuffle +reshuffle's +reshuffled +reshuffles +reshuffling +reside +resided +residence +residence's +residences +residencies +residency +residency's +resident +resident's +residential +residents +resides +residing +residual +residual's +residuals +residue +residue's +residues +resign +resignation +resignation's +resignations +resigned +resignedly +resigning +resigns +resilience 
+resilience's +resiliency +resiliency's +resilient +resin +resin's +resinous +resins +resist +resist's +resistance +resistance's +resistances +resistant +resisted +resister +resister's +resisters +resisting +resistor +resistor's +resistors +resists +resold +resolute +resolutely +resoluteness +resoluteness's +resolution +resolution's +resolutions +resolve +resolve's +resolved +resolver +resolves +resolving +resonance +resonance's +resonances +resonant +resonantly +resonate +resonated +resonates +resonating +resonator +resonator's +resonators +resort +resort's +resorted +resorting +resorts +resound +resounded +resounding +resoundingly +resounds +resource +resource's +resourceful +resourcefully +resourcefulness +resourcefulness's +resources +respect +respect's +respectability +respectability's +respectable +respectably +respected +respectful +respectfully +respecting +respective +respectively +respects +respell +respelled +respelling +respells +respelt +respiration +respiration's +respirator +respirator's +respirators +respiratory +respire +respired +respires +respiring +respite +respite's +respites +resplendence +resplendence's +resplendent +resplendently +respond +responded +respondent +respondent's +respondents +responding +responds +response +response's +responses +responsibilities +responsibility +responsibility's +responsible +responsibly +responsive +responsively +responsiveness +responsiveness's +rest +rest's +restart +restart's +restarted +restarting +restarts +restate +restated +restatement +restatement's +restatements +restates +restating +restaurant +restaurant's +restauranteur +restauranteur's +restauranteurs +restaurants +restaurateur +restaurateur's +restaurateurs +rested +restful +restfuller +restfullest +restfully +restfulness +restfulness's +resting +restitution +restitution's +restive +restively +restiveness +restiveness's +restless +restlessly +restlessness +restlessness's +restock +restocked +restocking +restocks +restoration +restoration's 
+restorations +restorative +restorative's +restoratives +restore +restored +restorer +restorer's +restorers +restores +restoring +restrain +restrained +restraining +restrains +restraint +restraint's +restraints +restrict +restricted +restricting +restriction +restriction's +restrictions +restrictive +restrictively +restricts +restroom +restroom's +restrooms +restructure +restructured +restructures +restructuring +restructuring's +restructurings +rests +restudied +restudies +restudy +restudying +resubmit +resubmits +resubmitted +resubmitting +result +result's +resultant +resultant's +resultants +resulted +resulting +results +resume +resume's +resumed +resumes +resuming +resumption +resumption's +resumptions +resupplied +resupplies +resupply +resupplying +resurface +resurfaced +resurfaces +resurfacing +resurgence +resurgence's +resurgences +resurgent +resurrect +resurrected +resurrecting +resurrection +resurrection's +resurrections +resurrects +resuscitate +resuscitated +resuscitates +resuscitating +resuscitation +resuscitation's +resuscitator +resuscitator's +resuscitators +retail +retail's +retailed +retailer +retailer's +retailers +retailing +retails +retain +retained +retainer +retainer's +retainers +retaining +retains +retake +retake's +retaken +retakes +retaking +retaliate +retaliated +retaliates +retaliating +retaliation +retaliation's +retaliations +retaliatory +retard +retard's +retardant +retardant's +retardants +retardation +retardation's +retarded +retarding +retards +retch +retched +retches +retching +retell +retelling +retells +retention +retention's +retentive +retentiveness +retentiveness's +rethink +rethink's +rethinking +rethinks +rethought +reticence +reticence's +reticent +retina +retina's +retinae +retinal +retinas +retinue +retinue's +retinues +retire +retired +retiree +retiree's +retirees +retirement +retirement's +retirements +retires +retiring +retold +retook +retool +retooled +retooling +retools +retort +retort's +retorted +retorting 
+retorts +retouch +retouch's +retouched +retouches +retouching +retrace +retraced +retraces +retracing +retract +retractable +retracted +retracting +retraction +retraction's +retractions +retracts +retrain +retrained +retraining +retrains +retread +retread's +retreaded +retreading +retreads +retreat +retreat's +retreated +retreating +retreats +retrench +retrenched +retrenches +retrenching +retrenchment +retrenchment's +retrenchments +retrial +retrial's +retrials +retribution +retribution's +retributions +retributive +retried +retries +retrievable +retrieval +retrieval's +retrievals +retrieve +retrieve's +retrieved +retriever +retriever's +retrievers +retrieves +retrieving +retroactive +retroactively +retrod +retrodden +retrofit +retrofit's +retrofits +retrofitted +retrofitting +retrograde +retrograded +retrogrades +retrograding +retrogress +retrogressed +retrogresses +retrogressing +retrogression +retrogression's +retrogressive +retrorocket +retrorocket's +retrorockets +retrospect +retrospect's +retrospected +retrospecting +retrospection +retrospection's +retrospective +retrospective's +retrospectively +retrospectives +retrospects +retry +retrying +return +return's +returnable +returnable's +returnables +returned +returnee +returnee's +returnees +returning +returns +retweet +retweeted +retweeting +retweets +retype +retyped +retypes +retyping +reunification +reunification's +reunified +reunifies +reunify +reunifying +reunion +reunion's +reunions +reunite +reunited +reunites +reuniting +reupholster +reupholstered +reupholstering +reupholsters +reusable +reuse +reuse's +reused +reuses +reusing +rev +rev's +revaluation +revaluation's +revaluations +revalue +revalued +revalues +revaluing +revamp +revamp's +revamped +revamping +revamps +reveal +revealed +revealing +revealings +reveals +reveille +reveille's +revel +revel's +revelation +revelation's +revelations +reveled +reveler +reveler's +revelers +reveling +revelled +reveller +reveller's +revellers +revelling 
+revelries +revelry +revelry's +revels +revenge +revenge's +revenged +revengeful +revenges +revenging +revenue +revenue's +revenues +reverberate +reverberated +reverberates +reverberating +reverberation +reverberation's +reverberations +revere +revered +reverence +reverence's +reverenced +reverences +reverencing +reverend +reverend's +reverends +reverent +reverential +reverently +reveres +reverie +reverie's +reveries +revering +reversal +reversal's +reversals +reverse +reverse's +reversed +reverses +reversible +reversing +reversion +reversion's +revert +reverted +reverting +reverts +revery +revery's +review +review's +reviewed +reviewer +reviewer's +reviewers +reviewing +reviews +revile +reviled +revilement +revilement's +reviler +reviler's +revilers +reviles +reviling +revise +revise's +revised +revises +revising +revision +revision's +revisions +revisit +revisited +revisiting +revisits +revitalization +revitalization's +revitalize +revitalized +revitalizes +revitalizing +revival +revival's +revivalist +revivalist's +revivalists +revivals +revive +revived +revives +revivification +revivification's +revivified +revivifies +revivify +revivifying +reviving +revocable +revocation +revocation's +revocations +revokable +revoke +revoked +revokes +revoking +revolt +revolt's +revolted +revolting +revoltingly +revolts +revolution +revolution's +revolutionaries +revolutionary +revolutionary's +revolutionist +revolutionist's +revolutionists +revolutionize +revolutionized +revolutionizes +revolutionizing +revolutions +revolve +revolved +revolver +revolver's +revolvers +revolves +revolving +revs +revue +revue's +revues +revulsion +revulsion's +revved +revving +reward +reward's +rewarded +rewarding +rewards +rewind +rewind's +rewindable +rewinding +rewinds +rewire +rewired +rewires +rewiring +reword +reworded +rewording +rewords +rework +reworked +reworking +reworks +rewound +rewrite +rewrite's +rewrites +rewriting +rewritten +rewrote +rhapsodic +rhapsodies +rhapsodize 
+rhapsodized +rhapsodizes +rhapsodizing +rhapsody +rhapsody's +rhea +rhea's +rheas +rheostat +rheostat's +rheostats +rhetoric +rhetoric's +rhetorical +rhetorically +rhetorician +rhetorician's +rhetoricians +rheum +rheum's +rheumatic +rheumatic's +rheumatics +rheumatism +rheumatism's +rheumy +rhinestone +rhinestone's +rhinestones +rhino +rhino's +rhinoceri +rhinoceros +rhinoceros's +rhinoceroses +rhinos +rhizome +rhizome's +rhizomes +rho +rhodium +rhodium's +rhododendron +rhododendron's +rhododendrons +rhombi +rhomboid +rhomboid's +rhomboids +rhombus +rhombus's +rhombuses +rhubarb +rhubarb's +rhubarbs +rhyme +rhyme's +rhymed +rhymes +rhyming +rhythm +rhythm's +rhythmic +rhythmical +rhythmically +rhythms +rib +rib's +ribald +ribaldry +ribaldry's +ribbed +ribbing +ribbon +ribbon's +ribbons +riboflavin +riboflavin's +ribs +rice +rice's +riced +rices +rich +rich's +richer +riches +richest +richly +richness +richness's +ricing +rick +rick's +ricked +ricketier +ricketiest +rickets +rickets's +rickety +ricking +ricks +ricksha +ricksha's +rickshas +rickshaw +rickshaw's +rickshaws +ricochet +ricochet's +ricocheted +ricocheting +ricochets +ricochetted +ricochetting +ricotta +ricotta's +rid +riddance +riddance's +ridded +ridden +ridding +riddle +riddle's +riddled +riddles +riddling +ride +ride's +rider +rider's +riders +rides +ridge +ridge's +ridged +ridgepole +ridgepole's +ridgepoles +ridges +ridging +ridicule +ridicule's +ridiculed +ridicules +ridiculing +ridiculous +ridiculously +ridiculousness +ridiculousness's +riding +riding's +rids +rife +rifer +rifest +riff +riff's +riffed +riffing +riffle +riffle's +riffled +riffles +riffling +riffraff +riffraff's +riffs +rifle +rifle's +rifled +rifleman +rifleman's +riflemen +rifles +rifling +rift +rift's +rifted +rifting +rifts +rig +rig's +rigamarole +rigamarole's +rigamaroles +rigged +rigging +rigging's +right +right's +righted +righteous +righteously +righteousness +righteousness's +righter +rightest +rightful +rightfully 
+rightfulness +rightfulness's +righting +rightist +rightist's +rightists +rightly +rightmost +rightness +rightness's +rights +rigid +rigidity +rigidity's +rigidly +rigidness +rigidness's +rigmarole +rigmarole's +rigmaroles +rigor +rigor's +rigorous +rigorously +rigors +rigs +rile +riled +riles +riling +rill +rill's +rills +rim +rim's +rime +rime's +rimed +rimes +riming +rimmed +rimming +rims +rind +rind's +rinds +ring +ring's +ringed +ringer +ringer's +ringers +ringing +ringleader +ringleader's +ringleaders +ringlet +ringlet's +ringlets +ringmaster +ringmaster's +ringmasters +rings +ringside +ringside's +ringtone +ringtone's +ringtones +ringworm +ringworm's +rink +rink's +rinks +rinse +rinse's +rinsed +rinses +rinsing +riot +riot's +rioted +rioter +rioter's +rioters +rioting +rioting's +riotous +riots +rip +rip's +ripe +ripely +ripen +ripened +ripeness +ripeness's +ripening +ripens +riper +ripest +riposte +riposte's +riposted +ripostes +riposting +ripped +ripper +ripper's +rippers +ripping +ripple +ripple's +rippled +ripples +rippling +rips +ripsaw +ripsaw's +ripsaws +rise +rise's +risen +riser +riser's +risers +rises +risible +rising +risk +risk's +risked +riskier +riskiest +riskiness +riskiness's +risking +risks +risky +risqué +rite +rite's +rites +ritual +ritual's +ritualism +ritualism's +ritualistic +ritually +rituals +ritzier +ritziest +ritzy +rival +rival's +rivaled +rivaling +rivalled +rivalling +rivalries +rivalry +rivalry's +rivals +riven +river +river's +riverbed +riverbed's +riverbeds +riverfront +rivers +riverside +riverside's +riversides +rivet +rivet's +riveted +riveter +riveter's +riveters +riveting +rivets +rivetted +rivetting +rivulet +rivulet's +rivulets +roach +roach's +roaches +road +road's +roadbed +roadbed's +roadbeds +roadblock +roadblock's +roadblocked +roadblocking +roadblocks +roadhouse +roadhouse's +roadhouses +roadkill +roadkill's +roadrunner +roadrunner's +roadrunners +roads +roadshow +roadside +roadside's +roadsides +roadster 
+roadster's +roadsters +roadway +roadway's +roadways +roadwork +roadwork's +roadworthy +roam +roamed +roamer +roamer's +roamers +roaming +roams +roan +roan's +roans +roar +roar's +roared +roaring +roaring's +roars +roast +roast's +roasted +roaster +roaster's +roasters +roasting +roasts +rob +robbed +robber +robber's +robberies +robbers +robbery +robbery's +robbing +robe +robe's +robed +robes +robin +robin's +robing +robins +robocall +robocall's +robocalled +robocalling +robocalls +robot +robot's +robotic +robotics +robotics's +robots +robs +robust +robuster +robustest +robustly +robustness +robustness's +rock +rock's +rocked +rocker +rocker's +rockers +rocket +rocket's +rocketed +rocketing +rocketry +rocketry's +rockets +rockier +rockiest +rockiness +rockiness's +rocking +rocks +rocky +rococo +rococo's +rod +rod's +rode +rodent +rodent's +rodents +rodeo +rodeo's +rodeos +rods +roe +roe's +roebuck +roebuck's +roebucks +roentgen +roentgen's +roentgens +roes +roger +rogered +rogering +rogers +rogue +rogue's +roguery +roguery's +rogues +roguish +roguishly +roil +roiled +roiling +roils +roister +roistered +roisterer +roisterer's +roisterers +roistering +roisters +role +role's +roles +roll +roll's +rollback +rollback's +rollbacks +rolled +roller +roller's +rollers +rollerskating +rollerskating's +rollick +rollicked +rollicking +rollicking's +rollicks +rolling +rolls +romaine +romaine's +roman +romance +romance's +romanced +romances +romancing +romantic +romantic's +romantically +romanticism +romanticism's +romanticist +romanticist's +romanticists +romanticize +romanticized +romanticizes +romanticizing +romantics +romp +romp's +romped +romper +romper's +rompers +romping +romps +rood +rood's +roods +roof +roof's +roofed +roofer +roofer's +roofers +roofing +roofing's +roofs +rooftop +rooftop's +rooftops +rook +rook's +rooked +rookeries +rookery +rookery's +rookie +rookie's +rookies +rooking +rooks +room +room's +roomed +roomer +roomer's +roomers +roomful +roomful's 
+roomfuls +roomier +roomiest +roominess +roominess's +rooming +roommate +roommate's +roommates +rooms +roomy +roost +roost's +roosted +rooster +rooster's +roosters +roosting +roosts +root +root's +rooted +rooter +rooting +rootless +roots +rope +rope's +roped +ropes +roping +rosaries +rosary +rosary's +rose +rose's +roseate +rosebud +rosebud's +rosebuds +rosebush +rosebush's +rosebushes +rosemary +rosemary's +roses +rosette +rosette's +rosettes +rosewood +rosewood's +rosewoods +rosier +rosiest +rosily +rosin +rosin's +rosined +rosiness +rosiness's +rosining +rosins +roster +roster's +rosters +rostra +rostrum +rostrum's +rostrums +rosy +rot +rot's +rotaries +rotary +rotary's +rotate +rotated +rotates +rotating +rotation +rotation's +rotational +rotations +rote +rote's +rotisserie +rotisserie's +rotisseries +rotogravure +rotogravure's +rotogravures +rotor +rotor's +rotors +rots +rotted +rotten +rottener +rottenest +rottenness +rottenness's +rotting +rotund +rotunda +rotunda's +rotundas +rotundity +rotundity's +rotundness +rotundness's +rouge +rouge's +rouged +rouges +rough +rough's +roughage +roughage's +roughed +roughen +roughened +roughening +roughens +rougher +roughest +roughhouse +roughhouse's +roughhoused +roughhouses +roughhousing +roughing +roughly +roughneck +roughneck's +roughnecked +roughnecking +roughnecks +roughness +roughness's +roughs +roughshod +rouging +roulette +roulette's +round +round's +roundabout +roundabout's +roundabouts +rounded +roundelay +roundelay's +roundelays +rounder +roundest +roundhouse +roundhouse's +roundhouses +rounding +roundish +roundly +roundness +roundness's +rounds +roundup +roundup's +roundups +roundworm +roundworm's +roundworms +rouse +roused +rouses +rousing +roustabout +roustabout's +roustabouts +rout +rout's +route +route's +routed +routeing +router +routes +routine +routine's +routinely +routines +routing +routinize +routinized +routinizes +routinizing +routs +roué +roué's +roués +rove +roved +rover +rover's +rovers +roves 
+roving +row +row's +rowboat +rowboat's +rowboats +rowdier +rowdies +rowdiest +rowdiness +rowdiness's +rowdy +rowdy's +rowdyism +rowdyism's +rowed +rowel +rowel's +roweled +roweling +rowelled +rowelling +rowels +rower +rower's +rowers +rowing +rowing's +rows +royal +royal's +royalist +royalist's +royalists +royally +royals +royalties +royalties's +royalty +royalty's +rs +rub +rub's +rubbed +rubber +rubber's +rubberize +rubberized +rubberizes +rubberizing +rubberneck +rubberneck's +rubbernecked +rubbernecking +rubbernecks +rubbers +rubbery +rubbing +rubbish +rubbish's +rubbished +rubbishes +rubbishing +rubbishy +rubble +rubble's +rubdown +rubdown's +rubdowns +rube +rube's +rubella +rubella's +rubes +rubicund +rubier +rubies +rubiest +ruble +ruble's +rubles +rubric +rubric's +rubrics +rubs +ruby +ruby's +rucksack +rucksack's +rucksacks +ruckus +ruckus's +ruckuses +rudder +rudder's +rudders +ruddier +ruddiest +ruddiness +ruddiness's +ruddy +rude +rudely +rudeness +rudeness's +ruder +rudest +rudiment +rudiment's +rudimentary +rudiments +rue +rue's +rued +rueful +ruefully +rues +ruff +ruff's +ruffed +ruffian +ruffian's +ruffians +ruffing +ruffle +ruffle's +ruffled +ruffles +ruffling +ruffs +rug +rug's +rugby +rugby's +rugged +ruggeder +ruggedest +ruggedly +ruggedness +ruggedness's +rugrat +rugrat's +rugrats +rugs +ruin +ruin's +ruination +ruination's +ruined +ruing +ruining +ruinous +ruinously +ruins +rule +rule's +ruled +ruler +ruler's +rulers +rules +ruling +ruling's +rulings +rum +rum's +rumba +rumba's +rumbaed +rumbaing +rumbas +rumble +rumble's +rumbled +rumbles +rumbling +rumbling's +rumblings +ruminant +ruminant's +ruminants +ruminate +ruminated +ruminates +ruminating +rumination +rumination's +ruminations +rummage +rummage's +rummaged +rummages +rummaging +rummer +rummest +rummy +rummy's +rumor +rumor's +rumored +rumoring +rumors +rump +rump's +rumple +rumple's +rumpled +rumples +rumpling +rumps +rumpus +rumpus's +rumpuses +rums +run +run's +runabout +runabout's 
+runabouts +runaround +runaround's +runarounds +runaway +runaway's +runaways +rundown +rundown's +rundowns +rune +rune's +runes +rung +rung's +rungs +runnel +runnel's +runnels +runner +runner's +runners +runnier +runniest +running +running's +runny +runoff +runoff's +runoffs +runs +runt +runt's +runts +runway +runway's +runways +rupee +rupee's +rupees +rupture +rupture's +ruptured +ruptures +rupturing +rural +ruse +ruse's +ruses +rush +rush's +rushed +rushes +rushing +rusk +rusk's +rusks +russet +russet's +russets +rust +rust's +rusted +rustic +rustic's +rustically +rusticity +rusticity's +rustics +rustier +rustiest +rustiness +rustiness's +rusting +rustle +rustle's +rustled +rustler +rustler's +rustlers +rustles +rustling +rustproof +rustproofed +rustproofing +rustproofs +rusts +rusty +rut +rut's +rutabaga +rutabaga's +rutabagas +ruthless +ruthlessly +ruthlessness +ruthlessness's +ruts +rutted +rutting +rye +rye's +s +sabbatical +sabbatical's +sabbaticals +saber +saber's +sabers +sable +sable's +sables +sabotage +sabotage's +sabotaged +sabotages +sabotaging +saboteur +saboteur's +saboteurs +sabre +sabre's +sabres +sac +sac's +saccharin +saccharin's +saccharine +sacerdotal +sachem +sachem's +sachems +sachet +sachet's +sachets +sack +sack's +sackcloth +sackcloth's +sacked +sackful +sackful's +sackfuls +sacking +sacking's +sacks +sacrament +sacrament's +sacramental +sacraments +sacred +sacredly +sacredness +sacredness's +sacrifice +sacrifice's +sacrificed +sacrifices +sacrificial +sacrificing +sacrilege +sacrilege's +sacrileges +sacrilegious +sacristan +sacristan's +sacristans +sacristies +sacristy +sacristy's +sacrosanct +sacs +sad +sadden +saddened +saddening +saddens +sadder +saddest +saddle +saddle's +saddlebag +saddlebag's +saddlebags +saddled +saddles +saddling +sades +sadism +sadism's +sadist +sadist's +sadistic +sadistically +sadists +sadly +sadness +sadness's +safari +safari's +safaried +safariing +safaris +safe +safe's +safeguard +safeguard's +safeguarded 
+safeguarding +safeguards +safekeeping +safekeeping's +safely +safeness +safeness's +safer +safes +safest +safeties +safety +safety's +safflower +safflower's +safflowers +saffron +saffron's +saffrons +sag +sag's +saga +saga's +sagacious +sagacity +sagacity's +sagas +sage +sage's +sagebrush +sagebrush's +sager +sages +sagest +sagged +sagging +sago +sago's +sags +saguaro +saguaro's +saguaros +sahib +sahib's +sahibs +said +sail +sail's +sailboard +sailboard's +sailboards +sailboat +sailboat's +sailboats +sailcloth +sailcloth's +sailed +sailfish +sailfish's +sailfishes +sailing +sailing's +sailings +sailor +sailor's +sailors +sails +saint +saint's +sainthood +sainthood's +saintlier +saintliest +saintliness +saintliness's +saintly +saints +saith +sake +sake's +saki +saki's +salaam +salaam's +salaamed +salaaming +salaams +salable +salacious +salaciously +salaciousness +salaciousness's +salad +salad's +salads +salamander +salamander's +salamanders +salami +salami's +salamis +salaried +salaries +salary +salary's +sale +sale's +saleable +sales +salesclerk +salesclerk's +salesclerks +salesgirl +salesgirl's +salesgirls +salesman +salesman's +salesmanship +salesmanship's +salesmen +salespeople +salespeople's +salesperson +salesperson's +salespersons +saleswoman +saleswoman's +saleswomen +salience +salience's +salient +salient's +salients +saline +saline's +salines +salinity +salinity's +saliva +saliva's +salivary +salivate +salivated +salivates +salivating +salivation +salivation's +sallied +sallies +sallow +sallower +sallowest +sally +sally's +sallying +salmon +salmon's +salmonella +salmonella's +salmonellae +salmonellas +salmons +salon +salon's +salons +saloon +saloon's +saloons +salsa +salsa's +salsas +salt +salt's +saltcellar +saltcellar's +saltcellars +salted +salter +saltest +saltier +saltiest +saltine +saltine's +saltines +saltiness +saltiness's +salting +saltpeter +saltpeter's +saltpetre +saltpetre's +salts +saltshaker +saltshaker's +saltshakers +saltwater +saltwater's 
+salty +salubrious +salutary +salutation +salutation's +salutations +salute +salute's +saluted +salutes +saluting +salvage +salvage's +salvageable +salvaged +salvages +salvaging +salvation +salvation's +salve +salve's +salved +salver +salver's +salvers +salves +salving +salvo +salvo's +salvoes +salvos +samba +samba's +sambaed +sambaing +sambas +same +sameness +sameness's +sames +samovar +samovar's +samovars +sampan +sampan's +sampans +sample +sample's +sampled +sampler +sampler's +samplers +samples +sampling +sampling's +samplings +samurai +samurai's +sanatoria +sanatorium +sanatorium's +sanatoriums +sancta +sanctification +sanctification's +sanctified +sanctifies +sanctify +sanctifying +sanctimonious +sanctimoniously +sanction +sanction's +sanctioned +sanctioning +sanctions +sanctity +sanctity's +sanctuaries +sanctuary +sanctuary's +sanctum +sanctum's +sanctums +sand +sand's +sandal +sandal's +sandals +sandalwood +sandalwood's +sandbag +sandbag's +sandbagged +sandbagging +sandbags +sandbank +sandbank's +sandbanks +sandbar +sandbar's +sandbars +sandblast +sandblast's +sandblasted +sandblaster +sandblaster's +sandblasters +sandblasting +sandblasts +sandbox +sandbox's +sandboxes +sandcastle +sandcastle's +sandcastles +sanded +sander +sander's +sanders +sandhog +sandhog's +sandhogs +sandier +sandiest +sandiness +sandiness's +sanding +sandlot +sandlot's +sandlots +sandman +sandman's +sandmen +sandpaper +sandpaper's +sandpapered +sandpapering +sandpapers +sandpiper +sandpiper's +sandpipers +sands +sandstone +sandstone's +sandstorm +sandstorm's +sandstorms +sandwich +sandwich's +sandwiched +sandwiches +sandwiching +sandy +sane +sanely +saner +sanest +sang +sangfroid +sangfroid's +sanguinary +sanguine +sanitaria +sanitarium +sanitarium's +sanitariums +sanitary +sanitation +sanitation's +sanitize +sanitized +sanitizes +sanitizing +sanity +sanity's +sank +sans +sanserif +sap +sap's +sapience +sapience's +sapient +sapling +sapling's +saplings +sapped +sapphire +sapphire's 
+sapphires +sappier +sappiest +sapping +sappy +saprophyte +saprophyte's +saprophytes +saps +sapsucker +sapsucker's +sapsuckers +sarape +sarape's +sarapes +sarcasm +sarcasm's +sarcasms +sarcastic +sarcastically +sarcoma +sarcoma's +sarcomas +sarcomata +sarcophagi +sarcophagus +sarcophagus's +sarcophaguses +sardine +sardine's +sardines +sardonic +sardonically +saree +saree's +sarees +sari +sari's +saris +sarong +sarong's +sarongs +sarsaparilla +sarsaparilla's +sarsaparillas +sartorial +sartorially +sash +sash's +sashay +sashay's +sashayed +sashaying +sashays +sashes +sass +sass's +sassafras +sassafras's +sassafrases +sassed +sasses +sassier +sassiest +sassing +sassy +sat +satanic +satanically +satanism +satanism's +satay +satchel +satchel's +satchels +sate +sated +sateen +sateen's +satellite +satellite's +satellited +satellites +satelliting +sates +satiate +satiated +satiates +satiating +satiety +satiety's +satin +satin's +sating +satinwood +satinwood's +satinwoods +satiny +satire +satire's +satires +satirical +satirically +satirist +satirist's +satirists +satirize +satirized +satirizes +satirizing +satisfaction +satisfaction's +satisfactions +satisfactorily +satisfactory +satisfied +satisfies +satisfy +satisfying +satrap +satrap's +satraps +saturate +saturated +saturates +saturating +saturation +saturation's +saturnine +satyr +satyr's +satyrs +sauce +sauce's +sauced +saucepan +saucepan's +saucepans +saucer +saucer's +saucers +sauces +saucier +sauciest +saucily +sauciness +sauciness's +saucing +saucy +sauerkraut +sauerkraut's +sauna +sauna's +saunaed +saunaing +saunas +saunter +saunter's +sauntered +sauntering +saunters +sausage +sausage's +sausages +sauted +sauté +sauté's +sautéed +sautéing +sautés +savage +savage's +savaged +savagely +savageness +savageness's +savager +savageries +savagery +savagery's +savages +savagest +savaging +savanna +savanna's +savannah +savannah's +savannahes +savannahs +savannas +savant +savant's +savants +save +save's +saved +saver 
+saver's +savers +saves +saving +saving's +savings +savings's +savior +savior's +saviors +saviour +saviour's +saviours +savor +savor's +savored +savorier +savories +savoriest +savoring +savors +savory +savory's +savvied +savvier +savvies +savviest +savvy +savvy's +savvying +saw +saw's +sawdust +sawdust's +sawed +sawhorse +sawhorse's +sawhorses +sawing +sawmill +sawmill's +sawmills +sawn +saws +sawyer +sawyer's +sawyers +sax +sax's +saxes +saxophone +saxophone's +saxophones +saxophonist +saxophonist's +saxophonists +say +say's +saying +saying's +sayings +says +scab +scab's +scabbard +scabbard's +scabbards +scabbed +scabbier +scabbiest +scabbing +scabby +scabies +scabies's +scabrous +scabs +scad +scad's +scads +scaffold +scaffold's +scaffolding +scaffolding's +scaffolds +scalar +scalars +scalawag +scalawag's +scalawags +scald +scald's +scalded +scalding +scalds +scale +scale's +scaled +scalene +scales +scalier +scaliest +scaling +scallion +scallion's +scallions +scallop +scallop's +scalloped +scalloping +scallops +scallywag +scallywag's +scallywags +scalp +scalp's +scalped +scalpel +scalpel's +scalpels +scalper +scalper's +scalpers +scalping +scalps +scaly +scam +scam's +scammed +scammer +scammers +scamming +scamp +scamp's +scamper +scamper's +scampered +scampering +scampers +scampi +scampi's +scampies +scamps +scams +scan +scan's +scandal +scandal's +scandalize +scandalized +scandalizes +scandalizing +scandalmonger +scandalmonger's +scandalmongers +scandalous +scandalously +scandals +scanned +scanner +scanner's +scanners +scanning +scans +scansion +scansion's +scant +scanted +scanter +scantest +scantier +scanties +scantiest +scantily +scantiness +scantiness's +scanting +scants +scanty +scapegoat +scapegoat's +scapegoated +scapegoating +scapegoats +scapula +scapula's +scapulae +scapulas +scar +scar's +scarab +scarab's +scarabs +scarce +scarcely +scarceness +scarceness's +scarcer +scarcest +scarcity +scarcity's +scare +scare's +scarecrow +scarecrow's +scarecrows 
+scared +scares +scarf +scarf's +scarfed +scarfing +scarfs +scarier +scariest +scarified +scarifies +scarify +scarifying +scaring +scarlet +scarlet's +scarred +scarring +scars +scarves +scary +scat +scat's +scathing +scathingly +scatological +scats +scatted +scatter +scatter's +scatterbrain +scatterbrain's +scatterbrained +scatterbrains +scattered +scattering +scatters +scatting +scavenge +scavenged +scavenger +scavenger's +scavengers +scavenges +scavenging +scenario +scenario's +scenarios +scene +scene's +scenery +scenery's +scenes +scenic +scenically +scent +scent's +scented +scenting +scents +scepter +scepter's +scepters +schedule +schedule's +scheduled +scheduler +schedulers +schedules +scheduling +schema +schematic +schematic's +schematically +schematics +scheme +scheme's +schemed +schemer +schemer's +schemers +schemes +scheming +scherzi +scherzo +scherzo's +scherzos +schism +schism's +schismatic +schismatic's +schismatics +schisms +schist +schist's +schizoid +schizoid's +schizoids +schizophrenia +schizophrenia's +schizophrenic +schizophrenic's +schizophrenics +schlemiel +schlemiel's +schlemiels +schlep +schlep's +schlepp +schlepp's +schlepped +schlepping +schlepps +schleps +schlock +schlock's +schlocky +schmaltz +schmaltz's +schmaltzier +schmaltziest +schmaltzy +schmalz +schmalz's +schmalzy +schmooze +schmoozed +schmoozes +schmoozing +schmuck +schmuck's +schmucks +schnapps +schnapps's +schnauzer +schnauzer's +schnauzers +scholar +scholar's +scholarly +scholars +scholarship +scholarship's +scholarships +scholastic +scholastically +school +school's +schoolbook +schoolbook's +schoolbooks +schoolboy +schoolboy's +schoolboys +schoolchild +schoolchild's +schoolchildren +schoolchildren's +schooldays +schooled +schoolgirl +schoolgirl's +schoolgirls +schoolhouse +schoolhouse's +schoolhouses +schooling +schooling's +schoolmarm +schoolmarm's +schoolmarms +schoolmaster +schoolmaster's +schoolmasters +schoolmate +schoolmate's +schoolmates +schoolmistress +schoolmistress's 
+schoolmistresses +schoolroom +schoolroom's +schoolrooms +schools +schoolteacher +schoolteacher's +schoolteachers +schoolwork +schoolwork's +schoolyard +schoolyard's +schoolyards +schooner +schooner's +schooners +schrod +schrod's +schrods +schtick +schtick's +schticks +schuss +schuss's +schussed +schusses +schussing +schwa +schwa's +schwas +sciatic +sciatica +sciatica's +science +science's +sciences +scientific +scientifically +scientist +scientist's +scientists +scimitar +scimitar's +scimitars +scintilla +scintilla's +scintillas +scintillate +scintillated +scintillates +scintillating +scintillation +scintillation's +scion +scion's +scions +scissor +scissors +sclerosis +sclerosis's +sclerotic +scoff +scoff's +scoffed +scoffing +scofflaw +scofflaw's +scofflaws +scoffs +scold +scold's +scolded +scolding +scolding's +scoldings +scolds +scoliosis +scoliosis's +scollop +scollop's +scolloped +scolloping +scollops +sconce +sconce's +sconces +scone +scone's +scones +scoop +scoop's +scooped +scooping +scoops +scoot +scooted +scooter +scooter's +scooters +scooting +scoots +scope +scope's +scoped +scopes +scoping +scorch +scorch's +scorched +scorcher +scorcher's +scorchers +scorches +scorching +score +score's +scoreboard +scoreboard's +scoreboards +scorecard +scorecard's +scorecards +scored +scoreless +scorer +scorer's +scorers +scores +scoring +scorn +scorn's +scorned +scornful +scornfully +scorning +scorns +scorpion +scorpion's +scorpions +scotch +scotch's +scotched +scotches +scotching +scotchs +scoundrel +scoundrel's +scoundrels +scour +scoured +scourge +scourge's +scourged +scourges +scourging +scouring +scours +scout +scout's +scouted +scouting +scouting's +scoutmaster +scoutmaster's +scoutmasters +scouts +scow +scow's +scowl +scowl's +scowled +scowling +scowls +scows +scrabble +scrabble's +scrabbled +scrabbles +scrabbling +scragglier +scraggliest +scraggly +scram +scramble +scramble's +scrambled +scrambler +scrambler's +scramblers +scrambles +scrambling +scrammed 
+scramming +scrams +scrap +scrap's +scrapbook +scrapbook's +scrapbooks +scrape +scrape's +scraped +scraper +scraper's +scrapers +scrapes +scraping +scrapped +scrappier +scrappiest +scrapping +scrappy +scraps +scratch +scratch's +scratched +scratches +scratchier +scratchiest +scratchiness +scratchiness's +scratching +scratchy +scrawl +scrawl's +scrawled +scrawling +scrawls +scrawnier +scrawniest +scrawny +scream +scream's +screamed +screaming +screams +screech +screech's +screeched +screeches +screechier +screechiest +screeching +screechy +screen +screen's +screened +screening +screening's +screenings +screenplay +screenplay's +screenplays +screens +screenshot +screenshots +screenwriter +screenwriter's +screenwriters +screw +screw's +screwball +screwball's +screwballs +screwdriver +screwdriver's +screwdrivers +screwed +screwier +screwiest +screwing +screws +screwy +scribble +scribble's +scribbled +scribbler +scribbler's +scribblers +scribbles +scribbling +scribe +scribe's +scribes +scrimmage +scrimmage's +scrimmaged +scrimmages +scrimmaging +scrimp +scrimped +scrimping +scrimps +scrimshaw +scrimshaw's +scrimshawed +scrimshawing +scrimshaws +scrip +scrip's +scrips +script +script's +scripted +scripting +scripts +scriptural +scripture +scripture's +scriptures +scriptwriter +scriptwriter's +scriptwriters +scrod +scrod's +scrods +scrofula +scrofula's +scroll +scroll's +scrolled +scrolling +scrolls +scrooge +scrooge's +scrooges +scrota +scrotum +scrotum's +scrotums +scrounge +scrounged +scrounger +scrounger's +scroungers +scrounges +scrounging +scrub +scrub's +scrubbed +scrubber +scrubber's +scrubbers +scrubbier +scrubbiest +scrubbing +scrubby +scrubs +scruff +scruff's +scruffier +scruffiest +scruffs +scruffy +scrumptious +scrunch +scrunch's +scrunched +scrunches +scrunchie +scrunchie's +scrunchies +scrunching +scrunchy +scrunchy's +scruple +scruple's +scrupled +scruples +scrupling +scrupulous +scrupulously +scrutinize +scrutinized +scrutinizes +scrutinizing +scrutiny 
+scrutiny's +scuba +scuba's +scubaed +scubaing +scubas +scud +scud's +scudded +scudding +scuds +scuff +scuff's +scuffed +scuffing +scuffle +scuffle's +scuffled +scuffles +scuffling +scuffs +scull +scull's +sculled +sculleries +scullery +scullery's +sculling +scullion +scullion's +scullions +sculls +sculpt +sculpted +sculpting +sculptor +sculptor's +sculptors +sculpts +sculptural +sculpture +sculpture's +sculptured +sculptures +sculpturing +scum +scum's +scumbag +scumbag's +scumbags +scummed +scummier +scummiest +scumming +scummy +scums +scupper +scupper's +scuppered +scuppering +scuppers +scurf +scurf's +scurfy +scurried +scurries +scurrilous +scurrilously +scurry +scurry's +scurrying +scurvier +scurviest +scurvy +scurvy's +scuttle +scuttle's +scuttlebutt +scuttlebutt's +scuttled +scuttles +scuttling +scuzzier +scuzziest +scuzzy +scythe +scythe's +scythed +scythes +scything +sea +sea's +seabed +seabed's +seabeds +seabird +seabird's +seabirds +seaboard +seaboard's +seaboards +seacoast +seacoast's +seacoasts +seafarer +seafarer's +seafarers +seafaring +seafaring's +seafood +seafood's +seagoing +seal +seal's +sealant +sealant's +sealants +sealed +sealer +sealer's +sealers +sealing +seals +sealskin +sealskin's +seam +seam's +seaman +seaman's +seamanship +seamanship's +seamed +seamen +seamier +seamiest +seaming +seamless +seams +seamstress +seamstress's +seamstresses +seamy +seaplane +seaplane's +seaplanes +seaport +seaport's +seaports +sear +sear's +search +search's +searched +searcher +searcher's +searchers +searches +searching +searchingly +searchlight +searchlight's +searchlights +seared +searing +sears +seas +seascape +seascape's +seascapes +seashell +seashell's +seashells +seashore +seashore's +seashores +seasick +seasickness +seasickness's +seaside +seaside's +seasides +season +season's +seasonable +seasonal +seasonally +seasoned +seasoning +seasoning's +seasonings +seasons +seat +seat's +seated +seating +seating's +seats +seaward +seaward's +seawards +seaway 
+seaway's +seaways +seaweed +seaweed's +seaworthy +sebaceous +secede +seceded +secedes +seceding +secession +secession's +secessionist +secessionist's +secessionists +seclude +secluded +secludes +secluding +seclusion +seclusion's +seclusive +second +second's +secondaries +secondarily +secondary +secondary's +seconded +secondhand +seconding +secondly +seconds +secrecy +secrecy's +secret +secret's +secretarial +secretariat +secretariat's +secretariats +secretaries +secretary +secretary's +secrete +secreted +secretes +secreting +secretion +secretion's +secretions +secretive +secretively +secretiveness +secretiveness's +secretly +secrets +secs +sect +sect's +sectarian +sectarian's +sectarianism +sectarianism's +sectarians +section +section's +sectional +sectional's +sectionalism +sectionalism's +sectionals +sectioned +sectioning +sections +sector +sector's +sectors +sects +secular +secularism +secularism's +secularization +secularization's +secularize +secularized +secularizes +secularizing +secure +secured +securely +securer +secures +securest +securing +securities +security +security's +sedan +sedan's +sedans +sedate +sedated +sedately +sedater +sedates +sedatest +sedating +sedation +sedation's +sedative +sedative's +sedatives +sedentary +sedge +sedge's +sediment +sediment's +sedimentary +sedimentation +sedimentation's +sediments +sedition +sedition's +seditious +seduce +seduced +seducer +seducer's +seducers +seduces +seducing +seduction +seduction's +seductions +seductive +seductively +sedulous +see +see's +seed +seed's +seeded +seedier +seediest +seediness +seediness's +seeding +seedless +seedling +seedling's +seedlings +seeds +seedy +seeing +seeings +seek +seeker +seeker's +seekers +seeking +seeks +seem +seemed +seeming +seemingly +seemlier +seemliest +seemliness +seemliness's +seemly +seems +seen +seep +seepage +seepage's +seeped +seeping +seeps +seer +seer's +seers +seersucker +seersucker's +sees +seesaw +seesaw's +seesawed +seesawing +seesaws +seethe +seethed 
+seethes +seething +segment +segment's +segmentation +segmentation's +segmented +segmenting +segments +segregate +segregated +segregates +segregating +segregation +segregation's +segregationist +segregationist's +segregationists +segue +segue's +segued +segueing +segues +seismic +seismically +seismograph +seismograph's +seismographic +seismographs +seismologist +seismologist's +seismologists +seismology +seismology's +seize +seized +seizes +seizing +seizure +seizure's +seizures +seldom +select +selected +selecting +selection +selection's +selections +selective +selectively +selectivity +selectivity's +selectman +selectman's +selectmen +selector +selector's +selectors +selects +selenium +selenium's +self +self's +selfie +selfie's +selfies +selfish +selfishly +selfishness +selfishness's +selfless +selflessly +selflessness +selflessness's +selfsame +sell +sell's +seller +seller's +sellers +selling +selloff +selloff's +selloffs +sellout +sellout's +sellouts +sells +seltzer +seltzer's +selvage +selvage's +selvages +selvedge +selvedge's +selvedges +selves +semantic +semantically +semantics +semantics's +semaphore +semaphore's +semaphored +semaphores +semaphoring +semblance +semblance's +semblances +semen +semen's +semester +semester's +semesters +semi +semi's +semiannual +semiautomatic +semiautomatic's +semiautomatics +semicircle +semicircle's +semicircles +semicircular +semicolon +semicolon's +semicolons +semiconductor +semiconductor's +semiconductors +semiconscious +semifinal +semifinal's +semifinalist +semifinalist's +semifinalists +semifinals +semimonthlies +semimonthly +semimonthly's +seminal +seminar +seminar's +seminarian +seminarian's +seminarians +seminaries +seminars +seminary +seminary's +semiotics +semipermeable +semiprecious +semiprivate +semiprofessional +semiprofessional's +semiprofessionals +semiretired +semis +semiskilled +semitone +semitone's +semitones +semitrailer +semitrailer's +semitrailers +semitropical +semiweeklies +semiweekly +semiweekly's 
+senate +senate's +senates +senator +senator's +senatorial +senators +send +sender +sender's +senders +sending +sends +senile +senility +senility's +senior +senior's +seniority +seniority's +seniors +senna +senna's +sensation +sensation's +sensational +sensationalism +sensationalism's +sensationalist +sensationalist's +sensationalists +sensationally +sensations +sense +sense's +sensed +senseless +senselessly +senselessness +senselessness's +senses +sensibilities +sensibility +sensibility's +sensible +sensibly +sensing +sensitive +sensitive's +sensitively +sensitiveness +sensitiveness's +sensitives +sensitivities +sensitivity +sensitivity's +sensitization +sensitization's +sensitize +sensitized +sensitizes +sensitizing +sensor +sensor's +sensors +sensory +sensual +sensuality +sensuality's +sensually +sensuous +sensuously +sensuousness +sensuousness's +sent +sentence +sentence's +sentenced +sentences +sentencing +sententious +sentience +sentient +sentiment +sentiment's +sentimental +sentimentalism +sentimentalism's +sentimentalist +sentimentalist's +sentimentalists +sentimentality +sentimentality's +sentimentalize +sentimentalized +sentimentalizes +sentimentalizing +sentimentally +sentiments +sentinel +sentinel's +sentinels +sentries +sentry +sentry's +sepal +sepal's +sepals +separable +separate +separate's +separated +separately +separates +separating +separation +separation's +separations +separatism +separatism's +separatist +separatist's +separatists +separator +separator's +separators +sepia +sepia's +sepsis +sepsis's +septa +septet +septet's +septets +septette +septette's +septettes +septic +septicemia +septicemia's +septuagenarian +septuagenarian's +septuagenarians +septum +septum's +septums +sepulcher +sepulcher's +sepulchered +sepulchering +sepulchers +sepulchral +sequel +sequel's +sequels +sequence +sequence's +sequenced +sequencer +sequencers +sequences +sequencing +sequential +sequentially +sequester +sequestered +sequestering +sequesters +sequestration 
+sequestration's +sequestrations +sequin +sequin's +sequined +sequins +sequitur +sequoia +sequoia's +sequoias +sera +seraglio +seraglio's +seraglios +serape +serape's +serapes +seraph +seraph's +seraphic +seraphim +seraphs +sere +serenade +serenade's +serenaded +serenades +serenading +serendipitous +serendipity +serendipity's +serene +serenely +sereneness +sereneness's +serener +serenest +serenity +serenity's +serer +serest +serf +serf's +serfdom +serfdom's +serfs +serge +serge's +sergeant +sergeant's +sergeants +serial +serial's +serialization +serialization's +serialize +serialized +serializes +serializing +serially +serials +series +series's +serious +seriously +seriousness +seriousness's +sermon +sermon's +sermonize +sermonized +sermonizes +sermonizing +sermons +serous +serpent +serpent's +serpentine +serpentine's +serpents +serrated +serried +serum +serum's +serums +servant +servant's +servants +serve +serve's +served +server +server's +servers +serves +service +service's +serviceable +serviced +serviceman +serviceman's +servicemen +services +servicewoman +servicewoman's +servicewomen +servicing +serviette +serviette's +serviettes +servile +servility +servility's +serving +serving's +servings +servitude +servitude's +servo +servo's +servomechanism +servomechanism's +servomechanisms +servos +sesame +sesame's +sesames +session +session's +sessions +set +set's +setback +setback's +setbacks +sets +settable +settee +settee's +settees +setter +setter's +setters +setting +setting's +settings +settle +settle's +settled +settlement +settlement's +settlements +settler +settler's +settlers +settles +settling +setup +setup's +setups +seven +seven's +sevens +seventeen +seventeen's +seventeens +seventeenth +seventeenth's +seventeenths +seventh +seventh's +sevenths +seventies +seventieth +seventieth's +seventieths +seventy +seventy's +sever +several +several's +severally +severance +severance's +severances +severe +severed +severely +severer +severest +severing +severity 
+severity's +severs +sew +sewage +sewage's +sewed +sewer +sewer's +sewerage +sewerage's +sewers +sewing +sewing's +sewn +sews +sex +sex's +sexagenarian +sexagenarian's +sexagenarians +sexed +sexes +sexier +sexiest +sexily +sexiness +sexiness's +sexing +sexism +sexism's +sexist +sexist's +sexists +sexless +sexpot +sexpot's +sexpots +sextant +sextant's +sextants +sextet +sextet's +sextets +sextette +sextette's +sextettes +sexting +sexton +sexton's +sextons +sexual +sexuality +sexuality's +sexually +sexy +sh +shabbier +shabbiest +shabbily +shabbiness +shabbiness's +shabby +shack +shack's +shackle +shackle's +shackled +shackles +shackling +shacks +shad +shad's +shade +shade's +shaded +shades +shadier +shadiest +shadiness +shadiness's +shading +shading's +shadings +shadow +shadow's +shadowbox +shadowboxed +shadowboxes +shadowboxing +shadowed +shadowier +shadowiest +shadowing +shadows +shadowy +shads +shady +shaft +shaft's +shafted +shafting +shafts +shag +shag's +shagged +shaggier +shaggiest +shagginess +shagginess's +shagging +shaggy +shags +shah +shah's +shahs +shaikh +shaikh's +shaikhs +shake +shake's +shakedown +shakedown's +shakedowns +shaken +shaker +shaker's +shakers +shakes +shakeup +shakeup's +shakeups +shakier +shakiest +shakily +shakiness +shakiness's +shaking +shaky +shale +shale's +shall +shallot +shallot's +shallots +shallow +shallow's +shallower +shallowest +shallowness +shallowness's +shallows +shalt +sham +sham's +shaman +shaman's +shamans +shamble +shamble's +shambled +shambles +shambles's +shambling +shame +shame's +shamed +shamefaced +shameful +shamefully +shamefulness +shamefulness's +shameless +shamelessly +shames +shaming +shammed +shammies +shamming +shammy +shammy's +shampoo +shampoo's +shampooed +shampooing +shampoos +shamrock +shamrock's +shamrocks +shams +shan't +shandy +shanghai +shanghaied +shanghaiing +shanghais +shank +shank's +shanks +shanties +shantung +shantung's +shanty +shanty's +shantytown +shantytown's +shantytowns +shape +shape's 
+shaped +shapeless +shapelessly +shapelessness +shapelessness's +shapelier +shapeliest +shapeliness +shapeliness's +shapely +shapes +shaping +sharable +shard +shard's +shards +share +share's +shareable +sharecropper +sharecropper's +sharecroppers +shared +shareholder +shareholder's +shareholders +shares +sharia +shariah +sharing +shark +shark's +sharked +sharking +sharks +sharkskin +sharkskin's +sharp +sharp's +sharped +sharpen +sharpened +sharpener +sharpener's +sharpeners +sharpening +sharpens +sharper +sharper's +sharpers +sharpest +sharping +sharply +sharpness +sharpness's +sharps +sharpshooter +sharpshooter's +sharpshooters +shat +shatter +shatter's +shattered +shattering +shatterproof +shatters +shave +shave's +shaved +shaven +shaver +shaver's +shavers +shaves +shaving +shaving's +shavings +shawl +shawl's +shawls +shaykh +shaykh's +shaykhs +she +she'd +she'll +she's +sheaf +sheaf's +shear +shear's +sheared +shearer +shearer's +shearers +shearing +shears +sheath +sheath's +sheathe +sheathed +sheathes +sheathing +sheathing's +sheathings +sheaths +sheave +sheave's +sheaves +shebang +shebang's +shebangs +shed +shed's +shedding +sheds +sheen +sheen's +sheep +sheep's +sheepdog +sheepdog's +sheepdogs +sheepfold +sheepfold's +sheepfolds +sheepish +sheepishly +sheepishness +sheepishness's +sheepskin +sheepskin's +sheepskins +sheer +sheer's +sheered +sheerer +sheerest +sheering +sheers +sheet +sheet's +sheeting +sheeting's +sheets +sheik +sheik's +sheikdom +sheikdom's +sheikdoms +sheikh +sheikh's +sheikhdom +sheikhdom's +sheikhdoms +sheikhs +sheiks +shekel +shekel's +shekels +shelf +shelf's +shell +shell's +shellac +shellac's +shellacked +shellacking +shellacs +shelled +sheller +shellfish +shellfish's +shellfishes +shelling +shells +shelter +shelter's +sheltered +sheltering +shelters +shelve +shelved +shelves +shelving +shelving's +shenanigan +shenanigan's +shenanigans +shepherd +shepherd's +shepherded +shepherdess +shepherdess's +shepherdesses +shepherding +shepherds 
+sherbert +sherbert's +sherberts +sherbet +sherbet's +sherbets +sherd +sherd's +sherds +sheriff +sheriff's +sheriffs +sherries +sherry +sherry's +shes +shibboleth +shibboleth's +shibboleths +shied +shield +shield's +shielded +shielding +shields +shies +shift +shift's +shifted +shiftier +shiftiest +shiftily +shiftiness +shiftiness's +shifting +shiftless +shiftlessness +shiftlessness's +shifts +shifty +shiitake +shiitake's +shiitakes +shill +shill's +shillalah +shillalah's +shillalahs +shilled +shillelagh +shillelagh's +shillelaghs +shilling +shilling's +shillings +shills +shim +shim's +shimmed +shimmer +shimmer's +shimmered +shimmering +shimmers +shimmery +shimmied +shimmies +shimming +shimmy +shimmy's +shimmying +shims +shin +shin's +shinbone +shinbone's +shinbones +shindig +shindig's +shindigs +shine +shine's +shined +shiner +shiner's +shiners +shines +shingle +shingle's +shingled +shingles +shingling +shinier +shiniest +shininess +shininess's +shining +shinned +shinnied +shinnies +shinning +shinny +shinnying +shins +shiny +ship +ship's +shipboard +shipboard's +shipboards +shipbuilder +shipbuilder's +shipbuilders +shipbuilding +shipbuilding's +shipload +shipload's +shiploads +shipmate +shipmate's +shipmates +shipment +shipment's +shipments +shipped +shipper +shipper's +shippers +shipping +shipping's +ships +shipshape +shipwreck +shipwreck's +shipwrecked +shipwrecking +shipwrecks +shipwright +shipwright's +shipwrights +shipyard +shipyard's +shipyards +shire +shire's +shires +shirk +shirked +shirker +shirker's +shirkers +shirking +shirks +shirr +shirr's +shirred +shirring +shirring's +shirrings +shirrs +shirt +shirt's +shirted +shirting +shirts +shirtsleeve +shirtsleeve's +shirtsleeves +shirttail +shirttail's +shirttails +shirtwaist +shirtwaist's +shirtwaists +shit +shit's +shits +shittier +shittiest +shitting +shitty +shiver +shiver's +shivered +shivering +shivers +shivery +shlemiel +shlemiel's +shlemiels +shlep +shlep's +shlepp +shlepp's +shlepped +shlepping 
+shlepps +shleps +shlock +shlocky +shoal +shoal's +shoaled +shoaling +shoals +shock +shock's +shocked +shocker +shocker's +shockers +shocking +shockingly +shockproof +shocks +shod +shodden +shoddier +shoddiest +shoddily +shoddiness +shoddiness's +shoddy +shoddy's +shoe +shoe's +shoed +shoehorn +shoehorn's +shoehorned +shoehorning +shoehorns +shoeing +shoelace +shoelace's +shoelaces +shoemaker +shoemaker's +shoemakers +shoes +shoeshine +shoeshine's +shoeshines +shoestring +shoestring's +shoestrings +shogun +shogun's +shoguns +shone +shoo +shooed +shooing +shook +shoon +shoos +shoot +shoot's +shooter +shooter's +shooters +shooting +shooting's +shootings +shootout +shootout's +shootouts +shoots +shop +shop's +shopaholic +shopaholic's +shopaholics +shopkeeper +shopkeeper's +shopkeepers +shoplift +shoplifted +shoplifter +shoplifter's +shoplifters +shoplifting +shoplifting's +shoplifts +shopped +shopper +shopper's +shoppers +shopping +shopping's +shops +shoptalk +shoptalk's +shopworn +shore +shore's +shored +shoreline +shoreline's +shorelines +shores +shoring +shorn +short +short's +shortage +shortage's +shortages +shortbread +shortbread's +shortcake +shortcake's +shortcakes +shortchange +shortchanged +shortchanges +shortchanging +shortcoming +shortcoming's +shortcomings +shortcut +shortcut's +shortcuts +shorted +shorten +shortened +shortening +shortening's +shortenings +shortens +shorter +shortest +shortfall +shortfall's +shortfalls +shorthand +shorthand's +shorthorn +shorthorn's +shorthorns +shorting +shortish +shortlist +shortly +shortness +shortness's +shorts +shortsighted +shortsightedly +shortsightedness +shortsightedness's +shortstop +shortstop's +shortstops +shortwave +shortwave's +shortwaves +shot +shot's +shotgun +shotgun's +shotgunned +shotgunning +shotguns +shots +should +should've +shoulder +shoulder's +shouldered +shouldering +shoulders +shouldn't +shout +shout's +shouted +shouting +shouts +shove +shove's +shoved +shovel +shovel's +shoveled +shovelful 
+shovelful's +shovelfuls +shoveling +shovelled +shovelling +shovels +shoves +shoving +show +show's +showbiz +showbiz's +showboat +showboat's +showboated +showboating +showboats +showcase +showcase's +showcased +showcases +showcasing +showdown +showdown's +showdowns +showed +shower +shower's +showered +showering +showers +showery +showgirl +showgirl's +showgirls +showier +showiest +showily +showiness +showiness's +showing +showing's +showings +showman +showman's +showmanship +showmanship's +showmen +shown +showoff +showoff's +showoffs +showpiece +showpiece's +showpieces +showplace +showplace's +showplaces +showroom +showroom's +showrooms +shows +showy +shrank +shrapnel +shrapnel's +shred +shred's +shredded +shredder +shredder's +shredders +shredding +shreds +shrew +shrew's +shrewd +shrewder +shrewdest +shrewdly +shrewdness +shrewdness's +shrewish +shrews +shriek +shriek's +shrieked +shrieking +shrieks +shrift +shrift's +shrike +shrike's +shrikes +shrill +shrilled +shriller +shrillest +shrilling +shrillness +shrillness's +shrills +shrilly +shrimp +shrimp's +shrimped +shrimping +shrimps +shrine +shrine's +shrines +shrink +shrink's +shrinkable +shrinkage +shrinkage's +shrinking +shrinks +shrive +shrived +shrivel +shriveled +shriveling +shrivelled +shrivelling +shrivels +shriven +shrives +shriving +shroud +shroud's +shrouded +shrouding +shrouds +shrove +shrub +shrub's +shrubberies +shrubbery +shrubbery's +shrubbier +shrubbiest +shrubby +shrubs +shrug +shrug's +shrugged +shrugging +shrugs +shrunk +shrunken +shtick +shtick's +shticks +shtik +shtik's +shtiks +shuck +shuck's +shucked +shucking +shucks +shuckses +shudder +shudder's +shuddered +shuddering +shudders +shuffle +shuffle's +shuffleboard +shuffleboard's +shuffleboards +shuffled +shuffler +shuffler's +shufflers +shuffles +shuffling +shun +shunned +shunning +shuns +shunt +shunt's +shunted +shunting +shunts +shush +shushed +shushes +shushing +shut +shutdown +shutdown's +shutdowns +shuteye +shuteye's +shutout 
+shutout's +shutouts +shuts +shutter +shutter's +shutterbug +shutterbug's +shutterbugs +shuttered +shuttering +shutters +shutting +shuttle +shuttle's +shuttlecock +shuttlecock's +shuttlecocked +shuttlecocking +shuttlecocks +shuttled +shuttles +shuttling +shy +shy's +shyer +shyest +shying +shyly +shyness +shyness's +shyster +shyster's +shysters +sibilant +sibilant's +sibilants +sibling +sibling's +siblings +sibyl +sibyl's +sibyls +sic +sick +sickbed +sickbed's +sickbeds +sicked +sicken +sickened +sickening +sickeningly +sickens +sicker +sickest +sicking +sickle +sickle's +sickles +sicklier +sickliest +sickly +sickness +sickness's +sicknesses +sicks +sics +side +side's +sidearm +sidearm's +sidearms +sidebar +sidebar's +sidebars +sideboard +sideboard's +sideboards +sideburns +sideburns's +sidecar +sidecar's +sidecars +sided +sidekick +sidekick's +sidekicks +sidelight +sidelight's +sidelights +sideline +sideline's +sidelined +sidelines +sidelining +sidelong +sidereal +sides +sidesaddle +sidesaddle's +sidesaddles +sideshow +sideshow's +sideshows +sidesplitting +sidestep +sidestep's +sidestepped +sidestepping +sidesteps +sidestroke +sidestroke's +sidestroked +sidestrokes +sidestroking +sideswipe +sideswipe's +sideswiped +sideswipes +sideswiping +sidetrack +sidetrack's +sidetracked +sidetracking +sidetracks +sidewalk +sidewalk's +sidewalks +sidewall +sidewall's +sidewalls +sideways +sidewise +siding +siding's +sidings +sidle +sidle's +sidled +sidles +sidling +siege +siege's +sieges +sierra +sierra's +sierras +siesta +siesta's +siestas +sieve +sieve's +sieved +sieves +sieving +sift +sifted +sifter +sifter's +sifters +sifting +sifts +sigh +sigh's +sighed +sighing +sighs +sight +sight's +sighted +sighting +sighting's +sightings +sightless +sightread +sights +sightseeing +sightseeing's +sightseer +sightseer's +sightseers +sigma +sign +sign's +signal +signal's +signaled +signaling +signalize +signalized +signalizes +signalizing +signalled +signalling +signally +signals 
+signatories +signatory +signatory's +signature +signature's +signatures +signboard +signboard's +signboards +signed +signer +signer's +signers +signet +signet's +signets +significance +significance's +significant +significantly +signification +signification's +significations +signified +signifies +signify +signifying +signing +signing's +signings +signpost +signpost's +signposted +signposting +signposts +signs +silage +silage's +silence +silence's +silenced +silencer +silencer's +silencers +silences +silencing +silent +silent's +silenter +silentest +silently +silents +silhouette +silhouette's +silhouetted +silhouettes +silhouetting +silica +silica's +silicate +silicate's +silicates +siliceous +silicious +silicon +silicon's +silicone +silicone's +silicosis +silicosis's +silk +silk's +silken +silkier +silkiest +silks +silkworm +silkworm's +silkworms +silky +sill +sill's +sillier +sillies +silliest +silliness +silliness's +sills +silly +silly's +silo +silo's +silos +silt +silt's +silted +silting +silts +silvan +silver +silver's +silvered +silverfish +silverfish's +silverfishes +silvering +silvers +silversmith +silversmith's +silversmiths +silverware +silverware's +silvery +sim +sim's +simian +simian's +simians +similar +similarities +similarity +similarity's +similarly +simile +simile's +similes +simmer +simmer's +simmered +simmering +simmers +simpatico +simper +simper's +simpered +simpering +simpers +simple +simpleness +simpleness's +simpler +simplest +simpleton +simpleton's +simpletons +simplex +simplicity +simplicity's +simplification +simplification's +simplifications +simplified +simplifies +simplify +simplifying +simplistic +simply +sims +simulate +simulated +simulates +simulating +simulation +simulation's +simulations +simulator +simulator's +simulators +simulcast +simulcast's +simulcasted +simulcasting +simulcasts +simultaneous +simultaneously +sin +sin's +since +sincere +sincerely +sincerer +sincerest +sincerity +sincerity's +sine +sinecure +sinecure's 
+sinecures +sinew +sinew's +sinews +sinewy +sinful +sinfully +sinfulness +sinfulness's +sing +sing's +singe +singe's +singed +singeing +singer +singer's +singers +singes +singing +singing's +single +single's +singled +singles +singles's +singleton +singleton's +singletons +singling +singly +sings +singsong +singsong's +singsonged +singsonging +singsongs +singular +singular's +singularities +singularity +singularity's +singularly +singulars +sinister +sink +sink's +sinkable +sinker +sinker's +sinkers +sinkhole +sinkhole's +sinkholes +sinking +sinks +sinned +sinner +sinner's +sinners +sinning +sins +sinuous +sinus +sinus's +sinuses +sinusitis +sinusitis's +sinusoidal +sip +sip's +siphon +siphon's +siphoned +siphoning +siphons +sipped +sipping +sips +sir +sir's +sire +sire's +sired +siren +siren's +sirens +sires +siring +sirloin +sirloin's +sirloins +sirocco +sirocco's +siroccos +sirs +sirup +sirup's +sirups +sis +sis's +sisal +sisal's +sises +sissier +sissies +sissiest +sissy +sissy's +sister +sister's +sisterhood +sisterhood's +sisterhoods +sisterly +sisters +sit +sitar +sitar's +sitars +sitcom +sitcom's +sitcoms +site +site's +sited +sites +siting +sits +sitter +sitter's +sitters +sitting +sitting's +sittings +situate +situated +situates +situating +situation +situation's +situations +six +six's +sixes +sixpence +sixpence's +sixpences +sixteen +sixteen's +sixteens +sixteenth +sixteenth's +sixteenths +sixth +sixth's +sixths +sixties +sixtieth +sixtieth's +sixtieths +sixty +sixty's +sizable +size +size's +sizeable +sized +sizer +sizes +sizing +sizing's +sizzle +sizzle's +sizzled +sizzles +sizzling +skate +skate's +skateboard +skateboard's +skateboarded +skateboarder +skateboarder's +skateboarders +skateboarding +skateboarding's +skateboards +skated +skater +skater's +skaters +skates +skating +skedaddle +skedaddle's +skedaddled +skedaddles +skedaddling +skeet +skeet's +skein +skein's +skeins +skeletal +skeleton +skeleton's +skeletons +skeptic +skeptic's +skeptical 
+skeptically +skepticism +skepticism's +skeptics +sketch +sketch's +sketched +sketches +sketchier +sketchiest +sketching +sketchy +skew +skew's +skewed +skewer +skewer's +skewered +skewering +skewers +skewing +skews +ski +ski's +skid +skid's +skidded +skidding +skids +skied +skier +skier's +skiers +skies +skiff +skiff's +skiffs +skiing +skiing's +skilful +skill +skill's +skilled +skillet +skillet's +skillets +skillful +skillfully +skills +skim +skim's +skimmed +skimming +skimp +skimped +skimpier +skimpiest +skimpiness +skimpiness's +skimping +skimps +skimpy +skims +skin +skin's +skinflint +skinflint's +skinflints +skinhead +skinhead's +skinheads +skinless +skinned +skinnier +skinniest +skinniness +skinniness's +skinning +skinny +skinny's +skins +skintight +skip +skip's +skipped +skipper +skipper's +skippered +skippering +skippers +skipping +skips +skirmish +skirmish's +skirmished +skirmishes +skirmishing +skirt +skirt's +skirted +skirting +skirts +skis +skit +skit's +skits +skitter +skittered +skittering +skitters +skittish +skivvied +skivvies +skivvy +skivvy's +skivvying +skulduggery +skulduggery's +skulk +skulked +skulking +skulks +skull +skull's +skullcap +skullcap's +skullcaps +skullduggery +skullduggery's +skulls +skunk +skunk's +skunked +skunking +skunks +sky +sky's +skycap +skycap's +skycaps +skydive +skydived +skydiver +skydiver's +skydivers +skydives +skydiving +skydiving's +skydove +skyed +skying +skyjack +skyjacked +skyjacker +skyjacker's +skyjackers +skyjacking +skyjacks +skylark +skylark's +skylarked +skylarking +skylarks +skylight +skylight's +skylights +skyline +skyline's +skylines +skyrocket +skyrocket's +skyrocketed +skyrocketing +skyrockets +skyscraper +skyscraper's +skyscrapers +skyward +skywards +skywriter +skywriter's +skywriters +skywriting +skywriting's +slab +slab's +slabbed +slabbing +slabs +slack +slack's +slacked +slacken +slackened +slackening +slackens +slacker +slacker's +slackers +slackest +slacking +slackly +slackness +slackness's 
+slacks +slacks's +slag +slag's +slags +slain +slake +slaked +slakes +slaking +slalom +slalom's +slalomed +slaloming +slaloms +slam +slam's +slammed +slammer +slammer's +slammers +slamming +slams +slander +slander's +slandered +slanderer +slanderer's +slanderers +slandering +slanderous +slanders +slang +slang's +slangier +slangiest +slangy +slant +slant's +slanted +slanting +slants +slantwise +slap +slap's +slapdash +slaphappy +slapped +slapping +slaps +slapstick +slapstick's +slash +slash's +slashed +slashes +slashing +slat +slat's +slate +slate's +slated +slates +slather +slathered +slathering +slathers +slating +slats +slattern +slattern's +slatternly +slatterns +slaughter +slaughter's +slaughtered +slaughterer +slaughterer's +slaughterers +slaughterhouse +slaughterhouse's +slaughterhouses +slaughtering +slaughters +slave +slave's +slaved +slaver +slaver's +slavered +slavering +slavers +slavery +slavery's +slaves +slaving +slavish +slavishly +slaw +slaw's +slay +slayer +slayer's +slayers +slaying +slaying's +slayings +slays +sleaze +sleaze's +sleazes +sleazier +sleaziest +sleazily +sleaziness +sleaziness's +sleazy +sled +sled's +sledded +sledding +sledge +sledge's +sledged +sledgehammer +sledgehammer's +sledgehammered +sledgehammering +sledgehammers +sledges +sledging +sleds +sleek +sleeked +sleeker +sleekest +sleeking +sleekly +sleekness +sleekness's +sleeks +sleep +sleep's +sleeper +sleeper's +sleepers +sleepier +sleepiest +sleepily +sleepiness +sleepiness's +sleeping +sleepless +sleeplessness +sleeplessness's +sleeps +sleepwalk +sleepwalked +sleepwalker +sleepwalker's +sleepwalkers +sleepwalking +sleepwalking's +sleepwalks +sleepwear +sleepwear's +sleepy +sleepyhead +sleepyhead's +sleepyheads +sleet +sleet's +sleeted +sleeting +sleets +sleety +sleeve +sleeve's +sleeveless +sleeves +sleigh +sleigh's +sleighed +sleighing +sleighs +slender +slenderer +slenderest +slenderize +slenderized +slenderizes +slenderizing +slenderness +slenderness's +slept +sleuth 
+sleuth's +sleuths +slew +slew's +slewed +slewing +slews +slice +slice's +sliced +slicer +slicer's +slicers +slices +slicing +slick +slick's +slicked +slicker +slicker's +slickers +slickest +slicking +slickly +slickness +slickness's +slicks +slid +slide +slide's +slider +slider's +sliders +slides +slideshow +slideshow's +slideshows +sliding +slier +sliest +slight +slight's +slighted +slighter +slightest +slighting +slightly +slightness +slightness's +slights +slily +slim +slime +slime's +slimier +slimiest +slimmed +slimmer +slimmest +slimming +slimness +slimness's +slims +slimy +sling +sling's +slinging +slings +slingshot +slingshot's +slingshots +slink +slinked +slinkier +slinkiest +slinking +slinks +slinky +slip +slip's +slipcover +slipcover's +slipcovers +slipknot +slipknot's +slipknots +slippage +slippage's +slippages +slipped +slipper +slipper's +slipperier +slipperiest +slipperiness +slipperiness's +slippers +slippery +slipping +slips +slipshod +slit +slit's +slither +slither's +slithered +slithering +slithers +slithery +slits +slitter +slitting +sliver +sliver's +slivered +slivering +slivers +slob +slob's +slobber +slobber's +slobbered +slobbering +slobbers +slobs +sloe +sloe's +sloes +slog +slog's +slogan +slogan's +slogans +slogged +slogging +slogs +sloop +sloop's +sloops +slop +slop's +slope +slope's +sloped +slopes +sloping +slopped +sloppier +sloppiest +sloppily +sloppiness +sloppiness's +slopping +sloppy +slops +slosh +sloshed +sloshes +sloshing +slot +slot's +sloth +sloth's +slothful +slothfulness +slothfulness's +sloths +slots +slotted +slotting +slouch +slouch's +slouched +slouches +slouchier +slouchiest +slouching +slouchy +slough +slough's +sloughed +sloughing +sloughs +sloven +sloven's +slovenlier +slovenliest +slovenliness +slovenliness's +slovenly +slovens +slow +slowdown +slowdown's +slowdowns +slowed +slower +slowest +slowing +slowly +slowness +slowness's +slowpoke +slowpoke's +slowpokes +slows +sludge +sludge's +slue +slue's +slued +slues 
+slug +slug's +sluggard +sluggard's +sluggards +slugged +slugger +slugger's +sluggers +slugging +sluggish +sluggishly +sluggishness +sluggishness's +slugs +sluice +sluice's +sluiced +sluices +sluicing +sluing +slum +slum's +slumber +slumber's +slumbered +slumbering +slumberous +slumbers +slumbrous +slumdog +slumdog's +slumdogs +slumlord +slumlord's +slumlords +slummed +slummer +slumming +slump +slump's +slumped +slumping +slumps +slums +slung +slunk +slur +slur's +slurp +slurp's +slurped +slurping +slurps +slurred +slurring +slurs +slush +slush's +slushier +slushiest +slushy +slut +slut's +sluts +sluttish +sly +slyer +slyest +slyly +slyness +slyness's +smack +smack's +smacked +smacker +smacker's +smackers +smacking +smacks +small +small's +smaller +smallest +smallish +smallness +smallness's +smallpox +smallpox's +smalls +smarmier +smarmiest +smarmy +smart +smart's +smarted +smarten +smartened +smartening +smartens +smarter +smartest +smarting +smartly +smartness +smartness's +smartphone +smartphone's +smartphones +smarts +smarts's +smartwatch +smartwatch's +smartwatches +smash +smash's +smashed +smashes +smashing +smattering +smattering's +smatterings +smear +smear's +smeared +smearing +smears +smell +smell's +smelled +smellier +smelliest +smelling +smells +smelly +smelt +smelt's +smelted +smelter +smelter's +smelters +smelting +smelts +smidge +smidge's +smidgen +smidgen's +smidgens +smidgeon +smidgeon's +smidgeons +smidges +smidgin +smidgin's +smidgins +smile +smile's +smiled +smiles +smiling +smilingly +smirch +smirch's +smirched +smirches +smirching +smirk +smirk's +smirked +smirking +smirks +smit +smite +smites +smith +smith's +smithereens +smithereens's +smithies +smiths +smithy +smithy's +smiting +smitten +smock +smock's +smocked +smocking +smocking's +smocks +smog +smog's +smoggier +smoggiest +smoggy +smoke +smoke's +smoked +smokehouse +smokehouse's +smokehouses +smokeless +smoker +smoker's +smokers +smokes +smokestack +smokestack's +smokestacks +smokier 
+smokiest +smokiness +smokiness's +smoking +smoking's +smoky +smolder +smolder's +smoldered +smoldering +smolders +smooch +smooch's +smooched +smooches +smooching +smooth +smoothed +smoother +smoothes +smoothest +smoothie +smoothie's +smoothies +smoothing +smoothly +smoothness +smoothness's +smooths +smoothy +smoothy's +smote +smother +smother's +smothered +smothering +smothers +smoulder +smoulder's +smouldered +smouldering +smoulders +smudge +smudge's +smudged +smudges +smudgier +smudgiest +smudging +smudgy +smug +smugger +smuggest +smuggle +smuggled +smuggler +smuggler's +smugglers +smuggles +smuggling +smuggling's +smugly +smugness +smugness's +smut +smut's +smuts +smuttier +smuttiest +smutty +smörgåsbord +smörgåsbord's +smörgåsbords +snack +snack's +snacked +snacking +snacks +snaffle +snaffle's +snaffled +snaffles +snaffling +snafu +snafu's +snafus +snag +snag's +snagged +snagging +snags +snail +snail's +snailed +snailing +snails +snake +snake's +snakebite +snakebite's +snakebites +snaked +snakes +snakier +snakiest +snaking +snaky +snap +snap's +snapdragon +snapdragon's +snapdragons +snapped +snapper +snapper's +snappers +snappier +snappiest +snapping +snappish +snappy +snaps +snapshot +snapshot's +snapshots +snare +snare's +snared +snares +snaring +snarkier +snarkiest +snarky +snarl +snarl's +snarled +snarling +snarls +snatch +snatch's +snatched +snatches +snatching +snazzier +snazziest +snazzy +sneak +sneak's +sneaked +sneaker +sneaker's +sneakers +sneakier +sneakiest +sneaking +sneaks +sneaky +sneer +sneer's +sneered +sneering +sneeringly +sneers +sneeze +sneeze's +sneezed +sneezes +sneezing +snicker +snicker's +snickered +snickering +snickers +snide +snider +snidest +sniff +sniff's +sniffed +sniffing +sniffle +sniffle's +sniffled +sniffles +sniffling +sniffs +snifter +snifter's +snifters +snigger +snigger's +sniggered +sniggering +sniggers +snip +snip's +snipe +snipe's +sniped +sniper +sniper's +snipers +snipes +sniping +snipped +snippet +snippet's 
+snippets +snippier +snippiest +snipping +snippy +snips +snit +snit's +snitch +snitch's +snitched +snitches +snitching +snits +snivel +snivel's +sniveled +sniveling +snivelled +snivelling +snivels +snob +snob's +snobbery +snobbery's +snobbier +snobbiest +snobbish +snobbishness +snobbishness's +snobby +snobs +snooker +snoop +snoop's +snooped +snooper +snooper's +snoopers +snoopier +snoopiest +snooping +snoops +snoopy +snoot +snoot's +snootier +snootiest +snootiness +snootiness's +snoots +snooty +snooze +snooze's +snoozed +snoozes +snoozing +snore +snore's +snored +snorer +snorer's +snorers +snores +snoring +snorkel +snorkel's +snorkeled +snorkeler +snorkeler's +snorkelers +snorkeling +snorkeling's +snorkelled +snorkelling +snorkels +snort +snort's +snorted +snorting +snorts +snot +snot's +snots +snottier +snottiest +snotty +snout +snout's +snouts +snow +snow's +snowball +snowball's +snowballed +snowballing +snowballs +snowblower +snowblower's +snowblowers +snowboard +snowboard's +snowboarded +snowboarding +snowboarding's +snowboards +snowbound +snowdrift +snowdrift's +snowdrifts +snowdrop +snowdrop's +snowdrops +snowed +snowfall +snowfall's +snowfalls +snowflake +snowflake's +snowflakes +snowier +snowiest +snowing +snowman +snowman's +snowmen +snowmobile +snowmobile's +snowmobiled +snowmobiles +snowmobiling +snowplow +snowplow's +snowplowed +snowplowing +snowplows +snows +snowshed +snowshoe +snowshoe's +snowshoeing +snowshoes +snowstorm +snowstorm's +snowstorms +snowsuit +snowsuit's +snowsuits +snowy +snub +snub's +snubbed +snubbing +snubs +snuck +snuff +snuff's +snuffbox +snuffbox's +snuffboxes +snuffed +snuffer +snuffer's +snuffers +snuffing +snuffle +snuffle's +snuffled +snuffles +snuffling +snuffs +snug +snug's +snugged +snugger +snuggest +snugging +snuggle +snuggle's +snuggled +snuggles +snuggling +snugly +snugs +so +so's +soak +soak's +soaked +soaking +soaking's +soakings +soaks +soap +soap's +soapbox +soapbox's +soapboxes +soaped +soapier +soapiest +soapiness 
+soapiness's +soaping +soaps +soapstone +soapstone's +soapsuds +soapsuds's +soapy +soar +soar's +soared +soaring +soars +sob +sob's +sobbed +sobbing +sober +sobered +soberer +soberest +sobering +soberly +soberness +soberness's +sobers +sobriety +sobriety's +sobriquet +sobriquet's +sobriquets +sobs +soccer +soccer's +sociability +sociability's +sociable +sociable's +sociables +sociably +social +social's +socialism +socialism's +socialist +socialist's +socialistic +socialists +socialite +socialite's +socialites +socialization +socialization's +socialize +socialized +socializes +socializing +socially +socials +societal +societies +society +society's +socioeconomic +sociological +sociologist +sociologist's +sociologists +sociology +sociology's +sociopath +sociopath's +sociopaths +sock +sock's +socked +socket +socket's +sockets +socking +socks +sod +sod's +soda +soda's +sodas +sodded +sodden +sodding +sodium +sodium's +sodomite +sodomite's +sodomites +sodomy +sodomy's +sods +sofa +sofa's +sofas +soft +softball +softball's +softballs +soften +softened +softener +softener's +softeners +softening +softens +softer +softest +softhearted +softie +softie's +softies +softly +softness +softness's +software +software's +softwood +softwood's +softwoods +softy +softy's +soggier +soggiest +soggily +sogginess +sogginess's +soggy +soil +soil's +soiled +soiling +soils +soirée +soirée's +soirées +sojourn +sojourn's +sojourned +sojourning +sojourns +sol +sol's +solace +solace's +solaced +solaces +solacing +solar +solaria +solarium +solarium's +solariums +sold +solder +solder's +soldered +soldering +solders +soldier +soldier's +soldiered +soldiering +soldierly +soldiers +sole +sole's +solecism +solecism's +solecisms +soled +solely +solemn +solemner +solemnest +solemnity +solemnity's +solemnize +solemnized +solemnizes +solemnizing +solemnly +solenoid +solenoid's +solenoids +soles +soli +solicit +solicitation +solicitation's +solicitations +solicited +soliciting +solicitor +solicitor's 
+solicitors +solicitous +solicitously +solicits +solicitude +solicitude's +solid +solid's +solidarity +solidarity's +solider +solidest +solidification +solidification's +solidified +solidifies +solidify +solidifying +solidity +solidity's +solidly +solidness +solidness's +solids +soliloquies +soliloquize +soliloquized +soliloquizes +soliloquizing +soliloquy +soliloquy's +soling +solitaire +solitaire's +solitaires +solitaries +solitary +solitary's +solitude +solitude's +solo +solo's +soloed +soloing +soloist +soloist's +soloists +solos +sols +solstice +solstice's +solstices +solubility +solubility's +soluble +soluble's +solubles +solution +solution's +solutions +solvable +solve +solved +solvency +solvency's +solvent +solvent's +solvents +solver +solver's +solvers +solves +solving +somber +somberly +sombre +sombrely +sombrero +sombrero's +sombreros +some +somebodies +somebody +somebody's +someday +somehow +someone +someone's +someones +someplace +somersault +somersault's +somersaulted +somersaulting +somersaults +something +something's +somethings +sometime +sometimes +someway +somewhat +somewhats +somewhere +somnambulism +somnambulism's +somnambulist +somnambulist's +somnambulists +somnolence +somnolence's +somnolent +son +son's +sonar +sonar's +sonars +sonata +sonata's +sonatas +song +song's +songbird +songbird's +songbirds +songs +songster +songster's +songsters +songwriter +songwriter's +songwriters +sonic +sonnet +sonnet's +sonnets +sonnies +sonny +sonny's +sonority +sonority's +sonorous +sons +soon +sooner +soonest +soot +soot's +sooth +sooth's +soothe +soothed +soothes +soothing +soothingly +soothsayer +soothsayer's +soothsayers +sootier +sootiest +sooty +sop +sop's +sophism +sophism's +sophist +sophist's +sophisticate +sophisticate's +sophisticated +sophisticates +sophisticating +sophistication +sophistication's +sophistries +sophistry +sophistry's +sophists +sophomore +sophomore's +sophomores +sophomoric +soporific +soporific's +soporifics +sopped +soppier 
+soppiest +sopping +soppy +soprano +soprano's +sopranos +sops +sorbet +sorbet's +sorbets +sorcerer +sorcerer's +sorcerers +sorceress +sorceress's +sorceresses +sorcery +sorcery's +sordid +sordidly +sordidness +sordidness's +sore +sore's +sorehead +sorehead's +soreheads +sorely +soreness +soreness's +sorer +sores +sorest +sorghum +sorghum's +sororities +sorority +sorority's +sorrel +sorrel's +sorrels +sorrier +sorriest +sorrow +sorrow's +sorrowed +sorrowful +sorrowfully +sorrowing +sorrows +sorry +sort +sort's +sorta +sorted +sorter +sorter's +sorters +sortie +sortie's +sortied +sortieing +sorties +sorting +sorts +sos +sot +sot's +sots +sottish +sou'wester +soubriquet +soubriquet's +soubriquets +soufflé +soufflé's +soufflés +sough +sough's +soughed +soughing +soughs +sought +soul +soul's +soulful +soulfully +soulfulness +soulfulness's +soulless +soulmate +soulmate's +soulmates +souls +sound +sound's +sounded +sounder +soundest +sounding +sounding's +soundings +soundless +soundlessly +soundly +soundness +soundness's +soundproof +soundproofed +soundproofing +soundproofs +sounds +soundtrack +soundtrack's +soundtracks +soup +soup's +souped +soupier +soupiest +souping +soups +soupy +soupçon +soupçon's +soupçons +sour +sour's +source +source's +sourced +sources +sourcing +sourdough +sourdough's +sourdoughs +soured +sourer +sourest +souring +sourly +sourness +sourness's +sourpuss +sourpuss's +sourpusses +sours +souse +souse's +soused +souses +sousing +south +south's +southbound +southeast +southeast's +southeasterly +southeastern +southeastward +southerlies +southerly +southerly's +southern +southern's +southerner +southerner's +southerners +southernmost +southerns +southpaw +southpaw's +southpaws +southward +southward's +southwards +southwest +southwest's +southwester +southwester's +southwesterly +southwestern +southwesters +southwestward +souvenir +souvenir's +souvenirs +sovereign +sovereign's +sovereigns +sovereignty +sovereignty's +soviet +soviet's +soviets +sow 
+sow's +sowed +sower +sower's +sowers +sowing +sown +sows +sox +soy +soy's +soya +soya's +soybean +soybean's +soybeans +spa +spa's +space +space's +spacecraft +spacecraft's +spacecrafts +spaced +spaceflight +spaceflight's +spaceflights +spaceman +spaceman's +spacemen +spaces +spaceship +spaceship's +spaceships +spacesuit +spacesuit's +spacesuits +spacewalk +spacewalk's +spacewalked +spacewalking +spacewalks +spacey +spacial +spacier +spaciest +spacing +spacing's +spacious +spaciously +spaciousness +spaciousness's +spacy +spade +spade's +spaded +spadeful +spadeful's +spadefuls +spades +spadework +spadework's +spading +spaghetti +spaghetti's +spake +spam +spam's +spammed +spammer +spammer's +spammers +spamming +spams +span +span's +spandex +spandex's +spangle +spangle's +spangled +spangles +spangling +spaniel +spaniel's +spaniels +spank +spank's +spanked +spanking +spanking's +spankings +spanks +spanned +spanner +spanner's +spanners +spanning +spans +spar +spar's +spare +spare's +spared +sparely +spareness +spareness's +sparer +spareribs +spareribs's +spares +sparest +sparing +sparingly +spark +spark's +sparked +sparking +sparkle +sparkle's +sparkled +sparkler +sparkler's +sparklers +sparkles +sparkling +sparks +sparred +sparring +sparrow +sparrow's +sparrows +spars +sparse +sparsely +sparseness +sparseness's +sparser +sparsest +sparsity +sparsity's +spartan +spas +spasm +spasm's +spasmodic +spasmodically +spasms +spastic +spastic's +spastics +spat +spat's +spate +spate's +spates +spatial +spatially +spats +spatted +spatter +spatter's +spattered +spattering +spatters +spatting +spatula +spatula's +spatulas +spawn +spawn's +spawned +spawning +spawns +spay +spayed +spaying +spays +speak +speakeasies +speakeasy +speakeasy's +speaker +speaker's +speakers +speaking +speaks +spear +spear's +speared +spearhead +spearhead's +spearheaded +spearheading +spearheads +spearing +spearmint +spearmint's +spears +spec +spec's +specced +speccing +special +special's +specialist 
+specialist's +specialists +specialization +specialization's +specializations +specialize +specialized +specializes +specializing +specially +specials +specialties +specialty +specialty's +specie +specie's +species +species's +specifiable +specific +specific's +specifically +specification +specification's +specifications +specifics +specified +specifier +specifiers +specifies +specify +specifying +specimen +specimen's +specimens +specious +speciously +speck +speck's +specked +specking +speckle +speckle's +speckled +speckles +speckling +specks +specs +specs's +spectacle +spectacle's +spectacles +spectacles's +spectacular +spectacular's +spectacularly +spectaculars +spectator +spectator's +spectators +specter +specter's +specters +spectra +spectral +spectroscope +spectroscope's +spectroscopes +spectroscopic +spectroscopy +spectroscopy's +spectrum +spectrum's +spectrums +speculate +speculated +speculates +speculating +speculation +speculation's +speculations +speculative +speculator +speculator's +speculators +sped +speech +speech's +speeches +speechless +speed +speed's +speedboat +speedboat's +speedboats +speeded +speeder +speeder's +speeders +speedier +speediest +speedily +speeding +speeding's +speedometer +speedometer's +speedometers +speeds +speedster +speedster's +speedsters +speedup +speedup's +speedups +speedway +speedway's +speedways +speedy +spell +spell's +spellbind +spellbinder +spellbinder's +spellbinders +spellbinding +spellbinds +spellbound +spellcheck +spellcheck's +spellchecked +spellchecker +spellchecker's +spellcheckers +spellchecking +spellchecks +spelled +speller +speller's +spellers +spelling +spelling's +spellings +spells +spelt +spelunker +spelunker's +spelunkers +spend +spender +spender's +spenders +spending +spending's +spends +spendthrift +spendthrift's +spendthrifts +spent +sperm +sperm's +spermatozoa +spermatozoon +spermatozoon's +spermicide +spermicide's +spermicides +sperms +spew +spew's +spewed +spewing +spews +sphere +sphere's +spheres 
+spherical +spheroid +spheroid's +spheroidal +spheroids +sphincter +sphincter's +sphincters +sphinges +sphinx +sphinx's +sphinxes +spice +spice's +spiced +spices +spicier +spiciest +spiciness +spiciness's +spicing +spicy +spider +spider's +spiders +spidery +spied +spiel +spiel's +spieled +spieling +spiels +spies +spiffier +spiffiest +spiffy +spigot +spigot's +spigots +spike +spike's +spiked +spikes +spikier +spikiest +spiking +spiky +spill +spill's +spillage +spillage's +spillages +spilled +spilling +spills +spillway +spillway's +spillways +spilt +spin +spin's +spinach +spinach's +spinal +spinal's +spinals +spindle +spindle's +spindled +spindles +spindlier +spindliest +spindling +spindly +spine +spine's +spineless +spines +spinet +spinet's +spinets +spinier +spiniest +spinnaker +spinnaker's +spinnakers +spinner +spinner's +spinners +spinning +spinoff +spinoff's +spinoffs +spins +spinster +spinster's +spinsterhood +spinsterhood's +spinsters +spiny +spiraea +spiraea's +spiraeas +spiral +spiral's +spiraled +spiraling +spiralled +spiralling +spirally +spirals +spire +spire's +spirea +spirea's +spireas +spires +spirit +spirit's +spirited +spiriting +spiritless +spirits +spiritual +spiritual's +spiritualism +spiritualism's +spiritualist +spiritualist's +spiritualistic +spiritualists +spirituality +spirituality's +spiritually +spirituals +spirituous +spit +spit's +spitball +spitball's +spitballs +spite +spite's +spited +spiteful +spitefuller +spitefullest +spitefully +spitefulness +spitefulness's +spites +spitfire +spitfire's +spitfires +spiting +spits +spitted +spitting +spittle +spittle's +spittoon +spittoon's +spittoons +splash +splash's +splashdown +splashdown's +splashdowns +splashed +splashes +splashier +splashiest +splashing +splashy +splat +splat's +splats +splatted +splatter +splatter's +splattered +splattering +splatters +splatting +splay +splay's +splayed +splaying +splays +spleen +spleen's +spleens +splendid +splendider +splendidest +splendidly +splendor 
+splendor's +splenetic +splice +splice's +spliced +splicer +splicer's +splicers +splices +splicing +spline +splines +splint +splint's +splinted +splinter +splinter's +splintered +splintering +splinters +splinting +splints +split +split's +splits +splitting +splitting's +splittings +splodge +splotch +splotch's +splotched +splotches +splotchier +splotchiest +splotching +splotchy +splurge +splurge's +splurged +splurges +splurging +splutter +splutter's +spluttered +spluttering +splutters +spoil +spoil's +spoilage +spoilage's +spoiled +spoiler +spoiler's +spoilers +spoiling +spoils +spoilsport +spoilsport's +spoilsports +spoilt +spoke +spoke's +spoken +spokes +spokesman +spokesman's +spokesmen +spokespeople +spokesperson +spokesperson's +spokespersons +spokeswoman +spokeswoman's +spokeswomen +spoliation +spoliation's +sponge +sponge's +sponged +sponger +sponger's +spongers +sponges +spongier +spongiest +sponging +spongy +sponsor +sponsor's +sponsored +sponsoring +sponsors +sponsorship +sponsorship's +spontaneity +spontaneity's +spontaneous +spontaneously +spoof +spoof's +spoofed +spoofing +spoofs +spook +spook's +spooked +spookier +spookiest +spooking +spooks +spooky +spool +spool's +spooled +spooling +spools +spoon +spoon's +spoonbill +spoonbill's +spoonbills +spooned +spoonerism +spoonerism's +spoonerisms +spoonful +spoonful's +spoonfuls +spooning +spoons +spoonsful +spoor +spoor's +spoored +spooring +spoors +sporadic +sporadically +spore +spore's +spored +spores +sporing +sporran +sport +sport's +sported +sportier +sportiest +sporting +sportive +sports +sportscast +sportscast's +sportscaster +sportscaster's +sportscasters +sportscasting +sportscasts +sportsman +sportsman's +sportsmanlike +sportsmanship +sportsmanship's +sportsmen +sportswear +sportswear's +sportswoman +sportswoman's +sportswomen +sporty +spot +spot's +spotless +spotlessly +spotlessness +spotlessness's +spotlight +spotlight's +spotlighted +spotlighting +spotlights +spots +spotted +spotter +spotter's 
+spotters +spottier +spottiest +spottiness +spottiness's +spotting +spotty +spouse +spouse's +spouses +spout +spout's +spouted +spouting +spouts +sprain +sprain's +sprained +spraining +sprains +sprang +sprat +sprat's +sprats +sprawl +sprawl's +sprawled +sprawling +sprawls +spray +spray's +sprayed +sprayer +sprayer's +sprayers +spraying +sprays +spread +spread's +spreader +spreader's +spreaders +spreading +spreads +spreadsheet +spreadsheet's +spreadsheets +spree +spree's +spreed +spreeing +sprees +sprier +spriest +sprig +sprig's +sprightlier +sprightliest +sprightliness +sprightliness's +sprightly +sprigs +spring +spring's +springboard +springboard's +springboards +springier +springiest +springiness +springiness's +springing +springs +springtime +springtime's +springy +sprinkle +sprinkle's +sprinkled +sprinkler +sprinkler's +sprinklers +sprinkles +sprinkling +sprinkling's +sprinklings +sprint +sprint's +sprinted +sprinter +sprinter's +sprinters +sprinting +sprints +sprite +sprite's +sprites +spritz +spritz's +spritzed +spritzes +spritzing +sprocket +sprocket's +sprockets +sprout +sprout's +sprouted +sprouting +sprouts +spruce +spruce's +spruced +sprucer +spruces +sprucest +sprucing +sprung +spry +spryer +spryest +spryly +spryness +spryness's +spud +spud's +spuds +spume +spume's +spumed +spumes +spuming +spumone +spumone's +spumoni +spumoni's +spun +spunk +spunk's +spunkier +spunkiest +spunky +spur +spur's +spurious +spuriously +spuriousness +spuriousness's +spurn +spurned +spurning +spurns +spurred +spurring +spurs +spurt +spurt's +spurted +spurting +spurts +sputter +sputter's +sputtered +sputtering +sputters +sputum +sputum's +spy +spy's +spyglass +spyglass's +spyglasses +spying +spyware +spyware's +squab +squab's +squabble +squabble's +squabbled +squabbles +squabbling +squabs +squad +squad's +squadron +squadron's +squadrons +squads +squalid +squalider +squalidest +squall +squall's +squalled +squalling +squalls +squalor +squalor's +squander +squandered +squandering 
+squanders +square +square's +squared +squarely +squareness +squareness's +squarer +squares +squarest +squaring +squash +squash's +squashed +squashes +squashier +squashiest +squashing +squashy +squat +squat's +squats +squatted +squatter +squatter's +squatters +squattest +squatting +squaw +squaw's +squawk +squawk's +squawked +squawking +squawks +squaws +squeak +squeak's +squeaked +squeakier +squeakiest +squeaking +squeaks +squeaky +squeal +squeal's +squealed +squealer +squealer's +squealers +squealing +squeals +squeamish +squeamishly +squeamishness +squeamishness's +squeegee +squeegee's +squeegeed +squeegeeing +squeegees +squeeze +squeeze's +squeezed +squeezer +squeezer's +squeezers +squeezes +squeezing +squelch +squelch's +squelched +squelches +squelching +squid +squid's +squids +squiggle +squiggle's +squiggled +squiggles +squiggling +squiggly +squint +squint's +squinted +squinter +squintest +squinting +squints +squire +squire's +squired +squires +squiring +squirm +squirm's +squirmed +squirmier +squirmiest +squirming +squirms +squirmy +squirrel +squirrel's +squirreled +squirreling +squirrelled +squirrelling +squirrels +squirt +squirt's +squirted +squirting +squirts +squish +squish's +squished +squishes +squishier +squishiest +squishing +squishy +sriracha +stab +stab's +stabbed +stabbing +stabbing's +stabbings +stability +stability's +stabilization +stabilization's +stabilize +stabilized +stabilizer +stabilizer's +stabilizers +stabilizes +stabilizing +stable +stable's +stabled +stabler +stables +stablest +stabling +stabs +staccati +staccato +staccato's +staccatos +stack +stack's +stacked +stacking +stacks +stadia +stadium +stadium's +stadiums +staff +staff's +staffed +staffer +staffer's +staffers +staffing +staffing's +staffs +stag +stag's +stage +stage's +stagecoach +stagecoach's +stagecoaches +staged +stagehand +stagehand's +stagehands +stages +stagflation +stagflation's +stagger +stagger's +staggered +staggering +staggeringly +staggers +staging +staging's 
+stagings +stagnant +stagnate +stagnated +stagnates +stagnating +stagnation +stagnation's +stags +staid +staider +staidest +staidly +stain +stain's +stained +staining +stainless +stainless's +stains +stair +stair's +staircase +staircase's +staircases +stairs +stairway +stairway's +stairways +stairwell +stairwell's +stairwells +stake +stake's +staked +stakeout +stakeout's +stakeouts +stakes +staking +stalactite +stalactite's +stalactites +stalagmite +stalagmite's +stalagmites +stale +staled +stalemate +stalemate's +stalemated +stalemates +stalemating +staleness +staleness's +staler +stales +stalest +staling +stalk +stalk's +stalked +stalker +stalker's +stalkers +stalking +stalking's +stalkings +stalks +stall +stall's +stalled +stalling +stallion +stallion's +stallions +stalls +stalwart +stalwart's +stalwarts +stamen +stamen's +stamens +stamina +stamina's +stammer +stammer's +stammered +stammerer +stammerer's +stammerers +stammering +stammers +stamp +stamp's +stamped +stampede +stampede's +stampeded +stampedes +stampeding +stamping +stamps +stance +stance's +stances +stanch +stanched +stancher +stanches +stanchest +stanching +stanchion +stanchion's +stanchions +stand +stand's +standard +standard's +standardization +standardization's +standardize +standardized +standardizes +standardizing +standards +standby +standby's +standbys +standing +standing's +standings +standoff +standoff's +standoffish +standoffs +standout +standout's +standouts +standpoint +standpoint's +standpoints +stands +standstill +standstill's +standstills +stank +stanza +stanza's +stanzas +staph +staph's +staphylococci +staphylococcus +staphylococcus's +staple +staple's +stapled +stapler +stapler's +staplers +staples +stapling +star +star's +starboard +starboard's +starch +starch's +starched +starches +starchier +starchiest +starching +starchy +stardom +stardom's +stare +stare's +stared +stares +starfish +starfish's +starfishes +stargazer +stargazer's +stargazers +staring +stark +starker +starkest 
+starkly +starkness +starkness's +starless +starlet +starlet's +starlets +starlight +starlight's +starling +starling's +starlings +starlit +starred +starrier +starriest +starring +starry +stars +start +start's +started +starter +starter's +starters +starting +startle +startled +startles +startling +startlingly +starts +startup +startup's +startups +starvation +starvation's +starve +starved +starves +starving +starvings +stash +stash's +stashed +stashes +stashing +state +state's +stated +statehood +statehood's +statehouse +statehouse's +statehouses +stateless +statelier +stateliest +stateliness +stateliness's +stately +statement +statement's +statements +stater +stateroom +stateroom's +staterooms +states +stateside +statesman +statesman's +statesmanlike +statesmanship +statesmanship's +statesmen +statewide +static +static's +statically +stating +station +station's +stationary +stationed +stationer +stationer's +stationers +stationery +stationery's +stationing +stations +statistic +statistic's +statistical +statistically +statistician +statistician's +statisticians +statistics +stats +statuary +statuary's +statue +statue's +statues +statuesque +statuette +statuette's +statuettes +stature +stature's +statures +status +status's +statuses +statute +statute's +statutes +statutory +staunch +staunched +stauncher +staunches +staunchest +staunching +staunchly +stave +stave's +staved +staves +staving +stay +stay's +stayed +staying +stays +stead +stead's +steadfast +steadfastly +steadfastness +steadfastness's +steadied +steadier +steadies +steadiest +steadily +steadiness +steadiness's +steads +steady +steady's +steadying +steak +steak's +steakhouse +steakhouse's +steakhouses +steaks +steal +steal's +stealing +steals +stealth +stealth's +stealthier +stealthiest +stealthily +stealthy +steam +steam's +steamboat +steamboat's +steamboats +steamed +steamer +steamer's +steamers +steamier +steamiest +steaming +steamroll +steamrolled +steamroller +steamroller's +steamrollered 
+steamrollering +steamrollers +steamrolling +steamrolls +steams +steamship +steamship's +steamships +steamy +steed +steed's +steeds +steel +steel's +steeled +steelier +steeliest +steeling +steels +steely +steep +steep's +steeped +steeper +steepest +steeping +steeple +steeple's +steeplechase +steeplechase's +steeplechases +steeplejack +steeplejack's +steeplejacks +steeples +steeply +steepness +steepness's +steeps +steer +steer's +steerage +steerage's +steered +steering +steering's +steers +stein +stein's +steins +stellar +stem +stem's +stemmed +stemming +stems +stench +stench's +stenches +stencil +stencil's +stenciled +stenciling +stencilled +stencilling +stencils +stenographer +stenographer's +stenographers +stenographic +stenography +stenography's +stent +stent's +stentorian +stents +step +step's +stepbrother +stepbrother's +stepbrothers +stepchild +stepchild's +stepchildren +stepchildren's +stepdad +stepdad's +stepdads +stepdaughter +stepdaughter's +stepdaughters +stepfather +stepfather's +stepfathers +stepladder +stepladder's +stepladders +stepmom +stepmom's +stepmoms +stepmother +stepmother's +stepmothers +stepparent +stepparent's +stepparents +steppe +steppe's +stepped +steppes +stepping +steppingstone +steppingstone's +steppingstones +steps +stepsister +stepsister's +stepsisters +stepson +stepson's +stepsons +stereo +stereo's +stereophonic +stereos +stereoscope +stereoscope's +stereoscopes +stereotype +stereotype's +stereotyped +stereotypes +stereotypical +stereotyping +sterile +sterility +sterility's +sterilization +sterilization's +sterilize +sterilized +sterilizer +sterilizer's +sterilizers +sterilizes +sterilizing +sterling +sterling's +stern +stern's +sterna +sterner +sternest +sternly +sternness +sternness's +sterns +sternum +sternum's +sternums +steroid +steroid's +steroids +stethoscope +stethoscope's +stethoscopes +stevedore +stevedore's +stevedores +stew +stew's +steward +steward's +stewarded +stewardess +stewardess's +stewardesses +stewarding 
+stewards +stewardship +stewardship's +stewed +stewing +stews +stick +stick's +sticker +sticker's +stickers +stickier +stickies +stickiest +stickiness +stickiness's +sticking +stickleback +stickleback's +sticklebacks +stickler +stickler's +sticklers +stickpin +stickpin's +stickpins +sticks +stickup +stickup's +stickups +sticky +sticky's +sties +stiff +stiff's +stiffed +stiffen +stiffened +stiffener +stiffener's +stiffeners +stiffening +stiffens +stiffer +stiffest +stiffing +stiffly +stiffness +stiffness's +stiffs +stifle +stifled +stifles +stifling +stiflings +stigma +stigma's +stigmas +stigmata +stigmatize +stigmatized +stigmatizes +stigmatizing +stile +stile's +stiles +stiletto +stiletto's +stilettoes +stilettos +still +still's +stillbirth +stillbirth's +stillbirths +stillborn +stilled +stiller +stillest +stilling +stillness +stillness's +stills +stilt +stilt's +stilted +stilts +stimulant +stimulant's +stimulants +stimulate +stimulated +stimulates +stimulating +stimulation +stimulation's +stimuli +stimulus +stimulus's +sting +sting's +stinger +stinger's +stingers +stingier +stingiest +stingily +stinginess +stinginess's +stinging +stingray +stingray's +stingrays +stings +stingy +stink +stink's +stinker +stinker's +stinkers +stinking +stinks +stint +stint's +stinted +stinting +stints +stipend +stipend's +stipends +stipple +stipple's +stippled +stipples +stippling +stipulate +stipulated +stipulates +stipulating +stipulation +stipulation's +stipulations +stir +stir's +stirred +stirrer +stirrer's +stirrers +stirring +stirrings +stirrup +stirrup's +stirrups +stirs +stitch +stitch's +stitched +stitches +stitching +stitching's +stoat +stoat's +stoats +stochastic +stock +stock's +stockade +stockade's +stockaded +stockades +stockading +stockbroker +stockbroker's +stockbrokers +stocked +stockholder +stockholder's +stockholders +stockier +stockiest +stockiness +stockiness's +stocking +stocking's +stockings +stockpile +stockpile's +stockpiled +stockpiles +stockpiling 
+stockroom +stockroom's +stockrooms +stocks +stocky +stockyard +stockyard's +stockyards +stodgier +stodgiest +stodginess +stodginess's +stodgy +stoic +stoic's +stoical +stoically +stoicism +stoicism's +stoics +stoke +stoked +stoker +stoker's +stokers +stokes +stoking +stole +stole's +stolen +stoles +stolid +stolider +stolidest +stolidity +stolidity's +stolidly +stomach +stomach's +stomachache +stomachache's +stomachaches +stomached +stomaching +stomachs +stomp +stomp's +stomped +stomping +stomps +stone +stone's +stoned +stoner +stoner's +stoners +stones +stonewall +stonewalled +stonewalling +stonewalls +stoneware +stoneware's +stonework +stonework's +stoney +stonier +stoniest +stonily +stoning +stony +stood +stooge +stooge's +stooges +stool +stool's +stools +stoop +stoop's +stooped +stooping +stoops +stop +stop's +stopcock +stopcock's +stopcocks +stopgap +stopgap's +stopgaps +stoplight +stoplight's +stoplights +stopover +stopover's +stopovers +stoppable +stoppage +stoppage's +stoppages +stopped +stopper +stopper's +stoppered +stoppering +stoppers +stopping +stops +stopwatch +stopwatch's +stopwatches +storage +storage's +store +store's +stored +storefront +storefront's +storefronts +storehouse +storehouse's +storehouses +storekeeper +storekeeper's +storekeepers +storeroom +storeroom's +storerooms +stores +storey +storey's +storeys +storied +stories +storing +stork +stork's +storks +storm +storm's +stormed +stormier +stormiest +stormily +storminess +storminess's +storming +storms +stormy +story +story's +storybook +storybook's +storybooks +storyteller +storyteller's +storytellers +stout +stout's +stouter +stoutest +stoutly +stoutness +stoutness's +stove +stove's +stovepipe +stovepipe's +stovepipes +stoves +stow +stowaway +stowaway's +stowaways +stowed +stowing +stows +straddle +straddle's +straddled +straddles +straddling +strafe +strafe's +strafed +strafes +strafing +straggle +straggled +straggler +straggler's +stragglers +straggles +stragglier +straggliest 
+straggling +straggly +straight +straight's +straightaway +straightaway's +straightaways +straightedge +straightedge's +straightedges +straighten +straightened +straightening +straightens +straighter +straightest +straightforward +straightforwardly +straightjacket +straightjacket's +straightjacketed +straightjacketing +straightjackets +straightness +straightness's +straights +strain +strain's +strained +strainer +strainer's +strainers +straining +strains +strait +strait's +straiten +straitened +straitening +straitens +straitjacket +straitjacket's +straitjacketed +straitjacketing +straitjackets +straits +strand +strand's +stranded +stranding +strands +strange +strangely +strangeness +strangeness's +stranger +stranger's +strangers +strangest +strangle +strangled +stranglehold +stranglehold's +strangleholds +strangler +strangler's +stranglers +strangles +strangling +strangulate +strangulated +strangulates +strangulating +strangulation +strangulation's +strap +strap's +strapless +strapless's +straplesses +strapped +strapping +strapping's +straps +strata +stratagem +stratagem's +stratagems +strategic +strategically +strategies +strategist +strategist's +strategists +strategy +strategy's +stratification +stratification's +stratified +stratifies +stratify +stratifying +stratosphere +stratosphere's +stratospheres +stratum +stratum's +stratums +straw +straw's +strawberries +strawberry +strawberry's +strawed +strawing +straws +stray +stray's +strayed +straying +strays +streak +streak's +streaked +streakier +streakiest +streaking +streaks +streaky +stream +stream's +streamed +streamer +streamer's +streamers +streaming +streamline +streamlined +streamlines +streamlining +streams +street +street's +streetcar +streetcar's +streetcars +streetlight +streetlight's +streetlights +streets +streetwalker +streetwalker's +streetwalkers +streetwise +strength +strength's +strengthen +strengthened +strengthening +strengthens +strengths +strenuous +strenuously +strenuousness 
+strenuousness's +strep +strep's +streptococcal +streptococci +streptococcus +streptococcus's +streptomycin +streptomycin's +stress +stress's +stressed +stresses +stressful +stressing +stretch +stretch's +stretched +stretcher +stretcher's +stretchers +stretches +stretchier +stretchiest +stretching +stretchy +strew +strewed +strewing +strewn +strews +striated +stricken +strict +stricter +strictest +strictly +strictness +strictness's +stricture +stricture's +strictures +stridden +stride +stride's +strident +stridently +strides +striding +strife +strife's +strike +strike's +strikeout +strikeout's +strikeouts +striker +striker's +strikers +strikes +striking +strikingly +strikings +string +string's +stringed +stringency +stringency's +stringent +stringently +stringer +stringer's +stringers +stringier +stringiest +stringing +strings +stringy +strip +strip's +stripe +stripe's +striped +stripes +striping +stripling +stripling's +striplings +stripped +stripper +stripper's +strippers +stripping +strips +stript +striptease +striptease's +stripteased +stripteases +stripteasing +strive +strived +striven +strives +striving +strobe +strobe's +strobes +strode +stroke +stroke's +stroked +strokes +stroking +stroll +stroll's +strolled +stroller +stroller's +strollers +strolling +strolls +strong +strongbox +strongbox's +strongboxes +stronger +strongest +stronghold +stronghold's +strongholds +strongly +strontium +strontium's +strop +strop's +strophe +strophe's +strophes +stropped +stropping +strops +strove +struck +structural +structuralist +structurally +structure +structure's +structured +structures +structuring +strudel +strudel's +strudels +struggle +struggle's +struggled +struggles +struggling +strum +strum's +strummed +strumming +strumpet +strumpet's +strumpets +strums +strung +strut +strut's +struts +strutted +strutting +strychnine +strychnine's +stub +stub's +stubbed +stubbier +stubbiest +stubbing +stubble +stubble's +stubbly +stubborn +stubborner +stubbornest +stubbornly 
+stubbornness +stubbornness's +stubby +stubs +stucco +stucco's +stuccoed +stuccoes +stuccoing +stuccos +stuck +stud +stud's +studded +studding +student +student's +students +studentship +studentships +studied +studies +studio +studio's +studios +studious +studiously +studs +study +study's +studying +stuff +stuff's +stuffed +stuffier +stuffiest +stuffily +stuffiness +stuffiness's +stuffing +stuffing's +stuffings +stuffs +stuffy +stultification +stultification's +stultified +stultifies +stultify +stultifying +stumble +stumble's +stumbled +stumbler +stumbler's +stumblers +stumbles +stumbling +stump +stump's +stumped +stumpier +stumpiest +stumping +stumps +stumpy +stun +stung +stunk +stunned +stunning +stunningly +stuns +stunt +stunt's +stunted +stunting +stunts +stupefaction +stupefaction's +stupefied +stupefies +stupefy +stupefying +stupendous +stupendously +stupid +stupid's +stupider +stupidest +stupidities +stupidity +stupidity's +stupidly +stupids +stupor +stupor's +stupors +sturdier +sturdiest +sturdily +sturdiness +sturdiness's +sturdy +sturgeon +sturgeon's +sturgeons +stutter +stutter's +stuttered +stutterer +stutterer's +stutterers +stuttering +stutters +sty +sty's +stye +stye's +styes +style +style's +styled +styles +styli +styling +stylish +stylishly +stylishness +stylishness's +stylist +stylist's +stylistic +stylistically +stylists +stylize +stylized +stylizes +stylizing +stylus +stylus's +styluses +stymie +stymie's +stymied +stymieing +stymies +stymying +styptic +styptic's +styptics +suave +suavely +suaver +suavest +suavity +suavity's +sub +sub's +subatomic +subbasement +subbasement's +subbasements +subbed +subbing +subclass +subcommittee +subcommittee's +subcommittees +subcompact +subcompact's +subcompacts +subconscious +subconscious's +subconsciously +subcontinent +subcontinent's +subcontinents +subcontract +subcontract's +subcontracted +subcontracting +subcontractor +subcontractor's +subcontractors +subcontracts +subculture +subculture's +subcultures 
+subcutaneous +subdivide +subdivided +subdivides +subdividing +subdivision +subdivision's +subdivisions +subdue +subdued +subdues +subduing +subgroup +subgroup's +subgroups +subhead +subhead's +subheading +subheading's +subheadings +subheads +subhuman +subhuman's +subhumans +subject +subject's +subjected +subjecting +subjection +subjection's +subjective +subjectively +subjectivity +subjectivity's +subjects +subjoin +subjoined +subjoining +subjoins +subjugate +subjugated +subjugates +subjugating +subjugation +subjugation's +subjunctive +subjunctive's +subjunctives +sublease +sublease's +subleased +subleases +subleasing +sublet +sublet's +sublets +subletting +sublimate +sublimated +sublimates +sublimating +sublimation +sublimation's +sublime +sublimed +sublimely +sublimer +sublimes +sublimest +subliminal +subliminally +subliming +sublimity +sublimity's +submarine +submarine's +submarines +submerge +submerged +submergence +submergence's +submerges +submerging +submerse +submersed +submerses +submersible +submersible's +submersibles +submersing +submersion +submersion's +submission +submission's +submissions +submissive +submit +submits +submitted +submitter +submitting +subnormal +suborbital +subordinate +subordinate's +subordinated +subordinates +subordinating +subordination +subordination's +suborn +subornation +subornation's +suborned +suborning +suborns +subplot +subplot's +subplots +subpoena +subpoena's +subpoenaed +subpoenaing +subpoenas +subprime +subprogram +subprograms +subroutine +subroutine's +subroutines +subs +subscribe +subscribed +subscriber +subscriber's +subscribers +subscribes +subscribing +subscript +subscript's +subscription +subscription's +subscriptions +subscripts +subsection +subsection's +subsections +subsequent +subsequently +subservience +subservience's +subservient +subset +subset's +subsets +subside +subsided +subsidence +subsidence's +subsides +subsidiaries +subsidiary +subsidiary's +subsidies +subsiding +subsidization +subsidization's 
+subsidize +subsidized +subsidizes +subsidizing +subsidy +subsidy's +subsist +subsisted +subsistence +subsistence's +subsisting +subsists +subsoil +subsoil's +subsonic +subspace +substance +substance's +substances +substandard +substantial +substantially +substantiate +substantiated +substantiates +substantiating +substantiation +substantiation's +substantiations +substantive +substantive's +substantives +substation +substation's +substations +substitute +substitute's +substituted +substitutes +substituting +substitution +substitution's +substitutions +substrata +substrate +substratum +substratum's +substratums +substructure +substructure's +substructures +subsume +subsumed +subsumes +subsuming +subsystem +subsystem's +subsystems +subteen +subteen's +subteens +subterfuge +subterfuge's +subterfuges +subterranean +subtitle +subtitle's +subtitled +subtitles +subtitling +subtle +subtler +subtlest +subtleties +subtlety +subtlety's +subtly +subtotal +subtotal's +subtotaled +subtotaling +subtotalled +subtotalling +subtotals +subtract +subtracted +subtracting +subtraction +subtraction's +subtractions +subtracts +subtrahend +subtrahend's +subtrahends +subtropical +suburb +suburb's +suburban +suburban's +suburbanite +suburbanite's +suburbanites +suburbans +suburbia +suburbia's +suburbs +subversion +subversion's +subversive +subversive's +subversives +subvert +subverted +subverting +subverts +subway +subway's +subways +succeed +succeeded +succeeding +succeeds +success +success's +successes +successful +successfully +succession +succession's +successions +successive +successively +successor +successor's +successors +succinct +succincter +succinctest +succinctly +succinctness +succinctness's +succor +succor's +succored +succoring +succors +succotash +succotash's +succulence +succulence's +succulent +succulent's +succulents +succumb +succumbed +succumbing +succumbs +such +suchlike +suck +suck's +sucked +sucker +sucker's +suckered +suckering +suckers +sucking +suckle +suckled 
+suckles +suckling +suckling's +sucklings +sucks +sucrose +sucrose's +suction +suction's +suctioned +suctioning +suctions +sudden +suddenly +suddenness +suddenness's +suds +suds's +sudsier +sudsiest +sudsy +sue +sued +suede +suede's +sues +suet +suet's +suffer +sufferance +sufferance's +suffered +sufferer +sufferer's +sufferers +suffering +suffering's +sufferings +suffers +suffice +sufficed +suffices +sufficiency +sufficiency's +sufficient +sufficiently +sufficing +suffix +suffix's +suffixed +suffixes +suffixing +suffocate +suffocated +suffocates +suffocating +suffocation +suffocation's +suffragan +suffragan's +suffragans +suffrage +suffrage's +suffragette +suffragette's +suffragettes +suffragist +suffragist's +suffragists +suffuse +suffused +suffuses +suffusing +suffusion +suffusion's +sugar +sugar's +sugarcane +sugarcane's +sugarcoat +sugarcoated +sugarcoating +sugarcoats +sugared +sugarier +sugariest +sugaring +sugarless +sugars +sugary +suggest +suggested +suggester +suggestible +suggesting +suggestion +suggestion's +suggestions +suggestive +suggestively +suggests +suicidal +suicide +suicide's +suicides +suing +suit +suit's +suitability +suitability's +suitable +suitably +suitcase +suitcase's +suitcases +suite +suite's +suited +suites +suiting +suiting's +suitor +suitor's +suitors +suits +sukiyaki +sukiyaki's +sulfate +sulfate's +sulfates +sulfide +sulfide's +sulfides +sulfur +sulfur's +sulfured +sulfuric +sulfuring +sulfurous +sulfurs +sulk +sulk's +sulked +sulkier +sulkies +sulkiest +sulkily +sulkiness +sulkiness's +sulking +sulks +sulky +sulky's +sullen +sullener +sullenest +sullenly +sullenness +sullenness's +sullied +sullies +sully +sullying +sulphur +sulphur's +sulphured +sulphuring +sulphurous +sulphurs +sultan +sultan's +sultana +sultana's +sultanas +sultanate +sultanate's +sultanates +sultans +sultrier +sultriest +sultry +sum +sum's +sumac +sumac's +sumach +sumach's +summaries +summarily +summarize +summarized +summarizes +summarizing +summary 
+summary's +summation +summation's +summations +summed +summer +summer's +summered +summerhouse +summerhouse's +summerhouses +summering +summers +summertime +summertime's +summery +summing +summit +summit's +summitry +summitry's +summits +summon +summoned +summoner +summoner's +summoners +summoning +summons +summons's +summonsed +summonses +summonsing +sumo +sumo's +sump +sump's +sumps +sumptuous +sums +sun +sun's +sunbathe +sunbathed +sunbather +sunbather's +sunbathers +sunbathes +sunbathing +sunbathing's +sunbeam +sunbeam's +sunbeams +sunblock +sunblock's +sunblocks +sunbonnet +sunbonnet's +sunbonnets +sunburn +sunburn's +sunburned +sunburning +sunburns +sunburnt +sundae +sundae's +sundaes +sunder +sundered +sundering +sunders +sundial +sundial's +sundials +sundown +sundown's +sundowns +sundries +sundries's +sundry +sunfish +sunfish's +sunfishes +sunflower +sunflower's +sunflowers +sung +sunglasses +sunglasses's +sunk +sunken +sunlamp +sunlamp's +sunlamps +sunless +sunlight +sunlight's +sunlit +sunned +sunnier +sunniest +sunning +sunny +sunrise +sunrise's +sunrises +sunroof +sunroof's +sunroofs +suns +sunscreen +sunscreen's +sunscreens +sunset +sunset's +sunsets +sunshine +sunshine's +sunspot +sunspot's +sunspots +sunstroke +sunstroke's +suntan +suntan's +suntanned +suntanning +suntans +sunup +sunup's +sup +sup's +super +super's +superabundance +superabundance's +superabundances +superabundant +superannuate +superannuated +superannuates +superannuating +superb +superber +superbest +superbly +supercharge +supercharged +supercharger +supercharger's +superchargers +supercharges +supercharging +supercilious +supercomputer +supercomputer's +supercomputers +superconductivity +superconductivity's +superconductor +superconductor's +superconductors +superego +superego's +superegos +superficial +superficiality +superficiality's +superficially +superfluity +superfluity's +superfluous +superhighway +superhighway's +superhighways +superhuman +superimpose +superimposed 
+superimposes +superimposing +superintend +superintended +superintendence +superintendence's +superintendency +superintendency's +superintendent +superintendent's +superintendents +superintending +superintends +superior +superior's +superiority +superiority's +superiors +superlative +superlative's +superlatively +superlatives +superman +superman's +supermarket +supermarket's +supermarkets +supermen +supermodel +supermodel's +supermodels +supernatural +supernaturals +supernova +supernova's +supernovae +supernovas +supernumeraries +supernumerary +supernumerary's +superpower +superpower's +superpowers +supers +superscript +superscript's +superscripts +supersede +superseded +supersedes +superseding +supersize +supersized +supersizes +supersizing +supersonic +superstar +superstar's +superstars +superstition +superstition's +superstitions +superstitious +superstitiously +superstructure +superstructure's +superstructures +supertanker +supertanker's +supertankers +supervene +supervened +supervenes +supervening +supervise +supervised +supervises +supervising +supervision +supervision's +supervisions +supervisor +supervisor's +supervisors +supervisory +supine +supped +supper +supper's +suppers +supping +supplant +supplanted +supplanting +supplants +supple +supplement +supplement's +supplemental +supplementary +supplemented +supplementing +supplements +suppleness +suppleness's +suppler +supplest +suppliant +suppliant's +suppliants +supplicant +supplicant's +supplicants +supplicate +supplicated +supplicates +supplicating +supplication +supplication's +supplications +supplied +supplier +supplier's +suppliers +supplies +supply +supply's +supplying +support +support's +supportable +supported +supporter +supporter's +supporters +supporting +supportive +supports +suppose +supposed +supposedly +supposes +supposing +supposition +supposition's +suppositions +suppositories +suppository +suppository's +suppress +suppressed +suppresses +suppressing +suppression +suppression's +suppurate 
+suppurated +suppurates +suppurating +suppuration +suppuration's +supranational +supremacist +supremacist's +supremacists +supremacy +supremacy's +supreme +supremely +sups +surcease +surcease's +surceased +surceases +surceasing +surcharge +surcharge's +surcharged +surcharges +surcharging +sure +surefire +surefooted +surely +sureness +sureness's +surer +surest +sureties +surety +surety's +surf +surf's +surface +surface's +surfaced +surfaces +surfacing +surfboard +surfboard's +surfboarded +surfboarding +surfboards +surfed +surfeit +surfeit's +surfeited +surfeiting +surfeits +surfer +surfer's +surfers +surfing +surfing's +surfs +surge +surge's +surged +surgeon +surgeon's +surgeons +surgeries +surgery +surgery's +surges +surgical +surgically +surging +surlier +surliest +surliness +surliness's +surly +surmise +surmise's +surmised +surmises +surmising +surmount +surmountable +surmounted +surmounting +surmounts +surname +surname's +surnames +surpass +surpassed +surpasses +surpassing +surplice +surplice's +surplices +surplus +surplus's +surplused +surpluses +surplusing +surplussed +surplussing +surprise +surprise's +surprised +surprises +surprising +surprisingly +surprisings +surreal +surrealism +surrealism's +surrealist +surrealist's +surrealistic +surrealists +surrender +surrender's +surrendered +surrendering +surrenders +surreptitious +surreptitiously +surrey +surrey's +surreys +surrogate +surrogate's +surrogates +surround +surrounded +surrounding +surrounding's +surroundings +surroundings's +surrounds +surtax +surtax's +surtaxed +surtaxes +surtaxing +surveillance +surveillance's +survey +survey's +surveyed +surveying +surveyor +surveyor's +surveyors +surveys +survival +survival's +survivals +survive +survived +survives +surviving +survivor +survivor's +survivors +susceptibility +susceptibility's +susceptible +sushi +sushi's +suspect +suspect's +suspected +suspecting +suspects +suspend +suspended +suspender +suspender's +suspenders +suspending +suspends +suspense 
+suspense's +suspenseful +suspension +suspension's +suspensions +suspicion +suspicion's +suspicions +suspicious +suspiciously +sustain +sustainable +sustained +sustaining +sustains +sustenance +sustenance's +suture +suture's +sutured +sutures +suturing +svelte +svelter +sveltest +swab +swab's +swabbed +swabbing +swabs +swaddle +swaddled +swaddles +swaddling +swag +swag's +swagged +swagger +swagger's +swaggered +swaggerer +swaggering +swaggers +swagging +swags +swain +swain's +swains +swallow +swallow's +swallowed +swallowing +swallows +swallowtail +swallowtail's +swallowtails +swam +swami +swami's +swamis +swamp +swamp's +swamped +swampier +swampiest +swamping +swamps +swampy +swan +swan's +swank +swank's +swanked +swanker +swankest +swankier +swankiest +swanking +swanks +swanky +swans +swap +swap's +swapped +swapping +swaps +sward +sward's +swards +swarm +swarm's +swarmed +swarming +swarms +swarthier +swarthiest +swarthy +swash +swash's +swashbuckler +swashbuckler's +swashbucklers +swashbuckling +swashbuckling's +swashed +swashes +swashing +swastika +swastika's +swastikas +swat +swat's +swatch +swatch's +swatches +swath +swath's +swathe +swathe's +swathed +swathes +swathing +swaths +swats +swatted +swatter +swatter's +swattered +swattering +swatters +swatting +sway +sway's +swaybacked +swayed +swaying +sways +swear +swearer +swearer's +swearers +swearing +swears +swearword +swearword's +swearwords +sweat +sweat's +sweater +sweater's +sweaters +sweatier +sweatiest +sweating +sweatpants +sweatpants's +sweats +sweats's +sweatshirt +sweatshirt's +sweatshirts +sweatshop +sweatshop's +sweatshops +sweaty +sweep +sweep's +sweeper +sweeper's +sweepers +sweeping +sweeping's +sweepings +sweepings's +sweeps +sweepstake +sweepstake's +sweepstakes +sweepstakes's +sweet +sweet's +sweetbread +sweetbread's +sweetbreads +sweetbriar +sweetbriar's +sweetbriars +sweetbrier +sweetbrier's +sweetbriers +sweeten +sweetened +sweetener +sweetener's +sweeteners +sweetening +sweetening's 
+sweetens +sweeter +sweetest +sweetheart +sweetheart's +sweethearts +sweetie +sweetie's +sweeties +sweetish +sweetly +sweetmeat +sweetmeat's +sweetmeats +sweetness +sweetness's +sweets +swell +swell's +swelled +sweller +swellest +swellhead +swellhead's +swellheaded +swellheads +swelling +swelling's +swellings +swells +swelter +swelter's +sweltered +sweltering +swelters +swept +swerve +swerve's +swerved +swerves +swerving +swift +swift's +swifter +swiftest +swiftly +swiftness +swiftness's +swifts +swig +swig's +swigged +swigging +swigs +swill +swill's +swilled +swilling +swills +swim +swim's +swimmer +swimmer's +swimmers +swimming +swimming's +swims +swimsuit +swimsuit's +swimsuits +swindle +swindle's +swindled +swindler +swindler's +swindlers +swindles +swindling +swine +swine's +swines +swing +swing's +swinger +swinger's +swingers +swinging +swings +swinish +swipe +swipe's +swiped +swipes +swiping +swirl +swirl's +swirled +swirling +swirls +swirly +swish +swish's +swished +swisher +swishes +swishest +swishing +switch +switch's +switchable +switchback +switchback's +switchbacks +switchblade +switchblade's +switchblades +switchboard +switchboard's +switchboards +switched +switcher +switches +switching +swivel +swivel's +swiveled +swiveling +swivelled +swivelling +swivels +swollen +swoon +swoon's +swooned +swooning +swoons +swoop +swoop's +swooped +swooping +swoops +swop +swop's +swopped +swopping +swops +sword +sword's +swordfish +swordfish's +swordfishes +swordplay +swordplay's +swords +swordsman +swordsman's +swordsmen +swore +sworn +swum +swung +sybarite +sybarite's +sybarites +sybaritic +sycamore +sycamore's +sycamores +sycophant +sycophant's +sycophantic +sycophants +syllabi +syllabic +syllabication +syllabication's +syllabification +syllabification's +syllabified +syllabifies +syllabify +syllabifying +syllable +syllable's +syllables +syllabus +syllabus's +syllabuses +syllogism +syllogism's +syllogisms +syllogistic +sylph +sylph's +sylphs +sylvan +symbioses 
+symbiosis +symbiosis's +symbiotic +symbol +symbol's +symbolic +symbolically +symbolism +symbolism's +symbolization +symbolization's +symbolize +symbolized +symbolizes +symbolizing +symbols +symmetric +symmetrical +symmetrically +symmetricly +symmetries +symmetry +symmetry's +sympathetic +sympathetically +sympathies +sympathies's +sympathize +sympathized +sympathizer +sympathizer's +sympathizers +sympathizes +sympathizing +sympathy +sympathy's +symphonic +symphonies +symphony +symphony's +symposia +symposium +symposium's +symposiums +symptom +symptom's +symptomatic +symptoms +synagog +synagog's +synagogs +synagogue +synagogue's +synagogues +synapse +synapse's +synapses +sync +sync's +synced +synch +synch's +synched +synches +synching +synchronization +synchronization's +synchronizations +synchronize +synchronized +synchronizes +synchronizing +synchronous +synchronously +synchs +syncing +syncopate +syncopated +syncopates +syncopating +syncopation +syncopation's +syncs +syndicate +syndicate's +syndicated +syndicates +syndicating +syndication +syndication's +syndrome +syndrome's +syndromes +synergism +synergism's +synergistic +synergy +synergy's +synod +synod's +synods +synonym +synonym's +synonymous +synonyms +synopses +synopsis +synopsis's +syntactic +syntactical +syntactically +syntax +syntax's +syntheses +synthesis +synthesis's +synthesize +synthesized +synthesizer +synthesizer's +synthesizers +synthesizes +synthesizing +synthetic +synthetic's +synthetically +synthetics +syphilis +syphilis's +syphilitic +syphilitic's +syphilitics +syphon +syphon's +syphoned +syphoning +syphons +syringe +syringe's +syringed +syringes +syringing +syrup +syrup's +syrups +syrupy +system +system's +systematic +systematically +systematize +systematized +systematizes +systematizing +systemic +systemic's +systemics +systems +systolic +séance +séance's +séances +t +tab +tab's +tabbed +tabbies +tabbing +tabby +tabby's +tabernacle +tabernacle's +tabernacles +table +table's +tableau 
+tableau's +tableaus +tableaux +tablecloth +tablecloth's +tablecloths +tabled +tableland +tableland's +tablelands +tables +tablespoon +tablespoon's +tablespoonful +tablespoonful's +tablespoonfuls +tablespoons +tablespoonsful +tablet +tablet's +tablets +tableware +tableware's +tabling +tabloid +tabloid's +tabloids +taboo +taboo's +tabooed +tabooing +taboos +tabs +tabu +tabu's +tabued +tabuing +tabular +tabulate +tabulated +tabulates +tabulating +tabulation +tabulation's +tabulator +tabulator's +tabulators +tabus +tachometer +tachometer's +tachometers +tacit +tacitly +tacitness +tacitness's +taciturn +taciturnity +taciturnity's +tack +tack's +tacked +tackier +tackiest +tackiness +tackiness's +tacking +tackle +tackle's +tackled +tackler +tackler's +tacklers +tackles +tackling +tacks +tacky +taco +taco's +tacos +tact +tact's +tactful +tactfully +tactic +tactic's +tactical +tactically +tactician +tactician's +tacticians +tactics +tactile +tactless +tactlessly +tactlessness +tactlessness's +tad +tad's +tadpole +tadpole's +tadpoles +tads +taffeta +taffeta's +taffies +taffy +taffy's +tag +tag's +tagged +tagging +tags +tail +tail's +tailcoat +tailcoat's +tailcoats +tailed +tailgate +tailgate's +tailgated +tailgates +tailgating +tailing +tailless +taillight +taillight's +taillights +tailor +tailor's +tailored +tailoring +tailoring's +tailors +tailpipe +tailpipe's +tailpipes +tails +tailspin +tailspin's +tailspins +tailwind +tailwind's +tailwinds +taint +taint's +tainted +tainting +taints +take +take's +takeaways +taken +takeoff +takeoff's +takeoffs +takeout +takeout's +takeouts +takeover +takeover's +takeovers +taker +taker's +takers +takes +taking +taking's +takings +takings's +talc +talc's +tale +tale's +talent +talent's +talented +talents +tales +talisman +talisman's +talismans +talk +talk's +talkative +talkativeness +talkativeness's +talked +talker +talker's +talkers +talking +talks +tall +taller +tallest +tallied +tallies +tallness +tallness's +tallow +tallow's +tally 
+tally's +tallyho +tallyho's +tallyhoed +tallyhoing +tallyhos +tallying +talon +talon's +talons +tam +tam's +tamable +tamale +tamale's +tamales +tamarind +tamarind's +tamarinds +tambourine +tambourine's +tambourines +tame +tameable +tamed +tamely +tameness +tameness's +tamer +tamer's +tamers +tames +tamest +taming +tamp +tamped +tamper +tampered +tampering +tampers +tamping +tampon +tampon's +tampons +tamps +tams +tan +tan's +tanager +tanager's +tanagers +tandem +tandem's +tandems +tang +tang's +tangelo +tangelo's +tangelos +tangent +tangent's +tangential +tangents +tangerine +tangerine's +tangerines +tangibility +tangibility's +tangible +tangible's +tangibles +tangibly +tangier +tangiest +tangle +tangle's +tangled +tangles +tangling +tango +tango's +tangoed +tangoing +tangos +tangs +tangy +tank +tank's +tankard +tankard's +tankards +tanked +tanker +tanker's +tankers +tankful +tankful's +tankfuls +tanking +tanks +tanned +tanner +tanner's +tanneries +tanners +tannery +tannery's +tannest +tannin +tannin's +tanning +tans +tansy +tansy's +tantalize +tantalized +tantalizes +tantalizing +tantalizingly +tantamount +tantrum +tantrum's +tantrums +tap +tap's +tape +tape's +taped +taper +taper's +tapered +tapering +tapers +tapes +tapestries +tapestry +tapestry's +tapeworm +tapeworm's +tapeworms +taping +tapioca +tapioca's +tapir +tapir's +tapirs +tapped +tapping +taproom +taproom's +taprooms +taproot +taproot's +taproots +taps +tar +tar's +tarantula +tarantula's +tarantulae +tarantulas +tardier +tardiest +tardily +tardiness +tardiness's +tardy +tare +tare's +tared +tares +target +target's +targeted +targeting +targets +tariff +tariff's +tariffs +taring +tarmac +tarmac's +tarmacked +tarmacking +tarmacs +tarnish +tarnish's +tarnished +tarnishes +tarnishing +taro +taro's +taros +tarot +tarot's +tarots +tarp +tarp's +tarpaulin +tarpaulin's +tarpaulins +tarpon +tarpon's +tarpons +tarps +tarragon +tarragon's +tarragons +tarred +tarried +tarrier +tarries +tarriest +tarring +tarry 
+tarrying +tars +tart +tart's +tartan +tartan's +tartans +tartar +tartar's +tartars +tarter +tartest +tartly +tartness +tartness's +tarts +taser +taser's +tasered +tasering +tasers +task +task's +tasked +tasking +taskmaster +taskmaster's +taskmasters +tasks +tassel +tassel's +tasseled +tasseling +tasselled +tasselling +tassels +taste +taste's +tasted +tasteful +tastefully +tasteless +tastelessly +tastelessness +tastelessness's +taster +taster's +tasters +tastes +tastier +tastiest +tastiness +tastiness's +tasting +tasty +tat +tats +tatted +tatter +tatter's +tattered +tattering +tatters +tatting +tatting's +tattle +tattle's +tattled +tattler +tattler's +tattlers +tattles +tattletale +tattletale's +tattletales +tattling +tattoo +tattoo's +tattooed +tattooing +tattooist +tattooist's +tattooists +tattoos +tatty +taught +taunt +taunt's +taunted +taunting +taunts +taupe +taupe's +taut +tauter +tautest +tautly +tautness +tautness's +tautological +tautologies +tautology +tautology's +tavern +tavern's +taverns +tawdrier +tawdriest +tawdriness +tawdriness's +tawdry +tawnier +tawniest +tawny +tawny's +tax +tax's +taxable +taxation +taxation's +taxed +taxes +taxi +taxi's +taxicab +taxicab's +taxicabs +taxidermist +taxidermist's +taxidermists +taxidermy +taxidermy's +taxied +taxies +taxiing +taxing +taxis +taxonomic +taxonomies +taxonomy +taxonomy's +taxpayer +taxpayer's +taxpayers +taxying +tea +tea's +teabag +teach +teachable +teacher +teacher's +teachers +teaches +teaching +teaching's +teachings +teacup +teacup's +teacups +teak +teak's +teakettle +teakettle's +teakettles +teaks +teal +teal's +tealight +tealight's +tealights +teals +team +team's +teamed +teaming +teammate +teammate's +teammates +teams +teamster +teamster's +teamsters +teamwork +teamwork's +teapot +teapot's +teapots +tear +tear's +teardrop +teardrop's +teardrops +teared +tearful +tearfully +teargas +teargas's +teargases +teargassed +teargasses +teargassing +tearier +teariest +tearing +tearjerker +tearjerker's 
+tearjerkers +tearoom +tearoom's +tearooms +tears +teary +teas +tease +tease's +teased +teasel +teasel's +teasels +teaser +teaser's +teasers +teases +teasing +teaspoon +teaspoon's +teaspoonful +teaspoonful's +teaspoonfuls +teaspoons +teaspoonsful +teat +teat's +teatime +teats +teazel +teazel's +teazels +teazle +teazle's +teazles +technical +technicalities +technicality +technicality's +technically +technician +technician's +technicians +technique +technique's +techniques +techno +technocracy +technocracy's +technocrat +technocrat's +technocrats +technological +technologically +technologies +technologist +technologist's +technologists +technology +technology's +techs +tectonics +tectonics's +tedious +tediously +tediousness +tediousness's +tedium +tedium's +tee +tee's +teed +teeing +teem +teemed +teeming +teems +teen +teen's +teenage +teenaged +teenager +teenager's +teenagers +teenier +teeniest +teens +teensier +teensiest +teensy +teeny +teepee +teepee's +teepees +tees +teeter +teeter's +teetered +teetering +teeters +teeth +teethe +teethed +teethes +teething +teetotal +teetotaler +teetotaler's +teetotalers +teetotaller +teetotaller's +teetotallers +telecast +telecast's +telecasted +telecaster +telecaster's +telecasters +telecasting +telecasts +telecommunication +telecommunication's +telecommunications +telecommunications's +telecommute +telecommuted +telecommuter +telecommuter's +telecommuters +telecommutes +telecommuting +telecommuting's +teleconference +teleconference's +teleconferenced +teleconferences +teleconferencing +telegram +telegram's +telegrams +telegraph +telegraph's +telegraphed +telegrapher +telegrapher's +telegraphers +telegraphic +telegraphing +telegraphs +telegraphy +telegraphy's +telekinesis +telekinesis's +telemarketing +telemarketing's +telemeter +telemeter's +telemeters +telemetries +telemetry +telemetry's +telepathic +telepathically +telepathy +telepathy's +telephone +telephone's +telephoned +telephones +telephonic +telephoning +telephony 
+telephony's +telephoto +telephoto's +telephotos +telescope +telescope's +telescoped +telescopes +telescopic +telescoping +telethon +telethon's +telethons +teletype +teletypes +teletypewriter +teletypewriter's +teletypewriters +televangelist +televangelist's +televangelists +televise +televised +televises +televising +television +television's +televisions +telex +telex's +telexed +telexes +telexing +tell +teller +teller's +tellers +telling +tellingly +tells +telltale +telltale's +telltales +temblor +temblor's +temblors +temerity +temerity's +temp +temp's +temped +temper +temper's +tempera +tempera's +temperament +temperament's +temperamental +temperamentally +temperaments +temperance +temperance's +temperas +temperate +temperature +temperature's +temperatures +tempered +tempering +tempers +tempest +tempest's +tempests +tempestuous +tempestuously +tempestuousness +tempestuousness's +tempi +temping +template +template's +templates +temple +temple's +temples +tempo +tempo's +temporal +temporally +temporaries +temporarily +temporary +temporary's +temporize +temporized +temporizes +temporizing +tempos +temps +tempt +temptation +temptation's +temptations +tempted +tempter +tempter's +tempters +tempting +temptingly +temptress +temptress's +temptresses +tempts +tempura +tempura's +ten +ten's +tenability +tenability's +tenable +tenacious +tenaciously +tenacity +tenacity's +tenancies +tenancy +tenancy's +tenant +tenant's +tenanted +tenanting +tenants +tend +tended +tendencies +tendency +tendency's +tendentious +tendentiously +tendentiousness +tendentiousness's +tender +tender's +tendered +tenderer +tenderest +tenderfeet +tenderfoot +tenderfoot's +tenderfoots +tenderhearted +tendering +tenderize +tenderized +tenderizer +tenderizer's +tenderizers +tenderizes +tenderizing +tenderloin +tenderloin's +tenderloins +tenderly +tenderness +tenderness's +tenders +tending +tendinitis +tendinitis's +tendon +tendon's +tendonitis +tendonitis's +tendons +tendril +tendril's +tendrils +tends 
+tenement +tenement's +tenements +tenet +tenet's +tenets +tenfold +tennis +tennis's +tenon +tenon's +tenoned +tenoning +tenons +tenor +tenor's +tenors +tenpin +tenpin's +tenpins +tenpins's +tens +tense +tense's +tensed +tensely +tenseness +tenseness's +tenser +tenses +tensest +tensile +tensing +tension +tension's +tensions +tensor +tensors +tent +tent's +tentacle +tentacle's +tentacles +tentative +tentatively +tented +tenth +tenth's +tenths +tenting +tents +tenuous +tenuously +tenuousness +tenuousness's +tenure +tenure's +tenured +tenures +tenuring +tepee +tepee's +tepees +tepid +tequila +tequila's +tequilas +terabit +terabit's +terabits +terabyte +terabyte's +terabytes +tercentenaries +tercentenary +tercentenary's +term +term's +termagant +termagant's +termagants +termed +terminable +terminal +terminal's +terminally +terminals +terminate +terminated +terminates +terminating +termination +termination's +terminations +terminator +terminators +terming +termini +terminological +terminologies +terminology +terminology's +terminus +terminus's +terminuses +termite +termite's +termites +termly +terms +tern +tern's +terns +terrace +terrace's +terraced +terraces +terracing +terrain +terrain's +terrains +terrapin +terrapin's +terrapins +terraria +terrarium +terrarium's +terrariums +terrestrial +terrestrial's +terrestrials +terrible +terribly +terrier +terrier's +terriers +terrific +terrifically +terrified +terrifies +terrify +terrifying +terrifyingly +territorial +territorial's +territorials +territories +territory +territory's +terror +terror's +terrorism +terrorism's +terrorist +terrorist's +terrorists +terrorize +terrorized +terrorizes +terrorizing +terrors +terry +terry's +terse +tersely +terseness +terseness's +terser +tersest +tertiary +test +test's +testable +testament +testament's +testamentary +testaments +testate +testates +tested +tester +tester's +testers +testes +testicle +testicle's +testicles +testier +testiest +testified +testifies +testify +testifying 
+testily +testimonial +testimonial's +testimonials +testimonies +testimony +testimony's +testiness +testiness's +testing +testis +testis's +testosterone +testosterone's +tests +testy +tetanus +tetanus's +tether +tether's +tethered +tethering +tethers +tetrahedra +tetrahedron +tetrahedron's +tetrahedrons +text +text's +textbook +textbook's +textbooks +texted +textile +textile's +textiles +texting +texts +textual +textually +textural +texture +texture's +textured +textures +texturing +thalami +thalamus +thalamus's +thallium +thallium's +than +thank +thanked +thankful +thankfully +thankfulness +thankfulness's +thanking +thankless +thanklessly +thanks +thanksgiving +thanksgiving's +thanksgivings +that +that's +thatch +thatch's +thatched +thatcher +thatches +thatching +thatching's +thaw +thaw's +thawed +thawing +thaws +the +theater +theater's +theaters +theatre +theatre's +theatres +theatrical +theatrically +thee +thees +theft +theft's +thefts +their +theirs +theism +theism's +theist +theist's +theistic +theists +them +thematic +thematically +theme +theme's +themes +themselves +then +then's +thence +thenceforth +thenceforward +theocracies +theocracy +theocracy's +theocratic +theologian +theologian's +theologians +theological +theologies +theology +theology's +theorem +theorem's +theorems +theoretic +theoretical +theoretically +theoretician +theoretician's +theoreticians +theories +theorist +theorist's +theorists +theorize +theorized +theorizes +theorizing +theory +theory's +theosophy +theosophy's +therapeutic +therapeutically +therapeutics +therapeutics's +therapies +therapist +therapist's +therapists +therapy +therapy's +there +there's +thereabout +thereabouts +thereafter +thereby +therefore +therefrom +therein +thereof +thereon +thereto +thereupon +therewith +thermal +thermal's +thermally +thermals +thermionic +thermodynamic +thermodynamics +thermodynamics's +thermometer +thermometer's +thermometers +thermonuclear +thermoplastic +thermoplastic's +thermoplastics 
+thermos +thermos's +thermoses +thermostat +thermostat's +thermostatic +thermostats +thesauri +thesaurus +thesaurus's +thesauruses +these +theses +thesis +thesis's +thespian +thespian's +thespians +theta +they +they'd +they'll +they're +they've +thiamin +thiamin's +thiamine +thiamine's +thick +thick's +thicken +thickened +thickener +thickener's +thickeners +thickening +thickening's +thickenings +thickens +thicker +thickest +thicket +thicket's +thickets +thickly +thickness +thickness's +thicknesses +thickset +thief +thief's +thieve +thieved +thievery +thievery's +thieves +thieving +thievish +thigh +thigh's +thighbone +thighbone's +thighbones +thighs +thimble +thimble's +thimbleful +thimbleful's +thimblefuls +thimbles +thin +thine +thing +thing's +thingamajig +thingamajig's +thingamajigs +things +think +thinker +thinker's +thinkers +thinking +thinking's +thinks +thinly +thinned +thinner +thinner's +thinners +thinness +thinness's +thinnest +thinning +thins +third +third's +thirdly +thirds +thirst +thirst's +thirsted +thirstier +thirstiest +thirstily +thirsting +thirsts +thirsty +thirteen +thirteen's +thirteens +thirteenth +thirteenth's +thirteenths +thirties +thirtieth +thirtieth's +thirtieths +thirty +thirty's +this +thistle +thistle's +thistledown +thistledown's +thistles +thither +tho +thong +thong's +thongs +thoraces +thoracic +thorax +thorax's +thoraxes +thorium +thorium's +thorn +thorn's +thornier +thorniest +thorns +thorny +thorough +thoroughbred +thoroughbred's +thoroughbreds +thorougher +thoroughest +thoroughfare +thoroughfare's +thoroughfares +thoroughgoing +thoroughly +thoroughness +thoroughness's +those +thou +thou's +though +thought +thought's +thoughtful +thoughtfully +thoughtfulness +thoughtfulness's +thoughtless +thoughtlessly +thoughtlessness +thoughtlessness's +thoughts +thous +thousand +thousand's +thousands +thousandth +thousandth's +thousandths +thraldom +thraldom's +thrall +thrall's +thralldom +thralldom's +thralled +thralling +thralls +thrash 
+thrash's +thrashed +thrasher +thrasher's +thrashers +thrashes +thrashing +thrashing's +thrashings +thread +thread's +threadbare +threaded +threading +threads +threat +threat's +threaten +threatened +threatening +threateningly +threatens +threats +three +three's +threefold +threes +threescore +threescore's +threescores +threesome +threesome's +threesomes +threnodies +threnody +threnody's +thresh +thresh's +threshed +thresher +thresher's +threshers +threshes +threshing +threshold +threshold's +thresholds +threw +thrice +thrift +thrift's +thriftier +thriftiest +thriftily +thriftiness +thriftiness's +thrifts +thrifty +thrill +thrill's +thrilled +thriller +thriller's +thrillers +thrilling +thrills +thrive +thrived +thriven +thrives +thriving +throat +throat's +throatier +throatiest +throatily +throatiness +throatiness's +throats +throaty +throb +throb's +throbbed +throbbing +throbs +throe +throe's +throes +thromboses +thrombosis +thrombosis's +throne +throne's +thrones +throng +throng's +thronged +thronging +throngs +throttle +throttle's +throttled +throttles +throttling +through +throughout +throughput +throughway +throughway's +throughways +throve +throw +throw's +throwaway +throwaway's +throwaways +throwback +throwback's +throwbacks +thrower +thrower's +throwers +throwing +thrown +throws +thru +thrum +thrum's +thrummed +thrumming +thrums +thrush +thrush's +thrushes +thrust +thrust's +thrusting +thrusts +thruway +thruway's +thruways +thud +thud's +thudded +thudding +thuds +thug +thug's +thugs +thumb +thumb's +thumbed +thumbing +thumbnail +thumbnail's +thumbnails +thumbs +thumbscrew +thumbscrew's +thumbscrews +thumbtack +thumbtack's +thumbtacks +thump +thump's +thumped +thumping +thumps +thunder +thunder's +thunderbolt +thunderbolt's +thunderbolts +thunderclap +thunderclap's +thunderclaps +thundercloud +thundercloud's +thunderclouds +thundered +thunderhead +thunderhead's +thunderheads +thundering +thunderous +thunderously +thunders +thundershower +thundershower's 
+thundershowers +thunderstorm +thunderstorm's +thunderstorms +thunderstruck +thus +thwack +thwack's +thwacked +thwacking +thwacks +thwart +thwart's +thwarted +thwarting +thwarts +thy +thyme +thyme's +thymi +thymus +thymus's +thymuses +thyroid +thyroid's +thyroids +thyself +ti +ti's +tiara +tiara's +tiaras +tibia +tibia's +tibiae +tibias +tic +tic's +tick +tick's +ticked +ticker +ticker's +tickers +ticket +ticket's +ticketed +ticketing +tickets +ticking +ticking's +tickle +tickle's +tickled +tickles +tickling +ticklish +ticks +tics +tidal +tidbit +tidbit's +tidbits +tiddlywinks +tiddlywinks's +tide +tide's +tided +tides +tidewater +tidewater's +tidewaters +tidied +tidier +tidies +tidiest +tidily +tidiness +tidiness's +tiding +tidings +tidings's +tidy +tidy's +tidying +tie +tie's +tiebreaker +tiebreaker's +tiebreakers +tied +tieing +tier +tier's +tiers +ties +tiff +tiff's +tiffed +tiffing +tiffs +tiger +tiger's +tigers +tight +tighten +tightened +tightening +tightens +tighter +tightest +tightfisted +tightly +tightness +tightness's +tightrope +tightrope's +tightropes +tights +tights's +tightwad +tightwad's +tightwads +tigress +tigress's +tigresses +tike +tike's +tikes +tilde +tilde's +tildes +tile +tile's +tiled +tiles +tiling +tiling's +till +till's +tillable +tillage +tillage's +tilled +tiller +tiller's +tillers +tilling +tills +tilt +tilt's +tilted +tilting +tilts +timber +timber's +timbered +timbering +timberland +timberland's +timberline +timberline's +timberlines +timbers +timbre +timbre's +timbres +time +time's +timed +timekeeper +timekeeper's +timekeepers +timeless +timelessness +timelessness's +timelier +timeliest +timeline +timeline's +timelines +timeliness +timeliness's +timely +timepiece +timepiece's +timepieces +timer +timer's +timers +times +timescale +timescales +timestamp +timestamp's +timestamps +timetable +timetable's +timetabled +timetables +timetabling +timeworn +timezone +timid +timider +timidest +timidity +timidity's +timidly +timing +timing's 
+timings +timorous +timorously +timpani +timpani's +timpanist +timpanist's +timpanists +tin +tin's +tincture +tincture's +tinctured +tinctures +tincturing +tinder +tinder's +tinderbox +tinderbox's +tinderboxes +tine +tine's +tines +tinfoil +tinfoil's +ting +tinge +tinge's +tinged +tingeing +tinges +tinging +tingle +tingle's +tingled +tingles +tingling +tingling's +tinglings +tingly +tings +tinier +tiniest +tinker +tinker's +tinkered +tinkering +tinkers +tinkle +tinkle's +tinkled +tinkles +tinkling +tinned +tinnier +tinniest +tinning +tinny +tins +tinsel +tinsel's +tinseled +tinseling +tinselled +tinselling +tinsels +tinsmith +tinsmith's +tinsmiths +tint +tint's +tinted +tinting +tintinnabulation +tintinnabulation's +tintinnabulations +tints +tiny +tip +tip's +tipi +tipi's +tipis +tipped +tipper +tipper's +tippers +tipping +tipple +tipple's +tippled +tippler +tippler's +tipplers +tipples +tippling +tips +tipsier +tipsiest +tipsily +tipster +tipster's +tipsters +tipsy +tiptoe +tiptoe's +tiptoed +tiptoeing +tiptoes +tiptop +tiptop's +tiptops +tirade +tirade's +tirades +tire +tire's +tired +tireder +tiredest +tiredness +tiredness's +tireless +tirelessly +tirelessness +tirelessness's +tires +tiresome +tiresomely +tiresomeness +tiresomeness's +tiring +tiro +tiro's +tiros +tissue +tissue's +tissues +tit +tit's +titan +titan's +titanic +titanium +titanium's +titans +titbit +titbit's +titbits +tithe +tithe's +tithed +tithes +tithing +titillate +titillated +titillates +titillating +titillation +titillation's +title +title's +titled +titles +titling +titmice +titmouse +titmouse's +tits +titter +titter's +tittered +tittering +titters +tittle +tittle's +tittles +titular +tizzies +tizzy +tizzy's +to +toad +toad's +toadied +toadies +toads +toadstool +toadstool's +toadstools +toady +toady's +toadying +toast +toast's +toasted +toaster +toaster's +toasters +toastier +toastiest +toasting +toastmaster +toastmaster's +toastmasters +toasts +toasty +tobacco +tobacco's +tobaccoes 
+tobacconist +tobacconist's +tobacconists +tobaccos +toboggan +toboggan's +tobogganed +tobogganing +toboggans +tocsin +tocsin's +tocsins +today +today's +toddies +toddle +toddle's +toddled +toddler +toddler's +toddlers +toddles +toddling +toddy +toddy's +toe +toe's +toed +toehold +toehold's +toeholds +toeing +toenail +toenail's +toenails +toes +toffee +toffee's +toffees +toffies +toffy +toffy's +tofu +tofu's +tog +tog's +toga +toga's +togae +togas +together +togetherness +togetherness's +toggle +toggle's +toggled +toggles +toggling +togs +togs's +toil +toil's +toiled +toiler +toiler's +toilers +toilet +toilet's +toileted +toileting +toiletries +toiletry +toiletry's +toilets +toilette +toilette's +toiling +toils +toilsome +toke +toke's +toked +token +token's +tokenism +tokenism's +tokens +tokes +toking +told +tolerable +tolerably +tolerance +tolerance's +tolerances +tolerant +tolerantly +tolerate +tolerated +tolerates +tolerating +toleration +toleration's +toll +toll's +tollbooth +tollbooth's +tollbooths +tolled +tollgate +tollgate's +tollgates +tolling +tolls +tom +tom's +tomahawk +tomahawk's +tomahawked +tomahawking +tomahawks +tomato +tomato's +tomatoes +tomb +tomb's +tombed +tombing +tomboy +tomboy's +tomboys +tombs +tombstone +tombstone's +tombstones +tomcat +tomcat's +tomcats +tome +tome's +tomes +tomfooleries +tomfoolery +tomfoolery's +tomorrow +tomorrow's +tomorrows +toms +ton +ton's +tonal +tonalities +tonality +tonality's +tone +tone's +toned +toneless +toner +tones +tong +tong's +tongs +tongue +tongue's +tongued +tongues +tonguing +tonic +tonic's +tonics +tonier +toniest +tonight +tonight's +toning +tonnage +tonnage's +tonnages +tonne +tonne's +tonnes +tons +tonsil +tonsil's +tonsillectomies +tonsillectomy +tonsillectomy's +tonsillitis +tonsillitis's +tonsils +tonsorial +tonsure +tonsure's +tonsured +tonsures +tonsuring +tony +too +took +tool +tool's +toolbar +toolbar's +toolbars +toolbox +toolbox's +toolboxes +tooled +tooling +toolkit +tools +toot 
+toot's +tooted +tooth +tooth's +toothache +toothache's +toothaches +toothbrush +toothbrush's +toothbrushes +toothed +toothier +toothiest +toothless +toothpaste +toothpaste's +toothpastes +toothpick +toothpick's +toothpicks +toothsome +toothy +tooting +toots +top +top's +topaz +topaz's +topazes +topcoat +topcoat's +topcoats +topic +topic's +topical +topically +topics +topknot +topknot's +topknots +topless +topmast +topmast's +topmasts +topmost +topographer +topographer's +topographers +topographic +topographical +topographies +topography +topography's +topological +topologically +topology +topped +topping +topping's +toppings +topple +toppled +topples +toppling +tops +topsail +topsail's +topsails +topside +topside's +topsides +topsoil +topsoil's +toque +toque's +toques +tor +tor's +torch +torch's +torched +torches +torching +torchlight +torchlight's +tore +toreador +toreador's +toreadors +torment +torment's +tormented +tormenter +tormenter's +tormenters +tormenting +tormentor +tormentor's +tormentors +torments +torn +tornado +tornado's +tornadoes +tornados +torpedo +torpedo's +torpedoed +torpedoes +torpedoing +torpedos +torpid +torpidity +torpidity's +torpor +torpor's +torque +torque's +torqued +torques +torquing +torrent +torrent's +torrential +torrents +torrid +tors +torsi +torsion +torsion's +torso +torso's +torsos +tort +tort's +torte +torte's +tortes +tortilla +tortilla's +tortillas +tortoise +tortoise's +tortoises +tortoiseshell +tortoiseshell's +tortoiseshells +torts +tortuous +tortuously +torture +torture's +tortured +torturer +torturer's +torturers +tortures +torturing +torus +toss +toss's +tossed +tosses +tossing +tossup +tossup's +tossups +tost +tot +tot's +total +total's +totaled +totaling +totalitarian +totalitarian's +totalitarianism +totalitarianism's +totalitarians +totalities +totality +totality's +totalled +totalling +totally +totals +tote +tote's +toted +totem +totem's +totemic +totems +totes +toting +tots +totted +totter +totter's +tottered 
+tottering +totters +totting +toucan +toucan's +toucans +touch +touch's +touchdown +touchdown's +touchdowns +touched +touches +touchier +touchiest +touching +touchingly +touchings +touchstone +touchstone's +touchstones +touchy +touché +tough +tough's +toughen +toughened +toughening +toughens +tougher +toughest +toughly +toughness +toughness's +toughs +toupee +toupee's +toupees +tour +tour's +toured +touring +tourism +tourism's +tourist +tourist's +tourists +tourmaline +tourmaline's +tournament +tournament's +tournaments +tourney +tourney's +tourneys +tourniquet +tourniquet's +tourniquets +tours +tousle +tousled +tousles +tousling +tout +tout's +touted +touting +touts +tow +tow's +toward +towards +towed +towel +towel's +toweled +toweling +toweling's +towelings +towelled +towelling +towelling's +towellings +towels +tower +tower's +towered +towering +towers +towhead +towhead's +towheaded +towheads +towing +town +town's +townhouse +townhouse's +townhouses +towns +townsfolk +townsfolk's +township +township's +townships +townsman +townsman's +townsmen +townspeople +townspeople's +towpath +towpath's +towpaths +tows +toxemia +toxemia's +toxic +toxicity +toxicity's +toxicologist +toxicologist's +toxicologists +toxicology +toxicology's +toxin +toxin's +toxins +toy +toy's +toyed +toying +toys +trace +trace's +traceable +traced +tracer +tracer's +traceries +tracers +tracery +tracery's +traces +trachea +trachea's +tracheae +tracheas +tracheotomies +tracheotomy +tracheotomy's +tracing +tracing's +tracings +track +track's +tracked +tracker +tracker's +trackers +tracking +tracks +tract +tract's +tractable +traction +traction's +tractor +tractor's +tractors +tracts +trade +trade's +traded +trademark +trademark's +trademarked +trademarking +trademarks +trader +trader's +traders +trades +tradesman +tradesman's +tradesmen +trading +tradition +tradition's +traditional +traditionalist +traditionalist's +traditionalists +traditionally +traditions +traduce +traduced +traduces +traducing 
+traffic +traffic's +trafficked +trafficker +trafficker's +traffickers +trafficking +traffics +tragedian +tragedian's +tragedians +tragedies +tragedy +tragedy's +tragic +tragically +tragicomedies +tragicomedy +tragicomedy's +trail +trail's +trailblazer +trailblazer's +trailblazers +trailed +trailer +trailer's +trailers +trailing +trails +train +train's +trained +trainee +trainee's +trainees +trainer +trainer's +trainers +training +training's +trains +traipse +traipse's +traipsed +traipses +traipsing +trait +trait's +traitor +traitor's +traitorous +traitors +traits +trajectories +trajectory +trajectory's +tram +tram's +trammed +trammel +trammel's +trammeled +trammeling +trammelled +trammelling +trammels +tramming +tramp +tramp's +tramped +tramping +trample +trample's +trampled +tramples +trampling +trampoline +trampoline's +trampolines +tramps +trams +trance +trance's +trances +tranquil +tranquiler +tranquilest +tranquility +tranquility's +tranquilize +tranquilized +tranquilizer +tranquilizer's +tranquilizers +tranquilizes +tranquilizing +tranquiller +tranquillest +tranquillity +tranquillity's +tranquillize +tranquillized +tranquillizer +tranquillizer's +tranquillizers +tranquillizes +tranquillizing +tranquilly +transact +transacted +transacting +transaction +transaction's +transactions +transacts +transatlantic +transceiver +transceiver's +transceivers +transcend +transcended +transcendence +transcendence's +transcendent +transcendental +transcendentalism +transcendentalism's +transcendentalist +transcendentalist's +transcendentalists +transcendentally +transcending +transcends +transcontinental +transcribe +transcribed +transcribes +transcribing +transcript +transcript's +transcription +transcription's +transcriptions +transcripts +transducer +transducer's +transducers +transept +transept's +transepts +transfer +transfer's +transferable +transferal +transferal's +transferals +transference +transference's +transferred +transferring +transfers +transfiguration 
+transfiguration's +transfigure +transfigured +transfigures +transfiguring +transfinite +transfix +transfixed +transfixes +transfixing +transfixt +transform +transform's +transformation +transformation's +transformations +transformed +transformer +transformer's +transformers +transforming +transforms +transfuse +transfused +transfuses +transfusing +transfusion +transfusion's +transfusions +transgress +transgressed +transgresses +transgressing +transgression +transgression's +transgressions +transgressor +transgressor's +transgressors +transience +transience's +transiency +transiency's +transient +transient's +transients +transistor +transistor's +transistors +transit +transit's +transited +transiting +transition +transition's +transitional +transitioned +transitioning +transitions +transitive +transitive's +transitively +transitives +transitory +transits +transitted +transitting +translate +translated +translates +translating +translation +translation's +translations +translator +translator's +translators +transliterate +transliterated +transliterates +transliterating +transliteration +transliteration's +transliterations +translucence +translucence's +translucent +transmigrate +transmigrated +transmigrates +transmigrating +transmigration +transmigration's +transmissible +transmission +transmission's +transmissions +transmit +transmits +transmittable +transmittal +transmittal's +transmitted +transmitter +transmitter's +transmitters +transmitting +transmutation +transmutation's +transmutations +transmute +transmuted +transmutes +transmuting +transnational +transnational's +transnationals +transoceanic +transom +transom's +transoms +transparencies +transparency +transparency's +transparent +transparently +transpiration +transpiration's +transpire +transpired +transpires +transpiring +transplant +transplant's +transplantation +transplantation's +transplanted +transplanting +transplants +transponder +transponder's +transponders +transport +transport's +transportable 
+transportation +transportation's +transported +transporter +transporter's +transporters +transporting +transports +transpose +transposed +transposes +transposing +transposition +transposition's +transpositions +transsexual +transsexual's +transsexuals +transship +transshipment +transshipment's +transshipped +transshipping +transships +transubstantiation +transubstantiation's +transverse +transverse's +transversely +transverses +transvestism +transvestism's +transvestite +transvestite's +transvestites +trap +trap's +trapdoor +trapdoor's +trapdoors +trapeze +trapeze's +trapezes +trapezoid +trapezoid's +trapezoidal +trapezoids +trappable +trapped +trapper +trapper's +trappers +trapping +trappings +trappings's +traps +trapshooting +trapshooting's +trash +trash's +trashcan +trashcan's +trashcans +trashed +trashes +trashier +trashiest +trashing +trashy +trauma +trauma's +traumas +traumata +traumatic +traumatize +traumatized +traumatizes +traumatizing +travail +travail's +travailed +travailing +travails +travel +travel's +traveled +traveler +traveler's +travelers +traveling +travelings +travelled +traveller +traveller's +travellers +travelling +travelog +travelog's +travelogs +travelogue +travelogue's +travelogues +travels +traverse +traverse's +traversed +traverses +traversing +travestied +travesties +travesty +travesty's +travestying +trawl +trawl's +trawled +trawler +trawler's +trawlers +trawling +trawls +tray +tray's +trays +treacheries +treacherous +treacherously +treachery +treachery's +treacle +treacle's +tread +tread's +treading +treadle +treadle's +treadled +treadles +treadling +treadmill +treadmill's +treadmills +treads +treason +treason's +treasonable +treasonous +treasure +treasure's +treasured +treasurer +treasurer's +treasurers +treasures +treasuries +treasuring +treasury +treasury's +treat +treat's +treatable +treated +treaties +treating +treatise +treatise's +treatises +treatment +treatment's +treatments +treats +treaty +treaty's +treble +treble's 
+trebled +trebles +trebling +tree +tree's +treed +treeing +treeless +trees +treetop +treetop's +treetops +trefoil +trefoil's +trefoils +trek +trek's +trekked +trekking +treks +trellis +trellis's +trellised +trellises +trellising +tremble +tremble's +trembled +trembles +trembling +tremendous +tremendously +tremolo +tremolo's +tremolos +tremor +tremor's +tremors +tremulous +tremulously +trench +trench's +trenchant +trenchantly +trenched +trenches +trenching +trend +trend's +trended +trendier +trendies +trendiest +trending +trends +trendy +trendy's +trepidation +trepidation's +trespass +trespass's +trespassed +trespasser +trespasser's +trespassers +trespasses +trespassing +tress +tress's +tresses +trestle +trestle's +trestles +triad +triad's +triads +triage +triage's +trial +trial's +trialed +trialing +trials +triangle +triangle's +triangles +triangular +triangulation +triangulation's +triathlon +triathlon's +triathlons +tribal +tribalism +tribalism's +tribe +tribe's +tribes +tribesman +tribesman's +tribesmen +tribulation +tribulation's +tribulations +tribunal +tribunal's +tribunals +tribune +tribune's +tribunes +tributaries +tributary +tributary's +tribute +tribute's +tributes +trice +trice's +triceps +triceps's +tricepses +triceratops +triceratops's +triceratopses +trick +trick's +tricked +trickery +trickery's +trickier +trickiest +trickiness +trickiness's +tricking +trickle +trickle's +trickled +trickles +trickling +tricks +trickster +trickster's +tricksters +tricky +tricolor +tricolor's +tricolors +tricycle +tricycle's +tricycles +trident +trident's +tridents +tried +triennial +triennial's +triennials +tries +trifecta +trifecta's +trifectas +trifle +trifle's +trifled +trifler +trifler's +triflers +trifles +trifling +trifocals +trifocals's +trig +trig's +trigger +trigger's +triggered +triggering +triggers +triglyceride +triglyceride's +triglycerides +trigonometric +trigonometry +trigonometry's +trike +trike's +trikes +trilateral +trilaterals +trill +trill's 
+trilled +trilling +trillion +trillion's +trillions +trillionth +trillionth's +trillionths +trills +trilogies +trilogy +trilogy's +trim +trim's +trimaran +trimaran's +trimarans +trimester +trimester's +trimesters +trimly +trimmed +trimmer +trimmer's +trimmers +trimmest +trimming +trimming's +trimmings +trimmings's +trimness +trimness's +trims +trinities +trinity +trinity's +trinket +trinket's +trinkets +trio +trio's +trios +trip +trip's +tripartite +tripe +tripe's +triple +triple's +tripled +triples +triplet +triplet's +triplets +triplicate +triplicate's +triplicated +triplicates +triplicating +tripling +triply +tripod +tripod's +tripods +tripos +tripped +tripping +trips +triptych +triptych's +triptychs +trisect +trisected +trisecting +trisects +trite +tritely +triteness +triteness's +triter +tritest +triumph +triumph's +triumphal +triumphant +triumphantly +triumphed +triumphing +triumphs +triumvirate +triumvirate's +triumvirates +trivet +trivet's +trivets +trivia +trivia's +trivial +trivialities +triviality +triviality's +trivialize +trivialized +trivializes +trivializing +trivially +trochee +trochee's +trochees +trod +trodden +troglodyte +troglodyte's +troglodytes +troika +troika's +troikas +troll +troll's +trolled +trolley +trolley's +trolleys +trollies +trolling +trollop +trollop's +trollops +trolls +trolly +trolly's +trombone +trombone's +trombones +trombonist +trombonist's +trombonists +tromp +tromped +tromping +tromps +troop +troop's +trooped +trooper +trooper's +troopers +trooping +troops +troopship +troopship's +troopships +trope +trope's +tropes +trophies +trophy +trophy's +tropic +tropic's +tropical +tropics +tropics's +tropism +tropism's +tropisms +troposphere +troposphere's +tropospheres +trot +trot's +troth +troth's +trots +trotted +trotter +trotter's +trotters +trotting +troubadour +troubadour's +troubadours +trouble +trouble's +troubled +troublemaker +troublemaker's +troublemakers +troubles +troubleshoot +troubleshooted +troubleshooter 
+troubleshooter's +troubleshooters +troubleshooting +troubleshooting's +troubleshoots +troubleshot +troublesome +troubling +trough +trough's +troughs +trounce +trounced +trounces +trouncing +troupe +troupe's +trouped +trouper +trouper's +troupers +troupes +trouping +trouser +trouser's +trousers +trousers's +trousseau +trousseau's +trousseaus +trousseaux +trout +trout's +trouts +trowel +trowel's +troweled +troweling +trowelled +trowelling +trowels +troy +troys +truancy +truancy's +truant +truant's +truanted +truanting +truants +truce +truce's +truces +truck +truck's +trucked +trucker +trucker's +truckers +trucking +trucking's +truckle +truckle's +truckled +truckles +truckling +truckload +truckload's +truckloads +trucks +truculence +truculence's +truculent +truculently +trudge +trudge's +trudged +trudges +trudging +true +true's +trued +trueing +truer +trues +truest +truffle +truffle's +truffles +truing +truism +truism's +truisms +truly +trump +trump's +trumped +trumpery +trumpery's +trumpet +trumpet's +trumpeted +trumpeter +trumpeter's +trumpeters +trumpeting +trumpets +trumping +trumps +truncate +truncated +truncates +truncating +truncation +truncation's +truncheon +truncheon's +truncheons +trundle +trundle's +trundled +trundles +trundling +trunk +trunk's +trunking +trunks +truss +truss's +trussed +trusses +trussing +trust +trust's +trusted +trustee +trustee's +trustees +trusteeship +trusteeship's +trusteeships +trustful +trustfully +trustfulness +trustfulness's +trustier +trusties +trustiest +trusting +trusts +trustworthier +trustworthiest +trustworthiness +trustworthiness's +trustworthy +trusty +trusty's +truth +truth's +truther +truther's +truthers +truthful +truthfully +truthfulness +truthfulness's +truthiness +truths +try +try's +trying +tryout +tryout's +tryouts +tryst +tryst's +trysted +trysting +trysts +ts +tsar +tsar's +tsarina +tsarina's +tsarinas +tsars +tsunami +tsunami's +tsunamis +tub +tub's +tuba +tuba's +tubas +tubbier +tubbiest +tubby +tube +tube's 
+tubed +tubeless +tubeless's +tuber +tuber's +tubercle +tubercle's +tubercles +tubercular +tuberculosis +tuberculosis's +tuberculous +tuberous +tubers +tubes +tubing +tubing's +tubs +tubular +tuck +tuck's +tucked +tucker +tucker's +tuckered +tuckering +tuckers +tucking +tucks +tuft +tuft's +tufted +tufting +tufts +tug +tug's +tugboat +tugboat's +tugboats +tugged +tugging +tugs +tuition +tuition's +tulip +tulip's +tulips +tulle +tulle's +tumble +tumble's +tumbled +tumbledown +tumbler +tumbler's +tumblers +tumbles +tumbleweed +tumbleweed's +tumbleweeds +tumbling +tumbrel +tumbrel's +tumbrels +tumbril +tumbril's +tumbrils +tumid +tummies +tummy +tummy's +tumor +tumor's +tumors +tumult +tumult's +tumults +tumultuous +tun +tun's +tuna +tuna's +tunas +tundra +tundra's +tundras +tune +tune's +tuned +tuneful +tunefully +tuneless +tunelessly +tuner +tuner's +tuners +tunes +tungsten +tungsten's +tunic +tunic's +tunics +tuning +tunnel +tunnel's +tunneled +tunneling +tunnelings +tunnelled +tunnelling +tunnels +tunnies +tunny +tunny's +tuns +turban +turban's +turbans +turbid +turbine +turbine's +turbines +turbojet +turbojet's +turbojets +turboprop +turboprop's +turboprops +turbot +turbot's +turbots +turbulence +turbulence's +turbulent +turbulently +turd +turd's +turds +turducken +turducken's +turduckens +tureen +tureen's +tureens +turf +turf's +turfed +turfing +turfs +turgid +turgidity +turgidity's +turgidly +turkey +turkey's +turkeys +turmeric +turmeric's +turmerics +turmoil +turmoil's +turmoils +turn +turn's +turnabout +turnabout's +turnabouts +turnaround +turnaround's +turnarounds +turncoat +turncoat's +turncoats +turned +turner +turner's +turners +turning +turnip +turnip's +turnips +turnkey +turnkey's +turnkeys +turnoff +turnoff's +turnoffs +turnout +turnout's +turnouts +turnover +turnover's +turnovers +turnpike +turnpike's +turnpikes +turns +turnstile +turnstile's +turnstiles +turntable +turntable's +turntables +turpentine +turpentine's +turpitude +turpitude's +turquoise 
+turquoise's +turquoises +turret +turret's +turrets +turtle +turtle's +turtledove +turtledove's +turtledoves +turtleneck +turtleneck's +turtlenecks +turtles +turves +tush +tush's +tushes +tusk +tusk's +tusked +tusks +tussle +tussle's +tussled +tussles +tussling +tussock +tussock's +tussocks +tutelage +tutelage's +tutor +tutor's +tutored +tutorial +tutorial's +tutorials +tutoring +tutors +tutu +tutu's +tutus +tux +tux's +tuxedo +tuxedo's +tuxedoes +tuxedos +tuxes +twaddle +twaddle's +twaddled +twaddles +twaddling +twain +twain's +twang +twang's +twanged +twanging +twangs +tweak +tweak's +tweaked +tweaking +tweaks +twee +tweed +tweed's +tweedier +tweediest +tweeds +tweeds's +tweedy +tweet +tweet's +tweeted +tweeter +tweeter's +tweeters +tweeting +tweets +tweezers +tweezers's +twelfth +twelfth's +twelfths +twelve +twelve's +twelves +twenties +twentieth +twentieth's +twentieths +twenty +twenty's +twerk +twerked +twerking +twerks +twerp +twerp's +twerps +twice +twiddle +twiddle's +twiddled +twiddles +twiddling +twig +twig's +twigged +twiggier +twiggiest +twigging +twiggy +twigs +twilight +twilight's +twill +twill's +twilled +twin +twin's +twine +twine's +twined +twines +twinge +twinge's +twinged +twingeing +twinges +twinging +twining +twinkle +twinkle's +twinkled +twinkles +twinkling +twinkling's +twinklings +twinned +twinning +twins +twirl +twirl's +twirled +twirler +twirler's +twirlers +twirling +twirls +twist +twist's +twisted +twister +twister's +twisters +twisting +twists +twit +twit's +twitch +twitch's +twitched +twitches +twitching +twits +twitted +twitter +twitter's +twittered +twittering +twitters +twitting +two +two's +twofer +twofer's +twofers +twofold +twos +twosome +twosome's +twosomes +tycoon +tycoon's +tycoons +tying +tyke +tyke's +tykes +tympana +tympanum +tympanum's +tympanums +type +type's +typecast +typecasting +typecasts +typed +typeface +typeface's +typefaces +types +typescript +typescript's +typescripts +typeset +typesets +typesetter +typesetter's 
+typesetters +typesetting +typewrite +typewriter +typewriter's +typewriters +typewrites +typewriting +typewritten +typewrote +typhoid +typhoid's +typhoon +typhoon's +typhoons +typhus +typhus's +typical +typically +typified +typifies +typify +typifying +typing +typing's +typist +typist's +typists +typo +typo's +typographer +typographer's +typographers +typographic +typographical +typographically +typography +typography's +typos +tyrannical +tyrannically +tyrannies +tyrannize +tyrannized +tyrannizes +tyrannizing +tyrannosaur +tyrannosaur's +tyrannosaurs +tyrannosaurus +tyrannosaurus's +tyrannosauruses +tyrannous +tyranny +tyranny's +tyrant +tyrant's +tyrants +tyro +tyro's +tyroes +tyros +tzar +tzar's +tzarina +tzarina's +tzarinas +tzars +u +ubiquitous +ubiquitously +ubiquity +ubiquity's +udder +udder's +udders +ugh +uglier +ugliest +ugliness +ugliness's +ugly +uh +ukelele +ukelele's +ukeleles +ukulele +ukulele's +ukuleles +ulcer +ulcer's +ulcerate +ulcerated +ulcerates +ulcerating +ulceration +ulceration's +ulcerations +ulcerous +ulcers +ulna +ulna's +ulnae +ulnas +ulterior +ultimata +ultimate +ultimate's +ultimately +ultimatum +ultimatum's +ultimatums +ultra +ultra's +ultraconservative +ultraconservative's +ultraconservatives +ultramarine +ultramarine's +ultras +ultrasonic +ultrasonically +ultrasound +ultrasound's +ultrasounds +ultraviolet +ultraviolet's +ululate +ululated +ululates +ululating +um +umbel +umbel's +umbels +umber +umber's +umbilical +umbilici +umbilicus +umbilicus's +umbilicuses +umbrage +umbrage's +umbrella +umbrella's +umbrellas +umiak +umiak's +umiaks +umlaut +umlaut's +umlauts +ump +ump's +umped +umping +umpire +umpire's +umpired +umpires +umpiring +umps +umpteen +umpteenth +unabashed +unabated +unable +unabridged +unabridged's +unabridgeds +unaccented +unacceptability +unacceptable +unacceptably +unaccepted +unaccompanied +unaccountable +unaccountably +unaccustomed +unacknowledged +unacquainted +unadorned +unadulterated +unadvised +unaffected 
+unafraid +unaided +unalterable +unalterably +unaltered +unambiguous +unambiguously +unanimity +unanimity's +unanimous +unanimously +unannounced +unanswerable +unanswered +unanticipated +unappealing +unappetizing +unappreciated +unappreciative +unapproachable +unarmed +unashamed +unashamedly +unasked +unassailable +unassigned +unassisted +unassuming +unattached +unattainable +unattended +unattractive +unattributed +unauthenticated +unauthorized +unavailable +unavailing +unavoidable +unavoidably +unaware +unawares +unbalanced +unbar +unbarred +unbarring +unbars +unbearable +unbearably +unbeatable +unbeaten +unbecoming +unbeknown +unbeknownst +unbelief +unbelief's +unbelievable +unbelievably +unbeliever +unbeliever's +unbelievers +unbend +unbending +unbends +unbent +unbiased +unbiassed +unbidden +unbind +unbinding +unbinds +unblock +unblocked +unblocking +unblocks +unblushing +unbolt +unbolted +unbolting +unbolts +unborn +unbosom +unbosomed +unbosoming +unbosoms +unbound +unbounded +unbranded +unbreakable +unbridled +unbroken +unbuckle +unbuckled +unbuckles +unbuckling +unburden +unburdened +unburdening +unburdens +unbutton +unbuttoned +unbuttoning +unbuttons +uncalled +uncannier +uncanniest +uncannily +uncanny +uncaring +uncased +uncatalogued +unceasing +unceasingly +uncensored +unceremonious +unceremoniously +uncertain +uncertainly +uncertainties +uncertainty +uncertainty's +unchallenged +unchanged +unchanging +uncharacteristic +uncharacteristically +uncharitable +uncharitably +uncharted +unchecked +unchristian +uncivil +uncivilized +unclaimed +unclasp +unclasped +unclasping +unclasps +unclassified +uncle +uncle's +unclean +uncleaner +uncleanest +uncleanlier +uncleanliest +uncleanly +uncleanness +uncleanness's +unclear +unclearer +unclearest +uncles +unclothe +unclothed +unclothes +unclothing +uncluttered +uncoil +uncoiled +uncoiling +uncoils +uncollected +uncomfortable +uncomfortably +uncommitted +uncommon +uncommoner +uncommonest +uncommonly +uncommunicative 
+uncomplaining +uncompleted +uncomplicated +uncomplimentary +uncomprehending +uncompressed +uncompromising +uncompromisingly +unconcern +unconcern's +unconcerned +unconcernedly +unconditional +unconditionally +unconfirmed +unconnected +unconquerable +unconscionable +unconscionably +unconscious +unconscious's +unconsciously +unconsciousness +unconsciousness's +unconsidered +unconstitutional +uncontaminated +uncontested +uncontrollable +uncontrollably +uncontrolled +uncontroversial +unconventional +unconventionally +unconvinced +unconvincing +unconvincingly +uncooked +uncooperative +uncoordinated +uncork +uncorked +uncorking +uncorks +uncorrelated +uncorroborated +uncountable +uncounted +uncouple +uncoupled +uncouples +uncoupling +uncouth +uncover +uncovered +uncovering +uncovers +uncritical +unction +unction's +unctions +unctuous +unctuously +unctuousness +unctuousness's +uncultivated +uncultured +uncut +undamaged +undated +undaunted +undeceive +undeceived +undeceives +undeceiving +undecidable +undecided +undecided's +undecideds +undecipherable +undeclared +undefeated +undefended +undefinable +undefined +undelivered +undemanding +undemocratic +undemonstrative +undeniable +undeniably +undependable +under +underachieve +underachieved +underachiever +underachiever's +underachievers +underachieves +underachieving +underact +underacted +underacting +underacts +underage +underarm +underarm's +underarms +underbellies +underbelly +underbelly's +underbid +underbidding +underbids +underbrush +underbrush's +undercarriage +undercarriage's +undercarriages +undercharge +undercharge's +undercharged +undercharges +undercharging +underclass +underclass's +underclassman +underclassman's +underclassmen +underclothes +underclothes's +underclothing +underclothing's +undercoat +undercoat's +undercoated +undercoating +undercoats +undercover +undercurrent +undercurrent's +undercurrents +undercut +undercut's +undercuts +undercutting +underdeveloped +underdog +underdog's +underdogs 
+underdone +underemployed +underestimate +underestimate's +underestimated +underestimates +underestimating +underexpose +underexposed +underexposes +underexposing +underfed +underfeed +underfeeding +underfeeds +underflow +underfoot +underfunded +undergarment +undergarment's +undergarments +undergo +undergoes +undergoing +undergone +undergrad +undergrads +undergraduate +undergraduate's +undergraduates +underground +underground's +undergrounds +undergrowth +undergrowth's +underhand +underhanded +underhandedly +underlain +underlay +underlay's +underlays +underlie +underlies +underline +underline's +underlined +underlines +underling +underling's +underlings +underlining +underlying +undermine +undermined +undermines +undermining +undermost +underneath +underneath's +underneaths +undernourished +underpaid +underpants +underpants's +underpass +underpass's +underpasses +underpay +underpaying +underpays +underpin +underpinned +underpinning +underpinning's +underpinnings +underpins +underplay +underplayed +underplaying +underplays +underprivileged +underrate +underrated +underrates +underrating +underscore +underscore's +underscored +underscores +underscoring +undersea +undersecretaries +undersecretary +undersecretary's +undersell +underselling +undersells +undershirt +undershirt's +undershirts +undershoot +undershooting +undershoots +undershorts +undershorts's +undershot +underside +underside's +undersides +undersign +undersigned +undersigned's +undersigning +undersigns +undersize +undersized +underskirt +underskirt's +underskirts +undersold +understaffed +understand +understandable +understandably +understanding +understanding's +understandingly +understandings +understands +understate +understated +understatement +understatement's +understatements +understates +understating +understood +understudied +understudies +understudy +understudy's +understudying +undertake +undertaken +undertaker +undertaker's +undertakers +undertakes +undertaking +undertaking's +undertakings 
+undertone +undertone's +undertones +undertook +undertow +undertow's +undertows +underused +undervalue +undervalued +undervalues +undervaluing +underwater +underwear +underwear's +underweight +underweight's +underwent +underworld +underworld's +underworlds +underwrite +underwriter +underwriter's +underwriters +underwrites +underwriting +underwritten +underwrote +undeserved +undeservedly +undeserving +undesirability +undesirable +undesirable's +undesirables +undetectable +undetected +undetermined +undeterred +undeveloped +undid +undies +undies's +undignified +undiluted +undiminished +undisciplined +undisclosed +undiscovered +undiscriminating +undisguised +undisputed +undistinguished +undisturbed +undivided +undo +undocumented +undoes +undoing +undoing's +undoings +undone +undoubted +undoubtedly +undress +undress's +undressed +undresses +undressing +undue +undulant +undulate +undulated +undulates +undulating +undulation +undulation's +undulations +unduly +undying +unearned +unearth +unearthed +unearthing +unearthly +unearths +unease +unease's +uneasier +uneasiest +uneasily +uneasiness +uneasiness's +uneasy +uneaten +uneconomic +uneconomical +unedited +uneducated +unembarrassed +unemotional +unemployable +unemployed +unemployed's +unemployment +unemployment's +unending +unendurable +unenforceable +unenlightened +unenthusiastic +unenviable +unequal +unequaled +unequalled +unequally +unequivocal +unequivocally +unerring +unerringly +unethical +uneven +unevenly +unevenness +unevenness's +uneventful +uneventfully +unexampled +unexceptionable +unexceptional +unexciting +unexpected +unexpectedly +unexplained +unexplored +unexpurgated +unfailing +unfailingly +unfair +unfairer +unfairest +unfairly +unfairness +unfairness's +unfaithful +unfaithfully +unfaithfulness +unfaithfulness's +unfamiliar +unfamiliarity +unfamiliarity's +unfashionable +unfasten +unfastened +unfastening +unfastens +unfathomable +unfavorable +unfavorably +unfeasible +unfeeling +unfeelingly +unfeigned 
+unfetter +unfettered +unfettering +unfetters +unfilled +unfinished +unfit +unfits +unfitted +unfitting +unflagging +unflappable +unflattering +unflinching +unflinchingly +unfold +unfolded +unfolding +unfolds +unforeseeable +unforeseen +unforgettable +unforgettably +unforgivable +unforgiving +unformed +unfortunate +unfortunate's +unfortunately +unfortunates +unfounded +unfrequented +unfriend +unfriended +unfriending +unfriendlier +unfriendliest +unfriendliness +unfriendliness's +unfriendly +unfriends +unfrock +unfrocked +unfrocking +unfrocks +unfulfilled +unfunny +unfurl +unfurled +unfurling +unfurls +unfurnished +ungainlier +ungainliest +ungainliness +ungainliness's +ungainly +ungentlemanly +ungodlier +ungodliest +ungodly +ungovernable +ungracious +ungrammatical +ungrateful +ungratefully +ungratefulness +ungratefulness's +ungrudging +unguarded +unguent +unguent's +unguents +ungulate +ungulate's +ungulates +unhand +unhanded +unhanding +unhands +unhappier +unhappiest +unhappily +unhappiness +unhappiness's +unhappy +unharmed +unhealthful +unhealthier +unhealthiest +unhealthy +unheard +unheeded +unhelpful +unhesitating +unhesitatingly +unhindered +unhinge +unhinged +unhinges +unhinging +unhitch +unhitched +unhitches +unhitching +unholier +unholiest +unholy +unhook +unhooked +unhooking +unhooks +unhorse +unhorsed +unhorses +unhorsing +unhurried +unhurt +unicameral +unicorn +unicorn's +unicorns +unicycle +unicycle's +unicycles +unidentifiable +unidentified +unidirectional +unification +unification's +unified +unifies +uniform +uniform's +uniformed +uniforming +uniformity +uniformity's +uniformly +uniforms +unify +unifying +unilateral +unilaterally +unimaginable +unimaginative +unimpaired +unimpeachable +unimplementable +unimplemented +unimportant +unimpressed +unimpressive +uninformative +uninformed +uninhabitable +uninhabited +uninhibited +uninitialized +uninitiated +uninjured +uninspired +uninspiring +uninstall +uninstallable +uninstalled +uninstaller +uninstaller's 
+uninstallers +uninstalling +uninstalls +uninsured +unintelligent +unintelligible +unintelligibly +unintended +unintentional +unintentionally +uninterested +uninteresting +uninterpreted +uninterrupted +uninvited +uninviting +union +union's +unionization +unionization's +unionize +unionized +unionizes +unionizing +unions +unique +uniquely +uniqueness +uniqueness's +uniquer +uniquest +unisex +unisex's +unison +unison's +unit +unit's +unitary +unite +united +unites +unities +uniting +units +unity +unity's +universal +universal's +universality +universality's +universally +universals +universe +universe's +universes +universities +university +university's +unjust +unjustifiable +unjustified +unjustly +unkempt +unkind +unkinder +unkindest +unkindlier +unkindliest +unkindly +unkindness +unkindness's +unknowable +unknowing +unknowingly +unknowings +unknown +unknown's +unknowns +unlabeled +unlace +unlaced +unlaces +unlacing +unlatch +unlatched +unlatches +unlatching +unlawful +unlawfully +unleaded +unleaded's +unlearn +unlearned +unlearning +unlearns +unleash +unleashed +unleashes +unleashing +unleavened +unless +unlettered +unlicensed +unlike +unlikelier +unlikeliest +unlikelihood +unlikelihood's +unlikely +unlimited +unlisted +unload +unloaded +unloading +unloads +unlock +unlocked +unlocking +unlocks +unloose +unloosed +unlooses +unloosing +unloved +unluckier +unluckiest +unluckily +unlucky +unmade +unmake +unmakes +unmaking +unman +unmanageable +unmanlier +unmanliest +unmanly +unmanned +unmannerly +unmanning +unmans +unmarked +unmarried +unmask +unmasked +unmasking +unmasks +unmatched +unmemorable +unmentionable +unmentionable's +unmentionables +unmerciful +unmercifully +unmindful +unmissed +unmistakable +unmistakably +unmitigated +unmodified +unmoral +unmoved +unnamed +unnatural +unnaturally +unnecessarily +unnecessary +unneeded +unnerve +unnerved +unnerves +unnerving +unnoticeable +unnoticed +unnumbered +unobjectionable +unobservant +unobserved +unobstructed 
+unobtainable +unobtrusive +unobtrusively +unoccupied +unoffensive +unofficial +unofficially +unopened +unopposed +unorganized +unoriginal +unorthodox +unpack +unpacked +unpacking +unpacks +unpaid +unpainted +unpalatable +unparalleled +unpardonable +unpatriotic +unpaved +unperturbed +unpick +unpin +unpinned +unpinning +unpins +unplanned +unpleasant +unpleasantly +unpleasantness +unpleasantness's +unplug +unplugged +unplugging +unplugs +unplumbed +unpolluted +unpopular +unpopularity +unpopularity's +unprecedented +unpredictability +unpredictability's +unpredictable +unprejudiced +unpremeditated +unprepared +unpretentious +unpreventable +unprincipled +unprintable +unprivileged +unproductive +unprofessional +unprofitable +unpromising +unprompted +unpronounceable +unprotected +unproved +unproven +unprovoked +unpublished +unpunished +unqualified +unquenchable +unquestionable +unquestionably +unquestioned +unquestioning +unquestioningly +unquote +unquoted +unquotes +unquoting +unravel +unraveled +unraveling +unravelled +unravelling +unravels +unreachable +unread +unreadable +unready +unreal +unrealistic +unrealistically +unrealized +unreasonable +unreasonableness +unreasonableness's +unreasonably +unreasoning +unrecognizable +unrecognized +unreconstructed +unrecorded +unrefined +unregenerate +unregistered +unregulated +unrehearsed +unrelated +unreleased +unrelenting +unrelentingly +unreliability +unreliable +unrelieved +unremarkable +unremitting +unrepeatable +unrepentant +unrepresentative +unrequited +unreserved +unreservedly +unresolved +unresponsive +unrest +unrest's +unrestrained +unrestricted +unrewarding +unripe +unriper +unripest +unrivaled +unrivalled +unroll +unrolled +unrolling +unrolls +unromantic +unruffled +unrulier +unruliest +unruliness +unruliness's +unruly +unsaddle +unsaddled +unsaddles +unsaddling +unsafe +unsafer +unsafest +unsaid +unsalted +unsanctioned +unsanitary +unsatisfactory +unsatisfied +unsatisfying +unsaturated +unsavory +unsay +unsaying 
+unsays +unscathed +unscheduled +unschooled +unscientific +unscramble +unscrambled +unscrambles +unscrambling +unscrew +unscrewed +unscrewing +unscrews +unscrupulous +unscrupulously +unscrupulousness +unscrupulousness's +unseal +unsealed +unsealing +unseals +unseasonable +unseasonably +unseasoned +unseat +unseated +unseating +unseats +unseeing +unseemlier +unseemliest +unseemliness +unseemliness's +unseemly +unseen +unseen's +unselfish +unselfishly +unselfishness +unselfishness's +unsent +unsentimental +unset +unsettle +unsettled +unsettles +unsettling +unshakable +unshakeable +unshaven +unsheathe +unsheathed +unsheathes +unsheathing +unsightlier +unsightliest +unsightliness +unsightliness's +unsightly +unsigned +unskilled +unskillful +unsmiling +unsnap +unsnapped +unsnapping +unsnaps +unsnarl +unsnarled +unsnarling +unsnarls +unsociable +unsold +unsolicited +unsolved +unsophisticated +unsound +unsounder +unsoundest +unsparing +unspeakable +unspeakably +unspecific +unspecified +unspoiled +unspoilt +unspoken +unsportsmanlike +unstable +unstated +unsteadier +unsteadiest +unsteadily +unsteadiness +unsteadiness's +unsteady +unstop +unstoppable +unstopped +unstopping +unstops +unstressed +unstructured +unstrung +unstuck +unstudied +unsubscribe +unsubscribed +unsubscribes +unsubscribing +unsubstantial +unsubstantiated +unsubtle +unsuccessful +unsuccessfully +unsuitable +unsuitably +unsuited +unsung +unsupervised +unsupportable +unsupported +unsure +unsurpassed +unsurprising +unsuspected +unsuspecting +unsweetened +unswerving +unsympathetic +untainted +untamed +untangle +untangled +untangles +untangling +untapped +untaught +untenable +untested +unthinkable +unthinking +unthinkingly +untidier +untidiest +untidiness +untidiness's +untidy +untie +untied +unties +until +untimelier +untimeliest +untimeliness +untimeliness's +untimely +untiring +untiringly +untitled +unto +untold +untouchable +untouchable's +untouchables +untouched +untoward +untrained +untreated +untried 
+untroubled +untrue +untruer +untruest +untrustworthy +untruth +untruth's +untruthful +untruthfully +untruths +untutored +untwist +untwisted +untwisting +untwists +untying +unusable +unused +unusual +unusually +unutterable +unutterably +unvarnished +unvarying +unveil +unveiled +unveiling +unveils +unverified +unvoiced +unwanted +unwarier +unwariest +unwariness +unwariness's +unwarranted +unwary +unwashed +unwavering +unwed +unwelcome +unwell +unwholesome +unwieldier +unwieldiest +unwieldiness +unwieldiness's +unwieldy +unwilling +unwillingly +unwillingness +unwillingness's +unwind +unwinding +unwinds +unwise +unwisely +unwiser +unwisest +unwitting +unwittingly +unwonted +unworkable +unworldly +unworthier +unworthiest +unworthiness +unworthiness's +unworthy +unwound +unwrap +unwrapped +unwrapping +unwraps +unwritten +unyielding +unzip +unzipped +unzipping +unzips +up +upbeat +upbeat's +upbeats +upbraid +upbraided +upbraiding +upbraids +upbringing +upbringing's +upbringings +upchuck +upchucked +upchucking +upchucks +upcoming +upcountry +upcountry's +update +update's +updated +updater +updates +updating +updraft +updraft's +updrafts +upend +upended +upending +upends +upfront +upgrade +upgrade's +upgraded +upgrades +upgrading +upheaval +upheaval's +upheavals +upheld +uphill +uphill's +uphills +uphold +upholding +upholds +upholster +upholstered +upholsterer +upholsterer's +upholsterers +upholstering +upholsters +upholstery +upholstery's +upkeep +upkeep's +upland +upland's +uplands +uplift +uplift's +uplifted +uplifting +upliftings +uplifts +upload +upmarket +upon +upped +upper +upper's +uppercase +uppercase's +upperclassman +upperclassman's +upperclassmen +uppercut +uppercut's +uppercuts +uppercutting +uppermost +uppers +upping +uppity +upraise +upraised +upraises +upraising +upright +upright's +uprights +uprising +uprising's +uprisings +uproar +uproar's +uproarious +uproariously +uproars +uproot +uprooted +uprooting +uproots +ups +upscale +upset +upset's +upsets 
+upsetting +upshot +upshot's +upshots +upside +upside's +upsides +upstage +upstaged +upstages +upstaging +upstairs +upstanding +upstart +upstart's +upstarted +upstarting +upstarts +upstate +upstate's +upstream +upsurge +upsurge's +upsurged +upsurges +upsurging +upswing +upswing's +upswings +uptake +uptake's +uptakes +uptight +uptown +uptown's +upturn +upturn's +upturned +upturning +upturns +upward +upwardly +upwards +uranium +uranium's +urban +urbane +urbaner +urbanest +urbanity +urbanity's +urbanization +urbanization's +urbanize +urbanized +urbanizes +urbanizing +urchin +urchin's +urchins +urea +urea's +urethra +urethra's +urethrae +urethras +urge +urge's +urged +urgency +urgency's +urgent +urgently +urges +urging +uric +urinal +urinal's +urinals +urinalyses +urinalysis +urinalysis's +urinary +urinate +urinated +urinates +urinating +urination +urination's +urine +urine's +urn +urn's +urns +urologist +urologist's +urologists +urology +urology's +us +usability +usability's +usable +usage +usage's +usages +use +use's +useability +useability's +useable +used +useful +usefully +usefulness +usefulness's +useless +uselessly +uselessness +uselessness's +user +user's +username +username's +usernames +users +uses +usher +usher's +ushered +usherette +usherette's +usherettes +ushering +ushers +using +usual +usual's +usually +usurer +usurer's +usurers +usurious +usurp +usurpation +usurpation's +usurped +usurper +usurper's +usurpers +usurping +usurps +usury +usury's +utensil +utensil's +utensils +uteri +uterine +uterus +uterus's +uteruses +utilitarian +utilitarian's +utilitarianism +utilitarians +utilities +utility +utility's +utilization +utilization's +utilize +utilized +utilizes +utilizing +utmost +utmost's +utopia +utopia's +utopian +utopian's +utopians +utopias +utter +utterance +utterance's +utterances +uttered +uttering +utterly +uttermost +uttermost's +utters +uvula +uvula's +uvulae +uvular +uvular's +uvulars +uvulas +v +vacancies +vacancy +vacancy's +vacant +vacantly 
+vacate +vacated +vacates +vacating +vacation +vacation's +vacationed +vacationer +vacationer's +vacationers +vacationing +vacations +vaccinate +vaccinated +vaccinates +vaccinating +vaccination +vaccination's +vaccinations +vaccine +vaccine's +vaccines +vacillate +vacillated +vacillates +vacillating +vacillation +vacillation's +vacillations +vacua +vacuity +vacuity's +vacuous +vacuously +vacuum +vacuum's +vacuumed +vacuuming +vacuums +vagabond +vagabond's +vagabonded +vagabonding +vagabonds +vagaries +vagary +vagary's +vagina +vagina's +vaginae +vaginal +vagrancy +vagrancy's +vagrant +vagrant's +vagrants +vague +vaguely +vagueness +vagueness's +vaguer +vaguest +vain +vainer +vainest +vainglorious +vainglory +vainglory's +vainly +valance +valance's +valances +vale +vale's +valedictorian +valedictorian's +valedictorians +valedictories +valedictory +valedictory's +valence +valence's +valences +valentine +valentine's +valentines +vales +valet +valet's +valeted +valeting +valets +valiant +valiantly +valid +validate +validated +validates +validating +validation +validation's +validations +validity +validity's +validly +validness +validness's +valise +valise's +valises +valley +valley's +valleys +valor +valor's +valorous +valuable +valuable's +valuables +valuation +valuation's +valuations +value +value's +valued +valueless +values +valuing +valve +valve's +valved +valves +valving +vamoose +vamoosed +vamooses +vamoosing +vamp +vamp's +vamped +vamping +vampire +vampire's +vampires +vamps +van +van's +vanadium +vanadium's +vandal +vandal's +vandalism +vandalism's +vandalize +vandalized +vandalizes +vandalizing +vandals +vane +vane's +vanes +vanguard +vanguard's +vanguards +vanilla +vanilla's +vanillas +vanish +vanished +vanishes +vanishing +vanishings +vanities +vanity +vanity's +vanned +vanning +vanquish +vanquished +vanquishes +vanquishing +vans +vantage +vantage's +vantages +vape +vaped +vapes +vapid +vapidity +vapidity's +vapidness +vapidness's +vaping +vapor +vapor's 
+vaporization +vaporization's +vaporize +vaporized +vaporizer +vaporizer's +vaporizers +vaporizes +vaporizing +vaporous +vapors +variability +variability's +variable +variable's +variables +variably +variance +variance's +variances +variant +variant's +variants +variate +variation +variation's +variations +varicolored +varicose +varied +variegate +variegated +variegates +variegating +varies +varieties +variety +variety's +various +variously +varlet +varlet's +varlets +varmint +varmint's +varmints +varnish +varnish's +varnished +varnishes +varnishing +varsities +varsity +varsity's +vary +varying +vascular +vase +vase's +vasectomies +vasectomy +vasectomy's +vases +vassal +vassal's +vassalage +vassalage's +vassals +vast +vast's +vaster +vastest +vastly +vastness +vastness's +vasts +vat +vat's +vats +vatted +vatting +vaudeville +vaudeville's +vault +vault's +vaulted +vaulter +vaulter's +vaulters +vaulting +vaulting's +vaults +vaunt +vaunt's +vaunted +vaunting +vaunts +veal +veal's +vector +vector's +vectored +vectoring +vectors +veep +veep's +veeps +veer +veer's +veered +veering +veers +vegan +vegan's +vegans +vegetable +vegetable's +vegetables +vegetarian +vegetarian's +vegetarianism +vegetarianism's +vegetarians +vegetate +vegetated +vegetates +vegetating +vegetation +vegetation's +vegetative +veggie +veggie's +veggies +vehemence +vehemence's +vehement +vehemently +vehicle +vehicle's +vehicles +vehicular +veil +veil's +veiled +veiling +veils +vein +vein's +veined +veining +veins +veld +veld's +velds +veldt +veldt's +veldts +vellum +vellum's +velocities +velocity +velocity's +velour +velour's +velours +velours's +velvet +velvet's +velveteen +velveteen's +velvety +venal +venality +venality's +venally +vend +vended +vender +vender's +venders +vendetta +vendetta's +vendettas +vending +vendor +vendor's +vendors +vends +veneer +veneer's +veneered +veneering +veneers +venerable +venerate +venerated +venerates +venerating +veneration +veneration's +venereal +vengeance 
+vengeance's +vengeful +vengefully +venial +venison +venison's +venom +venom's +venomous +venomously +venous +vent +vent's +vented +ventilate +ventilated +ventilates +ventilating +ventilation +ventilation's +ventilator +ventilator's +ventilators +venting +ventral +ventricle +ventricle's +ventricles +ventricular +ventriloquism +ventriloquism's +ventriloquist +ventriloquist's +ventriloquists +vents +venture +venture's +ventured +ventures +venturesome +venturing +venturous +venue +venue's +venues +veracious +veracity +veracity's +veranda +veranda's +verandah +verandah's +verandahs +verandas +verb +verb's +verbal +verbal's +verbalize +verbalized +verbalizes +verbalizing +verbally +verbals +verbatim +verbena +verbena's +verbenas +verbiage +verbiage's +verbose +verbosity +verbosity's +verbs +verdant +verdict +verdict's +verdicts +verdigris +verdigris's +verdigrised +verdigrises +verdigrising +verdure +verdure's +verge +verge's +verged +verges +verging +verier +veriest +verifiable +verification +verification's +verified +verifies +verify +verifying +verily +verisimilitude +verisimilitude's +veritable +veritably +verities +verity +verity's +vermicelli +vermicelli's +vermilion +vermilion's +vermillion +vermillion's +vermin +vermin's +verminous +vermouth +vermouth's +vernacular +vernacular's +vernaculars +vernal +versatile +versatility +versatility's +verse +verse's +versed +verses +versification +versification's +versified +versifies +versify +versifying +versing +version +version's +versions +versus +vertebra +vertebra's +vertebrae +vertebral +vertebras +vertebrate +vertebrate's +vertebrates +vertex +vertex's +vertexes +vertical +vertical's +vertically +verticals +vertices +vertiginous +vertigo +vertigo's +verve +verve's +very +vesicle +vesicle's +vesicles +vesper +vesper's +vespers +vessel +vessel's +vessels +vest +vest's +vested +vestibule +vestibule's +vestibules +vestige +vestige's +vestiges +vestigial +vesting +vestment +vestment's +vestments +vestries +vestry 
+vestry's +vests +vet +vet's +vetch +vetch's +vetches +veteran +veteran's +veterans +veterinarian +veterinarian's +veterinarians +veterinaries +veterinary +veterinary's +veto +veto's +vetoed +vetoes +vetoing +vets +vetted +vetting +vex +vexation +vexation's +vexations +vexatious +vexed +vexes +vexing +via +viability +viability's +viable +viaduct +viaduct's +viaducts +vial +vial's +vials +viand +viand's +viands +vibe +vibe's +vibes +vibes's +vibrancy +vibrancy's +vibrant +vibrantly +vibraphone +vibraphone's +vibraphones +vibrate +vibrated +vibrates +vibrating +vibration +vibration's +vibrations +vibrato +vibrato's +vibrator +vibrator's +vibrators +vibratos +viburnum +viburnum's +viburnums +vicar +vicar's +vicarage +vicarage's +vicarages +vicarious +vicariously +vicars +vice +vice's +viced +viceroy +viceroy's +viceroys +vices +vichyssoise +vichyssoise's +vicing +vicinity +vicinity's +vicious +viciously +viciousness +viciousness's +vicissitude +vicissitude's +vicissitudes +victim +victim's +victimization +victimization's +victimize +victimized +victimizes +victimizing +victims +victor +victor's +victories +victorious +victoriously +victors +victory +victory's +victual +victual's +victualed +victualing +victualled +victualling +victuals +vicuña +vicuña's +vicuñas +video +video's +videocassette +videocassette's +videocassettes +videodisc +videodisc's +videodiscs +videos +videotape +videotape's +videotaped +videotapes +videotaping +vie +vied +vies +view +view's +viewed +viewer +viewer's +viewers +viewfinder +viewfinder's +viewfinders +viewing +viewing's +viewings +viewpoint +viewpoint's +viewpoints +views +vigil +vigil's +vigilance +vigilance's +vigilant +vigilante +vigilante's +vigilantes +vigilantism +vigilantism's +vigilantly +vigils +vignette +vignette's +vignetted +vignettes +vignetting +vigor +vigor's +vigorous +vigorously +vile +vilely +vileness +vileness's +viler +vilest +vilification +vilification's +vilified +vilifies +vilify +vilifying +villa +villa's +village 
+village's +villager +villager's +villagers +villages +villain +villain's +villainies +villainous +villains +villainy +villainy's +villas +villein +villein's +villeins +vim +vim's +vinaigrette +vinaigrette's +vindicate +vindicated +vindicates +vindicating +vindication +vindication's +vindications +vindicator +vindicator's +vindicators +vindictive +vindictively +vindictiveness +vindictiveness's +vine +vine's +vinegar +vinegar's +vinegary +vines +vineyard +vineyard's +vineyards +vintage +vintage's +vintages +vintner +vintner's +vintners +vinyl +vinyl's +vinyls +viol +viol's +viola +viola's +violable +violas +violate +violated +violates +violating +violation +violation's +violations +violator +violator's +violators +violence +violence's +violent +violently +violet +violet's +violets +violin +violin's +violinist +violinist's +violinists +violins +violist +violist's +violists +violoncello +violoncello's +violoncellos +viols +viper +viper's +vipers +virago +virago's +viragoes +viragos +viral +vireo +vireo's +vireos +virgin +virgin's +virginal +virginal's +virginals +virginity +virginity's +virgins +virgule +virgule's +virgules +virile +virility +virility's +virology +virology's +virtual +virtually +virtue +virtue's +virtues +virtuosi +virtuosity +virtuosity's +virtuoso +virtuoso's +virtuosos +virtuous +virtuously +virtuousness +virtuousness's +virulence +virulence's +virulent +virulently +virus +virus's +viruses +visa +visa's +visaed +visage +visage's +visages +visaing +visas +viscera +visceral +viscid +viscosity +viscosity's +viscount +viscount's +viscountess +viscountess's +viscountesses +viscounts +viscous +viscus +viscus's +vise +vise's +vised +vises +visibility +visibility's +visible +visibly +vising +vision +vision's +visionaries +visionary +visionary's +visioned +visioning +visions +visit +visit's +visitation +visitation's +visitations +visited +visiting +visitor +visitor's +visitors +visits +visor +visor's +visors +vista +vista's +vistas +visual +visual's 
+visualization +visualization's +visualize +visualized +visualizes +visualizing +visually +visuals +vital +vitality +vitality's +vitalize +vitalized +vitalizes +vitalizing +vitally +vitals +vitals's +vitamin +vitamin's +vitamins +vitiate +vitiated +vitiates +vitiating +vitiation +vitiation's +viticulture +viticulture's +vitreous +vitriol +vitriol's +vitriolic +vituperate +vituperated +vituperates +vituperating +vituperation +vituperation's +vituperative +viva +viva's +vivace +vivacious +vivaciously +vivaciousness +vivaciousness's +vivacity +vivacity's +vivas +vivid +vivider +vividest +vividly +vividness +vividness's +vivified +vivifies +vivify +vivifying +viviparous +vivisection +vivisection's +vixen +vixen's +vixenish +vixens +vizier +vizier's +viziers +vizor +vizor's +vizors +vocabularies +vocabulary +vocabulary's +vocal +vocal's +vocalic +vocalist +vocalist's +vocalists +vocalization +vocalization's +vocalizations +vocalize +vocalized +vocalizes +vocalizing +vocally +vocals +vocation +vocation's +vocational +vocations +vocative +vocative's +vocatives +vociferate +vociferated +vociferates +vociferating +vociferation +vociferation's +vociferous +vociferously +vodka +vodka's +vogue +vogue's +vogues +voguish +voice +voice's +voiced +voiceless +voicemail +voicemail's +voicemails +voices +voicing +void +void's +voided +voiding +voids +voile +voile's +volatile +volatility +volatility's +volcanic +volcano +volcano's +volcanoes +volcanos +vole +vole's +voles +volition +volition's +volley +volley's +volleyball +volleyball's +volleyballs +volleyed +volleying +volleys +volt +volt's +voltage +voltage's +voltages +voltaic +voltmeter +voltmeter's +voltmeters +volts +volubility +volubility's +voluble +volubly +volume +volume's +volumes +voluminous +voluminously +voluntaries +voluntarily +voluntary +voluntary's +volunteer +volunteer's +volunteered +volunteering +volunteers +voluptuaries +voluptuary +voluptuary's +voluptuous +voluptuously +voluptuousness +voluptuousness's +vomit 
+vomit's +vomited +vomiting +vomits +voodoo +voodoo's +voodooed +voodooing +voodooism +voodooism's +voodoos +voracious +voraciously +voracity +voracity's +vortex +vortex's +vortexes +vortices +votaries +votary +votary's +vote +vote's +voted +voter +voter's +voters +votes +voting +votive +vouch +vouched +voucher +voucher's +vouchers +vouches +vouching +vouchsafe +vouchsafed +vouchsafes +vouchsafing +vow +vow's +vowed +vowel +vowel's +vowels +vowing +vows +voyage +voyage's +voyaged +voyager +voyager's +voyagers +voyages +voyaging +voyeur +voyeur's +voyeurism +voyeurism's +voyeuristic +voyeurs +vulcanization +vulcanization's +vulcanize +vulcanized +vulcanizes +vulcanizing +vulgar +vulgarer +vulgarest +vulgarism +vulgarism's +vulgarisms +vulgarities +vulgarity +vulgarity's +vulgarization +vulgarization's +vulgarize +vulgarized +vulgarizes +vulgarizing +vulgarly +vulnerabilities +vulnerability +vulnerability's +vulnerable +vulnerably +vulture +vulture's +vultures +vulva +vulva's +vulvae +vulvas +vuvuzela +vuvuzela's +vuvuzelas +vying +w +wack +wack's +wacker +wackest +wackier +wackiest +wackiness +wackiness's +wacko +wacko's +wackos +wacks +wacky +wad +wad's +wadded +wadding +wadding's +waddle +waddle's +waddled +waddles +waddling +wade +wade's +waded +wader +wader's +waders +wades +wadi +wadi's +wading +wadis +wads +wafer +wafer's +wafers +waffle +waffle's +waffled +waffles +waffling +waft +waft's +wafted +wafting +wafts +wag +wag's +wage +wage's +waged +wager +wager's +wagered +wagering +wagers +wages +wagged +wagging +waggish +waggle +waggle's +waggled +waggles +waggling +waging +wagon +wagon's +wagoner +wagoner's +wagoners +wagons +wags +waif +waif's +waifs +wail +wail's +wailed +wailing +wails +wainscot +wainscot's +wainscoted +wainscoting +wainscoting's +wainscotings +wainscots +wainscotted +wainscotting +wainscotting's +wainscottings +waist +waist's +waistband +waistband's +waistbands +waistcoat +waistcoat's +waistcoats +waistline +waistline's +waistlines +waists 
+wait +wait's +waited +waiter +waiter's +waiters +waiting +waitress +waitress's +waitresses +waits +waive +waived +waiver +waiver's +waivers +waives +waiving +wake +wake's +waked +wakeful +wakefulness +wakefulness's +waken +wakened +wakening +wakens +wakes +waking +wale +wale's +waled +wales +waling +walk +walk's +walked +walker +walker's +walkers +walking +walkout +walkout's +walkouts +walks +walkway +walkway's +walkways +wall +wall's +wallabies +wallaby +wallaby's +wallboard +wallboard's +walled +wallet +wallet's +wallets +walleye +walleye's +walleyed +walleyes +wallflower +wallflower's +wallflowers +walling +wallop +wallop's +walloped +walloping +walloping's +wallopings +wallops +wallow +wallow's +wallowed +wallowing +wallows +wallpaper +wallpaper's +wallpapered +wallpapering +wallpapers +walls +walnut +walnut's +walnuts +walrus +walrus's +walruses +waltz +waltz's +waltzed +waltzes +waltzing +wampum +wampum's +wan +wand +wand's +wander +wandered +wanderer +wanderer's +wanderers +wandering +wanderlust +wanderlust's +wanderlusts +wanders +wands +wane +wane's +waned +wanes +wangle +wangle's +wangled +wangles +wangling +waning +wanly +wanna +wannabe +wannabe's +wannabes +wanner +wannest +want +want's +wanted +wanting +wanton +wanton's +wantoned +wantoning +wantonly +wantonness +wantonness's +wantons +wants +wapiti +wapiti's +wapitis +war +war's +warble +warble's +warbled +warbler +warbler's +warblers +warbles +warbling +ward +ward's +warded +warden +warden's +wardens +warder +warder's +warders +warding +wardrobe +wardrobe's +wardrobes +wardroom +wardroom's +wardrooms +wards +ware +ware's +warehouse +warehouse's +warehoused +warehouses +warehousing +wares +warfare +warfare's +warhead +warhead's +warheads +warhorse +warhorse's +warhorses +warier +wariest +warily +wariness +wariness's +warlike +warlock +warlock's +warlocks +warlord +warlord's +warlords +warm +warmed +warmer +warmer's +warmers +warmest +warmhearted +warming +warmly +warmonger +warmonger's +warmongering 
+warmongering's +warmongers +warms +warmth +warmth's +warn +warned +warning +warning's +warnings +warns +warp +warp's +warpath +warpath's +warpaths +warped +warping +warps +warrant +warrant's +warranted +warrantied +warranties +warranting +warrants +warranty +warranty's +warrantying +warred +warren +warren's +warrens +warring +warrior +warrior's +warriors +wars +warship +warship's +warships +wart +wart's +warthog +warthog's +warthogs +wartier +wartiest +wartime +wartime's +warts +warty +wary +was +wash +wash's +washable +washable's +washables +washbasin +washbasin's +washbasins +washboard +washboard's +washboards +washbowl +washbowl's +washbowls +washcloth +washcloth's +washcloths +washed +washer +washer's +washers +washerwoman +washerwoman's +washerwomen +washes +washing +washing's +washings +washout +washout's +washouts +washroom +washroom's +washrooms +washstand +washstand's +washstands +washtub +washtub's +washtubs +wasn't +wasp +wasp's +waspish +wasps +wassail +wassail's +wassailed +wassailing +wassails +wastage +wastage's +waste +waste's +wastebasket +wastebasket's +wastebaskets +wasted +wasteful +wastefully +wastefulness +wastefulness's +wasteland +wasteland's +wastelands +wastepaper +wastepaper's +waster +waster's +wasters +wastes +wastewater +wasting +wastrel +wastrel's +wastrels +watch +watch's +watchband +watchband's +watchbands +watchdog +watchdog's +watchdogs +watched +watcher +watcher's +watchers +watches +watchful +watchfully +watchfulness +watchfulness's +watching +watchmaker +watchmaker's +watchmakers +watchman +watchman's +watchmen +watchtower +watchtower's +watchtowers +watchword +watchword's +watchwords +water +water's +waterbed +waterbed's +waterbeds +waterboard +waterboard's +waterboarded +waterboarding +waterboarding's +waterboardings +waterboards +watercolor +watercolor's +watercolors +watercourse +watercourse's +watercourses +watercraft +watercraft's +watercress +watercress's +watered +waterfall +waterfall's +waterfalls +waterfowl 
+waterfowl's +waterfowls +waterfront +waterfront's +waterfronts +waterier +wateriest +watering +waterline +waterline's +waterlines +waterlogged +watermark +watermark's +watermarked +watermarking +watermarks +watermelon +watermelon's +watermelons +waterpower +waterpower's +waterproof +waterproof's +waterproofed +waterproofing +waterproofing's +waterproofs +waters +waters's +watershed +watershed's +watersheds +waterside +waterside's +watersides +waterspout +waterspout's +waterspouts +watertight +waterway +waterway's +waterways +waterworks +waterworks's +watery +watt +watt's +wattage +wattage's +wattle +wattle's +wattled +wattles +wattling +watts +wave +wave's +waved +waveform +wavelength +wavelength's +wavelengths +wavelet +wavelet's +wavelets +waver +waver's +wavered +wavering +wavers +waves +wavier +waviest +waviness +waviness's +waving +wavy +wax +wax's +waxed +waxen +waxes +waxier +waxiest +waxiness +waxiness's +waxing +waxwing +waxwing's +waxwings +waxwork +waxwork's +waxworks +waxy +way +way's +wayfarer +wayfarer's +wayfarers +wayfaring +wayfaring's +wayfarings +waylaid +waylay +waylaying +waylays +ways +wayside +wayside's +waysides +wayward +waywardly +waywardness +waywardness's +we +we'd +we'll +we're +we've +weak +weaken +weakened +weakening +weakens +weaker +weakest +weakfish +weakfish's +weakfishes +weakling +weakling's +weaklings +weakly +weakness +weakness's +weaknesses +weal +weal's +weals +wealth +wealth's +wealthier +wealthiest +wealthiness +wealthiness's +wealthy +wean +weaned +weaning +weans +weapon +weapon's +weaponless +weaponry +weaponry's +weapons +wear +wear's +wearable +wearer +wearer's +wearers +wearied +wearier +wearies +weariest +wearily +weariness +weariness's +wearing +wearisome +wears +weary +wearying +weasel +weasel's +weaseled +weaseling +weasels +weather +weather's +weathercock +weathercock's +weathercocks +weathered +weathering +weathering's +weatherize +weatherized +weatherizes +weatherizing +weatherman +weatherman's +weathermen 
+weatherproof +weatherproofed +weatherproofing +weatherproofs +weathers +weave +weave's +weaved +weaver +weaver's +weavers +weaves +weaving +web +web's +webbed +webbing +webbing's +webcam +webcam's +webcams +webcast +webcast's +webcasting +webcasts +webinar +webinar's +webinars +webisode +webisode's +webisodes +webmaster +webmaster's +webmasters +webmistress +webmistress's +webmistresses +webs +website +website's +websites +wed +wedded +wedder +wedding +wedding's +weddings +wedge +wedge's +wedged +wedges +wedging +wedlock +wedlock's +weds +wee +wee's +weed +weed's +weeded +weeder +weeder's +weeders +weedier +weediest +weeding +weeds +weedy +weeing +week +week's +weekday +weekday's +weekdays +weekend +weekend's +weekended +weekending +weekends +weeklies +weekly +weekly's +weeknight +weeknight's +weeknights +weeks +weep +weep's +weeper +weeper's +weepers +weepier +weepies +weepiest +weeping +weepings +weeps +weepy +weepy's +weer +wees +weest +weevil +weevil's +weevils +weft +weft's +wefts +weigh +weigh's +weighed +weighing +weighs +weight +weight's +weighted +weightier +weightiest +weightiness +weightiness's +weighting +weightless +weightlessness +weightlessness's +weightlifter +weightlifter's +weightlifters +weightlifting +weightlifting's +weights +weighty +weir +weir's +weird +weirder +weirdest +weirdly +weirdness +weirdness's +weirdo +weirdo's +weirdos +weirs +welch +welched +welches +welching +welcome +welcome's +welcomed +welcomes +welcoming +weld +weld's +welded +welder +welder's +welders +welding +welds +welfare +welfare's +welkin +welkin's +well +well's +welled +welling +wellington +wells +wellspring +wellspring's +wellsprings +welsh +welshed +welshes +welshing +welt +welt's +welted +welter +welter's +weltered +weltering +welters +welterweight +welterweight's +welterweights +welting +welts +wen +wen's +wench +wench's +wenches +wend +wended +wending +wends +wens +went +wept +were +weren't +werewolf +werewolf's +werewolves +west +west's +westbound +westerlies 
+westerly +westerly's +western +western's +westerner +westerner's +westerners +westernize +westernized +westernizes +westernizing +westernmost +westerns +westward +westwards +wet +wet's +wetback +wetback's +wetbacks +wetland +wetland's +wetlands +wetly +wetness +wetness's +wets +wetted +wetter +wettest +wetting +whack +whack's +whacked +whackier +whackiest +whacking +whacks +whacky +whale +whale's +whalebone +whalebone's +whaled +whaler +whaler's +whalers +whales +whaling +whaling's +wham +wham's +whammed +whammies +whamming +whammy +whammy's +whams +wharf +wharf's +wharfs +wharves +what +what's +whatchamacallit +whatchamacallit's +whatchamacallits +whatever +whatnot +whatnot's +whats +whatsoever +wheal +wheal's +wheals +wheat +wheat's +wheaten +wheedle +wheedled +wheedles +wheedling +wheel +wheel's +wheelbarrow +wheelbarrow's +wheelbarrows +wheelbase +wheelbase's +wheelbases +wheelchair +wheelchair's +wheelchairs +wheeled +wheeler +wheeling +wheels +wheelwright +wheelwright's +wheelwrights +wheeze +wheeze's +wheezed +wheezes +wheezier +wheeziest +wheezing +wheezy +whelk +whelk's +whelked +whelks +whelp +whelp's +whelped +whelping +whelps +when +when's +whence +whenever +whens +where +where's +whereabouts +whereabouts's +whereas +whereat +whereby +wherefore +wherefore's +wherefores +wherein +whereof +whereon +wheres +wheresoever +whereupon +wherever +wherewithal +wherewithal's +whet +whether +whets +whetstone +whetstone's +whetstones +whetted +whetting +whew +whey +whey's +which +whichever +whiff +whiff's +whiffed +whiffing +whiffs +while +while's +whiled +whiles +whiling +whilst +whim +whim's +whimper +whimper's +whimpered +whimpering +whimpers +whims +whimsey +whimsey's +whimseys +whimsical +whimsicality +whimsicality's +whimsically +whimsies +whimsy +whimsy's +whine +whine's +whined +whiner +whiner's +whiners +whines +whinier +whiniest +whining +whinnied +whinnies +whinny +whinny's +whinnying +whiny +whip +whip's +whipcord +whipcord's +whiplash +whiplash's 
+whiplashes +whipped +whippersnapper +whippersnapper's +whippersnappers +whippet +whippet's +whippets +whipping +whipping's +whippings +whippoorwill +whippoorwill's +whippoorwills +whips +whir +whir's +whirl +whirl's +whirled +whirligig +whirligig's +whirligigs +whirling +whirlpool +whirlpool's +whirlpools +whirls +whirlwind +whirlwind's +whirlwinds +whirr +whirr's +whirred +whirring +whirrs +whirs +whisk +whisk's +whisked +whisker +whisker's +whiskered +whiskers +whiskey +whiskey's +whiskeys +whiskies +whisking +whisks +whisky +whisky's +whiskys +whisper +whisper's +whispered +whispering +whispers +whist +whist's +whistle +whistle's +whistled +whistler +whistler's +whistlers +whistles +whistling +whit +whit's +white +white's +whitecap +whitecap's +whitecaps +whitefish +whitefish's +whitefishes +whiten +whitened +whitener +whitener's +whiteners +whiteness +whiteness's +whitening +whitens +whiter +whites +whitest +whitewall +whitewall's +whitewalls +whitewash +whitewash's +whitewashed +whitewashes +whitewashing +whither +whiting +whiting's +whitings +whitish +whits +whittle +whittled +whittler +whittler's +whittlers +whittles +whittling +whiz +whiz's +whizz +whizz's +whizzed +whizzes +whizzing +who +who'd +who'll +who're +who's +who've +whoa +whodunit +whodunit's +whodunits +whodunnit +whodunnit's +whodunnits +whodunnits's +whoever +whole +whole's +wholehearted +wholeheartedly +wholeness +wholeness's +wholes +wholesale +wholesale's +wholesaled +wholesaler +wholesaler's +wholesalers +wholesales +wholesaling +wholesome +wholesomeness +wholesomeness's +wholly +whom +whomever +whomsoever +whoop +whoop's +whooped +whoopee +whoopees +whooping +whoops +whoosh +whoosh's +whooshed +whooshes +whooshing +whopper +whopper's +whoppers +whopping +whore +whore's +whorehouse +whorehouse's +whorehouses +whores +whorl +whorl's +whorled +whorls +whose +whosoever +why +why's +whys +wick +wick's +wicked +wickeder +wickedest +wickedly +wickedness +wickedness's +wicker +wicker's +wickers 
+wickerwork +wickerwork's +wicket +wicket's +wickets +wicks +wide +widely +widen +widened +wideness +wideness's +widening +widens +wider +widescreen +widescreen's +widescreens +widespread +widest +widgeon +widgeon's +widgeons +widow +widow's +widowed +widower +widower's +widowers +widowhood +widowhood's +widowing +widows +width +width's +widths +wield +wielded +wielding +wields +wiener +wiener's +wieners +wife +wife's +wifely +wig +wig's +wigeon +wigeon's +wigeons +wigged +wigging +wiggle +wiggle's +wiggled +wiggler +wiggler's +wigglers +wiggles +wigglier +wiggliest +wiggling +wiggly +wight +wight's +wights +wigs +wigwag +wigwag's +wigwagged +wigwagging +wigwags +wigwam +wigwam's +wigwams +wiki +wiki's +wikis +wild +wild's +wildcat +wildcat's +wildcats +wildcatted +wildcatting +wildebeest +wildebeest's +wildebeests +wilder +wilderness +wilderness's +wildernesses +wildest +wildfire +wildfire's +wildfires +wildflower +wildflower's +wildflowers +wildfowl +wildfowl's +wildfowls +wildlife +wildlife's +wildly +wildness +wildness's +wilds +wile +wile's +wiled +wiles +wilful +wilfully +wilfulness +wilfulness's +wilier +wiliest +wiliness +wiliness's +wiling +will +will's +willed +willful +willfully +willfulness +willfulness's +willies +willies's +willing +willingly +willingness +willingness's +willow +willow's +willows +willowy +willpower +willpower's +wills +wilt +wilt's +wilted +wilting +wilts +wily +wimp +wimp's +wimpier +wimpiest +wimple +wimple's +wimpled +wimples +wimpling +wimps +wimpy +win +win's +wince +wince's +winced +winces +winch +winch's +winched +winches +winching +wincing +wind +wind's +windbag +windbag's +windbags +windbreak +windbreak's +windbreaker +windbreaker's +windbreakers +windbreaks +windburn +windburn's +winded +windfall +windfall's +windfalls +windier +windiest +windiness +windiness's +winding +winding's +windjammer +windjammer's +windjammers +windlass +windlass's +windlasses +windmill +windmill's +windmilled +windmilling +windmills +window 
+window's +windowed +windowing +windowpane +windowpane's +windowpanes +windows +windowsill +windowsill's +windowsills +windpipe +windpipe's +windpipes +winds +windscreen +windscreen's +windscreens +windshield +windshield's +windshields +windsock +windsock's +windsocks +windstorm +windstorm's +windstorms +windsurf +windsurfed +windsurfing +windsurfing's +windsurfs +windswept +windup +windup's +windups +windward +windward's +windy +wine +wine's +wined +wineglass +wineglass's +wineglasses +wineries +winery +winery's +wines +wing +wing's +winged +winger +wingers +winging +wingless +wingnut +wingnut's +wingnuts +wings +wingspan +wingspan's +wingspans +wingspread +wingspread's +wingspreads +wingtip +wingtip's +wingtips +wining +wink +wink's +winked +winking +winks +winner +winner's +winners +winning +winning's +winnings +winnow +winnowed +winnowing +winnows +wino +wino's +winos +wins +winsome +winsomely +winsomer +winsomest +winter +winter's +wintered +wintergreen +wintergreen's +winterier +winteriest +wintering +winterize +winterized +winterizes +winterizing +winters +wintertime +wintertime's +wintery +wintrier +wintriest +wintry +wipe +wipe's +wiped +wiper +wiper's +wipers +wipes +wiping +wire +wire's +wired +wireless +wireless's +wirelesses +wires +wiretap +wiretap's +wiretapped +wiretapping +wiretaps +wirier +wiriest +wiriness +wiriness's +wiring +wiring's +wiry +wisdom +wisdom's +wise +wise's +wiseacre +wiseacre's +wiseacres +wisecrack +wisecrack's +wisecracked +wisecracking +wisecracks +wisely +wiser +wises +wisest +wish +wish's +wishbone +wishbone's +wishbones +wished +wisher +wisher's +wishers +wishes +wishful +wishfully +wishing +wishlist's +wisp +wisp's +wispier +wispiest +wisps +wispy +wist +wistaria +wistaria's +wistarias +wisteria +wisteria's +wisterias +wistful +wistfully +wistfulness +wistfulness's +wit +wit's +witch +witch's +witchcraft +witchcraft's +witched +witchery +witchery's +witches +witching +with +withal +withdraw +withdrawal +withdrawal's 
+withdrawals +withdrawing +withdrawn +withdraws +withdrew +wither +withered +withering +withers +withers's +withheld +withhold +withholding +withholding's +withholds +within +within's +without +withstand +withstanding +withstands +withstood +witless +witlessly +witness +witness's +witnessed +witnesses +witnessing +wits +wits's +witticism +witticism's +witticisms +wittier +wittiest +wittily +wittiness +wittiness's +witting +wittingly +witty +wive +wives +wiz +wiz's +wizard +wizard's +wizardry +wizardry's +wizards +wizened +wizes +wizzes +wobble +wobble's +wobbled +wobbles +wobblier +wobbliest +wobbling +wobbly +woe +woe's +woebegone +woeful +woefuller +woefullest +woefully +woes +wok +wok's +woke +woken +woks +wolf +wolf's +wolfed +wolfhound +wolfhound's +wolfhounds +wolfing +wolfish +wolfram +wolfram's +wolfs +wolverine +wolverine's +wolverines +wolves +woman +woman's +womanhood +womanhood's +womanish +womanize +womanized +womanizer +womanizer's +womanizers +womanizes +womanizing +womankind +womankind's +womanlier +womanliest +womanlike +womanlike's +womanliness +womanliness's +womanly +womb +womb's +wombat +wombat's +wombats +wombs +women +women's +womenfolk +womenfolk's +womenfolks +womenfolks's +won +won's +won't +wonder +wonder's +wondered +wonderful +wonderfully +wondering +wonderland +wonderland's +wonderlands +wonderment +wonderment's +wonders +wondrous +wondrously +wont +wont's +wonted +woo +wood +wood's +woodbine +woodbine's +woodcarving +woodcarving's +woodcarvings +woodchuck +woodchuck's +woodchucks +woodcock +woodcock's +woodcocks +woodcraft +woodcraft's +woodcut +woodcut's +woodcuts +woodcutter +woodcutter's +woodcutters +woodcutting +woodcutting's +wooded +wooden +woodener +woodenest +woodenly +woodenness +woodenness's +woodier +woodies +woodiest +woodiness +woodiness's +wooding +woodland +woodland's +woodlands +woodman +woodman's +woodmen +woodpecker +woodpecker's +woodpeckers +woodpile +woodpile's +woodpiles +woods +woods's +woodshed +woodshed's 
+woodsheds +woodsier +woodsiest +woodsman +woodsman's +woodsmen +woodsy +woodwind +woodwind's +woodwinds +woodwork +woodwork's +woodworking +woodworking's +woodworm +woody +woody's +wooed +wooer +wooer's +wooers +woof +woof's +woofed +woofer +woofer's +woofers +woofing +woofs +wooing +wool +wool's +woolen +woolen's +woolens +woolgathering +woolgathering's +woolie +woolie's +woolier +woolies +wooliest +woollier +woollies +woolliest +woolliness +woolliness's +woolly +woolly's +wooly +wooly's +woos +woozier +wooziest +wooziness +wooziness's +woozy +word +word's +worded +wordier +wordiest +wordiness +wordiness's +wording +wording's +wordings +wordplay +wordplay's +words +wordy +wore +work +work's +workable +workaday +workaholic +workaholic's +workaholics +workbench +workbench's +workbenches +workbook +workbook's +workbooks +workday +workday's +workdays +worked +worker +worker's +workers +workfare +workfare's +workflow +workflow's +workflows +workforce +workforce's +workhorse +workhorse's +workhorses +workhouse +workhouse's +workhouses +working +working's +workingman +workingman's +workingmen +workings +workings's +workload +workload's +workloads +workman +workman's +workmanlike +workmanship +workmanship's +workmen +workout +workout's +workouts +workplace +workplace's +workplaces +works +works's +worksheet +worksheet's +worksheets +workshop +workshop's +workshops +workstation +workstation's +workstations +workweek +workweek's +workweeks +world +world's +worldlier +worldliest +worldliness +worldliness's +worldly +worlds +worldwide +worm +worm's +wormed +wormhole +wormhole's +wormholes +wormier +wormiest +worming +worms +wormwood +wormwood's +wormy +worn +worried +worrier +worrier's +worriers +worries +worrisome +worry +worry's +worrying +worryings +worrywart +worrywart's +worrywarts +worse +worse's +worsen +worsened +worsening +worsens +worship +worship's +worshiped +worshiper +worshiper's +worshipers +worshipful +worshiping +worshipped +worshipper +worshipper's 
+worshippers +worshipping +worships +worst +worst's +worsted +worsted's +worsting +worsts +worth +worth's +worthier +worthies +worthiest +worthily +worthiness +worthiness's +worthless +worthlessness +worthlessness's +worthwhile +worthy +worthy's +wot +would +would've +wouldn't +woulds +wound +wound's +wounded +wounder +wounding +wounds +wove +woven +wow +wow's +wowed +wowing +wows +wrack +wrack's +wraith +wraith's +wraiths +wrangle +wrangle's +wrangled +wrangler +wrangler's +wranglers +wrangles +wrangling +wrap +wrap's +wraparound +wraparound's +wraparounds +wrapped +wrapper +wrapper's +wrappers +wrapping +wrapping's +wrappings +wraps +wrapt +wrath +wrath's +wrathful +wrathfully +wreak +wreaked +wreaking +wreaks +wreath +wreath's +wreathe +wreathed +wreathes +wreathing +wreaths +wreck +wreck's +wreckage +wreckage's +wrecked +wrecker +wrecker's +wreckers +wrecking +wrecks +wren +wren's +wrench +wrench's +wrenched +wrenches +wrenching +wrens +wrest +wrest's +wrested +wresting +wrestle +wrestle's +wrestled +wrestler +wrestler's +wrestlers +wrestles +wrestling +wrestling's +wrests +wretch +wretch's +wretched +wretcheder +wretchedest +wretchedly +wretchedness +wretchedness's +wretches +wrier +wriest +wriggle +wriggle's +wriggled +wriggler +wriggler's +wrigglers +wriggles +wriggling +wriggly +wright +wring +wring's +wringer +wringer's +wringers +wringing +wrings +wrinkle +wrinkle's +wrinkled +wrinkles +wrinklier +wrinklies +wrinkliest +wrinkling +wrinkly +wrinkly's +wrist +wrist's +wristband +wristband's +wristbands +wrists +wristwatch +wristwatch's +wristwatches +writ +writ's +writable +write +writer +writer's +writers +writes +writhe +writhe's +writhed +writhes +writhing +writing +writing's +writings +writs +written +wrong +wrong's +wrongdoer +wrongdoer's +wrongdoers +wrongdoing +wrongdoing's +wrongdoings +wronged +wronger +wrongest +wrongful +wrongfully +wrongfulness +wrongfulness's +wrongheaded +wrongheadedly +wrongheadedness +wrongheadedness's +wronging +wrongly 
+wrongness +wrongness's +wrongs +wrote +wroth +wrought +wrung +wry +wryer +wryest +wryly +wryness +wryness's +wuss +wuss's +wusses +x +xenon +xenon's +xenophobia +xenophobia's +xenophobic +xerographic +xerography +xerography's +xylem +xylem's +xylophone +xylophone's +xylophones +xylophonist +xylophonist's +xylophonists +y +y'all +yacht +yacht's +yachted +yachting +yachting's +yachts +yachtsman +yachtsman's +yachtsmen +yack +yack's +yacked +yacking +yacks +yahoo +yahoo's +yahoos +yak +yak's +yakked +yakking +yaks +yam +yam's +yammer +yammer's +yammered +yammering +yammers +yams +yank +yank's +yanked +yanking +yanks +yap +yap's +yapped +yapping +yaps +yard +yard's +yardage +yardage's +yardages +yardarm +yardarm's +yardarms +yards +yardstick +yardstick's +yardsticks +yarmulke +yarmulke's +yarmulkes +yarn +yarn's +yarns +yaw +yaw's +yawed +yawing +yawl +yawl's +yawls +yawn +yawn's +yawned +yawning +yawns +yaws +yaws's +ye +yea +yea's +yeah +yeah's +yeahs +year +year's +yearbook +yearbook's +yearbooks +yearlies +yearling +yearling's +yearlings +yearly +yearly's +yearn +yearned +yearning +yearning's +yearnings +yearns +years +yeas +yeast +yeast's +yeastier +yeastiest +yeasts +yeasty +yell +yell's +yelled +yelling +yellow +yellow's +yellowed +yellower +yellowest +yellowing +yellowish +yellows +yells +yelp +yelp's +yelped +yelping +yelps +yen +yen's +yens +yeoman +yeoman's +yeomen +yep +yep's +yeps +yes +yes's +yeses +yeshiva +yeshiva's +yeshivah +yeshivah's +yeshivahs +yeshivas +yeshivot +yeshivoth +yessed +yessing +yest +yesterday +yesterday's +yesterdays +yesteryear +yesteryear's +yet +yeti +yew +yew's +yews +yield +yield's +yielded +yielding +yieldings +yields +yip +yip's +yipped +yippee +yipping +yips +yo +yock +yock's +yocks +yodel +yodel's +yodeled +yodeler +yodeler's +yodelers +yodeling +yodelled +yodeller +yodeller's +yodellers +yodelling +yodels +yoga +yoga's +yoghourt +yoghourt's +yoghourts +yoghurt +yoghurt's +yoghurts +yogi +yogi's +yogin +yogin's +yogins 
+yogis +yogurt +yogurt's +yogurts +yoke +yoke's +yoked +yokel +yokel's +yokels +yokes +yoking +yolk +yolk's +yolks +yon +yonder +yore +yore's +you +you'd +you'll +you're +you's +you've +young +young's +younger +youngest +youngish +youngster +youngster's +youngsters +your +yours +yourself +yourselves +yous +youth +youth's +youthful +youthfully +youthfulness +youthfulness's +youths +yowl +yowl's +yowled +yowling +yowls +yttrium +yttrium's +yucca +yucca's +yuccas +yuck +yuck's +yucked +yuckier +yuckiest +yucking +yucks +yucky +yuk +yuk's +yukked +yukking +yuks +yule +yule's +yuletide +yuletide's +yum +yummier +yummiest +yummy +yup +yup's +yuppie +yuppie's +yuppies +yuppy +yuppy's +yups +z +zanier +zanies +zaniest +zaniness +zaniness's +zany +zany's +zap +zap's +zapped +zapper +zapper's +zappers +zapping +zaps +zeal +zeal's +zealot +zealot's +zealots +zealous +zealously +zealousness +zealousness's +zebra +zebra's +zebras +zebu +zebu's +zebus +zed +zed's +zeds +zenith +zenith's +zeniths +zephyr +zephyr's +zephyrs +zeppelin +zeppelin's +zeppelins +zero +zero's +zeroed +zeroes +zeroing +zeros +zest +zest's +zestful +zestfully +zests +zeta +zigzag +zigzag's +zigzagged +zigzagging +zigzags +zilch +zilch's +zillion +zillion's +zillions +zinc +zinc's +zinced +zincing +zincked +zincking +zincs +zing +zing's +zinged +zinger +zinger's +zingers +zinging +zings +zinnia +zinnia's +zinnias +zip +zip's +zipped +zipper +zipper's +zippered +zippering +zippers +zippier +zippiest +zipping +zippy +zips +zircon +zircon's +zirconium +zirconium's +zircons +zit +zit's +zither +zither's +zithers +zits +zodiac +zodiac's +zodiacal +zodiacs +zombi +zombi's +zombie +zombie's +zombies +zombis +zonal +zone +zone's +zoned +zones +zoning +zonked +zoo +zoo's +zoological +zoologist +zoologist's +zoologists +zoology +zoology's +zoom +zoom's +zoomed +zooming +zooms +zoos +zucchini +zucchini's +zucchinis +zwieback +zwieback's +zygote +zygote's +zygotes +Ångström +Ångström's +éclair +éclair's +éclairs 
+éclat +éclat's +élan +élan's +émigré +émigré's +émigrés +épée +épée's +épées +étude +étude's +études diff --git a/marginalia_nu/src/main/resources/dictionary/latin-1000 b/marginalia_nu/src/main/resources/dictionary/latin-1000 new file mode 100644 index 00000000..b59ded2f --- /dev/null +++ b/marginalia_nu/src/main/resources/dictionary/latin-1000 @@ -0,0 +1,984 @@ +et +in +est +non +ut +cum +si +ad +quod +qui +sed +quae +ex +quam +esse +de +aut +hoc +nec +sunt +etiam +se +enim +quid +ab +per +sit +atque +id +autem +quo +uel +me +ne +te +ac +nam +tamen +eius +haec +mihi +ita +iam +neque +quidem +eo +quoque +ea +pro +tibi +quia +ego +nihil +eum +modo +nunc +sic +libro +an +quem +quibus +inter +qua +esset +causa +erat +nisi +hic +potest +uero +tum +quis +ipse +fuit +tu +ille +ante +sine +res +his +omnia +idem +ubi +sibi +illa +post +rem +ei +tam +apud +tantum +magis +at +erit +deinde +quos +cui +omnes +is +re +contra +nos +cuius +omnibus +minus +quasi +ergo +eam +igitur +sub +rei +posse +dum +eorum +sua +inquit +itaque +sint +primum +ipsa +habet +suo +illud +item +eos +illi +siue +satis +nobis +parte +hanc +ait +rerum +semper +propter +tempore +loco +possit +unde +rebus +fuerit +inde +omnis +omnium +quoniam +fieri +eadem +nomine +alia +maxime +hunc +alii +hac +pater +quas +facere +saepe +uerum +aliquid +suis +bene +ipsum +die +mea +multa +nomen +solum +uidetur +illum +unum +fuisse +nulla +natura +primo +simul +ob +una +dies +postea +quidam +factum +habere +tempus +senatus +iis +uos +tunc +dixit +licet +tua +iure +quantum +dicere +uti +bellum +dicitur +partem +genus +numquam +ideo +locum +ibi +pars +aliud +eodem +quorum +huius +erant +aduersus +filius +sum +nemo +suum +debet +animo +hominum +supra +opus +sui +causam +dicit +hinc +quin +uis +magna +fecit +sicut +plus +illo +ius +edictum +heres +arma +ista +illis +ipsi +fit +suam +huic +uerba +essent +homines +duo +facit +facta +usque +manus +cur +quamuis +quaedam +potius +ipso +omni +dedit +usus +animi +uenit +diem 
+populi +urbem +castra +romani +genere +manu +haud +prima +bello +uno +summa +forte +ui +aliis +ratio +prius +ratione +posset +adhuc +certe +tamquam +publicae +terra +filium +nostra +publica +bona +circa +es +meo +aqua +caput +uerbis +lege +super +denique +fortuna +illam +uelut +quamquam +possunt +sane +secundum +oportet +filio +hominem +praeter +patris +uobis +recte +modum +armis +belli +suae +pecuniam +tot +uita +necesse +meum +habent +corpus +aliter +caesar +suos +multis +utrum +diu +uitae +dare +hostium +paulus +praeterea +mortem +uix +animum +postquam +alio +omne +statim +totum +milia +dictum +uideri +adeo +intra +fere +uim +tanta +alter +mox +partes +alterum +umquam +patrem +multo +facile +iudicium +iudices +domum +ulpianus +scilicet +tota +magno +locis +quando +cura +locus +tertio +fidem +secundo +cetera +quare +corpore +urbe +eas +iudicio +bonorum +signa +deos +uiri +consul +homo +unus +sententia +legatum +potuit +decem +quicquam +numero +populo +fore +nostri +quisque +melius +dici +namque +tuo +gratia +prope +corporis +uocant +habeat +ceteris +nullum +sese +altera +omnem +puto +cicero +hi +multum +mare +rex +exercitus +aliqua +actio +uir +quattuor +pecunia +male +dicunt +annos +nullo +illius +fuerat +seruus +potestate +respondit +solet +diximus +dicta +iter +hereditatem +alias +decimo +iuris +anno +uirum +nostris +uitam +rationem +hos +dari +iussit +ipsis +ulla +paulo +debere +milites +habuit +ceterum +nondum +romae +procul +ipsius +castris +agere +quinque +quidquid +rursus +huc +alios +tandem +senatu +militum +uino +ora +actionem +consilium +bonis +omnino +legem +longe +generis +praetor +genera +interim +primus +deorum +tuis +publicam +consilio +uideatur +dicam +heredem +donec +maior +sanguine +ore +duobus +sabinum +alius +eis +exercitum +amplius +fructus +temporis +oculos +tuum +urbis +quemadmodum +data +meis +mi +toto +quippe +dixi +testamento +oculis +meus +fuisset +parum +caesaris +corpora +domus +possessionem +etsi +dicimus +hostes +bonum +litteris 
+imperium +mei +romam +aliquando +fama +nocte +regem +patre +utique +interdum +plures +meam +animus +quaeque +multi +uera +paene +constat +ii +meae +earum +pedes +poterit +consules +liber +ire +mortis +dicendum +terram +quinto +tres +caelo +hominis +populus +cn +extra +fuerunt +mater +tanto +ferre +hostem +diebus +eiusdem +seu +litteras +metu +quotiens +more +sola +morte +uirtute +condicione +fiat +senatum +mari +bella +malo +iste +scire +illos +sententiam +usum +tui +tempora +sumus +animos +lex +tribus +iterum +exercitu +uiro +medio +di +tuae +partibus +terrae +malum +uirtus +iubet +manibus +annis +opera +uxorem +graeci +centum +deus +patres +duas +dicuntur +fide +quarto +magnum +pertinet +nil +has +tuam +nomina +idque +sex +num +solis +quibusdam +domino +naturae +romanis +uideo +semel +alium +regis +placet +quisquam +mala +deum +capite +uires +datur +ultra +suas +usu +uolo +libertatem +aquae +nostrum +dico +plerumque +alterius +immo +unam +populum +longa +aliquis +ira +noua +domi +uia +plura +possum +futurum +leges +iouis +scribit +facto +filia +coepit +plane +ferro +secum +caelum +nostro +utrumque +dominus +solus +erunt +datum +mulier +iulianus +hostis +duos +seruum +legati +illic +unius +protinus +quanto +digestorum +patri +possint +imperio +debeat +bis +scio +aeque +romano +uoce +uelit +solent +haberet +faciunt +mecum +spem +accepit +quaeritur +dolo +oportere +sin +undique +diceret +oppidum +modi +romanus +nescio +quondam +oratio +factus +pretium +hodie +ueluti +plurimum +natus +horum +gloria +equidem +sequitur +poena +interest +sexto +uoluntate +fecerit +fuerint +nullam +periculum +putant +ciuitatis +naturam +equites +ipsam +significat +casus +caeli +annum +pluribus +tecum +spes +similis +fides +domo +pariter +turba +loci +casu +scripsit +gratiam +uideretur +uelim +uiginti +uersus +aquam +uellet +periculo +fundum +media +hominibus +uoluit +talis +poterat +amor +sacra +causis +prouincia +mille +mali +ostendit +uidentur +sis +primis +puer +fata +magni +olim 
+praestare +subito +septimo +ipsos +facilius +fecisse +italia +salutem +filii +demum +fortasse +qualis +putat +singulis +loca +ciuitate +profecto +aliquo +credo +petit +tulit +auctoritate +scriptum +boni +partim +minime +plebis +pomponius +cuncta +liberos +reliqua +uidit +fortunae +uolunt +ordine +causas +summo +quodam +maxima +multos +arte +causae +uult +suorum +inquam +conuenit +seruo +memoria +saepius +uenire +petere +tela +antea +finem +rege +membra +opere +pectore +frater +potestatem +nulli +etiamsi +malis +miles +ueteres +fiunt +amore +nimis +legiones +temporibus +ecce +placuit +serui +spe +dicat +liberis +actione +hostibus +oratione +potes +praecipue +uiribus +crimen +iuppiter +accipere +uiris +uxor +studio +officio +iuxta +dat +ingens +agros +sensus +paucis +uere +prior +refert +signum +uinum +partis +possent +habebat +eandem +sidera +iamque +par +uerborum +ignis +neminem +beneficium +quarum +uidere +consulem +romanos +totam +uerbum +eundem +patriae +foro +iniuria +maiores +romanum +moenia +auctor +ciuium +testamentum +familias +heredes +aetas +terras +manum +altero +legibus +legis +certum +uitia +forma +totius +dein +summam +noster +profectus +nihilo +antequam +quicquid +uocatur +uiam +utraque +iii +uitium +medium +domini +nimium +dolor +filiam +facies +nostrae +persona +ni +dextra +octauo +posuit +italiam +proelio +aciem +mora +uiros +operis +homini +repente +auro +ultro +titio +etenim +frustra +culpa +liceat +mente +fuga +possumus +maiore +equitum +motus +summum +uelle +imperator +operam +agi +quaesitum +dolore +minor +exemplum +faciat +uale +melle +peruenit +teneri +solo +matrem +terris +caesarem +sponte +tertia +quanta +soli \ No newline at end of file diff --git a/marginalia_nu/src/main/resources/dictionary/swe-1000 b/marginalia_nu/src/main/resources/dictionary/swe-1000 new file mode 100644 index 00000000..1970dfb2 --- /dev/null +++ b/marginalia_nu/src/main/resources/dictionary/swe-1000 @@ -0,0 +1,641 @@ +och +att +det +som +en +på +är +för +av +med 
+den +till +inte +har +de +han +om +ett +jag +var +men +sig +så +vi +hon +från +man +kan +när +hade +nu +skulle +år +säger +där +också +eller +sin +under +efter +ut +ska +vid +mot +då +här +bara +mycket +upp +över +vara +alla +kommer +vad +än +andra +finns +får +in +sedan +du +få +ha +hur +blir +två +vill +hans +många +måste +något +mer +detta +utan +sina +går +allt +blev +fick +mig +honom +dem +skall +nya +bli +någon +mellan +även +några +första +varit +kronor +sitt +genom +ta +kom +dag +fram +Sverige +kunde +stora +hela +svenska +procent +ju +göra +ingen +sa +bra +tre +gör +kanske +oss +själv +bland +annat +gick +redan +se +inom +gå +aldrig +del +väl +åt +henne +kunna +helt +samma +denna +enligt +fått +olika +stor +tid +vet +lite +gång +både +sätt +ser +miljoner +hennes +därför +tidigare +dock +tror +ur +min +dessa +just +ner +flera +varje +hos +gjorde +tog +gäller +barn +tar +komma +Stockholm +igen +står +såg +lika +mest +sade +ändå +ännu +ja +tycker +tillbaka +bättre +innan +nog +ligger +deras +rätt +ni +människor +alltid +fall +ger +blivit +ge +fyra +ny +gamla +annan +eftersom +trots +kvar +vilket +säga +tiden +gjort +vår +ville +regeringen +samtidigt +längre +dessutom +större +bort +nej +nästan +per +mindre +först +inget +fanns +inför +pengar +fem +runt +själva +länge +senare +stället +fast +mitt +Göteborg +ofta +senaste +medan +exempel +före +började +varför +enda +svårt +egen +nästa +visar +tillsammans +liv +sista +hem +stort +stod +året +alltså +bakom +långt +haft +fler +fortfarande +plats +hand +menar +båda +väg +USA +utanför +förra +inga +framför +behöver +dess +dom +nytt +bör +våra +dig +folk +håller +par +tio +handlar +problem +heller +frågan +gått +vidare +borde +början +låg +minst +bästa +anser +liten +Anders +sett +egna +satt +svensk +berättar +kvinnor +bild +klart +känner +gången +samt +lätt +ytterligare +världen +särskilt +mål +kl +sidan +kommit +fråga +riktigt +Lars +innebär +flesta +hålla +största +egentligen +små +personer +Johansson +landet 
+ibland +sen +sex +arbete +gånger +emot +slut +kring +gav +hjälp +ihop +åren +omkring +varandra +företag +hemma +nära +ens +däremot +sagt +snart +Andersson +börjar +helst +vilka +polisen +Peter +därmed +vem +jobb +direkt +lilla +cirka +visst +ned +livet +liksom +tagit +miljarder +skolan +vissa +fel +politiska +sådan +ganska +sitter +vilken +dagens +beslut +kände +ingenting +lång +ute +förslag +spelar +steg +höll +ord +verkligen +tänker +möjligt +säkert +vägen +tv +meter +faktiskt +gott +snabbt +dagen +dagar +alldeles +all +Göran +ordförande +precis +mej +namn +hus +unga +länder +vårt +tyckte +skriver +talar +sådana +Sveriges +barnen +visste +tredje +mamma +Jan +klara +drygt +frågor +män +hoppas +tala +form +veckan +naturligtvis +kort +amerikanska +stöd +ekonomiska +the +hög +delar +mannen +god +gärna +EU +mina +Stockholms +spela +Europa +slutet +högre +visa +tänka +eget +tänkte +plötsligt +grund +viktigt +visade +alls +pappa +ifrån +stå +års +land +annars +vann +hittills +händer +verkar +kr +långa +bo +endast +krav +Persson +veta +dit +förstås +försöker +mera +ena +betala +enkelt +hör +sju +litet +sej +hårt +brukar +minuter +nämligen +er +låter +musik +slog +roll +huset +Johan +ungefär +gammal +månader +björn +rum +tycks +igenom +heter +övriga +väldigt +kväll +sådant +kommunen +bäst +åtta +bor +Bengt +trodde +börja +internationella +vore +främst +låta +antal +sida +vatten +åtminstone +kunnat +sätta +framtiden +numera +tro +kvinna +vecka +Thomas +äldre +knappast +försöka +goda +lägga +inne +maj +vilja +satte +rad +höga +fortsätter +arbetar +DN +bygga +håll +samhället +ex +arbetet +film +veckor +betyder +timmar +Nilsson +känna +utveckling +saker +sak +Mats +lag +skapa +huvudet +kräver +frågade +ansvar +lever +borta +dels +kallade +tills +& +Stefan +närmare +bok +antalet +skrev +årets +ungdomar +tag +dra +känns +via +new +försökte +drog +foto +använda +hitta +staden +spelade +fortsätta +klockan +totalt +svarade +extra +samband +tur +Tyskland +viss +lämna +staten +högt 
+höra +ökar +leva +förstår +din +ensam +behövs +köpa +möjlighet +historia +resultat +anställda +möjligheter +världens +slå +lär +stark +död +betydligt +visserligen +alltför +samarbete +lät +far +Ulf +chef +politiker +hit +Lennart +bl +Erik +beror +skäl +ögon +öppna +matchen +ställa +finnas +utbildning +ställer +någonting +försök +området +laget +leder +svar +hel +bilder +intresse +hävdar +arbeta +ökat +förslaget +Carlsson +klar diff --git a/marginalia_nu/src/main/resources/dictionary/word-frequency b/marginalia_nu/src/main/resources/dictionary/word-frequency new file mode 100644 index 00000000..f5f8eda9 --- /dev/null +++ b/marginalia_nu/src/main/resources/dictionary/word-frequency @@ -0,0 +1,1003 @@ +the +of +and +in +to +was +is +for +on +as +with +by +he +that +at +from +his +it +an +were +which +are +this +also +be +had +or +has +first +their +after +its +one +new +but +who +her +not +she +they +have +two +been +other +when +during +all +into +there +time +may +more +school +years +over +only +would +later +most +where +between +some +up +world +city +national +about +such +him +then +made +out +state +three +while +used +university +can +united +under +known +season +many +year +part +became +born +film +these +than +team +no +second +including +states +being +through +before +both +american +south +early +war +history +against +however +family +until +well +since +them +work +life +following +area +people +series +north +name +career +album +music +played +group +district +number +several +high +released +county +de +company +called +will +league +won +four +house +government +each +march +same +game +international +september +january +club +found +june +october +began +located +july +so +west +use +august +now +college +john +station +population +april +public +home +end +november +member +place +general +town +former +december +church +if +age +held +named +system +because +york +took +day +river +around +football +british +line +east +local +any +song +due 
+along +service +party +best +february +served +did +back +another +based +could +within +received +century +village +built +like +members +building +major +final +show +games +although +include +species +death +band +small +main +left +president +said +published +died +large +last +five +couldn't +what +me +order +st +single +set +third +own +those +education +according +included +long +very +park +still +road +army +division +book +development +among +law +often +french +moved +times +what +community +central +led +english +original +old +son +children +million +different +near +just +top +late +again +water +air +great +center +form +much +research +side +us +art +court +play +down +country +off +even +council +german +street +record +power +established +ii +london +land +cup +having +title +started +support +political +students +award +military +period +came +went +production +white +way +given +island +make +next +role +television +king +region +works +total +championship +using +various +head +office +six +do +player +become +father +list +business +western +produced +director +married +program +association +england +field +worked +election +black +department +joined +announced +created +point +returned +professional +union +written +few +you +young +without +take +described +site +royal +services +radio +together +social +force +northern +per +founded +act +though +society +wrote +further +women +days +lost +continued +design +william +every +version +project +summer +live +men +man +european +we +southern +position +board +india +france +round +railway +open +level +considered +control +opened +run +australia +recorded +important +san +once +video +california +special +win +popular +appeared +match +release +common +battle +areas +hall +event +working +records +james +formed +right +playing +see +average +others +short +similar +teams +elected +george +currently +making +example +awards +construction +story +living +red +originally +debut +race +language 
+forces +lead +la +signed +developed +modern +appointed +case +addition +police +wife +result +minister +schools +events +america +route +little +lake +canada +himself +songs +current +upon +how +points +rock +present +never +free +science +information +health +training +class +throughout +track +good +media +museum +across +australian +human +census +indian +style +personal +love +germany +available +province +tour +away +eventually +body +despite +eastern +sold +committee +performance +players +features +festival +coach +should +return +taken +sea +seven +centre +followed +designed +performed +official +david +less +gave +months +finished +daughter +process +refer +study +europe +institute +stage +term +range +chief +fire +does +rights +completed +arts +half +remained +largest +mother +character +includes +civil +private +light +leading +reported +network +help +usually +seen +groups +studies +featured +federal +full +episode +thus +academy +night +competition +women's +space +get +instead +china +must +robert +japanese +go +washington +front +uk +directed +tournament +my +thomas +news +books +brother +involved +campaign +independent +either +model +countries +awarded +able +japan +sports +charles +gold +section +capital +kingdom +close +middle +added +fourth +sent +movement +eight +studio +previous +provided +conference +above +soon +today +grand +magazine +canadian +replaced +aircraft +change +films +ten +medical +organization +bank +historic +coast +killed +management +degree +rather +industry +russian +professor +chinese +action +car +senior +systems +green +bridge +technology +almost +shows +big +lower +week +success +writing +base +data +families +post +least +market +primary +female +reached +beginning +valley +ground +type +stated +tv +operations +attack +hospital +saw +approximately +paul +culture +republic +size +previously +decided +introduced +hill +buildings +championships +provide +native +successful +outside +parts +via +theatre +placed +behind 
+bay +sometimes +los +prior +whose +natural +active +future +scored +italian +africa +spanish +attended +put +listed +brought +regional +structure +units +michael +possible +henry +municipality +higher +start +collection +regular +star +results +square +interest +leader +economic +especially +contract +too +trade +texas +goal +below +winning +officer +foreign +generally +operation +runs +medal +changed +taking +novel +staff +significant +real +standard +far +limited +traditional +african +come +initially +itself +location +commission +roman +me +artist +christian +plays +money +parliament +food +hit +governor +low +defeated +energy +student +strong +towards +notable +child +assembly +owned +catholic +course +commercial +ship +foundation +channel +allowed +represented +property +places +navy +unit +ended +annual +command +paris +km +library +companies +whom +met +ever +activities +spent +plan +numerous +blue +earlier +means +highway +dr +required +musical +additional +practice +bill +noted +mountain +airport +ireland +plant +security +income +issues +associated +manager +artists +related +access +brown +running +peter +individual +richard +older +victory +opening +programs +past +report +sound +press +woman +finally +find +background +policy +our +youth +financial +date +executive +launched +soviet +administration +historical +closed +here +mark +captain +basketball +lived +ran +better +edition +famous +contains +chicago +subsequently +move +selected +already +legal +rural +religious +studied +entered +cultural +lines +person +test +appearance +complete +increased +rest +men's +zealand +secretary +complex +seat +changes +matches +majority +room +loss +terms +review +empire +mission +virginia +angeles +olympics +italy +highest +stadium +becoming +goals +starting +wide +characters +writer +particularly +fact +mostly +mexico +thought +hours +stone +retired +recording +going +give +feature +cross +smith +author +operated +sir +recent +status +chart +theory +greek 
+islands +caused +entire +got +remains +nine +engine +source +genus +forced +issue +singles +evidence +meeting +congress +port +variety +pennsylvania +forest +passed +lord +uses +particular +key +supported +word +create +relationship +overall +hand +democratic +certain +castle +biography +nature +mary +names +fort +parish +decision +serving +score +cover +wales +singer +need +material +shown +florida +upper +referred +larger +marriage +length +leaving +weeks +movie +raised +rate +justice +fall +always +minutes +junior +competed +stations +turn +irish +temple +cases +era +individuals +township +claimed +friends +van \ No newline at end of file diff --git a/marginalia_nu/src/main/resources/fonts/LM-regular.ttf b/marginalia_nu/src/main/resources/fonts/LM-regular.ttf new file mode 100644 index 00000000..6b4f6b8a Binary files /dev/null and b/marginalia_nu/src/main/resources/fonts/LM-regular.ttf differ diff --git a/marginalia_nu/src/main/resources/fonts/STIXTwoMath-Regular.ttf b/marginalia_nu/src/main/resources/fonts/STIXTwoMath-Regular.ttf new file mode 100644 index 00000000..a4705fc7 Binary files /dev/null and b/marginalia_nu/src/main/resources/fonts/STIXTwoMath-Regular.ttf differ diff --git a/marginalia_nu/src/main/resources/ip-banned-cidr.txt b/marginalia_nu/src/main/resources/ip-banned-cidr.txt new file mode 100644 index 00000000..cd25413a --- /dev/null +++ b/marginalia_nu/src/main/resources/ip-banned-cidr.txt @@ -0,0 +1,24 @@ +127.0.0.1/8 + +# Psychz + +104.216.0.0/15 +104.149.0.0/16 +45.34.0.0/15 + +# eSited + +172.80.0.0/17 + +# Cloud Yuqu LLC +172.247.0.0/16 + +107.151.64.0/18 + +# Google Cloud +# 35.208.0.0/12 +# 35.224.0.0/12 +# 35.240.0.0/13 + +# 1Blu +178.254.10.0/23 \ No newline at end of file diff --git a/marginalia_nu/src/main/resources/log4j2.properties b/marginalia_nu/src/main/resources/log4j2.properties new file mode 100644 index 00000000..647ca8e8 --- /dev/null +++ b/marginalia_nu/src/main/resources/log4j2.properties @@ -0,0 +1,29 @@ + + 
+log4j2.isThreadContextMapInheritable=true + +status = info + +appender.console.type = Console +appender.console.name = LogToConsole +appender.console.layout.type = PatternLayout +appender.console.layout.pattern = %highlight{%-5level}{FATAL=red, ERROR=red, WARN=yellow} %c{1}- %msg{nolookups}%n +appender.console.filter.http.type = MarkerFilter + +appender.rolling.type = RollingFile +appender.rolling.name = RollingFile +appender.rolling.fileName = /var/log/wmsa/wmsa-${main:1}-server.log +appender.rolling.filePattern = /var/log/wmsa/wmsa-${main:1}-server-log-%d{MM-dd-yy-HH-mm-ss}-%i.log.gz +appender.rolling.layout.pattern = %-5level %d{yyyy-MM-dd HH:mm:ss,SSS} %-20t %-20c{1}: %msg{nolookups}%n +appender.rolling.layout.type = PatternLayout +appender.rolling.policies.type = Policies +appender.rolling.policies.size.type = SizeBasedTriggeringPolicy +appender.rolling.policies.size.size=10MB +appender.rolling.strategy.type = DefaultRolloverStrategy +appender.rolling.strategy.max = 10 + +rootLogger.level = info +rootLogger.appenderRef.console.ref = LogToConsole +rootLogger.appenderRef.rolling.ref = RollingFile + +#rootLogger.appenderRef.http.ref = LogHttpTraffic diff --git a/marginalia_nu/src/main/resources/sql/data-store-init.sql b/marginalia_nu/src/main/resources/sql/data-store-init.sql new file mode 100644 index 00000000..b47402b9 --- /dev/null +++ b/marginalia_nu/src/main/resources/sql/data-store-init.sql @@ -0,0 +1,9 @@ +DROP TABLE IF EXISTS JSON_DATA; + +CREATE TABLE IF NOT EXISTS JSON_DATA( + DOM VARCHAR(255), + ID VARCHAR(255), + MODEL VARCHAR(255), + DATA MEDIUMTEXT); + +CREATE INDEX IF NOT EXISTS JSON_DATA_INDEX ON JSON_DATA (DOM, ID, MODEL); \ No newline at end of file diff --git a/marginalia_nu/src/main/resources/sql/edge-crawler-cache.sql b/marginalia_nu/src/main/resources/sql/edge-crawler-cache.sql new file mode 100644 index 00000000..2f706bba --- /dev/null +++ b/marginalia_nu/src/main/resources/sql/edge-crawler-cache.sql @@ -0,0 +1,250 @@ +DROP TABLE IF EXISTS 
EC_URL_LINK; +DROP VIEW IF EXISTS EC_PAGE_VIEW; + +DROP TABLE IF EXISTS DISC_DOMAIN_TAG; +DROP TABLE IF EXISTS DISC_TAG; +DROP TABLE IF EXISTS DISC_USER; + +DROP TABLE IF EXISTS EC_DOMAIN_NEIGHBORS; +DROP TABLE IF EXISTS EC_FEED_URL; +DROP TABLE IF EXISTS EC_DOMAIN_LINK; +DROP TABLE IF EXISTS EC_PAGE_DATA; +DROP TABLE IF EXISTS EC_URL; +DROP TABLE IF EXISTS EC_DOMAIN; +DROP TABLE IF EXISTS EC_TOP_DOMAIN; +DROP TABLE IF EXISTS EC_URL_DETAILS; +DROP VIEW IF EXISTS EC_URL_VIEW; +DROP VIEW IF EXISTS EC_URL_PART_HASH; + +DROP TABLE IF EXISTS EC_URL_WORD; +DROP TABLE IF EXISTS EC_DICTIONARY; + + +CREATE TABLE IF NOT EXISTS EC_TOP_DOMAIN ( + ID INT PRIMARY KEY AUTO_INCREMENT, + URL_PART VARCHAR(255) UNIQUE NOT NULL, + ALIVE BOOLEAN DEFAULT TRUE NOT NULL +) +CHARACTER SET utf8mb4 +COLLATE utf8mb4_unicode_ci; + +CREATE TABLE IF NOT EXISTS EC_DOMAIN ( + ID INT PRIMARY KEY AUTO_INCREMENT, + URL_PART VARCHAR(255) UNIQUE NOT NULL, + INDEXED INT DEFAULT 0 NOT NULL, + QUALITY DOUBLE DEFAULT -5 NOT NULL, + QUALITY_RAW DOUBLE DEFAULT -5 NOT NULL, + QUALITY_ORIGINAL DOUBLE DEFAULT -5 NOT NULL, + + URL_TOP_DOMAIN_ID INT NOT NULL, + URL_SUBDOMAIN VARCHAR(255) NOT NULL, + STATE INT DEFAULT 0 NOT NULL, + + RANK DOUBLE, + + DOMAIN_ALIAS INTEGER, + + INDEX_DATE TIMESTAMP DEFAULT NOW(), + DISCOVER_DATE TIMESTAMP DEFAULT NOW(), + + FOREIGN KEY (URL_TOP_DOMAIN_ID) REFERENCES EC_TOP_DOMAIN(ID) ON DELETE CASCADE +) +CHARACTER SET utf8mb4 +COLLATE utf8mb4_unicode_ci; + +CREATE TABLE IF NOT EXISTS EC_DOMAIN_HISTORY ( + ID INT PRIMARY KEY AUTO_INCREMENT, + URL_PART VARCHAR(255) UNIQUE NOT NULL, + QUALITY_MEASURE DOUBLE DEFAULT -5 NOT NULL, + INBOUND_LINKS INT DEFAULT 1, + LINK_ADJUSTED_QUALITY DOUBLE GENERATED ALWAYS AS (0.3*QUALITY_MEASURE + 0.7*QUALITY_MEASURE / GREATEST(1, INBOUND_LINKS)), + RANK DOUBLE +) +CHARACTER SET utf8mb4 +COLLATE utf8mb4_unicode_ci; + +CREATE TABLE IF NOT EXISTS EC_DOMAIN_BLACKLIST ( + ID INT PRIMARY KEY AUTO_INCREMENT, + URL_DOMAIN VARCHAR(255) UNIQUE NOT NULL +) 
+CHARACTER SET utf8mb4 +COLLATE utf8mb4_unicode_ci; + +CREATE TABLE IF NOT EXISTS EC_URL ( + ID INT PRIMARY KEY AUTO_INCREMENT, + DOMAIN_ID INT NOT NULL, + PROTO ENUM('http','https','gemini') NOT NULL, + URL VARCHAR(255) NOT NULL, + PORT INT, + + VISITED BOOLEAN NOT NULL DEFAULT FALSE, + DATA_HASH INTEGER, + QUALITY_MEASURE DOUBLE, + + STATE ENUM('ok', 'redirect', 'dead', 'archived', 'disqualified') NOT NULL DEFAULT 'ok', + + IP VARCHAR(32), + + CONSTRAINT CONS UNIQUE (DOMAIN_ID, URL), + FOREIGN KEY (DOMAIN_ID) REFERENCES EC_DOMAIN(ID) ON DELETE CASCADE +) +CHARACTER SET utf8mb4 +COLLATE utf8mb4_unicode_ci; + +CREATE TABLE IF NOT EXISTS EC_PAGE_DATA ( + ID INT PRIMARY KEY AUTO_INCREMENT, + + TITLE VARCHAR(255), + DESCRIPTION VARCHAR(255), + + WORDS_DISTINCT INTEGER, + WORDS_TOTAL INTEGER, + FORMAT VARCHAR(8), + FEATURES INT, + + FOREIGN KEY (ID) REFERENCES EC_URL(ID) ON DELETE CASCADE +) +CHARACTER SET utf8mb4 +COLLATE utf8mb4_unicode_ci; + +CREATE TABLE EC_FEED_URL ( + ID INT PRIMARY KEY AUTO_INCREMENT, + DOMAIN_ID INT NOT NULL, + PROTO VARCHAR(8) NOT NULL, + URL VARCHAR(255) NOT NULL, + PORT INT, + + CONSTRAINT CONS UNIQUE (DOMAIN_ID, URL), + FOREIGN KEY (DOMAIN_ID) REFERENCES EC_DOMAIN(ID) ON DELETE CASCADE +) +CHARACTER SET utf8mb4 +COLLATE utf8mb4_unicode_ci; + +CREATE TABLE EC_DOMAIN_NEIGHBORS ( + ID INT PRIMARY KEY AUTO_INCREMENT, + DOMAIN_ID INT NOT NULL, + NEIGHBOR_ID INT NOT NULL, + ADJ_IDX INT NOT NULL, + + CONSTRAINT CONS UNIQUE (DOMAIN_ID, ADJ_IDX), + FOREIGN KEY (DOMAIN_ID) REFERENCES EC_DOMAIN(ID) ON DELETE CASCADE +) +CHARACTER SET utf8mb4 +COLLATE utf8mb4_unicode_ci; + +CREATE TABLE IF NOT EXISTS EC_DOMAIN_LINK ( + ID INT PRIMARY KEY AUTO_INCREMENT, + SOURCE_DOMAIN_ID INT NOT NULL, + DEST_DOMAIN_ID INT NOT NULL, + + CONSTRAINT CONS UNIQUE (SOURCE_DOMAIN_ID, DEST_DOMAIN_ID), + + FOREIGN KEY (SOURCE_DOMAIN_ID) REFERENCES EC_DOMAIN(ID) ON DELETE CASCADE, + FOREIGN KEY (DEST_DOMAIN_ID) REFERENCES EC_DOMAIN(ID) ON DELETE CASCADE +); + +CREATE TABLE IF 
NOT EXISTS EC_DOMAIN_LINK_AGGREGATE ( + DOMAIN_ID INT PRIMARY KEY NOT NULL, + LINKS INT +); + +CREATE OR REPLACE VIEW EC_URL_VIEW AS + SELECT + EC_DOMAIN.URL_PART AS URL_DOMAIN, + EC_URL.URL AS URL_PATH, + EC_TOP_DOMAIN.URL_PART AS URL_TOP, + EC_URL.ID AS ID, + EC_DOMAIN.ID AS DOMAIN_ID, + EC_TOP_DOMAIN.ID AS TOP_DOMAIN_ID, + EC_URL.PROTO AS URL_PROTO, + EC_URL.PORT AS URL_PORT, + EC_URL.VISITED AS VISITED, + EC_URL.DATA_HASH AS DATA_HASH, + EC_URL.QUALITY_MEASURE AS URL_QUALITY_MEASURE, + EC_DOMAIN.QUALITY AS DOMAIN_QUALITY_MEASURE, + EC_DOMAIN.QUALITY_RAW AS QUALITY_RAW, + EC_PAGE_DATA.TITLE AS TITLE, + EC_PAGE_DATA.DESCRIPTION AS DESCRIPTION, + EC_URL.IP AS IP, + EC_DOMAIN.STATE AS STATE, + EC_PAGE_DATA.WORDS_TOTAL AS WORDS_TOTAL, + EC_PAGE_DATA.FORMAT AS FORMAT, + EC_PAGE_DATA.FEATURES AS FEATURES, + EC_DOMAIN.RANK AS RANK, + EC_DOMAIN.STATE AS DOMAIN_STATE + FROM EC_URL + LEFT JOIN EC_PAGE_DATA + ON EC_PAGE_DATA.ID = EC_URL.ID + INNER JOIN EC_DOMAIN + ON EC_URL.DOMAIN_ID = EC_DOMAIN.ID + INNER JOIN EC_TOP_DOMAIN + ON EC_DOMAIN.URL_TOP_DOMAIN_ID=EC_TOP_DOMAIN.ID; + +CREATE OR REPLACE VIEW EC_DISCOVER_TASKS_VIEW AS + SELECT + ID, + URL_PART + FROM EC_DOMAIN + WHERE + DOMAIN_ALIAS IS NULL + AND INDEXED = 0 + ORDER BY QUALITY DESC, ID ASC; + +CREATE OR REPLACE VIEW EC_RELATED_LINKS_VIEW AS + SELECT + SOURCE_DOMAIN_ID, + SOURCE_DOMAIN.URL_PART AS SOURCE_URL, + SOURCE_TOP_DOMAIN.URL_PART AS SOURCE_TOP_URL, + DEST_DOMAIN_ID, + DEST_DOMAIN.URL_PART AS DEST_URL, + DEST_TOP_DOMAIN.URL_PART AS DEST_TOP_URL + FROM EC_DOMAIN_LINK + INNER JOIN EC_DOMAIN AS SOURCE_DOMAIN + ON SOURCE_DOMAIN.ID=SOURCE_DOMAIN_ID + INNER JOIN EC_TOP_DOMAIN AS SOURCE_TOP_DOMAIN + ON SOURCE_TOP_DOMAIN.ID=SOURCE_DOMAIN.URL_TOP_DOMAIN_ID + INNER JOIN EC_DOMAIN AS DEST_DOMAIN + ON DEST_DOMAIN.ID=DEST_DOMAIN_ID + INNER JOIN EC_TOP_DOMAIN AS DEST_TOP_DOMAIN + ON DEST_TOP_DOMAIN.ID=DEST_DOMAIN.URL_TOP_DOMAIN_ID + ; + +CREATE OR REPLACE VIEW EC_RELATED_LINKS_IN AS + SELECT + IN_URL.ID AS SRC_URL_ID, + 
IN_URL.QUALITY_MEASURE AS SRC_URL_QUALITY, + OUT_URL.ID AS DEST_URL_ID, + OUT_URL.QUALITY_MEASURE AS DEST_URL_QUALITY + FROM EC_URL AS IN_URL + INNER JOIN EC_DOMAIN_LINK + ON IN_URL.DOMAIN_ID=EC_DOMAIN_LINK.SOURCE_DOMAIN_ID + INNER JOIN EC_URL AS OUT_URL + ON OUT_URL.DOMAIN_ID=EC_DOMAIN_LINK.DEST_DOMAIN_ID + WHERE IN_URL.VISITED=TRUE + AND IN_URL.DATA_HASH IS NOT NULL + AND OUT_URL.VISITED=TRUE + AND OUT_URL.DATA_HASH IS NOT NULL; + +CREATE TABLE IF NOT EXISTS EC_DOMAIN_BACKLINKS ( + ID INT PRIMARY KEY, + LINKEDNESS INT +); + +CREATE TABLE IF NOT EXISTS EC_API_KEY ( + LICENSE_KEY VARCHAR(255) UNIQUE, + LICENSE VARCHAR(255) NOT NULL, + NAME VARCHAR(255) NOT NULL, + EMAIL VARCHAR(255) NOT NULL, + RATE INT DEFAULT 10 +); + +CREATE INDEX IF NOT EXISTS EC_DOMAIN_RANK_INDEX ON EC_DOMAIN (RANK); +CREATE INDEX IF NOT EXISTS EC_DOMAIN_QUALITY_INDEX ON EC_DOMAIN (QUALITY,STATE); + +CREATE INDEX IF NOT EXISTS EC_DOMAIN_INDEXED_INDEX ON EC_DOMAIN (INDEXED); +CREATE INDEX IF NOT EXISTS EC_DOMAIN_ID_INDEXED_INDEX ON EC_DOMAIN (ID, INDEXED); +CREATE INDEX IF NOT EXISTS EC_DOMAIN_TRIO ON EC_DOMAIN (STATE, DOMAIN_ALIAS, INDEXED, QUALITY); + +CREATE INDEX IF NOT EXISTS EC_URL_VISITED ON EC_URL (VISITED); +CREATE INDEX IF NOT EXISTS EC_URL_VISITED_STATE ON EC_URL (VISITED, STATE); +CREATE INDEX IF NOT EXISTS EC_URL_IP ON EC_URL (IP); \ No newline at end of file diff --git a/marginalia_nu/src/main/resources/sql/monitor-log-init.sql b/marginalia_nu/src/main/resources/sql/monitor-log-init.sql new file mode 100644 index 00000000..23ca4fca --- /dev/null +++ b/marginalia_nu/src/main/resources/sql/monitor-log-init.sql @@ -0,0 +1,11 @@ +DROP TABLE IF EXISTS MONITOR_LOG; +DROP INDEX IF EXISTS MONITOR_LOG_INDEX; + +CREATE TABLE IF NOT EXISTS LOG_ENTRY ( + SERVICE VARCHAR(32), + STATUS VARCHAR(32), + IP VARCHAR(32), + PORT INTEGER, + TS VARCHAR(32)); + +CREATE INDEX IF NOT EXISTS MONITOR_LOG_INDEX ON LOG_ENTRY (SERVICE); \ No newline at end of file diff --git 
a/marginalia_nu/src/main/resources/sql/reference-data.sql b/marginalia_nu/src/main/resources/sql/reference-data.sql new file mode 100644 index 00000000..52d9abbb --- /dev/null +++ b/marginalia_nu/src/main/resources/sql/reference-data.sql @@ -0,0 +1,23 @@ +DROP TABLE IF EXISTS REF_DICTIONARY; + +CREATE TABLE IF NOT EXISTS REF_DICTIONARY( + TYPE VARCHAR(16), + WORD VARCHAR(255), + DEFINITION VARCHAR(255) +) +CHARACTER SET utf8mb4 +COLLATE utf8mb4_unicode_ci; + +CREATE INDEX IF NOT EXISTS REF_DICTIONARY_WORD ON REF_DICTIONARY (WORD); + +CREATE TABLE IF NOT EXISTS REF_WIKI_TITLE( + NAME VARCHAR(255), + NAME_LOWER VARCHAR(255) GENERATED ALWAYS AS (LOWER(NAME)), + REF_NAME VARCHAR(255) +) +CHARACTER SET utf8mb4 +COLLATE utf8mb4_unicode_ci; + + +CREATE INDEX IF NOT EXISTS REF_WIKI_LOWER ON REF_WIKI_TITLE (NAME_LOWER); +CREATE INDEX IF NOT EXISTS REF_WIKI_NAME ON REF_WIKI_TITLE (NAME); \ No newline at end of file diff --git a/marginalia_nu/src/main/resources/static/dating/index.html b/marginalia_nu/src/main/resources/static/dating/index.html new file mode 100644 index 00000000..8626e8f8 --- /dev/null +++ b/marginalia_nu/src/main/resources/static/dating/index.html @@ -0,0 +1,71 @@ + + + + + Website Explorer + + + + +

    Website Explorer

    +

    + This is a game where you explore more or less random and obscure websites around the Internet, based on the + database of the Marginalia Search Engine. +

    +

    Instructions

    +

    Press the thumbnail to visit the website.

    +

    Press ➡️ to view the next website.

    +

    Press 🤩 to look for websites similar to the website you are seeing.

    +

    Press 🔀 to return to the default flavor of websites.

    +

    Cookie Consent

    +

    + The game uses a session cookie to keep track of which websites you have been shown, so that + you do not see the same websites repeatedly, and which websites you would like to see more of. +

    +

    + Consent To The Cookie And Begin +

    +

    About

    +

    + These websites are not manually curated. Most of them are clean, but if you do happen to see something particularly + objectionable, please let me know by sending me an email. kontakt@marginalia.nu +

    +

    + A less principled person would probably have plastered the page in ads, as it's a game basically revolving around + refreshing the same page over and over. Instead I invite you to consider supporting me + if you enjoy the game. +

    + + diff --git a/marginalia_nu/src/main/resources/static/dating/robots.txt b/marginalia_nu/src/main/resources/static/dating/robots.txt new file mode 100644 index 00000000..5199c74f --- /dev/null +++ b/marginalia_nu/src/main/resources/static/dating/robots.txt @@ -0,0 +1,4 @@ +User-agent: * +Disallow: /init +Disallow: /random +Disallow: /similar diff --git a/marginalia_nu/src/main/resources/static/edge/about.html b/marginalia_nu/src/main/resources/static/edge/about.html new file mode 100644 index 00000000..dcc0b95e --- /dev/null +++ b/marginalia_nu/src/main/resources/static/edge/about.html @@ -0,0 +1,23 @@ + + + + + Marginalia Search - About + + + + + +
    + +
    +
    +

    + This page has been moved to the memex. +

    +
    + \ No newline at end of file diff --git a/marginalia_nu/src/main/resources/static/edge/changelog.html b/marginalia_nu/src/main/resources/static/edge/changelog.html new file mode 100644 index 00000000..37d3d6b7 --- /dev/null +++ b/marginalia_nu/src/main/resources/static/edge/changelog.html @@ -0,0 +1,23 @@ + + + + + Marginalia Search - Change Log + + + + + +
    + +
    +
    +

    + This page has been moved to the memex. +

    +
    + \ No newline at end of file diff --git a/marginalia_nu/src/main/resources/static/edge/crawler-ips.txt b/marginalia_nu/src/main/resources/static/edge/crawler-ips.txt new file mode 100644 index 00000000..dee018f1 --- /dev/null +++ b/marginalia_nu/src/main/resources/static/edge/crawler-ips.txt @@ -0,0 +1 @@ +81.170.128.52 diff --git a/marginalia_nu/src/main/resources/static/edge/error.html b/marginalia_nu/src/main/resources/static/edge/error.html new file mode 100644 index 00000000..7192bf07 --- /dev/null +++ b/marginalia_nu/src/main/resources/static/edge/error.html @@ -0,0 +1,23 @@ + + + + + Error + + + + + +
    + +
    +
    +

    An error has occurred!

    +

    + Something went wrong while processing your query. Please try again later. +

    +
    + \ No newline at end of file diff --git a/marginalia_nu/src/main/resources/static/edge/favicon.ico b/marginalia_nu/src/main/resources/static/edge/favicon.ico new file mode 100644 index 00000000..a1136a7f Binary files /dev/null and b/marginalia_nu/src/main/resources/static/edge/favicon.ico differ diff --git a/marginalia_nu/src/main/resources/static/edge/index.html b/marginalia_nu/src/main/resources/static/edge/index.html new file mode 100644 index 00000000..13044a6c --- /dev/null +++ b/marginalia_nu/src/main/resources/static/edge/index.html @@ -0,0 +1,159 @@ + + + + + Marginalia Search + + + + + + + + + + + + + +
    + +
    + +
    +
    + +
    + +
    +
    +

    About

    +
    +

    This is an independent DIY search engine that focuses on non-commercial content, and attempts to + show you sites you perhaps weren't aware of in favor of the sort of sites you probably already knew + existed.

    +

    + The software for this search engine is all custom-built, and all crawling and indexing is + done in-house. +

    +
    +
    + Read More +
    +
    + +
    +

    Tips

    +
    +

    + This search engine isn't particularly well equipped to answer queries + posed as questions. Instead, try to imagine some text that might appear + in the website you are looking for, and search for that.

    +

    + Where this search engine really shines is finding small, old and obscure websites about some + given topic, perhaps + old video games, + a mystery, + theology, + the occult, + knitting, + computer science, + or art. +

    + +
    + +
    + + +
    +

    Updates

    +
    +

    ☛ The web design of the search engine has been completely overhauled. For the most part, this should + result in even smaller page loads, better accessibility and easier navigation, but it may still + be a bit rough in some browsers. If you do find any bugs or accessibility problems, please let me + know. You can reach me at kontakt@marginalia.nu. +

    +

    ☛ The Random Mode has been overhauled, and is + quite entertaining. I encourage you to give it a spin.

    +

    ☛ A simple public API is now available.

    +
    + +
    + +
    +

    Publicity, Discussion and Events

    +
    +
    +
    You Should Check Out the Indie Web 🎞️
    +
    YouTube, You've Got Kat, 2022-03-15
    +
    + What Google Search Isn't Showing You +
    +
    The New Yorker, 2022-03-10
    +
    + Marginalia Search - Serendipity Engineering +
    +
    MetaFilter, 2022-03-09
    +
    + 🎂 First anniversary! 🎊 +
    +
    + 2022-02-26 +
    +
    + A Search Engine Designed To Surprise You +
    +
    Clive Thompson OneZero, 2021-09-16
    +
    + Hacker News Discussion +
    +
    + 2021-09-16 +
    +
    +
    +
    +
    +
    + +
    + This website complies with the GDPR by not collecting any personal + information, and with the EU Cookie Directive by not using + cookies. More Information. +

    + Reach me at kontakt@marginalia.nu. +

    + + \ No newline at end of file diff --git a/marginalia_nu/src/main/resources/static/edge/known-issues.html b/marginalia_nu/src/main/resources/static/edge/known-issues.html new file mode 100644 index 00000000..e14588bc --- /dev/null +++ b/marginalia_nu/src/main/resources/static/edge/known-issues.html @@ -0,0 +1,29 @@ + + + + + Marginalia Search - Known Issues + + + + +
    + +
    +
    +

    Known Issues

    +
      +
    • Non-Latin text becomes horribly garbled in the summary and title description.
    • +
    +

    Mitigated Issues

    +
      +
    • Non-Latin characters are stripped from search results (Ålö AB becomes l AB)
    • +
    • The page doesn't look good on mobile
    • +
    • Still a few link farms getting good results
    • +
    +
    + \ No newline at end of file diff --git a/marginalia_nu/src/main/resources/static/edge/maintenance.html b/marginalia_nu/src/main/resources/static/edge/maintenance.html new file mode 100644 index 00000000..c8fdc227 --- /dev/null +++ b/marginalia_nu/src/main/resources/static/edge/maintenance.html @@ -0,0 +1,10 @@ + + Marginalia Search - Maintenance Notification + + +

    + Down For Maintenance! +

    +

    The search engine is currently down for maintenance.

    + To The Start Page + \ No newline at end of file diff --git a/marginalia_nu/src/main/resources/static/edge/notes.html b/marginalia_nu/src/main/resources/static/edge/notes.html new file mode 100644 index 00000000..e59b3bff --- /dev/null +++ b/marginalia_nu/src/main/resources/static/edge/notes.html @@ -0,0 +1,28 @@ + + + + + Marginalia Search - Notes on Designing a Search Engine + + + + + + + +
    + +
    +
    +

    + This page has been moved to the memex. +

    + + + +
    + \ No newline at end of file diff --git a/marginalia_nu/src/main/resources/static/edge/opensearch.xml b/marginalia_nu/src/main/resources/static/edge/opensearch.xml new file mode 100644 index 00000000..89b3efcf --- /dev/null +++ b/marginalia_nu/src/main/resources/static/edge/opensearch.xml @@ -0,0 +1,11 @@ + + + Marginalia + Search Marginalia + UTF-8 + https://search.marginalia.nu/favicon.ico + + https://search.marginalia.nu/ + \ No newline at end of file diff --git a/marginalia_nu/src/main/resources/static/edge/robots.txt b/marginalia_nu/src/main/resources/static/edge/robots.txt new file mode 100644 index 00000000..0c0833e9 --- /dev/null +++ b/marginalia_nu/src/main/resources/static/edge/robots.txt @@ -0,0 +1,8 @@ +User-agent: * +Disallow: /browse/ +Disallow: /search/ +Disallow: /search +Disallow: /wiki/ +Disallow: /explore/ +Disallow: /site/ +Disallow: /links/ \ No newline at end of file diff --git a/marginalia_nu/src/main/resources/static/edge/style-new.css b/marginalia_nu/src/main/resources/static/edge/style-new.css new file mode 100644 index 00000000..a10fd4da --- /dev/null +++ b/marginalia_nu/src/main/resources/static/edge/style-new.css @@ -0,0 +1,477 @@ +/* If you need to borrow something from below, that's fine */ + +body { + margin: 0px; + font-size: 12pt; + font-family: sans-serif; + background-color: #f8f8ee; +} + + +.sticker { + ruby-position: under; +} +.sticker rt { + background-color: #ff08; + font-size: 8pt; +} +header { + background-color: #acae89; + color: #fff; + border-bottom: 1px solid #888; +} + +h1, h2 { + font-weight: normal; +} + +header nav a { + text-decoration: none; + color: #000; + + margin-right: 1ch; + padding: .5ch; + display: inline-block; +} + +header nav a:hover, header nav a:focus { + background: #2f4858; + color: #fff !important; +} + +article { + max-width: 160ch; + margin-left: auto; + margin-right: auto; +} + +ul.semantic-results:empty { + display: none; +} +ul.semantic-results { + list-style: none; + padding: 0px; +} 
+ul.semantic-results li { + padding-left: 0px; +} +ul.semantic-results a { + color: #c00; +} + +.cards { + display: flex; + flex-direction: row; + flex-wrap: wrap; + justify-content: flex-start; + padding-left: 1ch; + gap: 1ch; +} + +article > section > p { display: none; } + +.w3m-helper { + display: none; +} + +/* +.card.rs-rank-1,.card.rs-rank-2,.card.rs-rank-3,.card.rs-rank-4 { + border: 1px solid #fe0; + box-sizing: border-box; + box-shadow: 0 0 5px #fe0; +} +*/ + + +.big .card { + min-width: 40ch; +} + +.card .info { + flex-grow: 1; + padding-left: 1ch; + line-height: 1.6; +} +.card { + flex-basis: 20ch; + border: 2px #ccc; + background-color: #fff; + min-width: 30ch; + display: flex; + flex-direction: column; + margin-left: 2px; + margin-right: 2px; + margin-bottom: 8px; + border-left: 1px solid #ecb; + border-top: 1px solid #ecb; + box-shadow: #0008 0 0 5px; +} + +.card h2 a { + display: block !important; +} + +.card img { + width: 30ch; + height: 22.5ch; /* 4:3 aspect ratio, card width = 30, height = 30*3/4 */ +} +.card a:focus img { + filter: sepia(100%); + box-shadow: #444 0px 0px 20px; +} + + +.card a:focus:not(.nofocus) { + background-color: black; + color: white; +} + +.problems ul { + flex-grow: 1; +} + +.card .description { + padding-left: 1ch; + padding-right: 1ch; + flex-grow: 1; + overflow: auto; + -webkit-hyphens: auto; + -moz-hyphens: auto; + -ms-hyphens: auto; + hyphens: auto; +} + +.card h2 { + color: #fff; + background-color: #2f4858; + font-size: medium; + font-family: serif; + padding: .5ch .5ch .5ch .5ch; + margin: 0px 0px 0px 0px; + + font-family: 'Trebuchet', 'Noto Sans', sans-serif; + + text-decoration: none; + border-bottom-right-radius: 2ch; +} + +.card h2 a { + color: #fff; + text-decoration: none; +} + +.problems h2, .info h2 { + background-color: #482f58; +} + +.browse-result { + background-color: #eee; +} +.semantic h2, .browse-result .h2, .definition h2 { + background-color: #48582f; +} + +.search-result .url a { + font-family: 
monospace; + margin: 1ch; + font-size: 8pt; + background-color: #3F5F6F; + display: block; + margin: 0px; + padding: 1ch; + word-break: break-all; + + color: #fff; +} +.search-result .url a:visited { + color: #FCC; +} +.card .utils { + display: flex; + font-size: 10pt; + + padding: 1ch; + background-color: #eee; +} + +.card .utils > * { + margin-right: 1ch; + margin-left: 1ch; +} + +.card .utils a { + color: #000; +} + +.card.definition { + min-height: 10ch; +} + +.card.definition .description { + padding: 2ch; + font-size: 12pt; +} + +.search-result .meta { + flex-grow: 2; + text-align: right; +} +.search-result .meta > * { + padding-left: 4px; +} + +.search-box { + display: flex; + flex-direction: row; + background-color: #fff; + margin-bottom: 4ch; + box-shadow: #ccc 4px 4px 5px; +} + +.search-box h1 { + color: #fff; + background-color: #2f4858; + font-size: medium; + font-family: serif; + padding: .5ch .5ch .5ch .5ch; + margin: 0px 0px 0px 0px; + border: 2px groove; + max-width: 100%; + font-size: 16pt; +} + +.search-box .input { + flex-grow: 2; + padding: .5ch; +} + + +select { + color: #444; + background-color: #fff; + border: 1px solid #444; + border-radius: 5px; +} + +.search-box .settings { + padding: .5ch; +} + +.search-box .extra { + padding: 1ch; +} + +.search-box .input { + display: flex; + gap: .5ch; +} + +.search-box input[name="query"] { + flex-grow: 2; + padding: .5ch; + font-size: 12pt; + font-family: 'fixedsys', monospace, serif; + + border: 2px inset #000; + background: #fff; + color: #444; +} + +.search-box input[name="query"]:focus { + color: #000; +} + +.search-box input[type="submit"] { + padding: .5ch; + min-width: 5ch; + font-size: 12pt; + font-family: 'fixedsys', monospace, serif; + border: 2px groove #ccc; + background-color: #eee; +} + +.search-box .settings > * { + margin-top: .5ch; +} + +footer { + padding: 2ch; + margin: 16ch 0px 0px 0px; + background-color: #acae89; + height: 20ch; + font-size: 10pt; +} + +a.underline { + 
text-decoration: underline !important; +} + + +.suggestions { + background-color: #fff; + padding: .5ch; + margin-top: 3.2ch; + position: absolute; + display: inline-block; + width: 300px; + border-left: 1px solid #ccc; + border-top: 1px solid #ccc; + box-shadow: 5px 5px 5px #888; + z-index: 10; +} + +.suggestions a { + display: block; + color: #000; + font-size: 12pt; + font-family: 'fixedsys', monospace, serif; + text-decoration: none; + outline: none; +} + +.suggestions a:focus { + display: block; + background-color: #000; + color: #eee; +} + +@media only screen and (max-device-width: 1024px) { + + .card { + margin-right: 2ch; + } + .card .utils a { + padding: 1ch; + } + + .cards { + justify-content: center; + } + + .suggestions { + display: none; + } +} + +@media only screen and (max-device-width: 800px) { + .search-box { + flex-direction: column; + } + header nav a { + padding: 1ch !important; + } + + .card { + flex-basis: 40ch !important; + max-width: unset; + margin-bottom: 2ch; + margin-right: unset; + } + + .card img { + width: 100%; + height: 100%; + } + + .cards { + padding-left: unset; + padding-right: 5px; + } + + +} + +/* https://www.youtube.com/watch?v=v0nmHymgM7Y */ +@media (prefers-color-scheme: dark) { + a { + color: #acf; + } + .card { + background-color: #222; + color: #aaa; + box-shadow: #0008 0 0 5px; + border: none; + } + .card .utils { + background-color: #000; + color: #fff; + } + .card .utils a { + color: #acf; + } + header { + background-color: #000; + } + footer { + background-color: #000; + color: #fff; + } + body { + background-color: #444; + } + header nav a { + color: #eee; + } + .search-box { + background-color: #222; + box-shadow: #0008 0 0 5px; + } + .search-box h1 { + background-color: unset; + border: none; + } +/* .card.rs-rank-1,.card.rs-rank-2,.card.rs-rank-3,.card.rs-rank-4 { + border: 2px solid #fe05; + box-sizing: border-box; + box-shadow: 0 0 20px #fe03; + }*/ + .search-box input[name="query"] { + background-color: #000 
!important; + color: #aaa; + border: 1px solid #888; + } + .search-box input[name="query"]:focus { + color: #eee; + } + .search-box input[type="submit"] { + background-color: #000 !important; + color: #fff; + border: 1px solid #888; + } + .card img { + /* White images turn into an unpleasant death laser if you put them in an otherwise dark page */ + filter: brightness(80%) saturate(140%); + } + input { + color: #fff; + } + .card a:focus img { + filter: brightness(100%); + box-shadow: #ccca 0px 0px 50px; + } + + .card a:focus:not(.nofocus) { + background-color: white; + color: black; + } + + select { + color: #fff; + background-color: #000; + border: 1px solid #ccc; + border-radius: 5px; + } + + .suggestions { + background-color: #000; + border: 1px solid #444; + box-shadow: 2px 2px 5px #000; + } + .suggestions a { + color: #fff; + } + .suggestions a:focus { + background-color: #2f4858; + } + +} \ No newline at end of file diff --git a/marginalia_nu/src/main/resources/static/edge/tts.js b/marginalia_nu/src/main/resources/static/edge/tts.js new file mode 100644 index 00000000..586ae10e --- /dev/null +++ b/marginalia_nu/src/main/resources/static/edge/tts.js @@ -0,0 +1,101 @@ + +if(!window.matchMedia("(pointer: coarse)").matches) { + query = document.getElementById('query'); + query.setAttribute('autocomplete', 'off'); + timer = null; + function fetchSuggestions(e) { + if (timer != null) { + clearTimeout(timer); + } + timer = setTimeout(() => { + req = new XMLHttpRequest(); + + req.onload = rsp => { + items = JSON.parse(req.responseText); + + var old = document.getElementById('suggestions'); + if (old != null) old.remove(); + + if (items.length == 0) return; + + suggestions = document.createElement('div'); + suggestions.setAttribute('id', 'suggestions'); + suggestions.setAttribute('class', 'suggestions'); + + + for (i=0;i { + if (e.key === "ArrowDown") { + if (e.target.nextElementSibling != null) { + e.target.nextElementSibling.focus(); + } + + e.preventDefault() + } + 
else if (e.key === "ArrowUp") { + if (e.target.previousElementSibling != null) { + e.target.previousElementSibling.focus(); + } + else { + query.focus(); + } + e.preventDefault() + } + else if (e.key === "Escape") { + var suggestions = document.getElementById('suggestions'); + if (suggestions != null) { + suggestions.remove(); + } + query.focus(); + e.preventDefault(); + } + }); + item.addEventListener('keypress', e=> { + if (e.key === "Enter") { + suggestionClickHandler(e); + } + }); + suggestions.appendChild(item); + } + document.getElementsByClassName('input')[0].appendChild(suggestions); + } + + req.open("GET", "https://api.marginalia.nu/suggest/?partial="+encodeURIComponent(query.value)); + req.send(); + }, 250); + } + query.addEventListener("input", fetchSuggestions); + query.addEventListener("click", e=> { var suggestions = document.getElementById('suggestions'); if (suggestions != null) suggestions.remove(); }); + query.addEventListener("keydown", e => { + if (e.key === "ArrowDown") { + var suggestions = document.getElementById('suggestions'); + if (suggestions != null) { + suggestions.childNodes[0].focus(); + } + else { + fetchSuggestions(e); + } + e.preventDefault() + } + else if (e.key === "Escape") { + var suggestions = document.getElementById('suggestions'); + if (suggestions != null) { + suggestions.remove(); + } + query.focus(); + e.preventDefault(); + } + }); +} diff --git a/marginalia_nu/src/main/resources/static/edge/wiki-clean.html b/marginalia_nu/src/main/resources/static/edge/wiki-clean.html new file mode 100644 index 00000000..a90b70b4 --- /dev/null +++ b/marginalia_nu/src/main/resources/static/edge/wiki-clean.html @@ -0,0 +1,76 @@ + + + + + Marginalia Search - About: Easy Read Wikipedia + + + + + +
    + +
    +
    +

    About: High Readability Wikipedia

    +
    +

    + This is a wikipedia client that strips away most links and almost all visual clutter + to provide a more book-like reading experience with fewer distractions. +

    +

    + This is primarily a helpful utility for a search engine focusing on similarly text-oriented + websites. +

    +

    + You are welcome to use it for general article reading as well. This may be useful + if you are on a low bandwidth connection, since the download size is typically reduced + from megabytes to dozens of kilobytes. +

    +

    + What's taken away is all the design elements that your brain would have to filter out + to read the text of the article. It seems as though overburdening this mental process + causes the reader to start scanning the text instead of reading it, which is experienced + as an inability to focus. +

    +

    + The cleaning process is not perfect and will occasionally produce strange results, + but significant problems should be relatively rare. +

    + About the Search Engine + +

    Limitations

    +

    This is a "stale" copy of wikipedia, based on an archived copy from January 2021. On the + other hand, we used to abide printed encyclopedias that didn't update at all.

    +

    + Be aware that the cleaning strips away a lot of information, including most references, + footnotes, quality warnings, and so forth. Refer to the original wikipedia article for + that information. +

    +
    +

    Legal

    +
    + The Wikipedia text is available under the Creative Commons Attribution-ShareAlike 3.0 license, + and so is the wikipedia text forwarded to you through this service. +
    +
    +

    Further reading

    +
    Blom et al. 2017 - Comprehension and navigation of networked hypertexts
    +
    https://onlinelibrary.wiley.com/doi/pdf/10.1111/jcal.12243
    +
    +

    Have something to say?

    +
    +

    Send me an e-mail at kontakt@marginalia.nu. +

    +

    + Don't hesitate to let me know if the website is somehow being a nuisance; + it should respect robots.txt and reduce outgoing requests, but the format + isn't super-standardized, so occasionally it doesn't understand every directive. +

    +
    +
    + \ No newline at end of file diff --git a/marginalia_nu/src/main/resources/static/encyclopedia/index.html b/marginalia_nu/src/main/resources/static/encyclopedia/index.html new file mode 100644 index 00000000..1b3f81ed --- /dev/null +++ b/marginalia_nu/src/main/resources/static/encyclopedia/index.html @@ -0,0 +1,24 @@ + + + + + Marginalia Encyclopedia + + + + + + + + \ No newline at end of file diff --git a/marginalia_nu/src/main/resources/static/encyclopedia/robots.txt b/marginalia_nu/src/main/resources/static/encyclopedia/robots.txt new file mode 100644 index 00000000..77470cb3 --- /dev/null +++ b/marginalia_nu/src/main/resources/static/encyclopedia/robots.txt @@ -0,0 +1,2 @@ +User-agent: * +Disallow: / \ No newline at end of file diff --git a/marginalia_nu/src/main/resources/static/encyclopedia/wiki-clean.html b/marginalia_nu/src/main/resources/static/encyclopedia/wiki-clean.html new file mode 100644 index 00000000..cd928003 --- /dev/null +++ b/marginalia_nu/src/main/resources/static/encyclopedia/wiki-clean.html @@ -0,0 +1,71 @@ + + + + + Marginalia Search - About: Easy Read Wikipedia + + + + + +
    + +
    +
    +

    About: High Readability Encyclopedia

    +
    +

    + This is an encyclopedia based on Wikipedia's database, that strips away most links and + almost all visual clutter to provide a more book-like reading experience with fewer + distractions. +

    +

    + This is primarily a helpful utility for a search engine focusing on similarly text-oriented + websites. +

    +

    + You are welcome to use it for general article reading as well. This may be useful + if you are on a low bandwidth connection, since the download size is typically reduced + from megabytes to dozens of kilobytes. +

    +

    + What's taken away is all the design elements that your brain would have to filter out + to read the text of the article. It seems as though overburdening this mental process + causes the reader to start scanning the text instead of reading it, which is experienced + as an inability to focus. +

    +

    + The cleaning process is not perfect and will occasionally produce strange results, + but significant problems should be relatively rare. +

    + About the Search Engine + +

    Limitations

    +

    This is a "stale" copy of wikipedia, based on an archived copy from January 2021. On the + other hand, we used to abide printed encyclopedias that didn't update at all.

    +

    + Be aware that the cleaning strips away a lot of information, including most references, + footnotes, quality warnings, and so forth. Refer to the original wikipedia article for + that information. +

    +
    +

    Legal

    +
    + The original Wikipedia text is available under the Creative Commons Attribution-ShareAlike 3.0 license, + and so is the wikipedia text forwarded to you through this service. +
    +
    +

    Further reading

    +
    Blom et al. 2017 - Comprehension and navigation of networked hypertexts
    +
    https://onlinelibrary.wiley.com/doi/pdf/10.1111/jcal.12243
    +
    +

    Have something to say?

    +
    +

    Send me an e-mail at kontakt@marginalia.nu. +

    +
    +
    + \ No newline at end of file diff --git a/marginalia_nu/src/main/resources/static/encyclopedia/wiki-start.html b/marginalia_nu/src/main/resources/static/encyclopedia/wiki-start.html new file mode 100644 index 00000000..f0f86947 --- /dev/null +++ b/marginalia_nu/src/main/resources/static/encyclopedia/wiki-start.html @@ -0,0 +1,26 @@ + + + + + Marginalia Search - Easy Read Wikipedia Search + + + + + +
    + +
    +
    +

    Search the Encyclopedia

    + +
    + \ No newline at end of file diff --git a/marginalia_nu/src/main/resources/static/fonts/LM-bold-italic.ttf b/marginalia_nu/src/main/resources/static/fonts/LM-bold-italic.ttf new file mode 100644 index 00000000..7b684ee6 Binary files /dev/null and b/marginalia_nu/src/main/resources/static/fonts/LM-bold-italic.ttf differ diff --git a/marginalia_nu/src/main/resources/static/fonts/LM-bold-italic.woff b/marginalia_nu/src/main/resources/static/fonts/LM-bold-italic.woff new file mode 100644 index 00000000..d54af371 Binary files /dev/null and b/marginalia_nu/src/main/resources/static/fonts/LM-bold-italic.woff differ diff --git a/marginalia_nu/src/main/resources/static/fonts/LM-bold-italic.woff2 b/marginalia_nu/src/main/resources/static/fonts/LM-bold-italic.woff2 new file mode 100644 index 00000000..078ce292 Binary files /dev/null and b/marginalia_nu/src/main/resources/static/fonts/LM-bold-italic.woff2 differ diff --git a/marginalia_nu/src/main/resources/static/fonts/LM-bold.ttf b/marginalia_nu/src/main/resources/static/fonts/LM-bold.ttf new file mode 100644 index 00000000..17624d45 Binary files /dev/null and b/marginalia_nu/src/main/resources/static/fonts/LM-bold.ttf differ diff --git a/marginalia_nu/src/main/resources/static/fonts/LM-bold.woff b/marginalia_nu/src/main/resources/static/fonts/LM-bold.woff new file mode 100644 index 00000000..318a3ad2 Binary files /dev/null and b/marginalia_nu/src/main/resources/static/fonts/LM-bold.woff differ diff --git a/marginalia_nu/src/main/resources/static/fonts/LM-bold.woff2 b/marginalia_nu/src/main/resources/static/fonts/LM-bold.woff2 new file mode 100644 index 00000000..c14c6204 Binary files /dev/null and b/marginalia_nu/src/main/resources/static/fonts/LM-bold.woff2 differ diff --git a/marginalia_nu/src/main/resources/static/fonts/LM-italic.ttf b/marginalia_nu/src/main/resources/static/fonts/LM-italic.ttf new file mode 100644 index 00000000..b9a57b87 Binary files /dev/null and 
b/marginalia_nu/src/main/resources/static/fonts/LM-italic.ttf differ diff --git a/marginalia_nu/src/main/resources/static/fonts/LM-italic.woff b/marginalia_nu/src/main/resources/static/fonts/LM-italic.woff new file mode 100644 index 00000000..fafb147e Binary files /dev/null and b/marginalia_nu/src/main/resources/static/fonts/LM-italic.woff differ diff --git a/marginalia_nu/src/main/resources/static/fonts/LM-italic.woff2 b/marginalia_nu/src/main/resources/static/fonts/LM-italic.woff2 new file mode 100644 index 00000000..166d6e60 Binary files /dev/null and b/marginalia_nu/src/main/resources/static/fonts/LM-italic.woff2 differ diff --git a/marginalia_nu/src/main/resources/static/fonts/LM-regular.ttf b/marginalia_nu/src/main/resources/static/fonts/LM-regular.ttf new file mode 100644 index 00000000..6b4f6b8a Binary files /dev/null and b/marginalia_nu/src/main/resources/static/fonts/LM-regular.ttf differ diff --git a/marginalia_nu/src/main/resources/static/fonts/LM-regular.woff b/marginalia_nu/src/main/resources/static/fonts/LM-regular.woff new file mode 100644 index 00000000..eb9fec0a Binary files /dev/null and b/marginalia_nu/src/main/resources/static/fonts/LM-regular.woff differ diff --git a/marginalia_nu/src/main/resources/static/fonts/LM-regular.woff2 b/marginalia_nu/src/main/resources/static/fonts/LM-regular.woff2 new file mode 100644 index 00000000..869279ac Binary files /dev/null and b/marginalia_nu/src/main/resources/static/fonts/LM-regular.woff2 differ diff --git a/marginalia_nu/src/main/resources/static/memex/ico/dir.png b/marginalia_nu/src/main/resources/static/memex/ico/dir.png new file mode 100644 index 00000000..c4bc7f53 Binary files /dev/null and b/marginalia_nu/src/main/resources/static/memex/ico/dir.png differ diff --git a/marginalia_nu/src/main/resources/static/memex/ico/doc32.png b/marginalia_nu/src/main/resources/static/memex/ico/doc32.png new file mode 100644 index 00000000..5b9f3731 Binary files /dev/null and 
b/marginalia_nu/src/main/resources/static/memex/ico/doc32.png differ diff --git a/marginalia_nu/src/main/resources/static/memex/ico/file.png b/marginalia_nu/src/main/resources/static/memex/ico/file.png new file mode 100644 index 00000000..d9bf3b45 Binary files /dev/null and b/marginalia_nu/src/main/resources/static/memex/ico/file.png differ diff --git a/marginalia_nu/src/main/resources/static/memex/ico/folder32.png b/marginalia_nu/src/main/resources/static/memex/ico/folder32.png new file mode 100644 index 00000000..18c3241c Binary files /dev/null and b/marginalia_nu/src/main/resources/static/memex/ico/folder32.png differ diff --git a/marginalia_nu/src/main/resources/static/memex/ico/folderup16.png b/marginalia_nu/src/main/resources/static/memex/ico/folderup16.png new file mode 100644 index 00000000..112a5545 Binary files /dev/null and b/marginalia_nu/src/main/resources/static/memex/ico/folderup16.png differ diff --git a/marginalia_nu/src/main/resources/static/memex/ico/nav16.png b/marginalia_nu/src/main/resources/static/memex/ico/nav16.png new file mode 100644 index 00000000..bda9f33f Binary files /dev/null and b/marginalia_nu/src/main/resources/static/memex/ico/nav16.png differ diff --git a/marginalia_nu/src/main/resources/static/memex/ico/pic16.png b/marginalia_nu/src/main/resources/static/memex/ico/pic16.png new file mode 100644 index 00000000..9c5a562e Binary files /dev/null and b/marginalia_nu/src/main/resources/static/memex/ico/pic16.png differ diff --git a/marginalia_nu/src/main/resources/static/memex/ico/pic32.png b/marginalia_nu/src/main/resources/static/memex/ico/pic32.png new file mode 100644 index 00000000..108bafc5 Binary files /dev/null and b/marginalia_nu/src/main/resources/static/memex/ico/pic32.png differ diff --git a/marginalia_nu/src/main/resources/static/memex/ico/root.png b/marginalia_nu/src/main/resources/static/memex/ico/root.png new file mode 100644 index 00000000..e5cbd014 Binary files /dev/null and 
b/marginalia_nu/src/main/resources/static/memex/ico/root.png differ diff --git a/marginalia_nu/src/main/resources/static/memex/ico/shiba16.png b/marginalia_nu/src/main/resources/static/memex/ico/shiba16.png new file mode 100644 index 00000000..e41d0f60 Binary files /dev/null and b/marginalia_nu/src/main/resources/static/memex/ico/shiba16.png differ diff --git a/marginalia_nu/src/main/resources/static/memex/ico/world16.png b/marginalia_nu/src/main/resources/static/memex/ico/world16.png new file mode 100644 index 00000000..1f4513a9 Binary files /dev/null and b/marginalia_nu/src/main/resources/static/memex/ico/world16.png differ diff --git a/marginalia_nu/src/main/resources/static/memex/style-new.css b/marginalia_nu/src/main/resources/static/memex/style-new.css new file mode 100644 index 00000000..55733ed4 --- /dev/null +++ b/marginalia_nu/src/main/resources/static/memex/style-new.css @@ -0,0 +1,235 @@ +body { + font-family: sans-serif; + font-size: 12pt; + margin: 0px; + background-color: #f8f8ee; +} + +header { + background-color: #acae89; + color: #fff; + border-bottom: 1px solid #888; +} + +header nav a { + text-decoration: none; + color: #000; + + margin-right: 1ch; + padding: .5ch; + display: inline-block; +} + +nav.topbar { + display: flex; + justify-content: left; + align-content: center; + background-color: #fff; + margin-bottom: 4ch; + box-shadow: #ccc 4px 4px 5px; + max-width: 160ch; + margin-left: auto; + margin-right: auto; +} + +nav.topbar h1 { + padding: .5ch 1ch .5ch .5ch; + display: inline-block; + margin: 0px; + background-color: #2f4858; + color: #fff; + font-weight: normal; + font-family: serif; + font-size: 16pt; + border: 1px solid #fff; +} +nav.topbar a { + padding: 1ch; + display: inline-block; + text-decoration: none; + color: #000; + border-right: 2px solid #ccc; + padding-top: 1.20ch; + display: flex; + align-items:center; + gap: 1ch; +} + +nav.topbar > *:last-child { + background: #3F5F6F !important; + color: #fff; +} + +header nav a:hover, 
header nav a:focus { + background: #2f4858; + color: #fff !important; +} + +article a:focus { + color: #fff !important; + text-shadow: 0 0 5px #f00; + background-color: #000 !important;; + outline: none; +} +article { + max-width: 160ch; + margin-left: auto; + margin-right: auto; + display: flex; +} + +article > #memex-node { + line-height: 1.6; + margin: 1ch 2ch 2ch 2ch; + padding: 0ch 1ch 1ch 1ch; + flex-basis: 60ch; + background-color: #fff; + + border: 2px #ccc; + background-color: #fff; + border-left: 1px solid #ecb; + border-top: 1px solid #ecb; + + box-shadow: #0008 0 0 5px; + max-width: 60ch; + overflow: auto; +} + +pre { + font-size: 8pt; + overflow: auto; +} + +#memex-node h1, article #memex-node h2, article #memex-node h3 { + margin: 0px 0px 1ch 0px; + font-size: 12pt; + font-weight: normal; + margin-left: -1.6ch; + margin-right: -0.6ch; + background-color: #2f4858; + color: #fff; + padding: 0.75ch 1ch 0.5ch 1.5ch; + box-shadow: #000a 4px 4px; +} + +article a.doc:before { + width: 1ch; + height: 1ch; + content: url('/ico/file.png'); + margin-right: .5ch; +} +article a.dir:before { + width: 1ch; + height: 1ch; + content: url('/ico/dir.png'); + margin-right: .5ch; +} +article a.img:before { + width: 1ch; + height: 1ch; + content: url('/ico/pic16.png'); + margin-right: .5ch; +} +#sidebar > * { + padding: 1ch; +} +#sidebar h1 { + margin-top: 0; + font-size: 12pt; + font-weight: normal; + margin-left: -1.6ch; + margin-right: -0.6ch; + background-color: #2f4858; + color: #fff; + padding: 0.5ch 1ch 0.5ch 1ch; + box-shadow: #000a 4px 4px; +} + +article ul, article dl { + margin: 0px; +} +#sidebar a { + color: #000; +} + +#memex-node img { + width: 100%; + margin-left: auto; + margin-right: auto; +} + +.toc { + margin-top: 1ch; +} +.toc a { + display: block; +} + +footer { + padding: 2ch; + margin: 16ch 0px 0px 0px; + background-color: #acae89; + height: 20ch; + font-size: 10pt; +} +@media only screen and (max-device-width: 800px) { + header nav a { + padding: 
1ch !important; + } + + .topbar { + flex-direction: column; + } + article { + flex-direction: column; + } + #memex-node { + flex-basis: unset !important; + } +} +@media (prefers-color-scheme: dark) { + a { + color: #acf; + } + header { + background-color: #000; + } + article #memex-node { + border: unset; + } + header nav a { + color: #eee; + } + nav.topbar { + background: #222; + box-shadow: #0008 0 0 5px; + } + nav.topbar a { + color: #eee; + } + nav.topbar h1 { + background-color: unset; + border-right: 2px solid #444; + border-left: none; + border-top: none; + border-bottom: none; + } + footer { + background-color: #000; + color: #fff; + } + body { + background-color: #444; + } + #sidebar { + color: #fff; + } + #sidebar a { + color: #ccc; + } + #memex-node { + background-color: #222 !important; + color: #aaa; + box-shadow: #0008 0 0 5px; + } +} \ No newline at end of file diff --git a/marginalia_nu/src/main/resources/static/podcast/style.css b/marginalia_nu/src/main/resources/static/podcast/style.css new file mode 100644 index 00000000..1939da58 --- /dev/null +++ b/marginalia_nu/src/main/resources/static/podcast/style.css @@ -0,0 +1,3 @@ +.headline:visited { + color: #5f3945 !important; +} \ No newline at end of file diff --git a/marginalia_nu/src/main/resources/static/smhi/favicon.ico b/marginalia_nu/src/main/resources/static/smhi/favicon.ico new file mode 100644 index 00000000..a1136a7f Binary files /dev/null and b/marginalia_nu/src/main/resources/static/smhi/favicon.ico differ diff --git a/marginalia_nu/src/main/resources/static/smhi/font.css b/marginalia_nu/src/main/resources/static/smhi/font.css new file mode 100644 index 00000000..29142ae7 --- /dev/null +++ b/marginalia_nu/src/main/resources/static/smhi/font.css @@ -0,0 +1,50 @@ +/* LÅNAD KOD */ + +/*! 
+ * LaTeX.css (https://latex.now.sh/) + * + * Source: https://github.com/vincentdoerig/latex-css + * Licensed under MIT (https://github.com/vincentdoerig/latex-css/blob/master/LICENSE) +*/ + +@font-face { + font-family: 'Latin Modern'; + font-style: normal; + font-weight: normal; + font-display: swap; + src: url('https://www.marginalia.nu/fonts/LM-regular.woff2') format('woff2'), + url('https://www.marginalia.nu/fonts/LM-regular.woff') format('woff'), + url('https://www.marginalia.nu/fonts/LM-regular.ttf') format('truetype'); +} + +@font-face { + font-family: 'Latin Modern'; + font-style: italic; + font-weight: normal; + font-display: swap; + src: url('https://www.marginalia.nu/fonts/LM-italic.woff2') format('woff2'), + url('https://www.marginalia.nu/fonts/LM-italic.woff') format('woff'), + url('https://www.marginalia.nu/fonts/LM-italic.ttf') format('truetype'); +} + +@font-face { + font-family: 'Latin Modern'; + font-style: normal; + font-weight: bold; + font-display: swap; + src: url('https://www.marginalia.nu/fonts/LM-bold.woff2') format('woff2'), + url('https://www.marginalia.nu/fonts/LM-bold.woff') format('woff'), + url('https://www.marginalia.nu/fonts/LM-bold.ttf') format('truetype'); +} + +@font-face { + font-family: 'Latin Modern'; + font-style: italic; + font-weight: bold; + font-display: swap; + src: url('https://www.marginalia.nu/fonts/LM-bold-italic.woff2') format('woff2'), + url('https://www.marginalia.nu/fonts/LM-bold-italic.woff') format('woff'), + url('https://www.marginalia.nu/fonts/LM-bold-italic.ttf') format('truetype'); +} + +/* SLUT PÅ LÅN AV KOD */ \ No newline at end of file diff --git a/marginalia_nu/src/main/resources/static/smhi/responsive.css b/marginalia_nu/src/main/resources/static/smhi/responsive.css new file mode 100644 index 00000000..3f3ebc01 --- /dev/null +++ b/marginalia_nu/src/main/resources/static/smhi/responsive.css @@ -0,0 +1,74 @@ +/** Anpassningar för tryck, osv. 
*/ +.mobile-only { + display: none; +} + +@media only print { + .onlyprint { + display: block; + } + .onlyscreen { + display: none !important; + } + body { + font-family: 'Liberation', 'Times', Serif !important; + } + header { + display: none; + } + a { + color: #000 !important; + text-decoration: none; + } + figure, blockquote, p, section#footnotes { + page-break-inside: avoid; + } + abbr { + text-decoration: none; + } +} +@media only screen { + .onlyprint { + display: none; + } + .onlyscreen { + display: block; + } +} +@media only screen and (max-device-width: 480px) { + article { + margin: -0.5em !important; + padding: 0.5em !important. + display: block; + font-size: 10pt; + } + .title { + padding-left: 0.5em; + padding-right: 0.5em; + } + + body.essä article p::before { + display: none; + } + .mobile-only { + display: auto; + } +} + +@media only screen and (max-device-width: 480px) { + blockquote { + padding: 0em; + border: none; + } +} + +@media only screen and (min-device-width: 640px) { + header, footer, article { + margin-left: 8em; + margin-right: 8em; + } + body,blockquote { + font-size: 14pt; + } +} + diff --git a/marginalia_nu/src/main/resources/static/smhi/style.css b/marginalia_nu/src/main/resources/static/smhi/style.css new file mode 100644 index 00000000..126019a9 --- /dev/null +++ b/marginalia_nu/src/main/resources/static/smhi/style.css @@ -0,0 +1,192 @@ +.smhi-snabbhopp { + font-size: 18pt; + font-family: 'Latin Modern', 'Liberation', 'Times', Serif !important; + font-weight: bold; + text-decoration: none; + margin: 1ch; +} + +.smhi-snabbhoplista { + display: flex; + flex-wrap: wrap; +} + +div:target .smhi-platslank { + font-weight: bold; +} +@media only screen and (min-device-width: 1024px) { + .smhi-snabbhoplista { + display: none !important; + } +} +@media only screen and (max-device-width: 480px) { + + .smhi-snabbhopp { + font-family: 'Times', Serif !important; + padding: 1ch !important; + margin: 1ch !important; + } + + .smhi-platslank { + 
font-size: 14pt; + padding-bottom: 2ch !important; + } + +} + + + +/** MARGINALER */ + +body { + background-color: #f8f8ee; + font-family: 'Tahoma', sans-serif; + text-rendering: optimizeLegibility; + margin: 0 auto; + max-width: 80ch; + +} +article { + background-color: #f8f8ee; + margin: -2em; + padding: 2em; +} + +header, footer, article { + margin-left: 4em; + margin-right: 4em; + display: block; +} + +a { + color: #274fa5; +} +a.replyButton, a:visited.replyButton { + color: #274fa5; + float: right; + text-decoration: none; +} +.headline { + color: #a5274f; +} + +details { + border-left: 1px solid #ccc; + font-size: 12pt; +} + +/** HEADER */ +header { + padding-bottom: 0.5em; + margin-bottom: 1em; +} + +header a { + text-decoration: none; +} + +a:visited { + color: #14114f; +} + +article { + -webkit-hyphens: auto; + -moz-hyphens: auto; + -ms-hyphens: auto; + -o-hyphens: auto; + line-height: 1.6; +} + +h1, h2, h3 { + font-family: 'Garamond', 'Palatino', serif; + font-weight: normal; + text-align: left; + color: #342a00; + margin-left: -.5em; +} + +.title { + text-align: center; + font-family: 'Garamond', 'Palatino', serif; + color: #342a00; +} +.title h1, .title h2, .title h3 { + font-family: 'Garamond', 'Palatino', serif; + font-weight: normal; + text-align: center; + color: #342a00; + padding-left: 2em; + padding-right: 2em; + border: none !important; + margin-left: inherit; +} + + +h1 { + border-bottom: 3px double #14114f; +} + + +h2, h3 { + border-bottom: 1px solid #14114f; +} + +.noline { + border-bottom: none !important; +} + +dt a, dd a { + text-align: left !important; +} + +section#footnotes { + font-size: 10pt; +} + +/** FOOTER */ + +footer { + padding-top: 0.5em; + margin-top: 3em; + color: #444; + line-height: 2; + text-align: center; +} + +footer section#signatur.special { + display: none; +} + +/** CITAT */ + +q { + font-family: 'Latin Modern', 'Garamond', serif; + color: #444; +} +blockquote { + color: #444; + font-family: 'Latin Modern', 
'Garamond', serif; + font-size: 12pt; +} + +blockquote.verse { + white-space: pre; + font-size: 10pt; + line-height: 1.2; + padding: 1em; + margin-left: -1em; + margin-right: -1em; + overflow: auto; +} +cite { + text-align: center; + display: block; +} + +.teknisk { + font-family: 'fixedspace', monospace; +} +.deemph { + color: #886; + font-family: 'fixedspace', monospace; +} diff --git a/marginalia_nu/src/main/resources/static/style.css b/marginalia_nu/src/main/resources/static/style.css new file mode 100644 index 00000000..87d93408 --- /dev/null +++ b/marginalia_nu/src/main/resources/static/style.css @@ -0,0 +1,246 @@ +/** MARGINALER */ + +body { + background-color: #f8f8ee; + font-family: 'Tahoma', sans-serif; + text-rendering: optimizeLegibility; + margin: 0 auto; + max-width: 80ch; + +} + +article { + background-color: #f8f8ee; + margin: -2em; + padding: 2em; +} + +body.utkast { + background: url('/images/utkast-bg.webp') repeat-y left top; +} + +header, footer, article { + margin-left: 4em; + margin-right: 4em; + display: block; +} + +a { + color: #274fa5; +} +a.replyButton, a:visited.replyButton { + color: #274fa5; + float: right; + text-decoration: none; +} +.headline { + color: #a5274f; +} + +details { + border-left: 1px solid #ccc; + font-size: 12pt; +} + +/** HEADER */ +header { + padding-bottom: 0.5em; + margin-bottom: 1em; +} + +header a { + text-decoration: none; +} + +a:visited { + color: #14114f; +} + +/** ARTIKEL */ + +article { + text-align: justify; + -webkit-hyphens: auto; + -moz-hyphens: auto; + -ms-hyphens: auto; + -o-hyphens: auto; + line-height: 1.6; +} + +h1, h2, h3 { + font-family: 'Garamond', 'Palatino', serif; + font-weight: normal; + text-align: left; + color: #342a00; + margin-left: -.5em; +} + +.title { + text-align: center; + font-family: 'Garamond', 'Palatino', serif; + color: #342a00; +} +.title h1, .title h2, .title h3 { + font-family: 'Garamond', 'Palatino', serif; + font-weight: normal; + text-align: center; + color: #342a00; + 
padding-left: 2em; + padding-right: 2em; + border: none !important; + margin-left: inherit; +} + + +h1 { + border-bottom: 3px double #14114f; +} + + +h2, h3 { + border-bottom: 1px solid #14114f; +} + +.noline { + border-bottom: none !important; +} + +dt a, dd a { + text-align: left !important; +} + +section#footnotes { + font-size: 10pt; +} + +/** FOOTER */ + +footer { + padding-top: 0.5em; + margin-top: 3em; + color: #444; + line-height: 2; + text-align: center; +} + +footer section#signatur.special { + display: none; +} + +/* monogram */ +footer img { + text-align: center; + display: block; + margin-left: auto; + margin-right: auto; + margin-top: 4em; + width: 50%; + height: 50%; + opacity: 0.5; +} + + +/** CITAT */ + +q { + font-family: 'Latin Modern', 'Garamond', serif; + color: #444; +} +blockquote { + color: #444; + font-family: 'Latin Modern', 'Garamond', serif; + font-size: 12pt; +} + +blockquote.verse { + white-space: pre; + font-size: 10pt; + line-height: 1.2; + padding: 1em; + margin-left: -1em; + margin-right: -1em; + overflow: auto; +} +cite { + text-align: center; + display: block; +} + +.teknisk { + font-family: 'fixedspace', monospace; +} +.deemph { + color: #886; + font-family: 'fixedspace', monospace; +} + +/** Anpassningar för tryck, osv. 
*/ + +@media only print { + .onlyprint { + display: block; + } + .onlyscreen { + display: none; + } + body { + font-family: 'Liberation', 'Times', Serif !important; + } + header { + display: none; + } + a { + color: #000 !important; + text-decoration: none; + } + figure, blockquote, p, section#footnotes { + page-break-inside: avoid; + } + abbr { + text-decoration: none; + } +} +@media only screen { + .onlyprint { + display: none; + } + .onlyscreen { + display: block; + } +} +@media only screen and (max-device-width: 480px) { + nav a { + display: block; + margin-bottom: 1.5em; + margin-top: 1.5em; + } + + header, footer, article { + margin-left: 2em; + margin-right: 2em; + display: block; + } + + body.essä article p::before { + display: none; + } +} + +@media only screen and (max-device-width: 480px) { + blockquote { + padding: 0em; + border: none; + } +} + +@media only screen and (min-device-width: 640px) { + header, footer, article { + margin-left: 8em; + margin-right: 8em; + } + body,blockquote,blockquote.verse { + font-size: 14pt; + } +} + + diff --git a/marginalia_nu/src/main/resources/templates/auth/login.hdb b/marginalia_nu/src/main/resources/templates/auth/login.hdb new file mode 100644 index 00000000..dc78489c --- /dev/null +++ b/marginalia_nu/src/main/resources/templates/auth/login.hdb @@ -0,0 +1,36 @@ + + + + {{service}} - log in + + + + + + +
    + +
    +
    +
    +

    {{service}}: You must log in

    +

    + This is not a public-access system. If you do not already have + the password, it is not yours to know. +

    +

    I am nevertheless legally obliged to inform you that if you + were to log in, this would place a cookie on your computer. This + cookie would then be used to keep track of your status of having + logged in, and nothing else. +

    +
    + + + +
    +
    +
    + + diff --git a/marginalia_nu/src/main/resources/templates/dating/dating-view.hdb b/marginalia_nu/src/main/resources/templates/dating/dating-view.hdb new file mode 100644 index 00000000..b6a5a997 --- /dev/null +++ b/marginalia_nu/src/main/resources/templates/dating/dating-view.hdb @@ -0,0 +1,150 @@ + + + + + Website Explorer - {{url}} + + + + + +
    +

    {{url}}

    + + + Screenshot of {{url}} + + + {{#if back}}⬅️{{/if}} + ➡️ + 🔀 + 🤩 +
    + + diff --git a/marginalia_nu/src/main/resources/templates/edge/browse-result.hdb b/marginalia_nu/src/main/resources/templates/edge/browse-result.hdb new file mode 100644 index 00000000..d2be3661 --- /dev/null +++ b/marginalia_nu/src/main/resources/templates/edge/browse-result.hdb @@ -0,0 +1,12 @@ +
    +

    {{url.domain}}

    + + + + + + +
    \ No newline at end of file diff --git a/marginalia_nu/src/main/resources/templates/edge/browse-results.hdb b/marginalia_nu/src/main/resources/templates/edge/browse-results.hdb new file mode 100644 index 00000000..a6a8b0f8 --- /dev/null +++ b/marginalia_nu/src/main/resources/templates/edge/browse-results.hdb @@ -0,0 +1,49 @@ + + + + + Marginalia Search - {{query}} + + + + + + + + +{{>edge/parts/search-header}} + +
    + {{>edge/parts/search-form}} + +
    + +{{#if focusDomain}} +
    +

    Similar Domains

    + +

    + Showing domains similar to {{focusDomain}}. +

    +
    +{{/if}} + +{{#each results}}{{>edge/browse-result}}{{/each}} + +{{#unless focusDomain}} +
    +

    Random Domains

    + +

    + This list of domains is random. Refresh to get + new domains, or click Similar Domains to + take the helm. +

    +
    +{{/unless}} + +
    +
    + +{{>edge/parts/search-footer}} + diff --git a/marginalia_nu/src/main/resources/templates/edge/conversion-results-gmi.hdb b/marginalia_nu/src/main/resources/templates/edge/conversion-results-gmi.hdb new file mode 100644 index 00000000..89c01d5e --- /dev/null +++ b/marginalia_nu/src/main/resources/templates/edge/conversion-results-gmi.hdb @@ -0,0 +1,12 @@ +# Search Engine + +=> /search Search +=> /search-about.gmi About + +{{query}} = {{result}} + +## Warning + +These results use floating point calculations, and may not be accurate +for very large or very small numbers. Do not use for orbital calculations, +thesis projects, or other sensitive work. \ No newline at end of file diff --git a/marginalia_nu/src/main/resources/templates/edge/conversion-results.hdb b/marginalia_nu/src/main/resources/templates/edge/conversion-results.hdb new file mode 100644 index 00000000..50e5840b --- /dev/null +++ b/marginalia_nu/src/main/resources/templates/edge/conversion-results.hdb @@ -0,0 +1,37 @@ + + + + + Marginalia Search - {{query}} + + + + + + + + +{{>edge/parts/search-header}} + +
    + {{>edge/parts/search-form}} + +
    +
    +

    {{query}}

    +

    {{result}}

    +
    +
    +

    Warning

    +

    + These results use floating point calculations, and may not be accurate + for very large or very small numbers. Do not use for orbital calculations, + thesis projects, or other sensitive work. +

    +
    +
    + +
    + +{{>edge/parts/search-footer}} + \ No newline at end of file diff --git a/marginalia_nu/src/main/resources/templates/edge/dictionary-results-gmi.hdb b/marginalia_nu/src/main/resources/templates/edge/dictionary-results-gmi.hdb new file mode 100644 index 00000000..7de8f20a --- /dev/null +++ b/marginalia_nu/src/main/resources/templates/edge/dictionary-results-gmi.hdb @@ -0,0 +1,17 @@ +# Search Engine + +=> /search Search +=> /search-about.gmi About + +## Results for "{{{query}}}" + +{{#each entries}} +({{type}}) - {{definition}} +{{/each}} + +## Legal + +These definitions are from wiktionary, available under GFDL and CC BY-SA 3.0, except for fair use exceptions. + +=> https://en.wiktionary.org/ +=> https://dumps.wikimedia.org/legal.html \ No newline at end of file diff --git a/marginalia_nu/src/main/resources/templates/edge/dictionary-results.hdb b/marginalia_nu/src/main/resources/templates/edge/dictionary-results.hdb new file mode 100644 index 00000000..b7888418 --- /dev/null +++ b/marginalia_nu/src/main/resources/templates/edge/dictionary-results.hdb @@ -0,0 +1,48 @@ + + + + + Marginalia Search - {{query}} + + + + + + + + +{{>edge/parts/search-header}} + +
    + {{>edge/parts/search-form}} + +
    + {{#unless entries}} +
    +

    No Results

    +
    No definitions were found for that word
    +
    + {{/unless}} + + {{#each entries}} +
    +

    {{type}} - {{word}}

    +
    {{definition}}
    +
    + {{/each}} + + {{#if entries}} +
    +

    Legal

    +

    + This data is derived from wiktionary, + available under GFDL and CC BY-SA 3.0. More Information. +

    +
    + {{/if}} +
    + +
    + +{{>edge/parts/search-footer}} + diff --git a/marginalia_nu/src/main/resources/templates/edge/error-page.hdb b/marginalia_nu/src/main/resources/templates/edge/error-page.hdb new file mode 100644 index 00000000..f56edf19 --- /dev/null +++ b/marginalia_nu/src/main/resources/templates/edge/error-page.hdb @@ -0,0 +1,20 @@ + + + Error + + + + +
    +

    Error

    +

    Oops! It appears the index server is {{indexState}}.

    +

    The server was probably restarted to bring online some changes. Restarting the index typically takes + a few minutes, during which searches can't be served.

    + +

    In the event of a longer outage, the @marginalianu feed + on Twitter may have details, otherwise you can always send me an email at kontakt@marginalia.nu.

    + +

    This page will attempt to refresh automatically every few seconds.

    +
    + + \ No newline at end of file diff --git a/marginalia_nu/src/main/resources/templates/edge/parts/search-footer.hdb b/marginalia_nu/src/main/resources/templates/edge/parts/search-footer.hdb new file mode 100644 index 00000000..3e1a8637 --- /dev/null +++ b/marginalia_nu/src/main/resources/templates/edge/parts/search-footer.hdb @@ -0,0 +1,9 @@ +
    + This website complies with the GDPR by not collecting any personal + information, and with the EU Cookie Directive by not using + cookies. More Information. +

    + Reach me at kontakt@marginalia.nu. +

    + + \ No newline at end of file diff --git a/marginalia_nu/src/main/resources/templates/edge/parts/search-form.hdb b/marginalia_nu/src/main/resources/templates/edge/parts/search-form.hdb new file mode 100644 index 00000000..0f0a7a9f --- /dev/null +++ b/marginalia_nu/src/main/resources/templates/edge/parts/search-form.hdb @@ -0,0 +1,26 @@ +
    + +
    \ No newline at end of file diff --git a/marginalia_nu/src/main/resources/templates/edge/parts/search-header.hdb b/marginalia_nu/src/main/resources/templates/edge/parts/search-header.hdb new file mode 100644 index 00000000..7bbbe580 --- /dev/null +++ b/marginalia_nu/src/main/resources/templates/edge/parts/search-header.hdb @@ -0,0 +1,8 @@ + +
    + +
    diff --git a/marginalia_nu/src/main/resources/templates/edge/search-result-gmi.hdb b/marginalia_nu/src/main/resources/templates/edge/search-result-gmi.hdb new file mode 100644 index 00000000..fe20c054 --- /dev/null +++ b/marginalia_nu/src/main/resources/templates/edge/search-result-gmi.hdb @@ -0,0 +1,4 @@ + +### {{{title}}} +=> {{geminiLink}} + {{{description}}} diff --git a/marginalia_nu/src/main/resources/templates/edge/search-result-metadata.hdb b/marginalia_nu/src/main/resources/templates/edge/search-result-metadata.hdb new file mode 100644 index 00000000..9127d2e6 --- /dev/null +++ b/marginalia_nu/src/main/resources/templates/edge/search-result-metadata.hdb @@ -0,0 +1,9 @@ +{{#if scripts}}🏭️{{/if}} +{{#if tracking}}🕵️️{{/if}} +{{#if media}}🎞️{{/if}} +{{#if affiliate}}💳️{{/if}} +{{#if cookies}}👁️️{{/if}} +{{format}} +{{#unless focusDomain}} +{{{rankingSymbol}}} +{{/unless}} \ No newline at end of file diff --git a/marginalia_nu/src/main/resources/templates/edge/search-result.hdb b/marginalia_nu/src/main/resources/templates/edge/search-result.hdb new file mode 100644 index 00000000..9a2b163a --- /dev/null +++ b/marginalia_nu/src/main/resources/templates/edge/search-result.hdb @@ -0,0 +1,14 @@ + +
    + +

    {{title}}

    +

    {{description}}

    + +
    + Info + {{#unless focusDomain}}Search{{/unless}} + +
    {{>edge/search-result-metadata}}
    +
    +
    +
    diff --git a/marginalia_nu/src/main/resources/templates/edge/search-results-gmi.hdb b/marginalia_nu/src/main/resources/templates/edge/search-results-gmi.hdb new file mode 100644 index 00000000..32319c90 --- /dev/null +++ b/marginalia_nu/src/main/resources/templates/edge/search-results-gmi.hdb @@ -0,0 +1,19 @@ +# Search Engine + +=> /search Search +=> /search-about.gmi About + +{{#each problems}} +* {{{.}}}{{/each}} + +## Results for "{{{query}}}" + +{{#each results}} + +{{>edge/search-result-gmi}} + +{{/each}} + +-- + +=> / To index \ No newline at end of file diff --git a/marginalia_nu/src/main/resources/templates/edge/search-results.hdb b/marginalia_nu/src/main/resources/templates/edge/search-results.hdb new file mode 100644 index 00000000..82d7f707 --- /dev/null +++ b/marginalia_nu/src/main/resources/templates/edge/search-results.hdb @@ -0,0 +1,47 @@ + + + + + Marginalia Search - {{query}} + + + + + + + + + + + +{{>edge/parts/search-header}} + +
    +{{>edge/parts/search-form}} +
    +
    + {{#if maintenanceMessage}}

    Maintenance

    {{maintenanceMessage}}

    {{/if}} + {{#if evalResult}}

    Evaluation

    {{query}} = {{evalResult}}


    {{/if}} + {{#each wiki.entries}}

    Encyclopedia

    {{.}} Encyclopedia Page


    {{/each}} + + {{#if focusDomain}} +
    +

    {{focusDomain}}

    +

    + Showing results from {{focusDomain}} +

    + +
    + {{/if}} + + {{#each results}}{{>edge/search-result}}{{/each}} + + {{#unless evalResult}}{{#if problems}}

    Suggestions

      {{#each problems}}
    • {{{.}}}
    • {{/each}}
    {{/if}}{{/unless}} +
    +
    + +{{>edge/parts/search-footer}} + diff --git a/marginalia_nu/src/main/resources/templates/edge/site-info-gmi.hdb b/marginalia_nu/src/main/resources/templates/edge/site-info-gmi.hdb new file mode 100644 index 00000000..5696b251 --- /dev/null +++ b/marginalia_nu/src/main/resources/templates/edge/site-info-gmi.hdb @@ -0,0 +1,14 @@ +# Search Engine + +=> /search Search +=> /search-about.gmi About + +## Results for "{{{query}}}" + +Blacklisted: {{blacklisted}} +Pages Known: {{pagesKnown}} +Pages Indexed: {{pagesKnown}} +Inbound Links: {{inboundLinks}} +Outbound Links: {{outboundLinks}} +Nominal Quality: {{nominalQuality}}% +Crawl Ranking: {{ranking}}% \ No newline at end of file diff --git a/marginalia_nu/src/main/resources/templates/edge/site-info.hdb b/marginalia_nu/src/main/resources/templates/edge/site-info.hdb new file mode 100644 index 00000000..19b585b8 --- /dev/null +++ b/marginalia_nu/src/main/resources/templates/edge/site-info.hdb @@ -0,0 +1,58 @@ + + + + + Marginalia Search - {{query}} + + + + + + + + +{{>edge/parts/search-header}} + +
    + {{>edge/parts/search-form}} + +
    +
    +

    {{domain}}

    + Thumbnail image of {{domain}} +
    + +
    + +

    Indexing Information

    +

    + Blacklisted: {{blacklisted}}
    + Pages Known: {{pagesKnown}}
    + Pages Crawled: {{pagesFetched}}
    + Pages Indexed: {{pagesIndexed}}
    + Crawl State: {{state}}
    +

    +
    + +
    +

    Links

    +

    + Nominal Quality: {{nominalQuality}}%
    + Crawl Ranking: {{ranking}}%
    + Incoming Links: {{incomingLinks}}
    + Outbound Links: {{outboundLinks}}
    +

    + +
    + + {{#each results}}{{>edge/search-result}}{{/each}} +
    +
    + +{{>edge/parts/search-footer}} + + + diff --git a/marginalia_nu/src/main/resources/templates/encyclopedia/wiki-error.hdb b/marginalia_nu/src/main/resources/templates/encyclopedia/wiki-error.hdb new file mode 100644 index 00000000..37376677 --- /dev/null +++ b/marginalia_nu/src/main/resources/templates/encyclopedia/wiki-error.hdb @@ -0,0 +1,28 @@ + + + + + Error + + + + + +
    + +
    +
    +

    An error has occurred!

    +

    + Either the page you attempted to access does not exist, + or the automatic cleaning process has failed. +

    +

    + Please use this link as a back-up:
    + {{.}} +

    +
    + \ No newline at end of file diff --git a/marginalia_nu/src/main/resources/templates/encyclopedia/wiki-search.hdb b/marginalia_nu/src/main/resources/templates/encyclopedia/wiki-search.hdb new file mode 100644 index 00000000..bc5d5a12 --- /dev/null +++ b/marginalia_nu/src/main/resources/templates/encyclopedia/wiki-search.hdb @@ -0,0 +1,35 @@ + + + + + Encyclopedia Search: {{query}} + + + + + +
    + +
    +
    +

    Search the Encyclopedia

    + +

    Search results

    + {{#if error}} +
    Failed to find exact article match
    + {{/if}} +
    + {{#each results}} +
    {{name}}
    + {{#if refName}}
    {{refName}}
    {{/if}} + {{/each}} +
    +
    + \ No newline at end of file diff --git a/marginalia_nu/src/main/resources/templates/memex/memex-create-form.hdb b/marginalia_nu/src/main/resources/templates/memex/memex-create-form.hdb new file mode 100644 index 00000000..6fbd6cec --- /dev/null +++ b/marginalia_nu/src/main/resources/templates/memex/memex-create-form.hdb @@ -0,0 +1,23 @@ + + +{{>memex/partial/memex-head}} + +{{>memex/partial/memex-topbar}} +
    +
    +

    New : {{url}}

    +
    +
    + +
    + + +
    +
    +

    Existing posts

    +
    +
      {{#each docs}}
    • {{url}}
    • {{/each}}
    +
    +
    +{{> memex/partial/memex-footer}} + diff --git a/marginalia_nu/src/main/resources/templates/memex/memex-delete-form.hdb b/marginalia_nu/src/main/resources/templates/memex/memex-delete-form.hdb new file mode 100644 index 00000000..ab41abcf --- /dev/null +++ b/marginalia_nu/src/main/resources/templates/memex/memex-delete-form.hdb @@ -0,0 +1,23 @@ + + +{{>memex/partial/memex-head}} + +{{>memex/partial/memex-topbar}} +
    +

    Delete {{url}}

    +
    +
    + + +
    +
    + +
    +
    +{{#if doc}}{{{doc}}}{{/if}} +{{#if image}}{{/if}} +
    +{{>memex/partial/memex-backlinks}} +
    +{{> memex/partial/memex-footer}} + diff --git a/marginalia_nu/src/main/resources/templates/memex/memex-image.hdb b/marginalia_nu/src/main/resources/templates/memex/memex-image.hdb new file mode 100644 index 00000000..795d4f9c --- /dev/null +++ b/marginalia_nu/src/main/resources/templates/memex/memex-image.hdb @@ -0,0 +1,24 @@ + + +{{>memex/partial/memex-head}} + + +{{>memex/partial/memex-topbar url=path}} + +
    +
    +

    {{path}}

    + +
    + +
    +{{> memex/partial/memex-footer}} + diff --git a/marginalia_nu/src/main/resources/templates/memex/memex-index-feed.hdb b/marginalia_nu/src/main/resources/templates/memex/memex-index-feed.hdb new file mode 100644 index 00000000..352897bb --- /dev/null +++ b/marginalia_nu/src/main/resources/templates/memex/memex-index-feed.hdb @@ -0,0 +1,20 @@ + + + marginalia.nu{{url}} + {{title}} + + marginalia kontakt@marginalia.nu + {{domain}}/ + {{now}} +{{#each docs}} +{{#amgarp DRAFT}} + + {{title}} + + {{domain}}{{url}} + {{date}}T00:00:00Z + +{{/amgarp}} +{{/each}} + + \ No newline at end of file diff --git a/marginalia_nu/src/main/resources/templates/memex/memex-index.hdb b/marginalia_nu/src/main/resources/templates/memex/memex-index.hdb new file mode 100644 index 00000000..59243c33 --- /dev/null +++ b/marginalia_nu/src/main/resources/templates/memex/memex-index.hdb @@ -0,0 +1,34 @@ + + +{{>memex/partial/memex-head}} + + +{{>memex/partial/memex-topbar}} + +
    + +
    +{{#if indexData}}{{{indexData}}}{{/if}} +{{#unless indexData}}

    {{url}}

    {{/unless}} + +{{>memex/partial/memex-task-listing}} +{{>memex/partial/memex-documents-inline}} +
    + + +
    +{{> memex/partial/memex-footer}} + diff --git a/marginalia_nu/src/main/resources/templates/memex/memex-rename-form.hdb b/marginalia_nu/src/main/resources/templates/memex/memex-rename-form.hdb new file mode 100644 index 00000000..db969027 --- /dev/null +++ b/marginalia_nu/src/main/resources/templates/memex/memex-rename-form.hdb @@ -0,0 +1,23 @@ + + +{{>memex/partial/memex-head}} + +{{>memex/partial/memex-topbar}} +
    +

    Rename {{url}}

    +
    +
    + + +
    +
    + +
    +
    +{{#if doc}}{{{doc}}}{{/if}} +{{#if image}}{{/if}} +
    +{{>memex/partial/memex-backlinks}} +
    +{{> memex/partial/memex-footer}} + diff --git a/marginalia_nu/src/main/resources/templates/memex/memex-tombstone.hdb b/marginalia_nu/src/main/resources/templates/memex/memex-tombstone.hdb new file mode 100644 index 00000000..905b1bec --- /dev/null +++ b/marginalia_nu/src/main/resources/templates/memex/memex-tombstone.hdb @@ -0,0 +1,17 @@ + + +{{>memex/partial/memex-head}} + +{{>memex/partial/memex-topbar}} +
    +
    +

    {{url}} is gone

    +

    +{{#if message}}{{{message}}}{{/if}} +{{#if redirect}}See {{redirect}}{{/if}} +

    +
    +{{>memex/partial/memex-backlinks}} +
    +{{> memex/partial/memex-footer}} + diff --git a/marginalia_nu/src/main/resources/templates/memex/memex-update-form.hdb b/marginalia_nu/src/main/resources/templates/memex/memex-update-form.hdb new file mode 100644 index 00000000..6b7a55e5 --- /dev/null +++ b/marginalia_nu/src/main/resources/templates/memex/memex-update-form.hdb @@ -0,0 +1,20 @@ + + +{{>memex/partial/memex-head}} + +{{>memex/partial/memex-topbar}} +
    +
    +
    + +
    + + +
    +
    + +
    +{{> memex/partial/memex-footer}} + diff --git a/marginalia_nu/src/main/resources/templates/memex/memex-upload-form.hdb b/marginalia_nu/src/main/resources/templates/memex/memex-upload-form.hdb new file mode 100644 index 00000000..e503a2db --- /dev/null +++ b/marginalia_nu/src/main/resources/templates/memex/memex-upload-form.hdb @@ -0,0 +1,26 @@ + + +{{>memex/partial/memex-head}} + +{{>memex/partial/memex-topbar}} +
    +
    +

    Upload : {{url}}

    +
    +
    + +
    +
    +
    +
    + +
    +
    +

    Existing posts

    +
    +
      {{#each docs}}
    • {{url}}
    • {{/each}} +
        {{#each images}}
      • {{url}}
      • {{/each}} +
    +
    +{{> memex/partial/memex-footer}} + diff --git a/marginalia_nu/src/main/resources/templates/memex/memex-view.hdb b/marginalia_nu/src/main/resources/templates/memex/memex-view.hdb new file mode 100644 index 00000000..47eb1480 --- /dev/null +++ b/marginalia_nu/src/main/resources/templates/memex/memex-view.hdb @@ -0,0 +1,36 @@ + + +{{>memex/partial/memex-head}} + + +{{>memex/partial/memex-topbar url=baseDoc.url}} + +
    +
    +{{{doc}}} + +{{#pragma this "TOPIC"}} +{{>memex/partial/memex-backlinks-inline}} +{{/pragma}} +
    + +
    +{{> memex/partial/memex-footer}} + diff --git a/marginalia_nu/src/main/resources/templates/memex/partial/memex-backlinks-inline.hdb b/marginalia_nu/src/main/resources/templates/memex/partial/memex-backlinks-inline.hdb new file mode 100644 index 00000000..de40e40d --- /dev/null +++ b/marginalia_nu/src/main/resources/templates/memex/partial/memex-backlinks-inline.hdb @@ -0,0 +1,6 @@ +{{#if backlinks}} +
    {{#each backlinks}} +
    {{url}}
    +
    {{description}}
    +{{/each}}
    +{{/if}} \ No newline at end of file diff --git a/marginalia_nu/src/main/resources/templates/memex/partial/memex-backlinks.hdb b/marginalia_nu/src/main/resources/templates/memex/partial/memex-backlinks.hdb new file mode 100644 index 00000000..a15d7710 --- /dev/null +++ b/marginalia_nu/src/main/resources/templates/memex/partial/memex-backlinks.hdb @@ -0,0 +1,9 @@ +{{#if backlinks}} + +{{/if}} \ No newline at end of file diff --git a/marginalia_nu/src/main/resources/templates/memex/partial/memex-directories.hdb b/marginalia_nu/src/main/resources/templates/memex/partial/memex-directories.hdb new file mode 100644 index 00000000..164707c5 --- /dev/null +++ b/marginalia_nu/src/main/resources/templates/memex/partial/memex-directories.hdb @@ -0,0 +1,9 @@ +
    +

    Directories

    +{{#if parent}} + ..
    +{{/if}} +{{#each directories}} + {{filename}}
    +{{/each}} +
    \ No newline at end of file diff --git a/marginalia_nu/src/main/resources/templates/memex/partial/memex-documents-inline.hdb b/marginalia_nu/src/main/resources/templates/memex/partial/memex-documents-inline.hdb new file mode 100644 index 00000000..59598460 --- /dev/null +++ b/marginalia_nu/src/main/resources/templates/memex/partial/memex-documents-inline.hdb @@ -0,0 +1,10 @@ +{{#pragma this "LISTING"}} +{{#if docs}}

    Documents


    {{/if}} +
    +{{#each docs}} +{{#unless index}} + {{url.filename}}
    +{{/unless}} +{{/each}} +
    +{{/pragma}} \ No newline at end of file diff --git a/marginalia_nu/src/main/resources/templates/memex/partial/memex-documents.hdb b/marginalia_nu/src/main/resources/templates/memex/partial/memex-documents.hdb new file mode 100644 index 00000000..242004ac --- /dev/null +++ b/marginalia_nu/src/main/resources/templates/memex/partial/memex-documents.hdb @@ -0,0 +1,10 @@ +{{#amgarp this "LISTING"}} +
    +{{#if docs}}

    Documents

    {{/if}} +{{#each docs}} +{{#unless index}} + {{url.filename}}
    +{{/unless}} +{{/each}} +
    +{{/amgarp}} \ No newline at end of file diff --git a/marginalia_nu/src/main/resources/templates/memex/partial/memex-footer.hdb b/marginalia_nu/src/main/resources/templates/memex/partial/memex-footer.hdb new file mode 100644 index 00000000..32edef62 --- /dev/null +++ b/marginalia_nu/src/main/resources/templates/memex/partial/memex-footer.hdb @@ -0,0 +1,4 @@ + \ No newline at end of file diff --git a/marginalia_nu/src/main/resources/templates/memex/partial/memex-head.hdb b/marginalia_nu/src/main/resources/templates/memex/partial/memex-head.hdb new file mode 100644 index 00000000..af30c6f5 --- /dev/null +++ b/marginalia_nu/src/main/resources/templates/memex/partial/memex-head.hdb @@ -0,0 +1,9 @@ + + + MEMEX - {{title}} + + + {{#pragma this "FEED"}} + + {{/pragma}} + \ No newline at end of file diff --git a/marginalia_nu/src/main/resources/templates/memex/partial/memex-images.hdb b/marginalia_nu/src/main/resources/templates/memex/partial/memex-images.hdb new file mode 100644 index 00000000..6498d04e --- /dev/null +++ b/marginalia_nu/src/main/resources/templates/memex/partial/memex-images.hdb @@ -0,0 +1,6 @@ +
    +{{#if images}}

    Images

    {{/if}} +{{#each images}} + {{path.filename}}
    +{{/each}} +
    \ No newline at end of file diff --git a/marginalia_nu/src/main/resources/templates/memex/partial/memex-task-listing.hdb b/marginalia_nu/src/main/resources/templates/memex/partial/memex-task-listing.hdb new file mode 100644 index 00000000..6f9b8060 --- /dev/null +++ b/marginalia_nu/src/main/resources/templates/memex/partial/memex-task-listing.hdb @@ -0,0 +1,10 @@ +{{#if tasks}} +
    +

    Open Tasks

    +
    +{{#each tasks}} +
    {{task}}
    +{{/each}} +
    +
    +{{/if}} diff --git a/marginalia_nu/src/main/resources/templates/memex/partial/memex-topbar.hdb b/marginalia_nu/src/main/resources/templates/memex/partial/memex-topbar.hdb new file mode 100644 index 00000000..940ec5bb --- /dev/null +++ b/marginalia_nu/src/main/resources/templates/memex/partial/memex-topbar.hdb @@ -0,0 +1,13 @@ +
    + +
    + \ No newline at end of file diff --git a/marginalia_nu/src/main/resources/templates/podcast/episode.hdb b/marginalia_nu/src/main/resources/templates/podcast/episode.hdb new file mode 100644 index 00000000..dc1e4306 --- /dev/null +++ b/marginalia_nu/src/main/resources/templates/podcast/episode.hdb @@ -0,0 +1,35 @@ + + + + {{podcastName}}: {{title}} + + + + + + +
    + +
    + + + + diff --git a/marginalia_nu/src/main/resources/templates/podcast/listing.hdb b/marginalia_nu/src/main/resources/templates/podcast/listing.hdb new file mode 100644 index 00000000..0c076762 --- /dev/null +++ b/marginalia_nu/src/main/resources/templates/podcast/listing.hdb @@ -0,0 +1,33 @@ + + + + Podcasts: Nya avsnitt + + + + + + +
    + +
    +
    +

    Podcasts

    +
    + {{#each podcasts}} +
    + {{title}} +
    +
    +

    {{{description}}}

    +
    + {{/each}} +
    +
    + + + diff --git a/marginalia_nu/src/main/resources/templates/podcast/new.hdb b/marginalia_nu/src/main/resources/templates/podcast/new.hdb new file mode 100644 index 00000000..62c41782 --- /dev/null +++ b/marginalia_nu/src/main/resources/templates/podcast/new.hdb @@ -0,0 +1,34 @@ + + + + Podcasts: Nya avsnitt + + + + + + +
    + +
    +
    +

    Nya avsnitt

    +
    + {{#each episodes}} +
    + {{title}} +
    +
    +

    {{podcastName}}
    {{dateUploaded}}

    + +
    + {{/each}} +
    +
    + + + diff --git a/marginalia_nu/src/main/resources/templates/podcast/podcast.hdb b/marginalia_nu/src/main/resources/templates/podcast/podcast.hdb new file mode 100644 index 00000000..8af807ba --- /dev/null +++ b/marginalia_nu/src/main/resources/templates/podcast/podcast.hdb @@ -0,0 +1,38 @@ + + + + Podcasts: {{title}} + + + + + + +
    + +
    +
    +

    {{metadata.title}}

    +

    + {{{metadata.description}}} +

    +

    {{metadata.extLink}}

    +

    Avsnitt

    +
    + {{#each episodes}} +
    + {{title}} +
    +
    +

    {{dateUploaded}}

    +
    + {{/each}} +
    +
    + + + diff --git a/marginalia_nu/src/main/resources/templates/smhi/index.hdb b/marginalia_nu/src/main/resources/templates/smhi/index.hdb new file mode 100644 index 00000000..556d8e7e --- /dev/null +++ b/marginalia_nu/src/main/resources/templates/smhi/index.hdb @@ -0,0 +1,44 @@ + + + + {{title}} + + + + + + + + +
    + +
    + + + + diff --git a/marginalia_nu/src/main/resources/templates/smhi/prognos.hdb b/marginalia_nu/src/main/resources/templates/smhi/prognos.hdb new file mode 100644 index 00000000..274a6fce --- /dev/null +++ b/marginalia_nu/src/main/resources/templates/smhi/prognos.hdb @@ -0,0 +1,73 @@ + + + + Väderprognos för {{plats.namn}} + + + + + + + + + + +
    + +
    +
    +

    {{plats.namn}}

    + + {{#each dygn}} + + + + + + + + + + + {{#each data}} + + + + + + + + {{/each}} + + {{/each}} +
    {{date}}{{{veckodag}}}
    TidTempVindNeder.Moln
    {{time}}{{temp}}{{vind}} ({{byvind}}){{nederbord}} {{nederbordTyp}}{{moln}}
    + + +

    Förklaring

    +

    Molntäcke (Moln.) visas på en skala 0-8, där höga värden indikerar + tjockt molntäcke, och låga värden indikerar blåare skyar.

    + +

    Nederbörd (Neder.) indikeras med förkortningar: + + + + + + + +
    SSnö
    SBSnöblandat regn
    RRegn
    DDimma
    UKRUnderkylt regn
    UKDUnderkyld dimma
    +

    + +

    Källa SMHI

    +

    + All prognosdata hämtas från SMHI:s öppna API:er, under licensen + Creative Commons Erkännande 2.5. + Bäst före {{bastFore}}. +

    +
    + + + diff --git a/marginalia_nu/src/main/resources/templates/status/server-status.hdb b/marginalia_nu/src/main/resources/templates/status/server-status.hdb new file mode 100644 index 00000000..e8217101 --- /dev/null +++ b/marginalia_nu/src/main/resources/templates/status/server-status.hdb @@ -0,0 +1,25 @@ + + + + Server Status + + + + + +
    + +
    +
    +

    Server Status

    + + {{#each status}} +

    + {{server}} - {{status}} +

    + {{/each}} + +
    + + + diff --git a/marginalia_nu/src/main/resources/units.csv b/marginalia_nu/src/main/resources/units.csv new file mode 100644 index 00000000..9dd990c6 --- /dev/null +++ b/marginalia_nu/src/main/resources/units.csv @@ -0,0 +1,62 @@ +30856775800000000,DISTANCE,pc,parsec,parsecs +9460500000000000,DISTANCE,ly,light years,light year +149597870700,DISTANCE,au,astronomical unit +1000,DISTANCE,km,kilometers,kilometer +1,DISTANCE,m,meters,meter +0.1,DISTANCE,dm,decimeters,decimeter +0.01,DISTANCE,cm,centimeters,centimeter +0.001,DISTANCE,mm,millimeters,millimeter +0.9144,DISTANCE,yd,yards,yard +0.0254,DISTANCE,in,inches,inch +0.3048,DISTANCE,ft,feet,foot +0.3048,DISTANCE,ft,feet,foot +1609.344,DISTANCE,miles,mile +1852,DISTANCE,nautical miles,nautical mile +201.168,DISTANCE,furlong,furlongs +1,WEIGHT,kg,kilograms,kilogram +0.001,WEIGHT,g,grams,gram +1000,WEIGHT,metric tons,ton,tons,tonne,tonnes +907.185,WEIGHT,short tons,short ton,imperial ton,imperial tons, +0.45359237,WEIGHT,lb,lbs,pounds +0.0283495231,WEIGHT,oz,ounces,ounce +1,AREA,m^2,square meters,square meter +0.01,AREA,dm^2,square decimeters,square decimeter +0.0001,AREA,cm^2,square centimeters,square centimeter +0.000001,AREA,mm^2,square millimeters,square millimeter +1000000,AREA,km^2,square kilometers,square kilometer +4046.9,AREA,ac,acre,acres +2589988.1103360,AREA,sq mi,mi^2,square miles,square mile +10000,AREA,hectares,hectare +0.09290304,AREA,ft^2,square foot,square feet +0.83612736,AREA,yd^2,square yard,square yards +0.00064516,AREA,in^2,square inch,square inches +1,VOLUME,m^3,cubic meter,cubic meters +1000000000,VOLUME,km^3,cubic kilometer,cubic kilometers +0.001,VOLUME,L,l,dm^3,liter,liters,cubic decimeter,cubic decimeters +0.0001,VOLUME,dl,deciliter,deciliters +0.00001,VOLUME,cl,centiliter,centiliters +0.000001,VOLUME,ml,milliliter,milliliters +0.000001,VOLUME,cm^3,cc,cubic centimeter,cubic centimeters +0.000000001,VOLUME,mm^3,cubic millimeter,cubic millimeters +0.000236588237,VOLUME,us 
cup,cup,cups +0.0000295735296,VOLUME,fl.oz.,fl oz,fluid ounces,fluid ounce +0.028316846592,VOLUME,ft^3,cubic foot,cubic feet +0.000016387064,VOLUME,in^3,cubic inch,cubic inches +0.764554857984,VOLUME,yd^3,cubic yard,cubic yards +0.000473176473,VOLUME,US pint,pint,pints +0.00378541178,VOLUME,gallon,gallons +1,TEMPERATURE,C,c,celsius,centigrade +0,TEMPERATURE,F,f,fahrenheit,fahrenheit +0,TEMPERATURE,K,k,kelvin,kelvins +1,TIME,S,s,second,seconds +0.001,TIME,ms,millisecond +60,TIME,min,minutes +3600,TIME,hour,hours +86400,TIME,day,days +604800,TIME,week,weeks +31557600.0,TIME,year,years +31557600.0,TIME,year,years +315576000.0,TIME,decade,decades +3155760000.0,TIME,century,centuries +1,ANGLE,degree,degrees +57.2957795,ANGLE,radians,radian diff --git a/marginalia_nu/src/test/java/EmptyTest.java b/marginalia_nu/src/test/java/EmptyTest.java new file mode 100644 index 00000000..e789f2cf --- /dev/null +++ b/marginalia_nu/src/test/java/EmptyTest.java @@ -0,0 +1,8 @@ +import org.junit.jupiter.api.Test; + +public class EmptyTest { + @Test + public void test() { + + } +} diff --git a/marginalia_nu/src/test/java/nu/marginalia/gemini/gmi/GemtextDatabaseTest.java b/marginalia_nu/src/test/java/nu/marginalia/gemini/gmi/GemtextDatabaseTest.java new file mode 100644 index 00000000..8a020257 --- /dev/null +++ b/marginalia_nu/src/test/java/nu/marginalia/gemini/gmi/GemtextDatabaseTest.java @@ -0,0 +1,30 @@ +package nu.marginalia.gemini.gmi; + +import nu.marginalia.wmsa.memex.model.MemexNodeUrl; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.util.Optional; + +class GemtextDatabaseTest { + + @Test + public void test() { + var db = new GemtextDatabase(new MemexNodeUrl("/test.gmi"), new String[] { + "=> / foo", + "=> /x bar", + "=> /y baz", + "=> /z" + }); + verifyResult("foo", db.getLinkData(new MemexNodeUrl("/"))); + verifyResult("bar", db.getLinkData(new MemexNodeUrl("/x"))); + verifyResult("baz", db.getLinkData(new MemexNodeUrl("/y"))); + 
verifyResult("", db.getLinkData(new MemexNodeUrl("/z"))); + Assertions.assertFalse(db.getLinkData(new MemexNodeUrl("/w")).isPresent()); + } + + void verifyResult(String expected, Optional actual) { + Assertions.assertTrue(actual.isPresent(), () -> "No value found, expected " + expected); + Assertions.assertEquals(expected, actual.get()); + } +} \ No newline at end of file diff --git a/marginalia_nu/src/test/java/nu/marginalia/gemini/gmi/GemtextDocumentTest.java b/marginalia_nu/src/test/java/nu/marginalia/gemini/gmi/GemtextDocumentTest.java new file mode 100644 index 00000000..5dd8f252 --- /dev/null +++ b/marginalia_nu/src/test/java/nu/marginalia/gemini/gmi/GemtextDocumentTest.java @@ -0,0 +1,88 @@ +package nu.marginalia.gemini.gmi; + +import nu.marginalia.gemini.gmi.line.GemtextLink; +import nu.marginalia.wmsa.memex.model.MemexNodeUrl; +import nu.marginalia.wmsa.memex.model.MemexUrl; +import org.junit.jupiter.api.Test; + +import java.util.Arrays; + +import static org.junit.jupiter.api.Assertions.*; + +class GemtextDocumentTest { + + @Test + void testEmpty() { + var document = GemtextDocument.of(new MemexNodeUrl("/test.gmi"), ""); + assertEquals("/test.gmi", document.getTitle()); + assertTrue(document.getLinks().isEmpty()); + } + + @Test + void testParseTombstone() { + var lines = new String[] { + "# Tombstone", +"", + "This special file contains information about removed resources.", +"", +"# Removed links", +"=> /dead.gmi It was never here, I swear", + "=> /dead2.png Removed through an act of God", + "=> /worklog.gmi Old and unused file.", + "=> /todo.gmi Empty file", + "=> /search-about.gmi Confusingly gemini-specific", + "=> /05-test.gmi Cursed testing file"}; + + var document = GemtextDatabase.of(new MemexNodeUrl("/test.gmi"), lines); + Arrays.stream(document.getLines()).forEach(System.out::println); + document.keys().forEach(k -> System.out.println(k + "-" + document.getLinkData(new MemexNodeUrl(k)).orElse(""))); + +} + @Test + void testVanilla() { + var 
document = GemtextDocument.of(new MemexNodeUrl("/test.gmi"), + "# Test Document", + "=> /foo.gmi\tMy foos", + "=>/bar.gmi\tMy bars", + "=>/baz.gmi", + "=>/foobar.gmi ", + "=>/volvo240.png hey cool car right", + "=>", + "=> ", + " => ", + "## Goodbye", + "... and good luck"); + assertEquals("Test Document", document.getTitle()); + Arrays.stream(document.getLines()).forEach(System.out::println); + assertEquals(5, document.getLinks().size()); + document.getLinks().forEach(System.out::println); + + assertArrayEquals(new String[] { + "/foo.gmi", "/bar.gmi", "/baz.gmi", "/foobar.gmi", "/volvo240.png" + }, + document.getLinks().stream().map(GemtextLink::getUrl).map(MemexUrl::getUrl).toArray()); + + assertArrayEquals(new String[] {"My foos", "My bars", null, null, "hey cool car right"}, + document.getLinks().stream().map(GemtextLink::getTitle).toArray()); + } + + + + @Test + void testTasks() { + var document = GemtextDocument.of(new MemexNodeUrl("/test.gmi"), + "# Test Document", + "- Go shopping", + "-- Milk", + "-- Eggs", + "-- Bacon", + "--- If they have organic, buy two", + "- Go dancing", + "Stuff", + "- Go dancing again"); + assertEquals("Test Document", document.getTitle()); + Arrays.stream(document.getLines()).forEach(System.out::println); + document.getOpenTopTasks().values().forEach(System.out::println); + } + +} \ No newline at end of file diff --git a/marginalia_nu/src/test/java/nu/marginalia/gemini/gmi/parser/GemtextTaskParserTest.java b/marginalia_nu/src/test/java/nu/marginalia/gemini/gmi/parser/GemtextTaskParserTest.java new file mode 100644 index 00000000..62a62082 --- /dev/null +++ b/marginalia_nu/src/test/java/nu/marginalia/gemini/gmi/parser/GemtextTaskParserTest.java @@ -0,0 +1,18 @@ +package nu.marginalia.gemini.gmi.parser; + +import nu.marginalia.wmsa.memex.model.MemexNodeHeadingId; +import nu.marginalia.wmsa.memex.model.MemexNodeTaskId; +import org.junit.jupiter.api.Test; + +class GemtextTaskParserTest { + + @Test + void parse() { + 
System.out.println(GemtextTaskParser.parse("-task", new MemexNodeHeadingId(0), new MemexNodeTaskId(0))); + System.out.println(GemtextTaskParser.parse("- task", new MemexNodeHeadingId(0), new MemexNodeTaskId(0))); + System.out.println(GemtextTaskParser.parse("--task", new MemexNodeHeadingId(0), new MemexNodeTaskId(0))); + System.out.println(GemtextTaskParser.parse("-task(/)", new MemexNodeHeadingId(0), new MemexNodeTaskId(0))); + System.out.println(GemtextTaskParser.parse("-task(-)", new MemexNodeHeadingId(0), new MemexNodeTaskId(0))); + System.out.println(GemtextTaskParser.parse("-task(?)(x)", new MemexNodeHeadingId(0), new MemexNodeTaskId(0))); + } +} \ No newline at end of file diff --git a/marginalia_nu/src/test/java/nu/marginalia/util/SeekDictionaryTest.java b/marginalia_nu/src/test/java/nu/marginalia/util/SeekDictionaryTest.java new file mode 100644 index 00000000..1987da6f --- /dev/null +++ b/marginalia_nu/src/test/java/nu/marginalia/util/SeekDictionaryTest.java @@ -0,0 +1,31 @@ +package nu.marginalia.util; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +class SeekDictionaryTest { + + @Test + public void testSeek() { + var dict = SeekDictionary.of((int[] x) -> x.length); + + for (int i = 0; i < 10000;) { + int j = (int)(1 + 9 * Math.random()); + int[] block = new int[j]; + for (int k = 0; k < j; k++) { + block[k] = i+k; + } + dict.add(block); + i+=j; + } + + o: for (int i = 0; i < 10000; i++) { + int[] vals = dict.bankForOffset(i); + for (var v : vals) { + if (v == i) continue o; + } + Assertions.fail("Could not find " + i); + } + } + +} \ No newline at end of file diff --git a/marginalia_nu/src/test/java/nu/marginalia/util/TestUtil.java b/marginalia_nu/src/test/java/nu/marginalia/util/TestUtil.java new file mode 100644 index 00000000..48e5202a --- /dev/null +++ b/marginalia_nu/src/test/java/nu/marginalia/util/TestUtil.java @@ -0,0 +1,58 @@ +package nu.marginalia.util; + +import com.zaxxer.hikari.HikariConfig; +import 
com.zaxxer.hikari.HikariDataSource; +import lombok.SneakyThrows; +import org.junit.jupiter.api.Assertions; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.sql.Connection; +import java.sql.DriverManager; + +public class TestUtil { + private static final int TEST_PORT_BASE = 6000; + private static final int TEST_PORT_RANGE = 2000; + + public static int getPort() { + return TEST_PORT_BASE + (int)(TEST_PORT_RANGE * Math.random()); + } + private final static Logger logger = LoggerFactory.getLogger(TestUtil.class); + + @SneakyThrows + public static HikariDataSource getConnection() { + HikariConfig config = new HikariConfig(); + config.setJdbcUrl("jdbc:mysql://localhost:3306/WMSA_test"); + config.setUsername("wmsa"); + config.setPassword("wmsa"); + config.setMaximumPoolSize(16); + config.addDataSourceProperty("cachePrepStmts", "true"); + config.addDataSourceProperty("prepStmtCacheSize", "250"); + config.addDataSourceProperty("prepStmtCacheSqlLimit", "2048"); + + return new HikariDataSource(config); + } + @SneakyThrows + public static void evalScript(HikariDataSource hds, String scriptFile) { + + try (var conn = hds.getConnection()) { + + logger.info("Running script {}", scriptFile); + try (var scriptStream = ClassLoader.getSystemResourceAsStream(scriptFile); + var stmt = conn.createStatement()) { + for (String s : new String(scriptStream.readAllBytes()).split(";")) { + if (!s.isBlank()) { + try { + Assertions.assertTrue(stmt.executeUpdate(s) >= 0); + } catch (Exception ex) { + logger.error("Failed to execute\n{}" + s, ex); + } + + } + } + } + } + } + + +} diff --git a/marginalia_nu/src/test/java/nu/marginalia/util/btree/BTreeWriterTest.java b/marginalia_nu/src/test/java/nu/marginalia/util/btree/BTreeWriterTest.java new file mode 100644 index 00000000..694cf09a --- /dev/null +++ b/marginalia_nu/src/test/java/nu/marginalia/util/btree/BTreeWriterTest.java @@ -0,0 +1,329 @@ +package nu.marginalia.util.btree; + +import 
nu.marginalia.util.btree.model.BTreeContext; +import nu.marginalia.util.btree.model.BTreeHeader; +import nu.marginalia.util.multimap.MultimapFileLong; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.io.RandomAccessFile; +import java.nio.channels.FileChannel; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.HashSet; +import java.util.Set; +import java.util.StringJoiner; + +import static org.junit.jupiter.api.Assertions.*; + +class BTreeWriterTest { + + BTreeContext ctx = new BTreeContext(4, 2, 0xFFFF_FFFF_FFFF_FFFFL, 3); + BTreeWriter writer = new BTreeWriter(null, ctx); + + Logger logger = LoggerFactory.getLogger(getClass()); + @Test + void testSmallDataBlock() { + var header = writer.makeHeader(1024, ctx.BLOCK_SIZE_WORDS()/2); + assertEquals(1024 + BTreeHeader.BTreeHeaderSizeLongs, header.dataOffsetLongs()); + assertEquals(header.dataOffsetLongs(), header.indexOffsetLongs()); + } + + @Test + void testLayerCount() { + int wsq = ctx.BLOCK_SIZE_WORDS()*ctx.BLOCK_SIZE_WORDS(); + int wcub = ctx.BLOCK_SIZE_WORDS()*ctx.BLOCK_SIZE_WORDS()*ctx.BLOCK_SIZE_WORDS(); + + assertEquals(2, writer.makeHeader(1024, wsq-1).layers()); + assertEquals(2, writer.makeHeader(1024, wsq).layers()); + assertEquals(3, writer.makeHeader(1024, wsq+1).layers()); + + assertEquals(3, writer.makeHeader(1024, wcub-1).layers()); + assertEquals(3, writer.makeHeader(1024, wcub).layers()); + assertEquals(4, writer.makeHeader(1024, wcub+1).layers()); + } + + @Test + void testLayerOffset() { + int wcub = ctx.BLOCK_SIZE_WORDS()*ctx.BLOCK_SIZE_WORDS()*ctx.BLOCK_SIZE_WORDS(); + System.out.println(writer.makeHeader(1025, wcub).relativeLayerOffset(ctx, 0)); + System.out.println(writer.makeHeader(1025, wcub).relativeLayerOffset(ctx, 1)); + System.out.println(writer.makeHeader(1025, wcub).relativeLayerOffset(ctx, 2)); + + for (int i = 0; i < 1024; i++) { + var header = writer.makeHeader(0, i); + + 
+ printTreeLayout(i, header, ctx); + + if (header.layers() >= 1) { + assertEquals(1, ctx.layerSize(i, header.layers() - 1) / ctx.BLOCK_SIZE_WORDS()); + } + } + } + + private void printTreeLayout(int numEntries, BTreeHeader header, BTreeContext ctx) { + StringJoiner sj = new StringJoiner(","); + for (int l = 0; l < header.layers(); l++) { + sj.add(""+ctx.layerSize(numEntries, l)/ctx.BLOCK_SIZE_WORDS()); + } + System.out.println(numEntries + ":" + sj); + } + + @Test + public void testWriteEntrySize2() throws IOException { + + var tempFile = Files.createTempFile(Path.of("/tmp"), "tst", "dat"); + Set toPut = new HashSet<>(); + + for (int i = 0; i < 500; i++) { + while (!toPut.add((int)(Integer.MAX_VALUE * Math.random()))); + } + + int[] data = toPut.stream().mapToInt(Integer::valueOf).sorted().toArray(); + + try { + RandomAccessFile raf = new RandomAccessFile(tempFile.toFile(), "rw"); + MultimapFileLong mmf = new MultimapFileLong(raf, FileChannel.MapMode.READ_WRITE, 10000, 1000, true); + + { + var writer = new BTreeWriter(mmf, ctx); + writer.write(0, toPut.size(), (offset) -> { + for (int i = 0; i < data.length; i++) { + mmf.put(offset + 2L*i, data[i]); + mmf.put(offset + 2L*i + 1, i); + } + }); + mmf.force(); + } + + { + var reader = new BTreeReader(mmf, ctx); + var header = reader.getHeader(0); + for (int i = 0; i < data.length; i++) { + long offset = reader.offsetForEntry(header, data[i]); + assertTrue(offset >= 0, "Negative offset for " + i + " -> " + offset); + assertEquals(i, mmf.get(offset+1)); + } + } + } catch (Exception e) { + e.printStackTrace(); + } finally { + Files.delete(tempFile); + } + } + + @Test + public void testWriteEntrySize2Small() throws IOException { + + var tempFile = Files.createTempFile(Path.of("/tmp"), "tst", "dat"); + Set toPut = new HashSet<>(); + + for (int i = 0; i < 5; i++) { + while (!toPut.add((int)(Integer.MAX_VALUE * Math.random()))); + } + + int[] data = toPut.stream().mapToInt(Integer::valueOf).sorted().toArray(); + + try { + 
RandomAccessFile raf = new RandomAccessFile(tempFile.toFile(), "rw"); + MultimapFileLong mmf = new MultimapFileLong(raf, FileChannel.MapMode.READ_WRITE, 10000, 1000, true); + + { + var writer = new BTreeWriter(mmf, ctx); + writer.write( 0, toPut.size(), (offset) -> { + for (int i = 0; i < data.length; i++) { + mmf.put(offset + 2L*i, data[i]); + mmf.put(offset + 2L*i + 1, i); + } + }); + mmf.force(); + } + + { + var reader = new BTreeReader(mmf, ctx); + var header = reader.getHeader(0); + for (int i = 0; i < data.length; i++) { + long offset = reader.offsetForEntry(header, data[i]); + assertTrue(offset >= 0, "Negative offset for " + i + " -> " + offset); + assertEquals(i, mmf.get(offset+1)); + } + + for (int i = 0; i < 500; i++) { + long val = (long)(Long.MAX_VALUE * Math.random()); + while (toPut.contains(val)) val = (long)(Long.MAX_VALUE * Math.random()); + assertEquals(-1, reader.offsetForEntry(header, val)); + } + } + } catch (Exception e) { + e.printStackTrace(); + } finally { + Files.delete(tempFile); + } + } + + + @Test + public void testWriteEqualityNotMasked() throws IOException { + for (int bs = 2; bs <= 4; bs++) { + var tempFile = Files.createTempFile(Path.of("/tmp"), "tst", "dat"); + Set toPut = new HashSet<>(); + + var ctx = new BTreeContext(5, 1, ~0, bs); + + for (int i = 0; i < 500; i++) { + while (!toPut.add((long) (Long.MAX_VALUE * Math.random()))) ; + } + + long[] data = toPut.stream().mapToLong(Long::valueOf).sorted().toArray(); + + try (MultimapFileLong mmf = MultimapFileLong.forOutput(tempFile, 1000)) { + { + var writer = new BTreeWriter(mmf, ctx); + writer.write(0, toPut.size(), (offset) -> { + for (int i = 0; i < data.length; i++) { + mmf.put(offset + i, data[i]); + } + }); + mmf.force(); + } + + { + var reader = new BTreeReader(mmf, ctx); + var header = reader.getHeader(0); + + printTreeLayout(toPut.size(), header, ctx); + + for (int i = 0; i < data.length; i++) { + long offset = reader.offsetForEntry(header, data[i]); + assertTrue(offset >= 
0, "Negative offset for " + i + " -> " + offset); + assertEquals(data[i], mmf.get(offset)); + } + + for (int i = 0; i < 500; i++) { + long val = (long) (Long.MAX_VALUE * Math.random()); + while (toPut.contains(val)) val = (long) (Long.MAX_VALUE * Math.random()); + assertEquals(-1, reader.offsetForEntry(header, val)); + } + } + } catch (Exception e) { + e.printStackTrace(); + } finally { + Files.delete(tempFile); + } + } + } + + @Test + public void testWriteEqualityMasked() throws IOException { + + for (int bs = 2; bs <= 4; bs++) { + var tempFile = Files.createTempFile(Path.of("/tmp"), "tst", "dat"); + Set toPut = new HashSet<>(); + + long mask = 0xFFFF_FFFF_0000_0000L; + var ctx = new BTreeContext(5, 1, mask, bs); + + for (int i = 0; i < 500; i++) { + while (!toPut.add((long) (Long.MAX_VALUE * Math.random()))) ; + } + + long[] data = toPut.stream().mapToLong(Long::valueOf).sorted().toArray(); + + try (MultimapFileLong mmf = MultimapFileLong.forOutput(tempFile, 1000)) { + { + var writer = new BTreeWriter(mmf, ctx); + writer.write(0, toPut.size(), (offset) -> { + for (int i = 0; i < data.length; i++) { + mmf.put(offset + i, data[i]); + } + }); + mmf.force(); + } + + { + var reader = new BTreeReader(mmf, ctx); + var header = reader.getHeader(0); + + printTreeLayout(toPut.size(), header, ctx); + + for (int i = 0; i < data.length; i++) { + long offset = reader.offsetForEntry(header, data[i] & mask); + assertTrue(offset >= 0, "Negative offset for " + i + " -> " + offset); + assertEquals(data[i], mmf.get(offset)); + } + + for (int i = 0; i < 500; i++) { + long val = (long) (Long.MAX_VALUE * Math.random()); + while (toPut.contains(val)) val = (long) (Long.MAX_VALUE * Math.random()); + assertEquals(-1, reader.offsetForEntry(header, val & mask)); + } + } + } catch (Exception e) { + e.printStackTrace(); + } finally { + Files.delete(tempFile); + } + } + } + + @Test + public void testWriteTwoEqualityMasked() throws IOException { + + for (int bs = 2; bs <= 4; bs++) { + var 
tempFile = Files.createTempFile(Path.of("/tmp"), "tst", "dat"); + Set toPut = new HashSet<>(); + + long mask = 0xFFFF_FFFF_0000_0000L; + var ctx = new BTreeContext(5, 2, mask, bs); + + for (int i = 0; i < 500; i++) { + while (!toPut.add((long) (Long.MAX_VALUE * Math.random()))) ; + } + + long[] data = toPut.stream().mapToLong(Long::valueOf).sorted().toArray(); + + try (MultimapFileLong mmf = MultimapFileLong.forOutput(tempFile, 1000)) { + { + var writer = new BTreeWriter(mmf, ctx); + writer.write(0, toPut.size(), (offset) -> { + for (int i = 0; i < data.length; i++) { + mmf.put(offset + i*2L, data[i]); + mmf.put(offset + i*2L+1, i); + } + }); + mmf.force(); + } + + { + var reader = new BTreeReader(mmf, ctx); + var header = reader.getHeader(0); + + printTreeLayout(toPut.size(), header, ctx); + + for (int i = 0; i < data.length; i++) { + long offset = reader.offsetForEntry(header, data[i] & mask); + assertTrue(offset >= 0, "Negative offset for " + i + " -> " + offset); + assertEquals(data[i], mmf.get(offset)); + assertEquals(i, mmf.get(offset+1)); + } + + for (int i = 0; i < 500; i++) { + long val = (long) (Long.MAX_VALUE * Math.random()); + while (toPut.contains(val)) val = (long) (Long.MAX_VALUE * Math.random()); + assertEquals(-1, reader.offsetForEntry(header, val & mask)); + } + } + } catch (Exception e) { + e.printStackTrace(); + } finally { + Files.delete(tempFile); + } + } + } + + + +} \ No newline at end of file diff --git a/marginalia_nu/src/test/java/nu/marginalia/util/graphics/dithering/FloydSteinbergDitherTest.java b/marginalia_nu/src/test/java/nu/marginalia/util/graphics/dithering/FloydSteinbergDitherTest.java new file mode 100644 index 00000000..b9510517 --- /dev/null +++ b/marginalia_nu/src/test/java/nu/marginalia/util/graphics/dithering/FloydSteinbergDitherTest.java @@ -0,0 +1,32 @@ +package nu.marginalia.util.graphics.dithering; + +import org.junit.jupiter.api.Test; + +import javax.imageio.ImageIO; +import java.io.File; +import java.io.IOException; + 
+class FloydSteinbergDitherTest { + + @Test + public void test() throws IOException { + convert("/home/vlofgren/Work/dither/volvo.jpg", "/home/vlofgren/Work/dither/volvo-raster.png"); + convert("/home/vlofgren/Work/dither/dog.jpg", "/home/vlofgren/Work/dither/dog-raster.png"); + convert("/home/vlofgren/Work/dither/robocop.jpg", "/home/vlofgren/Work/dither/robocop-raster.png"); + convert("/home/vlofgren/Work/dither/socrates.jpeg", "/home/vlofgren/Work/dither/socrates-raster.png"); + + +// convert("C:\\Users\\vlofg\\Documents\\volvo.jpg", +// "C:\\Users\\vlofg\\Documents\\volvo-raster.png"); +// convert("C:\\Users\\vlofg\\Documents\\socrates.jpg", +// "C:\\Users\\vlofg\\Documents\\socrates-raster.png"); +// convert("C:\\Users\\vlofg\\Documents\\goya_nude_maja.jpg", +// "C:\\Users\\vlofg\\Documents\\goya_nude_maja-raster.png"); + } + + void convert(String in, String out) throws IOException { + var result = new FloydSteinbergDither(Palettes.MARGINALIA_PALETTE, 640, 480).convert(ImageIO.read(new File(in))); + + ImageIO.write(result, "png", new File(out)); + } +} \ No newline at end of file diff --git a/marginalia_nu/src/test/java/nu/marginalia/util/hash/LongPairHashMapTest.java b/marginalia_nu/src/test/java/nu/marginalia/util/hash/LongPairHashMapTest.java new file mode 100644 index 00000000..326c9b15 --- /dev/null +++ b/marginalia_nu/src/test/java/nu/marginalia/util/hash/LongPairHashMapTest.java @@ -0,0 +1,58 @@ +package nu.marginalia.util.hash; + +import nu.marginalia.util.multimap.MultimapFileLong; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.io.RandomAccessFile; +import java.nio.channels.FileChannel; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.HashSet; +import java.util.Set; + +class LongPairHashMapTest { + + @Test + public void test() throws IOException { + + var tempFile = Files.createTempFile(Path.of("/tmp"), "tst", "dat"); + Set toPut = new HashSet<>(); + + 
for (int i = 0; i < 500; i++) { + while (!toPut.add((int)(Integer.MAX_VALUE * Math.random()))); + } + + try { + RandomAccessFile raf = new RandomAccessFile(tempFile.toFile(), "rw"); + MultimapFileLong mmf = new MultimapFileLong(raf, FileChannel.MapMode.READ_WRITE, 10000, 1000, true); + var lphm = new LongPairHashMap(mmf, 1024); + toPut.forEach(i -> { + lphm.put(new LongPairHashMap.CellData(i, i)); + }); + mmf.force(); + lphm.close(); + + RandomAccessFile raf2 = new RandomAccessFile(tempFile.toFile(), "rw"); + MultimapFileLong mmf2 = new MultimapFileLong(raf2, FileChannel.MapMode.READ_WRITE, 10000, 1000, true); + var lphm2 = new LongPairHashMap(mmf2); + toPut.forEach(i -> { + Assertions.assertTrue(lphm2.get(i).isSet()); + Assertions.assertEquals(i, (int) lphm2.get(i).getKey()); + Assertions.assertEquals(i, (int) lphm2.get(i).getOffset()); + }); + + for (int i = 0; i < 10_000_000; i++) { + if (!toPut.contains(i)) { + Assertions.assertFalse(lphm2.get(i).isSet()); + } + } + } catch (Exception e) { + e.printStackTrace(); + } finally { + Files.delete(tempFile); + } + } + +} \ No newline at end of file diff --git a/marginalia_nu/src/test/java/nu/marginalia/util/test/TestUtil.java b/marginalia_nu/src/test/java/nu/marginalia/util/test/TestUtil.java new file mode 100644 index 00000000..2c5407ba --- /dev/null +++ b/marginalia_nu/src/test/java/nu/marginalia/util/test/TestUtil.java @@ -0,0 +1,31 @@ +package nu.marginalia.util.test; + +import org.junit.jupiter.api.Assertions; + +import java.io.File; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Arrays; + +public class TestUtil { + private static boolean isTempDir(Path dir) { + return dir.startsWith("/tmp") || dir.toString().contains("Temp"); + } + public static void clearTempDir(Path dir) { + if (!isTempDir(dir)) { + throw new IllegalArgumentException("Refusing to recursively delete directory with that name"); + } + if (Files.isDirectory(dir)) { + for (File f : dir.toFile().listFiles()) { + File[] 
files = f.listFiles(); + if (files != null) { + Arrays.stream(files).map(File::toPath).forEach(TestUtil::clearTempDir); + } + System.out.println("Deleting " + f); + f.delete(); + } + } + System.out.println("Deleting " + dir); + dir.toFile().delete(); + } +} diff --git a/marginalia_nu/src/test/java/nu/marginalia/wmsa/configuration/server/ServiceTest.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/configuration/server/ServiceTest.java new file mode 100644 index 00000000..fe709da9 --- /dev/null +++ b/marginalia_nu/src/test/java/nu/marginalia/wmsa/configuration/server/ServiceTest.java @@ -0,0 +1,82 @@ +package nu.marginalia.wmsa.configuration.server; + +import com.zaxxer.hikari.HikariDataSource; +import lombok.SneakyThrows; +import nu.marginalia.util.TestUtil; +import nu.marginalia.wmsa.client.exception.RemoteException; +import nu.marginalia.wmsa.edge.assistant.EdgeAssistantService; +import nu.marginalia.wmsa.edge.assistant.client.AssistantClient; +import nu.marginalia.wmsa.edge.assistant.dict.DictionaryService; +import nu.marginalia.wmsa.edge.assistant.dict.SpellChecker; +import nu.marginalia.wmsa.edge.assistant.eval.MathParser; +import nu.marginalia.wmsa.edge.assistant.eval.Units; +import nu.marginalia.wmsa.edge.assistant.screenshot.ScreenshotService; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import spark.Spark; + +import static nu.marginalia.util.TestUtil.getConnection; +import static org.junit.Assert.assertEquals; + +class ServiceTest { + static EdgeAssistantService service; + static AssistantClient client; + + private static HikariDataSource dataSource; + + static int testPort = TestUtil.getPort(); + + @SneakyThrows + public static HikariDataSource provideConnection() { + return getConnection(); + } + + + @BeforeAll + public static void setUpClass() { + Spark.port(testPort); + System.setProperty("service-name", "test"); + + dataSource = provideConnection(); + 
dataSource.setKeepaliveTime(100); + dataSource.setIdleTimeout(100); + + client = new AssistantClient(); + client.setServiceRoute("127.0.0.1", testPort); + + service = new EdgeAssistantService("127.0.0.1", + testPort, + new Initialization(), null, + new DictionaryService(dataSource, new SpellChecker()), + new MathParser(), + new Units(new MathParser()), + null, + null, + new ScreenshotService(null), null); + + Spark.awaitInitialization(); + } + + @Test + public void testDenyXPublic() { + try { + client.ping(Context.internal().treatAsPublic()).blockingSubscribe(); + Assertions.fail("Expected exception"); + } + catch (RemoteException ex) { + // + } + } + @Test + public void testAllowInternalNoXPublic() { + client.ping(Context.internal()).blockingSubscribe(); + } + + @Test + public void testAllowOnPublic() { + Assertions.assertEquals("EdgeAssistantService", client.who(Context.internal()).blockingFirst()); + Assertions.assertEquals("EdgeAssistantService", client.who(Context.internal().treatAsPublic()).blockingFirst()); + } + +} \ No newline at end of file diff --git a/marginalia_nu/src/test/java/nu/marginalia/wmsa/data_store/AssistantTest.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/data_store/AssistantTest.java new file mode 100644 index 00000000..0e0d4509 --- /dev/null +++ b/marginalia_nu/src/test/java/nu/marginalia/wmsa/data_store/AssistantTest.java @@ -0,0 +1,152 @@ +package nu.marginalia.wmsa.data_store; + +import com.zaxxer.hikari.HikariDataSource; +import lombok.SneakyThrows; +import nu.marginalia.util.TestUtil; +import nu.marginalia.wmsa.client.exception.RemoteException; +import nu.marginalia.wmsa.configuration.server.Context; +import nu.marginalia.wmsa.configuration.server.Initialization; +import nu.marginalia.wmsa.edge.assistant.dict.DictionaryService; +import nu.marginalia.wmsa.edge.assistant.dict.SpellChecker; +import nu.marginalia.wmsa.edge.assistant.eval.MathParser; +import nu.marginalia.wmsa.edge.assistant.eval.Units; +import 
nu.marginalia.wmsa.edge.assistant.EdgeAssistantService; +import nu.marginalia.wmsa.edge.assistant.client.AssistantClient; +import nu.marginalia.wmsa.edge.assistant.screenshot.ScreenshotService; +import nu.marginalia.wmsa.edge.search.UnitConversion; +import org.junit.jupiter.api.*; +import org.junit.jupiter.api.parallel.Execution; +import org.junit.jupiter.api.parallel.ExecutionMode; +import org.junit.jupiter.api.parallel.ResourceAccessMode; +import org.junit.jupiter.api.parallel.ResourceLock; +import spark.Spark; + +import static nu.marginalia.util.TestUtil.getConnection; +import static org.junit.Assert.fail; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +@ResourceLock(value = "mariadb", mode = ResourceAccessMode.READ_WRITE) +@Execution(ExecutionMode.SAME_THREAD) +@Tag("db") +class AssistantTest { + static EdgeAssistantService service; + static AssistantClient client; + + private static HikariDataSource dataSource; + + static int testPort = TestUtil.getPort(); + + @SneakyThrows + public static HikariDataSource provideConnection() { + return getConnection(); + } + + + @BeforeAll + public static void setUpClass() { + Spark.port(testPort); + System.setProperty("service-name", "test"); + + dataSource = provideConnection(); + dataSource.setKeepaliveTime(100); + dataSource.setIdleTimeout(100); + + client = new AssistantClient(); + client.setServiceRoute("127.0.0.1", testPort); + + service = new EdgeAssistantService("127.0.0.1", + testPort, + new Initialization(), null, + new DictionaryService(dataSource, new SpellChecker()), + new MathParser(), + new Units(new MathParser()), + null, null, + new ScreenshotService(null), null); + + Spark.awaitInitialization(); + } + + @BeforeEach + public void clearDb() { + } + + @SneakyThrows + @AfterAll + public static void tearDownAll() { + dataSource.close(); + Spark.awaitStop(); + } + + @Test + public void testEncyclopedia() { + var result = 
client.encyclopediaLookup(Context.internal(), "plato").blockingFirst(); + System.out.println(result); + assertTrue(result.entries.size() >= 1); + } + @Test + public void testSpellCheck() { + var result = client.spellCheck(Context.internal(), "plato").blockingFirst(); + System.out.println(result); + } + @Test + public void testDictionary() { + var result = client.dictionaryLookup(Context.internal(), "adiabatic").blockingFirst(); + System.out.println(result); + assertTrue(result.entries.size() > 1); + } + + @Test + public void testDictionaryNoQuery() { + var result = client.dictionaryLookup(Context.internal(), "vlofgren").blockingFirst(); + System.out.println(result); + assertTrue(result.entries.isEmpty()); + } + + @Test + public void testEncyclopediaNoQuery() { + var result = client.dictionaryLookup(Context.internal(), "vlofgren").blockingFirst(); + System.out.println(result); + assertTrue(result.entries.isEmpty()); + } + + @Test + public void testConvertUnitsWithParser() { + var conversion = new UnitConversion(client); + assertEquals("0.3 m", conversion.tryConversion(Context.internal(), "30 cm in m").get()); + assertEquals("500 m", conversion.tryConversion(Context.internal(), "0.5 km in m").get()); + assertEquals("500 m", conversion.tryConversion(Context.internal(), "0.1+0.4 km in m").get()); + assertTrue(conversion.tryConversion(Context.internal(), "0.5 km in F").isEmpty()); + assertTrue(conversion.tryConversion(Context.internal(), "plato").isEmpty()); + } + + @Test + public void testConvertUnits() { + assertEquals("5 m", client.unitConversion(Context.internal(), "500", "cm", "meters").blockingFirst()); + } + + @Test + public void testEvalmath() { + assertEquals("300", client.evalMath(Context.internal(), "3*10^2").blockingFirst()); + } + + @Test + public void testEvalWithParser() { + var conversion = new UnitConversion(client); + assertEquals("305", conversion.tryEval(Context.internal(), "300+5").get()); + assertEquals("1.772", 
conversion.tryEval(Context.internal(), "sqrt(pi)").get()); + + } + + + @Test + public void testConvertUnitsWeirdError() { + try { + client.unitConversion(Context.internal(), "500", "kg", "meters").blockingFirst(); + fail("Wanted exception"); + } + catch (RemoteException ex) { + + } + } +} \ No newline at end of file diff --git a/marginalia_nu/src/test/java/nu/marginalia/wmsa/data_store/DataStoreServiceTest.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/data_store/DataStoreServiceTest.java new file mode 100644 index 00000000..4f5e7b87 --- /dev/null +++ b/marginalia_nu/src/test/java/nu/marginalia/wmsa/data_store/DataStoreServiceTest.java @@ -0,0 +1,173 @@ +package nu.marginalia.wmsa.data_store; + +import com.zaxxer.hikari.HikariDataSource; +import io.reactivex.rxjava3.functions.Consumer; +import lombok.SneakyThrows; +import nu.marginalia.util.TestUtil; +import nu.marginalia.wmsa.configuration.server.Context; +import nu.marginalia.wmsa.configuration.server.Initialization; +import nu.marginalia.wmsa.data_store.client.DataStoreClient; +import nu.marginalia.wmsa.edge.data.dao.EdgeDataStoreDaoImpl; +import nu.marginalia.wmsa.edge.model.*; +import org.eclipse.jetty.util.UrlEncoded; +import org.junit.jupiter.api.*; +import org.junit.jupiter.api.parallel.Execution; +import org.junit.jupiter.api.parallel.ExecutionMode; +import org.junit.jupiter.api.parallel.ResourceAccessMode; +import org.junit.jupiter.api.parallel.ResourceLock; +import spark.Spark; + +import java.net.URISyntaxException; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.util.ArrayList; +import java.util.List; + +import static nu.marginalia.util.TestUtil.evalScript; +import static nu.marginalia.util.TestUtil.getConnection; +import static org.junit.jupiter.api.Assertions.*; + +@ResourceLock(value = "mariadb", mode = ResourceAccessMode.READ_WRITE) +@Execution(ExecutionMode.SAME_THREAD) +@Tag("db") +class DataStoreServiceTest { + static DataStoreService service; + static 
DataStoreClient client; + + private static HikariDataSource dataSource; + private static EdgeDataStoreService edgeService; + + static int testPort = TestUtil.getPort(); + private static EdgeDataStoreDaoImpl edgeDataStore; + + @SneakyThrows + public static HikariDataSource provideConnection() { + return getConnection(); + } + + + @BeforeAll + public static void setUpClass() { + Spark.port(testPort); + System.setProperty("service-name", "test"); + + dataSource = provideConnection(); + dataSource.setKeepaliveTime(100); + dataSource.setIdleTimeout(100); + + client = new DataStoreClient(); + client.setServiceRoute("127.0.0.1", testPort); + + edgeDataStore = new EdgeDataStoreDaoImpl(dataSource); + edgeService = new EdgeDataStoreService(edgeDataStore); + service = new DataStoreService("127.0.0.1", + testPort, + new FileRepository(), + dataSource, + edgeService, + new Initialization(), null + ); + + Spark.awaitInitialization(); + } + + @SneakyThrows + @BeforeEach + public void clearDb() { + edgeDataStore.clearCaches(); + + evalScript(dataSource, "sql/data-store-init.sql"); + evalScript(dataSource, "sql/edge-crawler-cache.sql"); + + try (var connection = dataSource.getConnection()) { + + try (var stmt = connection.createStatement()) { + Assertions.assertTrue(stmt.executeUpdate("DELETE FROM EC_URL") >= 0); + Assertions.assertTrue(stmt.executeUpdate("DELETE FROM EC_DOMAIN_LINK") >= 0); + Assertions.assertTrue(stmt.executeUpdate("DELETE FROM EC_DOMAIN") >= 0); + } + connection.commit(); + } + } + + @SneakyThrows + @AfterAll + public static void tearDownAll() { + dataSource.close(); + Spark.awaitStop(); + } + + @Test + public void test() { + client.offerJson(Context.internal(), String.class, "Hello World", "test", "aaa").blockingSubscribe(); + assertEquals("Hello World", + client.getJson(Context.internal(), String.class, "test", "aaa").blockingFirst()); + } + + @Test + public void testUnderscore() { + client.offerJson(Context.internal(), String.class, "Hello World", "test", 
"aaa_bbb").blockingSubscribe();; + assertEquals("Hello World", + client.getJson(Context.internal(), String.class, "test", "aaa_bbb").blockingFirst()); + } + + @Test + public void testList() { + client.offerJson(Context.internal(), String.class, "Hello", "test", "aaa").blockingSubscribe();; + client.offerJson(Context.internal(), String.class, "World", "test", "bbb").blockingSubscribe();; + client.offerJson(Context.internal(), String.class, "Dude", "dummy", "ccc").blockingSubscribe(); + + List allElements = new ArrayList<>(); + client.getJsonIndicies(Context.internal(), String.class, "test") + .flatMapIterable(i->i) + .concatMap(id -> client.getJson(Context.internal(), String.class, "test", id)) + .blockingForEach(allElements::add); + + assertEquals(2, allElements.size()); + assertTrue(allElements.contains("Hello")); + assertTrue(allElements.contains("World")); + } + + + @Test + public void testEdgePutUrl() throws URISyntaxException { + client.putUrl(Context.internal(), -2, new EdgeUrl("https://marginalia.nu/")) + .blockingSubscribe(); + } + + @SneakyThrows + void query(String query, Consumer resultConsumer) { + try (var conn = dataSource.getConnection(); + var stmt = conn.createStatement() + ) { + resultConsumer.accept(stmt.executeQuery(query)); + + } catch (SQLException throwables) { + Assertions.fail(throwables); + } + } + + @SneakyThrows + void update(String sql) { + try (var conn = dataSource.getConnection(); + var stmt = conn.createStatement() + ) { + Assertions.assertTrue(stmt.executeUpdate(sql) >= 0); + conn.commit(); + } catch (SQLException throwables) { + Assertions.fail(throwables); + + } + } + + @Test + public void test2() throws URISyntaxException { + var request = new EdgeUrl("https://marginalia.nu/"); + var domain = UrlEncoded.encodeString(request.domain.toString()); + var path = UrlEncoded.encodeString(request.path); + + System.out.println(domain); + System.out.println(path); + } + +} \ No newline at end of file diff --git 
a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/EdgeDirectorServiceTest.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/EdgeDirectorServiceTest.java new file mode 100644 index 00000000..f16dcc1f --- /dev/null +++ b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/EdgeDirectorServiceTest.java @@ -0,0 +1,263 @@ +package nu.marginalia.wmsa.edge; + +import com.zaxxer.hikari.HikariDataSource; +import io.reactivex.rxjava3.functions.Consumer; +import lombok.SneakyThrows; +import nu.marginalia.util.TestUtil; +import nu.marginalia.wmsa.configuration.server.Context; +import nu.marginalia.wmsa.configuration.server.Initialization; +import nu.marginalia.wmsa.edge.crawler.domain.processor.HtmlFeature; +import nu.marginalia.wmsa.edge.data.dao.EdgeDataStoreDaoImpl; +import nu.marginalia.wmsa.edge.data.dao.task.*; +import nu.marginalia.wmsa.edge.director.EdgeDirectorService; +import nu.marginalia.wmsa.edge.director.client.EdgeDirectorClient; +import nu.marginalia.wmsa.edge.model.*; +import nu.marginalia.wmsa.edge.model.crawl.EdgeDomainIndexingState; +import nu.marginalia.wmsa.edge.model.crawl.EdgeUrlState; +import nu.marginalia.wmsa.edge.model.crawl.EdgeUrlVisit; +import org.eclipse.jetty.util.UrlEncoded; +import org.junit.jupiter.api.*; +import org.junit.jupiter.api.parallel.Execution; +import org.junit.jupiter.api.parallel.ExecutionMode; +import org.junit.jupiter.api.parallel.ResourceAccessMode; +import org.junit.jupiter.api.parallel.ResourceLock; +import spark.Spark; + +import java.net.URISyntaxException; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.util.stream.Stream; + +import static nu.marginalia.util.TestUtil.evalScript; +import static nu.marginalia.util.TestUtil.getConnection; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +@ResourceLock(value = "mariadb", mode = ResourceAccessMode.READ_WRITE) +@Execution(ExecutionMode.SAME_THREAD) +@Tag("db") +class 
EdgeDirectorServiceTest { + static EdgeDirectorService service; + static EdgeDirectorClient client; + + private static HikariDataSource dataSource; + + static int testPort = TestUtil.getPort(); + private static EdgeDataStoreTaskDaoImpl taskDao; + private static EdgeDataStoreDaoImpl dataDao; + + private static Initialization init; + + @SneakyThrows + public static HikariDataSource provideConnection() { + return getConnection(); + } + + + @BeforeAll + public static void setUpClass() { + Spark.port(testPort); + System.setProperty("service-name", "test"); + + dataSource = provideConnection(); + dataSource.setKeepaliveTime(100); + dataSource.setIdleTimeout(100); + + client = new EdgeDirectorClient(); + client.setServiceRoute("127.0.0.1", testPort); + + dataDao = new EdgeDataStoreDaoImpl(dataSource); + var ongoingJobs = new EdgeDataStoreTaskOngoingJobs(); + init = new Initialization(); + taskDao = new EdgeDataStoreTaskDaoImpl(dataSource, new EdgeDomainBlacklistImpl(dataSource), + new EdgeDataStoreTaskTuner(dataSource), ongoingJobs, new EdgeFinishTasksQueue(dataSource, ongoingJobs), init); + service = new EdgeDirectorService("127.0.0.1", + testPort, + init, + taskDao, null + ); + + Spark.awaitInitialization(); + } + + @SneakyThrows + @BeforeEach + public void clearDb() { + taskDao.clearCaches(); + + evalScript(dataSource, "sql/data-store-init.sql"); + evalScript(dataSource, "sql/edge-crawler-cache.sql"); + + try (var connection = dataSource.getConnection()) { + + try (var stmt = connection.createStatement()) { + Assertions.assertTrue(stmt.executeUpdate("DELETE FROM EC_URL") >= 0); + Assertions.assertTrue(stmt.executeUpdate("DELETE FROM EC_DOMAIN_LINK") >= 0); + Assertions.assertTrue(stmt.executeUpdate("DELETE FROM EC_DOMAIN") >= 0); + } + connection.commit(); + } + init.setReady(); + } + + @SneakyThrows + @AfterAll + public static void tearDownAll() { + dataSource.close(); + Spark.awaitStop(); + } + + @SneakyThrows + void query(String query, Consumer resultConsumer) { + 
try (var conn = dataSource.getConnection(); + var stmt = conn.createStatement() + ) { + resultConsumer.accept(stmt.executeQuery(query)); + + } catch (SQLException throwables) { + Assertions.fail(throwables); + } + } + + @SneakyThrows + void update(String sql) { + try (var conn = dataSource.getConnection(); + var stmt = conn.createStatement() + ) { + Assertions.assertTrue(stmt.executeUpdate(sql) >= 0); + conn.commit(); + } catch (SQLException throwables) { + Assertions.fail(throwables); + + } + } + + + @Test + public void testEdgeGetIndexTask() throws URISyntaxException, InterruptedException { + dataDao.putUrl(-2, + new EdgeUrl("https://marginalia.nu/"), + new EdgeUrl("https://marginalia.nu/a"), + new EdgeUrl("https://marginalia.nu/b"), + new EdgeUrl("https://marginalia.nu/c")); + + dataDao.putUrl(-1.5, + new EdgeUrl("https://www.marginalia.nu/"), + new EdgeUrl("https://www.marginalia.nu/a"), + new EdgeUrl("https://www.marginalia.nu/b"), + new EdgeUrl("https://www.marginalia.nu/c")); + + dataDao.putUrl(-2.5, + new EdgeUrl("https://memex.marginalia.nu/"), + new EdgeUrl("https://memex.marginalia.nu/a"), + new EdgeUrl("https://memex.marginalia.nu/b"), + new EdgeUrl("https://memex.marginalia.nu/c")); + + Thread.sleep(1000); + + for (int i = 0; i < 4; i++) { + var rsp = client.getDiscoverTask(Context.internal()).blockingFirst(); + System.out.println(rsp); + if (rsp.domain != null) { + client.finishTask(Context.internal(), rsp.domain, -2, EdgeDomainIndexingState.ACTIVE).blockingSubscribe(); + Thread.sleep(1000); + } + } + + for (int i = 0; i < 4; i++) { + var rsp = client.getIndexTask(Context.internal(), 2, 10).blockingFirst(); + System.out.println(rsp); + } + } + + + @Test + public void testEdgeGetDiscoverTask() throws URISyntaxException { + + update("UPDATE EC_DOMAIN SET INDEXED=0"); + dataDao.putUrl(-2, + new EdgeUrl("https://marginalia.nu/"), + new EdgeUrl("https://marginalia.nu/a"), + new EdgeUrl("https://marginalia.nu/b"), + new EdgeUrl("https://marginalia.nu/c")); 
+ + + query("SELECT URL,VISITED FROM EC_URL WHERE DOMAIN_ID=1", (rsp) -> { + while (rsp.next()) { + System.out.println(rsp.getString(1) + " - " + rsp.getString(2)); + } + }); + dataDao.putUrlVisited(new EdgeUrlVisit(new EdgeUrl("https://marginalia.nu/c"), + 0xF34, -1.1, "title", "desc", + "ip", "test", HtmlFeature.AFFILIATE_LINK.bit,123, 456, + EdgeUrlState.OK)); + + query("SELECT URL,VISITED FROM EC_URL WHERE DOMAIN_ID=1", (rsp) -> { + while (rsp.next()) { + System.out.println(rsp.getString(1) + " - " + rsp.getString(2)); + } + }); + + dataDao.putUrl(-2., + new EdgeUrl("https://www.marginalia.nu/"), + new EdgeUrl("https://www.marginalia.nu/y")); + + query("SELECT URL_PART, INDEXED FROM EC_DOMAIN", (rsp) -> { + while (rsp.next()) { + System.out.println(rsp.getString(1) + " - " + rsp.getString(2)); + } + }); + + { + var task = client.getDiscoverTask(Context.internal()).blockingFirst(); + System.out.println( + task + ); + assertEquals(3, task.urls.size()); + task.urls.forEach(System.out::println); + } + + { + var task = client.getDiscoverTask(Context.internal()).blockingFirst(); + assertEquals(2, task.urls.size()); + task.urls.forEach(System.out::println); + } + + { + var task = client.getDiscoverTask(Context.internal()).blockingFirst(); + assertEquals(0, task.urls.size()); + task.urls.forEach(System.out::println); + } + } + + + @Test + public void testFinalizeTask() throws SQLException, URISyntaxException { + Stream.of(new EdgeUrl("https://marginalia.nu/"), + new EdgeUrl("https://marginalia.nu/q"), + new EdgeUrl("https://marginalia.nu/r")) + .forEach(data -> dataDao.putUrl(-2, data)); + + update("UPDATE EC_DOMAIN SET INDEXED=1"); + + { + var task = client.getIndexTask(Context.internal(), 1, 10).blockingFirst(); + assertEquals(3, task.urls.size()); + task.urls.forEach(System.out::println); + } + client.finishTask(Context.internal(), new EdgeDomain("https://marginalia.nu"), -5, EdgeDomainIndexingState.ACTIVE) + .blockingSubscribe(); + } + + + @Test + public void 
test2() throws URISyntaxException { + var request = new EdgeUrl("https://marginalia.nu/"); + var domain = UrlEncoded.encodeString(request.domain.toString()); + var path = UrlEncoded.encodeString(request.path); + + System.out.println(domain); + System.out.println(path); + } + +} \ No newline at end of file diff --git a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/archive/ArchiveTest.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/archive/ArchiveTest.java new file mode 100644 index 00000000..701e0cdb --- /dev/null +++ b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/archive/ArchiveTest.java @@ -0,0 +1,72 @@ +package nu.marginalia.wmsa.edge.archive; + +import lombok.SneakyThrows; +import nu.marginalia.wmsa.configuration.server.Context; +import nu.marginalia.wmsa.configuration.server.Initialization; +import nu.marginalia.wmsa.edge.archive.archiver.Archiver; +import nu.marginalia.wmsa.edge.archive.client.ArchiveClient; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.parallel.Execution; +import org.junit.jupiter.api.parallel.ExecutionMode; +import spark.Spark; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; + +import static nu.marginalia.util.TestUtil.getPort; +import static nu.marginalia.util.test.TestUtil.clearTempDir; + +@Execution(ExecutionMode.SAME_THREAD) +public class ArchiveTest { + static EdgeArchiveService service; + + static int testPort = getPort(); + private static Path tempPath; + private static Path tempPath2; + private static ArchiveClient archiveClient; + private static Archiver archiver; + + @BeforeAll + public static void setUpClass() throws IOException, InterruptedException { + Spark.port(testPort); + System.setProperty("service-name", "edge-archive"); + archiveClient = new ArchiveClient(); + archiveClient.setServiceRoute("127.0.0.1", testPort); + + 
tempPath = Files.createTempDirectory("archiveTest"); + tempPath2 = Files.createTempDirectory("wikiTest"); + + archiver = new Archiver(tempPath, 10); + service = new EdgeArchiveService("127.0.0.1", testPort, + tempPath, + archiver, + new Initialization(), null); + + Spark.awaitInitialization(); + } + + @AfterAll + public static void tearDown() throws Exception { + archiver.close(); + archiveClient.close(); + clearTempDir(tempPath); + clearTempDir(tempPath2); + } + + @SneakyThrows + @Test + public void testWiki() { + var url = "Plato_(Disambiguation)"; + + Assertions.assertFalse(archiveClient.hasWiki(Context.internal(), url).blockingFirst()); + + archiveClient.submitWiki(Context.internal(), url, "

    Hello

    ").blockingFirst(); + Assertions.assertTrue(archiveClient.hasWiki(Context.internal(), url).blockingFirst()); + Assertions.assertEquals("

    Hello

    ", archiveClient.getWiki(Context.internal(), url).blockingFirst()); + } + +} diff --git a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/archive/archiver/ArchiverTest.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/archive/archiver/ArchiverTest.java new file mode 100644 index 00000000..58dde1f8 --- /dev/null +++ b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/archive/archiver/ArchiverTest.java @@ -0,0 +1,18 @@ +package nu.marginalia.wmsa.edge.archive.archiver; +import org.junit.jupiter.api.*; +import org.junit.jupiter.api.parallel.Execution; +import org.junit.jupiter.api.parallel.ExecutionMode; +import java.nio.file.Path; + +public class ArchiverTest { + + @Test + public void testArchiver() throws Exception { + Archiver archiver = new Archiver(Path.of("/tmp/"), 3); + archiver.writeData(new ArchivedFile("file1", "Hey".getBytes())); + archiver.writeData(new ArchivedFile("file2", "Hey".getBytes())); + archiver.writeData(new ArchivedFile("file3", "Hey".getBytes())); + archiver.writeData(new ArchivedFile("file4", "Hey".getBytes())); + archiver.close(); + } +} \ No newline at end of file diff --git a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/assistant/dict/WikiCleanerTest.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/assistant/dict/WikiCleanerTest.java new file mode 100644 index 00000000..ccea45ab --- /dev/null +++ b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/assistant/dict/WikiCleanerTest.java @@ -0,0 +1,47 @@ +package nu.marginalia.wmsa.edge.assistant.dict; + +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; +import org.openzim.ZIMTypes.ZIMFile; +import org.openzim.ZIMTypes.ZIMReader; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; + +class WikiCleanerTest { + + @Test + void cleanWikiJunk() throws IOException { + String str = new WikiCleaner().cleanWikiJunk("https://en.wikipedia.org/wiki/Scamander", new 
String(Files.readAllBytes(Path.of("/home/vlofgren/Work/wiki-cleaner/Scamander.wiki.html")))); + String str2 = new WikiCleaner().cleanWikiJunk("https://en.wikipedia.org/wiki/Plato", new String(Files.readAllBytes(Path.of("/home/vlofgren/Work/wiki-cleaner/Plato.wiki.html")))); + String str3 = new WikiCleaner().cleanWikiJunk("https://en.wikipedia.org/wiki/C++", new String(Files.readAllBytes(Path.of("/home/vlofgren/Work/wiki-cleaner/Cpp.wiki.html")))); + String str4 = new WikiCleaner().cleanWikiJunk("https://en.wikipedia.org/wiki/Memex", new String(Files.readAllBytes(Path.of("/home/vlofgren/Work/wiki-cleaner/Memex.wiki.html")))); + Files.writeString(Path.of("/home/vlofgren/Work/wiki-cleaner/Scamander.out.html"), str); + Files.writeString(Path.of("/home/vlofgren/Work/wiki-cleaner/Plato.out.html"), str2); + Files.writeString(Path.of("/home/vlofgren/Work/wiki-cleaner/Cpp.out.html"), str3); + Files.writeString(Path.of("/home/vlofgren/Work/wiki-cleaner/Memex.out.html"), str4); + } + + @Test @Disabled + public void readZim() throws IOException { + var zr = new ZIMReader(new ZIMFile("/home/vlofgren/Work/wikipedia_en_all_nopic_2021-01.zim")); +// try (var pw = new PrintWriter(new File("/home/vlofgren/Work/article-clusters.tsv"))) { +// zr.enumerateArticles(pw); +// } + zr.forEachArticles((url, art) -> { + if (art != null) { + System.out.println(url); + } +// if (art != null && art.length() > 5) { +// System.out.println(url + " -> " + art.substring(0, 5)); +// } + }, (p) -> true); + + /*try (var baos = zr.getArticleData("Giraffe", 'A')) { + String str = baos.toString(); + Files.writeString(Path.of("/home/vlofgren/Work/wiki-cleaner/Giraffe.wiki.html"), str); + Files.writeString(Path.of("/home/vlofgren/Work/wiki-cleaner/Giraffe.out.html"), new WikiCleaner().cleanWikiJunk("https://en.wikipedia.org/wiki/Giraffe", str)); + }*/ + } +} \ No newline at end of file diff --git a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/assistant/eval/MathParserTest.java 
b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/assistant/eval/MathParserTest.java new file mode 100644 index 00000000..1935ad6b --- /dev/null +++ b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/assistant/eval/MathParserTest.java @@ -0,0 +1,42 @@ +package nu.marginalia.wmsa.edge.assistant.eval; + +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.text.ParseException; + +class MathParserTest { + + Logger logger = LoggerFactory.getLogger(getClass()); + + @Test + void parse() throws ParseException { + var parser = new MathParser(); + logger.info(parser.evalFormatted("3+5")); + logger.info(parser.evalFormatted("1+(300+log(5))")); + logger.info(parser.evalFormatted("sqrt(1+300)")); + logger.info(parser.evalFormatted("sqrt(pi)")); + logger.info(parser.evalFormatted("3+5-5")); + logger.info(parser.evalFormatted("3+-5+5")); + logger.info(parser.evalFormatted("3+-5+log 5")); + logger.info(parser.evalFormatted("log -5")); + } + + @Test + void tokenize() throws ParseException { + var parser = new MathParser(); + logger.info("{}", parser.tokenize("3.5")); + + logger.info("{}", parser.tokenize("(3.5 + 2)*3")); + } + + @Test + void parenthesize() throws ParseException { + var parser = new MathParser(); + logger.info("{}", parser.parenthesize(parser.tokenize("3.5"))); + logger.info("{}", parser.tokenize("(3.5)")); + logger.info("{}", parser.parenthesize(parser.tokenize("(3.5)"))); + logger.info("{}", parser.parenthesize(parser.tokenize("(3.5 * (2+5))"))); + } +} \ No newline at end of file diff --git a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/assistant/eval/UnitsTest.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/assistant/eval/UnitsTest.java new file mode 100644 index 00000000..93d1efd2 --- /dev/null +++ b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/assistant/eval/UnitsTest.java @@ -0,0 +1,44 @@ +package nu.marginalia.wmsa.edge.assistant.eval; + +import 
org.junit.jupiter.api.Test; + +class UnitsTest { + + @Test + void convert() { + var units = new Units(new MathParser()); + units.convert("3.33", "cm", "m").ifPresent(System.out::println); + } + + @Test + void convert2() { + var units = new Units(new MathParser()); + units.convert("10", "km", "ft").ifPresent(System.out::println); + } + + @Test + void convert3() { + var units = new Units(new MathParser()); + units.convert("10", "oz", "tons").ifPresent(System.out::println); + } + + @Test + void convert4() { + var units = new Units(new MathParser()); + units.convert("10", "pc", "in").ifPresent(System.out::println); + } + + @Test + void convert5() { + var units = new Units(new MathParser()); + units.convert("50", "K", "K").ifPresent(System.out::println); + units.convert("50", "F", "K").ifPresent(System.out::println); + units.convert("50", "C", "K").ifPresent(System.out::println); + units.convert("50", "K", "F").ifPresent(System.out::println); + units.convert("50", "F", "F").ifPresent(System.out::println); + units.convert("50", "C", "F").ifPresent(System.out::println); + units.convert("50", "K", "C").ifPresent(System.out::println); + units.convert("50", "F", "C").ifPresent(System.out::println); + units.convert("50", "C", "C").ifPresent(System.out::println); + } +} \ No newline at end of file diff --git a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/assistant/suggest/SuggestionsTest.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/assistant/suggest/SuggestionsTest.java new file mode 100644 index 00000000..94f68001 --- /dev/null +++ b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/assistant/suggest/SuggestionsTest.java @@ -0,0 +1,47 @@ +package nu.marginalia.wmsa.edge.assistant.suggest; + +import nu.marginalia.wmsa.edge.assistant.dict.NGramDict; +import nu.marginalia.wmsa.edge.assistant.dict.SpellChecker; +import nu.marginalia.wmsa.edge.crawler.domain.language.conf.LanguageModels; +import org.junit.jupiter.api.BeforeAll; +import 
org.junit.jupiter.api.Test; + +import java.nio.file.Path; +import java.util.List; + +class SuggestionsTest { + private static Suggestions suggestions; + + @BeforeAll + public static void setUp() { + LanguageModels lm = new LanguageModels( + Path.of("/home/vlofgren/Work/ngrams/ngrams-generous-emstr.bin"), + Path.of("/home/vlofgren/Work/ngrams/tfreq-new-algo3.bin"), + Path.of("/home/vlofgren/Work/ngrams/opennlp-en-ud-ewt-sentence-1.0-1.9.3.bin"), + Path.of("/home/vlofgren/Work/ngrams/English.RDR"), + Path.of("/home/vlofgren/Work/ngrams/English.DICT"), + Path.of("/home/vlofgren/Work/ngrams/opennlp-en-ud-ewt-tokens-1.0-1.9.3.bin") + ); + suggestions = new Suggestions(Path.of("/home/vlofgren/Work/sql-titles-clean"), + new SpellChecker(), new NGramDict(lm)); + } + + @Test + void getSuggestions() { + System.out.println(tryGetSuggestions("neop")); + System.out.println(tryGetSuggestions("neopla")); + System.out.println(tryGetSuggestions("middle p")); + System.out.println(tryGetSuggestions("new public mana")); + System.out.println(tryGetSuggestions("euse")); + } + + List tryGetSuggestions(String s) { + long start = System.currentTimeMillis(); + try { + return suggestions.getSuggestions(10, s); + } + finally { + System.out.println(System.currentTimeMillis() - start); + } + } +} \ No newline at end of file diff --git a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/crawler/CrawlJobsSpecificationSetTest.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/crawler/CrawlJobsSpecificationSetTest.java new file mode 100644 index 00000000..2e147b42 --- /dev/null +++ b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/crawler/CrawlJobsSpecificationSetTest.java @@ -0,0 +1,59 @@ +package nu.marginalia.wmsa.edge.crawler; + +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; + +import static org.junit.jupiter.api.Assertions.*; + +class CrawlJobsSpecificationSetTest { + @Test + public void readSet() throws 
IOException { + Path tempFile = Files.createTempFile("tmp", "test"); + tempFile.toFile().deleteOnExit(); + Files.writeString(tempFile, "0\n10\n15"); + var specsSet = new CrawlJobsSpecificationSet(tempFile); + assertEquals(3, specsSet.size()); + assertEquals(0, specsSet.get(0).pass); + assertEquals(10, specsSet.get(1).pass); + assertEquals(15, specsSet.get(2).pass); + } + + @Test + public void readSetTrailingJunk() throws IOException { + Path tempFile = Files.createTempFile("tmp", "test"); + tempFile.toFile().deleteOnExit(); + Files.writeString(tempFile, "0\n10\n15\n"); + var specsSet = new CrawlJobsSpecificationSet(tempFile); + assertEquals(3, specsSet.size()); + assertEquals(0, specsSet.get(0).pass); + assertEquals(10, specsSet.get(1).pass); + assertEquals(15, specsSet.get(2).pass); + } + + @Test + public void readSetEmptyLines() throws IOException { + Path tempFile = Files.createTempFile("tmp", "test"); + tempFile.toFile().deleteOnExit(); + Files.writeString(tempFile, "\n0\n10\n\n15\n\n"); + var specsSet = new CrawlJobsSpecificationSet(tempFile); + assertEquals(3, specsSet.size()); + assertEquals(0, specsSet.get(0).pass); + assertEquals(10, specsSet.get(1).pass); + assertEquals(15, specsSet.get(2).pass); + } + + @Test + public void readSetEmptyLinesComments() throws IOException { + Path tempFile = Files.createTempFile("tmp", "test"); + tempFile.toFile().deleteOnExit(); + Files.writeString(tempFile, "#Hello\n0\n # World\n10\n\n15\n\n"); + var specsSet = new CrawlJobsSpecificationSet(tempFile); + assertEquals(3, specsSet.size()); + assertEquals(0, specsSet.get(0).pass); + assertEquals(10, specsSet.get(1).pass); + assertEquals(15, specsSet.get(2).pass); + } +} \ No newline at end of file diff --git a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/crawler/domain/DomainCrawlerRobotsTxtTest.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/crawler/domain/DomainCrawlerRobotsTxtTest.java new file mode 100644 index 00000000..8946231f --- /dev/null +++ 
b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/crawler/domain/DomainCrawlerRobotsTxtTest.java @@ -0,0 +1,33 @@ +package nu.marginalia.wmsa.edge.crawler.domain; + +import crawlercommons.robots.SimpleRobotRules; +import crawlercommons.robots.SimpleRobotRulesParser; +import org.junit.jupiter.api.Test; + +import java.nio.charset.StandardCharsets; + +import static org.junit.jupiter.api.Assertions.*; + +class DomainCrawlerRobotsTxtTest { + @Test + public void testOverride() { + String contentsStr = "User-agent: *\n" + + "Disallow: /\n" + + "\n" + + "User-agent: Googlebot\n" + + "User-agent: YandexBot\n" + + "User-agent: Twitterbot\n" + + "User-agent: special_archiver\n" + + "User-agent: archive.org_bot\n" + + "User-agent: search.marginalia.nu\n" + + "Disallow:\n"; + + byte[] contents = contentsStr.getBytes(); + SimpleRobotRules rules = new SimpleRobotRulesParser().parseContent("https://www.brutman.com/robots.txt", + contents, + "text/plain", + "search.marginalia.nu"); + + assertTrue(rules.isAllowed("http://www.brutman.com/test")); + } +} \ No newline at end of file diff --git a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/crawler/domain/DomainCrawlerTest.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/crawler/domain/DomainCrawlerTest.java new file mode 100644 index 00000000..931d2cf7 --- /dev/null +++ b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/crawler/domain/DomainCrawlerTest.java @@ -0,0 +1,287 @@ +package nu.marginalia.wmsa.edge.crawler.domain; + +import com.zaxxer.hikari.HikariDataSource; +import io.reactivex.rxjava3.exceptions.UndeliverableException; +import io.reactivex.rxjava3.plugins.RxJavaPlugins; +import lombok.SneakyThrows; +import nu.marginalia.util.TestUtil; +import nu.marginalia.wmsa.configuration.server.Context; +import nu.marginalia.wmsa.configuration.server.Initialization; +import nu.marginalia.wmsa.data_store.DataStoreService; +import nu.marginalia.wmsa.data_store.EdgeDataStoreService; +import 
nu.marginalia.wmsa.data_store.FileRepository; +import nu.marginalia.wmsa.data_store.client.DataStoreClient; +import nu.marginalia.wmsa.edge.archive.EdgeArchiveService; +import nu.marginalia.wmsa.edge.archive.client.ArchiveClient; +import nu.marginalia.wmsa.edge.assistant.dict.NGramDict; +import nu.marginalia.wmsa.edge.crawler.domain.language.LanguageFilter; +import nu.marginalia.wmsa.edge.crawler.domain.language.conf.LanguageModels; +import nu.marginalia.wmsa.edge.crawler.domain.language.processing.DocumentKeywordExtractor; +import nu.marginalia.wmsa.edge.crawler.domain.language.processing.SentenceExtractor; +import nu.marginalia.wmsa.edge.crawler.domain.processor.HtmlProcessor; +import nu.marginalia.wmsa.edge.crawler.domain.processor.PlainTextProcessor; +import nu.marginalia.wmsa.edge.crawler.fetcher.HttpFetcher; +import nu.marginalia.wmsa.edge.crawler.fetcher.HttpRedirectResolver; +import nu.marginalia.wmsa.edge.crawler.worker.GeoIpBlocklist; +import nu.marginalia.wmsa.edge.crawler.worker.IpBlockList; +import nu.marginalia.wmsa.edge.crawler.worker.Worker; +import nu.marginalia.wmsa.edge.crawler.worker.WorkerFactory; +import nu.marginalia.wmsa.edge.crawler.worker.data.CrawlJobsSpecification; +import nu.marginalia.wmsa.edge.crawler.worker.facade.TaskProvider; +import nu.marginalia.wmsa.edge.crawler.worker.facade.UploadFacadeDirectImpl; +import nu.marginalia.wmsa.edge.crawler.worker.results.WorkerResults; +import nu.marginalia.wmsa.edge.data.dao.EdgeDataStoreDaoImpl; +import nu.marginalia.wmsa.edge.director.client.EdgeDirectorClient; +import nu.marginalia.wmsa.edge.index.client.EdgeIndexClient; +import nu.marginalia.wmsa.edge.model.EdgeDomain; +import nu.marginalia.wmsa.edge.model.crawl.EdgeIndexTask; +import nu.marginalia.wmsa.edge.model.EdgeUrl; +import org.junit.jupiter.api.*; +import org.junit.jupiter.api.parallel.Execution; +import org.junit.jupiter.api.parallel.ExecutionMode; +import org.junit.jupiter.api.parallel.ResourceAccessMode; +import 
org.junit.jupiter.api.parallel.ResourceLock; +import org.mockito.Mockito; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import spark.Spark; + +import java.net.URISyntaxException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.LinkedList; +import java.util.List; +import java.util.NoSuchElementException; +import java.util.concurrent.LinkedBlockingQueue; + +import static nu.marginalia.util.TestUtil.evalScript; +import static nu.marginalia.util.TestUtil.getConnection; +import static org.junit.jupiter.api.Assertions.assertFalse; + +@Tag("nobuild") +@ResourceLock(value = "mariadb", mode = ResourceAccessMode.READ_WRITE) +@Execution(ExecutionMode.SAME_THREAD) +@Tag("db") +class DomainCrawlerTest { + private static final Logger logger = LoggerFactory.getLogger(DomainCrawlerTest.class); + private static HttpFetcher fetcher; + private static LanguageFilter languageFilter; + + static DataStoreService service; + static DataStoreClient dataStoreClient; + static EdgeDirectorClient edgeDirectorClient; + + private static HikariDataSource dataSource; + private static EdgeDataStoreService edgeService; + + static int testPort = TestUtil.getPort(); + private static EdgeDataStoreDaoImpl edgeDataStore; + private static WorkerFactory workerFactory; + private static ArchiveClient archiveClient; + + private List crawlJobsSpecifications + = List.of( + new CrawlJobsSpecification(0), + new CrawlJobsSpecification(0), + new CrawlJobsSpecification(0), + new CrawlJobsSpecification(0), + new CrawlJobsSpecification(0), + new CrawlJobsSpecification(0), + new CrawlJobsSpecification(0), + new CrawlJobsSpecification(0), + new CrawlJobsSpecification(1), + new CrawlJobsSpecification(1), + new CrawlJobsSpecification(10), + new CrawlJobsSpecification(10), + new CrawlJobsSpecification(10), + new CrawlJobsSpecification(10), + new CrawlJobsSpecification(50), + new CrawlJobsSpecification(50) + ); + + static LinkedList indexTasks = new 
LinkedList<>(); + static LinkedList discoverTasks = new LinkedList<>(); + + @SneakyThrows + public static HikariDataSource provideConnection() { + var conn = getConnection(); + + evalScript(conn, "sql/data-store-init.sql"); + evalScript(conn, "sql/edge-crawler-cache.sql"); + + return conn; + } + + + @SneakyThrows + @BeforeAll + public static void setUpClass() { + Spark.port(testPort); + System.setProperty("service-name", "test"); + + dataSource = provideConnection(); + dataSource.setKeepaliveTime(100); + dataSource.setIdleTimeout(100); + dataStoreClient = new DataStoreClient(); + dataStoreClient.setServiceRoute("localhost", testPort); + edgeDirectorClient = new EdgeDirectorClient(); + edgeDirectorClient.setServiceRoute("localhost", testPort); + archiveClient = new ArchiveClient(); + archiveClient.setServiceRoute("localhost", testPort); + + edgeDataStore = new EdgeDataStoreDaoImpl(dataSource); + edgeService = new EdgeDataStoreService(edgeDataStore); + + + service = new DataStoreService("127.0.0.1", + testPort, + new FileRepository(), + dataSource, + edgeService, + new Initialization(), null + ); + + new EdgeArchiveService("127.0.0.1", + testPort, Files.createTempDirectory("domainCrawlerTest"), null, Initialization.already(), null); + + String userAgent = "nu.marginalia.wmsa.edge-crawler"; + fetcher = new HttpFetcher(userAgent); + + languageFilter = new LanguageFilter(); + + var lm = new LanguageModels( + Path.of("/var/lib/wmsa/model/ngrams-generous-emstr.bin"), + Path.of("/var/lib/wmsa/model/tfreq-generous-emstr.bin"), + Path.of("/var/lib/wmsa/model/opennlp-sentence.bin"), + Path.of("/var/lib/wmsa/model/English.RDR"), + Path.of("/var/lib/wmsa/model/English.DICT"), + Path.of("/var/lib/wmsa/model/opennlp-tok.bin") + ); + + var ke = new DocumentKeywordExtractor(new NGramDict(lm)); + var se = new SentenceExtractor(lm); + + DomainCrawlerFactory domainCrawlerFactory = + new DomainCrawlerFactory(fetcher, + new HtmlProcessor(ke,new SentenceExtractor(lm)), + new 
PlainTextProcessor(ke, se), archiveClient, new DomainCrawlerRobotsTxt(fetcher, userAgent), languageFilter, new IpBlockList(new GeoIpBlocklist())); + + workerFactory = new WorkerFactory(domainCrawlerFactory, + new TaskProvider() { + + @Override + public EdgeIndexTask getIndexTask(int pass) { + try { + return indexTasks.pop(); + } + catch (NoSuchElementException ex) { + return new EdgeIndexTask(null, 0, 0, 1.); + } + } + + @Override + public EdgeIndexTask getDiscoverTask() { + try { + return discoverTasks.pop(); + } + catch (NoSuchElementException ex) { + return new EdgeIndexTask(null, 0, 0, 1.); + } + } + }, + new HttpRedirectResolver(userAgent), + new UploadFacadeDirectImpl(edgeDataStore, + Mockito.mock(EdgeIndexClient.class), + edgeDirectorClient), + /* new UploadFacadeDirectImpl(edgeDataStore, new SearchIndexWriterDummyImpl()) */ + new IpBlockList(new GeoIpBlocklist())); + + RxJavaPlugins.setErrorHandler(ex -> { + if (ex instanceof UndeliverableException) { + ex = ex.getCause(); + } + logger.error("Error {} {}", ex.getClass(), ex.getMessage()); + }); + + Spark.get("/edge/task/blocked", (req,rsp) -> "false"); + Spark.awaitInitialization(); + } + + @SneakyThrows + @AfterAll + public static void tearDownAll() { + dataSource.close(); + Spark.awaitStop(); + } + + + @BeforeEach + @SneakyThrows + public void setUp() { + + edgeDataStore.clearCaches(); + + evalScript(dataSource, "sql/data-store-init.sql"); + evalScript(dataSource, "sql/edge-crawler-cache.sql"); + } + + @AfterEach + @SneakyThrows + public void tearDown() { + dataSource.close(); + } + + @Test + @Disabled + void localCrawl() throws URISyntaxException { + + dataStoreClient.putUrl(Context.internal(), 0, + new EdgeUrl("https://www.marginalia.nu/")) + .blockingSubscribe(); + dataStoreClient.putUrl(Context.internal(), 0, new EdgeUrl("http://www.cs.uni.edu/~mccormic/humor.html")).blockingSubscribe(); + dataStoreClient.putUrl(Context.internal(), 0, new 
EdgeUrl("https://www.leonardcohenfiles.com/")).blockingSubscribe(); + dataStoreClient.putUrl(Context.internal(), 0, new EdgeUrl("http://atsf.railfan.net/")).blockingSubscribe(); + dataStoreClient.putUrl(Context.internal(), 0, new EdgeUrl("http://sprott.physics.wisc.edu/")).blockingSubscribe(); + + final List> queues = new ArrayList<>(crawlJobsSpecifications.size()); + + for (int i = 0; i < crawlJobsSpecifications.size(); i++) { + queues.add(new LinkedBlockingQueue<>(1)); + } + + for (int i = 0; i < crawlJobsSpecifications.size()*16; i++) { + var spec = crawlJobsSpecifications.get(i/16); + var queue = queues.get(i/16); + + Worker worker; + if (spec.pass == 0) { + worker = workerFactory.buildDiscoverWorker(queue); + } + else { + worker = workerFactory.buildIndexWorker(queue, spec.pass); + } + + new Thread(worker, "Fetcher-"+i).start(); + } + + var uploader = workerFactory.buildUploader(queues); + uploader.run(); + } + + @SneakyThrows + @Test + void testCrawl() { + EdgeIndexTask task = new EdgeIndexTask(new EdgeDomain("www.marginalia.nu"), 0, 0, 1.); + task.urls.add(new EdgeUrl("https://www.marginalia.nu/")); + discoverTasks.add(task); + + LinkedBlockingQueue queue = new LinkedBlockingQueue<>(); + + var uploader = workerFactory.buildUploader(List.of(queue)); + workerFactory.buildDiscoverWorker(queue).runCycle(); + assertFalse(queue.isEmpty()); + + queue.poll().upload(uploader); + } +} \ No newline at end of file diff --git a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/crawler/domain/DomainCrawlerTest2.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/crawler/domain/DomainCrawlerTest2.java new file mode 100644 index 00000000..da026154 --- /dev/null +++ b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/crawler/domain/DomainCrawlerTest2.java @@ -0,0 +1,68 @@ +package nu.marginalia.wmsa.edge.crawler.domain; + +import com.opencsv.exceptions.CsvValidationException; +import lombok.SneakyThrows; +import 
nu.marginalia.wmsa.edge.archive.client.ArchiveClient; +import nu.marginalia.wmsa.edge.assistant.dict.NGramDict; +import nu.marginalia.wmsa.edge.crawler.domain.language.LanguageFilter; +import nu.marginalia.wmsa.edge.crawler.domain.language.conf.LanguageModels; +import nu.marginalia.wmsa.edge.crawler.domain.language.processing.DocumentKeywordExtractor; +import nu.marginalia.wmsa.edge.crawler.domain.language.processing.SentenceExtractor; +import nu.marginalia.wmsa.edge.crawler.domain.processor.HtmlProcessor; +import nu.marginalia.wmsa.edge.crawler.domain.processor.PlainTextProcessor; +import nu.marginalia.wmsa.edge.crawler.fetcher.HttpFetcher; +import nu.marginalia.wmsa.edge.crawler.worker.GeoIpBlocklist; +import nu.marginalia.wmsa.edge.crawler.worker.IpBlockList; +import nu.marginalia.wmsa.edge.index.model.IndexBlock; +import nu.marginalia.wmsa.edge.model.EdgeDomain; +import nu.marginalia.wmsa.edge.model.crawl.EdgeIndexTask; +import nu.marginalia.wmsa.edge.model.EdgeUrl; +import org.junit.jupiter.api.*; +import org.junit.jupiter.api.parallel.Execution; +import org.junit.jupiter.api.parallel.ExecutionMode; +import org.junit.jupiter.api.parallel.ResourceAccessMode; +import org.junit.jupiter.api.parallel.ResourceLock; +import org.mockito.Mockito; + +import java.io.IOException; +import java.nio.file.Path; +import java.util.Comparator; +import java.util.stream.Collectors; + +@Tag("nobuild") +@Tag("db") +@ResourceLock(value = "mariadb", mode = ResourceAccessMode.READ_WRITE) +@Execution(ExecutionMode.SAME_THREAD) +class DomainCrawlerTest2 { + + @SneakyThrows + @Test + public void test() throws CsvValidationException, IOException { + var fetcher = new HttpFetcher("search.marginalia.nu"); + var ingress = new EdgeIndexTask(new EdgeDomain("memex.marginalia.nu"), 0, 10, 1.); + ingress.urls.add(new EdgeUrl("https://memex.marginalia.nu/")); + + LanguageModels lm = new LanguageModels( + Path.of("/home/vlofgren/Work/ngrams/ngrams-generous-emstr.bin"), + 
Path.of("/home/vlofgren/Work/ngrams/tfreq-generous-emstr.bin"), + Path.of("/home/vlofgren/Work/ngrams/opennlp-en-ud-ewt-sentence-1.0-1.9.3.bin"), + Path.of("/home/vlofgren/Work/ngrams/English.RDR"), + Path.of("/home/vlofgren/Work/ngrams/English.DICT"), + Path.of("/home/vlofgren/Work/ngrams/opennlp-tok.bin") + ); + var dict = new NGramDict(lm); + HtmlProcessor processor = new HtmlProcessor(new DocumentKeywordExtractor(dict),new SentenceExtractor(lm)); + + DomainCrawler dc = new DomainCrawler(fetcher, + Mockito.mock(PlainTextProcessor.class), + processor, + Mockito.mock(ArchiveClient.class), + new DomainCrawlerRobotsTxt(fetcher, "search.marginalia.nu") + , new LanguageFilter(), ingress , new IpBlockList(new GeoIpBlocklist())); + var res = dc.crawlToExhaustion(500, ()->true); + var wordsByCount = res.pageContents.values().stream().map(pc -> pc.words.get(IndexBlock.Top)).flatMap(top -> top.getWords().stream()).collect(Collectors.toMap(w -> w, w->1, Integer::sum)); + wordsByCount.entrySet().stream().filter(e -> dict.getTermFreq(e.getKey()) > 10_000).filter(e -> e.getValue()>2).sorted(Comparator.comparing(e -> e.getValue() / Math.max(1, dict.getTermFreq(e.getKey())))).forEach(System.out::println); + + } + +} \ No newline at end of file diff --git a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/crawler/domain/LanguageFilterTest.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/crawler/domain/LanguageFilterTest.java new file mode 100644 index 00000000..d8429417 --- /dev/null +++ b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/crawler/domain/LanguageFilterTest.java @@ -0,0 +1,25 @@ +package nu.marginalia.wmsa.edge.crawler.domain; + +import nu.marginalia.wmsa.edge.crawler.domain.language.LanguageFilter; +import org.jsoup.Jsoup; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.*; + +class LanguageFilterTest { + + @Test + void isPageInteresting() { + var languageFilter = new LanguageFilter(); + 
assertTrue(languageFilter.isPageInterestingByHtmlTag(Jsoup.parse("")).orElse(true)); + assertTrue(languageFilter.isPageInterestingByHtmlTag(Jsoup.parse("")).orElse(false)); + assertFalse(languageFilter.isPageInterestingByHtmlTag(Jsoup.parse("")).orElse(false)); + } + + @Test + public void isStringChinsese() { + var languageFilter = new LanguageFilter(); + assertTrue(languageFilter.isBlockedUnicodeRange("溶岩ドームの手前に広がる斜面(木が生えているところ)は普賢岳の山体です.今回の噴火にともない,このあたりの山体がマグマに押されて変形し,北(写真では左)にむかって100mほどせりだしました\n")); + } + +} \ No newline at end of file diff --git a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/crawler/domain/LinkParserTest.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/crawler/domain/LinkParserTest.java new file mode 100644 index 00000000..347b4195 --- /dev/null +++ b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/crawler/domain/LinkParserTest.java @@ -0,0 +1,53 @@ +package nu.marginalia.wmsa.edge.crawler.domain; + +import nu.marginalia.wmsa.edge.model.EdgeUrl; +import org.jsoup.Jsoup; +import org.junit.jupiter.api.Test; + +import java.net.URISyntaxException; + +import static org.junit.jupiter.api.Assertions.*; + +class LinkParserTest { + + private String parseLink(String href, String base) throws URISyntaxException { + var url = new EdgeUrl("http://www.marginalia.nu/" + base); + var domain = url.domain; + var parser = new LinkParser(); + var stuff = Jsoup.parseBodyFragment("test"); + var lnk = parser.parseLink( + url, + stuff.getElementsByTag("a").get(0)); + + if (lnk.isEmpty()) { + return null; + } + + return lnk.get().toString(); + } + + @Test + void testRenormalization() throws URISyntaxException { + assertEquals("http://www.marginalia.nu/test", parseLink("http://www.marginalia.nu/../test", "/")); + } + + @Test + void testRenormalization2() { + assertTrue("http:".matches("^[a-zA-Z]+:")); + assertFalse("/foo".matches("^[a-zA-Z]+:")); + } + + + @Test + void testAnchor() throws URISyntaxException { + assertNull(parseLink("#test", "/")); + 
} + @Test + void testRelative() throws URISyntaxException { + assertEquals("http://www.marginalia.nu/test", parseLink("../test", "/")); + assertEquals("http://www.marginalia.nu/test", parseLink("test", "/")); + assertEquals("http://www.marginalia.nu/foo/test", parseLink("test", "/foo/index.html")); + assertEquals("http://www.marginalia.nu/test", parseLink("../test", "/foo/index.html")); + assertEquals("http://www.marginalia.nu/test", parseLink("/test", "/foo/index.html")); + } +} \ No newline at end of file diff --git a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/crawler/domain/RssCrawlerTest.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/crawler/domain/RssCrawlerTest.java new file mode 100644 index 00000000..87f4bbf0 --- /dev/null +++ b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/crawler/domain/RssCrawlerTest.java @@ -0,0 +1,66 @@ +package nu.marginalia.wmsa.edge.crawler.domain; + +import jdk.security.jarsigner.JarSigner; +import nu.marginalia.wmsa.edge.model.EdgeUrl; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.net.URISyntaxException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.LinkedHashSet; +import java.util.Objects; +import java.util.Set; + +class RssCrawlerTest { + + LinkParser lp = new LinkParser(); + + @Test @Disabled + public void test() throws URISyntaxException, IOException { + getLinks(new EdgeUrl("https://eli.li/feed.rss"), new String(Files.readAllBytes(Path.of("/home/vlofgren/Work/feed.rss")))); + } + + private Set getLinks(EdgeUrl base, String str) { + + var doc = Jsoup.parse(str.replaceAll("link", "lnk")); + + Set urls = new LinkedHashSet<>(); + + doc.select("entry > lnk[rel=alternate]").forEach(element -> { + var href = element.attr("href"); + if (href != null && !href.isBlank()) { + lp.parseLink(base, href) + .filter(u -> Objects.equals(u.domain.domain, 
base.domain.domain)) + .ifPresent(urls::add); + } + }); + + doc.getElementsByTag("lnk").forEach(element -> { + var href = element.text(); + if (href != null && !href.isBlank()) { + lp.parseLink(base, href) + .filter(u -> Objects.equals(u.domain.domain, base.domain.domain)) + .ifPresent(urls::add); + } + }); + + doc.select("item > guid[isPermalink=true]").forEach(element -> { + var href = element.text(); + if (href != null && !href.isBlank()) { + lp.parseLink(base, href) + .filter(u -> Objects.equals(u.domain.domain, base.domain.domain)) + .ifPresent(urls::add); + } + }); + + return urls; + } + + + + +} \ No newline at end of file diff --git a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/crawler/domain/UrlsCacheTest.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/crawler/domain/UrlsCacheTest.java new file mode 100644 index 00000000..92e5c8db --- /dev/null +++ b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/crawler/domain/UrlsCacheTest.java @@ -0,0 +1,58 @@ +package nu.marginalia.wmsa.edge.crawler.domain; + +import nu.marginalia.wmsa.edge.model.WideHashable; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +class UrlsCacheTest { + + static class TestBox implements WideHashable { + public final long value; + + TestBox(long value) { + this.value = value; + } + + @Override + public long wideHash() { + return value; + } + } + + @Test + void testCacheEviction() { + var cache = new UrlsCache(5); + cache.add(new TestBox(0)); + hasValues(cache, 0L); + cache.add(new TestBox(1)); + hasValues(cache, 0L, 1L); + cache.add(new TestBox(2)); + hasValues(cache, 0L, 1L, 2L); + cache.add(new TestBox(3)); + hasValues(cache, 0L, 1L, 2L, 3L); + cache.add(new TestBox(4)); + hasValues(cache, 0L, 1L, 2L, 3L, 4L); + cache.add(new TestBox(5)); + hasValues(cache, 1L, 2L, 3L, 4L, 5L); + hasntValues(cache, 0L); + cache.add(new TestBox(6)); + hasValues(cache, 2L, 
3L, 4L, 5L, 6L); + hasntValues(cache, 0L, 1L); + cache.add(new TestBox(7)); + hasValues(cache, 3L, 4L, 5L, 6L); + hasntValues(cache, 0L, 1L, 2L); + } + + public void hasValues(UrlsCache box, long... values) { + for (long v : values) { + assertTrue(box.contains(new TestBox(v)), () -> "Testing if cache contains " + v); + } + } + public void hasntValues(UrlsCache box, long... values) { + for (long v : values) { + assertFalse(box.contains(new TestBox(v)), () -> "Testing if cache misses " + v); + } + } +} \ No newline at end of file diff --git a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/crawler/domain/language/processing/SentenceExtractorTest.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/crawler/domain/language/processing/SentenceExtractorTest.java new file mode 100644 index 00000000..435e203d --- /dev/null +++ b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/crawler/domain/language/processing/SentenceExtractorTest.java @@ -0,0 +1,255 @@ +package nu.marginalia.wmsa.edge.crawler.domain.language.processing; + +import com.zaxxer.hikari.HikariConfig; +import com.zaxxer.hikari.HikariDataSource; +import lombok.SneakyThrows; +import nu.marginalia.wmsa.edge.assistant.dict.NGramDict; +import nu.marginalia.wmsa.edge.crawler.domain.language.conf.LanguageModels; +import nu.marginalia.wmsa.edge.crawler.domain.language.processing.model.WordRep; +import nu.marginalia.wmsa.edge.crawler.domain.language.processing.model.WordSpan; +import nu.marginalia.wmsa.edge.crawler.domain.language.processing.model.tag.WordSeparator; +import nu.marginalia.wmsa.edge.index.service.util.ranking.BuggyReversePageRank; +import nu.marginalia.wmsa.edge.index.service.util.ranking.BuggyStandardPageRank; +import nu.marginalia.wmsa.edge.integration.wikipedia.WikipediaReader; +import nu.marginalia.wmsa.edge.model.EdgeDomain; +import nu.marginalia.wmsa.edge.model.EdgeUrl; +import org.jsoup.Jsoup; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Disabled; +import 
org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.net.URL; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.*; +import java.util.regex.Pattern; + +class SentenceExtractorTest { + SentenceExtractor newSe; + SentenceExtractor legacySe; + LanguageModels lm = new LanguageModels( + Path.of("/home/vlofgren/Work/ngrams/ngrams-generous-emstr.bin"), + Path.of("/home/vlofgren/Work/ngrams/tfreq-new-algo4.bin"), + Path.of("/home/vlofgren/Work/ngrams/opennlp-en-ud-ewt-sentence-1.0-1.9.3.bin"), + Path.of("/home/vlofgren/Work/ngrams/English.RDR"), + Path.of("/home/vlofgren/Work/ngrams/English.DICT"), + Path.of("/home/vlofgren/Work/ngrams/opennlp-en-ud-ewt-tokens-1.0-1.9.3.bin") + ); + @BeforeEach + public void setUp() { + + newSe = new SentenceExtractor(lm); + legacySe = new SentenceExtractor(lm); + legacySe.setLegacyMode(true); + } + + + @Test @Disabled + public void getTheData() throws IOException { + var connStr = "jdbc:mariadb://localhost:3306/WMSA_test?rewriteBatchedStatements=true"; + + HikariConfig config = new HikariConfig(); + + config.setJdbcUrl(connStr); + config.setUsername("wmsa"); + config.setPassword("wmsa"); + config.addDataSourceProperty("cachePrepStmts", "true"); + config.addDataSourceProperty("prepStmtCacheSize", "250"); + config.addDataSourceProperty("prepStmtCacheSqlLimit", "2048"); + config.setMaximumPoolSize(100); + config.setMinimumIdle(10); + + var conn = new HikariDataSource(config); + + var rpr = new BuggyReversePageRank(conn, "virginia.xroads.edu"); + var spr = new BuggyStandardPageRank(conn, "virginia.xroads.edu"); + + var rankVector = spr.pageRankVector(); + var norm = rankVector.norm(); + + int resultCount = rpr.size()/10; + var domains = spr.pageRank(i -> rankVector.get(i) / norm, resultCount).toArray(); + int i = 0; + + try (var bw = Files.newBufferedWriter(Path.of("/tmp/domains.txt")); + var stmt = conn.getConnection().prepareStatement("SELECT URL_PROTO, URL_DOMAIN, URL_PORT, URL_PATH FROM 
EC_URL_VIEW WHERE DOMAIN_ID=? AND TITLE IS NOT NULL ORDER BY ID ASC LIMIT 10 ")) { + for (int domainId : domains) { + bw.write(String.format("%f\n", i++/(double) resultCount)); + stmt.setInt(1, domainId); + var rsp = stmt.executeQuery(); + while (rsp.next()) { + var url = new EdgeUrl(rsp.getString(1), new EdgeDomain(rsp.getString(2)), + rsp.getInt(3), rsp.getString(4)); + bw.write(url.toString()); + bw.write("\n"); + } + bw.write(".\n"); + } + + } + catch (Exception e) { + + } + } + + @SneakyThrows + @Test + void testExtractSubject() { + var data = Path.of("/home/vlofgren/Code/tmp-data/"); + + System.out.println("Running"); + + var dict = new NGramDict(lm); + + SentenceExtractor se = new SentenceExtractor(lm); + KeywordExtractor keywordExtractor = new KeywordExtractor(); + + for (var file : Objects.requireNonNull(data.toFile().listFiles())) { + System.out.println(file); + var dld = se.extractSentences(Jsoup.parse(Files.readString(file.toPath()))); + Map counts = new HashMap<>(); + for (var sentence : dld.sentences) { + for (WordSpan kw : keywordExtractor.getNames(sentence)) { + if (kw.end + 2 >= sentence.length()) { + continue; + } + if (sentence.separators[kw.end] == WordSeparator.COMMA + || sentence.separators[kw.end + 1] == WordSeparator.COMMA) + break; + + if (("VBZ".equals(sentence.posTags[kw.end]) || "VBP".equals(sentence.posTags[kw.end])) + && ("DT".equals(sentence.posTags[kw.end + 1]) || "RB".equals(sentence.posTags[kw.end]) || sentence.posTags[kw.end].startsWith("VB")) + ) { + counts.merge(new WordRep(sentence, new WordSpan(kw.start, kw.end)).word, -1, Integer::sum); + } + } + } + + int best = counts.values().stream().mapToInt(Integer::valueOf).min().orElse(0); + + counts.entrySet().stream().sorted(Map.Entry.comparingByValue()) + .filter(e -> e.getValue()<-2 && e.getValue() { + + var newResult = newSe.extractSentences(Jsoup.parse(post.body)); + + var newRes = documentKeywordExtractor.extractKeywords(newResult); + System.out.println(newRes); + }); + 
reader.join(); + } + @Test + void extractSentences() throws IOException { + var data = Path.of("/home/vlofgren/Code/tmp-data/"); + + System.out.println("Running"); + + var dict = new NGramDict(lm); + + DocumentKeywordExtractor documentKeywordExtractor = new DocumentKeywordExtractor(dict); + +// documentKeywordExtractorLegacy.setLegacy(true); + +// for (;;) { + long st = System.currentTimeMillis(); + for (var file : Objects.requireNonNull(data.toFile().listFiles())) { + + + var newResult = newSe.extractSentences(Jsoup.parse(Files.readString(file.toPath()))); + + var newRes = documentKeywordExtractor.extractKeywords(newResult); + + +// var legacyRes = documentKeywordExtractorLegacy.extractKeywords(newResult); +// +// EdgePageWordSet difference = new EdgePageWordSet(); +// for (IndexBlock block : IndexBlock.values()) { + +// var newWords = new HashSet<>(newRes.get(block).words); +// var oldWords = new HashSet<>(legacyRes.get(block).words); +// newWords.removeAll(oldWords); + +// if (!newWords.isEmpty()) { +// difference.append(block, newWords); +// } +// } +// System.out.println(difference); + System.out.println(newRes); +// System.out.println("---"); + } + System.out.println(System.currentTimeMillis() - st); +// } + + } + + @SneakyThrows + @Test + @Disabled + public void testSE() { + var result = newSe.extractSentences(Jsoup.parse(new URL("https://memex.marginalia.nu/log/26-personalized-pagerank.gmi"), 10000)); + + var dict = new NGramDict(lm); + System.out.println(new DocumentKeywordExtractor(dict).extractKeywords(result)); + + +// +// var pke = new PositionKeywordExtractor(dict, new KeywordExtractor()); +// pke.count(result).stream().map(wr -> wr.word).distinct().forEach(System.out::println); +// for (var sent : result.sentences) { +// System.out.println(sent); +// } + + } + + @Test + public void separatorExtraction() { + seprateExtractor("Cookies, cream and shoes"); + seprateExtractor("Cookies"); + seprateExtractor(""); + + } + + Pattern p = Pattern.compile("([, 
]+)"); + public void seprateExtractor(String sentence) { + var matcher = p.matcher(sentence); + + Arrays.stream(p.split(sentence)).forEach(System.out::println); + List words = new ArrayList<>(); + List separators = new ArrayList<>(); + + int start = 0; + int wordStart = 0; + while (wordStart <= sentence.length()) { + if (!matcher.find(wordStart)) { + words.add(sentence.substring(wordStart)); + separators.add("S"); + break; + } + + if (wordStart != matcher.start()) { + words.add(sentence.substring(wordStart, matcher.start())); + separators.add(sentence.substring(matcher.start(), matcher.end()).isBlank() ? "S" : "C"); + } + wordStart = matcher.end(); + } + + System.out.println(words); + System.out.println(separators); + } +} \ No newline at end of file diff --git a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/crawler/domain/processor/HtmlProcessorTest.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/crawler/domain/processor/HtmlProcessorTest.java new file mode 100644 index 00000000..b90585c0 --- /dev/null +++ b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/crawler/domain/processor/HtmlProcessorTest.java @@ -0,0 +1,119 @@ +package nu.marginalia.wmsa.edge.crawler.domain.processor; + +import nu.marginalia.wmsa.edge.assistant.dict.NGramDict; +import nu.marginalia.wmsa.edge.crawler.domain.language.conf.LanguageModels; +import nu.marginalia.wmsa.edge.crawler.domain.language.processing.DocumentKeywordExtractor; +import nu.marginalia.wmsa.edge.crawler.domain.language.processing.SentenceExtractor; +import nu.marginalia.wmsa.edge.model.crawl.EdgeRawPageContents; +import nu.marginalia.wmsa.edge.model.EdgeUrl; +import org.jsoup.Jsoup; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.net.URISyntaxException; +import java.net.URL; +import java.nio.file.Files; +import java.nio.file.Path; +import java.time.LocalDateTime; +import 
java.util.List; + +@Disabled +class HtmlProcessorTest { + Logger logger = LoggerFactory.getLogger(getClass()); + + LanguageModels lm = new LanguageModels( + Path.of("/home/vlofgren/Work/ngrams/ngrams-generous-emstr.bin"), + Path.of("/home/vlofgren/Work/ngrams/tfreq-generous-emstr.bin"), + Path.of("/home/vlofgren/Work/ngrams/opennlp-sentence.bin"), + Path.of("/var/lib/wmsa/model/English.RDR"), + Path.of("/var/lib/wmsa/model/English.DICT"), + Path.of("/home/vlofgren/Work/ngrams/opennlp-tok.bin") + ); + HtmlProcessor processor = new HtmlProcessor(new DocumentKeywordExtractor(new NGramDict(lm)),new SentenceExtractor(lm)); + + @Test + @Disabled + void processHtmlPage0() throws IOException, URISyntaxException { + List urls = List.of("https://www.marginalia.nu/", + "https://www.marginalia.nu/00-skrifter/", + "https://www.marginalia.nu/2021-04-smart/", + "https://www.marginalia.nu/2020-06-att-l%C3%A4ra/", + "https://www.marginalia.nu/2020-04-grader-av-liv/", + "https://www.marginalia.nu/2020-03-battre-internet/", + "https://www.marginalia.nu/2020-05-dr%C3%B6m/", + "https://www.marginalia.nu/2020-02-faktaresistens/", + "https://www.marginalia.nu/2020-01-dialog-forfattaren-i-verket/", + "https://search.marginalia.nu/about.html", + "https://www.putty.org/", + "https://www.chiark.greenend.org.uk/~sgtatham/putty/latest.html", + "https://legacy.3drealms.com/duke3d/", + "http://classics.mit.edu/Plato/stateman.html", + "http://www.southaustralianhistory.com.au/bruce.htm", + "http://www.castlecraft.com/main.htm", + "http://www.discoveryvallarta.com/gaybars.html", + "https://twitterrific.com/ios" + ); + + + + for (String url : urls) { + + + + var doc = Jsoup.parse(new URL(url), 15000); + var res = processor.processHtmlPage(new EdgeRawPageContents(new EdgeUrl("http://www.example.com/"), new EdgeUrl("http://www.example.com/"), doc.html(), null, "", true, LocalDateTime.now().toString()), + doc); + + + System.out.println("Q:" + res.metadata.quality()); + 
System.out.println(100*Math.exp(res.metadata.quality())); + System.out.println(res.metadata.rawLength + ", " + res.metadata.textBodyLength); + + System.out.println(res.metadata.totalWords + ", " + res.metadata.textDistinctWords / res.metadata.totalWords); + for (var words : res.words.values()) { + logger.info("{}: {}", words.block, words.getWords()); + } + } + } + + @Test @Disabled + void processHtmlPage() throws IOException, URISyntaxException { + var doc = Jsoup.parse(new URL("https://aysia.blondeninna.com/"), 5000); + var res = processor.processHtmlPage(new EdgeRawPageContents(new EdgeUrl("http://www.example.com/"), new EdgeUrl("http://www.example.com/"), doc.data(), null, "", true, LocalDateTime.now().toString()), + doc); + System.out.println(res); + System.out.println("--"); + System.out.println(res.metadata.title); + System.out.println("--"); + System.out.println(res.metadata.description); + System.out.println(res.metadata.textDistinctWords); + + System.out.println(res.metadata.smutCoefficient); + } + + @Test @Disabled + void processHtmlPage3() throws IOException, URISyntaxException { + var doc = Jsoup.parse(new URL("http://thelagniappechateau.com/wwwboard/720p/starcraft-2-pc-iso-download/"), 5000); + var res = processor.processHtmlPage(new EdgeRawPageContents(new EdgeUrl("http://www.example.com/"), new EdgeUrl("http://www.example.com/"), doc.data(), null, "", true, LocalDateTime.now().toString()), + doc); + System.out.println(res); + System.out.println("--"); + System.out.println(res.metadata.title); + System.out.println("--"); + System.out.println(res.metadata.description); + System.out.println(res.metadata.textDistinctWords); + + System.out.println(res.metadata.smutCoefficient); + } + + @Test @Disabled + void processHtmlPage2() throws IOException { + + var doc = Jsoup.parse(new String(Files.readAllBytes(Path.of("/home/vlofgren/monadnock.html")))); + doc.getElementsByTag("a").forEach(System.out::println); + + } +} \ No newline at end of file diff --git 
a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/crawler/domain/processor/HtmlTagCleanerTest.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/crawler/domain/processor/HtmlTagCleanerTest.java new file mode 100644 index 00000000..0ad93a91 --- /dev/null +++ b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/crawler/domain/processor/HtmlTagCleanerTest.java @@ -0,0 +1,27 @@ +package nu.marginalia.wmsa.edge.crawler.domain.processor; + +import org.jsoup.Jsoup; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.*; + +class HtmlTagCleanerTest { + + HtmlTagCleaner tagCleaner = new HtmlTagCleaner(); + + public String cleanTag(String text) { + var doc = Jsoup.parse(text); + tagCleaner.clean(doc); + return doc.text(); + } + + @Test + public void testBriefCodeTag() { + assertEquals("hello", cleanTag("hello")); + assertEquals("System out println", cleanTag("System.out.println")); + assertEquals("hello", cleanTag("hello()")); + assertEquals("hello", cleanTag("<hello>")); + assertEquals("hello", cleanTag("hello(p,q)")); + assertEquals("hello", cleanTag("hello(p,q);")); + } +} \ No newline at end of file diff --git a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/crawler/fetcher/HttpFetcherTest.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/crawler/fetcher/HttpFetcherTest.java new file mode 100644 index 00000000..0fb68535 --- /dev/null +++ b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/crawler/fetcher/HttpFetcherTest.java @@ -0,0 +1,79 @@ +package nu.marginalia.wmsa.edge.crawler.fetcher; + +import lombok.SneakyThrows; +import nu.marginalia.wmsa.edge.model.EdgeUrl; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.net.URISyntaxException; + +import static org.junit.Assert.assertTrue; + +class HttpFetcherTest { + + @SneakyThrows + @Test + void testUrlPattern() { + var fetcher = new HttpFetcher("nu.marginalia.edge-crawler"); + + 
Assertions.assertFalse(fetcher.isUrlLikeBinary(new EdgeUrl("https://marginalia.nu/log.txt"))); + Assertions.assertTrue(fetcher.isUrlLikeBinary(new EdgeUrl("https://marginalia.nu/log.bin"))); + Assertions.assertTrue(fetcher.isUrlLikeBinary(new EdgeUrl("https://marginalia.nu/log.tar.gz"))); + Assertions.assertFalse(fetcher.isUrlLikeBinary(new EdgeUrl("https://marginalia.nu/log.htm"))); + Assertions.assertFalse(fetcher.isUrlLikeBinary(new EdgeUrl("https://marginalia.nu/log.html"))); + Assertions.assertFalse(fetcher.isUrlLikeBinary(new EdgeUrl("https://marginalia.nu/log"))); + Assertions.assertFalse(fetcher.isUrlLikeBinary(new EdgeUrl("https://marginalia.nu/log.php?id=1"))); + } + + @Test + void fetchUTF8() throws URISyntaxException { + var fetcher = new HttpFetcher("nu.marginalia.edge-crawler"); + var str = fetcher.fetchContent(new EdgeUrl("https://www.marginalia.nu")); + System.out.println(str.contentType); + System.out.println(str.fetchTimestamp); + System.out.println(str.data.substring(0, 1000)); + } + + @Test + void fetchText() throws URISyntaxException { + var fetcher = new HttpFetcher("nu.marginalia.edge-crawler"); + var str = fetcher.fetchContent(new EdgeUrl("https://www.marginalia.nu/robots.txt")); + System.out.println(str); + } + + @Test + void resolveRedirect() throws URISyntaxException { + var fetcher = new HttpRedirectResolver("nu.marginalia.edge-crawler"); + var str = fetcher.probe(new EdgeUrl("https://www.marginalia.nu/robots.txt")); + System.out.println(str); + } + + @Test + void resolveRedirectRitEdu() throws URISyntaxException { + var fetcher = new HttpRedirectResolver("nu.marginalia.edge-crawler"); + var str = fetcher.probe(new EdgeUrl("http://www.rit.edu/cla/philosophy/Suits.html")).blockingFirst(); + System.out.println(str); + } + + @Test + void resolveRedirect2() throws URISyntaxException { + var fetcher = new HttpRedirectResolver("nu.marginalia.edge-crawler"); + var str = fetcher.probe(new 
EdgeUrl("https://www.marginalia.nu/robots.txt")).blockingFirst(); + System.out.println(str); + } + + @Test + void resolveRedirect3() throws URISyntaxException { + var fetcher = new HttpRedirectResolver("nu.marginalia.edge-crawler"); + var str = fetcher.probe(new EdgeUrl("https://www.marginalia.nu/robots.txt")); + System.out.println(str); + } + + + @Test + void resolveRedirect4() throws URISyntaxException { + var fetcher = new HttpRedirectResolver("nu.marginalia.edge-crawler"); + var str = fetcher.probe(new EdgeUrl("https://www.marginalia.nu/robots.txt")); + System.out.println(str); + } +} \ No newline at end of file diff --git a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/crawler/worker/IpBlockListTest.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/crawler/worker/IpBlockListTest.java new file mode 100644 index 00000000..1b874101 --- /dev/null +++ b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/crawler/worker/IpBlockListTest.java @@ -0,0 +1,35 @@ +package nu.marginalia.wmsa.edge.crawler.worker; + +import com.opencsv.exceptions.CsvValidationException; +import nu.marginalia.wmsa.edge.model.EdgeDomain; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.net.InetAddress; + +class IpBlockListTest { + + @Test + void getCountry() throws IOException, CsvValidationException { + var blocklist = new GeoIpBlocklist(); + + String country = blocklist.getCountry(InetAddress.getByName("federali.st")); + country = blocklist.getCountry(InetAddress.getByName("hugo.md")); + System.out.println(country); + + country = blocklist.getCountry(InetAddress.getByName("hugo.md")); + System.out.println(country); + } + + @Test + void isAllowed() throws CsvValidationException, IOException { + var blocklist = new IpBlockList(new GeoIpBlocklist()); + +// Assertions.assertFalse(blocklist.isAllowed(new EdgeDomain("localhost"))); +// Assertions.assertFalse(blocklist.isAllowed(new 
EdgeDomain("www.cloudflare.com"))); +// Assertions.assertTrue(blocklist.isAllowed(new EdgeDomain("https://marginalia.nu"))); + Assertions.assertTrue(blocklist.isAllowed(new EdgeDomain("federali.st"))); + Assertions.assertTrue(blocklist.isAllowed(new EdgeDomain("hugo.md"))); + } +} \ No newline at end of file diff --git a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/crawler/worker/UrlBlocklistTest.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/crawler/worker/UrlBlocklistTest.java new file mode 100644 index 00000000..7433d200 --- /dev/null +++ b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/crawler/worker/UrlBlocklistTest.java @@ -0,0 +1,22 @@ +package nu.marginalia.wmsa.edge.crawler.worker; + +import nu.marginalia.wmsa.edge.model.EdgeUrl; +import org.junit.jupiter.api.Test; + +import java.net.URISyntaxException; + +import static org.junit.jupiter.api.Assertions.*; + +class UrlBlocklistTest { + + @Test + void isUrlBlocked() throws URISyntaxException { + UrlBlocklist blocklist = new UrlBlocklist(); + assertTrue(blocklist.isUrlBlocked(new EdgeUrl("https://memex.marginalia.nu/ghc/ghc/blob/1b1067d14b656bbbfa7c47f156ec2700c9751549/compiler/main/UpdateCafInfos.hs"))); + assertTrue(blocklist.isUrlBlocked(new EdgeUrl("https://memex.marginalia.nu//gn/+/d62642c920e6a0d1756316d225a90fd6faa9e21e"))); + assertTrue(blocklist.isUrlBlocked(new EdgeUrl("http://yelenasimone.com/pdf/download-a-course-in-algebra.html"))); + assertFalse(blocklist.isUrlBlocked(new EdgeUrl("http://yelenasimone.com/nope/x-a-course-in-algebra.html"))); + assertTrue(blocklist.isUrlBlocked(new EdgeUrl("http://yelenasimone.com/_module/slide/pqPan/library/american-sour-beer-innovative-techniques-for-mixed-fermentations/"))); + assertTrue(blocklist.isUrlBlocked(new EdgeUrl("http://w-m-p.de/images/book/download-firstborn-starcraft-dark-templar-book-1.php"))); + } +} \ No newline at end of file diff --git 
a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/crawling/CrawlPlanLoaderTest.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/crawling/CrawlPlanLoaderTest.java new file mode 100644 index 00000000..40b77484 --- /dev/null +++ b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/crawling/CrawlPlanLoaderTest.java @@ -0,0 +1,50 @@ +package nu.marginalia.wmsa.edge.crawling; + +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; + +import static org.junit.jupiter.api.Assertions.*; + +class CrawlPlanLoaderTest { + + Path tempFile; + + @BeforeEach + public void setUp() throws IOException { + tempFile = Files.createTempFile(getClass().getSimpleName(), ".yaml"); + } + @AfterEach + public void tearDown() throws IOException { + Files.delete(tempFile); + } + + @Test + void load() throws IOException { + Files.writeString(tempFile, """ + jobSpec: "job.spec" + crawl: + dir: "/foo" + logName: "foo.log" + process: + dir: "/bar" + logName: "bar.log" + """); + var loader = new CrawlPlanLoader(); + var ret = loader.load(tempFile); + + assertEquals(Path.of("job.spec"), ret.getJobSpec()); + + assertEquals(Path.of("/foo"), ret.crawl.getDir()); + assertEquals(Path.of("/foo/foo.log"), ret.crawl.getLogFile()); + + assertEquals(Path.of("/bar"), ret.process.getDir()); + assertEquals(Path.of("/bar/bar.log"), ret.process.getLogFile()); + + System.out.println(ret); + } +} \ No newline at end of file diff --git a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/crawling/WorkLogTest.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/crawling/WorkLogTest.java new file mode 100644 index 00000000..bf5a6bdb --- /dev/null +++ b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/crawling/WorkLogTest.java @@ -0,0 +1,54 @@ +package nu.marginalia.wmsa.edge.crawling; + +import org.junit.jupiter.api.AfterEach; +import 
org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; + +import static org.junit.jupiter.api.Assertions.*; + +class WorkLogTest { + Path outFile; + @BeforeEach + public void setUp() throws IOException { + outFile = Files.createTempFile(getClass().getSimpleName(), ".log"); + } + @AfterEach + public void tearDown() throws IOException { + Files.delete(outFile); + } + + @Test + public void testLog() throws IOException { + var log = new WorkLog(outFile); + log.setJobToFinished("A", "a.txt",1); + log.setJobToFinished("B", "b.txt",2); + log.setJobToFinished("C", "c.txt",3); + assertTrue(log.isJobFinished("A")); + assertTrue(log.isJobFinished("B")); + assertTrue(log.isJobFinished("C")); + assertFalse(log.isJobFinished("E")); + } + + @Test + public void testLogResume() throws Exception { + WorkLog log = new WorkLog(outFile); + log.setJobToFinished("A", "a.txt",1); + log.setJobToFinished("B", "b.txt",2); + log.setJobToFinished("C", "c.txt",3); + log.close(); + log = new WorkLog(outFile); + log.setJobToFinished("E", "e.txt",4); + assertTrue(log.isJobFinished("A")); + assertTrue(log.isJobFinished("B")); + assertTrue(log.isJobFinished("C")); + assertTrue(log.isJobFinished("E")); + log.close(); + + Files.readAllLines(outFile).forEach(System.out::println); + } + +} diff --git a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/data/EdgeDataStoreDaoTest.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/data/EdgeDataStoreDaoTest.java new file mode 100644 index 00000000..b2dff60a --- /dev/null +++ b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/data/EdgeDataStoreDaoTest.java @@ -0,0 +1,213 @@ +package nu.marginalia.wmsa.edge.data; + +import com.zaxxer.hikari.HikariDataSource; +import io.reactivex.rxjava3.functions.Consumer; +import lombok.SneakyThrows; +import nu.marginalia.wmsa.configuration.server.Initialization; +import 
nu.marginalia.wmsa.edge.data.dao.EdgeDataStoreDaoImpl; +import nu.marginalia.wmsa.edge.data.dao.task.*; +import nu.marginalia.wmsa.edge.model.*; +import nu.marginalia.wmsa.edge.model.crawl.EdgeDomainIndexingState; +import nu.marginalia.wmsa.edge.model.crawl.EdgeDomainLink; +import nu.marginalia.wmsa.edge.model.crawl.EdgeUrlState; +import nu.marginalia.wmsa.edge.model.crawl.EdgeUrlVisit; +import org.junit.jupiter.api.*; +import org.junit.jupiter.api.parallel.Execution; +import org.junit.jupiter.api.parallel.ExecutionMode; +import org.junit.jupiter.api.parallel.ResourceAccessMode; +import org.junit.jupiter.api.parallel.ResourceLock; + +import java.net.URISyntaxException; +import java.sql.ResultSet; +import java.sql.SQLException; + +import static nu.marginalia.util.TestUtil.evalScript; +import static nu.marginalia.util.TestUtil.getConnection; +import static org.junit.jupiter.api.Assertions.*; + +@ResourceLock(value = "mariadb", mode = ResourceAccessMode.READ_WRITE) +@Execution(ExecutionMode.SAME_THREAD) +@Tag("db") +class EdgeDataStoreDaoTest { + HikariDataSource dataSource; + private EdgeDataStoreTaskDaoImpl taskDao; + + + @SneakyThrows + public static HikariDataSource provideConnection() { + var conn = getConnection(); + + evalScript(conn, "sql/edge-crawler-cache.sql"); + + return conn; + } + + + @SneakyThrows + void query(String query, Consumer resultConsumer) { + try (var conn = dataSource.getConnection(); + var stmt = conn.createStatement() + ) { + resultConsumer.accept(stmt.executeQuery(query)); + + } catch (Throwable throwables) { + Assertions.fail(throwables); + } + } + @SneakyThrows + void update(String sql) { + try (var conn = dataSource.getConnection(); + var stmt = conn.createStatement() + ) { + stmt.executeUpdate(sql); + conn.commit(); + + } catch (Throwable throwables) { + Assertions.fail(throwables); + + } + } + + @SneakyThrows + @AfterEach + public void tearDownDb() { + dataSource.close(); + } + + + @SneakyThrows + @BeforeEach + public void setUpDb() { 
+ dataSource = provideConnection(); + try (var connection = dataSource.getConnection()) { + + try (var stmt = connection.createStatement()) { + stmt.execute("DELETE FROM EC_URL"); + stmt.execute("DELETE FROM EC_DOMAIN_LINK"); + stmt.execute("DELETE FROM EC_DOMAIN"); + stmt.execute("DELETE FROM EC_URL_DETAILS"); + } + connection.commit(); + } + var ongoingJobs = new EdgeDataStoreTaskOngoingJobs(); + Initialization init = new Initialization(); + taskDao = new EdgeDataStoreTaskDaoImpl(dataSource, + new EdgeDomainBlacklistImpl(dataSource), + new EdgeDataStoreTaskTuner(dataSource), + ongoingJobs, + new EdgeFinishTasksQueue(dataSource, ongoingJobs), + init); + } + + @SneakyThrows + @Test + public void test() { + try (var connection = dataSource.getConnection()) { + var ds = new EdgeDataStoreDaoImpl(dataSource); + assertFalse(ds.isBlacklisted(new EdgeDomain("https://www.marginalia.nu"))); + } + } + + @Test + void putLink() throws SQLException { + try (var connection = dataSource.getConnection()) { + var ds = new EdgeDataStoreDaoImpl(dataSource); + ds.putLink( + false, new EdgeDomainLink(new EdgeDomain("https://www.marginalia.nu"), + new EdgeDomain("https://www.marginalia.nu") + )); + var res = connection.createStatement().executeQuery("SELECT * FROM EC_DOMAIN_LINK"); + res.next(); + assertEquals(res.getString(1), res.getString(2)); + } + } + + @SneakyThrows + @Test + void putUrl() { + var ds = new EdgeDataStoreDaoImpl(dataSource); + ds.putUrl(-2, new EdgeUrl("https://www.marginalia.nu/")); + ds.putUrl(-2, new EdgeUrl("https://www.marginalia.nu/robots.txt")); + ds.putUrl(-2, new EdgeUrl("https://www.marginalia.nu/sitemap.xml")); + ds.putUrl(-2, new EdgeUrl("https://marginalia.nu/")); + ds.putUrl(-2, new EdgeUrl("https://marginalia.nu/robots.txt")); + ds.putUrl(-2, new EdgeUrl("https://marginalia.nu/sitemap.xml")); + + taskDao.getIndexTask(0, 100).urls.forEach(System.out::println); + taskDao.finishIndexTask(new EdgeDomain("https://www.marginalia.nu/"), 0.5, 
EdgeDomainIndexingState.ACTIVE); + System.out.println("-"); + taskDao.getIndexTask(0, 100).urls.forEach(System.out::println); + } + + + @SneakyThrows + @Test + void putUrlVisit() { + var ds = new EdgeDataStoreDaoImpl(dataSource); + + var url = new EdgeUrl("https://www.marginalia.nu/"); + ds.putUrl(-2, url); + ds.putUrlVisited(new EdgeUrlVisit(url, 255, -2., "Bob's Website", "A homepage", "", "test", 0,0, 0, EdgeUrlState.OK)); + var deets = ds.getUrlDetails(ds.getUrlId(url)); + assertEquals(-2., deets.urlQuality); + assertEquals("Bob's Website", deets.title); + assertEquals("A homepage", deets.description); + System.out.println(deets); + } + + @Test + void getDomainId() throws URISyntaxException { + var ds = new EdgeDataStoreDaoImpl(dataSource); + var domain = new EdgeDomain("www.marginalia.nu"); + var url = new EdgeUrl("https://www.marginalia.nu/"); + + ds.putUrl(-2, url); + var id = ds.getDomainId(domain); + assertEquals(domain, ds.getDomain(id)); + } + + @Test + public void setDomainAlias() throws URISyntaxException { + var ds = new EdgeDataStoreDaoImpl(dataSource); + + ds.putUrl(1.0, new EdgeUrl("https://marginalia.nu/")); + + ds.putDomainAlias(new EdgeDomain("marginalia.nu"), new EdgeDomain("www.marginalia.nu")); + + query("SELECT COUNT(*) FROM EC_DOMAIN", res -> { + assertTrue(res.next()); + assertEquals(2, res.getInt(1)); + }); + + query("SELECT COUNT(DISTINCT(QUALITY)) FROM EC_DOMAIN", res -> { + assertTrue(res.next()); + assertEquals(1, res.getInt(1)); + }); + + query("SELECT URL_PART, DOMAIN_ALIAS FROM EC_DOMAIN", res -> { + + while (res.next()) { + System.out.println(res.getString(1) + ":" + res.getString(2)); + switch (res.getString(1)) { + case "https://marginalia.nu": + assertNotNull(res.getString(2)); + break; + case "https://www.marginalia.nu": + assertNull(res.getString(2)); + break; + } + } + }); + } + + @Test + void getUrlId() throws URISyntaxException { + var ds = new EdgeDataStoreDaoImpl(dataSource); + var url = new 
EdgeUrl("https://www.marginalia.nu/"); + + ds.putUrl(-2, url); + var id = ds.getUrlId(url); + assertEquals(url, ds.getUrl(id)); + } + +} \ No newline at end of file diff --git a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/index/service/DictionaryWriterTest.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/index/service/DictionaryWriterTest.java new file mode 100644 index 00000000..cf497193 --- /dev/null +++ b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/index/service/DictionaryWriterTest.java @@ -0,0 +1,213 @@ +package nu.marginalia.wmsa.edge.index.service; + +import lombok.SneakyThrows; +import nu.marginalia.wmsa.edge.index.model.IndexBlock; +import nu.marginalia.wmsa.edge.index.service.dictionary.DictionaryReader; +import nu.marginalia.wmsa.edge.index.service.dictionary.DictionaryWriter; +import nu.marginalia.wmsa.edge.index.service.index.SearchIndexConverter; +import nu.marginalia.wmsa.edge.index.service.query.SearchIndexPartitioner; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; + +import java.io.*; +import java.nio.file.Files; +import java.nio.file.Path; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotEquals; + +class DictionaryWriterTest { + /* + @Test @Disabled + + public void analyze2() throws IOException { + System.out.println("Loading dictionary"); + var dr = new DictionaryReader(null, new File("/home/vlofgren/dictionary.dat")); + System.out.println("Loading indices"); + var reader = new SearchIndexReader(new SearchIndex("test", Path.of("/tmp"), + new File("/tmp/urls-0"), + new File("/tmp/words-0")), + new SearchIndex("test", Path.of("/tmp"), + new File("/tmp/urls-24"), + new File("/tmp/words-24"))); + System.out.println("Gogo"); + long hitsTotal = 0L; + try (var wr = new PrintWriter(new FileOutputStream("/home/vlofgren/words-count"))) { + hitsTotal = dr.stream().mapToLong(w -> { + long hits = reader.numHits(dr.get(w)); + 
wr.printf("%08d %s\n", hits, w); + return hits; + }).sum(); + } + System.out.println(hitsTotal); + } + */ + @Test @Disabled + public void convert() { + new SearchIndexConverter(IndexBlock.Title, 0, Path.of("/tmp"), + new File("/home/vlofgren/page-index-0.dat"), + new File("/tmp/words-0"), + new File("/tmp/urls-0"), + new SearchIndexPartitioner(null), + val -> false); + } + @SneakyThrows + @Test + @Disabled + void test() { + try (var dict = new DictionaryWriter(Path.of("/home/vlofgren/Code/data/dictionary.dat").toFile(), 1L<<16, false)) { + wait(); + } + } + + + @SneakyThrows + @Test + void getFold() { + var path = Files.createTempFile("dict", ".tmp"); + try (var dict = new DictionaryWriter(path.toFile(), 1L<<16, false)) { + dict.get("hic"); + dict.get("hac"); + dict.commitToDisk(); + dict.get("quae"); + dict.get("quis"); + dict.get("quem1"); + dict.get("quem2"); + dict.get("quem3"); + dict.get("quem4"); + dict.get("quem5"); + dict.get("quem6"); + dict.get("quem7"); + dict.get("quem8"); + dict.get("quem9"); + dict.get("quem10"); + dict.get("cuis"); + dict.get("haec_hic"); + dict.get("hoc_hac_cuis"); + dict.commitToDisk(); + assertNotEquals(0, dict.get("hac")); + assertEquals(0, dict.get("hic")); + } + + try (var dict = new DictionaryWriter(path.toFile(), 1L<<16, false)) { + assertNotEquals(0, dict.get("hoc")); + assertEquals(0, dict.get("hic")); + } + + path.toFile().delete(); + } + + @SneakyThrows + @Test + void get() { + var path = Files.createTempFile("dict", ".tmp"); + try (var dict = new DictionaryWriter(path.toFile(), 1L<<16, false)) { + dict.get("hic"); + dict.get("hac"); + dict.get("haec"); + dict.get("hoc"); + dict.commitToDisk(); + dict.get("quae"); + dict.get("quis"); + dict.get("quem"); + dict.get("cuis"); + dict.commitToDisk(); + assertNotEquals(0, dict.get("hac")); + assertEquals(0, dict.get("hic")); + } + + try (var dict = new DictionaryWriter(path.toFile(), 1L<<16, false)) { + assertNotEquals(0, dict.get("hoc")); + assertEquals(0, dict.get("hic")); + 
} + + path.toFile().delete(); + } + + @SneakyThrows + @Test + void getDoubleWrite() { + var path = Files.createTempFile("dict", ".tmp"); + + try (var dict = new DictionaryWriter(path.toFile(), 1L<<16, false)) { + dict.commitToDisk(); + } + + try (var dict = new DictionaryWriter(path.toFile(), 1L<<16, false)) { + dict.get("hic"); + dict.get("hac"); + dict.get("haec"); + dict.get("hoc"); + dict.get("quae"); + dict.get("quis"); + dict.get("quem"); + dict.get("cuis"); + dict.commitToDisk(); + assertNotEquals(0, dict.get("hac")); + assertEquals(0, dict.get("hic")); + } + + var dict = new DictionaryReader(new DictionaryWriter(path.toFile(), 1L<<16, false)); + + assertNotEquals(0, dict.get("hoc")); + assertEquals(0, dict.get("hic")); + + path.toFile().delete(); + } + + @SneakyThrows + @Test + void getDoubleWrite2() { + var path = Files.createTempFile("dict", ".tmp"); + + try (var dict = new DictionaryWriter(path.toFile(), 1L<<16, false)) { + dict.get("hic"); + dict.get("hac"); + dict.get("haec"); + dict.get("hoc"); + dict.get("quae"); + dict.get("quis"); + dict.get("quem"); + dict.get("cuis"); + dict.commitToDisk(); + assertNotEquals(0, dict.get("hac")); + assertEquals(0, dict.get("hic")); + } + + try (var dict = new DictionaryWriter(path.toFile(), 1L<<16, false)) { + dict.get("fe"); + dict.get("fi"); + dict.get("fo"); + dict.get("fum"); + dict.commitToDisk(); + assertNotEquals(0, dict.get("hac")); + assertEquals(0, dict.get("hic")); + } + + try (var dict = new DictionaryWriter(path.toFile(), 1L<<16, false)) { + dict.get("bip"); + dict.get("bap"); + dict.commitToDisk(); + } + + + var dict = new DictionaryReader(new DictionaryWriter(path.toFile(), 1L<<16, false)); + + assertEquals(0, dict.get("hic")); + assertEquals(1, dict.get("hac")); + assertEquals(2, dict.get("haec")); + assertEquals(3, dict.get("hoc")); + assertEquals(4, dict.get("quae")); + assertEquals(5, dict.get("quis")); + assertEquals(6, dict.get("quem")); + assertEquals(7, dict.get("cuis")); + assertEquals(8, 
dict.get("fe")); + assertEquals(9, dict.get("fi")); + assertEquals(10, dict.get("fo")); + assertEquals(11, dict.get("fum")); + assertEquals(12, dict.get("bip")); + assertEquals(13, dict.get("bap")); + path.toFile().delete(); + } + +} \ No newline at end of file diff --git a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/index/service/EdgeIndexClientTest.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/index/service/EdgeIndexClientTest.java new file mode 100644 index 00000000..bd3b194c --- /dev/null +++ b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/index/service/EdgeIndexClientTest.java @@ -0,0 +1,187 @@ +package nu.marginalia.wmsa.edge.index.service; + +import com.zaxxer.hikari.HikariDataSource; +import lombok.SneakyThrows; +import nu.marginalia.util.TestUtil; +import nu.marginalia.wmsa.client.exception.RemoteException; +import nu.marginalia.wmsa.configuration.server.Context; +import nu.marginalia.wmsa.configuration.server.Initialization; +import nu.marginalia.wmsa.edge.index.EdgeIndexService; +import nu.marginalia.wmsa.edge.index.IndexServicesFactory; +import nu.marginalia.wmsa.edge.index.client.EdgeIndexClient; +import nu.marginalia.wmsa.edge.index.model.IndexBlock; +import nu.marginalia.wmsa.edge.index.service.query.SearchIndexPartitioner; +import nu.marginalia.wmsa.edge.model.crawl.EdgePageWordSet; +import nu.marginalia.wmsa.edge.model.search.EdgeSearchSpecification; +import nu.marginalia.wmsa.edge.model.EdgeId; +import nu.marginalia.wmsa.edge.model.crawl.EdgePageWords; +import nu.marginalia.wmsa.edge.model.EdgeUrl; +import org.junit.jupiter.api.*; +import org.junit.jupiter.api.parallel.Execution; +import org.junit.jupiter.api.parallel.ExecutionMode; +import org.junit.jupiter.api.parallel.ResourceAccessMode; +import org.junit.jupiter.api.parallel.ResourceLock; +import spark.Spark; + +import java.io.File; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Arrays; +import java.util.List; +import 
java.util.stream.Collectors; + +import static nu.marginalia.util.TestUtil.getConnection; +import static nu.marginalia.wmsa.edge.index.EdgeIndexService.DYNAMIC_BUCKET_LENGTH; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +@ResourceLock(value = "mariadb", mode = ResourceAccessMode.READ_WRITE) +@Execution(ExecutionMode.SAME_THREAD) +@Tag("db") +public class EdgeIndexClientTest { + private static HikariDataSource dataSource; + private static EdgeIndexService service; + private static EdgeIndexClient client; + private static Path tempDir; + private static SearchIndexes indexes; + + @SneakyThrows + public static HikariDataSource provideConnection() { + return getConnection(); + } + + static int testPort = TestUtil.getPort(); + + @SneakyThrows + @BeforeAll + public static void setUpClass() { + Spark.port(testPort); + System.setProperty("service-name", "edge-index"); + + dataSource = provideConnection(); + dataSource.setKeepaliveTime(100); + dataSource.setIdleTimeout(100); + client = new EdgeIndexClient(); + client.setServiceRoute("127.0.0.1", testPort); + + tempDir = Files.createTempDirectory("EdgeIndexClientTest"); + + var servicesFactory = new IndexServicesFactory(tempDir,tempDir,tempDir,tempDir, + "writer-index", + "writer-dictionary", + "index-words-read", + "index-urls-read", + "index-words-write", + "index-urls-write", + 1L<<24, + id->false, + new SearchIndexPartitioner(null) + ); + + indexes = new SearchIndexes(servicesFactory, new SearchIndexPartitioner(null)); + service = new EdgeIndexService("127.0.0.1", + testPort, + new Initialization(), null, + indexes); + + Spark.awaitInitialization(); + } + + @Test + public void testMultiBucketHit() { + putWords(1, 1, -2, "fancy", "anagram", "dilbert", "whoah", "engram"); + putWords(2, 2, -5, "quibble", "angry", "whoah", "fancy"); + putWords(3, 3, -0.01, "strong", "manly", "muscles"); + indexes.repartition(); + indexes.preconvert(); + 
indexes.reindexAll(); + + var results = client.query(Context.internal(), EdgeSearchSpecification.justIncludes("fancy")).resultsList.get(IndexBlock.Title).get(0).results; + System.out.println(results); + List> flatResults = results.values().stream().flatMap(List::stream).map(rs -> rs.url).collect(Collectors.toList()); + + assertEquals(2, flatResults.size()); + assertTrue(flatResults.contains(new EdgeId(1))); + assertTrue(flatResults.contains(new EdgeId(2))); + } + + @Test + public void testLowHit() { + putWords(1, 4, 0, "elmoped"); + indexes.repartition(); + indexes.preconvert(); + indexes.reindexAll(); + var rsp = client.query(Context.internal(), EdgeSearchSpecification.justIncludes("elmoped")); + System.out.println(rsp); + assertEquals(4, rsp.resultsList.get(0).get(0).results.get(DYNAMIC_BUCKET_LENGTH).get(0).url.getId()); + } + + @Test + public void testHighHit() { + putWords(2, 5, -100, "trapphus"); + indexes.repartition(); + indexes.preconvert(); + indexes.reindexAll(); + var rsp = client.query(Context.internal(), EdgeSearchSpecification.justIncludes("trapphus")); + System.out.println(rsp); + assertEquals(5, rsp.resultsList.get(0).get(0).results.get(0).get(0).url.getId()); + } + + + @Test + public void testSearchDomain() { + putWords(8, 1, -2, "domain"); + putWords(8, 2, -5, "domain"); + putWords(10, 3, -0.01, "domain"); + putWords(11, 3, -0.01, "domain"); + putWords(12, 3, -0.01, "domain"); + indexes.repartition(); + indexes.preconvert(); + indexes.reindexAll(); + + var results = client.query(Context.internal(), EdgeSearchSpecification.justIncludes("fancy")).resultsList.get(IndexBlock.Title).get(0).results; + System.out.println(results); + List> flatResults = results.values().stream().flatMap(List::stream).map(rs -> rs.url).collect(Collectors.toList()); + + assertEquals(2, flatResults.size()); + assertTrue(flatResults.contains(new EdgeId(1))); + assertTrue(flatResults.contains(new EdgeId(2))); + } + + @Test + public void miss() { + indexes.repartition(); + 
indexes.preconvert(); + indexes.reindexAll(); + + try { + client.query(Context.internal(), EdgeSearchSpecification.justIncludes("skumtomte")); + Assertions.fail(); + } + catch (RemoteException ex) { + + } + } + + void putWords(int didx, int idx, double quality, String... words) { + EdgePageWords epw = new EdgePageWords(IndexBlock.Words); + epw.addAll(Arrays.asList(words)); + client.putWords(Context.internal(), new EdgeId<>(didx), new EdgeId<>(idx), quality, + new EdgePageWordSet(epw), 0).blockingSubscribe(); + } + + @AfterAll + public static void tearDownClass() { + for (File f : tempDir.toFile().listFiles()) { + if (f.isDirectory()) { + for (File f2 : f.listFiles()) { + f2.delete(); + } + } + f.delete(); + } + + tempDir.toFile().delete(); + } + +} diff --git a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/index/service/EdgeSearchTest.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/index/service/EdgeSearchTest.java new file mode 100644 index 00000000..216bc1f3 --- /dev/null +++ b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/index/service/EdgeSearchTest.java @@ -0,0 +1,524 @@ +package nu.marginalia.wmsa.edge.index.service; + +import com.opencsv.exceptions.CsvValidationException; +import com.zaxxer.hikari.HikariDataSource; +import lombok.SneakyThrows; +import nu.marginalia.util.TestUtil; +import nu.marginalia.wmsa.configuration.server.Context; +import nu.marginalia.wmsa.configuration.server.Initialization; +import nu.marginalia.wmsa.data_store.DataStoreService; +import nu.marginalia.wmsa.data_store.EdgeDataStoreService; +import nu.marginalia.wmsa.data_store.FileRepository; +import nu.marginalia.wmsa.data_store.client.DataStoreClient; +import nu.marginalia.wmsa.edge.archive.client.ArchiveClient; +import nu.marginalia.wmsa.edge.assistant.dict.DictionaryService; +import nu.marginalia.wmsa.edge.assistant.dict.NGramDict; +import nu.marginalia.wmsa.edge.assistant.dict.SpellChecker; +import nu.marginalia.wmsa.edge.assistant.eval.MathParser; +import 
nu.marginalia.wmsa.edge.assistant.eval.Units; +import nu.marginalia.wmsa.edge.assistant.EdgeAssistantService; +import nu.marginalia.wmsa.edge.assistant.client.AssistantClient; +import nu.marginalia.wmsa.edge.assistant.screenshot.ScreenshotService; +import nu.marginalia.wmsa.edge.crawler.domain.DomainCrawler; +import nu.marginalia.wmsa.edge.crawler.domain.DomainCrawlerRobotsTxt; +import nu.marginalia.wmsa.edge.crawler.domain.LinkParser; +import nu.marginalia.wmsa.edge.crawler.domain.language.LanguageFilter; +import nu.marginalia.wmsa.edge.crawler.domain.language.conf.LanguageModels; +import nu.marginalia.wmsa.edge.crawler.domain.language.processing.DocumentKeywordExtractor; +import nu.marginalia.wmsa.edge.crawler.domain.language.processing.SentenceExtractor; +import nu.marginalia.wmsa.edge.crawler.domain.processor.HtmlFeature; +import nu.marginalia.wmsa.edge.crawler.domain.processor.HtmlProcessor; +import nu.marginalia.wmsa.edge.crawler.fetcher.HttpFetcher; +import nu.marginalia.wmsa.edge.crawler.worker.GeoIpBlocklist; +import nu.marginalia.wmsa.edge.crawler.worker.IpBlockList; +import nu.marginalia.wmsa.edge.data.dao.*; +import nu.marginalia.wmsa.edge.data.dao.task.*; +import nu.marginalia.wmsa.edge.index.EdgeIndexService; +import nu.marginalia.wmsa.edge.index.IndexServicesFactory; +import nu.marginalia.wmsa.edge.index.client.EdgeIndexClient; +import nu.marginalia.wmsa.edge.index.service.query.SearchIndexPartitioner; +import nu.marginalia.util.ParallelPipe; +import nu.marginalia.wmsa.edge.integration.model.BasicDocumentData; +import nu.marginalia.wmsa.edge.integration.stackoverflow.StackOverflowPostProcessor; +import nu.marginalia.wmsa.edge.integration.stackoverflow.StackOverflowPostsReader; +import nu.marginalia.wmsa.edge.integration.stackoverflow.model.StackOverflowPost; +import nu.marginalia.wmsa.edge.integration.wikipedia.WikipediaProcessor; +import nu.marginalia.wmsa.edge.integration.wikipedia.WikipediaReader; +import 
nu.marginalia.wmsa.edge.integration.wikipedia.model.WikipediaArticle; +import nu.marginalia.wmsa.edge.model.*; +import nu.marginalia.wmsa.edge.model.crawl.*; +import nu.marginalia.wmsa.edge.search.EdgeSearchOperator; +import nu.marginalia.wmsa.edge.search.EdgeSearchService; +import nu.marginalia.wmsa.edge.search.UnitConversion; +import nu.marginalia.wmsa.edge.search.query.*; +import nu.marginalia.wmsa.edge.search.results.SearchResultDecorator; +import nu.marginalia.wmsa.edge.search.results.SearchResultValuator; +import nu.marginalia.wmsa.renderer.mustache.RendererFactory; +import org.jsoup.Jsoup; +import org.junit.jupiter.api.*; +import org.junit.jupiter.api.parallel.Execution; +import org.junit.jupiter.api.parallel.ExecutionMode; +import org.junit.jupiter.api.parallel.ResourceAccessMode; +import org.junit.jupiter.api.parallel.ResourceLock; +import org.mockito.Mockito; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import spark.Spark; + +import java.io.IOException; +import java.net.SocketTimeoutException; +import java.net.URI; +import java.net.URISyntaxException; +import java.net.URL; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.StandardOpenOption; +import java.time.LocalDateTime; +import java.util.*; + +import static nu.marginalia.util.TestUtil.evalScript; +import static nu.marginalia.util.TestUtil.getConnection; + +@ResourceLock(value = "mariadb", mode = ResourceAccessMode.READ_WRITE) +@Execution(ExecutionMode.SAME_THREAD) +@Tag("db") +public class EdgeSearchTest { + private static HikariDataSource dataSource; + private static EdgeIndexService indexService; + private static EdgeIndexClient indexClient; + private static Path tempDir; + private static EdgeDataStoreDao edgeStoreDao; + private static SentenceExtractor sentenceExtractor; + private static DocumentKeywordExtractor documentKeywordExtractor; + private static StackOverflowPostProcessor stackOverflowPostProcessor; + private static WikipediaProcessor 
wikipediaProcessor; + private static SearchIndexes indexes; + + Logger logger = LoggerFactory.getLogger(getClass()); + @SneakyThrows + public static HikariDataSource provideConnection() { + return getConnection(); + } + + static int testPort = TestUtil.getPort(); + + static Initialization init = new Initialization(); + private QueryParser parser; + private static NGramDict dict; + private static LanguageModels lm = new LanguageModels( + Path.of("/home/vlofgren/Work/ngrams/ngrams-generous-emstr.bin"), + Path.of("/home/vlofgren/Work/ngrams/tfreq-new-algo3.bin"), + Path.of("/home/vlofgren/Work/ngrams/opennlp-en-ud-ewt-sentence-1.0-1.9.3.bin"), + Path.of("/home/vlofgren/Work/ngrams/English.RDR"), + Path.of("/home/vlofgren/Work/ngrams/English.DICT"), + Path.of("/home/vlofgren/Work/ngrams/opennlp-en-ud-ewt-tokens-1.0-1.9.3.bin") + ); + + + @SneakyThrows + @BeforeAll + public static void setUpClass() { + Spark.port(testPort); + System.setProperty("service-name", "edge-index"); + System.setProperty("unit-test", "TRUE"); + Spark.staticFileLocation("/static/edge/"); + + dict = new NGramDict(lm); + + dataSource = provideConnection(); + dataSource.setKeepaliveTime(100); + dataSource.setIdleTimeout(100); + + indexClient = new EdgeIndexClient(); + indexClient.setServiceRoute("127.0.0.1", testPort); + + AssistantClient assistantClient = new AssistantClient(); + assistantClient.setServiceRoute("127.0.0.1", testPort); + + var dataStoreClient = new DataStoreClient(); + dataStoreClient.setServiceRoute("127.0.0.1", testPort); + tempDir = Files.createTempDirectory("EdgeIndexClientTest"); + + var servicesFactory = new IndexServicesFactory(tempDir,tempDir,tempDir,tempDir, + "writer-index", + "writer-dictionary", + "index-words-read", + "index-urls-read", + "index-words-write", + "index-urls-write", + 1L<<24, + id->false, + new SearchIndexPartitioner(null) + ); + + servicesFactory.getDictionaryWriter().noCommit = true; + + edgeStoreDao = new EdgeDataStoreDaoImpl(dataSource); + + 
sentenceExtractor = new SentenceExtractor(lm); + documentKeywordExtractor = new DocumentKeywordExtractor(new NGramDict(lm)); + + stackOverflowPostProcessor = new StackOverflowPostProcessor(sentenceExtractor, documentKeywordExtractor); + wikipediaProcessor = new WikipediaProcessor(sentenceExtractor, documentKeywordExtractor); + + var valuator = new SearchResultValuator(dict); + EdgeSearchService searchService = new EdgeSearchService("127.0.0.1", testPort, + edgeStoreDao, indexClient, + new RendererFactory(), new Initialization(), null, + dataStoreClient, assistantClient, new UnitConversion(assistantClient), + new EdgeSearchOperator(assistantClient, edgeStoreDao, indexClient, new QueryFactory(lm, dict, new EnglishDictionary(dict)), new SearchResultDecorator(edgeStoreDao, valuator), valuator), + new EdgeDomainBlacklistImpl(dataSource), new ScreenshotService(edgeStoreDao)); + + EdgeAssistantService assistantService = new EdgeAssistantService("127.0.0.1", testPort, Initialization.already(), null, + new DictionaryService(dataSource, new SpellChecker()), new MathParser(), + new Units(new MathParser()), null, null, + new ScreenshotService(edgeStoreDao), null); + + indexes = new SearchIndexes(servicesFactory, new SearchIndexPartitioner(null)); + + indexService = new EdgeIndexService("127.0.0.1", + testPort, + init, + null, + indexes); + + new DataStoreService("127.0.0.1", testPort, new FileRepository(), dataSource, new EdgeDataStoreService(new EdgeDataStoreDaoImpl(dataSource)), Initialization.already(), null); + + + Spark.awaitInitialization(); + } + + + @SneakyThrows + @BeforeEach + public void clearDb() { + evalScript(dataSource, "sql/data-store-init.sql"); + evalScript(dataSource, "sql/edge-crawler-cache.sql"); + evalScript(dataSource, "sql/reference-data.sql"); + + try (var connection = dataSource.getConnection()) { + + try (var stmt = connection.createStatement()) { + Assertions.assertTrue(stmt.executeUpdate("DELETE FROM EC_URL") >= 0); + 
Assertions.assertTrue(stmt.executeUpdate("DELETE FROM EC_DOMAIN_LINK") >= 0); + Assertions.assertTrue(stmt.executeUpdate("DELETE FROM EC_DOMAIN") >= 0); + } + } + } + + @Test @Disabled + public void getUrls() throws IOException { + var doc = Jsoup.parse(new URL("https://search.marginalia.nu/search?query=putty%20ssh%20download"), 1000); + + doc.select(".teknisk a").stream().map(e -> e.attr("href")).forEach( + href -> { + try { + var path = Path.of("/home/vlofgren/Code/tmp-data/").resolve("url-"+href.hashCode()); + if (!Files.exists(path)) { + var doc2 =Jsoup.parse(new URL(href), 9000); + Files.writeString(path, doc2.outerHtml(), StandardOpenOption.CREATE_NEW); + } + + } catch (IOException e) { + e.printStackTrace(); + } + } + ); + } + + + + HtmlProcessor processor = new HtmlProcessor(new DocumentKeywordExtractor(new NGramDict(lm)),new SentenceExtractor(lm)); + + @SneakyThrows + @Test + public void justLoadUrls() { + var data = Path.of("/home/vlofgren/Code/tmp-data/"); + for (var file : Objects.requireNonNull(data.toFile().listFiles())) { + testNgram(file.toPath()); + } +// cralUrl(new EdgeUrl("https://memex.marginalia.nu/"), 5); +// loadFile(Path.of("/home/vlofgren/Work/tmp.html")); + } + + + @SneakyThrows + @Test + // @Disabled + public void runStackOverflow() { + var data = Path.of("/home/vlofgren/Code/tmp-data/"); + + + var pipe = new ParallelPipe("pipe", 32, 5, 2) { + @Override + public BasicDocumentData onProcess(StackOverflowPost stackOverflowPost) { + return stackOverflowPostProcessor.process(stackOverflowPost); + } + + @Override + public void onReceive(BasicDocumentData stackOverflowIndexData) { + loadStackOverflowPost(stackOverflowIndexData); + } + }; + + var reader = new StackOverflowPostsReader("/mnt/storage/downloads.new/stackexchange/sites/philosophy/Posts.xml", + new EdgeDomain("philosophy.stackexchange.com"), pipe::accept); + reader.join(); + + init.setReady(); + indexService.initialize(); + + while (!indexes.repartition()); + while 
(!indexes.preconvert()); + while (!indexes.reindexAll()); + + System.err.println("http://localhost:"+testPort + "/public/search?query=putty%20ssh%20download%20site:localhost"); + Thread.currentThread().join(); + } + + @SneakyThrows + @Test + // @Disabled + public void runWikipedia() { + var data = Path.of("/home/vlofgren/Code/tmp-data/"); + + var reader = new WikipediaReader("/home/vlofgren/Work/wikipedia_en_100_nopic_2021-06.zim", new EdgeDomain("encyclopedia.marginalia.nu"), + this::loadWikipediaPost); + reader.join(); + + init.setReady(); + indexService.initialize(); + + while (!indexes.repartition()); + while (!indexes.preconvert()); + while (!indexes.reindexAll()); + + System.err.println("http://localhost:"+testPort + "/public/search?query=putty%20ssh%20download%20site:localhost"); + Thread.currentThread().join(); + } + + final LinkParser lp = new LinkParser(); + + + private void loadStackOverflowPost(BasicDocumentData indexData) { + + var url = indexData.getUrl(); + + edgeStoreDao.putUrl(-2, url); + edgeStoreDao.putUrlVisited(new EdgeUrlVisit(url, indexData.hashCode, -2., + indexData.getTitle(), + indexData.getDescription() + , "", + EdgeHtmlStandard.HTML5.toString(), + 1 << HtmlFeature.JS.bit, + 1000, 1000, EdgeUrlState.OK)); + edgeStoreDao.putLink(false, indexData.domainLinks); + + putWords(edgeStoreDao.getDomainId(url.domain).getId(), + edgeStoreDao.getUrlId(url).getId(), + -2, + indexData.words); + } + + private void loadWikipediaPost(WikipediaArticle post) { + + var indexData = wikipediaProcessor.process(post); + + var url = indexData.getUrl(); + + edgeStoreDao.putUrl(-2, url); + edgeStoreDao.putUrlVisited(new EdgeUrlVisit(url, post.body.hashCode(), -2., + indexData.getTitle(), + indexData.getDescription() + , "", + EdgeHtmlStandard.HTML5.toString(), + 1 << HtmlFeature.JS.bit, + 1000, 1000, EdgeUrlState.OK)); + edgeStoreDao.putLink(false, indexData.domainLinks); + + putWords(edgeStoreDao.getDomainId(url.domain).getId(), + 
edgeStoreDao.getUrlId(url).getId(), + -2, + indexData.words); + } + + @SneakyThrows + @Test + // @Disabled + public void run() { + var data = Path.of("/home/vlofgren/Code/tmp-data/"); + for (var file : Objects.requireNonNull(data.toFile().listFiles())) { + loadFile(file.toPath()); + } + +// cralUrl(new EdgeUrl("https://search.marginalia.nu/"), 5); +// cralUrl(new EdgeUrl("https://memex.marginalia.nu/"), 5); + +// loadUrl("https://reddit.marginalia.nu", "/reddit/login.html"); + + var conn = getConnection(); + ArrayList ids = new ArrayList<>(); + try (var c = conn.getConnection()) { + var stmt = c.prepareStatement("SELECT ID FROM EC_DOMAIN"); + + var rsp = stmt.executeQuery(); + while (rsp.next()) { + ids.add(rsp.getInt(1)); + } + + for (int i = 0; i < ids.size(); i++) { + try (var s2 = c.prepareStatement("INSERT INTO EC_DOMAIN_NEIGHBORS(DOMAIN_ID, NEIGHBOR_ID, ADJ_IDX) VALUES (?,?,?)")) { + for (int j = 0; j < 15; j++) { + s2.setInt(1, ids.get(i)); + s2.setInt(2, ids.get((int)(ids.size()*Math.random()))); + s2.setInt(3, j); + s2.addBatch(); + } + s2.executeBatch(); + } + } + + } + + init.setReady(); + indexService.initialize(); + + while (!indexes.repartition()); + while (!indexes.preconvert()); + while (!indexes.reindexAll()); + + System.err.println("http://localhost:"+testPort + "/public/search?query=putty%20ssh%20download%20site:localhost"); + Thread.currentThread().join(); + } + + @SneakyThrows + private void loadUrl(String uri) { + try { + var doc = Jsoup.parse(new URL(uri), 5000); + var res = processor.processHtmlPage(new EdgeRawPageContents(new EdgeUrl(new URI(uri)), new EdgeUrl(new URI(uri)), "", null, "", true, + LocalDateTime.now().toString()), + doc); + var url = new EdgeUrl(uri); + edgeStoreDao.putUrl(-2, url); + edgeStoreDao.putUrlVisited(new EdgeUrlVisit(url, 5, -2., + res.metadata.title, + res.metadata.description, "", + res.metadata.htmlStandard.toString(), + res.metadata.features, + 0, 0, EdgeUrlState.OK)); + + + logger.info("LW: {}", 
res.linkWords); + putWords(edgeStoreDao.getDomainId(url.domain).getId(), + edgeStoreDao.getUrlId(url).getId(), + -2, + res.words); + } + catch (SocketTimeoutException ex) { + ex.printStackTrace(); + } + } + + private void cralUrl(EdgeUrl url, int pass) throws CsvValidationException, IOException { + var fetcher = new HttpFetcher("search.marginalia.nu"); + var ingress = new EdgeIndexTask(url.domain, pass, 100, 1.); + ingress.urls.add(url); + DomainCrawler dc = new DomainCrawler(fetcher, null, processor, Mockito.mock(ArchiveClient.class), new DomainCrawlerRobotsTxt(fetcher, "search.marginalia.nu") + , new LanguageFilter(), ingress , new IpBlockList(new GeoIpBlocklist())); + System.err.println("Crawling " + url); + var cr = dc.crawl(); + System.err.println("Crawled " + url); + cr.pageContents.values().forEach(res -> { + logger.info("Put URL {} {}%", res.url, 100*Math.exp(res.metadata.quality())); + edgeStoreDao.putUrl(res.metadata.quality(), res.url); + edgeStoreDao.putUrlVisited(new EdgeUrlVisit(res.url, res.hash, res.metadata.quality(), + res.metadata.title, + res.metadata.description, "", + res.metadata.htmlStandard.toString(), + res.metadata.features, res.metadata.textDistinctWords, res.metadata.totalWords, EdgeUrlState.OK)); + + putWords(edgeStoreDao.getDomainId(res.url.domain).getId(), + edgeStoreDao.getUrlId(res.url).getId(), + -2, + res.words); + }); + } + + private void testNgram(Path path) throws IOException, URISyntaxException { + var doc = Jsoup.parse(Files.readString(path)); + doc.getElementsByTag("a").remove(); + String text = doc.text(); + + var res = processor.processHtmlPage(new EdgeRawPageContents(new EdgeUrl("http://www.example.com/"),new EdgeUrl("http://www.example.com/"), "", null, "", true, + LocalDateTime.now().toString()), + doc); + if (null == res) { + return; + } + System.out.println(doc.getElementsByTag("title").text()); + System.out.println(res.metadata.description); + System.out.println("---"); + +// 
System.out.println(Optional.ofNullable(doc.getElementsByTag("h1")).map(Elements::first).map(Element::text).orElse("")); +// +// System.out.println(res.words.get(IndexBlock.Topic_Names)); +// System.out.println(res.words.get(IndexBlock.Title_Names)); +// System.out.println(res.words.get(IndexBlock.Body_Names)); +// System.out.println(res.words.get(IndexBlock.TextRank)); +// System.out.println(res.words.get(IndexBlock.Keywords)); +// System.out.println(res.words.get(IndexBlock.Names)); +// System.out.println(res.words.get(IndexBlock.Title)); +// System.out.println(res.words.get(IndexBlock.Topic)); +// System.out.println(res.words.get(IndexBlock.Body)); +// +// var multi = new HashSet<>(); +// var trs = new HashSet<>(res.words.get(IndexBlock.TextRank).words); +// multi.addAll(Sets.intersection(trs,new HashSet<>(res.words.get(IndexBlock.Names).words))); +// multi.addAll(Sets.intersection(trs,new HashSet<>(res.words.get(IndexBlock.Title).words))); +// multi.addAll(Sets.intersection(trs,new HashSet<>(res.words.get(IndexBlock.Topic).words))); +// var uniq = new HashSet<>(Sets.difference(trs, multi)); +// res.words.get(IndexBlock.Body_Names).words.forEach(multi::remove); +// res.words.get(IndexBlock.Topic_Names).words.forEach(multi::remove); +// res.words.get(IndexBlock.Title_Names).words.forEach(multi::remove); +// System.out.println(multi); +// System.out.println(uniq); +// System.out.println("--\n--\n"); + + } + + private void loadFile(Path path) throws IOException, URISyntaxException { + var doc = Jsoup.parse(Files.readString(path)); + var url = new EdgeUrl("http://" + Math.abs(path.hashCode()) + ".example.com/"); + var res = processor.processHtmlPage(new EdgeRawPageContents(url, url, "", null, (int)(Math.random()*255)+"."+(int)(Math.random()*255)+"."+(int)(Math.random()*255), Math.random() > 0.5, LocalDateTime.now().toString()), + doc); + if (null == res) { + System.err.println("*** did not insert " + path); + return; + } + edgeStoreDao.putUrl(-2, url); + 
edgeStoreDao.putUrlVisited(new EdgeUrlVisit(url, 5, -2., + res.metadata.title, + res.metadata.description, res.ipAddress, + res.metadata.htmlStandard.toString(), + res.metadata.features, + 0, 0, EdgeUrlState.OK)); + + logger.info("LW: {}", res.linkWords); + putWords(edgeStoreDao.getDomainId(url.domain).getId(), + edgeStoreDao.getUrlId(url).getId(), + -2, + res.words + ); + + } + void putWords(int didx, int idx, double quality, EdgePageWordSet wordsSet) { + indexClient.putWords(Context.internal(), new EdgeId<>(didx), new EdgeId<>(idx), quality, + wordsSet, 0).blockingSubscribe(); + } + + @AfterAll + public static void tearDownClass() { + nu.marginalia.util.test.TestUtil.clearTempDir(tempDir); + } + +} diff --git a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/index/service/EdgeSearchTestLocal.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/index/service/EdgeSearchTestLocal.java new file mode 100644 index 00000000..c3b605a9 --- /dev/null +++ b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/index/service/EdgeSearchTestLocal.java @@ -0,0 +1,143 @@ +package nu.marginalia.wmsa.edge.index.service; + +import com.zaxxer.hikari.HikariDataSource; +import lombok.SneakyThrows; +import nu.marginalia.util.TestUtil; +import nu.marginalia.wmsa.configuration.ServiceDescriptor; +import nu.marginalia.wmsa.configuration.module.DatabaseModule; +import nu.marginalia.wmsa.configuration.server.Initialization; +import nu.marginalia.wmsa.data_store.DataStoreService; +import nu.marginalia.wmsa.data_store.EdgeDataStoreService; +import nu.marginalia.wmsa.data_store.FileRepository; +import nu.marginalia.wmsa.data_store.client.DataStoreClient; +import nu.marginalia.wmsa.edge.assistant.EdgeAssistantService; +import nu.marginalia.wmsa.edge.assistant.client.AssistantClient; +import nu.marginalia.wmsa.edge.assistant.dict.DictionaryService; +import nu.marginalia.wmsa.edge.assistant.dict.NGramDict; +import nu.marginalia.wmsa.edge.assistant.dict.SpellChecker; +import 
nu.marginalia.wmsa.edge.assistant.eval.MathParser; +import nu.marginalia.wmsa.edge.assistant.eval.Units; +import nu.marginalia.wmsa.edge.assistant.screenshot.ScreenshotService; +import nu.marginalia.wmsa.edge.crawler.domain.language.conf.LanguageModels; +import nu.marginalia.wmsa.edge.data.dao.EdgeDataStoreDao; +import nu.marginalia.wmsa.edge.data.dao.EdgeDataStoreDaoImpl; +import nu.marginalia.wmsa.edge.data.dao.task.EdgeDomainBlacklistImpl; +import nu.marginalia.wmsa.edge.index.client.EdgeIndexClient; +import nu.marginalia.wmsa.edge.search.EdgeSearchOperator; +import nu.marginalia.wmsa.edge.search.EdgeSearchService; +import nu.marginalia.wmsa.edge.search.UnitConversion; +import nu.marginalia.wmsa.edge.search.query.EnglishDictionary; +import nu.marginalia.wmsa.edge.search.query.QueryFactory; +import nu.marginalia.wmsa.edge.search.query.QueryParser; +import nu.marginalia.wmsa.edge.search.results.SearchResultDecorator; +import nu.marginalia.wmsa.edge.search.results.SearchResultValuator; +import nu.marginalia.wmsa.renderer.mustache.RendererFactory; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Tag; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.parallel.Execution; +import org.junit.jupiter.api.parallel.ExecutionMode; +import org.junit.jupiter.api.parallel.ResourceAccessMode; +import org.junit.jupiter.api.parallel.ResourceLock; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import spark.Spark; + +import java.nio.file.Files; +import java.nio.file.Path; + +import static nu.marginalia.util.TestUtil.getConnection; + +@Tag("nobuild") +@ResourceLock(value = "mariadb", mode = ResourceAccessMode.READ_WRITE) +@Execution(ExecutionMode.SAME_THREAD) +public class EdgeSearchTestLocal { + private static HikariDataSource dataSource; + private static EdgeIndexClient indexClient; + private static Path tempDir; + private static EdgeDataStoreDao edgeStoreDao; + + Logger logger = 
LoggerFactory.getLogger(getClass()); + @SneakyThrows + public static HikariDataSource provideConnection() { + return getConnection(); + } + + static int testPort = TestUtil.getPort(); + + static Initialization init = new Initialization(); + private QueryParser parser; + private static NGramDict dict; + private static LanguageModels lm = new LanguageModels( + Path.of("/home/vlofgren/Work/ngrams/ngrams-generous-emstr.bin"), + Path.of("/home/vlofgren/Work/ngrams/tfreq-new-algo3.bin"), + Path.of("/home/vlofgren/Work/ngrams/opennlp-en-ud-ewt-sentence-1.0-1.9.3.bin"), + Path.of("/home/vlofgren/Work/ngrams/English.RDR"), + Path.of("/home/vlofgren/Work/ngrams/English.DICT"), + Path.of("/home/vlofgren/Work/ngrams/opennlp-en-ud-ewt-tokens-1.0-1.9.3.bin") + ); + + + @SneakyThrows + @BeforeAll + public static void setUpClass() { + Spark.port(testPort); + System.setProperty("service-name", "edge-index"); + System.setProperty("unit-test", "TRUE"); + Spark.staticFileLocation("/static/edge/"); + + dict = new NGramDict(lm); + + dataSource = new DatabaseModule().provideConnection(); + dataSource.setKeepaliveTime(100); + dataSource.setIdleTimeout(100); + + indexClient = new EdgeIndexClient(); + indexClient.setServiceRoute("127.0.0.1", ServiceDescriptor.EDGE_INDEX.port); + + AssistantClient assistantClient = new AssistantClient(); + assistantClient.setServiceRoute("127.0.0.1", testPort); + + var dataStoreClient = new DataStoreClient(); + dataStoreClient.setServiceRoute("127.0.0.1", testPort); + tempDir = Files.createTempDirectory("EdgeIndexClientTest"); + + edgeStoreDao = new EdgeDataStoreDaoImpl(dataSource); + + var valuator = new SearchResultValuator(dict); + EdgeSearchService searchService = new EdgeSearchService("127.0.0.1", testPort, + edgeStoreDao, indexClient, + new RendererFactory(), new Initialization(), null, + dataStoreClient, assistantClient, new UnitConversion(assistantClient), + new EdgeSearchOperator(assistantClient, edgeStoreDao, indexClient, new QueryFactory(lm, dict, 
new EnglishDictionary(dict)), new SearchResultDecorator(edgeStoreDao, valuator), valuator), + new EdgeDomainBlacklistImpl(dataSource), new ScreenshotService(edgeStoreDao)); + + EdgeAssistantService assistantService = new EdgeAssistantService("127.0.0.1", testPort, Initialization.already(), null, + new DictionaryService(dataSource, new SpellChecker()), new MathParser(), + new Units(new MathParser()), null, null, + new ScreenshotService(edgeStoreDao), null); + + new DataStoreService("127.0.0.1", testPort, new FileRepository(), dataSource, new EdgeDataStoreService(new EdgeDataStoreDaoImpl(dataSource)), Initialization.already(), null); + + + Spark.awaitInitialization(); + } + + + @SneakyThrows + @Test + // @Disabled + public void run() { + init.setReady(); + + System.err.println("http://localhost:"+testPort + "/public/search?query=putty%20ssh%20download%20site:localhost"); + Thread.currentThread().join(); + } + + @AfterAll + public static void tearDownClass() { + nu.marginalia.util.test.TestUtil.clearTempDir(tempDir); + } + +} diff --git a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/index/service/MultimapFileTest.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/index/service/MultimapFileTest.java new file mode 100644 index 00000000..003552b2 --- /dev/null +++ b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/index/service/MultimapFileTest.java @@ -0,0 +1,138 @@ +package nu.marginalia.wmsa.edge.index.service; + +import lombok.SneakyThrows; +import nu.marginalia.util.multimap.MultimapFileLong; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import java.io.File; +import java.io.IOException; +import java.io.RandomAccessFile; +import java.nio.ByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.file.Files; +import java.nio.file.Path; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +class 
MultimapFileTest { + File tmp; + File tmp2; + + @BeforeEach @SneakyThrows + public void setUp() { + + tmp = Files.createTempFile("test", "test").toFile(); + tmp2 = Files.createTempFile("test", "test").toFile(); + + } + @AfterEach + public void tearDown() { + tmp.delete(); + tmp2.delete(); + } + + @SneakyThrows + @Test + void transfer() { + ByteBuffer buf = ByteBuffer.allocateDirect(77); + try (var source = MultimapFileLong.forOutput(tmp.toPath(), 1024); + var dest = new MultimapFileLong(tmp, FileChannel.MapMode.READ_WRITE, 1024, 8); + ) { + for (int i = 0; i < 1024; i++) { + source.put(i, i); + } + source.force(); + dest.transferFromFileChannel(new RandomAccessFile(tmp, "r").getChannel(), 11, 55, 100); + for (int i = 0; i < 45; i++) { + System.out.println("source=" + (11+i) + ", dest = " + dest.get(11+i)); + assertEquals(55+i, dest.get(11+i)); + } + } + } + + @SneakyThrows + @Test + void put() { + var file = new MultimapFileLong(new RandomAccessFile(tmp, "rw"), FileChannel.MapMode.READ_WRITE, 32, 8, false); + for (int i = 0; i < 32; i++) { + file.put(i, i); + } + for (int i = 0; i < 32; i++) { + assertEquals(i, file.get(i)); + } + } + + @SneakyThrows + @Test + void read() { + var file = new MultimapFileLong(new RandomAccessFile(tmp, "rw"), FileChannel.MapMode.READ_WRITE, 32, 8, false); + for (int i = 0; i < 32; i++) { + file.put(i, i); + } + + for (int i = 0; i < 32-6; i++) { + long[] vals = new long[6]; + file.read(vals, i); + for (int j = 0; j < 6; j++) { + assertEquals(i+j, vals[j]); + } + } + + } + + @Test + void write() throws IOException { + var file = new MultimapFileLong(new RandomAccessFile(tmp, "rw"), FileChannel.MapMode.READ_WRITE, 32, 8, false); + + for (int i = 0; i < 32-6; i++) { + file.write(new long[] { 0,1,2,3,4,5}, i); + for (int j = 0; j < 6; j++) { + assertEquals(j, file.get(i+j)); + } + } + + } + + @Test + void sortInternal() throws IOException { + var file = new MultimapFileLong(new RandomAccessFile(tmp, "rw"), FileChannel.MapMode.READ_WRITE, 
32, 8, false); + var sorter = file.createSorter(Path.of("/tmp"), 16); + var searcher = file.createSearcher(); + for (int i = 0; i < 32; i++) { + file.put(i, 32-i); + } + + sorter.sort( 2, 14); + + for (int i = 2+1; i < 16; i++) { + assertTrue(file.get(i) > file.get(i-1)); + assertTrue(searcher.binarySearch(file.get(i), 2, 18)); + } + } + + @Test + void sortExternal() throws IOException { + var file = new MultimapFileLong(new RandomAccessFile(tmp, "rw"), FileChannel.MapMode.READ_WRITE, 32, 8, false); + var sorter = file.createSorter(Path.of("/tmp"), 2); + var searcher = file.createSearcher(); + + for (int i = 0; i < 32; i++) { + file.put(i, 32-i); + } + + sorter.sort( 2, 14); + file.force(); + + for (int i = 2+1; i < 16; i++) { + assertTrue(file.get(i) > file.get(i-1)); + assertTrue(searcher.binarySearch(file.get(i), 2, 18)); + } + } + + @Test + void close() { + } +} \ No newline at end of file diff --git a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/index/service/SearchIndexConverterTest.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/index/service/SearchIndexConverterTest.java new file mode 100644 index 00000000..f69ad27c --- /dev/null +++ b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/index/service/SearchIndexConverterTest.java @@ -0,0 +1,88 @@ +package nu.marginalia.wmsa.edge.index.service; + +import nu.marginalia.wmsa.edge.index.model.IndexBlock; +import nu.marginalia.wmsa.edge.index.service.index.SearchIndexConverter; +import nu.marginalia.wmsa.edge.index.service.query.SearchIndexPartitioner; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Path; + +class SearchIndexConverterTest { + + private final Logger logger = LoggerFactory.getLogger(getClass()); + + @Test @Disabled + public void test() throws IOException { + // File dictFile = new File("/home/vlofgren/dictionary.dat"); + 
File inFile = new File("/home/vlofgren/Work/converter/3/page-index.dat"); + + new SearchIndexConverter(IndexBlock.Title, 0, Path.of("/tmp"), inFile, + new File("/home/vlofgren/Work/converter/words.dat"), + new File("/home/vlofgren/Work/converter/urls.dat"), new SearchIndexPartitioner(null), val -> false); + + // sanityCheck(); + } + + @Test @Disabled + public void sanityCheck() throws IOException { + File inFile = new File("/home/vlofgren/write/6/page-index.dat"); + +// SearchIndexReader sir = new SearchIndexReader(new SearchIndex[]{ +// new SearchIndex("body", Path.of("/tmp"), +// new File("/home/vlofgren/data/urls.dat"), +// new File("/home/vlofgren/data/words.dat")), +// new SearchIndex("body", Path.of("/tmp"), +// new File("/home/vlofgren/data/urls.dat"), +// new File("/home/vlofgren/data/words.dat")) +// , +// new SearchIndex("body", Path.of("/tmp"), +// new File("/home/vlofgren/data/urls.dat"), +// new File("/home/vlofgren/data/words.dat")) +// , +// new SearchIndex("body", Path.of("/tmp"), +// new File("/home/vlofgren/data/urls.dat"), +// new File("/home/vlofgren/data/words.dat")) +// }); + +// getQuery(sir, new EdgeIndexSearchTerms(List.of(152, 106), Collections.emptyList())).stream().forEach(System.out::println); +// sir.findWord(152).also(106).stream().forEach(System.out::println); +// scanFile(inFile, (url, word) -> { +// //System.out.println(url + " " + word); +// if (!sir.findWord(word).stream().anyMatch(url::equals)) { +// logger.error("Can't find word {} in {}", word, url); +// } +// }); + + + } +/* + private SearchIndexReader.Query getQuery(SearchIndexReader indexReader, EdgeIndexSearchTerms searchTerms) { + var orderedIncludes = searchTerms.includes + .stream() + .sorted(Comparator.comparingLong(indexReader::numHits)) + .distinct() + .mapToInt(Integer::intValue) + .toArray(); + + logger.info("Includes: ({}); excludes: ({})", Arrays. 
+ stream(orderedIncludes) + .mapToObj(String::valueOf) + .collect(Collectors.joining(",")), + searchTerms.excludes.stream().map(String::valueOf).collect(Collectors.joining(","))); + SearchIndexReader.Query query = indexReader.findWord(orderedIncludes[0]); + for (int i = 1; i < orderedIncludes.length; i++) { + query = query.also(orderedIncludes[i]); + } + for (int term : searchTerms.excludes) { + query = query.not(term); + } + return query; + } + +*/ +} \ No newline at end of file diff --git a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/index/service/SearchIndexWriterTest.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/index/service/SearchIndexWriterTest.java new file mode 100644 index 00000000..4a1e3e0d --- /dev/null +++ b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/index/service/SearchIndexWriterTest.java @@ -0,0 +1,90 @@ +package nu.marginalia.wmsa.edge.index.service; + +import lombok.SneakyThrows; +import nu.marginalia.wmsa.edge.index.model.IndexBlock; +import nu.marginalia.wmsa.edge.index.service.dictionary.DictionaryWriter; +import nu.marginalia.wmsa.edge.index.service.index.SearchIndex; +import nu.marginalia.wmsa.edge.index.service.index.SearchIndexConverter; +import nu.marginalia.wmsa.edge.index.service.index.SearchIndexReader; +import nu.marginalia.wmsa.edge.index.service.index.SearchIndexWriterImpl; +import nu.marginalia.wmsa.edge.index.service.query.IndexSearchBudget; +import nu.marginalia.wmsa.edge.index.service.query.SearchIndexPartitioner; +import nu.marginalia.wmsa.edge.model.EdgeId; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Arrays; +import java.util.EnumMap; + +import static nu.marginalia.util.dict.DictionaryHashMap.NO_VALUE; +import static org.junit.jupiter.api.Assertions.*; + +class SearchIndexWriterTest { + DictionaryWriter dictionaryWriter; 
+ SearchIndexWriterImpl writer; + + Path indexFile; + Path wordsFile1; + Path urlsFile1; + Path dictionaryFile; + + @BeforeEach @SneakyThrows + void setUp() { + dictionaryFile = Files.createTempFile("tmp", ".dict"); + dictionaryFile.toFile().deleteOnExit(); + + dictionaryWriter = new DictionaryWriter(dictionaryFile.toFile(), 1L<<16, false); + + indexFile = Files.createTempFile("tmp", ".idx"); + indexFile.toFile().deleteOnExit(); + writer = new SearchIndexWriterImpl(dictionaryWriter, indexFile.toFile()); + + wordsFile1 = Files.createTempFile("words1", ".idx"); + urlsFile1 = Files.createTempFile("urls1", ".idx"); + } + + @SneakyThrows + @AfterEach + void tearDown() { + dictionaryWriter.close(); + writer.close(); + indexFile.toFile().delete(); + dictionaryFile.toFile().delete(); + urlsFile1.toFile().delete(); + wordsFile1.toFile().delete(); + } + + public long[] findWord(SearchIndexReader reader, String word, IndexBlock block) { + IndexSearchBudget budget = new IndexSearchBudget(1_000_000); + return reader.findWord(block, budget, lv->true, dictionaryWriter.getReadOnly(word)).stream().toArray(); + } + + @Test + void put() throws IOException { + writer.put(new EdgeId<>(0), new EdgeId<>(1), IndexBlock.Words, Arrays.asList("Hello", "Salvete", "everyone!", "This", "is", "Bob")); + writer.put(new EdgeId<>(0), new EdgeId<>(2), IndexBlock.Words, Arrays.asList("Salvete", "omnes!", "Bob", "sum", "Hello")); + writer.forceWrite(); + + new SearchIndexConverter(IndexBlock.Words, 0, Path.of("/tmp"), indexFile.toFile(), wordsFile1.toFile(), urlsFile1.toFile(), new SearchIndexPartitioner(null), val -> false); + + EnumMap indices = new EnumMap(IndexBlock.class); + indices.put(IndexBlock.Words, new SearchIndex("0", urlsFile1.toFile(), wordsFile1.toFile())); + + var reader = new SearchIndexReader(indices); + + int bobId = dictionaryWriter.getReadOnly("Bob"); + assertNotEquals(NO_VALUE, bobId); + + assertEquals(2, reader.numHits(IndexBlock.Words, bobId)); + assertArrayEquals(new long[] { 
1, 2 }, findWord(reader,"Bob", IndexBlock.Words)); + assertArrayEquals(new long[] { 2 }, findWord(reader,"sum", IndexBlock.Words)); + assertArrayEquals(new long[] { }, findWord(reader,"New Word", IndexBlock.Words)); + + writer.close(); + } + +} \ No newline at end of file diff --git a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/index/service/TokenCompressorTest.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/index/service/TokenCompressorTest.java new file mode 100644 index 00000000..ee84472e --- /dev/null +++ b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/index/service/TokenCompressorTest.java @@ -0,0 +1,28 @@ +package nu.marginalia.wmsa.edge.index.service; + +import nu.marginalia.wmsa.edge.index.service.dictionary.TokenCompressor; +import org.junit.jupiter.api.Test; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; + +class TokenCompressorTest { + + @Test + public void getWordBytes() { + final Map map = new HashMap<>(); + TokenCompressor tc = new TokenCompressor(word -> { + map.put(word, map.size()); + return map.size()-1; + }); + + System.out.println(Arrays.toString(tc.getWordBytes("308"))); + System.out.println(Arrays.toString(tc.getWordBytes(".308"))); + System.out.println(Arrays.toString(tc.getWordBytes("308."))); + System.out.println(Arrays.toString(tc.getWordBytes("30.8."))); + System.out.println(Arrays.toString(tc.getWordBytes("30..."))); + + map.entrySet().forEach(System.out::println); + } +} \ No newline at end of file diff --git a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/index/service/util/ByteFolderTest.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/index/service/util/ByteFolderTest.java new file mode 100644 index 00000000..2fc21ac1 --- /dev/null +++ b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/index/service/util/ByteFolderTest.java @@ -0,0 +1,35 @@ +package nu.marginalia.wmsa.edge.index.service.util; + +import nu.marginalia.util.ByteFolder; +import 
org.junit.jupiter.api.Test; + +import static nu.marginalia.util.ByteFolder.decodeBytes; +import static org.junit.jupiter.api.Assertions.*; + +class ByteFolderTest { + + @Test + void foldBytes() { + ByteFolder folder = new ByteFolder(); + // Edge cases + assertArrayEquals(new byte[]{1,0}, folder.foldBytes(0,0)); + assertArrayEquals(new int[]{Integer.MAX_VALUE-1,Integer.MAX_VALUE}, decodeBytes(folder.foldBytes(Integer.MAX_VALUE-1,Integer.MAX_VALUE))); + assertArrayEquals(new int[]{128, 1}, decodeBytes(folder.foldBytes(128,1))); + + // 1 byte boundary + for (int i = 0; i < 512; i++) { + for (int j = 0; j < 512; j++) { + assertArrayEquals(new int[]{i,j}, decodeBytes(folder.foldBytes(i,j)), "Discrepancy @ " + i + " ," + j ); + } + } + + // Scattershot + for (int i = 0; i < 1_000_000; i++) { + int p = (int) (Integer.MAX_VALUE * Math.random()); + int q = (int) (Integer.MAX_VALUE * Math.random()); + assertArrayEquals(new int[]{q,p}, decodeBytes(folder.foldBytes(q,p)), "Discrepancy @ " + q + " ," + p ); + } + + } + +} \ No newline at end of file diff --git a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/index/service/util/DictionaryDataTest.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/index/service/util/DictionaryDataTest.java new file mode 100644 index 00000000..cd063ea8 --- /dev/null +++ b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/index/service/util/DictionaryDataTest.java @@ -0,0 +1,21 @@ +package nu.marginalia.wmsa.edge.index.service.util; + +import nu.marginalia.util.dict.DictionaryData; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +class DictionaryDataTest { + + @Test + public void testDataBankGrow2() { + var dataBank = new DictionaryData(65535); + for (int i = 0; i < 64; i++) { + String s = "" + i; + int offset = dataBank.add(s.getBytes(), i); + System.out.println(s + " " + offset + " " + new String(dataBank.getBytes(i)) + " " + dataBank.getValue(i)); + + Assertions.assertEquals(s, new 
String(dataBank.getBytes(i))); + Assertions.assertEquals(i, dataBank.getValue(i)); + } + } +} \ No newline at end of file diff --git a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/index/service/util/DictionaryHashMapTest.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/index/service/util/DictionaryHashMapTest.java new file mode 100644 index 00000000..b9a54237 --- /dev/null +++ b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/index/service/util/DictionaryHashMapTest.java @@ -0,0 +1,67 @@ +package nu.marginalia.wmsa.edge.index.service.util; + +import nu.marginalia.util.dict.DictionaryHashMap; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.util.HashSet; +import java.util.Set; + +import static org.junit.jupiter.api.Assertions.*; + +class DictionaryHashMapTest { + + @Test + public void testDictionaryHashMap() { + var dhm = new DictionaryHashMap(1<<6); + System.out.println(dhm.put("hello".getBytes(), 23)); + System.out.println(dhm.put("hello".getBytes(), 23)); + System.out.println(dhm.put("world".getBytes(), 54)); + assertEquals(23, dhm.get("hello".getBytes())); + assertEquals(54, dhm.get("world".getBytes())); + + } + + @Test + public void testDictionaryHashMapMissing() { + var dhm = new DictionaryHashMap(1<<8); + assertEquals(DictionaryHashMap.NO_VALUE, dhm.get(new byte[] { 1,2,3})); + + } + + @Test + public void randomTest() { + Set strings = new HashSet<>(); + var dhm = new DictionaryHashMap(1<<14); + + for (int i = 0; i < 10000; i++) { + strings.add(Double.toString(Math.random())); + } + + for (String s : strings) { + dhm.put(s.getBytes(), s.hashCode()); + } + + for (String s : strings) { + assertEquals(s.hashCode(), dhm.get(s.getBytes())); + } + + assertEquals(strings.size(), dhm.size()); + } + + @Test + public void fillHerUp2() { + var dhm = new DictionaryHashMap(1<<13); + + try { + for (int i = 0; i < 10000; i++) { + dhm.put(Double.toString(Math.random()).getBytes(), i); + } + 
Assertions.fail("Expected exception"); + } + catch (IllegalStateException ex) { + ex.printStackTrace(); + } + } + +} \ No newline at end of file diff --git a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/index/service/util/PrimeUtilTest.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/index/service/util/PrimeUtilTest.java new file mode 100644 index 00000000..703ed8cd --- /dev/null +++ b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/index/service/util/PrimeUtilTest.java @@ -0,0 +1,31 @@ +package nu.marginalia.wmsa.edge.index.service.util; + +import nu.marginalia.util.PrimeUtil; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.*; + +class PrimeUtilTest { + + @Test + void isPrime() { + assertTrue(PrimeUtil.isPrime(1)); + assertTrue(PrimeUtil.isPrime(2)); + assertTrue(PrimeUtil.isPrime(3)); + assertFalse(PrimeUtil.isPrime(4)); + assertTrue(PrimeUtil.isPrime(5)); + assertFalse(PrimeUtil.isPrime(6)); + assertTrue(PrimeUtil.isPrime(7)); + assertFalse(PrimeUtil.isPrime(8)); + assertFalse(PrimeUtil.isPrime(9)); + assertFalse(PrimeUtil.isPrime(10)); + assertTrue(PrimeUtil.isPrime(11)); + } + + @Test + void nextPrime() { + System.out.println(PrimeUtil.nextPrime(1L<<31, -1)); + System.out.println(PrimeUtil.nextPrime(1L<<31, 1)); + + } +} \ No newline at end of file diff --git a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/index/service/util/RandomWriteFunnelTest.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/index/service/util/RandomWriteFunnelTest.java new file mode 100644 index 00000000..1780b6bb --- /dev/null +++ b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/index/service/util/RandomWriteFunnelTest.java @@ -0,0 +1,70 @@ +package nu.marginalia.wmsa.edge.index.service.util; + +import nu.marginalia.util.RandomWriteFunnel; +import org.junit.jupiter.api.Test; + +import java.io.File; +import java.io.IOException; +import java.io.RandomAccessFile; +import java.nio.file.Path; + +import static 
org.junit.jupiter.api.Assertions.assertEquals; + +class RandomWriteFunnelTest { + + @Test + public void test() { + new File("/tmp/test.bin").delete(); + try (var funnel = new RandomWriteFunnel(Path.of("/tmp"), 10_000, 5001); + var out = new RandomAccessFile("/tmp/test.bin", "rw")) { + for (int i = 10_000-1; i >= 0; i--) { + System.out.println(i); + funnel.put(i, 10_000-i); + } + funnel.write(out.getChannel()); + + } catch (Exception e) { + e.printStackTrace(); + } + + try (var in = new RandomAccessFile("/tmp/test.bin", "r")) { + for (int i = 0; i < 10_000; i++) { + assertEquals(10_000-i, in.readLong()); + } + } catch (IOException e) { + e.printStackTrace(); + } + } + + @Test + public void testSparse() { + new File("/tmp/test.bin").delete(); + for (int j = 1; j <= 20; j++) { + try (var funnel = new RandomWriteFunnel(Path.of("/tmp"), 10, j); + var out = new RandomAccessFile("/tmp/test.bin", "rw")) { + for (int i = 10 - 1; i >= 0; i -= 2) { + funnel.put(i, 10 - i); + } + funnel.write(out.getChannel()); + + } catch (Exception e) { + e.printStackTrace(); + } + + try (var in = new RandomAccessFile("/tmp/test.bin", "r")) { + assertEquals(0, in.readLong()); + assertEquals(9, in.readLong()); + assertEquals(0, in.readLong()); + assertEquals(7, in.readLong()); + assertEquals(0, in.readLong()); + assertEquals(5, in.readLong()); + assertEquals(0, in.readLong()); + assertEquals(3, in.readLong()); + assertEquals(0, in.readLong()); + assertEquals(1, in.readLong()); + } catch (IOException e) { + e.printStackTrace(); + } + } + } +} \ No newline at end of file diff --git a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/integration/arxiv/ArxivParserTest.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/integration/arxiv/ArxivParserTest.java new file mode 100644 index 00000000..d522261b --- /dev/null +++ b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/integration/arxiv/ArxivParserTest.java @@ -0,0 +1,50 @@ +package nu.marginalia.wmsa.edge.integration.arxiv; + 
+import nu.marginalia.wmsa.edge.assistant.dict.NGramDict; +import nu.marginalia.wmsa.edge.crawler.domain.language.conf.LanguageModels; +import nu.marginalia.wmsa.edge.crawler.domain.language.processing.DocumentKeywordExtractor; +import nu.marginalia.wmsa.edge.crawler.domain.language.processing.SentenceExtractor; +import nu.marginalia.wmsa.edge.crawler.domain.language.processing.model.DocumentLanguageData; +import nu.marginalia.wmsa.edge.integration.arxiv.model.ArxivMetadata; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Path; + +import static org.junit.jupiter.api.Assertions.*; + +@Disabled // this isn't used and the test is hella slow +class ArxivParserTest { + LanguageModels lm = new LanguageModels( + Path.of("/home/vlofgren/Work/ngrams/ngrams-generous-emstr.bin"), + Path.of("/home/vlofgren/Work/ngrams/tfreq-new-algo.bin"), + Path.of("/home/vlofgren/Work/ngrams/opennlp-en-ud-ewt-sentence-1.0-1.9.3.bin"), + Path.of("/home/vlofgren/Work/ngrams/English.RDR"), + Path.of("/home/vlofgren/Work/ngrams/English.DICT"), + Path.of("/home/vlofgren/Work/ngrams/opennlp-en-ud-ewt-tokens-1.0-1.9.3.bin") + ); + + @Test + void parse() throws IOException { + var parser = new ArxivParser(); + var data = parser.parse(new File("/home/vlofgren/Work/arxiv/arxiv-metadata-oai-snapshot.json")); + + data.stream().map(ArxivMetadata::getAbstract).limit(100).forEach(System.out::println); + } + + @Test + void extractKeywords() throws IOException { + var dict = new NGramDict(lm); + + DocumentKeywordExtractor documentKeywordExtractor = new DocumentKeywordExtractor(dict); + + var parser = new ArxivParser(); + var data = parser.parse(new File("/home/vlofgren/Work/arxiv/arxiv-metadata-oai-snapshot.json")); + + var se = new SentenceExtractor(lm); + + data.stream().map(meta -> documentKeywordExtractor.extractKeywords(se.extractSentences(meta.getAbstract(), 
meta.getTitle()))).limit(100).forEach(System.out::println); + } +} \ No newline at end of file diff --git a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/integration/stackoverflow/StackOverflowPostsTest.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/integration/stackoverflow/StackOverflowPostsTest.java new file mode 100644 index 00000000..05f66976 --- /dev/null +++ b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/integration/stackoverflow/StackOverflowPostsTest.java @@ -0,0 +1,54 @@ +package nu.marginalia.wmsa.edge.integration.stackoverflow; + +import nu.marginalia.wmsa.edge.assistant.dict.NGramDict; +import nu.marginalia.wmsa.edge.crawler.domain.language.conf.LanguageModels; +import nu.marginalia.wmsa.edge.crawler.domain.language.processing.DocumentKeywordExtractor; +import nu.marginalia.wmsa.edge.crawler.domain.language.processing.SentenceExtractor; +import nu.marginalia.util.ParallelPipe; +import nu.marginalia.wmsa.edge.integration.model.BasicDocumentData; +import nu.marginalia.wmsa.edge.integration.stackoverflow.model.StackOverflowPost; +import nu.marginalia.wmsa.edge.model.EdgeDomain; +import org.junit.jupiter.api.Test; +import org.xml.sax.SAXException; + +import javax.xml.parsers.ParserConfigurationException; +import java.io.IOException; +import java.nio.file.Path; + +public class StackOverflowPostsTest { + LanguageModels lm = new LanguageModels( + Path.of("/home/vlofgren/Work/ngrams/ngrams-generous-emstr.bin"), + Path.of("/home/vlofgren/Work/ngrams/tfreq-new-algo4.bin"), + Path.of("/home/vlofgren/Work/ngrams/opennlp-en-ud-ewt-sentence-1.0-1.9.3.bin"), + Path.of("/home/vlofgren/Work/ngrams/English.RDR"), + Path.of("/home/vlofgren/Work/ngrams/English.DICT"), + Path.of("/home/vlofgren/Work/ngrams/opennlp-en-ud-ewt-tokens-1.0-1.9.3.bin") + ); + + @Test + public void test() throws IOException, ParserConfigurationException, SAXException, InterruptedException { + var documentKeywordExtractor = new DocumentKeywordExtractor(new NGramDict(lm)); 
+ + ThreadLocal processor = ThreadLocal.withInitial(() -> { + return new StackOverflowPostProcessor(new SentenceExtractor(lm), documentKeywordExtractor); + }); + + var pipe = new ParallelPipe("pipe", 10, 5, 2) { + @Override + public BasicDocumentData onProcess(StackOverflowPost stackOverflowPost) { + return processor.get().process(stackOverflowPost); + } + + @Override + public void onReceive(BasicDocumentData stackOverflowIndexData) { + System.out.println(stackOverflowIndexData.url); + } + }; + + var reader = new StackOverflowPostsReader("/mnt/storage/downloads.new/stackexchange/sites/philosophy/Posts.xml", new EdgeDomain("philosophy.stackexchange.com"), + pipe::accept); + reader.join(); + System.out.println("Waiting for pipe"); + pipe.join(); + } +} diff --git a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/integration/wikipedia/WikipediaTest.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/integration/wikipedia/WikipediaTest.java new file mode 100644 index 00000000..41c6b362 --- /dev/null +++ b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/integration/wikipedia/WikipediaTest.java @@ -0,0 +1,78 @@ +package nu.marginalia.wmsa.edge.integration.wikipedia; + +import lombok.SneakyThrows; +import nu.marginalia.wmsa.edge.assistant.dict.NGramDict; +import nu.marginalia.wmsa.edge.crawler.domain.language.DocumentDebugger; +import nu.marginalia.wmsa.edge.crawler.domain.language.conf.LanguageModels; +import nu.marginalia.wmsa.edge.crawler.domain.language.processing.DocumentKeywordExtractor; +import nu.marginalia.wmsa.edge.crawler.domain.language.processing.SentenceExtractor; +import nu.marginalia.util.ParallelPipe; +import nu.marginalia.wmsa.edge.integration.model.BasicDocumentData; +import nu.marginalia.wmsa.edge.integration.wikipedia.model.WikipediaArticle; +import nu.marginalia.wmsa.edge.model.EdgeDomain; +import org.jsoup.Jsoup; +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.nio.file.Path; + +public class 
WikipediaTest { + LanguageModels lm = new LanguageModels( + Path.of("/home/vlofgren/Work/ngrams/ngrams-generous-emstr.bin"), + Path.of("/home/vlofgren/Work/ngrams/tfreq-new-algo4.bin"), + Path.of("/home/vlofgren/Work/ngrams/opennlp-en-ud-ewt-sentence-1.0-1.9.3.bin"), + Path.of("/home/vlofgren/Work/ngrams/English.RDR"), + Path.of("/home/vlofgren/Work/ngrams/English.DICT"), + Path.of("/home/vlofgren/Work/ngrams/opennlp-en-ud-ewt-tokens-1.0-1.9.3.bin") + ); + + @Test @SneakyThrows + public void test() { + var documentKeywordExtractor = new DocumentKeywordExtractor(new NGramDict(lm)); + ThreadLocal processor = ThreadLocal.withInitial(() -> { + return new WikipediaProcessor(new SentenceExtractor(lm), documentKeywordExtractor); + }); + + var pipe = new ParallelPipe("pipe", 10, 5, 2) { + @Override + public BasicDocumentData onProcess(WikipediaArticle stackOverflowPost) { + return processor.get().process(stackOverflowPost); + } + + @Override + public void onReceive(BasicDocumentData indexData) { + System.out.println(indexData.url); + System.out.println(indexData.title); + System.out.println(indexData.description); + } + }; + + var reader = new WikipediaReader("/home/vlofgren/Work/wikipedia_en_100_nopic_2021-06.zim", new EdgeDomain("encyclopedia.marginalia.nu"), + pipe::accept); + reader.join(); + } + + + @Test @SneakyThrows + public void test2() { + var documentKeywordExtractor = new DocumentKeywordExtractor(new NGramDict(lm)); + var debugger = new DocumentDebugger(lm); + + ThreadLocal processor = ThreadLocal.withInitial(() -> { + return new WikipediaProcessor(new SentenceExtractor(lm), documentKeywordExtractor); + }); + + var reader = new WikipediaReader("/home/vlofgren/Work/wikipedia_en_100_nopic_2021-06.zim", new EdgeDomain("encyclopedia.marginalia.nu"), + article -> { + try { + debugger.debugDocument(article.url.getPath(), Jsoup.parse(article.body)); + + } catch (IOException e) { + e.printStackTrace(); + } + }); + + reader.join(); + debugger.writeIndex(); + } +} diff 
--git a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/model/EdgeDomainTest.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/model/EdgeDomainTest.java new file mode 100644 index 00000000..9493e638 --- /dev/null +++ b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/model/EdgeDomainTest.java @@ -0,0 +1,106 @@ +package nu.marginalia.wmsa.edge.model; + +import org.junit.jupiter.api.Test; + +import java.net.URISyntaxException; + +import static org.junit.jupiter.api.Assertions.*; + +class EdgeDomainTest { + + @Test + public void testSkepdic() throws URISyntaxException { + var domain = new EdgeUrl("http://www.skepdic.com/astrology.html"); + assertEquals("skepdic", domain.getDomain().getDomainKey()); + var domain2 = new EdgeUrl("http://skepdic.com/astrology.html"); + assertEquals("skepdic", domain2.getDomain().getDomainKey()); + } + + @Test + public void testHkDomain() throws URISyntaxException { + var domain = new EdgeUrl("http://l7072i3.l7c.net"); + assertEquals("http", domain.proto); + assertEquals("l7072i3", domain.domain.subDomain); + assertEquals("l7c.net", domain.domain.domain); + } + + @Test + public void testEduSubDomain() throws URISyntaxException { + var domain = new EdgeUrl("http://uj.edu.pl"); + assertEquals("http", domain.proto); + assertEquals("", domain.domain.subDomain); + assertEquals("uj.edu.pl", domain.domain.domain); + } + + + @Test + public void testGetDomain() throws URISyntaxException { + var domain = new EdgeUrl("http://www.marginalia.nu"); + assertEquals("http", domain.proto); + assertEquals("www", domain.domain.subDomain); + assertEquals("marginalia.nu", domain.domain.domain); + assertEquals("http://www.marginalia.nu/", domain.toString()); + } + + @Test + public void testUkDomain2() throws URISyntaxException { + var domain = new EdgeUrl("http://marginalia.co.uk"); + assertEquals("marginalia.co.uk", domain.domain.domain); + assertEquals("http", domain.proto); + assertEquals("", domain.domain.subDomain); + 
assertEquals("http://marginalia.co.uk/", domain.toString()); + } + + @Test + public void testUkDomain3() throws URISyntaxException { + var domain = new EdgeUrl("http://withcandour.co.uk"); + assertEquals("withcandour.co.uk", domain.domain.domain); + assertEquals("http", domain.proto); + assertEquals("", domain.domain.subDomain); + assertEquals("http://withcandour.co.uk/", domain.toString()); + } + + @Test + public void testUkDomain() throws URISyntaxException { + var domain = new EdgeUrl("http://www.marginalia.co.uk"); + assertEquals("http", domain.proto); + assertEquals("www", domain.domain.subDomain); + assertEquals("marginalia.co.uk", domain.domain.domain); + assertEquals("http://www.marginalia.co.uk/", domain.toString()); + } + + @Test + public void testThreeLetterDomain() throws URISyntaxException { + var domain = new EdgeUrl("http://www.marginalia.abcf.de"); + assertEquals("http", domain.proto); + assertEquals("abcf.de", domain.domain.domain); + assertEquals("www.marginalia", domain.domain.subDomain); + } + + @Test + public void testGetDomainNoSubdomain() throws URISyntaxException { + var domain = new EdgeUrl("http://marginalia.nu"); + assertEquals("http", domain.proto); + assertEquals("", domain.domain.subDomain); + assertEquals("marginalia.nu", domain.domain.domain); + assertEquals("http://marginalia.nu/", domain.toString()); + } + + @Test + public void testIpPort() throws URISyntaxException { + var domain = new EdgeUrl("https://127.0.0.1:8080"); + assertEquals("https", domain.proto); + assertEquals("", domain.domain.subDomain); + assertEquals("127.0.0.1", domain.domain.domain); + assertEquals("https://127.0.0.1:8080/", domain.toString()); + } + + @Test + public void testIp() throws URISyntaxException { + var domain = new EdgeUrl("https://192.168.1.32"); + assertEquals("https", domain.proto); + assertEquals("", domain.domain.subDomain); + assertEquals("192.168.1.32", domain.domain.domain); + assertEquals("https://192.168.1.32/", domain.toString()); + } +} \ 
No newline at end of file diff --git a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/model/EdgeUrlTest.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/model/EdgeUrlTest.java new file mode 100644 index 00000000..a6e690fe --- /dev/null +++ b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/model/EdgeUrlTest.java @@ -0,0 +1,22 @@ +package nu.marginalia.wmsa.edge.model; + +import org.junit.jupiter.api.Test; + +import java.net.URISyntaxException; + +import static org.junit.jupiter.api.Assertions.*; + +class EdgeUrlTest { + + @Test + public void testHashCode() throws URISyntaxException { + System.out.println(new EdgeUrl("https://memex.marginalia.nu").hashCode()); + } + + @Test + void urlencodeFixer() throws URISyntaxException { + System.out.println(EdgeUrl.urlencodeFixer("https://www.example.com/%-sign")); + System.out.println(EdgeUrl.urlencodeFixer("https://www.example.com/%22-sign")); + System.out.println(EdgeUrl.urlencodeFixer("https://www.example.com/\n \"huh\"")); + } +} \ No newline at end of file diff --git a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/search/query/BodyQueryParserTest.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/search/query/BodyQueryParserTest.java new file mode 100644 index 00000000..8f159817 --- /dev/null +++ b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/search/query/BodyQueryParserTest.java @@ -0,0 +1,106 @@ +package nu.marginalia.wmsa.edge.search.query; + +import nu.marginalia.wmsa.edge.assistant.dict.NGramDict; +import nu.marginalia.wmsa.edge.crawler.domain.language.conf.LanguageModels; +import org.junit.BeforeClass; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import java.nio.file.Path; +import java.util.List; + +import static org.junit.jupiter.api.Assertions.*; + +class BodyQueryParserTest { + private QueryParser parser; + private static NGramDict dict; + private static EnglishDictionary englishDictionary; + private static LanguageModels lm = new 
LanguageModels( + Path.of("/home/vlofgren/Work/ngrams/ngrams-generous-emstr.bin"), + Path.of("/home/vlofgren/Work/ngrams/tfreq-new-algo4.bin"), + Path.of("/home/vlofgren/Work/ngrams/opennlp-en-ud-ewt-sentence-1.0-1.9.3.bin"), + Path.of("/home/vlofgren/Work/ngrams/English.RDR"), + Path.of("/home/vlofgren/Work/ngrams/English.DICT"), + Path.of("/home/vlofgren/Work/ngrams/opennlp-en-ud-ewt-tokens-1.0-1.9.3.bin") + ); + + @BeforeClass + public static void init() { + dict = new NGramDict(lm); + englishDictionary = new EnglishDictionary(dict); + } + + @BeforeEach + public void setUp() { + parser = new QueryParser(englishDictionary, new QueryVariants(lm, dict, englishDictionary)); + } + + @Test + public void testTitleMatcher() { + List terms = List.of("3d", "realms"); + assertEquals(2, terms.stream().map(String::toLowerCase).filter("3D Realms Site: Forums".toLowerCase()::contains).count()); + } + @Test + void parseSimple() { + var results = parser.parse("hello"); + results.forEach(System.out::println); + assertEquals(1, results.size()); + assertEquals(TokenType.LITERAL_TERM, results.get(0).type); + assertEquals("hello", results.get(0).str); + } + + @Test + void parseQuotes() { + var results = parser.parse("\u201Chello world\u201D"); + results.forEach(System.out::println); + assertEquals(TokenType.QUOT_TERM, results.get(0).type); + assertEquals("hello_world", results.get(0).str); + assertEquals("\u201Chello world\u201D", results.get(0).displayStr); + } + + @Test + void parseExclude() { + var results = parser.parse("-Hello"); + results.forEach(System.out::println); + assertEquals(TokenType.EXCLUDE_TERM, results.get(0).type); + assertEquals("hello", results.get(0).str); + assertEquals("-hello", results.get(0).displayStr); + } + + @Test + void parseCombined() { + for (var list : parser.permuteQueries(parser.parse("dune 2 remake"))) { + for (var t: list) { + System.out.printf("%s ", t.str); + } + System.out.println(); + } + } + @Test + void parseCombinedDOS() { + for (var list 
: parser.permuteQueries(parser.parse("ab ba baa abba baba ab ba"))) { + for (var t: list) { + System.out.printf("%s ", t.str); + } + System.out.println(); + } + } + + @Test + void parseCombinedSuperman() { + for (var list : parser.permuteQueries(parser.parse("wizardry proving grounds of the mad overlord"))) { + for (var t: list) { + System.out.printf("%s ", t.str); + } + System.out.println(); + } + } + @Test + void testEdgeCases() { + parser.parse("site:localhost 3D").forEach(System.out::println); + parser.parse("-wolfenstein 3D").forEach(System.out::println); + parser.parse("-wolfenstein 3D \"").forEach(System.out::println); + } + + +} \ No newline at end of file diff --git a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/search/query/EnglishDictionaryTest.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/search/query/EnglishDictionaryTest.java new file mode 100644 index 00000000..4c0514ea --- /dev/null +++ b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/search/query/EnglishDictionaryTest.java @@ -0,0 +1,27 @@ +package nu.marginalia.wmsa.edge.search.query; + +import nu.marginalia.wmsa.edge.assistant.dict.NGramDict; +import nu.marginalia.wmsa.edge.crawler.domain.language.conf.LanguageModels; +import org.junit.jupiter.api.Test; + +import java.nio.file.Path; + +import static org.junit.jupiter.api.Assertions.*; + +class EnglishDictionaryTest { + + @Test + void getWordVariants() { + LanguageModels lm = new LanguageModels( + Path.of("/home/vlofgren/Work/ngrams/ngrams-generous-emstr.bin"), + Path.of("/home/vlofgren/Work/ngrams/tfreq-new-algo4.bin"), + Path.of("/home/vlofgren/Work/ngrams/opennlp-en-ud-ewt-sentence-1.0-1.9.3.bin"), + Path.of("/home/vlofgren/Work/ngrams/English.RDR"), + Path.of("/home/vlofgren/Work/ngrams/English.DICT"), + Path.of("/home/vlofgren/Work/ngrams/opennlp-en-ud-ewt-tokens-1.0-1.9.3.bin") + ); + + var dict = new NGramDict(lm); + new EnglishDictionary(dict).getWordVariants("dos").forEach(System.out::println); + } +} \ No newline 
at end of file diff --git a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/search/query/QueryParserTest.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/search/query/QueryParserTest.java new file mode 100644 index 00000000..0e9aec69 --- /dev/null +++ b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/search/query/QueryParserTest.java @@ -0,0 +1,40 @@ +package nu.marginalia.wmsa.edge.search.query; + +import nu.marginalia.wmsa.edge.assistant.dict.NGramDict; +import nu.marginalia.wmsa.edge.crawler.domain.language.conf.LanguageModels; +import org.junit.BeforeClass; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import java.nio.file.Path; +import java.util.stream.Collectors; + +class QueryParserTest { + private QueryParser parser; + private static NGramDict dict; + private static EnglishDictionary englishDictionary; + private static LanguageModels lm = new LanguageModels( + Path.of("/home/vlofgren/Work/ngrams/ngrams-generous-emstr.bin"), + Path.of("/home/vlofgren/Work/ngrams/tfreq-new-algo4.bin"), + Path.of("/home/vlofgren/Work/ngrams/opennlp-en-ud-ewt-sentence-1.0-1.9.3.bin"), + Path.of("/home/vlofgren/Work/ngrams/English.RDR"), + Path.of("/home/vlofgren/Work/ngrams/English.DICT"), + Path.of("/home/vlofgren/Work/ngrams/opennlp-en-ud-ewt-tokens-1.0-1.9.3.bin") + ); + + @BeforeEach + public void setUp() { + dict = new NGramDict(lm); + englishDictionary = new EnglishDictionary(dict); + + parser = new QueryParser(englishDictionary, new QueryVariants(lm, dict, englishDictionary)); + } + + @Test + void variantQueries() { + var r = parser.parse("car stemming"); + parser.variantQueries(r).forEach(query -> { + System.out.println(query.stream().map(t -> t.str).collect(Collectors.joining(", "))); + }); + } +} \ No newline at end of file diff --git a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/search/query/QueryVariantsTest.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/search/query/QueryVariantsTest.java new 
file mode 100644 index 00000000..91ec77af --- /dev/null +++ b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/search/query/QueryVariantsTest.java @@ -0,0 +1,63 @@ +package nu.marginalia.wmsa.edge.search.query; + +import nu.marginalia.wmsa.edge.assistant.dict.NGramDict; +import nu.marginalia.wmsa.edge.crawler.domain.language.conf.LanguageModels; +import nu.marginalia.wmsa.edge.crawler.domain.language.processing.SentenceExtractor; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import java.nio.file.Path; +import java.util.List; + +class QueryVariantsTest { + QueryVariants variants; + QueryParser parser; + SentenceExtractor se; + @BeforeEach + public void setUp() { + LanguageModels lm = new LanguageModels( + Path.of("/home/vlofgren/Work/ngrams/ngrams-generous-emstr.bin"), + Path.of("/home/vlofgren/Work/ngrams/tfreq-new-algo4.bin"), + Path.of("/home/vlofgren/Work/ngrams/opennlp-en-ud-ewt-sentence-1.0-1.9.3.bin"), + Path.of("/home/vlofgren/Work/ngrams/English.RDR"), + Path.of("/home/vlofgren/Work/ngrams/English.DICT"), + Path.of("/home/vlofgren/Work/ngrams/opennlp-en-ud-ewt-tokens-1.0-1.9.3.bin") + ); + + se = new SentenceExtractor(lm); + + var dict = new NGramDict(lm); + variants = new QueryVariants(lm, dict, new EnglishDictionary(dict)); + parser = new QueryParser(new EnglishDictionary(dict), variants); + } + + @Test + void getQueryVariants() { + System.out.println(se.extractSentence("we are alone")); + testCase("DOS", List.of("DOS")); + testCase("dos", List.of("dos")); + testCase("we are alone", List.of("dos")); + testCase("3D Realms", List.of("dos")); + testCase("I am alone", List.of("dos")); + testCase("plato cave", List.of("dos")); + testCase("The internet is dead", List.of("dos")); + + testCase("TRS80", List.of("trs_80"), List.of("trs80")); + testCase("TRS-80", List.of("trs-80"), List.of("trs80")); + testCase("TRS-80", List.of("trs-80"), List.of("trs80")); + testCase("Raspberry Pi 2", List.of("trs-80"), List.of("trs80")); + 
testCase("Duke Nukem 3D", List.of("trs-80"), List.of("trs80")); + testCase("The Man of Tomorrow", List.of("trs-80"), List.of("trs80")); + testCase("Computer Manual", List.of("trs-80"), List.of("trs80")); + testCase("Knitting", List.of("trs-80"), List.of("trs80")); + testCase("capcom", List.of("trs-80"), List.of("trs80")); + testCase("the man of tomorrow", List.of("trs-80"), List.of("trs80")); + } + + private void testCase(String input, List... expected) { + var tokens = variants.getQueryVariants(parser.extractBasicTokens(input)); + System.out.println(tokens); +// var result = tokens.stream().map(lst -> lst.terms).collect(Collectors.toSet()); +// assertEquals(Set.of(expected), result, "Case failed: " + input); + } +} \ No newline at end of file diff --git a/marginalia_nu/src/test/java/nu/marginalia/wmsa/memex/MemexFileWriterTest.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/memex/MemexFileWriterTest.java new file mode 100644 index 00000000..fb240c25 --- /dev/null +++ b/marginalia_nu/src/test/java/nu/marginalia/wmsa/memex/MemexFileWriterTest.java @@ -0,0 +1,43 @@ +package nu.marginalia.wmsa.memex; + +import nu.marginalia.util.test.TestUtil; +import nu.marginalia.wmsa.memex.model.MemexNodeUrl; +import nu.marginalia.wmsa.memex.system.MemexFileWriter; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; + +import static org.junit.jupiter.api.Assertions.*; + +class MemexFileWriterTest { + + Path root; + MemexFileWriter renderedResources; + @BeforeEach + void setUp() throws IOException { + root = Files.createTempDirectory(getClass().getSimpleName()); + renderedResources = new MemexFileWriter(root); + } + + @AfterEach + void tearDown() { + TestUtil.clearTempDir(root); + } + + @Test + void exists() throws IOException { + assertFalse(renderedResources.exists(new MemexNodeUrl("/test"))); + renderedResources.write(new 
MemexNodeUrl("/test"), "A line"); + assertTrue(renderedResources.exists(new MemexNodeUrl("/test"))); + } + + @Test + void write() throws IOException { + renderedResources.write(new MemexNodeUrl("/test"), "A line"); + assertEquals("A line", Files.readString(root.resolve("test"))); + } +} \ No newline at end of file diff --git a/marginalia_nu/src/test/java/nu/marginalia/wmsa/memex/MemexTest.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/memex/MemexTest.java new file mode 100644 index 00000000..10b833e9 --- /dev/null +++ b/marginalia_nu/src/test/java/nu/marginalia/wmsa/memex/MemexTest.java @@ -0,0 +1,12 @@ +package nu.marginalia.wmsa.memex; + +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.nio.file.Path; + +class MemexTest { + @Test + public void test() throws IOException { + } +} \ No newline at end of file diff --git a/marginalia_nu/src/test/java/nu/marginalia/wmsa/memex/change/GemtextChangeTest.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/memex/change/GemtextChangeTest.java new file mode 100644 index 00000000..7afa424a --- /dev/null +++ b/marginalia_nu/src/test/java/nu/marginalia/wmsa/memex/change/GemtextChangeTest.java @@ -0,0 +1,274 @@ +package nu.marginalia.wmsa.memex.change; + +import io.reactivex.rxjava3.plugins.RxJavaPlugins; +import lombok.SneakyThrows; +import nu.marginalia.gemini.GeminiService; +import nu.marginalia.util.test.TestUtil; +import nu.marginalia.wmsa.memex.*; +import nu.marginalia.wmsa.memex.renderer.MemexHtmlRenderer; +import nu.marginalia.wmsa.memex.model.MemexNodeHeadingId; +import nu.marginalia.wmsa.memex.model.MemexNodeUrl; +import nu.marginalia.wmsa.memex.renderer.MemexRendererers; +import nu.marginalia.wmsa.memex.system.MemexFileSystemModifiedTimes; +import nu.marginalia.wmsa.memex.system.MemexFileWriter; +import nu.marginalia.wmsa.memex.system.MemexGitRepo; +import nu.marginalia.wmsa.memex.system.MemexSourceFileSystem; +import org.junit.BeforeClass; +import org.junit.jupiter.api.AfterEach; 
+import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.mockito.Mockito; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.List; + +import static org.junit.jupiter.api.Assertions.assertEquals; + + +class GemtextChangeTest { + + + private Memex memex; + private Path tempDir; + + private final String tombstonePath = "/special/tombstone.gmi"; + private final String redirectPath = "/special/redirects.gmi"; + private final String testFilePath = "/test.gmi"; + + static Logger logger = LoggerFactory.getLogger(GemtextChangeTest.class); + + @BeforeClass + public static void init() { + + RxJavaPlugins.setErrorHandler(e -> { + if (e.getMessage() == null) { + logger.error("Error", e); + } + else { + logger.error("Error {}: {}", e.getClass().getSimpleName(), e.getMessage()); + } + }); + } + + @SneakyThrows + @BeforeEach + public void setUp() { + tempDir = Files.createTempDirectory("test"); + Files.createDirectory(tempDir.resolve("special")); + var data = new MemexData(); + + memex = new Memex(data, null, + Mockito.mock(MemexGitRepo.class), new MemexLoader(data, new MemexFileSystemModifiedTimes(), + new MemexSourceFileSystem(tempDir, Mockito.mock(MemexGitRepo.class)), + tempDir, tombstonePath, redirectPath), + Mockito.mock(MemexFileWriter.class), + null, + Mockito.mock(MemexRendererers.class), + Mockito.mock(GeminiService.class)); + } + + @SneakyThrows + @AfterEach + public void tearDown() { + TestUtil.clearTempDir(tempDir); + } + + @Test + void appendHeading() throws IOException { + var url = new MemexNodeUrl(testFilePath); + new GemtextCreateOrMutate(url, "# Header", + new GemtextAppend(url, new MemexNodeHeadingId(1), new String[] { + "1", "## Header 2", "# Header 3" + }) + ).visit(memex); + new GemtextAppend(url, new MemexNodeHeadingId(1), new String[] { "3"}).visit(memex); + + + List lines = Files.readAllLines(Path.of(tempDir + 
testFilePath)); + lines.forEach(System.out::println); + assertEquals(5, lines.size()); + assertEquals("# Header", lines.get(0)); + assertEquals("1", lines.get(1)); + assertEquals("## Header 2", lines.get(2)); + assertEquals("3", lines.get(3)); + assertEquals("# Header 3", lines.get(4)); + } + + @Test + void appendRoot() throws IOException { + var url = new MemexNodeUrl(testFilePath); + new GemtextCreateOrMutate(url, "# Header", + new GemtextAppend(url, new MemexNodeHeadingId(1), new String[] { + "1", "## Header 2", "# Header 3" + }) + ).visit(memex); + new GemtextAppend(url, new MemexNodeHeadingId(0), new String[] { "3"}) + .visit(memex); + + + List lines = Files.readAllLines(Path.of(tempDir + testFilePath)); + lines.forEach(System.out::println); + assertEquals(5, lines.size()); + assertEquals("# Header", lines.get(0)); + assertEquals("1", lines.get(1)); + assertEquals("## Header 2", lines.get(2)); + assertEquals("# Header 3", lines.get(3)); + assertEquals("3", lines.get(4)); + } + + @Test + void appendMissing() throws IOException { + var url = new MemexNodeUrl(testFilePath); + new GemtextCreateOrMutate(url, "# Header", + new GemtextAppend(url, new MemexNodeHeadingId(1), new String[] { + "1", "## Header 2", "# Header 3" + }) + ).visit(memex); + new GemtextAppend(url, new MemexNodeHeadingId(5), new String[] { "3"}) + .visit(memex); + + + List lines = Files.readAllLines(Path.of(tempDir + testFilePath)); + lines.forEach(System.out::println); + assertEquals(5, lines.size()); + assertEquals("# Header", lines.get(0)); + assertEquals("1", lines.get(1)); + assertEquals("## Header 2", lines.get(2)); + assertEquals("# Header 3", lines.get(3)); + assertEquals("3", lines.get(4)); + } + + @Test + void replaceHeading() throws IOException { + var url = new MemexNodeUrl(testFilePath); + new GemtextCreateOrMutate(url, "# Header", + new GemtextAppend(url, new MemexNodeHeadingId(1), new String[] { + "1", "## Header 2", "# Header 3" + }) + ).visit(memex); + new GemtextReplace(url, new 
MemexNodeHeadingId(1), new String[] { "# New", "3"}) + .visit(memex); + + + List lines = Files.readAllLines(Path.of(tempDir + testFilePath)); + lines.forEach(System.out::println); + assertEquals(3, lines.size()); + assertEquals("# New", lines.get(0)); + assertEquals("3", lines.get(1)); + assertEquals("# Header 3", lines.get(2)); + } + + @Test + void replaceRoot() throws IOException { + var url = new MemexNodeUrl(testFilePath); + new GemtextCreateOrMutate(url, "# Header", + new GemtextAppend(url, new MemexNodeHeadingId(1), new String[] { + "1", "## Header 2", "# Header 3" + }) + ).visit(memex); + new GemtextReplace(url, new MemexNodeHeadingId(0), new String[] { "# New", "3"}) + .visit(memex); + + + List lines = Files.readAllLines(Path.of(tempDir + testFilePath)); + lines.forEach(System.out::println); + assertEquals(2, lines.size()); + assertEquals("# New", lines.get(0)); + assertEquals("3", lines.get(1)); + } + + @Test + void replaceMissing() throws IOException { + var url = new MemexNodeUrl(testFilePath); + new GemtextCreateOrMutate(url, "# Header", + new GemtextAppend(url, new MemexNodeHeadingId(1), new String[] { + "1", "## Header 2", "# Header 3" + }) + ).visit(memex); + new GemtextReplace(url, new MemexNodeHeadingId(5), new String[] { "# New", "3"}) + .visit(memex); + + + List lines = Files.readAllLines(Path.of(tempDir + testFilePath)); + lines.forEach(System.out::println); + assertEquals(7, lines.size()); + + + assertEquals("# Header", lines.get(0)); + assertEquals("1", lines.get(1)); + assertEquals("## Header 2", lines.get(2)); + assertEquals("# Header 3", lines.get(3)); + + assertEquals("# Error! 
Replace failed!", lines.get(4)); + assertEquals("# New", lines.get(5)); + assertEquals("3", lines.get(6)); + } + + @Test + void prependHeading() throws IOException { + var url = new MemexNodeUrl(testFilePath); + new GemtextCreateOrMutate(url, "# Header", + new GemtextAppend(url, new MemexNodeHeadingId(1), new String[] { + "1", "2" + }) + ).visit(memex); + new GemtextPrepend(url, new MemexNodeHeadingId(1), new String[] { "3"}) + .visit(memex); + + + List lines = Files.readAllLines(Path.of(tempDir + testFilePath)); + lines.forEach(System.out::println); + assertEquals(4, lines.size()); + assertEquals("# Header", lines.get(0)); + assertEquals("3", lines.get(1)); + assertEquals("1", lines.get(2)); + assertEquals("2", lines.get(3)); + } + + @Test + void prependRoot() throws IOException { + var url = new MemexNodeUrl(testFilePath); + new GemtextCreateOrMutate(url, "# Header", + new GemtextAppend(url, new MemexNodeHeadingId(1), new String[] { + "1", "2" + }) + ).visit(memex); + new GemtextPrepend(url, new MemexNodeHeadingId(0), new String[] { "3" }) + .visit(memex); + + List lines = Files.readAllLines(Path.of(tempDir + testFilePath)); + + lines.forEach(System.out::println); + assertEquals(4, lines.size()); + assertEquals("3", lines.get(0)); + assertEquals("# Header", lines.get(1)); + assertEquals("1", lines.get(2)); + assertEquals("2", lines.get(3)); + } + + + @Test + void prependMissing() throws IOException { + var url = new MemexNodeUrl(testFilePath); + new GemtextCreateOrMutate(url, "# Header", + new GemtextAppend(url, new MemexNodeHeadingId(1), new String[] { + "1", "2" + }) + ).visit(memex); + new GemtextPrepend(url, new MemexNodeHeadingId(5), new String[] { "3" }) + .visit(memex); + + List lines = Files.readAllLines(Path.of(tempDir + testFilePath)); + + lines.forEach(System.out::println); + assertEquals(4, lines.size()); + assertEquals("# Header", lines.get(0)); + assertEquals("1", lines.get(1)); + assertEquals("2", lines.get(2)); + assertEquals("3", lines.get(3)); + 
} +} \ No newline at end of file diff --git a/marginalia_nu/src/test/java/nu/marginalia/wmsa/memex/change/GemtextTaskUpdateTest.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/memex/change/GemtextTaskUpdateTest.java new file mode 100644 index 00000000..bdfe83dc --- /dev/null +++ b/marginalia_nu/src/test/java/nu/marginalia/wmsa/memex/change/GemtextTaskUpdateTest.java @@ -0,0 +1,226 @@ +package nu.marginalia.wmsa.memex.change; + +import io.reactivex.rxjava3.plugins.RxJavaPlugins; +import lombok.SneakyThrows; +import nu.marginalia.gemini.GeminiService; +import nu.marginalia.gemini.gmi.GemtextDocument; +import nu.marginalia.util.test.TestUtil; +import nu.marginalia.wmsa.memex.*; +import nu.marginalia.wmsa.memex.change.update.GemtextDocumentUpdateCalculator; +import nu.marginalia.wmsa.memex.renderer.MemexHtmlRenderer; +import nu.marginalia.wmsa.memex.model.MemexNodeHeadingId; +import nu.marginalia.wmsa.memex.model.MemexNodeUrl; +import nu.marginalia.wmsa.memex.renderer.MemexRendererers; +import nu.marginalia.wmsa.memex.system.MemexFileSystemModifiedTimes; +import nu.marginalia.wmsa.memex.system.MemexFileWriter; +import nu.marginalia.wmsa.memex.system.MemexGitRepo; +import nu.marginalia.wmsa.memex.system.MemexSourceFileSystem; +import org.junit.BeforeClass; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.mockito.Mockito; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.time.LocalDate; +import java.time.format.DateTimeFormatter; +import java.util.Arrays; +import java.util.List; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +class GemtextTaskUpdateTest { + + + private Memex memex; + private Path tempDir; + + private final String tombstonePath = "/special/tombstone.gmi"; + private final String 
redirectPath = "/special/redirects.gmi"; + private final String testFilePath = "/test.gmi"; + private final String todoFilePath = "/todo.gmi"; + private final String doneFilePath = "/done.gmi"; + + static Logger logger = LoggerFactory.getLogger(GemtextTaskUpdateTest.class); + + @BeforeClass + public static void init() { + + RxJavaPlugins.setErrorHandler(e -> { + if (e.getMessage() == null) { + logger.error("Error", e); + } + else { + logger.error("Error {}: {}", e.getClass().getSimpleName(), e.getMessage()); + } + }); + } + + @SneakyThrows + @BeforeEach + public void setUp() { + tempDir = Files.createTempDirectory("test"); + Files.createDirectory(tempDir.resolve("special")); + var data = new MemexData(); + + memex = new Memex(data, null, Mockito.mock(MemexGitRepo.class), new MemexLoader(data, new MemexFileSystemModifiedTimes(), + new MemexSourceFileSystem(tempDir, Mockito.mock(MemexGitRepo.class)), tempDir, tombstonePath, redirectPath), + Mockito.mock(MemexFileWriter.class), + null, + Mockito.mock(MemexRendererers.class), + Mockito.mock(GeminiService.class)); + } + + @SneakyThrows + @AfterEach + public void tearDown() { + TestUtil.clearTempDir(tempDir); + } + + @Test + void updateTodoFileWithTodoTask() throws IOException { + + var url = new MemexNodeUrl(testFilePath); + + GemtextMutation.createOrAppend(url, "%%%TASKS\n# Header", new MemexNodeHeadingId(1), + "%%% TASKS", "## Todo", "- A task yet finished").visit(memex); + + GemtextDocumentUpdateCalculator updateCalculator = new GemtextDocumentUpdateCalculator(memex); + var updates = updateCalculator.calculateUpdates(memex.getDocument(url), MemexNodeHeadingId.ROOT, + GemtextDocument.of(url, "%%% TASKS", "# Header", "## Todo", "- A task yet finished (?)", "- One More Task")); + updates.forEach(System.out::println); + for (var update : updates) { + update.visit(memex); + } + + verifyFile(testFilePath, + "%%% TASKS", + "# Header", + "## Todo", + "- A task yet finished (?)", + "- One More Task" + ); + } + + @Test + void 
updateDoneFileWithTodoTask() throws IOException { + + var url = new MemexNodeUrl(testFilePath); + + GemtextMutation.createOrAppend(url, "%%% TASKS\n# Header", new MemexNodeHeadingId(1), + "## Done", "- A task yet finished (/)").visit(memex); + + GemtextDocumentUpdateCalculator updateCalculator = new GemtextDocumentUpdateCalculator(memex); + var updates = updateCalculator.calculateUpdates(memex.getDocument(url), MemexNodeHeadingId.ROOT, + GemtextDocument.of(url, "%%% TASKS", "# Header", "## Done", "- A task yet finished (?)")); + updates.forEach(System.out::println); + for (var update : updates) { + update.visit(memex); + } + + verifyFile(testFilePath, + "%%% TASKS", + "# Header", + "## Done" + ); + + verifyFile(todoFilePath, + "%%% TASKS", + "# Todo", + "- A task yet finished (?)" + ); + } + + @Test + void moveToDoneNewDoneFile() throws IOException { + + var url = new MemexNodeUrl(testFilePath); + + GemtextMutation.createOrAppend(url, "%%% TASKS\n# Header", new MemexNodeHeadingId(1), + "%%% TASKS","## Todo", "- A task yet finished").visit(memex); + + GemtextDocumentUpdateCalculator updateCalculator = new GemtextDocumentUpdateCalculator(memex); + var updates = updateCalculator.calculateUpdates(memex.getDocument(url), MemexNodeHeadingId.ROOT, + GemtextDocument.of(url, "%%% TASKS", "# Header", "## Todo", "- A task yet finished (/)")); + updates.forEach(System.out::println); + for (var update : updates) { + update.visit(memex); + } + + verifyFile(doneFilePath, + "%%% TASKS", + "# Done", + "", + "## Done " + LocalDate.now().format(DateTimeFormatter.ISO_LOCAL_DATE), + "- A task yet finished (/)"); + + updates = updateCalculator.calculateUpdates(memex.getDocument(url), MemexNodeHeadingId.ROOT, + GemtextDocument.of(url, "%%% TASKS", "# Header", "## Todo", "- Another task yet finished (/)")); + updates.forEach(System.out::println); + for (var update : updates) { + update.visit(memex); + } + + verifyFile(doneFilePath, + "%%% TASKS", + "# Done", + "", + "## Done " + 
LocalDate.now().format(DateTimeFormatter.ISO_LOCAL_DATE), + "- Another task yet finished (/)", + "- A task yet finished (/)"); + } + + @Test + void moveToDoneOldDoneFile() throws IOException { + + var doneUrl = new MemexNodeUrl(doneFilePath); + var url = new MemexNodeUrl(testFilePath); + + + GemtextMutation.createOrAppend(doneUrl, "%%% TASKS\n# Done", new MemexNodeHeadingId(1), + "## Done 2012-04-30", "- A very old task (/)").visit(memex); + + GemtextMutation.createOrAppend(url, "%%% TASKS\n# Header", new MemexNodeHeadingId(1), + "## Todo", "- A task yet finished").visit(memex); + + GemtextDocumentUpdateCalculator updateCalculator = new GemtextDocumentUpdateCalculator(memex); + var updates = updateCalculator.calculateUpdates(memex.getDocument(url), MemexNodeHeadingId.ROOT, + GemtextDocument.of(url, "%%% TASKS", "# Header", "## Todo", "- A task yet finished (/)")); + updates.forEach(System.out::println); + for (var update : updates) { + update.visit(memex); + } + + verifyFile(doneFilePath, + "%%% TASKS", + "# Done", + "", + "## Done " + LocalDate.now().format(DateTimeFormatter.ISO_LOCAL_DATE), + "- A task yet finished (/)", + "## Done 2012-04-30", + "- A very old task (/)"); + + } + + public void verifyFile(String file, String... 
lines) throws IOException { + Path p = Path.of(tempDir + file); + assertTrue(Files.exists(p), "File " + file + " is missing"); + List actualLines = Files.readAllLines(p); + System.out.println("Expecting: "); + Arrays.stream(lines).forEach(System.out::println); + System.out.println("Got: "); + actualLines.forEach(System.out::println); + System.out.println("-- end -- "); + + assertEquals(lines.length, actualLines.size()); + for (int i = 0; i < lines.length; i++) { + assertEquals(lines[i], actualLines.get(i)); + } + } + +} \ No newline at end of file diff --git a/marginalia_nu/src/test/java/nu/marginalia/wmsa/memex/change/GemtextTombstoneUpdateCaclulatorTest.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/memex/change/GemtextTombstoneUpdateCaclulatorTest.java new file mode 100644 index 00000000..4fe30379 --- /dev/null +++ b/marginalia_nu/src/test/java/nu/marginalia/wmsa/memex/change/GemtextTombstoneUpdateCaclulatorTest.java @@ -0,0 +1,106 @@ +package nu.marginalia.wmsa.memex.change; + +import io.reactivex.rxjava3.plugins.RxJavaPlugins; +import lombok.SneakyThrows; +import nu.marginalia.gemini.GeminiService; +import nu.marginalia.util.test.TestUtil; +import nu.marginalia.wmsa.memex.*; +import nu.marginalia.wmsa.memex.renderer.MemexHtmlRenderer; +import nu.marginalia.wmsa.memex.model.MemexNodeUrl; +import nu.marginalia.wmsa.memex.renderer.MemexRendererers; +import nu.marginalia.wmsa.memex.system.MemexFileSystemModifiedTimes; +import nu.marginalia.wmsa.memex.system.MemexFileWriter; +import nu.marginalia.wmsa.memex.system.MemexGitRepo; +import nu.marginalia.wmsa.memex.system.MemexSourceFileSystem; +import org.junit.BeforeClass; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.mockito.Mockito; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.List; + +import static 
org.junit.jupiter.api.Assertions.assertEquals; + + +class GemtextTombstoneUpdateCaclulatorTest { + + private GemtextTombstoneUpdateCaclulator updateCaclulator; + private Memex memex; + private Path tempDir; + + private final String tombstonePath = "/special/tombstone.gmi"; + private final String redirectPath = "/special/redirects.gmi"; + + static Logger logger = LoggerFactory.getLogger(GemtextTombstoneUpdateCaclulatorTest.class); + + @BeforeClass + public static void init() { + + RxJavaPlugins.setErrorHandler(e -> { + if (e.getMessage() == null) { + logger.error("Error", e); + } + else { + logger.error("Error {}: {}", e.getClass().getSimpleName(), e.getMessage()); + } + }); + } + + @SneakyThrows + @BeforeEach + public void setUp() { + tempDir = Files.createTempDirectory("test"); + Files.createDirectory(tempDir.resolve("special")); + + updateCaclulator = new GemtextTombstoneUpdateCaclulator( + tombstonePath, + redirectPath + ); + var data = new MemexData(); + + memex = new Memex(data, null, + Mockito.mock(MemexGitRepo.class), + new MemexLoader(data, new MemexFileSystemModifiedTimes(), + new MemexSourceFileSystem(tempDir, Mockito.mock(MemexGitRepo.class)), tempDir, tombstonePath, redirectPath), + Mockito.mock(MemexFileWriter.class), + updateCaclulator, + Mockito.mock(MemexRendererers.class), + Mockito.mock(GeminiService.class)); + } + + @SneakyThrows + @AfterEach + public void tearDown() { + TestUtil.clearTempDir(tempDir); + } + + @Test + void addTombstone() throws IOException { + updateCaclulator.addTombstone(new MemexNodeUrl("/deleted.gmi"), "It's gone jimmy").visit(memex); + updateCaclulator.addTombstone(new MemexNodeUrl("/deleted2.gmi"), "RIP").visit(memex); + List lines = Files.readAllLines(Path.of(tempDir + tombstonePath)); + assertEquals(3, lines.size()); + + assertEquals("# Tombstones", lines.get(0)); + assertEquals("=> /deleted.gmi\tIt's gone jimmy", lines.get(1)); + assertEquals("=> /deleted2.gmi\tRIP", lines.get(2)); + } + + @Test + void addRedirect() 
throws IOException { + updateCaclulator.addRedirect(new MemexNodeUrl("/deleted.gmi"), "/new").visit(memex); + updateCaclulator.addRedirect(new MemexNodeUrl("/deleted2.gmi"), "/new2").visit(memex); + List lines = Files.readAllLines(Path.of(tempDir + redirectPath)); + + assertEquals(3, lines.size()); + assertEquals("# Redirects", lines.get(0)); + assertEquals("=> /deleted.gmi\t/new", lines.get(1)); + assertEquals("=> /deleted2.gmi\t/new2", lines.get(2)); + } +} \ No newline at end of file diff --git a/marginalia_nu/src/test/java/nu/marginalia/wmsa/memex/model/MemexNodeHeadingIdTest.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/memex/model/MemexNodeHeadingIdTest.java new file mode 100644 index 00000000..f0163b3b --- /dev/null +++ b/marginalia_nu/src/test/java/nu/marginalia/wmsa/memex/model/MemexNodeHeadingIdTest.java @@ -0,0 +1,33 @@ +package nu.marginalia.wmsa.memex.model; + +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.*; + +class MemexNodeHeadingIdTest { + @Test + public void test() { + var heading = new MemexNodeHeadingId(0); + assertEquals("0", heading.toString()); + assertEquals("1", heading.next(0).toString()); + assertEquals("0.1", heading.next(1).toString()); + assertEquals("0.0.1", heading.next(2).toString()); + assertEquals("0.1", heading.next(2).next(1).toString()); + } + + @Test + public void testParenthood() { + var heading = new MemexNodeHeadingId(1,0,2); + + assertTrue(heading.isChildOf(new MemexNodeHeadingId(1,0))); + assertTrue(heading.isChildOf(new MemexNodeHeadingId(1))); + assertFalse(heading.isChildOf(new MemexNodeHeadingId(1,1))); + assertFalse(heading.isChildOf(new MemexNodeHeadingId(1,0,1))); + } + + @Test + public void testComparator() { + assertTrue(new MemexNodeHeadingId(1).compareTo(new MemexNodeHeadingId(2)) < 0); + assertTrue(new MemexNodeHeadingId(1).compareTo(new MemexNodeHeadingId(1, 1)) > 0); + } +} \ No newline at end of file diff --git 
a/marginalia_nu/src/test/java/nu/marginalia/wmsa/podcasts/PodcastFetcherTest.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/podcasts/PodcastFetcherTest.java new file mode 100644 index 00000000..0033464e --- /dev/null +++ b/marginalia_nu/src/test/java/nu/marginalia/wmsa/podcasts/PodcastFetcherTest.java @@ -0,0 +1,15 @@ +package nu.marginalia.wmsa.podcasts; + +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.*; + +class PodcastFetcherTest { + + @Test + void fetchPodcast() { + var result = new PodcastFetcher().fetchPodcast("hopwag", "https://rss.acast.com/readmeapoem"); + assertTrue(result.isPresent()); + System.out.println(result); + } +} \ No newline at end of file diff --git a/marginalia_nu/src/test/java/nu/marginalia/wmsa/resource_store/ResourceStoreServiceTest.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/resource_store/ResourceStoreServiceTest.java new file mode 100644 index 00000000..daf43337 --- /dev/null +++ b/marginalia_nu/src/test/java/nu/marginalia/wmsa/resource_store/ResourceStoreServiceTest.java @@ -0,0 +1,119 @@ +package nu.marginalia.wmsa.resource_store; + +import lombok.SneakyThrows; +import nu.marginalia.util.TestUtil; +import nu.marginalia.wmsa.configuration.server.Context; +import nu.marginalia.wmsa.configuration.server.Initialization; +import nu.marginalia.wmsa.resource_store.model.RenderedResource; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import spark.Spark; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.time.LocalDateTime; + +import static org.junit.jupiter.api.Assertions.*; + +class ResourceStoreServiceTest { + static ResourceStoreService service; + static ResourceStoreClient client; + + static int testPort = TestUtil.getPort(); + static 
ResourceEntityStore resourceStore; + static Path tempDir; + private static Logger logger = LoggerFactory.getLogger(ResourceStoreServiceTest.class); + + @SneakyThrows + @BeforeAll + public static void setUpClass() { + Spark.port(testPort); + System.setProperty("service-name", "renderer"); + + client = new ResourceStoreClient(); + client.setServiceRoute("127.0.0.1", testPort); + tempDir = Files.createTempDirectory("ResourceStoreServiceTest"); + resourceStore = new ResourceEntityStore(tempDir); + service = new ResourceStoreService("127.0.0.1", testPort, null, + resourceStore, new Initialization(), null); + + Spark.awaitInitialization(); + } + + @AfterEach + public void clearTempDir() { + for (File f : tempDir.toFile().listFiles()) { + for (File f2 : f.listFiles()) { + logger.debug("Deleting {} -> {}", f2, f2.delete()); + } + logger.debug("Deleting {} -> {}", f, f.delete()); + } + } + + @AfterAll + public static void tearDownAll() { + tempDir.toFile().delete(); + Spark.awaitStop(); + } + + @Test + public void sunnyDay() throws IOException { + client.putResource(Context.internal(), "test", new RenderedResource("index.html", LocalDateTime.MAX,"Hello World")).blockingSubscribe(); + assertEquals("Hello World", client.getResource(Context.internal(),"test", "index.html").blockingFirst()); + } + + + @Test + public void loadFromDisk() throws IOException, InterruptedException { + client.putResource(Context.internal(), "test", new RenderedResource("index.html", LocalDateTime.MAX,"Hello World")).blockingSubscribe(); + client.putResource(Context.internal(), "test", new RenderedResource("expired.html", LocalDateTime.now().minusDays(14),"Hello World")).blockingSubscribe(); + + var resourceStore2 = new ResourceEntityStore(tempDir, true); + Thread.sleep(1000); + var resource = resourceStore2.getResource("test", "index.html"); + + assertNotNull(resource); + assertEquals("Hello World", resource.data); + + assertNull(resourceStore2.getResource("test", "expired.html")); + } + + @Test + 
public void testReaper() throws IOException { + client.putResource(Context.internal(), "test", new RenderedResource("index.html", LocalDateTime.now().minusDays(14),"Hello World")).blockingSubscribe(); + assertEquals("Hello World", client.getResource(Context.internal(),"test", "index.html").blockingFirst()); + + resourceStore.reapStaleResources(); + + var ret = client + .getResource(Context.internal(), "test", "index.html") + .onErrorReturnItem("Error") + .blockingFirst(); + assertEquals("Error", ret); + } + + + @Test + public void update() throws IOException { + client.putResource(Context.internal(), "test", new RenderedResource("index.html", LocalDateTime.MAX,"Hello World")).blockingSubscribe(); + assertEquals("Hello World", client.getResource(Context.internal(),"test", "index.html").blockingFirst()); + client.putResource(Context.internal(), "test", new RenderedResource("index.html", LocalDateTime.MAX,"Hello World 2")).blockingSubscribe(); + assertEquals("Hello World 2", client.getResource(Context.internal(), "test", "index.html").blockingFirst()); + } + + @Test + public void missing() throws IOException { + var ret = client + .getResource(Context.internal(), "test", "invalid.html") + .onErrorReturnItem("Error") + .blockingFirst(); + assertEquals("Error", ret); + } + +} \ No newline at end of file diff --git a/marginalia_nu/src/test/java/nu/marginalia/wmsa/smhi/scraper/crawler/SmhiBackendApiTest.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/smhi/scraper/crawler/SmhiBackendApiTest.java new file mode 100644 index 00000000..b807afad --- /dev/null +++ b/marginalia_nu/src/test/java/nu/marginalia/wmsa/smhi/scraper/crawler/SmhiBackendApiTest.java @@ -0,0 +1,33 @@ +package nu.marginalia.wmsa.smhi.scraper.crawler; + +import nu.marginalia.wmsa.smhi.model.Plats; +import org.junit.jupiter.api.Test; + +import java.time.LocalDateTime; +import java.time.ZoneId; +import java.time.format.DateTimeFormatter; + +import static org.junit.jupiter.api.Assertions.*; + +class 
SmhiBackendApiTest { + + @Test + void hamtaData() throws Exception { + var api = new SmhiBackendApi("nu.marginalia"); + + + System.out.println(api.hamtaData(new Plats("Ystad", "55.42966", "13.82041")) + .jsonContent + ); + } + + @Test + public void testDatum() { + System.out.println(LocalDateTime.parse("2021-05-29T14:06:48Z", + DateTimeFormatter.ISO_ZONED_DATE_TIME) + .atZone(ZoneId.of("GMT")) + .toOffsetDateTime() + .atZoneSameInstant(ZoneId.of("Europe/Stockholm")) + ); + } +} \ No newline at end of file diff --git a/marginalia_nu/src/test/resources/html/monadnock.html b/marginalia_nu/src/test/resources/html/monadnock.html new file mode 100644 index 00000000..324b8cd3 --- /dev/null +++ b/marginalia_nu/src/test/resources/html/monadnock.html @@ -0,0 +1,17 @@ + + + + Monadnock Valley Press + + + + + + +

    Monadnock Valley Press

    +

    The Monadnock Valley Press is an online publisher of public domain texts that reflect our vision of human potential, with a special focus on the literature of freedom and the classics of Western civilization, the Anglosphere, and America.

    +

    We publish works by a wide range of authors in literature, philosophy, history, the sciences, and the arts. For information about texts we have published so far, consult the chronology. The credits might also be of interest.

    +

    Last Updated: 2021-01-21.

    + + + diff --git a/marginalia_nu/src/test/resources/log4j2.properties b/marginalia_nu/src/test/resources/log4j2.properties new file mode 100644 index 00000000..9c2dbefd --- /dev/null +++ b/marginalia_nu/src/test/resources/log4j2.properties @@ -0,0 +1,15 @@ + +status = info + +appender.console.type = Console +appender.console.name = LogToConsole +appender.console.layout.type = PatternLayout +appender.console.layout.pattern = %highlight{%-5level}{FATAL=red, ERROR=red, WARN=yellow} %c{1}- %msg%n + +logger.console.name = nu.marginalia +logger.console.level = debug +logger.console.additivity = false +logger.console.appenderRef.rolling.ref = LogToConsole + +rootLogger.level = info +rootLogger.appenderRef.console.ref = LogToConsole diff --git a/marginalia_nu/src/test/resources/model-data.json b/marginalia_nu/src/test/resources/model-data.json new file mode 100644 index 00000000..063b93e7 --- /dev/null +++ b/marginalia_nu/src/test/resources/model-data.json @@ -0,0 +1 @@ +{"comments":[{"id":{"value":"t1_gku7btj"},"submission_id":{"id":"t3_l5ejhj"},"parent_id":{"id":"t3_l5ejhj"},"distinguished":false,"created_utc":"21-01-26 17:35:16","author":"TheEmpathyBox","body":"Hello,\n\nHow would you translate in latin \" *unreliable narrator*\" ?","sequenceNumber":2,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":596601000}}},{"id":{"value":"t1_gkudywj"},"submission_id":{"id":"t3_l5ejhj"},"parent_id":{"id":"t1_gku7btj"},"distinguished":false,"created_utc":"21-01-26 18:23:03","author":"kc_kennylau","body":"Literally it would be \"narr?tor ?nfid?lis\", but obviously this is a recent term ([Wiktionary](https://en.wiktionary.org/wiki/unreliable_narrator) says coined in 1961) that does not have an immediate equivalence to Roman literature that comes to 
mind.","sequenceNumber":3,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":597256000}}},{"id":{"value":"t1_gkxfa9w"},"submission_id":{"id":"t3_l5ejhj"},"parent_id":{"id":"t1_gkudywj"},"distinguished":false,"created_utc":"21-01-27 09:46:25","author":"BobTheSCV","body":"Seems strange there wouldn\u0027t be a term for this. Unreliable narrators go back as far as Homer with the Odyssey.\n\nEvery part of the story Odysseus narrates is full of fantastical monsters and extremely unlikely events; a complete break from the story as narrated by the voice of Homer which is more in line with the style of the Illiad, a lot more down to earth except for minor interventions by the gods.\n\nAs to make a point, the guy is shown compulsively deceiving every single person he meets: Gods, beasts, enemies, allies alike. Why *wouldn\u0027t* he deceive the audience as well?","sequenceNumber":4,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":597341000}}},{"id":{"value":"t1_gkuf0n7"},"submission_id":{"id":"t3_l5ejhj"},"parent_id":{"id":"t1_gku7btj"},"distinguished":false,"created_utc":"21-01-26 18:30:35","author":"BaconJudge","body":"I\u0027d convey that idea with an adjective meaning \"untrustworthy,\" either *narrator infidus* or *narrator infidelis*, the former possibly helping to avoid the secondary religious sense of the latter.\n\nThe noun *narrator* traditionally existed only in masculine form, but the explicitly feminine version *narratrix* appears in newer references like the Vatican\u0027s *Lexicon Recentis Latinitatis* and Rene Hoeven\u0027s *Lexique de la Prose Latine de la Renaissance,* so for a female character you\u0027d have the option of *narratrix infida* (note the change at the end of the adjective) or *narratrix infidelis*, if you 
wanted.","sequenceNumber":5,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":597415000}}},{"id":{"value":"t1_gkug4j7"},"submission_id":{"id":"t3_l5ejhj"},"parent_id":{"id":"t1_gku7btj"},"distinguished":false,"created_utc":"21-01-26 18:38:25","author":"lutetiensis","body":"*incredibilis narrator.*\n\n[*incredibilis*](https://logeion.uchicago.edu/incredibilis) \u003d *in* \\+ [*credibilis*](https://logeion.uchicago.edu/credibilis), from [*credere*](https://logeion.uchicago.edu/credo), to believe, to intrust.","sequenceNumber":6,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":597483000}}},{"id":{"value":"t1_gkue6uo"},"submission_id":{"id":"t3_l5ejhj"},"parent_id":{"id":"t1_gku7btj"},"distinguished":false,"created_utc":"21-01-26 18:24:37","author":"EgoSumInHorto","body":"\"*Narr?tor viti?sissimus*\"","sequenceNumber":7,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":597550000}}},{"id":{"value":"t1_gkv6xf8"},"submission_id":{"id":"t3_l5ejhj"},"parent_id":{"id":"t1_gkue6uo"},"distinguished":false,"created_utc":"21-01-26 21:39:47","author":"Tharadin1970","body":"Wouldnt that be more \"a very vice-ful narrator\"?","sequenceNumber":8,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":597617000}}},{"id":{"value":"t1_gkv76xq"},"submission_id":{"id":"t3_l5ejhj"},"parent_id":{"id":"t1_gkv6xf8"},"distinguished":false,"created_utc":"21-01-26 21:41:21","author":"EgoSumInHorto","body":"I couldn\u0027t find a word for \"unreliable\"; \"viti?sus\" means \"full of faults, corrupt, vicious, morally faulty, defective\", hence 
\"unreliable\"","sequenceNumber":9,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":597684000}}},{"id":{"value":"t1_gkv7rzf"},"submission_id":{"id":"t3_l5ejhj"},"parent_id":{"id":"t1_gkv76xq"},"distinguished":false,"created_utc":"21-01-26 21:45:06","author":"lutetiensis","body":"\u003e \"unreliable\"\n\nIncredibilis (see my comment).","sequenceNumber":10,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":597750000}}},{"id":{"value":"t1_gkv848d"},"submission_id":{"id":"t3_l5ejhj"},"parent_id":{"id":"t1_gkv7rzf"},"distinguished":false,"created_utc":"21-01-26 21:47:13","author":"EgoSumInHorto","body":"That should have been a pretty obvious derivation to make... Doh!\nThanks :)","sequenceNumber":11,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":597823000}}},{"id":{"value":"t1_gkup1zh"},"submission_id":{"id":"t3_l5ejhj"},"parent_id":{"id":"t3_l5ejhj"},"distinguished":false,"created_utc":"21-01-26 19:39:00","author":"Gopnikcykablyat","body":"How do you say \"Hope never dies, because hope is the killer\" ?","sequenceNumber":12,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":597887000}}},{"id":{"value":"t1_gkuvwow"},"submission_id":{"id":"t3_l5ejhj"},"parent_id":{"id":"t1_gkup1zh"},"distinguished":false,"created_utc":"21-01-26 20:26:06","author":"lutetiensis","body":"For stylistic reasons, I would render it as:\n\n*spes non decedit sed caedit.*\n\nHope doesn\u0027t die, but kills. 
You can replace *non* with *numquam* (\"never\") *sed* with *quia* (\"because \\[it\\]\").","sequenceNumber":13,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":597954000}}},{"id":{"value":"t1_gkuqvlo"},"submission_id":{"id":"t3_l5ejhj"},"parent_id":{"id":"t1_gkup1zh"},"distinguished":false,"created_utc":"21-01-26 19:51:09","author":"BluuDuud","body":"Id say, \"sp?s numquam moritur, nam sp?s nec?tor est\"","sequenceNumber":14,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":598026000}}},{"id":{"value":"t1_gkvrzwi"},"submission_id":{"id":"t3_l5ejhj"},"parent_id":{"id":"t1_gkuqvlo"},"distinguished":false,"created_utc":"21-01-27 00:08:45","author":"magistramegaera","body":"Should it be necatrix instead of necator, since spes is feminine? Or does that not really matter with an abstract concept like this?","sequenceNumber":15,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":598092000}}},{"id":{"value":"t1_gl60nlx"},"submission_id":{"id":"t3_l5ejhj"},"parent_id":{"id":"t1_gkvrzwi"},"distinguished":false,"created_utc":"21-01-29 01:01:50","author":"Sochamelet","body":"I wouldn\u0027t say it\u0027s wrong per se to use *necator*, but in my experience, Roman authors were generally inclined to preserve correspondences between the gender of words, if possible.","sequenceNumber":16,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":598158000}}},{"id":{"value":"t1_gkwj57n"},"submission_id":{"id":"t3_l5ejhj"},"parent_id":{"id":"t1_gkvrzwi"},"distinguished":false,"created_utc":"21-01-27 03:48:35","author":"BluuDuud","body":"I forgot that, I think you\u0027re 
correct","sequenceNumber":17,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":598223000}}},{"id":{"value":"t1_gkzsh08"},"submission_id":{"id":"t3_l5ejhj"},"parent_id":{"id":"t1_gkwj57n"},"distinguished":false,"created_utc":"21-01-27 21:15:26","author":"glaraaaaaaah","body":"No it doesn?t, _necator_ is a masculine noun not an adjective, so it doesn?t need to agree. Unless you want to specify that hope is female, male-gendered words are more common for abstract concepts I think","sequenceNumber":18,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":598287000}}},{"id":{"value":"t1_gkuthtv"},"submission_id":{"id":"t3_l5ejhj"},"parent_id":{"id":"t3_l5ejhj"},"distinguished":false,"created_utc":"21-01-26 20:09:24","author":"Youngerthandumb","body":"There was a quote from a Scandanavian bishop who, on his deathbed suffering from intense pains, cried out \"Do not pray me out of god\u0027s battle!\" when his colleagues gathered round to pray for his recovery. I thought that was kind of metal. How would that translate to Latin?","sequenceNumber":19,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":598351000}}},{"id":{"value":"t1_gkv2ymf"},"submission_id":{"id":"t3_l5ejhj"},"parent_id":{"id":"t1_gkuthtv"},"distinguished":false,"created_utc":"21-01-26 21:13:38","author":"nimbleping","body":"*M? (mihi (?)) ? pugn? De? n?n d?prec?re/d?prec?min?* (sg./pl. addressees).\n\n(Do not intercede by prayer on behalf of me away from the battle of God.)\n\nEDIT: I\u0027m not 100% sure if the accusative *m?* or the dative *mihi* should be used here. I\u0027d who knows to offer an opinion. 
I figure the ablative of motion away from would be appropriate here.","sequenceNumber":20,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":598419000}}},{"id":{"value":"t1_gkv8czl"},"submission_id":{"id":"t3_l5ejhj"},"parent_id":{"id":"t1_gkv2ymf"},"distinguished":false,"created_utc":"21-01-26 21:48:49","author":"Youngerthandumb","body":"Thank you I appreciate it! I took 2 years of high school latin but I don\u0027t trust myself to translate anything beyond \"Caecilius ad venit\".","sequenceNumber":21,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":598484000}}},{"id":{"value":"t1_gkv9lku"},"submission_id":{"id":"t3_l5ejhj"},"parent_id":{"id":"t3_l5ejhj"},"distinguished":false,"created_utc":"21-01-26 21:57:03","author":"XENO-BLAZE","body":"How would you translate:\n\nAllow me to impress upon you the severe mistake you have made. For years my conduct has been largely benign. And yet, without provocation, you have severed our détente and forced me to unleash upon you the vengeful flames of a thousand suns. You shall curse your mother for the day of your birth. So, go now, go, and begin your life of fear, knowing that when you least expect it, the looming sword of Damocles will crash upon you, cleaving you in twain and as you gaze upon the smoking wreckage that was once your life, you will regret the day you crossed me","sequenceNumber":22,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":598545000}}},{"id":{"value":"t1_gkve8fd"},"submission_id":{"id":"t3_l5ejhj"},"parent_id":{"id":"t3_l5ejhj"},"distinguished":false,"created_utc":"21-01-26 22:28:44","author":"iillltt","body":"Hello! I\u0027m not sure if this fits here but I was wondering if anyone would be able to identify/translate what the chant in the beginning of [this song](https://youtu.be/3oUUG7Mfoc4) is. 
I think it\u0027s in Latin so forgive me if it\u0027s not","sequenceNumber":23,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":598610000}}},{"id":{"value":"t1_gkvkja8"},"submission_id":{"id":"t3_l5ejhj"},"parent_id":{"id":"t1_gkve8fd"},"distinguished":false,"created_utc":"21-01-26 23:12:05","author":"lutetiensis","body":"That\u0027s hard... *regum satus*? *sanctus?* Do you know what they sampled?","sequenceNumber":24,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":598686000}}},{"id":{"value":"t1_gkvm21e"},"submission_id":{"id":"t3_l5ejhj"},"parent_id":{"id":"t1_gkvkja8"},"distinguished":false,"created_utc":"21-01-26 23:23:51","author":"iillltt","body":"unfortunately not, i\u0027ve heard other people say it\u0027s \u0027Spiritus Sanctus\u0027 but i haven\u0027t been able to find the original sample- same with regum satus which doesn\u0027t have any search results. thank you so much as well","sequenceNumber":25,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":598753000}}},{"id":{"value":"t1_gkvmwyy"},"submission_id":{"id":"t3_l5ejhj"},"parent_id":{"id":"t1_gkvm21e"},"distinguished":false,"created_utc":"21-01-26 23:30:26","author":"lutetiensis","body":"Ok. Sorry. I don\u0027t think I can do more on this one.","sequenceNumber":26,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":598819000}}},{"id":{"value":"t1_gkvnbr3"},"submission_id":{"id":"t3_l5ejhj"},"parent_id":{"id":"t1_gkvmwyy"},"distinguished":false,"created_utc":"21-01-26 23:33:31","author":"iillltt","body":"thank you so much for trying!!! 
i\u0027ll be questioning producers where they get samples from next haha","sequenceNumber":27,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":598880000}}},{"id":{"value":"t1_gkvm2ak"},"submission_id":{"id":"t3_l5ejhj"},"parent_id":{"id":"t1_gkve8fd"},"distinguished":false,"created_utc":"21-01-26 23:23:54","author":"ragnrikr","body":"Can\u0027t really help (to my ears it sounds like (sectum/secum) (satu/sato) I.e. gibberish), just wanted to point out this post https://www.reddit.com/r/kpophelp/comments/g49xob/latin_in_gottasadae/\n\nNo source sadly :/","sequenceNumber":28,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":598944000}}},{"id":{"value":"t1_gkvn3fo"},"submission_id":{"id":"t3_l5ejhj"},"parent_id":{"id":"t1_gkvm2ak"},"distinguished":false,"created_utc":"21-01-26 23:31:47","author":"iillltt","body":"Yep :( \nI was reminded of this question when someone asked a similar question as to your linked post so it still hasn\u0027t been answered. well on the bright side I got to share a nice song and the mystery will remain unsolved ...","sequenceNumber":29,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":599016000}}},{"id":{"value":"t1_gkw3c5e"},"submission_id":{"id":"t3_l5ejhj"},"parent_id":{"id":"t3_l5ejhj"},"distinguished":false,"created_utc":"21-01-27 01:38:08","author":"luisafonsoteixeira","body":"I\u0027ve recently came across the US Navy Academy saying \"Ex Scientia Tridens\", ?Through Knowledge, Sea Power?. 
How could one correctly say \"through knowledge, power\"?","sequenceNumber":30,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":599082000}}},{"id":{"value":"t1_gkwzw4j"},"submission_id":{"id":"t3_l5ejhj"},"parent_id":{"id":"t1_gkw3c5e"},"distinguished":false,"created_utc":"21-01-27 06:24:34","author":"lutetiensis","body":"\u003epower\n\n[Potentia](https://logeion.uchicago.edu/potentia), [imperium](https://logeion.uchicago.edu/imperium), [fortitudo](https://logeion.uchicago.edu/fortitudo)...\n\nNote the original motto is poetic. It doesn\u0027t say \"sea power\", but instead \"\\[Neptune\u0027s\\] trident\". You could find an artifact that would represent power for you (a sword? a crown?).","sequenceNumber":31,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":599146000}}},{"id":{"value":"t1_gkw4obz"},"submission_id":{"id":"t3_l5ejhj"},"parent_id":{"id":"t3_l5ejhj"},"distinguished":false,"created_utc":"21-01-27 01:48:56","author":"Eldonith","body":"Help me fix a cringe tattoo!\n\nBackground: I got a tattoo back when the emo craze was big and Hot Topic was one of the most popular stores in the mall about 12 years ago. It reads \"Nascentes Morimur\" which roughly translated to \"When we are born we begin to die.\" It seemed cool at the time in my addled 18 year old mind, but I\u0027m a family man now and not only does it no longer represent my mindset. but it\u0027s even embarrassing to explain.\n\nWhat line (in Latin) would you suggest I add to brighten it up? Please include a translation, as my highschool Latin days are far behind me. 
Thanks!","sequenceNumber":32,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":599207000}}},{"id":{"value":"t1_gkxz2rq"},"submission_id":{"id":"t3_l5ejhj"},"parent_id":{"id":"t1_gkw4obz"},"distinguished":false,"created_utc":"21-01-27 14:11:03","author":"BaconJudge","body":"Given the constraint that it\u0027ll contain a reference to dying, is there any particular sentiment you want to convey? Because you\u0027re a family man, maybe you could expand it to something like *Amati Nascentes, Morimur Amati\" (inserting the optional comma if there\u0027s room) to imply roughly \"Born loved, we die loved.\"","sequenceNumber":33,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":599267000}}},{"id":{"value":"t1_gl1ij4p"},"submission_id":{"id":"t3_l5ejhj"},"parent_id":{"id":"t1_gkxz2rq"},"distinguished":false,"created_utc":"21-01-28 04:12:22","author":"Eldonith","body":"Wow that\u0027s beautiful and exactly the kinda sentiment I\u0027d like it changed to! I love how you transformed it with a word added to the beginning and end instead of a whole 2nd line. 
I may very well end up going with this unless anybody can top that suggestion.","sequenceNumber":34,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":599334000}}},{"id":{"value":"t1_gl83w6w"},"submission_id":{"id":"t3_l5ejhj"},"parent_id":{"id":"t1_gl1ij4p"},"distinguished":false,"created_utc":"21-01-29 12:52:05","author":"BaconJudge","body":"Happy to help, and I hope the tattoo change works out for you.","sequenceNumber":35,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":599401000}}},{"id":{"value":"t1_gkwk2sa"},"submission_id":{"id":"t3_l5ejhj"},"parent_id":{"id":"t3_l5ejhj"},"distinguished":false,"created_utc":"21-01-27 03:56:29","author":"reds3232","body":" mater servum vituperavit","sequenceNumber":36,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":599464000}}},{"id":{"value":"t1_gkwlsx3"},"submission_id":{"id":"t3_l5ejhj"},"parent_id":{"id":"t3_l5ejhj"},"distinguished":false,"created_utc":"21-01-27 04:11:09","author":"megtheedemon","body":"How do you say ?I would rather be studying latin?","sequenceNumber":37,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":599524000}}},{"id":{"value":"t1_gkxgp1b"},"submission_id":{"id":"t3_l5ejhj"},"parent_id":{"id":"t1_gkwlsx3"},"distinguished":false,"created_utc":"21-01-27 10:07:50","author":"kc_kennylau","body":"Lat?nae linguae stud?re m?l?","sequenceNumber":38,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":599586000}}},{"id":{"value":"t1_gkx6rax"},"submission_id":{"id":"t3_l5ejhj"},"parent_id":{"id":"t3_l5ejhj"},"distinguished":false,"created_utc":"21-01-27 07:45:53","author":"ResidentGift","body":"In a game I\u0027m playing, there are characters named Alatus, Bosacius, Indarias, Bonanus, and Menogias. 
I\u0027m pretty sure Alatus is the Latin word for \"*winged*\" (or at least related to \"*wing*\") and it also suits the character\u0027s motif. The other four names sound Latin-ish, but I can\u0027t find anything on them. Can anyone confirm if the other four names are Latin or rooted in Latin? If yes, how would they be translated?","sequenceNumber":39,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":599646000}}},{"id":{"value":"t1_gkxwqf0"},"submission_id":{"id":"t3_l5ejhj"},"parent_id":{"id":"t1_gkx6rax"},"distinguished":false,"created_utc":"21-01-27 13:45:42","author":"BaconJudge","body":"You\u0027re right about Alatus, but I don\u0027t think the others are Latin-derived. Bosacius could have been loosely inspired by words like *boscis* (\"waterfowl\") or Medieval Latin *boscus* (\"woods\") if either of those makes sense for the character. If Bonanus is the good guy, the name might be inspired by the very common Latin adjective *bonus* (\"good\"). Words or names ending in -as are likely to be Greek rather than Latin, and I don\u0027t recognize any promising Latin roots for those two unless Indarias is from India, which is the same in Latin.","sequenceNumber":40,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":599714000}}},{"id":{"value":"t1_gl0mrhe"},"submission_id":{"id":"t3_l5ejhj"},"parent_id":{"id":"t1_gkxwqf0"},"distinguished":false,"created_utc":"21-01-28 00:36:11","author":"ResidentGift","body":"Thank you for the help!","sequenceNumber":41,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":599777000}}},{"id":{"value":"t1_gkxpfxd"},"submission_id":{"id":"t3_l5ejhj"},"parent_id":{"id":"t3_l5ejhj"},"distinguished":false,"created_utc":"21-01-27 12:17:05","author":"stickybeak7","body":"Hi there! 
Hoping to get a translation similar to \"memento mori\" but for \"remember you are loved\" for a valentines gift :o) thank you so much!","sequenceNumber":42,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":599838000}}},{"id":{"value":"t1_gkxsyty"},"submission_id":{"id":"t3_l5ejhj"},"parent_id":{"id":"t1_gkxpfxd"},"distinguished":false,"created_utc":"21-01-27 13:02:11","author":"aveCaecilius","body":"memento amari","sequenceNumber":43,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":599899000}}},{"id":{"value":"t1_gkzyg27"},"submission_id":{"id":"t3_l5ejhj"},"parent_id":{"id":"t1_gkxsyty"},"distinguished":false,"created_utc":"21-01-27 21:54:16","author":"stickybeak7","body":"thank you so much! ?","sequenceNumber":44,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":599965000}}},{"id":{"value":"t1_gkxpgd0"},"submission_id":{"id":"t3_l5ejhj"},"parent_id":{"id":"t3_l5ejhj"},"distinguished":false,"created_utc":"21-01-27 12:17:16","author":"pierro_la_place","body":"Hi there!\n\nI am building a mediaval-ish world in which the people of kingdom A really don\u0027t like kingdom B they are at war with, to the point that they refuse to pronounce its name. Instead they say someting along the lines of \"land of the rapists\", but in Latin to give it an almost religious tone. 
After a bit of research, the translation I was thinking about was \"terra stuparotes\", but I am not an expert in Latin so I would like to know what you think.\n\nThanks!","sequenceNumber":45,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":600039000}}},{"id":{"value":"t1_gkxvjvf"},"submission_id":{"id":"t3_l5ejhj"},"parent_id":{"id":"t1_gkxpgd0"},"distinguished":false,"created_utc":"21-01-27 13:32:32","author":"BaconJudge","body":"That\u0027s probably a typo for *stupratores*, which is the plural form of the word [*stuprator*](http://www.perseus.tufts.edu/hopper/text?doc\u003dPerseus%3Atext%3A1999.04.0059%3Aentry%3Dstuprator) when used as the subject of a sentence. Because you want it as a possessive plural (\"of the rapists\"), the phrase would be *terra stupratorum*.","sequenceNumber":46,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":600100000}}},{"id":{"value":"t1_gkxypyz"},"submission_id":{"id":"t3_l5ejhj"},"parent_id":{"id":"t1_gkxvjvf"},"distinguished":false,"created_utc":"21-01-27 14:07:19","author":"pierro_la_place","body":"Yup, I mixed up accusative and genitive. Good thing I asked! Thx.","sequenceNumber":47,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":600161000}}},{"id":{"value":"t1_gkxzjnm"},"submission_id":{"id":"t3_l5ejhj"},"parent_id":{"id":"t1_gkxypyz"},"distinguished":false,"created_utc":"21-01-27 14:15:50","author":"BaconJudge","body":"You\u0027re welcome, anytime.","sequenceNumber":48,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":600231000}}},{"id":{"value":"t1_gkylxmm"},"submission_id":{"id":"t3_l5ejhj"},"parent_id":{"id":"t3_l5ejhj"},"distinguished":false,"created_utc":"21-01-27 16:55:07","author":"Koa1121","body":"How would you say ?Where easy becomes hard? 
or ?Where easy is hard?","sequenceNumber":49,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":600292000}}},{"id":{"value":"t1_gl38i9x"},"submission_id":{"id":"t3_l5ejhj"},"parent_id":{"id":"t1_gkylxmm"},"distinguished":false,"created_utc":"21-01-28 15:09:01","author":"quintus_sub_rosa","body":"in quo facile fit difficile.","sequenceNumber":50,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":600351000}}},{"id":{"value":"t1_gl0zg1x"},"submission_id":{"id":"t3_l5ejhj"},"parent_id":{"id":"t3_l5ejhj"},"distinguished":false,"created_utc":"21-01-28 01:53:37","author":"GIGABIT","body":"What\u0027s up my dudes. WSB \"investor\" here. \n\nIn light of the ongoing GameStop insanity you might have heard about I thought it could be cool to get a tattoo celebrating the gains. Naturally it has to be the \"Power to the players\" slogan, but from what I understand, the word \"player\" isn\u0027t really a thing in Latin.\n\nI thought \"Power to the Gambler\" might be a more appropriate choice considering the nature of both parties involved, and because there actually might be a proper word for \"gambler\".\n\nSo I thought I\u0027d ask you guys for some advice about the sentence so I don\u0027t go printing myself with something stupid like \"short the market\".\n\nThanks in advance!","sequenceNumber":51,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":600408000}}},{"id":{"value":"t1_gl29fog"},"submission_id":{"id":"t3_l5ejhj"},"parent_id":{"id":"t1_gl0zg1x"},"distinguished":false,"created_utc":"21-01-28 08:38:37","author":"kc_kennylau","body":"potentia ad aleatores\n\nPS: don\u0027t trust internet strangers (such as me!) 
for tattoo","sequenceNumber":52,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":600486000}}},{"id":{"value":"t1_gl33bxa"},"submission_id":{"id":"t3_l5ejhj"},"parent_id":{"id":"t1_gl29fog"},"distinguished":false,"created_utc":"21-01-28 14:37:06","author":"GIGABIT","body":"Thanks a lot!\n\nI SHOULD know better than to trust internet strangers... Then again, you know what everyone is over at WSB, so..","sequenceNumber":53,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":600548000}}},{"id":{"value":"t1_gl1qbxz"},"submission_id":{"id":"t3_l5ejhj"},"parent_id":{"id":"t3_l5ejhj"},"distinguished":false,"created_utc":"21-01-28 05:16:51","author":"dpm5150","body":"I want to mount a motto carved in wood in my library. I?m trying to figure out a good noun to express ?usefulness?. I?m want to express to my son that he should strive for contributing to society in ways that return tangible value. Usefulness, itself, is a dull word, so I?m not sure if Latin has something with a little more zing.","sequenceNumber":54,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":600612000}}},{"id":{"value":"t1_gl29810"},"submission_id":{"id":"t3_l5ejhj"},"parent_id":{"id":"t1_gl1qbxz"},"distinguished":false,"created_utc":"21-01-28 08:35:56","author":"kc_kennylau","body":"utilitas / ?tilit?s (whence English \"utility\")","sequenceNumber":55,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":600672000}}},{"id":{"value":"t1_gl3020g"},"submission_id":{"id":"t3_l5ejhj"},"parent_id":{"id":"t1_gl29810"},"distinguished":false,"created_utc":"21-01-28 14:13:35","author":"dpm5150","body":"Yes, this definitely can work. Now I have to decide how inspirational it is for a motto. 
Thank you so much.","sequenceNumber":56,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":600732000}}},{"id":{"value":"t1_gl4ppso"},"submission_id":{"id":"t3_l5ejhj"},"parent_id":{"id":"t3_l5ejhj"},"distinguished":false,"created_utc":"21-01-28 20:03:46","author":"Rashed8StringVi","body":"From the popular phrase ?In Vino Veritas?, how would one correctly substitute ?blood? instead of ?wine? such that the phrase becomes ?in blood is the truth??","sequenceNumber":57,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":600790000}}},{"id":{"value":"t1_gl54pgr"},"submission_id":{"id":"t3_l5ejhj"},"parent_id":{"id":"t1_gl4ppso"},"distinguished":false,"created_utc":"21-01-28 21:34:34","author":"kc_kennylau","body":"in sanguine veritas","sequenceNumber":58,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":600847000}}},{"id":{"value":"t1_gl55c0i"},"submission_id":{"id":"t3_l5ejhj"},"parent_id":{"id":"t3_l5ejhj"},"distinguished":false,"created_utc":"21-01-28 21:38:27","author":"coralcakes","body":"How would An Appeal to Heaven be translated into latin?","sequenceNumber":59,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":600903000}}},{"id":{"value":"t1_gl5nhd2"},"submission_id":{"id":"t3_l5ejhj"},"parent_id":{"id":"t1_gl55c0i"},"distinguished":false,"created_utc":"21-01-28 23:28:44","author":"jayzwasinnirvana","body":"*Obsecratio ad caelum* is one way. I\u0027m not sure if there is an existing phrase. 
I did not use *appellatio* because I think it\u0027s meaning in Latin is more strictly legal.","sequenceNumber":60,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":600978000}}},{"id":{"value":"t1_gl5quql"},"submission_id":{"id":"t3_l5ejhj"},"parent_id":{"id":"t1_gl55c0i"},"distinguished":false,"created_utc":"21-01-28 23:52:04","author":"kc_kennylau","body":"Quite literally \"appellatio ad caelum\".","sequenceNumber":61,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":601035000}}},{"id":{"value":"t1_gl57uz9"},"submission_id":{"id":"t3_l5ejhj"},"parent_id":{"id":"t3_l5ejhj"},"distinguished":false,"created_utc":"21-01-28 21:53:32","author":"RandyDautzenberg","body":"Hey all, \n\n\nCurrently I\u0027m looking for a translation of \u0027to the top\u0027 / \u0027the top\u0027 in Latin. Some internet translators are giving me different translations. Most of the time they translate it as \u0027ad summitatem\u0027 or \u0027ad verticem\u0027. \n\n\nI really like the word \u0027verticem\u0027, haha. So I was wondering if you can also use \u0027verticem\u0027 without the preposition \u0027ad\u0027. Or would that be grammatically incorrect? \n\n\nMany thanks! :)","sequenceNumber":62,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":601091000}}},{"id":{"value":"t1_gl5khcv"},"submission_id":{"id":"t3_l5ejhj"},"parent_id":{"id":"t1_gl57uz9"},"distinguished":false,"created_utc":"21-01-28 23:08:37","author":"kc_kennylau","body":"\"the top\" \u003d apex / vertex\n\n\"to the top\" \u003d ad apicem / ad verticem\n\nIt would be better to provide context (e.g. 
whole sentence).","sequenceNumber":63,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":601147000}}},{"id":{"value":"t1_gl5lazb"},"submission_id":{"id":"t3_l5ejhj"},"parent_id":{"id":"t1_gl5khcv"},"distinguished":false,"created_utc":"21-01-28 23:14:06","author":"RandyDautzenberg","body":"Many thanks for your quick reply! Well, I would like to use it as a brand name, haha. So that means that using verticem without the preposition is not grammatically correct, right?","sequenceNumber":64,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":601203000}}},{"id":{"value":"t1_gl5avpy"},"submission_id":{"id":"t3_l5ejhj"},"parent_id":{"id":"t3_l5ejhj"},"distinguished":false,"created_utc":"21-01-28 22:11:03","author":"ContuberniumSPQR","body":"Hello \n\nI have to determine the word Casus from Casus,Casus I think it is a nominative but that doesn\u0027t fit with its function in the phrase could it be an accusative?","sequenceNumber":65,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":601268000}}},{"id":{"value":"t1_gl5kal7"},"submission_id":{"id":"t3_l5ejhj"},"parent_id":{"id":"t1_gl5avpy"},"distinguished":false,"created_utc":"21-01-28 23:07:22","author":"kc_kennylau","body":"Perfacilis inventu ex [Wiktionary](https://en.wiktionary.org/wiki/casus#Latin)","sequenceNumber":66,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":601343000}}},{"id":{"value":"t1_gl7qke4"},"submission_id":{"id":"t3_l5ejhj"},"parent_id":{"id":"t1_gl5kal7"},"distinguished":false,"created_utc":"21-01-29 09:59:22","author":"ContuberniumSPQR","body":" Gratias tibi 
ago","sequenceNumber":67,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":601398000}}},{"id":{"value":"t1_gl6kjfb"},"submission_id":{"id":"t3_l5ejhj"},"parent_id":{"id":"t3_l5ejhj"},"distinguished":false,"created_utc":"21-01-29 03:30:49","author":"Havatra","body":"Hello!\n\nI need some translation help with this sentence:\n\n\"Remember you must die, so [do] thrive(vigorously), while you are [still] able to.\" \n\"Memento mori; ita vigemusque, dum es possunt.\"\n\nDoes this sound correct? Or is there perhaps a different way you\u0027d rather put it?\n\nI appreciate all help! :-)","sequenceNumber":68,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":601454000}}},{"id":{"value":"t1_gl7b1cb"},"submission_id":{"id":"t3_l5ejhj"},"parent_id":{"id":"t3_l5ejhj"},"distinguished":false,"created_utc":"21-01-29 07:03:49","author":"Ghost_1243","body":"Hello, I\u0027m trying to translate the following the phrases:\n\n1. A Love of One\u0027s Fate\n2. The Will to Power\n3. Strive for a Higher Purpose\n4. Embrace the Ordinary","sequenceNumber":69,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":601510000}}},{"id":{"value":"t1_gl7bfpk"},"submission_id":{"id":"t3_l5ejhj"},"parent_id":{"id":"t3_l5ejhj"},"distinguished":false,"created_utc":"21-01-29 07:07:53","author":"DV5161","body":"I?m having trouble translating ?live and die? so when I put that into google translate I get back ?vivere et mori? but when I switch it and go from Latin to English I get back ?live and? but when I put in ?vivamus, moriendum est.? I get back ?live and die? 
if someone could help me with which is right and wrong it would be greatly appreciated!","sequenceNumber":70,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":601564000}}},{"id":{"value":"t1_gl7hq77"},"submission_id":{"id":"t3_l5ejhj"},"parent_id":{"id":"t1_gl7bfpk"},"distinguished":false,"created_utc":"21-01-29 08:13:05","author":"kc_kennylau","body":"Anything google translate says is wrong.\n\n\"live and die\" \u003d vive et morere (command to 1 person) / vivete et morimini (command to multiple people)\n\nYou can also replace the \"et\" to \"atque\".","sequenceNumber":71,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":601618000}}},{"id":{"value":"t1_gl7hzbb"},"submission_id":{"id":"t3_l5ejhj"},"parent_id":{"id":"t1_gl7hq77"},"distinguished":false,"created_utc":"21-01-29 08:15:50","author":"DV5161","body":"Lmao I had no idea but ok nice, glad to have got that cleared up thank you so much!","sequenceNumber":72,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":601680000}}},{"id":{"value":"t1_gkuwp93"},"submission_id":{"id":"t3_l5ejhj"},"parent_id":{"id":"t3_l5ejhj"},"distinguished":false,"created_utc":"21-01-26 20:31:29","author":"ki4clz","body":"Romanes eunt domis...?","sequenceNumber":73,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":601737000}}},{"id":{"value":"t1_gkuzut8"},"submission_id":{"id":"t3_l5ejhj"},"parent_id":{"id":"t1_gkuwp93"},"distinguished":false,"created_utc":"21-01-26 20:52:42","author":"kc_kennylau","body":"People called Romanes, they go, the house?","sequenceNumber":74,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":601792000}}},{"id":{"value":"t1_gkvxdoo"},"submission_id":{"id":"t3_l5ejhj"},"parent_id":{"id":"t1_gkuzut8"},"distinguished":false,"created_utc":"21-01-27 
00:50:38","author":"ki4clz","body":"It says *Romans Go Home!*...","sequenceNumber":75,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":601848000}}},{"id":{"value":"t1_gkx8lf8"},"submission_id":{"id":"t3_l5ejhj"},"parent_id":{"id":"t1_gkvxdoo"},"distinguished":false,"created_utc":"21-01-27 08:10:02","author":"rsotnik","body":"It doesn\u0027t :)\n\nhttps://en.m.wikipedia.org/wiki/Romani_ite_domum","sequenceNumber":76,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":601904000}}},{"id":{"value":"t1_gkxml3q"},"submission_id":{"id":"t3_l5ejhj"},"parent_id":{"id":"t1_gkx8lf8"},"distinguished":false,"created_utc":"21-01-27 11:36:21","author":"ki4clz","body":"but latin for Roman is, Romanus...?","sequenceNumber":77,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":601957000}}},{"id":{"value":"t1_gkxn1hb"},"submission_id":{"id":"t3_l5ejhj"},"parent_id":{"id":"t1_gkxml3q"},"distinguished":false,"created_utc":"21-01-27 11:42:53","author":"rsotnik","body":"Sorry, you lost me :) Do you want to know what \" Romanes eunt domis\" means or how one says \"Romans, go home!\"?","sequenceNumber":78,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":602016000}}},{"id":{"value":"t1_gkxo45g"},"submission_id":{"id":"t3_l5ejhj"},"parent_id":{"id":"t1_gkxn1hb"},"distinguished":false,"created_utc":"21-01-27 11:58:33","author":"ki4clz","body":"Some kind soul was patient with me in this thread, and showed me the way: *(you may need to check their 
work?)*\n\nhttps://www.reddit.com/r/dankchristianmemes/comments/kp0ua2/a_catholic_a_protestant_and_an_orthodox_walk_into/ghv5c44?utm_medium\u003dandroid_app\u0026utm_source\u003dshare\u0026context\u003d3","sequenceNumber":79,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":602068000}}},{"id":{"value":"t1_gkxoeil"},"submission_id":{"id":"t3_l5ejhj"},"parent_id":{"id":"t1_gkxo45g"},"distinguished":false,"created_utc":"21-01-27 12:02:38","author":"rsotnik","body":"Haha, you played me all along :)","sequenceNumber":80,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":602120000}}},{"id":{"value":"t1_gkxok0c"},"submission_id":{"id":"t3_l5ejhj"},"parent_id":{"id":"t1_gkxoeil"},"distinguished":false,"created_utc":"21-01-27 12:04:44","author":"ki4clz","body":"I\u0027ve only ever had one person take the bait... was hoping for a rematch...\n\n***Happy Cake Day!***\n-","sequenceNumber":81,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":602170000}}},{"id":{"value":"t1_gl5au93"},"submission_id":{"id":"t3_l765yl"},"parent_id":{"id":"t3_l765yl"},"distinguished":false,"created_utc":"21-01-28 22:10:49","author":"CabezadeVaca_","body":"Russian priests singing in Latin?? 
Very beautiful but also very odd","sequenceNumber":83,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":53,"nano":161325000}}},{"id":{"value":"t1_gl5h3k4"},"submission_id":{"id":"t3_l765yl"},"parent_id":{"id":"t1_gl5au93"},"distinguished":false,"created_utc":"21-01-28 22:48:23","author":"HanSo1oCup","body":"*Russian Orthodox* but the monastery is located in WV, USA","sequenceNumber":84,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":53,"nano":161442000}}},{"id":{"value":"t1_gl5i3sq"},"submission_id":{"id":"t3_l765yl"},"parent_id":{"id":"t1_gl5h3k4"},"distinguished":false,"created_utc":"21-01-28 22:54:09","author":"CabezadeVaca_","body":"Well that seems especially odd to me if they?re not even Eastern Catholics. I?ve never understood the Russian or the Greek churches to have much respect for Latin as a liturgical language, but of course that?s just based on my own experiences","sequenceNumber":85,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":53,"nano":161497000}}},{"id":{"value":"t1_gl5pu5j"},"submission_id":{"id":"t3_l765yl"},"parent_id":{"id":"t1_gl5i3sq"},"distinguished":false,"created_utc":"21-01-28 23:45:00","author":"greetings_traveler2","body":"yeah, it\u0027s pretty uncommon for Orthodox priests, I love their chants in ancient Greek though","sequenceNumber":86,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":53,"nano":161547000}}},{"id":{"value":"t1_gl4um7f"},"submission_id":{"id":"t3_l765yl"},"parent_id":{"id":"t3_l765yl"},"distinguished":false,"created_utc":"21-01-28 20:34:13","author":"HanSo1oCup","body":"Correction *It was adapted by the monks in honor of the Holy Prophet 
David*","sequenceNumber":87,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":53,"nano":161627000}}},{"id":{"value":"t1_gl5n4sd"},"submission_id":{"id":"t3_l765yl"},"parent_id":{"id":"t3_l765yl"},"distinguished":false,"created_utc":"21-01-28 23:26:23","author":"marktwainbrain","body":"Is this a group that has a ?Western Rite? orthodox parish?","sequenceNumber":88,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":53,"nano":161677000}}},{"id":{"value":"t1_gl64il6"},"submission_id":{"id":"t3_l765yl"},"parent_id":{"id":"t1_gl5n4sd"},"distinguished":false,"created_utc":"21-01-29 01:30:08","author":"greetings_traveler2","body":"Is that a thing?","sequenceNumber":89,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":53,"nano":161724000}}},{"id":{"value":"t1_gl64vsg"},"submission_id":{"id":"t3_l765yl"},"parent_id":{"id":"t1_gl64il6"},"distinguished":false,"created_utc":"21-01-29 01:32:52","author":"marktwainbrain","body":"Yes, not at all common. An overture to some Latin traditionalists. I think OCA has Western Rite parishes.\n\nETA: https://en.m.wikipedia.org/wiki/Western_Rite_Orthodoxy","sequenceNumber":90,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":53,"nano":161770000}}},{"id":{"value":"t1_gl5m4eg"},"submission_id":{"id":"t3_l7aez5"},"parent_id":{"id":"t3_l7aez5"},"distinguished":false,"created_utc":"21-01-28 23:19:33","author":"qed1","body":"There is only one appropriate manuscript to share on such an occasion as this: https://digi.vatlib.it/view/MSS_Vat.lat.9850/0016.\n\n^^^^^^^^(Not ^^^^^^^sure ^^^^^^^why ^^^^^^^there ^^^^^^^are ^^^^^^^two ^^^^^^^threads, ^^^^^^^only ^^^^^^^one ^^^^^^^of ^^^^^^^which ^^^^^^^I\u0027m ^^^^^^^seeing ^^^^^^^at ^^^^^^^a ^^^^^^^given ^^^^^^^time... 
^^^^^^^but ^^^^^^^this ^^^^^^^should ^^^^^^^obviously ^^^^^^^be ^^^^^^^seen ^^^^^^^in ^^^^^^^both!)","sequenceNumber":92,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":55,"nano":144469000}}},{"id":{"value":"t1_gl730gk"},"submission_id":{"id":"t3_l7aez5"},"parent_id":{"id":"t1_gl5m4eg"},"distinguished":false,"created_utc":"21-01-29 05:52:40","author":"SheepExplosion","body":"Things that make me long for Merovingian chancery hands. Good thing he had 4 scribes or no one would have ever known what he wrote.","sequenceNumber":93,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":55,"nano":144676000}}},{"id":{"value":"t1_gl5ns9x"},"submission_id":{"id":"t3_l7aez5"},"parent_id":{"id":"t1_gl5m4eg"},"distinguished":false,"created_utc":"21-01-28 23:30:47","author":"Kingshorsey","body":"Are you sure that link goes where you want? The title page is nice, but your link goes to a random page in the middle.","sequenceNumber":94,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":55,"nano":144774000}}},{"id":{"value":"t1_gl5oqvi"},"submission_id":{"id":"t3_l7aez5"},"parent_id":{"id":"t1_gl5ns9x"},"distinguished":false,"created_utc":"21-01-28 23:37:26","author":"qed1","body":"That was the point, yes. I was aiming to land at a section of Aquinas\u0027s near incomprehensible handwriting. ;)","sequenceNumber":95,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":55,"nano":144866000}}},{"id":{"value":"t1_gl5ocsz"},"submission_id":{"id":"t3_l7aez5"},"parent_id":{"id":"t1_gl5m4eg"},"distinguished":false,"created_utc":"21-01-28 23:34:45","author":"EmergencySufficient6","body":"... 
What is *that?* \n\nI thought for sure it\u0027d be his musings upon [the effects of stars on demons](http://www.logicmuseum.com/wiki/Authors/Thomas_Aquinas/Summa_Theologiae/Part_I/Q115#q115a5arg1) or something similar.","sequenceNumber":96,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":55,"nano":144970000}}},{"id":{"value":"t1_gl5ovhj"},"submission_id":{"id":"t3_l7aez5"},"parent_id":{"id":"t1_gl5ocsz"},"distinguished":false,"created_utc":"21-01-28 23:38:18","author":"qed1","body":"It\u0027s Aquinas\u0027s totally incomprehensible handwriting.","sequenceNumber":97,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":55,"nano":145068000}}},{"id":{"value":"t1_gl5rrc7"},"submission_id":{"id":"t3_l7aez5"},"parent_id":{"id":"t1_gl5ovhj"},"distinguished":false,"created_utc":"21-01-28 23:58:24","author":"EmergencySufficient6","body":"How sure are we that there\u0027s a teleological argument against gays in there?\n\nEdit: Forgive me, downvote brigade, for I have sinned.","sequenceNumber":98,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":55,"nano":145174000}}},{"id":{"value":"t1_gl5kbyc"},"submission_id":{"id":"t3_l7aez5"},"parent_id":{"id":"t3_l7aez5"},"distinguished":false,"created_utc":"21-01-28 23:07:37","author":"Kingshorsey","body":"Source: [https://digi.vatlib.it/mss/detail/Urb.lat.136](https://digi.vatlib.it/mss/detail/Urb.lat.136)","sequenceNumber":99,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":55,"nano":145265000}}},{"id":{"value":"t1_gl315u1"},"submission_id":{"id":"t3_l6s45e"},"parent_id":{"id":"t3_l6s45e"},"distinguished":false,"created_utc":"21-01-28 14:21:53","author":"joaojcorreia","body":"Hi u/Irene_SaturaLanx, I was able to see part of the session live, really good. Congratulations. I was really happy, because I was able to follow it. 
Gratias.","sequenceNumber":103,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":59,"nano":155093000}}},{"id":{"value":"t1_gl3oyaf"},"submission_id":{"id":"t3_l6s45e"},"parent_id":{"id":"t1_gl315u1"},"distinguished":false,"created_utc":"21-01-28 16:28:46","author":"Irene_SaturaLanx","body":"Gratias tibi!","sequenceNumber":104,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":59,"nano":155276000}}},{"id":{"value":"t1_gl31nts"},"submission_id":{"id":"t3_l6s45e"},"parent_id":{"id":"t3_l6s45e"},"distinguished":false,"created_utc":"21-01-28 14:25:30","author":"ironicsadboy","body":"Congratulations on your work!","sequenceNumber":105,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":59,"nano":155374000}}},{"id":{"value":"t1_gl3oz4e"},"submission_id":{"id":"t3_l6s45e"},"parent_id":{"id":"t1_gl31nts"},"distinguished":false,"created_utc":"21-01-28 16:28:53","author":"Irene_SaturaLanx","body":"Thanks!","sequenceNumber":106,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":59,"nano":155466000}}},{"id":{"value":"t1_gl45js7"},"submission_id":{"id":"t3_l6s45e"},"parent_id":{"id":"t3_l6s45e"},"distinguished":false,"created_utc":"21-01-28 17:53:49","author":"logatwork","body":"I\u0027m reading/studying this book! Spoilers ahead!\n\nThank you","sequenceNumber":107,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":59,"nano":155556000}}},{"id":{"value":"t1_gl4w10f"},"submission_id":{"id":"t3_l6s45e"},"parent_id":{"id":"t3_l6s45e"},"distinguished":false,"created_utc":"21-01-28 20:42:52","author":"Redbubbles55","body":"hunc librem nunc perlego, sperans posthac linguam latinam modo eius docere. 
gaudeo multum sessionem tuam spectauisse, gratias ueras tibi ago !","sequenceNumber":108,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":59,"nano":155643000}}},{"id":{"value":"t1_gl510zz"},"submission_id":{"id":"t3_l6s45e"},"parent_id":{"id":"t3_l6s45e"},"distinguished":false,"created_utc":"21-01-28 21:12:53","author":"Monsieurantipyrine","body":"Truly one (or two) of the best texts out there for Latin students! Very glad I learned from this back in University.","sequenceNumber":109,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":59,"nano":155732000}}},{"id":{"value":"t1_gl58d7w"},"submission_id":{"id":"t3_l6s45e"},"parent_id":{"id":"t3_l6s45e"},"distinguished":false,"created_utc":"21-01-28 21:56:31","author":"scriptapuella","body":"My university is grammar focussed, but I teach with this book (alongside the companion) to aid comprehension and get students used to continuous Latin passages early. Evaluations indicate they like it more than Wheelock, at any rate.","sequenceNumber":110,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":59,"nano":155832000}}},{"id":{"value":"t1_gl7scfq"},"submission_id":{"id":"t3_l7iwsm"},"parent_id":{"id":"t3_l7iwsm"},"distinguished":false,"created_utc":"21-01-29 10:22:23","author":"kc_kennylau","body":"* You dare to drink wine in my presence?\n\n\"you dare to drink wine\" is indeed \"vinum bibere audes\", but \"ante\" takes the accusative instead of the dative, so it would be \"ante me\" instead of \"ante mi\". Other possible translations of \"in my presence\" include \"coram me\" and \"prae me\".\n\nE.g. Exodus 23:3 Non habebis deos alienos **coram me**. 
\"Thou shalt have no other gods **before me**.\"\n\n\u0026#x200B;\n\n* Because of covid, two million people are dead.\n\n[Latin Wikipedia](https://la.wikipedia.org/wiki/COVID-19) translates COVID-19 as \"morbus coronarii viri anni 2019\" and notes that this is their internal translation only, not found outside Wikipedia. To be safe, I would use COVID-19, but this does not admit cases. I guess in the end I would prefer \"morbus coronarii viri\" which does admit cases.\n\nNote that \"quia\" needs to be followed by a phrase, or put simply, \"quia\" \u003d \"because\" not \"because of\". I would use \"propter\" for \"because of\", or I would just use the ablative.\n\n\"concident\" is the future tense. The perfect tense \"conciderunt\" is more appropriate.\n\nIn conclusion: \"propter morbum coronarii viri, duo milliones homines conciderunt.\" or \"morbo coronarii viri, duo milliones homines conciderunt.\"","sequenceNumber":112,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":36,"second":1,"nano":89793000}}},{"id":{"value":"t1_gl6k27a"},"submission_id":{"id":"t3_l7carp"},"parent_id":{"id":"t3_l7carp"},"distinguished":false,"created_utc":"21-01-29 03:27:06","author":"jayzwasinnirvana","body":"You\u0027ve almost got it - the verb is right, but consul should be in the nominative. It\u0027s not a direct object, but a subject (nominative? maybe someone can chime in with the grammatical term) complement. 
Here\u0027s Livy:\n\n\n\u003eDecembri mense summo patrum studio L.\tQuinctius Cincinnatus, pater Caesonis, **consul creatur**","sequenceNumber":114,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":36,"second":3,"nano":83507000}}},{"id":{"value":"t1_gl6wvd8"},"submission_id":{"id":"t3_l7carp"},"parent_id":{"id":"t1_gl6k27a"},"distinguished":false,"created_utc":"21-01-29 05:03:53","author":"ogorangeduck","body":"It\u0027s an attributive, not a subject.","sequenceNumber":115,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":36,"second":3,"nano":83662000}}},{"id":{"value":"t1_gl6u4mq"},"submission_id":{"id":"t3_l7gja1"},"parent_id":{"id":"t3_l7gja1"},"distinguished":false,"created_utc":"21-01-29 04:42:50","author":"isolde100","body":"Quis ut Deus refers to St. Michael the Archangel - it means ?who is like God?. It?s the literal translation of the Hebrew Michael or Mika?el.","sequenceNumber":117,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":36,"second":5,"nano":86997000}}},{"id":{"value":"t1_gl7941t"},"submission_id":{"id":"t3_l7gja1"},"parent_id":{"id":"t1_gl6u4mq"},"distinguished":false,"created_utc":"21-01-29 06:45:24","author":"LurkinOG","body":"You are right..i looked up old latin versions of the I and one is shaped like what i thought what was a 3 but its a capital that looks like this only L only reversed £..what is that quote significant to..i know when translated to todays english its hard to know the meaning or significance without context..and thank you for sharing your knowledge in figuring this out","sequenceNumber":118,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":36,"second":5,"nano":87152000}}},{"id":{"value":"t1_gl6lb05"},"submission_id":{"id":"t3_l7gja1"},"parent_id":{"id":"t3_l7gja1"},"distinguished":false,"created_utc":"21-01-29 03:36:43","author":"TWFM","body":"Qu3s is apparently an internet 
personality.","sequenceNumber":119,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":36,"second":5,"nano":87249000}}},{"id":{"value":"t1_gl6mzss"},"submission_id":{"id":"t3_l7gja1"},"parent_id":{"id":"t1_gl6lb05"},"distinguished":false,"created_utc":"21-01-29 03:49:30","author":"LurkinOG","body":"I thought so to but old latin Ques can be translated to mean seek/ask its where you end up with english words like ques-tion, ques-t..3 could signify both holy trinity and a internet personality","sequenceNumber":120,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":36,"second":5,"nano":87345000}}},{"id":{"value":"t1_gl88ce7"},"submission_id":{"id":"t3_l7gja1"},"parent_id":{"id":"t3_l7gja1"},"distinguished":false,"created_utc":"21-01-29 13:40:59","author":"BaconJudge","body":"There\u0027s a common Latin scribal abbreviation that resembles a 3, and it can stand in for various letter combinations, such as *-et*. For example, the modern-day abbreviation *viz.* for *videlicet* originated from misinterpreting *vi?* as *viz.* However, it\u0027s normally used at the end of words, and I can\u0027t see what it would represent here, so I mention it mainly to rule it out.","sequenceNumber":121,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":36,"second":5,"nano":87445000}}},{"id":{"value":"t1_gl1hr7e"},"submission_id":{"id":"t3_l6mdp8"},"parent_id":{"id":"t3_l6mdp8"},"distinguished":false,"created_utc":"21-01-28 04:06:11","author":"antinousrex","body":"feel free to point out errors, please!","sequenceNumber":123,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":36,"second":7,"nano":177467000}}},{"id":{"value":"t1_gl1tfor"},"submission_id":{"id":"t3_l6mdp8"},"parent_id":{"id":"t1_gl1hr7e"},"distinguished":false,"created_utc":"21-01-28 05:44:51","author":"Thalionwen20","body":"In section 4, it should be \"undecimum,\" accusative. 
In the last line of section 5, it should be \"et,\" not \"and.\" In 9, you might want to say \"inter se\" instead of \"eorum.\" In 10, it should be \"annulum\" and \"esse\" for indirect speech, or \"annulus delendus\" for a direct quote. In 11, you only need one \"est\" and a \"qui\" before \"nunc.\" In 12, it should be \"mortuos.\" In 15, \"Galadriela\" and \"principe\" are misspelled. In 16, you probably just want \"interficit,\" and I would put another verb such as \"incipit\" with \"iter facere.\"\n\nDespite that, I really enjoyed this and think you did a good job on it! :)","sequenceNumber":124,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":36,"second":7,"nano":177562000}}},{"id":{"value":"t1_gl2y8de"},"submission_id":{"id":"t3_l6mdp8"},"parent_id":{"id":"t1_gl1tfor"},"distinguished":false,"created_utc":"21-01-28 13:57:47","author":"eglwufdeo","body":"Some more I found: \n\nIn section 1 \n\"ignotum eis\" seems sketchy, I would expect an ablative absolute \n\"eius\" should be \"suae\" \n\nIn section 2 \n\"foedum\" should be \"foedus\", having it as the subject seems a bit weird but I\u0027m not sure \n\"miletes\" should be \"milites\" \nI think the second sentence misses a verb \n\"saeculum\" not \"saecula\" \n\nIn section 3 \n\"pro se\" not \"pro sibi\" \n\nIn section 4 \n\"centesimus\" not \"centisimus\" \ndon\u0027t think \"discedere\" takes an accusative like that \n\"faciat\" not \"faceat\" \n\"suo\" not \"eius\" \n\nIn section 5 \n\"certior\" not \"certiorem\" \nYou need either indirect speech or some subjunction, but as it stands the sentence about Gollum\u0027s torture doesn\u0027t work \n\"aperiens verba\" doesn\u0027t work (match number), should be an ablative absolute \n\"discedat\" not \"discedeat\", also see above \n\nIn section 6 \nsame thing with \"certior\" \nthe relative clause needs a verb \n\nIn Section 7 \n\"Sarumano\" should be \"a Sarumano\" \n\"adiuvantur\" not \"adiuntur\" \n\"a venatore\" not 
\"venatori\" \n\nIn section 8 \nDon\u0027t know if \"do\" is the best verb here , maybe \"parare\"? \n\nIn section 9 \n\"curatur\" should probably be plural \n\"suum\" not \"eorum\" \n\nIn section 10 \n\"ambo\" doesn\u0027t work \n\nIn section 11 \nI don\u0027t think \"se voluntare\" is a thing \n\"comitatus\" not \"commitatus\" \n\nIn section 12 \n\"quae\" not \"qui\" \n\nIn section 13 \nThink there\u0027s a verb missing after \"vastum antrum\" \n\"cum se\" not \"cum sibi\" \n\nIn section 14 \n\"devastata\" not \"devastatus\" \n\"capturum\" not \"capturus\" \n\nIn section 15 \nI think \"per\" is the wrong preposition \n\nIn section 16 \n\"suos\" not \"eius\" \ndon\u0027t think \"iurandum\" works like that","sequenceNumber":125,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":36,"second":7,"nano":177599000}}},{"id":{"value":"t1_gl3du83"},"submission_id":{"id":"t3_l6mdp8"},"parent_id":{"id":"t1_gl2y8de"},"distinguished":false,"created_utc":"21-01-28 15:37:05","author":"antinousrex","body":"Thank you so much!","sequenceNumber":126,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":36,"second":7,"nano":177643000}}},{"id":{"value":"t1_gl3dsb3"},"submission_id":{"id":"t3_l6mdp8"},"parent_id":{"id":"t1_gl1tfor"},"distinguished":false,"created_utc":"21-01-28 15:36:49","author":"antinousrex","body":"Thank you so much! 
I do these off the seat of my pants and rely on people like you to catch the errors I miss after staring at my own text for 3 hrs","sequenceNumber":127,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":36,"second":7,"nano":177681000}}},{"id":{"value":"t1_gl27bkf"},"submission_id":{"id":"t3_l6mdp8"},"parent_id":{"id":"t1_gl1hr7e"},"distinguished":false,"created_utc":"21-01-28 08:11:56","author":"Julius_The_Caesar","body":"Happy cake day","sequenceNumber":128,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":36,"second":7,"nano":177721000}}},{"id":{"value":"t1_gl212je"},"submission_id":{"id":"t3_l6mdp8"},"parent_id":{"id":"t3_l6mdp8"},"distinguished":false,"created_utc":"21-01-28 06:59:30","author":"PinkyPiePerson","body":"You wouldn\u0027t happen to have the link to the Shrek one.\n\nAlso Bee Movie next???","sequenceNumber":129,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":36,"second":7,"nano":177763000}}},{"id":{"value":"t1_gl3hymr"},"submission_id":{"id":"t3_l6mdp8"},"parent_id":{"id":"t1_gl212je"},"distinguished":false,"created_utc":"21-01-28 15:56:22","author":"antinousrex","body":"here\u0027s shrek. 
bee movie is in progress [https://docs.google.com/document/d/1-0GY-JqbusyDbOp7aaRJy1q8rY4LSTMQbccTek7673Q/edit?usp\u003dsharing](https://docs.google.com/document/d/1-0GY-JqbusyDbOp7aaRJy1q8rY4LSTMQbccTek7673Q/edit?usp\u003dsharing)","sequenceNumber":130,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":36,"second":7,"nano":177822000}}},{"id":{"value":"t1_gl2ad9g"},"submission_id":{"id":"t3_l6mdp8"},"parent_id":{"id":"t3_l6mdp8"},"distinguished":false,"created_utc":"21-01-28 08:50:55","author":"-Frind-","body":"Is it classical?","sequenceNumber":131,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":36,"second":7,"nano":177859000}}},{"id":{"value":"t1_gl2tdy1"},"submission_id":{"id":"t3_l6mdp8"},"parent_id":{"id":"t1_gl2ad9g"},"distinguished":false,"created_utc":"21-01-28 13:07:27","author":"OperaRotas","body":"Of course Lord of the Rings is a classical\n\n_[jocor]_","sequenceNumber":132,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":36,"second":7,"nano":177893000}}},{"id":{"value":"t1_gl2bz5e"},"submission_id":{"id":"t3_l6mdp8"},"parent_id":{"id":"t3_l6mdp8"},"distinguished":false,"created_utc":"21-01-28 09:12:40","author":"MadScientist2854","body":"happy cake day!","sequenceNumber":133,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":36,"second":7,"nano":177926000}}},{"id":{"value":"t1_gl42d9n"},"submission_id":{"id":"t3_l6mdp8"},"parent_id":{"id":"t3_l6mdp8"},"distinguished":false,"created_utc":"21-01-28 17:37:36","author":"OperaRotas","body":"\u003eSed potestas Annuli Isildurem corrumpit, qui Annulum pro se capit\n\nThis \"pro se\" sounds a bit weird to me, kinda \"he takes the ring for his own sake\". 
Maybe \"sibi\" would work better?\n\nI\u0027m also not sure if this \"capit\" means \"takes\", as in right after cutting Sauron\u0027s finger (in which case it\u0027s fine) or \"keeps\", as in \"keeps the ring longer than he should\", in which case \"tenet\" could be better. Any way I don\u0027t know if \"sibi tenet\" works well.","sequenceNumber":134,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":36,"second":7,"nano":177969000}}},{"id":{"value":"t1_gl2mk6t"},"submission_id":{"id":"t3_l6thod"},"parent_id":{"id":"t3_l6thod"},"distinguished":false,"created_utc":"21-01-28 11:41:58","author":"bandzugfeder","body":"I\u0027ll check with the reference grammars. My guess beforehand (to record my failure for posterity) is that the two genitives would be on either side of the noun,or otherwise separated (eg by a determiner or a case-marked attribute). But I would also guess that it is a comparatively rare occurrence.","sequenceNumber":136,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":36,"second":9,"nano":96392000}}},{"id":{"value":"t1_gl4dsl6"},"submission_id":{"id":"t3_l6thod"},"parent_id":{"id":"t1_gl2mk6t"},"distinguished":false,"created_utc":"21-01-28 18:38:18","author":"j1bb3r1sh","body":"One example I can recall that may apply here, if I understand the question correctly, is from Sallust?s *Bellum Catilinae* chapter 2.3, ?*Quod si regum atque imperatorum animi virtus in pace ita ut in bello valeret*...? if that would help in your research. \n\nI?m not certain if the situation warrants defining it as its own construction, but it was unique enough that I remembered it six months after reading it. I?d be interested to hear if you are able to find anything else on this. 
\n\nAlso apologies for formatting if it is messed up, I am on mobile","sequenceNumber":137,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":36,"second":9,"nano":96530000}}}],"threads":[{"id":{"value":"t3_l5ejhj"},"parent_id":{"value":"t5_2qloa"},"title":"English to Latin translation requests go here!","body":" \n\n1. Ask and answer questions about mottos, tattoos, book titles, lines for your poem, slogans for your bowling club?s t-shirt, etc. in the comments of this thread. **Separate posts for these types of requests will be removed.**\n2. Here are some examples of what types of requests this thread is for: [Example #1](https://www.reddit.com/r/latin/comments/dyqs8p/would_the_correct_translation_of_satans_sister_be/?utm_source\u003dshare\u0026utm_medium\u003dweb2x), [Example #2](https://www.reddit.com/r/latin/comments/dyp18o/translation_from_english/?utm_source\u003dshare\u0026utm_medium\u003dweb2x), [Example #3](https://www.reddit.com/r/latin/comments/dy4o7b/i_need_help_in_translating_correctly_these_2_words/?utm_source\u003dshare\u0026utm_medium\u003dweb2x), [Example #4](https://www.reddit.com/r/latin/comments/dxdzpb/are_there_any_words_that_convey_the_idea_of_a/?utm_source\u003dshare\u0026utm_medium\u003dweb2x), [Example #5](https://www.reddit.com/r/latin/comments/dx5xzc/motto_in_latin/?utm_source\u003dshare\u0026utm_medium\u003dweb2x).\n3. This thread is **not for correcting longer translations and student assignments**. If you have some facility with the Latin language and have made an honest attempt to translate that is **NOT from Google Translate**, Yandex, or any other machine translator, create a separate thread requesting to check and correct your translation: [Separate thread example](https://www.reddit.com/r/latin/comments/dyjz4m/motto_idea_for_motorbike/). Make sure to take a look at Rule 4.\n4. 
[Previous iterations of this thread](https://www.reddit.com/r/latin/search/?q\u003dLatin%20translation%20requests%20here\u0026restrict_sr\u003d1).","url":"https://www.reddit.com/r/latin/comments/l5ejhj/english_to_latin_translation_requests_go_here/","sub":"latin","author":"NasusSyrae","num_comments":80,"created_utc":"21-01-26 15:00:22","sequenceNumber":82,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":51,"nano":602455000}}},{"id":{"value":"t3_l765yl"},"parent_id":{"value":"t5_2qloa"},"title":"A hymn dedicated to Saint Nicholas, performed by two monks of Holy Cross Monastery.","body":"","url":"https://v.redd.it/4n18nwz2k4e61","sub":"latin","author":"HanSo1oCup","num_comments":8,"created_utc":"21-01-28 20:30:17","sequenceNumber":91,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":53,"nano":161815000}}},{"id":{"value":"t3_l7aez5"},"parent_id":{"value":"t5_2qloa"},"title":"Hodie est Festum S. Thomae de Aquino - Ecce Pagina Illustrata","body":"","url":"https://i.redd.it/nar1o78fc5e61.jpg","sub":"latin","author":"Kingshorsey","num_comments":8,"created_utc":"21-01-28 23:07:28","sequenceNumber":100,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":55,"nano":145355000}}},{"id":{"value":"t3_l7isan"},"parent_id":{"value":"t5_2qloa"},"title":"Verbum Diei, die Jovis, A D V KAL FEB, anni AUC MMDCCLXXIV: excaeco","body":"Verbum diei hodie est:\n\nexcaeco, excaecare, excaecvi, excaecatum: to blind, to make blind\n\n1st conjugation verb\n\n*Frequens curatio est uenas in temporibus adurere, quae fere quidem in eiusmodi malo tument: sed tamen, ut inflentur magisque se ostendant, ceruix ante modice deliganda est, tenuibusque ferramentis et retussis uenae adurendae, donec in oculis pituitae cursus conquiescat. 
Id enim signum est quasi excaecatorum itinerum, per quae umor ferebatur*\n\nCelsus, *de Medicina*, 7.7","url":"https://www.reddit.com/r/latin/comments/l7isan/verbum_diei_die_jovis_a_d_v_kal_feb_anni_auc/","sub":"latin","author":"Glofkill","num_comments":0,"created_utc":"21-01-29 05:10:12","sequenceNumber":101,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":56,"nano":63285000}}},{"id":{"value":"t3_l7sduu"},"parent_id":{"value":"t5_2qloa"},"title":"I can?t understand this sub sometimes.","body":"I?m feeling lost when it comes to the natural method vs grammar method as laid out on this sub as the best way to learn and understand Latin. I am using LLPSI to learn Latin. Everyone posts about the subjunctive and the perfect and all the other what I believe are grammar rules or modes etc, and I?m over here thinking that none of that is In LLPSI. I feel like there is a whole world of information that I?m not getting because while I know that est is for singular and sunt is for plural, I have no idea what anything else is. I can parse our meaning from reading and context clues, but when or how should I learn to get into the grammar? Right now it?s all story and vocab- I know what I?m reading but I have no idea why it goes its changing in spelling etc. is this normal for reading LLPSI?","url":"https://www.reddit.com/r/latin/comments/l7sduu/i_cant_understand_this_sub_sometimes/","sub":"latin","author":"Squeeks0","num_comments":0,"created_utc":"21-01-29 14:22:11","sequenceNumber":102,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":57,"nano":85717000}}},{"id":{"value":"t3_l6s45e"},"parent_id":{"value":"t5_2qloa"},"title":"If you want to see how a lesson with \"Familia Romana\" works, you can now watch the first lesson on my YouTube channel! ? 
I can\u0027t recommend that book more to anyone who decides to learn Latin, be it with a teacher or alone.","body":"","url":"https://www.youtube.com/watch?v\u003dwCO_McKXEzA\u0026lc\u003dUgwFAsByoLpr_pnXNqZ4AaABAg.9IykSUZhPwo9J0N2_fqeL2\u0026ab_channel\u003dSaturaLanx","sub":"latin","author":"Irene_SaturaLanx","num_comments":8,"created_utc":"21-01-28 09:50:50","sequenceNumber":111,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":59,"nano":155917000}}},{"id":{"value":"t3_l7iwsm"},"parent_id":{"value":"t5_2qloa"},"title":"I like to do random translations; could someone check these and point out the errors?","body":"Vinum bibere audes ante mi?\n\nYou dare to drink wine in my presence?\n\nQuia coronavirus-morbus, duo milliones homines concident.\n\nBecause of covid, two million people are dead.\n\nI know these will probably be full of errors, can someone check them pwease.","url":"https://www.reddit.com/r/latin/comments/l7iwsm/i_like_to_do_random_translations_could_someone/","sub":"latin","author":"seaweedWorkers","num_comments":1,"created_utc":"21-01-29 05:16:07","sequenceNumber":113,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":36,"second":1,"nano":90014000}}},{"id":{"value":"t3_l7carp"},"parent_id":{"value":"t5_2qloa"},"title":"Passive translation","body":"How would you say this in latin: The man is elected consul. \n\nIs elected in the passive? but it doesn?t take a direct object\n\nVir consulem creatur?","url":"https://www.reddit.com/r/latin/comments/l7carp/passive_translation/","sub":"latin","author":"SnooDoggos8723","num_comments":2,"created_utc":"21-01-29 00:23:35","sequenceNumber":116,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":36,"second":3,"nano":83765000}}},{"id":{"value":"t3_l7gja1"},"parent_id":{"value":"t5_2qloa"},"title":"can someone translate. 
\"Qu3s Ut Deus\" i saw it tattooed on a stranger and made me wonder if 3 was just the way some use 3 in place of E and that was a personal choice or am i missing hidden meaning. Ques Ut Deus, would that not translate roughly to who is god. Am i missing something?","body":"","url":"https://www.reddit.com/r/latin/comments/l7gja1/can_someone_translate_qu3s_ut_deus_i_saw_it/","sub":"latin","author":"LurkinOG","num_comments":5,"created_utc":"21-01-29 03:27:25","sequenceNumber":122,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":36,"second":5,"nano":87535000}}},{"id":{"value":"t3_l6mdp8"},"parent_id":{"value":"t5_2qloa"},"title":"From the guy who brought you Phineas and Ferb \u0026 Shrek, here\u0027s the story of Lord of the Rings, The Fellowship of the Ring, in Latin!","body":"[https://docs.google.com/document/d/1gZ2lLzlrOuzxNWyNHczDNonVRlMAvwfIT--W3xLMTzk/edit?usp\u003dsharing](https://docs.google.com/document/d/1gZ2lLzlrOuzxNWyNHczDNonVRlMAvwfIT--W3xLMTzk/edit?usp\u003dsharing)","url":"https://www.reddit.com/r/latin/comments/l6mdp8/from_the_guy_who_brought_you_phineas_and_ferb/","sub":"latin","author":"antinousrex","num_comments":12,"created_utc":"21-01-28 04:05:36","sequenceNumber":135,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":36,"second":7,"nano":178008000}}},{"id":{"value":"t3_l6thod"},"parent_id":{"value":"t5_2qloa"},"title":"Is \"double genitive\" possible in Latin?","body":"There are verbs that take double accusative (e.g. doce?) and there is also double dative (e.g. cu? bon?). These duplicated cases take on different meanings, e.g. 
in the double dative, [one dative is the dative of purpose and the other dative is the dative of reference](http://dcc.dickinson.edu/grammar/latin/dative-purpose).\n\nIs it possible to have double genitive for a noun that can take on two genitives out of (a) the objective genitive, (b) the partitive genitive, and (c) the genitive of possession?\n\nFor example, odium *barbar?rum* **civilizati?nis**, where *barbar?rum* is the genitive of possession and **civilizati?nis** is the objective genitive.\n\nAre such formations attested?","url":"https://www.reddit.com/r/latin/comments/l6thod/is_double_genitive_possible_in_latin/","sub":"latin","author":"kc_kennylau","num_comments":2,"created_utc":"21-01-28 11:25:02","sequenceNumber":138,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":36,"second":9,"nano":96574000}}}],"subreddits":[{"name":"latin","title":"The Latin Language","id":{"value":"t5_2qloa"},"description":"This is a community for discussions related to the Latin language.","sequenceNumber":139,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":36,"second":10,"nano":75346000}}}],"top":[{"sequenceNumber":1,"crawlTime":{"date":{"year":2021,"month":1,"day":29},"time":{"hour":14,"minute":35,"second":47,"nano":893685000}}}]} diff --git a/settings.gradle b/settings.gradle new file mode 100644 index 00000000..cb3868c8 --- /dev/null +++ b/settings.gradle @@ -0,0 +1,4 @@ +rootProject.name = 'wmsa' + +include 'marginalia_nu' +include 'third_party' \ No newline at end of file diff --git a/third_party/README.md b/third_party/README.md new file mode 100644 index 00000000..b72dec53 --- /dev/null +++ b/third_party/README.md @@ -0,0 +1,11 @@ +# Third Party Code + +This is a mix of code from other projects, that has either been aggressively modified to suit the needs of the project, +or lacks an artifact. 
+ +## Sources and Licenses +* [RDRPosTagger](https://github.com/datquocnguyen/RDRPOSTagger) - GPL3 +* [PorterStemmer](https://github.com/caarmen/porter-stemmer) - LGPL3 +* [Uppend](https://github.com/upserve/uppend) - MIT +* [OpenZIM](https://github.com/openzim/libzim) - GPL-2.0 +* [XZ for Java](https://tukaani.org/xz/) - Public Domain \ No newline at end of file diff --git a/third_party/build.gradle b/third_party/build.gradle new file mode 100644 index 00000000..2ff91d9f --- /dev/null +++ b/third_party/build.gradle @@ -0,0 +1,111 @@ +plugins { + id 'java' +} + +repositories { + mavenLocal() + maven { url "https://artifactory.cronapp.io/public-release/" } + maven { url "https://repo1.maven.org/maven2/" } + maven { url "https://www2.ph.ed.ac.uk/maven2/" } + maven { url "https://jitpack.io/" } + exclusiveContent { + forRepository { + maven { + url = uri("https://jitpack.io") + } + } + filter { + // Only use JitPack for the `gson-record-type-adapter-factory` library + includeModule("com.github.Marcono1234", "gson-record-type-adapter-factory") + } + } +} + +dependencies { + implementation 'junit:junit:4.13.2' + testImplementation 'org.junit.jupiter:junit-jupiter-api:5.8.2' + testRuntimeOnly 'org.junit.jupiter:junit-jupiter-engine' + + implementation 'org.projectlombok:lombok:1.18.22' + annotationProcessor 'org.projectlombok:lombok:1.18.22' + + testCompileOnly 'org.projectlombok:lombok:1.18.22' + testImplementation 'org.projectlombok:lombok:1.18.22' + testAnnotationProcessor 'org.projectlombok:lombok:1.18.22' + + implementation 'com.github.jknack:handlebars:4.3.0' + implementation 'com.github.jknack:handlebars-markdown:4.2.1' + + implementation group: 'com.google.code.gson', name: 'gson', version: '2.9.0' + implementation 'io.reactivex.rxjava3:rxjava:3.1.4' + implementation "com.sparkjava:spark-core:2.9.3" + implementation 'com.opencsv:opencsv:5.6' + + implementation group: 'org.apache.logging.log4j', name: 'log4j-api', version: '2.17.1' + implementation group: 
'org.apache.logging.log4j', name: 'log4j-core', version: '2.17.1' + implementation group: 'org.apache.logging.log4j', name: 'log4j-slf4j-impl', version: '2.17.1' + implementation group: 'org.apache.logging.log4j', name: 'log4j-api', version: '2.17.1' + implementation group: 'org.apache.logging.log4j', name: 'log4j-core', version: '2.17.1' + implementation group: 'org.apache.logging.log4j', name: 'log4j-slf4j-impl', version: '2.17.1' + + implementation 'org.slf4j:slf4j-api:1.7.36' + + implementation 'com.google.guava:guava:31.1-jre' + implementation 'com.google.inject:guice:5.1.0' + implementation 'com.github.jnr:jnr-ffi:2.1.1' + implementation 'org.apache.httpcomponents:httpcore:4.4.15' + implementation 'org.apache.httpcomponents:httpclient:4.5.13' + implementation 'com.github.ThatJavaNerd:JRAW:1.1.0' + + implementation group: 'com.h2database', name: 'h2', version: '2.1.210' + testImplementation group: 'org.mockito', name: 'mockito-core', version: '4.3.1' + + implementation 'org.jsoup:jsoup:1.14.3' + implementation group: 'com.github.crawler-commons', name: 'crawler-commons', version: '1.2' + + implementation 'org.mariadb.jdbc:mariadb-java-client:3.0.3' + implementation group: 'net.sf.trove4j', name: 'trove4j', version: '3.0.3' + + implementation 'com.zaxxer:HikariCP:5.0.1' + + implementation 'org.apache.opennlp:opennlp-tools:1.9.4' + implementation 'io.prometheus:simpleclient:0.15.0' + implementation 'io.prometheus:simpleclient_servlet:0.15.0' + implementation 'io.prometheus:simpleclient_httpserver:0.15.0' + implementation 'io.prometheus:simpleclient_hotspot:0.15.0' + implementation 'com.fasterxml.jackson.core:jackson-databind:2.13.2.1' + implementation 'org.apache.opennlp:opennlp-tools:1.9.4' + implementation 'io.prometheus:simpleclient:0.15.0' + implementation 'io.prometheus:simpleclient_servlet:0.15.0' + implementation 'io.prometheus:simpleclient_httpserver:0.15.0' + implementation 'io.prometheus:simpleclient_hotspot:0.15.0' + implementation 
'com.fasterxml.jackson.core:jackson-databind:2.13.2.1' + + implementation group: 'org.yaml', name: 'snakeyaml', version: '1.30' + + implementation 'com.syncthemall:boilerpipe:1.2.2' + implementation 'com.github.luben:zstd-jni:1.5.2-2' + implementation 'com.github.vladimir-bukhtoyarov:bucket4j-core:7.3.0' + implementation 'de.rototor.jeuclid:jeuclid-core:3.1.14' + + implementation 'org.imgscalr:imgscalr-lib:4.2' + implementation 'org.jclarion:image4j:0.7' + + implementation 'commons-net:commons-net:3.6' + implementation 'org.eclipse.jgit:org.eclipse.jgit:5.12.0.202106070339-r' + implementation 'org.eclipse.jgit:org.eclipse.jgit.ssh.jsch:5.12.0.202106070339-r' + implementation 'com.jcraft:jsch:0.1.55' + + implementation group: 'org.apache.commons', name: 'commons-compress', version: '1.21' + implementation 'edu.stanford.nlp:stanford-corenlp:4.4.0' + + implementation group: 'it.unimi.dsi', name: 'fastutil', version: '8.5.8' + implementation 'org.roaringbitmap:RoaringBitmap:[0.6,)' + implementation group: 'mysql', name: 'mysql-connector-java', version: '8.0.29' + + implementation 'com.github.Marcono1234:gson-record-type-adapter-factory:0.2.0' +} + +test { + useJUnitPlatform() +} \ No newline at end of file diff --git a/third_party/src/main/java/ca/rmen/porterstemmer/PorterStemmer.java b/third_party/src/main/java/ca/rmen/porterstemmer/PorterStemmer.java new file mode 100644 index 00000000..bae1fa1a --- /dev/null +++ b/third_party/src/main/java/ca/rmen/porterstemmer/PorterStemmer.java @@ -0,0 +1,373 @@ +/* + * Copyright (c) 2016 Carmen Alvarez + * + * This file is part of Porter Stemmer. + * + * Porter Stemmer is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * Porter Stemmer is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with Porter Stemmer. If not, see . + */ + +package ca.rmen.porterstemmer; + +import java.util.Locale; + +/** + * This is a simple implementation of the Porter stemming algorithm, defined here: + * http://tartarus.org/martin/PorterStemmer/def.txt + *

    + * This implementation has not been tuned for high performance on large amounts of text. It is + * a simple (naive perhaps) implementation of the rules. + */ +public class PorterStemmer { + + /** + * @param word the word to stem + * @return the stem of the word, in lowercase. + */ + public String stemWord(String word) { + String stem = word.toLowerCase(Locale.getDefault()); + if (stem.length() < 3) return stem; + stem = stemStep1a(stem); + stem = stemStep1b(stem); + stem = stemStep1c(stem); + stem = stemStep2(stem); + stem = stemStep3(stem); + stem = stemStep4(stem); + stem = stemStep5a(stem); + stem = stemStep5b(stem); + return stem; + } + + String stemStep1a(String input) { + // SSES -> SS + if (input.endsWith("sses")) { + return input.substring(0, input.length() - 2); + } + // IES -> I + if (input.endsWith("ies")) { + return input.substring(0, input.length() - 2); + } + // SS -> SS + if (input.endsWith("ss")) { + return input; + } + // S -> + if (input.endsWith("s")) { + return input.substring(0, input.length() - 1); + } + return input; + } + + String stemStep1b(String input) { + // (m>0) EED -> EE + if (input.endsWith("eed")) { + String stem = input.substring(0, input.length() - 1); + String letterTypes = getLetterTypes(stem); + int m = getM(letterTypes); + if (m > 0) return stem; + return input; + } + // (*v*) ED -> + if (input.endsWith("ed")) { + String stem = input.substring(0, input.length() - 2); + String letterTypes = getLetterTypes(stem); + if (letterTypes.contains("v")) { + return step1b2(stem); + } + return input; + } + // (*v*) ING -> + if (input.endsWith("ing")) { + String stem = input.substring(0, input.length() - 3); + String letterTypes = getLetterTypes(stem); + if (letterTypes.contains("v")) { + return step1b2(stem); + } + return input; + } + return input; + } + + private String step1b2(String input) { + // AT -> ATE + if (input.endsWith("at")) { + return input + "e"; + } + // BL -> BLE + else if (input.endsWith("bl")) { + return input + 
"e"; + } + // IZ -> IZE + else if (input.endsWith("iz")) { + return input + "e"; + } else { + // (*d and not (*L or *S or *Z)) + // -> single letter + char lastDoubleConsonant = getLastDoubleConsonant(input); + if (lastDoubleConsonant != 0 && + lastDoubleConsonant != 'l' + && lastDoubleConsonant != 's' + && lastDoubleConsonant != 'z') { + return input.substring(0, input.length() - 1); + } + // (m=1 and *o) -> E + else { + String letterTypes = getLetterTypes(input); + int m = getM(letterTypes); + if (m == 1 && isStarO(input)) { + return input + "e"; + } + + } + } + return input; + } + + String stemStep1c(String input) { + if (input.endsWith("y")) { + String stem = input.substring(0, input.length() - 1); + String letterTypes = getLetterTypes(stem); + if (letterTypes.contains("v")) return stem + "i"; + } + return input; + } + + String stemStep2(String input) { + String[] s1 = new String[]{ + "ational", + "tional", + "enci", + "anci", + "izer", + "bli", // the published algorithm specifies abli instead of bli. 
+ "alli", + "entli", + "eli", + "ousli", + "ization", + "ation", + "ator", + "alism", + "iveness", + "fulness", + "ousness", + "aliti", + "iviti", + "biliti", + "logi", // the published algorithm doesn't contain this + }; + String[] s2 = new String[]{ + "ate", + "tion", + "ence", + "ance", + "ize", + "ble", // the published algorithm specifies able instead of ble + "al", + "ent", + "e", + "ous", + "ize", + "ate", + "ate", + "al", + "ive", + "ful", + "ous", + "al", + "ive", + "ble", + "log" // the published algorithm doesn't contain this + }; + // (m>0) ATIONAL -> ATE + // (m>0) TIONAL -> TION + for (int i = 0; i < s1.length; i++) { + if (input.endsWith(s1[i])) { + String stem = input.substring(0, input.length() - s1[i].length()); + String letterTypes = getLetterTypes(stem); + int m = getM(letterTypes); + if (m > 0) return stem + s2[i]; + return input; + } + } + return input; + } + + String stemStep3(String input) { + String[] s1 = new String[]{ + "icate", + "ative", + "alize", + "iciti", + "ical", + "ful", + "ness", + }; + String[] s2 = new String[]{ + "ic", + "", + "al", + "ic", + "ic", + "", + "", + }; + // (m>0) ICATE -> IC + // (m>0) ATIVE -> + for (int i = 0; i < s1.length; i++) { + if (input.endsWith(s1[i])) { + String stem = input.substring(0, input.length() - s1[i].length()); + String letterTypes = getLetterTypes(stem); + int m = getM(letterTypes); + if (m > 0) return stem + s2[i]; + return input; + } + } + return input; + + } + + String stemStep4(String input) { + String[] suffixes = new String[]{ + "al", + "ance", + "ence", + "er", + "ic", + "able", + "ible", + "ant", + "ement", + "ment", + "ent", + "ion", + "ou", + "ism", + "ate", + "iti", + "ous", + "ive", + "ize", + }; + // (m>1) AL -> + // (m>1) ANCE -> + for(String suffix : suffixes) { + if (input.endsWith(suffix)) { + String stem = input.substring(0, input.length() - suffix.length()); + String letterTypes = getLetterTypes(stem); + int m = getM(letterTypes); + if (m > 1) { + if (suffix.equals("ion")) 
{ + if (stem.charAt(stem.length() - 1) == 's' || stem.charAt(stem.length() - 1) == 't') { + return stem; + } + } else { + return stem; + } + } + return input; + } + } + return input; + } + + String stemStep5a(String input) { + if (input.endsWith("e")) { + String stem = input.substring(0, input.length() - 1); + String letterTypes = getLetterTypes(stem); + int m = getM(letterTypes); + // (m>1) E -> + if (m > 1) { + return stem; + } + // (m=1 and not *o) E -> + if (m == 1 && !isStarO(stem)) { + return stem; + } + } + return input; + } + + String stemStep5b(String input) { + // (m > 1 and *d and *L) -> single letter + String letterTypes = getLetterTypes(input); + int m = getM(letterTypes); + if (m > 1 && input.endsWith("ll")) { + return input.substring(0, input.length() - 1); + } + return input; + } + + private char getLastDoubleConsonant(String input) { + if (input.length() < 2) return 0; + char lastLetter = input.charAt(input.length() - 1); + char penultimateLetter = input.charAt(input.length() - 2); + if (lastLetter == penultimateLetter && getLetterType((char) 0, lastLetter) == 'c') { + return lastLetter; + } + return 0; + } + + // *o - the stem ends cvc, where the second c is not W, X or Y (e.g. + // -WIL, -HOP) + private boolean isStarO(String input) { + if (input.length() < 3) return false; + + char lastLetter = input.charAt(input.length() - 1); + if (lastLetter == 'w' || lastLetter == 'x' || lastLetter == 'y') return false; + + char secondToLastLetter = input.charAt(input.length() - 2); + char thirdToLastLetter = input.charAt(input.length() - 3); + char fourthToLastLetter = input.length() == 3 ? 
0 : input.charAt(input.length() - 4); + return getLetterType(secondToLastLetter, lastLetter) == 'c' + && getLetterType(thirdToLastLetter, secondToLastLetter) == 'v' + && getLetterType(fourthToLastLetter, thirdToLastLetter) == 'c'; + } + + String getLetterTypes(String input) { + StringBuilder letterTypes = new StringBuilder(input.length()); + for (int i = 0; i < input.length(); i++) { + char letter = input.charAt(i); + char previousLetter = i == 0 ? 0 : input.charAt(i - 1); + char letterType = getLetterType(previousLetter, letter); + if (letterTypes.length() == 0 || letterTypes.charAt(letterTypes.length() - 1) != letterType) { + letterTypes.append(letterType); + } + } + return letterTypes.toString(); + } + + int getM(String letterTypes) { + if (letterTypes.length() < 2) return 0; + if (letterTypes.charAt(0) == 'c') return (letterTypes.length() - 1) / 2; + return letterTypes.length() / 2; + } + + private char getLetterType(char previousLetter, char letter) { + switch (letter) { + case 'a': + case 'e': + case 'i': + case 'o': + case 'u': + return 'v'; + case 'y': + if (previousLetter == 0 || getLetterType((char) 0, previousLetter) == 'v') { + return 'c'; + } + return 'v'; + default: + return 'c'; + } + } +} \ No newline at end of file diff --git a/third_party/src/main/java/com/github/datquocnguyen/FWObject.java b/third_party/src/main/java/com/github/datquocnguyen/FWObject.java new file mode 100644 index 00000000..4d89465d --- /dev/null +++ b/third_party/src/main/java/com/github/datquocnguyen/FWObject.java @@ -0,0 +1,39 @@ +package com.github.datquocnguyen; + +import java.util.Arrays; + +/** + * @author DatQuocNguyen + * + */ + +/* + * Define a 5-word/tag window object to capture the context surrounding a word + */ +public class FWObject +{ + public String[] context; + private final static String[] contextPrototype; + static { + contextPrototype = new String[13]; + for (int i = 0; i < 10; i += 2) { + contextPrototype[i] = ""; + contextPrototype[i + 1] = ""; + } + 
contextPrototype[10] = ""; + contextPrototype[11] = ""; + contextPrototype[12] = ""; + } + public FWObject(boolean check) + { + // Previous2ndWord, Previous2ndTag, PreviousWord, PreviousTag, Word, + // Tag, NextWord, NextTag, Next2ndWord, Next2ndTag, 2-chars suffix, + // 3-char suffix, 4-char suffix + if (check) { + context = Arrays.copyOf(contextPrototype, 13); + } + else { + context = new String[13]; + } + } +} diff --git a/third_party/src/main/java/com/github/datquocnguyen/InitialTagger.java b/third_party/src/main/java/com/github/datquocnguyen/InitialTagger.java new file mode 100644 index 00000000..84819408 --- /dev/null +++ b/third_party/src/main/java/com/github/datquocnguyen/InitialTagger.java @@ -0,0 +1,78 @@ +package com.github.datquocnguyen; + +import java.util.HashMap; +import java.util.function.Predicate; +import java.util.regex.Pattern; + +/** GPLv3 + * @author DatQuocNguyen + * + */ +public class InitialTagger +{ + private static final Pattern QUOTATION = Pattern.compile("(“)|(”)|(\")"); + + private static final Predicate CD = Pattern.compile("[0-9]+").asPredicate(); + private static final Predicate URL = Pattern.compile("[A-Za-z]\\w*(\\.[A-Za-z]\\w+)+").asPredicate(); + private static final Predicate JJ1 = Pattern.compile("([0-9]+-)|(-[0-9]+)").asPredicate(); + private static final Predicate JJ2 = Pattern.compile("(^[Ii]nter.*)|(^[nN]on.*)|(^[Dd]is.*)|(^[Aa]nti.*)").asPredicate(); + private static final Predicate JJ3 = Pattern.compile("(.*ful$)|(.*ous$)|(.*ble$)|(.*ic$)|(.*ive$)|(.*est$)|(.*able$)|(.*al$)").asPredicate(); + private static final Predicate NN = Pattern.compile("(.*ness$)|(.*ment$)|(.*ship$)|(^[Ee]x-.*)|(^[Ss]elf-.*)").asPredicate(); + private static final Predicate NNS = Pattern.compile(".*s$").asPredicate(); + private static final Predicate VBG = Pattern.compile(".*ing$").asPredicate(); + private static final Predicate VBN = Pattern.compile(".*ed$").asPredicate(); + private static final Predicate RB = 
Pattern.compile(".*ly$").asPredicate(); + + public static String[] EnInitTagger4Sentence( + HashMap DICT, String[] sentence) + { + String[] wordtags = new String[sentence.length]; + + for (int i = 0; i < sentence.length; i++) { + wordtags[i] = getTagForWordEn(DICT, sentence[i]); + } + return wordtags; + } + + private static String getTagForWordEn(HashMap DICT, String word) { + if (QUOTATION.matcher(word).find()) { + return DICT.get("''"); + } + if ("[]()<>!".contains(word)) { + return "?"; + } + + if (DICT.containsKey(word)) + return DICT.get(word); + String lowerW = word.toLowerCase(); + if (DICT.containsKey(lowerW)) + return DICT.get(lowerW); + if (JJ1.test(word)) + return "JJ"; + if (URL.test(word)) + return "NN"; + if (CD.test(word)) + return "CD"; + if (NN.test(word)) + return "NN"; + if (NNS.test(word) + && Character.isLowerCase(word.charAt(0))) + return "NNS"; + if (Character.isUpperCase(word.charAt(0))) + return "NNP"; + if (JJ2.test(word)) + return "JJ"; + if (VBG.test(word)) + return "VBG"; + if (VBN.test(word)) + return "VBN"; + if (word.contains("-") || JJ3.test(word)) + return "JJ"; + if (RB.test(word)) + return "RB"; + + return "NN"; + } + + +} diff --git a/third_party/src/main/java/com/github/datquocnguyen/Node.java b/third_party/src/main/java/com/github/datquocnguyen/Node.java new file mode 100644 index 00000000..9a0db5c5 --- /dev/null +++ b/third_party/src/main/java/com/github/datquocnguyen/Node.java @@ -0,0 +1,67 @@ +package com.github.datquocnguyen; + +/** + * @author DatQuocNguyen + * + */ + +public class Node +{ + FWObject condition; + String conclusion; + Node exceptNode; + Node ifnotNode; + Node fatherNode; + int depth; + + public Node(FWObject inCondition, String inConclusion, Node inFatherNode, + Node inExceptNode, Node inIfnotNode, int inDepth) + { + this.condition = inCondition; + this.conclusion = inConclusion; + this.fatherNode = inFatherNode; + this.exceptNode = inExceptNode; + this.ifnotNode = inIfnotNode; + this.depth = inDepth; + } 
+ + public void setIfnotNode(Node node) + { + this.ifnotNode = node; + } + + public void setExceptNode(Node node) + { + this.exceptNode = node; + } + + public void setFatherNode(Node node) + { + this.fatherNode = node; + } + + public int countNodes() + { + int count = 1; + if (exceptNode != null) { + count += exceptNode.countNodes(); + } + if (ifnotNode != null) { + count += ifnotNode.countNodes(); + } + return count; + } + + public boolean satisfy(FWObject object) + { + for (int i = 0; i < 13; i++) { + String key = condition.context[i]; + if (key != null) { + if (!key.equals(object.context[i])) { + return false; + } + } + } + return true; + } +} diff --git a/third_party/src/main/java/com/github/datquocnguyen/RDRPOSTagger.java b/third_party/src/main/java/com/github/datquocnguyen/RDRPOSTagger.java new file mode 100644 index 00000000..f2f67fee --- /dev/null +++ b/third_party/src/main/java/com/github/datquocnguyen/RDRPOSTagger.java @@ -0,0 +1,113 @@ +package com.github.datquocnguyen; + +import java.io.*; +import java.nio.file.Path; +import java.util.HashMap; + +/** + * @author DatQuocNguyen + * + */ +public class RDRPOSTagger +{ + private final HashMap FREQDICT; + public final Node root; + + public RDRPOSTagger(Path dictPath, Path rulesFilePath) throws IOException { + this.FREQDICT = Utils.getDictionary(dictPath.toString()); + + BufferedReader buffer = new BufferedReader(new InputStreamReader( + new FileInputStream(rulesFilePath.toFile()), "UTF-8")); + String line = buffer.readLine(); + + this.root = new Node(new FWObject(false), "NN", null, null, null, 0); + + Node currentNode = this.root; + int currentDepth = 0; + + for (; (line = buffer.readLine()) != null;) { + int depth = 0; + for (int i = 0; i <= 6; i++) { // Supposed that the maximum + // exception level is up to 6. 
+ if (line.charAt(i) == '\t') + depth += 1; + else + break; + } + + line = line.trim(); + if (line.length() == 0) + continue; + + if (line.contains("cc:")) + continue; + + FWObject condition = Utils + .getCondition(line.split(" : ")[0].trim()); + String conclusion = Utils.getConcreteValue(line.split(" : ")[1] + .trim()); + + Node node = new Node(condition, conclusion, null, null, null, depth); + + if (depth > currentDepth) { + currentNode.setExceptNode(node); + } + else if (depth == currentDepth) { + currentNode.setIfnotNode(node); + } + else { + while (currentNode.depth != depth) + currentNode = currentNode.fatherNode; + currentNode.setIfnotNode(node); + } + node.setFatherNode(currentNode); + + currentNode = node; + currentDepth = depth; + } + buffer.close(); + } + + public Node findFiredNode(FWObject object) + { + Node currentN = root; + Node firedN = null; + while (true) { + if (currentN.satisfy(object)) { + firedN = currentN; + if (currentN.exceptNode == null) { + break; + } + else { + currentN = currentN.exceptNode; + } + } + else { + if (currentN.ifnotNode == null) { + break; + } + else { + currentN = currentN.ifnotNode; + } + } + + } + + return firedN; + } + + public String[] tagsForEnSentence(String[] sentence) + { + + var initialTags = InitialTagger.EnInitTagger4Sentence(FREQDICT, sentence); + + String[] tags = new String[initialTags.length]; + for (int i = 0; i < initialTags.length; i++) { + FWObject object = Utils.getObject(sentence, initialTags, initialTags.length, i); + tags[i] = findFiredNode(object).conclusion; + } + + return tags; + } + +} diff --git a/third_party/src/main/java/com/github/datquocnguyen/Utils.java b/third_party/src/main/java/com/github/datquocnguyen/Utils.java new file mode 100644 index 00000000..cd157174 --- /dev/null +++ b/third_party/src/main/java/com/github/datquocnguyen/Utils.java @@ -0,0 +1,185 @@ +package com.github.datquocnguyen; + +import java.io.BufferedReader; +import java.io.FileInputStream; +import 
java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStreamReader; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; + +/** + * @author DatQuocNguyen + * + */ +public class Utils +{ + public static List getWordTagList(String initializedSentence) + { + List wordTagList = new ArrayList(); + for (String wordTag : initializedSentence.split("\\s+")) { + wordTag = wordTag.trim(); + if (wordTag.length() == 0) + continue; + + if (wordTag.equals("///")) + wordTagList.add(new WordTag("/", "/")); + else { + int index = wordTag.lastIndexOf("/"); + wordTagList.add(new WordTag(wordTag.substring(0, index), + wordTag.substring(index + 1))); + } + } + return wordTagList; + } + + public static HashMap getDictionary(String dictPath) + { + HashMap dict = new HashMap(); + BufferedReader buffer; + try { + buffer = new BufferedReader(new InputStreamReader( + new FileInputStream(dictPath), "UTF-8")); + for (String line; (line = buffer.readLine()) != null;) { + String[] wordTag = line.split(" "); + dict.put(wordTag[0], wordTag[1]); + } + buffer.close(); + } + catch (FileNotFoundException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + + return dict; + } + + public static boolean isAbbre(String word) + { + for (int i = 0; i < word.length(); i++) { + if (Character.isLowerCase(word.charAt(i)) || word.charAt(i) == '_') + return false; + } + return true; + } + + public static FWObject getCondition(String strCondition) + { + FWObject condition = new FWObject(false); + + for (String rule : strCondition.split(" and ")) { + rule = rule.trim(); + String key = rule.substring(rule.indexOf(".") + 1, + rule.indexOf(" ")); + String value = getConcreteValue(rule); + + if (key.equals("prevWord2")) { + condition.context[4] = value; + } + else if (key.equals("prevTag2")) { + condition.context[5] = value; + } + else if 
(key.equals("prevWord1")) { + condition.context[2] = value; + } + else if (key.equals("prevTag1")) { + condition.context[3] = value; + } + else if (key.equals("word")) { + condition.context[1] = value; + } + else if (key.equals("tag")) { + condition.context[0] = value; + } + else if (key.equals("nextWord1")) { + condition.context[6] = value; + } + else if (key.equals("nextTag1")) { + condition.context[7] = value; + } + else if (key.equals("nextWord2")) { + condition.context[8] = value; + } + else if (key.equals("nextTag2")) { + condition.context[9] = value; + } + else if (key.equals("suffixL2")) { + condition.context[10] = value; + } + else if (key.equals("suffixL3")) { + condition.context[11] = value; + } + else if (key.equals("suffixL4")) { + condition.context[12] = value; + } + } + + return condition; + } + + public static FWObject getObject(String[] words, String[] tags, int size, int index) + { + FWObject object = new FWObject(true); + + if (index > 1) { + object.context[4] = words[index-2]; + object.context[5] = tags[index-2]; + } + + if (index > 0) { + object.context[2] = words[index-1]; + object.context[3] = tags[index-1]; + } + + String currentWord = words[index]; + String currentTag = tags[index]; + + object.context[1] = currentWord; + object.context[0] = currentTag; + + int numChars = currentWord.length(); + if (numChars >= 4) { + object.context[10] = currentWord.substring(numChars - 2); + object.context[11] = currentWord.substring(numChars - 3); + } + if (numChars >= 5) { + object.context[12] = currentWord.substring(numChars - 4); + } + + if (index < size - 1) { + object.context[6] = words[index+1]; + object.context[7] = tags[index+1]; + } + + if (index < size - 2) { + object.context[8] = words[index+2]; + object.context[9] = tags[index+2]; + } + + return object; + } + + public static String getConcreteValue(String str) + { + if (str.contains("\"\"")) { + if (str.contains("Word")) + return ""; + else if (str.contains("suffixL")) + return ""; + else + 
return ""; + } + String conclusion = str.substring(str.indexOf("\"") + 1, + str.length() - 1); + return conclusion; + } + + public static void main(String args[]) + { + } +} diff --git a/third_party/src/main/java/com/github/datquocnguyen/WordTag.java b/third_party/src/main/java/com/github/datquocnguyen/WordTag.java new file mode 100644 index 00000000..06ba123f --- /dev/null +++ b/third_party/src/main/java/com/github/datquocnguyen/WordTag.java @@ -0,0 +1,21 @@ +package com.github.datquocnguyen; + +/** + * @author DatQuocNguyen + * + */ +public class WordTag +{ + public String word; + public String tag; + + public WordTag(String iword, String itag) + { + word = iword; + tag = itag; + } + + public String getTag() { + return tag; + } +} diff --git a/third_party/src/main/java/com/upserve/uppend/blobs/NativeIO.java b/third_party/src/main/java/com/upserve/uppend/blobs/NativeIO.java new file mode 100644 index 00000000..80e05c64 --- /dev/null +++ b/third_party/src/main/java/com/upserve/uppend/blobs/NativeIO.java @@ -0,0 +1,75 @@ +package com.upserve.uppend.blobs; + + +import jnr.ffi.*; +import jnr.ffi.types.size_t; +import org.slf4j.Logger; +import com.kenai.jffi.MemoryIO; + +import java.io.IOException; +import java.lang.invoke.MethodHandles; +import java.nio.*; + +// https://github.com/upserve/uppend/blob/70967c6f24d7f1a3bbc18799f485d981da93f53b/src/main/java/com/upserve/uppend/blobs/NativeIO.java +// MIT License + +public class NativeIO { + private static final Logger log = org.slf4j.LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + + private static final NativeC nativeC = LibraryLoader.create(NativeC.class).load("c"); + public static final int pageSize = nativeC.getpagesize(); // 4096 on most Linux + + public enum Advice { + // These seem to be fairly stable https://github.com/torvalds/linux + // TODO add to https://github.com/jnr/jnr-constants + Normal(0), Random(1), Sequential(2), WillNeed(3), DontNeed(4); + private final int value; + Advice(int val) { + 
this.value = val; + } + } + + public interface NativeC { + int madvise(@size_t long address, @size_t long size, int advice); + int getpagesize(); + } + + static long alignedAddress(long address) { + return address & (- pageSize); + } + + static long alignedSize(long address, int capacity) { + long end = address + capacity; + end = (end + pageSize - 1) & (-pageSize); + return end - alignedAddress(address); + } + + public static void madvise(MappedByteBuffer buffer, Advice advice) throws IOException { + + final long address = MemoryIO.getInstance().getDirectBufferAddress(buffer); + final int capacity = buffer.capacity(); + + long alignedAddress = alignedAddress(address); + long alignedSize = alignedSize(alignedAddress, capacity); + + int val = nativeC.madvise(alignedAddress, alignedSize, advice.value); + + if (val != 0) { + throw new IOException(String.format("System call madvise failed with code: %d", val)); + } + } + + public static void madviseRange(MappedByteBuffer buffer, Advice advice, long offset, int length) throws IOException { + + final long address = MemoryIO.getInstance().getDirectBufferAddress(buffer); + + long alignedAddress = alignedAddress(address+offset); + long alignedSize = alignedSize(alignedAddress, length); + + int val = nativeC.madvise(alignedAddress, alignedSize, advice.value); + + if (val != 0) { + throw new IOException(String.format("System call madvise failed with code: %d", val)); + } + } +} \ No newline at end of file diff --git a/third_party/src/main/java/org/openzim/ZIMTypes/ArticleEntry.java b/third_party/src/main/java/org/openzim/ZIMTypes/ArticleEntry.java new file mode 100644 index 00000000..c39478b0 --- /dev/null +++ b/third_party/src/main/java/org/openzim/ZIMTypes/ArticleEntry.java @@ -0,0 +1,46 @@ +/* + * Copyright (C) 2011 Arunesh Mathur + * + * This file is a part of zimreader-java. 
+ * + * zimreader-java is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3.0 as + * published by the Free Software Foundation. + * + * zimreader-java is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with zimreader-java. If not, see . + */ + + +package org.openzim.ZIMTypes; + +public class ArticleEntry extends DirectoryEntry { + + int clusterNumber; + + int blobnumber; + + public ArticleEntry(int mimeType, char namespace, int revision, + int clusterNumber, int blobNumber, String url, String title, + int urlListindex) { + + super(mimeType, namespace, revision, url, title, urlListindex); + + this.clusterNumber = clusterNumber; + this.blobnumber = blobNumber; + } + + public int getClusterNumber() { + return clusterNumber; + } + + public int getBlobnumber() { + return blobnumber; + } + +} diff --git a/third_party/src/main/java/org/openzim/ZIMTypes/DirectoryEntry.java b/third_party/src/main/java/org/openzim/ZIMTypes/DirectoryEntry.java new file mode 100644 index 00000000..d2a5eab7 --- /dev/null +++ b/third_party/src/main/java/org/openzim/ZIMTypes/DirectoryEntry.java @@ -0,0 +1,69 @@ +/* + * Copyright (C) 2011 Arunesh Mathur + * + * This file is a part of zimreader-java. + * + * zimreader-java is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3.0 as + * published by the Free Software Foundation. + * + * zimreader-java is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public License + * along with zimreader-java. If not, see . + */ + +package org.openzim.ZIMTypes; + +public abstract class DirectoryEntry { + + int mimeType; + + char namespace; + + int revision; + + String url; + + String title; + + int urlListindex; + + public DirectoryEntry(int mimeType, char namespace, int revision, + String url, String title, int index) { + this.mimeType = mimeType; + this.namespace = namespace; + this.revision = revision; + this.url = url; + this.title = title; + this.urlListindex = index; + } + + public int getMimeType() { + return mimeType; + } + + public char getNamespace() { + return namespace; + } + + public int getRevision() { + return revision; + } + + public String getUrl() { + return url; + } + + public String getTitle() { + return title; + } + + public int getUrlListindex() { + return urlListindex; + } + +} diff --git a/third_party/src/main/java/org/openzim/ZIMTypes/RedirectEntry.java b/third_party/src/main/java/org/openzim/ZIMTypes/RedirectEntry.java new file mode 100644 index 00000000..74967041 --- /dev/null +++ b/third_party/src/main/java/org/openzim/ZIMTypes/RedirectEntry.java @@ -0,0 +1,37 @@ +/* + * Copyright (C) 2011 Arunesh Mathur + * + * This file is a part of zimreader-java. + * + * zimreader-java is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3.0 as + * published by the Free Software Foundation. + * + * zimreader-java is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with zimreader-java. If not, see . 
+ */ + +package org.openzim.ZIMTypes; + +public class RedirectEntry extends DirectoryEntry { + + int redirectIndex; + + public RedirectEntry(int mimeType, char namespace, int revision, + int redirectIndex, String url, String title, int urlListindex) { + + super(mimeType, namespace, revision, url, title, urlListindex); + + this.redirectIndex = redirectIndex; + } + + public int getRedirectIndex() { + return redirectIndex; + } + +} diff --git a/third_party/src/main/java/org/openzim/ZIMTypes/ZIMFile.java b/third_party/src/main/java/org/openzim/ZIMTypes/ZIMFile.java new file mode 100644 index 00000000..ae8632b1 --- /dev/null +++ b/third_party/src/main/java/org/openzim/ZIMTypes/ZIMFile.java @@ -0,0 +1,201 @@ +/* + * Copyright (C) 2011 Arunesh Mathur + * + * This file is a part of zimreader-java. + * + * zimreader-java is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3.0 as + * published by the Free Software Foundation. + * + * zimreader-java is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with zimreader-java. If not, see . 
+ */ + +package org.openzim.ZIMTypes; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.RandomAccessFile; +import java.util.*; + +import org.openzim.util.RandomAcessFileZIMInputStream; + +/** + * @author Arunesh Mathur + * + * A ZIM file implementation that stores the Header and the MIMETypeList + * + */ +public class ZIMFile extends File { + + /** + * + */ + private static final long serialVersionUID = 1L; + + private Header mHeader; + + private Map mMIMETypeList = new HashMap<>(); // Can be removed if not needed + + public ZIMFile(String path) { + super(path); + + try { + readHeader(); + } catch (FileNotFoundException e) { + e.printStackTrace(); + } + } + + private void readHeader() throws FileNotFoundException { + + // Helpers + int len = 0; + StringBuffer mimeBuffer = null; + + // The byte[] that will help us in reading bytes out of the file + byte[] buffer = new byte[16]; + + // Check whether the file exists + if (!(this.exists())) { + throw new FileNotFoundException( + "The file that you specified was not found."); + } + + // The reader that will be used to read contents from the file + + RandomAcessFileZIMInputStream reader = new RandomAcessFileZIMInputStream( + new RandomAccessFile(this, "r")); + + // The ZIM file header + mHeader = new Header(); + + // Read the contents of the header + try { + mHeader.magicNumber = reader.readFourLittleEndianBytesValue(buffer); + // System.out.println(mHeader.magicNumber); + + mHeader.version = reader.readFourLittleEndianBytesValue(buffer); + // System.out.println(mHeader.version); + + mHeader.uuid = reader.readSixteenLittleEndianBytesValue(buffer); + // System.out.println(mHeader.uuid); reader.read(buffer, 0, 4); + + mHeader.articleCount = reader + .readFourLittleEndianBytesValue(buffer); + // System.out.println(mHeader.articleCount); + + mHeader.clusterCount = reader + .readFourLittleEndianBytesValue(buffer); + // System.out.println(mHeader.clusterCount); + + mHeader.urlPtrPos = 
reader.readEightLittleEndianBytesValue(buffer); + // System.out.println(mHeader.urlPtrPos); + + mHeader.titlePtrPos = reader + .readEightLittleEndianBytesValue(buffer); + // System.out.println(mHeader.titlePtrPos); + + mHeader.clusterPtrPos = reader + .readEightLittleEndianBytesValue(buffer); + // System.out.println(mHeader.clusterPtrPos); + + mHeader.mimeListPos = reader + .readEightLittleEndianBytesValue(buffer); + // System.out.println(mHeader.mimeListPos); + + mHeader.mainPage = reader.readFourLittleEndianBytesValue(buffer); + // System.out.println(mHeader.mainPage); + + mHeader.layoutPage = reader.readFourLittleEndianBytesValue(buffer); + // System.out.println(mHeader.layoutPage); + + reader.seek(mHeader.mimeListPos); + // Initialise the MIMETypeList + while (true) { + reader.read(buffer, 0, 1); + len = 0; + mimeBuffer = new StringBuffer(); + while (buffer[0] != '\0') { + mimeBuffer.append((char) buffer[0]); + reader.read(buffer, 0, 1); + len++; + } + if (len == 0) { + break; + } + mMIMETypeList.put(mMIMETypeList.size(), mimeBuffer.toString()); + } + + } catch (Exception e) { + e.printStackTrace(); + } + } + + public int getVersion() { + return mHeader.version; + } + + public int getUuid() { + return mHeader.uuid; + } + + public int getArticleCount() { + return mHeader.articleCount; + } + + public int getClusterCount() { + return mHeader.clusterCount; + } + + public long getUrlPtrPos() { + return mHeader.urlPtrPos; + } + + public long getTitlePtrPos() { + return mHeader.titlePtrPos; + } + + public long getClusterPtrPos() { + return mHeader.clusterPtrPos; + } + + public String getMIMEType(int mimeNumber) { + return mMIMETypeList.get(mimeNumber); + } + public Map getMIMETypes() { + return Collections.unmodifiableMap(mMIMETypeList); + } + + public long getHeaderSize() { + return mHeader.mimeListPos; + } + + public int getMainPage() { + return mHeader.mainPage; + } + + public int getLayoutPage() { + return mHeader.layoutPage; + } + + public class Header { + int 
magicNumber; + int version; + int uuid; + int articleCount; + int clusterCount; + long urlPtrPos; + long titlePtrPos; + long clusterPtrPos; + long mimeListPos; + int mainPage; + int layoutPage; + } + +} diff --git a/third_party/src/main/java/org/openzim/ZIMTypes/ZIMReader.java b/third_party/src/main/java/org/openzim/ZIMTypes/ZIMReader.java new file mode 100644 index 00000000..e4cd83ee --- /dev/null +++ b/third_party/src/main/java/org/openzim/ZIMTypes/ZIMReader.java @@ -0,0 +1,677 @@ +/* + * Copyright (C) 2011 Arunesh Mathur + * + * This file is a part of zimreader-java. + * + * zimreader-java is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3.0 as + * published by the Free Software Foundation. + * + * zimreader-java is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with zimreader-java. If not, see . 
+ */ + +package org.openzim.ZIMTypes; + +import java.io.*; +import java.util.*; +import java.util.function.BiConsumer; +import java.util.function.Consumer; +import java.util.function.Predicate; + +import com.github.luben.zstd.RecyclingBufferPool; +import com.github.luben.zstd.ZstdInputStream; +import lombok.AllArgsConstructor; +import lombok.Getter; +import org.jetbrains.annotations.NotNull; +import org.tukaani.xz.SingleXZInputStream; +import org.openzim.util.RandomAcessFileZIMInputStream; +import org.openzim.util.Utilities; + +/** + * @author Arunesh Mathur + * + * A ZIMReader that reads data from the ZIMFile + * + */ +public class ZIMReader { + + private ZIMFile mFile; + private RandomAcessFileZIMInputStream mReader; + private int targetMime; + + public ZIMReader(ZIMFile file) { + this.mFile = file; + targetMime = file.getMIMETypes().entrySet().stream().filter(e -> "text/html".equals(e.getValue())).map(Map.Entry::getKey).findFirst().orElseThrow(); + try { + mReader = new RandomAcessFileZIMInputStream(new RandomAccessFile( + mFile, "r")); + } catch (FileNotFoundException e) { + e.printStackTrace(); + } + } + + public List getURLListByURL() throws IOException { + + long i = 0, pos, mimeType; + + byte[] buffer = new byte[8]; + + // The list that will eventually return the list of URL's + ArrayList returnList = new ArrayList(); + + // Move to the spot where URL's are listed + mReader.seek(mFile.getUrlPtrPos()); + + for (i = 0; i < mFile.getArticleCount(); i++) { + + // The position of URL i + pos = mReader.readEightLittleEndianBytesValue(buffer); + + // Mark the current position that we need to return to + mReader.mark(); + + // Move to the position of URL i + mReader.seek(pos); + + // Article or Redirect entry? 
+ mimeType = mReader.readTwoLittleEndianBytesValue(buffer); + + if (mimeType == 65535) { + mReader.seek(pos + 12); + returnList.add(mReader.readString()); + } else { + mReader.seek(pos + 16); + returnList.add(mReader.readString()); + } + + mReader.reset(); + } + + return returnList; + } + + public List getURLListByTitle() throws IOException { + + long i = 0, pos, mimeType, articleNumber, urlPtrPos; + + byte[] buffer = new byte[8]; + + // The list that will eventually return the list of URL's + ArrayList returnList = new ArrayList(); + + // Get the UrlPtrPos or one time storage + urlPtrPos = mFile.getUrlPtrPos(); + + // Move to the spot where URL's are listed + mReader.seek(mFile.getTitlePtrPos()); + + for (i = 0; i < mFile.getArticleCount(); i++) { + + // The articleNumber of the position of URL i + articleNumber = mReader.readFourLittleEndianBytesValue(buffer); + + // Mark the current position that we need to return to + mReader.mark(); + + mReader.seek(urlPtrPos + (8 * (articleNumber))); + + // The position of URL i + pos = mReader.readEightLittleEndianBytesValue(buffer); + mReader.seek(pos); + + // Article or Redirect entry? 
+ mimeType = mReader.readTwoLittleEndianBytesValue(buffer); + + if (mimeType == 65535) { + mReader.seek(pos + 12); + String url = mReader.readString(); + returnList.add(url); + } else { + mReader.seek(pos + 16); + String url = mReader.readString(); + returnList.add(url); + } + + // Return to the marked position + mReader.reset(); + } + + return returnList; + } + + // Gives the minimum required information needed for the given articleName + public DirectoryEntry getDirectoryInfo(String articleName, char namespace) + throws IOException { + + DirectoryEntry entry; + String cmpStr; + long numberOfArticles = mFile.getArticleCount(); + long beg = mFile.getTitlePtrPos(), end = beg + (numberOfArticles * 4), mid; + + articleName = namespace + "/" + articleName; + + System.out.print((end - beg)/4 + " entries"); + + while (beg <= end) { + mid = beg + 4 * (((end - beg) / 4) / 2); + entry = getDirectoryInfoAtTitlePosition(mid); + if (entry == null) { + return null; + } + cmpStr = entry.getNamespace() + "/" + entry.getUrl(); + if (articleName.compareTo(cmpStr) < 0) { + end = mid - 4; + + } else if (articleName.compareTo(cmpStr) > 0) { + beg = mid + 4; + + } else { + return entry; + } + } + + return null; + + } + + public DirectoryEntry enumerateArticles(PrintWriter writer) + throws IOException { + + int numberOfArticles = mFile.getArticleCount(); + long beg = mFile.getTitlePtrPos(); + long end = beg + (numberOfArticles * 4L); + + System.out.println(numberOfArticles); + long start = System.currentTimeMillis(); + + for (long i = beg; i < end; i+=4) { + var entry = getDirectoryInfoAtTitlePosition(i); + + if (entry.mimeType == targetMime && entry instanceof ArticleEntry) { + ArticleEntry ae = (ArticleEntry) entry; + writer.printf("%d\t%d\t%s\n", ae.clusterNumber, ae.getBlobnumber(), ae.url); + } + } + + return null; + + } + + + @Getter @AllArgsConstructor + static class DataKey implements Comparable { + public final long cluster; + public final long blob; + + @Override + public int 
compareTo(@NotNull DataKey o) { + if (o.cluster != cluster) { + return (int)(cluster - o.cluster); + } + return (int)(blob - o.blob); + } + } + + // Gives the minimum required information needed for the given articleName + public DirectoryEntry forEachArticles(BiConsumer consumer, Predicate blobPred) + throws IOException { + + int numberOfArticles = mFile.getArticleCount(); + long beg = mFile.getTitlePtrPos(); + long end = beg + (numberOfArticles * 4L); + + System.out.println(numberOfArticles); + long start = System.currentTimeMillis(); + + Map> data = new TreeMap<>(); + + System.out.println("Indexing"); + + for (long i = beg; i < end; i+=4) { + var entry = getDirectoryInfoAtTitlePosition(i); + + if (((i-beg)%100_000) == 0) { + System.out.printf("%f%%\n", ((i-beg) * 100.) / (end-beg)); + } + + if (entry.mimeType == targetMime && entry instanceof ArticleEntry) { + ArticleEntry ae = (ArticleEntry) entry; + data.computeIfAbsent(ae.clusterNumber, (cn) -> new HashMap<>()).put(ae.blobnumber, ae.url); + } + } + + System.out.println("Iterating over " + data.keySet().stream().mapToInt(Integer::intValue).max() + "clusters"); + + data.forEach((pos,blobs) -> { + if (!blobPred.test(pos)) { + return; + } + + try { + getArticleData(consumer, pos, blobs); + } + catch (IOException ex) { + + } + }); + + return null; + + } + + + + // Gives the minimum required information needed for the given articleName + public DirectoryEntry forEachTitles(Consumer aeConsumer, Consumer reConsumer) + throws IOException { + + int numberOfArticles = mFile.getArticleCount(); + long beg = mFile.getTitlePtrPos(); + long end = beg + (numberOfArticles * 4L); + + System.err.println(numberOfArticles); + long start = System.currentTimeMillis(); + + Map> data = new TreeMap<>(); + + System.err.println("Indexing"); + + for (long i = beg; i < end; i+=4) { + var entry = getDirectoryInfoAtTitlePosition(i); + + if (((i-beg)%100_000) == 0) { + System.err.printf("%f%%\n", ((i-beg) * 100.) 
/ (end-beg)); + } + + if (entry.mimeType == targetMime && entry instanceof ArticleEntry) { + aeConsumer.accept((ArticleEntry) entry); + } + else if (entry.mimeType == 65535 && entry instanceof RedirectEntry) { + + reConsumer.accept((RedirectEntry) entry); + + } + + } + + return null; + + } + + public String getArticleData(BiConsumer consumer, int clusterNumber, Map blobToUrl) throws IOException { + + byte[] buffer = new byte[8]; + + // Cast to ArticleEntry + + // Get the cluster and blob numbers from the article + + // Move to the cluster entry in the clusterPtrPos + mReader.seek(mFile.getClusterPtrPos() + clusterNumber * 8L); + + // Read the location of the cluster + long clusterPos = mReader + .readEightLittleEndianBytesValue(buffer); + + // Move to the cluster + mReader.seek(clusterPos); + + // Read the first byte, for compression information + int compressionType = mReader.read(); + + InputStream is; + InputStream cis; + switch (compressionType) { + case 0: + case 1: + is = mReader; + cis = null; + break; + case 4: + cis = is = new SingleXZInputStream(mReader, 4194304); + break; + case 5: + cis = is = new ZstdInputStream(new BufferedInputStream(mReader, 65535), RecyclingBufferPool.INSTANCE); + break; + default: + throw new IllegalArgumentException(); + } + + try { + buffer = new byte[4]; + is.read(buffer); + var firstOffset = Utilities.toFourLittleEndianInteger(buffer); + int numberOfBlobs = firstOffset / 4; + + long offsets[] = new long[numberOfBlobs]; + offsets[0] = firstOffset; + + buffer = new byte[4*(numberOfBlobs-1)]; + int rb = 0, trb = 0; + while (trb < buffer.length) { + rb = is.read(buffer, trb, buffer.length - trb); + trb += rb; + } + + for (int blobNumber = 0; blobNumber < numberOfBlobs-1; blobNumber++) { + offsets[blobNumber+1] = ((buffer[4*blobNumber] & 0xFF) | ((buffer[4*blobNumber+1] & 0xFF) << 8) + | ((buffer[4*blobNumber+2] & 0xFF) << 16) | ((buffer[4*blobNumber+3] & 0xFF) << 24)); + } + + int minRelBlob = 
blobToUrl.keySet().stream().mapToInt(Integer::intValue).min().orElse(0); + int maxRelBlob = blobToUrl.keySet().stream().mapToInt(Integer::intValue).max().orElse(0); + + if (minRelBlob > 0) { + Utilities.skipFully(is, offsets[minRelBlob] - offsets[0]); + } + + for (int blobNumber = minRelBlob; blobNumber < maxRelBlob; blobNumber++) { + int differenceOffset = (int)(offsets[blobNumber+1] - offsets[blobNumber]); + + if (!blobToUrl.containsKey(blobNumber)) { + Utilities.skipFully(is, differenceOffset); + } + else { + byte[] data = new byte[differenceOffset]; + trb = rb = 0; + while (trb < data.length) { + rb = is.read(data, trb, data.length - trb); + trb += rb; + } + consumer.accept(blobToUrl.get(blobNumber), new String(data)); + } + } + System.out.println(clusterNumber + " " + blobToUrl.size()); + + } + finally { + if (null != cis) { + cis.close(); + } + } + + + return null; + + } + + public String getArticleData(DirectoryEntry mainEntry) throws IOException { + + byte[] buffer = new byte[8]; + + if (mainEntry != null) { + + // Check what kind of an entry was mainEnrty + if (mainEntry.getClass() == ArticleEntry.class) { + + // Cast to ArticleEntry + ArticleEntry article = (ArticleEntry) mainEntry; + + // Get the cluster and blob numbers from the article + long clusterNumber = article.getClusterNumber(); + int blobNumber = article.getBlobnumber(); + + // Move to the cluster entry in the clusterPtrPos + mReader.seek(mFile.getClusterPtrPos() + clusterNumber * 8); + + // Read the location of the cluster + long clusterPos = mReader + .readEightLittleEndianBytesValue(buffer); + + // Move to the cluster + mReader.seek(clusterPos); + + // Read the first byte, for compression information + int compressionType = mReader.read(); + + // Reference declaration + SingleXZInputStream xzReader = null; + int firstOffset, numberOfBlobs, offset1, + offset2, + location, + differenceOffset; + + ByteArrayOutputStream baos; + + // Check the compression type that was read + switch 
(compressionType) { + + // TODO: Read uncompressed data directly + case 0: + case 1: + + // Read the first 4 bytes to find out the number of artciles + buffer = new byte[4]; + + // Create a dictionary with size 40MiB, the zimlib uses this + // size while creating + + // Read the first offset + mReader.read(buffer); + + // The first four bytes are the offset of the zeroth blob + firstOffset = Utilities + .toFourLittleEndianInteger(buffer); + + // The number of blobs + numberOfBlobs = firstOffset / 4; + + // The blobNumber has to be lesser than the numberOfBlobs + assert blobNumber < numberOfBlobs; + + + if (blobNumber == 0) { + // The first offset is what we read earlier + offset1 = firstOffset; + } else { + + location = (blobNumber - 1) * 4; + Utilities.skipFully(mReader, location); + mReader.read(buffer); + offset1 = Utilities.toFourLittleEndianInteger(buffer); + } + + mReader.read(buffer); + offset2 = Utilities.toFourLittleEndianInteger(buffer); + + differenceOffset = offset2 - offset1; + buffer = new byte[differenceOffset]; + + Utilities.skipFully(mReader, + (offset1 - 4 * (blobNumber + 2))); + + mReader.read(buffer, 0, differenceOffset); + + return new String(buffer); + + // LZMA2 compressed data + case 4: + + // Read the first 4 bytes to find out the number of artciles + buffer = new byte[4]; + + // Create a dictionary with size 40MiB, the zimlib uses this + // size while creating + xzReader = new SingleXZInputStream(mReader, 4194304); + + // Read the first offset + xzReader.read(buffer); + + // The first four bytes are the offset of the zeroth blob + firstOffset = Utilities + .toFourLittleEndianInteger(buffer); + + // The number of blobs + numberOfBlobs = firstOffset / 4; + + // The blobNumber has to be lesser than the numberOfBlobs + assert blobNumber < numberOfBlobs; + + if(blobNumber == 0) { + // The first offset is what we read earlier + offset1 = firstOffset; + } else { + + location = (blobNumber - 1) * 4; + Utilities.skipFully(xzReader, location); + 
xzReader.read(buffer); + offset1 = Utilities.toFourLittleEndianInteger(buffer); + } + + xzReader.read(buffer); + offset2 = Utilities.toFourLittleEndianInteger(buffer); + + differenceOffset = offset2 - offset1; + buffer = new byte[differenceOffset]; + + Utilities.skipFully(xzReader, + (offset1 - 4 * (blobNumber + 2))); + + xzReader.read(buffer, 0, differenceOffset); + return new String(buffer); + + case 5: + // Read the first 4 bytes to find out the number of artciles + buffer = new byte[4]; + + // Create a dictionary with size 40MiB, the zimlib uses this + // size while creating + var zstdInputStream = new com.github.luben.zstd.ZstdInputStream(new BufferedInputStream(mReader)); + + // Read the first offset + zstdInputStream.read(buffer); + + // The first four bytes are the offset of the zeroth blob + firstOffset = Utilities + .toFourLittleEndianInteger(buffer); + + // The number of blobs + numberOfBlobs = firstOffset / 4; + + // The blobNumber has to be lesser than the numberOfBlobs + assert blobNumber < numberOfBlobs; + + if(blobNumber == 0) { + // The first offset is what we read earlier + offset1 = firstOffset; + } else { + + location = (blobNumber - 1) * 4; + Utilities.skipFully(zstdInputStream, location); + zstdInputStream.read(buffer); + offset1 = Utilities.toFourLittleEndianInteger(buffer); + } + + zstdInputStream.read(buffer); + offset2 = Utilities.toFourLittleEndianInteger(buffer); + + differenceOffset = offset2 - offset1; + buffer = new byte[differenceOffset]; + + Utilities.skipFully(zstdInputStream, + (offset1 - 4 * (blobNumber + 2))); + + zstdInputStream.read(buffer, 0, differenceOffset); + + return new String(buffer); + + default: + System.err.print("What is compression = " + compressionType); + + } + + } + } + + return null; + + } + + public DirectoryEntry getDirectoryInfoAtTitlePosition(long position) + throws IOException { + + // Helpers + long pos; + byte[] buffer = new byte[8]; + + // At the appropriate position in the titlePtrPos + 
mReader.seek(position); + + // Get value of article at index + pos = mReader.readFourLittleEndianBytesValue(buffer); + + // Move to the position in urlPtrPos + mReader.seek(mFile.getUrlPtrPos() + 8L * pos); + + // Get value of article in urlPtrPos + pos = mReader.readEightLittleEndianBytesValue(buffer); + + // Go to the location of the directory entry + mReader.seek(pos); + + int type = mReader.readTwoLittleEndianBytesValue(buffer); + + // Ignore the parameter length + mReader.read(); + + char namespace = (char) mReader.read(); + // System.out.println("Namepsace: " + namespace); + + int revision = mReader.readFourLittleEndianBytesValue(buffer); + // System.out.println("Revision: " + revision); + + // TODO: Remove redundant if condition code + // Article or Redirect entry + if (type == 65535) { + + // System.out.println("MIMEType: " + type); + + int redirectIndex = mReader.readFourLittleEndianBytesValue(buffer); + // System.out.println("RedirectIndex: " + redirectIndex); + + String url = mReader.readString(); + // System.out.println("URL: " + url); + + String title = mReader.readString(); + title = title.equals("") ? url : title; + // System.out.println("Title: " + title); + + return new RedirectEntry(type, namespace, revision, redirectIndex, + url, title, (int)(position - mFile.getUrlPtrPos()) / 8); + + } else { + + // System.out.println("MIMEType: " + mFile.getMIMEType(type)); + + int clusterNumber = mReader.readFourLittleEndianBytesValue(buffer); + // System.out.println("Cluster Number: " + clusterNumber); + + int blobNumber = mReader.readFourLittleEndianBytesValue(buffer); + // System.out.println("Blob Number: " + blobNumber); + + String url = mReader.readString(); + // System.out.println("URL: " + url); + + String title = mReader.readString(); + title = title.equals("") ? 
url : title; + // System.out.println("Title: " + title); + + // Parameter data ignored + + return new ArticleEntry(type, namespace, revision, clusterNumber, + blobNumber, url, title, + (int)(position - mFile.getUrlPtrPos()) / 8); + } + + } + + public ZIMFile getZIMFile() { + return mFile; + } +} diff --git a/third_party/src/main/java/org/openzim/util/RandomAcessFileZIMInputStream.java b/third_party/src/main/java/org/openzim/util/RandomAcessFileZIMInputStream.java new file mode 100644 index 00000000..ce62f156 --- /dev/null +++ b/third_party/src/main/java/org/openzim/util/RandomAcessFileZIMInputStream.java @@ -0,0 +1,148 @@ +/* + * Copyright (C) 2011 Arunesh Mathur + * + * This file is a part of zimreader-java. + * + * zimreader-java is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3.0 as + * published by the Free Software Foundation. + * + * zimreader-java is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with zimreader-java. If not, see . + */ + +package org.openzim.util; + +import java.io.IOException; +import java.io.InputStream; +import java.io.RandomAccessFile; + +/** + * This is an implementation of RandomAccessFile to ensure that it is an + * InputStream as well, specifically designed for reading a ZIM file. Ad-Hoc + * implementation, can be improved. 
+ * + * @author Arunesh Mathur + */ + +public class RandomAcessFileZIMInputStream extends InputStream { + + private RandomAccessFile mRAFReader; + + private long mMarked = -1; + + public RandomAcessFileZIMInputStream(RandomAccessFile reader) { + this.mRAFReader = reader; + } + + // TODO: Remove the parameter buffer + public int readTwoLittleEndianBytesValue(byte[] buffer) throws IOException { + if (buffer.length < 2) { + throw new OutOfMemoryError("buffer too small"); + } else { + mRAFReader.read(buffer, 0, 2); + return Utilities.toTwoLittleEndianInteger(buffer); + } + } + + // TODO: Remove the parameter buffer + public int readFourLittleEndianBytesValue(byte[] buffer) throws IOException { + if (buffer.length < 4) { + throw new OutOfMemoryError("buffer too small"); + } else { + mRAFReader.read(buffer, 0, 4); + return Utilities.toFourLittleEndianInteger(buffer); + } + } + + // TODO: Remove the parameter buffer + public long readEightLittleEndianBytesValue(byte[] buffer) + throws IOException { + if (buffer.length < 8) { + throw new OutOfMemoryError("buffer too small"); + } else { + mRAFReader.read(buffer, 0, 8); + return Utilities.toEightLittleEndianInteger(buffer); + } + } + + // TODO: Remove the parameter buffer + public int readSixteenLittleEndianBytesValue(byte[] buffer) + throws IOException { + if (buffer.length < 16) { + throw new OutOfMemoryError("buffer too small"); + } else { + mRAFReader.read(buffer, 0, 16); + return Utilities.toSixteenLittleEndianInteger(buffer); + } + } + + // Reads characters from the current position into a String and stops when a + // '\0' is encountered + public String readString() throws IOException { + StringBuilder sb = new StringBuilder(); + /* + * int i; byte[] buffer = new byte[100]; while (true) { + * mRAFReader.read(buffer); for (i = 0; i < buffer.length; i++) { if + * (buffer[i] == '\0') { break; } sb.append((char) buffer[i]); } if (i + * != buffer.length) break; } return sb.toString(); + */ + byte[] buffer = new byte[64]; + 
for (;;) { + mRAFReader.readFully(buffer); + for (int i = 0; i < buffer.length; i++) { + if (buffer[i] == 0) { + sb.append(new String(buffer, 0, i)); + mRAFReader.seek(mRAFReader.getFilePointer() + 1 + (i - buffer.length)); + return sb.toString(); + } + } + sb.append(new String(buffer)); + } + + } + + @Override + public int read() throws IOException { + return mRAFReader.read(); + } + + @Override + public int read(byte b[], int off, int len) throws IOException { + return mRAFReader.read(b, off, len); + } + + + public RandomAccessFile getRandomAccessFile() { + return mRAFReader; + } + + public void seek(long pos) throws IOException { + if (pos < 0) { + System.out.println(pos); + } + mRAFReader.seek(pos); + } + + public long getFilePointer() throws IOException { + return mRAFReader.getFilePointer(); + } + + public void mark() throws IOException { + this.mMarked = mRAFReader.getFilePointer(); + } + + public void reset() throws IOException { + if (this.mMarked == -1) { + return; + } else { + mRAFReader.seek(mMarked); + this.mMarked = -1; + } + } +} diff --git a/third_party/src/main/java/org/openzim/util/Utilities.java b/third_party/src/main/java/org/openzim/util/Utilities.java new file mode 100644 index 00000000..25a262d3 --- /dev/null +++ b/third_party/src/main/java/org/openzim/util/Utilities.java @@ -0,0 +1,84 @@ +/* + * Copyright (C) 2011 Arunesh Mathur + * + * This file is a part of zimreader-java. + * + * zimreader-java is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 3.0 as + * published by the Free Software Foundation. + * + * zimreader-java is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with zimreader-java. 
/**
 * Static helpers for decoding little-endian values and skipping stream data.
 */
public class Utilities {

    // TODO: Write a binary search algorithm
    public static int binarySearch() {
        return -1;
    }

    /**
     * Decodes the first 2 bytes of {@code buffer} as an unsigned little-endian value.
     *
     * @throws IllegalArgumentException if {@code buffer} is shorter than 2 bytes
     */
    public static int toTwoLittleEndianInteger(byte[] buffer) throws IOException {
        requireLength(buffer, 2);
        return (buffer[0] & 0xFF) | ((buffer[1] & 0xFF) << 8);
    }

    /**
     * Decodes the first 4 bytes of {@code buffer} as a little-endian {@code int}.
     *
     * @throws IllegalArgumentException if {@code buffer} is shorter than 4 bytes
     */
    public static int toFourLittleEndianInteger(byte[] buffer) throws IOException {
        requireLength(buffer, 4);
        return (buffer[0] & 0xFF) | ((buffer[1] & 0xFF) << 8)
                | ((buffer[2] & 0xFF) << 16) | ((buffer[3] & 0xFF) << 24);
    }

    /**
     * Decodes the first 8 bytes of {@code buffer} as a little-endian {@code long}.
     *
     * @throws IllegalArgumentException if {@code buffer} is shorter than 8 bytes
     */
    public static long toEightLittleEndianInteger(byte[] buffer) throws IOException {
        requireLength(buffer, 8);
        long result = 0;
        for (int i = 7; i >= 0; i--) {
            result = (result << 8) | (buffer[i] & 0xFF);
        }
        return result;
    }

    /**
     * Folds the first 16 bytes of {@code buffer} into an {@code int}.
     *
     * <p>NOTE(review): a 16-byte value does not fit into an {@code int}. The
     * original expression shifted by 32..120 bits, and Java reduces an
     * {@code int} shift distance modulo 32, so bytes 4-15 are OR-ed on top of
     * bytes 0-3. That result is kept bit-for-bit here because callers store
     * it; it is not a faithful decoding of the 16 bytes.
     *
     * @throws IllegalArgumentException if {@code buffer} is shorter than 16 bytes
     */
    public static int toSixteenLittleEndianInteger(byte[] buffer) throws IOException {
        requireLength(buffer, 16);
        int result = 0;
        for (int i = 0; i < 16; i++) {
            result |= (buffer[i] & 0xFF) << ((8 * i) % 32);
        }
        return result;
    }

    /**
     * Skips exactly {@code bytes} bytes of {@code stream}; does nothing for
     * zero or negative counts.
     *
     * @throws java.io.EOFException if the stream ends first
     */
    public static void skipFully(InputStream stream, long bytes) throws IOException {
        long remaining = bytes;
        while (remaining > 0) {
            long skipped = stream.skip(remaining);
            if (skipped > 0) {
                remaining -= skipped;
                continue;
            }
            // skip() may legitimately return 0; a one-byte read tells "not
            // ready" apart from end of stream, where the old loop spun forever.
            if (stream.read() < 0) {
                throw new java.io.EOFException(
                        "Stream ended with " + remaining + " of " + bytes + " bytes left to skip");
            }
            remaining--;
        }
    }

    /** Rejects buffers that cannot hold {@code needed} bytes. */
    private static void requireLength(byte[] buffer, int needed) {
        if (buffer.length < needed) {
            throw new IllegalArgumentException(
                    "buffer too small: need " + needed + " bytes, got " + buffer.length);
        }
    }

}
+ if (!DecoderUtil.isCRC32Valid(buf, 0, headerSize - 4, headerSize - 4)) + throw new CorruptedInputException("XZ Block Header is corrupt"); + + // Check for reserved bits in Block Flags. + if ((buf[1] & 0x3C) != 0) + throw new UnsupportedOptionsException( + "Unsupported options in XZ Block Header"); + + // Memory for the Filter Flags field + int filterCount = (buf[1] & 0x03) + 1; + long[] filterIDs = new long[filterCount]; + byte[][] filterProps = new byte[filterCount][]; + + // Use a stream to parse the fields after the Block Flags field. + // Exclude the CRC32 field at the end. + ByteArrayInputStream bufStream = new ByteArrayInputStream( + buf, 2, headerSize - 6); + + try { + // Set the maximum valid compressed size. This is overriden + // by the value from the Compressed Size field if it is present. + compressedSizeLimit = (DecoderUtil.VLI_MAX & ~3) + - headerSize - check.getSize(); + + // Decode and validate Compressed Size if the relevant flag + // is set in Block Flags. + if ((buf[1] & 0x40) != 0x00) { + compressedSizeInHeader = DecoderUtil.decodeVLI(bufStream); + + if (compressedSizeInHeader == 0 + || compressedSizeInHeader > compressedSizeLimit) + throw new CorruptedInputException(); + + compressedSizeLimit = compressedSizeInHeader; + } + + // Decode Uncompressed Size if the relevant flag is set + // in Block Flags. + if ((buf[1] & 0x80) != 0x00) + uncompressedSizeInHeader = DecoderUtil.decodeVLI(bufStream); + + // Decode Filter Flags. + for (int i = 0; i < filterCount; ++i) { + filterIDs[i] = DecoderUtil.decodeVLI(bufStream); + + long filterPropsSize = DecoderUtil.decodeVLI(bufStream); + if (filterPropsSize > bufStream.available()) + throw new CorruptedInputException(); + + filterProps[i] = new byte[(int)filterPropsSize]; + bufStream.read(filterProps[i]); + } + + } catch (IOException e) { + throw new CorruptedInputException("XZ Block Header is corrupt"); + } + + // Check that the remaining bytes are zero. 
+ for (int i = bufStream.available(); i > 0; --i) + if (bufStream.read() != 0x00) + throw new UnsupportedOptionsException( + "Unsupported options in XZ Block Header"); + + // Check if the Filter IDs are supported, decode + // the Filter Properties, and check that they are + // supported by this decoder implementation. + FilterDecoder[] filters = new FilterDecoder[filterIDs.length]; + + for (int i = 0; i < filters.length; ++i) { + if (filterIDs[i] == LZMA2Coder.FILTER_ID) + filters[i] = new LZMA2Decoder(filterProps[i]); + + else if (filterIDs[i] == DeltaCoder.FILTER_ID) + filters[i] = new DeltaDecoder(filterProps[i]); + + else + throw new UnsupportedOptionsException( + "Unknown Filter ID " + filterIDs[i]); + } + + RawCoder.validate(filters); + + // Check the memory usage limit. + if (memoryLimit >= 0) { + int memoryNeeded = 0; + for (int i = 0; i < filters.length; ++i) + memoryNeeded += filters[i].getMemoryUsage(); + + if (memoryNeeded > memoryLimit) + throw new MemoryLimitException(memoryNeeded, memoryLimit); + } + + // Use an input size counter to calculate + // the size of the Compressed Data field. + inCounted = new CountingInputStream(in); + + // Initialize the filter chain. + filterChain = inCounted; + for (int i = filters.length - 1; i >= 0; --i) + filterChain = filters[i].getInputStream(filterChain); + } + + public int read() throws IOException { + byte[] buf = new byte[1]; + return read(buf, 0, 1) == -1 ? -1 : (buf[0] & 0xFF); + } + + public int read(byte[] buf, int off, int len) throws IOException { + int ret = filterChain.read(buf, off, len); + long compressedSize = inCounted.getSize(); + + if (ret > 0) { + check.update(buf, off, ret); + uncompressedSize += ret; + + // Catch invalid values. 
+ if (compressedSize < 0 + || compressedSize > compressedSizeLimit + || uncompressedSize < 0 + || (uncompressedSizeInHeader != -1 + && uncompressedSize > uncompressedSizeInHeader)) + throw new CorruptedInputException(); + + } else if (ret == -1) { + // Validate Compressed Size and Uncompressed Size if they were + // present in Block Header. + if ((compressedSizeInHeader != -1 + && compressedSizeInHeader != compressedSize) + || (uncompressedSizeInHeader != -1 + && uncompressedSizeInHeader != uncompressedSize)) + throw new CorruptedInputException(); + + // Block Padding bytes must be zeros. + for (long i = compressedSize; (i & 3) != 0; ++i) + if (inData.readUnsignedByte() != 0x00) + throw new CorruptedInputException(); + + // Validate the integrity check. + byte[] storedCheck = new byte[check.getSize()]; + inData.readFully(storedCheck); + if (!Arrays.equals(check.finish(), storedCheck)) + throw new CorruptedInputException("Integrity (" + + check.getName() + ") check does not match"); + } + + return ret; + } + + public int available() throws IOException { + return filterChain.available(); + } + + public long getUnpaddedSize() { + return headerSize + inCounted.getSize() + check.getSize(); + } + + public long getUncompressedSize() { + return uncompressedSize; + } +} diff --git a/third_party/src/main/java/org/tukaani/xz/BlockOutputStream.java b/third_party/src/main/java/org/tukaani/xz/BlockOutputStream.java new file mode 100644 index 00000000..b031116d --- /dev/null +++ b/third_party/src/main/java/org/tukaani/xz/BlockOutputStream.java @@ -0,0 +1,128 @@ +/* + * BlockOutputStream + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. 
+ */ + +package org.tukaani.xz; + +import java.io.OutputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import org.tukaani.xz.common.EncoderUtil; +import org.tukaani.xz.check.Check; + +class BlockOutputStream extends FinishableOutputStream { + private final OutputStream out; + private final CountingOutputStream outCounted; + private FinishableOutputStream filterChain; + private final Check check; + + private final int headerSize; + private final long compressedSizeLimit; + private long uncompressedSize = 0; + + public BlockOutputStream(OutputStream out, FilterEncoder[] filters, + Check check) throws IOException { + this.out = out; + this.check = check; + + // Initialize the filter chain. + outCounted = new CountingOutputStream(out); + filterChain = outCounted; + for (int i = 0; i < filters.length; ++i) + filterChain = filters[i].getOutputStream(filterChain); + + // Prepare to encode the Block Header field. + ByteArrayOutputStream bufStream = new ByteArrayOutputStream(); + + // Write a dummy Block Header Size field. The real value is written + // once everything else except CRC32 has been written. + bufStream.write(0x00); + + // Write Block Flags. Storing Compressed Size or Uncompressed Size + // isn't supported for now. + bufStream.write(filters.length - 1); + + // List of Filter Flags + for (int i = 0; i < filters.length; ++i) { + EncoderUtil.encodeVLI(bufStream, filters[i].getFilterID()); + byte[] filterProps = filters[i].getFilterProps(); + EncoderUtil.encodeVLI(bufStream, filterProps.length); + bufStream.write(filterProps); + } + + // Header Padding + while ((bufStream.size() & 3) != 0) + bufStream.write(0x00); + + byte[] buf = bufStream.toByteArray(); + + // Total size of the Block Header: Take the size of the CRC32 field + // into account. + headerSize = buf.length + 4; + + // This is just a sanity check. 
+ if (headerSize > EncoderUtil.BLOCK_HEADER_SIZE_MAX) + throw new UnsupportedOptionsException(); + + // Block Header Size + buf[0] = (byte)(buf.length / 4); + + // Write the Block Header field to the output stream. + out.write(buf); + EncoderUtil.writeCRC32(out, buf); + + // Calculate the maximum allowed size of the Compressed Data field. + // It is hard to exceed it so this is mostly to be pedantic. + compressedSizeLimit = (EncoderUtil.VLI_MAX & ~3) + - headerSize - check.getSize(); + } + + public void write(int b) throws IOException { + byte[] buf = new byte[1]; + buf[0] = (byte)b; + write(buf, 0, 1); + } + + public void write(byte[] buf, int off, int len) throws IOException { + filterChain.write(buf, off, len); + check.update(buf, off, len); + uncompressedSize += len; + validate(); + } + + public void finish() throws IOException { + // Finish the Compressed Data field. + filterChain.finish(); + validate(); + + // Block Padding + for (long i = outCounted.getSize(); (i & 3) != 0; ++i) + out.write(0x00); + + // Check + out.write(check.finish()); + } + + private void validate() throws IOException { + long compressedSize = outCounted.getSize(); + + // It is very hard to trigger this exception. + // This is just to be pedantic. + if (compressedSize < 0 || compressedSize > compressedSizeLimit + || uncompressedSize < 0) + throw new XZIOException("XZ Stream has grown too big"); + } + + public long getUnpaddedSize() { + return headerSize + outCounted.getSize() + check.getSize(); + } + + public long getUncompressedSize() { + return uncompressedSize; + } +} diff --git a/third_party/src/main/java/org/tukaani/xz/CorruptedInputException.java b/third_party/src/main/java/org/tukaani/xz/CorruptedInputException.java new file mode 100644 index 00000000..d7d95207 --- /dev/null +++ b/third_party/src/main/java/org/tukaani/xz/CorruptedInputException.java @@ -0,0 +1,37 @@ +/* + * CorruptedInputException + * + * Author: Lasse Collin + * + * This file has been put into the public domain. 
+ * You can do whatever you want with this file. + */ + +package org.tukaani.xz; + +/** + * Thrown when the compressed input data is corrupt. + * However, it is possible that some or all of the data + * already read from the input stream was corrupt too. + */ +public class CorruptedInputException extends XZIOException { + private static final long serialVersionUID = 3L; + + /** + * Creates a new CorruptedInputException with + * the default error detail message. + */ + public CorruptedInputException() { + super("Compressed data is corrupt"); + } + + /** + * Creates a new CorruptedInputException with + * the specified error detail message. + * + * @param s error detail message + */ + public CorruptedInputException(String s) { + super(s); + } +} diff --git a/third_party/src/main/java/org/tukaani/xz/CountingInputStream.java b/third_party/src/main/java/org/tukaani/xz/CountingInputStream.java new file mode 100644 index 00000000..2d85eaf6 --- /dev/null +++ b/third_party/src/main/java/org/tukaani/xz/CountingInputStream.java @@ -0,0 +1,42 @@ +/* + * CountingInputStream + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. 
+ */ + +package org.tukaani.xz; + +import java.io.FilterInputStream; +import java.io.InputStream; +import java.io.IOException; + +class CountingInputStream extends FilterInputStream { + private long size = 0; + + public CountingInputStream(InputStream in) { + super(in); + } + + public int read() throws IOException { + int ret = in.read(); + if (ret != -1 && size >= 0) + ++size; + + return ret; + } + + public int read(byte[] b, int off, int len) throws IOException { + int ret = in.read(b, off, len); + if (ret > 0 && size >= 0) + size += ret; + + return ret; + } + + public long getSize() { + return size; + } +} diff --git a/third_party/src/main/java/org/tukaani/xz/CountingOutputStream.java b/third_party/src/main/java/org/tukaani/xz/CountingOutputStream.java new file mode 100644 index 00000000..16ffaa76 --- /dev/null +++ b/third_party/src/main/java/org/tukaani/xz/CountingOutputStream.java @@ -0,0 +1,46 @@ +/* + * CountingOutputStream + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. 
+ */ + +package org.tukaani.xz; + +import java.io.OutputStream; +import java.io.IOException; + +class CountingOutputStream extends FinishableOutputStream { + private OutputStream out; + private long size = 0; + + public CountingOutputStream(OutputStream out) { + this.out = out; + } + + public void write(int b) throws IOException { + out.write(b); + if (size >= 0) + ++size; + } + + public void write(byte[] b, int off, int len) throws IOException { + out.write(b, off, len); + if (size >= 0) + size += len; + } + + public void flush() throws IOException { + out.flush(); + } + + public void close() throws IOException { + out.close(); + } + + public long getSize() { + return size; + } +} diff --git a/third_party/src/main/java/org/tukaani/xz/DeltaCoder.java b/third_party/src/main/java/org/tukaani/xz/DeltaCoder.java new file mode 100644 index 00000000..808834c8 --- /dev/null +++ b/third_party/src/main/java/org/tukaani/xz/DeltaCoder.java @@ -0,0 +1,26 @@ +/* + * DeltaCoder + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package org.tukaani.xz; + +abstract class DeltaCoder implements FilterCoder { + public static final long FILTER_ID = 0x03; + + public boolean changesSize() { + return false; + } + + public boolean nonLastOK() { + return true; + } + + public boolean lastOK() { + return false; + } +} diff --git a/third_party/src/main/java/org/tukaani/xz/DeltaDecoder.java b/third_party/src/main/java/org/tukaani/xz/DeltaDecoder.java new file mode 100644 index 00000000..2893bcec --- /dev/null +++ b/third_party/src/main/java/org/tukaani/xz/DeltaDecoder.java @@ -0,0 +1,32 @@ +/* + * DeltaDecoder + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. 
+ */ + +package org.tukaani.xz; + +import java.io.InputStream; + +class DeltaDecoder extends DeltaCoder implements FilterDecoder { + private int distance; + + DeltaDecoder(byte[] props) throws UnsupportedOptionsException { + if (props.length != 1) + throw new UnsupportedOptionsException( + "Unsupported Delta filter properties"); + + distance = (props[0] & 0xFF) + 1; + } + + public int getMemoryUsage() { + return 1; + } + + public InputStream getInputStream(InputStream in) { + return new DeltaInputStream(in, distance); + } +} diff --git a/third_party/src/main/java/org/tukaani/xz/DeltaInputStream.java b/third_party/src/main/java/org/tukaani/xz/DeltaInputStream.java new file mode 100644 index 00000000..876c7033 --- /dev/null +++ b/third_party/src/main/java/org/tukaani/xz/DeltaInputStream.java @@ -0,0 +1,105 @@ +/* + * DeltaInputStream + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package org.tukaani.xz; + +import java.io.InputStream; +import java.io.IOException; +import org.tukaani.xz.delta.DeltaDecoder; + +/** + * Decodes Delta-filtered data. + *

    + * The delta filter doesn't change the size of the data and thus it + * cannot have an end-of-payload marker. It will simply decode until + * its input stream indicates end of input. + */ +public class DeltaInputStream extends InputStream { + /** + * Smallest supported delta calculation distance. + */ + public static final int DISTANCE_MIN = 1; + + /** + * Largest supported delta calculation distance. + */ + public static final int DISTANCE_MAX = 256; + + private final InputStream in; + private final DeltaDecoder delta; + + /** + * Creates a new Delta decoder with the given delta calculation distance. + * + * @param in input stream from which Delta filtered data + * is read + * + * @param distance delta calculation distance, must be in the + * range [DISTANCE_MIN, + * DISTANCE_MAX] + */ + public DeltaInputStream(InputStream in, int distance) { + this.in = in; + this.delta = new DeltaDecoder(distance); + } + + /** + * Decode the next byte from this input stream. + * + * @return the next decoded byte, or -1 to indicate + * the end of input on the input stream in + * + * @throws IOException may be thrown by in + */ + public int read() throws IOException { + byte[] buf = new byte[1]; + return read(buf, 0, 1) == -1 ? -1 : (buf[0] & 0xFF); + } + + /** + * Decode into an array of bytes. + *

    + * This calls in.read(buf, off, len) and defilters the + * returned data. + * + * @param buf target buffer for decoded data + * @param off start offset in buf + * @param len maximum number of bytes to read + * + * @return number of bytes read, or -1 to indicate + * the end of the input stream in + * + * @throws IOException may be thrown by underlaying input + * stream in + */ + public int read(byte[] buf, int off, int len) throws IOException { + int size = in.read(buf, off, len); + if (size == -1) + return -1; + + delta.decode(buf, off, size); + return size; + } + + /** + * Calls in.available(). + * + * @return the value returned by in.available() + */ + public int available() throws IOException { + return in.available(); + } + + /** + * Calls in.close(). + */ + public void close() throws IOException { + in.close(); + } +} diff --git a/third_party/src/main/java/org/tukaani/xz/FilterCoder.java b/third_party/src/main/java/org/tukaani/xz/FilterCoder.java new file mode 100644 index 00000000..1e95e37f --- /dev/null +++ b/third_party/src/main/java/org/tukaani/xz/FilterCoder.java @@ -0,0 +1,16 @@ +/* + * FilterCoder + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package org.tukaani.xz; + +interface FilterCoder { + boolean changesSize(); + boolean nonLastOK(); + boolean lastOK(); +} diff --git a/third_party/src/main/java/org/tukaani/xz/FilterDecoder.java b/third_party/src/main/java/org/tukaani/xz/FilterDecoder.java new file mode 100644 index 00000000..8e2d0061 --- /dev/null +++ b/third_party/src/main/java/org/tukaani/xz/FilterDecoder.java @@ -0,0 +1,17 @@ +/* + * FilterDecoder + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. 
+ */ + +package org.tukaani.xz; + +import java.io.InputStream; + +interface FilterDecoder extends FilterCoder { + int getMemoryUsage(); + InputStream getInputStream(InputStream in); +} diff --git a/third_party/src/main/java/org/tukaani/xz/FilterEncoder.java b/third_party/src/main/java/org/tukaani/xz/FilterEncoder.java new file mode 100644 index 00000000..2b2c2a51 --- /dev/null +++ b/third_party/src/main/java/org/tukaani/xz/FilterEncoder.java @@ -0,0 +1,16 @@ +/* + * FilterEncoder + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package org.tukaani.xz; + +interface FilterEncoder extends FilterCoder { + long getFilterID(); + byte[] getFilterProps(); + FinishableOutputStream getOutputStream(FinishableOutputStream out); +} diff --git a/third_party/src/main/java/org/tukaani/xz/FilterOptions.java b/third_party/src/main/java/org/tukaani/xz/FilterOptions.java new file mode 100644 index 00000000..9c5f9d8e --- /dev/null +++ b/third_party/src/main/java/org/tukaani/xz/FilterOptions.java @@ -0,0 +1,28 @@ +/* + * FilterOptions + * + * Authors: Lasse Collin + * Igor Pavlov + * + * This file has been put into the public domain. + * You can do whatever you want with this file. 
+ */ + +package org.tukaani.xz; + +import java.io.InputStream; +import java.io.IOException; + +public abstract class FilterOptions implements Cloneable { + public abstract int getEncoderMemoryUsage(); + public abstract FinishableOutputStream getOutputStream( + FinishableOutputStream out); + + public abstract int getDecoderMemoryUsage(); + public abstract InputStream getInputStream(InputStream in) + throws IOException; + + abstract FilterEncoder getFilterEncoder(); + + FilterOptions() {} +} diff --git a/third_party/src/main/java/org/tukaani/xz/FinishableOutputStream.java b/third_party/src/main/java/org/tukaani/xz/FinishableOutputStream.java new file mode 100644 index 00000000..b360628b --- /dev/null +++ b/third_party/src/main/java/org/tukaani/xz/FinishableOutputStream.java @@ -0,0 +1,31 @@ +/* + * FinishableOutputStream + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package org.tukaani.xz; + +import java.io.OutputStream; +import java.io.IOException; + +/** + * Output stream that supports finishing without closing + * the underlying stream. + */ +public abstract class FinishableOutputStream extends OutputStream { + /** + * Finish the stream without closing the underlying stream. + * No more data may be written to the stream after finishing. + *

    + * The finish method of FinishableOutputStream + * does nothing. Subclasses should override it if they need finishing + * support, which is the case, for example, with compressors. + * + * @throws IOException + */ + public void finish() throws IOException {}; +} diff --git a/third_party/src/main/java/org/tukaani/xz/IndexIndicatorException.java b/third_party/src/main/java/org/tukaani/xz/IndexIndicatorException.java new file mode 100644 index 00000000..fc6bc038 --- /dev/null +++ b/third_party/src/main/java/org/tukaani/xz/IndexIndicatorException.java @@ -0,0 +1,14 @@ +/* + * IndexIndicatorException + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package org.tukaani.xz; + +class IndexIndicatorException extends Exception { + private static final long serialVersionUID = 1L; +} diff --git a/third_party/src/main/java/org/tukaani/xz/LZMA2Coder.java b/third_party/src/main/java/org/tukaani/xz/LZMA2Coder.java new file mode 100644 index 00000000..b0963b75 --- /dev/null +++ b/third_party/src/main/java/org/tukaani/xz/LZMA2Coder.java @@ -0,0 +1,26 @@ +/* + * LZMA2Coder + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package org.tukaani.xz; + +abstract class LZMA2Coder implements FilterCoder { + public static final long FILTER_ID = 0x21; + + public boolean changesSize() { + return true; + } + + public boolean nonLastOK() { + return false; + } + + public boolean lastOK() { + return true; + } +} diff --git a/third_party/src/main/java/org/tukaani/xz/LZMA2Decoder.java b/third_party/src/main/java/org/tukaani/xz/LZMA2Decoder.java new file mode 100644 index 00000000..82075c21 --- /dev/null +++ b/third_party/src/main/java/org/tukaani/xz/LZMA2Decoder.java @@ -0,0 +1,35 @@ +/* + * LZMA2Decoder + * + * Author: Lasse Collin + * + * This file has been put into the public domain. 
+ * You can do whatever you want with this file. + */ + +package org.tukaani.xz; + +import java.io.InputStream; + +class LZMA2Decoder extends LZMA2Coder implements FilterDecoder { + private int dictSize; + + LZMA2Decoder(byte[] props) throws UnsupportedOptionsException { + // Up to 1.5 GiB dictionary is supported. The bigger ones + // are too big for int. + if (props.length != 1 || (props[0] & 0xFF) > 37) + throw new UnsupportedOptionsException( + "Unsupported LZMA2 properties"); + + dictSize = 2 | (props[0] & 1); + dictSize <<= (props[0] >>> 1) + 11; + } + + public int getMemoryUsage() { + return LZMA2InputStream.getMemoryUsage(dictSize); + } + + public InputStream getInputStream(InputStream in) { + return new LZMA2InputStream(in, dictSize); + } +} diff --git a/third_party/src/main/java/org/tukaani/xz/LZMA2Encoder.java b/third_party/src/main/java/org/tukaani/xz/LZMA2Encoder.java new file mode 100644 index 00000000..a1048082 --- /dev/null +++ b/third_party/src/main/java/org/tukaani/xz/LZMA2Encoder.java @@ -0,0 +1,35 @@ +/* + * LZMA2Encoder + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package org.tukaani.xz; + +class LZMA2Encoder extends LZMA2Coder implements FilterEncoder { + private LZMA2Options options; + private byte[] props = new byte[1]; + + LZMA2Encoder(LZMA2Options options) { + // Make a private copy so that the caller is free to change its copy. + this.options = (LZMA2Options)options.clone(); + + // TODO: Props!!! 
+ + } + + public long getFilterID() { + return FILTER_ID; + } + + public byte[] getFilterProps() { + return props; + } + + public FinishableOutputStream getOutputStream(FinishableOutputStream out) { + return options.getOutputStream(out); + } +} diff --git a/third_party/src/main/java/org/tukaani/xz/LZMA2InputStream.java b/third_party/src/main/java/org/tukaani/xz/LZMA2InputStream.java new file mode 100644 index 00000000..1551a1f4 --- /dev/null +++ b/third_party/src/main/java/org/tukaani/xz/LZMA2InputStream.java @@ -0,0 +1,329 @@ +/* + * LZMA2InputStream + * + * Authors: Lasse Collin + * Igor Pavlov + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package org.tukaani.xz; + +import java.io.InputStream; +import java.io.DataInputStream; +import java.io.IOException; +import org.tukaani.xz.lz.LZDecoder; +import org.tukaani.xz.rangecoder.RangeDecoder; +import org.tukaani.xz.lzma.LZMADecoder; + +/** + * Decompresses a raw LZMA2 stream. + */ +public class LZMA2InputStream extends InputStream { + /** + * Smallest valid LZMA2 dictionary size. + *

    + * Very tiny dictionaries would be a performance problem, so + * the minimum is 4 KiB. + */ + public static final int DICT_SIZE_MIN = 4096; + + /** + * Largest dictionary size supported by this implementation. + *

    + * The LZMA2 algorithm allows dictionaries up to one byte less than 4 GiB. + * This implementation supports only 16 bytes less than 2 GiB for raw + * LZMA2 streams, and for .xz files the maximum is 1.5 GiB. This + * limitation is due to Java using signed 32-bit integers for array + * indexing. The limitation shouldn't matter much in practice since so + * huge dictionaries are not normally used. + */ + public static final int DICT_SIZE_MAX = Integer.MAX_VALUE & ~15; + + private static final int COMPRESSED_SIZE_MAX = 1 << 16; + + private final DataInputStream in; + + private final LZDecoder lz; + private final RangeDecoder rc = new RangeDecoder(COMPRESSED_SIZE_MAX); + private LZMADecoder lzma; + + private int uncompressedSize = 0; + private boolean isLZMAChunk; + + private boolean needDictReset = true; + private boolean needProps = true; + private boolean endReached = false; + + private IOException exception = null; + + /** + * Gets approximate decompressor memory requirements as kibibytes for + * the given dictionary size. + * + * @param dictSize LZMA2 dictionary size as bytes, must be + * in the range [DICT_SIZE_MIN, + * DICT_SIZE_MAX] + * + * @return approximate memory requirements as kibibytes (KiB) + */ + public static int getMemoryUsage(int dictSize) { + // The base state is aroudn 30-40 KiB (probabilities etc.), + // range decoder needs COMPRESSED_SIZE_MAX bytes for buffering, + // and LZ decoder needs a dictionary buffer. + return 40 + COMPRESSED_SIZE_MAX / 1024 + getDictSize(dictSize) / 1024; + } + + private static int getDictSize(int dictSize) { + if (dictSize < DICT_SIZE_MIN || dictSize > DICT_SIZE_MAX) + throw new IllegalArgumentException( + "Unsupported dictionary size " + dictSize); + + // Round dictionary size upward to a multiple of 16. This way LZMA + // can use LZDecoder.getPos() for calculating LZMA's posMask. + // Note that this check is needed only for raw LZMA2 streams; it is + // redundant with .xz. 
+ return (dictSize + 15) & ~15; + } + + /** + * Creates a new input stream that decompresses raw LZMA2 data + * from in. + *

    + * The caller needs to know the dictionary size used when compressing; + * the dictionary size isn't stored as part of a raw LZMA2 stream. + *

    + * Specifying a too small dictionary size will prevent decompressing + * the stream. Specifying a too big dictionary is waste of memory but + * decompression will work. + *

    + * There is no need to specify a dictionary bigger than + * the uncompressed size of the data even if a bigger dictionary + * was used when compressing. If you know the uncompressed size + * of the data, this might allow saving some memory. + * + * @param in input stream from which LZMA2-compressed + * data is read + * + * @param dictSize LZMA2 dictionary size as bytes, must be + * in the range [DICT_SIZE_MIN, + * DICT_SIZE_MAX] + */ + public LZMA2InputStream(InputStream in, int dictSize) { + this.in = new DataInputStream(in); + this.lz = new LZDecoder(getDictSize(dictSize), null); + } + + /** + * Creates a new LZMA2 decompressor using a preset dictionary. + *

    + * This is like LZMAInputStream() except that the + * dictionary may be initialized using a preset dictionary. + * If a preset dictionary was used when compressing the data, the + * same preset dictionary must be provided when decompressing. + * + * @param in input stream from which LZMA2-compressed + * data is read + * + * @param dictSize LZMA2 dictionary size as bytes, must be + * in the range [DICT_SIZE_MIN, + * DICT_SIZE_MAX] + * + * @param presetDict preset dictionary or null + * to use no preset dictionary + */ + public LZMA2InputStream(InputStream in, int dictSize, byte[] presetDict) + throws IOException { + this.in = new DataInputStream(in); + this.lz = new LZDecoder(getDictSize(dictSize), presetDict); + + if (presetDict.length > 0) + needDictReset = false; + } + + /** + * Decompresses the next byte from this input stream. + *

    + * Reading lots of data with read() from this input stream + * may be inefficient. Wrap it in java.io.BufferedInputStream + * if you need to read lots of data one byte at a time. + * + * @return the next decompressed byte, or -1 + * to indicate the end of the compressed stream + * + * @throws CorruptedInputException + * + * @throws EOFException + * compressed input is truncated or corrupt + * + * @throws IOException may be thrown by in + */ + public int read() throws IOException { + byte[] buf = new byte[1]; + return read(buf, 0, 1) == -1 ? -1 : (buf[0] & 0xFF); + } + + /** + * Decompresses into an array of bytes. + *

    + * If len is zero, no bytes are read and 0 + * is returned. Otherwise this will block until len + * bytes have been decompressed, the end of LZMA2 stream is reached, + * or an exception is thrown. + * + * @param buf target buffer for uncompressed data + * @param off start offset in buf + * @param len maximum number of uncompressed bytes to read + * + * @return number of bytes read, or -1 to indicate + * the end of the compressed stream + * + * @throws CorruptedInputException + * + * @throws EOFException + * compressed input is truncated or corrupt + * + * @throws IOException may be thrown by in + */ + public int read(byte[] buf, int off, int len) throws IOException { + if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length) + throw new IllegalArgumentException(); + + if (len == 0) + return 0; + + if (exception != null) + throw exception; + + if (endReached) + return -1; + + try { + int size = 0; + + while (len > 0) { + if (uncompressedSize == 0) { + decodeChunkHeader(); + if (endReached) + return size == 0 ? 
-1 : size; + } + + int copySizeMax = Math.min(uncompressedSize, len); + + if (!isLZMAChunk) { + lz.copyUncompressed(in, copySizeMax); + } else { + lz.setLimit(copySizeMax); + lzma.decode(); + } + + int copiedSize = lz.flush(buf, off); + off += copiedSize; + len -= copiedSize; + size += copiedSize; + uncompressedSize -= copiedSize; + + if (uncompressedSize == 0) + if (!rc.isFinished() || lz.hasPending()) + throw new CorruptedInputException(); + } + + return size; + + } catch (IOException e) { + exception = e; + throw e; + } + } + + private void decodeChunkHeader() throws IOException { + int control = in.readUnsignedByte(); + + if (control == 0x00) { + endReached = true; + return; + } + + if (control >= 0xE0 || control == 0x01) { + needProps = true; + needDictReset = false; + lz.reset(); + } else if (needDictReset) { + throw new CorruptedInputException(); + } + + if (control >= 0x80) { + isLZMAChunk = true; + + uncompressedSize = (control & 0x1F) << 16; + uncompressedSize += in.readUnsignedShort() + 1; + + int compressedSize = in.readUnsignedShort() + 1; + + if (control >= 0xC0) { + needProps = false; + decodeProps(); + + } else if (needProps) { + throw new CorruptedInputException(); + + } else if (control >= 0xA0) { + lzma.reset(); + } + + rc.prepareInputBuffer(in, compressedSize); + + } else if (control > 0x02) { + throw new CorruptedInputException(); + + } else { + isLZMAChunk = false; + uncompressedSize = in.readUnsignedShort() + 1; + } + } + + private void decodeProps() throws IOException { + int props = in.readUnsignedByte(); + + if (props > (4 * 5 + 4) * 9 + 8) + throw new CorruptedInputException(); + + int pb = props / (9 * 5); + props -= pb * 9 * 5; + int lp = props / 9; + int lc = props - lp * 9; + + if (lc + lp > 4) + throw new CorruptedInputException(); + + lzma = new LZMADecoder(lz, rc, lc, lp, pb); + } + + /** + * Returns the number of uncompressed bytes that can be read + * without blocking. 
The value is returned with an assumption + * that the compressed input data will be valid. If the compressed + * data is corrupt, CorruptedInputException may get + * thrown before the number of bytes claimed to be available have + * been read from this input stream. + *

    + * In LZMAInputStream, the return value will be non-zero when the + * decompressor is in the middle of an LZMA2 chunk. The return value + * will then be the number of uncompressed bytes remaining from that + * chunk. + * + * @return the number of uncompressed bytes that can be read + * without blocking + */ + public int available() { + return uncompressedSize; + } + + /** + * Calls in.close(). + */ + public void close() throws IOException { + in.close(); + } +} diff --git a/third_party/src/main/java/org/tukaani/xz/LZMA2Options.java b/third_party/src/main/java/org/tukaani/xz/LZMA2Options.java new file mode 100644 index 00000000..58f21bd8 --- /dev/null +++ b/third_party/src/main/java/org/tukaani/xz/LZMA2Options.java @@ -0,0 +1,139 @@ +/* + * LZMA2Options + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package org.tukaani.xz; + +import java.io.InputStream; +import java.io.IOException; + +/** + * Options for LZMA2. + *

    + * FIXME: This is unfinished and things might change. + */ +public class LZMA2Options extends FilterOptions { + /** + * Default compression preset. + */ + public static final int PRESET_DEFAULT = 6; + + /** + * Minimum dictionary size. + */ + public static final int DICT_SIZE_MIN = 4096; + + /** + * Maximum dictionary size for compression. + *

    + * FIXME? Decompression dictionary size can be bigger. + */ + public static final int DICT_SIZE_MAX = 128 << 20; + + /** + * Maximum value for lc + lp. + */ + public static final int LC_LP_MAX = 4; + + /** + * Maximum value for pb. + */ + public static final int PB_MAX = 4; + + /** + * Compression mode: uncompressed. + * The data is wrapped into a LZMA2 stream without compression. + */ + public static final int MODE_UNCOMPRESSED = 0; + + /** + * Compression mode: fast. + * This is usually combined with a hash chain match finder. + */ + public static final int MODE_FAST = 1; + + /** + * Compression mode: normal. + * This is usually combined with a binary tree match finder. + */ + public static final int MODE_NORMAL = 2; + + /** + * Minimum value for niceLen. + */ + public static final int NICE_LEN_MIN = 8; + + /** + * Maximum value for niceLen. + */ + public static final int NICE_LEN_MAX = 273; + + /** + * Match finder: Hash Chain 2-3-4 + */ + public static final int MF_HC4 = 0x04; + + /** + * Match finder: Binary tree 2-3-4 + */ + public static final int MF_BT4 = 0x14; + + private int dictSize; + +/* + public int lc; + public int lp; + public int pb; + public int mode; + public int niceLen; + public int mf; + public int depth; +*/ + + public LZMA2Options() { + setPreset(PRESET_DEFAULT); + } + + public LZMA2Options(int preset) { + setPreset(preset); + } + + public void setPreset(int preset) { + // TODO + dictSize = 8 << 20; + } + + public int getEncoderMemoryUsage() { + return LZMA2OutputStream.getMemoryUsage(this); + } + + public FinishableOutputStream getOutputStream(FinishableOutputStream out) { + return new LZMA2OutputStream(out, this); + } + + public int getDecoderMemoryUsage() { + return LZMA2InputStream.getMemoryUsage(dictSize); + } + + public InputStream getInputStream(InputStream in) throws IOException { + return new LZMA2InputStream(in, dictSize); + } + + FilterEncoder getFilterEncoder() { + return new LZMA2Encoder(this); + } + + public Object clone() 
{
+        try {
+            return super.clone();
+        } catch (CloneNotSupportedException e) {
+            // Never reached
+            throw new RuntimeException();
+        }
+    }
+}
diff --git a/third_party/src/main/java/org/tukaani/xz/LZMA2OutputStream.java b/third_party/src/main/java/org/tukaani/xz/LZMA2OutputStream.java
new file mode 100644
index 00000000..156af2d7
--- /dev/null
+++ b/third_party/src/main/java/org/tukaani/xz/LZMA2OutputStream.java
@@ -0,0 +1,77 @@
+/*
+ * LZMA2OutputStream
+ *
+ * Authors: Lasse Collin
+ *          Igor Pavlov
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz;
+
+import java.io.IOException;
+
+//
+// TODO: This creates a valid LZMA2 stream but it doesn't compress.
+// So this is useless except for testing the .xz container support.
+//
+
+/**
+ * Wraps the written bytes into uncompressed LZMA2 chunks and forwards
+ * them to the next stream in the chain.
+ */
+class LZMA2OutputStream extends FinishableOutputStream {
+    /**
+     * Largest payload of one chunk. The chunk header stores (size - 1)
+     * in two bytes, so sizes 1 to 0x10000 are representable.
+     */
+    private static final int CHUNK_SIZE_MAX = 0x10000;
+
+    private final FinishableOutputStream out;
+
+    /** Set once the end marker has been written, so it is never repeated. */
+    private boolean endMarkerWritten = false;
+
+    static int getMemoryUsage(LZMA2Options options) {
+        // TODO
+        return 1;
+    }
+
+    LZMA2OutputStream(FinishableOutputStream out, LZMA2Options options) {
+        this.out = out;
+    }
+
+    /**
+     * Writes one byte as a chunk of its own.
+     *
+     * @param       b           byte to write; only the low eight bits
+     *                          are used
+     *
+     * @throws      IOException may be thrown by the underlying stream
+     */
+    public void write(int b) throws IOException {
+        byte[] buf = new byte[1];
+        buf[0] = (byte)b;
+        write(buf, 0, 1);
+    }
+
+    /**
+     * Writes len bytes of buf, starting at off, as one or more
+     * uncompressed chunks of at most CHUNK_SIZE_MAX bytes each.
+     * Nothing is written when len is zero.
+     *
+     * @param       buf         source buffer
+     * @param       off         start offset in buf
+     * @param       len         number of bytes to write
+     *
+     * @throws      IllegalArgumentException
+     *                          off or len is negative, or off + len
+     *                          overflows or exceeds buf.length
+     *
+     * @throws      IOException may be thrown by the underlying stream
+     */
+    public void write(byte[] buf, int off, int len) throws IOException {
+        if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length)
+            throw new IllegalArgumentException();
+
+        // The size field holds (size - 1), so an empty chunk cannot be
+        // expressed; its header would claim 0x10000 bytes of payload.
+        if (len == 0)
+            return;
+
+        // Split on the remaining length. Testing off here (as the code
+        // used to) let oversized chunks through with a truncated size
+        // field and emitted bogus chunks whenever off was large.
+        while (len > CHUNK_SIZE_MAX) {
+            writeChunk(buf, off, CHUNK_SIZE_MAX);
+            off += CHUNK_SIZE_MAX;
+            len -= CHUNK_SIZE_MAX;
+        }
+
+        writeChunk(buf, off, len);
+    }
+
+    /**
+     * Emits one chunk: control byte 0x01, then (len - 1) as two bytes
+     * with the high byte first, then the payload. The caller must keep
+     * len within 1..CHUNK_SIZE_MAX.
+     */
+    private void writeChunk(byte[] buf, int off, int len) throws IOException {
+        out.write(0x01);
+        // NOTE(review): relies on out.write(int) keeping only the low
+        // eight bits, as java.io.OutputStream specifies - confirm for
+        // FinishableOutputStream.
+        out.write((len - 1) >>> 8);
+        out.write(len - 1);
+        out.write(buf, off, len);
+    }
+
+    /**
+     * Writes the 0x00 control byte that ends the stream. Calls after
+     * the first successful one do nothing, so finish() followed by
+     * close() produces a single marker.
+     */
+    private void writeEndMarker() throws IOException {
+        // TODO: Flush incomplete chunk.
+        if (endMarkerWritten)
+            return;
+
+        out.write(0x00);
+        endMarkerWritten = true;
+    }
+
+    /**
+     * Not supported by this implementation.
+     *
+     * @throws      UnsupportedOptionsException always
+     */
+    public void flush() throws IOException {
+        throw new UnsupportedOptionsException(
+                "Flushing LZMA2OutputStream not implemented yet");
+    }
+
+    /**
+     * Writes the end marker if it has not been written yet and
+     * finishes the underlying stream.
+     */
+    public void finish() throws IOException {
+        writeEndMarker();
+        out.finish();
+    }
+
+    /**
+     * Writes the end marker if it has not been written yet and
+     * closes the underlying stream.
+     */
+    public void close() throws IOException {
+        writeEndMarker();
+        out.close();
+    }
+}
diff --git a/third_party/src/main/java/org/tukaani/xz/MemoryLimitException.java b/third_party/src/main/java/org/tukaani/xz/MemoryLimitException.java
new file mode 100644
index 00000000..1b254527
--- /dev/null
+++ b/third_party/src/main/java/org/tukaani/xz/MemoryLimitException.java
@@ -0,0 +1,60 @@
+/*
+ * MemoryLimitException
+ *
+ * Author: Lasse Collin
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz;
+
+/**
+ * Thrown when the memory usage limit given to the XZ decompressor
+ * would be exceeded.
+ *

    + * The amount of memory required and the memory usage limit are + * included in the error detail message in human readable format. + */ +public class MemoryLimitException extends XZIOException { + private static final long serialVersionUID = 3L; + + private int memoryNeeded; + private int memoryLimit; + + /** + * Creates a new MemoryLimitException. + *

    + * The amount of memory needed and the memory usage limit are + * included in the error detail message. + * + * @param memoryNeeded amount of memory needed as kibibytes (KiB) + * @param memoryLimit specified memory usage limit as kibibytes (KiB) + */ + public MemoryLimitException(int memoryNeeded, int memoryLimit) { + super("" + memoryNeeded + " KiB of memory would be needed; limit was " + + memoryLimit + " KiB"); + + this.memoryNeeded = memoryNeeded; + this.memoryLimit = memoryLimit; + } + + /** + * Gets how much memory is required to decompress the data. + * + * @return amount of memory needed as kibibytes (KiB) + */ + public int getMemoryNeeded() { + return memoryNeeded; + } + + /** + * Gets what the memory usage limit was at the time the exception + * was created. + * + * @return memory usage limit as kibibytes (KiB) + */ + public int getMemoryLimit() { + return memoryLimit; + } +} diff --git a/third_party/src/main/java/org/tukaani/xz/RawCoder.java b/third_party/src/main/java/org/tukaani/xz/RawCoder.java new file mode 100644 index 00000000..12c7da8f --- /dev/null +++ b/third_party/src/main/java/org/tukaani/xz/RawCoder.java @@ -0,0 +1,33 @@ +/* + * RawCoder + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. 
+ */
+
+package org.tukaani.xz;
+
+/**
+ * Validation of filter chains.
+ */
+class RawCoder {
+    /**
+     * Checks that a filter chain is acceptable: the chain is not empty,
+     * every filter except the last reports nonLastOK(), the last filter
+     * reports lastOK(), and at most three filters report changesSize().
+     *
+     * @param       filters     filter chain, in order
+     *
+     * @throws      UnsupportedOptionsException
+     *                          the chain breaks one of the rules above
+     */
+    static void validate(FilterCoder[] filters)
+            throws UnsupportedOptionsException {
+        // Without this guard the lastOK() lookup below would index
+        // filters[-1] and fail with ArrayIndexOutOfBoundsException.
+        if (filters.length == 0)
+            throw new UnsupportedOptionsException(
+                    "Unsupported XZ filter chain");
+
+        for (int i = 0; i < filters.length - 1; ++i)
+            if (!filters[i].nonLastOK())
+                throw new UnsupportedOptionsException(
+                        "Unsupported XZ filter chain");
+
+        if (!filters[filters.length - 1].lastOK())
+            throw new UnsupportedOptionsException(
+                    "Unsupported XZ filter chain");
+
+        int changesSizeCount = 0;
+        for (int i = 0; i < filters.length; ++i)
+            if (filters[i].changesSize())
+                ++changesSizeCount;
+
+        if (changesSizeCount > 3)
+            throw new UnsupportedOptionsException(
+                    "Unsupported XZ filter chain");
+    }
+}
diff --git a/third_party/src/main/java/org/tukaani/xz/SingleXZInputStream.java b/third_party/src/main/java/org/tukaani/xz/SingleXZInputStream.java
new file mode 100644
index 00000000..ffac5540
--- /dev/null
+++ b/third_party/src/main/java/org/tukaani/xz/SingleXZInputStream.java
@@ -0,0 +1,285 @@
+/*
+ * SingleXZInputStream
+ *
+ * Author: Lasse Collin
+ *
+ * This file has been put into the public domain.
+ * You can do whatever you want with this file.
+ */
+
+package org.tukaani.xz;
+
+import java.io.InputStream;
+import java.io.DataInputStream;
+import java.io.IOException;
+import java.io.EOFException;
+import org.tukaani.xz.common.DecoderUtil;
+import org.tukaani.xz.common.StreamFlags;
+import org.tukaani.xz.index.IndexHash;
+import org.tukaani.xz.check.Check;
+
+/**
+ * Decompresses exactly one XZ Stream in streamed mode (no seeking).
+ * The decompression stops after the first XZ Stream has been decompressed,
+ * and the read position in the input stream is left at the first byte
+ * after the end of the XZ Stream. This can be useful when XZ data has
+ * been stored inside some other file format or protocol.
+ *

    + * Unless you know what you are doing, don't use this class to decompress + * standalone .xz files. For that purpose, use XZInputStream. + * + * @see XZInputStream + */ +public class SingleXZInputStream extends InputStream { + private InputStream in; + private int memoryLimit; + private StreamFlags streamHeaderFlags; + private Check check; + private BlockInputStream blockDecoder = null; + private IndexHash indexHash = new IndexHash(); + private boolean endReached = false; + private IOException exception = null; + + /** + * Creates a new input stream that decompresses exactly one XZ Stream + * from in. + *

    + * This constructor reads and parses the XZ Stream Header (12 bytes) + * from in. The header of the first Block is not read + * until read is called. + * + * @param in input stream from which XZ-compressed + * data is read + * + * @throws XZFormatException + * input is not in the XZ format + * + * @throws CorruptedInputException + * XZ header CRC32 doesn't match + * + * @throws UnsupportedOptionsException + * XZ header is valid but specifies options + * not supported by this implementation + * + * @throws EOFException + * less than 12 bytes of input was available + * from in + * + * @throws IOException may be thrown by in + */ + public SingleXZInputStream(InputStream in) throws IOException { + initialize(in, -1); + } + + /** + * Creates a new single-stream XZ decompressor with optional + * memory usage limit. + *

    + * This is identical to SingleXZInputStream(InputStream) + * except that this takes also the memoryLimit argument. + * + * @param in input stream from which XZ-compressed + * data is read + * + * @param memoryLimit memory usage limit as kibibytes (KiB) + * or -1 to impose no memory usage limit + * + * @throws XZFormatException + * input is not in the XZ format + * + * @throws CorruptedInputException + * XZ header CRC32 doesn't match + * + * @throws UnsupportedOptionsException + * XZ header is valid but specifies options + * not supported by this implementation + * + * @throws EOFException + * less than 12 bytes of input was available + * from in + * + * @throws IOException may be thrown by in + */ + public SingleXZInputStream(InputStream in, int memoryLimit) + throws IOException { + initialize(in, memoryLimit); + } + + SingleXZInputStream(InputStream in, int memoryLimit, + byte[] streamHeader) throws IOException { + initialize(in, memoryLimit, streamHeader); + } + + private void initialize(InputStream in, int memoryLimit) + throws IOException { + byte[] streamHeader = new byte[DecoderUtil.STREAM_HEADER_SIZE]; + new DataInputStream(in).readFully(streamHeader); + initialize(in, memoryLimit, streamHeader); + } + + private void initialize(InputStream in, int memoryLimit, + byte[] streamHeader) throws IOException { + this.in = in; + this.memoryLimit = memoryLimit; + streamHeaderFlags = DecoderUtil.decodeStreamHeader(streamHeader); + check = Check.getInstance(streamHeaderFlags.checkType); + } + + /** + * Gets the ID of the integrity check used in this XZ Stream. + * + * @return the Check ID specified in the XZ Stream Header + */ + public int getCheckType() { + return streamHeaderFlags.checkType; + } + + /** + * Gets the name of the integrity check used in this XZ Stream. 
+ * + * @return the name of the check specified in the XZ Stream Header + */ + public String getCheckName() { + return check.getName(); + } + + /** + * Decompresses the next byte from this input stream. + *

    + * Reading lots of data with read() from this input stream + * may be inefficient. Wrap it in java.io.BufferedInputStream + * if you need to read lots of data one byte at a time. + * + * @return the next decompressed byte, or -1 + * to indicate the end of the compressed stream + * + * @throws CorruptedInputException + * @throws UnsupportedOptionsException + * @throws MemoryLimitException + * + * @throws EOFException + * compressed input is truncated or corrupt + * + * @throws IOException may be thrown by in + */ + public int read() throws IOException { + byte[] buf = new byte[1]; + return read(buf, 0, 1) == -1 ? -1 : (buf[0] & 0xFF); + } + + /** + * Decompresses into an array of bytes. + *

    + * If len is zero, no bytes are read and 0 + * is returned. Otherwise this will try to decompress len + * bytes of uncompressed data. Less than len bytes may + * be read only in the following situations: + *

      + *
    • The end of the compressed data was reached successfully.
    • + *
    • An error is detected after at least one but fewer than len + * bytes have already been successfully decompressed. + * The next call with non-zero len will immediately + * throw the pending exception.
    • + *
    • An exception is thrown.
    • + *
    + * + * @param buf target buffer for uncompressed data + * @param off start offset in buf + * @param len maximum number of uncompressed bytes to read + * + * @return number of bytes read, or -1 to indicate + * the end of the compressed stream + * + * @throws CorruptedInputException + * @throws UnsupportedOptionsException + * @throws MemoryLimitException + * + * @throws EOFException + * compressed input is truncated or corrupt + * + * @throws IOException may be thrown by in + */ + public int read(byte[] buf, int off, int len) throws IOException { + if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length) + throw new IllegalArgumentException(); + + if (len == 0) + return 0; + + if (exception != null) + throw exception; + + if (endReached) + return -1; + + int size = 0; + + try { + while (len > 0) { + if (blockDecoder == null) { + try { + blockDecoder = new BlockInputStream(in, check, + memoryLimit); + } catch (IndexIndicatorException e) { + indexHash.validate(in); + validateStreamFooter(); + endReached = true; + return size > 0 ? size : -1; + } + } + + int ret = blockDecoder.read(buf, off, len); + + if (ret > 0) { + size += ret; + off += ret; + len -= ret; + } else if (ret == -1) { + indexHash.add(blockDecoder.getUnpaddedSize(), + blockDecoder.getUncompressedSize()); + blockDecoder = null; + } + } + } catch (IOException e) { + exception = e; + if (size == 0) + throw e; + } + + return size; + } + + private void validateStreamFooter() throws IOException { + byte[] buf = new byte[DecoderUtil.STREAM_HEADER_SIZE]; + new DataInputStream(in).readFully(buf); + StreamFlags streamFooterFlags = DecoderUtil.decodeStreamFooter(buf); + + if (!DecoderUtil.areStreamFlagsEqual(streamHeaderFlags, + streamFooterFlags) + || indexHash.getIndexSize() != streamFooterFlags.backwardSize) + throw new CorruptedInputException( + "XZ Stream Footer does not match Stream Header"); + } + + /** + * Returns the number of uncompressed bytes that can be read + * without blocking. 
The value is returned with an assumption + * that the compressed input data will be valid. If the compressed + * data is corrupt, CorruptedInputException may get + * thrown before the number of bytes claimed to be available have + * been read from this input stream. + * + * @return the number of uncompressed bytes that can be read + * without blocking + */ + public int available() throws IOException { + return blockDecoder == null ? 0 : blockDecoder.available(); + } + + /** + * Calls in.close(). + */ + public void close() throws IOException { + in.close(); + } +} diff --git a/third_party/src/main/java/org/tukaani/xz/UnsupportedOptionsException.java b/third_party/src/main/java/org/tukaani/xz/UnsupportedOptionsException.java new file mode 100644 index 00000000..9aa16e8c --- /dev/null +++ b/third_party/src/main/java/org/tukaani/xz/UnsupportedOptionsException.java @@ -0,0 +1,34 @@ +/* + * UnsupportedOptionsException + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package org.tukaani.xz; + +/** + * Thrown when compression options not supported by this implementation + * are detected. Some other implementation might support those options. + */ +public class UnsupportedOptionsException extends XZIOException { + private static final long serialVersionUID = 3L; + + /** + * Creates a new UnsupportedOptionsException with null + * as its error detail message. + */ + public UnsupportedOptionsException() {} + + /** + * Creates a new UnsupportedOptionsException with the given + * error detail message. 
+ * + * @param s error detail message + */ + public UnsupportedOptionsException(String s) { + super(s); + } +} diff --git a/third_party/src/main/java/org/tukaani/xz/XZ.java b/third_party/src/main/java/org/tukaani/xz/XZ.java new file mode 100644 index 00000000..4e0857ff --- /dev/null +++ b/third_party/src/main/java/org/tukaani/xz/XZ.java @@ -0,0 +1,53 @@ +/* + * XZ + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package org.tukaani.xz; + +/** + * XZ constants. + */ +public class XZ { + /** + * XZ Header Magic Bytes begin a XZ file. + * This can be useful to detect XZ compressed data. + */ + public static final byte[] HEADER_MAGIC = { + (byte)0xFD, '7', 'z', 'X', 'Z', '\0' }; + + /** + * XZ Footer Magic Bytes are the last bytes of a XZ Stream. + */ + public static final byte[] FOOTER_MAGIC = { 'Y', 'Z' }; + + /** + * Integrity check ID indicating that no integrity check is calculated. + *

    + * Omitting the integrity check is strongly discouraged except when + * the integrity of the data will be verified by other means anyway, + * and calculating the check twice would be useless. + */ + public static final int CHECK_NONE = 0; + + /** + * Integrity check ID for CRC32. + */ + public static final int CHECK_CRC32 = 1; + + /** + * Integrity check ID for CRC64. + */ + public static final int CHECK_CRC64 = 4; + + /** + * Integrity check ID for SHA-256. + */ + public static final int CHECK_SHA256 = 10; + + private XZ() {} +} diff --git a/third_party/src/main/java/org/tukaani/xz/XZFormatException.java b/third_party/src/main/java/org/tukaani/xz/XZFormatException.java new file mode 100644 index 00000000..6f63020b --- /dev/null +++ b/third_party/src/main/java/org/tukaani/xz/XZFormatException.java @@ -0,0 +1,24 @@ +/* + * XZFormatException + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package org.tukaani.xz; + +/** + * Thrown when the input data is not in the XZ format. + */ +public class XZFormatException extends XZIOException { + private static final long serialVersionUID = 3L; + + /** + * Creates a new exception with the default error detail message. + */ + public XZFormatException() { + super("Input is not in the XZ format"); + } +} diff --git a/third_party/src/main/java/org/tukaani/xz/XZIOException.java b/third_party/src/main/java/org/tukaani/xz/XZIOException.java new file mode 100644 index 00000000..1801c70c --- /dev/null +++ b/third_party/src/main/java/org/tukaani/xz/XZIOException.java @@ -0,0 +1,28 @@ +/* + * XZIOException + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package org.tukaani.xz; + +/** + * Generic IOException specific to this package. + * All IOExceptions thrown by this package are extended from XZIOException. 
+ * This way it is easier to distinguish exceptions thrown by the XZ code + * from other IOExceptions. + */ +public class XZIOException extends java.io.IOException { + private static final long serialVersionUID = 3L; + + public XZIOException() { + super(); + } + + public XZIOException(String s) { + super(s); + } +} diff --git a/third_party/src/main/java/org/tukaani/xz/XZInputStream.java b/third_party/src/main/java/org/tukaani/xz/XZInputStream.java new file mode 100644 index 00000000..3c44af40 --- /dev/null +++ b/third_party/src/main/java/org/tukaani/xz/XZInputStream.java @@ -0,0 +1,257 @@ +/* + * XZInputStream + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package org.tukaani.xz; + +import java.io.InputStream; +import java.io.DataInputStream; +import java.io.IOException; +import java.io.EOFException; +import org.tukaani.xz.common.DecoderUtil; + +/** + * Decompresses a .xz file in streamed mode (no seeking). + *

    + * Use this to decompress regular standalone .xz files. This reads from + * its input stream until the end of the input or until an error occurs. + * This supports decompressing concatenated .xz files. + * + * @see SingleXZInputStream + */ +public class XZInputStream extends InputStream { + private final int memoryLimit; + private final InputStream in; + private SingleXZInputStream xzIn; + private boolean endReached = false; + private IOException exception = null; + + /** + * Creates a new input stream that decompresses XZ-compressed data + * from in. + *

    + * This constructor reads and parses the XZ Stream Header (12 bytes) + * from in. The header of the first Block is not read + * until read is called. + * + * @param in input stream from which XZ-compressed + * data is read + * + * @throws XZFormatException + * input is not in the XZ format + * + * @throws CorruptedInputException + * XZ header CRC32 doesn't match + * + * @throws UnsupportedOptionsException + * XZ header is valid but specifies options + * not supported by this implementation + * + * @throws EOFException + * less than 12 bytes of input was available + * from in + * + * @throws IOException may be thrown by in + */ + public XZInputStream(InputStream in) throws IOException { + this.in = in; + this.memoryLimit = -1; + this.xzIn = new SingleXZInputStream(in, -1); + } + + /** + * Creates a new input stream that decompresses XZ-compressed data + * from in. + *

    + * This is identical to XZInputStream(InputStream) except + * that this takes also the memoryLimit argument. + * + * @param in input stream from which XZ-compressed + * data is read + * + * @param memoryLimit memory usage limit as kibibytes (KiB) + * or -1 to impose no memory usage limit + * + * @throws XZFormatException + * input is not in the XZ format + * + * @throws CorruptedInputException + * XZ header CRC32 doesn't match + * + * @throws UnsupportedOptionsException + * XZ header is valid but specifies options + * not supported by this implementation + * + * @throws EOFException + * less than 12 bytes of input was available + * from in + * + * @throws IOException may be thrown by in + */ + public XZInputStream(InputStream in, int memoryLimit) throws IOException { + this.in = in; + this.memoryLimit = memoryLimit; + this.xzIn = new SingleXZInputStream(in, memoryLimit); + } + + /** + * Decompresses the next byte from this input stream. + *

    + * Reading lots of data with read() from this input stream + * may be inefficient. Wrap it in java.io.BufferedInputStream + * if you need to read lots of data one byte at a time. + * + * @return the next decompressed byte, or -1 + * to indicate the end of the compressed stream + * + * @throws CorruptedInputException + * @throws UnsupportedOptionsException + * @throws MemoryLimitException + * + * @throws EOFException + * compressed input is truncated or corrupt + * + * @throws IOException may be thrown by in + */ + public int read() throws IOException { + byte[] buf = new byte[1]; + return read(buf, 0, 1) == -1 ? -1 : (buf[0] & 0xFF); + } + + /** + * Decompresses into an array of bytes. + *

    + * If len is zero, no bytes are read and 0 + * is returned. Otherwise this will try to decompress len + * bytes of uncompressed data. Less than len bytes may + * be read only in the following situations: + *

      + *
    • The end of the compressed data was reached successfully.
    • + *
    • An error is detected after at least one but fewer than len + * bytes have already been successfully decompressed. + * The next call with non-zero len will immediately + * throw the pending exception.
    • + *
    • An exception is thrown.
    • + *
    + * + * @param buf target buffer for uncompressed data + * @param off start offset in buf + * @param len maximum number of uncompressed bytes to read + * + * @return number of bytes read, or -1 to indicate + * the end of the compressed stream + * + * @throws CorruptedInputException + * @throws UnsupportedOptionsException + * @throws MemoryLimitException + * + * @throws EOFException + * compressed input is truncated or corrupt + * + * @throws IOException may be thrown by in + */ + public int read(byte[] buf, int off, int len) throws IOException { + if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length) + throw new IllegalArgumentException(); + + if (len == 0) + return 0; + + if (exception != null) + throw exception; + + if (endReached) + return -1; + + int size = 0; + + try { + while (len > 0) { + if (xzIn == null) { + prepareNextStream(); + if (endReached) + return size == 0 ? -1 : size; + } + + int ret = xzIn.read(buf, off, len); + + if (ret > 0) { + size += ret; + off += ret; + len -= ret; + } else if (ret == -1) { + xzIn = null; + } + } + } catch (IOException e) { + exception = e; + if (size == 0) + throw e; + } + + return size; + } + + private void prepareNextStream() throws IOException { + DataInputStream inData = new DataInputStream(in); + byte[] buf = new byte[DecoderUtil.STREAM_HEADER_SIZE]; + + // The size of Stream Padding must be a multiple of four bytes, + // all bytes zero. + do { + // First try to read one byte to see if we have reached the end + // of the file. + int ret = inData.read(buf, 0, 1); + if (ret == -1) { + endReached = true; + return; + } + + // Since we got one byte of input, there must be at least + // three more available in a valid file. + inData.readFully(buf, 1, 3); + + } while (buf[0] == 0 && buf[1] == 0 && buf[2] == 0 && buf[3] == 0); + + // Not all bytes are zero. In a valid Stream it indicates the + // beginning of the next Stream. Read the rest of the Stream Header + // and initialize the XZ decoder. 
+ inData.readFully(buf, 4, DecoderUtil.STREAM_HEADER_SIZE - 4); + + try { + xzIn = new SingleXZInputStream(in, memoryLimit, buf); + } catch (XZFormatException e) { + // Since this isn't the first .xz Stream, it is more + // logical to tell that the data is corrupt. + throw new CorruptedInputException( + "Garbage after a valid XZ Stream"); + } + } + + /** + * Returns the number of uncompressed bytes that can be read + * without blocking. The value is returned with an assumption + * that the compressed input data will be valid. If the compressed + * data is corrupt, CorruptedInputException may get + * thrown before the number of bytes claimed to be available have + * been read from this input stream. + * + * @return the number of uncompressed bytes that can be read + * without blocking + */ + public int available() throws IOException { + return xzIn == null ? 0 : xzIn.available(); + } + + /** + * Calls in.close(). + */ + public void close() throws IOException { + in.close(); + } +} diff --git a/third_party/src/main/java/org/tukaani/xz/XZOutputStream.java b/third_party/src/main/java/org/tukaani/xz/XZOutputStream.java new file mode 100644 index 00000000..09ec3175 --- /dev/null +++ b/third_party/src/main/java/org/tukaani/xz/XZOutputStream.java @@ -0,0 +1,290 @@ +/* + * XZOutputStream + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package org.tukaani.xz; + +import java.io.OutputStream; +import java.io.IOException; + +import org.tukaani.xz.common.EncoderUtil; +import org.tukaani.xz.common.StreamFlags; +import org.tukaani.xz.check.Check; +import org.tukaani.xz.index.IndexEncoder; + +/** + * Compresses into the .xz file format. 
+ */ +public class XZOutputStream extends FinishableOutputStream { + private OutputStream out; + private StreamFlags streamFlags = new StreamFlags(); + private Check check; + private IndexEncoder index = new IndexEncoder(); + private FilterEncoder[] filters; + private BlockOutputStream blockEncoder = null; + private IOException exception = null; + private boolean finished = false; + + /** + * Creates a new output stream that compressed data into the .xz format. + * This is takes options for one filter as an argument. This constructor + * is equivalent to passing a single-member filterOptions array to the + * other constructor. + * + * @param out output stream to which the compressed data + * will be written + * + * @param filterOptions + * filter options to use + * + * @param checkType type of the integrity check, + * for example XZ.CHECK_CRC64 + * + * @throws UnsupportedOptionsException + * invalid filter chain + * + * @throws IOException may be thrown from out + */ + public XZOutputStream(OutputStream out, FilterOptions filterOptions, + int checkType) throws IOException { + FilterOptions[] ops = new FilterOptions[1]; + ops[0] = filterOptions; + initialize(out, ops, checkType); + } + + /** + * Creates a new output stream that compressed data into the .xz format. + * This takes an array of filter options, allowing the caller to specify + * a filter chain with 1-4 filters. 
+ * + * @param out output stream to which the compressed data + * will be written + * + * @param filterOptions + * array of filter options to use + * + * @param checkType type of the integrity check, + * for example XZ.CHECK_CRC64 + * + * @throws UnsupportedOptionsException + * invalid filter chain + * + * @throws IOException may be thrown from out + */ + public XZOutputStream(OutputStream out, FilterOptions[] filterOptions, + int checkType) throws IOException { + initialize(out, filterOptions, checkType); + } + + private void initialize(OutputStream out, FilterOptions[] filterOptions, + int checkType) throws IOException { + this.out = out; + updateFilters(filterOptions); + + streamFlags.checkType = checkType; + check = Check.getInstance(checkType); + + encodeStreamHeader(); + } + + /** + * Updates the filter chain. + *

    + * Currently this cannot be used to update e.g. LZMA2 options in the + * middle of a XZ Block. Use flush() to finish the current + * XZ Block before calling this function. The new filter chain will then + * be used for the next XZ Block. + */ + public void updateFilters(FilterOptions[] filterOptions) + throws XZIOException { + if (blockEncoder != null) + throw new UnsupportedOptionsException("Changing filter options " + + "in the middle of a XZ Block not implemented"); + + if (filterOptions.length < 1 || filterOptions.length > 4) + throw new UnsupportedOptionsException( + "XZ filter chain must be 1-4 filters"); + + FilterEncoder[] newFilters = new FilterEncoder[filterOptions.length]; + for (int i = 0; i < filterOptions.length; ++i) + newFilters[i] = filterOptions[i].getFilterEncoder(); + + RawCoder.validate(newFilters); + filters = newFilters; + } + + /** + * Writes one byte to be compressed. + * + * @throws XZIOException + * XZ stream has grown too big + * @throws IOException may be thrown by the underlying output stream + */ + public void write(int b) throws IOException { + byte[] buf = new byte[] { (byte)b }; + write(buf, 0, 1); + } + + /** + * Writes an array of bytes to be compressed. + * The compressors tend to do internal buffering and thus the written + * data won't be readable from the compressed output immediately. + * Use flush() to force everything written so far to + * be written to the underlaying output stream, but be aware that + * flushing reduces compression ratio. 
+ * + * @param buf buffer of bytes to be written + * @param off start offset in buf + * @param len number of bytes to write + * + * @throws XZIOException + * XZ stream has grown too big + * @throws XZIOException + * finish() or close() + * was already called + * @throws IOException may be thrown by the underlying output stream + */ + public void write(byte[] buf, int off, int len) throws IOException { + if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length) + throw new IllegalArgumentException(); + + if (len == 0) + return; + + if (finished) + exception = new XZIOException( + "XZOutputStream.write was called on a finished stream"); + + if (exception != null) + throw exception; + + if (blockEncoder == null) + blockEncoder = new BlockOutputStream(out, filters, check); + + try { + blockEncoder.write(buf, off, len); + } catch (IOException e) { + exception = e; + throw e; + } + } + + /** + * Flushes the encoder and calls out.flush(). + *

    + * FIXME: I haven't decided yet how this will work in the final version. + * In the current implementation, flushing finishes the current .xz Block. + * This is equivalent to LZMA_FULL_FLUSH in liblzma (XZ Utils). + * Equivalent of liblzma's LZMA_SYNC_FLUSH might be implemented in + * the future, and perhaps should be what flush() should do. + */ + public void flush() throws IOException { + if (exception != null) + throw exception; + + if (blockEncoder != null) { + try { + blockEncoder.finish(); + index.add(blockEncoder.getUnpaddedSize(), + blockEncoder.getUncompressedSize()); + blockEncoder = null; + } catch (IOException e) { + exception = e; + throw e; + } + } + + out.flush(); + } + + /** + * Finishes compression without closing the underlying stream. + * No more data can be written to this stream after finishing + * (calling write with an empty buffer is OK). + *

    + * Repeated calls to finish() do nothing unless + * an exception was thrown by this stream earlier. In that case + * the same exception is thrown again. + *

    + * After finishing, the stream may be closed normally with + * close(). If the stream will be closed anyway, there + * usually is no need to call finish() separately. + */ + public void finish() throws IOException { + if (!finished) { + // flush() checks for pending exceptions so we don't need to + // worry about it here. + flush(); + + try { + index.encode(out); + encodeStreamFooter(); + finished = true; + } catch (IOException e) { + exception = e; + throw e; + } + } + } + + /** + * Finishes compression and closes the underlying stream. + * The underlying stream out is closed even if finishing + * fails. If both finishing and closing fail, the exception thrown + * by finish() is thrown and the exception from the failed + * out.close() is lost. + */ + public void close() throws IOException { + // If finish() throws an exception, it stores the exception to + // the variable "exception". So we can ignore the possible + // exception here. + try { + finish(); + } catch (IOException e) {} + + try { + out.close(); + } catch (IOException e) { + // Remember the exception but only if there is no previous + // pending exception. 
+ if (exception == null) + exception = e; + } + + if (exception != null) + throw exception; + } + + private void encodeStreamFlags(byte[] buf, int off) { + buf[off] = 0x00; + buf[off + 1] = (byte)streamFlags.checkType; + } + + private void encodeStreamHeader() throws IOException { + out.write(XZ.HEADER_MAGIC); + + byte[] buf = new byte[2]; + encodeStreamFlags(buf, 0); + out.write(buf); + + EncoderUtil.writeCRC32(out, buf); + } + + private void encodeStreamFooter() throws IOException { + byte[] buf = new byte[6]; + long backwardSize = index.getIndexSize() / 4 - 1; + for (int i = 0; i < 4; ++i) + buf[i] = (byte)(backwardSize >>> (i * 8)); + + encodeStreamFlags(buf, 4); + + EncoderUtil.writeCRC32(out, buf); + out.write(buf); + out.write(XZ.FOOTER_MAGIC); + } +} diff --git a/third_party/src/main/java/org/tukaani/xz/check/CRC32.java b/third_party/src/main/java/org/tukaani/xz/check/CRC32.java new file mode 100644 index 00000000..87bc6567 --- /dev/null +++ b/third_party/src/main/java/org/tukaani/xz/check/CRC32.java @@ -0,0 +1,33 @@ +/* + * CRC32 + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. 
+ */ + +package org.tukaani.xz.check; + +public class CRC32 extends Check { + private java.util.zip.CRC32 state = new java.util.zip.CRC32(); + + public CRC32() { + size = 4; + name = "CRC32"; + } + + public void update(byte[] buf, int off, int len) { + state.update(buf, off, len); + } + + public byte[] finish() { + long value = state.getValue(); + byte[] buf = new byte[] { (byte)(value), + (byte)(value >>> 8), + (byte)(value >>> 16), + (byte)(value >>> 24) }; + state.reset(); + return buf; + } +} diff --git a/third_party/src/main/java/org/tukaani/xz/check/CRC64.java b/third_party/src/main/java/org/tukaani/xz/check/CRC64.java new file mode 100644 index 00000000..c120ca9c --- /dev/null +++ b/third_party/src/main/java/org/tukaani/xz/check/CRC64.java @@ -0,0 +1,54 @@ +/* + * CRC64 + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package org.tukaani.xz.check; + +public class CRC64 extends Check { + private static final long poly = 0xC96C5795D7870F42L; + private static final long crcTable[] = new long[256]; + + private long crc = -1; + + static { + for (int b = 0; b < crcTable.length; ++b) { + long r = b; + for (int i = 0; i < 8; ++i) { + if ((r & 1) == 1) + r = (r >>> 1) ^ poly; + else + r >>>= 1; + } + + crcTable[b] = r; + } + } + + public CRC64() { + size = 8; + name = "CRC64"; + } + + public void update(byte[] buf, int off, int len) { + int end = off + len; + + while (off < end) + crc = crcTable[(buf[off++] ^ (int)crc) & 0xFF] ^ (crc >>> 8); + } + + public byte[] finish() { + long value = ~crc; + crc = -1; + + byte[] buf = new byte[8]; + for (int i = 0; i < buf.length; ++i) + buf[i] = (byte)(value >> (i * 8)); + + return buf; + } +} diff --git a/third_party/src/main/java/org/tukaani/xz/check/Check.java b/third_party/src/main/java/org/tukaani/xz/check/Check.java new file mode 100644 index 00000000..f2fe4bae --- /dev/null +++ 
b/third_party/src/main/java/org/tukaani/xz/check/Check.java @@ -0,0 +1,57 @@ +/* + * Check + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package org.tukaani.xz.check; + +import org.tukaani.xz.XZ; +import org.tukaani.xz.UnsupportedOptionsException; + +public abstract class Check { + int size; + String name; + + public abstract void update(byte[] buf, int off, int len); + public abstract byte[] finish(); + + public void update(byte[] buf) { + update(buf, 0, buf.length); + } + + public int getSize() { + return size; + } + + public String getName() { + return name; + } + + public static Check getInstance(int checkType) + throws UnsupportedOptionsException { + switch (checkType) { + case XZ.CHECK_NONE: + return new None(); + + case XZ.CHECK_CRC32: + return new CRC32(); + + case XZ.CHECK_CRC64: + return new CRC64(); + + case XZ.CHECK_SHA256: + try { + return new SHA256(); + } catch (java.security.NoSuchAlgorithmException e) {} + + break; + } + + throw new UnsupportedOptionsException( + "Unsupported Check ID " + checkType); + } +} diff --git a/third_party/src/main/java/org/tukaani/xz/check/None.java b/third_party/src/main/java/org/tukaani/xz/check/None.java new file mode 100644 index 00000000..b07c8e66 --- /dev/null +++ b/third_party/src/main/java/org/tukaani/xz/check/None.java @@ -0,0 +1,24 @@ +/* + * None + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. 
+ */ + +package org.tukaani.xz.check; + +public class None extends Check { + public None() { + size = 0; + name = "None"; + } + + public void update(byte[] buf, int off, int len) {} + + public byte[] finish() { + byte[] empty = new byte[0]; + return empty; + } +} diff --git a/third_party/src/main/java/org/tukaani/xz/check/SHA256.java b/third_party/src/main/java/org/tukaani/xz/check/SHA256.java new file mode 100644 index 00000000..02f0592a --- /dev/null +++ b/third_party/src/main/java/org/tukaani/xz/check/SHA256.java @@ -0,0 +1,30 @@ +/* + * SHA256 + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package org.tukaani.xz.check; + +public class SHA256 extends Check { + private java.security.MessageDigest sha256; + + public SHA256() throws java.security.NoSuchAlgorithmException { + size = 32; + name = "SHA-256"; + sha256 = java.security.MessageDigest.getInstance("SHA-256"); + } + + public void update(byte[] buf, int off, int len) { + sha256.update(buf, off, len); + } + + public byte[] finish() { + byte[] buf = sha256.digest(); + sha256.reset(); + return buf; + } +} diff --git a/third_party/src/main/java/org/tukaani/xz/common/DecoderUtil.java b/third_party/src/main/java/org/tukaani/xz/common/DecoderUtil.java new file mode 100644 index 00000000..77ba4413 --- /dev/null +++ b/third_party/src/main/java/org/tukaani/xz/common/DecoderUtil.java @@ -0,0 +1,121 @@ +/* + * DecoderUtil + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. 
+ */ + +package org.tukaani.xz.common; + +import java.io.InputStream; +import java.io.IOException; +import java.io.EOFException; +import java.util.zip.CRC32; +import org.tukaani.xz.XZ; +import org.tukaani.xz.XZFormatException; +import org.tukaani.xz.CorruptedInputException; +import org.tukaani.xz.UnsupportedOptionsException; + +public class DecoderUtil extends Util { + public static boolean isCRC32Valid(byte[] buf, int off, int len, + int ref_off) { + CRC32 crc32 = new CRC32(); + crc32.update(buf, off, len); + long value = crc32.getValue(); + + for (int i = 0; i < 4; ++i) + if ((byte)(value >>> (i * 8)) != buf[ref_off + i]) + return false; + + return true; + } + + public static StreamFlags decodeStreamHeader(byte[] buf) + throws IOException { + for (int i = 0; i < XZ.HEADER_MAGIC.length; ++i) + if (buf[i] != XZ.HEADER_MAGIC[i]) + throw new XZFormatException(); + + if (!isCRC32Valid(buf, XZ.HEADER_MAGIC.length, 2, + XZ.HEADER_MAGIC.length + 2)) + throw new CorruptedInputException("XZ Stream Header is corrupt"); + + try { + return decodeStreamFlags(buf, XZ.HEADER_MAGIC.length); + } catch (UnsupportedOptionsException e) { + throw new UnsupportedOptionsException( + "Unsupported options in XZ Stream Header"); + } + } + + public static StreamFlags decodeStreamFooter(byte[] buf) + throws IOException { + if (buf[10] != XZ.FOOTER_MAGIC[0] || buf[11] != XZ.FOOTER_MAGIC[1]) { + // NOTE: The exception could be XZFormatException too. + // It depends on the situation which one is better. 
+ throw new CorruptedInputException("XZ Stream Footer is corrupt"); + } + + if (!isCRC32Valid(buf, 4, 6, 0)) + throw new CorruptedInputException("XZ Stream Footer is corrupt"); + + StreamFlags streamFlags; + try { + streamFlags = decodeStreamFlags(buf, 8); + } catch (UnsupportedOptionsException e) { + throw new UnsupportedOptionsException( + "Unsupported options in XZ Stream Footer"); + } + + streamFlags.backwardSize = 0; + for (int i = 0; i < 4; ++i) + streamFlags.backwardSize |= (buf[i + 4] & 0xFF) << (i * 8); + + streamFlags.backwardSize = (streamFlags.backwardSize + 1) * 4; + + return streamFlags; + } + + private static StreamFlags decodeStreamFlags(byte[] buf, int off) + throws UnsupportedOptionsException { + if (buf[off] != 0x00 || (buf[off + 1] & 0xFF) >= 0x10) + throw new UnsupportedOptionsException(); + + StreamFlags streamFlags = new StreamFlags(); + streamFlags.checkType = buf[off + 1]; + + return streamFlags; + } + + public static boolean areStreamFlagsEqual(StreamFlags a, StreamFlags b) { + // backwardSize is intentionally not compared. + return a.checkType == b.checkType; + } + + public static long decodeVLI(InputStream in) throws IOException { + int b = in.read(); + if (b == -1) + throw new EOFException(); + + long num = b & 0x7F; + int i = 0; + + while ((b & 0x80) != 0x00) { + if (++i >= VLI_SIZE_MAX) + throw new CorruptedInputException(); + + b = in.read(); + if (b == -1) + throw new EOFException(); + + if (b == 0x00) + throw new CorruptedInputException(); + + num |= (long)(b & 0x7F) << (i * 7); + } + + return num; + } +} diff --git a/third_party/src/main/java/org/tukaani/xz/common/EncoderUtil.java b/third_party/src/main/java/org/tukaani/xz/common/EncoderUtil.java new file mode 100644 index 00000000..57f688b5 --- /dev/null +++ b/third_party/src/main/java/org/tukaani/xz/common/EncoderUtil.java @@ -0,0 +1,36 @@ +/* + * EncoderUtil + * + * Author: Lasse Collin + * + * This file has been put into the public domain. 
+ * You can do whatever you want with this file. + */ + +package org.tukaani.xz.common; + +import java.io.OutputStream; +import java.io.IOException; +import java.util.zip.CRC32; + +public class EncoderUtil extends Util { + public static void writeCRC32(OutputStream out, byte[] buf) + throws IOException { + CRC32 crc32 = new CRC32(); + crc32.update(buf); + long value = crc32.getValue(); + + for (int i = 0; i < 4; ++i) + out.write((byte)(value >>> (i * 8))); + } + + public static void encodeVLI(OutputStream out, long num) + throws IOException { + while (num >= 0x80) { + out.write((byte)(num | 0x80)); + num >>>= 7; + } + + out.write((byte)num); + } +} diff --git a/third_party/src/main/java/org/tukaani/xz/common/StreamFlags.java b/third_party/src/main/java/org/tukaani/xz/common/StreamFlags.java new file mode 100644 index 00000000..b306987d --- /dev/null +++ b/third_party/src/main/java/org/tukaani/xz/common/StreamFlags.java @@ -0,0 +1,15 @@ +/* + * StreamFlags + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package org.tukaani.xz.common; + +public class StreamFlags { + public int checkType = -1; + public long backwardSize = -1; +} diff --git a/third_party/src/main/java/org/tukaani/xz/common/Util.java b/third_party/src/main/java/org/tukaani/xz/common/Util.java new file mode 100644 index 00000000..c4324ce0 --- /dev/null +++ b/third_party/src/main/java/org/tukaani/xz/common/Util.java @@ -0,0 +1,28 @@ +/* + * Util + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. 
+ */ + +package org.tukaani.xz.common; + +public class Util { + public static final int STREAM_HEADER_SIZE = 12; + public static final long BACKWARD_SIZE_MAX = 1L << 34; + public static final int BLOCK_HEADER_SIZE_MAX = 1024; + public static final long VLI_MAX = Long.MAX_VALUE; + public static final int VLI_SIZE_MAX = 9; + + public static int getVLISize(long num) { + int size = 0; + do { + ++size; + num >>= 7; + } while (num != 0); + + return size; + } +} diff --git a/third_party/src/main/java/org/tukaani/xz/delta/DeltaCoder.java b/third_party/src/main/java/org/tukaani/xz/delta/DeltaCoder.java new file mode 100644 index 00000000..e3b300b0 --- /dev/null +++ b/third_party/src/main/java/org/tukaani/xz/delta/DeltaCoder.java @@ -0,0 +1,27 @@ +/* + * DeltaCoder + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package org.tukaani.xz.delta; + +abstract class DeltaCoder { + static final int DISTANCE_MIN = 1; + static final int DISTANCE_MAX = 256; + static final int DISTANCE_MASK = DISTANCE_MAX - 1; + + final int distance; + final byte[] history = new byte[DISTANCE_MAX]; + int pos = 0; + + public DeltaCoder(int distance) { + if (distance < DISTANCE_MIN || distance > DISTANCE_MAX) + throw new IllegalArgumentException(); + + this.distance = distance; + } +} diff --git a/third_party/src/main/java/org/tukaani/xz/delta/DeltaDecoder.java b/third_party/src/main/java/org/tukaani/xz/delta/DeltaDecoder.java new file mode 100644 index 00000000..154cbf34 --- /dev/null +++ b/third_party/src/main/java/org/tukaani/xz/delta/DeltaDecoder.java @@ -0,0 +1,24 @@ +/* + * DeltaDecoder + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. 
+ */ + +package org.tukaani.xz.delta; + +public class DeltaDecoder extends DeltaCoder { + public DeltaDecoder(int distance) { + super(distance); + } + + public void decode(byte[] buf, int off, int len) { + int end = off + len; + for (int i = off; i < end; ++i) { + buf[i] += history[(distance + pos) & DISTANCE_MASK]; + history[pos-- & DISTANCE_MASK] = buf[i]; + } + } +} diff --git a/third_party/src/main/java/org/tukaani/xz/index/IndexBase.java b/third_party/src/main/java/org/tukaani/xz/index/IndexBase.java new file mode 100644 index 00000000..e08f17ce --- /dev/null +++ b/third_party/src/main/java/org/tukaani/xz/index/IndexBase.java @@ -0,0 +1,56 @@ +/* + * IndexBase + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package org.tukaani.xz.index; + +import org.tukaani.xz.common.Util; +import org.tukaani.xz.XZIOException; + +abstract class IndexBase { + private final XZIOException invalidIndexException; + long blocksSum = 0; + long uncompressedSum = 0; + long indexListSize = 0; + long recordCount = 0; + + IndexBase(XZIOException invalidIndexException) { + this.invalidIndexException = invalidIndexException; + } + + private long getUnpaddedIndexSize() { + // Index Indicator + Number of Records + List of Records + CRC32 + return 1 + Util.getVLISize(recordCount) + indexListSize + 4; + } + + public long getIndexSize() { + return (getUnpaddedIndexSize() + 3) & ~3; + } + + long getStreamSize() { + return Util.STREAM_HEADER_SIZE + blocksSum + getIndexSize() + + Util.STREAM_HEADER_SIZE; + } + + int getIndexPaddingSize() { + return (int)((4 - getUnpaddedIndexSize()) & 3); + } + + void add(long unpaddedSize, long uncompressedSize) throws XZIOException { + blocksSum += (unpaddedSize + 3) & ~3; + uncompressedSum += uncompressedSize; + indexListSize += Util.getVLISize(unpaddedSize) + + Util.getVLISize(uncompressedSize); + ++recordCount; + + if (blocksSum < 0 || uncompressedSum < 0 + || 
getIndexSize() > Util.BACKWARD_SIZE_MAX + || getStreamSize() < 0) + throw invalidIndexException; + } +} diff --git a/third_party/src/main/java/org/tukaani/xz/index/IndexEncoder.java b/third_party/src/main/java/org/tukaani/xz/index/IndexEncoder.java new file mode 100644 index 00000000..d2453abb --- /dev/null +++ b/third_party/src/main/java/org/tukaani/xz/index/IndexEncoder.java @@ -0,0 +1,59 @@ +/* + * IndexEncoder + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package org.tukaani.xz.index; + +import java.io.OutputStream; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.zip.CheckedOutputStream; +import org.tukaani.xz.common.EncoderUtil; +import org.tukaani.xz.XZIOException; + +public class IndexEncoder extends IndexBase { + private ArrayList records = new ArrayList(); + + public IndexEncoder() { + super(new XZIOException("XZ Stream or its Index has grown too big")); + } + + public void add(long unpaddedSize, long uncompressedSize) + throws XZIOException { + super.add(unpaddedSize, uncompressedSize); + records.add(new IndexRecord(unpaddedSize, uncompressedSize)); + } + + public void encode(OutputStream out) throws IOException { + java.util.zip.CRC32 crc32 = new java.util.zip.CRC32(); + CheckedOutputStream outChecked = new CheckedOutputStream(out, crc32); + + // Index Indicator + outChecked.write(0x00); + + // Number of Records + EncoderUtil.encodeVLI(outChecked, recordCount); + + // List of Records + for (Iterator i = records.iterator(); i.hasNext(); ) { + IndexRecord record = (IndexRecord)i.next(); + EncoderUtil.encodeVLI(outChecked, record.unpadded); + EncoderUtil.encodeVLI(outChecked, record.uncompressed); + } + + // Index Padding + for (int i = getIndexPaddingSize(); i > 0; --i) + outChecked.write(0x00); + + // CRC32 + long value = crc32.getValue(); + for (int i = 0; i < 4; ++i) + out.write((byte)(value >>> (i * 
8))); + } +} diff --git a/third_party/src/main/java/org/tukaani/xz/index/IndexHash.java b/third_party/src/main/java/org/tukaani/xz/index/IndexHash.java new file mode 100644 index 00000000..ab168c69 --- /dev/null +++ b/third_party/src/main/java/org/tukaani/xz/index/IndexHash.java @@ -0,0 +1,94 @@ +/* + * IndexHash + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package org.tukaani.xz.index; + +import java.io.InputStream; +import java.io.DataInputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.zip.CheckedInputStream; +import org.tukaani.xz.common.DecoderUtil; +import org.tukaani.xz.XZIOException; +import org.tukaani.xz.CorruptedInputException; + +public class IndexHash extends IndexBase { + private org.tukaani.xz.check.Check hash; + + public IndexHash() { + super(new CorruptedInputException()); + + try { + hash = new org.tukaani.xz.check.SHA256(); + } catch (java.security.NoSuchAlgorithmException e) { + hash = new org.tukaani.xz.check.CRC32(); + } + } + + public void add(long unpaddedSize, long uncompressedSize) + throws XZIOException { + super.add(unpaddedSize, uncompressedSize); + + ByteBuffer buf = ByteBuffer.allocate(2 * 8); + buf.putLong(unpaddedSize); + buf.putLong(uncompressedSize); + hash.update(buf.array()); + } + + public void validate(InputStream in) throws IOException { + // Index Indicator (0x00) has already been read by BlockInputStream + // so add 0x00 to the CRC32 here. + java.util.zip.CRC32 crc32 = new java.util.zip.CRC32(); + crc32.update('\0'); + CheckedInputStream inChecked = new CheckedInputStream(in, crc32); + + // Get and validate the Number of Records field. 
+ long storedRecordCount = DecoderUtil.decodeVLI(inChecked); + if (storedRecordCount != recordCount) + throw new CorruptedInputException("XZ Index is corrupt"); + + // Decode and hash the Index field and compare it to + // the hash value calculated from the decoded Blocks. + IndexHash stored = new IndexHash(); + for (long i = 0; i < recordCount; ++i) { + long unpaddedSize = DecoderUtil.decodeVLI(inChecked); + long uncompressedSize = DecoderUtil.decodeVLI(inChecked); + + try { + stored.add(unpaddedSize, uncompressedSize); + } catch (XZIOException e) { + throw new CorruptedInputException("XZ Index is corrupt"); + } + + if (stored.blocksSum > blocksSum + || stored.uncompressedSum > uncompressedSum + || stored.indexListSize > indexListSize) + throw new CorruptedInputException("XZ Index is corrupt"); + } + + if (stored.blocksSum != blocksSum + || stored.uncompressedSum != uncompressedSum + || stored.indexListSize != indexListSize + || !Arrays.equals(stored.hash.finish(), hash.finish())) + throw new CorruptedInputException("XZ Index is corrupt"); + + // Index Padding + DataInputStream inData = new DataInputStream(inChecked); + for (int i = getIndexPaddingSize(); i > 0; --i) + if (inData.readUnsignedByte() != 0x00) + throw new CorruptedInputException("XZ Index is corrupt"); + + // CRC32 + long value = crc32.getValue(); + for (int i = 0; i < 4; ++i) + if (((value >>> (i * 8)) & 0xFF) != inData.readUnsignedByte()) + throw new CorruptedInputException("XZ Index is corrupt"); + } +} diff --git a/third_party/src/main/java/org/tukaani/xz/index/IndexRecord.java b/third_party/src/main/java/org/tukaani/xz/index/IndexRecord.java new file mode 100644 index 00000000..97629cc3 --- /dev/null +++ b/third_party/src/main/java/org/tukaani/xz/index/IndexRecord.java @@ -0,0 +1,20 @@ +/* + * IndexRecord + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. 
+ */ + +package org.tukaani.xz.index; + +public class IndexRecord { + public final long unpadded; + public final long uncompressed; + + IndexRecord(long unpadded, long uncompressed) { + this.unpadded = unpadded; + this.uncompressed = uncompressed; + } +} diff --git a/third_party/src/main/java/org/tukaani/xz/lz/LZDecoder.java b/third_party/src/main/java/org/tukaani/xz/lz/LZDecoder.java new file mode 100644 index 00000000..ec9af08b --- /dev/null +++ b/third_party/src/main/java/org/tukaani/xz/lz/LZDecoder.java @@ -0,0 +1,126 @@ +/* + * LZDecoder + * + * Authors: Lasse Collin + * Igor Pavlov + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package org.tukaani.xz.lz; + +import java.io.DataInputStream; +import java.io.IOException; +import org.tukaani.xz.CorruptedInputException; + +public final class LZDecoder { + private byte[] buf; + private int start = 0; + private int pos = 0; + private int full = 0; + private int limit = 0; + private int pendingLen = 0; + private int pendingDist = 0; + + public LZDecoder(int dictSize, byte[] presetDict) { + buf = new byte[dictSize]; + + if (presetDict != null) { + pos = Math.min(presetDict.length, dictSize); + full = pos; + start = pos; + System.arraycopy(presetDict, presetDict.length - pos, buf, 0, pos); + } + } + + public void reset() { + start = 0; + pos = 0; + full = 0; + limit = 0; + buf[buf.length - 1] = 0x00; + } + + public void setLimit(int outMax) { + if (buf.length - pos <= outMax) + limit = buf.length; + else + limit = pos + outMax; + } + + public boolean hasSpace() { + return pos < limit; + } + + public boolean hasPending() { + return pendingLen > 0; + } + + public int getPos() { + return pos; + } + + public int getByte(int dist) { + int offset = pos - dist - 1; + if (dist >= pos) + offset += buf.length; + + return buf[offset] & 0xFF; + } + + public void putByte(byte b) { + buf[pos++] = b; + + if (full < pos) + full = pos; + } + + public void repeat(int dist, 
int len) throws IOException { + if (dist < 0 || dist >= full) + throw new CorruptedInputException(); + + int left = Math.min(limit - pos, len); + pendingLen = len - left; + pendingDist = dist; + + int back = pos - dist - 1; + if (dist >= pos) + back += buf.length; + + do { + buf[pos++] = buf[back++]; + if (back == buf.length) + back = 0; + } while (--left > 0); + + if (full < pos) + full = pos; + } + + public void repeatPending() throws IOException { + if (pendingLen > 0) + repeat(pendingDist, pendingLen); + } + + public void copyUncompressed(DataInputStream inData, int len) + throws IOException { + int copySize = Math.min(buf.length - pos, len); + inData.readFully(buf, pos, copySize); + pos += copySize; + + if (full < pos) + full = pos; + } + + public int flush(byte[] out, int outOff) { + int copySize = pos - start; + if (pos == buf.length) + pos = 0; + + System.arraycopy(buf, start, out, outOff, copySize); + start = pos; + + return copySize; + } +} diff --git a/third_party/src/main/java/org/tukaani/xz/lzma/LZMACoder.java b/third_party/src/main/java/org/tukaani/xz/lzma/LZMACoder.java new file mode 100644 index 00000000..674680e0 --- /dev/null +++ b/third_party/src/main/java/org/tukaani/xz/lzma/LZMACoder.java @@ -0,0 +1,139 @@ +/* + * LZMACoder + * + * Authors: Lasse Collin + * Igor Pavlov + * + * This file has been put into the public domain. + * You can do whatever you want with this file. 
+ */ + +package org.tukaani.xz.lzma; + +import org.tukaani.xz.rangecoder.RangeCoder; + +abstract class LZMACoder { + static final int POS_STATES_MAX = 1 << 4; + + static final int MATCH_LEN_MIN = 2; + static final int MATCH_LEN_MAX = MATCH_LEN_MIN + LengthCoder.LOW_SYMBOLS + + LengthCoder.MID_SYMBOLS + + LengthCoder.HIGH_SYMBOLS - 1; + + static final int DIST_STATES = 4; + static final int DIST_SLOTS = 1 << 6; + static final int DIST_MODEL_START = 4; + static final int DIST_MODEL_END = 14; + + static final int ALIGN_BITS = 4; + static final int ALIGN_SIZE = 1 << ALIGN_BITS; + static final int ALIGN_MASK = ALIGN_SIZE - 1; + + static final int REPS = 4; + + final int posMask; + + final int[] rep = new int[4]; + final State state = new State(); + + final short[][] isMatch = new short[State.STATES][POS_STATES_MAX]; + final short[] isRep = new short[State.STATES]; + final short[] isRep0 = new short[State.STATES]; + final short[] isRep1 = new short[State.STATES]; + final short[] isRep2 = new short[State.STATES]; + final short[][] isRep0Long = new short[State.STATES][POS_STATES_MAX]; + final short[][] distSlots = new short[DIST_STATES][DIST_SLOTS]; + final short[][] distSpecial = { new short[2], new short[2], + new short[4], new short[4], + new short[8], new short[8], + new short[16], new short[16], + new short[32], new short[32] }; + final short[] distAlign = new short[ALIGN_SIZE]; + + static final int getDistState(int len) { + return len < DIST_STATES + MATCH_LEN_MIN + ? 
len - MATCH_LEN_MIN + : DIST_STATES - 1; + } + + LZMACoder(int pb) { + posMask = (1 << pb) - 1; + } + + void reset() { + rep[0] = 0; + rep[1] = 0; + rep[2] = 0; + rep[3] = 0; + state.reset(); + + for (int i = 0; i < isMatch.length; ++i) + RangeCoder.initProbs(isMatch[i]); + + RangeCoder.initProbs(isRep); + RangeCoder.initProbs(isRep0); + RangeCoder.initProbs(isRep1); + RangeCoder.initProbs(isRep2); + + for (int i = 0; i < isRep0Long.length; ++i) + RangeCoder.initProbs(isRep0Long[i]); + + for (int i = 0; i < distSlots.length; ++i) + RangeCoder.initProbs(distSlots[i]); + + for (int i = 0; i < distSpecial.length; ++i) + RangeCoder.initProbs(distSpecial[i]); + + RangeCoder.initProbs(distAlign); + } + + + abstract class LiteralCoder { + private final int lc; + private final int literalPosMask; + + LiteralCoder(int lc, int lp) { + this.lc = lc; + this.literalPosMask = (1 << lp) - 1; + } + + final int getSubcoderIndex(int prevByte, int pos) { + int low = prevByte >> (8 - lc); + int high = (pos & literalPosMask) << lc; + return low + high; + } + + + abstract class LiteralSubcoder { + final short[] probs = new short[0x300]; + + void reset() { + RangeCoder.initProbs(probs); + } + } + } + + + abstract class LengthCoder { + static final int LOW_SYMBOLS = 1 << 3; + static final int MID_SYMBOLS = 1 << 3; + static final int HIGH_SYMBOLS = 1 << 8; + + final short[] choice = new short[2]; + final short[][] low = new short[POS_STATES_MAX][LOW_SYMBOLS]; + final short[][] mid = new short[POS_STATES_MAX][MID_SYMBOLS]; + final short[] high = new short[HIGH_SYMBOLS]; + + void reset() { + RangeCoder.initProbs(choice); + + for (int i = 0; i < low.length; ++i) + RangeCoder.initProbs(low[i]); + + for (int i = 0; i < low.length; ++i) + RangeCoder.initProbs(mid[i]); + + RangeCoder.initProbs(high); + } + } +} diff --git a/third_party/src/main/java/org/tukaani/xz/lzma/LZMADecoder.java b/third_party/src/main/java/org/tukaani/xz/lzma/LZMADecoder.java new file mode 100644 index 00000000..68337277 
--- /dev/null +++ b/third_party/src/main/java/org/tukaani/xz/lzma/LZMADecoder.java @@ -0,0 +1,189 @@ +/* + * LZMADecoder + * + * Authors: Lasse Collin + * Igor Pavlov + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package org.tukaani.xz.lzma; + +import java.io.IOException; +import org.tukaani.xz.lz.LZDecoder; +import org.tukaani.xz.rangecoder.RangeDecoder; +import org.tukaani.xz.CorruptedInputException; + +public final class LZMADecoder extends LZMACoder { + private final LZDecoder lz; + private final RangeDecoder rc; + private final LiteralDecoder literalDecoder; + private final LengthDecoder matchLenDecoder = new LengthDecoder(); + private final LengthDecoder repLenDecoder = new LengthDecoder(); + + public LZMADecoder(LZDecoder lz, RangeDecoder rc, int lc, int lp, int pb) { + super(pb); + this.lz = lz; + this.rc = rc; + this.literalDecoder = new LiteralDecoder(lc, lp); + reset(); + } + + public void reset() { + super.reset(); + literalDecoder.reset(); + matchLenDecoder.reset(); + repLenDecoder.reset(); + } + + public void decode() throws IOException { + lz.repeatPending(); + + while (lz.hasSpace()) { + int posState = lz.getPos() & posMask; + + if (rc.decodeBit(isMatch[state.get()], posState) == 0) { + literalDecoder.decode(); + } else { + int len = rc.decodeBit(isRep, state.get()) == 0 + ? 
decodeMatch(posState) + : decodeRepMatch(posState); + lz.repeat(rep[0], len); + } + } + + rc.normalize(); + + if (!rc.isInBufferOK()) + throw new CorruptedInputException(); + } + + private int decodeMatch(int posState) throws IOException { + state.updateMatch(); + + rep[3] = rep[2]; + rep[2] = rep[1]; + rep[1] = rep[0]; + + int len = matchLenDecoder.decode(posState); + int distSlot = rc.decodeBitTree(distSlots[getDistState(len)]); + + if (distSlot < DIST_MODEL_START) { + rep[0] = distSlot; + } else { + int limit = (distSlot >> 1) - 1; + rep[0] = (2 | (distSlot & 1)) << limit; + + if (distSlot < DIST_MODEL_END) { + rep[0] |= rc.decodeReverseBitTree( + distSpecial[distSlot - DIST_MODEL_START]); + } else { + rep[0] |= rc.decodeDirectBits(limit - ALIGN_BITS) + << ALIGN_BITS; + rep[0] |= rc.decodeReverseBitTree(distAlign); + } + } + + return len; + } + + private int decodeRepMatch(int posState) throws IOException { + if (rc.decodeBit(isRep0, state.get()) == 0) { + if (rc.decodeBit(isRep0Long[state.get()], posState) == 0) { + state.updateShortRep(); + return 1; + } + } else { + int tmp; + + if (rc.decodeBit(isRep1, state.get()) == 0) { + tmp = rep[1]; + } else { + if (rc.decodeBit(isRep2, state.get()) == 0) { + tmp = rep[2]; + } else { + tmp = rep[3]; + rep[3] = rep[2]; + } + + rep[2] = rep[1]; + } + + rep[1] = rep[0]; + rep[0] = tmp; + } + + state.updateLongRep(); + + return repLenDecoder.decode(posState); + } + + + private class LiteralDecoder extends LiteralCoder { + LiteralSubdecoder[] subdecoders; + + LiteralDecoder(int lc, int lp) { + super(lc, lp); + + subdecoders = new LiteralSubdecoder[1 << (lc + lp)]; + for (int i = 0; i < subdecoders.length; ++i) + subdecoders[i] = new LiteralSubdecoder(); + } + + void reset() { + for (int i = 0; i < subdecoders.length; ++i) + subdecoders[i].reset(); + } + + void decode() throws IOException { + int i = getSubcoderIndex(lz.getByte(0), lz.getPos()); + subdecoders[i].decode(); + } + + + private class LiteralSubdecoder extends 
LiteralSubcoder { + void decode() throws IOException { + int symbol = 1; + + if (state.isLiteral()) { + do { + symbol = (symbol << 1) | rc.decodeBit(probs, symbol); + } while (symbol < 0x100); + + } else { + int matchByte = lz.getByte(rep[0]); + int offset = 0x100; + int matchBit; + int bit; + + do { + matchByte <<= 1; + matchBit = matchByte & offset; + bit = rc.decodeBit(probs, offset + matchBit + symbol); + symbol = (symbol << 1) | bit; + offset &= (0 - bit) ^ ~matchBit; + } while (symbol < 0x100); + } + + lz.putByte((byte)symbol); + state.updateLiteral(); + } + } + } + + + private class LengthDecoder extends LengthCoder { + int decode(int posState) throws IOException { + if (rc.decodeBit(choice, 0) == 0) + return rc.decodeBitTree(low[posState]) + MATCH_LEN_MIN; + + if (rc.decodeBit(choice, 1) == 0) + return rc.decodeBitTree(mid[posState]) + + MATCH_LEN_MIN + LOW_SYMBOLS; + + return rc.decodeBitTree(high) + + MATCH_LEN_MIN + LOW_SYMBOLS + MID_SYMBOLS; + } + } +} diff --git a/third_party/src/main/java/org/tukaani/xz/lzma/State.java b/third_party/src/main/java/org/tukaani/xz/lzma/State.java new file mode 100644 index 00000000..43895ab0 --- /dev/null +++ b/third_party/src/main/java/org/tukaani/xz/lzma/State.java @@ -0,0 +1,65 @@ +/* + * State + * + * Authors: Lasse Collin + * Igor Pavlov + * + * This file has been put into the public domain. + * You can do whatever you want with this file. 
+ */ + +package org.tukaani.xz.lzma; + +final class State { + static final int STATES = 12; + + private static final int LIT_STATES = 7; + + private static final int LIT_LIT = 0; + private static final int MATCH_LIT_LIT = 1; + private static final int REP_LIT_LIT = 2; + private static final int SHORTREP_LIT_LIT = 3; + private static final int MATCH_LIT = 4; + private static final int REP_LIT = 5; + private static final int SHORTREP_LIT = 6; + private static final int LIT_MATCH = 7; + private static final int LIT_LONGREP = 8; + private static final int LIT_SHORTREP = 9; + private static final int NONLIT_MATCH = 10; + private static final int NONLIT_REP = 11; + + private int state; + + void reset() { + state = LIT_LIT; + } + + int get() { + return state; + } + + void updateLiteral() { + if (state <= SHORTREP_LIT_LIT) + state = LIT_LIT; + else if (state <= LIT_SHORTREP) + state -= 3; + else + state -= 6; + } + + void updateMatch() { + state = state < LIT_STATES ? LIT_MATCH : NONLIT_MATCH; + } + + void updateLongRep() { + state = state < LIT_STATES ? LIT_LONGREP : NONLIT_REP; + } + + void updateShortRep() { + state = state < LIT_STATES ? LIT_SHORTREP : NONLIT_REP; + } + + boolean isLiteral() { + return state < LIT_STATES; + } +} diff --git a/third_party/src/main/java/org/tukaani/xz/package-info.java b/third_party/src/main/java/org/tukaani/xz/package-info.java new file mode 100644 index 00000000..8c6caea2 --- /dev/null +++ b/third_party/src/main/java/org/tukaani/xz/package-info.java @@ -0,0 +1,21 @@ +/** + * XZ data compression support. + *

    + * In the (very) long term, this aims to be a complete implementation of + * XZ data compression in Java. Currently only streamed decompression is + * supported. + *

    + * For the latest source code, see the + * home page of XZ in Java. + * + *

    Decompression notes

    + * + * If you are decompressing complete files and your application knows + * exactly how much uncompressed data there should be, it is still good + * to try reading one more byte by calling read() and checking + * that it returns -1. This way the decompressor will parse the + * file footers and verify the integrity checks, giving the caller more + * confidence that the uncompressed data is valid. (This advice seems to + * apply to java.util.zip.GZIPInputStream too.) + */ +package org.tukaani.xz; diff --git a/third_party/src/main/java/org/tukaani/xz/rangecoder/RangeCoder.java b/third_party/src/main/java/org/tukaani/xz/rangecoder/RangeCoder.java new file mode 100644 index 00000000..3ce4fa77 --- /dev/null +++ b/third_party/src/main/java/org/tukaani/xz/rangecoder/RangeCoder.java @@ -0,0 +1,25 @@ +/* + * RangeCoder + * + * Authors: Lasse Collin + * Igor Pavlov + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package org.tukaani.xz.rangecoder; + +public abstract class RangeCoder { + static final int SHIFT_BITS = 8; + static final int TOP_MASK = 0xFF000000; + static final int BIT_MODEL_TOTAL_BITS = 11; + static final int BIT_MODEL_TOTAL = 1 << BIT_MODEL_TOTAL_BITS; + static final short PROB_INIT = (short)(BIT_MODEL_TOTAL / 2); + static final int MOVE_BITS = 5; + + public static final void initProbs(short[] probs) { + for (int i = 0; i < probs.length; ++i) + probs[i] = PROB_INIT; + } +} diff --git a/third_party/src/main/java/org/tukaani/xz/rangecoder/RangeDecoder.java b/third_party/src/main/java/org/tukaani/xz/rangecoder/RangeDecoder.java new file mode 100644 index 00000000..f9ea4e56 --- /dev/null +++ b/third_party/src/main/java/org/tukaani/xz/rangecoder/RangeDecoder.java @@ -0,0 +1,129 @@ +/* + * RangeDecoder + * + * Authors: Lasse Collin + * Igor Pavlov + * + * This file has been put into the public domain. + * You can do whatever you want with this file. 
+ */ + +package org.tukaani.xz.rangecoder; + +import java.io.DataInputStream; +import java.io.IOException; +import org.tukaani.xz.CorruptedInputException; + +public final class RangeDecoder extends RangeCoder { + private static final int INIT_SIZE = 5; + + private final byte[] buf; + private int pos = 0; + private int end = 0; + + private int range = 0; + private int code = 0; + + public RangeDecoder(int inputSizeMax) { + buf = new byte[inputSizeMax - INIT_SIZE]; + } + + public void prepareInputBuffer(DataInputStream in, int len) + throws IOException { + if (len < INIT_SIZE) + throw new CorruptedInputException(); + + if (in.readUnsignedByte() != 0x00) + throw new CorruptedInputException(); + + code = in.readInt(); + range = 0xFFFFFFFF; + + pos = 0; + end = len - INIT_SIZE; + in.readFully(buf, 0, end); + } + + public boolean isInBufferOK() { + return pos <= end; + } + + public boolean isFinished() { + return pos == end && code == 0; + } + + public void normalize() throws IOException { + if ((range & TOP_MASK) == 0) { + try { + // If the input is corrupt, this might throw + // ArrayIndexOutOfBoundsException. + code = (code << SHIFT_BITS) | (buf[pos++] & 0xFF); + range <<= SHIFT_BITS; + } catch (ArrayIndexOutOfBoundsException e) { + throw new CorruptedInputException(); + } + } + } + + public int decodeBit(short[] probs, int index) throws IOException { + normalize(); + + int prob = probs[index]; + int bound = (range >>> BIT_MODEL_TOTAL_BITS) * prob; + int bit; + + // Compare code and bound as if they were unsigned 32-bit integers. 
+ if ((code ^ 0x80000000) < (bound ^ 0x80000000)) { + range = bound; + probs[index] = (short)( + prob + ((BIT_MODEL_TOTAL - prob) >>> MOVE_BITS)); + bit = 0; + } else { + range -= bound; + code -= bound; + probs[index] = (short)(prob - (prob >>> MOVE_BITS)); + bit = 1; + } + + return bit; + } + + public int decodeBitTree(short[] probs) throws IOException { + int symbol = 1; + + do { + symbol = (symbol << 1) | decodeBit(probs, symbol); + } while (symbol < probs.length); + + return symbol - probs.length; + } + + public int decodeReverseBitTree(short[] probs) throws IOException { + int symbol = 1; + int i = 0; + int result = 0; + + do { + int bit = decodeBit(probs, symbol); + symbol = (symbol << 1) | bit; + result |= bit << i++; + } while (symbol < probs.length); + + return result; + } + + public int decodeDirectBits(int count) throws IOException { + int result = 0; + + do { + normalize(); + + range >>>= 1; + int t = (code - range) >>> 31; + code -= range & (t - 1); + result = (result << 1) | (1 - t); + } while (--count != 0); + + return result; + } +}