Update index.html for search engine (#25)
Co-authored-by: vlofgren <vlofgren@gmail.com> Reviewed-on: https://git.marginalia.nu/marginalia/marginalia.nu/pulls/25
This commit is contained in:
parent
9474f39225
commit
5c2f2d558f
18
README.md
18
README.md
@ -3,14 +3,18 @@
|
||||
This is the source code for marginalia.nu, including the [search engine](https://search.marginalia.nu),
|
||||
the [MEMEX/gemini server](https://memex.marginalia.nu), and the [encyclopedia service](https://encyclopedia.marginalia.nu).
|
||||
|
||||
The canonical git server for this project is [https://git.marginalia.nu](https://git.marginalia.nu),
|
||||
it is fine to mirror it on other hosts, but if you have issues or questions
|
||||
that is where you want to go.
|
||||
The aim of the project is to develop new and alternative discovery methods for the Internet.
|
||||
It's an experimental workshop as much as it is a public service; the overarching goal is to
|
||||
elevate the more human, non-commercial sides of the Internet.
|
||||
|
||||
As it stands now, the project is a bit of a mess as it wasn't developed
|
||||
with the intention of going open source; a lot of tests and so on make
|
||||
assumptions about the directory structure, much configuration is hard coded
|
||||
and so on. Please stand by. A lot of the mess is fairly superficial.
|
||||
The canonical git server for this project is [https://git.marginalia.nu](https://git.marginalia.nu).
|
||||
It is fine to mirror it on other hosts, but if you have issues or questions,
|
||||
git.marginalia.nu is where you want to go.
|
||||
|
||||
As it stands now, the project is still being set up and is a bit of a mess as
|
||||
it wasn't developed with the intention of going open source; a lot of tests
|
||||
and so on make assumptions about the directory structure, much configuration
|
||||
is hard coded and so on. Please stand by. A lot of the mess is fairly superficial.
|
||||
|
||||
## Contributing
|
||||
|
||||
|
@ -66,6 +66,6 @@ public class RateLimiter {
|
||||
private Bucket createBucket() {
|
||||
var refill = Refill.greedy(1, Duration.ofSeconds(refillRate));
|
||||
var bw = Bandwidth.classic(capacity, refill);
|
||||
return Bucket4j.builder().addLimit(bw).build();
|
||||
return Bucket.builder().addLimit(bw).build();
|
||||
}
|
||||
}
|
||||
|
@ -331,8 +331,6 @@ public class EdgeIndexService extends Service {
|
||||
final Map<Integer, List<EdgeSearchResultItem>> results = new HashMap<>();
|
||||
final DomainResultCountFilter localFilter = new DomainResultCountFilter(specs.limitByDomain);
|
||||
|
||||
boolean debug = sq.searchTermsExclude.contains("special:debug");
|
||||
|
||||
for (int i : specBuckets) {
|
||||
int foundResultsCount = results.values().stream().mapToInt(List::size).sum();
|
||||
|
||||
@ -341,37 +339,15 @@ public class EdgeIndexService extends Service {
|
||||
|
||||
List<EdgeSearchResultItem> resultsForBucket = new ArrayList<>(specs.limitByBucket);
|
||||
|
||||
if (debug) {
|
||||
getQuery(i, budget, sq.block, lv -> localFilter.filterRawValue(i, lv), searchTerms)
|
||||
.peek(l -> logger.info("Considering {}", Long.toHexString(l)))
|
||||
.mapToObj(id -> new EdgeSearchResultItem(i, sq.termSize(), id))
|
||||
.filter(ri -> {
|
||||
if (seenResults.contains(ri.url.getId())) {
|
||||
logger.info("Seen before: {}", Integer.toHexString(ri.url.getId()));
|
||||
return false;
|
||||
}
|
||||
else if (!localFilter.test(i, domainCountFilter, ri)) {
|
||||
logger.info("DCF: {} - {}:{}", ri.blockId, Integer.toHexString(ri.domain.getId()), Integer.toHexString(ri.url.getId()));
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
})
|
||||
.limit(specs.limitTotal * 3L)
|
||||
.distinct()
|
||||
.limit(Math.min(specs.limitByBucket
|
||||
- results.values().stream().mapToInt(Collection::size).sum(), limit - foundResultsCount))
|
||||
.forEach(resultsForBucket::add);
|
||||
}
|
||||
else {
|
||||
getQuery(i, budget, sq.block, lv -> localFilter.filterRawValue(i, lv), searchTerms)
|
||||
.mapToObj(id -> new EdgeSearchResultItem(i, sq.termSize(), id))
|
||||
.filter(ri -> !seenResults.contains(ri.url.getId()) && localFilter.test(i, domainCountFilter, ri))
|
||||
.limit(specs.limitTotal * 3L)
|
||||
.distinct()
|
||||
.limit(Math.min(specs.limitByBucket
|
||||
- results.values().stream().mapToInt(Collection::size).sum(), limit - foundResultsCount))
|
||||
.forEach(resultsForBucket::add);
|
||||
}
|
||||
getQuery(i, budget, sq.block, lv -> localFilter.filterRawValue(i, lv), searchTerms)
|
||||
.mapToObj(id -> new EdgeSearchResultItem(i, sq.termSize(), id))
|
||||
.filter(ri -> !seenResults.contains(ri.url.getId()) && localFilter.test(i, domainCountFilter, ri))
|
||||
.limit(specs.limitTotal * 3L)
|
||||
.distinct()
|
||||
.limit(Math.min(specs.limitByBucket
|
||||
- results.values().stream().mapToInt(Collection::size).sum(), limit - foundResultsCount))
|
||||
.forEach(resultsForBucket::add);
|
||||
|
||||
|
||||
for (var result : resultsForBucket) {
|
||||
seenResults.add(result.url.getId());
|
||||
|
@ -61,8 +61,12 @@
|
||||
existed. </p>
|
||||
<p>
|
||||
The software for this search engine is all custom-built, and all crawling and indexing is
|
||||
done in-house.
|
||||
done in-house. The project is open source. Feel free to poke about in the <a
|
||||
href="https://git.marginalia.nu/marginalia/marginalia.nu">source code</a> or contribute
|
||||
to the development!
|
||||
</p>
|
||||
<p>Consider <a href="https://memex.marginalia.nu/projects/edge/supporting.gmi">supporting the
|
||||
project</a>!</p>
|
||||
</div>
|
||||
<div class="utils">
|
||||
<a href="https://memex.marginalia.nu/projects/edge/about.gmi">Read More</a>
|
||||
@ -98,11 +102,6 @@
|
||||
<section class="card">
|
||||
<h2>Updates</h2>
|
||||
<div class="info">
|
||||
<p>☛ The web design of the search engine has been completely overhauled. For the most part, this should
|
||||
result in even smaller page loads, and better accessibility and easier navigation, but it may still
|
||||
be a bit rough in some browsers, if you do find any bugs or accessibility problems, please let me
|
||||
know. You can reach me at <tt><a href="mailto:kontakt@marginalia.nu">kontakt@marginalia.nu</a></tt>.
|
||||
</p>
|
||||
<p>☛ The <a href="https://search.marginalia.nu/explore/random">Random Mode</a> has been overhauled, and is
|
||||
quite entertaining. I encourage you to give it a spin. </p>
|
||||
<p>☛ A simple <a href="https://api.marginalia.nu/">public API</a> is now available.</p>
|
||||
@ -116,6 +115,8 @@
|
||||
<h2>Publicity, Discussion and Events</h2>
|
||||
<div class="info">
|
||||
<dl>
|
||||
<dt><a href="https://news.ycombinator.com/item?id=31536626" rel="nofollow">Marginalia Goes Open Source</a></dt>
|
||||
<dd>Hacker News, 2022-05-28</dd>
|
||||
<dt><a href="https://www.youtube.com/watch?v=rTSEr0cRJY8" rel="nofollow">You Should Check Out the Indie Web</a> 🎞️</dt>
|
||||
<dd>YouTube, You've Got Kat, 2022-03-15 </dd>
|
||||
<dt>
|
||||
@ -137,10 +138,10 @@
|
||||
</dt>
|
||||
<dd>Clive Thompson OneZero, 2021-09-16</dd>
|
||||
<dt>
|
||||
<a href="https://news.ycombinator.com/item?id=28550764" rel="nofollow">Hacker News Discussion</a>
|
||||
<a href="https://news.ycombinator.com/item?id=28550764" rel="nofollow"> A search engine that favors text-heavy sites and punishes modern web design</a>
|
||||
</dt>
|
||||
<dd>
|
||||
2021-09-16
|
||||
Hacker News, 2021-09-16
|
||||
</dd>
|
||||
</dl>
|
||||
</div>
|
||||
|
Loading…
Reference in New Issue
Block a user