(adjacencies) Clean up AdjacenciesLoader
Make JDBC batching more consistent, and add a test case for the loader.
This commit is contained in:
parent
b6253b03c2
commit
dc773c5c20
@ -34,4 +34,8 @@ dependencies {
|
||||
testImplementation libs.bundles.slf4j.test
|
||||
testImplementation libs.bundles.junit
|
||||
testImplementation libs.mockito
|
||||
|
||||
testImplementation platform('org.testcontainers:testcontainers-bom:1.17.4')
|
||||
testImplementation 'org.testcontainers:mariadb:1.17.4'
|
||||
testImplementation 'org.testcontainers:junit-jupiter:1.17.4'
|
||||
}
|
||||
|
@ -13,17 +13,16 @@ public class AdjacenciesLoader {
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(AdjacenciesLoader.class);
|
||||
|
||||
final HikariDataSource dataSource;
|
||||
final LinkedBlockingDeque<WebsiteAdjacenciesCalculator.DomainSimilarities> similaritiesLinkedBlockingDeque = new LinkedBlockingDeque<>(100);
|
||||
final Thread loaderThread;
|
||||
private final HikariDataSource dataSource;
|
||||
private final LinkedBlockingDeque<WebsiteAdjacenciesCalculator.DomainSimilarities> similaritiesLinkedBlockingDeque = new LinkedBlockingDeque<>(100);
|
||||
private final Thread loaderThread;
|
||||
|
||||
volatile boolean running = true;
|
||||
|
||||
public AdjacenciesLoader(HikariDataSource dataSource) {
|
||||
this.dataSource = dataSource;
|
||||
|
||||
loaderThread = new Thread(this::insertThreadRun, "Adjacencies Loader Thread");
|
||||
loaderThread.start();
|
||||
loaderThread = Thread.ofPlatform().name("Adjacencies Loader Thread").start(this::insertThreadRun);
|
||||
}
|
||||
|
||||
@SneakyThrows
|
||||
@ -47,18 +46,30 @@ public class AdjacenciesLoader {
|
||||
INSERT INTO EC_DOMAIN_NEIGHBORS_TMP (DOMAIN_ID, NEIGHBOR_ID, RELATEDNESS) VALUES (?, ?, ?)
|
||||
"""))
|
||||
{
|
||||
while (running || !similaritiesLinkedBlockingDeque.isEmpty()) {
|
||||
var item = similaritiesLinkedBlockingDeque.pollFirst(1, TimeUnit.SECONDS);
|
||||
if (item == null) continue;
|
||||
int itemCount = 0;
|
||||
|
||||
for (; item != null; item = similaritiesLinkedBlockingDeque.pollFirst()) {
|
||||
while (running || !similaritiesLinkedBlockingDeque.isEmpty()) {
|
||||
for (var item = similaritiesLinkedBlockingDeque.pollFirst(1, TimeUnit.SECONDS);
|
||||
item != null;
|
||||
item = similaritiesLinkedBlockingDeque.pollFirst())
|
||||
{
|
||||
for (var similarity : item.similarities()) {
|
||||
stmt.setInt(1, item.domainId());
|
||||
stmt.setInt(2, similarity.domainId());
|
||||
stmt.setDouble(3, similarity.value());
|
||||
stmt.addBatch();
|
||||
itemCount++;
|
||||
}
|
||||
|
||||
if (itemCount++ > 1000) {
|
||||
stmt.executeBatch();
|
||||
itemCount = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Flush remaining items
|
||||
if (itemCount > 0) {
|
||||
stmt.executeBatch();
|
||||
}
|
||||
}
|
||||
|
@ -134,8 +134,8 @@ public class WebsiteAdjacenciesCalculator {
|
||||
return weightedProduct(weights, a, b) / Math.sqrt(a.mulAndSum(weights) * b.mulAndSum(weights));
|
||||
}
|
||||
|
||||
record DomainSimilarities(int domainId, List<DomainSimilarity> similarities) {};
|
||||
record DomainSimilarity(int domainId, double value) {};
|
||||
public record DomainSimilarities(int domainId, List<DomainSimilarity> similarities) {};
|
||||
public record DomainSimilarity(int domainId, double value) {};
|
||||
|
||||
@SneakyThrows
|
||||
private void findAdjacentDtoS(int domainId, Consumer<DomainSimilarities> andThen) {
|
||||
|
@ -0,0 +1,86 @@
|
||||
package nu.marginalia.adjacencies;
|
||||
|
||||
import com.zaxxer.hikari.HikariConfig;
|
||||
import com.zaxxer.hikari.HikariDataSource;
|
||||
import org.junit.jupiter.api.*;
|
||||
import org.junit.jupiter.api.parallel.Execution;
|
||||
import org.testcontainers.containers.MariaDBContainer;
|
||||
import org.testcontainers.junit.jupiter.Container;
|
||||
import org.testcontainers.junit.jupiter.Testcontainers;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import static org.junit.jupiter.api.parallel.ExecutionMode.SAME_THREAD;
|
||||
|
||||
@Testcontainers
|
||||
@Execution(SAME_THREAD)
|
||||
@Tag("slow")
|
||||
public class AdjacenciesLoaderTest {
|
||||
private static HikariDataSource dataSource;
|
||||
|
||||
@Container
|
||||
static MariaDBContainer<?> mariaDBContainer = new MariaDBContainer<>("mariadb")
|
||||
.withDatabaseName("WMSA_prod")
|
||||
.withUsername("wmsa")
|
||||
.withPassword("wmsa")
|
||||
.withNetworkAliases("mariadb");
|
||||
|
||||
@BeforeAll
|
||||
public static void setup() {
|
||||
|
||||
HikariConfig config = new HikariConfig();
|
||||
config.setJdbcUrl(mariaDBContainer.getJdbcUrl());
|
||||
config.setUsername("wmsa");
|
||||
config.setPassword("wmsa");
|
||||
|
||||
dataSource = new HikariDataSource(config);
|
||||
|
||||
try (var conn = dataSource.getConnection();
|
||||
var stmt = conn.createStatement()) {
|
||||
stmt.execute("""
|
||||
CREATE TABLE EC_DOMAIN_NEIGHBORS_2 (
|
||||
DOMAIN_ID INT NOT NULL,
|
||||
NEIGHBOR_ID INT NOT NULL,
|
||||
RELATEDNESS DOUBLE NOT NULL,
|
||||
PRIMARY KEY (DOMAIN_ID, NEIGHBOR_ID)
|
||||
)
|
||||
""");
|
||||
} catch (Exception e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
@AfterAll
|
||||
public static void teardown() {
|
||||
dataSource.close();
|
||||
mariaDBContainer.close();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testLoad() {
|
||||
var loader = new AdjacenciesLoader(dataSource);
|
||||
try {
|
||||
loader.load(new WebsiteAdjacenciesCalculator.DomainSimilarities(1,
|
||||
List.of(new WebsiteAdjacenciesCalculator.DomainSimilarity(2, 0.5),
|
||||
new WebsiteAdjacenciesCalculator.DomainSimilarity(3, 0.6)
|
||||
)));
|
||||
loader.stop();
|
||||
} catch (InterruptedException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
|
||||
try (var conn = dataSource.getConnection();
|
||||
var stmt = conn.prepareStatement("SELECT * FROM EC_DOMAIN_NEIGHBORS_2 WHERE DOMAIN_ID=1");
|
||||
) {
|
||||
var rs = stmt.executeQuery();
|
||||
Assertions.assertTrue(rs.next());
|
||||
Assertions.assertEquals(2, rs.getInt(2));
|
||||
Assertions.assertEquals(0.5, rs.getDouble(3));
|
||||
Assertions.assertTrue(rs.next());
|
||||
Assertions.assertEquals(3, rs.getInt(2));
|
||||
Assertions.assertEquals(0.6, rs.getDouble(3));
|
||||
} catch (Exception e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user