Big brain web developers were using onload and onerror handlers to load JS without script tags...

This commit is contained in:
Viktor Lofgren 2023-06-30 17:10:25 +02:00
parent 7d86586594
commit d2fdaafc7a
2 changed files with 28 additions and 5 deletions

View File

@ -36,8 +36,15 @@ public class DocumentValuator {
var scriptVisitor = new ScriptVisitor();
parsed.getElementsByTag("script").traverse(scriptVisitor);
int value = scriptVisitor.score();
return scriptVisitor.score();
for (var links : parsed.head().getElementsByTag("link")) {
if (links.hasAttr("onerror") || links.hasAttr("onload")) {
value += 1;
}
}
return value;
}
private static class ScriptVisitor implements NodeVisitor {
@ -56,7 +63,6 @@ public class DocumentValuator {
}
else if (node instanceof TextNode tn) {
visitScriptText(tn);
}
}
@ -73,8 +79,7 @@ public class DocumentValuator {
String srcAttr = el.attr("src");
if (srcAttr.contains("wp-content") || srcAttr.contains("wp-includes") || srcAttr.contains("jquery")) {
penalty += 0.49;
}
else if (!Strings.isBlank(srcAttr)) {
} else if (!Strings.isBlank(srcAttr)) {
penalty += 1;
}
}

View File

@ -76,6 +76,19 @@ public class FeatureExtractor {
}
}
// 500 IQ web developers use <link> onerror or onload handlers
// to sneakily load JS without explicit script tags
for (var link : doc.head().getElementsByTag("link")) {
if (link.hasAttr("onerror")) {
features.add(HtmlFeature.JS);
break;
}
if (link.hasAttr("onload")) {
features.add(HtmlFeature.JS);
break;
}
}
if (features.contains(HtmlFeature.JS) && adblockSimulator.hasAds(doc.clone())) {
features.add(HtmlFeature.ADVERTISEMENT);
}
@ -117,8 +130,13 @@ public class FeatureExtractor {
}
/**
 * Checks a script element for a known tracker by inspecting its {@code src} attribute.
 *
 * <p>Reads the {@code src} attribute value and delegates the actual matching to
 * {@link #hasTrackingScript(String)}.
 *
 * @param scriptTag the element whose {@code src} attribute is examined
 * @return the result of {@link #hasTrackingScript(String)} for the {@code src} value
 */
private boolean hasTrackingScript(Element scriptTag) {
final String scriptSource = scriptTag.attr("src");
return hasTrackingScript(scriptSource);
}
private boolean hasTrackingScript(String scriptText) {
for (var tracker : trackers) {
if (scriptTag.attr("src").contains(tracker)) {
if (scriptText.contains(tracker)) {
return true;
}
}