ClearURLs: use rules from ClearURLs browser extension (#3657)

Co-authored-by: V <vendicated@riseup.net>
This commit is contained in:
thororen 2025-09-08 21:35:01 -04:00 committed by GitHub
parent 98058f0cae
commit 8eabb11125
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 90 additions and 231 deletions

View file

@ -0,0 +1,11 @@
# ClearURLs
Automatically removes tracking elements from URLs you send.
Uses data from the [ClearURLs browser extension](https://clearurls.xyz/).
## Example
**Before:** `https://www.amazon.com/dp/exampleProduct/ref=sxin_0_pb?__mk_de_DE=ÅMÅŽÕÑ&keywords=tea&pd_rd_i=exampleProduct&pd_rd_r=8d39e4cd-1e4f-43db-b6e7-72e969a84aa5&pd_rd_w=1pcKM&pd_rd_wg=hYrNl&pf_rd_p=50bbfd25-5ef7-41a2-68d6-74d854b30e30&pf_rd_r=0GMWD0YYKA7XFGX55ADP&qid=1517757263&rnid=2914120011`
**After:** `https://www.amazon.com/dp/exampleProduct/`

View file

@ -1,159 +0,0 @@
/*
* Vencord, a modification for Discord's desktop app
* Copyright (c) 2022 Vendicated and contributors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
/**
 * Built-in list of query-parameter rules for the ClearURLs plugin.
 *
 * Each entry is either `param` or `param@host`, as parsed by the plugin's
 * `createRules` (which splits every entry on "@"):
 * - `param` is the query-parameter name to strip. A `*` in it is turned into
 *   a wildcard matching one or more characters (e.g. `pd_rd_*`).
 * - With no `@host` part, the rule is registered as a universal rule.
 * - With `@host`, the rule is only applied when the host pattern matches the
 *   URL's hostname. An optional `www.` prefix is always accepted, a leading
 *   `*.` accepts any optional subdomain, and a `*` elsewhere matches one or
 *   more characters (e.g. `amazon.*`).
 *
 * NOTE(review): `createRules` replaces wildcards with a non-global regex, so
 * only the first `*` of each part appears to be expanded - confirm before
 * adding entries with several wildcards in one part.
 */
export const defaultRules = [
"action_object_map",
"action_type_map",
"action_ref_map",
// aliexpress hosts (mixed with un-scoped entries that apply to every URL)
"spm@*.aliexpress.com",
"scm@*.aliexpress.com",
"aff_platform",
"aff_trace_key",
"algo_expid@*.aliexpress.*",
"algo_pvid@*.aliexpress.*",
"btsid",
"ws_ab_test",
// amazon.* hosts
"pd_rd_*@amazon.*",
"_encoding@amazon.*",
"psc@amazon.*",
"tag@amazon.*",
"ref_@amazon.*",
"pf_rd_*@amazon.*",
"pf@amazon.*",
"crid@amazon.*",
"keywords@amazon.*",
"sprefix@amazon.*",
"sr@amazon.*",
"ie@amazon.*",
"node@amazon.*",
"qid@amazon.*",
"callback@bilibili.com",
// bing.com
"cvid@bing.com",
"form@bing.com",
"sk@bing.com",
"sp@bing.com",
"sc@bing.com",
"qs@bing.com",
"pq@bing.com",
// Un-scoped entries below have no "@host" part and therefore apply to every URL
"sc_cid",
"mkt_tok",
"trk",
"trkCampaign",
"ga_*",
"gclid",
"gclsrc",
"hmb_campaign",
"hmb_medium",
"hmb_source",
"spReportId",
"spJobID",
"spUserID",
"spMailingID",
"itm_*",
"s_cid",
"elqTrackId",
"elqTrack",
"assetType",
"assetId",
"recipientId",
"campaignId",
"siteId",
"mc_cid",
"mc_eid",
"pk_*",
"sc_campaign",
"sc_channel",
"sc_content",
"sc_medium",
"sc_outcome",
"sc_geo",
"sc_country",
"nr_email_referer",
"vero_conv",
"vero_id",
"yclid",
"_openstat",
"mbid",
"cmpid",
"cid",
"c_id",
"campaign_id",
"Campaign",
"hash@ebay.*",
"fb_action_ids",
"fb_action_types",
"fb_ref",
"fb_source",
"fbclid",
"refsrc@facebook.com",
"hrc@facebook.com",
// google.* hosts (gs_l is un-scoped)
"gs_l",
"gs_lcp@google.*",
"ved@google.*",
"ei@google.*",
"sei@google.*",
"gws_rd@google.*",
"gs_gbg@google.*",
"gs_mss@google.*",
"gs_rn@google.*",
"_hsenc",
"_hsmi",
"__hssc",
"__hstc",
"hsCtaTracking",
"source@sourceforge.net",
"position@sourceforge.net",
// twitter.com, x.com and the other hosts that share the same t / s / ref_* rules
"t@*.twitter.com",
"s@*.twitter.com",
"ref_*@*.twitter.com",
"t@*.x.com",
"s@*.x.com",
"ref_*@*.x.com",
"t@*.fixupx.com",
"s@*.fixupx.com",
"ref_*@*.fixupx.com",
"t@*.fxtwitter.com",
"s@*.fxtwitter.com",
"ref_*@*.fxtwitter.com",
"t@*.twittpr.com",
"s@*.twittpr.com",
"ref_*@*.twittpr.com",
"t@*.fixvx.com",
"s@*.fixvx.com",
"ref_*@*.fixvx.com",
"tt_medium",
"tt_content",
"lr@yandex.*",
"redircnt@yandex.*",
// youtube.com / youtu.be
"feature@*.youtube.com",
"kw@*.youtube.com",
"si@*.youtube.com",
"pp@*.youtube.com",
"si@*.youtu.be",
"wt_zmc",
// utm_* parameters, listed individually rather than as a wildcard
"utm_source",
"utm_content",
"utm_medium",
"utm_campaign",
"utm_term",
"si@open.spotify.com",
"igshid",
"igsh",
"share_id@reddit.com",
"si@soundcloud.com",
];

View file

@ -22,77 +22,74 @@ import {
import { Devs } from "@utils/constants"; import { Devs } from "@utils/constants";
import definePlugin from "@utils/types"; import definePlugin from "@utils/types";
import { defaultRules } from "./defaultRules"; const CLEAR_URLS_JSON_URL = "https://raw.githubusercontent.com/ClearURLs/Rules/master/data.min.json";
// From lodash interface Provider {
const reRegExpChar = /[\\^$.*+?()[\]{}|]/g; urlPattern: string;
const reHasRegExpChar = RegExp(reRegExpChar.source); completeProvider: boolean;
rules?: string[];
rawRules?: string[];
referralMarketing?: string[];
exceptions?: string[];
redirections?: string[];
forceRedirection?: boolean;
}
interface ClearUrlsData {
providers: Record<string, Provider>;
}
interface RuleSet {
name: string;
urlPattern: RegExp;
rules?: RegExp[];
rawRules?: RegExp[];
exceptions?: RegExp[];
}
export default definePlugin({ export default definePlugin({
name: "ClearURLs", name: "ClearURLs",
description: "Removes tracking garbage from URLs", description: "Automatically removes tracking elements from URLs you send",
authors: [Devs.adryd], authors: [Devs.adryd, Devs.thororen],
start() { rules: [] as RuleSet[],
this.createRules();
async start() {
await this.createRules();
},
stop() {
this.rules = [];
}, },
onBeforeMessageSend(_, msg) { onBeforeMessageSend(_, msg) {
return this.onSend(msg); return this.cleanMessage(msg);
}, },
onBeforeMessageEdit(_cid, _mid, msg) { onBeforeMessageEdit(_cid, _mid, msg) {
return this.onSend(msg); return this.cleanMessage(msg);
}, },
escapeRegExp(str: string) { async createRules() {
return (str && reHasRegExpChar.test(str)) const res = await fetch(CLEAR_URLS_JSON_URL)
? str.replace(reRegExpChar, "\\$&") .then(res => res.json()) as ClearUrlsData;
: (str || "");
},
createRules() { this.rules = [];
// Can be extended upon once user configs are available
// Eg. (useDefaultRules: boolean, customRules: Array[string])
const rules = defaultRules;
this.universalRules = new Set(); for (const [name, provider] of Object.entries(res.providers)) {
this.rulesByHost = new Map(); const urlPattern = new RegExp(provider.urlPattern, "i");
this.hostRules = new Map();
for (const rule of rules) { const rules = provider.rules?.map(rule => new RegExp(rule, "i"));
const splitRule = rule.split("@"); const rawRules = provider.rawRules?.map(rule => new RegExp(rule, "i"));
const paramRule = new RegExp( const exceptions = provider.exceptions?.map(ex => new RegExp(ex, "i"));
"^" +
this.escapeRegExp(splitRule[0]).replace(/\\\*/, ".+?") +
"$"
);
if (!splitRule[1]) { this.rules.push({
this.universalRules.add(paramRule); name,
continue; urlPattern,
} rules,
const hostRule = new RegExp( rawRules,
"^(www\\.)?" + exceptions,
this.escapeRegExp(splitRule[1]) });
.replace(/\\\./, "\\.")
.replace(/^\\\*\\\./, "(.+?\\.)?")
.replace(/\\\*/, ".+?") +
"$"
);
const hostRuleIndex = hostRule.toString();
this.hostRules.set(hostRuleIndex, hostRule);
if (this.rulesByHost.get(hostRuleIndex) == null) {
this.rulesByHost.set(hostRuleIndex, new Set());
}
this.rulesByHost.get(hostRuleIndex).add(paramRule);
}
},
removeParam(rule: string | RegExp, param: string, parent: URLSearchParams) {
if (param === rule || rule instanceof RegExp && rule.test(param)) {
parent.delete(param);
} }
}, },
@ -106,34 +103,40 @@ export default definePlugin({
} }
// Cheap way to check if there are any search params // Cheap way to check if there are any search params
if (url.searchParams.entries().next().done) { if (url.searchParams.entries().next().done) return match;
// If there are none, we don't need to modify anything
return match;
}
// Check all universal rules // Check rules for each provider that matches
this.universalRules.forEach(rule => { this.rules.forEach(({ urlPattern, exceptions, rawRules, rules }) => {
url.searchParams.forEach((_value, param, parent) => { if (!urlPattern.test(url.href) || exceptions?.some(ex => ex.test(url.href))) return;
this.removeParam(rule, param, parent);
});
});
// Check rules for each hosts that match const toDelete: string[] = [];
this.hostRules.forEach((regex, hostRuleName) => {
if (!regex.test(url.hostname)) return; if (rules) {
this.rulesByHost.get(hostRuleName).forEach(rule => { // Add matched params to delete list
url.searchParams.forEach((_value, param, parent) => { url.searchParams.forEach((_, param) => {
this.removeParam(rule, param, parent); if (rules.some(rule => rule.test(param))) {
toDelete.push(param);
}
}); });
}
// Delete matched params from list
toDelete.forEach(param => url.searchParams.delete(param));
// Match and remove any raw rules
let cleanedUrl = url.href;
rawRules?.forEach(rawRule => {
cleanedUrl = cleanedUrl.replace(rawRule, "");
}); });
url = new URL(cleanedUrl);
}); });
return url.toString(); return url.toString();
}, },
onSend(msg: MessageObject) { cleanMessage(msg: MessageObject) {
// Only run on messages that contain URLs // Only run on messages that contain URLs
if (msg.content.match(/http(s)?:\/\//)) { if (/http(s)?:\/\//.test(msg.content)) {
msg.content = msg.content.replace( msg.content = msg.content.replace(
/(https?:\/\/[^\s<]+[^<.,:;"'>)|\]\s])/g, /(https?:\/\/[^\s<]+[^<.,:;"'>)|\]\s])/g,
match => this.replacer(match) match => this.replacer(match)

View file

@ -602,6 +602,10 @@ export const Devs = /* #__PURE__*/ Object.freeze({
name: "Cootshk", name: "Cootshk",
id: 921605971577548820n id: 921605971577548820n
}, },
thororen: {
name: "thororen",
id: 848339671629299742n
},
} satisfies Record<string, Dev>); } satisfies Record<string, Dev>);
// iife so #__PURE__ works correctly // iife so #__PURE__ works correctly