ClearURLs: use rules from ClearURLs browser extension (#3657)

Co-authored-by: V <vendicated@riseup.net>
This commit is contained in:
thororen 2025-09-08 21:35:01 -04:00 committed by GitHub
parent 98058f0cae
commit 8eabb11125
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 90 additions and 231 deletions

View file

@ -0,0 +1,11 @@
# ClearURLs
Automatically removes tracking elements from URLs you send.
Uses data from the [ClearURLs browser extension](https://clearurls.xyz/).
## Example
**Before:** `https://www.amazon.com/dp/exampleProduct/ref=sxin_0_pb?__mk_de_DE=ÅMÅŽÕÑ&keywords=tea&pd_rd_i=exampleProduct&pd_rd_r=8d39e4cd-1e4f-43db-b6e7-72e969a84aa5&pd_rd_w=1pcKM&pd_rd_wg=hYrNl&pf_rd_p=50bbfd25-5ef7-41a2-68d6-74d854b30e30&pf_rd_r=0GMWD0YYKA7XFGX55ADP&qid=1517757263&rnid=2914120011`
**After:** `https://www.amazon.com/dp/exampleProduct/`

View file

@ -1,159 +0,0 @@
/*
* Vencord, a modification for Discord's desktop app
* Copyright (c) 2022 Vendicated and contributors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
/**
 * Built-in list of tracking query parameters that the ClearURLs plugin strips from URLs.
 *
 * Each entry is a string in one of two shapes (the plugin's `createRules` splits entries on "@"):
 *  - `param`      - no host part; the rule is registered as a universal rule.
 *  - `param@host` - the rule is only applied when the URL's hostname matches `host`.
 *
 * Wildcards, as interpreted by `createRules`:
 *  - a `*` in the parameter part is expanded to a non-greedy "one or more characters" match
 *    (e.g. `utm_*`-style prefixes such as `ga_*`, `pk_*`, `itm_*`);
 *  - a leading `*.` in the host part makes the subdomain optional, and a `*` elsewhere in the
 *    host (e.g. `amazon.*`) matches one or more characters;
 *  - an optional `www.` prefix is always accepted in front of the host.
 *
 * Parameter names are matched case-sensitively, which is why both `Campaign` and
 * `campaign_id` style spellings appear below.
 */
export const defaultRules = [
"action_object_map",
"action_type_map",
"action_ref_map",
"spm@*.aliexpress.com",
"scm@*.aliexpress.com",
"aff_platform",
"aff_trace_key",
"algo_expid@*.aliexpress.*",
"algo_pvid@*.aliexpress.*",
"btsid",
"ws_ab_test",
"pd_rd_*@amazon.*",
"_encoding@amazon.*",
"psc@amazon.*",
"tag@amazon.*",
"ref_@amazon.*",
"pf_rd_*@amazon.*",
"pf@amazon.*",
"crid@amazon.*",
"keywords@amazon.*",
"sprefix@amazon.*",
"sr@amazon.*",
"ie@amazon.*",
"node@amazon.*",
"qid@amazon.*",
"callback@bilibili.com",
"cvid@bing.com",
"form@bing.com",
"sk@bing.com",
"sp@bing.com",
"sc@bing.com",
"qs@bing.com",
"pq@bing.com",
"sc_cid",
"mkt_tok",
"trk",
"trkCampaign",
"ga_*",
"gclid",
"gclsrc",
"hmb_campaign",
"hmb_medium",
"hmb_source",
"spReportId",
"spJobID",
"spUserID",
"spMailingID",
"itm_*",
"s_cid",
"elqTrackId",
"elqTrack",
"assetType",
"assetId",
"recipientId",
"campaignId",
"siteId",
"mc_cid",
"mc_eid",
"pk_*",
"sc_campaign",
"sc_channel",
"sc_content",
"sc_medium",
"sc_outcome",
"sc_geo",
"sc_country",
"nr_email_referer",
"vero_conv",
"vero_id",
"yclid",
"_openstat",
"mbid",
"cmpid",
"cid",
"c_id",
"campaign_id",
"Campaign",
"hash@ebay.*",
"fb_action_ids",
"fb_action_types",
"fb_ref",
"fb_source",
"fbclid",
"refsrc@facebook.com",
"hrc@facebook.com",
"gs_l",
"gs_lcp@google.*",
"ved@google.*",
"ei@google.*",
"sei@google.*",
"gws_rd@google.*",
"gs_gbg@google.*",
"gs_mss@google.*",
"gs_rn@google.*",
"_hsenc",
"_hsmi",
"__hssc",
"__hstc",
"hsCtaTracking",
"source@sourceforge.net",
"position@sourceforge.net",
"t@*.twitter.com",
"s@*.twitter.com",
"ref_*@*.twitter.com",
"t@*.x.com",
"s@*.x.com",
"ref_*@*.x.com",
"t@*.fixupx.com",
"s@*.fixupx.com",
"ref_*@*.fixupx.com",
"t@*.fxtwitter.com",
"s@*.fxtwitter.com",
"ref_*@*.fxtwitter.com",
"t@*.twittpr.com",
"s@*.twittpr.com",
"ref_*@*.twittpr.com",
"t@*.fixvx.com",
"s@*.fixvx.com",
"ref_*@*.fixvx.com",
"tt_medium",
"tt_content",
"lr@yandex.*",
"redircnt@yandex.*",
"feature@*.youtube.com",
"kw@*.youtube.com",
"si@*.youtube.com",
"pp@*.youtube.com",
"si@*.youtu.be",
"wt_zmc",
"utm_source",
"utm_content",
"utm_medium",
"utm_campaign",
"utm_term",
"si@open.spotify.com",
"igshid",
"igsh",
"share_id@reddit.com",
"si@soundcloud.com",
];

View file

@ -22,77 +22,74 @@ import {
import { Devs } from "@utils/constants";
import definePlugin from "@utils/types";
import { defaultRules } from "./defaultRules";
/** Address of the minified rules JSON published in the ClearURLs/Rules repository. */
const CLEAR_URLS_JSON_URL = "https://raw.githubusercontent.com/ClearURLs/Rules/master/data.min.json";

// The two patterns below are taken from lodash.
/** Global matcher for every character that carries special meaning inside a regular expression. */
const reRegExpChar = /[\\^$.*+?()[\]{}|]/g;
/** Same pattern without flags, so `.test()` keeps no `lastIndex` state between calls. */
const reHasRegExpChar = new RegExp(reRegExpChar.source);
/**
 * A single provider entry of the ClearURLs rules JSON, exactly as downloaded
 * (every pattern is still regular-expression source text, not a compiled RegExp).
 */
interface Provider {
    /** Regex source tested against the full URL to decide whether this provider applies. */
    urlPattern: string;
    // The two boolean flags are carried for completeness; the visible plugin code does not read them.
    completeProvider: boolean;
    forceRedirection?: boolean;
    /** Regex sources tested against individual query parameter names. */
    rules?: Array<string>;
    /** Regex sources whose matches are cut out of the URL string itself. */
    rawRules?: Array<string>;
    /** Regex sources for URLs that must be left untouched even though `urlPattern` matches. */
    exceptions?: Array<string>;
    // NOTE(review): the remaining lists are not consumed by the visible plugin code;
    // their exact semantics should be confirmed against the ClearURLs rules documentation.
    referralMarketing?: Array<string>;
    redirections?: Array<string>;
}
/** Top-level shape of the downloaded ClearURLs rules JSON. */
interface ClearUrlsData {
    /** Provider definitions keyed by provider name. */
    providers: { [providerName: string]: Provider };
}
/**
 * Compiled form of one provider: the same pattern lists as the raw JSON entry,
 * but already turned into RegExp objects so they can be applied directly.
 */
interface RuleSet {
    /** Provider name (the key under which it was listed in the rules JSON). */
    name: string;
    /** Tested against the full URL; the rule set only applies when it matches. */
    urlPattern: RegExp;
    /** URLs matching any of these are skipped. */
    exceptions?: Array<RegExp>;
    /** Tested against each query parameter name; matching parameters are deleted. */
    rules?: Array<RegExp>;
    /** Applied to the URL string itself; every match is replaced with an empty string. */
    rawRules?: Array<RegExp>;
}
export default definePlugin({
name: "ClearURLs",
description: "Removes tracking garbage from URLs",
authors: [Devs.adryd],
description: "Automatically removes tracking elements from URLs you send",
authors: [Devs.adryd, Devs.thororen],
start() {
this.createRules();
rules: [] as RuleSet[],
async start() {
await this.createRules();
},
/**
 * Replaces `rules` with a fresh empty array, dropping every compiled rule set.
 * NOTE(review): presumably called by the plugin host when the plugin is disabled - confirm.
 */
stop() {
this.rules = [];
},
onBeforeMessageSend(_, msg) {
return this.onSend(msg);
return this.cleanMessage(msg);
},
onBeforeMessageEdit(_cid, _mid, msg) {
return this.onSend(msg);
return this.cleanMessage(msg);
},
/**
 * Backslash-escapes every regular-expression metacharacter in `str`.
 *
 * @param str Text that should be matched literally inside a RegExp.
 * @returns The escaped text; `""` when `str` is empty or otherwise falsy;
 *          the input itself when it contains nothing that needs escaping.
 */
escapeRegExp(str: string) {
    // Empty / missing input normalises to an empty string.
    if (!str) {
        return "";
    }

    // Skip the replace entirely when there is nothing to escape.
    const needsEscaping = reHasRegExpChar.test(str);
    if (!needsEscaping) {
        return str;
    }

    return str.replace(reRegExpChar, "\\$&");
},
async createRules() {
const res = await fetch(CLEAR_URLS_JSON_URL)
.then(res => res.json()) as ClearUrlsData;
createRules() {
// Can be extended upon once user configs are available
// Eg. (useDefaultRules: boolean, customRules: Array[string])
const rules = defaultRules;
this.rules = [];
this.universalRules = new Set();
this.rulesByHost = new Map();
this.hostRules = new Map();
for (const [name, provider] of Object.entries(res.providers)) {
const urlPattern = new RegExp(provider.urlPattern, "i");
for (const rule of rules) {
const splitRule = rule.split("@");
const paramRule = new RegExp(
"^" +
this.escapeRegExp(splitRule[0]).replace(/\\\*/, ".+?") +
"$"
);
const rules = provider.rules?.map(rule => new RegExp(rule, "i"));
const rawRules = provider.rawRules?.map(rule => new RegExp(rule, "i"));
const exceptions = provider.exceptions?.map(ex => new RegExp(ex, "i"));
if (!splitRule[1]) {
this.universalRules.add(paramRule);
continue;
}
const hostRule = new RegExp(
"^(www\\.)?" +
this.escapeRegExp(splitRule[1])
.replace(/\\\./, "\\.")
.replace(/^\\\*\\\./, "(.+?\\.)?")
.replace(/\\\*/, ".+?") +
"$"
);
const hostRuleIndex = hostRule.toString();
this.hostRules.set(hostRuleIndex, hostRule);
if (this.rulesByHost.get(hostRuleIndex) == null) {
this.rulesByHost.set(hostRuleIndex, new Set());
}
this.rulesByHost.get(hostRuleIndex).add(paramRule);
}
},
removeParam(rule: string | RegExp, param: string, parent: URLSearchParams) {
if (param === rule || rule instanceof RegExp && rule.test(param)) {
parent.delete(param);
this.rules.push({
name,
urlPattern,
rules,
rawRules,
exceptions,
});
}
},
@ -106,34 +103,40 @@ export default definePlugin({
}
// Cheap way to check if there are any search params
if (url.searchParams.entries().next().done) {
// If there are none, we don't need to modify anything
return match;
}
if (url.searchParams.entries().next().done) return match;
// Check all universal rules
this.universalRules.forEach(rule => {
url.searchParams.forEach((_value, param, parent) => {
this.removeParam(rule, param, parent);
});
});
// Check rules for each provider that matches
this.rules.forEach(({ urlPattern, exceptions, rawRules, rules }) => {
if (!urlPattern.test(url.href) || exceptions?.some(ex => ex.test(url.href))) return;
// Check rules for each hosts that match
this.hostRules.forEach((regex, hostRuleName) => {
if (!regex.test(url.hostname)) return;
this.rulesByHost.get(hostRuleName).forEach(rule => {
url.searchParams.forEach((_value, param, parent) => {
this.removeParam(rule, param, parent);
const toDelete: string[] = [];
if (rules) {
// Add matched params to delete list
url.searchParams.forEach((_, param) => {
if (rules.some(rule => rule.test(param))) {
toDelete.push(param);
}
});
}
// Delete matched params from list
toDelete.forEach(param => url.searchParams.delete(param));
// Match and remove any raw rules
let cleanedUrl = url.href;
rawRules?.forEach(rawRule => {
cleanedUrl = cleanedUrl.replace(rawRule, "");
});
url = new URL(cleanedUrl);
});
return url.toString();
},
onSend(msg: MessageObject) {
cleanMessage(msg: MessageObject) {
// Only run on messages that contain URLs
if (msg.content.match(/http(s)?:\/\//)) {
if (/http(s)?:\/\//.test(msg.content)) {
msg.content = msg.content.replace(
/(https?:\/\/[^\s<]+[^<.,:;"'>)|\]\s])/g,
match => this.replacer(match)

View file

@ -602,6 +602,10 @@ export const Devs = /* #__PURE__*/ Object.freeze({
name: "Cootshk",
id: 921605971577548820n
},
thororen: {
name: "thororen",
id: 848339671629299742n
},
} satisfies Record<string, Dev>);
// iife so #__PURE__ works correctly