Files
simple-mail-cleaner/backend/src/mail/newsletter.ts
2026-01-23 14:01:49 +01:00

99 lines
3.5 KiB
TypeScript

/**
 * Tunable settings for the newsletter heuristic implemented by `detectNewsletter`.
 *
 * The list-valued entries are trimmed and lower-cased before they are used.
 * The final score is the sum of the per-signal contributions described below.
 */
export type NewsletterConfig = {
  /** Minimum total score at which a message is reported as a newsletter (`score >= threshold`). */
  threshold: number;

  /** Header names checked for presence; every name found on the message adds `weightHeader`. */
  headerKeys: string[];
  /** Score contributed by each matched entry of `headerKeys`. */
  weightHeader: number;

  /** Score added once when the `precedence` or `x-precedence` header contains "bulk" or "list". */
  weightPrecedence: number;

  /** Substrings searched for in the lower-cased subject. */
  subjectTokens: string[];
  /** Score added once when at least one entry of `subjectTokens` is found. */
  weightSubject: number;

  /** Substrings searched for in the lower-cased `from` value. */
  fromTokens: string[];
  /** Score added once when at least one entry of `fromTokens` is found. */
  weightFrom: number;
};
/**
 * Baseline settings used by `detectNewsletter` for every option the caller
 * does not override.
 *
 * Each signal weighs 1 and the threshold is 2, so a total score of two
 * (for example two matched list headers) classifies a message as a newsletter.
 */
const DEFAULT_CONFIG: NewsletterConfig = {
  threshold: 2,

  // Mailing-list style headers; only their presence is checked, not their value.
  headerKeys: [
    "list-unsubscribe",
    "list-id",
    "list-help",
    "list-archive",
    "list-post",
    "list-owner",
    "list-subscribe",
    "list-unsubscribe-post",
  ],

  // Substrings looked for in the lower-cased subject line.
  subjectTokens: [
    "newsletter",
    "unsubscribe",
    "update",
    "news",
    "digest",
  ],

  // Substrings looked for in the lower-cased sender value.
  fromTokens: [
    "newsletter",
    "no-reply",
    "noreply",
    "news",
    "updates",
  ],

  // All signals carry the same weight by default.
  weightHeader: 1,
  weightPrecedence: 1,
  weightSubject: 1,
  weightFrom: 1,
};
/**
 * Reads a header from the map, treating a missing header as an empty string.
 *
 * @param headers Header map; the lookup uses the lower-cased form of `key`.
 * @param key Header name in any letter case.
 * @returns The stored value, or `""` when the map has no usable entry.
 */
const headerValue = (headers: Map<string, string>, key: string) => {
  const stored = headers.get(key.toLowerCase());
  return stored ?? "";
};
/**
 * Tells whether `value` contains at least one of the given substrings.
 *
 * The comparison is case-sensitive; callers lower-case both sides beforehand.
 *
 * @param value Text to search in.
 * @param tokens Candidate substrings.
 * @returns `true` on the first token found, `false` when none match or the list is empty.
 */
const containsAny = (value: string, tokens: string[]) => {
  for (const candidate of tokens) {
    if (value.includes(candidate)) {
      return true;
    }
  }
  return false;
};
/**
 * Canonicalises a list of configured names or tokens for case-insensitive matching.
 *
 * Each entry is trimmed and lower-cased, blank entries are dropped, and entries
 * that become identical after normalisation are collapsed into one (the first
 * occurrence keeps its position). Without the de-duplication a list such as
 * `["List-Id", "list-id"]` would be counted twice by callers that score by the
 * number of matched entries.
 *
 * @param items Raw entries; the input array is not mutated.
 * @returns A new array of unique, non-empty, lower-cased entries in first-seen order.
 */
const normalizeList = (items: string[]) => {
  const cleaned = items.map((item) => item.trim().toLowerCase()).filter(Boolean);
  // A Set iterates in insertion order, so the first occurrence decides the position.
  return Array.from(new Set(cleaned));
};
/**
 * Scores a message against the newsletter heuristics and reports which signals fired.
 *
 * Four signals are combined:
 *  - header presence: every configured header name found in `headers` adds `weightHeader`;
 *  - precedence: `precedence` or `x-precedence` containing "bulk" or "list" adds `weightPrecedence` once;
 *  - subject: at least one configured token in the lower-cased subject adds `weightSubject` once;
 *  - sender: at least one configured token in the lower-cased `from` adds `weightFrom` once.
 *
 * Header names are looked up in lower case, so `headers` must be keyed by
 * lower-cased names for the header signals to match.
 *
 * @param params.headers Message headers keyed by header name.
 * @param params.subject Subject line; `null`/`undefined` is treated as empty.
 * @param params.from Sender value; `null`/`undefined` is treated as empty.
 * @param params.config Optional overrides; any missing field falls back to `DEFAULT_CONFIG`.
 * @returns The verdict (`score >= threshold`), the total score, and the matched
 *          signals together with a per-signal score breakdown.
 */
export const detectNewsletter = (params: {
  headers: Map<string, string>;
  subject?: string | null;
  from?: string | null;
  config?: Partial<NewsletterConfig>;
}) => {
  const subjectText = (params.subject ?? "").toLowerCase();
  const fromText = (params.from ?? "").toLowerCase();
  const { headers } = params;
  const overrides = params.config;

  // Resolve the effective configuration field by field so that an explicit
  // `undefined` in the overrides still falls back to the default.
  const config: NewsletterConfig = {
    threshold: overrides?.threshold ?? DEFAULT_CONFIG.threshold,
    headerKeys: normalizeList(overrides?.headerKeys ?? DEFAULT_CONFIG.headerKeys),
    subjectTokens: normalizeList(overrides?.subjectTokens ?? DEFAULT_CONFIG.subjectTokens),
    fromTokens: normalizeList(overrides?.fromTokens ?? DEFAULT_CONFIG.fromTokens),
    weightHeader: overrides?.weightHeader ?? DEFAULT_CONFIG.weightHeader,
    weightPrecedence: overrides?.weightPrecedence ?? DEFAULT_CONFIG.weightPrecedence,
    weightSubject: overrides?.weightSubject ?? DEFAULT_CONFIG.weightSubject,
    weightFrom: overrides?.weightFrom ?? DEFAULT_CONFIG.weightFrom
  };

  // Keeps the tokens that occur somewhere in the given (already lower-cased) text.
  const tokensFoundIn = (text: string, tokens: string[]) =>
    tokens.filter((token) => text.includes(token));

  const matchedHeaderKeys = config.headerKeys.filter((name) => headers.has(name));

  // Either precedence-style header may flag the message as bulk/list mail.
  const precedenceMarkers = ["bulk", "list"];
  const precedenceHint = ["precedence", "x-precedence"].some((name) =>
    containsAny(headerValue(headers, name).toLowerCase(), precedenceMarkers)
  );

  const subjectMatches = tokensFoundIn(subjectText, config.subjectTokens);
  const fromMatches = tokensFoundIn(fromText, config.fromTokens);

  // Headers score per match; the remaining signals score at most once each.
  const headerScore = matchedHeaderKeys.length * config.weightHeader;
  const precedenceScore = precedenceHint ? config.weightPrecedence : 0;
  const subjectScore = subjectMatches.length > 0 ? config.weightSubject : 0;
  const fromScore = fromMatches.length > 0 ? config.weightFrom : 0;
  const score = headerScore + precedenceScore + subjectScore + fromScore;

  const scoreBreakdown = {
    headerMatches: matchedHeaderKeys.length,
    headerWeight: config.weightHeader,
    headerScore,
    precedenceWeight: config.weightPrecedence,
    precedenceScore,
    subjectMatches: subjectMatches.length,
    subjectWeight: config.weightSubject,
    subjectScore,
    fromMatches: fromMatches.length,
    fromWeight: config.weightFrom,
    fromScore
  };

  const signals = {
    headerKeys: matchedHeaderKeys,
    precedenceHint,
    subjectTokens: subjectMatches,
    fromTokens: fromMatches,
    scoreBreakdown
  };

  return {
    isNewsletter: score >= config.threshold,
    score,
    signals
  };
};