Aktueller Stand

This commit is contained in:
MDeeApp
2025-10-19 12:14:03 +02:00
parent 327a663bcf
commit 9745d38995
6 changed files with 1304 additions and 189 deletions

View File

@@ -19,7 +19,7 @@ const DEFAULT_PROFILE_NAMES = {
const PROFILE_SCOPE_COOKIE = 'fb_tracker_scope';
const PROFILE_SCOPE_MAX_AGE = 60 * 60 * 24 * 30; // 30 days
const FACEBOOK_TRACKING_PARAM_PREFIXES = ['__cft__', '__tn__', '__eep__', 'mibextid'];
const SEARCH_POST_HIDE_THRESHOLD = 3;
const SEARCH_POST_HIDE_THRESHOLD = 2;
const SEARCH_POST_RETENTION_DAYS = 90;
const screenshotDir = path.join(__dirname, 'data', 'screenshots');
@@ -277,10 +277,14 @@ function normalizeFacebookPostUrl(rawValue) {
const cleanedParams = new URLSearchParams();
parsed.searchParams.forEach((paramValue, paramKey) => {
const lowerKey = paramKey.toLowerCase();
if (FACEBOOK_TRACKING_PARAM_PREFIXES.some((prefix) => lowerKey.startsWith(prefix)) || lowerKey === 'set' || lowerKey === 'comment_id') {
return;
}
if (lowerKey === 'hoisted_section_header_type') {
const isSingleUnitParam = lowerKey === 's' && paramValue === 'single_unit';
if (
FACEBOOK_TRACKING_PARAM_PREFIXES.some((prefix) => lowerKey.startsWith(prefix))
|| lowerKey === 'set'
|| lowerKey === 'comment_id'
|| lowerKey === 'hoisted_section_header_type'
|| isSingleUnitParam
) {
return;
}
cleanedParams.append(paramKey, paramValue);
@@ -394,6 +398,34 @@ db.exec(`
);
`);
// Schema for post_urls: every URL known for a post, one row per URL.
// `url` is UNIQUE across the whole table, so a given URL can belong to at most
// one post. `is_primary` is 1 for the primary URL row and defaults to 0.
// NOTE(review): ON DELETE CASCADE only takes effect when SQLite foreign key
// enforcement is enabled (PRAGMA foreign_keys = ON) — confirm this is set.
db.exec(`
CREATE TABLE IF NOT EXISTS post_urls (
id INTEGER PRIMARY KEY AUTOINCREMENT,
post_id TEXT NOT NULL,
url TEXT NOT NULL UNIQUE,
is_primary INTEGER NOT NULL DEFAULT 0,
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (post_id) REFERENCES posts(id) ON DELETE CASCADE
);
`);
// Plain index for looking up all URL rows of a post.
db.exec(`
CREATE INDEX IF NOT EXISTS idx_post_urls_post_id
ON post_urls(post_id);
`);
// Partial unique index: at most one row per post may have is_primary = 1.
// Inserts and updates that would create a second primary row for the same
// post violate this index.
db.exec(`
CREATE UNIQUE INDEX IF NOT EXISTS idx_post_urls_primary
ON post_urls(post_id)
WHERE is_primary = 1;
`);
// Backfill: copy each post's url into post_urls as its primary row. The
// statement runs every time this code executes; OR IGNORE skips rows that
// would violate a uniqueness constraint (URL already stored, or the post
// already has a primary row).
db.prepare(`
INSERT OR IGNORE INTO post_urls (post_id, url, is_primary)
SELECT id, url, 1
FROM posts
`).run();
db.exec(`
CREATE TABLE IF NOT EXISTS checks (
id INTEGER PRIMARY KEY AUTOINCREMENT,
@@ -1132,13 +1164,59 @@ function cleanupExpiredSearchPosts() {
}
}
/**
 * Builds the equivalents of a Facebook photo URL on the other known Facebook
 * hosts (www.facebook.com, facebook.com, m.facebook.com).
 *
 * Only URLs whose host is facebook.com or a real subdomain of it, and whose
 * path starts with "/photo", are expanded. Each candidate is passed through
 * normalizeFacebookPostUrl; candidates that normalize to a falsy value, to the
 * input itself, or to an already collected variant are dropped.
 *
 * @param {string} url - URL to expand.
 * @returns {string[]} Normalized variants on the other hosts; empty when the
 *   input is not a string, cannot be parsed, or is not a Facebook photo URL.
 */
function expandPhotoUrlHostVariants(url) {
  if (typeof url !== 'string' || !url) {
    return [];
  }

  try {
    const parsed = new URL(url);
    const hostname = parsed.hostname.toLowerCase();

    // Require an exact match or a dot-separated subdomain. A bare
    // endsWith('facebook.com') would also accept look-alike hosts such as
    // "notfacebook.com".
    const isFacebookHost = hostname === 'facebook.com' || hostname.endsWith('.facebook.com');
    if (!isFacebookHost) {
      return [];
    }

    if (!parsed.pathname.toLowerCase().startsWith('/photo')) {
      return [];
    }

    const hosts = ['www.facebook.com', 'facebook.com', 'm.facebook.com'];
    const variants = [];

    for (const candidateHost of hosts) {
      if (candidateHost === hostname) {
        continue;
      }

      const candidateUrl = `${parsed.protocol}//${candidateHost}${parsed.pathname}${parsed.search}`;
      const normalized = normalizeFacebookPostUrl(candidateUrl);
      if (normalized && normalized !== url && !variants.includes(normalized)) {
        variants.push(normalized);
      }
    }

    return variants;
  } catch (error) {
    // Best effort: an unparsable URL (or a failing normalizer) simply yields
    // no variants instead of aborting the caller.
    return [];
  }
}
function collectNormalizedFacebookUrls(primaryUrl, candidates = []) {
const normalized = [];
const pushNormalized = (value) => {
const pushNormalized = (value, expandVariants = true) => {
const normalizedUrl = normalizeFacebookPostUrl(value);
if (normalizedUrl && !normalized.includes(normalizedUrl)) {
normalized.push(normalizedUrl);
if (expandVariants) {
const photoVariants = expandPhotoUrlHostVariants(normalizedUrl);
for (const variant of photoVariants) {
pushNormalized(variant, false);
}
}
}
};
@@ -1155,6 +1233,105 @@ function collectNormalizedFacebookUrls(primaryUrl, candidates = []) {
return normalized;
}
/**
 * Returns the normalized URLs related to a post, excluding its primary URL.
 *
 * The primary URL is normalized first; when that fails the result is empty.
 * Otherwise the normalized primary and the candidates are handed to
 * collectNormalizedFacebookUrls and the primary itself is removed from the
 * returned list.
 *
 * @param {string} primaryUrl - Primary URL of the post.
 * @param {Array} [candidates=[]] - Further URLs that may point to the same post.
 * @returns {string[]} Normalized URLs other than the normalized primary.
 */
function collectPostAlternateUrls(primaryUrl, candidates = []) {
  const canonicalPrimary = normalizeFacebookPostUrl(primaryUrl);

  if (canonicalPrimary) {
    const everyUrl = collectNormalizedFacebookUrls(canonicalPrimary, candidates);
    return everyUrl.filter((entry) => entry !== canonicalPrimary);
  }

  return [];
}
// Prepared statements for the post_urls table.

// Adds a URL row for a post. OR IGNORE skips the insert when it would violate
// a uniqueness constraint (for example the URL is already stored).
const insertPostUrlStmt = db.prepare(`
INSERT OR IGNORE INTO post_urls (post_id, url, is_primary)
VALUES (?, ?, ?)
`);
// For every URL row of a post (2nd parameter): mark the row whose url equals
// the 1st parameter as primary and all others as non-primary.
const setPrimaryPostUrlStmt = db.prepare(`
UPDATE post_urls
SET is_primary = CASE WHEN url = ? THEN 1 ELSE 0 END
WHERE post_id = ?
`);
// Full post row looked up by the url column of posts.
const selectPostByPrimaryUrlStmt = db.prepare('SELECT * FROM posts WHERE url = ?');
// Full post row looked up through any URL stored in post_urls.
const selectPostByAlternateUrlStmt = db.prepare(`
SELECT p.*
FROM post_urls pu
JOIN posts p ON p.id = pu.post_id
WHERE pu.url = ?
LIMIT 1
`);
// Id-only variants of the two lookups above.
const selectPostIdByPrimaryUrlStmt = db.prepare('SELECT id FROM posts WHERE url = ?');
const selectPostIdByAlternateUrlStmt = db.prepare('SELECT post_id FROM post_urls WHERE url = ?');
// Non-primary URLs of a post, oldest first. created_at defaults to
// CURRENT_TIMESTAMP, which has one-second resolution, so rows inserted within
// the same second tie; the autoincrement id breaks the tie and keeps the
// order deterministic (insertion order).
const selectAlternateUrlsForPostStmt = db.prepare(`
SELECT url
FROM post_urls
WHERE post_id = ?
AND is_primary = 0
ORDER BY created_at ASC, id ASC
`);
/**
 * Records the primary URL and any additional URLs of a post in post_urls.
 *
 * Does nothing when postId or primaryUrl is falsy, or when the primary URL
 * cannot be normalized. Additional URLs that fail normalization or equal the
 * normalized primary are skipped; a non-array additionalUrls is ignored.
 *
 * @param {*} postId - Id of the post the URLs belong to.
 * @param {string} primaryUrl - URL to store as the post's primary URL.
 * @param {Array} [additionalUrls=[]] - Further URLs to store as non-primary.
 * @returns {void}
 */
function storePostUrls(postId, primaryUrl, additionalUrls = []) {
  if (!postId || !primaryUrl) {
    return;
  }

  const normalizedPrimary = normalizeFacebookPostUrl(primaryUrl);
  if (!normalizedPrimary) {
    return;
  }

  // post_urls allows only one is_primary = 1 row per post (partial unique
  // index). Demote every existing row first: binding NULL makes `url = ?`
  // never true, so the CASE falls through to 0 for all rows of this post.
  // Without this step the INSERT OR IGNORE below is silently dropped whenever
  // the post still has a different primary row, and a combined
  // promote/demote UPDATE can hit the index while two rows are briefly primary.
  setPrimaryPostUrlStmt.run(null, postId);

  // Insert the primary row if the URL is not stored yet ...
  insertPostUrlStmt.run(postId, normalizedPrimary, 1);
  // ... and promote it in case it already existed as a non-primary row.
  setPrimaryPostUrlStmt.run(normalizedPrimary, postId);

  if (!Array.isArray(additionalUrls)) {
    return;
  }

  for (const candidate of additionalUrls) {
    const normalized = normalizeFacebookPostUrl(candidate);
    if (!normalized || normalized === normalizedPrimary) {
      continue;
    }
    insertPostUrlStmt.run(postId, normalized, 0);
  }
}
/**
 * Resolves a normalized URL to a post id.
 *
 * The posts table is consulted first (selectPostIdByPrimaryUrlStmt); only when
 * that yields no usable id is post_urls queried
 * (selectPostIdByAlternateUrlStmt).
 *
 * @param {string} normalizedUrl - Already normalized URL to look up.
 * @returns {*} The matching post id, or null when the URL is falsy or unknown.
 */
function findPostIdByUrl(normalizedUrl) {
  if (!normalizedUrl) {
    return null;
  }

  const fromPosts = selectPostIdByPrimaryUrlStmt.get(normalizedUrl);
  const primaryId = fromPosts ? fromPosts.id : null;
  if (primaryId) {
    return primaryId;
  }

  const fromPostUrls = selectPostIdByAlternateUrlStmt.get(normalizedUrl);
  const alternateId = fromPostUrls ? fromPostUrls.post_id : null;
  return alternateId || null;
}
/**
 * Resolves a normalized URL to a full post row.
 *
 * Tries the posts table first (selectPostByPrimaryUrlStmt) and falls back to
 * the post_urls join (selectPostByAlternateUrlStmt); the first hit wins.
 *
 * @param {string} normalizedUrl - Already normalized URL to look up.
 * @returns {Object|null} The post row, or null when the URL is falsy or unknown.
 */
function findPostByUrl(normalizedUrl) {
  if (!normalizedUrl) {
    return null;
  }

  const lookups = [selectPostByPrimaryUrlStmt, selectPostByAlternateUrlStmt];
  for (const lookup of lookups) {
    const row = lookup.get(normalizedUrl);
    if (row) {
      return row;
    }
  }

  return null;
}
function removeSearchSeenEntries(urls) {
if (!Array.isArray(urls) || urls.length === 0) {
return;
@@ -1191,9 +1368,6 @@ const updateSearchSeenStmt = db.prepare(`
SET seen_count = ?, manually_hidden = ?, last_seen_at = CURRENT_TIMESTAMP
WHERE url = ?
`);
const deleteSearchSeenStmt = db.prepare('DELETE FROM search_seen_posts WHERE url = ?');
const selectTrackedPostStmt = db.prepare('SELECT id FROM posts WHERE url = ?');
const checkIndexes = db.prepare("PRAGMA index_list('checks')").all();
for (const idx of checkIndexes) {
if (idx.unique) {
@@ -1274,6 +1448,9 @@ function mapPostRow(post) {
checked_at: sqliteTimestampToUTC(status.checked_at)
}));
const alternateUrlRows = selectAlternateUrlsForPostStmt.all(post.id);
const alternateUrls = alternateUrlRows.map(row => row.url);
return {
...post,
created_at: sqliteTimestampToUTC(post.created_at),
@@ -1289,7 +1466,8 @@ function mapPostRow(post) {
created_by_profile: creatorProfile,
created_by_profile_name: creatorProfile ? getProfileName(creatorProfile) : null,
created_by_name: creatorName,
deadline_at: post.deadline_at || null
deadline_at: post.deadline_at || null,
alternate_urls: alternateUrls
};
}
@@ -1321,12 +1499,16 @@ app.get('/api/posts/by-url', (req, res) => {
return res.status(400).json({ error: 'URL parameter must be a valid Facebook link' });
}
const post = db.prepare('SELECT * FROM posts WHERE url = ?').get(normalizedUrl);
const post = findPostByUrl(normalizedUrl);
if (!post) {
return res.json(null);
}
const alternates = collectPostAlternateUrls(post.url, [normalizedUrl]);
if (alternates.length) {
storePostUrls(post.id, post.url, alternates);
}
res.json(mapPostRow(post));
} catch (error) {
res.status(500).json({ error: error.message });
@@ -1344,16 +1526,19 @@ app.post('/api/search-posts', (req, res) => {
cleanupExpiredSearchPosts();
let isTracked = false;
let trackedPost = null;
for (const candidate of normalizedUrls) {
const tracked = selectTrackedPostStmt.get(candidate);
if (tracked) {
isTracked = true;
deleteSearchSeenStmt.run(candidate);
const found = findPostByUrl(candidate);
if (found) {
trackedPost = found;
break;
}
}
if (isTracked) {
if (trackedPost) {
const alternateUrls = collectPostAlternateUrls(trackedPost.url, normalizedUrls);
storePostUrls(trackedPost.id, trackedPost.url, alternateUrls);
removeSearchSeenEntries([trackedPost.url, ...alternateUrls]);
return res.json({ seen_count: 0, should_hide: false, tracked: true });
}
@@ -1555,6 +1740,7 @@ app.post('/api/posts', (req, res) => {
} = req.body;
const validatedTargetCount = validateTargetCount(typeof target_count === 'undefined' ? 1 : target_count);
const alternateUrlsInput = Array.isArray(req.body.alternate_urls) ? req.body.alternate_urls : [];
const normalizedUrl = normalizeFacebookPostUrl(url);
@@ -1591,7 +1777,9 @@ app.post('/api/posts', (req, res) => {
const post = db.prepare('SELECT * FROM posts WHERE id = ?').get(id);
removeSearchSeenEntries([normalizedUrl]);
const alternateUrls = collectPostAlternateUrls(normalizedUrl, alternateUrlsInput);
storePostUrls(id, normalizedUrl, alternateUrls);
removeSearchSeenEntries([normalizedUrl, ...alternateUrls]);
res.json(mapPostRow(post));
} catch (error) {
@@ -1607,6 +1795,12 @@ app.put('/api/posts/:postId', (req, res) => {
try {
const { postId } = req.params;
const { target_count, title, created_by_profile, created_by_name, deadline_at, url } = req.body || {};
const alternateUrlsInput = Array.isArray(req.body && req.body.alternate_urls) ? req.body.alternate_urls : [];
const existingPost = db.prepare('SELECT * FROM posts WHERE id = ?').get(postId);
if (!existingPost) {
return res.status(404).json({ error: 'Post not found' });
}
const updates = [];
const params = [];
@@ -1672,9 +1866,8 @@ app.put('/api/posts/:postId', (req, res) => {
params.push(postId);
const stmt = db.prepare(`UPDATE posts SET ${updates.join(', ')} WHERE id = ?`);
let result;
try {
result = stmt.run(...params);
stmt.run(...params);
} catch (error) {
if (error && error.code === 'SQLITE_CONSTRAINT_UNIQUE') {
return res.status(409).json({ error: 'Post with this URL already exists' });
@@ -1682,18 +1875,25 @@ app.put('/api/posts/:postId', (req, res) => {
throw error;
}
if (result.changes === 0) {
return res.status(404).json({ error: 'Post not found' });
}
recalcCheckedCount(postId);
const updatedPost = db.prepare('SELECT * FROM posts WHERE id = ?').get(postId);
if (normalizedUrlForCleanup) {
removeSearchSeenEntries([normalizedUrlForCleanup]);
const alternateCandidates = [...alternateUrlsInput];
if (existingPost.url && existingPost.url !== updatedPost.url) {
alternateCandidates.push(existingPost.url);
}
const alternateUrls = collectPostAlternateUrls(updatedPost.url, alternateCandidates);
storePostUrls(updatedPost.id, updatedPost.url, alternateUrls);
const cleanupUrls = new Set([updatedPost.url]);
alternateUrls.forEach(urlValue => cleanupUrls.add(urlValue));
if (normalizedUrlForCleanup && normalizedUrlForCleanup !== updatedPost.url) {
cleanupUrls.add(normalizedUrlForCleanup);
}
removeSearchSeenEntries(Array.from(cleanupUrls));
res.json(mapPostRow(updatedPost));
} catch (error) {
res.status(500).json({ error: error.message });
@@ -1785,6 +1985,32 @@ app.post('/api/posts/:postId/check', (req, res) => {
}
});
// POST /api/posts/:postId/urls
// Attaches additional URLs (request body field `urls`) to an existing post.
// Responds 404 when the post id is unknown. Otherwise stores the collected
// alternates, removes the post's URLs from the search-seen entries and returns
// the primary URL together with the alternates now stored for the post.
app.post('/api/posts/:postId/urls', (req, res) => {
  try {
    const postId = req.params.postId;
    const payload = req.body || {};

    const post = db.prepare('SELECT * FROM posts WHERE id = ?').get(postId);
    if (!post) {
      res.status(404).json({ error: 'Post not found' });
      return;
    }

    // Anything other than an array is treated as "no URLs supplied".
    const candidateList = Array.isArray(payload.urls) ? payload.urls : [];
    const alternateUrls = collectPostAlternateUrls(post.url, candidateList);

    storePostUrls(post.id, post.url, alternateUrls);
    removeSearchSeenEntries([post.url].concat(alternateUrls));

    // Report what is actually stored, not just what was submitted.
    const storedAlternates = [];
    for (const row of selectAlternateUrlsForPostStmt.all(post.id)) {
      storedAlternates.push(row.url);
    }

    res.json({
      success: true,
      primary_url: post.url,
      alternate_urls: storedAlternates
    });
  } catch (error) {
    res.status(500).json({ error: error.message });
  }
});
// Check by URL (for web interface auto-check)
app.post('/api/check-by-url', (req, res) => {
try {
@@ -1799,11 +2025,15 @@ app.post('/api/check-by-url', (req, res) => {
return res.status(400).json({ error: 'URL must be a valid Facebook link' });
}
const post = db.prepare('SELECT * FROM posts WHERE url = ?').get(normalizedUrl);
const post = findPostByUrl(normalizedUrl);
if (!post) {
return res.status(404).json({ error: 'Post not found' });
}
const alternateUrls = collectPostAlternateUrls(post.url, [normalizedUrl]);
storePostUrls(post.id, post.url, alternateUrls);
removeSearchSeenEntries([post.url, ...alternateUrls]);
// Check if deadline has passed
if (post.deadline_at) {
const deadline = new Date(post.deadline_at);
@@ -1970,8 +2200,8 @@ app.patch('/api/posts/:postId', (req, res) => {
const { url, is_successful } = req.body;
// Check if post exists
const post = db.prepare('SELECT id FROM posts WHERE id = ?').get(postId);
if (!post) {
const existingPost = db.prepare('SELECT * FROM posts WHERE id = ?').get(postId);
if (!existingPost) {
return res.status(404).json({ error: 'Post not found' });
}
@@ -1989,7 +2219,15 @@ app.patch('/api/posts/:postId', (req, res) => {
// Update URL
db.prepare('UPDATE posts SET url = ? WHERE id = ?').run(normalizedUrl, postId);
removeSearchSeenEntries([normalizedUrl]);
const alternateCandidates = [];
if (existingPost.url && existingPost.url !== normalizedUrl) {
alternateCandidates.push(existingPost.url);
}
const alternateUrls = collectPostAlternateUrls(normalizedUrl, alternateCandidates);
storePostUrls(postId, normalizedUrl, alternateUrls);
removeSearchSeenEntries([normalizedUrl, ...alternateUrls]);
return res.json({ success: true, url: normalizedUrl });
}