This commit is contained in:
MDeeApp
2025-10-22 18:24:28 +02:00
parent 6a2b6e46e0
commit 36dff70f98
5 changed files with 648 additions and 46 deletions

View File

@@ -4,6 +4,7 @@ const Database = require('better-sqlite3');
const { v4: uuidv4 } = require('uuid');
const path = require('path');
const fs = require('fs');
const crypto = require('crypto');
const app = express();
const PORT = process.env.PORT || 3000;
@@ -21,6 +22,8 @@ const PROFILE_SCOPE_MAX_AGE = 60 * 60 * 24 * 30; // 30 days
const FACEBOOK_TRACKING_PARAM_PREFIXES = ['__cft__', '__tn__', '__eep__', 'mibextid'];
const SEARCH_POST_HIDE_THRESHOLD = 2;
const SEARCH_POST_RETENTION_DAYS = 90;
const MAX_POST_TEXT_LENGTH = 4000;
const MIN_TEXT_HASH_LENGTH = 120;
const screenshotDir = path.join(__dirname, 'data', 'screenshots');
if (!fs.existsSync(screenshotDir)) {
@@ -65,6 +68,53 @@ const dbPath = path.join(__dirname, 'data', 'tracker.db');
const db = new Database(dbPath);
db.pragma('foreign_keys = ON');
/**
 * Add a column to a table unless the table already reports one with that name.
 *
 * The table name and column definition are interpolated directly into the SQL
 * text, so callers must only pass trusted, hard-coded identifiers.
 *
 * @param {string} table - Table to inspect and, if needed, alter.
 * @param {string} column - Column name to look for in `PRAGMA table_info`.
 * @param {string} definition - Text placed after `ADD COLUMN` (name and type).
 */
function ensureColumn(table, column, definition) {
  const existingColumns = db.prepare(`PRAGMA table_info(${table})`).all();
  for (const existing of existingColumns) {
    if (existing.name === column) {
      // Column is already there; nothing to migrate.
      return;
    }
  }
  db.prepare(`ALTER TABLE ${table} ADD COLUMN ${definition}`).run();
}
// Schema migration: make sure the text and content-key columns exist on `posts`.
// NOTE(review): everything in this section (ALTER TABLE, CREATE INDEX, the prepared
// statements and the backfill queries) touches `posts` at module load. No CREATE TABLE
// for `posts` is visible before this point — confirm the table is guaranteed to exist
// here, including on a brand-new database file.
ensureColumn('posts', 'post_text', 'post_text TEXT');
ensureColumn('posts', 'post_text_hash', 'post_text_hash TEXT');
ensureColumn('posts', 'content_key', 'content_key TEXT');
// Index for lookups by content key.
db.exec(`
CREATE INDEX IF NOT EXISTS idx_posts_content_key
ON posts(content_key)
`);
// Prepared statements used by the backfills below.
const updateContentKeyStmt = db.prepare('UPDATE posts SET content_key = ? WHERE id = ?');
const updatePostTextColumnsStmt = db.prepare('UPDATE posts SET post_text = ?, post_text_hash = ? WHERE id = ?');
// Backfill 1: derive content_key from the URL for rows where it is NULL or empty.
const postsMissingKey = db.prepare(`
SELECT id, url
FROM posts
WHERE content_key IS NULL OR content_key = ''
`).all();
for (const entry of postsMissingKey) {
const normalizedUrl = normalizeFacebookPostUrl(entry.url);
const key = extractFacebookContentKey(normalizedUrl);
// Rows for which no key can be derived are left untouched.
if (key) {
updateContentKeyStmt.run(key, entry.id);
}
}
// Backfill 2: compute post_text_hash for rows that have text but no hash yet.
const postsMissingHash = db.prepare(`
SELECT id, post_text
FROM posts
WHERE post_text IS NOT NULL
AND TRIM(post_text) <> ''
AND (post_text_hash IS NULL OR post_text_hash = '')
`).all();
for (const entry of postsMissingHash) {
// The stored text is overwritten with its normalized form so that text and hash agree.
// NOTE(review): text made up solely of non-space whitespace may pass the SQL filter yet
// normalize to null, in which case both columns are written as NULL — confirm that is acceptable.
const normalizedText = normalizePostText(entry.post_text);
const hash = computePostTextHash(normalizedText);
updatePostTextColumnsStmt.run(normalizedText, hash, entry.id);
}
function parseCookies(header) {
if (!header || typeof header !== 'string') {
return {};
@@ -242,6 +292,30 @@ function normalizeCreatorName(value) {
return trimmed.slice(0, 160);
}
/**
 * Normalize free-form post text for storage and hashing.
 *
 * Every run of whitespace is collapsed to a single space, both ends are
 * trimmed, and the result is capped at MAX_POST_TEXT_LENGTH UTF-16 code units.
 * The function is idempotent: normalizing an already normalized value returns
 * it unchanged, so stored text and its hash cannot drift apart on re-runs.
 *
 * @param {*} value - Raw text; anything that is not a string yields null.
 * @returns {string|null} The normalized text, or null when nothing remains.
 */
function normalizePostText(value) {
  if (typeof value !== 'string') {
    return null;
  }

  let text = value.replace(/\s+/g, ' ').trim();
  if (!text) {
    return null;
  }

  if (text.length > MAX_POST_TEXT_LENGTH) {
    text = text.slice(0, MAX_POST_TEXT_LENGTH);

    // The cut may land between the two halves of a surrogate pair; a lone high
    // surrogate is not valid text and would be hashed as U+FFFD, so drop it.
    const lastUnit = text.charCodeAt(text.length - 1);
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
      text = text.slice(0, -1);
    }

    // The cut may also land right after a separator space; strip it so that a
    // second normalization pass does not produce a different string.
    text = text.replace(/\s+$/, '');
  }

  return text || null;
}
/**
 * Compute the SHA-256 digest of a post text, hex-encoded.
 *
 * @param {string|null|undefined} text - Text to hash (read as UTF-8).
 * @returns {string|null} Hex digest, or null for empty or missing text.
 */
function computePostTextHash(text) {
  if (text) {
    const hasher = crypto.createHash('sha256');
    hasher.update(text, 'utf8');
    return hasher.digest('hex');
  }
  return null;
}
function normalizeFacebookPostUrl(rawValue) {
if (typeof rawValue !== 'string') {
return null;
@@ -274,6 +348,10 @@ function normalizeFacebookPostUrl(rawValue) {
return null;
}
parsed.hostname = 'www.facebook.com';
parsed.protocol = 'https:';
parsed.port = '';
const cleanedParams = new URLSearchParams();
parsed.searchParams.forEach((paramValue, paramKey) => {
const lowerKey = paramKey.toLowerCase();
@@ -307,6 +385,88 @@ function normalizeFacebookPostUrl(rawValue) {
return formatted.replace(/[?&]$/, '');
}
/**
 * Derive a content key from a Facebook URL so that different URL shapes that
 * point at the same item map to the same string.
 *
 * Rules are tried in order and the first match wins:
 *   reel:<id>                     /reel/<id>
 *   video:<id>                    /watch or /video.php with ?v= / ?video_id=
 *   photo:<fbid>                  /photo or /photo.php with ?fbid=
 *   story:<owner>:<story_fbid>    any path carrying ?story_fbid=
 *   group-post:<group>:<post>     /groups/<group>/posts|permalink/<post>
 *   profile-post:<owner>:<post>   /<owner>/posts/<post>
 *   video:<id>                    /<owner>/videos/<id>
 *   photo:<id>                    /<owner>/photos/<set>/<id>
 *   generic:<path>?<sorted query> everything else
 *
 * Path-derived parts come from the lower-cased path; query-derived parts keep
 * their original case.
 *
 * @param {string|null|undefined} normalizedUrl - URL to derive the key from.
 * @returns {string|null} The content key, or null when no URL was given.
 */
function extractFacebookContentKey(normalizedUrl) {
  if (!normalizedUrl) {
    return null;
  }

  let parsed;
  try {
    parsed = new URL(normalizedUrl);
  } catch (error) {
    // Unparseable input still gets a deterministic key.
    return `generic:${normalizedUrl}`;
  }

  // Trailing slashes are stripped so "/watch/" and "/watch" are the same path.
  const pathname = (parsed.pathname || '/').replace(/\/+$/, '') || '/';
  const lowerPath = pathname.toLowerCase();
  const params = parsed.searchParams;

  const reelMatch = lowerPath.match(/^\/reel\/([^/]+)/);
  if (reelMatch) {
    return `reel:${reelMatch[1]}`;
  }

  const watchId = params.get('v') || params.get('video_id');
  if (watchId && (lowerPath === '/watch' || lowerPath === '/video.php')) {
    return `video:${watchId}`;
  }

  const photoId = params.get('fbid');
  if (photoId && (lowerPath === '/photo.php' || lowerPath === '/photo')) {
    return `photo:${photoId}`;
  }

  // Any URL carrying story_fbid is keyed here regardless of its path, which
  // covers "/", "/permalink.php" and "/story.php" alike.
  const storyFbid = params.get('story_fbid');
  if (storyFbid) {
    const ownerId =
      params.get('id') ||
      params.get('gid') ||
      params.get('group_id') ||
      params.get('page_id') ||
      '';
    return `story:${ownerId}:${storyFbid}`;
  }

  const groupPostMatch = lowerPath.match(/^\/groups\/([^/]+)\/(?:posts|permalink)\/([^/]+)/);
  if (groupPostMatch) {
    return `group-post:${groupPostMatch[1]}:${groupPostMatch[2]}`;
  }

  const pagePostMatch = lowerPath.match(/^\/([^/]+)\/posts\/([^/]+)/);
  if (pagePostMatch) {
    return `profile-post:${pagePostMatch[1]}:${pagePostMatch[2]}`;
  }

  // NOTE(review): only the first path segment after /videos/ is captured. If
  // URLs shaped like /<page>/videos/<slug>/<id> occur, the slug would become
  // the key — confirm against real data.
  const pageVideoMatch = lowerPath.match(/^\/([^/]+)\/videos\/([^/]+)/);
  if (pageVideoMatch) {
    return `video:${pageVideoMatch[2]}`;
  }

  const pagePhotoMatch = lowerPath.match(/^\/([^/]+)\/photos\/[^/]+\/([^/]+)/);
  if (pagePhotoMatch) {
    return `photo:${pagePhotoMatch[2]}`;
  }

  // Fallback: path plus query parameters in sorted order, so parameter order
  // in the URL does not matter.
  const sortedParams = Array.from(params.entries())
    .map(([key, value]) => `${key}=${value}`)
    .sort()
    .join('&');
  return `generic:${lowerPath}?${sortedParams}`;
}
function getRequiredProfiles(targetCount) {
const count = clampTargetCount(targetCount);
return Array.from({ length: count }, (_, index) => index + 1);
@@ -393,6 +553,12 @@ db.exec(`
target_count INTEGER NOT NULL,
checked_count INTEGER DEFAULT 0,
screenshot_path TEXT,
created_by_profile INTEGER,
created_by_name TEXT,
deadline_at DATETIME,
post_text TEXT,
post_text_hash TEXT,
content_key TEXT,
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
last_change DATETIME DEFAULT CURRENT_TIMESTAMP
);
@@ -415,17 +581,9 @@ db.exec(`
`);
db.exec(`
CREATE UNIQUE INDEX IF NOT EXISTS idx_post_urls_primary
ON post_urls(post_id)
WHERE is_primary = 1;
DROP INDEX IF EXISTS idx_post_urls_primary;
`);
db.prepare(`
INSERT OR IGNORE INTO post_urls (post_id, url, is_primary)
SELECT id, url, 1
FROM posts
`).run();
db.exec(`
CREATE TABLE IF NOT EXISTS checks (
id INTEGER PRIMARY KEY AUTOINCREMENT,
@@ -497,13 +655,6 @@ db.exec(`
ON search_seen_posts(last_seen_at);
`);
/**
 * Add a column to a table when `PRAGMA table_info` does not list it yet.
 *
 * @param {string} table - Table to inspect; interpolated into SQL, so trusted input only.
 * @param {string} column - Column name to look for.
 * @param {string} definition - Text placed after `ADD COLUMN`.
 */
const ensureColumn = (table, column, definition) => {
  const alreadyPresent = db
    .prepare(`PRAGMA table_info(${table})`)
    .all()
    .some((existing) => existing.name === column);
  if (alreadyPresent) {
    return;
  }
  db.exec(`ALTER TABLE ${table} ADD COLUMN ${definition}`);
};
ensureColumn('posts', 'checked_count', 'checked_count INTEGER DEFAULT 0');
ensureColumn('posts', 'screenshot_path', 'screenshot_path TEXT');
ensureColumn('posts', 'created_by_profile', 'created_by_profile INTEGER');
@@ -588,6 +739,8 @@ function normalizeExistingPostUrls() {
try {
db.prepare('UPDATE posts SET url = ? WHERE id = ?').run(cleaned, row.id);
const updatedKey = extractFacebookContentKey(cleaned);
updateContentKeyStmt.run(updatedKey || null, row.id);
updatedCount += 1;
} catch (error) {
if (error && error.code === 'SQLITE_CONSTRAINT_UNIQUE') {
@@ -605,6 +758,42 @@ function normalizeExistingPostUrls() {
normalizeExistingPostUrls();
/**
 * Rewrite every `post_urls.url` to its normalized form.
 *
 * Rows whose URL cannot be normalized, or is already normalized, are left
 * alone. When the rewrite fails with a UNIQUE constraint error, the row being
 * rewritten is deleted instead. Any other failure is logged as a warning and
 * the row is skipped. A summary line is logged when at least one row was
 * updated or removed.
 */
function normalizeExistingPostUrlMappings() {
  const rows = db.prepare('SELECT id, url FROM post_urls').all();

  // Prepared once and reused for every row instead of being re-compiled on
  // each loop iteration.
  const updateUrlStmt = db.prepare('UPDATE post_urls SET url = ? WHERE id = ?');
  const deleteRowStmt = db.prepare('DELETE FROM post_urls WHERE id = ?');

  let updated = 0;
  let removed = 0;

  for (const row of rows) {
    const normalized = normalizeFacebookPostUrl(row.url);
    if (!normalized || normalized === row.url) {
      continue;
    }

    try {
      updateUrlStmt.run(normalized, row.id);
      updated += 1;
    } catch (error) {
      if (error && error.code === 'SQLITE_CONSTRAINT_UNIQUE') {
        // The normalized URL is already stored; this row is redundant.
        deleteRowStmt.run(row.id);
        removed += 1;
      } else {
        console.warn(`Failed to normalize post_urls entry ${row.id}:`, error.message);
      }
    }
  }

  if (updated || removed) {
    console.log(`Normalized post_urls entries: updated ${updated}, removed ${removed}`);
  }
}
// Normalize the stored post_urls rows once at module load.
normalizeExistingPostUrlMappings();
// Remove every post_urls row whose URL equals some post's own `posts.url`,
// regardless of which post the row is attached to.
db.prepare('DELETE FROM post_urls WHERE url IN (SELECT url FROM posts)').run();
function truncateString(value, maxLength) {
if (typeof value !== 'string') {
return value;
@@ -1239,19 +1428,22 @@ function collectPostAlternateUrls(primaryUrl, candidates = []) {
return [];
}
const primaryKey = extractFacebookContentKey(normalizedPrimary);
const normalized = collectNormalizedFacebookUrls(normalizedPrimary, candidates);
return normalized.filter(url => url !== normalizedPrimary);
return normalized.filter((url) => {
if (url === normalizedPrimary) {
return false;
}
const candidateKey = extractFacebookContentKey(url);
return candidateKey && candidateKey === primaryKey;
});
}
const insertPostUrlStmt = db.prepare(`
INSERT OR IGNORE INTO post_urls (post_id, url, is_primary)
VALUES (?, ?, ?)
`);
const setPrimaryPostUrlStmt = db.prepare(`
UPDATE post_urls
SET is_primary = CASE WHEN url = ? THEN 1 ELSE 0 END
WHERE post_id = ?
VALUES (?, ?, 0)
`);
const selectPostByPrimaryUrlStmt = db.prepare('SELECT * FROM posts WHERE url = ?');
@@ -1268,9 +1460,9 @@ const selectAlternateUrlsForPostStmt = db.prepare(`
SELECT url
FROM post_urls
WHERE post_id = ?
AND is_primary = 0
ORDER BY created_at ASC
`);
// Selects posts whose stored post_text_hash equals the given hash.
// NOTE(review): no index on post_text_hash is created in the visible code —
// confirm whether this lookup needs one.
const selectPostByTextHashStmt = db.prepare('SELECT * FROM posts WHERE post_text_hash = ?');
function storePostUrls(postId, primaryUrl, additionalUrls = []) {
if (!postId || !primaryUrl) {
@@ -1282,8 +1474,7 @@ function storePostUrls(postId, primaryUrl, additionalUrls = []) {
return;
}
insertPostUrlStmt.run(postId, normalizedPrimary, 1);
setPrimaryPostUrlStmt.run(normalizedPrimary, postId);
const primaryKey = extractFacebookContentKey(normalizedPrimary);
if (Array.isArray(additionalUrls)) {
for (const candidate of additionalUrls) {
@@ -1291,6 +1482,16 @@ function storePostUrls(postId, primaryUrl, additionalUrls = []) {
if (!normalized || normalized === normalizedPrimary) {
continue;
}
const candidateKey = extractFacebookContentKey(normalized);
if (!candidateKey || candidateKey !== primaryKey) {
continue;
}
const existingPostId = findPostIdByUrl(normalized);
if (existingPostId && existingPostId !== postId) {
continue;
}
insertPostUrlStmt.run(postId, normalized, 0);
}
}
@@ -1398,6 +1599,24 @@ function mapPostRow(post) {
return null;
}
let postContentKey = post.content_key;
if (!postContentKey) {
const normalizedUrl = normalizeFacebookPostUrl(post.url);
postContentKey = extractFacebookContentKey(normalizedUrl);
if (postContentKey) {
updateContentKeyStmt.run(postContentKey, post.id);
post.content_key = postContentKey;
}
}
if (post.post_text && (!post.post_text_hash || !post.post_text_hash.trim())) {
const normalizedPostText = normalizePostText(post.post_text);
const hash = computePostTextHash(normalizedPostText);
post.post_text = normalizedPostText;
post.post_text_hash = hash;
updatePostTextColumnsStmt.run(normalizedPostText, hash, post.id);
}
const checks = db.prepare('SELECT id, profile_number, checked_at FROM checks WHERE post_id = ? ORDER BY checked_at ASC').all(post.id);
const requiredProfiles = getRequiredProfiles(post.target_count);
const { statuses, completedChecks } = buildProfileStatuses(requiredProfiles, checks);
@@ -1467,7 +1686,10 @@ function mapPostRow(post) {
created_by_profile_name: creatorProfile ? getProfileName(creatorProfile) : null,
created_by_name: creatorName,
deadline_at: post.deadline_at || null,
alternate_urls: alternateUrls
alternate_urls: alternateUrls,
post_text: post.post_text || null,
post_text_hash: post.post_text_hash || null,
content_key: post.content_key || postContentKey || null
};
}
@@ -1736,7 +1958,8 @@ app.post('/api/posts', (req, res) => {
created_by_profile,
created_by_name,
profile_number,
deadline_at
deadline_at,
post_text
} = req.body;
const validatedTargetCount = validateTargetCount(typeof target_count === 'undefined' ? 1 : target_count);
@@ -1754,6 +1977,31 @@ app.post('/api/posts', (req, res) => {
const id = uuidv4();
const normalizedPostText = normalizePostText(post_text);
const postTextHash = computePostTextHash(normalizedPostText);
const contentKey = extractFacebookContentKey(normalizedUrl);
const useTextHashDedup = normalizedPostText && normalizedPostText.length >= MIN_TEXT_HASH_LENGTH && postTextHash;
if (useTextHashDedup) {
let existingByHash = selectPostByTextHashStmt.get(postTextHash);
if (existingByHash) {
const alternateCandidates = [normalizedUrl, ...alternateUrlsInput];
const alternateUrls = collectPostAlternateUrls(existingByHash.url, alternateCandidates);
storePostUrls(existingByHash.id, existingByHash.url, alternateUrls);
const cleanupSet = new Set([existingByHash.url, normalizedUrl, ...alternateUrls]);
removeSearchSeenEntries(Array.from(cleanupSet));
if (normalizedPostText && (!existingByHash.post_text || !existingByHash.post_text.trim())) {
updatePostTextColumnsStmt.run(normalizedPostText, postTextHash, existingByHash.id);
touchPost(existingByHash.id);
existingByHash = db.prepare('SELECT * FROM posts WHERE id = ?').get(existingByHash.id);
}
return res.json(mapPostRow(existingByHash));
}
}
let creatorProfile = sanitizeProfileNumber(created_by_profile);
if (!creatorProfile) {
creatorProfile = sanitizeProfileNumber(profile_number) || null;
@@ -1770,10 +2018,35 @@ app.post('/api/posts', (req, res) => {
const creatorDisplayName = normalizeCreatorName(created_by_name);
const stmt = db.prepare(`
INSERT INTO posts (id, url, title, target_count, checked_count, screenshot_path, created_by_profile, created_by_name, deadline_at, last_change)
VALUES (?, ?, ?, ?, 0, NULL, ?, ?, ?, CURRENT_TIMESTAMP)
INSERT INTO posts (
id,
url,
title,
target_count,
checked_count,
screenshot_path,
created_by_profile,
created_by_name,
deadline_at,
post_text,
post_text_hash,
content_key,
last_change
)
VALUES (?, ?, ?, ?, 0, NULL, ?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP)
`);
stmt.run(id, normalizedUrl, title || '', validatedTargetCount, creatorProfile, creatorDisplayName, normalizedDeadline);
stmt.run(
id,
normalizedUrl,
title || '',
validatedTargetCount,
creatorProfile,
creatorDisplayName,
normalizedDeadline,
normalizedPostText,
postTextHash,
contentKey || null
);
const post = db.prepare('SELECT * FROM posts WHERE id = ?').get(id);
@@ -1794,7 +2067,15 @@ app.post('/api/posts', (req, res) => {
app.put('/api/posts/:postId', (req, res) => {
try {
const { postId } = req.params;
const { target_count, title, created_by_profile, created_by_name, deadline_at, url } = req.body || {};
const {
target_count,
title,
created_by_profile,
created_by_name,
deadline_at,
url,
post_text
} = req.body || {};
const alternateUrlsInput = Array.isArray(req.body && req.body.alternate_urls) ? req.body.alternate_urls : [];
const existingPost = db.prepare('SELECT * FROM posts WHERE id = ?').get(postId);
@@ -1805,6 +2086,7 @@ app.put('/api/posts/:postId', (req, res) => {
const updates = [];
const params = [];
let normalizedUrlForCleanup = null;
let updatedContentKey = null;
if (typeof target_count !== 'undefined') {
const validatedTargetCount = validateTargetCount(target_count);
@@ -1856,6 +2138,19 @@ app.put('/api/posts/:postId', (req, res) => {
updates.push('url = ?');
params.push(normalizedUrl);
normalizedUrlForCleanup = normalizedUrl;
const newContentKey = extractFacebookContentKey(normalizedUrl);
updates.push('content_key = ?');
params.push(newContentKey || null);
updatedContentKey = newContentKey || null;
}
if (typeof post_text !== 'undefined') {
const normalizedPostText = normalizePostText(post_text);
const postTextHash = computePostTextHash(normalizedPostText);
updates.push('post_text = ?');
params.push(normalizedPostText);
updates.push('post_text_hash = ?');
params.push(postTextHash);
}
if (!updates.length) {
@@ -2217,8 +2512,9 @@ app.patch('/api/posts/:postId', (req, res) => {
return res.status(409).json({ error: 'URL already used by another post' });
}
const contentKey = extractFacebookContentKey(normalizedUrl);
// Update URL
db.prepare('UPDATE posts SET url = ? WHERE id = ?').run(normalizedUrl, postId);
db.prepare('UPDATE posts SET url = ?, content_key = ? WHERE id = ?').run(normalizedUrl, contentKey || null, postId);
const alternateCandidates = [];
if (existingPost.url && existingPost.url !== normalizedUrl) {