import type { PaperlessDocument } from "../types";

/** Top-level document fields checked, in priority order, for the correspondent (provider). */
const CORRESPONDENT_KEYS = ["correspondent_name", "correspondent__name", "correspondent"];

/** Field names checked for tag data, both on the document and on a non-array `metadata` object. */
const TAG_SOURCES = ["tag_names", "tags__name", "tagNames", "tags", "tagLabels", "tag_details"];

/**
 * Returns the trimmed string when `value` is a string with non-whitespace
 * content, otherwise `null`.
 */
function nonEmptyTrimmed(value: unknown): string | null {
  if (typeof value !== "string") {
    return null;
  }
  const trimmed = value.trim();
  return trimmed.length > 0 ? trimmed : null;
}

/**
 * Returns the first candidate (in argument order) that is a non-blank string,
 * trimmed, or `null` when none qualifies.
 *
 * Unlike a `a ?? b ?? c` chain, a candidate that is present but blank or not a
 * string does not stop the search.
 */
function firstNonEmptyString(...candidates: unknown[]): string | null {
  for (const candidate of candidates) {
    const text = nonEmptyTrimmed(candidate);
    if (text !== null) {
      return text;
    }
  }
  return null;
}

/**
 * Reads a provider name from a single field value: either the value itself
 * (when it is a non-blank string) or, for objects, the first usable string
 * among its `name`, `title`, `label`, `value` properties.
 */
function readProviderValue(value: unknown): string | null {
  const direct = nonEmptyTrimmed(value);
  if (direct !== null) {
    return direct;
  }
  if (value && typeof value === "object") {
    const fields = value as Record<string, unknown>;
    return firstNonEmptyString(fields.name, fields.title, fields.label, fields.value);
  }
  return null;
}

/**
 * Extracts the document title.
 *
 * @param document - Document to read the `title` field from.
 * @returns The trimmed title, or `null` when it is missing, blank, or not a string.
 */
export function extractPaperlessTitle(document: PaperlessDocument): string | null {
  const record = document as Record<string, unknown>;
  return nonEmptyTrimmed(record.title);
}

/**
 * Extracts the provider (correspondent) name of a document.
 *
 * Lookup order:
 * 1. the top-level fields listed in `CORRESPONDENT_KEYS`;
 * 2. when `metadata` is an array, its entries (see `extractProviderFromArray`);
 * 3. when `metadata` is an object, every key whose lower-cased name contains
 *    "correspondent".
 *
 * @param document - Document to inspect.
 * @returns The trimmed provider name, or `null` when none is found.
 */
export function extractPaperlessProvider(document: PaperlessDocument): string | null {
  const record = document as Record<string, unknown>;

  for (const key of CORRESPONDENT_KEYS) {
    const provider = readProviderValue(record[key]);
    if (provider !== null) {
      return provider;
    }
  }

  const metadata = record.metadata;
  if (!metadata || typeof metadata !== "object") {
    return null;
  }

  if (Array.isArray(metadata)) {
    // Array indices never contain "correspondent", so the key scan below
    // would find nothing for an array; return the array result directly.
    return extractProviderFromArray(metadata);
  }

  for (const [key, value] of Object.entries(metadata as Record<string, unknown>)) {
    if (!key.toLowerCase().includes("correspondent")) {
      continue;
    }
    const provider = readProviderValue(value);
    if (provider !== null) {
      return provider;
    }
  }

  return null;
}

/**
 * Collects the distinct tag names of a document.
 *
 * Sources, in order: the top-level `TAG_SOURCES` fields, then `metadata`.
 * An array `metadata` has every entry scanned; an object `metadata` has its
 * `TAG_SOURCES` fields, its `tags` / `tag_list` / `TagList` field, and any
 * key containing "tag" (case-insensitive) scanned.
 *
 * @param document - Document to inspect.
 * @returns Unique trimmed tag names in first-seen order.
 */
export function extractPaperlessTags(document: PaperlessDocument): string[] {
  const record = document as Record<string, unknown>;
  const names = new Set<string>();

  for (const key of TAG_SOURCES) {
    // maybeCollectFromValue dispatches arrays to collectFromArray itself.
    maybeCollectFromValue(record[key], names);
  }

  const metadata = record.metadata;
  if (Array.isArray(metadata)) {
    // NOTE(review): every entry is scanned, not only tag-related ones — confirm
    // that array-shaped metadata is expected to contain tag data exclusively.
    for (const entry of metadata) {
      maybeCollectFromValue(entry, names);
    }
  } else if (metadata && typeof metadata === "object") {
    const meta = metadata as Record<string, unknown>;

    for (const key of TAG_SOURCES) {
      maybeCollectFromValue(meta[key], names);
    }

    collectListOrString(meta.tags ?? meta.tag_list ?? meta.TagList, names);

    for (const [key, value] of Object.entries(meta)) {
      if (key.toLowerCase().includes("tag")) {
        collectListOrString(value, names);
      }
    }
  }

  return Array.from(names);
}

/**
 * Collects from `value` only when it is an array or a delimited string;
 * any other kind of value (including plain objects) is ignored.
 */
function collectListOrString(value: unknown, names: Set<string>): void {
  if (Array.isArray(value)) {
    collectFromArray(value, names);
  } else if (typeof value === "string") {
    collectFromString(value, names);
  }
}

/**
 * Adds each array item to `names`. String items are added trimmed and are NOT
 * split on delimiters; every other item goes through `maybeCollectFromValue`.
 */
function collectFromArray(items: Array<unknown>, names: Set<string>): void {
  for (const item of items) {
    const text = nonEmptyTrimmed(item);
    if (text !== null) {
      names.add(text);
      continue;
    }
    maybeCollectFromValue(item, names);
  }
}

/**
 * Collects names from a value of unknown shape: strings are split on
 * delimiters, arrays are walked, and objects contribute either their
 * `name` / `label` / `title` / `value` / `slug` field or, failing that,
 * every string found among their property values (recursively).
 */
function maybeCollectFromValue(value: unknown, names: Set<string>): void {
  if (!value) {
    return;
  }
  if (typeof value === "string") {
    collectFromString(value, names);
    return;
  }
  if (Array.isArray(value)) {
    collectFromArray(value, names);
    return;
  }

  const record = value as Record<string, unknown>;
  // Only the first non-nullish field is considered here; when it is not a
  // usable string the whole object is scanned instead.
  const labelled = nonEmptyTrimmed(
    record.name ?? record.label ?? record.title ?? record.value ?? record.slug,
  );
  if (labelled !== null) {
    names.add(labelled);
    return;
  }

  for (const nested of Object.values(record)) {
    const text = nonEmptyTrimmed(nested);
    if (text !== null) {
      names.add(text);
    } else if (nested && typeof nested === "object") {
      maybeCollectFromValue(nested, names);
    }
  }
}

/** Splits `value` on runs of commas/semicolons and adds each non-blank, trimmed part. */
function collectFromString(value: string, names: Set<string>): void {
  for (const part of value.split(/[,;]+/)) {
    const trimmed = part.trim();
    if (trimmed) {
      names.add(trimmed);
    }
  }
}

/**
 * Finds a provider name in array-shaped metadata.
 *
 * An entry is considered when one of its keys contains "correspondent"
 * (case-insensitive) or when its `slug` / `key` / `field` string does. The
 * provider is the first usable string among `value`, `name`, `label`, `title`.
 *
 * @param entries - Metadata entries; non-object entries are skipped.
 * @returns The trimmed provider name of the first matching entry that has one, or `null`.
 */
function extractProviderFromArray(entries: Array<unknown>): string | null {
  for (const entry of entries) {
    if (!entry || typeof entry !== "object") {
      continue;
    }
    const record = entry as Record<string, unknown>;

    const hasCorrespondentKey = Object.keys(record).some((key) =>
      key.toLowerCase().includes("correspondent"),
    );
    const slug = firstNonEmptyString(record.slug, record.key, record.field);
    const slugMatches = slug !== null && slug.toLowerCase().includes("correspondent");
    if (!hasCorrespondentKey && !slugMatches) {
      continue;
    }

    // NOTE(review): an entry such as { correspondent: "ACME" } matches by key
    // but its own value is never read — confirm whether that is intended.
    const provider = firstNonEmptyString(record.value, record.name, record.label, record.title);
    if (provider !== null) {
      return provider;
    }
  }
  return null;
}