Title, Provider, tags from paperless

This commit is contained in:
MDeeApp
2025-10-11 11:32:04 +02:00
parent 8eb060f380
commit 342a73ecb5
11 changed files with 530 additions and 36 deletions

View File

@@ -0,0 +1,198 @@
import { PaperlessDocument } from "../types";
/** Top-level document fields probed, in this order, for a correspondent value. */
const CORRESPONDENT_KEYS = [
  "correspondent_name",
  "correspondent__name",
  "correspondent",
];

/**
 * Field names probed, in this order, for tag data — both on the document
 * itself and on object-shaped metadata.
 */
const TAG_SOURCES = [
  "tag_names",
  "tags__name",
  "tagNames",
  "tags",
  "tagLabels",
  "tag_details",
];
/**
 * Read the document's `title` field.
 *
 * @param document - Document whose `title` property is inspected.
 * @returns The trimmed title, or `null` when `title` is not a string or is
 *   empty / whitespace-only.
 */
export function extractPaperlessTitle(document: PaperlessDocument): string | null {
  const { title } = document as Record<string, unknown>;
  if (typeof title !== "string") {
    return null;
  }
  const trimmed = title.trim();
  return trimmed.length > 0 ? trimmed : null;
}
/** Properties probed, in priority order, on an object-shaped correspondent value. */
const CORRESPONDENT_NAME_FIELDS = ["name", "title", "label", "value"];

/**
 * Turn a raw correspondent value into a display name.
 *
 * A string is used directly. For an object, the first property from
 * `CORRESPONDENT_NAME_FIELDS` that holds a non-empty string wins. Each
 * property is checked individually: a `??` chain would stop at the first
 * non-nullish property, so an empty or non-string `name` would hide a usable
 * `title`, `label` or `value`.
 *
 * @returns The trimmed name, or `null` when no usable string is found.
 */
function readCorrespondentName(value: unknown): string | null {
  if (typeof value === "string") {
    const trimmed = value.trim();
    return trimmed.length > 0 ? trimmed : null;
  }
  if (!value || typeof value !== "object") {
    return null;
  }
  const fields = value as Record<string, unknown>;
  for (const field of CORRESPONDENT_NAME_FIELDS) {
    const candidate = fields[field];
    if (typeof candidate === "string" && candidate.trim().length > 0) {
      return candidate.trim();
    }
  }
  return null;
}

/**
 * Find the provider (correspondent) name of a document.
 *
 * Lookup order:
 * 1. the top-level fields listed in `CORRESPONDENT_KEYS`;
 * 2. `metadata` when it is an array, via `extractProviderFromArray`;
 * 3. `metadata` when it is an object: every property whose key contains
 *    "correspondent" (case-insensitive), in property order.
 *
 * @param document - Document to inspect.
 * @returns The trimmed provider name, or `null` when none is found.
 */
export function extractPaperlessProvider(document: PaperlessDocument): string | null {
  const record = document as Record<string, unknown>;

  for (const key of CORRESPONDENT_KEYS) {
    const name = readCorrespondentName(record[key]);
    if (name !== null) {
      return name;
    }
  }

  // Narrow at runtime instead of asserting a shape: metadata may be an array,
  // an object, or something unusable.
  const metadata: unknown = record.metadata;
  if (Array.isArray(metadata)) {
    // Array indices never contain "correspondent", so there is nothing further
    // to scan when the array lookup comes back empty.
    return extractProviderFromArray(metadata);
  }
  if (!metadata || typeof metadata !== "object") {
    return null;
  }

  for (const [key, value] of Object.entries(metadata)) {
    if (!key.toLowerCase().includes("correspondent")) {
      continue;
    }
    const name = readCorrespondentName(value);
    if (name !== null) {
      return name;
    }
  }
  return null;
}
/**
 * Gather the distinct tag names found on a document.
 *
 * Sources, visited in this order (the result keeps first-seen order):
 * 1. every `TAG_SOURCES` field on the document;
 * 2. `metadata` when it is an array: every entry is handed to
 *    `maybeCollectFromValue`;
 * 3. `metadata` otherwise: every `TAG_SOURCES` field, then the first
 *    non-nullish of `tags` / `tag_list` / `TagList`, then every property
 *    whose key contains "tag" (case-insensitive).
 *
 * NOTE(review): array-shaped metadata entries are collected without any
 * key filter, unlike `extractProviderFromArray`, which only accepts entries
 * mentioning "correspondent" — confirm this is intended.
 *
 * @param document - Document to inspect.
 * @returns The collected tag names, without duplicates.
 */
export function extractPaperlessTags(document: PaperlessDocument): string[] {
  const fields = document as Record<string, unknown>;
  const collected = new Set<string>();

  // Lenient intake: arrays go to the array collector, everything else to the
  // generic value collector.
  const harvest = (candidate: unknown): void => {
    if (Array.isArray(candidate)) {
      collectFromArray(candidate, collected);
    } else {
      maybeCollectFromValue(candidate, collected);
    }
  };

  // Strict intake: only arrays and delimited strings are accepted.
  const harvestListOrText = (candidate: unknown): void => {
    if (Array.isArray(candidate)) {
      collectFromArray(candidate, collected);
    } else if (typeof candidate === "string") {
      collectFromString(candidate, collected);
    }
  };

  for (const key of TAG_SOURCES) {
    harvest(fields[key]);
  }

  const metadata = fields.metadata as Record<string, unknown> | Array<unknown> | undefined;
  if (!metadata) {
    return [...collected];
  }

  if (Array.isArray(metadata)) {
    for (const entry of metadata) {
      maybeCollectFromValue(entry, collected);
    }
    return [...collected];
  }

  for (const key of TAG_SOURCES) {
    harvest(metadata[key]);
  }

  harvestListOrText(metadata.tags ?? metadata.tag_list ?? metadata.TagList);

  for (const [key, value] of Object.entries(metadata)) {
    if (key.toLowerCase().includes("tag")) {
      harvestListOrText(value);
    }
  }

  return [...collected];
}
/**
 * Collect tag names from the elements of an array.
 *
 * A string element that is non-empty after trimming is added as-is (it is
 * not split on delimiters). Every other element is handed to
 * `maybeCollectFromValue`.
 *
 * @param items - Elements to inspect.
 * @param names - Set receiving the collected names.
 */
function collectFromArray(items: Array<unknown>, names: Set<string>) {
  items.forEach((entry) => {
    const text = typeof entry === "string" ? entry.trim() : "";
    if (text.length > 0) {
      names.add(text);
      return;
    }
    maybeCollectFromValue(entry, names);
  });
}
/** Properties probed, in priority order, for the name of an object-shaped tag. */
const TAG_LABEL_FIELDS = ["name", "label", "title", "value", "slug"];

/**
 * Collect tag names from a value of unknown shape.
 *
 * - falsy values are ignored;
 * - a string is split into names by `collectFromString`;
 * - an array is delegated to `collectFromArray`;
 * - an object contributes the first non-empty string among
 *   `TAG_LABEL_FIELDS`; when none of those is usable, every non-empty string
 *   property is added and nested objects are searched recursively;
 * - any other primitive (number, boolean, ...) is ignored.
 *
 * Each label field is checked individually. A `??` chain would stop at the
 * first non-nullish field, so an unusable `name` (a number, an empty string)
 * would hide a usable `label` and push the object into the catch-all sweep,
 * which adds unrelated string properties too.
 *
 * @param value - Value to inspect.
 * @param names - Set receiving the collected names.
 */
function maybeCollectFromValue(value: unknown, names: Set<string>): void {
  if (!value) {
    return;
  }
  if (typeof value === "string") {
    collectFromString(value, names);
    return;
  }
  if (Array.isArray(value)) {
    collectFromArray(value, names);
    return;
  }
  if (typeof value !== "object") {
    // Numbers, booleans and the like carry no name.
    return;
  }

  const record = value as Record<string, unknown>;
  for (const field of TAG_LABEL_FIELDS) {
    const candidate = record[field];
    if (typeof candidate === "string" && candidate.trim().length > 0) {
      names.add(candidate.trim());
      return;
    }
  }

  // No recognised label field: fall back to every string the object holds,
  // descending into nested objects and arrays.
  for (const nested of Object.values(record)) {
    if (typeof nested === "string" && nested.trim().length > 0) {
      names.add(nested.trim());
    } else if (nested && typeof nested === "object") {
      maybeCollectFromValue(nested, names);
    }
  }
}
/**
 * Split a delimited string into tag names.
 *
 * The text is split on runs of `,` and `;`; each piece is trimmed and empty
 * pieces are dropped.
 *
 * @param value - Delimited text.
 * @param names - Set receiving the collected names.
 */
function collectFromString(value: string, names: Set<string>) {
  for (const piece of value.split(/[,;]+/)) {
    const cleaned = piece.trim();
    if (cleaned) {
      names.add(cleaned);
    }
  }
}
/** Properties that may identify what a metadata entry describes, in priority order. */
const PROVIDER_ENTRY_ID_FIELDS = ["slug", "key", "field"];

/** Properties that may hold the provider name of a metadata entry, in priority order. */
const PROVIDER_ENTRY_VALUE_FIELDS = ["value", "name", "label", "title"];

/**
 * Return the first of `fields` on `record` that holds a non-empty string,
 * trimmed, or `null` when there is none.
 */
function firstNonEmptyStringField(
  record: Record<string, unknown>,
  fields: readonly string[],
): string | null {
  for (const field of fields) {
    const raw = record[field];
    if (typeof raw === "string") {
      const trimmed = raw.trim();
      if (trimmed.length > 0) {
        return trimmed;
      }
    }
  }
  return null;
}

/**
 * Find a provider (correspondent) name in array-shaped metadata.
 *
 * An entry qualifies when one of its own keys contains "correspondent"
 * (case-insensitive), or when its identifier — the first non-empty string
 * among `slug`, `key`, `field` — does. The first qualifying entry with a
 * non-empty string in `value`, `name`, `label` or `title` supplies the
 * result.
 *
 * Fields are checked one at a time instead of with a `??` chain. A chain
 * would stop at the first non-nullish field, so a non-string `slug` would
 * hide a matching `key`, and an empty `value` would hide a usable `name`.
 *
 * NOTE(review): the value stored under a "correspondent"-named key itself is
 * never read (e.g. `{ correspondent_name: "ACME" }` yields nothing) —
 * confirm whether such entries occur.
 *
 * @param entries - Metadata entries; non-object entries are skipped.
 * @returns The trimmed provider name, or `null` when no entry supplies one.
 */
function extractProviderFromArray(entries: Array<unknown>): string | null {
  for (const entry of entries) {
    if (!entry || typeof entry !== "object") {
      continue;
    }
    const record = entry as Record<string, unknown>;

    const hasCorrespondentKey = Object.keys(record).some((key) =>
      key.toLowerCase().includes("correspondent"),
    );
    const identifier = firstNonEmptyStringField(record, PROVIDER_ENTRY_ID_FIELDS);
    const identifierMatches =
      identifier !== null && identifier.toLowerCase().includes("correspondent");
    if (!hasCorrespondentKey && !identifierMatches) {
      continue;
    }

    const provider = firstNonEmptyStringField(record, PROVIDER_ENTRY_VALUE_FIELDS);
    if (provider !== null) {
      return provider;
    }
  }
  return null;
}