199 lines
5.8 KiB
TypeScript
199 lines
5.8 KiB
TypeScript
import { PaperlessDocument } from "../types";
|
|
|
|
/**
 * Top-level document fields that `extractPaperlessProvider` probes, in this
 * order, when looking for a correspondent name.
 */
const CORRESPONDENT_KEYS = [
  "correspondent_name",
  "correspondent__name",
  "correspondent",
];
|
|
|
|
/**
 * Field names that `extractPaperlessTags` reads tag data from, in this order,
 * both on the document itself and on an object-shaped `metadata` field.
 */
const TAG_SOURCES = [
  "tag_names",
  "tags__name",
  "tagNames",
  "tags",
  "tagLabels",
  "tag_details",
];
|
|
|
|
/**
 * Reads the `title` field of a document.
 *
 * @param document - Document whose `title` field is inspected.
 * @returns The title with surrounding whitespace removed, or `null` when the
 *   field is not a string or contains only whitespace.
 */
export function extractPaperlessTitle(document: PaperlessDocument): string | null {
  const { title } = document as Record<string, unknown>;
  if (typeof title !== "string") {
    return null;
  }
  const trimmed = title.trim();
  return trimmed.length > 0 ? trimmed : null;
}
|
|
|
|
/**
 * Looks up the correspondent (provider) name of a document.
 *
 * Sources are tried in this order and the first hit wins:
 * 1. each field listed in `CORRESPONDENT_KEYS` on the document itself;
 * 2. when `metadata` is an array, whatever `extractProviderFromArray` finds;
 * 3. every own key of `metadata` whose name contains "correspondent"
 *    (case-insensitive).
 *
 * A field value counts as a hit when it is a non-blank string, or an object
 * whose first non-nullish property among `name`, `title`, `label`, `value`
 * is a non-blank string.
 *
 * @param document - Document to inspect.
 * @returns The trimmed provider name, or `null` when no source yields one.
 */
export function extractPaperlessProvider(document: PaperlessDocument): string | null {
  // Trimmed text for non-blank strings, null for everything else.
  const nonBlank = (raw: unknown): string | null => {
    if (typeof raw !== "string") {
      return null;
    }
    const trimmed = raw.trim();
    return trimmed.length > 0 ? trimmed : null;
  };

  // Accepts either a plain string or an object carrying the name in one of
  // its well-known properties.
  const resolveName = (raw: unknown): string | null => {
    const direct = nonBlank(raw);
    if (direct !== null) {
      return direct;
    }
    if (!raw || typeof raw !== "object") {
      return null;
    }
    const fields = raw as Record<string, unknown>;
    // `??` stops at the first non-nullish property, even if that property
    // later turns out not to be a usable string.
    return nonBlank(fields.name ?? fields.title ?? fields.label ?? fields.value);
  };

  const fields = document as Record<string, unknown>;

  for (const key of CORRESPONDENT_KEYS) {
    const found = resolveName(fields[key]);
    if (found !== null) {
      return found;
    }
  }

  const metadata = fields.metadata as Record<string, unknown> | undefined;
  if (!metadata) {
    return null;
  }

  if (Array.isArray(metadata)) {
    const fromEntries = extractProviderFromArray(metadata);
    if (fromEntries) {
      return fromEntries;
    }
    // An array without a match still reaches the key scan below, where its
    // index keys never contain "correspondent".
  }

  const matchingKeys = Object.keys(metadata).filter((key) =>
    key.toLowerCase().includes("correspondent"),
  );
  for (const key of matchingKeys) {
    const found = resolveName(metadata[key]);
    if (found !== null) {
      return found;
    }
  }

  return null;
}
|
|
|
|
/**
 * Gathers the distinct tag names found on a document.
 *
 * Collection order (which is also the order of the returned array):
 * 1. every `TAG_SOURCES` field on the document;
 * 2. when `metadata` is an array, each of its entries;
 * 3. otherwise, on `metadata`: every `TAG_SOURCES` field, then the first
 *    non-nullish of `tags` / `tag_list` / `TagList`, then every own key whose
 *    name contains "tag" (case-insensitive).
 *
 * Steps after the `TAG_SOURCES` pass on `metadata` only accept arrays and
 * strings; other value shapes are ignored there.
 *
 * @param document - Document to inspect.
 * @returns The unique tag names in first-seen order; empty when none are found.
 */
export function extractPaperlessTags(document: PaperlessDocument): string[] {
  const fields = document as Record<string, unknown>;
  const collected = new Set<string>();

  // Arrays go to collectFromArray; any other shape to maybeCollectFromValue.
  const absorb = (raw: unknown) => {
    if (Array.isArray(raw)) {
      collectFromArray(raw, collected);
    } else {
      maybeCollectFromValue(raw, collected);
    }
  };

  // Stricter variant: only arrays and strings are considered.
  const absorbListOrText = (raw: unknown) => {
    if (Array.isArray(raw)) {
      collectFromArray(raw, collected);
    } else if (typeof raw === "string") {
      collectFromString(raw, collected);
    }
  };

  for (const key of TAG_SOURCES) {
    absorb(fields[key]);
  }

  const metadata = fields.metadata as Record<string, unknown> | Array<unknown> | undefined;
  if (!metadata) {
    return Array.from(collected);
  }

  if (Array.isArray(metadata)) {
    for (const entry of metadata) {
      maybeCollectFromValue(entry, collected);
    }
    return Array.from(collected);
  }

  for (const key of TAG_SOURCES) {
    absorb(metadata[key]);
  }

  absorbListOrText(metadata.tags ?? metadata.tag_list ?? metadata.TagList);

  for (const [key, value] of Object.entries(metadata)) {
    if (key.toLowerCase().includes("tag")) {
      absorbListOrText(value);
    }
  }

  return Array.from(collected);
}
|
|
|
|
/**
 * Adds the tag names contained in an array to `names`.
 *
 * Non-blank string items are added whole (trimmed, not split on separators);
 * every other item is handed to `maybeCollectFromValue`.
 *
 * @param items - Values to scan.
 * @param names - Set that receives the names; mutated in place.
 */
function collectFromArray(items: Array<unknown>, names: Set<string>) {
  for (const entry of items) {
    const text = typeof entry === "string" ? entry.trim() : "";
    if (text.length > 0) {
      names.add(text);
    } else {
      maybeCollectFromValue(entry, names);
    }
  }
}
|
|
|
|
/**
 * Adds whatever tag names can be found in a value of unknown shape.
 *
 * - Falsy values are ignored.
 * - Strings go to `collectFromString`, arrays to `collectFromArray`.
 * - For anything else, the first non-nullish of `name`, `label`, `title`,
 *   `value`, `slug` is used when it is a non-blank string. Otherwise every
 *   non-blank string property value is added and nested objects are
 *   searched recursively.
 *
 * @param value - Value to inspect.
 * @param names - Set that receives the names; mutated in place.
 */
function maybeCollectFromValue(value: unknown, names: Set<string>) {
  if (!value) {
    return;
  }
  if (typeof value === "string") {
    collectFromString(value, names);
    return;
  }
  if (Array.isArray(value)) {
    collectFromArray(value, names);
    return;
  }

  const fields = value as Record<string, unknown>;
  const preferred =
    fields.name ?? fields.label ?? fields.title ?? fields.value ?? fields.slug;
  const preferredText = typeof preferred === "string" ? preferred.trim() : "";
  if (preferredText.length > 0) {
    names.add(preferredText);
    return;
  }

  // No usable well-known property: fall back to scanning every property.
  for (const inner of Object.values(fields)) {
    if (typeof inner === "string") {
      const text = inner.trim();
      if (text.length > 0) {
        names.add(text);
      }
    } else if (inner && typeof inner === "object") {
      maybeCollectFromValue(inner, names);
    }
  }
}
|
|
|
|
/**
 * Splits a string on runs of commas and/or semicolons and adds every
 * non-blank, trimmed piece to `names`.
 *
 * @param value - Separator-delimited text.
 * @param names - Set that receives the pieces; mutated in place.
 */
function collectFromString(value: string, names: Set<string>) {
  for (const piece of value.split(/[,;]+/)) {
    const label = piece.trim();
    if (label) {
      names.add(label);
    }
  }
}
|
|
|
|
/**
 * Scans a list of entries for the first one that refers to a correspondent
 * and carries a usable name.
 *
 * An entry refers to a correspondent when one of its own keys contains
 * "correspondent", or when the first non-nullish of its `slug` / `key` /
 * `field` properties is a string containing it (both case-insensitive).
 * The name is the first non-nullish of `value`, `name`, `label`, `title`;
 * an entry whose name is not a non-blank string is skipped.
 *
 * @param entries - Candidate entries; non-object items are ignored.
 * @returns The trimmed name of the first qualifying entry, or `null`.
 */
function extractProviderFromArray(entries: Array<unknown>): string | null {
  const mentionsCorrespondent = (text: string): boolean =>
    text.toLowerCase().includes("correspondent");

  for (const entry of entries) {
    if (typeof entry !== "object" || entry === null) {
      continue;
    }

    const fields = entry as Record<string, unknown>;
    const identifier = fields.slug ?? fields.key ?? fields.field;
    const relevant =
      Object.keys(fields).some((key) => mentionsCorrespondent(key)) ||
      (typeof identifier === "string" && mentionsCorrespondent(identifier));
    if (!relevant) {
      continue;
    }

    const raw = fields.value ?? fields.name ?? fields.label ?? fields.title;
    if (typeof raw !== "string") {
      continue;
    }
    const provider = raw.trim();
    if (provider.length > 0) {
      return provider;
    }
  }

  return null;
}
|