// src/extractors/BaseDocumentExtractor.ts
/**
 * Optional settings for a document extraction.
 *
 * NOTE(review): nothing in the visible part of this file reads these fields;
 * their exact semantics are defined by whichever code consumes them.
 */
export interface ExtractionOptions {
    /** Presumably an upper bound on the accepted input size, in bytes — confirm against consumers. */
    maxSizeBytes?: number;
    /** Presumably the text encoding name to use when decoding content — confirm against consumers. */
    encoding?: string;
}

/**
 * Shape describing data produced by an extraction.
 *
 * Within this file only the `metadata` member type is referenced (through
 * the indexed access type `ExtractionResult['metadata']`).
 */
export interface ExtractionResult {
    /** Descriptive information about the document; every field is optional. */
    metadata?: {
        title?: string;
        author?: string;
        createdAt?: Date;
        modifiedAt?: Date;
        pageCount?: number;
        wordCount?: number;
    };
}

/**
 * Template-method base class for document extractors.
 *
 * Subclasses implement the individual stages; {@link BaseDocumentExtractor.extract}
 * runs them in a fixed order and converts any thrown error into a
 * `success: false` result instead of letting it propagate.
 */
export abstract class BaseDocumentExtractor {
    /**
     * @param file - The file this extractor operates on; exposed to subclasses
     * as `this.file`.
     */
    protected constructor(protected file: File) {}

    /** Stage 1 of `extract()`. Throw to abort the extraction. */
    protected abstract validateFileType(): Promise<void>;

    /** Stage 2 of `extract()`, run after `validateFileType()` resolves. */
    protected abstract preProcess(): Promise<void>;

    /** Stage 3 of `extract()`; its resolved value becomes `content` in the result. */
    protected abstract extractContent(): Promise<string>;

    /**
     * Called exactly once per `extract()` call, after the pipeline has either
     * completed or failed at any stage (including before `preProcess()` ran),
     * so implementations should tolerate partially initialised state.
     */
    protected abstract cleanup(): Promise<void>;

    /** Stage 4 of `extract()`; its resolved value becomes `metadata` in the result. */
    protected abstract extractMetadata(): Promise<Partial<ExtractionResult['metadata']>>;

    /**
     * Runs the full extraction pipeline:
     * `validateFileType` → `preProcess` → `extractContent` → `extractMetadata`,
     * followed by a single `cleanup` call.
     *
     * This method does not reject because of stage failures. Errors thrown by
     * any stage, or by `cleanup`, are reported through the returned object.
     *
     * @returns On success, `{ success: true, content, metadata }`. On failure,
     * `{ success: false, error }` where `error` is the thrown `Error`'s message,
     * or `'Unknown error'` when a non-`Error` value was thrown. If both the
     * pipeline and `cleanup` fail, the pipeline's error is the one reported.
     */
    async extract(): Promise<{
        success: boolean;
        content?: string;
        metadata?: Partial<ExtractionResult['metadata']>;
        error?: string;
    }> {
        const describeFailure = (thrown: unknown): string =>
            thrown instanceof Error ? thrown.message : 'Unknown error';

        // Run the stages in isolation so that cleanup below is reached exactly
        // once, no matter where (or whether) the pipeline failed.
        const runPipeline = async () => {
            try {
                await this.validateFileType();
                await this.preProcess();

                const content = await this.extractContent();
                const metadata = await this.extractMetadata();

                return { success: true, content, metadata };
            } catch (error) {
                return { success: false, error: describeFailure(error) };
            }
        };

        const outcome = await runPipeline();

        try {
            await this.cleanup();
        } catch (cleanupError) {
            // A cleanup failure must never mask the pipeline's own error, and
            // must not turn into a rejection. It is only surfaced when the
            // pipeline itself had succeeded.
            if (outcome.success) {
                return { success: false, error: describeFailure(cleanupError) };
            }
        }

        return outcome;
    }
}