"""
RAGDocumentManager - Business logic for managing RAG documents
"""

import os
from typing import List, Optional
from dataclasses import dataclass
from datetime import datetime


# Per-profile embeddings cache (dict keyed by profile_name).
# Keeps each profile isolated from the others and leaves room for using a
# different embedding model per profile in the future.
_EMBEDDINGS_CACHE = {}


@dataclass
class DocumentInfo:
    """Summary information about one RAG document."""
    filename: str           # e.g. "meditazioni.pdf" (hash prefix removed)
    title: str              # From the PDF metadata
    author: str             # From the PDF metadata
    chunk_count: int        # Number of child chunks in ChromaDB
    file_size: int          # Bytes
    indexed_date: float     # Unix timestamp
    source_path: str        # Full path, including the hash prefix


class RAGDocumentManager:
    """Manage the RAG documents (list, delete, statistics) of one profile.

    Every on-disk location is derived from ``project_root`` and
    ``profile_name`` as ``<project_root>/data/agents/<profile_name>/<subdir>``.
    """

    # Name of the Chroma collection this manager reads and modifies.
    _COLLECTION_NAME = "split_parents"
    # Model name handed to HuggingFaceEmbeddings.
    _EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"

    def __init__(self, profile_name: str, project_root: str):
        """Store the profile identity and compute its storage paths.

        Args:
            profile_name: Profile whose documents are managed; used as a
                directory name and as the embeddings cache key.
            project_root: Root directory that contains ``data/agents``.
        """
        self.profile_name = profile_name
        self.project_root = project_root

        # All three stores live side by side under the profile's directory
        agent_dir = os.path.join(project_root, 'data', 'agents', profile_name)
        self.chroma_path = os.path.join(agent_dir, 'chroma_db')
        self.docstore_path = os.path.join(agent_dir, 'doc_store')
        self.source_docs_path = os.path.join(agent_dir, 'source_docs')

        # Embeddings are created lazily, on first use
        self._embeddings = None

    def _get_embeddings(self):
        """Return the embedding function for this profile, creating it once.

        The module-level cache is consulted first; otherwise the instance's
        own embeddings object is reused or, if there is none yet, created and
        stored both on the instance and in the cache.

        Raises:
            ImportError: If the embedding dependencies cannot be imported.
        """
        # The cache dict is only mutated, never rebound, so no ``global`` is needed
        if self.profile_name in _EMBEDDINGS_CACHE:
            return _EMBEDDINGS_CACHE[self.profile_name]

        if self._embeddings is not None:
            return self._embeddings

        try:
            from langchain_huggingface import HuggingFaceEmbeddings
            print(f"[RAGDocumentManager] Caricamento embeddings per profilo: {self.profile_name}")
            self._embeddings = HuggingFaceEmbeddings(
                model_name=self._EMBEDDING_MODEL
            )
            # Share the instance with every other manager of the same profile
            _EMBEDDINGS_CACHE[self.profile_name] = self._embeddings
        except ImportError as e:
            raise ImportError(
                "Dipendenze RAG non installate. "
                "Installa con: pip install langchain-huggingface sentence-transformers"
            ) from e
        return self._embeddings

    def _open_collection(self):
        """Open this profile's Chroma store and return its underlying collection."""
        # Lazy import so a missing optional dependency does not break module import
        from langchain_chroma import Chroma

        vectorstore = Chroma(
            collection_name=self._COLLECTION_NAME,
            persist_directory=self.chroma_path,
            embedding_function=self._get_embeddings()
        )
        # NOTE: relies on the wrapper's private ``_collection`` attribute
        return vectorstore._collection

    def list_documents(self) -> "List[DocumentInfo]":
        """
        List every document indexed for this profile.

        Reads the metadata of all chunks in the collection with a single
        query, groups the chunks by their ``source`` metadata value and builds
        one DocumentInfo per distinct source. Title and author come from the
        first chunk seen for that source; size and date come from the source
        file on disk (both 0 when the file cannot be stat'ed).

        Returns:
            DocumentInfo list sorted by ``indexed_date``, newest first.
            Empty list when the ChromaDB directory does not exist, when the
            collection holds no metadata, or when any error occurs (the error
            is printed, not raised).
        """
        if not os.path.exists(self.chroma_path):
            return []

        try:
            collection = self._open_collection()
            results = collection.get(include=['metadatas'])

            metadatas = results['metadatas']
            if not metadatas:
                return []

            # One pass: remember the first metadata per source and tally its
            # chunks, instead of issuing one extra query per source.
            first_meta = {}
            chunk_counts = {}
            for meta in metadatas:
                # Entries with no metadata (None or empty) carry no source
                if not meta:
                    continue
                source_path = meta.get('source', '')
                if not source_path:
                    continue
                if source_path not in first_meta:
                    first_meta[source_path] = meta
                chunk_counts[source_path] = chunk_counts.get(source_path, 0) + 1

            documents = []
            for source_path, meta in first_meta.items():
                # Size and modification time from a single stat call; a
                # missing or unreadable file leaves both at 0.
                file_size = 0
                indexed_date = 0
                try:
                    stat_result = os.stat(source_path)
                except (OSError, ValueError):
                    pass
                else:
                    file_size = stat_result.st_size
                    indexed_date = stat_result.st_mtime

                # Stored names look like "<hash>__<filename>"; show only the filename
                basename = os.path.basename(source_path)
                _, separator, remainder = basename.partition('__')
                filename = remainder if separator else basename

                documents.append(DocumentInfo(
                    filename=filename,
                    title=meta.get('title', ''),
                    author=meta.get('author', ''),
                    chunk_count=chunk_counts[source_path],
                    file_size=file_size,
                    indexed_date=indexed_date,
                    source_path=source_path
                ))

            # Newest first
            return sorted(documents, key=lambda d: d.indexed_date, reverse=True)

        except Exception as e:
            # Best-effort listing: report the problem and show nothing
            print(f"[RAGDocumentManager] Errore list_documents: {e}")
            return []

    def delete_document(self, source_path: str) -> bool:
        """
        Delete a document and everything indexed from it.

        Steps, in order:
        1. Remove from the Chroma collection every chunk whose ``source``
           metadata equals ``source_path``.
        2. Remove the doc_store files named after those chunk ids.
        3. Remove the file at ``source_path`` itself, if it exists.

        Args:
            source_path: The ``source`` value of the document, which is also
                the path of the file removed in step 3.

        Returns:
            True on success (including when nothing matched), False if any
            step raised; the error is printed, not raised. Steps completed
            before a failure are not rolled back.
        """
        try:
            collection = self._open_collection()

            # Ids are always part of a get() result and "ids" is not a valid
            # ``include`` value, so request no additional fields.
            results = collection.get(
                where={"source": {"$eq": source_path}},
                include=[]
            )
            chunk_ids = results['ids']

            # 1. Vectors
            if chunk_ids:
                collection.delete(ids=chunk_ids)
                print(f"[RAGDocumentManager] Deleted {len(chunk_ids)} vectors from ChromaDB")

            # 2. doc_store files
            # NOTE(review): files are looked up by Chroma chunk id. If the
            # indexer stores parent documents under a different key (e.g. a
            # parent id kept in the chunk metadata) nothing is removed here;
            # confirm against the indexing code.
            deleted_chunks = 0
            for chunk_id in chunk_ids:
                doc_file = os.path.join(self.docstore_path, chunk_id)
                if os.path.exists(doc_file):
                    os.remove(doc_file)
                    deleted_chunks += 1

            print(f"[RAGDocumentManager] Deleted {deleted_chunks} doc_store files")

            # 3. Original source file
            if os.path.exists(source_path):
                os.remove(source_path)
                print(f"[RAGDocumentManager] Deleted source: {source_path}")

            return True

        except Exception as e:
            print(f"[RAGDocumentManager] Errore delete_document: {e}")
            return False

    def get_stats(self) -> dict:
        """Return aggregate statistics for the profile's knowledge base.

        Returns:
            Dict with ``document_count``, ``total_chunks``,
            ``total_size_bytes`` and ``total_size_mb`` (bytes / 1024**2,
            rounded to two decimals), computed from ``list_documents()``.
        """
        documents = self.list_documents()

        size_bytes = 0
        chunk_total = 0
        for doc in documents:
            size_bytes += doc.file_size
            chunk_total += doc.chunk_count

        return {
            'document_count': len(documents),
            'total_chunks': chunk_total,
            'total_size_bytes': size_bytes,
            'total_size_mb': round(size_bytes / (1024 * 1024), 2)
        }