.h*#|SSKrSSKrSSKrSSKrSSKrSSKrSSKrSSKJr SSK J r SSK J r SSK Jr SSKJrJr SSKJr SSKJr \R,R/\R,R/\R,R1\555r\R,R7\S S 5r\R,R;\5(a\"\S 9 \"S \35 S \4Sjr Sr!Sr"Sr#Sr$S \S\S\4Sjr%Sr&\'S:Xa\RP"SS9r)\)RUSSS9 \)RUSSSS9 \)RUSS SS9 \)RUS!S"SS9 \)RUS#S$S%S&S'9 \)RWS(S)9 \)RY5r-\-R\(a\&"\-R\\-5 g\"S*5 gg)+N) load_dotenv) PyMuPDFLoader)ChromaGoogleGenerativeAIEmbeddings)LocalFileStoreEncoderBackedStore)ParentDocumentRetriever)RecursiveCharacterTextSplitterdataz.env) dotenv_pathz2[INDICIZZATORE] Variabili d'ambiente caricate da: profilec[RR[RR[RR[555n[RR USS5nSnSn[ USSS9n[R"U5nSSS5 W=(d 0RS05RU=(d S /5nUHPnURS 5S :XdMURS05n U RS U5nU RS U5n O U=(d SU4$!,(df  N=f![a N*f=f)zHLoad embedding provider and model from config/config.yaml for a profile.config config.yamlgoogleNrutf-8encodingprofiles service_nameCognitiveServiceembedding_providerembedding_model ospathdirnameabspath__file__joinopenyaml safe_loadget Exception) r project_rootcfg_pathprovidermodelfcfgprofile_servicessvccs scripts/indicizza_documenti.pyload_profile_embedding_configr3s77??277??277??83L#MNLww||L(MBHH E  (C' 2a..#C3I2??:r:>>w}"bQ#Cww~&*<<GGHb)55!5x@/7 $  5 ((3 2    s7 EE&AE>7E6E EE E$#E$c[RR[RR[RR[555n[RR USS5n[ USSS9n[R"U5nSSS5 W=(d 0RS5$!,(df  N(=f![a gf=f)Nrrrrrrr)r)r*r-r.s r2!_load_default_profile_from_configr51swwrwwrwwx7P'QR 77<< h F (C' 2a..#C3 ry))3 2 s*B C C"!C CC C"!C"c2[RR[RR[RR[555nUR =(d' [R "S5=(d [5nUR(a URnO+U(a"[RRSUS5nOSnUR(a URnO+U(a"[RRSUS5nOSn[RRUSU5n[RRUSU5n[R"USS9 [R"USS9 XV4$)zDResolve vectorstore/docstore directories based on args/env/defaults. AGENT_PROFILEagents chroma_db doc_storer Texist_ok) rrr r!r"rgetenvr5 vectorstorer#docstoremakedirs)argsr)r vector_reldoc_relvectorstore_path docstore_paths r2 resolve_pathsrF<s77??277??277??83L#MNLll_bii8_<]<_G %% WW\\(G[A   }}-- '',,x+>ww||L&*EGGLLvw?MKK 40KK -  **cP[UR=(d [R"S5=(d S5upUR=(d( [R"S5=(d U=(d SR 5nUR =(d [R"S5=(d UnUS:Xa$SSKJn U(dS n[S U35 U"US9$SSK J n U(dSn[SU35 U"US9$![an[S US 35 eS nAff=f)z0Instantiate embeddings based on args/config/env.r7aurelioEMBEDDING_PROVIDERrEMBEDDING_MODELhfr)HuggingFaceEmbeddingsz5[INDICIZZATORE] ERRORE: manca HuggingFaceEmbeddings (z&). Installare 'sentence-transformers'.Nz&sentence-transformers/all-MiniLM-L6-v2z.[INDICIZZATORE] Embeddings provider=hf, model=) model_namerzmodels/text-embedding-004z2[INDICIZZATORE] Embeddings provider=google, model=)r,) r3rrr=rlowerrlangchain_community.embeddingsrMr(printlangchain_google_genair)rAprovider_default model_defaultr+r,rMers r2build_embeddingsrVYs'DDLLD{TVT]T]^mTnD{r{&|#''j2995I+JjN^jbjqqsH  QBII.?$@ QME4  LugFG$66G/E B5'JK+%88  I!Lrs t  sD D%D  D%cUR=(d0 [R"S5=(d [5=(d S$)Nr7rI)rrr=r5)rAs r2_profile_from_args_envrXrs) << i299_5 i9Z9\ i`iirG file_pathreturncX^ [RR[RR[RR[555n[RR USSUS5n[R "USS9 [R"5n[US5m [U 4SjS5HnURU5 M S S S 5 UR5S S n[RRU5nUS U3n[RR X85n [RRU 5(d&[R "X5 [#S U 35 U $[#S U 35 U $!,(df  N=f)zSave a copy of the original file under data/agents//source_docs with hash prefix. Returns the path to the saved file (existing or newly copied).r r8 source_docsTr;rbc&>TRS5$)Ni)read)r-sr2%_ensure_source_copy..s!&&"5rGrGN__z([INDICIZZATORE] Copia sorgente salvata: u.[INDICIZZATORE] Copia sorgente già presente: )rrr r!r"r#r@hashlibsha256r$iterupdate hexdigestbasenameexistsshutilcopy2rQ) rrYr) target_dirhchunkdigestbase target_name target_pathr-s @r2_ensure_source_copyrtvs777??277??277??83L#MNLlFHg}UJKK T* A i !5s;E HHUO< [[]3B F 77  I &DHBtf%K'',,z7K 77>>+ & & Y, 8 FG  >{mLM   s 8)F F)cv[U5up#[S[RR U5S35 [ USS5(a[ U5n[X@5n[S5 [U5nUR5n[S5 [U5n[SXrS9n[U5n [U S [R[R 5n [#S S S 9n [#S SS 9n [S5 [%UU U U S9n [S5 U R'USS9 [S5 [S5 g)z_ Funzione principale per indicizzare un documento usando la strategia ParentDocumentRetriever. z'--- Avvio Indicizzazione Avanzata per: z --- save_sourceTz!1/5: Caricamento del documento...z2/5: Setup degli store... split_parents)collection_nameembedding_functionpersist_directorycU$)N)keys r2r`main..ssrGi) chunk_size chunk_overlapi(z(3/5: Creazione del retriever avanzato...)r>r?child_splitterparent_splitterz+4/5: Aggiunta dei documenti al retriever...N)idsz,5/5: Indicizzazione completata con successo!z--- Fine Indicizzazione ---)rFrQrrrigetattrrXrtrloadrVrrr pickledumpsloadsr r add_documents)rYrArDrErloaderdocs embeddingsr>fsstorerr retrievers r2mainrs,'4D&9# 3BGG4D4DY4O3PPT UVt]D))(.';  -. 9 %F ;;=D %&!$'JZ|K  &B r?FLL&,, OE5TWXO3sRTUN 45'%' I 78 Dd+ 89 '(rG__main__z@Indicizza un documento PDF per il RAG (ParentDocumentRetriever).) descriptionz&Percorso del file da indicizzare (PDF))helpz --vectorstorez1Percorso relativo sotto data/ per il vector store)rdefaultz --docstorez3Percorso relativo sotto data/ per il document storez --profilez?Nome profilo agente (usa cartelle in data/agents//...)z--no-save-sourcerv store_falsez+Non salvare copia sorgente (default: salva))destactionrT)rvzCErrore: Fornire il percorso del file da indicizzare come argomento.)/rsysargparserdrkr%rdotenvr$langchain_community.document_loadersrlangchain_chromarrRrlangchain.storagerr langchain.retrieversr langchain.text_splitterr rr r!r"project_root_for_envr#env_pathrjrQstrr3r5rFrVrXrtr__name__ArgumentParserparser add_argument set_defaults parse_argsrArYr|rGr2rs  >#?@8B wwrwwrwwx7P'QR 77<<,ff =77>>(H% >xj IJ)3)*+:92j02)h z  $ $1s tF  *RS .akop  +`jno  *kuyz *}\IJ D)    D ~~ T^^T" STrG