`L iv} dZddlZddlZddlmZddlmZddlZddl Z ddl m Z ddl m Z ddlmZmZmZddlmZmZmZmZdd lmZmZmZdd lmZmZdd lmZdd l m!Z!dd l"m#Z#m$Z$ddl%m&Z&m'Z'm(Z(m)Z)ddl*m+Z+ddl,m-Z-ddl.m/Z/ddl0m1Z1m2Z2m3Z3m4Z4m5Z5ddl6m7Z7dZ8dZ9dZ:dZ;dZd=dgfd?ddgfgd@ZVdAZWdBZXdCZYy)Dz= Several basic tests for hierarchical clustering procedures N)partial)mkdtemp) hierarchy)connected_components)AgglomerativeClusteringFeatureAgglomeration ward_tree)_TREE_BUILDERS_fix_connectivity_hc_cut linkage_tree) average_merge max_mergemst_linkage_core) make_circles make_moons) grid_to_graph)DistanceMetric)adjusted_rand_scorenormalized_mutual_info_score)PAIRED_DISTANCEScosine_distancesmanhattan_distancespairwise_distances)METRICS_DEFAULT_PARAMS)kneighbors_graph) IntFloatDict)assert_almost_equalassert_array_almost_equalassert_array_equalcreate_memmap_backed_dataignore_warnings)LIL_CONTAINERScptjjd}|jd}t j t 5t|ddddt j t 5t|tjddddtj|t|}t|d }t|d t|d d t|t }t|d t|d d y#1swYxYw#1swYxYw) N*)r&sizefoo)linkager, connectivity precomputedaffinityrcosine manhattan)nprandom RandomStatenormalpytestraises ValueErrorr onesrfitrr r)rngXdisress m/mnt/ssd/data/python-lab/Trading/venv/lib/python3.12/site-packages/sklearn/cluster/tests/test_hierarchical.pytest_linkage_miscrB6s ))   #C  A z "'Q&' z "6QRWWV_56q! 1 C s] 3Cs1v|AA!DE q#6 7Cs1v|A DQGH#''66s D :!D, D),D5ctjjd}tjddgt}d|ddddf<|j dd}t |j}tjD]}||j|\}}}}d |jd zd z } t||z| k(sJtjt5||jtjd dddtjt5||jdd|dddy#1swYExYw#1swYxYw) Nr dtyper,2dr-r+)r4r5r6r;boolrandnrshaper valuesTlenr8r9r:) r=maskr>r. tree_builderchildren n_componentsn_leavesparentn_nodess rAtest_structured_linkage_treerYOs5 ))   "C 77B84 (DD1acN "cA $**-L&--/ = 3? CCl4 0,&aggaj.1$8}x'7222]]: & < 2776? ; <]]: & = Ra| < = = = < < = =s(E &E E E c tjjd}|jdd}||dfD]{}t 5t j t5t|jd\}}}}ddddddd|jdzdz }tz|k(r{JtjD]}||dfD]x}t 5t j t5||jd\}}}}ddddddd|jdzdz }tz|k(rxJy#1swYxYw#1swYxYw#1swYVxYw#1swYZxYw)NrrHrIrD) n_clustersrJrK)r4r5r6rMr"r8warns UserWarningr rPrNrQr rO)r=r>this_XrTrXrVrWrSs rAtest_unstructured_linkage_treer_fs ))   "C "cAad)3  Yk* Y6?UW6X3'8V Y Yaggaj.1$8}x'72223'--/7 !A$i 7F " \\+.:FR;7Hgx  !''!*nq(Gx=8+w6 66 77  Y Y Y Y  sHE E=E"E7<E+ E7E EE( +E4 0E77F cxtjjd}tjddgt}|j dd}t |j}tjD]C}||j|\}}}}d|jdzdz }t||z|k(rCJy) NrrDrErHrIr-rJrK) r4r5r6r;rLrMrrNr rOrPrQ) r=rRr>r. linkage_funcrTrXrVrWs rAtest_height_linkage_treerb~s ))   "C 77B84 (D "cA $**-L&--/3 .: CCl/ +'8Vaggaj.1$8}x'7222 3ctjddgddgg}d}tjt|5t |ddddy#1swYyxYw)NrrKz;Cosine affinity cannot be used when X contains zero vectorsmatchr2r0)r4arrayr8r9r:r )r>msgs rAtest_zero_cosine_linkage_treerisQ 1a&1a&!"A GC z -+Q*+++s AAzn_clusters, distance_threshold)N?)rDNcompute_distancesTFr*wardcompleteaveragesinglectjjd}tjddgt}d}|j |d}t |j}t|||||} | j||s|Lt| dsJ| jjd} | dz} | jj| dz fk(sJyt| drJy) NrrDrErIrH)r[r.r*distance_thresholdrk distances_rK) r4r5r6r;rLrMrrNrr<hasattr children_rs) r[rkrrr*r=rR n_samplesr>r. clustering n_childrenrXs rA'test_agglomerative_clustering_distancesrys ))   "C 77B84 (DI )R A $**-L(!-+ JNN1/;z<000))//2 q.$$**w{n<<<:|4444rc lil_containerc tjj|}tjddgt}d}|j |d}t |j}dD]}td||}|j| t} td|| |}|j||j} tjtj| dk(sJ tj| td||}d|_|j|t#t%|j| d d|_|j|tjtj|jdk(sJtd||j)ddddf|}t+j,t.5|j|dddtd|j)d d }t+j,t.5|j|dddt1j2D]} tdtj||f| d }|j|tdd| d } | j|t#t%| j|jd td|d }|j|t5|} td|dd } | j| t7|j| jy#tj wxYw#1swY xYw#1swY4xYw)NrDrErIrHrlr[r.r*)r[r.memoryr*FrKr3rm)r[r.metricr*rnr/)r4r5r6r;rLrMrrNrr<rlabels_r(uniqueshutilrmtreecompute_full_treerrr.toarrayr8r9r:rkeysrr )global_random_seedrzr=rRrvr>r.r*rwtempdirlabelsr~ clustering2X_dists rAtest_agglomerative_clusteringrs ))   2 3C 77B84 (DI )R A $**-L<&, g  q #iG0) J NN1 ''F77299V,-3 33 MM' ", g (- $q89K9KVTVWX"& qwwryy!3!345;;;,&|';';'=crc3B3h'GH ]]: &  NN1   K&T)!))+ J z "q#'') ,)Y!78  q-VZ   ()<)P>P QST  &)L*JNN1  "F)! K OOFz));+>+>?C MM' "&  s% A#M M%.M2 M"%M/ 2M<ctjjd}t|j dd}t ddj |y)zhAgglomerativeClustering must work on mem-mapped dataset. Non-regression test for issue #19875. rrHrI euclideanrpr~r*N)r4r5r6r!rMrr<)r=Xmms rA+test_agglomerative_clustering_memory_mappedrsB ))   "C #CIIb#$6 7C;AEEcJrcctjj|}tjddgt}|j dd}t |j}td|}|j|tjtj|jdk(sJ|j|}|jddk(sJ|j|}tj|djdk(sJt|j||t!j"t$5|j|dddddy#1swYyxYw) NrDrErHrIr&r[r.rKr)r4r5r6r;rLrMrrNrr<r(rr transforminverse_transformrr8r9r:)rr=rRr>r.aggloX_redX_fulls rAtest_ward_agglomerationrs! ))   2 3C 77B84 (D "cA $**-L AL IE IIaL 77299U]]+ , 11 1 OOA E ;;q>Q    $ $U +F 99VAY  $ $ )) )eoof5u= z " !BQ%s E%%E.c>tdd\}}tdd}|j|tt |j |dt dd d \}}tdd}|j|tt |j |dy) Ng?r%)noise random_staterJrp)r[r*rKrjg?)factorrr)rrr<rrrr)moons moon_labelsrwcircles circle_labelss rAtest_single_linkage_clusteringr2s#$R@E;(AxHJNN5$Z%7%7Eq*EPRSG](AxHJNN7$Z%7%7GrccLg}||fD]}t|}|jdz}tj||f}d|tj||f<|j tj ||j|d|dk(jsJy)zUtil for comparison with scipyrKrN) rQmaxr4zerosarangeappenddotrPall)cut1cut2co_clustcutnkecuts rAassess_same_labellingrCsHd|. H GGIMxxA"#RYYq\3 tTVV,- . QK8A; & + + -- -rcc\d\}}}tjj|}tj||f}t j D]}t dD]}d|j||fz}|dtj|ddtjfzz}||jdddtjfz}tj||} | dddd fjtd } t ||| \} } } } | jdt!| | d |zt#|| | }t#|| | }t%|| t'j(t*5t# dz | dddy#1swYyxYw)NrDr&r&皙?r'@rKaxismethodrJFcopyr-z2linkage tree differs from scipy impl for linkage: )r4r5r6r;r rranger7rnewaxismeanrr*astypeintsortr r rr8r9r:)rrprr=r.r*ir>outrurT_rVrcut_s rAtest_sparse_scikit_vs_scipyrOsGAq! ))   2 3C77Aq6?L!&&(-q -Acjjq!fj--A ryy|ArzzM22 2A Q2:: . .A##Ag6CArrE ))#E):I'5g'> ( $Ha1 MMqM ! DwN  !Xx0C1i2D !#t ,- --4 z "21 h1222s F""F+cJd\}}}tjj|}d|j||fz}|dtj|ddtj fzz}||j dddtj fz}tj|d}|dddd fjt}td|\}} } } |jdt||d t||| } t||| } t| | y) Nrrr'rrKrrprrJz8linkage tree differs from scipy impl for single linkage.)r4r5r6r7rrrrr*rrr rr r r) rrv n_featuresr[r=r>rchildren_scipyrTrrVr cut_scipys rA)test_vector_scikit_single_vs_scipy_singlerws(0%Iz: ))   2 3C cjjy*5j66Aryy#ArzzM2 22AQ2:: &&A   Ah /CBQBZ&&s+N-h7:Ha1 MMqMB *h 1C NH=I#y)rcmetric_param_gridctjjd}|jd}t |}|\}}|j }t j|jD]e}tt||}tj|fi|} t|| } t|| } tjj| | gy)zoThe MST-LINKAGE-CORE algorithm must work on mem-mapped dataset. Non-regression test for issue #19875. rK)seed)r,r'N)r4r5r6r7r!r itertoolsproductrOdictzipr get_metricrtesting assert_equal) rr=r>rr~ param_gridrvalskwargsdistance_metricmstmst_mms rA#test_mst_linkage_core_memory_mappedrs ))  Q  'C   A #A &C*FJ ?? D!!:#4#4#67-c$o&(33FEfEq/2!#7 V, -rcc ltjgdgdgdgdgdgdg}tjgd}t|dd}d||jzz}t ||d \}}d D]A}t d|| }|j |tt|j|d Cy) N)rrr)rKrKrK)rJrJrJ)rrrKrKrJrJrF n_neighbors include_selfrjr)rprororm)r[r*r.rK) r4rgrrPr rr<rrr)r> true_labelsr.rUr*rws rAtest_identical_pointsrs )Y 9iSTA((-.K#A15IL,78L!21lK!PL,; ,'   q (););[ I1  rcctjgd}t|dd}td|d}|j |y)N))y&1?gQ?)rgMbX?)rgEԸ?g rh?/$?r;On?rrrrrrr)rg~jt?)rgOn?)rg;On?rDFrr,rmr|)r4rgrrr<)r>r.rms rAtest_connectivity_propagationrsE   A&$Ar>L "< D  HHQKrccd\}}tjj|}tj||f}t dD]}d|j ||fz}|dtj |ddtjfzz}||jdddtjfz}t|}t||}t|d |d y) NrDr&r&rr'rrKrr-r) r4r5r6r;rr7rrrr r ) rrrr=r.rr>out_unstructuredout_structureds rAtest_ward_tree_children_orderrs DAq ))   2 3C77Aq6?L 1XC #**1a&*) ) S299Q<2:: . .. QVVV^ArzzM **$Q<"1<@+A.q0ABCrcc Vd\}}tjj|}tj||f}t dD]}d|j ||fz}|dtj |ddtjfzz}||jdddtjfz}t|d }t||d }|d } |d } t| | |d } |d } t| | d D]Q} t||| dd }t|| dd }|d }|d }|d }|d }t||t||Stjddgddgddgddgddgddgg}tjgdgdgdgdgd g}tjgdgdgdgd!gd"g}tjgdgdgdgd#gd$g}tj|\}}tj||f}t|d }t||d }t|dddd%f|d t|dddd%f|d t|ddd%f|d&t|ddd%f|d&gd'}||g}t||D]~\} }t|d| (}t||| d}t|dddd%f|d t|dddd%f|d t|ddd%f|d&t|ddd%f|d&y))Nrr&rr'rrKrT)return_distance)r.rr)rornrp)r.r*r)r*rgя?g eGgw7@g})J@gZ!E@gn]#g!܄@g,8g!Yz @gRա&rrchildren_unstructuredchildren_structureddist_unstructureddist_structuredr*structured_itemsunstructured_itemsstructured_distunstructured_diststructured_childrenunstructured_childrenlinkage_X_wardlinkage_X_completelinkage_X_averagervrconnectivity_Xout_X_unstructuredout_X_structuredlinkage_optionsX_linkage_truthX_truths rA&test_ward_linkage_tree_return_distancers DAq ))   2 3C77Aq6?L 1X!R #**1a&*) ) S299Q<2:: . .. QVVV^ArzzM **$Q="1q.ABnQT24Fq4IJnQT24DQ4GH7O)+<=OA F)!T7S' NGT  71bqb5>+=a+@A71bqb5>+;A+>? "'!Q$-1CA1FG!'!Q$-1A!1DE Frcc$tjddgddgg}tjddgddgg}tdd|}t|d}t j t 5|j|dddy#1swYyxYw) NrrKTFrJ)n_xn_yrRrmr.r*)r4rgrrr8r\r]r<)xmcws rA test_connectivity_fixing_non_lilr`s 1a&1a&!"A 4-%/0A!+AQ?A k " as +BBctjjd}tj|j ddj tj d}|jt|}t||}t||D]\}}|||k(rJtjdtj ddd }tjdd ddd }t||}t||tjdtj d d t||tjdtj d d y) NrrIrDr'FrrHrErJrjrK)rRn_an_b)r4r5r6rrandintrintprandrQrrrfullrr;r) r=rrOdkeyvalue other_keys other_valuesothers rAtest_int_float_dictr%ms ))   "C 99S[[2[.55bggE5J KD XXc$i FT6"A$' Uv2RWW-cc2J772s#CaC(L \ 2E aRWWS8aQG!UBGG!r.aglc1aglc2s rAtest_connectivity_callabler)~s~ ))   "C QA#Aqu=L # >E #-15Q E IIaL IIaLu}}emm4rccRtjjd}|jdd}t |dd}t |dd}t |}t |}|j ||j |t|j|jy) Nrrr&rFrTr-) r4r5r6rrrr<r r)r=r>r.connectivity_include_selfr'r(s rA"test_connectivity_ignores_diagonalr,s ))   "C QA#Aqu=L 0AD I # >E #1J KE IIaL IIaLu}}emm4rcctjjd}|jdd}t |dd}t d|}|j ||jd}|jjd}||dz k(sJd }|jd d}t |dd}t ||}|j ||jd}|jjd}|||z k(sJy) NrrDrJr&FrrrKe) r4r5r6rMrrr<rNru)r=r>r.agcrvrXr[s rAtest_compute_full_treer1s ))   "C "aA#Aqu=L "Q\ JCGGAJ Imm!!!$G i!m ## #J #qA#Ar>L !Zl SCGGAJ Imm!!!$G i*, ,, ,rcctjjd}|jdd}tjd}t j D]}t|||ddk(rJy)Nrr&r-rK)r4r5r6reyer rOr")r=r>r.ras rAtest_n_componentsr4sp ))   "C AA66!9L&--/S ,|,Q\J1MQRRRRSrccFd}tjjd}|j||}tjgd}t |||tj }Gdd}|}t|||j|jdk(sJy) NrJr)TFFT)rrrR return_asceZdZdZdZy)>test_affinity_passed_to_fix_connectivity..FakeAffinitycd|_y)Nrcounter)selfs rA__init__zGtest_affinity_passed_to_fix_connectivity..FakeAffinity.__init__s DLrccD|xjdz c_|jS)NrKr:)r<argsrs rA incrementzHtest_affinity_passed_to_fix_connectivity..FakeAffinity.increments LLA L<< rcN)__name__ __module__ __qualname__r=r@rcrA FakeAffinityr8s   rcrE)r.r1r) r4r5r6rMrgrndarrayr r@r;)r(r=r>rRr.rEfas rA(test_affinity_passed_to_fix_connectivityrHs D ))   "C $A 88. /D Tt$"**UL   B E ::??rc)rmrnrocLtjj|}tjddgt}d}|j |d}t |j}d}d|fD]}td|||} | j|| j} ttj| j} t|} | ||dd\} }}}}tj||k\dz}|| k(sJt| | | }tj | |rJy) NrDrErIrH)r[rrr.r*T)r.r[rrK)r[rTrV)r4r5r6r;rLrMrrNrr<rrQrr count_nonzeror array_equiv)r*rr=rRrvr>r.rrconnrwclusters_producednum_clusters_producedrSrTrUrVrW distancesnum_clusters_at_thresholdclusters_at_thresholds rA5test_agglomerative_clustering_with_distance_thresholdrRs; ))   2 3C 77B84 (DI )R A $**-L|$H,1  q&.. #BIIj.@.@$A B&g. >J DT4? ;,&)   Y*<< = A "),AAAA ',x(! ~~/1FGGG3Hrccftjj|}d}|jdd|df}t dddj |}t |d d }tj|tjtj|d kDsJ|j|k(sJy) NrDii,rr'rrpr[rrr* minkowskirJr~rr) r4r5r6rrr<r fill_diagonalinfr n_clusters_)rr=rvr>rw all_distancess rAtest_small_distance_thresholdr[s ))   2 3CI D#YN 3A)C c!f'qBM]BFF+ 66-#% && &  ! !Y .. .rccVtjj|}d}|jdd|df}d}t d|dj |}|j }t|d d }tj|tjtj|D]}}||k(} || dd| fjd j} || dd| fjd j} | jdkDr| |ksJ| |k\r}Jy)NrIirDrr'r,rprTrUrJrVrrrK)r4r5r6rrr<rrrWrXrminrsum) rr=rvr>rrrwrDlabelin_cluster_maskmax_in_cluster_distancemin_out_cluster_distances rA.test_cluster_distances_with_distance_thresholdrds: ))   2 3CI C9a. 1A(,> c!f  F1[A6AQ6" > E/ o q/1 2 6 6A 6 > B B D  o q?"22 3 7 7Q 7 ? C C E !    1 $*-?? ??'+==== >rc) thresholdy_truerjrKrg?cpdgdgg}td||}|j|}t||dk(sJy)NrrKrT)r fit_predictr)r*rerfr> clusterery_preds rA?test_agglomerative_clustering_with_distance_threshold_edge_caserk*sI qc A'IwI " "1 %F vv .! 33 3rccdgdgg}tjtd5tddj |dddtjtd5tddj |ddddgdgg}tjtd5tddd j |dddy#1swYxYw#1swY]xYw#1swYyxYw) NrrKzExactly one of re)r[rrrJz!compute_full_tree must be True ifF)r[rrr)r8r9r:rr<)r>s rA&test_dist_threshold_invalid_parametersrm:s qc A z): ;Q4DIMMaPQ z): ;K1CGGJK qc A z)L MU #a&QQKKs#C"C(C'CC$'C0ctjjd}|jdd}t j t d5tddj|dddy#1swYyxYw) Nrr&rz>Distance matrix should be square, got matrix of shape \(5, 3\)rer/rnr) r4r5r6rr8r9r:rr<)r=r>s rA*test_invalid_shape_precomputed_dist_matrixroIsi ))   "C AA O Q }jIMMaP QQQs A33A<ctjgdgdgdgdgdg}t|ddk(sJtjj d}|j dd}t |}td |d }d }tjt| 5|j|dddt|d }tjt| 5|j|dddt|j|jt|j|jy#1swYxYw#1swYVxYw)zCheck that connecting components works when connectivity and affinity are both precomputed and the number of connected components is greater than 1. Non-regression test for #16151. )rrKrKrr)rrrKrr)rrrrr)rrrrrKrrJr&rDr/rn)r~r.r*z.Completing it to avoid stopping the tree earlyreNr)r4rgrr5r6rMrrr8r\r]r<r rru)connectivity_matrixr=r>rclusterer_precomputedrhris rA@test_precomputed_connectivity_metric_with_2_connected_componentsrsUs- ((        3 4Q 71 << < ))   "C !RA  "F3+>  ;C k -*!!&)*((*I k - ay((*?*G*GHy**,A,K,KL** sD<!E<EE)Z__doc__rr functoolsrtempfilernumpyr4r8 scipy.clusterrscipy.sparse.csgraphrsklearn.clusterrrr sklearn.cluster._agglomerativer r r r "sklearn.cluster._hierarchical_fastrrrsklearn.datasetsrr sklearn.feature_extraction.imagersklearn.metricsrsklearn.metrics.clusterrrsklearn.metrics.pairwiserrrr'sklearn.metrics.tests.test_dist_metricsrsklearn.neighborsrsklearn.utils._fast_dictrsklearn.utils._testingrrr r!r"sklearn.utils.fixesr#rBrYr_rbrimark parametrizeryrrrrrrrrrrrrrr%r)r,r1r4rHrRr[rdrkrmrorsrDrcrArsF  #5TT  6:*U K.1/I2=.70 3+9K;TU,tUm<$MN5O=V58.9[@:[@|K*" .#2P*2,.DE-F-$ (>C(sFl L" 5 5-4 S4$CD#HE#HL/">6$CDsQFmcAq6]S1a&MJ 4E 4  Q$Mrc