`L ir>UdZddlZddlZddlZddlZddlZddlZddlmZm Z m Z ddl Z ddl Z ddlZddlmZddlmZddlmZmZmZddlmZddlmZdd lmZdd lmZmZm Z dd l!m"Z"m#Z#dd l$m%Z%dd l&m'Z'ddl(m)Z)m*Z*m+Z+m,Z,ddl-m.Z.m/Z/m0Z0m1Z1ddl2m3Z3ddl4m5Z5m6Z6m7Z7m8Z8m9Z9m:Z:m;Z;ddl4mm?Z?ddl@mAZAmBZBmCZCmDZDmEZEmFZFddlGmHZHmIZImJZJmKZKddlLmMZMdZNdZOe)e+dZPe*e,dZQeRZSeReTd<eSjePeSjeQgdZVe jgdgdgdgdgd gd!gd"gd#gd$gd%gd&gd'gd(gd)gd*gd+gd,gd-gd.gd/gd0gd1gd2gZXgd3ZYgd4ZZd5d6gd6d6gd6d5gd7d7gd7d8gd8d7ggZ[gd9Z\d6d6gd8d8gd:d8ggZ]gd;Z^ejZ`e jjd7Zcecje`jjZge`jege`_he`jege`_eejZjecjejjjZgejjegej_hejjegej_eejZlecjeljjZgeljegel_heljegel_eeMdZmejdd\ZoZpemjd?@ZrdAererdBk<emjddCdD@Zte'dEd=dFdGjZve`je`jdHejjejjdHeljeljdHe[e\dHeXeYdHeXeZdHeoepdHeretdHer etdHevetdHe jdIetdHdJ ZxdKZydLZzdMZ{ejjdNeQjejjdOeOdPZdQZdRZejjdSeQjejjdOeOdTZeFejjdSeQjejjdUdVdWe dXfdYdEe dXfdZdWe dXfd[dWedr=rrNr)rNrr:r:r:r@r:rrrCr=rr:r)rNrr:rNr=r@rErNrr@r:rNrNr)r:r:rrNrNr@r:rNrr>r:rNr=r)r=r:rr=rr;rErr:r>r=rr=r:)rGrMrHrIrr:rrrFrKrLrrDr:)rGrMrHrIrr:rrrFrK?r:r@r@)rGrMrHrIrrErrrFrKrLrr@r@)rNrr<r:rLrCrErr:r>r=r:rr@)rNrr:r:r:rCr:rrrCrrrr:)rNr:r:r:rNr@rErNrr@rrNr:r:)r:r:rrr:rDr:rNrr>r:rNr:r:)r=r:rr:rr;r:rr:rCrrr:r)r:r:rrrrr:r:r:r:r:r:rrrr:rrr:rrrr)?rB333333?皙?rEg333333@@g)\(?{Gz?gףp= @rSg?rQrNrJrr@g|?5^?g(\??rrCr@r:rN)r@r@r@r:r:r:r=)r@r:r:rE) random_state n_samples n_features)r<sizerUg?r9)r\r\g?)densityrYXy)r\r=) irisdiabetesdigitstoy clf_small reg_small multilabel sparse-pos sparse-neg sparse-mixzerosc~|j|jk(s,Jdj||j|jt|j|j|dzt|j|j|dz|jt k(}t j|}t|j||j||dzt|j||j||dzt|jj|jj|dzt|j|j|dzt|j|j|dz t|j||j||d z y) Nz({0}: inequal number of node ({1} != {2})z: inequal children_rightz: inequal children_leftz: inequal featuresz: inequal thresholdz: inequal sum(n_node_samples)z: inequal n_node_samplesz: inequal impurityerr_msgz: inequal value) node_countformatr)children_right children_leftrnp logical_notfeature thresholdn_node_samplessumr'impurityr(value)dsmessageexternalinternals b/mnt/ssd/data/python-lab/Trading/venv/lib/python3.12/site-packages/sklearn/tree/tests/test_tree.pyassert_tree_equalrs <<1<< '299 Q\\1<<  '  !**G6P,P '4M*M9,H~~h'H (QYYx0';8I8P8PQU8VW2 1 3;;q>;8I8P8PQU8VWXrc  tjD]\}}|d}|jttt j ttt|jttdj||jttt jttdt|jttdj|y)Nrr sample_weightrrL)rrrrarbruoneslenr)rrrrrfullrs r test_weighted_classification_toyrsoo'X d" 1BGGCFO43;;q>;8I8P8PQU8VW 1BGGCFC$893;;q>;8I8P8PQU8VWXrr% criterionc|dk(rhtjtjtdz}tjt|z}tjt |z}n t}t }||d}|j t|t|jt|||dd}|j t|t|jt|y)Nr7r:rrYrrrY) ruabsminrbarrayrrrarrr)r%ray_trainy_testregrs rtest_regression_toyrsI FF266!9  !((1+/+&*  3CGGAwCKKNF+  CCGGAwCKKNF+rctjd}d|ddddf<d|ddddf<tj|j\}}tj|j |j gj }|j }tjD]\}}|d}|j|||j||dk(sJdj||dd}|j|||j||dk(r~Jdj|y) N)rErEr:r<rrrPrrYr) rurmindicesshapevstackravelrrrrscorerr)rbgridxgridyrarr%rs rtest_xorrs AAbqb"1"fIAab!"fI::agg&LE5 5;;=%++-0133A  Aoo'F d" 1 yyA#%E'8'?'?'EE%2 1 yyA#%E'8'?'?'EE%FrcttjtD]"\\}}}||d}|j t j t jt|jt j t j}|dkDsJdj|||||dd}|j t j t jt|jt j t j}|dkDr Jdj|||y)NrrrWz0Failed with {0}, criterion = {1} and score = {2}rNrrL) rrrCLF_CRITERIONSrrcdatatargetrrrr)rr%rrrs r test_irisr3s#*9??+D t h   rc  tjD]v\}}|ddd}|jtjtj |j tj}ttj|dtjtjjddj|ttj|d|jtjdj|t!|j tjtj"|j%tjddj|yy)Nr:*rrrYrrrorM)rrrrcrr predict_probar(rurzrrrrr)argmaxrr'exppredict_log_proba)rr%r prob_predicts rtest_probabilityrgs oo' dQQR@  4;;'((3 ! FF< # GGDIIOOA& '%,,T2  IIlA & KK "%,,T2    dii ( FF3((3 4 %,,T2   rctjdddtjf}tjd}tj D]!\}}|dd}|j ||#y)Ni'rrrY)ruarangenewaxis REG_TREESrrrarbrr%rs rtest_arrayreprrs` %BJJ'A %Aoo' dT2 1 rcddgddgddgddgddgddgg}gd}tjD]L\}}|d}|j||t|j ||dj | Nt jD]L\}}|d}|j||t|j ||dj | Ny) NrCr@r:rN)r:r:r:r:r:r:rrrro)rrrr)rrrrr')rarbrTreeClassifierr TreeRegressorrs r test_pure_setrs bB8b"X1v1v1v>AA ) 1Vn!, 1 3;;q>16G6N6Nt6TUV )0Wm+ 1 CKKNA7H7O7OPT7UVWrc tjgdgdgdgdgdgdgdg}tjgd}tjd 5tj D]Z\}}|d }|j |||j || |j | ||j | | \ dddy#1swYyxYw) N)gs_c@d a@籛`8`@?c@)g_9a@g 8`@g-V u]@g@Xd@)gSW j_@rrr)g ً`@4Ta@ lKa@{c@)g|@Y@g~G`a@gwI?lKa@g/"c@)g_@rrr)g :^@rrr)rPgAw?gtQ?5??rUg7G?gۺ?gb'?raise)allrr)rurerrstaterrrrs rtest_numerical_stabilityrs  D D D D D D D  A WXA  !#//+ JD$A&C GGAqM GGArN GGQBN GGQBO  s A2CCc tjddddddd\}}tjD]\}}|d}|j |||j }t j|dkD}|jddk(sJd j||dk(rsJd j|td}|j tjtjtdttj }|j tjtjt|j |j y) NrEr=rFrZr[ n_informative n_redundant n_repeatedshufflerYr皙?rrYmax_leaf_nodes)r make_classificationrrrfeature_importances_rurzrrrrrcrrrr))rarbrr%r importances n_importantclf2s rtest_importancesrs)  ' ' DAq oo'@ d" 1 .. ff[3./   #r)I+<+C+CD+II)a?!2!9!9$!??@ !a 0CGGDIIt{{# !qTYY PDHHTYY $s//1J1JKrct}tjt5t |ddddy#1swYyxYw)Nr)rrraises ValueErrorgetattr)rs rtest_importances_raisesrs6 "C z "-+,---s :Ac tjddddddd\}}tddd j||}t d dd j||}t |j |j t|jj|jjt|jj|jjt|jj|jjt|jj|jjy) NirEr=rFrr2r<)rrrYr4) r rrrrr'rr)tree_rwrtrsry)rarbrrs r)test_importances_gini_equal_squared_errorrs  ' ' DAq !6QQ O S S 1 C !QQ  c!Qi00#2J2JKsyy((#))*;*;<syy.. 0G0GHsyy//1I1IJsyy//1I1IJrcVtjD]\}}|d}|jtjtj |j ttjtjjdk(sJ|d}|jtjtj |j ttjtjjdk(sJ|d}|jtjtj |j dk(sJ|d}|jtjtj |j dk(sJ|d}|jtjtj |j dk(sJ|d}|jtjtj |j tdtjjdzk(sJ|d}|jtjtj |j tjjdk(sJ|d}|jtjtj |j tjjdk(rJy) Nsqrt)rr:log2r=rTrLrP) r8rrrcrr max_features_intrurrr)r TreeEstimatorests rtest_max_featuresrs$(07m0  4;;'  C 0B(C$DDDD0  4;;'  C 0B(C$DDDD+  4;;'  A%%%+  4;;'  A%%%.  4;;'  A%%%-  4;;'  Cdiiooa.@(@$AAAA-  4;;'  DIIOOA$6666.  4;;'  DIIOOA$6666?7rcJ tjD]o\}}|}tjt5|j t ddd|jt tgdg}tjt5|j |ddd|}tdd}tjt5|jt |dddtjt }|}|j|tt|jtt|}tjt5|jtddd|jt ttj t}tjt5|j|ddddfdddtj"t j}|}|jtj$t |ttjt5|jt dddtjt5|j't ddd|}|jt ttjt5|j|dddtjt5|j'|ddd|}tjt5|j'tdddrt)d}tjtd5|jgdggd dddtjtd 5|jgdggd dddy#1swYxYw#1swYvxYw#1swY;xYw#1swYxYw#1swYOxYw#1swYxYw#1swYxYw#1swY\xYw#1swY6xYw#1swYuxYw#1swYxYw#1swYyxYw) N)rCr@r:r@r:r7rzy is not positive.*Poissonmatchrr:rN)rrrzSome.*y are negative.*Poisson)r<grN)rrrrr rrarrbrruasfortranarrayr'rrrasarrayrdotapplyr) rrrX2y2XftXtrs r test_errorrsH(06mo ]]> * !   a  ! 1 ] ]]: & "   b ! "o sV ]]: &  GGArN   q !o ACKKNK8o ]]> *  KKN  1 JJqM ]]: & " KK!QR% ! "XXa[]]o q" q! ]]: &  KKN  ]]: &  IIaL o 1 ]]: &  KKO  ]]: &  IIbM o ]]> *  IIaL  k6r ) 4C z)E F(  Y'( z)H I+  \*++s ! !  " "       " "             ((++sP PP%-P2P?+Q "Q:Q&-Q3'R/R *R P P" %P/ 2P< ?Q Q Q# &Q0 3Q= R RR"ctjtjtj j }tj}tdtjD]\}}t|}|d|d}|j|||jj|jjdk7}tj|dkDsJdj!||d |d}|j|||jj|jjdk7}tj|dkDrJdj!|y ) z Test min_samples_split parameterdtypeNrEr)min_samples_splitrrYr@ rr?N)rurrcrr _treeDTYPErrr8keysrrryrtrrr)rarbrrrr node_sampless rtest_min_samples_splitr_s4 $))4::+;+;sL 3&&rzz2A# Q 3A771:LdOM ' bkk!S!6L M %))   1  '))//!''),C))//!$C{{3' #LA$56 vvl#s C88 81(   O V V #..0D0D   % .!( bkk!S!6L M %))    1  '))//!''),C))//!$C{{3' #LA$56 vvl#s C88 8 C00 0(    P V V #..0D0D   % rct|dyr3r>r5s rBtest_min_weight_fraction_leaf_with_min_samples_leaf_on_dense_inputrA"s 8vFrc t|d|yr9r@r:s rCtest_min_weight_fraction_leaf_with_min_samples_leaf_on_sparse_inputrC's 9 l]rctjd|\}}tdtj D]\}}t|}||d}||dd}||dd}||d d} |d f|df|df| d ffD]\} } | j | ks!Jd j | j | | j||t| jjD]M} | jj| tk7s%| jj| } | jj| }| jj| }| jj|}| jj|}||z}| jj| }| jj|}| jj|}||z}||z}||z}| jj| |j dz }|| |z z}|| k\r9Jd j || y) NdrZrYrrrrYrR)rmin_impurity_decreaserYg-C6?rgHz>z)Failed, min_impurity_decrease = {0} > {1}z2Failed with {0} expected min_impurity_decrease={1})r rrr8rrHrrrrangerrqrtrr{weighted_n_node_samplesrsr)global_random_seedrarbrrrest1est2est3est4rexpected_decreasenode imp_parent wtd_n_nodeleft wtd_n_leftimp_left wtd_imp_leftright wtd_n_right imp_right wtd_imp_rightwtd_avg_left_right_impfractional_node_weightactual_decreases rtest_min_impurity_decreaser_1st  ' '#DV WDAq!( inn6F G>!$ NK)TU )VW )ST  4L 4L 6N 3K ' ,  "C" ,,0AA ;BB--/@ A GGAqMcii223 99**40I=!$!3!3D!9J!$!B!B4!HJ992248D!$!B!B4!HJ"yy11$7H#-#8LII44T:E"%))"C"CE"JK # 2 25 9I$/)$;M-:\-I**j8* 99$?!''!*L+'="%;;'O+.??LSS+->?9 , %>rc tjD]C\}}d|vr!tjtj}}n t jt j}}|d}|j |||j||}gd}|Dcic]}|t|j|}}tj|} tj| } t| |jk(sJ| j||} || k(sJdj||D]-}t!t| j|||d|d|/Fy cc}w) z8Test pickling preserves Tree properties and performance. Classifierrr) rrqcapacity n_classesrtrsn_leavesrwrxr{ryrJr|z6Failed to generate same score after pickling with {0}z"Failed to generate same attribute z after pickling with roN)r8rrcrrrdrrrrpickledumpsloadstype __class__rrr)) rrrarbrr attributes attributefitted_attributeserialized_objectrMscore2s r test_pickleroysV(0.m 4 99dkkqA==(//qA+ 1  !Q GQ 9BIwsyy)4 4  #LL-||-.DzS]]***Aq! D K KD Q * I  I. +8 Dv   M.4 sEc ddgddgddgddgddgddgddgddgddgddgddgddgg }ddgddgddgddgddgddgddgddgddgddgddgddgg }ddgddgddgddgg}ddgddgddgddgg}tjD]\}}|d}|j||j|}t |||j dk(sJ|j |}t|dk(sJ|dj dk(sJ|dj d k(sJ|j|} t| dk(sJ| dj dk(sJ| dj d k(rJtjD]L\}} | d} | j||j|}t|||j dk(rLJy) NrCr@r:rNrr=rr9rN)r9r9) rrrrr)rrrrrr') rarbry_truerrry_hatproba log_probarrs rtest_multioutputrvsR R R R A A A Q Q Q B B B A Q Q Q A A A Q Q Q A A A A bAq6B7QG,A1g1vAwA /F!* 1,n!,1 %%a(5&){{f$$$!!!$5zQQx~~'''Qx~~'''))!, 9~"""|!!V+++|!!V+++," )0%m+1 %%a(E6*{{f$$$ %rcdtjD]\}}|d}|jtt|j dk(sJt |jddgtjttjtdzfj}|d}|jt|t|j dk(sJt|jdk(sJt |j ddgt |jddgddggy)NrrrNr@r:rC) rrrrarb n_classes_r)classes_rurrrr)rrr_ys rtest_classes_shaper{s ) 1=n!, 1 ~~"""3<<"a1YY288A;?+ , . .!, 23>>"a'''3<< A%%%3>>Aq623<<2q'B7);<=rctjdd}tjdd}td|}tj D]=\}}|d}|j |||t|j||?y)N}balancedrrr) rcrrr&rrrr'r) unbalanced_X unbalanced_yrrrrs rtest_unbalanced_irisrsy99Tc?L;;t$L)*lCM ) 1En!,  l-HCKK 5|DErcttjtjtj gD]\\}}}|d}tj tj|}tj}t|j||j||tj tjd|}tj}t|j||j||tj tjd|}tj}t|j||j||tjtj|}tj}t|j||j||tD]U}|tj|}tj}t|j||j||WtD]U}|tj|}tj}t|j||j||Wtj tjddd|}tjddd}t|j||j||y)NrrrC)orderrFr=)rr8rrufloat64r'rrcrrr)rrascontiguousarrayr0r/)rrrrrarb csr_containerr7s rtest_memory_layoutrs%(/BJJ 3)(8$}u+ JJtyy . KK3771a=003Q7 JJtyy5 9 KK3771a=003Q7 JJtyy5 9 KK3771a=003Q7  % 8 KK3771a=003Q7, *>>H H%tzz';';H'ErcDtjdddtjf}tjd}d|ddt d}tj j dd}tjt5|j|||dddtjd}tjd}tjt| 5|j|||dddy#1swYlxYw#1swYyxYw) NrErUrrrr:rzgInput should have at least 1 dimension i.e. satisfy `len(x.shape) > 0`, got scalar `array(0.)` instead.r)rurrrrrandomr)rrrrrreescape TypeError)rarbrr expected_errs rtest_sample_weight_invalidris #q"**}%A  A AcrF a 0CIINN3*M z "3 1M23HHQKM99 BL y 53 1M2333333s D ,D DDct|}|d}|jtjtj|dd}|jtjtjt |j |j tjtjtjtjfj}|ddddddddddddgd}|jtj|t |j |j |dd}|jtj|t |j |j tjtjj}|tjdk(xxd zcc<dd dd}|d}|jtjtj|||d}|jtjtjt |j |j |d}|jtjtj|d z||d}|jtjtj|t |j |j y) Nrrr~ class_weightrYg@rPrr:rEgY@rN) rrrcrrr'rrurrrr) rrclf1r iris_multiclf3clf4rrs rtest_class_weightsrst_N q )DHHTYY $ z BDHHTYY $1143L3LMDKKdkkBCEEJ  $ $ $   D HHTYY #1143L3LM z BDHHTYY #1143L3LMGGDKK--.M$++"#s*#u-L q )DHHTYY ]3 |! DDHHTYY $1143L3LM q )DHHTYY ]A%56 |! DDHHTYY ]31143L3LMrc@t|}tjttjtdzfj }|dddgd}d}t jt|5|jt|dddy#1swYyxYw) NrNrLrPr@r:rrzBnumber of elements in class_weight should match number of outputs.r) rrurrbrrrrrrra)rrrzrrps rtest_class_weight_errorsrs}t_N Arxx{Q' ( * *B CC'8&9 JCRG z 1 2s 4BBctjdd\}}d}tjD]:\}}|d|dzj ||}|j |dzk(r:JyNrEr:rFr9)rr)r make_hastie_10_2r8rr get_n_leavesrarbkrrrs rtest_max_leaf_nodesrsp  $ $s CDAq A(0+md1q5AEEaK!QU***+rctjdd\}}d}tjD]4\}}|d|j ||}|j dk(r4Jyr)r rr8rr get_depthrs rtest_max_leaf_nodes_max_depthrse  $ $s CDAq A(0$ma:>>q!D}}!###$rcdD]]}ttjdgdggddgj|}d|jdcxkr dkrPJdJdy)N)rcr|rtrsrxr{rwryrr:rDr=z Array points to arbitrary memory)rrrrflat)attrr|s rtest_arrays_persistrst  K.044qcA3Z!QHNNPTUUZZ]&Q&J(JJ&J(JJ& Krctd}tjd}|jddd}tj D];\}}|d}|j |||jjdk(r;Jy)Nr)rEr\rN)rEr) r1rurmrr8rrrr)rYrarbrrrs rtest_only_constant_featuresrsx%a(L AQ5)A(0(m+ 1 yy""a'''(rcrtjtjgdgtjdf}gd}tj D]\\}}d|vs |dd}|j |||jjdk(sJ|jjd k(r\Jy) N) rrrrrr:rNr9r<rK)r9rJ) rrrr:r:rNrNrNr=r=r= ExtraTreerr:rrNr<) ru transposerrmr8rrrrrqrarbrrrs r,test_behaviour_constant_feature_after_splitsrs  568IJK A *A(0-m d "QQ?C GGAqM99&&!+ ++99''1, ,, -rctjtjdgdgdgdggtjdg}tjgd}tj D]k\}}|dd}|j |||jjdk(sJt|j|tjdd mtj D]k\}}|dd}|j |||jjdk(sJt|j|tjd d my) NrPrU)r9r)rUrPrUrPrr:rrqrL)r9)ruhstackrrmrrrrrr)rrrrrs r(test_with_only_one_non_constant_featuresrs  288cUSEC53%89288I;NOPA %&A(0Gm; 1 yy""a'''3,,Q/1EF G )0?m; 1 yy""a'''3;;q>2774+=> ?rc&tjddjtjj dd}t }t jtd5|j|gddddy#1swYyxYw)Ng\)c=Hr9r@r:r'r)rr:rr:) rurepeatr&rreshaperrrrr)rars rtest_big_inputr sg (A%%bjj199"a@A "C z 3! < !!!s )BBczddlm}tjt5|dddy#1swYyxYw)Nr _realloc_test)sklearn.tree._utilsrrr MemoryErrorrs r test_reallocrs+1 { #s1:cdtjdz}tjj dd}tjj ddd}d|dzz}t d|}tjt5|j||dddd|dz zdz }t d|}tjt5|j||dddy#1swYVxYw#1swYyxYw) NrMPrErNrr:best)splitterr) structcalcsizerurrandnrrrr Exceptionrr)n_bitsrarbhugers rtest_huge_allocationsrs % %F AA !Q#A ! D & FC y ! 1  ! q D & FC { # 1 sC0C<0C9<Dc t|}t|d}t|d}|dvr|jddz}|d|}|d|}ttzt zD]5}||}|d|j ||} |d|j ||} t| j| jdj|| j|} |tvr"| j|} | j|} tt ztzD]t}||tj}t!| j|| |tvs?t!| j| t!| j| v8y) Nrarb)rerdrr<rYr5{0} with dense and sparse format gave different treesr)r8r%rr.r/r0rrrrrrrrrrur'r()r datasetrrrarbrZr,X_sparser}r~y_predy_proba y_log_probasparse_container_test X_sparse_tests rcheck_sparse_inputr/s|dOM#A#A((GGAJ!O jyM jyM*^;nL#A& qI > B B1a H qI > B B8Q O GG GG C J J4 P 1 9 ooa(G--a0K%3n%D~%U  !1("**MM %aii &> Gy )!//-*H'R)'' 6  %r tree_typer)rgrfrerirjrkrlrmc0|dk(rdnd}t|||y)Nrer=r)rrrs rtest_sparse_inputrXs(dIy'95rrdrhct||dy)NrNr)rrs rtest_sparse_input_reg_treesrks y'1-r)rjrkrlrmct|}t|d}||}t|d}|dddj||}|dddj||}t|j|jdj |t |j||j||ddd j||}|ddd j||}t|j|jdj |t |j||j||d|jddz j||}|d|jddz j||}t|j|jdj |t |j||j||dd j||}|dd j||}t|j|jdj |t |j||j|y) Nrarbrr:rN)rYrrrrE)rYrr)rYrr=r) r8r%rrrrrr(rr) rrr7rrarrbr}r~s rtest_sparse_parametersrss9i(M#AQH#A 11BFFq!LA11BFFxQRSA  ?FFyQ aiilAIIaL9 11KOOPQSTUA11KOO! A  ?FFyQ aiilAIIaL9 1x~~a7HA7MNRRSTVWXA1x~~a7HA7MNRR! A  ?FFyQ aiilAIIaL9 1Q7;;AqAA1Q7;;HaHA  ?FFyQ aiilAIIaL9rztree_type, criterioncvt|}t|d}||}t|d}|dd|j||}|dd|j||} t|j| jdj |t | j||j|y)Nrarbrr=rYrrr)r8r%rrrrrr(r) rrr7rrrarrbr}r~s rtest_sparse_criteriarsi(M#AQH#A1YGKKAqQA1YGKKHVWXA  ?FFyQ aiilAIIaL9rzcsc_container,csr_containerct|}d}d}|}tj|}td}g} g} d} | g} t |D]x} |j |d}|j |d|}| j||j dd|fdz }| j|| |z } | j| ztj| jtj} tj| tj} tjtj| tj} || | | f||f}|j}|| | | f||f}|j}|jdd|f}|j}|j d k(j#dkDsJ|j d k(j#dkDsJ|d| j%||}|d| j%||}t'|j(|j(d j+t,||f}t/||D]\}}t1|j(j3||j(j3|t1|j3||j3|t1|j3||j(j3|t1|j(j5|j|j(j5|jt1|j5|j|j5|jt1|j5|j|j(j5|jt1|j7||j7|t,t8vst1|j;||j;|y) Nr=rErrLr]r:rrrUrr)r8rurr1rIbinomial permutationappend concatenater&int32rr'toarrayrcopyrrzrrrrrr rr(r decision_pathrrr)rr7rrrr[rZsamplesrYrroffsetindptri n_nonzero_i indices_idata_irrarX_testrbr}r~XsX1r s rtest_explicit_sparse_zerosrs i(MIJIii "G&a(LG D FXF : "++Is;  ,,W5l{C y!&&q#[N&CaG F+ fnnW%,,RXX6G XXfBHH -F 88BNN4( ;DdGV4Y M " " $FQ 5A"&&(M MMS % % '! ++ +   # % * * ,q 00 0 1 :>>q!DA1 :>>xKA  ?FFtL - B"b/PB!!''--"3QWW]]25FG!!''"+qwwr{;!!''"+qww}}R/@A! GG ! !" % - - /1F1Fr1J1R1R1T  " OOB  ' ' )1??2+>+F+F+H  " OOB  ' ' )177+@+@+D+L+L+N  "!))B-2? 9  %aoob&91??2;N O%Prct|}tjdddfj}tjdddfj d}tj }t jt5|dj||ddd|d}|j||t jt5|j|gdddy#1swYYxYw#1swYyxYw)Nrrr) r8rcrrrrrrrrr)rrraX_2drbrs rcheck_raise_error_on_1d_inputrsdOM !Q$A 99QT? " "7 +D A z "01%))!Q/0 Q 'CGGD! z " QC 00 s>C0C<0C9<DcXt5t|dddy#1swYyxYwN)r+rr5s r test_1d_inputr"s%  ,%d+,,,s )r,cZt|}tjdgdgdgdgdgg}gd}gd}|||}|d}|j||||jj dk(sJ|dd}|j||||jj dk(sJy) Nrr:)rrrrr:)r?r?r?r?r?rrg?)rYr#)r8rurrrr)rr,rrarbrrs r test_min_weight_leaf_split_levelr(sdOM 1#sQC!qc*+AA-M# Q  Q 'CGGAq G. 99  ! ## # Q ECGGAq G. 99  ! ## #rc tjtjjd}t |}|j ttt|jt|jj|yNFr X_smallr&r rrr8ry_smallr)rr)r X_small32rs rtest_public_apply_all_treesr <sXtzz//erctj}tj}|jd}t|}|dd}|j |||j |}|j}|j||jjfk(sJ|j|}t|D cgc] \} } || | f} } } t| tj||jjt k(} ttj"|| tj||j%dj'} |jj(| ksJycc} } w)NrrNrrr:axis)rcrrrr8rrrrrqr enumerater(rurrtrrrzr=r)rrarbrZrrnode_indicator_csrrleavesrjleave_indicator all_leavesrs rtest_decision_pathrWs? A A IdOM Q! 4CGGAqM**1-'//1N   Isyy/C/C#D DD DYYq\F8A&8IJ1~ad+JOJorwwY/GH((I5J ~z*BGG),D """*..0I 99  ) ++ +Ks<E=ct|t}}t|}tjt 5|dj ||dddy#1swYyxYwNrr) X_multilabel y_multilabelr8rrrr)rrrarbrs rtest_no_sparse_y_supportr usQ |4qAdOM y !01%))!Q/000s AA!ctddd}|jdgdgdgdgdgggdgd  t|jjgd t |jj jgd |jdgdgdgdgdgggdtjd t |jjgd t |jj jgd|jdgdgdgdgdgggdt |jjgd t |jj jgdy)aQ Check MAE criterion produces correct results on small toy dataset: ------------------ | X | y | weight | ------------------ | 3 | 3 | 0.1 | | 5 | 3 | 0.3 | | 8 | 4 | 1.0 | | 3 | 6 | 0.6 | | 5 | 7 | 0.3 | ------------------ |sum wt:| 2.3 | ------------------ Because we are dealing with sample weights, we cannot find the median by simply choosing/averaging the centre value(s), instead we consider the median where 50% of the cumulative weight is found (in a y sorted data set) . Therefore with regards to this test data, the cumulative weight is >= 50% when y = 4. Therefore: Median = 4 For all the samples, we can get the total error by summing: Absolute(Median - y) * weight I.e., total error = (Absolute(4 - 3) * 0.1) + (Absolute(4 - 3) * 0.3) + (Absolute(4 - 4) * 1.0) + (Absolute(4 - 6) * 0.6) + (Absolute(4 - 7) * 0.3) = 2.5 Impurity = Total error / total weight = 2.5 / 2.3 = 1.08695652173913 ------------------ From this root node, the next best split is between X values of 3 and 5. Thus, we have left and right child nodes: LEFT RIGHT ------------------ ------------------ | X | y | weight | | X | y | weight | ------------------ ------------------ | 3 | 3 | 0.1 | | 5 | 3 | 0.3 | | 3 | 6 | 0.6 | | 8 | 4 | 1.0 | ------------------ | 5 | 7 | 0.3 | |sum wt:| 0.7 | ------------------ ------------------ |sum wt:| 1.6 | ------------------ Impurity is found in the same way: Left node Median = 6 Total error = (Absolute(6 - 3) * 0.1) + (Absolute(6 - 6) * 0.6) = 0.3 Left Impurity = Total error / total weight = 0.3 / 0.7 = 0.428571428571429 ------------------- Likewise for Right node: Right node Median = 4 Total error = (Absolute(4 - 3) * 0.3) + (Absolute(4 - 4) * 1.0) + (Absolute(4 - 7) * 0.3) = 1.2 Right Impurity = Total error / total weight = 1.2 / 1.6 = 0.75 ------ rr5rN)rYrrr=r<rM)rKrr=r9r=)333333?333333?rrPr#)rarbr)g,d?gܶm۶m?g?)@g@r$)ffffff?rOgUUUUUU?)r9rVr$r`N) rrrrr{r)r|rrur)dt_maes rtest_maer's0T#"21F  JJ 3aS1#s # / FLL))+LMv||))..@ JJ1#sQC!qc*oRWWUVZJXv||,,.CDv||)).. >  JJ1#sQC!qc*oJ>v||,,.CDv||)).. >rcd}tjdtj}d}d}tjtj|fD]}t j D]G\}}|||}||j}|\} \} } }|| k(sJ|| k(sJt|| Itj D]B\}}|||}||j}|\} \} } }|| k(sJ|| k(sJ|| k(rBJy)Nr=rrEcRtjtj|Sr)rergrf)objs r _pickle_copyz)test_criterion_copy.._pickle_copys||FLL-..r) rurintprdeepcopyrr __reduce__r)r) n_outputsrcrZr+ copy_func_typenamecriteriaresult typename_ n_outputs_rx n_samples_s rtest_criterion_copyr8s1I !277+II/ii =+ '--/ 6KAx 95Hx(335F5; 2I/ Jy( (( * ** y* 5  6(--/ +KAx 95Hx(335F5; 2I/ Jy( (( * ** * **  ++rctjjdjdddz}tj|j d}|ddddf}|||}|dddf}t dj||}|j|}ttj|jjtk(d}|j|}tjtj|jj d}t#|dk(sJt#|dk(sJy)NrrErJg*Gr'r@r)rur RandomStater nan_to_numr&rrrsetwhererrtr differenceisfiniterxr) r,rrarbr terminal_regions left_leaf empty_leafinfinite_thresholds r"test_empty_leaf_infinite_thresholdrDs 99  # ) )#r 2T 9D ==Y/ 0D QV A# Q  QU A a 0 4 4Q :D!tzz!}BHHTZZ55BCAFGI%%&67J2;;tzz/C/C#D"DEaH ! "a '' ' z?a  rtree_clscbt|}|d|d}}|dd}|j||}|j}|j}t j t j |dk\sJt j t j |dk\sJt||||yNrarbr\rrGr%cost_complexity_pruning_path ccp_alphas impuritiesrurdiffassert_pruning_creates_subtreerrErarbrinfo pruning_pathrKs r'test_prune_tree_classifier_are_subtreesrQs wG 3<qA "1 5C  + +Aq 1D??LJ 66"'','1, -- - 66"''*%* ++ +"8Q<@rcbt|}|d|d}}|dd}|j||}|j}|j}t j t j |dk\sJt j t j |dk\sJt||||yrGrHrNs r'test_prune_tree_regression_are_subtreesrS$swG 3<qA "1 5C  + +Aq 1D??LJ 66"'','1, -- - 66"''*%* ++ +"8Q<@rctd}|jdgdggddgtdd}|jdgdggddgt|j|jy)Nrrr:rE)rY ccp_alpha)rrassert_is_subtreer)rrs rtest_prune_single_node_treerW5s` !q 1DHHqcA3Z!Q  "qB ?DHHqcA3Z!Q djj$**-rcg}|D].}|d|dj||}|j|0t|D]%\}}t|j|j'y)Nr\r)rrUrY)rrrrVr) estimator_clsrarbrP estimatorsrUrprev_estnext_ests rrMrMAsrJ! 2QRSWW q  # 'z2:((..(..9:rc>|j|jk\sJ|j|jk\sJ|j}|j}|j}|j}dg}|r1|j \}}t |j ||j |t|j||j|t|j||j|t|j||j|||||k(rtt|j|nXt|j||j||j||||f|j||||f|r0yy)N)rr)rqrrtrspopr(r|r'r{ryrJr rxr) r subtree tree_c_left tree_c_rightsubtree_c_leftsubtree_c_rightstack tree_node_idxsubtree_node_idxs rrVrVPs ??g00 00 0 >>W.. .. .$$K&&L**N,,O HE */))+' '! JJ} %w}}5E'F   MM- ('*:*:;K*L      .0F0FGW0X    ( ( 7  + +,< = * +?O/P P 0A0ABR0S T }-w/@/@AQ/R  LL+m4nEU6VW X LLm,o>N.OP 3 rrrrc8td}|djtjjd}| t |}n||d}t j|jtjj|_t |j|j|jf\|_|_ |_ t t jttjj}t||}|j||t|j||j|t|j!|j#|j!|j#y)NrgraFrr)r)r%r&r rrr*rurrrrr r8rr)rrtodense)rrr,rr X_readonly y_readonlyrs r"test_apply_path_readonly_all_treesrkxs7{#Gcl!!$**"2"2!?G.w7 %gcl3 ((:??$**:J:JK  & __j00*2C2C D  O     +288G4::CSCS+TUJ D/8 ,CGGJ #s{{:. G0DE *%--/1B1B71K1S1S1Ur)r4r6r7ctjtj}}||}|j||t j |j |tjt j |k(sJy)Nr) rdrrrrurzrrr)rr%rarbrs rtest_balance_propertyrms\ ==(//qA  #CGGAqM 66#++a. !V]]266!9%= == =rseedc ddgddgddgddgddgddgddgddgg}gd}td|}|j||tj|j |dk(sJtd|}|j||tj |j |dkDsJd }t j|dzdzd d ||dzdz| \}}d|d |k|dkz<tj|}td|}|j||tj |j |dkDsJy)Nrr:rNr=)rrrrr:rNr=r9r4rr7rEr"r)effective_rank tail_strengthrZr[rrYr@) rrruaminrrr make_regressionr)rnrarbrr[s rtest_poisson_zero_nodesrtsN Q!Q!Q!Q!Q!Q!Q!QHA A / MCGGAqM 773;;q> "a '' ' )$ GCGGAqM 66#++a.1$ %% %J  # #!A~* 1n)  DAqArAv!a% q A )$ GCGGAqM 66#++a.1$ %% %rc 0tjjd}d\}}}tj||z||}|j dd|tj |dz }|jtj||z }t|||| \}}} } td d | } tdd | } | j|| | j|| tdj|| } || df|| dffD]\}}}t|| j|}t|tj| j|dd}t|| j|}|dk(r |d|zksJ|d|zkrJy)Nr)rvrErZr[rYrCrN)lowhighr^rr)lam) test_sizerYr7rE)rrrYr4mean)strategytraintestgV瞯.>q.AB *1bggh6F6Fq6I5RV.WX ,Q a0@A  &=j 00 00D<//// 0rrccNd\}}tj||||dd\}}|ddj||}|ddj||}t|j|j|d t |j ||j |y ) z3Test that criterion=entropy gives same as log_loss.)rr<rr)rcrZr[rrrYr3+rentropyz> with criterion 'entropy' and 'log_loss' gave different trees.N)r rrrrrr)r%rcrZr[rarb tree_log_loss tree_entropys r'test_criterion_entropy_same_as_log_lossrs"Iz  ' '   DAq:B?CCAqIM)"=AA!QGL (PQ M))!,l.B.B1.EFrc6tjd\}}tddj||j ||}dfd}t j |}|j ||}tj||sJy)Nrrr=rc|jj|jjj Sr)byteswapviewr newbyteorderr.)arrs rreduce_ndarrayz8test_different_endianness_pickle..reduce_ndarrays/||~""399#9#9#;<GGIIrctj}tj|}tj j |_|j tj<|j|jd|SNr) ioBytesIOrePicklercopyregdispatch_tablerrundarraydumpseek)fprrs r get_pickle_non_native_endiannesszJtest_different_endianness_pickle..get_pickle_non_native_endiannesssb JJL NN1 "11668'5$ s  q r) r rrrrreloadruisclose)rarbrrnew_clf new_scorerrs @@r test_different_endianness_picklers  ' 'Q 7DAq a1 =CGGAqM IIaOEJkk:<=G a#I ::eY '' 'rcNtjd\}}tddj||j ||}Gddt fd}t j|}|j ||}tj||sJy)Nrrr=rceZdZfdZxZS)Ptest_different_endianness_joblib_pickle..NonNativeEndiannessNumpyPicklerct|tjr7|jj |j j }t|!|yr) isinstancerurrrrrsupersave)selfr*ris rrzUtest_different_endianness_joblib_pickle..NonNativeEndiannessNumpyPickler.save(s@#rzz*lln))#))*@*@*BC GL r)__name__ __module__ __qualname__r __classcell__)ris@rNonNativeEndiannessNumpyPicklerr's   rrctj}|}|j|jd|Sr)rrrr)rrrrs r'get_joblib_pickle_non_native_endiannesszXtest_different_endianness_joblib_pickle..get_joblib_pickle_non_native_endianness-s3 JJL +A . s  q r) r rrrrrjoblibrrur)rarbrrrrrrs @@r'test_different_endianness_joblib_pickler s  ' 'Q 7DAq a1 =CGGAqM IIaOE, kkACDG a#I ::eY '' 'rctrtjntj}gd}|jj j Dcic] \}\}}|| }}}}|D]}|||< tjt|jt|jd}|j|dScc}}}w)N) left_child right_childrwrynamesformats same_kindcasting) r-ruint64rrfieldsrlistrvaluesr&) node_ndarraynew_dtype_for_indexing_fieldsindexing_field_namesrrr1new_dtype_dict new_dtypes r"get_different_bitness_node_ndarrayr:s09BHHrxx!V-9,>,>,E,E,K,K,M(juae N%=<t=~**,-$~?T?T?V:WXI   y+  >>sC c|jjjDcic] \}\}}|| }}}}|jjjDcgc]\}}| }}}|Dcgc]}d|z }}t jt |j t |j|d}|j|dScc}}}wcc}}wcc}w)NrM)rroffsetsrr)rrrrrurrr&) rrrr1rrrshifted_offsetsrs r$get_different_alignment_node_ndarrayrLs,8,>,>,E,E,K,K,M(juae N,8+=+=+D+D+K+K+MN-%vNGN078fq6z8O8.--/0N1134& I   y+  >>O8sC$ C&7 C,ctrtjntj}|j\}\}}}}|j |d}|j }t|d|d<||||f|fS)Nrrnodes)r-rurrr.r&rr) r rrEr[rcr/state new_n_classes new_states r"reduce_tree_with_different_bitnessr]sw%288I:I$//:K7H0z9i%$$Y $DM I;Ig.pickle_dump_with_different_bitnessos^ JJL NN1 "11668'I$ s  q r) r rrrrrerrr)rarbrrrrrs @rtest_different_bitness_picklerhs  ' 'Q 7DAq a1 =CGGAqM IIaOEkk<>?G a#I FMM), ,, ,rc0tjd\}}tddj||j ||}fd}t j |}|j ||}|tj|k(sJy)Nrrr=rctj}t|}tjj |_t |jt<|j|jd|Sr) rrrrrrrrrrrs r"joblib_dump_with_different_bitnesszPtest_different_bitness_joblib_pickle..joblib_dump_with_different_bitnesssY JJL O"11668'I$ s  q r) r rrrrrrrr)rarbrrrrrs @r$test_different_bitness_joblib_pickler~s  ' 'Q 7DAq a1 =CGGAqM IIaOEkk<>?G a#I FMM), ,, ,rcLtr#tjtjn"tjtj}tjtjtjtjg}||Dcgc]}|j c}z }tj ddg|}|D]}t|j||tjtd5tj ddgg|}t||dddtjtd5|jtj}t||dddycc}w#1swY^xYw#1swYyxYw)Nrr:rzWrong dimensions.+n_classesrzn_classes.+incompatible dtype) r-rurrrrrr"r&rrrr)expected_dtypeallowed_dtypesdtrcwrong_dim_n_classeswrong_dtype_n_classess rtest_check_n_classesrs;+4RXXbhh'"((288:LNhhrxx("((288*<=N>BRr(BBN!Q~6I?))"-~>? z)F G> hhAx~F,n=> z)H I@ ) 0 0 <.?@@C >>@@sF  'F,FFF#ctjtj}d}tj||}||j g}|D]}t |||t jtd5t ||dddd|ddddddftj|fD]>}t jtd5t |||jddd@t jtd 5t |jtj||dddy#1swYxYw#1swYxYw#1swYyxYw) N)r<r:rNr)rexpected_shapezWrong shape.+value arrayr)r:rNr:zvalue array.+C-contiguouszvalue array.+incompatible dtype) rurrrmrr$rrrrrr&r')rr value_ndarrayrrproblematic_arrs rtest_check_value_ndarrayrsJXXbjj)NNHH^>BM$n&A&A&CDN  "^  z)C D  .  *!Q(3R5F5F}5UV ]]:-H I  -.44    z)J K    ,))         s$?E E,E" EE "E+ct}tjd|}|t|t |g}||Dcgc]+}|j |j j-c}z }|D]}t||tjtd5tjd|}t||dddtjtd5|ddd}t||ddd|j jjDcic] \}\}}|| }}}}|j} tj| d <tj t!| j#t!| j%d } |j | }tjtd 5t||ddd|j} tj&| d <tj t!| j#t!| j%d } |j | }tjtd 5t||dddycc}w#1swYxYw#1swYxYwcc}}}w#1swYxYw#1swYyxYw) N)r<r)rzWrong dimensions.+node arrayr)r<rNznode array.+C-contiguousrNrxrznode array.+incompatible dtyper)rrurmrrr&rrr#rrrrrrrrrrr) rrvalid_node_ndarraysrproblematic_node_ndarrayrrr1 dtype_dictrrs rtest_check_node_ndarrayrsN88D7L *<8,\: 8K14 399))+,#ILHI z)G HU#%88F.#I 4^TU z)C DU#/!#4 4^TU7C6H6H6O6O6U6U6WXX"2$ $+XJX __&N"$((N;~**,-$~?T?T?V:WXI ,229= z)I JU4^TU __&N#%::N< ~**,-$~?T?T?V:WXI ,229= z)I JU4^TUUMUUUUYUUUUs;0J%J&J),J6J=K J&)J3=K KSplittercdtjjd}d}dtjddgtj}}t d||}|||dd|d }t j|}t j|}|j|k(sJt||sJy ) z&Check that splitters are serializable.rrErNr=rr2r<rLN) monotonic_cst) rurr:rr,rrerfrgrr) rr(rr/rcrrsplitter_serialize splitter_backs rtest_splitter_serializabler s ))   #CLbhh1vRWW=yIV$Y :I <CDQHh/LL!34M  % % 55 5 mX .. .rc t|jd}td}|jtt t j||t j|d}t|j|jdy)zhCheck that Trees can be deserialized with read only buffers. Non-regression test for gh-25584. z clf.joblibrrr) mmap_modez?The trees of the original and loaded classifiers are not equal.N) strjoinrrrr rrrrr)tmpdir pickle_pathr loaded_clfs r/test_tree_deserialization_from_read_only_bufferr sh fkk,/0K a 0CGGGW KK[!8J Irc6tjddgddgg}tjddg}|dj|||d}d}tjt |5|j||dddy#1swYyxYw)zhCheck that an error is raised when min_sample_split=1. non-regression test for issue gh-25481. rr:rP)rzb'min_samples_split' .* must be an int in the range \[2, inf\) or a float in the range \(0.0, 1.0\]rN)rurrrrr)r%rarbr msgs rtest_min_sample_split_1_errorr% s 1a&1a&!"A !QA 3##Aq) ! $D 0 z -As 2BBctjgdgj}tjgd}tdd|}|j |||j tj gg}t|tj|ddg|dd}|dd}tdd|}|j |||j tj gg}t|tj|d dgy) z=Check missing values goes to correct node during predictions. rr:rNr=rMrrJ r rr?r#r?r%r%rOg?g@rr:rr>Nr@r;) rurrrrrnanrr|)rrarbdtcrX_equaly_equals r;test_missing_values_best_splitter_on_equal_nodes_no_missingr; s 01244A >?A R1 RCGGAqM[[266( $FFRWWQrsV_-.fGfG R1 RCGGGW[[266( $FFRWWWRS\234rcptjgdgj}tjgd}t|d|}|j |||j j d}|j jd}|j j|}|j j|}||kD} |j j|d} |j j|d} |jtjgg} | r t| | yt| | y)zCheck missing values go to the correct node during predictions for ExtraTree. Since ETC use random splits, we use different seeds to verify that the left/right node is chosen correctly when the splits occur. r r r:rrN) rurrrrrrtrsrJr|rr r) rrnrarbetrrr left_samples right_samples went_left y_pred_left y_pred_rightrs r=test_missing_values_random_splitter_on_equal_nodes_no_missingrU s 01244A >?A $!y QCGGAqM((+J))**1-K9944Z@LII55kBM},I))//*-a0K99??;/2L[[266( $F V, f-rrr2cd}tjtjgdzgdzgj}tj|gdzdgdzzdgdzz}t dd|}|j ||tjtjdd ggj}|j |}t||ddgy ) zITest when missing values are uniquely present in a class among 3 classes.rr9)rr:rNr=rMrrJr r:rNrrr=r Nrurr rrrrr))rmissing_values_classrarbrr y_nan_preds r/test_missing_values_best_splitter_three_classesrx s 266(Q,!;;<=??A &'!+qcAg5a?@A bA SCGGAqM XX2' ( * *FV$Jz$8!Q#?@rctjtjgdzgdzgj}tjdgdzdgdzz}t dd|}|j ||tjtjd tjggj}|j |}t|gd y ) zMissing values spanning only one class at fit-time must make missing values at predict-time be classified has belonging to this class.r9rr:rNr=r9r<rr:rKrrNrr<)rr:rNrrrarbrrrs r)test_missing_values_best_splitter_to_leftr" s 266(Q,!334577A !qA37"#A bA SCGGAqM XX266*+ , . .F [[ Fvy)rctjtjgdzgdzgj}tjdgdzdgdzzdgdzz}t dd|}|j ||tjtjdd ggj}|j |}t|gd y ) zMissing values and non-missing values sharing one class at fit-time must make missing values at predict-time be classified has belonging to this class.r9r r:rrNrrrQg333333@rNrr!s r*test_missing_values_best_splitter_to_rightr$ s 266(Q,!334577A !qA37"aS1W,-A bA SCGGAqM XXS)* + - -F [[ Fvy)rc tjddddtjddddtjg gj}tjd gdzdgdzz}t d d| }|j ||tjtjd d ggj}|j |}t|gdy)zNCheck behavior of missing value when there is one missing value in each class.r:rNr=r<rEr\rXrrrrgffffff@gA@rNrr!s r>test_missing_values_best_splitter_missing_both_classes_has_nanr& s 1aArvvr2r2rvv>?@BBA !qA37"#A bA SCGGAqM XXT*+ , . .F [[ Fvy)rr rc jtjddddtjddddtjg gj}tjd gdzdgdzz}|||}t j t d 5|j||d d d y #1swYy xYw) z4Check unsupported configurations for missing values.r:rNr=r<rEr\rXrrNzInput X contains NaNr)rurr rrrrr)r,r rarbs rtest_missing_value_errorsr( s 1aArvvr2r2rvv>?@BBA !qA37"#A# Q  z)? @As B))B2cDtjjtj}}tj |ddddf<tj |ddddf<|dd}|j |||j|}|d k\jsJy) z5Smoke test for poisson regression and missing values.Nr<rrKr@r7rrrU) rdrrrrur rrr)r%rarbrrs rtest_missing_values_poissonr* s ==   qAAcc1fIAcc2gJ  4CGGAqM [[^F cM    rcDtj|i|\}}|dkD}||fS)N)r make_friedman1)argskwargsrarbs rmake_friedman1_classificationr0 s-  " "D 3F 3DAq BA a4Krzmake_data, Tree, tolerancegQ?gQ?gQ?sample_weight_trainrc~d\}}|||d|\}}|j} tjj|} tj| | j ddg|j ddg<t| || \} } } }|d k(r#tj| j d }nd }d }|||}|j| | ||j| |}tt|||}|j| | |j| |}||z|kDsJd|d|d|y )zFCheck that trees can deal with missing values have decent performance.)rrErP)rZr[noiserYFTrWrr^rrrrNrErrzscore_native_tree=z + z! should be strictly greater than ) rrurr:r choicerrrrrrr ) make_datar%r1rK tolerancerZr[rarb X_missingr(X_missing_trainX_missing_testrrrr native_treescore_native_treetree_with_imputerscore_tree_with_imputers r!test_missing_values_is_resiliencer? sh(&Iz '  DAqI ))   2 3CGIvvIcjj%QWWc jCD7G1#584O^Wff$ 5 5a 89  I9KLKOOOWMOJ#)).&A% @RS/73/55nfM y (+B B   c)- # $ & BrzTree, expected_scoreg333333?g(\?cHtjjd}d}|j|df}tjtj |dztj |dzg}|jddg|dd g }|jjt}||||<|j|} tj| |<| |d d d f<|| } t| ||d j} | |k\s Jd|d| y )z@Check the tree learns when only the missing value is predictive.rrvr\r]rNFTgffffff?rRr4Nr<r)cvzExpected CV score: z but got )rurr:standard_normalrrmrr5rr&boolr rr|) r%expected_scorerKr(rZrarb X_random_masky_mask X_predictiver tree_cv_scores r test_missing_value_is_predictiverI s ))   "CI )R1A a0"'')q.2IJKAJJt}9t JMM VVX__T "F#M22F=&&I&6L66LAadG / 0D$D!Q15:: s ))   "C$Iz yZc RDAq@BvvAcjj%QWWc j;<GGAGGAJ'MM#A#Q'LQ7Q/Qqt!tQwZ14a41(003\5I5I!5LMrcFtdjtjtj}tdjtjtj}t j |}t j |}||k(sJyr)rrrcrrrerf)tree1tree2pickle1pickle2s rtest_deterministic_picklerS[ sl # 2 6 6tyy$++ NE " 2 6 6tyy$++ NEll5!Gll5!G g  rrar<rKc|jdd}tjd}||dj||}t |j|jdd|}|j j }t|dk\sJ|jt|j j dd|j j ddtj|j jdk(|j jdk(z}t|j j |dy) a'Check that we properly handle missing values in regression trees using a toy dataset. The regression targeted by this test was that we were not reinitializing the criterion when it comes to the number of missing values. Therefore, the value of the critetion (i.e. MSE) was completely wrong. This test check that the MSE is null when there is a single sample in the leaf. Non-regression test for: https://github.com/scikit-learn/scikit-learn/issues/28254 https://github.com/scikit-learn/scikit-learn/issues/28316 r@r:rKrrNrNrU) rrurrrrr{rrr flatnonzerortry)r%rarrbr tree_refr{ leaves_idxs r'test_regression_tree_missing_values_toyrXh s6 "aA ! A )! 4 8 8A >DT{qyyQ/3Hzz""H x1} -x||~- DJJ''+X^^-D-DRa-HI  ! !R 'DJJ,E,E,JKJDJJ'' 3S9rctjj|}d}tj|tjj dd}tj |ddddf<|j|tj|}t|dj||}|jj}t|dk\sJ|y) NrErr@r:ir<rr) rurr:rrrr rrrrr{r)rKr(rZrarbr r{s r-test_regression_extra_tree_missing_values_toyrZ s ))   2 3CI )2::.66r1=AAcdAgJKKN )A += K O OPQST UDzz""H x1} 'x' rc>tjd\}}tjj d}|j }|j tjdtj|dddgfdz jt}tj||<t||d \}}}}tjgd tj }tddd} | j||||t!| j"j$dk\sJtj&| j"j(dk(| j"j*dk(z} t-| j"j$| dy)aCheck that we properly handle missing values in classification trees using a toy dataset. The test is more involved because we use a case where we detected a regression in a random forest. We therefore define the seed and bootstrap indices to detect one of the non-frequent regression. Here, we check that the impurity is null or positive in the leaves. Non-regression test for: https://github.com/scikit-learn/scikit-learn/issues/28254 T) return_X_yr)r:r9)rrNrNrM)nr r)prNQ'a[&.er^YRrErEr_IJ3/kKnr\rh9rrrO#MZrnrdr^^rbrM]r}rlryr r^rmmrsrEr|rtrj\4r\r~rMrMrjrxr r rrrrXreNr rirrrlr frr^rer:rirJ rrrzjr{r8rxrq>Ur_r`Prk?rKrTr=r=Lrrr=riHnrrr@r:rU)r load_irisrurr:rrrrr&rCr rrrrrrr{rUrtryr) rarbr(r8maskrr1rrr rWs r+test_classification_tree_missing_values_toyr sW    .DAq ))   #CI << ''bhh /1QV9q=   fTl ffIdO-iLGQhhXXG "&z D DHHWW ww/0 tzz""a' (( (  ! !R 'DJJ,E,E,JKJDJJ'' 3S9rctdd}|jtjtjt j |j}t|j||j}t j|jjt j}d|d<t||j||jjdk(sJ|jdk(sJt!j"t$5t'|jj(|j(dddt'|jj(d|j(dt|j||j}t j|jjt j}d|ddt||j||jjdk(sJ|jdk(sJ|jt'|jj(|j(y#1swYxYw)zHTest pruning a tree with the Python caller of the Cythonized prune tree.rr:rrr=N)rrrcrrru atleast_1drxrn_features_in_r6rmrrquint8r!rrAssertionErrorr)r|r rc pruned_treeleave_in_subtrees rtest_build_pruned_tree_pyr s !qA >D DHHTYY $ doo.IT00)T__MKxx 5 5RXXFQ+tzz3CD :: A %% %  ! !Q && & ~ &@4::++[->->?@tzz''*K,=,=a,@AT00)T__MKxx 5 5RXXFQR+tzz3CD :: A %% %  ! !Q &> (>(>> &tzz''):):;@@s +II ctdd}|jtjtjt j |j}t|j||j}t j|jjt j}d|d<tj t"d5t%||j|dddy#1swYyxYw)z8Test pruning a tree does not result in an infinite loop.rr:rrz,Node has reached a leaf in the original treerN)rrrcrrrurrxrrr6rmrrqrrrrr!rs r$test_build_pruned_tree_infinite_loopr s "qA >D DHHTYY $ doo.IT00)T__MKxx 5 5RXXFQ H I k4::7GHIIIs C77DcZtjjd}|jdddj tj }tj |gdz}tjdtj}t||dgd }t||y ) zNon-regression test for gh-30554. Using log2 and log in sort correctly sorts feature_values, but the tie breaking is different which can results in placing samples in a different order. rrrUg$@rE)locscaler^r<rr)2r(rXr\rEr`1r-rrxr<rJre)r:r rNrrr|r^rr=!rK$rdrkrvr9r,r",rjror%rqrMrc0rN) rur default_rngnormalr&r'rrr,rr))r(somefeature_valuesrexpected_sampless rtest_sort_log2_buildr s ))   #C ::#T: 3 : :2:: FD^^TFQJ/Nii"''*G ^Wb) w 01rr)__doc__rrrrerr itertoolsrrrrnumpyrurjoblib.numpy_pickler numpy.testingrsklearnrr r sklearn.dummyr sklearn.exceptionsr sklearn.imputer sklearn.metricsrrrsklearn.model_selectionrrsklearn.pipelinersklearn.random_projectionr sklearn.treerrrrsklearn.tree._classesrrrrsklearn.tree._partitionerrsklearn.tree._treerrr r!r"r#r$r%r sklearn.utilsr&sklearn.utils._testingr'r(r)r*r+r,sklearn.utils.fixesr-r.r/r0sklearn.utils.validationr1rREG_CRITERIONSrrdictr8__annotations__update SPARSE_TREESrrr  y_small_regrarbrrrrcrr:r(rrr^permr load_diabetesrd load_digitsrerYmake_multilabel_classificationrrr X_sparse_posry_randomr X_sparse_mixrmr%rrrmark parametrizerrrrrrrrrrrrrrrrrr!r1r6r;r>rArCr_rorvr{rrrrrrrrrrrrrrrrrsortedr< intersectionrrrrziprrrrr r rrr r'r8rDrrQrSrWrMrVrkrmrIrtrrrrrrrrrrrrrrrrrrr"r$r&r(r*r0r-r?rIrsrrMrSr rXrZrrrr)r s0rrs  ..  ,)))(-(UUE*;  /2/ 8%O5.  3,  & 4    "((56<94:@>>@74545?@A84544/ 8 P 6 "XBx"bAq6Aq6Aq6:"X1v1v xiiA t{{''( IIdO kk$  "8 ! ! # x++, d# //$'     v}}))*kk$  d# !!$ DXDDbR l ###1 $' \S !   15  1$RTJRRT ))$++ .mm(//:KKfmm4 W-[1$<8$84% H5$84288G$8 4 $N X X!1!1!34n5,65,*F* $y'89n56:y'89, "0"5 2126 /4 B-r2 : 4 W 2L>-KFB8 v+1,1..9W:/W &*: z+G,G..9:/EP0f9%x=( E*8Z1h30+,N,,N^+ , +$K$( -?"!*&Rl3   6 46 fS->-K-KI-V&WXZ$=>.?Y. l3$WX.90::Y40:f<E4493D$E~ VW ,D$$)2CDnU$WX.9::Y:"l3!3~~#FHP4HPV  +,,, ++dVn-DE$F,$$+G,G..9G:/G?+,,,:+.90:,0a?H+8+dVn-DE F $ vc(--/*k:-FFG&<>Q%RS AT AHMMO4&;=O%PQ AR5 A . :%P+fh%78+dVn-D~-UVW9,4&RS!1!1!34 >5T >q*&+&B'0T"8:M!NOq!f-G.PG,(2(4?$?"I-,-6@$ B1Uh,o,,.0G0@0G0G0IJ / /&!1!1!345*&GH5I52q*&GH.I+.By&&9: A; Ay&&9: *; *y&&9: *; * y&&9: *; *+dVn-DE (89%56 F !1!1!34 !5 !    "7;  "4d; &(>E &(;TB .v?'@ 'Z/Y5E5E5G$PT1VWX:  ! !#89  % %'=>NN, "79K!LM "&&!RVVQ1-."&&"&&!Q1-.!Q1bffbff-.!Q2661bff-.  &GH:I N:B (,:^<>I$2s*FDs ~!~8 ~~