JL i ~ddlZddlZddlZ ddlZddlmZGddeZdZe dk(reyy#e$rY)wxYw)N)VectorSpaceClusterercZeZdZdZ d dZd dZd dZdZdZdZ d Z d Z d Z y)KMeansClusterera The K-means clusterer starts with k arbitrary chosen means then allocates each vector to the cluster with the closest mean. It then recalculates the means of each cluster as the centroid of the vectors in the cluster. This process repeats until the cluster memberships stabilise. This is a hill-climbing algorithm which may converge to a local maximum. Hence the clustering is often repeated with random initial means and the most commonly occurring output means are chosen. Nc tj|||||_||_||_|rt ||k(sJ||_|dk\sJ|r|dkDrJ||_|r|ntj|_ | |_ y)a :param num_means: the number of means to use (may use fewer) :type num_means: int :param distance: measure of distance between two vectors :type distance: function taking two vectors and returning a float :param repeats: number of randomised clustering trials to use :type repeats: int :param conv_test: maximum variation in mean differences before deemed convergent :type conv_test: number :param initial_means: set of k initial means :type initial_means: sequence of vectors :param normalise: should vectors be normalised to length 1 :type normalise: boolean :param svd_dimensions: number of dimensions to use in reducing vector dimensionsionality with SVD :type svd_dimensions: int :param rng: random number generator (or None) :type rng: Random :param avoid_empty_clusters: include current centroid in computation of next one; avoids undefined behavior when clusters become empty :type avoid_empty_clusters: boolean N) r__init__ _num_means _distance_max_differencelen_means_repeatsrandomRandom_rng_avoid_empty_clusters) self num_meansdistancerepeats conv_test initial_means normalisesvd_dimensionsrngavoid_empty_clusterss Y/mnt/ssd/data/python-lab/Trading/venv/lib/python3.12/site-packages/nltk/cluster/kmeans.pyrzKMeansClusterer.__init__ sH %%dI~F#!( C $6)$CCC# !||!gk22 CFMMO %9"c|jr|jdkDr tdg}t|jD]}|r td||jr|dkDr4|jj t ||j|_|j|||j|jt|dkDr|D]}|jtdx}}tt|D]M}d} tt|D]#} || k7s | |j|||| z } %|| |ksG| ||}}O||_yy)Nrz6Warning: means will be discarded for subsequent trialsz k-means trial)keyr)r rprintrangersamplelistr _cluster_vectorspaceappendr sortsum_sum_distances) rvectorstracemeansstrialmeansmin_difference min_meansidjs rcluster_vectorspacez#KMeansClusterer.cluster_vectorspacePsS ;;4==1, J K4==) 'Eou-;;%!)"ii..tG}dooN  % %gu 5 MM$++ &  ' v;?  $ s # $*. -NY3v;' =s6{+GAAvT00F1IFFG")Q-?016!9IN  =$DK# rc|jt|krd}|st|jDcgc]}g}}|D]'}|j|}||j |)|r t dt t|j||j}|j|j|} | |jkrd}||_ |syyycc}w)NF iterationT) r r r"classify_vectorspacer&r!r$map _centroidr r)r ) rr*r+ convergedmclustersvectorindex new_means differences rr%z$KMeansClusterer._cluster_vectorspaceps ??S\ )I).doo(>?1B??%3F 55f=EUO**623+& !T^^Xt{{!KL "00iH  4 44 $I( -  * @s Ccdx}}tt|jD]/}|j|}|j||}|||ks,||}}1|SN)r"r r r )rr= best_distance best_indexr>meandists rr7z$KMeansClusterer.classify_vectorspacesg&*) 3t{{+, 8E;;u%D>>&$/D$}(<,14M  8 rc\|jrt|jS|jSrB)r r r rs r num_clusterszKMeansClusterer.num_clusterss" ;;t{{# #?? "rc|jS)z0 The means used for clustering. )r rHs rr.zKMeansClusterer.meanss{{rc\d}t||D]\}}||j||z }|S)Ng)zipr )rvectors1vectors2r@uvs rr)zKMeansClusterer._sum_distancess< (+ /DAq $..A. .J /rc~|jr2tj|}|D]}||z } |dt|zz St|s@tjj dtjj dJtj|d}|ddD]}||z } |t|z S)Nrz.Error: no centroid defined for empty cluster. z4Try setting argument 'avoid_empty_clusters' to True r)rcopyr sysstderrwrite)rclusterrEcentroidr=s rr9zKMeansClusterer._centroids  % %yyH! #F" #q3w</0 0w<   !RS   Kuyy,H!!"+ #F" #c'l* *rc8d|j|jfzS)Nz%)r rrHs r__repr__zKMeansClusterer.__repr__s6$++t}}9UUUr)rgư>NFNNF)F) __name__ __module__ __qualname____doc__rr4r%r7rIr.r)r9rYrrrrsN ".:`$@(8 #   +$Vrrcddlm}m}ddgddgddgddgfDcgc]}tj|}}ddgd d gg}|d|| }|j |d d }t d |t d|t d|jt ddgddgddgddgddgddgfDcgc]}tj|}}|d|d}|j |d }t d |t d|t d|jt tjddg}t d|zdt |j|t ycc}wcc}w)Nr)reuclidean_distancer)rT)r+z Clustered:zAs:zMeans: )rz classify(%s): )end) nltk.clusterrr`numpyarrayrVr!r.classify)rr`fr*r. clustererr<r=s rdemorpsuA)*AAAA'GH!u{{1~HGHVaV E#5UKI  $d ;H ,  % (IOO%& G)*AAAAAQRTUPV'WX!u{{1~XGX  #5rBI  $/H ,  % (IOO%& G[[!Q F /F ", )  V $% G7IYs E30E8__main__) rRrrSrk ImportErrornltk.cluster.utilrrrprZr^rrrts^   3gV*gVZ  F zFo  s 4<<