L iqF :ddlZddlZddlmZddlmZmZddlmZddl m Z m Z ddl m Z ddlmZddlmZmZmZddlZdd lmZgd ZeeZGd d ee Ze d GddZededeededfdZdededdfdZde eddfdZ!de eddfdZ"dedede efdZ#dede efdZ$dede efdZ%dede efdZ&dede efdZ'edZ(edZ)d ee)d!ee)ge(fde*e(e e)ffd"Z+d#ede efd$Z,d#ede efd%Z-d&ede efd'Z.d&ede efd(Z/defd)Z0dedefd*Z1d&ede efd+Z2d&edefd,Z3d-edefd.Z4d&edefd/Z5d0ede efd1Z6d2eedefd3Z7de efd4Z8d5ede efd6Z9de efd7Z:y)8N) defaultdict)IterableIterator)contextmanager)asdict dataclass)Enum) getLogger)CallableOptionalTypeVar)signpost_event) AffinityMode6maybe_temporarily_apply_numa_binding_to_current_thread NumaOptionsc eZdZdZdZdZdZdZy)rzW See behavior description for each affinity mode in torch.distributed.run. nodesocket exclusivez core-complexN)__name__ __module__ __qualname____doc__NODESOCKET EXCLUSIVE CORE_COMPLEXX/mnt/ssd/data/python-lab/Trading/venv/lib/python3.12/site-packages/torch/numa/binding.pyrrs D FI!LrrT)frozenc(eZdZUeed< dZeed<y)r affinity_modeF!should_fall_back_if_binding_failsN)rrrr__annotations__r$boolrrr rr$s/4%t3rr gpu_index numa_optionsreturnc#hK|dyt}t||dt|yw)z 1. Applies NUMA binding to the current thread, suitable for the thread which will be interacting with GPU gpu_index. 2. Resets to the original CPU affinity before exiting the context manager. Nr'r(logical_cpu_indices)+_get_allowed_cpu_indices_for_current_thread%_apply_numa_binding_to_current_thread$_bind_current_thread_to_logical_cpus)r'r(original_logical_cpu_indicess r rr2s: #N#P ), (8s02c |t|d}tjd| t||}tjdt |t |tjdt |t |tjdt |tddi|d t |i y#t$rttdd i|d tji tjd ||jr*tjdtjYywxYw)Nr+z0Attempting to apply NUMA binding, given input %rz0Computed logical_cpu_indices=%s for NUMA bindingr,z1Validated logical_cpu_indices=%s for NUMA bindingz=Successfully bound to logical_cpu_indices=%s for NUMA binding numa_binding apply_successr-)categoryname parametersapply_exception tracebackz)Failed to apply NUMA binding for input=%rzHContinuing executing without applying NUMA binding, despite exception %s)rloggerinfo_get_logical_cpus_to_bind_to_get_ranges_str_from_ints%_raise_if_logical_cpu_indices_invalidr0r Exceptionr9 format_exc exceptionr$warning)r'r(kwargsr-s r r/r/IsH|,F KKBFK-:l   > %&9 : .BUV ? %&9 : -ATU K %&9 : # %'@AT'U  #"Y113  DfM  9 9 NNZ$$&  !sBCA9EEr-c|s tdy)Nz+Must bind to a non-empty set of CPU indices) RuntimeErrorr,s r r>r>s HII rc0tjd|yNr)ossched_setaffinityr,s r r0r0s/0rc|jtjk(rt|}|S|jtjk(rt |}|S|jtj k(rt|}|S|jtjk(rt|}|Std|jd)z Args: gpu_index: The index of the GPU that will be used by the subprocess. Example: 0 numa_options: See NumaOptions for details. Returns: Set of logical CPU indices to bind to. r'zAffinity mode z not supported.) r#rr!_node_get_logical_cpus_to_bind_tor#_socket_get_logical_cpus_to_bind_tor&_exclusive_get_logical_cpus_to_bind_tor)_core_complex_get_logical_cpus_to_bind_to ValueError)r'r( logical_cpuss r r<r<s!!\%6%6689M    # #|':': ::YO    # #|'='= == R   # #|'@'@ @@9U  >,*D*D)E_UVVrc2t|}t|S)z- Core logic of 'node' numa strategy. rKnuma_node_index)"_get_numa_node_index_for_gpu_index._get_allowed_logical_cpu_indices_for_numa_node)r'rTs r rLrLs99MO 9' rct|}t|}t|}t}|D]}|j t ||S)z/ Core logic of 'socket' numa strategy. rKrS) socket_index)rU_get_socket_index_for_numa_node'_get_numa_node_indices_for_socket_indexsetupdaterV)r'numa_node_index_of_gpurXnuma_node_indicesrQrTs r rMrMsg@)T2.L@!5L,  : /    rcRt|}t|}t|}|j|}t |}t |d}t t|j}t|t|z}t|t|z}|dkr+tdt|d|ddt|d z||zt||z}||z||krdnd z} t|j|| D chc] } | D]} |  } } } | Scc} } w) z2 Core logic of 'exclusive' numa strategy. rKrSc,tt|SNlogical_cpu_index)min6_get_logical_cpu_indices_sharing_same_physical_core_asrbs r z8_exclusive_get_logical_cpus_to_bind_to..s# B"3 # rzThere are only z# physical cores on numa_node_index=,z but there are z% GPUs associated with this NUMA node.r) rU_get_gpu_indices_for_numa_nodesortedindexrV _group_bydictitemslenrErdlistvalues) r'rT gpu_indicesoriginal_gpu_relative_indexallowed_logical_cpu_indices,physical_core_to_allowed_logical_cpu_indicesnum_physical_cores_per_gpu(num_gpus_to_give_one_extra_physical_corestartendr-rc$logical_cpu_indices_for_original_gpus r rNrNs99MO0QK%K"-"3"3I">"P'# 4=# 4048;AACD40"%4" [ " 0340 K0,"A%c"NOPPtdscuuv wK 011VW X  (*D Ds#%MH E  $ %+-UU   $( 8 ? ? A$ $, "5 ,  ,,(, 0/,sD#c>t|}t|}t|}|j|}t |}t |d}t t|jd}|t|z}t|j|}|S)z Core logic of 'core-complex' numa strategy. Each GPU is assigned a full core complex (group of cores sharing L3 cache) within its affined NUMA node. rKrSc,tt|Sra)rd1_get_logical_cpus_sharing_same_max_level_cache_asrbs r rfz;_core_complex_get_logical_cpus_to_bind_to..%s# ="3 # rc*t|d |dfS)Nrgr)ro)items r rfz;_core_complex_get_logical_cpus_to_bind_to..1ss47|mT!W5r)key) rUrirjrkrVrlrmrnrorprq)r'rTrrrsrt.max_level_cache_to_allowed_logical_cpu_indicescache_index_for_original_gpurzs r rOrOs99MO0QK%K"-"3"3I">"P'# 6?# 626: : @ @ B6  62$?6B$ ,06==?,",$( 0/rKVrqget_keycjtt}|D]}||}||j| |S)z2 Groups elements with same key into sets. )rr[add)rqr key_to_valuesvaluers r rlrlCsA-8,c8d|d}d}t}tj|D]I}|jdr|ddj s)tj j ||}tj j |d}t|5}|jjdvr ddd dddtj j |d}t|5} t| j} ddd |kr| }tj j |d } t| 5} t| j}dddL|S#1swYxYw#1swYoxYw#1swYqxYw) Nrz/cacherktype>DataUnifiedlevelshared_cpu_list) r[rHlistdir startswith isdecimalpathjoinrrstripintr) rccpu_cache_dir_absolute_path max_level$logical_cpus_sharing_max_level_cacheentrycache_index_absolute_pathtype_absolute_path type_filelevel_absolute_path level_filershared_cpu_list_absolute_pathshare_cpu_list_files r r}r}Xs &&7%8? I+.5(78(ab 0C0C0E $&GGLL1Le$T! WW\\*CVL $ % ~~%%'/BB  B !ggll+DgN % & +* )*E + I   (* %'8) %/ 0 4G3R#((*4 0  +4 0/'    + +  s$"E7;FF7F F F rTc8t|}t}||zSNrS)0_get_cpu_indices_for_numa_node_MAYBE_NOT_ALLOWEDr.)rTall_cpu_indicesallowed_cpu_indicess r rVrV~s'F'OFG 0 00rcd|d} t|5}|j}dddt S#1swYxYw#t$r}td|d|d}~wwxYw)z Returns: Indices of all CPUs associated with numa_node_index. However, the list is not filtered based on whether the thread is allowed to use them. z/sys/devices/system/node/nodez/cpulistNz:Could not determine CPUs corresponding to numa_node_index=.)rrFileNotFoundErrorrEr)rTcpulist_absolute_pathr cpu_range_stres r rrs}tjjS)N)torchcuda device_countrrr _get_gpu_countrs :: " " $$rcTtjj|}|j}|j}|j }|dd|dd|dd}d|d}t |5}tt|jjdcdddS#1swYyxYw)N04x:02xz.0z/sys/bus/pci/devices/z /numa_noder) rrget_device_properties pci_domain_id pci_bus_id pci_device_idrmaxrrr)r'device_propertiesdomainbusdevicepci_addrpci_numa_node_absolute_pathrs r rUrUs 88C  , ,F  & &C  , ,FQs3iq B7H$9(:"N ) *-a3qvvx~~'(!, ---s #1BB'clttDchc]}t||k(r|c}Scc}w)NrK)rangerrU)rTr's r riris9~/0   - Bo U   s1c2t|}t|SNrS) cpu_index)._get_arbitrary_allowed_cpu_index_for_numa_node_get_socket_index_for_cpu)rTarbitrary_cpu_indexs r rYrYsH' %/B CCrrcd|d} t|5}t|jjcdddS#1swYyxYw#t$r}t d||d}~wwxYw)Nrz/topology/physical_package_idz)Could not determine socket for cpu_index=)rrrrrrE)rpackage_id_absolute_pathrrs r rrsv %i[0MNR * + )qqvvx~~'( ) ) ) RGYLIJPQQRs3 A'A AA  A A A-A((A-c,tt|Sr)rdrVrSs r rrs 6W r ranges_strc:t}|jdD]|}|j}|sd|vrI|jd\}}t|t|}}|j t ||dzc|j t|~|S)z Util for parsing a string of int ranges, as in a sysfs file. Args: ranges_str: E.g., "0-2,4,6-7" Returns: E.g., {0, 1, 2, 4, 6, 7} rh-rg)r[splitrrr\rr)rints range_str start_strend_strrxrys r rrsUD%%c* % OO%   ) !*!5 IwYW3E KKeS1W- . HHS^ $ % Krrc<|syt|}g}|dx}}|ddD]?}||dzk(r|}||k(r|j|n|j|d||x}}A||k(r|j|n|j|d|dj|S)z Convert a set of integers to a compact string with ranges. Args: ints: E.g., {0, 1, 2, 4, 6, 7} Returns: E.g., "0-2,4,6-7" rrgNrrh)rjappendr)r sorted_intsrangesrxprevnums r r=r=s ,K Fq>!ED12 $(?D} ) q/0 ED } ! q'( 88F rctd5}|j}dddt|S#1swYtSxYw)Nz!/sys/devices/system/node/possible)rrr)rpossible_nodes_strs r !_get_systemwide_numa_node_indicesr sE 1 2&aVVX& ++= >>& ++= >>s 0ArXct}t}|D]/}t|}|t|k(s|j |1|Sr)rr[rrr)rXsystemwide_numa_node_indicesmatching_numa_node_indicesrTrs r rZrZsW#D#F !$7<L+  4?RS S & * *? ; < &%rc,tjdSrG)rHsched_getaffinityrrr r.r.!s   ""r);rHr9 collectionsrcollections.abcrr contextlibr dataclassesrrenumr loggingr typingr r r rtorch._utils_internalr__all__rr:strrrrrr/r[r>r0r<rLrMrNrOrrrmrlrer}rVrrrUrirYrrrr=rrZr.rrr rs #.%).. 0  8  "3 " $ 4 4 4%-k%: d^,66%06 6rJ#c(JtJ 1S1d1  X 8CCHcc#h.C0C0SC0L,0C,0CH,0^ CL CLhqkHaS!V,<aQi99X9#0#0X#0L1s1sSVx1::X:&%%-S-S-$ss3xDDDRCRCRss C0!HSM!c!H?3s8? &S &SX &#SX#r