L i^ dZddlZddlZddlZddlZddlZddlZddlZddlZddl Z ddl m Z ddl m Z mZddlmZmZmZddlZddlmcmZddlmZddlmZdZedZed Zed Z e!Z"e!Z#e!Z$e!Z%ejLe'Z(ejRd Z*Gd d ejVZ,e GddZ-Gdde.Z/Gdde/Z0Gdde.Z1e GddZ2GddZ3GddZ4GddZ5de6eefde6ee fd e e7eee ffd!Z8d"ejrd#e7ed$fd%e6e:efd e e7ejveffd&Z<Gd'd(Z=Gd)d*eZ>Gd+d,Z?d-Z@e?ZAy).a This module introduces CUDA Sanitizer, a tool for detecting synchronization errors between kernels ran on different streams. It stores information on accesses to tensors to determine if they are synchronized or not. When enabled in a python program and a possible data race is detected, a detailed warning will be printed and the program will exit. It can be enabled either by importing this module and calling :func:`enable_cuda_sanitizer()` or by exporting the ``TORCH_CUDA_SANITIZER`` environment variable. N)Iterator) dataclassfield)AnyOptionalTypeVar)_pytree)TorchDispatchModeTKTVaTVbz(new_.*|.*_like)cVeZdZejZejZdZy) AccessTypec.|tjurdSdS)Nz reading fromz writing to)rREADselfs [/mnt/ssd/data/python-lab/Trading/venv/lib/python3.12/site-packages/torch/cuda/_sanitizer.py__str__zAccessType.__str__7s!%!8~JlJN)__name__ __module__ __qualname__enumautorWRITErrrrr3s" 499;D DIIKEKrrcreZdZUdZeed<eed<eed<eed<e eed<e ed<e jed<y ) AccessaWStores information about a single access to a tensor by a kernel. Args: type: either AccessType.READ or AccessType.Write. seq_num: the sequential number of the kernel performing the access. stream: the stream id of the stream executing the kernel. operator: the schema of the launched kernel, which lists the arguments and return type. aliases: the arguments in the schema this access corresponds to. is_output: Whether the tensor was an output of the kernel. stack_trace: the stack summary object captured during access. typeseq_numstreamoperatoraliases is_output stack_traceN) rrr__doc__r__annotations__SeqNumStreamIdstrlistbool traceback StackSummaryrrrrr;s:   O M #YO'''rrceZdZdZy)SynchronizationErrorz1Base class for errors detected by CUDA Sanitizer.N)rrrr'rrrr1r1Ss;rr1cHeZdZdZdedeejdedefdZ dZ y) UnsynchronizedAccessErrorzIStores information about two unsynchronized accesses to one data pointer.data_ptrallocation_stack_tracecurrent_accessprevious_accessc<||_||_||_||_yN)r4r5r6r7)rr4r5r6r7s r__init__z"UnsynchronizedAccessError.__init__Zs#! &<#,.rc ndtffd }tj5jt j d|j d|jjd||jjd|jjd||j|jr<jdd j|jjnjd jcdddS#1swYyxYw) Naccesscj|jd|j|jrJjddj |jz|j rjd|j rjdjddj |j jdy)N z argument(s) z, z, and toz the outputz With stack trace: )writer#r r$joinr%r&format)r<messages r format_accessz8UnsynchronizedAccessError.__str__..format_accessgs MMV__-R }= >~~ o &..0IIJ##MM*- m, MM'0B0B0I0I0K(L'MRP rz ============================ CSAN detected a possible data race on tensor with data pointer z& Access by stream z$ during kernel: zPrevious access by stream z during kernel: z'Tensor was allocated with stack trace: r?z&Trace for tensor allocation not found.)rioStringIOr@textwrapdedentr4r6r"r7r5rArBgetvalue)rrDrCs @rrz!UnsynchronizedAccessError.__str__fs & [[] &g MMTTXTaTaSbc&&*&9&9&@&@%AB  $-- . MM,T-A-A-H-H,IIZ[  $.. /** >wwt::AACDEG  FG##%1 & & &s C?D++D4N) rrrr'DataPtrrr.r/rr:rrrrr3r3Ws@S / /!))?)? @ / /  /%&rr3c(eZdZdZdeefdZdZy)CUDASanitizerErrorsz4Wrapper class for errors reported by CUDA Sanitizer.errorsc||_yr9)rM)rrMs rr:zCUDASanitizerErrors.__init__s  rc4dt|jdS)Nz detected z errors)lenrMrs rrzCUDASanitizerErrors.__str__s3t{{+,G44rN)rrrr'r,r1r:rrrrrLrLs>t$895rrLcleZdZUdZeej ed<ee Z e e ed<dZ ee ed<y) TensorInfoaStores information about a single tensor and recent accesses to it. Args: allocation_stack_trace: the stack summary object captured during tensor allocation. Can be ``None`` if the allocation wasn't caught by CSAN. reads: list of read accesses to the tensor that were performed since the last write. write: the last write access to the tensor. r5)default_factoryreadsNr@) rrrr'rr.r/r(rr,rTrr@rrrrRrRs<%Y%;%;<<5E4<5"E8F "rrRceZdZddZdeddfdZdeddfdZdedeejddfdZ deddfd Z dede fd Z dedeejfd Zdedeefd Zdedeefd ZdededdfdZdededdfdZy)_TensorsAccessedreturnNci|_yr9accessesrs rr:z_TensorsAccessed.__init__s 35 rr4cr||jvr)tjd||j|dyy)NzFound tensor with pointer: %s, but no matching tensor allocation in the trace. Backfilling the trace now. Perhaps the sanitizer was enabled after some torch operations?)rZloggerinfo create_tensorrr4s rensure_tensor_existsz%_TensorsAccessed.ensure_tensor_existss< 4== ( KKQ     x . )rcp||jvr(tjd||j|yy)NzFound duplicate tensor allocation in the trace for tensor with pointer: %s. Assuming the trace for tensor deallocation wasn't caught and backfilling it now. Perhaps the sanitizer was enabled after some torch operations?)rZr\r] delete_tensorr_s rensure_tensor_does_not_existz-_TensorsAccessed.ensure_tensor_does_not_exists: t}} $ KKQ     x ( %rr&c4t||j|<yr9)rRrZrr4r&s rr^z_TensorsAccessed.create_tensors#-["9 hrc|j|=yr9rYr_s rrbz_TensorsAccessed.delete_tensors MM( #rc<|j|jrdSdS)NTFrZrTr_s r!were_there_reads_since_last_writez2_TensorsAccessed.were_there_reads_since_last_writes}}X.44t?%?rc4|j|jSr9)rZr5r_s rget_allocation_stack_tracez+_TensorsAccessed.get_allocation_stack_traces}}X&===rc4|j|jSr9)rZr@r_s r get_writez_TensorsAccessed.get_write}}X&,,,rc4|j|jSr9rhr_s r get_readsz_TensorsAccessed.get_readsrnrr<cT|j|jj|yr9)rZrTappendrr4r<s radd_readz_TensorsAccessed.add_reads h%%,,V4rcT||j|_g|j|_yr9)rZr@rTrss r set_writez_TensorsAccessed.set_writes$(. h%(* h%rrWN)rrrr:rJr`rcrr.r/r^rbr-rirkrrmr,rprtrvrrrrVrVs6/W// )W ) )::.6y7M7M.N: : $g$$$@'@d@>> )(( )> -'-hv.>--'-d6l-55&5T5+'+6+d+rrVceZdZddZdeddfdZdeddfdZdeddfdZdeddfd Z deddfd Z deddfd Z ded e ddfd Z dededdfdZdeee fdeee fddfdZdededdfdZdeddfdZdeddfdZddZded e dedefdZy)StreamSynchronizationsrWNcXi|_i|_i|_|jtyr9)current_sync_statesrecorded_sync_stateshost_sync_state create_streamDEFAULT_STREAM_IDrs rr:zStreamSynchronizations.__init__s)KM KM!79 ,-rr"cp||jvr(tjd||j|yy)NzFound Stream with id: %s, but no matching stream creation in the trace. Backfilling the trace now. Perhaps the sanitizer was enabled after some torch operations?)r{r\r]r~rr"s r_ensure_stream_existsz,StreamSynchronizations._ensure_stream_existss< 11 1 KKQ     v & 2reventcp||jvr(tjd||j|yy)NzFound Event with id: %s, but no matching event creation in the trace. Backfilling the trace now. Perhaps the sanitizer was enabled after some torch operations?)r|r\r] create_eventrrs r_ensure_event_existsz+StreamSynchronizations._ensure_event_existss< 11 1 KKQ     e $ 2rcp||jvr(tjd||j|yy)NzFound duplicate event creation in the trace for event with id: %s. Assuming the trace for event deletion wasn't caught and backfilling it now. Perhaps the sanitizer was enabled after some torch operations?)r|r\r] delete_eventrs r_ensure_event_does_not_existz3StreamSynchronizations._ensure_event_does_not_exists< D-- - KKQ     e $ .rc||jvrtjd|yd|j|<|jj |j|<y)NzFound duplicate Stream creation in the trace for Stream with id: %s. PyTorch Streams are only created once, so this trace entry is ignored.r)r{r\r]r}copyrs rr~z$StreamSynchronizations.create_streamsW T-- - KK*  ,-D  (/3/C/C/H/H/JD $ $V ,rcD|j|i|j|<yr9)rr|rs rrz#StreamSynchronizations.create_events  ))%0+-!!%(rc@|j||j|=yr9)rr|rs rrz#StreamSynchronizations.delete_events !!%(  % %e ,rr!cJ|j|||j||<yr9)rr{)rr"r!s rupdate_seq_numz%StreamSynchronizations.update_seq_nums% ""6*3:  (0rc|j||j||j|j|j|<yr9)rrr{rr|rrr"s r record_statez#StreamSynchronizations.record_statesB !!%( ""6*+/+C+CF+K+P+P+R!!%(rstateothercr|jD]$\}}t|j|d|||<&yN)itemsmaxget)rrrr"r!s r_state_wait_for_otherz,StreamSynchronizations._state_wait_for_other#s: %{{} @OFG &" 5w?E&M @rc|j||j||j|j||j|yr9)rrrr{r|)rr"rs rstream_wait_for_eventz,StreamSynchronizations.stream_wait_for_event)sG ""6* !!%( ""  $ $V ,d.G.G.N rc|j||jjD]}|j|||j |j |j |yr9)rr{keysrrr}r|rs rall_streams_wait_for_eventz1StreamSynchronizations.all_streams_wait_for_event0sd !!%(..335 6F  & &vu 5 6 ""  $";";E"B rc|j||jjD]!}|j||j|#|j|j|j|yr9)rr{valuesrr}rr"rs rall_streams_wait_for_streamz2StreamSynchronizations.all_streams_wait_for_stream9sq ""6*--446 PE  & &ud.F.Fv.N O P ""  $":":6"B rc|jjD]\}}|||j|<|jjD]}|j ||j yr9)r{rr}rrrs rsync_all_streamsz'StreamSynchronizations.sync_all_streamsBsp!55;;= 9MFE+0=D  ( 9--446 DE  & &ud.B.B C Drcurrent_stream other_streamc|j||j|||j|j|dkSr)rr{r)rrr!rs ris_ordered_afterz'StreamSynchronizations.is_ordered_afterIsD "">2 ""<0$22>BFF|UWXXXrrw)rrrr:r*rEventIdrrr~rrr)rrdictrrrrrr-rrrrryrysY. 'H''%'%d% %' %d % KH K K.'.d.-'-d-;X;;4;S'S8SS @(F*+@4869I4J@ @  H W    D  ( t DY&Y17YGOY YrryceZdZdZddZdedeedeedeed ed e e e efde e fd Z d eddfd Zd eddfdZd ededdfdZd ededdfdZdeddfdZdeddfdZdeddfdZddZdeddfdZd eddfdZy) EventHandlerzAnalyzes CSAN trace for synchronization errors. Stores information on each stream's synchronizations with other streams as well as tensor accesses to determine whether a given kernel launch might cause a data race. rWNcNt|_t|_d|_y)Nr)rVtensors_accessedrysyncsr!rs rr:zEventHandler.__init__Ys 0 2+-   rr" read_only read_writeoutputsr#tensor_aliasesc < dtdtdttddf fd }g xjdz c_jj |jt jjt jtjd}|j|D]} jj| ttj j|||| | |v|} || | jj#| jj%| | |D]} jj| ttj&j|||| | |v|} jj)| r+jj+| D] } || | | n#|| | jj#| jj-| |  S) Nr4r6r7rWc|yjj|j|j|js7j t |j j|||yyr9)rrr"r!rrr3rrk)r4r6r7 error_listrs rcheck_conflictz:EventHandler._handle_kernel_launch..check_conflictgss&::..%%'>'>@V@V!!- --HHR&' rF lookup_lines)rJrrr!rrr.r/extract walk_stackinspect currentframereverserr`rrrmrtrrirprv) rr"rrrr#rrr&r4r6r7rs ` @r_handle_kernel_launchz"EventHandler._handle_kernel_launch^s   /5 HPQWHX   "24    !!&$,,7,,44  !5!5!7 8u5 ! EH  ! ! 6 6x @# x(G#N .$*?*?*I*I(*S   ! ! * *8^ D E # FH  ! ! 6 6x @#   x(G#N$$FFxP'+'<'<'F'Fx'PNO"8^_MNnd.C.C.M.Mh.W  ! ! + +Hn E% F(rrc:|jj|yr9)rrrs r_handle_event_creationz#EventHandler._handle_event_creation &rc:|jj|yr9)rrrs r_handle_event_deletionz#EventHandler._handle_event_deletionrrc<|jj||yr9)rrrs r_handle_event_recordz!EventHandler._handle_event_records v.rc<|jj||yr9)rrrs r_handle_event_waitzEventHandler._handle_event_waits ((7rr4c|jj|tjj tj t jd}|j|jj||y)NFr) rrcr.r/rrrrrr^res r_handle_memory_allocationz&EventHandler._handle_memory_allocationsr ::8D,,44  !5!5!7 8u5  ++   rcp|jj||jj|yr9)rr`rbr_s r_handle_memory_deallocationz(EventHandler._handle_memory_deallocations* 228< ++H5rc:|jj|yr9)rr~rs r_handle_stream_creationz$EventHandler._handle_stream_creations   (rc8|jjyr9)rrrs r_handle_device_synchronizationz+EventHandler._handle_device_synchronizations ##%rc:|jj|yr9)rrrs r_handle_stream_synchronizationz+EventHandler._handle_stream_synchronizations ..v6rc:|jj|yr9)rrrs r_handle_event_synchronizationz*EventHandler._handle_event_synchronizations --e4rrw)rrrr'r:r*setrJr+rintr,r1rrrrrrrrrrrrrrrrrQs;! HHw<HL H W H  HS$s)^,H " #HT'G'''G''/'/8//888d8  '  d  6G66)h)4)&7X7$7575t5rrabrWc#ZK|jD]\}}||vs ||||fywr9)r)rrargvalues r zip_by_keyrs7ggi% U !8uaf$ $%s+ +schemaargs.kwargsc#K|jdt|}|jt|dDcic]}|j|}}t||Ed{t ||D] \}}}||fycc}w7%wr9) argumentsrPnamezipr) rrr schema_argsr schema_kwargs_argumentrs r zip_argumentsrs"";SY/K.4.>.>s4y{.KLsSXXs]LML;%%%(? 8U M%s3BA; BB&Bc eZdZddZ ddedededeededdf d Zd e jd e ed fd e eefdeddf dZ d e jdededdfdZy)ArgumentHandlerrWNclt|_t|_i|_t|_yr9)r dataptrs_readdataptrs_writtenrrrs rr:zArgumentHandler.__init__s'+.5.1e8:%(U rris_write metadata_onlyrr%ct|tjr|jr|j }|r|j j |n|s|jj ||jj|g||j|j||r|jj |yyyyr9) isinstancetorchTensoris_cudar4raddrr setdefaultrrr)rrrrrr%r4s r_handle_argumentz ArgumentHandler._handle_arguments eU\\ *u}}~~'H%%))(3"""&&x0    * *8R 8##H-44T:   *0= *rrr.r is_factoryc Rt|||D]\}}|jduxr|jj}|xs'|jduxr|jj }tjt j |j||j||y)N)rrr) r alias_inforpytree tree_map_ functoolspartialrr) rrrrrrrrrs r parse_inputszArgumentHandler.parse_inputss -VT6B OHe**$6W8;N;N;W;WH'##4/T8K8K8T8T4T    !!))%!"/    rrc t|j|fD]h\}}|xs'|jduxr|jj }t j t j|j| d||jy)NT)rr%r) rreturnsrrrrrrr)rrrrresrrs r parse_outputszArgumentHandler.parse_outputssfnnwj9 JC&d*J3>>3J3J/J    !!))!..""/    rrwNF)rrrr:rr-rr+rrFunctionSchematuplerrrrrrrrs+# +++ + sm +  + +*$$CHoS#X    2**58IM rrceZdZddZddZy)CUDASanitizerDispatchModeNct|_tjj t j |jjt j|jjt j|jjt j|jjt j|jjt j |jj"t j$|jj&t j(|jj*t j,|jj.t j0|jj2yr9)r event_handlerr_C_activate_gpu_trace gpu_trace$register_callback_for_event_creationr$register_callback_for_event_deletionr"register_callback_for_event_recordr register_callback_for_event_waitr'register_callback_for_memory_allocationr)register_callback_for_memory_deallocationr%register_callback_for_stream_creationr,register_callback_for_device_synchronizationr,register_callback_for_stream_synchronizationr+register_callback_for_event_synchronizationrrs rr:z"CUDASanitizerDispatchMode.__init__,sP)^ $$&66    5 5  66    5 5  44    3 3  22    1 1  99    8 8  ;;    : :  77    6 6  >>    = =  >>    = =  ==    < < rcx|i}ttj|jj}t }|j |j|||||i|}|j|j|||jjtjjj|j|jz |j|j |j|j"}|r-|D]} t%| t&j(t+||S)N)r)file)r-FACTORY_FUNCTION_REGEXmatch_schemarrrrrrrcudar cuda_streamrrrrprintsysstderrrL) rfunctypesrrrargument_handlerrrMerrors r__torch_dispatch__z,CUDASanitizerDispatchMode.__torch_dispatch__Ns  >F066t||7H7HIJ *,%%dllD&Z%X''&&t||W&T##99 JJ % % ' 3 3  * *-=-N-N N  - -  $ $ LL  + +    .e#**- .%f- -rrw)rN)rrrr:r*rrrr r +s   Drr c*eZdZdZddZdZdZdZy) CUDASanitizeraManages the lifetime of a CUDASanitizer dispatch mode object. The CUDASanitizer class wraps the entering/exiting functions of the dispatch mode context manager in the enable function/destructor, respectively. This is to explicitly set the lifetime of the dispatch mode object to that of the application. This approach was deemed more elegant than using the atexit module. Nc0t|_d|_yr)r dispatchenabledrs rr:zCUDASanitizer.__init__ss13  rcF|jjd|_y)NT)r. __enter__r/rs renablezCUDASanitizer.enablews ! rcL|jjdddd|_yr)r.__exit__r/rs rdisablezCUDASanitizer.disable{s tT40 rcvt3tjs|jr|jyyyyr9)r$ is_finalizingr/r5rs r__del__zCUDASanitizer.__del__s/ Oc&7&7&9t|| LLN@L&9Orrw)rrrr'r:r2r5r8rrrr,r,js rr,c,tjy)acEnable CUDA Sanitizer. The sanitizer will begin to analyze low-level CUDA calls invoked by torch functions for synchronization errors. All data races found will be printed to the standard error output along with stack traces of suspected causes. For best results, the sanitizer should be enabled at the very beginning of the program. N)cuda_sanitizerr2rrrenable_cuda_sanitizerr;sr)Br'rrrrEloggingrer$rGr.collections.abcr dataclassesrrtypingrrrrtorch.cuda._gpu_tracer! _gpu_tracer torch.utilsr rtorch.utils._python_dispatchr rr r r rrJr*rr) getLoggerrr\compilerEnumrr Exceptionr1r3rLrRrVryrrr rr r+Argumentrrr r,r;r:rrrrJs    $()) ))): T] en en       8 $$$67KK (( (.<9<4& 44&n5)5  # #  # 4+4+nnYnYb~5~5B%$r3w-%DSM%huRc\?R6S%     (-c3h  AEc3h   eENNC'()  DDN< 1<~Br