~L i9UdZddlZddlZddlZddlZddlZddlZddlZddlZddl Z ddl Z ddl Z ddl Z ddl mZmZmZmZmZmZmZmZddlmZm Z!dejDfdZ#ddl$m%Z&m'Z'm(Z(ddl)m*Z*m+Z+m,Z,m-Z-m.Z.dd l/m0Z0erdd l1m2Z2m3Z3gd Z4e4e5e4k(sJ d d l6m7Z7e7jp[7e jdk(r ddZ:e:[:de;de;de;deddZ?e.sejdrge jdk7rSe jZCe jejejzddlGe jeC[Cne-re?ddlGGddZHGddZIGddZJd ZKd!ZLd"ZMd#ZNdeOeOePd$feOePd$fffd%ZQd&ZRd'ZSd(ZTd)\ZUZVZWd*D]'ZVd+eVZWeTeVZUeWxeU_XeU_YeUeZeW<)[U[V[W[TeZd,Z[e4jd-d.Z]d/Z^ dd0lGm_Z_dd2lem`Z`d3\ZVZfege`D]ZVeVdd4k7rceVjd5sRe4jeVeie`eVZfejefsejefsQefjeYk7saeVd6vsfeYef_lneVd7k(steme jeYeV[V[fes dd8Zoeoe`[od9ede;fd:Zpd9ede!d;fd<Zqd9ede!ed=fd>Zre jatdd?ZuddAZvdBeePd;e;fddfdCZwddDZxdEdFdGejDdHejDddfdIZydejDfdJZzdejDfdKZ{dLeeje;fddfdMZ}dejfdNZ~de;fdOZdPe;ddfdQZdRejDddfdSZdejDfdTZdUeejDeJfdVege;ffdWZddXZdddYdZZdd[Zdd\Zdd]Zdd^Zdd_Zdd`ZddalmZmZmZmZdZdedb<e4j%gdcdddlmZddelemZddflmZmZmZmZmZGdgdheZGdidjeZGdkdleZGdmdneZGdodpeZGdqdreZGdsdteZGdudveZGdwdxeZGdydzeZGd{d|eZGd}d~eZGddeZGddeZGddeZGddeZGddeZeeeeeeeeeeeeeeeeeeehZeePeeefed<eZeePd;ed<ddlemZmZmZddlmZddlmZmZddlmZmZmZmZmZddlmZmZdZe`je[erddl­eZ[dZd3\ZVZfege`jD]vZVeVjdseVevreie`jeVZfeYef_leVdk(refeZeV<d4eVzZVefeZeV<eVjd4rfe4jeVx[V[fddleZee4j%degeeDddlmZddlemZmZddḽ[[dZddlmZmZmZmZddlemZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZddlmZddlemZddlZeddlZeddlZeddlZee`jeZ>m?Z?n hdǣZ@dȄZAejdd@eeeeje;ffdɄZD ddeejdeejfd̄ZEddlemFZFeFjd΄ZHdejDfdτZIdЄZJeIr eHyy#e9$rY wxYw#e9$r;ddlGm`Zaeaj'e9e jd1jdwxYw)a The torch package contains data structures for multi-dimensional tensors and defines mathematical operations over these tensors. Additionally, it provides many utilities for efficient serialization of Tensors and arbitrary types, and other useful utilities. It has a CUDA counterpart, that enables you to run your tensor computations on an NVIDIA GPU with compute capability >= 3.0. N)AnyCallable get_originOptionaloverload TYPE_CHECKINGTypeVarUnion) ParamSpecTypeIsreturncy)NFrT/mnt/ssd/data/python-lab/Trading/venv/lib/python3.12/site-packages/torch/__init__.py_running_with_deployr)s r)_functionalize_sync_import_dotted_name classproperty) get_file_path#prepare_multiprocessing_environment1profiler_allow_cudagraph_cupti_lazy_reinit_cuda12USE_GLOBAL_DEPSUSE_RTLD_GLOBAL_WITH_LIBTORCH) __version__)Device IntLikeType)G BoolStorage BoolTensor ByteStorage ByteTensor CharStorage CharTensor DoubleStorage DoubleTensor FloatStorage FloatTensor GradScaler IntStorage IntTensor LongStorage LongTensor ShortStorage ShortTensorSymBoolSymFloatSymIntTensor TypedStorageUntypedStorage$are_deterministic_algorithms_enabledautocastchunkcompilecond enable_gradexportget_default_deviceget_deterministic_debug_modeget_device_moduleget_float32_matmul_precision get_rng_stateinference_mode initial_seed-is_deterministic_algorithms_warn_only_enabled is_storage is_tensoris_warn_always_enabledloadlobpcg manual_seedmatmulno_gradrandrandnsaveseedset_default_deviceset_default_tensor_typeset_deterministic_debug_modeset_float32_matmul_precisionset_printoptions set_rng_stateset_warn_alwayssplitstack sym_floatsym_fresh_sizesym_intsym_itesym_maxsym_minsym_notsym_sumtypename unravel_indexuse_deterministic_algorithmsvmap) _rocm_initwin32c  ddl}ddlm}tjdd}tj j tjdd}tj j tj jtd}tj j |jddd}tj j tjd}tjtjk7r0tj j tjdd}nd }|||||fDcgc]#}tj j|r|%} }tjd | DsUtj j tjd tj j |d d dd} nd } |rtj d| Drq|j#dd} d| z} tj j |ddd|} tj j tj| | d}nd }| j%d| |fDt'j(dd}t+|d}|j-d}t&j.|j0_|rt&j.|j4_| D]}tj6| t'j8dt'j8dt;j<dk7rt'j8dtIjHtj j |d!}d"}|D]}d"}|rb|j5|dd#}t'jJ}|5|d$k7r0t'jL|}|xjNd%|d&z c_'||d}|rl|s9d'j | tjPd(gztjPd(<d}|j1|}|t'jLt'jJ}|xjNd%|d&z c_'||j-|ycc}w#t>$r0tAtCjDd jGYwxYw))Nrcuda ProgramFileszC:\Program FilesLibrarybinlibuserbasec3K|]A}tjjtjj|dCyw)znvToolsExt64_1.dllN)ospathexistsjoin.0ps r z&_load_dll_libraries..s2 FGBGGNN277<<+?@ A sAA NVTOOLSEXT_PATHzNVIDIA Corporation NvToolsExtx64c3K|]8}tjtjj|d :yw)z cudart64*.dllN)globrrrsrurvs rryz&_load_dll_libraries..s0) @A "'',,q/:; ;) s>A._ CUDA_PATH_VzNVIDIA GPU Computing ToolkitCUDAvc3`K|]&}tjj|s#|(ywN)rrrsrtrvs rryz&_load_dll_libraries..s$ 277>>!;LA s$..z kernel32.dllT)use_last_errorAddDllDirectoryrezvcruntime140.dllz msvcp140.dllARM64zvcruntime140_1.dllz Microsoft Visual C++ Redistributable is not installed, this may lead to the DLL load failure. It can be downloaded at https://aka.ms/vs/17/release/vc_redist.x64.exe z*.dllFi~z Error loading "z" or one of its dependencies.;PATH)) sysconfig torch.versionrjrrgetenvrsrusys exec_prefixdirname__file__get_config_varbase_exec_prefixrtbuiltinsanyallreplaceextendctypesWinDLLhasattr SetErrorModec_void_p LoadLibraryWrestypeLoadLibraryExWadd_dll_directoryCDLLplatformmachineOSErrorprinttextwrapdedentstripr~get_last_errorWinErrorstrerrorenviron)r cuda_version pfiles_path py_dll_path th_dll_path usebase_pathpy_root_bin_pathbase_py_dll_pathrx dll_pathsnvtoolsext_dll_pathcuda_version_1 cuda_path_var default_path cuda_pathkernel32with_load_library_flagsprev_error_modedll_pathdlls path_patcheddll is_loadedres last_errorerrs r_load_dll_librariesrs06ii0CD ggll3??IuE ggll277??8#R2R)SBJJv&#'L++C0; //&*?*?*ABCLL*3%/LMLI/ 2 o.  t   %'   s (S$AS5S?>S?rs lib_folderlib_namec ddlm}tjtjj |d|d|}|P|j dd}|tjtjj |dd|d|z }tjtjj ||d|}||zS)Nrrinvidiarnrcu)rrjr~rrrsrurW)rsrrrnvidia_lib_pathsmaj_cuda_version lib_pathss r_get_cuda_dep_pathsrs 3yy  T8ZA'--c215DII GGLLx2.>-?)@% R   "'',,tZIJI i ''rrequiredctjdk(sJdd}tjD]}t |||}|s|d}n|s |rt |dtj|rt j|yy)z8Preloads cuda deps if they could not be found otherwise.LinuxzShould only be called on LinuxNrz not found in the system path )rsystemrrsr ValueErrorrr)rrrlib_pathrscandidate_lib_pathss r_preload_cuda_depsr1s ??  'I)II 'H1$ HM *1-H   H:%CCHH:NOO Hrctjdk(rytjdk(rdnd}d|}tjj t }tjj tjj|d|} tj|tj td5}|j}dddd vrytd d td d td dy#1swY3xYw#t$rYywxYw#t$r}ddd dddddddddddd}|j!Dcgc](}|j#dd|j$dvs'|*ncc}w} }| s||j'D]\} }t| |tddd !tj|tjYd}~yd}~wwxYw)"NWindowsDarwinz.dylibz.solibtorch_global_depsrn)modez/proc/self/mapsz libcudart.so cuda_nvrtczlibnvrtc.so.*[0-9]zlibnvrtc-builtins.so.*[0-9] nvjitlinkzlibnvJitLink.so.*[0-9]zlibcublas.so.*[0-9]zlibcudnn.so.*[0-9]zlibcudart.so.*[0-9]zlibcupti.so.*[0-9]zlibcufft.so.*[0-9]zlibcurand.so.*[0-9]zlibcusparse.so.*[0-9]zlibcusparseLt.so.*[0-9]zlibcusolver.so.*[0-9]zlibnccl.so.*[0-9]zlibnvshmem_host.so.*[0-9]zlibcufile.so.*[0-9])cublascudnnr cuda_runtime cuda_cupticufftcurandrcusparse cusparseltcusolverncclnvshmemcufilerrnvtxzlibnvToolsExt.so.*[0-9]F)r)rrrrrsabspathrrurrr RTLD_GLOBALopenreadr ExceptionrvaluesrWargsitems) lib_extrhereglobal_deps_lib_pathf_mapsr cuda_libsrnis_cuda_lib_errrs r_load_global_depsrCsI%#//+x7hUG%gY/H 77??8 $D77<<(=uhO4C (v/A/AB '( !A !U* |-A B |-J K {,D E ! !    C,).1.)+1/3/'2+% $%++- 31Bchhqk1QC   I$-OO$5 5 J z8 4 5 6#4F4x}}44x}}44x}}44x}}44x}}44444444444444444444'B(,,B%(,,(>"?/HLL/rr1ceZdZdZdZdZdZdZdZdZ dZ d Z d Z d e d ejfd Zd ejfdZd ejfdZd ejfdZd ejfdZd"dZd"dZd"dZd"dZdZdZdZdZdZd eej>ej>ffdZ dZ!dZ"dZ#d"dZ$d e%fd Z&y!)#r0z Like a float (including magic methods), but redirects all operations on the wrapped node. This is used in particular to symbolically record operations in the symbolic shape workflow. c||_yrrrs rrzSymFloat.__init__zrrct|tjtjtt fst S|jt|Sr) rrrrr1r0rrrYrs rr zSymFloat.__truediv__s9%(,,!QR! !%%i&677rct|tjtjtt fst S|jt|Sr) rrrrr1r0rr"rYrs rr$zSymFloat.__rtruediv__s9%(,,!QR! !&&y'788rct|tjtjtt fst Sttj|t|z Sr rrrrr1r0rrYr&r'rs rr)zSymFloat.__floordiv__sA%(,,!QR! !D9U+;$;<==rct|tjtjtt fst Sttjt||z Srrrs rr,zSymFloat.__rfloordiv__sA%(,,!QR! !Ie$4t$;<==rc6|jjSrrbool_r s rr zSymFloat.__bool__yy  rc:|jjddS)Nrpr)r guard_floatr s r __float__zSymFloat.__float__syy$$R++rct|tjtjtt fst Stj|dk\|j|Sr) rrrrr1r0rtorch_check __float_pow__rs rr.zSymFloat.__pow__sD%(,,!QR! ! TQY!!%((rct|tjtjtt fst Stj|dk\|j|Sr) rrrrr1r0rrr__rfloat_pow__rs rr1zSymFloat.__rpow__sD%(,,!QR! ! UaZ ""5))rrr ctdr4r6rs rr8zSymFloat.__eq__r9rctdr4r6rs rr;zSymFloat.__lt__r9rctdr4r6rs rr=zSymFloat.__gt__r9rctdr4r6rs rr?zSymFloat.__le__r9rctdr4r6rs rrAzSymFloat.__ge__r9rctdr4r6rs rrzSymFloat.__float_pow__r9rctdr4r6rs rrzSymFloat.__rfloat_pow__r9rctdr4r6rs rrzSymFloat.__float_truediv__r9rctdr4r6rs rr"zSymFloat.__rfloat_truediv__r9rctdr4r6r s r __trunc__zSymFloat.__trunc__r9rctdr4r6rs rrSzSymFloat.__sym_max__r9rctdr4r6rs rrUzSymFloat.__sym_min__r9rctdr4r6r s r __sym_int__zSymFloat.__sym_int__r9rctd)z'Return True if the float is an integer.r5r6r s r is_integerzSymFloat.is_integers233rcHtj|jS)z.Represent this float as an exact integer ratio)rrrrr s rrrzSymFloat.as_integer_ratios~~d#4466rc6|jjSrrcr s rrezSymFloat.__repr__rfrc.|jjSrrhr s rrjzSymFloat._sympy_rkrc>ttj|Sr)rnrrr s rrpzSymFloat.__hash__sHNN4())rc|S)z+Returns the complex conjugate of the float.rr s rrvzSymFloat.conjugates rcV|jjddjS)z4Returns the hexadecimal representation of the float.rpr)rrhexr s rrz SymFloat.hexs"yy$$R+//11rNrw)'rxryrzr{rr r$r)r,r rr.r1r|rr r8r;r=r?rArrrr"rrSrUrrr}rrrrerjrprvstrrrrrr0r0ss  8 9 > > !, ) *4F4x}}44x}}44x}}44x}}44x}}44444444447% hll(B"C7'*2S2rr0creZdZdZdZdZdZddZddZddZ d Z de jfd Z d Zd Zd Zy)r/am Like a bool (including magic methods), but redirects all operations on the wrapped node. This is used in particular to symbolically record operations in the symbolic shape workflow. Unlike regular bools, regular boolean operators will force extra guards instead of symbolically evaluate. Use the bitwise operators instead to handle this. c||_yrrrs rrzSymBool.__init__rrc6|jjSrrr s rr zSymBool.__bool__rrc\tj|jjSr)rrrrr s rrzSymBool.__int__s||DIIOO-..rr ctdr4r6rs rr_zSymBool.__and__r9rctdr4r6rs rrazSymBool.__or__r9rctdr4r6r s r __sym_not__zSymBool.__sym_not__r9rctdr4r6)rthen_valelse_vals r __sym_ite__zSymBool.__sym_ite__r9rctdr4r6rs rr8zSymBool.__eq__r9rc6|jjSrrcr s rrezSymBool.__repr__ rfrc.|jjSrrhr s rrjzSymBool._sympy_#rkrc|jjr#t|jjStt j |Sr)r is_constantrnrrr r s rrpzSymBool.__hash__&s> 99 " )* * d+, ,rN)r r/)rxryrzr{rr rr_rarrrr r8rerjrprrrr/r/sI !/44(444x}}4'-rr/cddl}tj|rtjt|f|St |dr|j St||jr|S| S)zhSymInt-aware utility for logical negation. Args: a (SymBool or bool): Object to negate rNr) sympy overrideshas_torch_function_unaryhandle_torch_functionr_rrrBasic)ars rr_r_.s^ ))!,..wa@@q- }}!U[[!r 5Lrctj|rtjt|f|St |t r|St |dr|jStj|S)zoSymInt-aware utility for float casting. Args: a (SymInt, SymFloat, or object): Object to cast rW) rrrrYrr0rrWrrrs rrYrY?s^ ))!,..y1$BB!X O $  >>! rctj|rtjt|f|St |t r|St |t rtj|Stj|S)zmSymInt-aware utility for int casting. Args: a (SymInt, SymFloat, or object): Object to cast ) rrrr[rr1r0r&truncrrrs rr[r[Ns^ ))!,..wa@@!V Ax zz!} <<?rc@tj||frtjt||f||St |t t fr|j|St |t t fr|j|St\}}t ||sJt|t ||sJt|t ||s t ||r)tjtj||Stj||S)a SymInt-aware utility for max which avoids branching on a < b. Unlike builtins.max(), this only works for int/float, and it always promotes to float if any argument is float (unlike builtins.max, which will faithfully preserve the type of the input argument). ) rhas_torch_functionrr]rr1r0rS__all_and_float_typestyperrmaxrb all_types float_typess rr]r]]s##QF+..wA1EE!fh'(}}Q A) *}}Q34I{ a #,T!W, # a #,T!W, #![!Z;%?~~hll1a011||Aq!!r.c> ddl}|j|jtjtj f}|jtj f}||fS#t $r7tjtj f}tj f}Y||fSwxYwr)numpyintegerfloatingrrrModuleNotFoundError)nprrs rrrxs ( JJ KK LL NN ' *,hnn(E k !! (\\8>>2 ~~' k !! (sAA:BBc@tj||frtjt||f||St |t t fr|j|St |t t fr|j|St\}}t ||sJt|t ||sJt|t ||s t ||r)tjtj||Stj||S)zSymInt-aware utility for min().) rrrr^rr1r0rUrrrrminrs rr^r^s##QF+..wA1EE!fh'(}}Q A) *}}Q24I{ a #,T!W, # a #,T!W, #![!Z;%?~~hll1a011||Aq!!rctj|rtjt||Sd|D]V}t |t t jfst j|cSt |t sK|jXt j|Sddl m m }|jtfd|DS)z N-ary add which is faster to compute for long lists than iterated binary addition. Only does something special for integers. Nr)to_node wrap_nodec30K|] }|ywrr)rwrfoundrs rryzsym_sum..s(Iq):(Is)rrrr`rr1rrsumrtorch.fx.experimental.sym_noderrr})rrrrrs @@rr`r`s ##D)..wdCC E !fhll34<<% % a FFE   }||D!!A U]]5(ID(I#IJ KKrcfdS)Nctj|rtj|f|St|trt j |}t|ddrt|ddStt|S)N__sym___) rrrrr1rrYrgetattrr&)rfnnames rrz_get_sym_math_fn..fns  - -a 0222tQ? ? a "A 1tfB' (071tfB/02 2"wtT"1%%rr)rrs`@r_get_sym_math_fnrs& Ir)Nrprp) sqrtcoscoshsinsinhtantanhasinacosatanlog2_sym_ _sym_sqrtsym_sqrtc4tj|||fr tjt|||f|||St |t t jfrt|t|k(sJt |t r|j||S|r|S|S)z>SymInt-aware utility for ternary operator (``t if b else f``.)) rrrr\rr/rr rr)rtrs rr\r\s##Q1I...wAq 1aKK a'8==1 2tAw$q'7II I!W}}Q""1qrcHtj|jSr)rtensoritem)ris rrZrZs <<  " " $$r)_initExtensiona Failed to load PyTorch C extensions: It appears that PyTorch has loaded the `torch/_C` folder of the PyTorch repository rather than the C extensions which are expected in the `torch._C` namespace. This can occur when using the `install` workflow. e.g. $ python -m pip install --no-build-isolation -v . && python -c "import torch" This error can generally be solved using the `develop` workflow $ python -m pip install --no-build-isolation -v -e . && python -c "import torch" # This should succeed or by running Python from a different directory. )_C)rpNrBase> GeneratorDisableTorchFunctionDisableTorchFunctionSubclass TensorBasecZ| t}||vry|j||j}t|D]o}t ||}t |dd}t j |s2|j|sDtjj||t||qy)Nrxrp) setaddrxdirrinspectismodule startswithrmodules setdefault _import_extension_to_sys_modules)modulememo module_namermember member_names rr r %s <5D T>  oo K ?DVT*F!&*b9K'K,B,B;,O &&{F;0>  ?robjcZt|tjr|jSt |ddxsd}d}t |dr |j }nIt |dr |j}n0|jjxsd}|jj }|dvr|S|d|S)a String representation of the type of an object. This function returns a fully qualified string representation of an object's type. Args: obj (object): The object whose type to represent Returns: str: the type of the object `o` Example: >>> x = torch.tensor([1, 2, 3]) >>> torch.typename(x) 'torch.LongTensor' >>> torch.typename(torch.nn.Parameter) 'torch.nn.parameter.Parameter' ryrprzrx>rprr) rrr2rrrrzrx __class__ry)rr qualnames rrara<s #u||$xxz S, + 1rFHsN### j !<<))/R==-- !!XQxj !!rz torch.Tensorc6t|tjS)aReturns True if `obj` is a PyTorch tensor. Note that this function is simply doing ``isinstance(obj, Tensor)``. Using that ``isinstance`` check is better for type checking with mypy, and more explicit - so it's recommended to use that instead of ``is_tensor``. Args: obj (object): Object to test Example:: >>> x = torch.tensor([1, 2, 3]) >>> torch.is_tensor(x) True )rrr2rs rrErE_s" c5<< ((r)r3r4c$t|tvS)zgReturns True if `obj` is a PyTorch storage object. Args: obj (Object): Object to test )r_storage_classesrs rrDrDss 9( ((rc *ddlm}ddlmd}t t fdt |d}|r|j}||Sttdr"tjj}||StjdS) z?Gets the default ``torch.Tensor`` to be allocated on ``device``r) _get_current_function_mode_stack DeviceContextc\|j|StjgjSr)indexrrdevice)rs r_get_device_with_indexz2get_default_device.._get_device_with_indexs( << #M<<#** *rct|Sr)r)rrs rz$get_default_device..sD-8rNdevice_contextcpu) torch.overridesrtorch.utils._devicernextfilterreversedrr_GLOBAL_DEVICE_CONTEXTr"r)rr device_moderrs @rr<r<sA1+ 8 57 8  K##%f--%'78'66==%f--||E""rrcttdr%tj}||jddd|d}|t_yddlm}||}|j |t_y)aSets the default ``torch.Tensor`` to be allocated on ``device``. This does not affect factory function calls which are called with an explicit ``device`` argument. Factory calls will be performed as if they were passed ``device`` as an argument. To only temporarily change the default device instead of setting it globally, use ``with torch.device(device):`` instead. The default device is initially ``cpu``. If you set the default tensor device to another device (e.g., ``cuda``) without a device index, tensors will be allocated on whatever the current device for the device type, even after :func:`torch.cuda.set_device` is called. .. warning:: This function imposes a slight performance cost on every Python call to the torch API (not just factory functions). If this is causing problems for you, please comment on https://github.com/pytorch/pytorch/issues/92701 .. note:: This doesn't affect functions that create tensors that share the same memory as the input, like: :func:`torch.from_numpy` and :func:`torch.frombuffer` Args: device (device or string): the device to set as default Example:: >>> # xdoctest: +SKIP("requires cuda, changes global state") >>> torch.get_default_device() device(type='cpu') >>> torch.set_default_device('cuda') # current device is 0 >>> torch.get_default_device() device(type='cuda', index=0) >>> torch.set_default_device('cuda') >>> torch.cuda.set_device('cuda:1') # current device is 1 >>> torch.get_default_device() device(type='cuda', index=1) >>> torch.set_default_device('cuda:1') >>> torch.get_default_device() device(type='cuda', index=1) r"Nrr)rr)r"__exit__r%r __enter__)rr"rs rrPrPsm^%'78/>>  %  # #D$ 5 ~ -;) 6&v.  ",:)rrcdt|tr t|}tj|y)a .. warning:: This function is deprecated as of PyTorch 2.1, please use :func:`torch.set_default_dtype()` and :func:`torch.set_default_device()` as alternatives. Sets the default ``torch.Tensor`` type to floating point tensor type ``t``. This type will also be used as default floating point type for type inference in :func:`torch.tensor`. The default floating point tensor type is initially ``torch.FloatTensor``. Args: t (type or string): the floating point tensor type or its name Example:: >>> # xdoctest: +SKIP("Other tests may have changed the default type. Can we reset it?") >>> torch.tensor([1.2, 3]).dtype # initial default for floating point is torch.float32 torch.float32 >>> torch.set_default_tensor_type(torch.DoubleTensor) >>> torch.tensor([1.2, 3]).dtype # a new floating point tensor torch.float64 N)rrrr_set_default_tensor_type)rs rrQrQs&4!S  ""rc.tj|y)a Sets the default floating point dtype to :attr:`d`. Supports floating point dtype as inputs. Other dtypes will cause torch to raise an exception. When PyTorch is initialized its default floating point dtype is torch.float32, and the intent of set_default_dtype(torch.float64) is to facilitate NumPy-like type inference. The default floating point dtype is used to: 1. Implicitly determine the default complex dtype. When the default floating type is float16, the default complex dtype is complex32. For float32, the default complex dtype is complex64. For float64, it is complex128. For bfloat16, an exception will be raised because there is no corresponding complex type for bfloat16. 2. Infer the dtype for tensors constructed using Python floats or complex Python numbers. See examples below. 3. Determine the result of type promotion between bool and integer tensors and Python floats and complex Python numbers. Args: d (:class:`torch.dtype`): the floating point dtype to make the default. Example: >>> # xdoctest: +SKIP("Other tests may have changed the default type. Can we reset it?") >>> # initial default for floating point is torch.float32 >>> # Python floats are interpreted as float32 >>> torch.tensor([1.2, 3]).dtype torch.float32 >>> # initial default for floating point is torch.complex64 >>> # Complex Python numbers are interpreted as complex64 >>> torch.tensor([1.2, 3j]).dtype torch.complex64 >>> torch.set_default_dtype(torch.float64) >>> # Python floats are now interpreted as float64 >>> torch.tensor([1.2, 3]).dtype # a new floating point tensor torch.float64 >>> # Complex Python numbers are now interpreted as complex128 >>> torch.tensor([1.2, 3j]).dtype # a new complex tensor torch.complex128 >>> torch.set_default_dtype(torch.float16) >>> # Python floats are now interpreted as float16 >>> torch.tensor([1.2, 3]).dtype # a new floating point tensor torch.float16 >>> # Complex Python numbers are now interpreted as complex128 >>> torch.tensor([1.2, 3j]).dtype # a new complex tensor torch.complex32 N)r_set_default_dtype)ds rset_default_dtyper3sd!rF warn_onlyrr5c2tj||y)aSets whether PyTorch operations must use "deterministic" algorithms. That is, algorithms which, given the same input, and when run on the same software and hardware, always produce the same output. When enabled, operations will use deterministic algorithms when available, and if only nondeterministic algorithms are available they will throw a :class:`RuntimeError` when called. .. note:: This setting alone is not always enough to make an application reproducible. Refer to :ref:`reproducibility` for more information. .. note:: :func:`torch.set_deterministic_debug_mode` offers an alternative interface for this feature. The following normally-nondeterministic operations will act deterministically when ``mode=True``: * :class:`torch.nn.Conv1d` when called on CUDA tensor * :class:`torch.nn.Conv2d` when called on CUDA tensor * :class:`torch.nn.Conv3d` when called on CUDA tensor * :class:`torch.nn.ConvTranspose1d` when called on CUDA tensor * :class:`torch.nn.ConvTranspose2d` when called on CUDA tensor * :class:`torch.nn.ConvTranspose3d` when called on CUDA tensor * :class:`torch.nn.ReplicationPad1d` when attempting to differentiate a CUDA tensor * :class:`torch.nn.ReplicationPad2d` when attempting to differentiate a CUDA tensor * :class:`torch.nn.ReplicationPad3d` when attempting to differentiate a CUDA tensor * :func:`torch.bmm` when called on sparse-dense CUDA tensors * :func:`torch.Tensor.__getitem__` when attempting to differentiate a CPU tensor and the index is a list of tensors * :func:`torch.Tensor.index_put` with ``accumulate=False`` * :func:`torch.Tensor.index_put` with ``accumulate=True`` when called on a CPU tensor * :func:`torch.Tensor.put_` with ``accumulate=True`` when called on a CPU tensor * :func:`torch.Tensor.scatter_add_` when called on a CUDA tensor * :func:`torch.gather` when called on a CUDA tensor that requires grad * :func:`torch.index_add` when called on CUDA tensor * :func:`torch.index_select` when attempting to differentiate a CUDA tensor * :func:`torch.repeat_interleave` when attempting to differentiate a CUDA tensor * :func:`torch.Tensor.index_copy` when called on a CPU or CUDA tensor * :func:`torch.Tensor.scatter` when `src` type is Tensor and called on CUDA tensor * :func:`torch.Tensor.scatter_reduce` when ``reduce='sum'`` or ``reduce='mean'`` and called on CUDA tensor The following normally-nondeterministic operations will throw a :class:`RuntimeError` when ``mode=True``: * :class:`torch.nn.AvgPool3d` when attempting to differentiate a CUDA tensor * :class:`torch.nn.AdaptiveAvgPool2d` when attempting to differentiate a CUDA tensor * :class:`torch.nn.AdaptiveAvgPool3d` when attempting to differentiate a CUDA tensor * :class:`torch.nn.MaxPool3d` when attempting to differentiate a CUDA tensor * :class:`torch.nn.AdaptiveMaxPool2d` when attempting to differentiate a CUDA tensor * :class:`torch.nn.FractionalMaxPool2d` when attempting to differentiate a CUDA tensor * :class:`torch.nn.FractionalMaxPool3d` when attempting to differentiate a CUDA tensor * :class:`torch.nn.MaxUnpool1d` * :class:`torch.nn.MaxUnpool2d` * :class:`torch.nn.MaxUnpool3d` * :func:`torch.nn.functional.interpolate` when attempting to differentiate a CUDA tensor and one of the following modes is used: - ``linear`` - ``bilinear`` - ``bicubic`` - ``trilinear`` * :class:`torch.nn.ReflectionPad1d` when attempting to differentiate a CUDA tensor * :class:`torch.nn.ReflectionPad2d` when attempting to differentiate a CUDA tensor * :class:`torch.nn.ReflectionPad3d` when attempting to differentiate a CUDA tensor * :class:`torch.nn.NLLLoss` when called on a CUDA tensor * :class:`torch.nn.CTCLoss` when attempting to differentiate a CUDA tensor * :class:`torch.nn.EmbeddingBag` when attempting to differentiate a CUDA tensor when ``mode='max'`` * :func:`torch.Tensor.put_` when ``accumulate=False`` * :func:`torch.Tensor.put_` when ``accumulate=True`` and called on a CUDA tensor * :func:`torch.histc` when called on a CUDA tensor * :func:`torch.bincount` when called on a CUDA tensor and ``weights`` tensor is given * :func:`torch.kthvalue` with called on a CUDA tensor * :func:`torch.median` with indices output when called on a CUDA tensor * :func:`torch.nn.functional.grid_sample` when attempting to differentiate a CUDA tensor * :func:`torch.cumsum` when called on a CUDA tensor when dtype is floating point or complex * :func:`torch.Tensor.scatter_reduce` when ``reduce='prod'`` and called on CUDA tensor * :func:`torch.Tensor.resize_` when called with a quantized tensor In addition, several operations fill uninitialized memory when this setting is turned on and when :attr:`torch.utils.deterministic.fill_uninitialized_memory` is turned on. See the documentation for that attribute for more information. A handful of CUDA operations are nondeterministic if the CUDA version is 10.2 or greater, unless the environment variable ``CUBLAS_WORKSPACE_CONFIG=:4096:8`` or ``CUBLAS_WORKSPACE_CONFIG=:16:8`` is set. See the CUDA documentation for more details: ``_ If one of these environment variable configurations is not set, a :class:`RuntimeError` will be raised from these operations when called with CUDA tensors: * :func:`torch.mm` * :func:`torch.mv` * :func:`torch.bmm` Note that deterministic operations tend to have worse performance than nondeterministic operations. .. note:: This flag does not detect or prevent nondeterministic behavior caused by calling an inplace operation on a tensor with an internal memory overlap or by giving such a tensor as the :attr:`out` argument for an operation. In these cases, multiple writes of different data may target a single memory location, and the order of writes is not guaranteed. Args: mode (:class:`bool`): If True, makes potentially nondeterministic operations switch to a deterministic algorithm or throw a runtime error. If False, allows nondeterministic operations. Keyword args: warn_only (:class:`bool`, optional): If True, operations that do not have a deterministic implementation will throw a warning instead of an error. Default: ``False`` Example:: >>> # xdoctest: +SKIP >>> torch.use_deterministic_algorithms(True) # Forward mode nondeterministic error >>> torch.randn(10, device='cuda').kthvalue(1) ... RuntimeError: kthvalue CUDA does not have a deterministic implementation... # Backward mode nondeterministic error >>> torch.nn.AvgPool3d(1)(torch.randn(3, 4, 5, 6, requires_grad=True).cuda()).sum().backward() ... RuntimeError: avg_pool3d_backward_cuda does not have a deterministic implementation... r4N)r_set_deterministic_algorithms)rr5s rrcrc3sV$$TY?rc*tjS)zReturns True if the global deterministic flag is turned on. Refer to :func:`torch.use_deterministic_algorithms` documentation for more details. )r_get_deterministic_algorithmsrrrr5r5  + + --rc*tjS)zReturns True if the global deterministic flag is set to warn only. Refer to :func:`torch.use_deterministic_algorithms` documentation for more details. )r'_get_deterministic_algorithms_warn_onlyrrrrCrCs  5 5 77r debug_modect|tjtfst dt |t|tr&|dk(rd}n|dk(rd}n|dk(rd}nt d||dk(rtjd y |dk(rtjd d y |dk(rtjd y t d |)aSets the debug mode for deterministic operations. .. note:: This is an alternative interface for :func:`torch.use_deterministic_algorithms`. Refer to that function's documentation for details about affected operations. Args: debug_mode(str or int): If "default" or 0, don't error or warn on nondeterministic operations. If "warn" or 1, warn on nondeterministic operations. If "error" or 2, error on nondeterministic operations. z'debug_mode must be str or int, but got defaultrwarnreerrorzQinvalid value of debug_mode, expected one of `default`, `warn`, `error`, but got FTr4z:invalid value of debug_mode, expected 0, 1, or 2, but got N) rrrrr7r RuntimeErrorrr7)r=s rrRrRs j8<<"5 6A$zBRASTUU*c"  "J 6 !J 7 "J,,6<9  Q ((/ q ((> q ((.H U  rcXtjrtjryyy)zReturns the current value of the debug mode for deterministic operations. Refer to :func:`torch.set_deterministic_debug_mode` documentation for more details. rerBr)rr9r<rrrr=r=s%  '') 5 5 7rc*tjS)zReturns the current value of float32 matrix multiplication precision. Refer to :func:`torch.set_float32_matmul_precision` documentation for more details. )r_get_float32_matmul_precisionrrrr?r? r:r precisionc.tj|y)aSets the internal precision of float32 matrix multiplications. Running float32 matrix multiplications in lower precision may significantly increase performance, and in some programs the loss of precision has a negligible impact. Supports three settings: * "highest", float32 matrix multiplications use the float32 datatype (24 mantissa bits with 23 bits explicitly stored) for internal computations. * "high", float32 matrix multiplications either use the TensorFloat32 datatype (10 mantissa bits explicitly stored) or treat each float32 number as the sum of two bfloat16 numbers (approximately 16 mantissa bits with 14 bits explicitly stored), if the appropriate fast matrix multiplication algorithms are available. Otherwise float32 matrix multiplications are computed as if the precision is "highest". See below for more information on the bfloat16 approach. * "medium", float32 matrix multiplications use the bfloat16 datatype (8 mantissa bits with 7 bits explicitly stored) for internal computations, if a fast matrix multiplication algorithm using that datatype internally is available. Otherwise float32 matrix multiplications are computed as if the precision is "high". When using "high" precision, float32 multiplications may use a bfloat16-based algorithm that is more complicated than simply truncating to some smaller number mantissa bits (e.g. 10 for TensorFloat32, 7 for bfloat16 explicitly stored). Refer to [Henry2019]_ for a complete description of this algorithm. To briefly explain here, the first step is to realize that we can perfectly encode a single float32 number as the sum of three bfloat16 numbers (because float32 has 23 mantissa bits while bfloat16 has 7 explicitly stored, and both have the same number of exponent bits). This means that the product of two float32 numbers can be exactly given by the sum of nine products of bfloat16 numbers. We can then trade accuracy for speed by dropping some of these products. The "high" precision algorithm specifically keeps only the three most significant products, which conveniently excludes all of the products involving the last 8 mantissa bits of either input. This means that we can represent our inputs as the sum of two bfloat16 numbers rather than three. Because bfloat16 fused-multiply-add (FMA) instructions are typically >10x faster than float32 ones, it's faster to do three multiplications and 2 additions with bfloat16 precision than it is to do a single multiplication with float32 precision. .. [Henry2019] http://arxiv.org/abs/1904.06376 .. note:: This does not change the output dtype of float32 matrix multiplications, it controls how the internal computation of the matrix multiplication is performed. .. note:: This does not change the precision of convolution operations. Other flags, like `torch.backends.cudnn.allow_tf32`, may control the precision of convolution operations. .. note:: This flag currently only affects one native device type: CUDA. If "high" or "medium" are set then the TensorFloat32 datatype will be used when computing float32 matrix multiplications, equivalent to setting `torch.backends.cuda.matmul.allow_tf32 = True`. When "highest" (the default) is set then the float32 datatype is used for internal computations, equivalent to setting `torch.backends.cuda.matmul.allow_tf32 = False`. Args: precision(str): can be set to "highest" (default), "high", or "medium" (see above). N)r_set_float32_matmul_precision)rGs rrSrSs~$$Y/rrc.tj|y)aWhen this flag is False (default) then some PyTorch warnings may only appear once per process. This helps avoid excessive warning information. Setting it to True causes these warnings to always appear, which may be helpful when debugging. Args: b (:class:`bool`): If True, force warnings to always be emitted If False, set to the default behaviour N)r_set_warnAlways)rs rrVrVTsqrc*tjS)zReturns True if the global warn_always flag is turned on. Refer to :func:`torch.set_warn_always` documentation for more details. )r_get_warnAlwaysrrrrFrFas    rr9messagec8t|tjtfst dt |ddlm}||ryt|trt|trJ|d}n&t|s t dt|}||)Nzcond must be a bool, but got r) expect_truezExpected cond to be True, but got False. (Could this error message be improved? If so, please report an enhancement request to PyTorch.)zmessage must be a callable) rrr r/r7r%torch.fx.experimental.symbolic_shapesrP issubclassrWarningcallabler) error_typer9rNrPmessage_evaluateds r _check_withrWps dX]]G4 57T |DEEA4 j) ,Z G5TT T   89 9 N & ''rc&tt||y)aThrows error containing an optional message if the specified condition is False. Error type: ``RuntimeError`` C++ equivalent: ``TORCH_CHECK`` Args: cond (:class:`bool`): If False, throw error message (Callable, optional): Callable that returns either a string or an object that has a ``__str__()`` method to be used as the error message. Default: ``None`` N)rWrCr9rNs rrrs dG,r)rct|dk\|ddlm}|||t||k|ddlm}|||yy)a{Checks that a given integer is a valid size (i.e., is non-negative). You should use this over ``_check(i >= 0)`` because it can prevent ``GuardOnDataDependentSymNode`` exceptions by opting yourself into alternate semantics for ``guard_size_oblivious`` tests that treat values 0 and 1 equivalently to all other values. When max is not None, this specifies an upper bound equivalent to ``_check(i <= max)``. This bound is also subject to alternate semantics: in ``guard_size_oblivious`` tests, we assume that a constant max bound is treated equivalently to all other values. Symbolic max bounds are not yet supported. NB: Do NOT use this in contexts where a -1 size would be valid (indicating to infer the size from context, or if you should wrap-around or truncate). Only use this if the only valid value is an honest to goodness size. r)_advise_is_sizeN)_advise_is_bounded)rrQr[r\)irNrr[r\s r_check_is_sizer^sB$ 167EA qCx!L1c" rc&tt||y)aThrows error containing an optional message if the specified condition is False. Error type: ``IndexError`` C++ equivalent: ``TORCH_CHECK_INDEX`` Args: cond (:class:`bool`): If False, throw error message (Callable, optional): Callable that returns either a string or an object that has a ``__str__()`` method to be used as the error message. Default: ``None`` N)rW IndexErrorrYs r _check_indexra D'*rc&tt||y)aThrows error containing an optional message if the specified condition is False. Error type: ``ValueError`` C++ equivalent: ``TORCH_CHECK_VALUE`` Args: cond (:class:`bool`): If False, throw error message (Callable, optional): Callable that returns either a string or an object that has a ``__str__()`` method to be used as the error message. Default: ``None`` N)rWrrYs r _check_valuerdrbrc&tt||y)aThrows error containing an optional message if the specified condition is False. Error type: ``TypeError`` C++ equivalent: ``TORCH_CHECK_TYPE`` Args: cond (:class:`bool`): If False, throw error message (Callable, optional): Callable that returns either a string or an object that has a ``__str__()`` method to be used as the error message. Default: ``None`` N)rWr7rYs r _check_typerfs 4)rc&tt||y)aThrows error containing an optional message if the specified condition is False. Error type: ``NotImplementedError`` C++ equivalent: ``TORCH_CHECK_NOT_IMPLEMENTED`` Args: cond (:class:`bool`): If False, throw error message (Callable, optional): Callable that returns either a string or an object that has a ``__str__()`` method to be used as the error message. Default: ``None`` N)rWNotImplementedErrorrYs r_check_not_implementedris#T73rct|stdt||jtj k(std|jt ||jj|y)Nzcond must be a tensor, but got z0cond tensor must have dtype torch.bool, but got ) rEr7rdtyperr rW _is_all_truer)rUr9rNs r_check_tensor_all_withrm sh T?9$t*FGG :: #J4::,WXX -D--/446@rc&tt||y)aThrows error containing an optional message if the specified condition is False. Error type: ``RuntimeError`` C++ equivalent: ``TORCH_CHECK_TENSOR_ALL`` Args: cond (:class:`torch.Tensor`): Tensor of dtype ``torch.bool``. If any element is ``False``, throw error message (Callable, optional): Callable that returns either a string or an object that has a ``__str__()`` method to be used as the error message. Default: ``None`` N)rmrCrYs r_check_tensor_allros <w7r)einfnanpinewaxis)rprsrrrqrt)r2)storage)_LegacyStorage _StorageBase_warn_typed_storage_removalr3r4c,eZdZedZedZy)r c2td|jSN stacklevelrx_dtyper s rrkzByteStorage.dtypeH#q1{{rc"tjSr)ruint8r s rrzByteStorage._dtypeM {{rNrxryrzrrkrrrrr r G(rr c,eZdZedZedZy)r$c2td|jSr{rr s rrkzDoubleStorage.dtypeSrrc"tjSr)rdoubler s rrzDoubleStorage._dtypeX ||rNrrrrr$r$R(rr$c,eZdZedZedZy)r&c2td|jSr{rr s rrkzFloatStorage.dtype^rrc"tjSr)rrr s rrzFloatStorage._dtypecrrNrrrrr&r&]rrr&c,eZdZedZedZy) HalfStoragec2td|jSr{rr s rrkzHalfStorage.dtypeirrc"tjSr)rhalfr s rrzHalfStorage._dtypen zzrNrrrrrrh(rrc,eZdZedZedZy)r+c2td|jSr{rr s rrkzLongStorage.dtypetrrc"tjSr)rlongr s rrzLongStorage._dtypeyrrNrrrrr+r+srrr+c,eZdZedZedZy)r)c2td|jSr{rr s rrkzIntStorage.dtyperrc"tjSr)rrr s rrzIntStorage._dtypes yyrNrrrrr)r)~s(rr)c,eZdZedZedZy)r-c2td|jSr{rr s rrkzShortStorage.dtyperrc"tjSr)rshortr s rrzShortStorage._dtyperrNrrrrr-r-rrr-c,eZdZedZedZy)r"c2td|jSr{rr s rrkzCharStorage.dtyperrc"tjSr)rint8r s rrzCharStorage._dtyperrNrrrrr"r"rrr"c,eZdZedZedZy)rc2td|jSr{rr s rrkzBoolStorage.dtyperrc"tjSr)rr r s rrzBoolStorage._dtyperrNrrrrrrrrrc,eZdZedZedZy)BFloat16Storagec2td|jSr{rr s rrkzBFloat16Storage.dtyperrc"tjSr)rbfloat16r s rrzBFloat16Storage._dtype ~~rNrrrrrr(rrc,eZdZedZedZy)ComplexDoubleStoragec2td|jSr{rr s rrkzComplexDoubleStorage.dtyperrc"tjSr)rcdoubler s rrzComplexDoubleStorage._dtypes }}rNrrrrrrs(rrc,eZdZedZedZy)ComplexFloatStoragec2td|jSr{rr s rrkzComplexFloatStorage.dtyperrc"tjSr)rcfloatr s rrzComplexFloatStorage._dtyperrNrrrrrrrrrc,eZdZedZedZy) QUInt8Storagec2td|jSr{rr s rrkzQUInt8Storage.dtyperrc"tjSr)rquint8r s rrzQUInt8Storage._dtyperrNrrrrrrrrrc,eZdZedZedZy) QInt8Storagec2td|jSr{rr s rrkzQInt8Storage.dtyperrc"tjSr)rqint8r s rrzQInt8Storage._dtyperrNrrrrrrrrrc,eZdZedZedZy) QInt32Storagec2td|jSr{rr s rrkzQInt32Storage.dtyperrc"tjSr)rqint32r s rrzQInt32Storage._dtyperrNrrrrrrrrrc,eZdZedZedZy)QUInt4x2Storagec2td|jSr{rr s rrkzQUInt4x2Storage.dtyperrc"tjSr)rquint4x2r s rrzQUInt4x2Storage._dtyperrNrrrrrrrrrc,eZdZedZedZy)QUInt2x4Storagec2td|jSr{rr s rrkzQUInt2x4Storage.dtyperrc"tjSr)rquint2x4r s rrzQUInt2x4Storage._dtyperrNrrrrrrrrrr_tensor_classes)amprandom serialization)rT)r6r()r@rBrIrOrU)rGrNctjdk(rytddd}ttdtj j |std|z|jdS)Nrrrrmtorch_shm_managerz$Unable to find torch_shm_manager at zutf-8) rrrrrrrsrtrCencode)rss r _manager_pathr)s`I% %)< =D' g(>? 77>>$ ADHII ;;w r) unique_dimrsegment_reducec#rK|]/}ttt|tjs,|1ywr)rrrrk)rwrs rryryas) :geT.BEKK#PDs-77)_disable_dynamo)_VF functionalct|tjur3tj|frtj t |f||S|sJ|y)zAA wrapper around Python's assert which is symbolically traceable.N)rrr2rrr_assert) conditionrNs rrrsS Iell*y/K/K  0.. i\9g   g9r)r:rArKset_grad_enabled) __config__ __future___awaits acceleratorautogradbackendsr#rj distributed distributionsfftfutureshubjitlinalgmpsmtiamultiprocessingnestednnoptimrprofilersparsespecialtestingtypesutilsversionxpu)windows)ao) _size_docs _storage_docs _tensor_docs _torch_docscy)z?Returns whether PyTorch was built with _GLIBCXX_USE_CXX11_ABI=1Trrrrcompiled_with_cxx11_abirs r)_library_ops)ops)classesz.opsz.classes) quantization) quasirandom)register_after_fork)rH)masked)_symeigeiglstsq matrix_ranksolve) from_dlpack to_dlpackc\eZdZdZdZdZdeefdZdee ee ffdZ dZ d Z d Zy ) _TorchCompileInductorWrapperinductorcddlm}i|_||_|j ||j ||j |j dd}ttdr'ddl m }|ttjdd}|jjdd r9|r|d ks ts'd tj d <d tj d<yyy)NrCompilerBisectorrr) TorchVersionrjz0.0triton.cudagraphsFz12.61DISABLE_CUPTI_LAZY_REINIT0TEARDOWN_CUPTI)!torch._inductor.compiler_bisectorrconfigdynamic apply_mode apply_optionsget_config_changerrtorch.torch_versionrrrgetrrrr)rroptionsr%rrrs rrz%_TorchCompileInductorWrapper.__init__ sF')    7# +==jIJ 5) $ 8' vu(MNL ;;??. 6 lV3DF69BJJ2 3 ,/BJJ' (G 7rct|txr4|j|jk(xr|j|jk(Sr)rrr$r%rs rr8z#_TorchCompileInductorWrapper.__eq__- s< u: ; . u||+ .  - rrcf|r/|dk7r)ddlm}|j|||jyyy)Nr?r)list_mode_options)torch._inductorr.r'r%)rrr.s rr&z'_TorchCompileInductorWrapper.apply_mode4 s0 DI% 9   0t||D E&4rr+c |syddlm}|j}|jD]\}}|j dd}||vr(t d|dt |j|j|}t|Mt||sAt|j}t||j} t d|d|d | ||j|<y) Nrr$-rzUnexpected optimization option z, known options are zUnexpected type of attr z, got z should be ) r/r$get_config_copyrrrClistkeysget_type _get_originrrrx) rr+r$current_configkeyval attr_name attr_type val_type_strexpected_type_strs rr'z*_TorchCompileInductorWrapper.apply_options: s **0*@*@*B  )HC C-I."5cU:NtTbTgTgTiOjNkl 2I9%-!#y1#'9#5#5L(,^I-F(G(P(P%&23%vl^;WhVij&)DKK "# )rc8ddlm}||||jS)Nr) compile_fxconfig_patches)torch._inductor.compile_fxr@r$)rmodel_inputs_r@s r__call__z%_TorchCompileInductorWrapper.__call__U s9&'$++FFrc4ddlm}||jS)Nr)get_patched_config_dictrA)rCrHr$)rrHs rget_compiler_configz0_TorchCompileInductorWrapper.get_compiler_configZ sF&dkkBBrcddlm}d|jvs|jjr+|jj ddrddlm}|yyy)Nrr1rT)reset_cudagraph_trees)r/r$triton cudagraphsr*torch._inductor.cudagraph_treesrK)rr$rKs rresetz"_TorchCompileInductorWrapper.reset_ sD* $++ -1I1I{{2D9Q%':2JrN)rxryrz compiler_namerr8 _Optionalrr&dict_Anyr'rFrIrOrrrrr sMM/4 Fy~F )YtCI%?)6G C (rrc$eZdZdZdZdZdZy)_TorchCompileWrapperc"ddlm}t|tr||_n.t |dr|j |_nt||_||_|||_i|_ |r|dk7r||jd<|r||jd<yy)Nr)lookup_backendrxr?rr+) torch._dynamo.backends.registryrWrrrPrrxr% compiler_fnkwargs)rbackendrr+r%rWs rrz_TorchCompileWrapper.__init__j sB gs #!(D  Wj )!(!1!1D !$WD  )'2 DI%"&DKK  %,DKK " rct|txrO|j|jk(xr4|j|jk(xr|j|jk(Sr)rrUrYrZr%rs rr8z_TorchCompileWrapper.__eq__| sW u2 3 .  E$5$55 . u||+ .  -  rc>|j||fi|jSr)rYrZ)rrDrEs rrFz_TorchCompileWrapper.__call__ s t?4;;??rcft|jdr|jjyy)NrO)rrYrOr s rrOz_TorchCompileWrapper.reset s) 4##W -    " " $ .rN)rxryrzrr8rFrOrrrrUrUi s-$ @%rrU_InputT_RetTr fullgraphr%r[rr+disablemodelrbr%r[r+rccyrrrdrbr%r[rr+rcs rr8r8 s!$rcyrrrfs rr8r8 s ILrcddl}tjdtjdk\r t d|j ddk(rtjdkr t d |3d tttfd tttfffd }|S  t d dddl m } | jx} r| d} r"ttrjdd} dk(rt!nt#t%j&j)| |S)a Optimizes given model/function using TorchDynamo and specified backend. If you are compiling an :class:`torch.nn.Module`, you can also use :meth:`torch.nn.Module.compile` to compile the module inplace without changing its structure. Concretely, for every frame executed within the compiled region, we will attempt to compile it and cache the compiled result on the code object for future use. A single frame may be compiled multiple times if previous compiled results are not applicable for subsequent calls (this is called a "guard failure), you can use TORCH_LOGS=guards to debug these situations. Multiple compiled results can be associated with a frame up to ``torch._dynamo.config.recompile_limit``, which defaults to 8; at which point we will fall back to eager. Note that compile caches are per *code object*, not frame; if you dynamically create multiple copies of a function, they will all share the same code cache. Args: model (Callable or None): Module/function to optimize fullgraph (bool): If False (default), torch.compile attempts to discover compilable regions in the function that it will optimize. If True, then we require that the entire function be capturable into a single graph. If this is not possible (that is, if there are graph breaks), then this will raise an error. dynamic (bool or None): Use dynamic shape tracing. When this is True, we will up-front attempt to generate a kernel that is as dynamic as possible to avoid recompilations when sizes change. This may not always work as some operations/optimizations will force specialization; use TORCH_LOGS=dynamic to debug overspecialization. When this is False, we will NEVER generate dynamic kernels, we will always specialize. By default (None), we automatically detect if dynamism has occurred and compile a more dynamic kernel upon recompile. backend (str or Callable): backend to be used - "inductor" is the default backend, which is a good balance between performance and overhead - Non experimental in-tree backends can be seen with `torch._dynamo.list_backends()` - Experimental or debug in-tree backends can be seen with `torch._dynamo.list_backends(None)` - To register an out-of-tree custom backend: https://pytorch.org/docs/main/torch.compiler_custom_backends.html#registering-custom-backends mode (str): Can be either "default", "reduce-overhead", "max-autotune" or "max-autotune-no-cudagraphs" - "default" is the default mode, which is a good balance between performance and overhead - "reduce-overhead" is a mode that reduces the overhead of python with CUDA graphs, useful for small batches. Reduction of overhead can come at the cost of more memory usage, as we will cache the workspace memory required for the invocation so that we do not have to reallocate it on subsequent runs. Reduction of overhead is not guaranteed to work; today, we only reduce overhead for CUDA only graphs which do not mutate inputs. There are other circumstances where CUDA graphs are not applicable; use TORCH_LOG=perf_hints to debug. - "max-autotune" is a mode that leverages Triton or template based matrix multiplications on supported devices and Triton based convolutions on GPU. It enables CUDA graphs by default on GPU. - "max-autotune-no-cudagraphs" is a mode similar to "max-autotune" but without CUDA graphs - To see the exact configs that each mode sets you can call `torch._inductor.list_mode_options()` options (dict): A dictionary of options to pass to the backend. Some notable ones to try out are - `epilogue_fusion` which fuses pointwise ops into templates. Requires `max_autotune` to also be set - `max_autotune` which will profile to pick the best matmul configuration - `fallback_random` which is useful when debugging accuracy issues - `shape_padding` which pads matrix shapes to better align loads on GPUs especially for tensor cores - `triton.cudagraphs` which will reduce the overhead of python with CUDA graphs - `trace.enabled` which is the most useful debugging flag to turn on - `trace.graph_diagram` which will show you a picture of your graph after fusion - `guard_filter_fn` that controls which dynamo guards are saved with compilations. This is an unsafe feature and there is no backward compatibility guarantee provided for dynamo guards as data types. For stable helper functions to use, see the documentations in `torch.compiler`, for example: - `torch.compiler.skip_guard_on_inbuilt_nn_modules_unsafe` - `torch.compiler.skip_guard_on_all_nn_modules_unsafe` - `torch.compiler.keep_tensor_guards_unsafe` - For inductor you can see the full list of configs that it supports by calling `torch._inductor.list_options()` disable (bool): Turn torch.compile() into a no-op for testing Example:: @torch.compile(options={"triton.cudagraphs": True}, fullgraph=True) def foo(x): return torch.sin(x) + torch.cos(x) rNz torch.compile)r|z.torch.compile is not supported on Python 3.14+Py_GIL_DISABLEDre)r| r|zetorch.compile is not supported on Python < 3.13.3 built with GIL disabled. Please use Python 3.13.3+.rdr c B| tdt|S)NzModel can't be Nonera)rCr8)rdr[rcr%rbrr+s rrzcompile..fn, s6}"#899# rzVEither mode or options can be specified, but both can't be specified at the same time.r?rguard_filter_fnr)r[nopythonr%rcrm)rr_log_api_usage_oncer version_inforCr _Callabler_r`r#r get_backendrrRpoprrUr_dynamooptimize) rdrbr%r[rr+rcrrrbisect_backendrms `````` rr8r8 shX?+ 7"KLL  ! !"3 4 9c>N>NR?  )   } i/ Igun4M    G/ d   |B)5577~7 O:gt,!++&7>*.tWgF&wgwG == ! !' "     rc .tj|j}tjt }t ||rtd|dt||dt|||djt |g}|tj|<y)zRegister an external runtime module of the specific :attr:`device_type` supported by torch. After the :attr:`module` is registered correctly, the user can refer the external runtime module as part of torch with attribute torch.xxx. zThe runtime module of 'z$' has already been registered with ''rN) rrrrrrxrrCrsetattrru) device_typer mtorch_module_names r_register_device_moduler}Y s,,{+00K HAq+%k]3%%,Q %<$=Q @   A{F#(K!89%+CKK!"r)r;funclibrary return_types)r9 while_loop)rd)_meta_registrationsTORCH_CUDA_SANITIZER)fx)compilercxeZdZUejj ddZiZee e e fe fe d<e dZy)_TritonLibraryrLDEF ops_tablec||f|jvrL|jj||jjd|z||||j||f<|j||fS)Nztriton::)rrndefineimpl)clsop_key full_schemaop_impl dispatch_keys r registerOpz_TritonLibrary.registerOp sc L ! 6 GGNN; ' GGLLf,g| D4;CMM6<0 1}}fl344rN)rxryrzrrrlrnrrRr}rrq__annotations__ classmethodrrrrrr sF --  % 0C24ItE#s(OY./455rr)has_mpshas_cuda has_cudnn has_mkldnn)rt _inductor _subclassesonnx>rrt_exportrc tj|}|r> s &%,,'#[[ FC "\\&166 "XX668=='x/S T  E#5t= 2 (when we do a size-oblivious guard). This makes it easier to use the unbacked int in size contexts, as we will often attempt to guard on a size being zero/one (e.g., when computing the contiguity of a tensor, or testing if broadcasting can occur), which will not work on unbacked SymInts. However, if we conservatively assume that the size is not zero/one, we will end up with a graph that will still work even if the size is zero/one. For more details, see https://docs.google.com/document/d/1HSuTTVvYH1pTew89Rtpeu84Ht3nQEFTYhAX3Ypa_xJs/edit ``` )rrN)rsym_constrain_range_for_size)symbolrrs r_constrain_as_sizer s. &&v3C@r)_loggingcddlm}d}tjdkr|j |d}n ||}|D]} |j }|y #t $r}td|jd|d }~wwxYw) z Leverage the Python plugin mechanism to load out-of-the-tree device extensions. See this RFC: https://github.com/pytorch/pytorch/issues/122468 r) entry_pointsztorch.backends)r| r)groupz&Failed to load the backend extension: zN. You can disable extension auto-loading with TORCH_DEVICE_BACKEND_AUTOLOAD=0.N) importlib.metadatarrrpr*rGrrCr)r group_namebackend_extensionsbackend_extension entrypointrs r_import_device_backendsr s 0!J '!)^// B?) ;/  *//1J L   89J9O9O8PQ_`  sA B&BBc4tjdddk(S)ab Whether autoloading out-of-the-tree device extensions is enabled. The switch depends on the value of the environment variable `TORCH_DEVICE_BACKEND_AUTOLOAD`. Returns: bool: Whether to enable autoloading the extensions. Enabled by default. Examples: >>> torch._is_device_backend_autoload_enabled() True TORCH_DEVICE_BACKEND_AUTOLOADr)rrrrrr#_is_device_backend_autoload_enabledr s 994c :c AArct|}|tjur%tj|tj S|tj ur%tj|tjStj|S)z Like torch.as_tensor, but when given Python data types it will keep them in full precision. Used for calling convention for Dynamo. )rk)rrrr as_tensorfloat64rint64)rtys r_as_tensor_fullprecr, s^ aB X^^q 66 x|| q 44q!!r)r N)Tr)r z torch.device)rrr N)r2z torch.dtyper N)NN(Kr{rr functoolsr~rrr&rrrrr threadingtypingrrSrrqrr7rrQr _overloadrr _TypeVarr _Uniontyping_extensionsr _ParamSpecr _TypeIsr r torch._utilsr_syncrrtorch._utils_internalrrrrrr)r torch.typesrr__all__sortedrprf initialize ImportErrorrrr4rrrrrgetdlopenflags old_flagssetdlopenflagsr RTLD_LAZYtorch._Cr1r0r/r_rYr[r]r}rrr^r`r__fn__name __sym_namerzrxglobalsrappendr\rZrr_C_for_compiled_checkrrrr__objrendswithrrTisclassrydelattrrr rarErDlocalr)r<rPrQr3rcr5rCrrRr=r?rSrVrFrWrr^rardrfrirmrorprqrrrsrtrr torch._tensorr2ru torch.storagervrwrxr3r4r r$r&rr+r)r-r"rrrrrrrrrrrrrrrtorch._tensor_strrT torch.ampr6r( torch.randomr@rBrIrOrUtorch.serializationrGrNrtorch._C._VariableFunctionsr_segment_reduce PRIVATE_OPS_VariableFunctionsrtorch._compilerrrtorch.functionalrtorch.autogradr:rArKrrrrrrrr#rjrrrrrrrrrrrrrrrrrrrrrr torch.signalrrtorch.nn.intrinsic torch.nn.qattorch.nn.quantizabletorch.nn.quantized _init_namesrrrrrrr  torch._opsr torch._classesr r r r contiguous_formatlegacy_contiguous_formattorch.multiprocessing._atforkrget_num_threads torch._lobpcgrHatenquantized_lstm quantized_grurtorch._linalg_utilsrsymeigrrrrtorch.utils.dlpackrrrrUr_r`rRr8r}r;r~rrtorch._higher_order_opsr9r torch.funcrdrrtorch.cuda._sanitizer _sanitizercsanenable_cuda_sanitizerrr_initrris_builtr is_availablemkldnnrrtrrrrrcacherr>rr _init_logsrrrrrrrs       I hmm ;/H V&/!!!J<<7u/n(c(s(c(d3i((3#QU$>CB"YRYY/F%GHOO"$#""$ICr~~ 45Cy!BBJt2t2nA-A-H"  "6"uU49%5uT3Y7G%GH"$"&L. (fj !F!J F #D(22D  GIj!!& &*. 9[ !z%  '4 "g/F ayC 7vF# E?ogooe48+" (0E$ <  H%v./" E ?%R(( "$ "c "F)4)w~6)()D)/O(P Q))*#D;;|#vd>&:C&?@##>2p %K@ --K@}}K@ K@\.hmm.8x}}8) VHLL#4E-F) 4) X hll .c.?0C?0D?0D x}} D    ( ' ((r3w (@-$#4#>+$+$*$4$A82! 34 !%.N>..>..n>.N>Nnn'E#d6,">?@A..1UT.)*2ON.*VV* -/" .%O  "''( F &K"7 B))6 2EE !!! &vGIf   S !v  E  Z+7 (@, t$%&GF}j 5"!-- (4(#. (8,g6/- ->=E))*+* ((&& #65V(V(r % %F Y   %(,&0" " $ We^ $ $}} $x}} % $ CN # $ d  $ S&hllHMM9DE EF $]] $w~ $  $  L %(,&0" " L  L}} Lx}} % L CN # L d  L S&hllHMM9DE EF L]] L '5.)*Igun,EEF L  L37h  %(,&0" "h Yw~. /h }}h x}} % h CN # h d  h  S&hllHMM9DE EFh ]]h   y%()9We^+DDE gunh V,( KJ##)RZZ'((D && 5 5 ~~!!**##,,%%22..''44 MO& iu||S/@(AB4$(#'A 8<< A 8<< A44BX]]B" ""'()IU  n ,%%- HOO  eg    + snn$n! n!$Ao$