from typing import Optional

import torch

from ..modeling_flash_attention_utils import _flash_attention_forward, flash_attn_supports_top_left_mask
from ..utils import logging


logger = logging.get_logger(__name__)


# Resolved once at import time: whether the installed flash-attn build expects
# top-left-aligned causal masks instead of bottom-right-aligned ones.
_use_top_left_mask = flash_attn_supports_top_left_mask()


def flash_attention_forward(
    module: torch.nn.Module,
    query: torch.Tensor,
    key: torch.Tensor,
    value: torch.Tensor,
    attention_mask: Optional[torch.Tensor],
    dropout: float = 0.0,
    scaling: Optional[float] = None,
    sliding_window: Optional[int] = None,
    softcap: Optional[float] = None,
    **kwargs,
) -> tuple[torch.Tensor, None]:
    if kwargs.get("output_attentions", False) or kwargs.get("head_mask") is not None:
        logger.warning_once(
            "`flash_attention_2` does not support `output_attentions=True` or `head_mask`."
            " Please set your attention to `eager` if you want any of these features."
        )

    # Read the sequence length before the transpose below.
    seq_len = query.shape[2]

    if any(dim == 0 for dim in query.shape):
        raise ValueError(
            "Tensor query has shape with a zero dimension. FlashAttention does not support inputs with dim=0.\n"
            "Please check your input shapes or use SDPA instead."
        )

    # FA2 uses non-transposed inputs: (batch, seq_len, num_heads, head_dim).
    query = query.transpose(1, 2)
    key = key.transpose(1, 2)
    value = value.transpose(1, 2)

    # In PEFT, the layer norms are usually cast to float32 for training stability,
    # which silently casts the hidden states to float32 as well. FlashAttention only
    # supports fp16/bf16, so determine the dtype to cast back to. This extra cast can
    # slow down training and inference, so it is recommended not to cast the LayerNorms
    # to fp32 in the first place (our RMSNorm modules usually handle this correctly).
    target_dtype = None
    if query.dtype == torch.float32:
        if torch.is_autocast_enabled():
            target_dtype = torch.get_autocast_gpu_dtype()
        # Handle the case where the model is quantized
        elif hasattr(module.config, "_pre_quantization_dtype"):
            target_dtype = module.config._pre_quantization_dtype
        else:
            target_dtype = next(layer for layer in module.modules() if isinstance(layer, torch.nn.Linear)).weight.dtype

    # FA2 always relies on the causal flag set on the module, so drop any `is_causal`
    # entry from kwargs to avoid passing the argument twice.
    kwargs.pop("is_causal", None)

    attn_output = _flash_attention_forward(
        query,
        key,
        value,
        attention_mask,
        query_length=seq_len,
        is_causal=module.is_causal,
        dropout=dropout,
        softmax_scale=scaling,
        sliding_window=sliding_window,
        softcap=softcap,
        use_top_left_mask=_use_top_left_mask,
        target_dtype=target_dtype,
        attn_implementation=module.config._attn_implementation,
        layer_idx=module.layer_idx if hasattr(module, "layer_idx") else None,
        **kwargs,
    )

    # FlashAttention never returns attention weights, hence the `None` second element.
    return attn_output, None
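
# ---------------------------------------------------------------------------
# Minimal usage sketch, assuming a CUDA device and a working flash-attn
# installation. `dummy_module` below is a hypothetical stand-in exposing only
# the attributes this function reads from `module` (`is_causal` and
# `config._attn_implementation`); in practice the caller is an attention layer
# inside a transformers model whose config selects "flash_attention_2".
if __name__ == "__main__":
    from types import SimpleNamespace

    if torch.cuda.is_available():
        batch, num_heads, seq_len, head_dim = 2, 8, 16, 64
        # fp16 inputs in (batch, num_heads, seq_len, head_dim) layout; the wrapper
        # transposes them to the (batch, seq_len, num_heads, head_dim) layout FA2 expects.
        q = torch.randn(batch, num_heads, seq_len, head_dim, dtype=torch.float16, device="cuda")
        k = torch.randn_like(q)
        v = torch.randn_like(q)

        # Hypothetical stand-in for a transformers attention module.
        dummy_module = SimpleNamespace(
            is_causal=True,
            config=SimpleNamespace(_attn_implementation="flash_attention_2"),
        )

        out, weights = flash_attention_forward(dummy_module, q, k, v, attention_mask=None)
        # Output stays in (batch, seq_len, num_heads, head_dim) layout; weights is None.
        print(out.shape, weights)  # torch.Size([2, 16, 8, 64]) None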