L iddlZddlZddlZddlZddlZddlZddlZddlZddlZddl Z ddl Z ddl m Z m Z ddlmZddlmZddlmZmZddlmZmZmZmZddlmZdd lmZdd lmZdd lm Z dd l!m"Z"dd l#m$Z$m%Z%ddl&m'Z'ddl(m)Z)ddl*m+Z+m,Z,m-Z-m.Z.m/Z/m0Z0m1Z1m2Z2m3Z3m4Z4m5Z5m6Z6m7Z7m8Z8m9Z9ddl:m;Z;eeZ?ddl@mAZAe1serddlBZBddlCmDZDmEZEddlFmGZGddlHmIZIddlJmKZKndZEdZKer ddlLmMZMddlFmGZGe9jeOZPdZQdZRdZS d\de$d eeTeUeVeWfd!eeUd"eeUfd#ZX d\d eeTeUeVeWfd!eeUd"eeUfd$ZYd]d%eeUfd&ZZd'eTd"eeUd(eed)eVeUeUffd*Z[d+d,d-eeeUd,fd.ee)d)eVed,ee)ffd/Z\Gd0d1e]Z^Gd2d3e Z_Gd4d5Z`Gd6d7e`ZaGd8d9e`ZbGd:d;e`ZcGd<d=e Zd d^d?eed@eedAeedBeedCeed)eUf dDZfefd>d>d>d>d>EZgdFgdGdHgdFgdIgdFgdIgdJgdJgdKgdLgdLgdIgdIgdJgdMZhe1r ddNlimjZjmkZkmlZlmmZme-efd>d>d>d>OGdPdQede,Zne.enjen_oenjjGenjjjdRdSdTUjdVdWenj_pGdXdYenZsGdZd[Zty)_N)ABCabstractmethod)UserDict)contextmanager)abspathexists) TYPE_CHECKINGAnyOptionalUnion)custom_object_save)PreTrainedFeatureExtractor)GenerationConfig)BaseImageProcessor) ModelCard) AutoConfig AutoTokenizer)ProcessorMixin)PreTrainedTokenizer) ModelOutputPushToHubMixinadd_end_docstrings copy_funcinfer_frameworkis_tf_availableis_torch_availableis_torch_cuda_availableis_torch_hpu_availableis_torch_mlu_availableis_torch_mps_availableis_torch_musa_availableis_torch_npu_availableis_torch_xpu_availablelogging)deprecate_kwarg GenericTensorz torch.Tensorz tf.Tensor) TFAutoModel) DataLoaderDataset)PreTrainedModel) AutoModel) KeyDataset)TFPreTrainedModelc>t|dk7r td|dS)Nr-z5This collate_fn is meant to be used with batch_size=1r)len ValueError)itemss a/mnt/ssd/data/python-lab/Trading/venv/lib/python3.12/site-packages/transformers/pipelines/base.py no_collate_fnr5Xs" 5zQPQQ 8Oc ft|}t|dtjr|dj}t|}|dk(r(tj |Dcgc]}| c}dSdvr(tj |Dcgc]}| c}dS|dk(r-dk(r(tj |Dcgc]}| c}dSt fd|D}tfd|D} |dj} |d k(rJ|| k(r(tj |Dcgc]}| c}dStj||f| |z} nO|d k(r!tj|||d f| |z} n)|dk(r$tj|||d |d f| |z} t|D]H\} }|d k(r`|dk(r.|dj | t|d df<?|dj | dt|df<l|d k(rf|dk(r1|dj | t|d dddf<|dj | dt|dddf<|dk(s|dk(r5|dj | t|d dddddf<|dj | dt|dddddf<K S|Dcgc]}| c}Scc}wcc}wcc}wcc}wcc}w)Nrr-)dim) pixel_valuesimageinput_featuresc3BK|]}|jdywr-Nshape.0itemkeys r4 z_pad..n>c+>c3BK|]}|jdywr>r?rAs r4rEz_pad..orFrGr )dtypeleft) r1 isinstancetorchTensorr@catmaxminrIzeros enumerateclone) r3rD padding_value padding_side batch_sizer@r8rC max_length min_lengthrItensoris ` r4_padr^^sOUJ%(3-.a ##%j !899E:Dd3i:B B + +99E:Dd3i:B B AX#!1199E:Dd3i:B B>>> >>> a ## !8Z'yy!>$s)!>AFF[[*j!9G-WF AX[[*j%)!DERUbbF AX[[*j%)U2Y!OW\]`mmF ' PGAtax6)6:3il6H6H6JF1s49Q<0022359#Yq\5G5G5IF11DIaL 11126)9=c19K9K9MF1s49Q<002A568 input_valuesr9r<>p_maskspecial_tokens_maskr->attention_masktoken_type_ids)setkeysr2r^) r3rkrCpaddedrD_padding_valuef_padding_valuefeature_extractorrXt_padding_value tokenizers r4innerzpad_collate_fn..inners58==?# D499;4' [\_`d`i`i`k\l[mnvQ   ICk!$):)F%4N%4NJJ!099!"<<HF3K! I" r6)r2 pad_token_idrXgetattr)rqrot_padding_sidef_padding_siderrrnrXrps`` @@@r4pad_collate_fnrwsNN.6]^^  ! ! )M  (44O&33N$!"3_dK !2NDI!n&@^WeEeOP^O__cdrcs t  L!% !% : Lr6config model_classestask frameworkc xtsts tdt|tr||d<d}txr|dv}txr|dv}|r8|r||j dt fz}|r||j dtfz}|jrg} |jD]`} tjd} |r t| | d } | | j| |s=t| d | d } | P| j| b|t| z}t|d k(rtd |i} |D]}|j!}|dk(r,|j#d rd|d<t$j'dn0|dk(r+|j#drd|d<t$j'd |j(|fi|}t+|dr|j-}nt|tr;d}| j?D]\}}|d|d|dz }td|d|d|d|tA|jB}||fS#t.tt0tf$rd}trd|vrd d l}d}|j!}|j4|d< |j(|fi|}t+|dr|j-}t$j'dY#t6$r&t9j:| |j<<YYwxYw|s!t9j:| |j<<YwxYw)a Select framework (TensorFlow or PyTorch) to use from the `model` passed. Returns a tuple (framework, model). If `model` is instantiated, this function will just infer the framework from the model class. Otherwise `model` is actually a checkpoint name and this method will try to instantiate it using `model_classes`. Since we don't want to instantiate the model twice, this model is returned for use by the pipeline. If both frameworks are installed and available for `model`, PyTorch is selected. Args: model (`str`, [`PreTrainedModel`] or [`TFPreTrainedModel]`): The model to infer the framework from. If `str`, a checkpoint name. The model to infer the framewrok from. config ([`AutoConfig`]): The config associated with the model to help using the correct class model_classes (dictionary `str` to `type`, *optional*): A mapping framework to class. task (`str`): The task defining which pipeline will be returned. model_kwargs: Additional dictionary of keyword arguments passed along to the model's `from_pretrained(..., **model_kwargs)` function. Returns: `Tuple`: A tuple framework, model. At least one of TensorFlow 2.0 or PyTorch should be installed. To install TensorFlow 2.0, read the instructions at https://www.tensorflow.org/install/ To install PyTorch, read the instructions at https://pytorch.org/._from_pipeline>Npt>Ntfrr transformersNTFrz2Pipeline cannot infer suitable model classes from z.h5Tfrom_tfz}Model might be a TensorFlow model (ending with `.h5`) but TensorFlow is not available. Trying to load the model with PyTorch.z.binfrom_ptz{Model might be a PyTorch model (ending with `.bin`) but PyTorch is not available. Trying to load the model with Tensorflow.evalFrIzbFalling back to torch.float32 because loading with the original dtype failed on the target device.zwhile loading with z, an error is thrown:  zCould not load model z$ with any of the following classes: z. See the original errors: )"rr RuntimeErrorrNstrgetr,r( architectures importlib import_modulertappendtupler1r2copyendswithloggerwarningfrom_pretrainedhasattrrOSError TypeErrorrOfloat32 Exception traceback format_exc__name__r3r __class__)modelrxryrzr{ model_kwargs class_tuplelook_ptlook_tfclasses architecturetransformers_module_class all_traceback model_classkwargsfallback_triedrO fp32_kwargserror class_nametraces r4infer_framework_load_modelrsB  %7%9 Q  %)- %& $&D9 +D!#A \(A )M,=,=dYL,QQ )M,=,=d[N,SS   G & 4 4 / &/&=&=n&M#$%8,MF)v.$%8B|n:MtTF)v. /&g6K { q QRWQXYZ Z &2 K!&&(FD U^^E%:$(y!=d"u~~f'=$(y!@ # 3 33EDVD5&)!JJLE)2 h eS !E%2%8%8%: \! E.zl:QRWQXXZ[[ \'w.RS^R__}D~EEGH #EOO4 e SZLA "'%'W-> %)N"(++-K+0==K( ! ; ; ;E Q[ Q"5&1$)JJLE.$!>G>R>R>T k&:&:; ! &:C:N:N:PM+"6"67; s22/I  AL9AK  *L L9L&L98L9c t|trtj|fd|i|}n |j}t ||f||||d|S)ar Select framework (TensorFlow or PyTorch) to use from the `model` passed. Returns a tuple (framework, model). If `model` is instantiated, this function will just infer the framework from the model class. Otherwise `model` is actually a checkpoint name and this method will try to instantiate it using `model_classes`. Since we don't want to instantiate the model twice, this model is returned for use by the pipeline. If both frameworks are installed and available for `model`, PyTorch is selected. Args: model (`str`, [`PreTrainedModel`] or [`TFPreTrainedModel]`): The model to infer the framework from. If `str`, a checkpoint name. The model to infer the framewrok from. model_classes (dictionary `str` to `type`, *optional*): A mapping framework to class. task (`str`): The task defining which pipeline will be returned. model_kwargs: Additional dictionary of keyword arguments passed along to the model's `from_pretrained(..., **model_kwargs)` function. Returns: `Tuple`: A tuple framework, model. r~)ryr~rzr{)rNrrrrxr)rryrzr{rrxs r4infer_framework_from_modelrVsZ<%++EW$W,W % v %24d^g kw r6revisionctjdttst s t dt |trpt r"tstj||}nDtr"t stj||}n tj||}t|j}|S#t$rtj||}Y9wxYw)a[ Select framework (TensorFlow or PyTorch) to use. Args: model (`str`, [`PreTrainedModel`] or [`TFPreTrainedModel]`): If both frameworks are installed, picks the one corresponding to the model passed (either a model class or the model name). If no specific model is provided, defaults to using PyTorch. zb`get_framework` is deprecated and will be removed in v5, use `infer_framework_from_model` instead.r})r)warningswarn FutureWarningrrrrNrr,rr(rrr)rrr{s r4 get_frameworkr}s MMl  %7%9 Q  %  (9--ehGE  '9';//IE N!11%(K 0I   N#33EHM Ns#C C43C4 targeted_task task_optionsreturnctr tsd}ntr tsd}|d}|r||vrtd|||d}nd|vr |dd}n td|d}||S)a Select a default model to use for a given task. Defaults to pytorch if ambiguous. Args: targeted_task (`Dict`): Dictionary representing the given task, that should contain default models framework (`str`, None) "pt", "tf" or None, representing a specific framework if it was specified, or None if we don't know yet. task_options (`Any`, None) Any further value required by the task to get fully specified, for instance (SRC, TGT) languages for translation task. Returns Tuple: - `str` The model string representing the default model for this pipeline. - `str` The revision of the model. rrdefaultz9The task does not provide any default models for options rzXThe task defaults can't be correctly selected. You probably meant "translation_xx_to_yy")rrr2)rr{rdefaultsdefault_modelss r4get_default_model_and_revisionrs.O$5  #5#7 Y'H x 'XYeXfgh h!,/8 H &y1':tuu ) $$r6rr+assistant_modelassistant_tokenizerc&jr|ytddk7stts t dt|t rbt j|}t||\}jjjtj|}n||}jjjjk(}tfdd D}|r|rd}|fS| t d |fS) a Prepares the assistant model and the assistant tokenizer for a pipeline whose model that can call `generate`. Args: model ([`PreTrainedModel`]): The main model that will be used by the pipeline to make predictions. assistant_model (`str` or [`PreTrainedModel`], *optional*): The assistant model that will be used by the pipeline to make predictions. assistant_tokenizer ([`PreTrainedTokenizer`], *optional*): The assistant tokenizer that will be used by the pipeline to encode data for the model. Returns: Tuple: The loaded assistant model and (optionally) the loaded tokenizer. N)NNr{rzAssisted generation, triggered by the `assistant_model` argument, is only available for `PreTrainedModel` model instances. For instance, TF or JAX models are not supported.rx)devicerIc3xK|]1}tj|tj|k(3ywN)rtrx)rBtokenloaded_assistant_modelrs r4rEz'load_assistant_model..s7   e$0F0M0Mu(UUs7:) eos_token_idrs bos_token_idzkThe assistant model has a different tokenizer than the main model. You should pass the assistant tokenizer.) can_generatertrNr+r2rrrrtorrIrrx vocab_sizeall) rrrassistant_config_loaded_assistant_tokenizersame_vocab_sizesame_special_tokensrs ` @r4load_assistant_modelrs1&    ?#:uk"d**UO2T c  /3'%55oF$>Wg$h! !!7!:!:%,,V[VaVa!:!b%2%B%B?%S"!0%8"ll--1G1N1N1Y1YYOE.%)" "#= == $ +   "#= ==r6c0eZdZdZdededeffd ZxZS)PipelineExceptionz Raised by a [`Pipeline`] when handling __call__. Args: task (`str`): The task of the pipeline. model (`str`): The model used by the pipeline. reason (`str`): The error message to display. rzrreasonc@t||||_||_yr)super__init__rzr)selfrzrrrs r4rzPipelineException.__init__s    r6)r __module__ __qualname____doc__rr __classcell__rs@r4rrs'Scr6rc eZdZdZedZy)ArgumentHandlerzQ Base interface for handling arguments for each [`~pipelines.Pipeline`]. ctrNotImplementedError)rargsrs r4__call__zArgumentHandler.__call__ !##r6N)rrrrrrrr6r4rrs$$r6rc eZdZdZgdZ ddeedeedeedefdZe dZ e d e e e e ffd Zd e e e e fd efd Ze dd edeedeedeed df dZy)PipelineDataFormata Base class for all the pipeline supported data format both for reading and writing. Supported data formats currently includes: - JSON - CSV - stdin/stdout (pipe) `PipelineDataFormat` also includes some utilities to work with multi-columns like mapping from datasets columns to pipelines keyword arguments through the `dataset_kwarg_1=dataset_column_1` format. Args: output_path (`str`): Where to save the outgoing data. input_path (`str`): Where to look for the input data. column (`str`): The column to read. overwrite (`bool`, *optional*, defaults to `False`): Whether or not to overwrite the `output_path`. )jsoncsvpipe output_path input_pathcolumn overwritec||_||_||jdndg|_t |jdkD|_|j r?|jDcgc]$}d|vrt |jdn||f&c}|_|8|s6tt|jrt|jd|7tt|jst|jdyycc}w)N,rr-=z already exists on diskz doesn't exist on disk) rrsplitrr1is_multi_columnsrrrr)rrrrrcs r4rzPipelineDataFormat.__init__6s'$+1+=fll3'B4 #DKK 01 4  PTP[P[\1#(5.AF\DK  "9gd../0!1!1 22IJKK  !'$//23 11GHII4 " ]s!)Dctrrrs r4__iter__zPipelineDataFormat.__iter__Mrr6datact)z Save the provided data object with the representation for the current [`~pipelines.PipelineDataFormat`]. Args: data (`dict` or list of `dict`): The data to store. rrrs r4savezPipelineDataFormat.saveQs "##r6rc$tjj|j\}}tjjj |df}t |d5}tj||ddd|S#1swY|SxYw)z Save the provided data object as a pickle-formatted binary data on the disk. Args: data (`dict` or list of `dict`): The data to store. Returns: `str`: Path where the data has been saved. picklezwb+N) ospathsplitextrextsepjoinopenrdump)rrrr binary_pathf_outputs r4 save_binaryzPipelineDataFormat.save_binary[sy''""4#3#34aggnn))4*:; +u % ( KKh ' ( (s $BBformatc|dk(rt||||S|dk(rt||||S|dk(rt||||Std|d)a Creates an instance of the right subclass of [`~pipelines.PipelineDataFormat`] depending on `format`. Args: format (`str`): The format of the desired pipeline. Acceptable values are `"json"`, `"csv"` or `"pipe"`. output_path (`str`, *optional*): Where to save the outgoing data. input_path (`str`, *optional*): Where to look for the input data. column (`str`, *optional*): The column to read. overwrite (`bool`, *optional*, defaults to `False`): Whether or not to overwrite the `output_path`. Returns: [`~pipelines.PipelineDataFormat`]: The proper data format. rrrrzUnknown reader z% (Available reader are json/csv/pipe))JsonPipelineDataFormatCsvPipelineDataFormatPipedPipelineDataFormatKeyError)r rrrrs r4from_strzPipelineDataFormat.from_strmsf4 V )+z6U^_ _ u_(j&T]^ ^ v *; FV_` `_VH4YZ[ [r6NF)rrrrSUPPORTED_FORMATSr rboolrrrr dictlistrr  staticmethodrrr6r4rr s&0 Jc]JSMJ J  J.$$$tT$Z/0$$dDJ&6 7C$  \ \c] \SM \ \  \ \r6rc^eZdZdZ d deedeedeeffd ZdZdee fdZ xZ S) raa Support for pipelines using CSV data format. Args: output_path (`str`): Where to save the outgoing data. input_path (`str`): Where to look for the input data. column (`str`): The column to read. overwrite (`bool`, *optional*, defaults to `False`): Whether or not to overwrite the `output_path`. rrrc,t|||||y)Nr)rr)rrrrrrs r4rzCsvPipelineDataFormat.__init__s j&INr6c #4Kt|jd5}tj|}|D]H}|jr&|j Dcic] \}}||| c}}5||j dJ dddycc}}w#1swYyxYww)Nrr)rrr DictReaderrr)rfreaderrowkrs r4rzCsvPipelineDataFormat.__iter__s $//3 ' .1^^A&F .((15=A1c!f9==dkk!n--  . . .>  . .s.B6B B B = BB  BBrct|jd5}t|dkDrQtj|t |dj }|j|j|dddy#1swYyxYw)z Save the provided data object with the representation for the current [`~pipelines.PipelineDataFormat`]. Args: data (`list[dict]`): The data to store. wrN) rrr1r DictWriterrrk writeheader writerows)rrrwriters r4rzCsvPipelineDataFormat.saveso$""C ( 'A4y1}4Q +?@""$  &  ' ' 's A BB r) rrrrr rrrrrrrrs@r4rrsS   Oc]OSMO O. 'd 'r6rcXeZdZdZ d deedeedeeffd ZdZdefdZ xZ S) rab Support for pipelines using JSON file format. Args: output_path (`str`): Where to save the outgoing data. input_path (`str`): Where to look for the input data. column (`str`): The column to read. overwrite (`bool`, *optional*, defaults to `False`): Whether or not to overwrite the `output_path`. rrrct|||||t|d5}tj||_dddy#1swYyxYw)Nrr)rrrrload_entries)rrrrrrrs r4rzJsonPipelineDataFormat.__init__sK j&IN *c " )a IIaLDM ) ) )s AAc#K|jD]H}|jr&|jDcic] \}}||| c}}5||jdJycc}}ww)Nr)r,rr)rentryr"rs r4rzJsonPipelineDataFormat.__iter__sY]] ,E$$/3{{;tq!q%({;;DKKN++  ,;s+A!A$A!rct|jd5}tj||dddy#1swYyxYw)z| Save the provided data object in a json file. Args: data (`dict`): The data to store. r$N)rrrr)rrrs r4rzJsonPipelineDataFormat.saves9$""C ( A IIdA    s 7Ar) rrrrr rrrrrrrs@r4rrsK   )c] )SM ) ),r6rcNeZdZdZdZdefdZdeeeefde ffd Z xZ S)ra Read data from piped input to the python process. For multi columns data, columns should separated by If columns are provided, then the output will be a dictionary with {column_x: value_x} Args: output_path (`str`): Where to save the outgoing data. input_path (`str`): Where to look for the input data. column (`str`): The column to read. overwrite (`bool`, *optional*, defaults to `False`): Whether or not to overwrite the `output_path`. c #KtjD]g}d|vr]|jd}|jr2t |j|Dcic] \\}}}|| c}}}Vt |d|iycc}}}ww)N )sysstdinrrzipr)rlinerrls r4rz PipedPipelineDataFormat.__iter__swII Dt|zz$';;;>t{{D;QRR&!a619RR+%   SsABA> !$Brct|y)z^ Print the data. Args: data (`dict`): The data to store. N)printrs r4rzPipedPipelineDataFormat.saves  d r6rcP|j tdt| |S)NzWhen using piped input on pipeline outputting large object requires an output file path. Please provide such output path through --output argument.)rrrr )rrrs r4r z#PipedPipelineDataFormat.save_binary s4    #M  w"4((r6) rrrrrrrr rrr rrs@r4rrs?  )dDJ&6 7)C))r6rc0eZdZdZedZedZy) _ScikitCompatzA Interface layer for the Scikit and Keras compatibility. ctrrrXs r4 transformz_ScikitCompat.transformrr6ctrrr>s r4predictz_ScikitCompat.predict rr6N)rrrrrr@rBrr6r4r<r<s/$$$$r6r<T has_tokenizerhas_feature_extractorhas_image_processor has_processorsupports_binary_outputcZd}|r|dz }|r|dz }|r|dz }|r|dz }|dz }|r|dz }|S)Na Arguments: model ([`PreTrainedModel`] or [`TFPreTrainedModel`]): The model that will be used by the pipeline to make predictions. This needs to be a model inheriting from [`PreTrainedModel`] for PyTorch and [`TFPreTrainedModel`] for TensorFlow.z tokenizer ([`PreTrainedTokenizer`]): The tokenizer that will be used by the pipeline to encode data for the model. This object inherits from [`PreTrainedTokenizer`].z feature_extractor ([`SequenceFeatureExtractor`]): The feature extractor that will be used by the pipeline to encode data for the model. This object inherits from [`SequenceFeatureExtractor`].z image_processor ([`BaseImageProcessor`]): The image processor that will be used by the pipeline to encode data for the model. This object inherits from [`BaseImageProcessor`].a4 processor ([`ProcessorMixin`]): The processor that will be used by the pipeline to encode data for the model. This object inherits from [`ProcessorMixin`]. Processor is a composite object that might contain `tokenizer`, `feature_extractor`, and `image_processor`.ae modelcard (`str` or [`ModelCard`], *optional*): Model card attributed to the model for this pipeline. framework (`str`, *optional*): The framework to use, either `"pt"` for PyTorch or `"tf"` for TensorFlow. The specified framework must be installed. If no framework is specified, will default to the one currently installed. If no framework is specified and both frameworks are installed, will default to the framework of the `model`, or to PyTorch if no model is provided. task (`str`, defaults to `""`): A task-identifier for the pipeline. num_workers (`int`, *optional*, defaults to 8): When the pipeline will use *DataLoader* (when passing a dataset, on GPU for a Pytorch model), the number of workers to be used. batch_size (`int`, *optional*, defaults to 1): When the pipeline will use *DataLoader* (when passing a dataset, on GPU for a Pytorch model), the size of the batch to use, for inference this is not always beneficial, please read [Batching with pipelines](https://huggingface.co/transformers/main_classes/pipelines.html#pipeline-batching) . args_parser ([`~pipelines.ArgumentHandler`], *optional*): Reference to the object in charge of parsing supplied pipeline parameters. device (`int`, *optional*, defaults to -1): Device ordinal for CPU/GPU supports. Setting this to -1 will leverage CPU, a positive will run the model on the associated CUDA device id. You can pass native `torch.device` or a `str` too dtype (`str` or `torch.dtype`, *optional*): Sent directly as `model_kwargs` (just a simpler shortcut) to use the available precision for this model (`torch.float16`, `torch.bfloat16`, ... or `"auto"`)z binary_output (`bool`, *optional*, defaults to `False`): Flag indicating if the output the pipeline should happen in a serialized format (i.e., pickle) or as the raw output data e.g. text.r)rCrDrErFrG docstrings r4build_pipeline_init_argsrJ%sYI ( ( - - ' ' " " DDI6. .  r6)rCrDrErFrGPeftModelForQuestionAnsweringPeftModelForFeatureExtraction PeftModelPeftModelForSeq2SeqLM"PeftModelForSequenceClassificationPeftModelForCausalLMPeftModelForTokenClassification)zdocument-question-answeringzfeature-extractionzquestion-answering summarizationztable-question-answeringztext2text-generationztext-classificationzsentiment-analysisztext-generationztoken-classificationner translationtranslation_xx_to_yyzzero-shot-classification)PipelineChunkIteratorPipelineDatasetPipelineIteratorPipelinePackIterator)rCrDrErFceZdZdZdZdZdZdZdZdZ d4de dde e de e de ed e ed e ed e ed ed e e edfdefdZ d5de eej,fdedefdZdZdZede dfdZede dfdZedZdZ dZ!de e"ee#ffdZ$e%d Z&e%d!ed"e#de#ee'ffd#Z(e%d$e#ee'fd%e#de)fd&Z*e%d'e)d(e#defd)Z+d*Z,d+Z-d,ed-efd.Z.ddd/d0Z/d1Z0d2Z1d3Z2y)6Pipelinea The Pipeline class is the class from which all pipelines inherit. Refer to this class for methods shared across different pipelines. Base class implementing pipelined operations. Pipeline workflow is defined as a sequence of the following operations: Input -> Tokenization -> Model Inference -> Post-Processing (task dependent) -> Output Pipeline supports running on CPU or GPU through the device argument (see below). Some pipeline, like for instance [`FeatureExtractionPipeline`] (`'feature-extraction'`) output large tensor object as nested-lists. In order to avoid dumping such large structure as textual data we provide the `binary_output` constructor argument. If set to `True`, the output will be stored in the pickle format. NFr)r+r/rqroimage_processor processor modelcardr{rzrz torch.device binary_outputc  | jdd| jdd| jddc} } } |t||j\}}|dvrtj d||_||_||_||_||_ ||_ ||_ ||_ t|j dd} | | td| '| #tt!| j#} nd } t%rU|jd k(rE| d k(r,|j j&|j j&} t)| t*j&rK| j,d k(r t/d r| j,dk(rt1st| d| |_nt)| t2rHd | vr t/d rd| vrt1st| dt+j&| |_nb| d krt+j&d|_nAt5rt+j&d| |_nt7rt+j&d| |_nt9rt+j&d| |_nt;rt+j&d| |_nt1rt+j&d| |_nxt/d rt+j&d| |_nNt=rt+j&d| |_n&t+j&d|_n | | nd |_t%rWt*j>jAr9t*j>jCr|j j&|_tjEd|j&| |_#|jd k(rs|j j&|j&k7rPt)|j&tHr|j&d ks'| %|j jK|j&|jLrb|j jOrGtQ|j | jdd| jdd\|_)|_*tW|j jdr |j jjXnd|_,t|dt[}tW|j dr|j j\d&|d d| \}} ||_/|j`|j^j`|j`k(ro|j^jbY|j^jbd k7r@d|j^_0n.tejf|j j^|_/|j jjh}|K||vrG|jk|}d|vr|jd|_,|j^jld&i||jQ|jjn;|j^jn%|jjn|j^_7d |_8| jd!d|_9| jd"d|_:|jvd&i| \|_<|_=|_>|jt|jdu|jdu|jdugrTt|jd#d|_t|jd$d|_t|jd%d|_ |j:|j-t)|jtr|j|_ yyyy)'N args_parser torch_dtyperIr)rjaxzTensorFlow and JAX classes are deprecated and will be removed in Transformers v5. We recommend migrating to PyTorch classes or pinning your version of Transformers. hf_device_mapzThe model has been loaded with `accelerate` and therefore cannot be moved to a specific device. Please discard the `device` argument when creating your pipeline object.rrrKxpuT) check_devicehpuz6 is not available, you should use device="cpu" insteadcpuzmlu:zmusa:zcuda:znpu:zhpu:zxpu:zmps:zDevice set to use rrprefix_default_generation_config_prepare_generation_config)generation_configuse_model_defaultsrY num_workersrqror\r)Apoprrxr warning_oncerzrrqror\r]r^r{rtr2nextitervaluesrrrNrOtyper$rrr r"rr#r! distributed is_availableis_initializedrr_intr_pipeline_calls_generaterrrrrrirrkrlmax_new_tokensrZrdeepcopytask_specific_paramsrupdaters call_count _batch_size _num_workers_sanitize_parameters_preprocess_params_forward_params_postprocess_paramsrr)rrrqror\r]r^r{rzrr_rrrd"default_pipeline_generation_configprepared_generation_configr}this_task_paramss r4rzPipeline.__init__s**]D16::mT3RTZT^T^_fhlTm1a  9% U Iu  %   b    "!2."""  OTB  $);T  >(d=#7#7#9:;  DNNd$:| 1 1 =**&%,,/KK5(1GUY1ZKK5(1G1I$x/e%fgg$ FC(VO,BPT,UVO,B,D$x/e%fgg#ll62 !#ll51 ')#llT&?; (*#llU6(+;< (*#llU6(+;< ')#llT&?; ')#llT&?; 'T:#llT&?; ')#llT&?; #ll51 $*$6&BDK  E$5$5$B$B$DIZIZIiIiIk**++DK+DKK=9:* NNd " !!T[[0 S1dkkAo% JJMM$++ &  ( (TZZ-D-D-F=Q FJJ'8$?LacgAh> :D $":7>djj>O>OQY6Z$**++22`dDK29?[]m]o1p .tzz#?@6[TZZ5Z5Z6&H]a6ek62*F*D& 7EEQ..==AcArArr..99E..99R?<@D**9 *.tzz7S7S)T&$(::#4#4#I#I #/D> %# ^^t #T%;%;t%CTEYEY]aEa b+ %T^^[$GDN%,T^^=PRV%WD "#*4>>;Ld#SD    'D,B,B,N$002DE(,'='=$ F-O 'r6save_directorysafe_serializationrc |jdd}|;tjdt|j d t d||d<t jj|rtjd|dyt j|d t|d r|jj}i}|jD]\}}|d |j k7r|j}|d j"} | j%d d } | d |d j&|d <t)d|dD|d<t)d|dD|d<|||<||j*j,_t1||||d<|j*j2|fi||j4|j4j2|fi||j6|j6j2|fi||j8|j8j2|fi||j:|j:j3|yy)a Save the pipeline's model and tokenizer. Args: save_directory (`str` or `os.PathLike`): A path to the directory where to saved. It will be created if it doesn't exist. safe_serialization (`str`): Whether to save the model using `safetensors` or the traditional way for PyTorch or Tensorflow. kwargs (`dict[str, Any]`, *optional*): Additional key word arguments passed along to the [`~utils.PushToHubMixin.push_to_hub`] method. use_auth_tokenNzrThe `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.rzV`token` and `use_auth_token` are both specified. Please set only the argument `token`.zProvided path (z#) should be a directory, not a fileT)exist_ok_registered_implimpl.rKc34K|]}|jywrrrBrs r4rEz+Pipeline.save_pretrained.."B!1::"Brc34K|]}|jywrrrs r4rEz+Pipeline.save_pretrained..rrrr)rprrrrr2rrisfilerrmakedirsrrrr3rrrrrrrxcustom_pipelinesrsave_pretrainedrqror\r^) rrrrr pipeline_inforrzinfo module_name last_modules r4rzPipeline.save_pretrainedasF" $4d;  % MME zz'". l-F7O 77>>. ) LL?>*::]^ _  NT2 4+ , 11668M! +113 . d<4>>1yy{"6l55 )//4R8 "-aV 0E0E/FGV ""BtDz"BBT ""BtDz"BBT )- & .2BDJJ   . t^ 4'9#$" "">> % *DNN * *> DV D  ! ! - 2D " " 2 2> LV L    + 0D 0 0 J6 J >> % NN * *> : &r6c||Szn Scikit / Keras interface to transformers' pipelines. This method will forward to __call__(). rr>s r4r@zPipeline.transform Awr6c||Srrr>s r4rBzPipeline.predictrr6rz torch.dtypec0t|jddS)zO Dtype of the model (if it's Pytorch model), `None` otherwise. rIN)rtrrs r4rIzPipeline.dtypes tzz7D11r6cZtjdt|jddS)zU Torch dtype of the model (if it's Pytorch model), `None` otherwise. z;`torch_dtype` attribute is deprecated. Use `dtype` instead!rIN)rrqrtrrs r4rbzPipeline.torch_dtypes& YZtzz7D11r6c#K|jdk(rAtj|jdk(rdnd|j5ddddy|jjdk(r7tj j|j5ddddy|jjdk(r7tj j|j5ddddy|jjdk(r7tjj|j5ddddy|jjd k(r7tjj|j5ddddydy#1swYyxYw#1swYyxYw#1swYyxYw#1swYyxYw#1swYyxYww) a Context Manager allowing tensor allocation on the user-specified device in framework agnostic way. Returns: Context manager Examples: ```python # Explicitly ask for tensor allocation on CUDA device :0 pipe = pipeline(..., device=0) with pipe.device_placement(): # Every framework specific tensor allocation will be done on the request device output = pipe(...) ```rrKz/CPU:0z /device:GPU:Ncudamlumusare) r{rrrurOrrrrers r4device_placementzPipeline.device_placementso" >>T !t{{b'88 T[[M>Z[   {{6)ZZ&&t{{3!!U*YY%%dkk2!!V+ZZ&&t{{3!!U*YY%%dkk2!  sAGF A GF$A G%F0*A G5F<:A GG GF!G$F-)G0F95G<GGG Gc :|j||jS)av Ensure PyTorch tensors are on the specified device. Args: inputs (keyword arguments that should be `torch.Tensor`, the rest is ignored): The tensors to place on `self.device`. Recursive on lists **only**. Return: `dict[str, torch.Tensor]`: The same as `inputs` but on the proper device. )_ensure_tensor_on_devicer)rinputss r4ensure_tensor_on_devicez Pipeline.ensure_tensor_on_devices,,VT[[AAr6c t|tr=t|jDcic]\}}|j|c}}St|tr4|jDcic]\}}|j|c}}St|t r=t |jDcic]\}}|j|c}}St|t r |Dcgc]}j|c}St|trtfd|DSt|tjr|jS|Scc}}wcc}}wcc}}wcc}w)Nc3BK|]}j|ywr)r)rBrCrrs r4rEz4Pipeline._ensure_tensor_on_device..sX66tVDXrG) rNrr3rrrrrrOrPr)rrrnamer\rCs` ` r4rz!Pipeline._ensure_tensor_on_devices? fk *Y_YeYeYghvt44VVDDh  %\b\h\h\jkLDRXD$77GGk k  )ekeqeqestUaUY[aT4#@#@#PPtu u  %LRSDD11$?S S  &XQWXX X  -99V$ $MiltSsE# 3E)<E/ 5E5supported_modelsc *t|tsg}|jtvr"|j t|j|j D]>}t|t r|j t|.|j|@t|dr~|jjj D]W}tt r*|j |Dcgc]}|jc}=|j|jY|}|jjj|vrHtjd|jjjd|jd|dyycc}w)z Check if the model class is in supported by the pipeline. Args: supported_models (`list[str]` or `dict`): The list of models supported by the pipeline, or a dictionary with model class values. _model_mappingz The model 'z' is not supported for z. Supported models are rN)rNrrzSUPPORTED_PEFT_TASKSextendrtrrrr_extra_contentrrrrr)rrsupported_models_names model_namerms r4check_model_typezPipeline.check_model_types]*D1%' "yy00&--.B499.MN.557 > j%0*11$z2BC*11*=  > ')9:-<<KKRRTFE!*e4.5556Qaqzz6QR.55ennE F 6  ::   ( (0@ @ LLdjj22;;<??r6 input_tensorsforward_parametersc td)a _forward will receive the prepared dictionary from `preprocess` and run it on the model. This method might involve the GPU or the CPU and should be agnostic to it. Isolating this function is the reason for `preprocess` and `postprocess` to exist, so that the hot path, this method generally can run as fast as possible. It is not meant to be called directly, `forward` is preferred. It is basically the same but contains additional code surrounding `_forward` making sure tensors and models are on the same device, disabling the training part of the code (leading to faster inference). z_forward not implementedr)rrrs r4_forwardzPipeline._forward<s""<==r6 model_outputspostprocess_parametersc td)a Postprocess will receive the raw outputs of the `_forward` method, generally tensors, and reformat them into something more friendly. Generally it will output a list or a dict or results (containing just strings and numbers). zpostprocess not implementedr)rrrs r4 postprocesszPipeline.postprocessIs""?@@r6c"tjSr)rOno_gradrs r4get_inference_contextzPipeline.get_inference_contextRs }}r6c |j5|jdk(rd|d<|j|fi|}n|jdk(rw|j}|5|j ||j }|j|fi|}|j |t j d}dddntd|jddddS#1swYxYw#1swYSxYw) NrFtrainingr)rrhz Framework z is not supported)rr{rrrrrOr2)r model_inputsforward_paramsrinference_contexts r4forwardzPipeline.forwardUs   " " $ Q~~%+0 Z( - l Mn M 4'$($>$>$@!&(m#'#@#@VZVaVa#@#bL$1DMM,$Q.$QM$($A$A-X]XdXdejXk$A$lMmm !:dnn-==N!OPP Q mm Qs%AC/ AC#7"C/#C, (C//C9rorYc`t|tjjrt ||j |}n3|dkDrt jdd}t||j |}dtjvr(t jddtjd<|j |jn |j}|dk(rtnt|j |} t#|||| } t| |j$||} t| |j&|} | S)Nr-zFor iterable dataset using num_workers>1 is likely to result in errors since everything is iterable, setting `num_workers=1` to guarantee correctness.TOKENIZERS_PARALLELISMNDisabling tokenizer parallelism, we're using DataLoader multithreading alreadyfalserorY collate_fnloader_batch_size)rN collectionsabcSizedrWrrrrXrenvironrror\r5rwrqr)rr rrrorYpreprocess_paramsrpostprocess_paramsdatasetror dataloadermodel_iteratorfinal_iterators r4 get_iteratorzPipeline.get_iteratords fkoo33 4%fdoo?PQGQ1  &vt@QRG #2:: 5 KKh i3:BJJ/ 06:6L6L6XD22^b^r^r&0Ao]>$..Zk;l [Zdno )*dllNfpq).$:J:JL^_r6)rorYc T|rtjd|||jd}n |j}||jd}n |j}|jdi|\}}}i|j |}i|j |}i|j|}|xjdz c_|jdkDr=|jdk(r.|jjdk(rtjdtduxrt|t} t|tj } t|t"} | xs| xs| } |jdk(xr | xs| xs| } | r9| r#|j%||||||}t#|}|S|j'||||S| r|j%||||||S| r|j)||||S|jdk(r9t|t*r)t-t/|j%|g|||||S|j1||||S) NzIgnoring args : rr- rrzlYou seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a datasetr)rrrrrrrrrr{rrurqr*rNtypes GeneratorTyperr run_multiiterate ChunkPipelinerrrs run_single)rrrorYrrrrr is_dataset is_generatoris_list is_iterablecan_use_iteratorroutputss r4rzPipeline.__call__}sb  NN-dV4 5    ( "//  ' !-- @Y@Y@Y@c\b@c=>+=Mt66L:KLCD00CNCO 8 8O VT* ;L;G  >>T1]z7\\7\U\ !%!2!2K5FXj"~.~~f.?Qcdd $$ Z1BNTf <<(9>K]^ ^ ^^t # 4(G%%+z;Ln^p ??6+;<o>/0o>$$>? o> ""45 o> N+ o>I&o>C=o>o>sN234o>o>h$(E;c2;;./E;!E; E;N  2x .22 2Xm422!!F B$tCy$1G@ J J@@t@SR_M_H`@@ >d3 +=&> >VZ >_j > >AAPTAY\AA #&472374>b@q ar6r[rpipelinez pipeline file)object object_class object_filesz.from_pretrainedrc"eZdZdZdedefdZy)rcg}|j|fi|D]&}|j|fi|}|j|(|j|fi|}|Sr)rrrr) rrrrr all_outputsrrrs r4rzChunkPipeline.run_singlesi +DOOFH6GH .L(DLLHHM   } - .#$"";E2DEr6rorYcdtjvr(tjddtjd<|dkDrtj dd}t ||j |}|j |jn |j}|dk(rtnt|j|} t|||| } t| |j||} t| |j |} | S)Nrrrr-zFor ChunkPipeline using num_workers>0 is likely to result in errors since everything is iterable, setting `num_workers=1` to guarantee correctness.rr)rrrrrrVrror\r5rwrqr)rYrrXrrs r4rzChunkPipeline.get_iterators $2:: 5 KKh i3:BJJ/ 0 ? NNE K'ARS7;6L6L6XD22^b^r^r&0Ao]>$..Zk;l [Zdno -j$,,jtu).$:J:JL^_r6N)rrrrryrrr6r4rrs#&47r6rceZdZdeeefdeeefddfdZdeefdZdede eeeffdZ e d d  dded e d e ee e e fd e ee e e fde ede eddfdZdZy)PipelineRegistrysupported_tasks task_aliasesrNc ||_||_yr)r r )rr r s r4rzPipelineRegistry.__init__s.(r6ct|jjt|jjz}|j |Sr)rr rkr sort)rsupported_tasks r4get_supported_tasksz$PipelineRegistry.get_supported_taskssFd22779:T$BSBSBXBXBZ=[[r6rzc||jvr|j|}||jvr|j|}||dfS|jdr\|jd}t |dk(r.|ddk(r&|ddk(r|jd}d}|||d|dffSt d |d t d |d |j d gz)NrTrr;rr rr-rJzInvalid translation task z#, use 'translation_XX_to_YY' formatz Unknown task z, available tasks are translation_XX_to_YY)r r  startswithrr1rr)rrzrtokenss r4 check_taskzPipelineRegistry.check_tasks 4$$ $$$T*D 4'' ' 006M, , ??= )ZZ_F6{aF1I$>6!9PTCT $ 4 4] C $]VAYq ,BBB6tf<_`a aD6!78P8P8RVlUm8m7n o  r6tf_modelz5.0.0)old_nameversionpipeline_classpt_modelrruc.||jvrtj|d|d|d}nt|ts|f}|d}nt|ts|f}|||d}|d|vr d|vsd|vrd|i}||d<|||d <||j|<||i|_y) Nz6 is already registered. Overwriting pipeline for task z...r)rrrrrrrru)r rrrNrr)rrzrrrrru task_impls r4register_pipelinez"PipelineRegistry.register_pipelines 4'' ' NNdV#YZ^Y__bc d  HHe, {H  HHe, {H+88L  g%47?dgo"G,#*Ii   $If %.T"+/*;'r6c|jSr)r rs r4to_dictzPipelineRegistry.to_dict5s###r6)NNNN)rrrrrr rrrrrr&rur r rrrr6r4rrs)S#X)d3PS8n)Y])T#Y  s uS$^'< &j': 8<7;"&"!<!<!<5uT{!234 !< 5uT{!234 !< $ !<sm!< !<;!r7s    # %#665A)7!3-4"0d?+^[HI :=405%GJ50   H % 2-j<D7;# A ADeDk!123A 3- A } AL7;# $DeDk!123$ 3-$} $N8C=D+%+%$,SM+%AI#+% 38_+%\5> 5>eC):$:;<5>""565> 8% &1D(E EF 5>p "$c$n\n\b)'.)'X'/'T-)0-)` $C $ "' %#' AAAA A ! A  AH. %D#D:KH:;-.!@ A45@A?@./>? - .+,45!E F"$D`d za}nza  zaz!!5!56 +#+#7#7#?#?#F#FJ_$G$ g "%  H@B$B$r6