"""
Doc utilities: Utilities related to documentation
"""

import functools
import inspect
import re
import textwrap
import types
from collections import OrderedDict


def get_docstring_indentation_level(func):
    """Return the indentation level of the start of the docstring of a class or function (or method)."""
    # Classes are assumed to be defined in the global scope, so their docstrings sit one level in.
    if inspect.isclass(func):
        return 4
    source = inspect.getsource(func)
    first_line = source.splitlines()[0]
    function_def_level = len(first_line) - len(first_line.lstrip())
    return 4 + function_def_level


def add_start_docstrings(*docstr):
    def docstring_decorator(fn):
        fn.__doc__ = "".join(docstr) + (fn.__doc__ if fn.__doc__ is not None else "")
        return fn

    return docstring_decorator


def add_start_docstrings_to_model_forward(*docstr):
    def docstring_decorator(fn):
        class_name = f"[`{fn.__qualname__.split('.')[0]}`]"
        intro = f"""
    The {class_name} forward method, overrides the `__call__` special method. Although the recipe for forward pass
    needs to be defined within this function, one should call the [`Module`] instance afterwards instead of this
    since the former takes care of running the pre and post processing steps while the latter silently ignores them.
"""
        correct_indentation = get_docstring_indentation_level(fn)
        current_doc = fn.__doc__ if fn.__doc__ is not None else ""
        try:
            first_non_empty = next(line for line in current_doc.splitlines() if line.strip() != "")
            doc_indentation = len(first_non_empty) - len(first_non_empty.lstrip())
        except StopIteration:
            doc_indentation = correct_indentation
        docs = docstr
        # Re-indent everything consistently if the existing docstring uses a different level.
        if doc_indentation != correct_indentation:
            docs = [textwrap.indent(textwrap.dedent(doc), " " * correct_indentation) for doc in docstr]
            current_doc = textwrap.indent(textwrap.dedent(current_doc), " " * correct_indentation)
        docstring = "".join(docs) + current_doc
        fn.__doc__ = intro + docstring
        return fn

    return docstring_decorator


def add_end_docstrings(*docstr):
    def docstring_decorator(fn):
        fn.__doc__ = (fn.__doc__ if fn.__doc__ is not None else "") + "".join(docstr)
        return fn

    return docstring_decorator


PT_RETURN_INTRODUCTION = """
    Returns:
        [`{full_output_type}`] or `tuple(torch.FloatTensor)`: A [`{full_output_type}`] or a tuple of
        `torch.FloatTensor` (if `return_dict=False` is passed or when `config.return_dict=False`) comprising various
        elements depending on the configuration ([`{config_class}`]) and inputs.

"""


TF_RETURN_INTRODUCTION = """
    Returns:
        [`{full_output_type}`] or `tuple(tf.Tensor)`: A [`{full_output_type}`] or a tuple of `tf.Tensor` (if
        `return_dict=False` is passed or when `config.return_dict=False`) comprising various elements depending on the
        configuration ([`{config_class}`]) and inputs.

"""


def _get_indent(t):
    """Returns the indentation in the first line of t"""
    search = re.search(r"^(\s*)\S", t)
    return "" if search is None else search.groups()[0]


def _convert_output_args_doc(output_args_doc):
    """Convert output_args_doc to display properly."""
    # Split `output_args_doc` into blocks of one argument plus its description.
    indent = _get_indent(output_args_doc)
    blocks = []
    current_block = ""
    for line in output_args_doc.split("\n"):
        # A line at the base indent starts a new argument.
        if _get_indent(line) == indent:
            if len(current_block) > 0:
                blocks.append(current_block[:-1])
            current_block = f"{line}\n"
        else:
            # Otherwise it continues the description of the current argument (drop two indent spaces).
            current_block += f"{line[2:]}\n"
    blocks.append(current_block[:-1])

    # Format each block for proper rendering.
    for i in range(len(blocks)):
        blocks[i] = re.sub(r"^(\s+)(\S+)(\s+)", r"\1- **\2**\3", blocks[i])
        blocks[i] = re.sub(r":\s*\n\s*(\S)", r" -- \1", blocks[i])

    return "\n".join(blocks)


def _prepare_output_docstrings(output_type, config_class, min_indent=None, add_intro=True):
    """
    Prepares the return part of the docstring using `output_type`.
    """
    output_docstring = output_type.__doc__

    # Keep only the argument list of the output docstring.
    lines = output_docstring.split("\n")
    i = 0
    while i < len(lines) and re.search(r"^\s*(Args|Parameters):\s*$", lines[i]) is None:
        i += 1
    if i >= len(lines):
        raise ValueError(
            f"No `Args` or `Parameters` section is found in the docstring of `{output_type.__name__}`. Make sure it "
            "has a docstring and contains either `Args` or `Parameters`."
        )
    params_docstring = _convert_output_args_doc("\n".join(lines[(i + 1) :]))

    # Add the return introduction.
    if add_intro:
        full_output_type = f"{output_type.__module__}.{output_type.__name__}"
        intro = TF_RETURN_INTRODUCTION if output_type.__name__.startswith("TF") else PT_RETURN_INTRODUCTION
        result = intro.format(full_output_type=full_output_type, config_class=config_class) + params_docstring
    else:
        result = params_docstring

    # Enforce a minimum indentation if requested.
    if min_indent is not None:
        lines = result.split("\n")
        # Find the indent of the first nonempty line.
        i = 0
        while len(lines[i]) == 0:
            i += 1
        indent = len(_get_indent(lines[i]))
        # If it is too small, add indentation to all nonempty lines.
        if indent < min_indent:
            to_add = " " * (min_indent - indent)
            lines = [(f"{to_add}{line}" if len(line) > 0 else line) for line in lines]
            result = "\n".join(lines)
    return result
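
# Illustrative sketch (not part of the library): the decorators above only concatenate
# strings onto `__doc__`, so their effect can be checked on a toy function. The
# fragment below is made up, not a real shared docstring.
def _demo_docstring_decorators():
    shared_args = "\n    Args:\n        x (`int`): A dummy argument.\n"

    @add_end_docstrings("\n    Returns: nothing.\n")
    @add_start_docstrings("Shared intro. ", shared_args)
    def f(x):
        """Function-specific summary."""

    # Final order: start fragments, original docstring, end fragments.
    assert f.__doc__.startswith("Shared intro. ")
    assert f.__doc__.rstrip().endswith("Returns: nothing.")
    return f.__doc__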
FAKE_MODEL_DISCLAIMER = """
    <Tip warning={true}>

    This example uses a random model as the real ones are all very big. To get proper results, you should use
    {real_checkpoint} instead of {fake_checkpoint}. If you get out-of-memory when loading that checkpoint, you can try
    adding `device_map="auto"` in the `from_pretrained` call.

    </Tip>
"""


PT_TOKEN_CLASSIFICATION_SAMPLE = r"""
    Example:

    ```python
    >>> from transformers import AutoTokenizer, {model_class}
    >>> import torch

    >>> tokenizer = AutoTokenizer.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> inputs = tokenizer(
    ...     "HuggingFace is a company based in Paris and New York", add_special_tokens=False, return_tensors="pt"
    ... )

    >>> with torch.no_grad():
    ...     logits = model(**inputs).logits

    >>> predicted_token_class_ids = logits.argmax(-1)

    >>> # Note that tokens are classified rather than input words, which means that
    >>> # there might be more predicted token classes than words.
    >>> # Multiple token classes might account for the same word
    >>> predicted_tokens_classes = [model.config.id2label[t.item()] for t in predicted_token_class_ids[0]]
    >>> predicted_tokens_classes
    {expected_output}

    >>> labels = predicted_token_class_ids
    >>> loss = model(**inputs, labels=labels).loss
    >>> round(loss.item(), 2)
    {expected_loss}
    ```
"""


PT_QUESTION_ANSWERING_SAMPLE = r"""
    Example:

    ```python
    >>> from transformers import AutoTokenizer, {model_class}
    >>> import torch

    >>> tokenizer = AutoTokenizer.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> question, text = "Who was Jim Henson?", "Jim Henson was a nice puppet"

    >>> inputs = tokenizer(question, text, return_tensors="pt")
    >>> with torch.no_grad():
    ...     outputs = model(**inputs)

    >>> answer_start_index = outputs.start_logits.argmax()
    >>> answer_end_index = outputs.end_logits.argmax()

    >>> predict_answer_tokens = inputs.input_ids[0, answer_start_index : answer_end_index + 1]
    >>> tokenizer.decode(predict_answer_tokens, skip_special_tokens=True)
    {expected_output}

    >>> # target is "nice puppet"
    >>> target_start_index = torch.tensor([{qa_target_start_index}])
    >>> target_end_index = torch.tensor([{qa_target_end_index}])

    >>> outputs = model(**inputs, start_positions=target_start_index, end_positions=target_end_index)
    >>> loss = outputs.loss
    >>> round(loss.item(), 2)
    {expected_loss}
    ```
"""


PT_SEQUENCE_CLASSIFICATION_SAMPLE = r"""
    Example of single-label classification:

    ```python
    >>> import torch
    >>> from transformers import AutoTokenizer, {model_class}

    >>> tokenizer = AutoTokenizer.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")

    >>> with torch.no_grad():
    ...     logits = model(**inputs).logits

    >>> predicted_class_id = logits.argmax().item()
    >>> model.config.id2label[predicted_class_id]
    {expected_output}

    >>> # To train a model on `num_labels` classes, you can pass `num_labels=num_labels` to `.from_pretrained(...)`
    >>> num_labels = len(model.config.id2label)
    >>> model = {model_class}.from_pretrained("{checkpoint}", num_labels=num_labels)

    >>> labels = torch.tensor([1])
    >>> loss = model(**inputs, labels=labels).loss
    >>> round(loss.item(), 2)
    {expected_loss}
    ```

    Example of multi-label classification:

    ```python
    >>> import torch
    >>> from transformers import AutoTokenizer, {model_class}

    >>> tokenizer = AutoTokenizer.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}", problem_type="multi_label_classification")

    >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")

    >>> with torch.no_grad():
    ...     logits = model(**inputs).logits

    >>> predicted_class_ids = torch.arange(0, logits.shape[-1])[torch.sigmoid(logits).squeeze(dim=0) > 0.5]

    >>> # To train a model on `num_labels` classes, you can pass `num_labels=num_labels` to `.from_pretrained(...)`
    >>> num_labels = len(model.config.id2label)
    >>> model = {model_class}.from_pretrained(
    ...     "{checkpoint}", num_labels=num_labels, problem_type="multi_label_classification"
    ... )

    >>> labels = torch.sum(
    ...     torch.nn.functional.one_hot(predicted_class_ids[None, :].clone(), num_classes=num_labels), dim=1
    ... ).to(torch.float)
    >>> loss = model(**inputs, labels=labels).loss
    ```
"""


PT_MASKED_LM_SAMPLE = r"""
    Example:

    ```python
    >>> from transformers import AutoTokenizer, {model_class}
    >>> import torch

    >>> tokenizer = AutoTokenizer.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> inputs = tokenizer("The capital of France is {mask}.", return_tensors="pt")

    >>> with torch.no_grad():
    ...     logits = model(**inputs).logits

    >>> # retrieve index of {mask}
    >>> mask_token_index = (inputs.input_ids == tokenizer.mask_token_id)[0].nonzero(as_tuple=True)[0]

    >>> predicted_token_id = logits[0, mask_token_index].argmax(axis=-1)
    >>> tokenizer.decode(predicted_token_id)
    {expected_output}

    >>> labels = tokenizer("The capital of France is Paris.", return_tensors="pt")["input_ids"]
    >>> # mask labels of non-{mask} tokens
    >>> labels = torch.where(inputs.input_ids == tokenizer.mask_token_id, labels, -100)

    >>> outputs = model(**inputs, labels=labels)
    >>> round(outputs.loss.item(), 2)
    {expected_loss}
    ```
"""


PT_BASE_MODEL_SAMPLE = r"""
    Example:

    ```python
    >>> from transformers import AutoTokenizer, {model_class}
    >>> import torch

    >>> tokenizer = AutoTokenizer.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
    >>> outputs = model(**inputs)

    >>> last_hidden_states = outputs.last_hidden_state
    ```
"""
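
# Illustrative sketch: the samples above are plain `str.format` templates in which
# `{model_class}`, `{checkpoint}`, `{expected_output}` and `{expected_loss}` are filled
# in later by `add_code_sample_docstrings`. The values below are made up for
# demonstration, not real defaults.
def _demo_format_sample():
    doc = PT_TOKEN_CLASSIFICATION_SAMPLE.format(
        model_class="BertForTokenClassification",  # hypothetical values
        checkpoint="some-org/some-checkpoint",
        expected_output="['O', 'B-ORG']",
        expected_loss="0.01",
    )
    assert 'BertForTokenClassification.from_pretrained("some-org/some-checkpoint")' in doc
    return doc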
PT_MULTIPLE_CHOICE_SAMPLE = r"""
    Example:

    ```python
    >>> from transformers import AutoTokenizer, {model_class}
    >>> import torch

    >>> tokenizer = AutoTokenizer.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> prompt = "In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced."
    >>> choice0 = "It is eaten with a fork and a knife."
    >>> choice1 = "It is eaten while held in the hand."
    >>> labels = torch.tensor(0).unsqueeze(0)  # choice0 is correct (according to Wikipedia ;)), batch size 1

    >>> encoding = tokenizer([prompt, prompt], [choice0, choice1], return_tensors="pt", padding=True)
    >>> outputs = model(**{{k: v.unsqueeze(0) for k, v in encoding.items()}}, labels=labels)  # batch size is 1

    >>> # the linear classifier still needs to be trained
    >>> loss = outputs.loss
    >>> logits = outputs.logits
    ```
"""


PT_CAUSAL_LM_SAMPLE = r"""
    Example:

    ```python
    >>> import torch
    >>> from transformers import AutoTokenizer, {model_class}

    >>> tokenizer = AutoTokenizer.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
    >>> outputs = model(**inputs, labels=inputs["input_ids"])
    >>> loss = outputs.loss
    >>> logits = outputs.logits
    ```
"""


PT_SPEECH_BASE_MODEL_SAMPLE = r"""
    Example:

    ```python
    >>> from transformers import AutoProcessor, {model_class}
    >>> import torch
    >>> from datasets import load_dataset

    >>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation")
    >>> dataset = dataset.sort("id")
    >>> sampling_rate = dataset.features["audio"].sampling_rate

    >>> processor = AutoProcessor.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> # audio file is decoded on the fly
    >>> inputs = processor(dataset[0]["audio"]["array"], sampling_rate=sampling_rate, return_tensors="pt")
    >>> with torch.no_grad():
    ...     outputs = model(**inputs)

    >>> last_hidden_states = outputs.last_hidden_state
    >>> list(last_hidden_states.shape)
    {expected_output}
    ```
"""


PT_SPEECH_CTC_SAMPLE = r"""
    Example:

    ```python
    >>> from transformers import AutoProcessor, {model_class}
    >>> from datasets import load_dataset
    >>> import torch

    >>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation")
    >>> dataset = dataset.sort("id")
    >>> sampling_rate = dataset.features["audio"].sampling_rate

    >>> processor = AutoProcessor.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> # audio file is decoded on the fly
    >>> inputs = processor(dataset[0]["audio"]["array"], sampling_rate=sampling_rate, return_tensors="pt")
    >>> with torch.no_grad():
    ...     logits = model(**inputs).logits
    >>> predicted_ids = torch.argmax(logits, dim=-1)

    >>> # transcribe speech
    >>> transcription = processor.batch_decode(predicted_ids)
    >>> transcription[0]
    {expected_output}

    >>> inputs["labels"] = processor(text=dataset[0]["text"], return_tensors="pt").input_ids

    >>> # compute loss
    >>> loss = model(**inputs).loss
    >>> round(loss.item(), 2)
    {expected_loss}
    ```
"""


PT_SPEECH_SEQ_CLASS_SAMPLE = r"""
    Example:

    ```python
    >>> from transformers import AutoFeatureExtractor, {model_class}
    >>> from datasets import load_dataset
    >>> import torch

    >>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation")
    >>> dataset = dataset.sort("id")
    >>> sampling_rate = dataset.features["audio"].sampling_rate

    >>> feature_extractor = AutoFeatureExtractor.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> # audio file is decoded on the fly
    >>> inputs = feature_extractor(dataset[0]["audio"]["array"], sampling_rate=sampling_rate, return_tensors="pt")

    >>> with torch.no_grad():
    ...     logits = model(**inputs).logits

    >>> predicted_class_ids = torch.argmax(logits, dim=-1).item()
    >>> predicted_label = model.config.id2label[predicted_class_ids]
    >>> predicted_label
    {expected_output}

    >>> # compute loss - target_label is e.g. "down"
    >>> target_label = model.config.id2label[0]
    >>> inputs["labels"] = torch.tensor([model.config.label2id[target_label]])
    >>> loss = model(**inputs).loss
    >>> round(loss.item(), 2)
    {expected_loss}
    ```
"""


PT_SPEECH_FRAME_CLASS_SAMPLE = r"""
    Example:

    ```python
    >>> from transformers import AutoFeatureExtractor, {model_class}
    >>> from datasets import load_dataset
    >>> import torch

    >>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation")
    >>> dataset = dataset.sort("id")
    >>> sampling_rate = dataset.features["audio"].sampling_rate

    >>> feature_extractor = AutoFeatureExtractor.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> # audio file is decoded on the fly
    >>> inputs = feature_extractor(dataset[0]["audio"]["array"], return_tensors="pt", sampling_rate=sampling_rate)
    >>> with torch.no_grad():
    ...     logits = model(**inputs).logits

    >>> probabilities = torch.sigmoid(logits[0])
    >>> # labels is a one-hot array of shape (num_frames, num_speakers)
    >>> labels = (probabilities > 0.5).long()
    >>> labels[0].tolist()
    {expected_output}
    ```
"""


PT_SPEECH_XVECTOR_SAMPLE = r"""
    Example:

    ```python
    >>> from transformers import AutoFeatureExtractor, {model_class}
    >>> from datasets import load_dataset
    >>> import torch

    >>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation")
    >>> dataset = dataset.sort("id")
    >>> sampling_rate = dataset.features["audio"].sampling_rate

    >>> feature_extractor = AutoFeatureExtractor.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> # audio file is decoded on the fly
    >>> inputs = feature_extractor(
    ...     [d["array"] for d in dataset[:2]["audio"]], sampling_rate=sampling_rate, return_tensors="pt", padding=True
    ... )
    >>> with torch.no_grad():
    ...     embeddings = model(**inputs).embeddings

    >>> embeddings = torch.nn.functional.normalize(embeddings, dim=-1).cpu()

    >>> # the resulting embeddings can be used for cosine similarity-based retrieval
    >>> cosine_sim = torch.nn.CosineSimilarity(dim=-1)
    >>> similarity = cosine_sim(embeddings[0], embeddings[1])
    >>> threshold = 0.7  # the optimal threshold is dataset-dependent
    >>> if similarity < threshold:
    ...     print("Speakers are not the same!")
    >>> round(similarity.item(), 2)
    {expected_output}
    ```
"""


PT_VISION_BASE_MODEL_SAMPLE = r"""
    Example:

    ```python
    >>> from transformers import AutoImageProcessor, {model_class}
    >>> import torch
    >>> from datasets import load_dataset

    >>> dataset = load_dataset("huggingface/cats-image")
    >>> image = dataset["test"]["image"][0]

    >>> image_processor = AutoImageProcessor.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> inputs = image_processor(image, return_tensors="pt")

    >>> with torch.no_grad():
    ...     outputs = model(**inputs)

    >>> last_hidden_states = outputs.last_hidden_state
    >>> list(last_hidden_states.shape)
    {expected_output}
    ```
"""
PT_VISION_SEQ_CLASS_SAMPLE = r"""
    Example:

    ```python
    >>> from transformers import AutoImageProcessor, {model_class}
    >>> import torch
    >>> from datasets import load_dataset

    >>> dataset = load_dataset("huggingface/cats-image")
    >>> image = dataset["test"]["image"][0]

    >>> image_processor = AutoImageProcessor.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> inputs = image_processor(image, return_tensors="pt")

    >>> with torch.no_grad():
    ...     logits = model(**inputs).logits

    >>> # model predicts one of the 1000 ImageNet classes
    >>> predicted_label = logits.argmax(-1).item()
    >>> print(model.config.id2label[predicted_label])
    {expected_output}
    ```
"""


PT_SAMPLE_DOCSTRINGS = {
    "SequenceClassification": PT_SEQUENCE_CLASSIFICATION_SAMPLE,
    "QuestionAnswering": PT_QUESTION_ANSWERING_SAMPLE,
    "TokenClassification": PT_TOKEN_CLASSIFICATION_SAMPLE,
    "MultipleChoice": PT_MULTIPLE_CHOICE_SAMPLE,
    "MaskedLM": PT_MASKED_LM_SAMPLE,
    "LMHead": PT_CAUSAL_LM_SAMPLE,
    "BaseModel": PT_BASE_MODEL_SAMPLE,
    "SpeechBaseModel": PT_SPEECH_BASE_MODEL_SAMPLE,
    "CTC": PT_SPEECH_CTC_SAMPLE,
    "AudioClassification": PT_SPEECH_SEQ_CLASS_SAMPLE,
    "AudioFrameClassification": PT_SPEECH_FRAME_CLASS_SAMPLE,
    "AudioXVector": PT_SPEECH_XVECTOR_SAMPLE,
    "VisionBaseModel": PT_VISION_BASE_MODEL_SAMPLE,
    "ImageClassification": PT_VISION_SEQ_CLASS_SAMPLE,
}


TEXT_TO_AUDIO_SPECTROGRAM_SAMPLE = r"""
    Example:

    ```python
    >>> from transformers import AutoProcessor, {model_class}, SpeechT5HifiGan

    >>> model = {model_class}.from_pretrained("{checkpoint}")
    >>> processor = AutoProcessor.from_pretrained("{checkpoint}")
    >>> vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")

    >>> inputs = processor(text="Hello, my dog is cute", return_tensors="pt")

    >>> # generate speech
    >>> speech = model.generate(inputs["input_ids"], speaker_embeddings=speaker_embeddings, vocoder=vocoder)
    ```
"""


TEXT_TO_AUDIO_WAVEFORM_SAMPLE = r"""
    Example:

    ```python
    >>> from transformers import AutoProcessor, {model_class}

    >>> model = {model_class}.from_pretrained("{checkpoint}")
    >>> processor = AutoProcessor.from_pretrained("{checkpoint}")

    >>> inputs = processor(text="Hello, my dog is cute", return_tensors="pt")

    >>> # generate speech
    >>> speech = model(inputs["input_ids"])
    ```
"""


IMAGE_TEXT_TO_TEXT_GENERATION_SAMPLE = r"""
    Example:

    ```python
    >>> from PIL import Image
    >>> import requests
    >>> from transformers import AutoProcessor, {model_class}

    >>> model = {model_class}.from_pretrained("{checkpoint}")
    >>> processor = AutoProcessor.from_pretrained("{checkpoint}")

    >>> messages = [
    ...     {{
    ...         "role": "user",
    ...         "content": [
    ...             {{"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg"}},
    ...             {{"type": "text", "text": "Where is the cat standing?"}},
    ...         ],
    ...     }},
    ... ]

    >>> inputs = processor.apply_chat_template(
    ...     messages,
    ...     tokenize=True,
    ...     return_dict=True,
    ...     return_tensors="pt",
    ...     add_generation_prompt=True,
    ... )

    >>> # Generate
    >>> generate_ids = model.generate(**inputs)
    >>> processor.batch_decode(generate_ids, skip_special_tokens=True)[0]
    ```
"""


IMAGE_TO_TEXT_SAMPLE = r"""
    Example:

    ```python
    >>> from PIL import Image
    >>> import requests
    >>> from transformers import AutoProcessor, {model_class}

    >>> processor = AutoProcessor.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
    >>> image = Image.open(requests.get(url, stream=True).raw)

    >>> inputs = processor(images=image, return_tensors="pt")
    >>> outputs = model(**inputs)
    ```
"""


DEPTH_ESTIMATION_SAMPLE = r"""
    Example:

    ```python
    >>> from transformers import AutoImageProcessor, {model_class}
    >>> import torch
    >>> from PIL import Image
    >>> import requests

    >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
    >>> image = Image.open(requests.get(url, stream=True).raw)

    >>> processor = AutoImageProcessor.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    >>> model.to(device)

    >>> # prepare image for the model
    >>> inputs = processor(images=image, return_tensors="pt").to(device)

    >>> with torch.no_grad():
    ...     outputs = model(**inputs)

    >>> # interpolate to original size
    >>> post_processed_output = processor.post_process_depth_estimation(
    ...     outputs,
    ...     [(image.height, image.width)],
    ... )

    >>> predicted_depth = post_processed_output[0]["predicted_depth"]
    ```
"""


# Tasks without a dedicated example share an empty placeholder; the remaining tasks
# reuse the PyTorch samples above.
VISUAL_QUESTION_ANSWERING_SAMPLE = r"""
    Example:

    ```python
    ```
"""

AUTOMATIC_SPEECH_RECOGNITION_SAMPLE = PT_SPEECH_CTC_SAMPLE
AUDIO_FRAME_CLASSIFICATION_SAMPLE = PT_SPEECH_FRAME_CLASS_SAMPLE
AUDIO_CLASSIFICATION_SAMPLE = PT_SPEECH_SEQ_CLASS_SAMPLE
AUDIO_XVECTOR_SAMPLE = PT_SPEECH_XVECTOR_SAMPLE
VIDEO_CLASSIFICATION_SAMPLE = VISUAL_QUESTION_ANSWERING_SAMPLE
ZERO_SHOT_IMAGE_CLASSIFICATION_SAMPLE = VISUAL_QUESTION_ANSWERING_SAMPLE
IMAGE_CLASSIFICATION_SAMPLE = PT_VISION_SEQ_CLASS_SAMPLE
ZERO_SHOT_OBJECT_DETECTION_SAMPLE = VISUAL_QUESTION_ANSWERING_SAMPLE
OBJECT_DETECTION_SAMPLE = VISUAL_QUESTION_ANSWERING_SAMPLE
IMAGE_SEGMENTATION_SAMPLE = VISUAL_QUESTION_ANSWERING_SAMPLE
IMAGE_TO_IMAGE_SAMPLE = VISUAL_QUESTION_ANSWERING_SAMPLE
IMAGE_FEATURE_EXTRACTION_SAMPLE = VISUAL_QUESTION_ANSWERING_SAMPLE
TEXT_GENERATION_SAMPLE = PT_CAUSAL_LM_SAMPLE
TABLE_QUESTION_ANSWERING_SAMPLE = VISUAL_QUESTION_ANSWERING_SAMPLE
DOCUMENT_QUESTION_ANSWERING_SAMPLE = VISUAL_QUESTION_ANSWERING_SAMPLE
QUESTION_ANSWERING_SAMPLE = PT_QUESTION_ANSWERING_SAMPLE
TEXT2TEXT_GENERATION_SAMPLE = VISUAL_QUESTION_ANSWERING_SAMPLE
NEXT_SENTENCE_PREDICTION_SAMPLE = VISUAL_QUESTION_ANSWERING_SAMPLE
MULTIPLE_CHOICE_SAMPLE = PT_MULTIPLE_CHOICE_SAMPLE
TEXT_CLASSIFICATION_SAMPLE = PT_SEQUENCE_CLASSIFICATION_SAMPLE
TOKEN_CLASSIFICATION_SAMPLE = PT_TOKEN_CLASSIFICATION_SAMPLE
FILL_MASK_SAMPLE = PT_MASKED_LM_SAMPLE
MASK_GENERATION_SAMPLE = VISUAL_QUESTION_ANSWERING_SAMPLE
PRETRAINING_SAMPLE = VISUAL_QUESTION_ANSWERING_SAMPLE
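
# Illustrative sketch: `PT_SAMPLE_DOCSTRINGS` is keyed by head type, and
# `add_code_sample_docstrings` (further below) selects an entry by substring-matching
# the model class name. This simplified dispatch covers only a few heads; the real
# logic also handles modalities and more head types.
def _demo_head_dispatch(model_class: str) -> str:
    for head in ("SequenceClassification", "QuestionAnswering", "TokenClassification", "MultipleChoice", "MaskedLM"):
        if head in model_class:
            return PT_SAMPLE_DOCSTRINGS[head]
    return PT_SAMPLE_DOCSTRINGS["BaseModel"]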
PIPELINE_TASKS_TO_SAMPLE_DOCSTRINGS = {
    "text-to-audio-spectrogram": TEXT_TO_AUDIO_SPECTROGRAM_SAMPLE,
    "text-to-audio-waveform": TEXT_TO_AUDIO_WAVEFORM_SAMPLE,
    "automatic-speech-recognition": AUTOMATIC_SPEECH_RECOGNITION_SAMPLE,
    "audio-frame-classification": AUDIO_FRAME_CLASSIFICATION_SAMPLE,
    "audio-classification": AUDIO_CLASSIFICATION_SAMPLE,
    "audio-xvector": AUDIO_XVECTOR_SAMPLE,
    "image-text-to-text": IMAGE_TEXT_TO_TEXT_GENERATION_SAMPLE,
    "image-to-text": IMAGE_TO_TEXT_SAMPLE,
    "visual-question-answering": VISUAL_QUESTION_ANSWERING_SAMPLE,
    "depth-estimation": DEPTH_ESTIMATION_SAMPLE,
    "video-classification": VIDEO_CLASSIFICATION_SAMPLE,
    "zero-shot-image-classification": ZERO_SHOT_IMAGE_CLASSIFICATION_SAMPLE,
    "image-classification": IMAGE_CLASSIFICATION_SAMPLE,
    "zero-shot-object-detection": ZERO_SHOT_OBJECT_DETECTION_SAMPLE,
    "object-detection": OBJECT_DETECTION_SAMPLE,
    "image-segmentation": IMAGE_SEGMENTATION_SAMPLE,
    "image-to-image": IMAGE_TO_IMAGE_SAMPLE,
    "image-feature-extraction": IMAGE_FEATURE_EXTRACTION_SAMPLE,
    "text-generation": TEXT_GENERATION_SAMPLE,
    "table-question-answering": TABLE_QUESTION_ANSWERING_SAMPLE,
    "document-question-answering": DOCUMENT_QUESTION_ANSWERING_SAMPLE,
    "question-answering": QUESTION_ANSWERING_SAMPLE,
    "text2text-generation": TEXT2TEXT_GENERATION_SAMPLE,
    "next-sentence-prediction": NEXT_SENTENCE_PREDICTION_SAMPLE,
    "multiple-choice": MULTIPLE_CHOICE_SAMPLE,
    "text-classification": TEXT_CLASSIFICATION_SAMPLE,
    "token-classification": TOKEN_CLASSIFICATION_SAMPLE,
    "fill-mask": FILL_MASK_SAMPLE,
    "mask-generation": MASK_GENERATION_SAMPLE,
    "pretraining": PRETRAINING_SAMPLE,
}


MODELS_TO_PIPELINE = OrderedDict(
    [
        ("MODEL_FOR_TEXT_TO_SPECTROGRAM_MAPPING_NAMES", "text-to-audio-spectrogram"),
        ("MODEL_FOR_TEXT_TO_WAVEFORM_MAPPING_NAMES", "text-to-audio-waveform"),
        ("MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING_NAMES", "automatic-speech-recognition"),
        ("MODEL_FOR_CTC_MAPPING_NAMES", "automatic-speech-recognition"),
        ("MODEL_FOR_AUDIO_FRAME_CLASSIFICATION_MAPPING_NAMES", "audio-frame-classification"),
        ("MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING_NAMES", "audio-classification"),
        ("MODEL_FOR_AUDIO_XVECTOR_MAPPING_NAMES", "audio-xvector"),
        ("MODEL_FOR_IMAGE_TEXT_TO_TEXT_MAPPING_NAMES", "image-text-to-text"),
        ("MODEL_FOR_VISION_2_SEQ_MAPPING_NAMES", "image-to-text"),
        ("MODEL_FOR_VISUAL_QUESTION_ANSWERING_MAPPING_NAMES", "visual-question-answering"),
        ("MODEL_FOR_DEPTH_ESTIMATION_MAPPING_NAMES", "depth-estimation"),
        ("MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING_NAMES", "video-classification"),
        ("MODEL_FOR_ZERO_SHOT_IMAGE_CLASSIFICATION_MAPPING_NAMES", "zero-shot-image-classification"),
        ("MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING_NAMES", "image-classification"),
        ("MODEL_FOR_ZERO_SHOT_OBJECT_DETECTION_MAPPING_NAMES", "zero-shot-object-detection"),
        ("MODEL_FOR_OBJECT_DETECTION_MAPPING_NAMES", "object-detection"),
        ("MODEL_FOR_IMAGE_SEGMENTATION_MAPPING_NAMES", "image-segmentation"),
        ("MODEL_FOR_IMAGE_TO_IMAGE_MAPPING_NAMES", "image-to-image"),
        ("MODEL_FOR_IMAGE_MAPPING_NAMES", "image-feature-extraction"),
        ("MODEL_FOR_CAUSAL_LM_MAPPING_NAMES", "text-generation"),
        ("MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING_NAMES", "table-question-answering"),
        ("MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING_NAMES", "document-question-answering"),
        ("MODEL_FOR_QUESTION_ANSWERING_MAPPING_NAMES", "question-answering"),
        ("MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMES", "text2text-generation"),
        ("MODEL_FOR_NEXT_SENTENCE_PREDICTION_MAPPING_NAMES", "next-sentence-prediction"),
        ("MODEL_FOR_MULTIPLE_CHOICE_MAPPING_NAMES", "multiple-choice"),
        ("MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING_NAMES", "text-classification"),
        ("MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING_NAMES", "token-classification"),
        ("MODEL_FOR_MASKED_LM_MAPPING_NAMES", "fill-mask"),
        ("MODEL_FOR_MASK_GENERATION_MAPPING_NAMES", "mask-generation"),
        ("MODEL_FOR_PRETRAINING_MAPPING_NAMES", "pretraining"),
    ]
)
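
# Illustrative sketch: `MODELS_TO_PIPELINE` maps the *names* of the auto-model
# mappings (defined in `transformers.models.auto.modeling_auto`) to pipeline tags, so
# resolving a sample for a model class means scanning those mappings first. The lookup
# below is an assumption-laden demo, not the library's own resolution code; it expects
# the caller to pass the imported `modeling_auto` module.
def _demo_sample_for_model(model_name, modeling_auto):
    for mapping_name, task in MODELS_TO_PIPELINE.items():
        mapping = getattr(modeling_auto, mapping_name, {})
        if model_name in mapping.values():
            return PIPELINE_TASKS_TO_SAMPLE_DOCSTRINGS[task]
    raise ValueError(f"No sample docstring found for {model_name}")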
TF_TOKEN_CLASSIFICATION_SAMPLE = r"""
    Example:

    ```python
    >>> from transformers import AutoTokenizer, {model_class}
    >>> import tensorflow as tf

    >>> tokenizer = AutoTokenizer.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> inputs = tokenizer(
    ...     "HuggingFace is a company based in Paris and New York", add_special_tokens=False, return_tensors="tf"
    ... )

    >>> logits = model(**inputs).logits
    >>> predicted_token_class_ids = tf.math.argmax(logits, axis=-1)

    >>> # Note that tokens are classified rather than input words, which means that
    >>> # there might be more predicted token classes than words.
    >>> # Multiple token classes might account for the same word
    >>> predicted_tokens_classes = [model.config.id2label[t] for t in predicted_token_class_ids[0].numpy().tolist()]
    >>> predicted_tokens_classes
    {expected_output}
    ```

    ```python
    >>> labels = predicted_token_class_ids
    >>> loss = tf.math.reduce_mean(model(**inputs, labels=labels).loss)
    >>> round(float(loss), 2)
    {expected_loss}
    ```
"""


TF_QUESTION_ANSWERING_SAMPLE = r"""
    Example:

    ```python
    >>> from transformers import AutoTokenizer, {model_class}
    >>> import tensorflow as tf

    >>> tokenizer = AutoTokenizer.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> question, text = "Who was Jim Henson?", "Jim Henson was a nice puppet"

    >>> inputs = tokenizer(question, text, return_tensors="tf")
    >>> outputs = model(**inputs)

    >>> answer_start_index = int(tf.math.argmax(outputs.start_logits, axis=-1)[0])
    >>> answer_end_index = int(tf.math.argmax(outputs.end_logits, axis=-1)[0])

    >>> predict_answer_tokens = inputs.input_ids[0, answer_start_index : answer_end_index + 1]
    >>> tokenizer.decode(predict_answer_tokens)
    {expected_output}
    ```

    ```python
    >>> # target is "nice puppet"
    >>> target_start_index = tf.constant([{qa_target_start_index}])
    >>> target_end_index = tf.constant([{qa_target_end_index}])

    >>> outputs = model(**inputs, start_positions=target_start_index, end_positions=target_end_index)
    >>> loss = tf.math.reduce_mean(outputs.loss)
    >>> round(float(loss), 2)
    {expected_loss}
    ```
"""


TF_SEQUENCE_CLASSIFICATION_SAMPLE = r"""
    Example:

    ```python
    >>> from transformers import AutoTokenizer, {model_class}
    >>> import tensorflow as tf

    >>> tokenizer = AutoTokenizer.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="tf")

    >>> logits = model(**inputs).logits

    >>> predicted_class_id = int(tf.math.argmax(logits, axis=-1)[0])
    >>> model.config.id2label[predicted_class_id]
    {expected_output}
    ```

    ```python
    >>> # To train a model on `num_labels` classes, you can pass `num_labels=num_labels` to `.from_pretrained(...)`
    >>> num_labels = len(model.config.id2label)
    >>> model = {model_class}.from_pretrained("{checkpoint}", num_labels=num_labels)

    >>> labels = tf.constant(1)
    >>> loss = model(**inputs, labels=labels).loss
    >>> round(float(loss), 2)
    {expected_loss}
    ```
"""


TF_MASKED_LM_SAMPLE = r"""
    Example:

    ```python
    >>> from transformers import AutoTokenizer, {model_class}
    >>> import tensorflow as tf

    >>> tokenizer = AutoTokenizer.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> inputs = tokenizer("The capital of France is {mask}.", return_tensors="tf")
    >>> logits = model(**inputs).logits

    >>> # retrieve index of {mask}
    >>> mask_token_index = tf.where((inputs.input_ids == tokenizer.mask_token_id)[0])
    >>> selected_logits = tf.gather_nd(logits[0], indices=mask_token_index)

    >>> predicted_token_id = tf.math.argmax(selected_logits, axis=-1)
    >>> tokenizer.decode(predicted_token_id)
    {expected_output}
    ```

    ```python
    >>> labels = tokenizer("The capital of France is Paris.", return_tensors="tf")["input_ids"]
    >>> # mask labels of non-{mask} tokens
    >>> labels = tf.where(inputs.input_ids == tokenizer.mask_token_id, labels, -100)

    >>> outputs = model(**inputs, labels=labels)
    >>> round(float(outputs.loss), 2)
    {expected_loss}
    ```
"""


TF_BASE_MODEL_SAMPLE = r"""
    Example:

    ```python
    >>> from transformers import AutoTokenizer, {model_class}
    >>> import tensorflow as tf

    >>> tokenizer = AutoTokenizer.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="tf")
    >>> outputs = model(inputs)

    >>> last_hidden_states = outputs.last_hidden_state
    ```
"""


TF_MULTIPLE_CHOICE_SAMPLE = r"""
    Example:

    ```python
    >>> from transformers import AutoTokenizer, {model_class}
    >>> import tensorflow as tf

    >>> tokenizer = AutoTokenizer.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> prompt = "In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced."
    >>> choice0 = "It is eaten with a fork and a knife."
    >>> choice1 = "It is eaten while held in the hand."

    >>> encoding = tokenizer([prompt, prompt], [choice0, choice1], return_tensors="tf", padding=True)
    >>> inputs = {{k: tf.expand_dims(v, 0) for k, v in encoding.items()}}
    >>> outputs = model(inputs)  # batch size is 1

    >>> # the linear classifier still needs to be trained
    >>> logits = outputs.logits
    ```
"""


TF_CAUSAL_LM_SAMPLE = r"""
    Example:

    ```python
    >>> from transformers import AutoTokenizer, {model_class}
    >>> import tensorflow as tf

    >>> tokenizer = AutoTokenizer.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="tf")
    >>> outputs = model(inputs)
    >>> logits = outputs.logits
    ```
"""


TF_SPEECH_BASE_MODEL_SAMPLE = r"""
    Example:

    ```python
    >>> from transformers import AutoProcessor, {model_class}
    >>> from datasets import load_dataset

    >>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation")
    >>> dataset = dataset.sort("id")
    >>> sampling_rate = dataset.features["audio"].sampling_rate

    >>> processor = AutoProcessor.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> # audio file is decoded on the fly
    >>> inputs = processor(dataset[0]["audio"]["array"], sampling_rate=sampling_rate, return_tensors="tf")
    >>> outputs = model(**inputs)

    >>> last_hidden_states = outputs.last_hidden_state
    >>> list(last_hidden_states.shape)
    {expected_output}
    ```
"""


TF_SPEECH_CTC_SAMPLE = r"""
    Example:

    ```python
    >>> from transformers import AutoProcessor, {model_class}
    >>> from datasets import load_dataset
    >>> import tensorflow as tf

    >>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation")
    >>> dataset = dataset.sort("id")
    >>> sampling_rate = dataset.features["audio"].sampling_rate

    >>> processor = AutoProcessor.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> # audio file is decoded on the fly
    >>> inputs = processor(dataset[0]["audio"]["array"], sampling_rate=sampling_rate, return_tensors="tf")
    >>> logits = model(**inputs).logits
    >>> predicted_ids = tf.math.argmax(logits, axis=-1)

    >>> # transcribe speech
    >>> transcription = processor.batch_decode(predicted_ids)
    >>> transcription[0]
    {expected_output}
    ```

    ```python
    >>> inputs["labels"] = processor(text=dataset[0]["text"], return_tensors="tf").input_ids

    >>> # compute loss
    >>> loss = model(**inputs).loss
    >>> round(float(loss), 2)
    {expected_loss}
    ```
"""


TF_VISION_BASE_MODEL_SAMPLE = r"""
    Example:

    ```python
    >>> from transformers import AutoImageProcessor, {model_class}
    >>> from datasets import load_dataset

    >>> dataset = load_dataset("huggingface/cats-image")
    >>> image = dataset["test"]["image"][0]

    >>> image_processor = AutoImageProcessor.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> inputs = image_processor(image, return_tensors="tf")
    >>> outputs = model(**inputs)

    >>> last_hidden_states = outputs.last_hidden_state
    >>> list(last_hidden_states.shape)
    {expected_output}
    ```
"""
TF_VISION_SEQ_CLASS_SAMPLE = r"""
    Example:

    ```python
    >>> from transformers import AutoImageProcessor, {model_class}
    >>> import tensorflow as tf
    >>> from datasets import load_dataset

    >>> dataset = load_dataset("huggingface/cats-image")
    >>> image = dataset["test"]["image"][0]

    >>> image_processor = AutoImageProcessor.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> inputs = image_processor(image, return_tensors="tf")
    >>> logits = model(**inputs).logits

    >>> # model predicts one of the 1000 ImageNet classes
    >>> predicted_label = int(tf.math.argmax(logits, axis=-1))
    >>> print(model.config.id2label[predicted_label])
    {expected_output}
    ```
"""


TF_SAMPLE_DOCSTRINGS = {
    "SequenceClassification": TF_SEQUENCE_CLASSIFICATION_SAMPLE,
    "QuestionAnswering": TF_QUESTION_ANSWERING_SAMPLE,
    "TokenClassification": TF_TOKEN_CLASSIFICATION_SAMPLE,
    "MultipleChoice": TF_MULTIPLE_CHOICE_SAMPLE,
    "MaskedLM": TF_MASKED_LM_SAMPLE,
    "LMHead": TF_CAUSAL_LM_SAMPLE,
    "BaseModel": TF_BASE_MODEL_SAMPLE,
    "SpeechBaseModel": TF_SPEECH_BASE_MODEL_SAMPLE,
    "CTC": TF_SPEECH_CTC_SAMPLE,
    "VisionBaseModel": TF_VISION_BASE_MODEL_SAMPLE,
    "ImageClassification": TF_VISION_SEQ_CLASS_SAMPLE,
}


FLAX_TOKEN_CLASSIFICATION_SAMPLE = r"""
    Example:

    ```python
    >>> from transformers import AutoTokenizer, {model_class}

    >>> tokenizer = AutoTokenizer.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="jax")

    >>> outputs = model(**inputs)
    >>> logits = outputs.logits
    ```
"""

# The sequence-classification example is identical to the token-classification one.
FLAX_SEQUENCE_CLASSIFICATION_SAMPLE = FLAX_TOKEN_CLASSIFICATION_SAMPLE


FLAX_QUESTION_ANSWERING_SAMPLE = r"""
    Example:

    ```python
    >>> from transformers import AutoTokenizer, {model_class}

    >>> tokenizer = AutoTokenizer.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> question, text = "Who was Jim Henson?", "Jim Henson was a nice puppet"
    >>> inputs = tokenizer(question, text, return_tensors="jax")

    >>> outputs = model(**inputs)
    >>> start_scores = outputs.start_logits
    >>> end_scores = outputs.end_logits
    ```
"""


FLAX_MASKED_LM_SAMPLE = r"""
    Example:

    ```python
    >>> from transformers import AutoTokenizer, {model_class}

    >>> tokenizer = AutoTokenizer.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> inputs = tokenizer("The capital of France is {mask}.", return_tensors="jax")

    >>> outputs = model(**inputs)
    >>> logits = outputs.logits
    ```
"""


FLAX_BASE_MODEL_SAMPLE = r"""
    Example:

    ```python
    >>> from transformers import AutoTokenizer, {model_class}

    >>> tokenizer = AutoTokenizer.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="jax")
    >>> outputs = model(**inputs)

    >>> last_hidden_states = outputs.last_hidden_state
    ```
"""


FLAX_MULTIPLE_CHOICE_SAMPLE = r"""
    Example:

    ```python
    >>> from transformers import AutoTokenizer, {model_class}

    >>> tokenizer = AutoTokenizer.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> prompt = "In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced."
    >>> choice0 = "It is eaten with a fork and a knife."
    >>> choice1 = "It is eaten while held in the hand."

    >>> encoding = tokenizer([prompt, prompt], [choice0, choice1], return_tensors="jax", padding=True)
    >>> outputs = model(**{{k: v[None, :] for k, v in encoding.items()}})

    >>> logits = outputs.logits
    ```
"""


FLAX_CAUSAL_LM_SAMPLE = r"""
    Example:

    ```python
    >>> from transformers import AutoTokenizer, {model_class}

    >>> tokenizer = AutoTokenizer.from_pretrained("{checkpoint}")
    >>> model = {model_class}.from_pretrained("{checkpoint}")

    >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="np")
    >>> outputs = model(**inputs)

    >>> # retrieve logits for next token
    >>> next_token_logits = outputs.logits[:, -1]
    ```
"""


FLAX_SAMPLE_DOCSTRINGS = {
    "SequenceClassification": FLAX_SEQUENCE_CLASSIFICATION_SAMPLE,
    "QuestionAnswering": FLAX_QUESTION_ANSWERING_SAMPLE,
    "TokenClassification": FLAX_TOKEN_CLASSIFICATION_SAMPLE,
    "MultipleChoice": FLAX_MULTIPLE_CHOICE_SAMPLE,
    "MaskedLM": FLAX_MASKED_LM_SAMPLE,
    "BaseModel": FLAX_BASE_MODEL_SAMPLE,
    "LMHead": FLAX_CAUSAL_LM_SAMPLE,
}
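
# Illustrative sketch: the three dicts above share the same head-type keys, and
# `add_code_sample_docstrings` below picks one of them from the class-name prefix.
# This helper is a minimal demo of that selection, not library API.
def _demo_framework_dispatch(model_class: str) -> dict:
    if model_class.startswith("TF"):
        return TF_SAMPLE_DOCSTRINGS
    if model_class.startswith("Flax"):
        return FLAX_SAMPLE_DOCSTRINGS
    return PT_SAMPLE_DOCSTRINGS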
def filter_outputs_from_example(docstring, **kwargs):
    """
    Removes the lines testing an output with the doctest syntax in a code sample when it's set to `None`.
    """
    for key, value in kwargs.items():
        if value is not None:
            continue

        doc_key = "{" + key + "}"
        docstring = re.sub(rf"\n([^\n]+)\n\s+{doc_key}\n", "\n", docstring)

    return docstring


def add_code_sample_docstrings(
    *docstr,
    processor_class=None,
    checkpoint=None,
    output_type=None,
    config_class=None,
    mask="[MASK]",
    qa_target_start_index=14,
    qa_target_end_index=15,
    model_cls=None,
    modality=None,
    expected_output=None,
    expected_loss=None,
    real_checkpoint=None,
    revision=None,
):
    def docstring_decorator(fn):
        # model_class defaults to the function's class if not specified otherwise
        model_class = fn.__qualname__.split(".")[0] if model_cls is None else model_cls

        if model_class[:2] == "TF":
            sample_docstrings = TF_SAMPLE_DOCSTRINGS
        elif model_class[:4] == "Flax":
            sample_docstrings = FLAX_SAMPLE_DOCSTRINGS
        else:
            sample_docstrings = PT_SAMPLE_DOCSTRINGS

        # The templates are formatted with a superset of the keys they use; unused
        # keyword arguments are simply ignored by `str.format`.
        doc_kwargs = {
            "model_class": model_class,
            "processor_class": processor_class,
            "checkpoint": checkpoint,
            "mask": mask,
            "qa_target_start_index": qa_target_start_index,
            "qa_target_end_index": qa_target_end_index,
            "expected_output": expected_output,
            "expected_loss": expected_loss,
            "real_checkpoint": real_checkpoint,
            "fake_checkpoint": checkpoint,
            "true": "{true}",  # For <Tip warning={true}> syntax that conflicts with formatting.
        }

        if ("SequenceClassification" in model_class or "AudioClassification" in model_class) and modality == "audio":
            code_sample = sample_docstrings["AudioClassification"]
        elif "SequenceClassification" in model_class:
            code_sample = sample_docstrings["SequenceClassification"]
        elif "QuestionAnswering" in model_class:
            code_sample = sample_docstrings["QuestionAnswering"]
        elif "TokenClassification" in model_class:
            code_sample = sample_docstrings["TokenClassification"]
        elif "MultipleChoice" in model_class:
            code_sample = sample_docstrings["MultipleChoice"]
        elif "MaskedLM" in model_class or model_class in ["FlaubertWithLMHeadModel", "XLMWithLMHeadModel"]:
            code_sample = sample_docstrings["MaskedLM"]
        elif "LMHead" in model_class or "CausalLM" in model_class:
            code_sample = sample_docstrings["LMHead"]
        elif "CTC" in model_class:
            code_sample = sample_docstrings["CTC"]
        elif "AudioFrameClassification" in model_class:
            code_sample = sample_docstrings["AudioFrameClassification"]
        elif "XVector" in model_class and modality == "audio":
            code_sample = sample_docstrings["AudioXVector"]
        elif "Model" in model_class and modality == "audio":
            code_sample = sample_docstrings["SpeechBaseModel"]
        elif "Model" in model_class and modality == "vision":
            code_sample = sample_docstrings["VisionBaseModel"]
        elif "Model" in model_class or "Encoder" in model_class:
            code_sample = sample_docstrings["BaseModel"]
        elif "ImageClassification" in model_class:
            code_sample = sample_docstrings["ImageClassification"]
        else:
            raise ValueError(f"Docstring can't be built for model {model_class}")

        code_sample = filter_outputs_from_example(
            code_sample, expected_output=expected_output, expected_loss=expected_loss
        )
        if real_checkpoint is not None:
            code_sample = FAKE_MODEL_DISCLAIMER + code_sample
        func_doc = (fn.__doc__ or "") + "".join(docstr)
        output_doc = "" if output_type is None else _prepare_output_docstrings(output_type, config_class)
        built_doc = code_sample.format(**doc_kwargs)
        if revision is not None:
            # Only pull request references are supported here, since the revision is
            # meant for checkpoints that live on a hub PR.
            if not re.match(r"^refs/pr/\d+", revision):
                raise ValueError(
                    f"The provided revision '{revision}' is incorrect. It should point to"
                    " a pull request reference on the hub like 'refs/pr/6'"
                )
            built_doc = built_doc.replace(
                f'from_pretrained("{checkpoint}")', f'from_pretrained("{checkpoint}", revision="{revision}")'
            )
        fn.__doc__ = func_doc + output_doc + built_doc
        return fn

    return docstring_decorator


def replace_return_docstrings(output_type=None, config_class=None):
    def docstring_decorator(fn):
        func_doc = fn.__doc__
        lines = func_doc.split("\n")
        i = 0
        while i < len(lines) and re.search(r"^\s*Returns?:\s*$", lines[i]) is None:
            i += 1
        if i < len(lines):
            indent = len(_get_indent(lines[i]))
            lines[i] = _prepare_output_docstrings(output_type, config_class, min_indent=indent)
            func_doc = "\n".join(lines)
        else:
            raise ValueError(
                f"The function {fn} should have an empty 'Return:' or 'Returns:' in its docstring as placeholder, "
                f"current docstring is:\n{func_doc}"
            )
        fn.__doc__ = func_doc
        return fn

    return docstring_decorator


def copy_func(f):
    """Returns a copy of a function f."""
    # Create a new function object sharing the code, globals, defaults and closure of `f`.
    g = types.FunctionType(f.__code__, f.__globals__, name=f.__name__, argdefs=f.__defaults__, closure=f.__closure__)
    g = functools.update_wrapper(g, f)
    g.__kwdefaults__ = f.__kwdefaults__
    return g
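
# End-to-end illustrative sketch: decorating a forward method the way modeling files
# do. The class and checkpoint names are hypothetical; with `expected_output` and
# `expected_loss` left as None, `filter_outputs_from_example` strips the corresponding
# doctest output lines before the template is formatted.
def _demo_add_code_sample_docstrings():
    class BertForSequenceClassification:  # hypothetical stand-in for a real model class
        @add_code_sample_docstrings(
            processor_class="AutoTokenizer",
            checkpoint="some-org/tiny-random-bert",  # hypothetical checkpoint
            model_cls="BertForSequenceClassification",
        )
        def forward(self):
            """Runs the forward pass."""

    doc = BertForSequenceClassification.forward.__doc__
    assert "Example of single-label classification" in doc
    # `copy_func` pairs well with this: copy a decorated function first when the same
    # code object needs a different docstring, then mutate the copy's `__doc__`.
    forward_copy = copy_func(BertForSequenceClassification.forward)
    forward_copy.__doc__ = "A different docstring."
    assert BertForSequenceClassification.forward.__doc__ != forward_copy.__doc__
    return doc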