
[BUG/Help] Running the sample code raises a TypeError #686

@wumingshi000

Description

Is there an existing issue for this?

  • I have searched the existing issues

Current Behavior

I am running exactly the sample code below, and it produces the TypeError shown underneath:
```python
from transformers import AutoTokenizer, AutoModel
import torch

model_path = "/home/songxinyue/new/models/chatglm2-6b"
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
device = torch.device("cuda:2" if torch.cuda.is_available() else "cpu")

model = AutoModel.from_pretrained(model_path, trust_remote_code=True).float()
# .half()

# Move the model to the device
# model.to(device)
model = model.eval()
response, history = model.chat(tokenizer, "hello", history=[])
print(response)
response, history = model.chat(tokenizer, "What should I do if I can't sleep at night?", history=history)
print(response)
```

```
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Cell In[1], line 13
9 # .half()
10 # Move the model to the device
11 #model.to(device)
12 model = model.eval()
---> 13 response, history = model.chat(tokenizer, "hello", history=[])
14 print(response)
15 response, history = model.chat(tokenizer, "What should I do if I can't sleep at night?", history=history)

File ~/new/environment/anaconda3/envs/sxy/lib/python3.10/site-packages/torch/autograd/grad_mode.py:27, in _DecoratorContextManager.__call__.<locals>.decorate_context(*args, **kwargs)
24 @functools.wraps(func)
25 def decorate_context(*args, **kwargs):
26 with self.clone():
---> 27 return func(*args, **kwargs)

File ~/.cache/huggingface/modules/transformers_modules/chatglm2-6b/modeling_chatglm.py:1033, in ChatGLMForConditionalGeneration.chat(self, tokenizer, query, history, max_length, num_beams, do_sample, top_p, temperature, logits_processor, **kwargs)
1030 gen_kwargs = {"max_length": max_length, "num_beams": num_beams, "do_sample": do_sample, "top_p": top_p,
1031 "temperature": temperature, "logits_processor": logits_processor, **kwargs}
1032 inputs = self.build_inputs(tokenizer, query, history=history)
-> 1033 outputs = self.generate(**inputs, **gen_kwargs)
1034 outputs = outputs.tolist()[0][len(inputs["input_ids"][0]):]
1035 response = tokenizer.decode(outputs)

File ~/new/environment/anaconda3/envs/sxy/lib/python3.10/site-packages/torch/autograd/grad_mode.py:27, in _DecoratorContextManager.__call__.<locals>.decorate_context(*args, **kwargs)
24 @functools.wraps(func)
25 def decorate_context(*args, **kwargs):
26 with self.clone():
---> 27 return func(*args, **kwargs)

File ~/new/environment/anaconda3/envs/sxy/lib/python3.10/site-packages/transformers/generation/utils.py:2015, in GenerationMixin.generate(self, inputs, generation_config, logits_processor, stopping_criteria, prefix_allowed_tokens_fn, synced_gpus, assistant_model, streamer, negative_prompt_ids, negative_prompt_attention_mask, **kwargs)
2007 input_ids, model_kwargs = self._expand_inputs_for_generation(
2008 input_ids=input_ids,
2009 expand_size=generation_config.num_return_sequences,
2010 is_encoder_decoder=self.config.is_encoder_decoder,
2011 **model_kwargs,
2012 )
2014 # 12. run sample (it degenerates to greedy search when generation_config.do_sample=False)
-> 2015 result = self._sample(
2016 input_ids,
2017 logits_processor=prepared_logits_processor,
2018 stopping_criteria=prepared_stopping_criteria,
2019 generation_config=generation_config,
2020 synced_gpus=synced_gpus,
2021 streamer=streamer,
2022 **model_kwargs,
2023 )
2025 elif generation_mode in (GenerationMode.BEAM_SAMPLE, GenerationMode.BEAM_SEARCH):
2026 # 11. prepare beam search scorer
2027 beam_scorer = BeamSearchScorer(
2028 batch_size=batch_size,
2029 num_beams=generation_config.num_beams,
(...)
2034 max_length=generation_config.max_length,
2035 )

File ~/new/environment/anaconda3/envs/sxy/lib/python3.10/site-packages/transformers/generation/utils.py:3014, in GenerationMixin._sample(self, input_ids, logits_processor, stopping_criteria, generation_config, synced_gpus, streamer, **model_kwargs)
3012 if streamer is not None:
3013 streamer.put(next_tokens.cpu())
-> 3014 model_kwargs = self._update_model_kwargs_for_generation(
3015 outputs,
3016 model_kwargs,
3017 is_encoder_decoder=self.config.is_encoder_decoder,
3018 )
3020 unfinished_sequences = unfinished_sequences & ~stopping_criteria(input_ids, scores)
3021 this_peer_finished = unfinished_sequences.max() == 0

File ~/.cache/huggingface/modules/transformers_modules/chatglm2-6b/modeling_chatglm.py:871, in ChatGLMForConditionalGeneration._update_model_kwargs_for_generation(self, outputs, model_kwargs, is_encoder_decoder, standardize_cache_format)
863 def _update_model_kwargs_for_generation(
864 self,
865 outputs: ModelOutput,
(...)
869 ) -> Dict[str, Any]:
870 # update past_key_values
--> 871 model_kwargs["past_key_values"] = self._extract_past_from_model_output(
872 outputs, standardize_cache_format=standardize_cache_format
873 )
875 # update attention mask
876 if "attention_mask" in model_kwargs:

TypeError: GenerationMixin._extract_past_from_model_output() got an unexpected keyword argument 'standardize_cache_format'
```
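
Reading the last frame: the cached `modeling_chatglm.py` (line 871) forwards `standardize_cache_format=` to `GenerationMixin._extract_past_from_model_output`, and the installed `transformers` rejects that keyword, so the model code and the library appear to be out of sync.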

Expected Behavior

No response

Steps To Reproduce

The code being run is just the few lines of sample code above; that is when the TypeError occurred. I suspect it's a problem with the environment, but I don't know which package specifically needs to be changed.
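
To narrow it down, here is a minimal check I would run (a sketch, assuming the problem is a mismatch between the cached `modeling_chatglm.py` and the installed `transformers`; `GenerationMixin._extract_past_from_model_output` is the method from the traceback):

```python
# Diagnostic sketch (assumption: newer transformers releases dropped the
# standardize_cache_format keyword that modeling_chatglm.py still passes).
import inspect

import transformers
from transformers.generation.utils import GenerationMixin

print("transformers version:", transformers.__version__)

# Guard the lookup in case the method was removed entirely in this release.
fn = getattr(GenerationMixin, "_extract_past_from_model_output", None)
if fn is None:
    print("_extract_past_from_model_output no longer exists")
else:
    sig = inspect.signature(fn)
    print("signature:", sig)
    print("accepts standardize_cache_format:",
          "standardize_cache_format" in sig.parameters)
```

If the keyword is missing, downgrading `transformers` to a release that still accepts it (something like `pip install "transformers<4.42"`, if I have the changelog boundary right) should be a workaround; the printed version is also worth comparing against the 4.28.1 listed under Environment below, in case that listing is stale.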

Environment

- OS: Linux
- Python: 3.10.14
- Transformers: 4.28.1
- PyTorch: 1.12.1+cu113
- CUDA Support (`python -c "import torch; print(torch.cuda.is_available())"`): True

Anything else?

No response
