Description
Is there an existing issue for this?
- I have searched the existing issues
Current Behavior
I am running the sample code shown below, and it raises the TypeError reported underneath it.
```python
from transformers import AutoTokenizer, AutoModel
import torch

model_path = "/home/songxinyue/new/models/chatglm2-6b"
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
device = torch.device("cuda:2" if torch.cuda.is_available() else "cpu")
model = AutoModel.from_pretrained(model_path, trust_remote_code=True).float()
# .half()
# Move the model to the device
# model.to(device)
model = model.eval()

response, history = model.chat(tokenizer, "hello", history=[])
print(response)
response, history = model.chat(tokenizer, "What should I do if I can't sleep at night?", history=history)
print(response)
```
```
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Cell In[1], line 13
9 # .half()
10 # Move the model to the device
11 #model.to(device)
12 model = model.eval()
---> 13 response, history = model.chat(tokenizer, "hello", history=[])
14 print(response)
15 response, history = model.chat(tokenizer, "What should I do if I can't sleep at night?", history=history)
File ~/new/environment/anaconda3/envs/sxy/lib/python3.10/site-packages/torch/autograd/grad_mode.py:27, in _DecoratorContextManager.__call__.<locals>.decorate_context(*args, **kwargs)
24 @functools.wraps(func)
25 def decorate_context(*args, **kwargs):
26 with self.clone():
---> 27 return func(*args, **kwargs)
File ~/.cache/huggingface/modules/transformers_modules/chatglm2-6b/modeling_chatglm.py:1033, in ChatGLMForConditionalGeneration.chat(self, tokenizer, query, history, max_length, num_beams, do_sample, top_p, temperature, logits_processor, **kwargs)
1030 gen_kwargs = {"max_length": max_length, "num_beams": num_beams, "do_sample": do_sample, "top_p": top_p,
1031 "temperature": temperature, "logits_processor": logits_processor, **kwargs}
1032 inputs = self.build_inputs(tokenizer, query, history=history)
-> 1033 outputs = self.generate(**inputs, **gen_kwargs)
1034 outputs = outputs.tolist()[0][len(inputs["input_ids"][0]):]
1035 response = tokenizer.decode(outputs)
File ~/new/environment/anaconda3/envs/sxy/lib/python3.10/site-packages/torch/autograd/grad_mode.py:27, in _DecoratorContextManager.__call__.<locals>.decorate_context(*args, **kwargs)
24 @functools.wraps(func)
25 def decorate_context(*args, **kwargs):
26 with self.clone():
---> 27 return func(*args, **kwargs)
File ~/new/environment/anaconda3/envs/sxy/lib/python3.10/site-packages/transformers/generation/utils.py:2015, in GenerationMixin.generate(self, inputs, generation_config, logits_processor, stopping_criteria, prefix_allowed_tokens_fn, synced_gpus, assistant_model, streamer, negative_prompt_ids, negative_prompt_attention_mask, **kwargs)
2007 input_ids, model_kwargs = self._expand_inputs_for_generation(
2008 input_ids=input_ids,
2009 expand_size=generation_config.num_return_sequences,
2010 is_encoder_decoder=self.config.is_encoder_decoder,
2011 **model_kwargs,
2012 )
2014 # 12. run sample (it degenerates to greedy search when generation_config.do_sample=False)
-> 2015 result = self._sample(
2016 input_ids,
2017 logits_processor=prepared_logits_processor,
2018 stopping_criteria=prepared_stopping_criteria,
2019 generation_config=generation_config,
2020 synced_gpus=synced_gpus,
2021 streamer=streamer,
2022 **model_kwargs,
2023 )
2025 elif generation_mode in (GenerationMode.BEAM_SAMPLE, GenerationMode.BEAM_SEARCH):
2026 # 11. prepare beam search scorer
2027 beam_scorer = BeamSearchScorer(
2028 batch_size=batch_size,
2029 num_beams=generation_config.num_beams,
(...)
2034 max_length=generation_config.max_length,
2035 )
File ~/new/environment/anaconda3/envs/sxy/lib/python3.10/site-packages/transformers/generation/utils.py:3014, in GenerationMixin._sample(self, input_ids, logits_processor, stopping_criteria, generation_config, synced_gpus, streamer, **model_kwargs)
3012 if streamer is not None:
3013 streamer.put(next_tokens.cpu())
-> 3014 model_kwargs = self._update_model_kwargs_for_generation(
3015 outputs,
3016 model_kwargs,
3017 is_encoder_decoder=self.config.is_encoder_decoder,
3018 )
3020 unfinished_sequences = unfinished_sequences & ~stopping_criteria(input_ids, scores)
3021 this_peer_finished = unfinished_sequences.max() == 0
File ~/.cache/huggingface/modules/transformers_modules/chatglm2-6b/modeling_chatglm.py:871, in ChatGLMForConditionalGeneration._update_model_kwargs_for_generation(self, outputs, model_kwargs, is_encoder_decoder, standardize_cache_format)
863 def _update_model_kwargs_for_generation(
864 self,
865 outputs: ModelOutput,
(...)
869 ) -> Dict[str, Any]:
870 # update past_key_values
--> 871 model_kwargs["past_key_values"] = self._extract_past_from_model_output(
872 outputs, standardize_cache_format=standardize_cache_format
873 )
875 # update attention mask
876 if "attention_mask" in model_kwargs:
TypeError: GenerationMixin._extract_past_from_model_output() got an unexpected keyword argument 'standardize_cache_format'
```
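The last frame shows the cached `modeling_chatglm.py` passing `standardize_cache_format` to `GenerationMixin._extract_past_from_model_output`, which the installed transformers apparently does not accept. As a sanity check (this is only a diagnostic sketch, not part of the model code), I can print the signature that is actually installed:

```python
import inspect

import transformers
from transformers.generation.utils import GenerationMixin

# Which transformers is actually installed, and what does the method
# called by the cached modeling_chatglm.py accept?
print(transformers.__version__)
method = getattr(GenerationMixin, "_extract_past_from_model_output", None)
if method is None:
    print("_extract_past_from_model_output does not exist in this version")
else:
    sig = inspect.signature(method)
    print(sig)
    # If this prints False, the keyword passed by modeling_chatglm.py is the
    # one raising the TypeError above.
    print("standardize_cache_format" in sig.parameters)
```

If that prints False, the mismatch would seem to be between the cached remote code for chatglm2-6b and the installed transformers version rather than anything in the model weights.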
Expected Behavior
No response
Steps To Reproduce
The code being run is just the few lines of sample code above; the TypeError occurs on the first `model.chat(...)` call. I suspect it's a problem with the environment, but I don't know which package specifically needs to be modified (see the check sketched below).
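To narrow down where the offending keyword comes from, this is a small check I can run against the cached remote-code file (the path is copied from the traceback above; the snippet itself is just a sketch):

```python
from pathlib import Path

# Path taken from the traceback: the model code is executed from the
# Hugging Face "remote code" cache, not from model_path itself.
cached = Path.home() / ".cache/huggingface/modules/transformers_modules/chatglm2-6b/modeling_chatglm.py"

if cached.exists():
    # Print every line of the cached modeling_chatglm.py that mentions the
    # keyword the installed transformers rejects.
    for lineno, line in enumerate(cached.read_text().splitlines(), start=1):
        if "standardize_cache_format" in line:
            print(lineno, line.strip())
else:
    print("cached remote code not found:", cached)
```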
Environment
- OS: Linux
- Python: 3.10.14
- Transformers: 4.28.1
- PyTorch: 1.12.1+cu113
- CUDA Support (`python -c "import torch; print(torch.cuda.is_available())"`): True
Anything else?
No response