Is there an existing issue / discussion for this?
I have searched the existing issues / discussions
Is there an existing answer for this in the FAQ?
I have searched the FAQ
Current Behavior
AttributeError Traceback (most recent call last)
Cell In[1], line 242
239 prompts_batch = split_list_into_batches(descriptions, batch_size)
240 for i, prompts in tqdm(enumerate(prompts_batch)):
241 # if i > 1: break
--> 242 batch_response = batch_infer(prompts, instruction_extract)
243 for idx, tag in enumerate(batch_response):
244 title_temp = titles[i*batch_size + idx]
Cell In[1], line 68, in batch_infer(all_raw_text, instruction)
66 batch_input_ids = tokenizer(batch_raw_text, padding='longest')
67 batch_input_ids = torch.LongTensor(batch_input_ids['input_ids']).to(model.device)
---> 68 batch_out_ids = model.generate(batch_input_ids, return_dict_in_generate=False, generation_config=model.generation_config)
69 padding_lens = [batch_input_ids[i].eq(tokenizer.pad_token_id).sum().item() for i in range(batch_input_ids.size(0))]
71 batch_response = [
72 decode_tokens(
73 batch_out_ids[i][padding_lens[i]:],
(...)
80 ) for i in range(len(all_raw_text))
81 ]
File ~/.cache/huggingface/modules/transformers_modules/modeling_qwen.py:1259, in QWenLMHeadModel.generate(self, inputs, generation_config, logits_processor, stopping_criteria, prefix_allowed_tokens_fn, synced_gpus, assistant_model, streamer, **kwargs)
1256 else:
1257 logits_processor.append(stop_words_logits_processor)
-> 1259 return super().generate(
1260 inputs,
1261 generation_config=generation_config,
1262 logits_processor=logits_processor,
1263 stopping_criteria=stopping_criteria,
1264 prefix_allowed_tokens_fn=prefix_allowed_tokens_fn,
1265 synced_gpus=synced_gpus,
1266 assistant_model=assistant_model,
1267 streamer=streamer,
1268 **kwargs,
1269 )
File /opt/conda/lib/python3.8/site-packages/torch/utils/_contextlib.py:115, in context_decorator.<locals>.decorate_context(*args, **kwargs)
112 @functools.wraps(func)
113 def decorate_context(*args, **kwargs):
114 with ctx_factory():
--> 115 return func(*args, **kwargs)
File /opt/conda/lib/python3.8/site-packages/transformers/generation/utils.py:1642, in GenerationMixin.generate(self, inputs, generation_config, logits_processor, stopping_criteria, prefix_allowed_tokens_fn, synced_gpus, assistant_model, streamer, negative_prompt_ids, negative_prompt_attention_mask, **kwargs)
1634 input_ids, model_kwargs = self._expand_inputs_for_generation(
1635 input_ids=input_ids,
1636 expand_size=generation_config.num_return_sequences,
1637 is_encoder_decoder=self.config.is_encoder_decoder,
1638 **model_kwargs,
1639 )
1641 # 13. run sample
-> 1642 return self.sample(
1643 input_ids,
1644 logits_processor=logits_processor,
1645 logits_warper=logits_warper,
1646 stopping_criteria=stopping_criteria,
1647 pad_token_id=generation_config.pad_token_id,
1648 eos_token_id=generation_config.eos_token_id,
1649 output_scores=generation_config.output_scores,
1650 return_dict_in_generate=generation_config.return_dict_in_generate,
1651 synced_gpus=synced_gpus,
1652 streamer=streamer,
1653 **model_kwargs,
1654 )
1656 elif generation_mode == GenerationMode.BEAM_SEARCH:
1657 # 11. prepare beam search scorer
1658 beam_scorer = BeamSearchScorer(
1659 batch_size=batch_size,
1660 num_beams=generation_config.num_beams,
(...)
1665 max_length=generation_config.max_length,
1666 )
File /opt/conda/lib/python3.8/site-packages/transformers/generation/utils.py:2724, in GenerationMixin.sample(self, input_ids, logits_processor, stopping_criteria, logits_warper, max_length, pad_token_id, eos_token_id, output_attentions, output_hidden_states, output_scores, return_dict_in_generate, synced_gpus, streamer, **model_kwargs)
2721 model_inputs = self.prepare_inputs_for_generation(input_ids, **model_kwargs)
2723 # forward pass to get next token
-> 2724 outputs = self(
2725 **model_inputs,
2726 return_dict=True,
2727 output_attentions=output_attentions,
2728 output_hidden_states=output_hidden_states,
2729 )
2731 if synced_gpus and this_peer_finished:
2732 continue # don't waste resources running the code we don't need
File /opt/conda/lib/python3.8/site-packages/torch/nn/modules/module.py:1501, in Module._call_impl(self, *args, **kwargs)
1496 # If we don't have any hooks, we want to skip the rest of the logic in
1497 # this function, and just call forward.
1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1499 or _global_backward_pre_hooks or _global_backward_hooks
1500 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501 return forward_call(*args, **kwargs)
1502 # Do not call functions when jit is used
1503 full_backward_hooks, non_full_backward_hooks = [], []
File ~/.cache/huggingface/modules/transformers_modules/modeling_qwen.py:1043, in QWenLMHeadModel.forward(self, input_ids, past_key_values, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, encoder_hidden_states, encoder_attention_mask, labels, use_cache, output_attentions, output_hidden_states, return_dict)
1021 def forward(
1022 self,
1023 input_ids: Optional[torch.LongTensor] = None,
(...)
1036 return_dict: Optional[bool] = None,
1037 ) -> Union[Tuple, CausalLMOutputWithPast]:
1039 return_dict = (
1040 return_dict if return_dict is not None else self.config.use_return_dict
1041 )
-> 1043 transformer_outputs = self.transformer(
1044 input_ids,
1045 past_key_values=past_key_values,
1046 attention_mask=attention_mask,
1047 token_type_ids=token_type_ids,
1048 position_ids=position_ids,
1049 head_mask=head_mask,
1050 inputs_embeds=inputs_embeds,
1051 encoder_hidden_states=encoder_hidden_states,
1052 encoder_attention_mask=encoder_attention_mask,
1053 use_cache=use_cache,
1054 output_attentions=output_attentions,
1055 output_hidden_states=output_hidden_states,
1056 return_dict=return_dict,
1057 )
1058 hidden_states = transformer_outputs[0]
1060 lm_logits = self.lm_head(hidden_states)
File /opt/conda/lib/python3.8/site-packages/torch/nn/modules/module.py:1501, in Module._call_impl(self, *args, **kwargs)
1496 # If we don't have any hooks, we want to skip the rest of the logic in
1497 # this function, and just call forward.
1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1499 or _global_backward_pre_hooks or _global_backward_hooks
1500 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501 return forward_call(*args, **kwargs)
1502 # Do not call functions when jit is used
1503 full_backward_hooks, non_full_backward_hooks = [], []
File ~/.cache/huggingface/modules/transformers_modules/modeling_qwen.py:891, in QWenModel.forward(self, input_ids, past_key_values, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, encoder_hidden_states, encoder_attention_mask, use_cache, output_attentions, output_hidden_states, return_dict)
880 outputs = torch.utils.checkpoint.checkpoint(
881 create_custom_forward(block),
882 hidden_states,
(...)
888 encoder_attention_mask,
889 )
890 else:
--> 891 outputs = block(
892 hidden_states,
893 layer_past=layer_past,
894 rotary_pos_emb_list=rotary_pos_emb_list,
895 attention_mask=attention_mask,
896 head_mask=head_mask[i],
897 encoder_hidden_states=encoder_hidden_states,
898 encoder_attention_mask=encoder_attention_mask,
899 use_cache=use_cache,
900 output_attentions=output_attentions,
901 )
903 hidden_states = outputs[0]
904 if use_cache is True:
File /opt/conda/lib/python3.8/site-packages/torch/nn/modules/module.py:1501, in Module._call_impl(self, *args, **kwargs)
1496 # If we don't have any hooks, we want to skip the rest of the logic in
1497 # this function, and just call forward.
1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1499 or _global_backward_pre_hooks or _global_backward_hooks
1500 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501 return forward_call(*args, **kwargs)
1502 # Do not call functions when jit is used
1503 full_backward_hooks, non_full_backward_hooks = [], []
File ~/.cache/huggingface/modules/transformers_modules/modeling_qwen.py:610, in QWenBlock.forward(self, hidden_states, rotary_pos_emb_list, layer_past, attention_mask, head_mask, encoder_hidden_states, encoder_attention_mask, use_cache, output_attentions)
596 def forward(
597 self,
598 hidden_states: Optional[Tuple[torch.FloatTensor]],
(...)
606 output_attentions: Optional[bool] = False,
607 ):
608 layernorm_output = self.ln_1(hidden_states)
--> 610 attn_outputs = self.attn(
611 layernorm_output,
612 rotary_pos_emb_list,
613 layer_past=layer_past,
614 attention_mask=attention_mask,
615 head_mask=head_mask,
616 use_cache=use_cache,
617 output_attentions=output_attentions,
618 )
619 attn_output = attn_outputs[0]
621 outputs = attn_outputs[1:]
File /opt/conda/lib/python3.8/site-packages/torch/nn/modules/module.py:1501, in Module._call_impl(self, *args, **kwargs)
1496 # If we don't have any hooks, we want to skip the rest of the logic in
1497 # this function, and just call forward.
1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1499 or _global_backward_pre_hooks or _global_backward_hooks
1500 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501 return forward_call(*args, **kwargs)
1502 # Do not call functions when jit is used
1503 full_backward_hooks, non_full_backward_hooks = [], []
File ~/.cache/huggingface/modules/transformers_modules/modeling_qwen.py:499, in QWenAttention.forward(self, hidden_states, rotary_pos_emb_list, layer_past, attention_mask, head_mask, encoder_hidden_states, encoder_attention_mask, output_attentions, use_cache)
492 if (
493 self.use_flash_attn
494 and flash_attn_unpadded_func is not None
495 and not self.is_fp32
496 and query.is_cuda
497 ):
498 q, k, v = query, key, value
--> 499 attn_output = self.core_attention_flash(q, k, v, attention_mask=attention_mask)
500 else:
501 key_size = key[0].size(2) if self.use_cache_quantization else key.size(1)
File /opt/conda/lib/python3.8/site-packages/torch/nn/modules/module.py:1501, in Module._call_impl(self, *args, **kwargs)
1496 # If we don't have any hooks, we want to skip the rest of the logic in
1497 # this function, and just call forward.
1498 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1499 or _global_backward_pre_hooks or _global_backward_hooks
1500 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501 return forward_call(*args, **kwargs)
1502 # Do not call functions when jit is used
1503 full_backward_hooks, non_full_backward_hooks = [], []
File ~/.cache/huggingface/modules/transformers_modules/modeling_qwen.py:183, in FlashSelfAttention.forward(self, q, k, v, attention_mask)
182 def forward(self, q, k, v, attention_mask=None):
--> 183 assert all((i.dtype in [torch.float16, torch.bfloat16] for i in (q, k, v)))
184 assert all((i.is_cuda for i in (q, k, v)))
185 batch_size, seqlen_q = q.shape[0], q.shape[1]
File ~/.cache/huggingface/modules/transformers_modules/modeling_qwen.py:183, in <genexpr>(.0)
182 def forward(self, q, k, v, attention_mask=None):
--> 183 assert all((i.dtype in [torch.float16, torch.bfloat16] for i in (q, k, v)))
184 assert all((i.is_cuda for i in (q, k, v)))
185 batch_size, seqlen_q = q.shape[0], q.shape[1]
AttributeError: 'tuple' object has no attribute 'dtype'
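For reference, below is a minimal sketch of the batched-generation path the traceback goes through (tokenize a batch with padding, build the input ids, call `model.generate`). It is not the original notebook cell: the checkpoint name, the `<|extra_0|>` pad token, left padding, bf16 loading, and the simplified decoding step are assumptions added for illustration. The assertion that fails is in the flash-attention path and expects `q`, `k`, `v` to be fp16/bf16 CUDA tensors.

```python
# Minimal sketch, not the original notebook code: checkpoint name, pad token,
# padding side, dtype, and the decoding step are assumptions for illustration.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "Qwen/Qwen-7B-Chat"  # assumption: the report does not name the checkpoint

tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    trust_remote_code=True,
    padding_side="left",      # pad on the left so generation continues from real tokens
    pad_token="<|extra_0|>",  # assumption: reuse an unused Qwen special token as the pad token
)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    trust_remote_code=True,
    torch_dtype=torch.bfloat16,  # the flash-attention assert expects fp16/bf16 tensors
).eval()

def batch_infer(all_raw_text, instruction):
    """Sketch of the batch_infer helper seen at lines 66-81 of the notebook cell."""
    batch_raw_text = [f"{instruction}\n{text}" for text in all_raw_text]
    enc = tokenizer(batch_raw_text, padding="longest", return_tensors="pt").to(model.device)
    with torch.no_grad():
        batch_out_ids = model.generate(
            enc["input_ids"],
            attention_mask=enc["attention_mask"],
            return_dict_in_generate=False,
            generation_config=model.generation_config,
        )
    # Drop the (left-padded) prompt tokens before decoding each row.
    prompt_len = enc["input_ids"].shape[1]
    return tokenizer.batch_decode(batch_out_ids[:, prompt_len:], skip_special_tokens=True)
```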
Expected Behavior
No response
Steps To Reproduce
No response
Environment
Anything else?
No response