
add Qwen1.5 in LLaVA #1146

Open · wants to merge 1 commit into base: main
Conversation

@yiyexy commented Feb 18, 2024

No description provided.

@yiyexy changed the title from "add QWen2 in LLaVA" to "add Qwen1.5 in LLaVA" on Feb 18, 2024
@20191864218

I encountered this issue after making the modifications:

Traceback (most recent call last):
  File "/root/LLaVA/llava/train/train_mem.py", line 4, in <module>
    train()
  File "/root/LLaVA/llava/train/train.py", line 1085, in train
    trainer.train()
  File "/root/miniconda3/lib/python3.10/site-packages/transformers/trainer.py", line 1539, in train
    return inner_training_loop(
  File "/root/miniconda3/lib/python3.10/site-packages/transformers/trainer.py", line 1836, in _inner_training_loop
    for step, inputs in enumerate(epoch_iterator):
  File "/root/miniconda3/lib/python3.10/site-packages/accelerate/data_loader.py", line 384, in __iter__
    current_batch = next(dataloader_iter)
  File "/root/miniconda3/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 631, in __next__
    data = self._next_data()
  File "/root/miniconda3/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1346, in _next_data
    return self._process_data(data)
  File "/root/miniconda3/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1372, in _process_data
    data.reraise()
  File "/root/miniconda3/lib/python3.10/site-packages/torch/_utils.py", line 722, in reraise
    raise exception
TypeError: Caught TypeError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "/root/miniconda3/lib/python3.10/site-packages/torch/utils/data/_utils/worker.py", line 308, in _worker_loop
    data = fetcher.fetch(index)
  File "/root/miniconda3/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 54, in fetch
    return self.collate_fn(data)
  File "/root/LLaVA/llava/train/train.py", line 848, in __call__
    input_ids = torch.nn.utils.rnn.pad_sequence(
  File "/root/miniconda3/lib/python3.10/site-packages/torch/nn/utils/rnn.py", line 399, in pad_sequence
    return torch._C._nn.pad_sequence(sequences, batch_first, padding_value)
TypeError: pad_sequence(): argument 'padding_value' (position 3) must be float, not NoneType

@yiyexy (Author) commented Feb 23, 2024

@20191864218 You should make the change as follows:
[screenshot of the suggested change]
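
The screenshot above is not preserved in this page, so here is a hedged sketch of the kind of change it likely shows: the TypeError occurs because the collator pads with tokenizer.pad_token_id, which is None for the stock Qwen tokenizer. The model name and the choice of fallback token below are assumptions, not something confirmed by this PR:

```python
from transformers import AutoTokenizer

# Hypothetical example: Qwen1.5 ships without a pad token, so pad_token_id is None
# and torch.nn.utils.rnn.pad_sequence(..., padding_value=None) raises the TypeError above.
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen1.5-7B", trust_remote_code=True)

if tokenizer.pad_token_id is None:
    # Reuse an existing special token as the pad token; which token is appropriate
    # depends on the checkpoint, so treat this as a placeholder choice.
    tokenizer.pad_token = tokenizer.eos_token

# The collator in llava/train/train.py then pads with a real id instead of None,
# presumably along the lines of:
# torch.nn.utils.rnn.pad_sequence(input_ids, batch_first=True,
#                                 padding_value=tokenizer.pad_token_id)
```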

@20191864218

@20191864218 You should make the change as follows: [screenshot]

I followed your steps to modify the corresponding part of the code, but the error still persists.
Here is my code.

```python
from typing import List, Optional, Tuple, Union

import torch
import torch.nn as nn

from transformers import AutoConfig, AutoModelForCausalLM
from llava.model.language_model.Taiyi.configuration_qwen import QWenConfig
from llava.model.language_model.Taiyi.modeling_qwen import QWenModel, QWenLMHeadModel

from transformers.modeling_outputs import CausalLMOutputWithPast
from transformers.generation.utils import GenerateOutput

from ..llava_arch import LlavaMetaModel, LlavaMetaForCausalLM


class LlavaTaiyiConfig(QWenConfig):
    model_type = "llava_Taiyi"


class LlavaTaiyiModel(LlavaMetaModel, QWenModel):
    config_class = LlavaTaiyiConfig

    def __init__(self, config: QWenConfig):
        super(LlavaTaiyiModel, self).__init__(config)


class LlavaTaiyiForCausalLM(QWenLMHeadModel, LlavaMetaForCausalLM):
    config_class = LlavaTaiyiConfig

    def __init__(self, config):
        super(QWenLMHeadModel, self).__init__(config)
        self.transformer = LlavaTaiyiModel(config)
        self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False)

        # Initialize weights and apply final processing
        self.post_init()

    def get_model(self):
        return self.transformer

    def forward(
        self,
        input_ids: Optional[torch.LongTensor] = None,
        past_key_values: Optional[Tuple[Tuple[torch.Tensor]]] = None,
        attention_mask: Optional[torch.FloatTensor] = None,
        token_type_ids: Optional[torch.LongTensor] = None,
        position_ids: Optional[torch.LongTensor] = None,
        head_mask: Optional[torch.FloatTensor] = None,
        inputs_embeds: Optional[torch.FloatTensor] = None,
        encoder_hidden_states: Optional[torch.Tensor] = None,
        encoder_attention_mask: Optional[torch.FloatTensor] = None,
        labels: Optional[torch.LongTensor] = None,
        use_cache: Optional[bool] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        images: Optional[torch.FloatTensor] = None,
        image_sizes: Optional[List[List[int]]] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[Tuple, CausalLMOutputWithPast]:

        if inputs_embeds is None:
            (
                input_ids,
                position_ids,
                attention_mask,
                past_key_values,
                inputs_embeds,
                labels
            ) = self.prepare_inputs_labels_for_multimodal(
                input_ids,
                position_ids,
                attention_mask,
                past_key_values,
                labels,
                images,
                image_sizes
            )

        return super().forward(
            input_ids=input_ids,
            past_key_values=past_key_values,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
            encoder_hidden_states=encoder_hidden_states,
            encoder_attention_mask=encoder_attention_mask,
            labels=labels,
            use_cache=use_cache,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict
        )

    @torch.no_grad()
    def generate(
        self,
        inputs: Optional[torch.Tensor] = None,
        images: Optional[torch.Tensor] = None,
        image_sizes: Optional[torch.Tensor] = None,
        **kwargs,
    ) -> Union[GenerateOutput, torch.LongTensor]:
        position_ids = kwargs.pop("position_ids", None)
        attention_mask = kwargs.pop("attention_mask", None)
        if "inputs_embeds" in kwargs:
            raise NotImplementedError("`inputs_embeds` is not supported")

        if images is not None:
            (
                inputs,
                position_ids,
                attention_mask,
                _,
                inputs_embeds,
                _
            ) = self.prepare_inputs_labels_for_multimodal(
                inputs,
                position_ids,
                attention_mask,
                None,
                None,
                images,
                image_sizes=image_sizes
            )
        else:
            inputs_embeds = self.get_model().embed_tokens(inputs)

        return super().generate(
            position_ids=position_ids,
            attention_mask=attention_mask,
            inputs_embeds=inputs_embeds,
            **kwargs
        )

    def prepare_inputs_for_generation(self, input_ids, past_key_values=None,
                                      inputs_embeds=None, **kwargs):
        images = kwargs.pop("images", None)
        image_sizes = kwargs.pop("image_sizes", None)
        inputs = super().prepare_inputs_for_generation(
            input_ids, past_key_values=past_key_values, inputs_embeds=inputs_embeds, **kwargs
        )
        if images is not None:
            inputs['images'] = images
        if image_sizes is not None:
            inputs['image_sizes'] = image_sizes
        return inputs


AutoConfig.register("llava_Taiyi", LlavaTaiyiConfig)
AutoModelForCausalLM.register(LlavaTaiyiConfig, LlavaTaiyiForCausalLM)
```
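
As a side note on the two register() calls at the end: once this module is imported, any checkpoint whose config.json declares model_type "llava_Taiyi" is dispatched to these classes through the Auto* factories. A minimal usage sketch, where the module path and checkpoint directory are placeholders rather than paths from this PR:

```python
from transformers import AutoModelForCausalLM

# Importing the module runs AutoConfig.register / AutoModelForCausalLM.register;
# "llava.model.language_model.llava_taiyi" is a hypothetical module path.
import llava.model.language_model.llava_taiyi  # noqa: F401

# "/path/to/llava-taiyi-checkpoint" is a placeholder for a directory whose config.json
# contains "model_type": "llava_Taiyi"; from_pretrained then resolves LlavaTaiyiForCausalLM.
model = AutoModelForCausalLM.from_pretrained("/path/to/llava-taiyi-checkpoint")
```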

@bryanyzhu

Just curious, how is the two-stage training doing? Will qwen-based LLaVA perform better, maybe in terms of dialogue capability, reasoning capability, etc?

@20191864218

@20191864218 You should make the change as follows: [screenshot]

I followed your steps to modify the corresponding part of the code, but the error still persists. Here is my code. [quoted code, identical to the code posted above]
Thanks for the code reference you posted. I found that the tokenizer-related files were too old; after updating them to the latest files the problem was solved. Thanks again.

@yiyexy (Author) commented Feb 26, 2024

Just curious, how is the two-stage training doing? Will qwen-based LLaVA perform better, maybe in terms of dialogue capability, reasoning capability, etc?

Yes, this is just a test I conducted to explore the capabilities of larger SOTA language models under the LLaVA framework. Perhaps I should wait until I have more refined results before presenting them.

@AdonLee072348

[quoted from the exchange above: the suggested change, the posted code, and the screenshots]
Thanks for the code reference you posted. I found that the tokenizer-related files were too old; after updating them to the latest files the problem was solved. Thanks again.

Hi, I ran into the same problem when using qwen. By the tokenizer-related files, do you mean tokenization_qwen.py inside the qwen model? I am already using the qwen model files from huggingface, so which version should I update to?

@20191864218

[quoted exchange above omitted]

Hi, I ran into the same problem when using qwen. By the tokenizer-related files, do you mean tokenization_qwen.py inside the qwen model? I am already using the qwen model files from huggingface, so which version should I update to?

I replaced all of the files with the ones under this link:
https://huggingface.co/Qwen/Qwen-tokenizer
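
For anyone reproducing that fix, a minimal sketch of one way to pull the current tokenizer files from that repo and overwrite the ones in a locally downloaded Qwen checkpoint; the local model directory below is a placeholder, not a path from this thread:

```python
import shutil
from pathlib import Path
from huggingface_hub import snapshot_download

# Download the current tokenizer files from Qwen/Qwen-tokenizer.
tokenizer_dir = Path(snapshot_download(repo_id="Qwen/Qwen-tokenizer"))

# Placeholder path to a locally downloaded Qwen checkpoint.
model_dir = Path("/path/to/local-qwen-model")

# Copy every file over the stale tokenizer files shipped with the checkpoint.
for f in tokenizer_dir.iterdir():
    if f.is_file():
        shutil.copy2(f, model_dir / f.name)
```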

@AdonLee072348

[quoted exchange above omitted]

I replaced all of the files with the ones under this link: https://huggingface.co/Qwen/Qwen-tokenizer

Got it, thanks for the reply. So I should replace the files in the downloaded qwen model with the files from https://huggingface.co/Qwen/Qwen-tokenizer, right?

@20191864218

[quoted exchange above omitted]
Got it, thanks for the reply. So I should replace the files in the downloaded qwen model with the files from https://huggingface.co/Qwen/Qwen-tokenizer, right?

Yes, just replace them all.

@foreverhell

@20191864218 You should make the change as follows: [screenshot]

Thanks a lot! It's a great help.
