New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
MiniGPT4-Video support #10759
Comments
Currently, ipex-llm has some issues with such unmerged LoRA modules during inference. if self.low_resource:
self.llama_model = llm_model.from_pretrained(
llama_model,
torch_dtype=torch.float16,
device_map={"": "cpu"},
) @classmethod
def from_config(cls, cfg):
    """Build a model instance from a configuration object.

    ``cfg`` must expose ``.get(key, default)`` (e.g. an OmegaConf node or a
    plain dict). Optionally loads a MiniGPT-4 checkpoint (``cfg["ckpt"]``)
    and, in low-resource mode, merges LoRA weights and applies ipex-llm
    int8 optimization before moving the LLM to XPU.

    Returns:
        The constructed (and optionally checkpoint-initialized) model.
    """
    # These keys are read but intentionally not forwarded to the
    # constructor; the reads are kept to preserve the original access
    # pattern against cfg.
    q_former_model = cfg.get(
        "q_former_model",
        "https://storage.googleapis.com/sfr-vision-language-research/LAVIS/models/BLIP2/blip2_pretrained_flant5xxl.pth",
    )
    num_query_token = cfg.get("num_query_token")
    freeze_qformer = cfg.get("freeze_qformer", True)

    # Needed again after construction to decide on ipex-llm optimization.
    low_resource = cfg.get("low_resource", False)

    model = cls(
        vit_model=cfg.get("vit_model", "eva_clip_g"),
        img_size=cfg.get("image_size"),
        drop_path_rate=cfg.get("drop_path_rate", 0),
        use_grad_checkpoint=cfg.get("use_grad_checkpoint", False),
        vit_precision=cfg.get("vit_precision", "fp16"),
        freeze_vit=cfg.get("freeze_vit", True),
        llama_model=cfg.get("llama_model"),
        prompt_path=cfg.get("prompt_path", ""),
        prompt_template=cfg.get("prompt_template", ""),
        max_txt_len=cfg.get("max_txt_len", 300),
        low_resource=low_resource,
        end_sym=cfg.get("end_sym", '\n'),
        lora_r=cfg.get("lora_r", 64),
        lora_alpha=cfg.get("lora_alpha", 16),
        chat_template=cfg.get("chat_template", False),
        system_prompt=cfg.get("system_prompt", False),
        token_pooling=cfg.get("token_pooling", True),
        use_grad_checkpoint_llm=cfg.get("use_grad_checkpoint_llm", False),
        max_context_len=cfg.get("max_context_len", 3800),
        remove_template=cfg.get("remove_template", False),
    )

    ckpt_path = cfg.get("ckpt", "")  # load weights of MiniGPT-4
    if ckpt_path:
        print("Load Minigpt-4-LLM Checkpoint: {}".format(ckpt_path))
        ckpt = torch.load(ckpt_path, map_location="cpu")
        # strict=False: checkpoint covers only a subset of the parameters.
        msg = model.load_state_dict(ckpt['model'], strict=False)

    if low_resource:
        import ipex_llm
        # NOTE(review): LoRA weights are merged into the base model first —
        # ipex-llm apparently cannot handle unmerged LoRA modules during
        # inference (see discussion above) — then quantized to int8 on XPU.
        model.llama_model = model.llama_model.merge_and_unload()
        model.llama_model = ipex_llm.optimize_model(
            model.llama_model, low_bit="sym_int8", optimize_llm=True
        ).to("xpu")

    return model
|
I ran MiniGPT4-Video and got the following errors:
Traceback (most recent call last):
File "C:\Users\mi\miniconda3\lib\site-packages\gradio\queueing.py", line 501, in call_prediction
output = await route_utils.call_process_api(
File "C:\Users\mi\miniconda3\lib\site-packages\gradio\route_utils.py", line 253, in call_process_api
output = await app.get_blocks().process_api(
File "C:\Users\mi\miniconda3\lib\site-packages\gradio\blocks.py", line 1695, in process_api
result = await self.call_function(
File "C:\Users\mi\miniconda3\lib\site-packages\gradio\blocks.py", line 1235, in call_function
prediction = await anyio.to_thread.run_sync(
File "C:\Users\mi\miniconda3\lib\site-packages\anyio\to_thread.py", line 56, in run_sync
return await get_async_backend().run_sync_in_worker_thread(
File "C:\Users\mi\miniconda3\lib\site-packages\anyio_backends_asyncio.py", line 2144, in run_sync_in_worker_thread
return await future
File "C:\Users\mi\miniconda3\lib\site-packages\anyio_backends_asyncio.py", line 851, in run
result = context.run(func, *args)
File "C:\Users\mi\miniconda3\lib\site-packages\gradio\utils.py", line 692, in wrapper
response = f(*args, **kwargs)
File "D:\MiniGPT4-Video\minigpt4_video_demo.py", line 231, in gradio_demo_local
pred=run(video_path,instruction,model,vis_processor,gen_subtitles=has_sub)
File "D:\MiniGPT4-Video\minigpt4_video_demo.py", line 149, in run
answers = model.generate(prepared_images, prompt, max_new_tokens=args.max_new_tokens, do_sample=True, lengths=[length],num_beams=2)
File "C:\Users\mi\miniconda3\lib\site-packages\torch\utils_contextlib.py", line 116, in decorate_context
return func(*args, **kwargs)
File "D:\MiniGPT4-Video\minigpt4\models\mini_gpt4_llama_v2.py", line 626, in generate
outputs = self.llama_model.generate(
File "C:\Users\mi\miniconda3\lib\site-packages\peft\peft_model.py", line 580, in generate
return self.base_model.generate(**kwargs)
File "C:\Users\mi\miniconda3\lib\site-packages\torch\utils_contextlib.py", line 116, in decorate_context
return func(*args, **kwargs)
File "C:\Users\mi\miniconda3\lib\site-packages\transformers\generation\utils.py", line 1595, in generate
return self.beam_sample(
File "C:\Users\mi\miniconda3\lib\site-packages\transformers\generation\utils.py", line 3276, in beam_sample
outputs = self(
File "C:\Users\mi\miniconda3\lib\site-packages\torch\nn\modules\module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "C:\Users\mi\miniconda3\lib\site-packages\torch\nn\modules\module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "C:\Users\mi\miniconda3\lib\site-packages\transformers\models\llama\modeling_llama.py", line 1183, in forward
outputs = self.model(
File "C:\Users\mi\miniconda3\lib\site-packages\torch\nn\modules\module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "C:\Users\mi\miniconda3\lib\site-packages\torch\nn\modules\module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "C:\Users\mi\miniconda3\lib\site-packages\bigdl\llm\transformers\models\llama.py", line 114, in llama_model_forward_4_36
return llama_model_forward_4_36_internal(
File "C:\Users\mi\miniconda3\lib\site-packages\bigdl\llm\transformers\models\llama.py", line 1722, in llama_model_forward_4_36_internal
layer_outputs = decoder_layer(
File "C:\Users\mi\miniconda3\lib\site-packages\torch\nn\modules\module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "C:\Users\mi\miniconda3\lib\site-packages\torch\nn\modules\module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "C:\Users\mi\miniconda3\lib\site-packages\bigdl\llm\transformers\models\llama.py", line 228, in llama_decoder_forward
hidden_states, self_attn_weights, present_key_value = self.self_attn(
File "C:\Users\mi\miniconda3\lib\site-packages\torch\nn\modules\module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "C:\Users\mi\miniconda3\lib\site-packages\torch\nn\modules\module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "C:\Users\mi\miniconda3\lib\site-packages\bigdl\llm\transformers\models\llama.py", line 882, in llama_attention_forward_4_36
return forward_function(
File "C:\Users\mi\miniconda3\lib\site-packages\bigdl\llm\transformers\models\llama.py", line 1174, in llama_attention_forward_4_36_original
if should_use_mm_int4_qkv(self, device):
File "C:\Users\mi\miniconda3\lib\site-packages\bigdl\llm\transformers\models\llama.py", line 283, in should_use_mm_int4_qkv
return device.type == "xpu" and self.q_proj.qtype == SYM_INT4 and self.q_proj.enable_xetla
File "C:\Users\mi\miniconda3\lib\site-packages\torch\nn\modules\module.py", line 1695, in getattr
raise AttributeError(f"'{type(self).name}' object has no attribute '{name}'")
AttributeError: 'Linear' object has no attribute 'qtype'. Did you mean: 'type'?
Could you please help to support this model?
Thanks.
model project: https://github.com/Vision-CAIR/MiniGPT4-video/tree/main
The text was updated successfully, but these errors were encountered: