moss-moon-003-sft Jupyter test fails #376

Open
YoloZyk opened this issue Jan 31, 2024 · 2 comments
Comments


YoloZyk commented Jan 31, 2024

The moss-moon-003-sft-plugin model published on ModelScope fails to deploy with the provided Jupyter test example. It fails both on my own server and on an instance provided by ModelScope. The error is AttributeError: 'MossTokenizer' object has no attribute 'encoder'. The GPU is an A40; I don't know whether that matters.
```
AttributeError                            Traceback (most recent call last)
File /opt/conda/lib/python3.10/site-packages/modelscope/utils/registry.py:212, in build_from_cfg(cfg, registry, group_key, default_args)
211 else:
--> 212 return obj_cls(**args)
213 except Exception as e:
214 # Normal TypeError does not print class name.

File ~/.cache/modelscope/modelscope_modules/moss-moon-003-sft-plugin/ms_wrapper.py:21, in mossmoon003sftpluginTextGenerationPipeline.__init__(self, model, *args, **kwargs)
16 def __init__(
17 self,
18 model: Union[Model, str],
19 *args,
20 **kwargs):
---> 21 model = mossmoon003sftpluginTextGeneration(model) if isinstance(model, str) else model
22 super().__init__(model=model, **kwargs)

File ~/.cache/modelscope/modelscope_modules/moss-moon-003-sft-plugin/ms_wrapper.py:43, in mossmoon003sftpluginTextGeneration.__init__(self, model_dir, *args, **kwargs)
42 # loading tokenizer
---> 43 self.tokenizer = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True)
44 self.model = AutoModelForCausalLM.from_pretrained(model_dir, device_map="auto", trust_remote_code=True).half()

File /opt/conda/lib/python3.10/site-packages/transformers/models/auto/tokenization_auto.py:774, in AutoTokenizer.from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs)
773 tokenizer_class.register_for_auto_class()
--> 774 return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
775 elif config_tokenizer_class is not None:

File /opt/conda/lib/python3.10/site-packages/modelscope/utils/hf_util.py:52, in patch_tokenizer_base.<locals>.from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)
51 model_dir = pretrained_model_name_or_path
---> 52 return ori_from_pretrained(cls, model_dir, *model_args, **kwargs)

File /opt/conda/lib/python3.10/site-packages/transformers/tokenization_utils_base.py:2028, in PreTrainedTokenizerBase.from_pretrained(cls, pretrained_model_name_or_path, cache_dir, force_download, local_files_only, token, revision, *init_inputs, **kwargs)
2026 logger.info(f"loading file {file_path} from cache at {resolved_vocab_files[file_id]}")
-> 2028 return cls._from_pretrained(
2029 resolved_vocab_files,
2030 pretrained_model_name_or_path,
2031 init_configuration,
2032 *init_inputs,
2033 token=token,
2034 cache_dir=cache_dir,
2035 local_files_only=local_files_only,
2036 _commit_hash=commit_hash,
2037 _is_local=is_local,
2038 **kwargs,
2039 )

File /opt/conda/lib/python3.10/site-packages/transformers/tokenization_utils_base.py:2260, in PreTrainedTokenizerBase._from_pretrained(cls, resolved_vocab_files, pretrained_model_name_or_path, init_configuration, token, cache_dir, local_files_only, _commit_hash, _is_local, *init_inputs, **kwargs)
2259 try:
-> 2260 tokenizer = cls(*init_inputs, **init_kwargs)
2261 except OSError:

File ~/.cache/huggingface/modules/transformers_modules/moss-moon-003-sft-plugin/tokenization_moss.py:149, in MossTokenizer.__init__(self, vocab_file, merges_file, errors, unk_token, bos_token, eos_token, pad_token, add_prefix_space, add_bos_token, **kwargs)
148 pad_token = AddedToken(pad_token, lstrip=False, rstrip=False) if isinstance(pad_token, str) else pad_token
--> 149 super().__init__(
150 errors=errors,
151 unk_token=unk_token,
152 bos_token=bos_token,
153 eos_token=eos_token,
154 pad_token=pad_token,
155 add_prefix_space=add_prefix_space,
156 add_bos_token=add_bos_token,
157 **kwargs,
158 )
159 self.add_bos_token = add_bos_token

File /opt/conda/lib/python3.10/site-packages/transformers/tokenization_utils.py:367, in PreTrainedTokenizer.__init__(self, **kwargs)
365 # 4. If some of the special tokens are not part of the vocab, we add them, at the end.
366 # the order of addition is the same as self.SPECIAL_TOKENS_ATTRIBUTES following tokenizers
--> 367 self._add_tokens(
368 [token for token in self.all_special_tokens_extended if token not in self._added_tokens_encoder],
369 special_tokens=True,
370 )
372 self._decode_use_source_tokenizer = False

File /opt/conda/lib/python3.10/site-packages/transformers/tokenization_utils.py:467, in PreTrainedTokenizer._add_tokens(self, new_tokens, special_tokens)
466 # TODO this is fairly slow to improve!
--> 467 current_vocab = self.get_vocab().copy()
468 new_idx = len(current_vocab) # only call this once, len gives the last index + 1

File ~/.cache/huggingface/modules/transformers_modules/moss-moon-003-sft-plugin/tokenization_moss.py:182, in MossTokenizer.get_vocab(self)
181 def get_vocab(self):
--> 182 return dict(self.encoder, **self.added_tokens_encoder)

AttributeError: 'MossTokenizer' object has no attribute 'encoder'
```

I later found that moss-moon-003-sft has the same problem. Does anyone know what's going on?
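Reading the traceback, it looks like a recent transformers release now calls get_vocab() (via _add_tokens) from PreTrainedTokenizer.__init__ before the custom MossTokenizer has assigned self.encoder, which would explain the AttributeError. A minimal reproduction of just the failing tokenizer load, outside the ModelScope pipeline, would be roughly the sketch below; the Hugging Face Hub id fnlp/moss-moon-003-sft-plugin is an assumption and may differ from the ModelScope model id.

```python
# Minimal reproduction sketch of the failing tokenizer load, outside the
# ModelScope pipeline. The Hub id "fnlp/moss-moon-003-sft-plugin" is an
# assumption; on recent transformers versions this load path raises
# AttributeError: 'MossTokenizer' object has no attribute 'encoder'.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(
    "fnlp/moss-moon-003-sft-plugin",
    trust_remote_code=True,
)
```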

Xuange923 commented

Hello, have you managed to solve this problem?


xu-song commented May 10, 2024

Adding revision="refs/pr/6" fixes it:

```python
from transformers import AutoTokenizer

# revision="refs/pr/6" pins the Hub pull-request branch that carries the
# updated tokenizer code, instead of the files on main.
tokenizer = AutoTokenizer.from_pretrained("fnlp/moss-moon-003-sft", trust_remote_code=True, revision="refs/pr/6")
print(tokenizer.encode("good job"))
```
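On the Hub, refs/pr/6 is the ref of an open pull request against the repository, so from_pretrained downloads that PR's files rather than those on main. If the model itself also has to be loaded from the same revision, a sketch like the one below should work; loading the model this way is an assumption, not something confirmed in the thread.

```python
# Hedged sketch: load both tokenizer and model while pinning the same Hub
# revision. Using refs/pr/6 for the model as well is an assumption, not
# something confirmed in this thread.
from transformers import AutoModelForCausalLM, AutoTokenizer

revision = "refs/pr/6"  # Hub ref of the open PR that updates the tokenizer code
tokenizer = AutoTokenizer.from_pretrained(
    "fnlp/moss-moon-003-sft", trust_remote_code=True, revision=revision
)
model = AutoModelForCausalLM.from_pretrained(
    "fnlp/moss-moon-003-sft", trust_remote_code=True, revision=revision
).half()
```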
