You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
ValueError Traceback (most recent call last)
Cell In[2], line 1
----> 1 from colossalai.booster import Booster
File ~/.local/lib/python3.11/site-packages/colossalai/booster/__init__.py:2
1 from .accelerator import Accelerator
----> 2 from .booster import Booster
3 from .plugin import Plugin
File ~/.local/lib/python3.11/site-packages/colossalai/booster/booster.py:17
15 from .accelerator import Accelerator
16 from .mixed_precision import MixedPrecision, mixed_precision_factory
---> 17 from .plugin import Plugin
18 from .plugin.pp_plugin_base import PipelinePluginBase
20 __all__ = ["Booster"]
File ~/.local/lib/python3.11/site-packages/colossalai/booster/plugin/__init__.py:1
----> 1 from .gemini_plugin import GeminiPlugin
2 from .hybrid_parallel_plugin import HybridParallelPlugin
3 from .low_level_zero_plugin import LowLevelZeroPlugin
File ~/.local/lib/python3.11/site-packages/colossalai/booster/plugin/gemini_plugin.py:31
29 from colossalai.interface import ModelWrapper, OptimizerWrapper
30 from colossalai.shardformer import ShardConfig, ShardFormer
---> 31 from colossalai.zero import GeminiDDP, GeminiOptimizer
32 from colossalai.zero.gemini.memory_tracer import MemStats
34 from .dp_plugin_base import DPPluginBase
File ~/.local/lib/python3.11/site-packages/colossalai/zero/__init__.py:1
----> 1 from .gemini import GeminiAdamOptimizer, GeminiDDP, GeminiOptimizer, get_static_torch_model
2 from .low_level import LowLevelZeroOptimizer
3 from .wrapper import zero_model_wrapper, zero_optim_wrapper
File ~/.local/lib/python3.11/site-packages/colossalai/zero/gemini/__init__.py:2
1 from .chunk import ChunkManager, TensorInfo, TensorState, search_chunk_configuration
----> 2 from .gemini_ddp import GeminiDDP
3 from .gemini_mgr import GeminiManager
4 from .gemini_optimizer import GeminiAdamOptimizer, GeminiOptimizer
File ~/.local/lib/python3.11/site-packages/colossalai/zero/gemini/gemini_ddp.py:34
31 from colossalai.utils import _cast_float, free_storage, is_ddp_ignored
33 from .chunk import Chunk, ChunkManager, TensorState, init_chunk_manager
---> 34 from .gemini_hook import GeminiZeROHook
35 from .gemini_mgr import GeminiManager
36 from .memory_tracer import MemStats, OrderedParamGenerator
File ~/.local/lib/python3.11/site-packages/colossalai/zero/gemini/gemini_hook.py:11
9 from colossalai.utils import is_ddp_ignored
10 from colossalai.zero.gemini import TensorState
---> 11 from colossalai.zero.gemini.gemini_mgr import GeminiManager
14 class TrainingPhase(Enum):
15 FORWARD = 0
File ~/.local/lib/python3.11/site-packages/colossalai/zero/gemini/gemini_mgr.py:9
7 from .chunk import Chunk, ChunkManager
8 from .memory_tracer import ChunkMemStatsCollector, MemStats
----> 9 from .placement_policy import PlacementPolicyFactory
12 class GeminiManager:
13 """
14 Stateful Tensor Manager, inspired from PatrickStar
15
(...)
24 memstats (MemStats, optional): a mem stats collected by a runtime mem tracer. if None then GeminiManager will collect it during a warmup iteration.
25 """
File ~/.local/lib/python3.11/site-packages/colossalai/zero/gemini/placement_policy.py:10
7 import torch
9 from colossalai.accelerator import get_accelerator
---> 10 from colossalai.legacy.utils.memory import colo_device_memory_capacity
11 from colossalai.zero.gemini.chunk import Chunk
13 from .chunk import Chunk, ChunkManager
File ~/.local/lib/python3.11/site-packages/colossalai/legacy/initialize.py:21
19 from colossalai.context import Config, ConfigException
20 from colossalai.interface import OptimizerWrapper
---> 21 from colossalai.legacy.amp import AMP_TYPE, convert_to_amp
22 from colossalai.legacy.amp.naive_amp import NaiveAMPModel
23 from colossalai.legacy.builder.builder import build_gradient_handler
File ~/.local/lib/python3.11/site-packages/colossalai/legacy/amp/__init__.py:11
8 from colossalai.context import Config
10 from .amp_type import AMP_TYPE
---> 11 from .apex_amp import convert_to_apex_amp
12 from .naive_amp import convert_to_naive_amp
13 from .torch_amp import convert_to_torch_amp
File ~/.local/lib/python3.11/site-packages/colossalai/legacy/amp/apex_amp/__init__.py:4
1 import torch.nn as nn
2 from torch.optim import Optimizer
----> 4 from .apex_amp import ApexAMPOptimizer
7 def convert_to_apex_amp(model: nn.Module, optimizer: Optimizer, amp_config):
8 r"""A helper function to wrap training components with Apex AMP modules
9
10 Args:
(...)
34 More details about ``amp_config`` refer to `amp_config <https://nvidia.github.io/apex/amp.html?highlight=apex%20amp>`_.
35 """
File ~/.local/lib/python3.11/site-packages/colossalai/legacy/amp/apex_amp/apex_amp.py:14
11 from torch import Tensor
13 from colossalai.interface import OptimizerWrapper
---> 14 from colossalai.legacy.utils import clip_grad_norm_fp32
17 class ApexAMPOptimizer(OptimizerWrapper):
18 """A wrapper class for APEX optimizer and it implements apex-specific backward and clip_grad_norm
19 methods
20 """
File ~/.local/lib/python3.11/site-packages/colossalai/legacy/utils/__init__.py:1
----> 1 from .checkpointing import load_checkpoint, save_checkpoint
2 from .common import (
3 clip_grad_norm_fp32,
4 copy_tensor_parallel_attributes,
(...)
16 sync_model_param,
17 )
18 from .data_sampler import DataParallelSampler, get_dataloader
File ~/.local/lib/python3.11/site-packages/colossalai/legacy/utils/common.py:16
14 from colossalai.legacy.core import global_context as gpc
15 from colossalai.legacy.global_variables import tensor_parallel_env as env
---> 16 from colossalai.legacy.tensor import ProcessGroup
17 from colossalai.tensor import ColoParameter
18 from colossalai.utils.multi_tensor_apply import multi_tensor_applier
File ~/.local/lib/python3.11/site-packages/colossalai/legacy/tensor/__init__.py:6
4 from .distspec import ReplicaSpec, ShardSpec
5 from .process_group import ProcessGroup
----> 6 from .tensor_spec import ColoTensorSpec
8 __all__ = [
9 "ComputePattern",
10 "ComputeSpec",
(...)
16 "ReplicaSpec",
17 ]
File ~/.local/lib/python3.11/site-packages/colossalai/legacy/tensor/tensor_spec.py:10
5 from colossalai.legacy.tensor.process_group import ProcessGroup
7 from .compute_spec import ComputeSpec
---> 10 @dataclass
11 class ColoTensorSpec:
12 """ColoTensorSpec
13
14 A data class for specifications of the ColoTensor.
15 It contains attributes of ProcessGroup, _DistSpec, ComputeSpec.
16 The latter two attributes are optional. If not set, their default values are Replicate() and None.
17 """
19 pg: ProcessGroup
File /usr/local/lib/python3.11/dataclasses.py:1230, in dataclass(cls, init, repr, eq, order, unsafe_hash, frozen, match_args, kw_only, slots, weakref_slot)
1227 return wrap
1229 # We're called as @dataclass without parens.
-> 1230 return wrap(cls)
File /usr/local/lib/python3.11/dataclasses.py:958, in _process_class(cls, init, repr, eq, order, unsafe_hash, frozen, match_args, kw_only, slots, weakref_slot)
955 kw_only = True
956 else:
957 # Otherwise it's a field of some type.
--> 958 cls_fields.append(_get_field(cls, name, type, kw_only))
960 for f in cls_fields:
961 fields[f.name] = f
File /usr/local/lib/python3.11/dataclasses.py:815, in _get_field(cls, a_name, a_type, default_kw_only)
811 # For real fields, disallow mutable defaults. Use unhashable as a proxy
812 # indicator for mutability. Read the hash attribute from the class,
813 # not the instance.
814 if f._field_type is _FIELD and f.default.__class__.__hash__ is None:
--> 815 raise ValueError(f'mutable default {type(f.default)} for field '
816 f'{f.name} is not allowed: use default_factory')
818 return f
ValueError: mutable default <class 'colossalai.legacy.tensor.distspec._DistSpec'> for field dist_attr is not allowed: use default_factory
🐛 Describe the bug
ValueError Traceback (most recent call last)
Cell In[2], line 1
----> 1 from colossalai.booster import Booster
File ~/.local/lib/python3.11/site-packages/colossalai/booster/__init__.py:2
1 from .accelerator import Accelerator
----> 2 from .booster import Booster
3 from .plugin import Plugin
File ~/.local/lib/python3.11/site-packages/colossalai/booster/booster.py:17
15 from .accelerator import Accelerator
16 from .mixed_precision import MixedPrecision, mixed_precision_factory
---> 17 from .plugin import Plugin
18 from .plugin.pp_plugin_base import PipelinePluginBase
20 __all__ = ["Booster"]
File ~/.local/lib/python3.11/site-packages/colossalai/booster/plugin/__init__.py:1
----> 1 from .gemini_plugin import GeminiPlugin
2 from .hybrid_parallel_plugin import HybridParallelPlugin
3 from .low_level_zero_plugin import LowLevelZeroPlugin
File ~/.local/lib/python3.11/site-packages/colossalai/booster/plugin/gemini_plugin.py:31
29 from colossalai.interface import ModelWrapper, OptimizerWrapper
30 from colossalai.shardformer import ShardConfig, ShardFormer
---> 31 from colossalai.zero import GeminiDDP, GeminiOptimizer
32 from colossalai.zero.gemini.memory_tracer import MemStats
34 from .dp_plugin_base import DPPluginBase
File ~/.local/lib/python3.11/site-packages/colossalai/zero/__init__.py:1
----> 1 from .gemini import GeminiAdamOptimizer, GeminiDDP, GeminiOptimizer, get_static_torch_model
2 from .low_level import LowLevelZeroOptimizer
3 from .wrapper import zero_model_wrapper, zero_optim_wrapper
File ~/.local/lib/python3.11/site-packages/colossalai/zero/gemini/__init__.py:2
1 from .chunk import ChunkManager, TensorInfo, TensorState, search_chunk_configuration
----> 2 from .gemini_ddp import GeminiDDP
3 from .gemini_mgr import GeminiManager
4 from .gemini_optimizer import GeminiAdamOptimizer, GeminiOptimizer
File ~/.local/lib/python3.11/site-packages/colossalai/zero/gemini/gemini_ddp.py:34
31 from colossalai.utils import _cast_float, free_storage, is_ddp_ignored
33 from .chunk import Chunk, ChunkManager, TensorState, init_chunk_manager
---> 34 from .gemini_hook import GeminiZeROHook
35 from .gemini_mgr import GeminiManager
36 from .memory_tracer import MemStats, OrderedParamGenerator
File ~/.local/lib/python3.11/site-packages/colossalai/zero/gemini/gemini_hook.py:11
9 from colossalai.utils import is_ddp_ignored
10 from colossalai.zero.gemini import TensorState
---> 11 from colossalai.zero.gemini.gemini_mgr import GeminiManager
14 class TrainingPhase(Enum):
15 FORWARD = 0
File ~/.local/lib/python3.11/site-packages/colossalai/zero/gemini/gemini_mgr.py:9
7 from .chunk import Chunk, ChunkManager
8 from .memory_tracer import ChunkMemStatsCollector, MemStats
----> 9 from .placement_policy import PlacementPolicyFactory
12 class GeminiManager:
13 """
14 Stateful Tensor Manager, inspired from PatrickStar
15
(...)
24 memstats (MemStats, optional): a mem stats collected by a runtime mem tracer. if None then GeminiManager will collect it during a warmup iteration.
25 """
File ~/.local/lib/python3.11/site-packages/colossalai/zero/gemini/placement_policy.py:10
7 import torch
9 from colossalai.accelerator import get_accelerator
---> 10 from colossalai.legacy.utils.memory import colo_device_memory_capacity
11 from colossalai.zero.gemini.chunk import Chunk
13 from .chunk import Chunk, ChunkManager
File ~/.local/lib/python3.11/site-packages/colossalai/legacy/__init__.py:1
----> 1 from .initialize import (
2 get_default_parser,
3 initialize,
4 launch,
5 launch_from_openmpi,
6 launch_from_slurm,
7 launch_from_torch,
8 )
10 __all__ = [
11 "launch",
12 "launch_from_openmpi",
(...)
16 "get_default_parser",
17 ]
File ~/.local/lib/python3.11/site-packages/colossalai/legacy/initialize.py:21
19 from colossalai.context import Config, ConfigException
20 from colossalai.interface import OptimizerWrapper
---> 21 from colossalai.legacy.amp import AMP_TYPE, convert_to_amp
22 from colossalai.legacy.amp.naive_amp import NaiveAMPModel
23 from colossalai.legacy.builder.builder import build_gradient_handler
File ~/.local/lib/python3.11/site-packages/colossalai/legacy/amp/__init__.py:11
8 from colossalai.context import Config
10 from .amp_type import AMP_TYPE
---> 11 from .apex_amp import convert_to_apex_amp
12 from .naive_amp import convert_to_naive_amp
13 from .torch_amp import convert_to_torch_amp
File ~/.local/lib/python3.11/site-packages/colossalai/legacy/amp/apex_amp/__init__.py:4
1 import torch.nn as nn
2 from torch.optim import Optimizer
----> 4 from .apex_amp import ApexAMPOptimizer
7 def convert_to_apex_amp(model: nn.Module, optimizer: Optimizer, amp_config):
8 r"""A helper function to wrap training components with Apex AMP modules
9
10 Args:
(...)
34 More details about ``amp_config`` refer to `amp_config <https://nvidia.github.io/apex/amp.html?highlight=apex%20amp>`_.
35 """
File ~/.local/lib/python3.11/site-packages/colossalai/legacy/amp/apex_amp/apex_amp.py:14
11 from torch import Tensor
13 from colossalai.interface import OptimizerWrapper
---> 14 from colossalai.legacy.utils import clip_grad_norm_fp32
17 class ApexAMPOptimizer(OptimizerWrapper):
18 """A wrapper class for APEX optimizer and it implements apex-specific backward and clip_grad_norm
19 methods
20 """
File ~/.local/lib/python3.11/site-packages/colossalai/legacy/utils/__init__.py:1
----> 1 from .checkpointing import load_checkpoint, save_checkpoint
2 from .common import (
3 clip_grad_norm_fp32,
4 copy_tensor_parallel_attributes,
(...)
16 sync_model_param,
17 )
18 from .data_sampler import DataParallelSampler, get_dataloader
File ~/.local/lib/python3.11/site-packages/colossalai/legacy/utils/checkpointing.py:16
13 except ImportError:
14 _EXTRA_STATE_KEY_SUFFIX = "_extra_state"
---> 16 from .common import is_using_pp
18 __all__ = ["save_checkpoint", "load_checkpoint"]
21 def broadcast_state_dict(state_dict, parallel_mode):
File ~/.local/lib/python3.11/site-packages/colossalai/legacy/utils/common.py:16
14 from colossalai.legacy.core import global_context as gpc
15 from colossalai.legacy.global_variables import tensor_parallel_env as env
---> 16 from colossalai.legacy.tensor import ProcessGroup
17 from colossalai.tensor import ColoParameter
18 from colossalai.utils.multi_tensor_apply import multi_tensor_applier
File ~/.local/lib/python3.11/site-packages/colossalai/legacy/tensor/__init__.py:6
4 from .distspec import ReplicaSpec, ShardSpec
5 from .process_group import ProcessGroup
----> 6 from .tensor_spec import ColoTensorSpec
8 __all__ = [
9 "ComputePattern",
10 "ComputeSpec",
(...)
16 "ReplicaSpec",
17 ]
File ~/.local/lib/python3.11/site-packages/colossalai/legacy/tensor/tensor_spec.py:10
5 from colossalai.legacy.tensor.process_group import ProcessGroup
7 from .compute_spec import ComputeSpec
---> 10 @dataclass
11 class ColoTensorSpec:
12 """ColoTensorSpec
13
14 A data class for specifications of the ColoTensor.
15 It contains attributes of ProcessGroup, _DistSpec, ComputeSpec.
16 The latter two attributes are optional. If not set, their default values are Replicate() and None.
17 """
19 pg: ProcessGroup
File /usr/local/lib/python3.11/dataclasses.py:1230, in dataclass(cls, init, repr, eq, order, unsafe_hash, frozen, match_args, kw_only, slots, weakref_slot)
1227 return wrap
1229 # We're called as @dataclass without parens.
-> 1230 return wrap(cls)
File /usr/local/lib/python3.11/dataclasses.py:1220, in dataclass.<locals>.wrap(cls)
1219 def wrap(cls):
-> 1220 return _process_class(cls, init, repr, eq, order, unsafe_hash,
1221 frozen, match_args, kw_only, slots,
1222 weakref_slot)
File /usr/local/lib/python3.11/dataclasses.py:958, in _process_class(cls, init, repr, eq, order, unsafe_hash, frozen, match_args, kw_only, slots, weakref_slot)
955 kw_only = True
956 else:
957 # Otherwise it's a field of some type.
--> 958 cls_fields.append(_get_field(cls, name, type, kw_only))
960 for f in cls_fields:
961 fields[f.name] = f
File /usr/local/lib/python3.11/dataclasses.py:815, in _get_field(cls, a_name, a_type, default_kw_only)
811 # For real fields, disallow mutable defaults. Use unhashable as a proxy
812 # indicator for mutability. Read the hash attribute from the class,
813 # not the instance.
814 if f._field_type is _FIELD and f.default.__class__.__hash__ is None:
--> 815 raise ValueError(f'mutable default {type(f.default)} for field '
816 f'{f.name} is not allowed: use default_factory')
818 return f
ValueError: mutable default <class 'colossalai.legacy.tensor.distspec._DistSpec'> for field dist_attr is not allowed: use default_factory
Environment
python3.11, cuda12.2, nvidia-cudnn-cu12==8.9.2.2, nvidia-nccl-cu12==2.18.1; colossalai 0.3.6, PyTorch 2.1.1
The text was updated successfully, but these errors were encountered: