You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
Hello, I am trying to get YOLOX QAT working. Following the tutorial, I first performed PTQ on the YOLOX model, including the CLE and BC steps. After that, I used aimet_torch.quantsim.QuantizationSimModel.model to obtain a quantized model with the quant ops added, and replaced the original yolox.model in the YOLOX trainer with it. However, the forward function now returns a tensor, which is different from the tuple returned by the original yolox.model(). Could you please tell me how I should modify my code?
Error:
2024-01-29 05:45:16 | INFO | yolox.core.trainer:344 - Training of experiment is done and the best AP is 0.00
2024-01-29 05:45:16 | ERROR | yolox.core.launch:98 - An error has been caught in function 'launch', process 'MainProcess' (2285414), thread 'MainThread' (139938635503424):
Traceback (most recent call last):
File "/workspace/workdir_trt/YOLOX/tools/train.py", line 146, in <module>
launch(
└ <function launch at 0x7f4406e45dc0>
> File "/workspace/workdir_trt/YOLOX/yolox/core/launch.py", line 98, in launch
main_func(*args)
│ └ (╒═══════════════════╤═══════════════════════════════════════════════════════════════════════════════════════════════════════...
└ <function main at 0x7f4407be6820>
File "/workspace/workdir_trt/YOLOX/tools/train.py", line 123, in main
trainer.train()
│ └ <function Trainer.train at 0x7f4407be6940>
└ <yolox.core.trainer.Trainer object at 0x7f4407bec910>
File "/workspace/workdir_trt/YOLOX/yolox/core/trainer.py", line 193, in train
self.train_in_epoch()
│ └ <function Trainer.train_in_epoch at 0x7f4407be69d0>
└ <yolox.core.trainer.Trainer object at 0x7f4407bec910>
File "/workspace/workdir_trt/YOLOX/yolox/core/trainer.py", line 202, in train_in_epoch
self.train_in_iter()
│ └ <function Trainer.train_in_iter at 0x7f4407be6a60>
└ <yolox.core.trainer.Trainer object at 0x7f4407bec910>
File "/workspace/workdir_trt/YOLOX/yolox/core/trainer.py", line 208, in train_in_iter
self.train_one_iter()
│ └ <function Trainer.train_one_iter at 0x7f4407be6af0>
└ <yolox.core.trainer.Trainer object at 0x7f4407bec910>
File "/workspace/workdir_trt/YOLOX/yolox/core/trainer.py", line 229, in train_one_iter
loss = outputs["total_loss"]
└ tensor([[[8.0200e+00, 9.7085e+00, 1.6745e+01, ..., 7.9569e-03,
2.4659e-03, 5.6030e-03],
[1.2010e+01, 1.41...
IndexError: too many indices for tensor of dimension 3
Here is the code I modified.
in yolox.trainer: added:
def apply_cross_layer_equalization(model: torch.nn.Module, input_shape: tuple):
    """
    Run cross-layer equalization (CLE) on ``model`` via ``equalize_model``.

    Applying CLE on the model in place consists of:
        - Batch Norm Folding
        - Cross Layer Scaling
        - High Bias Fold
        - Converting any ReLU6 into ReLU

    :param model: the loaded model, handed unchanged to ``equalize_model``
    :param input_shape: the shape of the input to the model
    :return: None
    """
    equalize_model(model, input_shape)
def apply_bias_correction(model: torch.nn.Module, data_loader):
    """
    Apply bias correction to ``model``.

    The model is moved to CUDA and handed to ``bias_correction.correct_bias``
    together with quantization parameters for 8-bit weights and activations,
    'nearest' rounding and the 'tf_enhanced' quantization scheme. 16 samples
    are used for quantization and 16 for bias correction.

    :param model: The model to quantize
    :param data_loader: DataLoader passed to ``correct_bias`` as ``data_loader``
    :return: None
    """
    # Rounding mode can be 'nearest' or 'stochastic'
    quant_params = QuantParams(
        weight_bw=8,
        act_bw=8,
        round_mode='nearest',
        quant_scheme='tf_enhanced',
    )

    # Perform Bias Correction on the CUDA-resident model.
    cuda_model = model.to(device="cuda")
    bias_correction.correct_bias(
        cuda_model,
        quant_params,
        num_quant_samples=16,          # samples used during quantization
        data_loader=data_loader,
        num_bias_correct_samples=16,   # samples used for bias correction
    )
def forward_pass(decoder, model, data_loader):
    """
    Forward pass used as the callback for computing encodings.

    Every batch of ``data_loader`` is run through ``model`` in eval mode with
    gradients disabled. The train/eval mode the model had on entry is restored
    before returning, so a model that is trained afterwards (e.g. for QAT) is
    not silently left in eval mode by calibration.

    :param decoder: optional callable applied to the model output as
        ``decoder(outputs, dtype=outputs.type())``; skipped when None
    :param model: the model to run
    :param data_loader: iterable yielding 4-item batches whose first item is
        the image tensor; the remaining three items are ignored
    :return: None
    """
    # pylint: disable=no-member
    tensor_type = torch.cuda.FloatTensor

    # Remember the caller's mode; ``eval()`` below would otherwise leak out.
    was_training = model.training
    model = model.eval()
    try:
        with torch.no_grad():
            for imgs, _, _, _ in tqdm(data_loader):
                imgs = imgs.type(tensor_type)
                outputs = model(imgs)
                if decoder is not None:
                    outputs = decoder(outputs, dtype=outputs.type())
    finally:
        # Restore even if a batch fails, so the model is never stuck in eval.
        model.train(was_training)
def calculate_quantsim_accuracy(model: torch.nn.Module, evaluator: aimet_common.defs.EvalFunction, dataloader,
                                use_cuda: bool = False, logdir: str = '') -> Tuple["QuantizationSimModel", float]:
    """
    Build a quantization simulator for ``model`` and compute its encodings.

    Evaluation of the quantized model is currently disabled, so the accuracy
    element of the result is NaN ("not evaluated"). Previously the function
    failed with ``NameError`` on return because ``accuracy`` was never
    assigned.

    :param model: the loaded model
    :param evaluator: the Eval function to use for evaluation; unused while
        evaluation is disabled
    :param dataloader: currently unused; calibration data comes from a loader
        built inside this function with ``get_data_loader``
    :param use_cuda: when True, the model and the dummy input are moved to CUDA
    :param logdir: currently unused
    :return: a tuple of (quantsim, accuracy); ``quantsim.model`` is the
        quantized model, and accuracy is NaN until evaluation is re-enabled
    """
    input_shape = (1, 3, 640, 640)
    if use_cuda:
        model.to(torch.device('cuda'))
        dummy_input = torch.rand(input_shape).cuda()
    else:
        dummy_input = torch.rand(input_shape)

    # CLE and bias correction are intentionally disabled for now:
    # apply_cross_layer_equalization(model,input_shape)
    # apply_bias_correction(model,dataloader)

    # Loader used for computing encodings. No batch limit is applied here:
    # the forward-pass callback receives the whole loader.
    net_dataloader = get_data_loader(
        dataset_path='/workspace/datasets',
        img_size=(640, 640),
        batch_size=64,
        num_workers=4,
    )

    quantsim = QuantizationSimModel(model=model, quant_scheme='tf_enhanced',
                                    dummy_input=dummy_input, rounding_mode='nearest',
                                    default_output_bw=8, default_param_bw=8, in_place=False)

    # No decoder is needed for calibration, so bind it to None.
    forward_func = partial(forward_pass, None)
    quantsim.compute_encodings(forward_func, forward_pass_callback_args=net_dataloader)

    # Evaluation is disabled; report NaN rather than an undefined name.
    # accuracy = evaluator.evaluate(quantsim.model)
    accuracy = float('nan')
    return quantsim, accuracy
modified
def before_train(self):
    """
    Prepare everything needed before the training loop starts.

    Builds the model, optimizer, evaluator and training data loader, replaces
    the model with its quantization-simulation counterpart, then sets up the
    LR scheduler, optional DDP wrapping, optional EMA and the experiment
    logger.

    The quantsim model is explicitly put into train mode: the training step
    indexes the model output with ``outputs["total_loss"]``, which fails when
    the model returns its inference output (a plain tensor) instead.

    :raises ValueError: if ``self.args.logger`` is neither 'tensorboard' nor
        'wandb' (checked on rank 0 only)
    """
    logger.info("args: {}".format(self.args))
    logger.info("exp value:\n{}".format(self.exp))

    # model related init
    torch.cuda.set_device(self.local_rank)
    model = self.exp.get_model()
    logger.info(
        "Model Summary: {}".format(get_model_info(model, self.exp.test_size))
    )
    model.to(self.device)

    # solver related init
    # NOTE(review): the optimizer is created before the model is replaced by
    # ``quantsim.model`` below, and the simulator is built with in_place=False.
    # Confirm the optimizer references the parameters of the model actually
    # being trained.
    self.optimizer = self.exp.get_optimizer(self.args.batch_size)

    # value of epoch will be set in `resume_train`
    model = self.resume_train(model)

    # The evaluator is needed before quantization because it is passed in.
    self.evaluator = self.exp.get_evaluator(
        batch_size=self.args.batch_size, is_distributed=self.is_distributed
    )

    # data related init
    self.no_aug = self.start_epoch >= self.max_epoch - self.exp.no_aug_epochs
    self.train_loader = self.exp.get_data_loader(
        batch_size=self.args.batch_size,
        is_distributed=self.is_distributed,
        no_aug=self.no_aug,
        cache_img=self.args.cache,
    )
    logger.info("init prefetcher, this might take one minute or less...")
    self.prefetcher = DataPrefetcher(self.train_loader)
    # max_iter means iters per epoch
    self.max_iter = len(self.train_loader)

    # Quantization simulation: the accuracy element is not used here.
    # logger.info("PTQ complete, map50 accuracy:{}".format(map50))
    quantsim, _ = calculate_quantsim_accuracy(
        model=model,
        evaluator=self.evaluator,
        dataloader=self.train_loader,
        use_cuda=True,
        logdir='',
    )
    model = quantsim.model
    # Calibration runs the model in eval mode; switch back to train mode so
    # the forward pass yields the training output the trainer expects.
    model.train()

    self.lr_scheduler = self.exp.get_lr_scheduler(
        self.exp.basic_lr_per_img * self.args.batch_size, self.max_iter
    )
    if self.args.occupy:
        occupy_mem(self.local_rank)

    if self.is_distributed:
        model = DDP(model, device_ids=[self.local_rank], broadcast_buffers=False)

    if self.use_model_ema:
        self.ema_model = ModelEMA(model, 0.9998)
        self.ema_model.updates = self.max_iter * self.start_epoch

    self.model = model

    # Tensorboard and Wandb loggers
    if self.rank == 0:
        if self.args.logger == "tensorboard":
            self.tblogger = SummaryWriter(os.path.join(self.file_name, "tensorboard"))
        elif self.args.logger == "wandb":
            self.wandb_logger = WandbLogger.initialize_wandb_logger(
                self.args,
                self.exp,
                self.evaluator.dataloader.dataset
            )
        else:
            raise ValueError("logger must be either 'tensorboard' or 'wandb'")

    logger.info("Training start...")
    logger.info("\n{}".format(model))
The text was updated successfully, but these errors were encountered:
Hello, I am trying to get YOLOX QAT working. Following the tutorial, I first performed PTQ on the YOLOX model, including the CLE and BC steps. After that, I used aimet_torch.quantsim.QuantizationSimModel.model to obtain a quantized model with the quant ops added, and replaced the original yolox.model in the YOLOX trainer with it. However, the forward function now returns a tensor, which is different from the tuple returned by the original yolox.model(). Could you please tell me how I should modify my code?
Error:
Here is the code I modified.
in yolox.trainer:
added:
modified
The text was updated successfully, but these errors were encountered: