Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tensorflow version 2.16.1 has retracing problem for keras.model.train_on_batch(). #67033

Open
garryyan2 opened this issue May 6, 2024 · 4 comments
Assignees
Labels

Comments

@garryyan2
Copy link

Issue type

Bug

Have you reproduced the bug with TensorFlow Nightly?

No

Source

binary

TensorFlow version

2,16,1

Custom code

No

OS platform and distribution

windows 10

Mobile device

No response

Python version

3.11

Bazel version

No response

GCC/compiler version

No response

CUDA/cuDNN version

No response

GPU model and memory

No response

Current behavior?

The problem happens in both windows and WSL (Ubuntu )operating system. Tensorflow 2.15.1 and earlier versions don't have the problem. The warning messages will be given after the train_on_batch() is called 5 and 6 times. The function is about 100 times slower in Tensorflow 2.16.1 in comparison to earlier versions.

Standalone code to reproduce the issue

import os
os.environ['KMP_DUPLICATE_LIB_OK']='True'
import numpy as np
import tensorflow as tf
import tensorboard
import time
from tensorflow import keras
from datetime import datetime
from tensorflow.keras.utils import plot_model
from tensorflow.keras.layers import Dense
import random



class trn_sizes:

    
    fm_length = 256
    fm_step = 128
    LSTMCell = 20
    #features = 14
    layerOne = 20
    layerTwo = 16
    layerThree = 8
    cepsTone = 12
    timeStep = 1   # is 1 only for now
    batches = 250 
    batch_samples = batches*timeStep*fm_step
    noise_insert_time = 2 # second
    noise_chi2_df = 10
    sampling_rate = 16000
    time_per_batch = batches*timeStep*fm_step/sampling_rate   # 2s for 250 batch size
    noise_chi2_scale = noise_insert_time*sampling_rate/noise_chi2_df    
       

trn_sizes = trn_sizes()

class inputBatch(object, ):

    
    def __init__(self, trn_sizes):
        self.state = 0 # 
        self.iirPinkSt = [0.0, 0.0, 0.0]
        self.pinkCount = 0
        self.pinkLeft = 0
        self.pinkBlkCnt = 0
        self.noisePower = 0.02
        self.iirPinkB1 = [0.010000000000000,  -0.018940429700000,   0.009585641562817,  -0.000621320035262]
        self.iirPinkA1 = [1.000000000000000,  -2.479309080000000 ,  1.985012853639686,  -0.505600430025288]
        self.remainingSamples = 0
        self.labelIdx = 0      # index based on the batch 
        self.samplesToBeFill = trn_sizes.batch_samples
        self.vadSampleIdx = 0  # index based on the audio file
        self.vadLabel = np.zeros(trn_sizes.batches)
        self.samplesOut = np.zeros(trn_sizes.batch_samples, dtype=float)
        self.indexa = 0

    
    def getBatch4(self, trn_sizes):  
        
        samples = np.random.normal(0, 0.1, (trn_sizes.batches-1)*trn_sizes.cepsTone) 
        mfccs = tf.convert_to_tensor(samples, dtype=tf.float32)
        mfccs3 = tf.reshape(mfccs, [trn_sizes.batches-1, 1, trn_sizes.cepsTone], )
        vadLabel3 = np.random.randint(2, size =(249, 1), dtype=np.int32)   
        vadLabelTf = tf.convert_to_tensor(vadLabel3, dtype=tf.float32)          
        return {mfccs3.ref(), vadLabelTf.ref()}  
    

def create_model(trn_sizes):
    
    inputs = keras.Input(shape=(1,trn_sizes.cepsTone), name="digits")
    x0 = keras.layers.Dense(trn_sizes.layerOne, activation="relu")(inputs)
    y1 = keras.layers.GRU(trn_sizes.LSTMCell)(x0)
    x1 = keras.layers.Dense(trn_sizes.layerTwo, activation="relu")(y1)
    x2 = keras.layers.Dense(trn_sizes.layerThree, activation="relu")(x1)
    outputs = keras.layers.Dense(1, activation='sigmoid', name="predictions")(x2)
    model = keras.Model(inputs=inputs, outputs=outputs)

    return model    
    

FLAGS = None

def main(FLAGS):

    print('tensorflow version: ' +  tf.__version__)
    print(tf.version.GIT_VERSION, tf.version.VERSION)
    dirName = os.path.dirname(os.path.abspath(__file__)) +'/MSLB0'
                    
    random.seed(11)
 
    vad = inputBatch(trn_sizes)  
    vadModel = create_model(trn_sizes)
    vadModel.compile(loss='binary_crossentropy',
    optimizer=keras.optimizers.Adam(learning_rate=1e-4), 
    metrics=['accuracy'])
  
    vadModel.build(input_shape = [None, 10, trn_sizes.cepsTone])
      
    
    print('model length = ', len(vadModel.weights))
    
    print(vadModel.summary())    
     
    batch_size=trn_sizes.batches;
    
    tensorboard = keras.callbacks.TensorBoard(
      #log_dir='/tmp/my_tf_logs',
      log_dir= "logs\\scalars\\" + datetime.now().strftime("%Y%m%d-%H%M%S"),
      histogram_freq=0,    
      write_graph=True,
      update_freq="batch"
    )
    tensorboard.set_model(vadModel)   
    
    start_time1 = time.perf_counter()
 
         
    for i in range(10):  
        mfccs3, vadLabel = vad.getBatch4(trn_sizes)
        if i % 10 == 0:
            print('mfccs3 value = ', tf.keras.backend.eval(mfccs3.deref()[1,:]) ) 
            print('vadLabel value = ', tf.keras.backend.eval(vadLabel.deref()[0:3]) )  
        
        logs = vadModel.train_on_batch(mfccs3.deref(), vadLabel.deref())
               
        
        if i %10 == 0:
            print('string logs = ', str(logs))
    end_time1 = time.perf_counter()
    total_duration1 = end_time1 - start_time1
    print( f"total_training time is { total_duration1: .2f}s ")
        
    print("training is done!")
        

main(FLAGS)

Relevant log output

WARNING:tensorflow:5 out of the last 5 calls to <function TensorFlowTrainer.make_train_function.<locals>.one_step_on_iterator at 0x0000023C2F73E020> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has reduce_retracing=True option that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for  more details.
@SuryanarayanaY
Copy link
Collaborator

Hi @garryyan2 ,

The warning will arise when we call a function multiple times (for eg in a loop) that is decorated with tf.function decorator. Since retracing is computationally expensive this will affect performance but the model will not have any issue wrt its metrics or predictions except time complexity increases.

You can check the info in warning and take necessary steps. Also you can pass reduce_retracing=True to tf.function as argument.

However provided code snippet I can't see any tf.function decorator being used. I can't reproduce the issue either due to missing dependencies as per gist.

@SuryanarayanaY SuryanarayanaY added the stat:awaiting response Status - Awaiting response from author label May 7, 2024
@garryyan2
Copy link
Author

garryyan2 commented May 7, 2024

Hi @SuryanarayanaY

Thank you very much for looking into the problem. I am sorry that the code will not run. Now, I have tried out the updated code in colab as in here. It should run.

The code runs without any issues. The reason is that colab is still in version 2.15.1. The problem is not the warning in Tensorflow version 2.16.1. The problem is the code runs about 100 times slower in the new version. I want to used the new version because that is the version that I am able to have both CUDA/cuDNN and tensorflow installed in order to run the code in GPU.

@google-ml-butler google-ml-butler bot removed the stat:awaiting response Status - Awaiting response from author label May 7, 2024
@SuryanarayanaY
Copy link
Collaborator

Hi @garryyan2 ,

I can't access the attached colab of your's. Please submit the same.

Please confirm you have installed tensorflow[and-cuda] package that can install compatible nvidia libraries and ensure GPUs are detectable by testing with tf.config.list_physical_devices('GPU') .

@SuryanarayanaY SuryanarayanaY added type:performance Performance Issue stat:awaiting response Status - Awaiting response from author labels May 8, 2024
@garryyan2
Copy link
Author

Hi @SuryanarayanaY

Here is the code:

import os
os.environ['KMP_DUPLICATE_LIB_OK']='True'
import numpy as np
import tensorflow as tf
import tensorboard
import time
from tensorflow import keras
from datetime import datetime
from tensorflow.keras.utils import plot_model
from tensorflow.keras.layers import Dense
import random

class trn_sizes:

fm_length = 256
fm_step = 128
LSTMCell = 20
#features = 14
layerOne = 20
layerTwo = 16
layerThree = 8
cepsTone = 12
timeStep = 1   # is 1 only for now
batches = 250
batch_samples = batches*timeStep*fm_step
noise_insert_time = 2 # second
noise_chi2_df = 10
sampling_rate = 16000
time_per_batch = batches*timeStep*fm_step/sampling_rate   # 2s for 250 batch size
noise_chi2_scale = noise_insert_time*sampling_rate/noise_chi2_df

trn_sizes = trn_sizes()

class inputBatch(object, ):

def __init__(self, trn_sizes):
    self.state = 0 #
    self.iirPinkSt = [0.0, 0.0, 0.0]
    self.pinkCount = 0
    self.pinkLeft = 0
    self.pinkBlkCnt = 0
    self.noisePower = 0.02
    self.iirPinkB1 = [0.010000000000000,  -0.018940429700000,   0.009585641562817,  -0.000621320035262]
    self.iirPinkA1 = [1.000000000000000,  -2.479309080000000 ,  1.985012853639686,  -0.505600430025288]
    self.remainingSamples = 0
    self.labelIdx = 0      # index based on the batch
    self.samplesToBeFill = trn_sizes.batch_samples
    self.vadSampleIdx = 0  # index based on the audio file
    self.vadLabel = np.zeros(trn_sizes.batches)
    self.samplesOut = np.zeros(trn_sizes.batch_samples, dtype=float)
    self.indexa = 0


def getBatch4(self, trn_sizes):

    samples = np.random.normal(0, 0.1, (trn_sizes.batches-1)*trn_sizes.cepsTone)
    mfccs = tf.convert_to_tensor(samples, dtype=tf.float32)
    mfccs3 = tf.reshape(mfccs, [trn_sizes.batches-1, 1, trn_sizes.cepsTone], )
    vadLabel3 = np.random.randint(2, size =(249, 1), dtype=np.int32)
    vadLabelTf = tf.convert_to_tensor(vadLabel3, dtype=tf.float32)
    return {mfccs3.ref(), vadLabelTf.ref()}

def create_model(trn_sizes):

inputs = keras.Input(shape=(1,trn_sizes.cepsTone), name="digits")
x0 = keras.layers.Dense(trn_sizes.layerOne, activation="relu")(inputs)
y1 = keras.layers.GRU(trn_sizes.LSTMCell)(x0)
x1 = keras.layers.Dense(trn_sizes.layerTwo, activation="relu")(y1)
x2 = keras.layers.Dense(trn_sizes.layerThree, activation="relu")(x1)
outputs = keras.layers.Dense(1, activation='sigmoid', name="predictions")(x2)
model = keras.Model(inputs=inputs, outputs=outputs)

return model

FLAGS = None

def main(FLAGS):

print('tensorflow version: ' +  tf.__version__)
print(tf.version.GIT_VERSION, tf.version.VERSION)

random.seed(11)

vad = inputBatch(trn_sizes)
vadModel = create_model(trn_sizes)
vadModel.compile(loss='binary_crossentropy',
optimizer=keras.optimizers.Adam(learning_rate=1e-4),
metrics=['accuracy'])

vadModel.build(input_shape = [None, 10, trn_sizes.cepsTone])


print('model length = ', len(vadModel.weights))

print(vadModel.summary())

batch_size=trn_sizes.batches;

start_time1 = time.perf_counter()


for i in range(10):
    mfccs3, vadLabel = vad.getBatch4(trn_sizes)
    if i % 10 == 0:
        print('mfccs3 value = ', tf.keras.backend.eval(mfccs3.deref()[1,:]) )
        print('vadLabel value = ', tf.keras.backend.eval(vadLabel.deref()[0:3]) )

    logs = vadModel.train_on_batch(mfccs3.deref(), vadLabel.deref())


    if i %10 == 0:
        print('string logs = ', str(logs))
end_time1 = time.perf_counter()
total_duration1 = end_time1 - start_time1
print( f"total_training time is { total_duration1: .2f}s ")

print("training is done!")

main(FLAGS)

Yes, tensorflow is installed with [and-cuda]. The GPU is detected.

Thank you for your help.

@google-ml-butler google-ml-butler bot removed the stat:awaiting response Status - Awaiting response from author label May 8, 2024
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
Projects
None yet
Development

No branches or pull requests

2 participants