Computing the gradient of the loss using TensorFlow.js

I am trying to compute the gradient of a loss with respect to a network's trainable weights using TensorFlow.js, in order to apply these gradients to my network's weights.

1 Answer

    The error says it all. Your issue has to do with tf.variableGrads. The loss function passed to it must return a scalar computed with TensorFlow.js tensor operators; it should not return a non-scalar tensor, as it does in your question.

    Here is an example of what loss should be:

    const a = tf.variable(tf.tensor1d([3, 4]));
    const b = tf.variable(tf.tensor1d([5, 6]));
    const x = tf.tensor1d([1, 2]);
    
    const f = () => a.mul(x.square()).add(b.mul(x)).sum(); // f must return a scalar
    // df/da = x ^ 2, df/db = x
    const {value, grads} = tf.variableGrads(f); // gradients of f with respect to each variable
    
    Object.keys(grads).forEach(varName => grads[varName].print());
    

    /!\ Notice that the gradients are calculated with respect to variables created using tf.variable
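
    Since the goal stated in the question is to apply these gradients to the network's weights, here is a minimal sketch (not part of the original answer) of how the grads map returned by tf.variableGrads can be passed to an optimizer with applyGradients; the learning rate is an arbitrary choice:

    // Continuing the example above: apply the computed gradients to the variables
    const optimizer = tf.train.sgd(0.1); // learning rate chosen arbitrarily
    optimizer.applyGradients(grads);     // updates a and b in place
    a.print();                           // a has been moved against its gradient
    b.print();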

    Update:

    You're not computing the gradients the way they should be computed. Here is the fix.

    function compute_loss(done, new_state, memory, agent, gamma = 0.99) {
        const f = () => {
            // Bootstrap the reward sum from the value of the new state
            let reward_sum = 0.;
            if (done) {
                reward_sum = 0.;
            } else {
                reward_sum = agent.call(tf.oneHot(new_state, 12).reshape([1, 9, 12]))
                                  .values.flatten().get(0);
            }

            // Discount the rewards backwards in time
            let discounted_rewards = [];
            let memory_reward_rev = memory.rewards;
            for (let reward of memory_reward_rev.reverse()) {
                reward_sum = reward + gamma * reward_sum;
                discounted_rewards.push(reward_sum);
            }
            discounted_rewards.reverse();

            // One-hot encode the visited states and stack them into a single tensor
            let onehot_states = [];
            for (let state of memory.states) {
                onehot_states.push(tf.oneHot(state, 12));
            }
            let init_onehot = onehot_states[0];

            for (let i = 1; i < onehot_states.length; i++) {
                init_onehot = init_onehot.concat(onehot_states[i]);
            }

            let log_val = agent.call(
                init_onehot.reshape([memory.states.length, 9, 12])
            );

            // Value loss: squared advantage
            let disc_reward_tensor = tf.tensor(discounted_rewards);
            let advantage = disc_reward_tensor.reshapeAs(log_val.values).sub(log_val.values);
            let value_loss = advantage.square();
            log_val.values.print();

            // Entropy term computed from the policy and the negated logits
            let policy = tf.softmax(log_val.logits);
            let logits_cpy = log_val.logits.clone();

            let entropy = policy.mul(logits_cpy.mul(tf.scalar(-1)));
            entropy = entropy.sum();

            // Policy loss: cross-entropy between the one-hot actions and the logits
            let memory_actions = [];
            for (let i = 0; i < memory.actions.length; i++) {
                memory_actions.push(new Array(2000).fill(0));
                memory_actions[i][memory.actions[i]] = 1;
            }
            memory_actions = tf.tensor(memory_actions);
            let policy_loss = tf.losses.softmaxCrossEntropy(
                memory_actions.reshape([memory.actions.length, 2000]),
                log_val.logits
            );

            // Total loss = 0.5 * value_loss + policy_loss - 0.01 * entropy
            let value_loss_copy = value_loss.clone();
            let entropy_mul = entropy.mul(tf.scalar(0.01)).mul(tf.scalar(-1));
            let total_loss_1 = value_loss_copy.mul(tf.scalar(0.5, 'float32'));

            let total_loss_2 = total_loss_1.add(policy_loss);
            let total_loss = total_loss_2.add(entropy_mul);
            total_loss.print();

            // tf.variableGrads requires the loss function to return a scalar
            return total_loss.mean().asScalar();
        };

        return tf.variableGrads(f);
    }
    

    Notice that you can quickly run into memory consumption issues. It is advisable to wrap the differentiated function in tf.tidy to dispose of intermediate tensors.
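
    As a rough sketch only (the names done, new_state, memory and agent are assumed from the code above, and the optimizer and learning rate are arbitrary choices), a training step wrapped in tf.tidy could look like this:

    // Hypothetical training step: tf.tidy disposes every intermediate tensor
    // created inside the callback, keeping only what is returned.
    const optimizer = tf.train.adam(1e-3); // learning rate is an arbitrary choice
    const {value, grads} = tf.tidy(() => compute_loss(done, new_state, memory, agent));
    optimizer.applyGradients(grads);       // apply the gradients to the tf.variable weights
    value.print();                         // scalar loss, useful for logging
    tf.dispose(grads);                     // free the gradient tensors once applied
    value.dispose();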
