I am trying to compute the gradient of a loss with respect to a network's trainable weights using TensorFlow.js, in order to apply these gradients to my network's weights.
The error says it all.
Your issue has to do with tf.variableGrads. loss should be a function that returns a scalar, computed using the available tf tensor operators.
loss should not return a tensor as indicated in your question.
Here is an example of what loss should be:
// Trainable variables: gradients are taken with respect to these.
const a = tf.variable(tf.tensor1d([3, 4]));
const b = tf.variable(tf.tensor1d([5, 6]));
// Plain (non-variable) input tensor.
const x = tf.tensor1d([1, 2]);

// f is a function (not a tensor): it returns the scalar sum(a * x^2 + b * x).
const f = () => {
  const quadraticTerm = a.mul(x.square());
  const linearTerm = b.mul(x);
  return quadraticTerm.add(linearTerm).sum();
};

// Expected gradients: df/da = x ^ 2, df/db = x
// Gradient of f with respect to each variable, keyed by variable name.
const {value, grads} = tf.variableGrads(f);
for (const varName of Object.keys(grads)) {
  grads[varName].print();
}
/!\ Notice that the gradient is calculated with respect to the variables created using tf.variable.
Update:
You're not computing the gradients as they should be computed. Here is the fix.
function compute_loss(done, new_state, memory, agent, gamma=0.99) {
const f = () => { let reward_sum = 0.;
if(done) {
reward_sum = 0.;
} else {
reward_sum = agent.call(tf.oneHot(new_state, 12).reshape([1, 9, 12]))
.values.flatten().get(0);
}
let discounted_rewards = [];
let memory_reward_rev = memory.rewards;
for(let reward of memory_reward_rev.reverse()) {
reward_sum = reward + gamma * reward_sum;
discounted_rewards.push(reward_sum);
}
discounted_rewards.reverse();
let onehot_states = [];
for(let state of memory.states) {
onehot_states.push(tf.oneHot(state, 12));
}
let init_onehot = onehot_states[0];
for(let i=1; i
Notice that you can quickly run into a memory consumption issue. It is advisable to wrap the differentiated function in tf.tidy to dispose of the tensors.