关于重复函数的梯度

# Tree-style value lookahead with a learned transition model (TF1 graph API).
#
# Starting from one random state, each of `depth` steps copies the current
# states `b` times, samples one action per copy, predicts the next state and
# its value, and stores the values/actions. The stored values are then
# combined per row, the best row is selected with argmax, and gradients of
# the loss on the selected first-step action are requested from Adam.
import tensorflow as tf
import numpy as np

ACTION_DIM = 1

# Random input state: np.random.rand(1, 5) yields a single row of 5 features.
# The row count is multiplied by b on every loop iteration below.
x = tf.Variable(np.random.rand(1, 5))
depth = 3
b = 4
v_list, a_list = [], []  # value and action store

# Make value estimates `depth` steps into the future by predicting
# intermediate states.
for i in range(depth):
    # The first iteration creates the 'mu', 'transition' and 'value' layers;
    # later iterations reuse those same weights.
    reuse = i > 0

    # Copy the states so that b different actions can be tried from each.
    x = tf.tile(x, [b, 1])

    mu = tf.layers.dense(x, ACTION_DIM, name='mu', reuse=reuse)
    action_distribution = tf.distributions.Normal(loc=mu, scale=tf.ones_like(mu))
    # sample(1) adds a leading sample axis; flatten it back to one action per row.
    a = tf.reshape(action_distribution.sample(1), [-1, ACTION_DIM])

    x_a = tf.concat([x, a], axis=1)  # concatenate action and state
    x = tf.layers.dense(x_a, x.shape[-1], name='transition', reuse=reuse)  # next state s'
    v = tf.layers.dense(x, 1, name='value', reuse=reuse)  # value of s'

    # At step i there are b ** (i + 1) rows, so this reshape always produces
    # b rows of b ** i entries each.
    # NOTE(review): tf.tile repeats the whole batch, so consecutive rows are
    # copies of *different* parent rows. Confirm that each reshaped row really
    # groups the descendants of a single first-step action.
    v_list.append(tf.reshape(v, [-1, b ** i]))
    a_list.append(tf.reshape(a, [-1, b ** i]))

# Backup our sum of max values along trajectory.
# NOTE(review): each sum_v[i - 1] is built from v_list[i], not from sum_v[i],
# so nothing accumulates across levels: sum_v[0] only combines levels 0 and 1,
# and the seed sum_v[-1] is never read. Confirm whether a running sum over all
# levels was intended.
sum_v = [None] * depth
sum_v[-1] = v_list[-1]
for i in reversed(range(1, depth)):
    max_v_i = tf.reduce_max(v_list[i], axis=1)
    sum_v[i - 1] = tf.reduce_max(v_list[i - 1], axis=1) + max_v_i

# Select the row with the largest backed-up value. argmax yields an integer
# index; gather_nd then picks that row's first-step value and action.
max_idx = tf.reshape(tf.argmax(sum_v[0]), [-1, 1])
v = tf.gather_nd(v_list[0], max_idx)
a = tf.gather_nd(a_list[0], max_idx)

loss = -tf.reduce_sum(a)
opt = tf.train.AdamOptimizer()
grads = opt.compute_gradients(loss)

1条回答

网友

1楼 · 发布于 2024-04-25 15:05:35

我认为这个问题源于您在定义 col_idx 时对 arg_max 的调用。arg_max 返回的是最大值所在的位置（索引），因此没有梯度。这是合理的：当最大值本身发生微小变化时，它在列表中的位置并不会随之改变。

另外，我也不确定对 tf.contrib.distributions.Normal 的调用是否对其输入变量可导，原因仅仅是它位于 contrib 模块中。如果在修复 arg_max 之后问题仍然存在，也许您可以尝试改用 TensorFlow 核心库中的实现。

相关问题更多 >

编程相关推荐

热门问题

热门文章