如何优化这个 TensorFlow 自定义加权平均运算？

def weighted_average(inputs, weights):
    """Weight a sliding window around every position of each channel of `inputs`.

    The input is split along axis 3 and each slice is processed on its own.
    For one slice, every (i, j) offset of a `kernel_size` x `kernel_size`
    window yields a shifted crop; the crops are concatenated along axis 3,
    multiplied element-wise by `weights` and summed over that axis.

    Args:
        inputs: Tensor indexed as `[:, rows, cols, channels]` (the slicing
            below fixes it to rank 4). Dimensions 1, 2 and 3 are used in
            Python arithmetic, so they presumably must be statically known.
        weights: Tensor multiplied against the stacked crops. Assumed to
            broadcast against `(batch, rows - 2*pad, cols - 2*pad,
            kernel_size**2)` -- TODO confirm against the caller.

    Returns:
        Tensor formed by concatenating the per-channel results along axis 3;
        axes 1 and 2 are each `2 * pad` smaller than in `inputs`.
    """
    with tf.name_scope("weighted_average", "weighted_average", [inputs, weights]):
        in_shape = inputs.get_shape().as_list()
        w_shape = weights.get_shape().as_list()

        n_channels = in_shape[3]
        xs = tf.split(inputs, n_channels, axis=3)

        # The border width is derived from axis 1 only and reused for axis 2.
        pad = (in_shape[1] - w_shape[1]) // 2
        kernel_size = pad * 2 + 1

        for index in range(n_channels):
            x = xs[index]

            # One shifted crop per window offset, in row-major (i, j) order.
            x_stack = []
            for i in range(kernel_size):
                for j in range(kernel_size):
                    x_stack.append(
                        x[:, i:x.shape[1] - 2 * pad + i, j:x.shape[2] - 2 * pad + j, :]
                    )

            x_stack = tf.concat(x_stack, axis=3)
            # NOTE(review): `keep_dims` is the older TF 1.x spelling; later
            # releases name it `keepdims` -- confirm the target version
            # before changing it.
            x = tf.reduce_sum(tf.multiply(x_stack, weights), axis=3, keep_dims=True)

            xs[index] = x

        return tf.concat(xs, axis=3)

1条回答

网友

1楼 · 发布于 2024-04-23 16:55:05

用 tf.device('/cpu:0') 把这些运算强制放到 CPU 上执行，借助 Eigen 库可以让它更快。

我猜在 GPU 上计算时速度慢，可能与其中大量的张量变换有关。

def weighted_averagex(inputs, weights):
    """CPU-pinned weighted sum over a sliding window, one channel at a time.

    `inputs` is split along axis 3. For each slice, every offset of a square
    window produces a shifted crop; the crops are concatenated on axis 3,
    multiplied by `weights` and reduced over that axis. All of this runs
    under `tf.device('/cpu:0')`; only the final concatenation of the
    per-channel results is created outside the device scope.

    Args:
        inputs: Tensor indexed as `[:, rows, cols, channels]`. Dimensions 1,
            2 and 3 take part in Python arithmetic, so they presumably need
            to be statically known.
        weights: Tensor multiplied against the stacked crops. Assumed to
            broadcast against `(batch, rows - 2*pad, cols - 2*pad,
            window**2)` -- TODO confirm against the caller.

    Returns:
        The per-channel results concatenated along axis 3.
    """
    with tf.name_scope("weighted_average", "weighted_average", [inputs, weights]):
        with tf.device('/cpu:0'):
            input_dims = inputs.get_shape().as_list()
            weight_dims = weights.get_shape().as_list()

            channel_count = input_dims[3]
            channels = tf.split(inputs, channel_count, axis=3)

            # Border width comes from axis 1 only and is reused for axis 2.
            pad = (input_dims[1] - weight_dims[1]) // 2
            window = pad * 2 + 1
            # Row-major window offsets, matching the order of the weights axis.
            offsets = [(dy, dx) for dy in range(window) for dx in range(window)]

            averaged = []
            for channel in channels:
                rows = channel.shape[1] - 2 * pad
                cols = channel.shape[2] - 2 * pad
                shifted = tf.concat(
                    [channel[:, dy:rows + dy, dx:cols + dx, :] for dy, dx in offsets],
                    axis=3,
                )
                averaged.append(
                    tf.reduce_sum(tf.multiply(shifted, weights), axis=3, keep_dims=True)
                )

        # Deliberately outside the device scope, as in the original layout.
        return tf.concat(averaged, axis=3)

相关问题更多 >

编程相关推荐

热门问题

热门文章