7.2.5 循环
译者:Python 文档协作翻译小组,原文:Loop。
本文以 CC BY-NC-SA 4.0 协议发布,转载请保留作者署名和文章出处。
Python 文档协作翻译小组人手紧缺,有兴趣的朋友可以加入我们,完全公益性质。交流群:467338606。
Scan
- 重复的一般形式,可用于循环。
-
Reduction和map(在前面的维度上循环)是
scan
的特殊情况。 - 你沿着某个输入序列
scan
一个函数,在每个时间步骤产生输出。 - 这个函数可以看到你的函数的以前的K个时间步长。
-
sum()
可以通过在一个列表上scan z + x(i)函数计算得到,其中初始状态为z=0。 - 通常,for循环可以表示为
scan()
操作,scan
是Theano对循环最接近的实现。 - 使用
scan
比使用for循环的优点:- 作为符号图一部分的迭代次数。
- 最小化GPU传输(如果涉及GPU)。
- 通过连续步骤计算梯度。
- 使用编译的Theano函数比在Python中使用for循环稍快。
- 可以通过检测所需的实际内存量来降低总体内存使用量。
完整的文档可以在库中找到:Scan。
Scan示例:按元素计算tanh(x(t).dot(W) + b)
import theano
import theano.tensor as T
import numpy as np
# defining the tensor variables
X = T.matrix("X")
W = T.matrix("W")
b_sym = T.vector("b_sym")
results, updates = theano.scan(lambda v: T.tanh(T.dot(v, W) + b_sym), sequences=X)
compute_elementwise = theano.function(inputs=[X, W, b_sym], outputs=results)
# test values
x = np.eye(2, dtype=theano.config.floatX)
w = np.ones((2, 2), dtype=theano.config.floatX)
b = np.ones((2), dtype=theano.config.floatX)
b[1] = 2
print(compute_elementwise(x, w, b))
# comparison with numpy
print(np.tanh(x.dot(w) + b))
[[ 0.96402758 0.99505475]
[ 0.96402758 0.99505475]]
[[ 0.96402758 0.99505475]
[ 0.96402758 0.99505475]]
Scan示例:计算序列x(t) = tanh(x(t - 1).dot(W) + y(t).dot(U) + p(T - t).dot(V))
import theano
import theano.tensor as T
import numpy as np
# define tensor variables
X = T.vector("X")
W = T.matrix("W")
b_sym = T.vector("b_sym")
U = T.matrix("U")
Y = T.matrix("Y")
V = T.matrix("V")
P = T.matrix("P")
results, updates = theano.scan(lambda y, p, x_tm1: T.tanh(T.dot(x_tm1, W) + T.dot(y, U) + T.dot(p, V)),
sequences=[Y, P[::-1]], outputs_info=[X])
compute_seq = theano.function(inputs=[X, W, Y, U, P, V], outputs=results)
# test values
x = np.zeros((2), dtype=theano.config.floatX)
x[1] = 1
w = np.ones((2, 2), dtype=theano.config.floatX)
y = np.ones((5, 2), dtype=theano.config.floatX)
y[0, :] = -3
u = np.ones((2, 2), dtype=theano.config.floatX)
p = np.ones((5, 2), dtype=theano.config.floatX)
p[0, :] = 3
v = np.ones((2, 2), dtype=theano.config.floatX)
print(compute_seq(x, w, y, u, p, v))
# comparison with numpy
x_res = np.zeros((5, 2), dtype=theano.config.floatX)
x_res[0] = np.tanh(x.dot(w) + y[0].dot(u) + p[4].dot(v))
for i in range(1, 5):
x_res[i] = np.tanh(x_res[i - 1].dot(w) + y[i].dot(u) + p[4-i].dot(v))
print(x_res)
[[-0.99505475 -0.99505475]
[ 0.96471973 0.96471973]
[ 0.99998585 0.99998585]
[ 0.99998771 0.99998771]
[ 1\. 1\. ]]
[[-0.99505475 -0.99505475]
[ 0.96471973 0.96471973]
[ 0.99998585 0.99998585]
[ 0.99998771 0.99998771]
[ 1\. 1\. ]]
Scan示例:计算X行的范数
import theano
import theano.tensor as T
import numpy as np
# define tensor variable
X = T.matrix("X")
results, updates = theano.scan(lambda x_i: T.sqrt((x_i ** 2).sum()), sequences=[X])
compute_norm_lines = theano.function(inputs=[X], outputs=results)
# test value
x = np.diag(np.arange(1, 6, dtype=theano.config.floatX), 1)
print(compute_norm_lines(x))
# comparison with numpy
print(np.sqrt((x ** 2).sum(1)))
[ 1\. 2\. 3\. 4\. 5\. 0.]
[ 1\. 2\. 3\. 4\. 5\. 0.]
Scan示例:计算X列的范数
import theano
import theano.tensor as T
import numpy as np
# define tensor variable
X = T.matrix("X")
results, updates = theano.scan(lambda x_i: T.sqrt((x_i ** 2).sum()), sequences=[X.T])
compute_norm_cols = theano.function(inputs=[X], outputs=results)
# test value
x = np.diag(np.arange(1, 6, dtype=theano.config.floatX), 1)
print(compute_norm_cols(x))
# comparison with numpy
print(np.sqrt((x ** 2).sum(0)))
[ 0\. 1\. 2\. 3\. 4\. 5.]
[ 0\. 1\. 2\. 3\. 4\. 5.]
扫描示例:计算X的迹
import theano
import theano.tensor as T
import numpy as np
floatX = "float32"
# define tensor variable
X = T.matrix("X")
results, updates = theano.scan(lambda i, j, t_f: T.cast(X[i, j] + t_f, floatX),
sequences=[T.arange(X.shape[0]), T.arange(X.shape[1])],
outputs_info=np.asarray(0., dtype=floatX))
result = results[-1]
compute_trace = theano.function(inputs=[X], outputs=result)
# test value
x = np.eye(5, dtype=theano.config.floatX)
x[0] = np.arange(5, dtype=theano.config.floatX)
print(compute_trace(x))
# comparison with numpy
print(np.diagonal(x).sum())
4.0
4.0
Scan示例:计算序列x(t) = x(t - 2).dot(U) + x(t - 1).dot(V) + tanh(x(t - 1).dot(W) + b)
import theano
import theano.tensor as T
import numpy as np
# define tensor variables
X = T.matrix("X")
W = T.matrix("W")
b_sym = T.vector("b_sym")
U = T.matrix("U")
V = T.matrix("V")
n_sym = T.iscalar("n_sym")
results, updates = theano.scan(lambda x_tm2, x_tm1: T.dot(x_tm2, U) + T.dot(x_tm1, V) + T.tanh(T.dot(x_tm1, W) + b_sym),
n_steps=n_sym, outputs_info=[dict(initial=X, taps=[-2, -1])])
compute_seq2 = theano.function(inputs=[X, U, V, W, b_sym, n_sym], outputs=results)
# test values
x = np.zeros((2, 2), dtype=theano.config.floatX) # the initial value must be able to return x[-2]
x[1, 1] = 1
w = 0.5 * np.ones((2, 2), dtype=theano.config.floatX)
u = 0.5 * (np.ones((2, 2), dtype=theano.config.floatX) - np.eye(2, dtype=theano.config.floatX))
v = 0.5 * np.ones((2, 2), dtype=theano.config.floatX)
n = 10
b = np.ones((2), dtype=theano.config.floatX)
print(compute_seq2(x, u, v, w, b, n))
# comparison with numpy
x_res = np.zeros((10, 2))
x_res[0] = x[0].dot(u) + x[1].dot(v) + np.tanh(x[1].dot(w) + b)
x_res[1] = x[1].dot(u) + x_res[0].dot(v) + np.tanh(x_res[0].dot(w) + b)
x_res[2] = x_res[0].dot(u) + x_res[1].dot(v) + np.tanh(x_res[1].dot(w) + b)
for i in range(2, 10):
x_res[i] = (x_res[i - 2].dot(u) + x_res[i - 1].dot(v) +
np.tanh(x_res[i - 1].dot(w) + b))
print(x_res)
[[ 1.40514825 1.40514825]
[ 2.88898899 2.38898899]
[ 4.34018291 4.34018291]
[ 6.53463142 6.78463142]
[ 9.82972243 9.82972243]
[ 14.22203814 14.09703814]
[ 20.07439936 20.07439936]
[ 28.12291843 28.18541843]
[ 39.1913681 39.1913681 ]
[ 54.28407732 54.25282732]]
[[ 1.40514825 1.40514825]
[ 2.88898899 2.38898899]
[ 4.34018291 4.34018291]
[ 6.53463142 6.78463142]
[ 9.82972243 9.82972243]
[ 14.22203814 14.09703814]
[ 20.07439936 20.07439936]
[ 28.12291843 28.18541843]
[ 39.1913681 39.1913681 ]
[ 54.28407732 54.25282732]]
Scan示例:计算y = tanh(v.dot(A)) wrt x的Jacobian
import theano
import theano.tensor as T
import numpy as np
# define tensor variables
v = T.vector()
A = T.matrix()
y = T.tanh(T.dot(v, A))
results, updates = theano.scan(lambda i: T.grad(y[i], v), sequences=[T.arange(y.shape[0])])
compute_jac_t = theano.function([A, v], results, allow_input_downcast=True) # shape (d_out, d_in)
# test values
x = np.eye(5, dtype=theano.config.floatX)[0]
w = np.eye(5, 3, dtype=theano.config.floatX)
w[2] = np.ones((3), dtype=theano.config.floatX)
print(compute_jac_t(w, x))
# compare with numpy
print(((1 - np.tanh(x.dot(w)) ** 2) * w).T)
[[ 0.41997434 0\. 0.41997434 0\. 0\. ]
[ 0\. 1\. 1\. 0\. 0\. ]
[ 0\. 0\. 1\. 0\. 0\. ]]
[[ 0.41997434 0\. 0.41997434 0\. 0\. ]
[ 0\. 1\. 1\. 0\. 0\. ]
[ 0\. 0\. 1\. 0\. 0\. ]]
注意,我们需要迭代y
的索引,而不是y
的元素。原因是scan为其内部函数创建一个占位符变量,并且此占位符变量与将替换它的变量不具有相同的依赖关系。
Scan示例:Scan期间累积循环次数
import theano
import theano.tensor as T
import numpy as np
# define shared variables
k = theano.shared(0)
n_sym = T.iscalar("n_sym")
results, updates = theano.scan(lambda:{k:(k + 1)}, n_steps=n_sym)
accumulator = theano.function([n_sym], [], updates=updates, allow_input_downcast=True)
k.get_value()
accumulator(5)
k.get_value()
Scan示例:计算tanh(v.dot(W) + b) * d,其中d是二项式
import theano
import theano.tensor as T
import numpy as np
# define tensor variables
X = T.matrix("X")
W = T.matrix("W")
b_sym = T.vector("b_sym")
# define shared random stream
trng = T.shared_randomstreams.RandomStreams(1234)
d=trng.binomial(size=W[1].shape)
results, updates = theano.scan(lambda v: T.tanh(T.dot(v, W) + b_sym) * d, sequences=X)
compute_with_bnoise = theano.function(inputs=[X, W, b_sym], outputs=results,
updates=updates, allow_input_downcast=True)
x = np.eye(10, 2, dtype=theano.config.floatX)
w = np.ones((2, 2), dtype=theano.config.floatX)
b = np.ones((2), dtype=theano.config.floatX)
print(compute_with_bnoise(x, w, b))
[[ 0.96402758 0\. ]
[ 0\. 0.96402758]
[ 0\. 0\. ]
[ 0.76159416 0.76159416]
[ 0.76159416 0\. ]
[ 0\. 0.76159416]
[ 0\. 0.76159416]
[ 0\. 0.76159416]
[ 0\. 0\. ]
[ 0.76159416 0.76159416]]
注意,如果你想使用一个在scan循环中不会更新的随机变量d
,你应该将此变量作为non_sequences
参数传递。
Scan示例:计算pow(A, k)
import theano
import theano.tensor as T
theano.config.warn.subtensor_merge_bug = False
k = T.iscalar("k")
A = T.vector("A")
def inner_fct(prior_result, B):
return prior_result * B
# Symbolic description of the result
result, updates = theano.scan(fn=inner_fct,
outputs_info=T.ones_like(A),
non_sequences=A, n_steps=k)
# Scan has provided us with A ** 1 through A ** k. Keep only the last
# value. Scan notices this and does not waste memory saving them.
final_result = result[-1]
power = theano.function(inputs=[A, k], outputs=final_result,
updates=updates)
print(power(range(10), 2))
[ 0\. 1\. 4\. 9\. 16\. 25\. 36\. 49\. 64\. 81.]
Scan示例:计算多项式
import numpy
import theano
import theano.tensor as T
theano.config.warn.subtensor_merge_bug = False
coefficients = theano.tensor.vector("coefficients")
x = T.scalar("x")
max_coefficients_supported = 10000
# Generate the components of the polynomial
full_range=theano.tensor.arange(max_coefficients_supported)
components, updates = theano.scan(fn=lambda coeff, power, free_var:
coeff * (free_var ** power),
outputs_info=None,
sequences=[coefficients, full_range],
non_sequences=x)
polynomial = components.sum()
calculate_polynomial = theano.function(inputs=[coefficients, x],
outputs=polynomial)
test_coeff = numpy.asarray([1, 0, 2], dtype=numpy.float32)
print(calculate_polynomial(test_coeff, 3))
19.0
练习
运行这两个示例。
修改并执行多项式示例以通过scan
完成reduction。