1. softmax_cross_entropy_with_logits
Method signature:

def softmax_cross_entropy_with_logits(
    _sentinel=None,
    labels=None,
    logits=None,
    dim=-1,
    name=None):

Computation: let x = logits and y = labels. The per-example loss is

loss = -sum_i( y_i * log(softmax(x)_i) ),  where softmax(x)_i = exp(x_i) / sum_j( exp(x_j) )
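As a sanity check, here is a minimal NumPy sketch of this formula (an illustration of the math only, not TensorFlow's fused, numerically stabilized kernel):

import numpy as np

def softmax(x):
    # shift by the row max for numerical stability
    e = np.exp(x - np.max(x, axis=-1, keepdims=True))
    return e / np.sum(e, axis=-1, keepdims=True)

def softmax_cross_entropy(labels, logits):
    # per-example loss: -sum_i y_i * log(softmax(x)_i)
    return -np.sum(labels * np.log(softmax(logits)), axis=-1)

print(softmax_cross_entropy(np.array([[0.0, 0.0, 1.0]]),
                            np.array([[1.0, 2.0, 3.0]])))  # ~[0.4076]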
2. sparse_softmax_cross_entropy_with_logits
Method signature:

def sparse_softmax_cross_entropy_with_logits(
    _sentinel=None,
    labels=None,
    logits=None,
    name=None):
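Unlike the dense variant, labels here is not a probability distribution: it is a tensor of integer class indices (int32 or int64), one per example, each in the range [0, num_classes). A minimal sketch of the equivalence (example values chosen purely for illustration):

import tensorflow as tf

logits = tf.constant([[1.0, 2.0, 3.0]])
sparse_labels = tf.constant([2])                      # one class index per example
one_hot_labels = tf.one_hot(sparse_labels, depth=3)   # [[0., 0., 1.]]

loss_sparse = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=sparse_labels, logits=logits)
loss_dense = tf.nn.softmax_cross_entropy_with_logits(labels=one_hot_labels, logits=logits)

with tf.Session() as sess:
    print(sess.run([loss_sparse, loss_dense]))  # both ~[0.4076]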
3. Comparing methods 1 and 2

softmax_cross_entropy_with_logits and sparse_softmax_cross_entropy_with_logits compute the cross entropy in exactly the same way:

- apply softmax to the network output (the logits)
- compute the cross entropy between the result and the labels

They differ only in the labels argument: softmax_cross_entropy_with_logits takes one-hot labels, while sparse_softmax_cross_entropy_with_logits takes the raw integer class labels.
import tensorflow as tf

# output of the network (the logits)
logits = tf.constant([[1.0, 2.0, 3.0], [1.0, 2.0, 3.0], [1.0, 2.0, 3.0]])
# apply softmax to the logits
y = tf.nn.softmax(logits)
# ground-truth labels in one-hot form
y_ = tf.constant([[0.0, 0.0, 1.0], [0.0, 0.0, 1.0], [0.0, 0.0, 1.0]])
# convert the one-hot labels to integer class indices
dense_y = tf.argmax(y_, 1)  # dense_y = [2 2 2]
# cross entropy computed by hand
cross_entropy = -tf.reduce_sum(y_ * tf.log(y))
# cross entropy via softmax_cross_entropy_with_logits
cross_entropy2 = tf.reduce_sum(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y_))
# cross entropy via sparse_softmax_cross_entropy_with_logits
cross_entropy3 = tf.reduce_sum(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=dense_y))

with tf.Session() as sess:
    softmax = sess.run(y)
    c_e = sess.run(cross_entropy)
    c_e2 = sess.run(cross_entropy2)
    c_e3 = sess.run(cross_entropy3)
    print("step1:softmax result=")
    print(softmax)
    print("y_ = ")
    print(sess.run(y_))
    print("tf.log(y) = ")
    print(sess.run(tf.log(y)))
    print("dense_y =")
    print(sess.run(dense_y))
    print("step2:cross_entropy result=")
    print(c_e)
    print("Function(softmax_cross_entropy_with_logits) result=")
    print(c_e2)
    print("Function(sparse_softmax_cross_entropy_with_logits) result=")
    print(c_e3)
Output:
step1:softmax result=
[[ 0.09003057 0.24472848 0.66524094]
[ 0.09003057 0.24472848 0.66524094]
[ 0.09003057 0.24472848 0.66524094]]
y_ =
[[ 0. 0. 1.]
[ 0. 0. 1.]
[ 0. 0. 1.]]
tf.log(y) =
[[-2.40760589 -1.40760589 -0.40760601]
[-2.40760589 -1.40760589 -0.40760601]
[-2.40760589 -1.40760589 -0.40760598]]
dense_y =
[2 2 2]
step2:cross_entropy result=
1.22282
Function(softmax_cross_entropy_with_logits) result=
1.22282
Function(sparse_softmax_cross_entropy_with_logits) result=
1.22282
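As a quick check, each row of the softmax output contributes -log(0.66524094) ≈ 0.40761 to the loss, and 3 × 0.40761 ≈ 1.22282, which matches all three results.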
4. sigmoid_cross_entropy_with_logits
Method signature:

def sigmoid_cross_entropy_with_logits(
    _sentinel=None,
    labels=None,
    logits=None,
    name=None):

Computation: let x = logits and z = labels. The element-wise loss is

loss = -( z * log(sigmoid(x)) + (1 - z) * log(1 - sigmoid(x)) ),  where sigmoid(x) = 1 / (1 + exp(-x))
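As with the softmax case, here is a minimal NumPy sketch of this element-wise formula (an illustration only; TensorFlow's implementation uses an equivalent, numerically stable reformulation):

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def sigmoid_cross_entropy(labels, logits):
    # element-wise loss: -(z * log(sigmoid(x)) + (1 - z) * log(1 - sigmoid(x)))
    p = sigmoid(logits)
    return -(labels * np.log(p) + (1 - labels) * np.log(1 - p))

print(sigmoid_cross_entropy(np.array([0.0, 0.0, 1.0]),
                            np.array([1.0, 2.0, 3.0])))  # ~[1.3133 2.1269 0.0486]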
5. weighted_cross_entropy_with_logits

Method signature:

def weighted_cross_entropy_with_logits(
    targets=None,
    logits=None,
    pos_weight=None,
    name=None):

Computation: let x = logits, z = targets and q = pos_weight. The element-wise loss is

loss = -( q * z * log(sigmoid(x)) + (1 - z) * log(1 - sigmoid(x)) ),  where sigmoid(x) = 1 / (1 + exp(-x))
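The only difference from method 4 is that pos_weight scales the positive (z = 1) term, so q > 1 penalizes false negatives more heavily and q < 1 penalizes false positives more heavily. For instance, for x = 3 and z = 1 the term -log(sigmoid(3)) ≈ 0.0486 doubles to ≈ 0.0972 with pos_weight = 2.0, which is exactly the difference that shows up in the comparison below.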
6. Comparing methods 4 and 5

Note that the two methods name the ground-truth argument differently: sigmoid_cross_entropy_with_logits calls it labels, while weighted_cross_entropy_with_logits calls it targets.
import tensorflow as tf

# output of the network (the logits)
logits = tf.constant([[1.0, 2.0, 3.0], [1.0, 2.0, 3.0], [1.0, 2.0, 3.0]])
# apply sigmoid to the logits
y = tf.nn.sigmoid(logits)
# ground-truth labels, one value per class
y_ = tf.constant([[0.0, 0.0, 1.0], [0.0, 0.0, 1.0], [0.0, 0.0, 1.0]])
# convert the one-hot labels to integer class indices (not used in this example)
dense_y = tf.argmax(y_, 1)
# cross entropy computed by hand
cross_entropy = -tf.reduce_sum(y_ * tf.log(y) + (1 - y_) * tf.log(1 - y))
# cross entropy via sigmoid_cross_entropy_with_logits
cross_entropy2 = tf.reduce_sum(tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=y_))
# cross entropy via weighted_cross_entropy_with_logits
cross_entropy3 = tf.reduce_sum(tf.nn.weighted_cross_entropy_with_logits(logits=logits, targets=y_, pos_weight=2.0))

with tf.Session() as sess:
    sigmoid = sess.run(y)
    c_e = sess.run(cross_entropy)
    c_e2 = sess.run(cross_entropy2)
    c_e3 = sess.run(cross_entropy3)
    print("step1:sigmoid result=")
    print(sigmoid)
    print("step2:cross_entropy result=")
    print(c_e)
    print("Function(sigmoid_cross_entropy_with_logits) result=")
    print(c_e2)
    print("Function(weighted_cross_entropy_with_logits) result=")
    print(c_e3)
Output:
step1:sigmoid result=
[[ 0.7310586 0.88079703 0.95257413]
[ 0.7310586 0.88079703 0.95257413]
[ 0.7310586 0.88079703 0.95257413]]
step2:cross_entropy result=
10.4663
Function(sigmoid_cross_entropy_with_logits) result=
10.4663
Function(weighted_cross_entropy_with_logits) result=
10.6121
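As a quick check, each row contributes -log(1 - 0.7310586) - log(1 - 0.88079703) - log(0.95257413) ≈ 1.3133 + 2.1269 + 0.0486 ≈ 3.4888, and 3 × 3.4888 ≈ 10.466. With pos_weight = 2.0 only the last (positive) term is doubled, giving 3 × 3.5374 ≈ 10.612; the gap between the two results is therefore roughly 3 × 0.0486 ≈ 0.146.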