# encoding: utf-8
import tensorflow as tf


def linear(input_, output_size, scope=None):
    '''
    Linear map: output[k] = sum_i(Matrix[k, i] * input_[i]) + Bias[k]
    Args:
        input_: a 2D Tensor of shape [batch, n].
        output_size: int, second dimension of the output.
        scope: VariableScope for the created subgraph; defaults to "SimpleLinear".
    Returns:
        A 2D Tensor of shape [batch, output_size] equal to
        input_ * Matrix^T + Bias, where Matrix is a newly created variable.
    Raises:
        ValueError: if input_ has an unspecified or wrong shape.
    '''
    shape = input_.get_shape().as_list()
    if len(shape) != 2:
        raise ValueError("Linear is expecting 2D arguments: %s" % str(shape))
    if not shape[1]:
        raise ValueError("Linear expects shape[1] to be defined: %s" % str(shape))
    input_size = shape[1]

    # Affine transform: input_ * Matrix^T + bias.
    with tf.variable_scope(scope or "SimpleLinear"):
        matrix = tf.get_variable("Matrix", [output_size, input_size], dtype=input_.dtype)
        bias_term = tf.get_variable("Bias", [output_size], dtype=input_.dtype)

    return tf.matmul(input_, tf.transpose(matrix)) + bias_term
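

# A minimal usage sketch for linear(); the placeholder shape and scope name
# below are illustrative only, not part of the model.
def _linear_example():
    x = tf.placeholder(tf.float32, [None, 128])      # [batch, 128]
    y = linear(x, 64, scope="example_lin")           # [batch, 64]
    return y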


# Highway layer, borrowed from https://github.com/carpedm20/lstm-char-cnn-tensorflow
def highway(input_, size, layer_size=1, bias=-2, f=tf.nn.relu):
    """Highway Network (cf. http://arxiv.org/abs/1505.00387).

    t = sigmoid(W_T y + b_T)
    z = t * g(W_H y + b_H) + (1 - t) * y
    where g is a nonlinearity, t is the transform gate, and (1 - t) is the carry gate.
    """
    output = input_
    for idx in range(layer_size):
        # g(W_H y + b_H): the transformed output.
        output = f(linear(output, size, scope='output_lin_%d' % idx))
        # Alternative: use TF's internal _linear and drop the scope= arguments:
        # from tensorflow.contrib.rnn.python.ops.core_rnn_cell_impl import _linear as linear

        # t = sigmoid(W_T y + b_T): a negative bias initially favors the carry gate.
        transform_gate = tf.sigmoid(linear(input_, size, scope='transform_lin_%d' % idx) + bias)
        carry_gate = 1. - transform_gate

        output = transform_gate * output + carry_gate * input_

    return output
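

# A minimal usage sketch for highway(); the input width below is illustrative.
# Note that size must equal the last dimension of input_, so that the gated
# residual sum transform_gate * output + carry_gate * input_ is well-formed.
def _highway_example():
    x = tf.placeholder(tf.float32, [None, 300])
    h = highway(x, 300, layer_size=1, bias=-2)       # same shape as x
    return h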


class TextCNN(object):
    """
    A CNN for text classification.
    Uses an embedding layer, followed by convolutional, max-pooling, highway,
    dropout, and softmax layers.
    """

    def __init__(
            self, sequence_length, num_classes, vocab_size,
            embedding_size, filter_sizes, num_filters, l2_reg_lambda=0.0):
        # Placeholders for input, output and dropout
        self.input_x = tf.placeholder(tf.int32, [None, sequence_length], name="input_x")
        self.input_y = tf.placeholder(tf.float32, [None, num_classes], name="input_y")
        self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")

        # Keeping track of l2 regularization loss (optional)
        l2_loss = tf.constant(0.0)

        # Embedding layer
        with tf.device('/gpu:0'), tf.name_scope("embedding"):
            W = tf.Variable(
                tf.random_uniform([vocab_size, embedding_size], -1.0, 1.0),
                name="W")
            self.embedded_chars = tf.nn.embedding_lookup(W, self.input_x)
            self.embedded_chars_expanded = tf.expand_dims(self.embedded_chars, -1)
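            # embedded_chars: [batch, sequence_length, embedding_size];
            # expand_dims adds the channel dimension that conv2d expects:
            # [batch, sequence_length, embedding_size, 1].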

        # Create a convolution + maxpool layer for each filter size
        pooled_outputs = []
        for filter_size, num_filter in zip(filter_sizes, num_filters):
            with tf.name_scope("conv-maxpool-%s" % filter_size):
                # Convolution Layer
                filter_shape = [filter_size, embedding_size, 1, num_filter]
                W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W")
                b = tf.Variable(tf.constant(0.1, shape=[num_filter]), name="b")
                conv = tf.nn.conv2d(
                    self.embedded_chars_expanded,
                    W,
                    strides=[1, 1, 1, 1],
                    padding="VALID",
                    name="conv")
                # Apply nonlinearity
                h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")
                # Maxpooling over the outputs
                pooled = tf.nn.max_pool(
                    h,
                    ksize=[1, sequence_length - filter_size + 1, 1, 1],
                    strides=[1, 1, 1, 1],
                    padding='VALID',
                    name="pool")
                pooled_outputs.append(pooled)
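
        # Each pooled tensor is [batch, 1, 1, num_filter]; concatenating on the
        # last axis gives [batch, 1, 1, num_filters_total].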
        # Combine all the pooled features
        num_filters_total = sum(num_filters)
        self.h_pool = tf.concat(pooled_outputs, 3)  # argument order: values first, axis second (TF >= 1.0)
        self.h_pool_flat = tf.reshape(self.h_pool, [-1, num_filters_total])

        # Add highway
        with tf.name_scope("highway"):
            self.h_highway = highway(self.h_pool_flat, num_filters_total, 1, 0)

        # Add dropout
        with tf.name_scope("dropout"):
            self.h_drop = tf.nn.dropout(self.h_highway, self.dropout_keep_prob)

        # Final (unnormalized) scores and predictions
        with tf.name_scope("output"):
            W = tf.Variable(tf.truncated_normal([num_filters_total, num_classes], stddev=0.1), name="W")
            b = tf.Variable(tf.constant(0.1, shape=[num_classes]), name="b")
            l2_loss += tf.nn.l2_loss(W)
            l2_loss += tf.nn.l2_loss(b)
            self.scores = tf.nn.xw_plus_b(self.h_drop, W, b, name="scores")
            self.predictions = tf.argmax(self.scores, 1, name="predictions")

        # Calculate mean cross-entropy loss
        with tf.name_scope("loss"):
            losses = tf.nn.softmax_cross_entropy_with_logits(logits=self.scores, labels=self.input_y)
            self.loss = tf.reduce_mean(losses) + l2_reg_lambda * l2_loss

        # Accuracy
        with tf.name_scope("accuracy"):
            correct_predictions = tf.equal(self.predictions, tf.argmax(self.input_y, 1))
            self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"), name="accuracy")
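

# A minimal smoke test that builds the graph; the hyperparameters below are
# illustrative only, not values from any experiment.
if __name__ == "__main__":
    cnn = TextCNN(
        sequence_length=20,
        num_classes=2,
        vocab_size=5000,
        embedding_size=128,
        filter_sizes=[3, 4, 5],
        num_filters=[100, 100, 100],
        l2_reg_lambda=0.0)
    print(cnn.scores)  # Tensor of shape [?, 2]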