Weekly Machine Learning Mini-Project 004: RNN Implementation (Draft)
1. Reading Suggestions
Recommended reading time: 15 min
2. Software Environment
- python3
- numpy
3. Prerequisites
- Chain rule of differentiation
4. Data Description
- Generate test data (a minimal sketch follows this list)
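The test data used here is deliberately trivial: an all-ones input sequence and an all-ones target, which is enough to compare gradients against TensorFlow in section 7. A minimal generation sketch; the names `batch_size`, `max_time`, and `features` simply mirror the values used in the verification run in section 7:

```python
import numpy as np

batch_size = 1   # values taken from the verification run in section 7
max_time = 10
features = 3

indata = np.ones([batch_size, max_time, features])   # test inputs x_1 ... x_T
target = np.ones([batch_size, features])             # target y for the last time steps
```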
5. Theory
5.1 Forward Pass
The input to a recurrent neural network is time-dependent, so the input and output at time step $t$ can be written as $x_t$ and $h_t$. To preserve the temporal information, the simplest form of the RNN function is:

$$h_t = \tanh\left([x_t, h_{t-1}]\, W + b\right) \tag{1.1}$$

where $x_t$ has shape [BatchSize, Features1] and $h_t$ has shape [BatchSize, Features2]. A multi-layer RNN applies another RNN function on top of the output of the previous layer:

$$h_t^{(2)} = \tanh\left([h_t^{(1)}, h_{t-1}^{(2)}]\, W^{(2)} + b^{(2)}\right) \tag{1.2}$$
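As a quick illustration of equation (1.1), a single forward step can be written in a few lines of NumPy. This is only a sketch: the names `rnn_step`, `W`, and `b` are illustrative placeholders, chosen to be consistent with the shapes described above (the full implementation appears in section 6).

```python
import numpy as np

batch_size, features_in, features_out = 1, 3, 3

# illustrative parameters; section 6 initialises them uniformly in [-0.1, 0.1]
W = np.random.uniform(-0.1, 0.1, [features_in + features_out, features_out])
b = np.random.uniform(-0.1, 0.1, [features_out])

def rnn_step(x_t, h_prev):
    """One step of equation (1.1): h_t = tanh([x_t, h_{t-1}] . W + b)."""
    z_t = np.dot(np.concatenate([x_t, h_prev], axis=1), W) + b
    return np.tanh(z_t)

x_t = np.ones([batch_size, features_in])        # input of shape [BatchSize, Features1]
h_prev = np.zeros([batch_size, features_out])   # previous state of shape [BatchSize, Features2]
h_t = rnn_step(x_t, h_prev)                     # new state, same shape as h_prev
```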
5.2 Backward Pass
The backward pass of the RNN function is similar to that of a fully connected network. Writing the pre-activation as $z_t = [x_t, h_{t-1}]\, W + b$, splitting $W$ row-wise into an input part $W_x$ and a recurrent part $W_h$, and letting $\delta_t = \frac{\partial L}{\partial h_t} \odot \tanh'(z_t)$, the gradients are:

$$\frac{\partial L}{\partial h_{t-1}} = \delta_t\, W_h^{\mathsf T} \tag{1.3-a}$$

$$\frac{\partial L}{\partial W} = [x_t, h_{t-1}]^{\mathsf T}\, \delta_t, \qquad \frac{\partial L}{\partial b} = \sum_{\text{batch}} \delta_t \tag{1.3-b}$$

$$\frac{\partial L}{\partial x_t} = \delta_t\, W_x^{\mathsf T} \tag{1.3-c}$$

Equation (1.3-a) is the backpropagation-through-time (BPTT) term and (1.3-c) is the layer-to-layer propagation term; the trainable parameters $W$ and $b$ are updated with (1.3-b). In fact, a traditional RNN is no different from a fully connected network; it only adds the backpropagation-through-time term.
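To make equations (1.3-a/b/c) concrete, here is one backward step in NumPy. It is a sketch under the same assumptions as the forward sketch above; `rnn_backward_step` and its arguments are illustrative names. The full gradient over a sequence is obtained by accumulating `dW` and `db` while feeding `dL_dh_prev` back to the previous step, which is exactly what the implementation in section 6 does.

```python
import numpy as np

def rnn_backward_step(x_t, h_prev, z_t, dL_dh, W):
    """One BPTT step for h_t = tanh(z_t), z_t = [x_t, h_{t-1}] . W + b."""
    features_in = x_t.shape[1]
    W_x, W_h = W[:features_in, :], W[features_in:, :]              # same split as in section 6
    delta = dL_dh * (1.0 - np.tanh(z_t) ** 2)                      # dL/dz_t
    dW = np.dot(np.concatenate([x_t, h_prev], axis=1).T, delta)    # (1.3-b)
    db = np.sum(delta, axis=0)                                     # (1.3-b)
    dL_dh_prev = np.dot(delta, W_h.T)                              # (1.3-a), time propagation
    dL_dx = np.dot(delta, W_x.T)                                   # (1.3-c), layer propagation
    return dW, db, dL_dh_prev, dL_dx
```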
6. Code
6.1 NumPy implementation of RNNCell and MultiRNNCells
```python
import numpy as np


class RNNCell():
    def __init__(self, insize=12, outsize=6, type="BASIC"):
        self.outsize = outsize
        self.insize = insize
        # single weight matrix acting on the concatenation [x_t, h_{t-1}]
        self.w = np.random.uniform(-0.1, 0.1, [insize + outsize, outsize])
        self.b = np.random.uniform(-0.1, 0.1, [outsize])
        self.outputs = []   # pre-activations z_t for every time step
        self.inputs = []    # inputs x_t for every time step
        self.states = []    # previous states h_{t-1} for every time step

    def tanh(self, x):
        epx = np.exp(x)
        enx = np.exp(-x)
        return (epx - enx) / (epx + enx)

    def __call__(self, x, s):
        # forward step: h_t = tanh([x_t, h_{t-1}] . w + b)
        self.inputs.append(x)
        self.inshape = np.shape(x)
        self.states.append(s)
        inx = np.concatenate([x, s], axis=1)
        out = np.dot(inx, self.w) + self.b
        self.outputs.append(out)
        out = self.tanh(out)
        return out, out

    def assign(self, w, b):
        self.w = w
        self.b = b

    def zero_state(self, batch_size):
        return np.zeros([batch_size, self.outsize])

    def get_error(self, error):
        self.error = error

    def d_tanh(self, x):
        # derivative of tanh evaluated at the pre-activation x: 1 - tanh(x)^2
        e2x = np.exp(2 * x)
        return 4 * e2x / (1 + e2x) ** 2

    def backward(self):
        # BPTT: walk backwards in time from every step that carries an error
        self.back_error = [np.zeros(self.inshape) for itr in range(len(self.outputs))]
        dw = np.zeros_like(self.w)
        db = np.zeros([self.outsize])
        w1 = self.w[:self.insize, :]   # input-to-hidden part (layer propagation)
        w2 = self.w[self.insize:, :]   # hidden-to-hidden part (time propagation)
        for itrs in range(len(self.outputs) - 1, -1, -1):
            if len(self.error[itrs]) == 0:
                continue
            err = self.error[itrs]
            for itr in range(itrs, -1, -1):
                h1 = self.outputs[itr]
                h0 = self.states[itr]
                x = self.inputs[itr]
                d_fe = self.d_tanh(h1)
                err = d_fe * err
                dw[:self.insize, :] += np.dot(x.T, err)
                dw[self.insize:, :] += np.dot(h0.T, err)
                db += np.sum(err, axis=0)
                # gradient passed down to the layer below
                self.back_error[itr] += np.dot(err, w1.T)
                # gradient passed back to the previous time step
                err = np.dot(err, w2.T)
        self.dw = dw
        self.db = db
        return dw, db

    def loss(self, y):
        # squared loss on the last two time steps; error = dL/dh_t
        self.error = []
        for itr in range(len(self.outputs)):
            self.error.append([])
        self.error[-1] = 2 * (self.tanh(self.outputs[-1]) - y)
        self.error[-2] = 2 * (self.tanh(self.outputs[-2]) - y)


class MultiRNNCells():
    def __init__(self, rnn_cells):
        self.cells = rnn_cells
        self.cont = 0   # number of time steps seen so far

    def __call__(self, x, s):
        # feed the output of each layer into the next one
        state = []
        out = x
        for idx in range(len(self.cells)):
            out, st = self.cells[idx](out, s[idx])
            state.append(st)
        self.cont += 1
        return out, state

    def tanh(self, x):
        epx = np.exp(x)
        enx = np.exp(-x)
        return (epx - enx) / (epx + enx)

    def loss(self, y):
        # squared loss on the last two outputs of the top cell
        self.error = []
        for itr in range(self.cont):
            self.error.append([])
        self.error[-1] = 2 * (self.tanh(self.cells[-1].outputs[-1]) - y)
        self.error[-2] = 2 * (self.tanh(self.cells[-1].outputs[-2]) - y)

    def backward(self):
        # propagate the error from the top cell down to the bottom cell
        error = self.error
        dws = []
        for itr in range(len(self.cells) - 1, -1, -1):
            self.cells[itr].get_error(error)
            self.cells[itr].backward()
            error = self.cells[itr].back_error
            dws.append(self.cells[itr].dw)
            dws.append(self.cells[itr].db)
        return tuple(dws)

    def apply_gradient(self, eta=0.1):
        # plain gradient-descent update with learning rate eta
        dws = []
        for itr in range(len(self.cells) - 1, -1, -1):
            self.cells[itr].w -= eta * self.cells[itr].dw
            self.cells[itr].b -= eta * self.cells[itr].db
            dws.append(self.cells[itr].dw)
            dws.append(self.cells[itr].db)
        return tuple(dws)
```
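Before comparing against TensorFlow, the two classes can be exercised on their own. A minimal usage sketch, assuming the shapes and the all-ones data of the verification run in section 7:

```python
import numpy as np

batch_size, max_time, features = 1, 10, 3

# two stacked cells, as in the verification run below
cell1 = RNNCell(features, features)
cell2 = RNNCell(features, features)
net = MultiRNNCells([cell1, cell2])

state = [cell1.zero_state(batch_size), cell2.zero_state(batch_size)]
x = np.ones([batch_size, max_time, features])
y = np.ones([batch_size, features])

for t in range(max_time):
    out, state = net(x[:, t, :], state)   # forward through both layers

net.loss(y)                   # squared error on the last two time steps
dws = net.backward()          # gradients, top cell first: (dw2, db2, dw1, db1)
net.apply_gradient(eta=0.1)   # one plain gradient-descent step
```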
7. Running the Program
TensorFlow is used as the reference implementation; the verification method is to compute the gradients in both programs for the same inputs and print them for comparison.
```python
import numpy as np
import tensorflow as tf
from tensorflow.contrib import rnn
# RNNCell and MultiRNNCells are the NumPy classes defined in section 6.1

# build the multi-layer network
batch_size = 1
max_time = 10
indata = tf.placeholder(dtype=tf.float64, shape=[batch_size, 10, 3])
# two-layer RNN
cell = rnn.MultiRNNCell([rnn.BasicRNNCell(3) for itr in range(2)], state_is_tuple=True)
state = cell.zero_state(batch_size, tf.float64)
outputs = []
states = []
# collect the output and state of every step
for time_step in range(max_time):
    (cell_output, state) = cell(indata[:, time_step, :], state)
    outputs.append(cell_output)
    states.append(state)
y = tf.placeholder(tf.float64, shape=[batch_size, 3])
# define the loss function
loss = tf.square(outputs[-1] - y) + tf.square(outputs[-2] - y)
opt = tf.train.GradientDescentOptimizer(1)
# fetch the trainable parameters
weights = tf.trainable_variables()
# compute the gradients
grad = opt.compute_gradients(loss, weights)
sess = tf.Session()
sess.run(tf.global_variables_initializer())
# fetch variable values and gradients
w1, b1, w2, b2 = sess.run(weights)
dw1, db1, dw2, db2 = sess.run(grad,
                              feed_dict={indata: np.ones([batch_size, 10, 3]),
                                         y: np.ones([batch_size, 3])})
dw1 = dw1[0]
db1 = db1[0]
dw2 = dw2[0]
db2 = db2[0]

# build the NumPy network with the same weights
rnn1 = RNNCell(3, 3)
rnn1.assign(w1, b1)
rnn2 = RNNCell(3, 3)
rnn2.assign(w2, b2)
state = []
state.append(rnn1.zero_state(batch_size))
state.append(rnn2.zero_state(batch_size))
rnn = MultiRNNCells([rnn1, rnn2])
indata = np.ones([batch_size, 10, 3])
for time_step in range(max_time):
    (cell_output, state) = rnn(indata[:, time_step, :], state)
    print(cell_output)
print("TF Gradients", np.mean(dw1), np.mean(db1), np.mean(dw2), np.mean(db2))
rnn.loss(np.ones([batch_size, 3]))
dw2, db2, dw1, db1 = rnn.backward()
print("NP Gradients", np.mean(dw1), np.mean(db1), np.mean(dw2), np.mean(db2))
```
After running, the two sets of gradients are identical.
What to work on next:
- LSTM implementation
- Gradient issues (vanishing/exploding gradients)
- Introduce other gradient-update methods, such as Adam