Recurrent Neural Networks, RNN
관련글 : The Unreasonable Effectiveness of Recurrent Neural Networks
원본 소스 : https://gist.github.com/karpathy/d4dee566867f8291f086
python 3 버전
# encoding=utf8 """ Minimal character-level Vanilla RNN model. Written by Andrej Karpathy (@karpathy) BSD License """ import numpy as np import codecs # data I/O with codecs.open('input.txt', 'r', encoding='utf-8') as fp: data = fp.read() # data = open('input.txt', 'r').read() # should be simple plain text file chars = list(set(data)) data_size, vocab_size = len(data), len(chars) print ('data has %d characters, %d unique.' % (data_size, vocab_size)) char_to_ix = { ch:i for i,ch in enumerate(chars) } ix_to_char = { i:ch for i,ch in enumerate(chars) } # hyperparameters hidden_size = 100 # size of hidden layer of neurons seq_length = 25 # number of steps to unroll the RNN for learning_rate = 1e-1 # model parameters Wxh = np.random.randn(hidden_size, vocab_size)*0.01 # input to hidden Whh = np.random.randn(hidden_size, hidden_size)*0.01 # hidden to hidden Why = np.random.randn(vocab_size, hidden_size)*0.01 # hidden to output bh = np.zeros((hidden_size, 1)) # hidden bias by = np.zeros((vocab_size, 1)) # output bias def lossFun(inputs, targets, hprev): """ inputs,targets are both list of integers. hprev is Hx1 array of initial hidden state returns the loss, gradients on model parameters, and last hidden state """ xs, hs, ys, ps = {}, {}, {}, {} hs[-1] = np.copy(hprev) loss = 0 # forward pass for t in range(len(inputs)): xs[t] = np.zeros((vocab_size,1)) # encode in 1-of-k representation xs[t][inputs[t]] = 1 hs[t] = np.tanh(np.dot(Wxh, xs[t]) + np.dot(Whh, hs[t-1]) + bh) # hidden state ys[t] = np.dot(Why, hs[t]) + by # unnormalized log probabilities for next chars ps[t] = np.exp(ys[t]) / np.sum(np.exp(ys[t])) # probabilities for next chars loss += -np.log(ps[t][targets[t],0]) # softmax (cross-entropy loss) # backward pass: compute gradients going backwards dWxh, dWhh, dWhy = np.zeros_like(Wxh), np.zeros_like(Whh), np.zeros_like(Why) dbh, dby = np.zeros_like(bh), np.zeros_like(by) dhnext = np.zeros_like(hs[0]) for t in reversed(range(len(inputs))): dy = np.copy(ps[t]) dy[targets[t]] -= 1 # backprop into y. see http://cs231n.github.io/neural-networks-case-study/#grad if confused here dWhy += np.dot(dy, hs[t].T) dby += dy dh = np.dot(Why.T, dy) + dhnext # backprop into h dhraw = (1 - hs[t] * hs[t]) * dh # backprop through tanh nonlinearity dbh += dhraw dWxh += np.dot(dhraw, xs[t].T) dWhh += np.dot(dhraw, hs[t-1].T) dhnext = np.dot(Whh.T, dhraw) for dparam in [dWxh, dWhh, dWhy, dbh, dby]: np.clip(dparam, -5, 5, out=dparam) # clip to mitigate exploding gradients return loss, dWxh, dWhh, dWhy, dbh, dby, hs[len(inputs)-1] def sample(h, seed_ix, n): """ sample a sequence of integers from the model h is memory state, seed_ix is seed letter for first time step """ x = np.zeros((vocab_size, 1)) x[seed_ix] = 1 ixes = [] for t in range(n): h = np.tanh(np.dot(Wxh, x) + np.dot(Whh, h) + bh) y = np.dot(Why, h) + by p = np.exp(y) / np.sum(np.exp(y)) ix = np.random.choice(range(vocab_size), p=p.ravel()) x = np.zeros((vocab_size, 1)) x[ix] = 1 ixes.append(ix) return ixes n, p = 0, 0 mWxh, mWhh, mWhy = np.zeros_like(Wxh), np.zeros_like(Whh), np.zeros_like(Why) mbh, mby = np.zeros_like(bh), np.zeros_like(by) # memory variables for Adagrad smooth_loss = -np.log(1.0/vocab_size)*seq_length # loss at iteration 0 while True: # prepare inputs (we're sweeping from left to right in steps seq_length long) if p+seq_length+1 >= len(data) or n == 0: hprev = np.zeros((hidden_size,1)) # reset RNN memory p = 0 # go from start of data inputs = [char_to_ix[ch] for ch in data[p:p+seq_length]] targets = [char_to_ix[ch] for ch in data[p+1:p+seq_length+1]] # sample from the model now and then if n % 100 == 0: sample_ix = sample(hprev, inputs[0], 200) txt = ''.join(ix_to_char[ix] for ix in sample_ix) print ('----\n %s \n----' % (txt, )) # forward seq_length characters through the net and fetch gradient loss, dWxh, dWhh, dWhy, dbh, dby, hprev = lossFun(inputs, targets, hprev) smooth_loss = smooth_loss * 0.999 + loss * 0.001 if n % 100 == 0: print ('iter %d, loss: %f' % (n, smooth_loss)) # print progress # perform parameter update with Adagrad for param, dparam, mem in zip([Wxh, Whh, Why, bh, by], [dWxh, dWhh, dWhy, dbh, dby], [mWxh, mWhh, mWhy, mbh, mby]): mem += dparam * dparam param += -learning_rate * dparam / np.sqrt(mem + 1e-8) # adagrad update p += seq_length # move data pointer n += 1 # iteration counterfdsfds
결과
무한 loop 이라, 적정한 시점에 알아서 멈추면 된다.
아래 결과는 대략 12시간 정도를 돌렸다. 컴퓨터 스펙은 아래와 같다.
아래 결과는 대략 12시간 정도를 돌렸다. 컴퓨터 스펙은 아래와 같다.
- CPU: i7-6700, 3.40GHz
- RAM: 16GB
---- beahngy amo k ns aeo?cdse nh a taei.rairrhelardr nela haeiahe. Ddelnss.eelaishaner” cot AAfhB ht ltny ehbih a”on bhnte ectrsnae abeahngy amo k ns aeo?cdse nh a taei.rairrhelardr nol e iohahenasen ---- iter 9309400, loss: 0.000086 ---- e nh a taei.rairrhelardr naioa aneaa ayio pe e bhnte ayio pe e h’e btentmuhgehi bcgdltt. gey heho grpiahe. Ddelnss.eelaishaner” cot AAfhB ht ltny ehbih a”on bhnte ectrsnae abeahngy amo k ns aeo?cds ---- iter 9309500, loss: 0.000086 ---- jCTCnhoofeoxelif edElobe negnk e iohehasenoldndAmdaI ayio pe e h’e btentmuhgehi bcgdltt. gey heho grpiahe. Ddelnss.eelaishaner” cot AAfhB ht ltny ehbih a”on bhnte ectrsnae abeahngy amo k ns aeo?cds ---- iter 9309600, loss: 0.000086 ---- negnk e iohehasenoldndAmdaI ayio pe e h’e btentmuhgehi bcgdltt. gey heho grpiahe. Ddelnss.eelaishaner” cot AAfhB ht ltny ehbih a”on bhnte ectrsnae abeahngy amo k ns aeo?cdse nh a taei.rairrhelardr ---- iter 9309700, loss: 0.000086 ---- aI ayio pe e h’e btentmuhgehi bcgdltt. gey heho grpiahe. Ddelnss.eelaishaner” cot AAfhB ht ltny ehbih a”on bhnte ectrsnae abeahngy amo k ns aeo?cdse nh a taei.rairrhelardr neli ae e angnI hyho gben ---- iter 9309800, loss: 0.000086 ---- gehi bcgdltt. gey heho grpiahe. Ddelnss.eelaishaner” cot AAfhB ht ltny ehbih a”on bhnte ectrsnae abeahngy amo k ns aeo?cdse nh a taei.rairrhelardr nela dr iohecgrpiahe. Ddelnss.eelaishaner” cot AA ---- iter 9309900, loss: 0.000086 ---- piahe. Ddelnss.eelaishaner” cot AAfhB ht ltny ehbih a”on bhnte ectrsnae abeahngy amo k ns aeo?cdse nh a taei.rairrhelardr nol e iohahenasenese hbea bhnte ectrsnae abeahngy amo k ns aeo?cdse nh a t ---- iter 9310000, loss: 0.000086 ---- er” cot AAfhB ht ltny ehbih a”on bhnte ectrsnae abeahngy amo k ns aeo?cdse nh a taei.rairrhelardr nela hamnaI ayio pe e h’e btentmuhgnhi beahe Ddabealohe bee amoi bcgdltt. gey heho grpiahe. Ddeln ---- iter 9310100, loss: 0.000086 ---- bih a”on bhnte ectrsnae abeahngy amo k ns aeo?cdse nh a taei.rairrhelardr nol gyio pe e h’e btentmuhgehi bcgdltt. gey heho grpiahe. Ddelnss.eelaishaner” cot AAfhB ht ltny ehbih a”on bhnte ectrsnae ---- iter 9310200, loss: 0.000086 ---- beahngy amo k ns aeo?cdse nh a taei.rairrhelardr ntlhnegnns. e amo k ns aeh?cdse nh a taei.rairrhelardr nol e iohehengrpiahe. Ddelnss.eelaishaner” cot AAfhB ht ltny ehbih a”on bhnte ectrsnae abeah ---- iter 9310300, loss: 0.000086 ---- e nh a taei.rairrhelardr nol’e btentmuhgehi gcdslatha arenbggcodaeta tehr he ni.rhelaney gehnha e ar i ho bee amote ectrsnae abeahngy amo k ns aeo?cdse nh a taei.rairrhelardr nol nyio chge heiohecgr ---- iter 9310400, loss: 0.000086 ---- jCTCnhoofeoxelif edElobe negnk e iohehasenoldndAmdaI ayio pe e h’e btentmuhgehi bcgdltt. gey heho grpiahe. Ddelnss.eelaishaner” cot AAfhB ht ltny ehbih a”on bhnte ectrsnae abeahngy amo k ns aeo?cds ---- iter 9310500, loss: 0.000086 ---- negnk e iohehasenoldndAmdaI ayio pe e h’e btentmuhgehi bcgdltt. gey heho grpiahe. Ddelnss.eelaishaner” cot AAfhB ht ltny ehbih a”on bhnte ectrsnae abeahngy amo k ns aeo?cdse nh a taei.rairrhelardr ---- iter 9310600, loss: 0.000086 ---- aI ayio pe e h’e btentmuhgehi bcgdltt. gey heho grpiahe. Ddelnss.eelaishaner” cot AAfhB ht ltny ehbih a”on bhnte ectrsnae abeahngy amo k ns aeo?cdse nh a taei.rairrhelardr nelardae abeahngy amo k ---- iter 9310700, loss: 0.000086 ---- gehi bcgdltt. gey heho grpiahe. Ddelnss.eelaishaner” cot AAfhB ht ltny ehbih a”on bhnte ectrsnae abeahngy amo k ns aeo?cdse nh a taei.rairrhelardr ntl negnk t hi rsnse nhk br ne” a naeiarairr elirs ---- iter 9310800, loss: 0.000086 ---- piahe. Ddelnss.eelaishaner” cot AAfhB ht ltny ehbih a”on bhnte ectrsnae abeahngy amo k ns aeo?cdse nh a taei.rairrhelardr nelardaenabeahngelareierhi. aif edElobe negrcih gey gey heho grpiahe. Ddel ----
댓글 없음:
댓글 쓰기