Recurrent Neural Networks (RNN)
Related post: The Unreasonable Effectiveness of Recurrent Neural Networks
Original source: https://gist.github.com/karpathy/d4dee566867f8291f086
Python 3 version:
# encoding=utf8
"""
Minimal character-level Vanilla RNN model. Written by Andrej Karpathy (@karpathy)
BSD License
"""
import numpy as np
import codecs
# data I/O
with codecs.open('input.txt', 'r', encoding='utf-8') as fp:
  data = fp.read()
# data = open('input.txt', 'r').read() # should be simple plain text file
chars = list(set(data))
data_size, vocab_size = len(data), len(chars)
print('data has %d characters, %d unique.' % (data_size, vocab_size))
char_to_ix = { ch:i for i,ch in enumerate(chars) }
ix_to_char = { i:ch for i,ch in enumerate(chars) }
# hyperparameters
hidden_size = 100 # size of hidden layer of neurons
seq_length = 25 # number of steps to unroll the RNN for
learning_rate = 1e-1
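# (added note, not in the original gist) Rough effect of these hyperparameters:
# a larger hidden_size gives the model more capacity but makes every step more
# expensive; seq_length is how many characters of history the gradients are
# backpropagated through; the fairly large learning_rate of 1e-1 is workable
# because Adagrad (used in the update below) shrinks the effective step size
# per parameter as squared gradients accumulate.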
# model parameters
Wxh = np.random.randn(hidden_size, vocab_size)*0.01 # input to hidden
Whh = np.random.randn(hidden_size, hidden_size)*0.01 # hidden to hidden
Why = np.random.randn(vocab_size, hidden_size)*0.01 # hidden to output
bh = np.zeros((hidden_size, 1)) # hidden bias
by = np.zeros((vocab_size, 1)) # output bias
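# (added note, not in the original gist) The recurrence the code below
# implements, with x[t] a one-hot column vector of length vocab_size:
#   h[t] = tanh(Wxh @ x[t] + Whh @ h[t-1] + bh)   # hidden state
#   y[t] = Why @ h[t] + by                        # logits for the next char
#   p[t] = softmax(y[t])                          # distribution over next char
#   loss = sum over t of -log p[t][target[t]]     # cross-entropy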
def lossFun(inputs, targets, hprev):
  """
  inputs,targets are both list of integers.
  hprev is Hx1 array of initial hidden state
  returns the loss, gradients on model parameters, and last hidden state
  """
  xs, hs, ys, ps = {}, {}, {}, {}
  hs[-1] = np.copy(hprev)
  loss = 0
  # forward pass
  for t in range(len(inputs)):
    xs[t] = np.zeros((vocab_size,1)) # encode in 1-of-k representation
    xs[t][inputs[t]] = 1
    hs[t] = np.tanh(np.dot(Wxh, xs[t]) + np.dot(Whh, hs[t-1]) + bh) # hidden state
    ys[t] = np.dot(Why, hs[t]) + by # unnormalized log probabilities for next chars
    ps[t] = np.exp(ys[t]) / np.sum(np.exp(ys[t])) # probabilities for next chars
    loss += -np.log(ps[t][targets[t],0]) # softmax (cross-entropy loss)
  # backward pass: compute gradients going backwards
  dWxh, dWhh, dWhy = np.zeros_like(Wxh), np.zeros_like(Whh), np.zeros_like(Why)
  dbh, dby = np.zeros_like(bh), np.zeros_like(by)
  dhnext = np.zeros_like(hs[0])
  for t in reversed(range(len(inputs))):
    dy = np.copy(ps[t])
    dy[targets[t]] -= 1 # backprop into y. see http://cs231n.github.io/neural-networks-case-study/#grad if confused here
    dWhy += np.dot(dy, hs[t].T)
    dby += dy
    dh = np.dot(Why.T, dy) + dhnext # backprop into h
    dhraw = (1 - hs[t] * hs[t]) * dh # backprop through tanh nonlinearity
    dbh += dhraw
    dWxh += np.dot(dhraw, xs[t].T)
    dWhh += np.dot(dhraw, hs[t-1].T)
    dhnext = np.dot(Whh.T, dhraw)
  for dparam in [dWxh, dWhh, dWhy, dbh, dby]:
    np.clip(dparam, -5, 5, out=dparam) # clip to mitigate exploding gradients
  return loss, dWxh, dWhh, dWhy, dbh, dby, hs[len(inputs)-1]
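# ---------------------------------------------------------------------------
# (added sketch, not part of the original gist) Optional gradient check:
# compares lossFun's analytic gradients against centered numerical differences
# on a few randomly chosen parameter entries. The helper name grad_check and
# the defaults num_checks/delta are arbitrary; call it by hand on a short
# inputs/targets pair before training if you want to verify the backward pass.
# ---------------------------------------------------------------------------
def grad_check(inputs, targets, hprev, num_checks=5, delta=1e-5):
  _, dWxh, dWhh, dWhy, dbh, dby, _ = lossFun(inputs, targets, hprev)
  for param, dparam, name in zip([Wxh, Whh, Why, bh, by],
                                 [dWxh, dWhh, dWhy, dbh, dby],
                                 ['Wxh', 'Whh', 'Why', 'bh', 'by']):
    for i in range(num_checks):
      ri = int(np.random.randint(param.size))
      old_val = param.flat[ri]
      param.flat[ri] = old_val + delta
      loss_plus = lossFun(inputs, targets, hprev)[0]
      param.flat[ri] = old_val - delta
      loss_minus = lossFun(inputs, targets, hprev)[0]
      param.flat[ri] = old_val  # restore original value
      grad_numerical = (loss_plus - loss_minus) / (2 * delta)
      grad_analytic = dparam.flat[ri]
      denom = abs(grad_numerical) + abs(grad_analytic)
      rel_error = abs(grad_analytic - grad_numerical) / denom if denom > 0 else 0.0
      print('%s: numerical %f, analytic %f, relative error %e'
            % (name, grad_numerical, grad_analytic, rel_error))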
def sample(h, seed_ix, n):
  """
  sample a sequence of integers from the model
  h is memory state, seed_ix is seed letter for first time step
  """
  x = np.zeros((vocab_size, 1))
  x[seed_ix] = 1
  ixes = []
  for t in range(n):
    h = np.tanh(np.dot(Wxh, x) + np.dot(Whh, h) + bh)
    y = np.dot(Why, h) + by
    p = np.exp(y) / np.sum(np.exp(y))
    ix = np.random.choice(range(vocab_size), p=p.ravel())
    x = np.zeros((vocab_size, 1))
    x[ix] = 1
    ixes.append(ix)
  return ixes
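# Usage sketch (added note, not in the original gist): prime the model with a
# zero hidden state and the index of any character from the data, then decode
# the sampled indices back into text with ix_to_char, e.g.
#   sample_ix = sample(np.zeros((hidden_size, 1)), char_to_ix[data[0]], 200)
#   print(''.join(ix_to_char[ix] for ix in sample_ix))
# The training loop below does essentially this every 100 iterations.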
n, p = 0, 0
mWxh, mWhh, mWhy = np.zeros_like(Wxh), np.zeros_like(Whh), np.zeros_like(Why)
mbh, mby = np.zeros_like(bh), np.zeros_like(by) # memory variables for Adagrad
smooth_loss = -np.log(1.0/vocab_size)*seq_length # loss at iteration 0
while True:
  # prepare inputs (we're sweeping from left to right in steps seq_length long)
  if p+seq_length+1 >= len(data) or n == 0:
    hprev = np.zeros((hidden_size,1)) # reset RNN memory
    p = 0 # go from start of data
  inputs = [char_to_ix[ch] for ch in data[p:p+seq_length]]
  targets = [char_to_ix[ch] for ch in data[p+1:p+seq_length+1]]
  # sample from the model now and then
  if n % 100 == 0:
    sample_ix = sample(hprev, inputs[0], 200)
    txt = ''.join(ix_to_char[ix] for ix in sample_ix)
    print('----\n %s \n----' % (txt, ))
  # forward seq_length characters through the net and fetch gradient
  loss, dWxh, dWhh, dWhy, dbh, dby, hprev = lossFun(inputs, targets, hprev)
  smooth_loss = smooth_loss * 0.999 + loss * 0.001
  if n % 100 == 0: print('iter %d, loss: %f' % (n, smooth_loss)) # print progress
  # perform parameter update with Adagrad
  for param, dparam, mem in zip([Wxh, Whh, Why, bh, by],
                                [dWxh, dWhh, dWhy, dbh, dby],
                                [mWxh, mWhh, mWhy, mbh, mby]):
    mem += dparam * dparam
    param += -learning_rate * dparam / np.sqrt(mem + 1e-8) # adagrad update
  p += seq_length # move data pointer
  n += 1 # iteration counter
Results
The training loop runs forever, so just stop it yourself once the samples look good enough.
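One convenient way to make stopping (or a crash) painless is to dump the parameters to disk every so often. The sketch below is not part of the original gist; the helper names save_checkpoint/load_checkpoint, the filename, and the suggested save interval are arbitrary, and it assumes the script above has already defined the parameter and Adagrad-memory arrays.

def save_checkpoint(path='char-rnn-checkpoint.npz'):
  # persist the learned parameters, the Adagrad memories and the vocabulary
  # (chars is saved because the set() ordering above is not reproducible
  # across runs, and the weights only make sense with the same char indexing)
  np.savez(path, Wxh=Wxh, Whh=Whh, Why=Why, bh=bh, by=by,
           mWxh=mWxh, mWhh=mWhh, mWhy=mWhy, mbh=mbh, mby=mby,
           chars=np.array(chars))

def load_checkpoint(path='char-rnn-checkpoint.npz'):
  # restore the arrays in place so training or sampling can resume;
  # rebuilding char_to_ix/ix_to_char from ckpt['chars'] is left to the caller
  ckpt = np.load(path)
  for name, arr in [('Wxh', Wxh), ('Whh', Whh), ('Why', Why), ('bh', bh), ('by', by),
                    ('mWxh', mWxh), ('mWhh', mWhh), ('mWhy', mWhy), ('mbh', mbh), ('mby', mby)]:
    arr[...] = ckpt[name]

# e.g. add inside the training loop:
#   if n % 10000 == 0: save_checkpoint()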
The output below is from a run of roughly 12 hours on the following machine:
- CPU: i7-6700, 3.40GHz
- RAM: 16GB
----
 beahngy amo k ns aeo?cdse nh a taei.rairrhelardr nela haeiahe. Ddelnss.eelaishaner” cot AAfhB ht ltny ehbih a”on bhnte ectrsnae abeahngy amo k ns aeo?cdse nh a taei.rairrhelardr nol e iohahenasen 
----
iter 9309400, loss: 0.000086
----
 e nh a taei.rairrhelardr naioa aneaa ayio pe e bhnte ayio pe e h’e btentmuhgehi bcgdltt. gey heho grpiahe. Ddelnss.eelaishaner” cot AAfhB ht ltny ehbih a”on bhnte ectrsnae abeahngy amo k ns aeo?cds 
----
iter 9309500, loss: 0.000086
----
 jCTCnhoofeoxelif edElobe negnk e iohehasenoldndAmdaI ayio pe e h’e btentmuhgehi bcgdltt. gey heho grpiahe. Ddelnss.eelaishaner” cot AAfhB ht ltny ehbih a”on bhnte ectrsnae abeahngy amo k ns aeo?cds 
----
iter 9309600, loss: 0.000086
----
 negnk e iohehasenoldndAmdaI ayio pe e h’e btentmuhgehi bcgdltt. gey heho grpiahe. Ddelnss.eelaishaner” cot AAfhB ht ltny ehbih a”on bhnte ectrsnae abeahngy amo k ns aeo?cdse nh a taei.rairrhelardr 
----
iter 9309700, loss: 0.000086
----
 aI ayio pe e h’e btentmuhgehi bcgdltt. gey heho grpiahe. Ddelnss.eelaishaner” cot AAfhB ht ltny ehbih a”on bhnte ectrsnae abeahngy amo k ns aeo?cdse nh a taei.rairrhelardr neli ae e angnI hyho gben 
----
iter 9309800, loss: 0.000086
----
 gehi bcgdltt. gey heho grpiahe. Ddelnss.eelaishaner” cot AAfhB ht ltny ehbih a”on bhnte ectrsnae abeahngy amo k ns aeo?cdse nh a taei.rairrhelardr nela dr iohecgrpiahe. Ddelnss.eelaishaner” cot AA 
----
iter 9309900, loss: 0.000086
----
 piahe. Ddelnss.eelaishaner” cot AAfhB ht ltny ehbih a”on bhnte ectrsnae abeahngy amo k ns aeo?cdse nh a taei.rairrhelardr nol e iohahenasenese hbea bhnte ectrsnae abeahngy amo k ns aeo?cdse nh a t 
----
iter 9310000, loss: 0.000086
----
 er” cot AAfhB ht ltny ehbih a”on bhnte ectrsnae abeahngy amo k ns aeo?cdse nh a taei.rairrhelardr nela hamnaI ayio pe e h’e btentmuhgnhi beahe Ddabealohe bee amoi bcgdltt. gey heho grpiahe. Ddeln 
----
iter 9310100, loss: 0.000086
----
 bih a”on bhnte ectrsnae abeahngy amo k ns aeo?cdse nh a taei.rairrhelardr nol gyio pe e h’e btentmuhgehi bcgdltt. gey heho grpiahe. Ddelnss.eelaishaner” cot AAfhB ht ltny ehbih a”on bhnte ectrsnae 
----
iter 9310200, loss: 0.000086
----
 beahngy amo k ns aeo?cdse nh a taei.rairrhelardr ntlhnegnns. e amo k ns aeh?cdse nh a taei.rairrhelardr nol e iohehengrpiahe. Ddelnss.eelaishaner” cot AAfhB ht ltny ehbih a”on bhnte ectrsnae abeah 
----
iter 9310300, loss: 0.000086
----
 e nh a taei.rairrhelardr nol’e btentmuhgehi gcdslatha arenbggcodaeta tehr he ni.rhelaney gehnha e ar i ho bee amote ectrsnae abeahngy amo k ns aeo?cdse nh a taei.rairrhelardr nol nyio chge heiohecgr 
----
iter 9310400, loss: 0.000086
----
 jCTCnhoofeoxelif edElobe negnk e iohehasenoldndAmdaI ayio pe e h’e btentmuhgehi bcgdltt. gey heho grpiahe. Ddelnss.eelaishaner” cot AAfhB ht ltny ehbih a”on bhnte ectrsnae abeahngy amo k ns aeo?cds 
----
iter 9310500, loss: 0.000086
----
 negnk e iohehasenoldndAmdaI ayio pe e h’e btentmuhgehi bcgdltt. gey heho grpiahe. Ddelnss.eelaishaner” cot AAfhB ht ltny ehbih a”on bhnte ectrsnae abeahngy amo k ns aeo?cdse nh a taei.rairrhelardr 
----
iter 9310600, loss: 0.000086
----
 aI ayio pe e h’e btentmuhgehi bcgdltt. gey heho grpiahe. Ddelnss.eelaishaner” cot AAfhB ht ltny ehbih a”on bhnte ectrsnae abeahngy amo k ns aeo?cdse nh a taei.rairrhelardr nelardae abeahngy amo k 
----
iter 9310700, loss: 0.000086
----
 gehi bcgdltt. gey heho grpiahe. 
Ddelnss.eelaishaner” cot AAfhB ht ltny ehbih a”on bhnte ectrsnae abeahngy amo k ns aeo?cdse nh a taei.rairrhelardr ntl negnk t hi rsnse nhk br ne” a naeiarairr elirs 
----
iter 9310800, loss: 0.000086
----
 piahe. Ddelnss.eelaishaner” cot AAfhB ht ltny ehbih a”on bhnte ectrsnae abeahngy amo k ns aeo?cdse nh a taei.rairrhelardr nelardaenabeahngelareierhi. aif edElobe negrcih gey gey heho grpiahe. Ddel 
----