Objective converges at different speeds on GPU and CPU in Chainer


I am implementing an LSTM language model with Chainer 1.22.0. My code works on the CPU, but not on the GPU... More precisely, my objective function converges quickly on the CPU, but not on the GPU. Do you have any idea what is going on?

Thanks in advance for your help!

Output:

$ python debug.py --gpu -1
cpu mode
objective in epoch  0 :  14.8049154282
objective in epoch  1 :  11.7126655579
objective in epoch  2 :  10.6166152954
objective in epoch  3 :  9.81489753723
objective in epoch  4 :  8.90626144409
objective in epoch  5 :  7.73007297516
objective in epoch  6 :  6.31889343262
objective in epoch  7 :  4.83179998398
objective in epoch  8 :  3.52315592766
objective in epoch  9 :  2.58598852158

$ python debug.py --gpu 0
gpu mode  0
objective in epoch  0 :  14.8049144745
objective in epoch  1 :  14.3081817627
objective in epoch  2 :  14.0404243469
objective in epoch  3 :  13.8618173599
objective in epoch  4 :  13.7236022949
objective in epoch  5 :  13.6082553864
objective in epoch  6 :  13.5111179352
objective in epoch  7 :  13.4323377609
objective in epoch  8 :  13.3735141754
objective in epoch  9 :  13.3361949921

Environment:

  • Python 2.7.13
  • Chainer 1.22.0
  • CUDA 8.0

My model:

min  -∑_t log P(s_t | s_{<t})
where P(s_t | s_{<t}) = LSTM(s_{<t})
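
In other words, the model minimizes the negative log-likelihood of each word given the words before it. As a minimal numpy sketch of just the objective (with a hypothetical uniform next-word distribution standing in for the LSTM, not the model below):

import numpy as np

seq = [1, 2, 3, 4]          # one training sequence
voc_size = 5
log_p = np.log(np.full(voc_size, 1.0 / voc_size))  # toy uniform distribution

# sum the negative log-likelihood of every word after the first
objective = -sum(log_p[s_t] for s_t in seq[1:])
print objective  # 3 * log(5) ~ 4.83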

完整代码:

'''
Minimal reproduction script for this question.
'''
import numpy as np
try:
    import cupy as xp  # only needed (and available) in GPU mode
except ImportError:
    pass
import sys
import chainer as ch
import chainer.links as L
import chainer.functions as F

INT = "int32"
FLOAT = "float32"
BOOLEAN = "bool"



class LSTM(ch.Chain):
    def __init__(self, voc_size, in_size, out_size, batch_size):
        np.random.seed(0)
        w1 = np.random.normal(size=[voc_size, in_size])

        super(LSTM, self).__init__(
            emb=L.EmbedID(voc_size, in_size, initialW=w1),     # word embedding
            enc=L.LSTM(in_size=in_size, out_size=out_size),    # LSTM cell
            scores=L.Linear(out_size, voc_size)                # output transformation
        )
        self.batch_size = batch_size
        self.out_size = out_size
        self.gpu_idx = -1

    # move each link to the GPU (overrides the inherited Chain.to_gpu)
    def to_gpu(self, device_idx):
        self.gpu_idx = device_idx

        self.emb.to_gpu(device_idx)
        self.scores.to_gpu(device_idx)
        self.enc.to_gpu(device_idx)


    def obj(self, seq):
        # objective: negative log-likelihood of each word in the sequence
        return -F.sum(self.logL(seq))


    def logL(self, seq):
        '''
        seq    : batch of source sequences of length T : List<List<int>>
        RETURN : R^{batch_size x T} : computation-graph node
        '''
        T = xp if self.gpu_idx >= 0 else np  # array module: cupy on GPU, numpy on CPU
        padded = T.transpose(T.array(seq, dtype=INT)) #Z^{T x batch_size}

        #reset LSTM cell
        self.enc.reset_state()

        logL = []
        #logL for each time step except the first input
        for i in range(0, len(padded)-1):
            #get LSTM output
            h = self.enc(self.emb(padded[i])) #R^{batch_size x hidden_size}

            #log-probability distribution over the vocabulary
            s = self.scores(F.tanh(h))        #R^{batch_size x voc_size}
            s = F.transpose(F.log_softmax(s)) #R^{voc_size x batch_size}

            #log-likelihood of the observed next word
            l = F.embed_id(padded[i+1], s)    #R^{batch_size x batch_size}
            l = F.sum(l * T.identity(self.batch_size), axis=0) #R^{batch_size}
            logL += [l]

        return F.transpose(F.stack(logL))




GPU_TAG = "--gpu"
if __name__=="__main__":
    args= sys.argv
    gpu_idx = -1
    i=0

    #argument
    while i<len(args):
        if args[i]==GPU_TAG:
            i+=1
            gpu_idx = int(args[i])
        i+=1

    #hyperparameters
    voc_size = 5
    batch_size = 3
    in_size = 5
    out_size = 2

    #instantiate the model
    model = LSTM(voc_size, in_size, out_size, batch_size)

    #GPU mode or CPU mode
    if gpu_idx>=0:
        print "gpu mode ", gpu_idx
        ch.cuda.get_device(gpu_idx).use()
        model.to_gpu(gpu_idx)
    else:
        print "cpu mode"

    #prepare optimizer
    trainer = ch.optimizers.SGD(lr=0.3)
    trainer.setup(model)

    #seq to train
    x = [[1,2,3,4]]*batch_size

    #main training loop
    for epoch in range(10):
        obj = model.obj(x)  #forward pass
        model.cleargrads()  #clear gradients before the backward pass
        obj.backward()      #backward pass
        print "objective in epoch ", epoch, ": ", obj.data
        trainer.update()    #parameter update
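
For what it's worth, the likelihood-extraction trick in logL can be checked in isolation. A minimal numpy sketch (toy values, no Chainer) of what the embed_id-plus-identity lines compute:

import numpy as np

batch_size, voc_size = 3, 5
s = np.arange(voc_size * batch_size, dtype="float32").reshape(voc_size, batch_size)
next_words = np.array([1, 2, 3], dtype="int32")

picked = s[next_words]  # rows indexed by the next words: {batch_size x batch_size}

# multiplying by the identity and summing over axis 0 keeps the diagonal,
# i.e. l[j] = s[next_words[j], j]
l = (picked * np.identity(batch_size)).sum(axis=0)
print l  # [ 3.  7. 11.], same as s[next_words, np.arange(batch_size)]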

1 Answer

I solved the problem myself... It seems that some of my operations on the computation graph work on the CPU but not on the GPU.

The problem is fixed by replacing this code:

s = F.transpose(F.log_softmax(s)) #R^{voc_size x batch_size}
#log-likelihood of the observed next word
l = F.embed_id(padded[i+1], s)
l = F.sum(l * T.identity(self.batch_size), axis=0)

with:

s = F.log_softmax(s)
l = F.select_item(s, padded[i+1])
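
For anyone hitting the same issue: F.select_item(x, t) picks x[i, t[i]] for each row i, so it computes the same per-sample log-likelihood directly, without the transpose, embed_id, and identity-matrix detour. A quick sanity check on the CPU (toy values, not from the original post):

import numpy as np
import chainer.functions as F

# toy batch of 3 samples over a vocabulary of 5, uniform log-probabilities
log_p = np.log(np.full((3, 5), 0.2, dtype="float32"))
targets = np.array([1, 2, 3], dtype="int32")

l = F.select_item(log_p, targets)  # l[i] = log_p[i, targets[i]]
print l.data  # [-1.609... -1.609... -1.609...]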

Thanks for your help!
