model.py
Source: model.py
```python
import tensorflow as tf
import numpy as np
import random


class S2VT_model():

    def __init__(self, frame_steps=80, frame_feat_dim=4096, caption_steps=45, vocab_size=3000, dim_hidden=300):

        self.frame_steps = frame_steps
        self.frame_feat_dim = frame_feat_dim
        self.caption_steps = caption_steps
        self.vocab_size = vocab_size
        self.dim_hidden = dim_hidden

        ## Graph input
        self.frame = tf.placeholder(tf.float32, [None, frame_steps, frame_feat_dim])
        self.caption = tf.placeholder(tf.int32, [None, caption_steps + 1])
        self.caption_mask = tf.placeholder(tf.float32, [None, caption_steps + 1])
        self.scheduled_sampling_prob = tf.placeholder(tf.float32, [], name='scheduled_sampling_prob')
        batch_frame = tf.shape(self.frame)[0]
        batch_caption = tf.shape(self.caption)[0]
        tf.Assert(tf.equal(batch_frame, batch_caption), [batch_frame, batch_caption])
        batch_size = batch_frame
        self.train_state = tf.placeholder(tf.bool)

        ## frame embedding params
        with tf.variable_scope("frame_embedding"):
            w_frame_embed = tf.get_variable("w_frame_embed", [frame_feat_dim, dim_hidden],
                                            initializer=tf.contrib.layers.xavier_initializer(dtype=tf.float32))
            b_frame_embed = tf.get_variable("b_frame_embed", [dim_hidden],
                                            initializer=tf.constant_initializer(0.0))

        ## word embedding param
        with tf.device("/cpu:0"):
            embedding = tf.get_variable("embedding", [vocab_size, dim_hidden], dtype=tf.float32)

        ## word-embedding-to-logits params
        w_word_onehot = tf.get_variable("w_word_onehot", [dim_hidden, vocab_size],
                                        initializer=tf.contrib.layers.xavier_initializer(dtype=tf.float32))
        b_word_onehot = tf.get_variable("b_word_onehot", [vocab_size],
                                        initializer=tf.constant_initializer(0.0))

        ## two LSTM params
        with tf.variable_scope("att_lstm"):
            att_lstm = tf.contrib.rnn.LSTMCell(dim_hidden)
        with tf.variable_scope("cap_lstm"):
            cap_lstm = tf.contrib.rnn.LSTMCell(dim_hidden)

        att_state = (tf.zeros([batch_size, dim_hidden]), tf.zeros([batch_size, dim_hidden]))
        cap_state = (tf.zeros([batch_size, dim_hidden]), tf.zeros([batch_size, dim_hidden]))

        padding = tf.zeros([batch_size, dim_hidden])

        ##################### Computing Graph ########################

        frame_flat = tf.reshape(self.frame, [-1, frame_feat_dim])
        frame_embedding = tf.nn.xw_plus_b(frame_flat, w_frame_embed, b_frame_embed)
        frame_embedding = tf.reshape(frame_embedding, [batch_size, frame_steps, dim_hidden])

        cap_lstm_outputs = []

        ## Encoding stage
        for i in range(frame_steps):
            with tf.variable_scope('att_lstm'):
                if i > 0:
                    tf.get_variable_scope().reuse_variables()
                output1, att_state = att_lstm(frame_embedding[:, i, :], att_state)
            ## input shape of cap_lstm: [batch_size, 2*dim_hidden]
            with tf.variable_scope('cap_lstm'):
                if i > 0:
                    tf.get_variable_scope().reuse_variables()
                output2, cap_state = cap_lstm(tf.concat([padding, output1], 1), cap_state)

        ## Decoding stage
        ## Training util
        def train_cap(prev_layer_output, ground_truth_word, prev_state):
            ## teacher forcing: feed the ground-truth word id of the previous step
            with tf.device('/cpu:0'):
                word_embed = tf.nn.embedding_lookup(embedding, ground_truth_word)
            output, state = cap_lstm(tf.concat([word_embed, prev_layer_output], 1), prev_state)
            m_state, c_state = state
            return output, m_state, c_state

        def test_cap(prev_layer_output, prev_word_logits, prev_state):
            ## TODO: beam search
            word_index = tf.argmax(prev_word_logits, axis=1)
            word_embed = tf.nn.embedding_lookup(embedding, word_index)
            output, state = cap_lstm(tf.concat([word_embed, prev_layer_output], 1), prev_state)
            m_state, c_state = state
            return output, m_state, c_state

        ## id 4 is the <BOS> token
        prev_word_logits = tf.tile(tf.one_hot([4], vocab_size), [batch_size, 1])
        scheduled_sampling_distribution = tf.random_uniform([caption_steps], 0, 1)
        for i in range(caption_steps):

            with tf.variable_scope('att_lstm'):
                tf.get_variable_scope().reuse_variables()
                output1, att_state = att_lstm(padding, att_state)

            with tf.variable_scope('cap_lstm'):
                tf.get_variable_scope().reuse_variables()
                output2, m_state, c_state = tf.cond(
                    self.train_state,
                    lambda: train_cap(output1, self.caption[:, i], cap_state),
                    lambda: test_cap(output1, prev_word_logits, cap_state))
            cap_state = (m_state, c_state)
            cap_lstm_outputs.append(output2)
            ## logits of the word just emitted, fed back at the next step in test mode
            prev_word_logits = tf.nn.xw_plus_b(output2, w_word_onehot, b_word_onehot)

        output = tf.reshape(tf.concat(cap_lstm_outputs, 1), [-1, dim_hidden])

        ## shape (batch_size*caption_steps, vocab_size)
        onehot_word_logits = tf.nn.xw_plus_b(output, w_word_onehot, b_word_onehot)
        self.predict_result = tf.reshape(tf.argmax(onehot_word_logits[:, 2:], 1) + 2,
                                         [batch_size, caption_steps])

        loss = tf.contrib.legacy_seq2seq.sequence_loss_by_example(
            [onehot_word_logits],
            [tf.reshape(self.caption[:, 1:], [-1])],
            [tf.reshape(self.caption_mask[:, 1:], [-1])])

        self.cost = tf.reduce_mean(loss)
        self.global_step = tf.Variable(0, trainable=False)
        self.train_op = tf.train.AdamOptimizer().minimize(self.cost, global_step=self.global_step)

        config = tf.ConfigProto(log_device_placement=True)
        config.gpu_options.allow_growth = True

        self.sess = tf.Session(config=config)

    def train(self, input_frame, input_caption, input_caption_mask, keep_prob=0.5):
        _, cost = self.sess.run([self.train_op, self.cost],
                                feed_dict={self.frame: input_frame,
                                           self.caption: input_caption,
                                           self.caption_mask: input_caption_mask,
                                           self.train_state: True})
        return cost

    def predict(self, input_frame):
        padding = np.zeros([input_frame.shape[0], self.caption_steps + 1])
        words = self.sess.run([self.predict_result],
                              feed_dict={self.frame: input_frame,
                                         self.caption: padding,
                                         self.train_state: False,
                                         self.scheduled_sampling_prob: 0.0})
        return words

    def initialize(self):
        self.sess.run(tf.global_variables_initializer())

    def schedule_sampling(self):
        ## self.schedule_sampling_converge must be set by the caller before use
        prob = self.sess.run(self.global_step) / self.schedule_sampling_converge
        return random.random() > prob


class S2VT_attention_model():

    def __init__(self, frame_steps=20, frame_feat_dim=4096, caption_steps=45, vocab_size=3000, dim_hidden=300):

        self.frame_steps = frame_steps
        self.frame_feat_dim = frame_feat_dim
        self.caption_steps = caption_steps
        self.vocab_size = vocab_size
        self.dim_hidden = dim_hidden

        ## Graph input
        self.frame = tf.placeholder(tf.float32, [None, frame_steps, frame_feat_dim])
        self.caption = tf.placeholder(tf.int64, [None, caption_steps + 1])
        self.caption_mask = tf.placeholder(tf.float32, [None, caption_steps + 1])
        self.scheduled_sampling_prob = tf.placeholder(tf.float32, [], name='scheduled_sampling_prob')
        batch_frame = tf.shape(self.frame)[0]
        batch_caption = tf.shape(self.caption)[0]
        tf.Assert(tf.equal(batch_frame, batch_caption), [batch_frame, batch_caption])
        self.batch_size = batch_frame
        self.train_state = tf.placeholder(tf.bool)
        self.keep_prob = tf.placeholder(tf.float32)

        self.global_step = tf.Variable(0, trainable=False)

        ## frame embedding params
        with tf.variable_scope("frame_embedding"):
            w_frame_embed = tf.get_variable("w_frame_embed", [frame_feat_dim, dim_hidden],
                                            initializer=tf.contrib.layers.xavier_initializer(dtype=tf.float32))
            b_frame_embed = tf.get_variable("b_frame_embed", [dim_hidden],
                                            initializer=tf.constant_initializer(0.0))

        ## word embedding param
        with tf.device("/cpu:0"):
            embedding = tf.get_variable("embedding", [vocab_size, dim_hidden], dtype=tf.float32)

        ## word-embedding-to-logits params
        w_word_onehot = tf.get_variable("w_word_onehot", [dim_hidden, vocab_size],
                                        initializer=tf.contrib.layers.xavier_initializer(dtype=tf.float32))
        b_word_onehot = tf.get_variable("b_word_onehot", [vocab_size],
                                        initializer=tf.constant_initializer(0.0))

        ## attention position embedding
        wp = tf.get_variable("w_position_emb_1", [self.dim_hidden, self.dim_hidden],
                             initializer=tf.contrib.layers.xavier_initializer(dtype=tf.float32))
        vp = tf.get_variable("w_position_emb_2", [1, self.dim_hidden],
                             initializer=tf.contrib.layers.xavier_initializer(dtype=tf.float32))

        ## attention align embedding
        wa = tf.get_variable("w_align_emb", [self.dim_hidden, self.dim_hidden],
                             initializer=tf.contrib.layers.xavier_initializer(dtype=tf.float32))

        ## attention output embedding
        wc = tf.get_variable("w_attention_emb", [2 * self.dim_hidden, self.dim_hidden],
                             initializer=tf.contrib.layers.xavier_initializer(dtype=tf.float32))

        ## two LSTM params
        with tf.variable_scope("att_lstm"):
            att_lstm = tf.contrib.rnn.LSTMCell(dim_hidden)
            #att_lstm = tf.contrib.rnn.DropoutWrapper(att_lstm, input_keep_prob=self.keep_prob, output_keep_prob=self.keep_prob)
        with tf.variable_scope("cap_lstm"):
            cap_lstm = tf.contrib.rnn.LSTMCell(dim_hidden)
            #cap_lstm = tf.contrib.rnn.DropoutWrapper(cap_lstm, input_keep_prob=self.keep_prob, output_keep_prob=self.keep_prob)

        att_state = (tf.zeros([self.batch_size, dim_hidden]), tf.zeros([self.batch_size, dim_hidden]))
        cap_state = (tf.zeros([self.batch_size, dim_hidden]), tf.zeros([self.batch_size, dim_hidden]))

        padding = tf.zeros([self.batch_size, dim_hidden])

        ##################### Computing Graph ########################

        frame_flat = tf.reshape(self.frame, [-1, frame_feat_dim])
        frame_embedding = tf.nn.xw_plus_b(frame_flat, w_frame_embed, b_frame_embed)
        frame_embedding = tf.reshape(frame_embedding, [self.batch_size, frame_steps, dim_hidden])

        enc_lstm_outputs = []
        dec_lstm_outputs = []

        ## Encoding stage
        for i in range(frame_steps):
            with tf.variable_scope('att_lstm'):
                if i > 0:
                    tf.get_variable_scope().reuse_variables()
                output1, att_state = att_lstm(frame_embedding[:, i, :], att_state)
            ## input shape of cap_lstm: [batch_size, 2*dim_hidden]
            with tf.variable_scope('cap_lstm'):
                if i > 0:
                    tf.get_variable_scope().reuse_variables()
                output2, cap_state = cap_lstm(tf.concat([padding, output1], 1), cap_state)
            enc_lstm_outputs.append(output2)

        ## (batch_size, frame_steps, dim_hidden)
        enc_lstm_outputs = tf.reshape(tf.concat(enc_lstm_outputs, 1),
                                      [self.batch_size, self.frame_steps, self.dim_hidden])

        ## Decoding stage
        ## Training util
        def train_cap(input_lstm, prev_encoder_output, real_ans, prev_decoder_output, global_step, prev_state):
            ## scheduled sampling: use the ground truth or the model's own previous word
            word_index = tf.cond(self.scheduled_sampling_prob <= tf.random_uniform([], 0, 1),
                                 lambda: real_ans,
                                 lambda: tf.argmax(prev_decoder_output, axis=1))
            with tf.device('/cpu:0'):
                word_embed = tf.nn.embedding_lookup(embedding, word_index)
            output, state = input_lstm(tf.concat([word_embed, prev_encoder_output], 1), prev_state)
            m_state, c_state = state
            return output, m_state, c_state

        def test_cap(input_lstm, prev_encoder_output, prev_decoder_output, prev_state):
            ## TODO: beam search
            with tf.device('/cpu:0'):
                word_index = tf.argmax(prev_decoder_output, axis=1)
                word_embed = tf.nn.embedding_lookup(embedding, word_index)
            output, state = input_lstm(tf.concat([word_embed, prev_encoder_output], 1), prev_state)
            m_state, c_state = state
            return output, m_state, c_state

        ## Decoding stage; id 4 is the <BOS> token
        prev_step_word = tf.tile(tf.one_hot([4], vocab_size), [self.batch_size, 1])
        for i in range(caption_steps):

            with tf.variable_scope('att_lstm'):
                tf.get_variable_scope().reuse_variables()
                output1, att_state = att_lstm(padding, att_state)

            with tf.variable_scope('cap_lstm'):
                tf.get_variable_scope().reuse_variables()
                output2, m_state, c_state = tf.cond(
                    self.train_state,
                    lambda: train_cap(cap_lstm, output1, self.caption[:, i], prev_step_word, self.global_step, cap_state),
                    lambda: test_cap(cap_lstm, output1, prev_step_word, cap_state))
            cap_state = (m_state, c_state)
            prev_step_word = tf.nn.xw_plus_b(output2, w_word_onehot, b_word_onehot)
            ## Attention (disabled)
            #output2 = self.local_attention(output2, enc_lstm_outputs, wp, vp, wa)
            #concat_output = tf.concat([attention_output, output2], 1)
            #output2 = tf.tanh(tf.matmul(concat_output, wc))
            dec_lstm_outputs.append(prev_step_word)

        onehot_word_logits = tf.reshape(tf.concat(dec_lstm_outputs, 1), [-1, vocab_size])

        self.predict_result = tf.reshape(tf.argmax(onehot_word_logits[:, 2:], 1) + 2,
                                         [self.batch_size, caption_steps])

        onehot_word_logits = tf.unstack(tf.reshape(onehot_word_logits,
                                                   [self.batch_size, caption_steps, vocab_size]), axis=1)
        caption_ans = tf.unstack(self.caption[:, 1:], axis=1)
        caption_ans_mask = tf.unstack(self.caption_mask[:, 1:], axis=1)
        loss = tf.contrib.legacy_seq2seq.sequence_loss_by_example(onehot_word_logits,
                                                                  caption_ans,
                                                                  caption_ans_mask)

        self.cost = tf.reduce_mean(loss)
        self.train_op = tf.train.AdamOptimizer(learning_rate=0.001).minimize(self.cost, global_step=self.global_step)

        config = tf.ConfigProto(log_device_placement=True)
        config.gpu_options.allow_growth = True

        self.sess = tf.Session(config=config)

    def train(self, input_frame, input_caption, input_caption_mask, keep_prob=0.5, scheduled_sampling_prob=0.0):
        _, cost = self.sess.run([self.train_op, self.cost],
                                feed_dict={self.frame: input_frame,
                                           self.caption: input_caption,
                                           self.caption_mask: input_caption_mask,
                                           self.train_state: True,
                                           self.scheduled_sampling_prob: scheduled_sampling_prob,
                                           self.keep_prob: keep_prob})
        return cost

    def predict(self, input_frame):
        padding = np.zeros([input_frame.shape[0], self.caption_steps + 1])
        words = self.sess.run([self.predict_result],
                              feed_dict={self.frame: input_frame,
                                         self.caption: padding,
                                         self.train_state: False,
                                         self.scheduled_sampling_prob: 1.0,
                                         self.keep_prob: 1.0})
        return words

    def initialize(self):
        self.sess.run(tf.global_variables_initializer())

    def local_attention(self, decode_vec, encode_vecs, wp, vp, wa):
        ## (batch_size, frame_steps)
        score = self.align(decode_vec, encode_vecs, wa)
        ## (dim_hidden, batch_size)
        decode_vec_t = tf.transpose(decode_vec, [1, 0])
        ## (1, batch_size)
        pos_feature = tf.matmul(vp, tf.tanh(tf.matmul(wp, decode_vec_t)))
        ## (batch_size,)
        pt = tf.reshape(self.frame_steps * tf.sigmoid(pos_feature), [self.batch_size])
        local_center = tf.round(pt)

        half_window = 2  #tf.constant(4, shape=[1])
        delta = half_window / 2

        def index_frame(ele):
            frames, center, pt, score = ele
            s = tf.range(self.frame_steps, dtype=tf.float32)
            score = score * tf.exp(-tf.square(s - pt) / (2 * delta * delta))
            right = tf.minimum(center + half_window, self.frame_steps)
            left = tf.maximum(center - half_window, 0)
            right = tf.cast(right, tf.int32)
            left = tf.cast(left, tf.int32)
            score = tf.expand_dims(score, 0)
            attention_vec = tf.matmul(score[:, left:right], frames[left:right, :])
            attention_vec = tf.reshape(attention_vec, [self.dim_hidden])
            return attention_vec

        ## (batch_size, dim_hidden)
        attention_vec = tf.map_fn(index_frame, [encode_vecs, local_center, pt, score], dtype=tf.float32)
        return attention_vec + decode_vec

    def align(self, decode_vec, encode_vecs, wa):
        ## (batch_size, dim_hidden, frame_steps)
        encode_vecs_t = tf.transpose(encode_vecs, [0, 2, 1])
        ## (batch_size, 1, dim_hidden) x (batch_size, dim_hidden, frame_steps)
        score = tf.matmul(tf.expand_dims(tf.matmul(decode_vec, wa), 1), encode_vecs_t)
        score = tf.reshape(score, [self.batch_size, self.frame_steps])
        ## (batch_size, frame_steps)
        return score

    def saveModel(self, filepath):
        global_step = self.sess.run(self.global_step)
        saver = tf.train.Saver()
        saver.save(self.sess, './' + filepath + '_para/model_%d.ckpt' % (global_step))

    def loadModel(self, model_path):
        saver = tf.train.Saver(restore_sequentially=True)
        saver.restore(self.sess, model_path)


class Effective_attention_model():

    def __init__(self, frame_steps=20, frame_feat_dim=4096, caption_steps=45, vocab_size=3000, dim_hidden=200):

        self.frame_steps = frame_steps
        self.frame_feat_dim = frame_feat_dim
        self.caption_steps = caption_steps
        self.vocab_size = vocab_size
        self.dim_hidden = dim_hidden

        ## Graph input
        self.frame = tf.placeholder(tf.float32, [None, frame_steps, frame_feat_dim])
        self.caption = tf.placeholder(tf.int64, [None, caption_steps + 1])
        self.caption_mask = tf.placeholder(tf.float32, [None, caption_steps + 1])
        batch_frame = tf.shape(self.frame)[0]
        batch_caption = tf.shape(self.caption)[0]
        tf.Assert(tf.equal(batch_frame, batch_caption), [batch_frame, batch_caption])
        self.batch_size = batch_frame
        self.train_state = tf.placeholder(tf.bool)
        self.scheduled_sampling_prob = tf.placeholder(tf.float32, [], name='scheduled_sampling_prob')
        self.keep_prob = tf.placeholder(tf.float32)

        self.global_step = tf.Variable(0, trainable=False)

        ## frame embedding params
        with tf.variable_scope("frame_embedding"):
            w_frame_embed = tf.get_variable("w_frame_embed", [frame_feat_dim, 2 * dim_hidden],
                                            initializer=tf.contrib.layers.xavier_initializer(dtype=tf.float32))
            b_frame_embed = tf.get_variable("b_frame_embed", [2 * dim_hidden],
                                            initializer=tf.constant_initializer(0.0))

        ## word embedding param
        with tf.device("/cpu:0"):
            embedding = tf.get_variable("embedding", [vocab_size, dim_hidden], dtype=tf.float32)

        ## word-embedding-to-logits params
        w_word_onehot = tf.get_variable("w_word_onehot", [dim_hidden, vocab_size],
                                        initializer=tf.contrib.layers.xavier_initializer(dtype=tf.float32))
        b_word_onehot = tf.get_variable("b_word_onehot", [vocab_size],
                                        initializer=tf.constant_initializer(0.0))

        ## attention position embedding
        wp = tf.get_variable("w_position_emb_1", [self.dim_hidden, self.dim_hidden],
                             initializer=tf.contrib.layers.xavier_initializer(dtype=tf.float32))
        vp = tf.get_variable("w_position_emb_2", [1, self.dim_hidden],
                             initializer=tf.contrib.layers.xavier_initializer(dtype=tf.float32))

        ## attention align embedding
        wa = tf.get_variable("w_align_emb", [self.dim_hidden, self.dim_hidden],
                             initializer=tf.contrib.layers.xavier_initializer(dtype=tf.float32))

        ## attention output embedding
        wc = tf.get_variable("w_attention_emb", [2 * self.dim_hidden, self.dim_hidden],
                             initializer=tf.contrib.layers.xavier_initializer(dtype=tf.float32))

        ## two LSTM params
        with tf.variable_scope("att_lstm"):
            att_lstm = tf.contrib.rnn.LSTMCell(dim_hidden)
            att_lstm = tf.contrib.rnn.DropoutWrapper(att_lstm, input_keep_prob=self.keep_prob, output_keep_prob=self.keep_prob)
        with tf.variable_scope("cap_lstm"):
            cap_lstm = tf.contrib.rnn.LSTMCell(dim_hidden)
            cap_lstm = tf.contrib.rnn.DropoutWrapper(cap_lstm, input_keep_prob=self.keep_prob, output_keep_prob=self.keep_prob)

        att_state = (tf.zeros([self.batch_size, dim_hidden]), tf.zeros([self.batch_size, dim_hidden]))
        cap_state = (tf.zeros([self.batch_size, dim_hidden]), tf.zeros([self.batch_size, dim_hidden]))

        padding = tf.zeros([self.batch_size, dim_hidden])

        ##################### Computing Graph ########################

        frame_flat = tf.reshape(self.frame, [-1, frame_feat_dim])
        frame_embedding = tf.nn.xw_plus_b(frame_flat, w_frame_embed, b_frame_embed)
        frame_embedding = tf.reshape(frame_embedding, [self.batch_size, frame_steps, 2 * dim_hidden])

        enc_lstm_outputs = []
        dec_lstm_outputs = []

        ## Encoding stage
        for i in range(frame_steps):
            with tf.variable_scope('att_lstm'):
                if i > 0:
                    tf.get_variable_scope().reuse_variables()
                output1, att_state = att_lstm(frame_embedding[:, i, :], att_state)
            with tf.variable_scope('cap_lstm'):
                if i > 0:
                    tf.get_variable_scope().reuse_variables()
                output2, cap_state = cap_lstm(output1, cap_state)
            enc_lstm_outputs.append(output2)

        ## (batch_size, frame_steps, dim_hidden)
        enc_lstm_outputs = tf.reshape(tf.concat(enc_lstm_outputs, 1),
                                      [self.batch_size, self.frame_steps, self.dim_hidden])

        ## Decoding stage
        ## Training util
        def train_cap(input_lstm, real_ans, prev_decoder_output, prev_attention_output, global_step, prev_state):
            with tf.device('/cpu:0'):
                word_index = tf.cond(self.scheduled_sampling_prob >= tf.random_uniform([], 0, 1),
                                     lambda: real_ans,
                                     lambda: tf.argmax(prev_decoder_output, axis=1))
                #word_index = tf.argmax(real_ans, axis=1)
                word_embed = tf.nn.embedding_lookup(embedding, word_index)
            output, state = input_lstm(tf.concat([word_embed, prev_attention_output], 1), prev_state)
            m_state, c_state = state
            return output, m_state, c_state

        def test_cap(input_lstm, prev_decoder_output, prev_attention_output, prev_state):
            ## TODO: beam search
            with tf.device('/cpu:0'):
                word_index = tf.argmax(prev_decoder_output, axis=1)
                word_embed = tf.nn.embedding_lookup(embedding, word_index)
            output, state = input_lstm(tf.concat([word_embed, prev_attention_output], 1), prev_state)
            m_state, c_state = state
            return output, m_state, c_state

        ## Decoding stage; id 4 is the <BOS> token
        prev_step_word = tf.tile(tf.one_hot([4], vocab_size), [self.batch_size, 1])
        attention_output = tf.zeros(shape=[self.batch_size, dim_hidden])
        for i in range(caption_steps):

            with tf.variable_scope('att_lstm'):
                tf.get_variable_scope().reuse_variables()
                output1, m_state, c_state = tf.cond(
                    self.train_state,
                    lambda: train_cap(att_lstm, self.caption[:, i], prev_step_word, attention_output, self.global_step, att_state),
                    lambda: test_cap(att_lstm, prev_step_word, attention_output, att_state))
                att_state = (m_state, c_state)

            with tf.variable_scope('cap_lstm'):
                tf.get_variable_scope().reuse_variables()
                output2, cap_state = cap_lstm(output1, cap_state)

            ## Attention
            attention_output = self.global_attention(output2, enc_lstm_outputs, wa)
            #attention_output = self.local_attention(output2, enc_lstm_outputs, wp, vp, wa)
            concat_output = tf.concat([attention_output, output2], 1)
            attention_output = tf.tanh(tf.matmul(concat_output, wc))
            prev_step_word = tf.nn.xw_plus_b(attention_output, w_word_onehot, b_word_onehot)
            dec_lstm_outputs.append(prev_step_word)

        onehot_word_logits = tf.reshape(tf.concat(dec_lstm_outputs, 1), [-1, vocab_size])

        self.predict_result = tf.reshape(tf.argmax(onehot_word_logits[:, 2:], 1) + 2,
                                         [self.batch_size, caption_steps])

        onehot_word_logits = tf.unstack(tf.reshape(onehot_word_logits,
                                                   [self.batch_size, caption_steps, vocab_size]), axis=1)
        caption_ans = tf.unstack(self.caption[:, 1:], axis=1)
        caption_ans_mask = tf.unstack(self.caption_mask[:, 1:], axis=1)
        loss = tf.contrib.legacy_seq2seq.sequence_loss_by_example(onehot_word_logits,
                                                                  caption_ans,
                                                                  caption_ans_mask)

        self.cost = tf.reduce_mean(loss)
        self.train_op = tf.train.AdamOptimizer(learning_rate=0.001).minimize(self.cost, global_step=self.global_step)

        config = tf.ConfigProto(log_device_placement=True)
        config.gpu_options.allow_growth = True

        self.sess = tf.Session(config=config)

    def train(self, input_frame, input_caption, input_caption_mask, keep_prob=0.5, scheduled_sampling_prob=0.0):
        _, cost = self.sess.run([self.train_op, self.cost],
                                feed_dict={self.frame: input_frame,
                                           self.caption: input_caption,
                                           self.caption_mask: input_caption_mask,
                                           self.train_state: True,
                                           self.scheduled_sampling_prob: scheduled_sampling_prob,
                                           self.keep_prob: keep_prob})
        return cost

    def predict(self, input_frame):
        padding = np.zeros([input_frame.shape[0], self.caption_steps + 1])
        words = self.sess.run([self.predict_result],
                              feed_dict={self.frame: input_frame,
                                         self.caption: padding,
                                         self.train_state: False,
                                         self.scheduled_sampling_prob: 1.0,
                                         self.keep_prob: 1.0})
        return words

    def initialize(self):
        self.sess.run(tf.global_variables_initializer())

    def global_attention(self, decode_vec, encode_vecs, wa):
        ## (batch_size, frame_steps)
        score = tf.nn.softmax(self.score(decode_vec, encode_vecs, wa))
        attention_vec = tf.reduce_sum(encode_vecs * tf.tile(tf.expand_dims(score, 2), [1, 1, self.dim_hidden]), 1)
        return attention_vec

    def local_attention(self, decode_vec, encode_vecs, wp, vp, wa):
        ## (batch_size, frame_steps)
        score = self.score(decode_vec, encode_vecs, wa)
        ## (dim_hidden, batch_size)
        decode_vec_t = tf.transpose(decode_vec, [1, 0])
        ## (1, batch_size)
        pos_feature = tf.matmul(vp, tf.tanh(tf.matmul(wp, decode_vec_t)))
        ## (batch_size,)
        pt = tf.reshape(self.frame_steps * tf.sigmoid(pos_feature), [self.batch_size])
        local_center = tf.round(pt)

        half_window = 2  #tf.constant(4, shape=[1])
        delta = half_window / 2

        def index_frame(ele):
            frames_ind, center_ind, pt_ind, score_ind = ele
            right = tf.minimum(center_ind + half_window + 1, self.frame_steps)
            left = tf.maximum(center_ind - half_window, 0)
            right = tf.cast(right, tf.int32)
            left = tf.cast(left, tf.int32)
            frames_ind = frames_ind[left:right, :]
            score_ind = tf.nn.softmax(score_ind[left:right])
            s = tf.range(self.frame_steps, dtype=tf.float32)
            s = s[left:right]
            score_ind = score_ind * tf.exp(-tf.square(s - pt_ind) / (2 * delta * delta))
            score_ind = tf.expand_dims(score_ind, 0)
            attention_vec = tf.matmul(score_ind, frames_ind)
            attention_vec = tf.reshape(attention_vec, [self.dim_hidden])
            return attention_vec

        ## (batch_size, dim_hidden)
        attention_vec = tf.map_fn(index_frame, [encode_vecs, local_center, pt, score], dtype=tf.float32)
        return attention_vec

    def saveModel(self, filepath):
        global_step = self.sess.run(self.global_step)
        saver = tf.train.Saver()
        saver.save(self.sess, './' + filepath + '_para/model_%d.ckpt' % (global_step))

    def loadModel(self, model_path):
        saver = tf.train.Saver(restore_sequentially=True)
        saver.restore(self.sess, model_path)

    def score(self, decode_vec, encode_vecs, wa):
        ## (batch_size, dim_hidden, frame_steps)
        encode_vecs_t = tf.transpose(encode_vecs, [0, 2, 1])
        ## (batch_size, 1, dim_hidden) x (batch_size, dim_hidden, frame_steps)
        score = tf.matmul(tf.expand_dims(tf.matmul(decode_vec, wa), 1), encode_vecs_t)
        score = tf.reshape(score, [self.batch_size, self.frame_steps])
        ## (batch_size, frame_steps)
        return score


"""
class Adversary_S2VT_model():

    def __init__(self, frame_steps=20, frame_feat_dim=4096, caption_steps=45, vocab_size=3000, dim_hidden=200):
        self.frame_steps = frame_steps
        self.frame_feat_dim = frame_feat_dim
        self.caption_steps = caption_steps
        self.vocab_size = vocab_size
        self.dim_hidden = dim_hidden

        ## Graph input
        self.frame = tf.placeholder(tf.float32, [None, frame_steps, frame_feat_dim])
        self.caption = tf.placeholder(tf.int64, [None, caption_steps + 1])
        self.caption_mask = tf.placeholder(tf.float32, [None, caption_steps + 1])
        self.scheduled_sampling_prob = tf.placeholder(tf.float32, [], name='scheduled_sampling_prob')
        batch_frame = tf.shape(self.frame)[0]
        batch_caption = tf.shape(self.caption)[0]
        tf.Assert(tf.equal(batch_frame, batch_caption), [batch_frame, batch_caption])
        self.batch_size = batch_frame
        self.train_state = tf.placeholder(tf.bool)
        self.keep_prob = tf.placeholder(tf.float32)

        self.global_step = tf.Variable(0, trainable=False)

        ## frame embedding params
        with tf.variable_scope("frame_embedding"):
            w_frame_embed = tf.get_variable("w_frame_embed", [frame_feat_dim, dim_hidden],
                                            initializer=tf.contrib.layers.xavier_initializer(dtype=tf.float32))
            b_frame_embed = tf.get_variable("b_frame_embed", [dim_hidden],
                                            initializer=tf.constant_initializer(0.0))

        ## frame reconstruction params
        with tf.variable_scope("reframe_embedding"):
            w_reframe_embed = tf.get_variable("w_reframe_embed", [dim_hidden, frame_feat_dim],
                                              initializer=tf.contrib.layers.xavier_initializer(dtype=tf.float32))
            b_reframe_embed = tf.get_variable("b_reframe_embed", [frame_feat_dim],
                                              initializer=tf.constant_initializer(0.0))

        ## word embedding param
        with tf.device("/cpu:0"):
            embedding = tf.get_variable("embedding", [vocab_size, dim_hidden], dtype=tf.float32)

        ## word-embedding-to-logits params
        w_word_onehot = tf.get_variable("w_word_onehot", [dim_hidden, vocab_size],
                                        initializer=tf.contrib.layers.xavier_initializer(dtype=tf.float32))
        b_word_onehot = tf.get_variable("b_word_onehot", [vocab_size],
                                        initializer=tf.constant_initializer(0.0))

        ## two LSTM params
        with tf.variable_scope("att_lstm"):
            att_lstm = tf.contrib.rnn.LSTMCell(dim_hidden)
            att_lstm = tf.contrib.rnn.DropoutWrapper(att_lstm, input_keep_prob=self.keep_prob, output_keep_prob=self.keep_prob)
        with tf.variable_scope("cap_lstm"):
            cap_lstm = tf.contrib.rnn.LSTMCell(dim_hidden)
            cap_lstm = tf.contrib.rnn.DropoutWrapper(cap_lstm, input_keep_prob=self.keep_prob, output_keep_prob=self.keep_prob)

        att_state = (tf.zeros([self.batch_size, dim_hidden]), tf.zeros([self.batch_size, dim_hidden]))
        cap_state = (tf.zeros([self.batch_size, dim_hidden]), tf.zeros([self.batch_size, dim_hidden]))

        padding = tf.nn.embedding_lookup(embedding, tf.zeros(shape=[self.batch_size], dtype=tf.int32))

        ##################### Computing Graph ########################

        frame_flat = tf.reshape(self.frame, [-1, frame_feat_dim])
        frame_embedding = tf.nn.xw_plus_b(frame_flat, w_frame_embed, b_frame_embed)
        frame_embedding = tf.reshape(frame_embedding, [self.batch_size, frame_steps, dim_hidden])

        enc_lstm_outputs = []
        dec_lstm_outputs = []

        ## Encoding stage
        for i in range(frame_steps):
            with tf.variable_scope('att_lstm'):
                if i > 0:
                    tf.get_variable_scope().reuse_variables()
                output1, att_state = att_lstm(frame_embedding[:, i, :], att_state)
            with tf.variable_scope('cap_lstm'):
                if i > 0:
                    tf.get_variable_scope().reuse_variables()
                output2, cap_state = cap_lstm(tf.concat([padding, output1], 1), cap_state)
            enc_lstm_outputs.append(output2)

        ## (batch_size, frame_steps, dim_hidden)
        enc_lstm_outputs = tf.reshape(tf.concat(enc_lstm_outputs, 1),
                                      [self.batch_size, self.frame_steps, self.dim_hidden])

        ## Decoding stage
        ## Training util
        def train_cap(input_lstm, prev_encoder_output, real_ans, prev_decoder_output, global_step, prev_state):
            word_index = tf.cond(self.scheduled_sampling_prob >= tf.random_uniform([], 0, 1),
                                 lambda: real_ans,
                                 lambda: tf.argmax(prev_decoder_output, axis=1))
            with tf.device('/cpu:0'):
                word_embed = tf.nn.embedding_lookup(embedding, word_index)
            output, state = input_lstm(tf.concat([word_embed, prev_encoder_output], 1), prev_state)
            m_state, c_state = state
            return output, m_state, c_state

        def test_cap(input_lstm, prev_encoder_output, prev_decoder_output, prev_state):
            ## TODO: beam search
            with tf.device('/cpu:0'):
                word_index = tf.argmax(prev_decoder_output, axis=1)
                word_embed = tf.nn.embedding_lookup(embedding, word_index)
            output, state = input_lstm(tf.concat([word_embed, prev_encoder_output], 1), prev_state)
            m_state, c_state = state
            return output, m_state, c_state

        ## Decoding stage; id 4 is the <BOS> token
        prev_step_word = tf.tile(tf.one_hot([4], vocab_size), [self.batch_size, 1])
        for i in range(caption_steps):

            with tf.variable_scope('att_lstm'):
                tf.get_variable_scope().reuse_variables()
                output1, att_state = att_lstm(padding, att_state)

            with tf.variable_scope('cap_lstm'):
                tf.get_variable_scope().reuse_variables()
                output2, m_state, c_state = tf.cond(
                    self.train_state,
                    lambda: train_cap(cap_lstm, output1, self.caption[:, i], prev_step_word, self.global_step, cap_state),
                    lambda: test_cap(cap_lstm, output1, prev_step_word, cap_state))
            cap_state = (m_state, c_state)
            prev_step_word = tf.nn.xw_plus_b(output2, w_word_onehot, b_word_onehot)
            dec_lstm_outputs.append(prev_step_word)

        onehot_word_logits = tf.reshape(tf.concat(dec_lstm_outputs, 1), [-1, vocab_size])

        self.predict_result = tf.reshape(tf.argmax(onehot_word_logits[:, 2:], 1) + 2,
                                         [self.batch_size, caption_steps])

        onehot_word_logits = tf.unstack(tf.reshape(onehot_word_logits,
                                                   [self.batch_size, caption_steps, vocab_size]), axis=1)
        caption_ans = tf.unstack(self.caption[:, 1:], axis=1)
        caption_ans_mask = tf.unstack(self.caption_mask[:, 1:], axis=1)
        caption_loss = tf.contrib.legacy_seq2seq.sequence_loss_by_example(onehot_word_logits,
                                                                          caption_ans,
                                                                          caption_ans_mask)

        #################### second stage #######################

        with tf.variable_scope("second_att_lstm"):
            second_att_lstm = tf.contrib.rnn.LSTMCell(dim_hidden)
            second_att_lstm = tf.contrib.rnn.DropoutWrapper(second_att_lstm, input_keep_prob=self.keep_prob, output_keep_prob=self.keep_prob)
        with tf.variable_scope("second_cap_lstm"):
            second_cap_lstm = tf.contrib.rnn.LSTMCell(dim_hidden)
            second_cap_lstm = tf.contrib.rnn.DropoutWrapper(second_cap_lstm, input_keep_prob=self.keep_prob, output_keep_prob=self.keep_prob)
        second_att_state = (tf.zeros([self.batch_size, dim_hidden]), tf.zeros([self.batch_size, dim_hidden]))
        second_cap_state = (tf.zeros([self.batch_size, dim_hidden]), tf.zeros([self.batch_size, dim_hidden]))

        ##################### Computing Graph ########################

        frame_flat = tf.reshape(self.frame, [-1, frame_feat_dim])
        frame_embedding = tf.nn.xw_plus_b(frame_flat, w_frame_embed, b_frame_embed)
        frame_embedding = tf.reshape(frame_embedding, [self.batch_size, frame_steps, dim_hidden])

        second_enc_lstm_outputs = []
        second_dec_lstm_outputs = []

        ## Encoding stage
        for i in range(caption_steps):
            with tf.variable_scope('second_att_lstm'):
                if i > 0:
                    tf.get_variable_scope().reuse_variables()
                word_index = tf.cond(self.scheduled_sampling_prob >= tf.random_uniform([], 0, 1),
                                     lambda: self.caption[:, i + 1],
                                     lambda: tf.argmax(dec_lstm_outputs[i], axis=1) * tf.cast(self.caption_mask[:, i + 1], tf.int64))
                with tf.device('/cpu:0'):
                    word_embed = tf.nn.embedding_lookup(embedding, word_index)
                output1, second_att_state = second_att_lstm(word_embed, second_att_state)
            with tf.variable_scope('second_cap_lstm'):
                if i > 0:
                    tf.get_variable_scope().reuse_variables()
                output2, second_cap_state = second_cap_lstm(tf.concat([padding, output1], 1), second_cap_state)
            second_enc_lstm_outputs.append(output2)

        ## second decoding stage: reconstruct the frame features
        prev_step_word = second_enc_lstm_outputs[-1]
        for i in range(self.frame_steps):
            with tf.variable_scope('second_att_lstm'):
                tf.get_variable_scope().reuse_variables()
                output1, second_att_state = second_att_lstm(padding, second_att_state)

            with tf.variable_scope('second_cap_lstm'):
                tf.get_variable_scope().reuse_variables()
                output2, second_cap_state = second_cap_lstm(tf.concat([prev_step_word, output1], 1), second_cap_state)
            prev_step_word = tf.nn.xw_plus_b(self.frame[:, i, :], w_frame_embed, b_frame_embed)
            second_dec_lstm_outputs.append(tf.nn.xw_plus_b(output2, w_reframe_embed, b_reframe_embed))

        second_dec_lstm_outputs = tf.reshape(tf.concat(second_dec_lstm_outputs, 1), [-1, frame_feat_dim])
        frame_loss = tf.reduce_mean(tf.sqrt(tf.reduce_mean(tf.square(tf.subtract(second_dec_lstm_outputs, frame_flat)), -1)))

        ratio = 0.7
        self.cost = ratio * tf.reduce_mean(caption_loss) + (1 - ratio) * frame_loss
        self.train_op = tf.train.AdamOptimizer(learning_rate=0.001).minimize(self.cost, global_step=self.global_step)
        self.train_conv_op = tf.train.AdamOptimizer(learning_rate=0.001).minimize(
            self.cost,
            var_list=[w_frame_embed, b_frame_embed, w_reframe_embed, b_reframe_embed,
                      embedding, w_word_onehot, b_word_onehot],
            global_step=self.global_step)

        config = tf.ConfigProto(log_device_placement=True)
        config.gpu_options.allow_growth = True

        self.sess = tf.Session(config=config)

    def train(self, input_frame, input_caption, input_caption_mask, keep_prob=0.5, scheduled_sampling_prob=0.0):
        if scheduled_sampling_prob == 1:
            _, cost = self.sess.run([self.train_conv_op, self.cost],
                                    feed_dict={self.frame: input_frame,
                                               self.caption: input_caption,
                                               self.caption_mask: input_caption_mask,
                                               self.train_state: True,
                                               self.scheduled_sampling_prob: scheduled_sampling_prob,
                                               self.keep_prob: keep_prob})
        else:
            _, cost = self.sess.run([self.train_op, self.cost],
                                    feed_dict={self.frame: input_frame,
                                               self.caption: input_caption,
                                               self.caption_mask: input_caption_mask,
                                               self.train_state: True,
                                               self.scheduled_sampling_prob: scheduled_sampling_prob,
                                               self.keep_prob: keep_prob})
        return cost

    def predict(self, input_frame):
        padding = np.zeros([input_frame.shape[0], self.caption_steps + 1])
        words = self.sess.run([self.predict_result],
                              feed_dict={self.frame: input_frame,
                                         self.caption: padding,
                                         self.train_state: False,
                                         self.scheduled_sampling_prob: 1.0,
                                         self.keep_prob: 1.0})
        return words

    def initialize(self):
        self.sess.run(tf.global_variables_initializer())

    def saveModel(self, filepath):
        global_step = self.sess.run(self.global_step)
        saver = tf.train.Saver()
        saver.save(self.sess, './' + filepath + '_para/model_%d.ckpt' % (global_step))

    def loadModel(self, model_path):
        saver = tf.train.Saver(restore_sequentially=True)
        saver.restore(self.sess, model_path)
"""
```
...
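None of the classes above ship with a driver, so here is a minimal smoke-test sketch (an addition, not part of the original model.py). It assumes a TensorFlow 1.x environment with `tf.contrib` available, that the listing above is saved as `model.py`, and uses random arrays in place of real video features together with the constructor defaults and the hard-coded `<BOS>` id 4 from the code above.

```python
import numpy as np
import tensorflow as tf

from model import S2VT_attention_model  # assumes the listing above is saved as model.py

if __name__ == '__main__':
    tf.reset_default_graph()
    model = S2VT_attention_model(frame_steps=20, frame_feat_dim=4096,
                                 caption_steps=45, vocab_size=3000, dim_hidden=300)
    model.initialize()

    batch_size = 2
    frames = np.random.randn(batch_size, 20, 4096).astype(np.float32)   # fake CNN features
    captions = np.random.randint(0, 3000, size=(batch_size, 46)).astype(np.int64)
    mask = np.ones((batch_size, 46), dtype=np.float32)

    # One teacher-forced training step, then greedy decoding on the same batch.
    cost = model.train(frames, captions, mask, scheduled_sampling_prob=0.0)
    print('training cost:', cost)
    print('predicted word ids:', model.predict(frames))
```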
layers.py
Source: layers.py
```python
import tensorflow as tf
import numpy as np
import tensorflow.contrib.slim as slim  # used by coeff_estimate; import was missing in the original


def lrelu(x):
    return tf.maximum(x * 0.2, x)


def upsample_and_concat_c(x1, x2, output_channels, in_channels, scope, reuse=False):
    with tf.variable_scope(scope, reuse=reuse):
        pool_size = 2
        deconv_filter = tf.get_variable(shape=[pool_size, pool_size, output_channels, in_channels],
                                        initializer=tf.truncated_normal_initializer(stddev=0.02),
                                        name='dcf')
        deconv = tf.nn.conv2d_transpose(x1, deconv_filter, tf.shape(x2), strides=[1, pool_size, pool_size, 1])
        deconv_output = tf.concat([deconv, x2], 3)
        # deconv_output.set_shape([None, None, None, output_channels*2])
        return deconv_output


def affine_mapping(x, in_chan, out_chan, scope, reuse=False):
    with tf.variable_scope(scope, reuse=reuse):
        mapping = tf.get_variable(shape=[in_chan + 1, out_chan],
                                  initializer=tf.truncated_normal_initializer(0.0, 1.0),
                                  dtype=tf.float32, name='mapping')
        x_pixels = tf.reshape(x, [-1, in_chan])
        bias = tf.ones_like(x_pixels[:, 0:1])
        x_pixels = tf.concat([x_pixels, bias], 1)
        x_pixels = tf.matmul(x_pixels, mapping)
        shape = tf.shape(x)
        return tf.reshape(x_pixels, [shape[0], shape[1], shape[2], out_chan])


def coeff_estimate(x, chan, scope, reuse=False):
    ## small U-Net-style encoder/decoder that predicts per-pixel coefficients
    with tf.variable_scope(scope, reuse=reuse):
        shape = tf.shape(x)
        conv0 = slim.conv2d(x, chan, [3, 3], activation_fn=lrelu, scope='g_conv0', reuse=reuse)
        pool0 = slim.max_pool2d(conv0, [2, 2], padding='SAME', scope='pool0')
        conv1 = slim.conv2d(pool0, chan, [3, 3], activation_fn=lrelu, scope='g_conv1', reuse=reuse)
        pool1 = slim.max_pool2d(conv1, [2, 2], padding='SAME', scope='pool1')
        conv2 = slim.conv2d(pool1, chan, [3, 3], activation_fn=lrelu, scope='g_conv2', reuse=reuse)
        pool2 = slim.max_pool2d(conv2, [2, 2], padding='SAME', scope='pool2')
        conv3 = slim.conv2d(pool2, chan, [3, 3], activation_fn=lrelu, scope='conv0', reuse=reuse)
        conv3 = slim.conv2d(conv3, chan, [3, 3], activation_fn=lrelu, scope='conv1', reuse=reuse)
        dconv2 = upsample_and_concat_c(conv3, conv2, chan, chan, scope='uac2', reuse=reuse)
        dconv2 = slim.conv2d(dconv2, chan, [3, 3], activation_fn=lrelu, scope='d_conv2', reuse=reuse)
        dconv1 = upsample_and_concat_c(dconv2, conv1, chan, chan, scope='uac1', reuse=reuse)
        dconv1 = slim.conv2d(dconv1, chan, [3, 3], activation_fn=lrelu, scope='d_conv1', reuse=reuse)
        dconv0 = upsample_and_concat_c(dconv1, conv0, chan, chan, scope='uac0', reuse=reuse)
        dconv0 = slim.conv2d(dconv0, chan, [3, 3], activation_fn=lrelu, scope='d_conv0', reuse=reuse)
        return dconv0


'''
def perpixel_conv(fp, coef, chan, scope, reuse=False):
    with tf.variable_scope(scope, reuse=reuse):
        padding = 1
        paddings = [[0, 0], [padding, padding], [padding, padding], [0, 0]]
        fp = tf.pad(fp, paddings, name='pad')
        result0 = fp[:, 0:-2, 0:-2, :] * coef[:, :, :, :chan]
        result1 = fp[:, 1:-1, 0:-2, :] * coef[:, :, :, chan:chan*2]
        result2 = fp[:, 2:, 0:-2, :] * coef[:, :, :, chan*2:chan*3]
        result3 = fp[:, 0:-2, 1:-1, :] * coef[:, :, :, chan*3:chan*4]
        result4 = fp[:, 1:-1, 1:-1, :] * coef[:, :, :, chan*4:chan*5]
        result5 = fp[:, 2:, 1:-1, :] * coef[:, :, :, chan*5:chan*6]
        result6 = fp[:, 0:-2, 2:, :] * coef[:, :, :, chan*6:chan*7]
        result7 = fp[:, 1:-1, 2:, :] * coef[:, :, :, chan*7:chan*8]
        result8 = fp[:, 2:, 2:, :] * coef[:, :, :, chan*8:chan*9]
        result = (result0 + result1 + result2 + result3 + result4 + result5
                  + result6 + result7 + result8 + coef[:, :, :, chan*9:chan*10])
        return lrelu(result)
'''


def perpixel_affine(fp, coef, chan, scope, reuse=False):
    ## per-pixel 3x3 affine combination: 9 spatial taps plus a bias channel
    with tf.variable_scope(scope, reuse=reuse):
        padding = 1
        paddings = [[0, 0], [padding, padding], [padding, padding], [0, 0]]
        fp = tf.pad(fp, paddings, name='pad')
        result0 = fp[:, 0:-2, 0:-2, :] * coef[:, :, :, :chan]
        result1 = fp[:, 1:-1, 0:-2, :] * coef[:, :, :, chan:chan*2]
        result2 = fp[:, 2:, 0:-2, :] * coef[:, :, :, chan*2:chan*3]
        result3 = fp[:, 0:-2, 1:-1, :] * coef[:, :, :, chan*3:chan*4]
        result4 = fp[:, 1:-1, 1:-1, :] * coef[:, :, :, chan*4:chan*5]
        result5 = fp[:, 2:, 1:-1, :] * coef[:, :, :, chan*5:chan*6]
        result6 = fp[:, 0:-2, 2:, :] * coef[:, :, :, chan*6:chan*7]
        result7 = fp[:, 1:-1, 2:, :] * coef[:, :, :, chan*7:chan*8]
        result8 = fp[:, 2:, 2:, :] * coef[:, :, :, chan*8:chan*9]
        result = (result0 + result1 + result2 + result3 + result4 + result5
                  + result6 + result7 + result8 + coef[:, :, :, chan*9:chan*10])
        return lrelu(result)


def guided_upsampling(input_ftmp, guide_ftmp):
    # input_ftmp must be a [Batch, H, W, Intensity, Channel] shaped feature map
    # guide_ftmp must be a [Batch, H*factor, W*factor, 1] shaped feature map
    def get_pixel_value(img, x, y, z):
        ## Getting parameters
        batch_size = tf.shape(img)[0]
        height = tf.shape(x)[0]
        width = tf.shape(x)[1]
        ## Preprocessing
        x = tf.cast(x, dtype=tf.int32)
        y = tf.cast(y, dtype=tf.int32)
        z = tf.cast(z, dtype=tf.int32)
        x = tf.expand_dims(x, 0)
        y = tf.expand_dims(y, 0)
        z = tf.expand_dims(z, 0)
        x = tf.tile(x, [batch_size, 1, 1])
        y = tf.tile(y, [batch_size, 1, 1])  # x, y, z = [b, h, w]
        z = tf.tile(z, [batch_size, 1, 1])
        # Then b
        batch_idx = tf.range(0, batch_size)
        batch_idx = tf.reshape(batch_idx, (batch_size, 1, 1))
        b = tf.tile(batch_idx, (1, height, width))  # b = [b, h, w]

        indices = tf.stack([b, y, x, z], 3)
        return tf.gather_nd(img, indices)

    ##### Do the job
    shape = tf.shape(input_ftmp)
    height = shape[1]
    width = shape[2]
    intensity = shape[3]
    height_s, width_s, intensity_s = (tf.cast(height, dtype=tf.float32),
                                      tf.cast(width, dtype=tf.float32),
                                      tf.cast(intensity, dtype=tf.float32))
    new_shape = tf.shape(guide_ftmp)
    new_height = new_shape[1]
    new_width = new_shape[2]
    # create meshgrid
    x = tf.linspace(0.0, 1.0, new_width)
    y = tf.linspace(0.0, 1.0, new_height)
    x_t, y_t = tf.meshgrid(x, y)
    z_t = guide_ftmp[0, :, :, 0]
    # Transform the coords
    x_te = x_t * (width_s - 1.0)
    y_te = y_t * (height_s - 1.0)
    z_te = z_t * (intensity_s - 1.0)
    # 8 neighborhood
    x0 = tf.floor(x_te)
    x1 = x0 + 1.0
    y0 = tf.floor(y_te)
    y1 = y0 + 1.0
    z0 = tf.floor(z_te)
    z1 = z0 + 1.0
    x0 = tf.clip_by_value(x0, 0.0, width_s - 1.0)
    x1 = tf.clip_by_value(x1, 0.0, width_s - 1.0)
    y0 = tf.clip_by_value(y0, 0.0, height_s - 1.0)
    y1 = tf.clip_by_value(y1, 0.0, height_s - 1.0)
    z0 = tf.clip_by_value(z0, 0.0, intensity_s - 1.0)
    z1 = tf.clip_by_value(z1, 0.0, intensity_s - 1.0)
    Ia = get_pixel_value(input_ftmp, x0, y0, z0)
    Ib = get_pixel_value(input_ftmp, x0, y0, z1)
    Ic = get_pixel_value(input_ftmp, x1, y0, z0)
    Id = get_pixel_value(input_ftmp, x1, y0, z1)
    Ie = get_pixel_value(input_ftmp, x0, y1, z0)
    If = get_pixel_value(input_ftmp, x0, y1, z1)
    Ig = get_pixel_value(input_ftmp, x1, y1, z0)
    Ih = get_pixel_value(input_ftmp, x1, y1, z1)
    ## trilinear (tent) weights over the 8 corners
    wa = tf.maximum(1.0 - tf.abs(x0 - x_te), 0.0) * tf.maximum(1.0 - tf.abs(y0 - y_te), 0.0) * tf.maximum(1.0 - tf.abs(z0 - z_te), 0.0)
    wb = tf.maximum(1.0 - tf.abs(x0 - x_te), 0.0) * tf.maximum(1.0 - tf.abs(y0 - y_te), 0.0) * tf.maximum(1.0 - tf.abs(z1 - z_te), 0.0)
    wc = tf.maximum(1.0 - tf.abs(x1 - x_te), 0.0) * tf.maximum(1.0 - tf.abs(y0 - y_te), 0.0) * tf.maximum(1.0 - tf.abs(z0 - z_te), 0.0)
    wd = tf.maximum(1.0 - tf.abs(x1 - x_te), 0.0) * tf.maximum(1.0 - tf.abs(y0 - y_te), 0.0) * tf.maximum(1.0 - tf.abs(z1 - z_te), 0.0)
    we = tf.maximum(1.0 - tf.abs(x0 - x_te), 0.0) * tf.maximum(1.0 - tf.abs(y1 - y_te), 0.0) * tf.maximum(1.0 - tf.abs(z0 - z_te), 0.0)
    wf = tf.maximum(1.0 - tf.abs(x0 - x_te), 0.0) * tf.maximum(1.0 - tf.abs(y1 - y_te), 0.0) * tf.maximum(1.0 - tf.abs(z1 - z_te), 0.0)
    wg = tf.maximum(1.0 - tf.abs(x1 - x_te), 0.0) * tf.maximum(1.0 - tf.abs(y1 - y_te), 0.0) * tf.maximum(1.0 - tf.abs(z0 - z_te), 0.0)
    wh = tf.maximum(1.0 - tf.abs(x1 - x_te), 0.0) * tf.maximum(1.0 - tf.abs(y1 - y_te), 0.0) * tf.maximum(1.0 - tf.abs(z1 - z_te), 0.0)
    wa = tf.expand_dims(tf.expand_dims(wa, axis=0), 3)
    wb = tf.expand_dims(tf.expand_dims(wb, axis=0), 3)
    wc = tf.expand_dims(tf.expand_dims(wc, axis=0), 3)
    wd = tf.expand_dims(tf.expand_dims(wd, axis=0), 3)
    we = tf.expand_dims(tf.expand_dims(we, axis=0), 3)
    wf = tf.expand_dims(tf.expand_dims(wf, axis=0), 3)
    wg = tf.expand_dims(tf.expand_dims(wg, axis=0), 3)
    wh = tf.expand_dims(tf.expand_dims(wh, axis=0), 3)
    out = tf.add_n([wa*Ia, wb*Ib, wc*Ic, wd*Id, we*Ie, wf*If, wg*Ig, wh*Ih])
    return out


def gaussian_func(x1, x2, sigma):
    return tf.exp(-1.0 * ((x1 - x2) ** 2.0) / (2.0 * (sigma ** 2.0)))


def bilateral_joint_upsampling(input_ftmp, guide_ftmp, factor_g=0.2, factor_s=1.0, scope=None, reuse=False):
    with tf.variable_scope(scope, reuse=reuse):
        shape = tf.shape(input_ftmp)
        batchsize = shape[0]
        height = shape[1]
        width = shape[2]
        height_s, width_s = tf.cast(height, dtype=tf.float32), tf.cast(width, dtype=tf.float32)
        new_shape = tf.shape(guide_ftmp)
        new_height = new_shape[1]
        new_width = new_shape[2]
        new_height_s, new_width_s = tf.cast(new_height, dtype=tf.float32), tf.cast(new_width, dtype=tf.float32)
        x = tf.linspace(0.0, 1.0, new_width)
        y = tf.linspace(0.0, 1.0, new_height)
        xt, yt = tf.meshgrid(x, y)
        xt = tf.tile(tf.expand_dims(tf.expand_dims(xt, 0), 3), [batchsize, 1, 1, 1])
        yt = tf.tile(tf.expand_dims(tf.expand_dims(yt, 0), 3), [batchsize, 1, 1, 1])

        ## Spatial
        xd = tf.clip_by_value((width_s - 1.0) * xt, 0.0, width_s - 1.0)
        yd = tf.clip_by_value((height_s - 1.0) * yt, 0.0, height_s - 1.0)
        xd0 = tf.floor(xd)
        xd1 = xd0 + 1.0
        yd0 = tf.floor(yd)
        yd1 = yd0 + 1.0
        xd0 = tf.clip_by_value(xd0, 0.0, width_s - 1.0)
        xd1 = tf.clip_by_value(xd1, 0.0, width_s - 1.0)
        yd0 = tf.clip_by_value(yd0, 0.0, height_s - 1.0)
        yd1 = tf.clip_by_value(yd1, 0.0, height_s - 1.0)
        batch_idx = tf.range(0, batchsize)
        batch_idx = tf.reshape(batch_idx, (batchsize, 1, 1, 1))
        bd = tf.tile(batch_idx, (1, new_height, new_width, 1))

        indices00 = tf.concat([bd, tf.cast(yd0, tf.int32), tf.cast(xd0, tf.int32)], 3)
        indices01 = tf.concat([bd, tf.cast(yd0, tf.int32), tf.cast(xd1, tf.int32)], 3)
        indices10 = tf.concat([bd, tf.cast(yd1, tf.int32), tf.cast(xd0, tf.int32)], 3)
        indices11 = tf.concat([bd, tf.cast(yd1, tf.int32), tf.cast(xd1, tf.int32)], 3)

        I00 = tf.gather_nd(input_ftmp, indices00)
        I01 = tf.gather_nd(input_ftmp, indices01)
        I10 = tf.gather_nd(input_ftmp, indices10)
        I11 = tf.gather_nd(input_ftmp, indices11)

        #ws00 = gaussian_func(yd0, yd, factor_s) * gaussian_func(xd0, xd, factor_s)
        #ws01 = gaussian_func(yd0, yd, factor_s) * gaussian_func(xd1, xd, factor_s)
        #ws10 = gaussian_func(yd1, yd, factor_s) * gaussian_func(xd0, xd, factor_s)
        #ws11 = gaussian_func(yd1, yd, factor_s) * gaussian_func(xd1, xd, factor_s)
        ws00 = tf.maximum(1.0 - factor_s * tf.abs(yd0 - yd), 0.0) * tf.maximum(1.0 - factor_s * tf.abs(xd0 - xd), 0.0)
        ws01 = tf.maximum(1.0 - factor_s * tf.abs(yd0 - yd), 0.0) * tf.maximum(1.0 - factor_s * tf.abs(xd1 - xd), 0.0)
        ws10 = tf.maximum(1.0 - factor_s * tf.abs(yd1 - yd), 0.0) * tf.maximum(1.0 - factor_s * tf.abs(xd0 - xd), 0.0)
        ws11 = tf.maximum(1.0 - factor_s * tf.abs(yd1 - yd), 0.0) * tf.maximum(1.0 - factor_s * tf.abs(xd1 - xd), 0.0)

        ## Guide
        xu = tf.clip_by_value((new_width_s - 1.0) * xt, 0.0, new_width_s - 1.0)
        yu = tf.clip_by_value((new_height_s - 1.0) * yt, 0.0, new_height_s - 1.0)
        xu0 = tf.clip_by_value((new_width_s - 1.0) * xd0 / (width_s - 1.0), 0.0, new_width_s - 1.0)
        xu1 = tf.clip_by_value((new_width_s - 1.0) * xd1 / (width_s - 1.0), 0.0, new_width_s - 1.0)
        yu0 = tf.clip_by_value((new_height_s - 1.0) * yd0 / (height_s - 1.0), 0.0, new_height_s - 1.0)
        yu1 = tf.clip_by_value((new_height_s - 1.0) * yd1 / (height_s - 1.0), 0.0, new_height_s - 1.0)
        bu = tf.tile(batch_idx, (1, new_height, new_width, 1))

        indices00 = tf.concat([bu, tf.cast(yu0, tf.int32), tf.cast(xu0, tf.int32)], 3)
        indices01 = tf.concat([bu, tf.cast(yu0, tf.int32), tf.cast(xu1, tf.int32)], 3)
        indices10 = tf.concat([bu, tf.cast(yu1, tf.int32), tf.cast(xu0, tf.int32)], 3)
        indices11 = tf.concat([bu, tf.cast(yu1, tf.int32), tf.cast(xu1, tf.int32)], 3)
        indicestt = tf.concat([bu, tf.cast(yu, tf.int32), tf.cast(xu, tf.int32)], 3)

        guide00 = tf.gather_nd(guide_ftmp, indices00)
        guide01 = tf.gather_nd(guide_ftmp, indices01)
        guide10 = tf.gather_nd(guide_ftmp, indices10)
        guide11 = tf.gather_nd(guide_ftmp, indices11)
        guidett = tf.gather_nd(guide_ftmp, indicestt)

        #factor_g = tf.get_variable(shape=[], initializer=tf.constant_initializer(factor_g), dtype=tf.float32, name='factor_g')
        wg00 = gaussian_func(guide00, guidett, factor_g)
        wg01 = gaussian_func(guide01, guidett, factor_g)
        wg10 = gaussian_func(guide10, guidett, factor_g)
        wg11 = gaussian_func(guide11, guidett, factor_g)

        ## Final merged
        weight00 = ws00 * wg00
        weight01 = ws01 * wg01
        weight10 = ws10 * wg10
        weight11 = ws11 * wg11
        weight_sum = weight00 + weight01 + weight10 + weight11 + 0.01
        I00 = I00 * weight00
        I01 = I01 * weight01
        I10 = I10 * weight10
        I11 = I11 * weight11
        return tf.add_n([I00, I01, I10, I11]) / weight_sum


def bilateral_joint_upsampling_lin(input_ftmp, guide_ftmp, factor_g=5.0, factor_s=1.0, scope=None, reuse=False):
    with tf.variable_scope(scope, reuse=reuse):
        shape = tf.shape(input_ftmp)
        batchsize = shape[0]
        height = shape[1]
        width = shape[2]
        height_s, width_s = tf.cast(height, dtype=tf.float32), tf.cast(width, dtype=tf.float32)
        new_shape = tf.shape(guide_ftmp)
        new_height = new_shape[1]
        new_width = new_shape[2]
        new_height_s, new_width_s = tf.cast(new_height, dtype=tf.float32), tf.cast(new_width, dtype=tf.float32)
        x = tf.linspace(0.0, 1.0, new_width)
        y = tf.linspace(0.0, 1.0, new_height)
        xt, yt = tf.meshgrid(x, y)
        xt = tf.tile(tf.expand_dims(tf.expand_dims(xt, 0), 3), [batchsize, 1, 1, 1])
        yt = tf.tile(tf.expand_dims(tf.expand_dims(yt, 0), 3), [batchsize, 1, 1, 1])

        ## Spatial
        xd = tf.clip_by_value((width_s - 1.0) * xt, 0.0, width_s - 1.0)
        yd = tf.clip_by_value((height_s - 1.0) * yt, 0.0, height_s - 1.0)
        xd0 = tf.floor(xd)
        xd1 = xd0 + 1.0
        yd0 = tf.floor(yd)
        yd1 = yd0 + 1.0
        xd0 = tf.clip_by_value(xd0, 0.0, width_s - 1.0)
        xd1 = tf.clip_by_value(xd1, 0.0, width_s - 1.0)
        yd0 = tf.clip_by_value(yd0, 0.0, height_s - 1.0)
        yd1 = tf.clip_by_value(yd1, 0.0, height_s - 1.0)
        batch_idx = tf.range(0, batchsize)
        batch_idx = tf.reshape(batch_idx, (batchsize, 1, 1, 1))
        bd = tf.tile(batch_idx, (1, new_height, new_width, 1))

        indices00 = tf.concat([bd, tf.cast(yd0, tf.int32), tf.cast(xd0, tf.int32)], 3)
        indices01 = tf.concat([bd, tf.cast(yd0, tf.int32), tf.cast(xd1, tf.int32)], 3)
        indices10 = tf.concat([bd, tf.cast(yd1, tf.int32), tf.cast(xd0, tf.int32)], 3)
        indices11 = tf.concat([bd, tf.cast(yd1, tf.int32), tf.cast(xd1, tf.int32)], 3)

        I00 = tf.gather_nd(input_ftmp, indices00)
        I01 = tf.gather_nd(input_ftmp, indices01)
        I10 = tf.gather_nd(input_ftmp, indices10)
        I11 = tf.gather_nd(input_ftmp, indices11)

        ws00 = tf.maximum(1.0 - factor_s * tf.abs(yd0 - yd), 0.0) * tf.maximum(1.0 - factor_s * tf.abs(xd0 - xd), 0.0)
        ws01 = tf.maximum(1.0 - factor_s * tf.abs(yd0 - yd), 0.0) * tf.maximum(1.0 - factor_s * tf.abs(xd1 - xd), 0.0)
        ws10 = tf.maximum(1.0 - factor_s * tf.abs(yd1 - yd), 0.0) * tf.maximum(1.0 - factor_s * tf.abs(xd0 - xd), 0.0)
        ws11 = tf.maximum(1.0 - factor_s * tf.abs(yd1 - yd), 0.0) * tf.maximum(1.0 - factor_s * tf.abs(xd1 - xd), 0.0)

        ## Guide
        xu = tf.clip_by_value((new_width_s - 1.0) * xt, 0.0, new_width_s - 1.0)
        yu = tf.clip_by_value((new_height_s - 1.0) * yt, 0.0, new_height_s - 1.0)
        xu0 = tf.clip_by_value((new_width_s - 1.0) * xd0 / (width_s - 1.0), 0.0, new_width_s - 1.0)
        xu1 = tf.clip_by_value((new_width_s - 1.0) * xd1 / (width_s - 1.0), 0.0, new_width_s - 1.0)
        yu0 = tf.clip_by_value((new_height_s - 1.0) * yd0 / (height_s - 1.0), 0.0, new_height_s - 1.0)
        yu1 = tf.clip_by_value((new_height_s - 1.0) * yd1 / (height_s - 1.0), 0.0, new_height_s - 1.0)
        bu = tf.tile(batch_idx, (1, new_height, new_width, 1))

        indices00 = tf.concat([bu, tf.cast(yu0, tf.int32), tf.cast(xu0, tf.int32)], 3)
        indices01 = tf.concat([bu, tf.cast(yu0, tf.int32), tf.cast(xu1, tf.int32)], 3)
        indices10 = tf.concat([bu, tf.cast(yu1, tf.int32), tf.cast(xu0, tf.int32)], 3)
        indices11 = tf.concat([bu, tf.cast(yu1, tf.int32), tf.cast(xu1, tf.int32)], 3)
        indicestt = tf.concat([bu, tf.cast(yu, tf.int32), tf.cast(xu, tf.int32)], 3)

        guide00 = tf.gather_nd(guide_ftmp, indices00)
        guide01 = tf.gather_nd(guide_ftmp, indices01)
        guide10 = tf.gather_nd(guide_ftmp, indices10)
        guide11 = tf.gather_nd(guide_ftmp, indices11)
        guidett = tf.gather_nd(guide_ftmp, indicestt)

        #factor_g = tf.get_variable(shape=[], initializer=tf.constant_initializer(factor_g), dtype=tf.float32, name='factor_g')
        wg00 = tf.maximum(1.0 - factor_g * tf.abs(guide00 - guidett), 0.0)
        wg01 = tf.maximum(1.0 - factor_g * tf.abs(guide01 - guidett), 0.0)
        wg10 = tf.maximum(1.0 - factor_g * tf.abs(guide10 - guidett), 0.0)
        wg11 = tf.maximum(1.0 - factor_g * tf.abs(guide11 - guidett), 0.0)

        ## Final merged
        weight00 = ws00 * wg00
        weight01 = ws01 * wg01
        weight10 = ws10 * wg10
        weight11 = ws11 * wg11
        I00 = I00 * weight00
        I01 = I01 * weight01
        I10 = I10 * weight10
        I11 = I11 * weight11
        weight_sum = weight00 + weight01 + weight10 + weight11 + 0.0001
        return tf.add_n([I00, I01, I10, I11]) / weight_sum


def spatial_conv(x, coef):
    ## 5-tap (cross-shaped) per-pixel convolution on a single channel
    x_pad = tf.pad(x, [[0, 0], [1, 1], [1, 1], [0, 0]], mode="REFLECT")
    inner0 = x_pad[:, 0:-2, 1:-1, 0:1] * coef[:, :, :, 0:1]
    inner1 = x_pad[:, 1:-1, 0:-2, 0:1] * coef[:, :, :, 1:2]
    inner2 = x_pad[:, 1:-1, 1:-1, 0:1] * coef[:, :, :, 2:3]
    inner3 = x_pad[:, 1:-1, 2:, 0:1] * coef[:, :, :, 3:4]
    inner4 = x_pad[:, 2:, 1:-1, 0:1] * coef[:, :, :, 4:5]
    return inner0 + inner1 + inner2 + inner3 + inner4
```
...
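As a usage illustration (again an addition to the listing, with arbitrary assumed shapes): `bilateral_joint_upsampling` takes a low-resolution feature map and a high-resolution single-channel guide, and returns the feature map upsampled to the guide's resolution, with spatial tent weights modulated by guide similarity.

```python
import numpy as np
import tensorflow as tf

from layers import bilateral_joint_upsampling  # assumes the listing above is saved as layers.py

low_res = tf.placeholder(tf.float32, [None, 32, 32, 8])   # coarse feature map (assumed shape)
guide = tf.placeholder(tf.float32, [None, 128, 128, 1])   # high-res guide in [0, 1] (assumed shape)

up = bilateral_joint_upsampling(low_res, guide, factor_g=0.2, factor_s=1.0, scope='bju')

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    out = sess.run(up, feed_dict={
        low_res: np.random.rand(1, 32, 32, 8).astype(np.float32),
        guide: np.random.rand(1, 128, 128, 1).astype(np.float32)})
    print(out.shape)  # (1, 128, 128, 8): upsampled to the guide's resolution
```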