I have been playing around with a LSTM Tensorflow model (sentence summarization) and got it to the point where it is doing a fairly good job. But when I try to import the saved Model and use it interactively it doesn't produce any output.
My original code was using checkpoints, but I switched to SavedModelBuilder() because I thought it might be easier to work with.
import tensorflow as tf
from Seq2SeqModel import Seq2SeqModel
import utils
from time import gmtime, strftime
import sys, os
print("="*60)
DATADIR = "../DATASETS/sentence-compression-master/data/"
INPUT_DATA = DATADIR + "long_sentences.txt"
OUTPUT_DATA = DATADIR + "short_sentences.txt"
with open(INPUT_DATA, 'r', encoding='utf-8') as f:
in_sentences = f.readlines()
with open(OUTPUT_DATA, 'r', encoding='utf-8') as f:
out_sentences = f.readlines()
in_chars = []
TEMP_LINE = " abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ[]()-_,.:;!?0123456789$+'\""
for char in TEMP_LINE:
in_chars.append(char)
char_to_int = dict()
int_to_char = dict()
for i,char in enumerate(in_chars):
char_to_int[char] = i
int_to_char[i] = char
num_in_chars = len(in_chars)
max_in_chars_per_sample = max([len(sample) for sample in in_sentences])
max_out_chars_per_sample = max([len(sample) for sample in out_sentences])
num_samples = len(in_sentences)
print(strftime("%Y-%m-%d %H:%M:%S", gmtime()) + " Create our Training Data")
X_train = []
y_train = []
for sent in in_sentences:
id_sent = []
for mychar in sent:
if mychar in in_chars:
vocab_id = char_to_int[mychar]
id_sent += [vocab_id]
X_train += [id_sent]
for sent in out_sentences:
id_sent = []
for mychar in sent:
if mychar in in_chars:
vocab_id = char_to_int[mychar]
id_sent += [vocab_id]
y_train += [id_sent]
print(strftime("%Y-%m-%d %H:%M:%S", gmtime()) + " Train our Model")
step = 0
batch_size = 32
max_batches = int(len(X_train) / batch_size)
batches_in_epoch = 1
epoch_be_saved = 1
my_beam_width = 3
CHKPT_FOUND = False
for line in open("../models/checkpoint"):
if len(line) > 1:
data = line.split('"')
checkpoint = "../models/" + data[1]
data2 = data[1].split("-")
step = int(data2[1])
CHKPT_FOUND = True
print("Selecting previous checkpoint ../models/nmt.ckpt-" + str(step) + ".index")
g = tf.Graph()
with g.as_default():
model = Seq2SeqModel(encoder_num_units = 512, decoder_num_units = 512, embedding_size = 512, num_layers = 2,
vocab_size = num_in_chars, batch_size = batch_size, bidirectional = False, attention = True,
beam_search = True, beam_width = my_beam_width, mode = "Train")
print(strftime("%Y-%m-%d %H:%M:%S", gmtime()) + " model constructed.")
builder = tf.saved_model.builder.SavedModelBuilder('./SavedModel/')
with tf.Session(config=tf.ConfigProto()) as sess:
sess.run(tf.global_variables_initializer())
saver = tf.train.Saver()
summary_writer = tf.summary.FileWriter('../log', graph = sess.graph)
if CHKPT_FOUND == True:
print('loading previous checkpoint [' + checkpoint + ']')
saver.restore(sess, checkpoint)
builder.add_meta_graph_and_variables(sess, [tf.saved_model.tag_constants.SERVING], signature_def_map=None, assets_collection=None)
builder.save()
print('start training.')
for _epoch in range(1, batches_in_epoch + 1):
for _batch in range(max_batches + 1):
if step % 5 == 0:
print(strftime("%Y-%m-%d %H:%M:%S", gmtime()) + " Processing Batch " + str(_batch) + " of " + str(max_batches))
X, y = utils.input_generator(X_train, y_train, batch_size)
feed_dict = model.make_train_inputs(x = X, y = y)
_, l, train_sentences, summary_str = sess.run([model.train_op, model.loss, model.decoder_predictions_train, model.summary_op], feed_dict)
summary_writer.add_summary(summary_str, _epoch * _batch)
if step == 0 or step % 25 == 0:
print(strftime("%Y-%m-%d %H:%M:%S", gmtime()) + " Step {}".format(step))
print(' minibatch loss: {}'.format(sess.run(model.loss, feed_dict)))
for i in range(1):
print('train logits:')
train_sentence = ''
for mychar in train_sentences[i]:
train_sentence += str(int_to_char[mychar])
print(train_sentence)
print(' ')
if step % 100 == 0:
saver.save(sess, '../models/' + 'nmt.ckpt', global_step = step)
print(strftime("%Y-%m-%d %H:%M:%S", gmtime()) + " model saved at step = " + str(step))
step += 1
print(strftime("%Y-%m-%d %H:%M:%S", gmtime()) + " epoch finished")
if _epoch % epoch_be_saved == 0:
tf.saved_model.simple_save(sess, 'models/0', inputs, outputs)
saver.save(sess, '../models/' + 'nmt.ckpt', global_step = step)
print(strftime("%Y-%m-%d %H:%M:%S", gmtime()) + " model saved at step = " + str(step))
print(strftime("%Y-%m-%d %H:%M:%S", gmtime()) + " Finished Training")
I was then able to load my model in to another script, but when I try to do anything with it all I get are empty output arrays.
import tensorflow as tf
from tensorflow.python.saved_model import tag_constants
from Seq2SeqModel import Seq2SeqModel
import utils
from time import gmtime, strftime
import sys, os
in_sentences = []
out_sentences = []
in_chars = []
TEMP_LINE = " abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ[]()-_,.:;!?0123456789$+'\""
for char in TEMP_LINE:
in_chars.append(char)
char_to_int = dict()
int_to_char = dict()
for i,char in enumerate(in_chars):
char_to_int[char] = i
int_to_char[i] = char
num_in_chars = len(in_chars)
checkpoint = 'models/0'
step = 0
batch_size = 32
max_batches = 1
batches_in_epoch = 1
epoch_be_saved = 1
my_beam_width = 3
temperature = 1
top_k = 0
g = tf.Graph()
with g.as_default():
model = Seq2SeqModel(encoder_num_units = 512, decoder_num_units = 512, embedding_size = 512, num_layers = 2,
vocab_size = num_in_chars, batch_size = batch_size, bidirectional = False, attention = True,
beam_search = True,
beam_width = my_beam_width,
mode = "Infer"
)
print(strftime("%Y-%m-%d %H:%M:%S", gmtime()) + " model constructed.")
gvars = g.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
assign_ops = [g.get_operation_by_name(v.op.name + "/Assign") for v in gvars]
init_values = [assign_op.inputs[1] for assign_op in assign_ops]
with tf.Session(config=tf.ConfigProto()) as sess:
tf.saved_model.loader.load(sess, [tf.saved_model.tag_constants.SERVING], './SavedModel/')
print(strftime("%Y-%m-%d %H:%M:%S", gmtime()) + " model loaded.")
context_tokens = []
sent = "World renowned Count Olaf performed in a play and got a standing ovation from the crowd."
for mychar in sent:
if mychar in in_chars:
context_tokens.append(char_to_int[mychar])
generated = 0
print(sent)
output = []
feed_dict = model.make_infer_inputs(x = [context_tokens])
print(feed_dict)
out = sess.run(output, feed_dict)
print(output)
print("xxxxxxxxxxxxxxxxxx")
print(out)
out_string = ""
for i in range(len(output)):
out_string += int_to_char[out[i]]
print(out_string)
print("=" * 40)
This gives me the following output:
World renowned Count Olaf performed in a play and got a standing ovation from the crowd.
{<tf.Tensor 'encoder_inputs:0' shape=(?, ?) dtype=int32>: array([[49, 15, 18, 12, 4, 0, 18, 5, 14, 15, 23, 14, 5, 4, 0, 29,
15, 21, 14, 20, 0, 41, 12, 1, 6, 0, 16, 5, 18, 6, 15, 18,
13, 5, 4, 0, 9, 14, 0, 1, 0, 16, 12, 1, 25, 0, 1, 14,
4, 0, 7, 15, 20, 0, 1, 0, 19, 20, 1, 14, 4, 9, 14, 7,
0, 15, 22, 1, 20, 9, 15, 14, 0, 6, 18, 15, 13, 0, 20, 8,
5, 0, 3, 18, 15, 23, 4, 60]]), <tf.Tensor 'encoder_inputs_length:0' shape=(?,) dtype=int32>: [88]}
[]
xxxxxxxxxxxxxxxxxx
[]
My feed_dict looks ok, and I'm probably making some obvious mistake, but I can't find it.
How can I take a trained model, freeze it in place, and then use it to process individual sentences?
I understand that there is a lot I can do to clean up the code, unfortunately as I've been trying different approaches (and still getting nowhere) the code has become a bit sloppy. Please help before it devolves into an unreadable mess :-)
If necessary I can remove some of the code to simplify things, I was just worried someone might ask for the full code for evaluation.
What I have tried:
I've tried following several tutorials online, and going through the toy code for SavedModelBuilder and working with checkpoints but to no avail.