tensorlayer3/tensorlayer/db.py

747 lines
27 KiB
Python

#! /usr/bin/python
# -*- coding: utf-8 -*-
import os
import pickle
import sys
import time
from datetime import datetime
import numpy as np
import tensorflow as tf
import gridfs
import pymongo
from tensorlayer import logging
from tensorlayer.files import (
assign_weights, del_folder, exists_or_mkdir, load_hdf5_to_weights, save_weights_to_hdf5, static_graph2net
)
class TensorHub(object):
"""It is a MongoDB based manager that help you to manage data, network architecture, parameters and logging.
Parameters
-------------
ip : str
Localhost or IP address.
port : int
Port number.
dbname : str
Database name.
username : str or None
User name, set to None if you do not need authentication.
password : str
Password.
project_name : str or None
Experiment key for this entire project, similar with the repository name of Github.
Attributes
------------
ip, port, dbname and other input parameters : see above
See above.
project_name : str
The given project name, if no given, set to the script name.
db : mongodb client
See ``pymongo.MongoClient``.
"""
# @deprecated_alias(db_name='dbname', user_name='username', end_support_version=2.1)
def __init__(
self, ip='localhost', port=27017, dbname='dbname', username='None', password='password', project_name=None
):
self.ip = ip
self.port = port
self.dbname = dbname
self.username = username
print("[Database] Initializing ...")
# connect mongodb
client = pymongo.MongoClient(ip, port)
self.db = client[dbname]
if username is None:
print(username, password)
self.db.authenticate(username, password)
else:
print("[Database] No username given, it works if authentication is not required")
if project_name is None:
self.project_name = sys.argv[0].split('.')[0]
print("[Database] No project_name given, use {}".format(self.project_name))
else:
self.project_name = project_name
# define file system (Buckets)
self.dataset_fs = gridfs.GridFS(self.db, collection="datasetFilesystem")
self.model_fs = gridfs.GridFS(self.db, collection="modelfs")
# self.params_fs = gridfs.GridFS(self.db, collection="parametersFilesystem")
# self.architecture_fs = gridfs.GridFS(self.db, collection="architectureFilesystem")
print("[Database] Connected ")
_s = "[Database] Info:\n"
_s += " ip : {}\n".format(self.ip)
_s += " port : {}\n".format(self.port)
_s += " dbname : {}\n".format(self.dbname)
_s += " username : {}\n".format(self.username)
_s += " password : {}\n".format("*******")
_s += " project_name : {}\n".format(self.project_name)
self._s = _s
print(self._s)
def __str__(self):
"""Print information of databset."""
return self._s
def _fill_project_info(self, args):
"""Fill in project_name for all studies, architectures and parameters."""
return args.update({'project_name': self.project_name})
@staticmethod
def _serialization(ps):
"""Serialize data."""
return pickle.dumps(ps, protocol=pickle.HIGHEST_PROTOCOL) # protocol=2)
# with open('_temp.pkl', 'wb') as file:
# return pickle.dump(ps, file, protocol=pickle.HIGHEST_PROTOCOL)
@staticmethod
def _deserialization(ps):
"""Deseralize data."""
return pickle.loads(ps)
# =========================== MODELS ================================
def save_model(self, network=None, model_name='model', **kwargs):
"""Save model architecture and parameters into database, timestamp will be added automatically.
Parameters
----------
network : TensorLayer Model
TensorLayer Model instance.
model_name : str
The name/key of model.
kwargs : other events
Other events, such as name, accuracy, loss, step number and etc (optinal).
Examples
---------
Save model architecture and parameters into database.
>>> db.save_model(net, accuracy=0.8, loss=2.3, name='second_model')
Load one model with parameters from database (run this in other script)
>>> net = db.find_top_model(accuracy=0.8, loss=2.3)
Find and load the latest model.
>>> net = db.find_top_model(sort=[("time", pymongo.DESCENDING)])
>>> net = db.find_top_model(sort=[("time", -1)])
Find and load the oldest model.
>>> net = db.find_top_model(sort=[("time", pymongo.ASCENDING)])
>>> net = db.find_top_model(sort=[("time", 1)])
Get model information
>>> net._accuracy
... 0.8
Returns
---------
boolean : True for success, False for fail.
"""
kwargs.update({'model_name': model_name})
self._fill_project_info(kwargs) # put project_name into kwargs
# params = network.get_all_params()
params = network.all_weights
s = time.time()
# kwargs.update({'architecture': network.all_graphs, 'time': datetime.utcnow()})
kwargs.update({'architecture': network.config, 'time': datetime.utcnow()})
try:
params_id = self.model_fs.put(self._serialization(params))
kwargs.update({'params_id': params_id, 'time': datetime.utcnow()})
self.db.Model.insert_one(kwargs)
print("[Database] Save model: SUCCESS, took: {}s".format(round(time.time() - s, 2)))
return True
except Exception as e:
exc_type, exc_obj, exc_tb = sys.exc_info()
fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
logging.info("{} {} {} {} {}".format(exc_type, exc_obj, fname, exc_tb.tb_lineno, e))
print("[Database] Save model: FAIL")
return False
def find_top_model(self, sort=None, model_name='model', **kwargs):
"""Finds and returns a model architecture and its parameters from the database which matches the requirement.
Parameters
----------
sort : List of tuple
PyMongo sort comment, search "PyMongo find one sorting" and `collection level operations <http://api.mongodb.com/python/current/api/pymongo/collection.html>`__ for more details.
model_name : str or None
The name/key of model.
kwargs : other events
Other events, such as name, accuracy, loss, step number and etc (optinal).
Examples
---------
- see ``save_model``.
Returns
---------
network : TensorLayer Model
Note that, the returned network contains all information of the document (record), e.g. if you saved accuracy in the document, you can get the accuracy by using ``net._accuracy``.
"""
# print(kwargs) # {}
kwargs.update({'model_name': model_name})
self._fill_project_info(kwargs)
s = time.time()
d = self.db.Model.find_one(filter=kwargs, sort=sort)
# _temp_file_name = '_find_one_model_ztemp_file'
if d is not None:
params_id = d['params_id']
graphs = d['architecture']
_datetime = d['time']
# exists_or_mkdir(_temp_file_name, False)
# with open(os.path.join(_temp_file_name, 'graph.pkl'), 'wb') as file:
# pickle.dump(graphs, file, protocol=pickle.HIGHEST_PROTOCOL)
else:
print("[Database] FAIL! Cannot find model: {}".format(kwargs))
return False
try:
params = self._deserialization(self.model_fs.get(params_id).read())
# TODO : restore model and load weights
network = static_graph2net(graphs)
assign_weights(weights=params, network=network)
# np.savez(os.path.join(_temp_file_name, 'params.npz'), params=params)
#
# network = load_graph_and_params(name=_temp_file_name, sess=sess)
# del_folder(_temp_file_name)
pc = self.db.Model.find(kwargs)
print(
"[Database] Find one model SUCCESS. kwargs:{} sort:{} save time:{} took: {}s".format(
kwargs, sort, _datetime, round(time.time() - s, 2)
)
)
# FIXME : not sure what's this for
# put all informations of model into the TL layer
# for key in d:
# network.__dict__.update({"_%s" % key: d[key]})
# check whether more parameters match the requirement
params_id_list = pc.distinct('params_id')
n_params = len(params_id_list)
if n_params != 1:
print(" Note that there are {} models match the kwargs".format(n_params))
return network
except Exception as e:
exc_type, exc_obj, exc_tb = sys.exc_info()
fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
logging.info("{} {} {} {} {}".format(exc_type, exc_obj, fname, exc_tb.tb_lineno, e))
return False
def delete_model(self, **kwargs):
"""Delete model.
Parameters
-----------
kwargs : logging information
Find items to delete, leave it empty to delete all log.
"""
self._fill_project_info(kwargs)
self.db.Model.delete_many(kwargs)
logging.info("[Database] Delete Model SUCCESS")
# =========================== DATASET ===============================
def save_dataset(self, dataset=None, dataset_name=None, **kwargs):
"""Saves one dataset into database, timestamp will be added automatically.
Parameters
----------
dataset : any type
The dataset you want to store.
dataset_name : str
The name of dataset.
kwargs : other events
Other events, such as description, author and etc (optinal).
Examples
----------
Save dataset
>>> db.save_dataset([X_train, y_train, X_test, y_test], 'mnist', description='this is a tutorial')
Get dataset
>>> dataset = db.find_top_dataset('mnist')
Returns
---------
boolean : Return True if save success, otherwise, return False.
"""
self._fill_project_info(kwargs)
if dataset_name is None:
raise Exception("dataset_name is None, please give a dataset name")
kwargs.update({'dataset_name': dataset_name})
s = time.time()
try:
dataset_id = self.dataset_fs.put(self._serialization(dataset))
kwargs.update({'dataset_id': dataset_id, 'time': datetime.utcnow()})
self.db.Dataset.insert_one(kwargs)
# print("[Database] Save params: {} SUCCESS, took: {}s".format(file_name, round(time.time()-s, 2)))
print("[Database] Save dataset: SUCCESS, took: {}s".format(round(time.time() - s, 2)))
return True
except Exception as e:
exc_type, exc_obj, exc_tb = sys.exc_info()
fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
logging.info("{} {} {} {} {}".format(exc_type, exc_obj, fname, exc_tb.tb_lineno, e))
print("[Database] Save dataset: FAIL")
return False
def find_top_dataset(self, dataset_name=None, sort=None, **kwargs):
"""Finds and returns a dataset from the database which matches the requirement.
Parameters
----------
dataset_name : str
The name of dataset.
sort : List of tuple
PyMongo sort comment, search "PyMongo find one sorting" and `collection level operations <http://api.mongodb.com/python/current/api/pymongo/collection.html>`__ for more details.
kwargs : other events
Other events, such as description, author and etc (optinal).
Examples
---------
Save dataset
>>> db.save_dataset([X_train, y_train, X_test, y_test], 'mnist', description='this is a tutorial')
Get dataset
>>> dataset = db.find_top_dataset('mnist')
>>> datasets = db.find_datasets('mnist')
Returns
--------
dataset : the dataset or False
Return False if nothing found.
"""
self._fill_project_info(kwargs)
if dataset_name is None:
raise Exception("dataset_name is None, please give a dataset name")
kwargs.update({'dataset_name': dataset_name})
s = time.time()
d = self.db.Dataset.find_one(filter=kwargs, sort=sort)
if d is not None:
dataset_id = d['dataset_id']
else:
print("[Database] FAIL! Cannot find dataset: {}".format(kwargs))
return False
try:
dataset = self._deserialization(self.dataset_fs.get(dataset_id).read())
pc = self.db.Dataset.find(kwargs)
print("[Database] Find one dataset SUCCESS, {} took: {}s".format(kwargs, round(time.time() - s, 2)))
# check whether more datasets match the requirement
dataset_id_list = pc.distinct('dataset_id')
n_dataset = len(dataset_id_list)
if n_dataset != 1:
print(" Note that there are {} datasets match the requirement".format(n_dataset))
return dataset
except Exception as e:
exc_type, exc_obj, exc_tb = sys.exc_info()
fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
logging.info("{} {} {} {} {}".format(exc_type, exc_obj, fname, exc_tb.tb_lineno, e))
return False
def find_datasets(self, dataset_name=None, **kwargs):
"""Finds and returns all datasets from the database which matches the requirement.
In some case, the data in a dataset can be stored separately for better management.
Parameters
----------
dataset_name : str
The name/key of dataset.
kwargs : other events
Other events, such as description, author and etc (optional).
Returns
--------
params : the parameters, return False if nothing found.
"""
self._fill_project_info(kwargs)
if dataset_name is None:
raise Exception("dataset_name is None, please give a dataset name")
kwargs.update({'dataset_name': dataset_name})
s = time.time()
pc = self.db.Dataset.find(kwargs)
if pc is not None:
dataset_id_list = pc.distinct('dataset_id')
dataset_list = []
for dataset_id in dataset_id_list: # you may have multiple Buckets files
tmp = self.dataset_fs.get(dataset_id).read()
dataset_list.append(self._deserialization(tmp))
else:
print("[Database] FAIL! Cannot find any dataset: {}".format(kwargs))
return False
print("[Database] Find {} datasets SUCCESS, took: {}s".format(len(dataset_list), round(time.time() - s, 2)))
return dataset_list
def delete_datasets(self, **kwargs):
"""Delete datasets.
Parameters
-----------
kwargs : logging information
Find items to delete, leave it empty to delete all log.
"""
self._fill_project_info(kwargs)
self.db.Dataset.delete_many(kwargs)
logging.info("[Database] Delete Dataset SUCCESS")
# =========================== LOGGING ===============================
def save_training_log(self, **kwargs):
"""Saves the training log, timestamp will be added automatically.
Parameters
-----------
kwargs : logging information
Events, such as accuracy, loss, step number and etc.
Examples
---------
>>> db.save_training_log(accuracy=0.33, loss=0.98)
"""
self._fill_project_info(kwargs)
kwargs.update({'time': datetime.utcnow()})
_result = self.db.TrainLog.insert_one(kwargs)
_log = self._print_dict(kwargs)
logging.info("[Database] train log: " + _log)
def save_validation_log(self, **kwargs):
"""Saves the validation log, timestamp will be added automatically.
Parameters
-----------
kwargs : logging information
Events, such as accuracy, loss, step number and etc.
Examples
---------
>>> db.save_validation_log(accuracy=0.33, loss=0.98)
"""
self._fill_project_info(kwargs)
kwargs.update({'time': datetime.utcnow()})
_result = self.db.ValidLog.insert_one(kwargs)
_log = self._print_dict(kwargs)
logging.info("[Database] valid log: " + _log)
def save_testing_log(self, **kwargs):
"""Saves the testing log, timestamp will be added automatically.
Parameters
-----------
kwargs : logging information
Events, such as accuracy, loss, step number and etc.
Examples
---------
>>> db.save_testing_log(accuracy=0.33, loss=0.98)
"""
self._fill_project_info(kwargs)
kwargs.update({'time': datetime.utcnow()})
_result = self.db.TestLog.insert_one(kwargs)
_log = self._print_dict(kwargs)
logging.info("[Database] test log: " + _log)
def delete_training_log(self, **kwargs):
"""Deletes training log.
Parameters
-----------
kwargs : logging information
Find items to delete, leave it empty to delete all log.
Examples
---------
Save training log
>>> db.save_training_log(accuracy=0.33)
>>> db.save_training_log(accuracy=0.44)
Delete logs that match the requirement
>>> db.delete_training_log(accuracy=0.33)
Delete all logs
>>> db.delete_training_log()
"""
self._fill_project_info(kwargs)
self.db.TrainLog.delete_many(kwargs)
logging.info("[Database] Delete TrainLog SUCCESS")
def delete_validation_log(self, **kwargs):
"""Deletes validation log.
Parameters
-----------
kwargs : logging information
Find items to delete, leave it empty to delete all log.
Examples
---------
- see ``save_training_log``.
"""
self._fill_project_info(kwargs)
self.db.ValidLog.delete_many(kwargs)
logging.info("[Database] Delete ValidLog SUCCESS")
def delete_testing_log(self, **kwargs):
"""Deletes testing log.
Parameters
-----------
kwargs : logging information
Find items to delete, leave it empty to delete all log.
Examples
---------
- see ``save_training_log``.
"""
self._fill_project_info(kwargs)
self.db.TestLog.delete_many(kwargs)
logging.info("[Database] Delete TestLog SUCCESS")
# def find_training_logs(self, **kwargs):
# pass
#
# def find_validation_logs(self, **kwargs):
# pass
#
# def find_testing_logs(self, **kwargs):
# pass
# =========================== Task ===================================
def create_task(self, task_name=None, script=None, hyper_parameters=None, saved_result_keys=None, **kwargs):
"""Uploads a task to the database, timestamp will be added automatically.
Parameters
-----------
task_name : str
The task name.
script : str
File name of the python script.
hyper_parameters : dictionary
The hyper parameters pass into the script.
saved_result_keys : list of str
The keys of the task results to keep in the database when the task finishes.
kwargs : other parameters
Users customized parameters such as description, version number.
Examples
-----------
Uploads a task
>>> db.create_task(task_name='mnist', script='example/tutorial_mnist_simple.py', description='simple tutorial')
Finds and runs the latest task
>>> db.run_top_task(sort=[("time", pymongo.DESCENDING)])
>>> db.run_top_task(sort=[("time", -1)])
Finds and runs the oldest task
>>> db.run_top_task(sort=[("time", pymongo.ASCENDING)])
>>> db.run_top_task(sort=[("time", 1)])
"""
if not isinstance(task_name, str): # is None:
raise Exception("task_name should be string")
if not isinstance(script, str): # is None:
raise Exception("script should be string")
if hyper_parameters is None:
hyper_parameters = {}
if saved_result_keys is None:
saved_result_keys = []
self._fill_project_info(kwargs)
kwargs.update({'time': datetime.utcnow()})
kwargs.update({'hyper_parameters': hyper_parameters})
kwargs.update({'saved_result_keys': saved_result_keys})
_script = open(script, 'rb').read()
kwargs.update({'status': 'pending', 'script': _script, 'result': {}})
self.db.Task.insert_one(kwargs)
logging.info("[Database] Saved Task - task_name: {} script: {}".format(task_name, script))
def run_top_task(self, task_name=None, sort=None, **kwargs):
"""Finds and runs a pending task that in the first of the sorting list.
Parameters
-----------
task_name : str
The task name.
sort : List of tuple
PyMongo sort comment, search "PyMongo find one sorting" and `collection level operations <http://api.mongodb.com/python/current/api/pymongo/collection.html>`__ for more details.
kwargs : other parameters
Users customized parameters such as description, version number.
Examples
---------
Monitors the database and pull tasks to run
>>> while True:
>>> print("waiting task from distributor")
>>> db.run_top_task(task_name='mnist', sort=[("time", -1)])
>>> time.sleep(1)
Returns
--------
boolean : True for success, False for fail.
"""
if not isinstance(task_name, str): # is None:
raise Exception("task_name should be string")
self._fill_project_info(kwargs)
kwargs.update({'status': 'pending'})
# find task and set status to running
task = self.db.Task.find_one_and_update(kwargs, {'$set': {'status': 'running'}}, sort=sort)
# try:
# get task info e.g. hyper parameters, python script
if task is None:
logging.info("[Database] Find Task FAIL: key: {} sort: {}".format(task_name, sort))
return False
else:
logging.info("[Database] Find Task SUCCESS: key: {} sort: {}".format(task_name, sort))
_datetime = task['time']
_script = task['script']
_id = task['_id']
_hyper_parameters = task['hyper_parameters']
_saved_result_keys = task['saved_result_keys']
logging.info(" hyper parameters:")
for key in _hyper_parameters:
globals()[key] = _hyper_parameters[key]
logging.info(" {}: {}".format(key, _hyper_parameters[key]))
# run task
s = time.time()
logging.info("[Database] Start Task: key: {} sort: {} push time: {}".format(task_name, sort, _datetime))
_script = _script.decode('utf-8')
with tf.Graph().as_default(): # # as graph: # clear all TF graphs
exec(_script, globals())
# set status to finished
_ = self.db.Task.find_one_and_update({'_id': _id}, {'$set': {'status': 'finished'}})
# return results
__result = {}
for _key in _saved_result_keys:
logging.info(" result: {}={} {}".format(_key, globals()[_key], type(globals()[_key])))
__result.update({"%s" % _key: globals()[_key]})
_ = self.db.Task.find_one_and_update(
{'_id': _id}, {'$set': {
'result': __result
}}, return_document=pymongo.ReturnDocument.AFTER
)
logging.info(
"[Database] Finished Task: task_name - {} sort: {} push time: {} took: {}s".format(
task_name, sort, _datetime,
time.time() - s
)
)
return True
# except Exception as e:
# exc_type, exc_obj, exc_tb = sys.exc_info()
# fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
# logging.info("{} {} {} {} {}".format(exc_type, exc_obj, fname, exc_tb.tb_lineno, e))
# logging.info("[Database] Fail to run task")
# # if fail, set status back to pending
# _ = self.db.Task.find_one_and_update({'_id': _id}, {'$set': {'status': 'pending'}})
# return False
def delete_tasks(self, **kwargs):
"""Delete tasks.
Parameters
-----------
kwargs : logging information
Find items to delete, leave it empty to delete all log.
Examples
---------
>>> db.delete_tasks()
"""
self._fill_project_info(kwargs)
self.db.Task.delete_many(kwargs)
logging.info("[Database] Delete Task SUCCESS")
def check_unfinished_task(self, task_name=None, **kwargs):
"""Finds and runs a pending task.
Parameters
-----------
task_name : str
The task name.
kwargs : other parameters
Users customized parameters such as description, version number.
Examples
---------
Wait until all tasks finish in user's local console
>>> while not db.check_unfinished_task():
>>> time.sleep(1)
>>> print("all tasks finished")
>>> sess = tf.InteractiveSession()
>>> net = db.find_top_model(sess=sess, sort=[("test_accuracy", -1)])
>>> print("the best accuracy {} is from model {}".format(net._test_accuracy, net._name))
Returns
--------
boolean : True for success, False for fail.
"""
if not isinstance(task_name, str): # is None:
raise Exception("task_name should be string")
self._fill_project_info(kwargs)
kwargs.update({'$or': [{'status': 'pending'}, {'status': 'running'}]})
# ## find task
# task = self.db.Task.find_one(kwargs)
task = self.db.Task.find(kwargs)
task_id_list = task.distinct('_id')
n_task = len(task_id_list)
if n_task == 0:
logging.info("[Database] No unfinished task - task_name: {}".format(task_name))
return False
else:
logging.info("[Database] Find {} unfinished task - task_name: {}".format(n_task, task_name))
return True
@staticmethod
def _print_dict(args):
string = ''
for key, value in args.items():
if key is not '_id':
string += str(key) + ": " + str(value) + " / "
return string