mxnet訓練/導出/載入模型並預測(python和C++)
原文:mxnet訓練模型、導出模型、載入模型 進行預測(python和C++)
mxnet支持將已訓練的模型導出成網路和參數分離的json和params文件,方便離線載入進行預測和驗證,同時由於mxnet支持python,C++,scala等多種編程語言,這一特性使得mxnet可以在生產系統上部署「fine-tuning」
本文以一個簡單的線性回歸訓練模型的例子,來介紹如何在mxnet中訓練模型,導出模型,載入模型,進行後續預測,其中預測部分採用了python和C++雙版本
訓練和導出模型
# Train a simple linear-regression model with Gluon, then export it as a
# symbol (.json) + parameters (.params) pair for offline inference.
from mxnet import autograd, nd
from mxnet import gluon
from mxnet import init
from mxnet.gluon import nn
from mxnet.gluon import data as gdata
from mxnet.gluon import loss as gloss

# define data format
input_dim = 2
input_num = 100

# prepare label data: y = 3 * x0 - 2.5 * x1 + 7.6 (no noise added)
true_w = [3, -2.5]
true_b = 7.6
x_label = nd.random.normal(shape=(input_num, input_dim))
y_label = true_w[0] * x_label[:, 0] + true_w[1] * x_label[:, 1] + true_b
# print (x_label)
# print (y_label)

# load input data
batch_size = 10
dataset = gdata.ArrayDataset(x_label, y_label)
data_iter = gdata.DataLoader(dataset, batch_size, shuffle=True)

# define net (model)
net = nn.HybridSequential()  # make it hybrid to optimize computing
net.add(nn.Dense(1))
net.initialize()

# loss and optimize functions
loss = gloss.L2Loss()
optimize_method = "sgd"
learning_rate = 0.03
trainer = gluon.Trainer(net.collect_params(), optimize_method,
                        {"learning_rate": learning_rate})

# train: one SGD step per mini-batch, report full-set loss per epoch
num_epoch = 20
for epoch in range(0, num_epoch):
    for x, y in data_iter:
        with autograd.record():
            temp_loss = loss(net(x), y)
        temp_loss.backward()
        trainer.step(batch_size)
    print ("epoch %d, loss %f" % (epoch, loss(net(x_label), y_label).mean().asnumpy()))

# the trained parameters
print (net[0].weight.data(), net[0].bias.data())

# test the model
x_test = nd.array([[3, 5], [6, 10], [13, 7]])
net(x_test)

# export net json and param.
# Please first call block.hybridize() and then run forward with this block
# at least once before calling export.
net.hybridize()
net(x_label)
net.export("simple_net", num_epoch)
訓練過程
epoch 0, loss 19.267626epoch 1, loss 10.838810epoch 2, loss 6.167604epoch 3, loss 3.535518epoch 4, loss 2.036959epoch 5, loss 1.179224epoch 6, loss 0.684780epoch 7, loss 0.398763epoch 8, loss 0.232661epoch 9, loss 0.135978epoch 10, loss 0.079521epoch 11, loss 0.046530epoch 12, loss 0.027269epoch 13, loss 0.015979epoch 14, loss 0.009358epoch 15, loss 0.005487epoch 16, loss 0.003218epoch 17, loss 0.001887epoch 18, loss 0.001106epoch 19, loss 0.000649
訓練結果
weight和bias
[[ 2.9752004 -2.494717 ]]<NDArray 1x2 @cpu(0)>, [7.570985]<NDArray 1 @cpu(0)>
載入模型預測數據
python版本
# load model and predicate: restore the exported checkpoint with the
# Module API and run a forward pass over a small evaluation set.
import mxnet as mx
import numpy as np

# define test data
batch_size = 1
num_batch = 5
eval_data = np.array([[3, 5], [6, 10], [13, 7]])
eval_label = np.zeros(len(eval_data))  # just need to be the same length, empty is ok
eval_iter = mx.io.NDArrayIter(eval_data, eval_label, batch_size, shuffle=False)

# load model with net name and epoch num
sym, arg_params, aux_params = mx.model.load_checkpoint("simple_net", 20)
mod = mx.mod.Module(symbol=sym, context=mx.cpu(),
                    data_names=["data"], label_names=[])  # label can be empty
mod.bind(for_training=False,
         data_shapes=[("data", (1, 2))])  # data shape, 1 x 2 vector for one test data record
mod.set_params(arg_params, aux_params)

# predict
predict_stress = mod.predict(eval_iter, num_batch)
print (predict_stress)  # you can transfer to numpy array
輸出結果
[[ 4.1365533 ] [ 0.69436216] [29.17997 ]]<NDArray 3x1 @cpu(0)>
C++版本
關於mxnet編譯並使用C++介面相關配置,請參考
- windows下編譯mxnet並使用C++訓練模型
- linux下編譯mxnet並使用C++訓練模型
這裡僅僅使用mxnet的C++介面載入已導出的模型文件,然後進行前向預測輸出
main.cpp#include <iostream>#include <fstream>#include <string>#include <vector>#include <memory>#include <mxnet/c_predict_api.h>// file read buffer toolclass BufferFile{public: std::string file_path_; std::size_t length_ = 0; std::unique_ptr<char[]> buffer_; explicit BufferFile(const std::string &file_path) : file_path_(file_path) { std::ifstream ifs(file_path.c_str(), std::ios::in | std::ios::binary); if (!ifs) { std::cerr << "Cant open the file. Please check " << file_path << std::endl; return; } ifs.seekg(0, std::ios::end); length_ = static_cast<std::size_t>(ifs.tellg()); ifs.seekg(0, std::ios::beg); std::cout << file_path.c_str() << " ... " << length_ << " bytes
"; buffer_.reset(new char[length_]); ifs.read(buffer_.get(), length_); ifs.close(); } std::size_t GetLength() { return length_; } char* GetBuffer() { return buffer_.get(); }};int main(int argc, char* argv[]){ // model file path std::string json_file = "model/simple_net-symbol.json"; std::string param_file = "model/simple_net-0020.params"; // read model file BufferFile json_data(json_file); BufferFile param_data(param_file); if (json_data.GetLength() == 0 || param_data.GetLength() == 0) { return EXIT_FAILURE; } // mxnet parameters int dev_type = 1; // 1: cpu, 2: gpu, we can change int dev_id = 0; // arbitrary. mx_uint num_input_nodes = 1; // 1 for feedforward const char *input_key[1] = { "data" }; const char **input_keys = input_key; // define input data shape, notice this must be identical const mx_uint input_shape_indptr[2] = { 0, 2 }; // column dim is 2 const mx_uint input_shape_data[2] = { 3, 2 }; // 3 x 2 matrix input data shape // global predicator handler PredictorHandle pred_hnd = nullptr; // create predictor MXPredCreate(static_cast<const char*>(json_data.GetBuffer()), static_cast<const char*>(param_data.GetBuffer()), static_cast<int>(param_data.GetLength()), dev_type, dev_id, num_input_nodes, input_keys, input_shape_indptr, input_shape_data, &pred_hnd); if (!pred_hnd) { std::cerr << "Failed to create predict handler" << std::endl; return EXIT_FAILURE; } // prepare test data std::vector<mx_float> input_data{3, 5, 6, 10, 13, 7}; // set input data for mxnet MXPredSetInput(pred_hnd, "data", input_data.data(), input_data.size()); // do predict forward in mxnet model MXPredForward(pred_hnd); mx_uint output_index = 0; mx_uint *output_shape = nullptr; mx_uint ouput_shape_len; // get output result MXPredGetOutputShape(pred_hnd, output_index, &output_shape, &ouput_shape_len); std::size_t size = 1; for (mx_uint i = 0; i < ouput_shape_len; ++i) { size *= output_shape[i]; } // construct output data from size std::vector<float> output_data(size); 
MXPredGetOutput(pred_hnd, output_index, &(output_data[0]), static_cast<mx_uint>(size)); // release preditor MXPredFree(pred_hnd); // print output data std::cout << "the result calculated by trained simple net: " << std::endl; for (int i = 0; i < output_data.size(); i++) std::cout << output_data[i] << std::endl; return EXIT_SUCCESS;}
輸出結果
the result calculated by trained simple net: 4.13655 0.694362 29.18
基本一致
總結
- 原理其實很清晰並且簡單,就是保存net,然後載入net,說是預測,其實就是輸入一些數據,看一下輸出是否符合預期或者用來進行後續處理而已
- 模型導出之前需要輸入數據forward一下才能導出
- python中進行載入數據predict,可以不定義label的佔位符,但是必須傳一個值進去,全0數組就行
- C++中輸入和輸出的數據在內存中都是按照一維數組排列和存儲的,所以需要對應處理
推薦閱讀:
※一則廣告
※MXNet/Gluon第五課:Gluon高級和優化演算法基礎筆記
※MXNet/Gluon第二課:過擬合,多層感知機,GPU和卷積神經網路筆記
※RefineDet 論文解析