/**
metacode_derived_features_buildmodel.txt
Machine learning model training script
DolphinDB Inc.
DolphinDB server version: 2.00.6 2022.05.09
Last modification time: 2022.08.31
*/

/**
Attention:

1. The variable result comes from the feature engineering calculation.
2. The XGBoost plugin must be installed in advance.
3. One location in the script needs to be modified according to your environment.
*/

// load the XGBoost plugin
try{
	loadPlugin(getHomeDir()+"/plugins/xgboost/PluginXgboost.txt")
}
catch(ex){
	print(ex)
}
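
// Optional sanity check (a minimal sketch): list the plugin functions that were just loaded.
// This assumes defs accepts a wildcard pattern matching the xgboost:: namespace.
print(defs("xgboost::%"))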

/**
part1: set the model save path
modified location 1: modelSavePath
*/
modelSavePath = "/hdd/hdd9/machineLearning/model/001.model"
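
// Optional sketch: create the target directory before saving if it does not exist yet.
// The variable modelSaveDir is introduced here for illustration; its value is assumed
// from the modelSavePath defined above.
modelSaveDir = "/hdd/hdd9/machineLearning/model"
if(!exists(modelSaveDir)){
	mkdir(modelSaveDir)
}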
/**
part2: data preprocessing
Attention: the variable result comes from the feature engineering calculation
*/
// keep only the rows where every column is valid (drop rows containing nulls)
result = result[each(isValid, result.values()).rowAnd()]
result_input = copy(result)
// the prediction target is the realized volatility of LogReturn0
label = result[`LogReturn0_realizedVolatility]
// encode SecurityID as an integer feature and drop the columns that should not be fed to the model
result_input.update!(`SecurityID_int, int(result[`SecurityID]))
result_input.dropColumns!(`SecurityID`DateTime`LogReturn0_realizedVolatility)
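
// Illustrative check after preprocessing: confirm which feature columns remain
// and how many rows survived the null filtering.
print(result_input.columnNames())
print(result_input.size())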

/**
part3: split the data into training and test sets
*/
def trainTestSplit(x, testRatio) {
    xSize = x.size()
    trainSize = (xSize * (1 - testRatio))$INT
    return x[0:trainSize], x[trainSize:xSize]
}
Train_x, Test_x = trainTestSplit(result_input, 0.3)
Train_y, Test_y = trainTestSplit(label, 0.3)
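
// Illustrative check of the 70/30 split produced above.
print('train rows: ' + string(Train_x.size()) + ', test rows: ' + string(Test_x.size()))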

/**
part4: set training parameters, train the model, and save it
*/
params = {
	objective: 'reg:squarederror',
	colsample_bytree: 0.8,
	subsample: 0.8,
	min_child_weight: 1,
	max_leaves: 128,
	eta: 0.1,
	max_depth: 10,
	eval_metric: 'rmse'
}
// train an XGBoost regression model with 500 boosting rounds
model_1 = xgboost::train(Train_y, Train_x, params, 500)
xgboost::saveModel(model_1, modelSavePath)
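
// Optional sketch: continue training the booster for extra rounds. This assumes the
// plugin's train function accepts an existing model as its fifth argument,
// i.e. xgboost::train(Y, X, [params], [numBoostRound], [xgbModel]).
model_2 = xgboost::train(Train_y, Train_x, params, 100, model_1)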

/**
part5: load the model, predict, and evaluate with RMSPE.
Either the reloaded model (model) or the in-memory model (model_1) can be used for prediction.
*/
def RMSPE(a, b) {
	return sqrt(sum(((a - b) \ a) * ((a - b) \ a)) \ a.size())
}
// reload the saved model; either the reloaded model or model_1 can be used below
model = xgboost::loadModel(modelSavePath)
y_pred = xgboost::predict(model_1, Test_x)
print('RMSPE=' + string(RMSPE(Test_y, y_pred)))
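
// Illustrative side-by-side view of the first few predictions against the actual values.
comparison = table(Test_y as actual, y_pred as predicted)
print(comparison[0:10])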