/**
modelBuildingTraining.txt
Script to build models and train models
DolphinDB Inc.
DolphinDB server version: 1.30.18 2022.05.09 / 2.00.6 2022.05.09
Last modification time: 2022.05.10
*/

/**
Attention:
1. There is one place in the script that needs to be modified according to the environment
*/

// Log in and reset the session before anything else runs.
// NOTE(review): the credentials are hard-coded in the script; replace them
// with the account of the target environment.
login("admin", "123456")
// Clear cached data and remove every variable/function definition in this
// session, so the rest of the script does not pick up earlier state.
clearAllCache()
undef(all)
// `go` ends the code segment here, so the cleanup above is executed before
// the statements below are parsed.
go

/**
modified location 1: modelSavePath, dbName and tbName
*/
// Where the trained model is written; keep the line matching the server version.
modelSavePath = "/hdd/hdd9/machineLearning/realizedVolatilityModel_1.30.18.bin"
//modelSavePath = "/hdd/hdd9/machineLearning/realizedVolatilityModel_2.00.6.bin"
// Database and table holding the data set.
dbName = "dfs://sz50VolatilityDataSet"
tbName = "sz50VolatilityDataSet"
// Load the rows whose TradeTime falls in calendar year 2020 into memory.
volatilityTb = loadTable(dbName, tbName)
dataset = select * from volatilityTb where date(TradeTime) between 2020.01.01 : 2020.12.31
/**
Split x into a leading training part and a trailing test part.
x         : the object to split; it is sliced by position with x[a:b]
testRatio : fraction of x assigned to the test part
Returns   : two values - the first (1-testRatio) share of x, then the remainder
*/
def trainTestSplit(x, testRatio) {
	total = size(x)
	// Number of leading elements kept for training.
	cut = (total * (1 - testRatio))$INT
	trainPart = x[0:cut]
	testPart = x[cut:total]
	return trainPart, testPart
}
/**
Root mean square percentage error.
a : actual values (used as the denominator of the relative error)
b : predicted values
Returns sqrt( sum(((a-b)/a)^2) / size(a) ).
*/
def RMSPE(a,b)
{
	// Relative error of each prediction with respect to the actual value.
	relErr = (a - b) \ a
	return sqrt(sum(relErr * relErr) \ size(a))
}
// Split the data set: the leading 70% is used for training, the remaining 30% for testing.
Train, Test = trainTestSplit(dataset, 0.3)
// Feature columns fed to the regressor; the label column is targetRV.
featureCols = `BAS`DI0`DI1`DI2`DI3`DI4`Press`RV
// Train the AdaBoost regressor once and report the elapsed time.
timer(1){
	model = adaBoostRegressor(sqlDS(<select * from Train>), yColName=`targetRV, xColNames=featureCols, numTrees=30, maxDepth=16, loss=`square)
}
// Predict on the test set and keep the predictions next to the actual values.
predicted = predict(model, Test)
Test[`predict] = predicted
// Evaluate the predictions against the actual targetRV values.
rmspe = RMSPE(Test.targetRV, predicted)
print("RMSPE="+rmspe)
// Persist the trained model to modelSavePath.
saveModel(model, modelSavePath)

// Visualization: plot actual vs. predicted realized volatility for one
// randomly chosen security over the week 2020.10.19 - 2020.10.23.
// The random index is drawn from the number of distinct securities actually
// present in Test instead of a hard-coded 50, so it can never fall outside
// the list of IDs (which would produce an empty stock_id and an empty plot).
securityIds = exec distinct SecurityID from Test
stock_id = securityIds[rand(securityIds.size(), 1)[0]]
plot((select  targetRV,predict from Test where SecurityID=stock_id, date(TradeTime) between 2020.10.19 : 2020.10.23), title="The realized volatility of "+stock_id,extras={multiYAxes: false})