/**
modelBuildingTraining.txt
Script to build models and train models
DolphinDB Inc.
DolphinDB server version: 1.30.18 2022.05.09 / 2.00.6 2022.05.09
Last modification time: 2022.05.10
*/

/**
Attention:
1. There is one place in the script that needs to be modified according to the environment
*/

// log in and clean up the environment
// NOTE(review): the credentials below are hard-coded; adjust them if your server uses different ones
login("admin", "123456")
clearAllCache()
undef(all)
go

/**
modified location 1: modelSavePath, dbName and tbName
*/
modelSavePath = "/hdd/hdd9/machineLearning/realizedVolatilityModel_1.30.18.bin"
//modelSavePath = "/hdd/hdd9/machineLearning/realizedVolatilityModel_2.00.6.bin"
dbName = "dfs://sz50VolatilityDataSet"
tbName = "sz50VolatilityDataSet"

// load the rows whose TradeTime falls in 2020
dataset = select * from loadTable(dbName, tbName) where date(TradeTime) between 2020.01.01 : 2020.12.31

/**
Split x into a training part and a test part by position.
x:         the data to split (called with a table below)
testRatio: fraction of the rows assigned to the test part
Returns two values: the leading xSize*(1-testRatio) rows (cast to INT) as the
training part, and the remaining rows as the test part.
*/
def trainTestSplit(x, testRatio) {
    xSize = x.size()
    // number of leading rows kept for training
    trainSize = (xSize * (1 - testRatio))$INT
    return x[0:trainSize], x[trainSize:xSize]
}

/**
Root mean square percentage error between a (actual) and b (predicted):
sqrt( sum( ((a-b)\a)^2 ) \ a.size() )
*/
def RMSPE(a, b) {
    // relative error of every element, computed once
    relErr = (a - b) \ a
    return sqrt(sum(relErr * relErr) \ a.size())
}

// train: first 70% of the rows for training, remaining 30% for testing
Train, Test = trainTestSplit(dataset, 0.3)
timer(1) {
    model = adaBoostRegressor(sqlDS(<select * from Train>), yColName=`targetRV, xColNames=`BAS`DI0`DI1`DI2`DI3`DI4`Press`RV, numTrees=30, maxDepth=16, loss=`square)
}

// predict
predicted = model.predict(Test)
Test[`predict] = predicted
print("RMSPE=" + RMSPE(Test.targetRV, predicted))

// save model
saveModel(model, modelSavePath)

// visualization: pick one security of the test set at random
// the random index is bounded by the actual number of distinct securities
// instead of a hard-coded 50, so it is always a valid position
securityIds = exec distinct(SecurityID) from Test
stock_id = securityIds[rand(securityIds.size(), 1)[0]]
plot((select targetRV, predict from Test where SecurityID=stock_id, date(TradeTime) between 2020.10.19 : 2020.10.23), title="The realized volatility of " + stock_id, extras={multiYAxes: false})