Optuna 超参数优化:在目标函数外定义超参数空间

2024-03-29 08:15:40 发布

您现在位置:Python中文网/ 问答频道 /正文

有人知道如何使用 Optuna API 在目标函数之外定义超参数空间吗?

class Objective(object):
    """Callable Optuna objective that trains a DeepAR estimator and scores it.

    An instance is passed to ``study.optimize``; every call trains one
    estimator with the hyper-parameters suggested by the trial and returns
    the aggregate metric selected by ``metric``.

    Attributes:
        best_deepar_model: estimator of the best trial seen so far; only
            filled in when ``callback_best_deepar_model`` is registered as a
            study callback.
        training_param_search_space: hyper-parameters suggested for the most
            recent trial (``None`` before the first call).
        metric: key looked up in the aggregate metrics returned by the
            evaluator.
        training_data: dataset handed to ``estimator.train``.
        testing_data: dataset handed to ``make_evaluation_predictions``.
    """

    def __init__(self,
                 metric,
                 training_data,
                 testing_data):
        self.best_deepar_model = None
        self.training_param_search_space = None
        # Estimator built by the most recent __call__; read by the callback.
        self._last_estimator = None
        self.metric = metric
        self.training_data = training_data
        self.testing_data = testing_data

    def callback_best_deepar_model(self, study, trial):
        """Study callback: remember the estimator of the current best trial.

        Register it with ``study.optimize(..., callbacks=[objective.callback_best_deepar_model])``.

        NOTE(review): relies on the callback running right after the matching
        ``__call__`` (sequential trials); confirm before using ``n_jobs > 1``.
        """
        if study.best_trial.number == trial.number:
            self.best_deepar_model = self._last_estimator

    def __call__(self, trial):
        """Train and evaluate one DeepAR model for ``trial``.

        Returns:
            The value stored under ``self.metric`` in the aggregate metrics.
        """
        # NOTE: the search space is still built here because it needs the
        # `trial` object; the goal of the question is to define it outside
        # of the objective.
        self.training_param_search_space = {
            'learning_rate': trial.suggest_float('learning_rate', 1e-5, 1e-1, log=True),
            'epochs': trial.suggest_int('epochs', 2, 3),
        }

        # `meta_data` is read from module scope (frequency and horizon).
        estimator = deepar.DeepAREstimator(
            freq=meta_data.freq,
            prediction_length=meta_data.prediction_length,
            trainer=Trainer(
                ctx="cpu",
                epochs=self.training_param_search_space['epochs'],
                learning_rate=self.training_param_search_space['learning_rate'],
            ),
        )
        self._last_estimator = estimator

        # Use the datasets given to the constructor rather than module globals.
        predictor = estimator.train(training_data=self.training_data)

        forecasts, test_series = make_evaluation_predictions(
            dataset=self.testing_data, predictor=predictor, num_samples=10)

        # Materialise both results as lists before handing them to the
        # evaluator (the original code treats them as generators).
        list_forecasts = list(forecasts)
        list_test_series = list(test_series)
        evaluator = Evaluator(quantiles=[0.5])
        agg_metrics, _item_metrics = evaluator(
            iter(list_test_series),
            iter(list_forecasts),
            num_series=len(self.testing_data),
        )

        return agg_metrics[self.metric]

我想在初始化 Objective 类时把超参数空间作为参数传入(就像度量指标和训练/测试数据那样),这样就可以在 Optuna 的目标函数之外定义超参数空间。但是,超参数空间是通过 trial 对象来定义的(而 trial 只存在于目标函数内部),所以我不知道怎样才能在目标函数外部定义它

# Desired usage (not possible as written, because `trial` only exists while
# the objective is being called):
# training_param_search_space = {
#     'lr': trial.suggest_float('lr', 1e-5, 1e-1, log=True),
#     'n_epochs': trial.suggest_int('n_epochs', 2, 3),
# }
# objective = Objective(
#     metric='MSE',
#     training_param_search_space=training_param_search_space,
#     training_data=training_data,
#     testing_data=testing_data,
# )

# Current usage: the search space is built inside Objective.__call__.
objective = Objective(
    metric='MSE',
    training_data=training_data,
    testing_data=testing_data,
)

# Create a study that minimises the value returned by the objective.
study = optuna.create_study(direction="minimize")

# Run the optimisation, bounded by n_trials and/or timeout.
study.optimize(objective, n_trials=4, timeout=600)

请参阅下面的完整代码:

from gluonts.evaluation.backtest import make_evaluation_predictions
from gluonts.dataset.repository.datasets import get_dataset
from gluonts.model import simple_feedforward
from gluonts.evaluation import Evaluator
from gluonts.dataset.util import to_pandas
from gluonts.model import deepar
from gluonts.dataset import common
from gluonts.trainer import Trainer

import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt

import optuna

# Load the CSV into a DataFrame; the first column becomes the index.
raw_data = pd.read_csv(
    "https://raw.githubusercontent.com/numenta/NAB/master/data/realTweets/Twitter_volume_AMZN.csv",
    header=0,
    index_col=0,
)

# Wrap the raw series in a GluonTS ListDataset:
#   start  - first index value of the DataFrame
#   target - the series to model, sliced up to "2015-04-05 00:00:00"
#   freq   - sampling frequency of the data
# NOTE(review): `train_data` is rebound a few lines below, so this dataset
# looks unused - confirm before relying on it.
train_data = common.ListDataset(
    [
        {
            "start": raw_data.index[0],
            "target": raw_data.value[:"2015-04-05 00:00:00"],
        }
    ],
    freq="5min",
)

# Fetch the "exchange_rate" dataset and keep its first train/test entries
# together with its metadata.
gluon_data = get_dataset("exchange_rate", regenerate=True)
train_data = next(iter(gluon_data.train))
test_data = next(iter(gluon_data.test))
meta_data = gluon_data.metadata

# Build the ListDataset objects that are passed to the Objective.
training_data = common.ListDataset(gluon_data.train, freq=meta_data.freq)
testing_data = common.ListDataset(gluon_data.test, freq=meta_data.freq)

    
class Objective(object):
    """Callable Optuna objective that trains a DeepAR estimator and scores it.

    An instance is passed to ``study.optimize``; every call trains one
    estimator with the hyper-parameters suggested by the trial and returns
    the aggregate metric selected by ``metric``.

    Attributes:
        best_deepar_model: estimator of the best trial seen so far; only
            filled in when ``callback_best_deepar_model`` is registered as a
            study callback.
        training_param_search_space: hyper-parameters suggested for the most
            recent trial (``None`` before the first call).
        metric: key looked up in the aggregate metrics returned by the
            evaluator.
        training_data: dataset handed to ``estimator.train``.
        testing_data: dataset handed to ``make_evaluation_predictions``.
    """

    def __init__(self,
                 metric,
                 training_data,
                 testing_data):
        self.best_deepar_model = None
        self.training_param_search_space = None
        # Estimator built by the most recent __call__; read by the callback.
        self._last_estimator = None
        self.metric = metric
        self.training_data = training_data
        self.testing_data = testing_data

    def callback_best_deepar_model(self, study, trial):
        """Study callback: remember the estimator of the current best trial.

        Register it with ``study.optimize(..., callbacks=[objective.callback_best_deepar_model])``.

        NOTE(review): relies on the callback running right after the matching
        ``__call__`` (sequential trials); confirm before using ``n_jobs > 1``.
        """
        if study.best_trial.number == trial.number:
            self.best_deepar_model = self._last_estimator

    def __call__(self, trial):
        """Train and evaluate one DeepAR model for ``trial``.

        Returns:
            The value stored under ``self.metric`` in the aggregate metrics.
        """
        # Hyper-parameters are suggested per trial, so they are built here.
        self.training_param_search_space = {
            'learning_rate': trial.suggest_float('learning_rate', 1e-5, 1e-1, log=True),
            'epochs': trial.suggest_int('epochs', 2, 3),
        }

        # `meta_data` is read from module scope (frequency and horizon).
        estimator = deepar.DeepAREstimator(
            freq=meta_data.freq,
            prediction_length=meta_data.prediction_length,
            trainer=Trainer(
                ctx="cpu",
                epochs=self.training_param_search_space['epochs'],
                learning_rate=self.training_param_search_space['learning_rate'],
            ),
        )
        self._last_estimator = estimator

        # Use the datasets given to the constructor rather than module globals.
        predictor = estimator.train(training_data=self.training_data)

        forecasts, test_series = make_evaluation_predictions(
            dataset=self.testing_data, predictor=predictor, num_samples=10)

        # Materialise both results as lists before handing them to the
        # evaluator (the original code treats them as generators).
        list_forecasts = list(forecasts)
        list_test_series = list(test_series)
        evaluator = Evaluator(quantiles=[0.5])
        agg_metrics, _item_metrics = evaluator(
            iter(list_test_series),
            iter(list_forecasts),
            num_series=len(self.testing_data),
        )

        return agg_metrics[self.metric]


# Desired call once the search space can be passed in from outside:
# objective = Objective(
#     metric='MSE',
#     param_search_space=param_search_space,
#     training_data=training_data,
#     testing_data=testing_data,
# )
objective = Objective(
    metric='MSE',
    training_data=training_data,
    testing_data=testing_data,
)

# Create a study that minimises the value returned by the objective.
study = optuna.create_study(direction="minimize")

# Run the optimisation, bounded by n_trials and/or timeout.
study.optimize(objective, n_trials=4, timeout=600)

任何关于如何做到这一点的想法都会很好


Tags: the test import self search data param training
1条回答
网友
1楼 · 发布于 2024-03-29 08:15:40

是的,这是可能的。下面我通过定义一个函数来给出一个例子。您可以将此函数保存在另一个python模块中,并在编写Objective类时导入它

def create_hpspace(trial):
    """Build the training hyper-parameter space from a trial object.

    The trial is asked for a log-scaled float ``'learning_rate'`` in
    [1e-5, 1e-1] and an integer ``'epochs'`` in [2, 3]. Conditions or loops
    may be used here to shape the space.

    Returns:
        dict with the keys ``'learning_rate'`` and ``'epochs'``.
    """
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-1, log=True)
    epochs = trial.suggest_int('epochs', 2, 3)
    # Further entries such as 'batch_size' or 'num_batches_per_epoch' can be
    # added to the returned mapping.
    return {'learning_rate': learning_rate, 'epochs': epochs}

class Objective(object):
    """Objective fragment showing how to use an externally defined search space."""

    def __call__(self, trial):
        # Delegate to create_hpspace, which may live in another module; it
        # receives the trial and returns the suggested hyper-parameters.
        self.training_param_search_space = create_hpspace(trial)

相关问题 更多 >