Pass pointer to model parameters. (#5101)


This PR de-duplicates most of the model parameters, except the one in
`tree_model.h`.  One difficulty is that `base_score` is a model property but can
be changed at runtime by the objective function.  Hence, when performing model IO,
we need to save the value provided by the user instead of the one transformed by
the objective.  To that end, this PR introduces an immutable version of
`LearnerModelParam` that represents the model parameters after configuration.
Jiaming Yuan, committed by GitHub, 2019-12-10 12:11:22 +08:00
commit e089e16e3d (parent 979f74d51a)
33 changed files with 623 additions and 404 deletions
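To make the split concrete, here is a minimal, self-contained sketch of the idea.
It is not the actual xgboost implementation: the type and function names below
(UserModelParam, ImmutableModelParam, PrintModel) are invented for illustration,
and the real LearnerModelParam carries more state.  The user-facing parameter stays
mutable and is what gets serialized, while an immutable snapshot is built once
during configuration, after the objective has transformed base_score, and is then
shared by const pointer with boosters and predictors.

#include <cstdint>
#include <iostream>

// Mutable, user-facing parameter: this is what gets saved and loaded during
// model IO, so it keeps the raw base_score supplied by the user.
struct UserModelParam {
  float base_score{0.5f};
  std::uint32_t num_feature{0};
  std::uint32_t num_output_group{1};
};

// Immutable snapshot built once after configuration.  The objective may
// transform base_score (e.g. apply its link function); the transformed value
// lives here and is handed out to boosters and predictors by const pointer.
struct ImmutableModelParam {
  float const base_score;
  std::uint32_t const num_feature;
  std::uint32_t const num_output_group;

  ImmutableModelParam(UserModelParam const& user, float transformed_base_score)
      : base_score{transformed_base_score},
        num_feature{user.num_feature},
        num_output_group{user.num_output_group} {}
};

// A component that only ever sees the const pointer, mirroring how the tests
// in this diff pass a LearnerModelParam const* into GradientBooster::Create.
void PrintModel(ImmutableModelParam const* p) {
  std::cout << p->base_score << " " << p->num_feature << " "
            << p->num_output_group << "\n";
}

int main() {
  UserModelParam user;        // configured by the user, serialized as-is
  user.num_feature = 10;
  float transformed = 0.0f;   // e.g. logit(0.5) == 0 for binary:logistic
  ImmutableModelParam mparam{user, transformed};
  PrintModel(&mparam);        // one instance, shared by pointer, never copied
}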


@@ -11,12 +11,12 @@ namespace common {
 TEST(CutsBuilder, SearchGroupInd) {
   size_t constexpr kNumGroups = 4;
-  size_t constexpr kNumRows = 17;
-  size_t constexpr kNumCols = 15;
+  size_t constexpr kRows = 17;
+  size_t constexpr kCols = 15;
-  auto pp_mat = CreateDMatrix(kNumRows, kNumCols, 0);
-  auto& p_mat = *pp_mat;
+  auto pp_dmat = CreateDMatrix(kRows, kCols, 0);
+  std::shared_ptr<DMatrix> p_mat {*pp_dmat};
   std::vector<bst_int> group(kNumGroups);
   group[0] = 2;
   group[1] = 3;
@@ -36,7 +36,7 @@ TEST(CutsBuilder, SearchGroupInd) {
   EXPECT_ANY_THROW(CutsBuilder::SearchGroupIndFromRow(p_mat->Info().group_ptr_, 17));
-  delete pp_mat;
+  delete pp_dmat;
 }
 namespace {
@@ -52,12 +52,11 @@ TEST(SparseCuts, SingleThreadedBuild) {
   size_t constexpr kCols = 31;
   size_t constexpr kBins = 256;
   // Dense matrix.
-  auto pp_mat = CreateDMatrix(kRows, kCols, 0);
-  DMatrix* p_fmat = (*pp_mat).get();
+  auto pp_dmat = CreateDMatrix(kRows, kCols, 0);
+  std::shared_ptr<DMatrix> p_fmat {*pp_dmat};
   common::GHistIndexMatrix hmat;
-  hmat.Init(p_fmat, kBins);
+  hmat.Init(p_fmat.get(), kBins);
   HistogramCuts cuts;
   SparseCuts indices(&cuts);
@@ -69,7 +68,7 @@ TEST(SparseCuts, SingleThreadedBuild) {
   ASSERT_EQ(hmat.cut.Values(), cuts.Values());
   ASSERT_EQ(hmat.cut.MinValues(), cuts.MinValues());
-  delete pp_mat;
+  delete pp_dmat;
 }
 TEST(SparseCuts, MultiThreadedBuild) {


@@ -12,62 +12,55 @@ TEST(GBTree, SelectTreeMethod) {
   GenericParameter generic_param;
   generic_param.UpdateAllowUnknown(Args{});
+  LearnerModelParam mparam;
+  mparam.base_score = 0.5;
+  mparam.num_feature = kCols;
+  mparam.num_output_group = 1;
+  std::vector<std::shared_ptr<DMatrix> > caches;
   std::unique_ptr<GradientBooster> p_gbm{
-      GradientBooster::Create("gbtree", &generic_param, {}, 0)};
+      GradientBooster::Create("gbtree", &generic_param, &mparam, caches)};
   auto& gbtree = dynamic_cast<gbm::GBTree&> (*p_gbm);
   // Test if `tree_method` can be set
-  std::string n_feat = std::to_string(kCols);
-  Args args {{"tree_method", "approx"}, {"num_feature", n_feat}};
-  gbtree.Configure({args.cbegin(), args.cend()});
+  Args args {{"tree_method", "approx"}};
+  gbtree.Configure(args);
   auto const& tparam = gbtree.GetTrainParam();
-  gbtree.Configure({{"tree_method", "approx"}, {"num_feature", n_feat}});
+  gbtree.Configure({{"tree_method", "approx"}});
   ASSERT_EQ(tparam.updater_seq, "grow_histmaker,prune");
-  gbtree.Configure({{"tree_method", "exact"}, {"num_feature", n_feat}});
+  gbtree.Configure({{"tree_method", "exact"}});
   ASSERT_EQ(tparam.updater_seq, "grow_colmaker,prune");
-  gbtree.Configure({{"tree_method", "hist"}, {"num_feature", n_feat}});
+  gbtree.Configure({{"tree_method", "hist"}});
   ASSERT_EQ(tparam.updater_seq, "grow_quantile_histmaker");
-  gbtree.Configure({{"booster", "dart"}, {"tree_method", "hist"},
-                    {"num_feature", n_feat}});
+  gbtree.Configure({{"booster", "dart"}, {"tree_method", "hist"}});
   ASSERT_EQ(tparam.updater_seq, "grow_quantile_histmaker");
 #ifdef XGBOOST_USE_CUDA
   generic_param.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
-  gbtree.Configure({{"tree_method", "gpu_hist"}, {"num_feature", n_feat}});
+  gbtree.Configure({{"tree_method", "gpu_hist"}});
   ASSERT_EQ(tparam.updater_seq, "grow_gpu_hist");
-  gbtree.Configure({{"booster", "dart"}, {"tree_method", "gpu_hist"},
-                    {"num_feature", n_feat}});
+  gbtree.Configure({{"booster", "dart"}, {"tree_method", "gpu_hist"}});
   ASSERT_EQ(tparam.updater_seq, "grow_gpu_hist");
-#endif
+#endif  // XGBOOST_USE_CUDA
 }
 #ifdef XGBOOST_USE_CUDA
 TEST(GBTree, ChoosePredictor) {
-  size_t constexpr kNumRows = 17;
+  size_t constexpr kRows = 17;
   size_t constexpr kCols = 15;
-  auto pp_mat = CreateDMatrix(kNumRows, kCols, 0);
-  auto& p_mat = *pp_mat;
-  std::vector<bst_float> labels (kNumRows);
-  for (size_t i = 0; i < kNumRows; ++i) {
-    labels[i] = i % 2;
-  }
-  p_mat->Info().SetInfo("label", labels.data(), DataType::kFloat32, kNumRows);
-  std::vector<std::shared_ptr<xgboost::DMatrix>> mat = {p_mat};
-  std::string n_feat = std::to_string(kCols);
-  Args args {{"tree_method", "approx"}, {"num_feature", n_feat}};
+  auto pp_dmat = CreateDMatrix(kRows, kCols, 0);
+  std::shared_ptr<DMatrix> p_dmat {*pp_dmat};
   GenericParameter generic_param;
   generic_param.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
-  auto& data = (*(p_mat->GetBatches<SparsePage>().begin())).data;
-  auto learner = std::unique_ptr<Learner>(Learner::Create(mat));
-  learner->SetParams(Args{{"tree_method", "gpu_hist"}});
+  p_dmat->Info().labels_.Resize(kRows);
+  auto& data = (*(p_dmat->GetBatches<SparsePage>().begin())).data;
+  auto learner = std::unique_ptr<Learner>(Learner::Create({p_dmat}));
+  learner->SetParams(Args{{"tree_method", "gpu_hist"}, {"gpu_id", "0"}});
   for (size_t i = 0; i < 4; ++i) {
-    learner->UpdateOneIter(i, p_mat.get());
+    learner->UpdateOneIter(i, p_dmat.get());
   }
   ASSERT_TRUE(data.HostCanWrite());
   dmlc::TemporaryDirectory tempdir;
@@ -79,14 +72,14 @@ TEST(GBTree, ChoosePredictor) {
   }
   // a new learner
-  learner = std::unique_ptr<Learner>(Learner::Create(mat));
+  learner = std::unique_ptr<Learner>(Learner::Create({p_dmat}));
   {
     std::unique_ptr<dmlc::Stream> fi(dmlc::Stream::Create(fname.c_str(), "r"));
     learner->Load(fi.get());
   }
   learner->SetParams(Args{{"tree_method", "gpu_hist"}, {"gpu_id", "0"}});
   for (size_t i = 0; i < 4; ++i) {
-    learner->UpdateOneIter(i, p_mat.get());
+    learner->UpdateOneIter(i, p_dmat.get());
   }
   ASSERT_TRUE(data.HostCanWrite());
@@ -96,10 +89,10 @@ TEST(GBTree, ChoosePredictor) {
   ASSERT_FALSE(data.HostCanWrite());
   // another new learner
-  learner = std::unique_ptr<Learner>(Learner::Create(mat));
+  learner = std::unique_ptr<Learner>(Learner::Create({p_dmat}));
   learner->SetParams(Args{{"tree_method", "gpu_hist"}, {"gpu_id", "0"}});
   for (size_t i = 0; i < 4; ++i) {
-    learner->UpdateOneIter(i, p_mat.get());
+    learner->UpdateOneIter(i, p_dmat.get());
   }
   // data is not pulled back into host
   ASSERT_FALSE(data.HostCanWrite());


@@ -1,15 +1,17 @@
/*!
* Copyright 2016-2018 XGBoost contributors
* Copyright 2016-2019 XGBoost contributors
*/
#include <dmlc/filesystem.h>
#include <xgboost/logging.h>
#include <xgboost/gbm.h>
#include <xgboost/json.h>
#include <gtest/gtest.h>
#include <algorithm>
#include <random>
#include <cinttypes>
#include "./helpers.h"
#include "helpers.h"
#include "xgboost/c_api.h"
#include "../../src/data/simple_csr_source.h"
@@ -113,7 +115,6 @@ void CheckRankingObjFunction(std::unique_ptr<xgboost::ObjFunction> const& obj,
CheckObjFunctionImpl(obj, preds, labels, weights, info, out_grad, out_hess);
}
xgboost::bst_float GetMetricEval(xgboost::Metric * metric,
xgboost::HostDeviceVector<xgboost::bst_float> preds,
std::vector<xgboost::bst_float> labels,
@@ -192,6 +193,7 @@ std::unique_ptr<DMatrix> CreateSparsePageDMatrix(
return dmat;
}
std::unique_ptr<DMatrix> CreateSparsePageDMatrixWithRC(
size_t n_rows, size_t n_cols, size_t page_size, bool deterministic,
const dmlc::TemporaryDirectory& tempdir) {
@@ -257,16 +259,42 @@ std::unique_ptr<DMatrix> CreateSparsePageDMatrixWithRC(
}
}
gbm::GBTreeModel CreateTestModel() {
gbm::GBTreeModel CreateTestModel(LearnerModelParam const* param) {
std::vector<std::unique_ptr<RegTree>> trees;
trees.push_back(std::unique_ptr<RegTree>(new RegTree));
(*trees.back())[0].SetLeaf(1.5f);
(*trees.back()).Stat(0).sum_hess = 1.0f;
gbm::GBTreeModel model(0.5);
gbm::GBTreeModel model(param);
model.CommitModel(std::move(trees), 0);
model.param.num_output_group = 1;
model.base_margin = 0;
return model;
}
std::unique_ptr<GradientBooster> CreateTrainedGBM(
std::string name, Args kwargs, size_t kRows, size_t kCols,
LearnerModelParam const* learner_model_param,
GenericParameter const* generic_param) {
std::unique_ptr<GradientBooster> gbm {
GradientBooster::Create(name, generic_param, learner_model_param, {})};
gbm->Configure(kwargs);
auto pp_dmat = CreateDMatrix(kRows, kCols, 0);
auto p_dmat = *pp_dmat;
std::vector<float> labels(kRows);
for (size_t i = 0; i < kRows; ++i) {
labels[i] = i;
}
p_dmat->Info().labels_.HostVector() = labels;
HostDeviceVector<GradientPair> gpair;
auto& h_gpair = gpair.HostVector();
h_gpair.resize(kRows);
for (size_t i = 0; i < kRows; ++i) {
h_gpair[i] = {static_cast<float>(i), 1};
}
gbm->DoBoost(p_dmat.get(), &gpair, nullptr);
delete pp_dmat;
return gbm;
}
} // namespace xgboost


@@ -21,6 +21,8 @@
 #include <xgboost/json.h>
 #include <xgboost/predictor.h>
 #include <xgboost/generic_parameters.h>
+#include <xgboost/c_api.h>
+#include <xgboost/learner.h>
 #include "../../src/common/common.h"
 #include "../../src/common/hist_util.h"
@@ -204,7 +206,12 @@ std::unique_ptr<DMatrix> CreateSparsePageDMatrixWithRC(
     size_t n_rows, size_t n_cols, size_t page_size, bool deterministic,
     const dmlc::TemporaryDirectory& tempdir = dmlc::TemporaryDirectory());
-gbm::GBTreeModel CreateTestModel();
+gbm::GBTreeModel CreateTestModel(LearnerModelParam const* param);
+
+std::unique_ptr<GradientBooster> CreateTrainedGBM(
+    std::string name, Args kwargs, size_t kRows, size_t kCols,
+    LearnerModelParam const* learner_model_param,
+    GenericParameter const* generic_param);
 inline GenericParameter CreateEmptyGenericParam(int gpu_id) {
   xgboost::GenericParameter tparam;


@@ -8,20 +8,30 @@
 #include "../../../src/gbm/gblinear_model.h"
 namespace xgboost {
 TEST(Linear, shotgun) {
-  auto mat = xgboost::CreateDMatrix(10, 10, 0);
+  size_t constexpr kRows = 10;
+  size_t constexpr kCols = 10;
+  auto pp_dmat = xgboost::CreateDMatrix(kRows, kCols, 0);
+  auto p_fmat {*pp_dmat};
   auto lparam = xgboost::CreateEmptyGenericParam(GPUIDX);
+  LearnerModelParam mparam;
+  mparam.num_feature = kCols;
+  mparam.num_output_group = 1;
+  mparam.base_score = 0.5;
   {
     auto updater = std::unique_ptr<xgboost::LinearUpdater>(
         xgboost::LinearUpdater::Create("shotgun", &lparam));
     updater->Configure({{"eta", "1."}});
     xgboost::HostDeviceVector<xgboost::GradientPair> gpair(
-        (*mat)->Info().num_row_, xgboost::GradientPair(-5, 1.0));
-    xgboost::gbm::GBLinearModel model;
-    model.param.num_feature = (*mat)->Info().num_col_;
-    model.param.num_output_group = 1;
+        p_fmat->Info().num_row_, xgboost::GradientPair(-5, 1.0));
+    xgboost::gbm::GBLinearModel model{&mparam};
     model.LazyInitModel();
-    updater->Update(&gpair, (*mat).get(), &model, gpair.Size());
+    updater->Update(&gpair, p_fmat.get(), &model, gpair.Size());
     ASSERT_EQ(model.bias()[0], 5.0f);
@@ -31,24 +41,35 @@ TEST(Linear, shotgun) {
         xgboost::LinearUpdater::Create("shotgun", &lparam));
     EXPECT_ANY_THROW(updater->Configure({{"feature_selector", "random"}}));
   }
-  delete mat;
+  delete pp_dmat;
 }
 TEST(Linear, coordinate) {
-  auto mat = xgboost::CreateDMatrix(10, 10, 0);
+  size_t constexpr kRows = 10;
+  size_t constexpr kCols = 10;
+  auto pp_dmat = xgboost::CreateDMatrix(kRows, kCols, 0);
+  auto p_fmat {*pp_dmat};
   auto lparam = xgboost::CreateEmptyGenericParam(GPUIDX);
+  LearnerModelParam mparam;
+  mparam.num_feature = kCols;
+  mparam.num_output_group = 1;
+  mparam.base_score = 0.5;
   auto updater = std::unique_ptr<xgboost::LinearUpdater>(
       xgboost::LinearUpdater::Create("coord_descent", &lparam));
   updater->Configure({{"eta", "1."}});
   xgboost::HostDeviceVector<xgboost::GradientPair> gpair(
-      (*mat)->Info().num_row_, xgboost::GradientPair(-5, 1.0));
-  xgboost::gbm::GBLinearModel model;
-  model.param.num_feature = (*mat)->Info().num_col_;
-  model.param.num_output_group = 1;
+      p_fmat->Info().num_row_, xgboost::GradientPair(-5, 1.0));
+  xgboost::gbm::GBLinearModel model{&mparam};
   model.LazyInitModel();
-  updater->Update(&gpair, (*mat).get(), &model, gpair.Size());
+  updater->Update(&gpair, p_fmat.get(), &model, gpair.Size());
   ASSERT_EQ(model.bias()[0], 5.0f);
-  delete mat;
+  delete pp_dmat;
 }
 } // namespace xgboost


@@ -8,16 +8,24 @@
 namespace xgboost {
 TEST(Linear, GPUCoordinate) {
-  auto mat = xgboost::CreateDMatrix(10, 10, 0);
+  size_t constexpr kRows = 10;
+  size_t constexpr kCols = 10;
+  auto mat = xgboost::CreateDMatrix(kRows, kCols, 0);
   auto lparam = CreateEmptyGenericParam(GPUIDX);
+  LearnerModelParam mparam;
+  mparam.num_feature = kCols;
+  mparam.num_output_group = 1;
+  mparam.base_score = 0.5;
   auto updater = std::unique_ptr<xgboost::LinearUpdater>(
       xgboost::LinearUpdater::Create("gpu_coord_descent", &lparam));
   updater->Configure({{"eta", "1."}});
   xgboost::HostDeviceVector<xgboost::GradientPair> gpair(
       (*mat)->Info().num_row_, xgboost::GradientPair(-5, 1.0));
-  xgboost::gbm::GBLinearModel model;
-  model.param.num_feature = (*mat)->Info().num_col_;
-  model.param.num_output_group = 1;
+  xgboost::gbm::GBLinearModel model{&mparam};
   model.LazyInitModel();
   updater->Update(&gpair, (*mat).get(), &model, gpair.Size());


@@ -7,18 +7,23 @@
 #include "../../../src/gbm/gbtree_model.h"
 namespace xgboost {
-TEST(cpu_predictor, Test) {
+TEST(CpuPredictor, Basic) {
   auto lparam = CreateEmptyGenericParam(GPUIDX);
   auto cache = std::make_shared<std::unordered_map<DMatrix*, PredictionCacheEntry>>();
   std::unique_ptr<Predictor> cpu_predictor =
       std::unique_ptr<Predictor>(Predictor::Create("cpu_predictor", &lparam, cache));
-  gbm::GBTreeModel model = CreateTestModel();
-  int n_row = 5;
-  int n_col = 5;
-  auto dmat = CreateDMatrix(n_row, n_col, 0);
+  int kRows = 5;
+  int kCols = 5;
+  LearnerModelParam param;
+  param.num_feature = kCols;
+  param.base_score = 0.0;
+  param.num_output_group = 1;
+  gbm::GBTreeModel model = CreateTestModel(&param);
+  auto dmat = CreateDMatrix(kRows, kCols, 0);
   // Test predict batch
   HostDeviceVector<float> out_predictions;
@@ -46,19 +51,32 @@ TEST(cpu_predictor, Test) {
   // Test predict contribution
   std::vector<float> out_contribution;
   cpu_predictor->PredictContribution((*dmat).get(), &out_contribution, model);
-  for (auto const& contri : out_contribution) {
-    ASSERT_EQ(contri, 1.5);
+  ASSERT_EQ(out_contribution.size(), kRows * (kCols + 1));
+  for (size_t i = 0; i < out_contribution.size(); ++i) {
+    auto const& contri = out_contribution[i];
+    // shift 1 for bias, as test tree is a decision dump, only global bias is filled with LeafValue().
+    if ((i+1) % (kCols+1) == 0) {
+      ASSERT_EQ(out_contribution.back(), 1.5f);
+    } else {
+      ASSERT_EQ(contri, 0);
+    }
   }
   // Test predict contribution (approximate method)
-  cpu_predictor->PredictContribution((*dmat).get(), &out_contribution, model, true);
-  for (auto const& contri : out_contribution) {
-    ASSERT_EQ(contri, 1.5);
+  cpu_predictor->PredictContribution((*dmat).get(), &out_contribution, model, 0, nullptr, true);
+  for (size_t i = 0; i < out_contribution.size(); ++i) {
+    auto const& contri = out_contribution[i];
+    // shift 1 for bias, as test tree is a decision dump, only global bias is filled with LeafValue().
+    if ((i+1) % (kCols+1) == 0) {
+      ASSERT_EQ(out_contribution.back(), 1.5f);
+    } else {
+      ASSERT_EQ(contri, 0);
+    }
   }
   delete dmat;
 }
-TEST(cpu_predictor, ExternalMemoryTest) {
+TEST(CpuPredictor, ExternalMemory) {
   dmlc::TemporaryDirectory tmpdir;
   std::string filename = tmpdir.path + "/big.libsvm";
   std::unique_ptr<DMatrix> dmat = CreateSparsePageDMatrix(12, 64, filename);
@@ -68,13 +86,18 @@ TEST(cpu_predictor, ExternalMemoryTest) {
   std::unique_ptr<Predictor> cpu_predictor =
       std::unique_ptr<Predictor>(Predictor::Create("cpu_predictor", &lparam, cache));
-  gbm::GBTreeModel model = CreateTestModel();
+  LearnerModelParam param;
+  param.base_score = 0;
+  param.num_feature = dmat->Info().num_col_;
+  param.num_output_group = 1;
+  gbm::GBTreeModel model = CreateTestModel(&param);
   // Test predict batch
   HostDeviceVector<float> out_predictions;
   cpu_predictor->PredictBatch(dmat.get(), &out_predictions, model, 0);
   std::vector<float> &out_predictions_h = out_predictions.HostVector();
-  EXPECT_EQ(out_predictions.Size(), dmat->Info().num_row_);
+  ASSERT_EQ(out_predictions.Size(), dmat->Info().num_row_);
   for (const auto& v : out_predictions_h) {
     ASSERT_EQ(v, 1.5);
   }
@@ -82,7 +105,7 @@ TEST(cpu_predictor, ExternalMemoryTest) {
   // Test predict leaf
   std::vector<float> leaf_out_predictions;
   cpu_predictor->PredictLeaf(dmat.get(), &leaf_out_predictions, model);
-  EXPECT_EQ(leaf_out_predictions.size(), dmat->Info().num_row_);
+  ASSERT_EQ(leaf_out_predictions.size(), dmat->Info().num_row_);
   for (const auto& v : leaf_out_predictions) {
     ASSERT_EQ(v, 0);
   }
@@ -90,17 +113,30 @@ TEST(cpu_predictor, ExternalMemoryTest) {
   // Test predict contribution
   std::vector<float> out_contribution;
   cpu_predictor->PredictContribution(dmat.get(), &out_contribution, model);
-  EXPECT_EQ(out_contribution.size(), dmat->Info().num_row_);
-  for (const auto& v : out_contribution) {
-    ASSERT_EQ(v, 1.5);
+  ASSERT_EQ(out_contribution.size(), dmat->Info().num_row_ * (dmat->Info().num_col_ + 1));
+  for (size_t i = 0; i < out_contribution.size(); ++i) {
+    auto const& contri = out_contribution[i];
+    // shift 1 for bias, as test tree is a decision dump, only global bias is filled with LeafValue().
+    if ((i + 1) % (dmat->Info().num_col_ + 1) == 0) {
+      ASSERT_EQ(out_contribution.back(), 1.5f);
+    } else {
+      ASSERT_EQ(contri, 0);
+    }
   }
   // Test predict contribution (approximate method)
   std::vector<float> out_contribution_approximate;
-  cpu_predictor->PredictContribution(dmat.get(), &out_contribution_approximate, model, true);
-  EXPECT_EQ(out_contribution_approximate.size(), dmat->Info().num_row_);
-  for (const auto& v : out_contribution_approximate) {
-    ASSERT_EQ(v, 1.5);
+  cpu_predictor->PredictContribution(dmat.get(), &out_contribution_approximate, model, 0, nullptr, true);
+  ASSERT_EQ(out_contribution_approximate.size(),
+            dmat->Info().num_row_ * (dmat->Info().num_col_ + 1));
+  for (size_t i = 0; i < out_contribution.size(); ++i) {
+    auto const& contri = out_contribution[i];
+    // shift 1 for bias, as test tree is a decision dump, only global bias is filled with LeafValue().
+    if ((i + 1) % (dmat->Info().num_col_ + 1) == 0) {
+      ASSERT_EQ(out_contribution.back(), 1.5f);
+    } else {
+      ASSERT_EQ(contri, 0);
+    }
   }
 }
 } // namespace xgboost


@@ -33,7 +33,7 @@ QueryBoosterConfigurationArguments(BoosterHandle handle) {
 namespace xgboost {
 namespace predictor {
-TEST(gpu_predictor, Test) {
+TEST(GpuPredictor, Basic) {
   auto cpu_lparam = CreateEmptyGenericParam(-1);
   auto gpu_lparam = CreateEmptyGenericParam(0);
   auto cache = std::make_shared<std::unordered_map<DMatrix*, PredictionCacheEntry>>();
@@ -50,8 +50,12 @@ TEST(gpu_predictor, Test) {
     int n_row = i, n_col = i;
     auto dmat = CreateDMatrix(n_row, n_col, 0);
-    gbm::GBTreeModel model = CreateTestModel();
-    model.param.num_feature = n_col;
+    LearnerModelParam param;
+    param.num_feature = n_col;
+    param.num_output_group = 1;
+    param.base_score = 0.5;
+    gbm::GBTreeModel model = CreateTestModel(&param);
     // Test predict batch
     HostDeviceVector<float> gpu_out_predictions;
@@ -76,10 +80,14 @@ TEST(gpu_predictor, ExternalMemoryTest) {
   std::unique_ptr<Predictor> gpu_predictor =
      std::unique_ptr<Predictor>(Predictor::Create("gpu_predictor", &lparam, cache));
   gpu_predictor->Configure({});
-  gbm::GBTreeModel model = CreateTestModel();
-  model.param.num_feature = 3;
+  LearnerModelParam param;
+  param.num_feature = 2;
   const int n_classes = 3;
-  model.param.num_output_group = n_classes;
+  param.num_output_group = n_classes;
+  param.base_score = 0.5;
+  gbm::GBTreeModel model = CreateTestModel(&param);
   std::vector<std::unique_ptr<DMatrix>> dmats;
   dmlc::TemporaryDirectory tmpdir;
   std::string file0 = tmpdir.path + "/big_0.libsvm";