diff --git a/rabit-learn/common.mk b/rabit-learn/common.mk
index be73390c2..66f6b1882 100644
--- a/rabit-learn/common.mk
+++ b/rabit-learn/common.mk
@@ -4,7 +4,7 @@ export CC = gcc
 export CXX = g++
 export MPICXX = mpicxx
 export LDFLAGS= -pthread -lm -L../../lib
-export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -fPIC -I../../include -I../common
+export CFLAGS = -Wall -msse2 -Wno-unknown-pragmas -fPIC -I../../include -I../common
 
 .PHONY: clean all lib mpi
 all: $(BIN) $(MOCKBIN)
diff --git a/rabit-learn/common/toolkit_util.h b/rabit-learn/common/toolkit_util.h
index d616ac0bf..50f61eada 100644
--- a/rabit-learn/common/toolkit_util.h
+++ b/rabit-learn/common/toolkit_util.h
@@ -1,24 +1,38 @@
-#include
+/*!
+ * Copyright (c) 2015 by Contributors
+ * \file toolkit_util.h
+ * \brief simple data structures that can be used by the model
+ *
+ * \author Tianqi Chen
+ */
+#ifndef RABIT_TOOLKIT_UTIL_H_
+#define RABIT_TOOLKIT_UTIL_H_
+
 #include
 #include
 #include
 #include
+#include
 #include
+#include
 
 namespace rabit {
+// typedef index type
+typedef unsigned index_t;
+
 /*! \brief sparse matrix, CSR format */
 struct SparseMat {
   // sparse matrix entry
   struct Entry {
     // feature index
-    unsigned findex;
+    index_t findex;
     // feature value
     float fvalue;
   };
   // sparse vector
   struct Vector {
     const Entry *data;
-    unsigned length;
+    index_t length;
     inline const Entry &operator[](size_t i) const {
       return data[i];
     }
@@ -26,7 +40,7 @@
   inline Vector operator[](size_t i) const {
     Vector v;
     v.data = &data[0] + row_ptr[i];
-    v.length = static_cast<unsigned>(row_ptr[i + 1]-row_ptr[i]);
+    v.length = static_cast<index_t>(row_ptr[i + 1]-row_ptr[i]);
     return v;
   }
   // load data from LibSVM format
@@ -35,7 +49,13 @@
     if (!strcmp(fname, "stdin")) {
       fi = stdin;
     } else {
-      fi = utils::FopenCheck(fname, "r");
+      if (strchr(fname, '%') != NULL) {
+        char s_tmp[256];
+        snprintf(s_tmp, sizeof(s_tmp), fname, rabit::GetRank());
+        fi = utils::FopenCheck(s_tmp, "r");
+      } else {
+        fi = utils::FopenCheck(fname, "r");
+      }
     }
     row_ptr.clear();
     row_ptr.push_back(0);
@@ -45,9 +65,11 @@
     char tmp[1024];
     while (fscanf(fi, "%s", tmp) == 1) {
       Entry e;
-      if (sscanf(tmp, "%u:%f", &e.findex, &e.fvalue) == 2) {
+      unsigned long fidx;
+      if (sscanf(tmp, "%lu:%f", &fidx, &e.fvalue) == 2) {
+        e.findex = static_cast<index_t>(fidx);
         data.push_back(e);
-        feat_dim = std::max(e.findex, feat_dim);
+        feat_dim = std::max(fidx, feat_dim);
       } else {
         if (!init) {
           labels.push_back(label);
@@ -61,6 +83,9 @@
     labels.push_back(label);
     row_ptr.push_back(data.size());
     feat_dim += 1;
+    utils::Check(feat_dim < std::numeric_limits<index_t>::max(),
+                 "feature dimension exceeds the limit of index_t, "
+                 "consider changing index_t to unsigned long");
     // close the filed
     if (fi != stdin) fclose(fi);
   }
@@ -68,7 +93,7 @@
     return row_ptr.size() - 1;
   }
   // maximum feature dimension
-  unsigned feat_dim;
+  size_t feat_dim;
   std::vector<size_t> row_ptr;
   std::vector<Entry> data;
   std::vector<float> labels;
@@ -115,3 +140,4 @@ inline int Random(int value) {
   return rand() % value;
 }
 }  // namespace rabit
+#endif  // RABIT_TOOLKIT_UTIL_H_
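Note on the per-rank input handling added to SparseMat::Load above: when the file name contains a '%' pattern, each worker substitutes its own rank before opening the file, so a job launched with mushroom.row%d has rank 3 read mushroom.row3. A minimal standalone sketch of that substitution (not part of the patch; the file name and rank below are made-up illustration values):

// sketch_rank_substitution.cc, illustration only; mirrors the snprintf branch
// added in SparseMat::Load. Build with: g++ sketch_rank_substitution.cc
#include <cstdio>
#include <cstring>

int main() {
  const char *fname = "mushroom.row%d";  // hypothetical per-rank file template
  int rank = 3;                          // in the real code this comes from rabit::GetRank()
  char s_tmp[256];
  if (strchr(fname, '%') != NULL) {
    snprintf(s_tmp, sizeof(s_tmp), fname, rank);   // expands to "mushroom.row3"
  } else {
    snprintf(s_tmp, sizeof(s_tmp), "%s", fname);   // plain file name, no substitution
  }
  printf("worker %d would read %s\n", rank, s_tmp);
  return 0;
}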
diff --git a/rabit-learn/kmeans/kmeans.cc b/rabit-learn/kmeans/kmeans.cc
index 0a8171f9f..d41071255 100644
--- a/rabit-learn/kmeans/kmeans.cc
+++ b/rabit-learn/kmeans/kmeans.cc
@@ -83,9 +83,12 @@ inline size_t GetCluster(const Matrix &centroids,
 int main(int argc, char *argv[]) {
   if (argc < 5) {
+    // initialize rabit engine
+    rabit::Init(argc, argv);
     if (rabit::GetRank() == 0) {
       rabit::TrackerPrintf("Usage: num_cluster max_iter \n");
     }
+    rabit::Finalize();
     return 0;
   }
   clock_t tStart = clock();
diff --git a/rabit-learn/linear/Makefile b/rabit-learn/linear/Makefile
new file mode 100644
index 000000000..e69dfe66d
--- /dev/null
+++ b/rabit-learn/linear/Makefile
@@ -0,0 +1,15 @@
+# specify the binaries to build
+BIN = linear.rabit
+MOCKBIN=
+MPIBIN =
+# object files that make up the binary
+OBJ = linear.o
+
+# common build script for programs
+include ../common.mk
+CFLAGS+=-fopenmp
+linear.o: linear.cc ../../src/*.h linear.h ../solver/*.h
+# dependencies here
+linear.rabit: linear.o lib
+
+
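The CFLAGS+=-fopenmp line matters because linear.cc (next file) parallelizes its loss and gradient loops with OpenMP reductions; without the flag the pragmas are silently ignored (the -Wno-unknown-pragmas in common.mk hides the warning). A tiny standalone sketch of the same reduction pattern (illustrative only, not part of the patch):

// sketch_omp_reduction.cc, build with: g++ -fopenmp sketch_omp_reduction.cc
#include <omp.h>
#include <cstdio>

int main() {
  double sum_val = 0.0;
  // same shape as the Eval() loop in linear.cc: each thread accumulates a
  // private partial sum that OpenMP combines at the end of the loop
  #pragma omp parallel for schedule(static) reduction(+:sum_val)
  for (int i = 0; i < 1000; ++i) {
    sum_val += 0.5 * i;  // stand-in for the per-example loss
  }
  printf("sum = %g (max threads = %d)\n", sum_val, omp_get_max_threads());
  return 0;
}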
diff --git a/rabit-learn/linear/linear.cc b/rabit-learn/linear/linear.cc
new file mode 100644
index 000000000..90c903bf9
--- /dev/null
+++ b/rabit-learn/linear/linear.cc
@@ -0,0 +1,176 @@
+#include "./linear.h"
+
+namespace rabit {
+namespace linear {
+class LinearObjFunction : public solver::IObjFunction<float> {
+ public:
+  // training threads
+  int nthread;
+  // L2 regularization
+  float reg_L2;
+  // model
+  LinearModel model;
+  // training data
+  SparseMat dtrain;
+  // solver
+  solver::LBFGSSolver<float> lbfgs;
+  // constructor
+  LinearObjFunction(void) {
+    lbfgs.SetObjFunction(this);
+    nthread = 1;
+    reg_L2 = 0.0f;
+    model.weight = NULL;
+    task = "train";
+    model_in = "NULL";
+  }
+  virtual ~LinearObjFunction(void) {
+    if (model.weight != NULL) delete [] model.weight;
+  }
+  // set parameters
+  inline void SetParam(const char *name, const char *val) {
+    model.param.SetParam(name, val);
+    lbfgs.SetParam(name, val);
+    if (!strcmp(name, "num_feature")) {
+      char ndigit[30];
+      sprintf(ndigit, "%lu", model.param.num_feature + 1);
+      lbfgs.SetParam("num_dim", ndigit);
+    }
+    if (!strcmp(name, "reg_L2")) {
+      reg_L2 = static_cast<float>(atof(val));
+    }
+    if (!strcmp(name, "nthread")) {
+      nthread = atoi(val);
+    }
+    if (!strcmp(name, "task")) task = val;
+    if (!strcmp(name, "model_in")) model_in = val;
+    if (!strcmp(name, "model_out")) model_out = val;
+  }
+  inline void Run(void) {
+    if (model_in != "NULL") {
+    }
+    if (task == "train") {
+      lbfgs.Run();
+
+    } else if (task == "pred") {
+
+    } else if (task == "eval") {
+    } else {
+      utils::Error("unknown task=%s", task.c_str());
+    }
+  }
+  inline void LoadData(const char *fname) {
+    dtrain.Load(fname);
+  }
+  virtual size_t InitNumDim(void) {
+    if (model_in == "NULL") {
+      size_t ndim = dtrain.feat_dim;
+      rabit::Allreduce<op::Max>(&ndim, 1);
+      model.param.num_feature = std::max(ndim, model.param.num_feature);
+    }
+    return model.param.num_feature + 1;
+  }
+  virtual void InitModel(float *weight, size_t size) {
+    if (model_in == "NULL") {
+      memset(weight, 0, size * sizeof(float));
+      model.param.InitBaseScore();
+    } else {
+      rabit::Broadcast(model.weight, size * sizeof(float), 0);
+      memcpy(weight, model.weight, size * sizeof(float));
+    }
+  }
+  // load model
+  virtual void Load(rabit::IStream &fi) {
+    fi.Read(&model.param, sizeof(model.param));
+  }
+  virtual void Save(rabit::IStream &fo) const {
+    fo.Write(&model.param, sizeof(model.param));
+  }
+  virtual double Eval(const float *weight, size_t size) {
+    if (nthread != 0) omp_set_num_threads(nthread);
+    utils::Check(size == model.param.num_feature + 1,
+                 "size consistency check");
+    double sum_val = 0.0;
+    #pragma omp parallel for schedule(static) reduction(+:sum_val)
+    for (size_t i = 0; i < dtrain.NumRow(); ++i) {
+      float py = model.param.PredictMargin(weight, dtrain[i]);
+      float fv = model.param.MarginToLoss(dtrain.labels[i], py);
+      sum_val += fv;
+    }
+    if (rabit::GetRank() == 0) {
+      // only add regularization once
+      if (reg_L2 != 0.0f) {
+        double sum_sqr = 0.0;
+        for (size_t i = 0; i < model.param.num_feature; ++i) {
+          sum_sqr += weight[i] * weight[i];
+        }
+        sum_val += 0.5 * reg_L2 * sum_sqr;
+      }
+    }
+    utils::Check(!std::isnan(sum_val), "nan occurs");
+    return sum_val;
+  }
+  virtual void CalcGrad(float *out_grad,
+                        const float *weight,
+                        size_t size) {
+    if (nthread != 0) omp_set_num_threads(nthread);
+    utils::Check(size == model.param.num_feature + 1,
+                 "size consistency check");
+    memset(out_grad, 0, sizeof(float) * size);
+    double sum_gbias = 0.0;
+    #pragma omp parallel for schedule(static) reduction(+:sum_gbias)
+    for (size_t i = 0; i < dtrain.NumRow(); ++i) {
+      SparseMat::Vector v = dtrain[i];
+      float py = model.param.Predict(weight, v);
+      float grad = model.param.PredToGrad(dtrain.labels[i], py);
+      for (index_t j = 0; j < v.length; ++j) {
+        out_grad[v[j].findex] += v[j].fvalue * grad;
+      }
+      sum_gbias += grad;
+    }
+    out_grad[model.param.num_feature] = static_cast<float>(sum_gbias);
+    if (rabit::GetRank() == 0) {
+      // only add regularization once
+      if (reg_L2 != 0.0f) {
+        for (size_t i = 0; i < model.param.num_feature; ++i) {
+          out_grad[i] += reg_L2 * weight[i];
+        }
+      }
+    }
+  }
+
+ private:
+  std::string task;
+  std::string model_in;
+  std::string model_out;
+};
+}  // namespace linear
+}  // namespace rabit
+
+int main(int argc, char *argv[]) {
+  if (argc < 2) {
+    // initialize rabit engine
+    rabit::Init(argc, argv);
+    if (rabit::GetRank() == 0) {
+      rabit::TrackerPrintf("Usage: param=val\n");
+    }
+    rabit::Finalize();
+    return 0;
+  }
+  rabit::linear::LinearObjFunction linear;
+  if (!strcmp(argv[1], "stdin")) {
+    linear.LoadData(argv[1]);
+    rabit::Init(argc, argv);
+  } else {
+    rabit::Init(argc, argv);
+    linear.LoadData(argv[1]);
+  }
+  for (int i = 2; i < argc; ++i) {
+    char name[256], val[256];
+    if (sscanf(argv[i], "%[^=]=%s", name, val) == 2) {
+      linear.SetParam(name, val);
+    }
+  }
+  linear.Run();
+  rabit::Finalize();
+  return 0;
+}
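For reference, main() above takes the training file as argv[1] and any number of trailing name=val pairs, each split with sscanf("%[^=]=%s"). A standalone sketch of that parsing (illustrative, the argument value is made up):

// sketch_param_parse.cc, mirrors the argv parsing loop in linear.cc's main()
#include <cstdio>

int main() {
  const char *arg = "reg_L2=1.0";  // hypothetical command-line argument
  char name[256], val[256];
  if (sscanf(arg, "%[^=]=%s", name, val) == 2) {
    // in linear.cc this would be forwarded to LinearObjFunction::SetParam(name, val)
    printf("name=%s val=%s\n", name, val);
  }
  return 0;
}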
diff --git a/rabit-learn/linear/linear.h b/rabit-learn/linear/linear.h
new file mode 100644
index 000000000..98aa4afee
--- /dev/null
+++ b/rabit-learn/linear/linear.h
@@ -0,0 +1,131 @@
+/*!
+ * Copyright (c) 2015 by Contributors
+ * \file linear.h
+ * \brief Linear and Logistic regression
+ *
+ * \author Tianqi Chen
+ */
+#ifndef RABIT_LINEAR_H_
+#define RABIT_LINEAR_H_
+#include
+#include "../common/toolkit_util.h"
+#include "../solver/lbfgs.h"
+
+namespace rabit {
+namespace linear {
+/*! \brief simple linear model */
+struct LinearModel {
+  struct ModelParam {
+    /*! \brief global bias */
+    float base_score;
+    /*! \brief number of features */
+    size_t num_feature;
+    /*! \brief loss type*/
+    int loss_type;
+    // reserved field
+    int reserved[16];
+    // constructor
+    ModelParam(void) {
+      base_score = 0.5f;
+      num_feature = 0;
+      loss_type = 1;
+      std::memset(reserved, 0, sizeof(reserved));
+    }
+    // initialize base score
+    inline void InitBaseScore(void) {
+      utils::Check(base_score > 0.0f && base_score < 1.0f,
+                   "base_score must be in (0,1) for logistic loss");
+      base_score = -std::log(1.0f / base_score - 1.0f);
+    }
+    /*!
+     * \brief set parameters from outside
+     * \param name name of the parameter
+     * \param val value of the parameter
+     */
+    inline void SetParam(const char *name, const char *val) {
+      using namespace std;
+      if (!strcmp("base_score", name)) {
+        base_score = static_cast<float>(atof(val));
+      }
+      if (!strcmp("num_feature", name)) {
+        num_feature = static_cast<size_t>(atol(val));
+      }
+      if (!strcmp("objective", name)) {
+        if (!strcmp("linear", val)) {
+          loss_type = 0;
+        } else if (!strcmp("logistic", val)) {
+          loss_type = 1;
+        } else {
+          utils::Error("unknown objective type %s\n", val);
+        }
+      }
+    }
+    // transform margin to prediction
+    inline float MarginToPred(float margin) const {
+      if (loss_type == 1) {
+        return 1.0f / (1.0f + std::exp(-margin));
+      } else {
+        return margin;
+      }
+    }
+    // margin to loss
+    inline float MarginToLoss(float label, float margin) const {
+      if (loss_type == 1) {
+        float nlogprob;
+        if (margin > 0.0f) {
+          nlogprob = std::log(1.0f + std::exp(-margin));
+        } else {
+          nlogprob = -margin + std::log(1.0f + std::exp(margin));
+        }
+        return label * nlogprob +
+            (1.0f - label) * (margin + nlogprob);
+      } else {
+        float diff = margin - label;
+        return 0.5f * diff * diff;
+      }
+    }
+    inline float PredToGrad(float label, float pred) const {
+      return pred - label;
+    }
+    inline float PredictMargin(const float *weight,
+                               const SparseMat::Vector &v) const {
+      // weight[num_feature] is bias
+      float sum = base_score + weight[num_feature];
+      for (unsigned i = 0; i < v.length; ++i) {
+        sum += weight[v[i].findex] * v[i].fvalue;
+      }
+      return sum;
+    }
+    inline float Predict(const float *weight,
+                         const SparseMat::Vector &v) const {
+      return MarginToPred(PredictMargin(weight, v));
+    }
+  };
+  // model parameter
+  ModelParam param;
+  // weight corresponding to the model
+  float *weight;
+  LinearModel(void) : weight(NULL) {
+  }
+  ~LinearModel(void) {
+    if (weight != NULL) delete [] weight;
+  }
+  // load model
+  inline void Load(rabit::IStream &fi) {
+    fi.Read(&param, sizeof(param));
+    if (weight == NULL) {
+      weight = new float[param.num_feature + 1];
+      fi.Read(weight, sizeof(float) * (param.num_feature + 1));
+    }
+  }
+  inline void Save(rabit::IStream &fo) const {
+    fo.Write(&param, sizeof(param));
+    fo.Write(weight, sizeof(float) * (param.num_feature + 1));
+  }
+  inline float Predict(const SparseMat::Vector &v) const {
+    return param.Predict(weight, v);
+  }
+};
+}  // namespace linear
+}  // namespace rabit
+#endif  // RABIT_LINEAR_H_
diff --git a/rabit-learn/linear/run-linear.sh b/rabit-learn/linear/run-linear.sh
new file mode 100755
index 000000000..6b8970643
--- /dev/null
+++ b/rabit-learn/linear/run-linear.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+if [[ $# -lt 1 ]]
+then
+    echo "Usage: <nprocess>"
+    exit -1
+fi
+
+rm -rf mushroom.row* *.model
+k=$1
+
+# split the libsvm file into k subfiles
+python splitrows.py ../data/agaricus.txt.train mushroom $k
+
+# run the linear solver through the rabit tracker
+../../tracker/rabit_demo.py -n $k linear.rabit mushroom.row\%d "${*:2}"
diff --git a/rabit-learn/linear/splitrows.py b/rabit-learn/linear/splitrows.py
new file mode 100644
index 000000000..2e9d1184d
--- /dev/null
+++ b/rabit-learn/linear/splitrows.py
@@ -0,0 +1,24 @@
+#!/usr/bin/python
+import sys
+import random
+
+# split libsvm file into different rows
+if len(sys.argv) < 4:
+    print ('Usage: <input> <output-prefix> <k>')
+    exit(0)
+
+random.seed(10)
+
+k = int(sys.argv[3])
+fi = open( sys.argv[1], 'r' )
+fos = []
+
+for i in range(k):
+    fos.append(open( sys.argv[2]+'.row%d' % i, 'w' ))
+
+for l in open(sys.argv[1]):
+    i = random.randint(0, k-1)
+    fos[i].write(l)
+
+for f in fos:
+    f.close()
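One detail of linear.h above worth calling out: MarginToLoss branches on the sign of the margin so that exp() is only ever called on a non-positive argument, avoiding overflow while still computing the same logistic negative log-likelihood. A small standalone check of that identity (illustrative values, not part of the patch):

// sketch_logloss.cc, verifies the stable form used in LinearModel::MarginToLoss
#include <cmath>
#include <cstdio>

// stable negative log-likelihood of the logistic loss for a given margin
double margin_to_loss(double label, double margin) {
  double nlogprob = margin > 0.0
      ? std::log(1.0 + std::exp(-margin))            // = -log(sigmoid(margin))
      : -margin + std::log(1.0 + std::exp(margin));  // same value, no overflow
  return label * nlogprob + (1.0 - label) * (margin + nlogprob);
}

int main() {
  double margin = 2.5, label = 1.0;  // made-up example
  double p = 1.0 / (1.0 + std::exp(-margin));
  double naive = -label * std::log(p) - (1.0 - label) * std::log(1.0 - p);
  printf("stable=%.6f naive=%.6f\n", margin_to_loss(label, margin), naive);
  return 0;
}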
diff --git a/rabit-learn/solver/lbfgs.h b/rabit-learn/solver/lbfgs.h
index 1e330aa0c..c5c17d0ce 100644
--- a/rabit-learn/solver/lbfgs.h
+++ b/rabit-learn/solver/lbfgs.h
@@ -21,12 +21,8 @@ namespace solver {
 template<typename DType>
 class IObjFunction : public rabit::ISerializable {
  public:
-  /*!
-   * \brief set parameters from outside
-   * \param name name of the parameter
-   * \param val value of the parameter
-   */
-  virtual void SetParam(const char *name, const char *val) = 0;
+  // destructor
+  virtual ~IObjFunction(void) {}
   /*!
    * \brief evaluate function values for a given weight
    * \param weight weight of the function
@@ -34,7 +30,13 @@ class IObjFunction : public rabit::ISerializable {
    */
   virtual double Eval(const DType *weight, size_t size) = 0;
   /*!
-   * \brief initialize the weight before starting the solver
+   * \return number of feature dimensions to be allocated,
+   *  only called once during initialization
+   */
+  virtual size_t InitNumDim(void) = 0;
+  /*!
+   * \brief initialize the weight before starting the solver,
+   *  only called once for initialization
    */
   virtual void InitModel(DType *weight, size_t size) = 0;
   /*!
@@ -45,18 +47,7 @@
    */
   virtual void CalcGrad(DType *out_grad,
                         const DType *weight,
-                        size_t size);
-  /*!
-   * \brief add regularization gradient to the gradient if any
-   *  this is used to add data set invariant regularization
-   * \param out_grad used to store the gradient value of the function
-   * \param weight weight of the function
-   * \param size size of the weight
-   */
-  virtual void AddRegularization(DType *out_grad,
-                                 const DType *weight,
-                                 size_t size);
-
+                        size_t size) = 0;
 };
 /*! \brief a basic version L-BFGS solver */
@@ -71,7 +62,7 @@ class LBFGSSolver {
     linesearch_c1 = 1e-4;
     min_lbfgs_iter = 5;
     max_lbfgs_iter = 1000;
-    lbfgs_stop_tol = 1e-6f;
+    lbfgs_stop_tol = 1e-5f;
     silent = 0;
   }
   virtual ~LBFGSSolver(void) {}
@@ -81,17 +72,17 @@
    * \param val value of the parameter
    */
   virtual void SetParam(const char *name, const char *val) {
-    if (!strcmp("num_feature", name)) {
-      gstate.num_feature = static_cast<size_t>(atol(val));
+    if (!strcmp("num_dim", name)) {
+      gstate.num_dim = static_cast<size_t>(atol(val));
     }
     if (!strcmp("size_memory", name)) {
       gstate.size_memory = static_cast<size_t>(atol(val));
     }
     if (!strcmp("reg_L1", name)) {
-      reg_L1 = atof(val);
+      reg_L1 = static_cast<float>(atof(val));
     }
     if (!strcmp("linesearch_backoff", name)) {
-      linesearch_backoff = atof(val);
+      linesearch_backoff = static_cast<float>(atof(val));
     }
     if (!strcmp("max_linesearch_iter", name)) {
       max_linesearch_iter = atoi(val);
@@ -113,22 +104,35 @@
   virtual void Init(void) {
     utils::Check(gstate.obj != NULL,
                  "LBFGSSolver.Init must SetObjFunction first");
-    if (rabit::LoadCheckPoint(&gstate, &hist) == 0) {
+    int version = rabit::LoadCheckPoint(&gstate, &hist);
+    if (version == 0) {
+      gstate.num_dim = gstate.obj->InitNumDim();
+    }
+    {
+      // decide parameter partition
+      size_t nproc = rabit::GetWorldSize();
+      size_t rank = rabit::GetRank();
+      size_t step = (gstate.num_dim + nproc - 1) / nproc;
+      // upper align
+      step = (step + 7) / 8 * 8;
+      utils::Assert(step * nproc >= gstate.num_dim, "BUG");
+      range_begin_ = std::min(rank * step, gstate.num_dim);
+      range_end_ = std::min((rank + 1) * step, gstate.num_dim);
+    }
+    if (version == 0) {
       gstate.Init();
-      hist.Init(gstate.num_feature, gstate.size_memory);
-      if (rabit::GetRank() == 0) {
-        gstate.obj->InitModel(gstate.weight, gstate.num_feature);
-      }
+      hist.Init(range_end_ - range_begin_, gstate.size_memory);
+      gstate.obj->InitModel(gstate.weight, gstate.num_dim);
       // broadcast initialize model
       rabit::Broadcast(gstate.weight,
-                       sizeof(DType) * gstate.num_feature, 0);
+                       sizeof(DType) * gstate.num_dim, 0);
       gstate.old_objval = this->Eval(gstate.weight);
       gstate.init_objval = gstate.old_objval;
       if (silent == 0 && rabit::GetRank() == 0) {
         rabit::TrackerPrintf
-          ("L-BFGS solver starts, num_feature=%lu, init_objval=%g\n",
-           gstate.num_feature, gstate.init_objval);
+          ("L-BFGS solver starts, num_dim=%lu, init_objval=%g, size_memory=%lu\n",
+           gstate.num_dim, gstate.init_objval, gstate.size_memory);
       }
     }
   }
@@ -148,13 +152,16 @@
   virtual bool UpdateOneIter(void) {
     bool stop = false;
     GlobalState &g = gstate;
-    g.obj->CalcGrad(g.grad, g.weight, g.num_feature);
-    rabit::Allreduce<op::Sum>(g.grad, g.num_feature);
-    g.obj->AddRegularization(g.grad, g.weight, g.num_feature);
+    g.obj->CalcGrad(g.grad, g.weight, g.num_dim);
+    rabit::Allreduce<op::Sum>(g.grad, g.num_dim);
+    // find change direction
     double vdot = FindChangeDirection(g.tempw, g.grad, g.weight);
+    // line-search, g.grad is now new weight
     int iter = BacktrackLineSearch(g.grad, g.tempw, g.weight, vdot);
     utils::Check(iter < max_linesearch_iter, "line search failed");
+    // swap new weight
     std::swap(g.weight, g.grad);
+    // check stop condition
     if (gstate.num_iteration > min_lbfgs_iter) {
       if (g.old_objval - g.new_objval < lbfgs_stop_tol * g.init_objval) {
         return true;
@@ -177,6 +184,10 @@
     while (gstate.num_iteration < max_lbfgs_iter) {
       if (this->UpdateOneIter()) break;
     }
+    if (silent == 0 && rabit::GetRank() == 0) {
+      rabit::TrackerPrintf
+          ("L-BFGS: finishes at iteration %d\n", gstate.num_iteration);
+    }
   }
 
  protected:
  // find the delta value, given gradient
@@ -186,7 +197,13 @@
                                    const DType *weight) {
     int m = static_cast<int>(gstate.size_memory);
     int n = static_cast<int>(hist.num_useful());
-    const size_t num_feature = gstate.num_feature;
+    if (n < m) {
+      utils::Assert(hist.num_useful() == gstate.num_iteration,
+                    "BUG2");
+    } else {
+      utils::Assert(n == m, "BUG3");
+    }
+    const size_t num_dim = gstate.num_dim;
     const DType *gsub = grad + range_begin_;
     const size_t nsub = range_end_ - range_begin_;
     double vdot;
@@ -214,10 +231,11 @@
       for (size_t i = 0; i < tmp.size(); ++i) {
         gstate.DotBuf(idxset[i].first, idxset[i].second) = tmp[i];
       }
-      // BFGS steps
+      // BFGS steps, use vector-free update
+      // parameterize vector using basis in hist
       std::vector<double> alpha(n);
-      std::vector<double> delta(2 * n + 1, 0.0);
-      delta[2 * n] = 1.0;
+      std::vector<double> delta(2 * m + 1, 0.0);
+      delta[2 * m] = 1.0;
       // backward step
       for (int j = n - 1; j >= 0; --j) {
         double vsum = 0.0;
@@ -243,26 +261,30 @@
         delta[j] = delta[j] + (alpha[j] - beta);
       }
       // set all to zero
-      std::fill(dir, dir + num_feature, 0.0f);
+      std::fill(dir, dir + num_dim, 0.0f);
       DType *dirsub = dir + range_begin_;
       for (int i = 0; i < n; ++i) {
-        AddScale(dirsub, dirsub, hist[i], delta[i], nsub);
         AddScale(dirsub, dirsub, hist[m + i], delta[m + i], nsub);
       }
       AddScale(dirsub, dirsub, hist[2 * m], delta[2 * m], nsub);
-      FixDirL1Sign(dir + range_begin_, hist[2 * m], nsub);
-      vdot = -Dot(dir + range_begin_, hist[2 * m], nsub);
+      for (int i = 0; i < n; ++i) {
+        AddScale(dirsub, dirsub, hist[i], delta[i], nsub);
+      }
+      FixDirL1Sign(dirsub, hist[2 * m], nsub);
+      vdot = -Dot(dirsub, hist[2 * m], nsub);
       // allreduce to get full direction
-      rabit::Allreduce<op::Sum>(dir, num_feature);
+      rabit::Allreduce<op::Sum>(dir, num_dim);
       rabit::Allreduce<op::Sum>(&vdot, 1);
-    } else {
-      SetL1Dir(dir, grad, weight,
-               num_feature);
-      vdot = -Dot(dir, dir, num_feature);
+    } else {
+      SetL1Dir(dir, grad, weight, num_dim);
+      vdot = -Dot(dir, dir, num_dim);
+    }
+    // shift the history record
+    if (n < m) {
+      n += 1;
+    } else {
+      gstate.Shift(); hist.Shift();
     }
-    // shift the history record
-    gstate.Shift(); hist.Shift();
-    // next n
-    if (n < m) n += 1;
     hist.set_num_useful(n);
     // copy gradient to hist[m + n - 1]
     memcpy(hist[m + n - 1], gsub, nsub * sizeof(DType));
@@ -274,24 +296,25 @@
                                    const DType *dir,
                                    const DType *weight,
                                    double dot_dir_l1grad) {
-    utils::Assert(dot_dir_l1grad < 0.0f, "gradient error");
+    utils::Assert(dot_dir_l1grad < 0.0f,
+                  "gradient error, dotv=%g", dot_dir_l1grad);
     double alpha = 1.0;
     double backoff = linesearch_backoff;
     // unit descent direction in first iter
     if (gstate.num_iteration == 0) {
       utils::Assert(hist.num_useful() == 1, "hist.nuseful");
       alpha = 1.0f / std::sqrt(-dot_dir_l1grad);
-      linesearch_backoff = 0.1f;
+      backoff = 0.1f;
     }
     int iter = 0;
     double old_val = gstate.old_objval;
     double c1 = this->linesearch_c1;
     while (true) {
-      const size_t num_feature = gstate.num_feature;
+      const size_t num_dim = gstate.num_dim;
       if (++iter >= max_linesearch_iter) return iter;
-      AddScale(new_weight, weight, dir, alpha, num_feature);
-      this->FixWeightL1Sign(new_weight, weight, num_feature);
+      AddScale(new_weight, weight, dir, alpha, num_dim);
+      this->FixWeightL1Sign(new_weight, weight, num_dim);
       double new_val = this->Eval(new_weight);
       if (new_val - old_val <= c1 * dot_dir_l1grad * alpha) {
         gstate.new_objval = new_val; break;
@@ -306,15 +329,16 @@
     gstate.num_iteration += 1;
     return iter;
   }
+  // OWL-QN step for L1 regularization
   inline void SetL1Dir(DType *dst,
-                      const DType *grad,
-                      const DType *weight,
-                      size_t size) {
+                       const DType *grad,
+                       const DType *weight,
+                       size_t size) {
     if (reg_L1 == 0.0) {
       for (size_t i = 0; i < size; ++i) {
         dst[i] = -grad[i];
       }
-    } else{
+    } else {
      for (size_t i = 0; i < size; ++i) {
        if (weight[i] > 0.0f) {
          dst[i] = -grad[i] - reg_L1;
@@ -332,7 +356,7 @@
       }
     }
   }
-  // fix direction sign to be consistent with proposal
+  // OWL-QN step: fix direction sign to be consistent with proposal
   inline void FixDirL1Sign(DType *dir,
                            const DType *steepdir,
                            size_t size) {
@@ -344,7 +368,7 @@
       }
     }
   }
-  // fix direction sign to be consistent with proposal
+  // OWL-QN step: fix weight sign to be consistent with proposal
   inline void FixWeightL1Sign(DType *new_weight,
                               const DType *weight,
                               size_t size) {
@@ -357,11 +381,11 @@
     }
   }
   inline double Eval(const DType *weight) {
-    double val = gstate.obj->Eval(weight, gstate.num_feature);
+    double val = gstate.obj->Eval(weight, gstate.num_dim);
     rabit::Allreduce<op::Sum>(&val, 1);
     if (reg_L1 != 0.0f) {
       double l1norm = 0.0;
-      for (size_t i = 0; i < gstate.num_feature; ++i) {
+      for (size_t i = 0; i < gstate.num_dim; ++i) {
         l1norm += std::abs(weight[i]);
       }
       val += l1norm * reg_L1;
@@ -401,13 +425,14 @@
     return res;
   }
   // map rolling array index
-  inline static size_t MapIndex(size_t i, size_t offset, size_t size_memory) {
+  inline static size_t MapIndex(size_t i, size_t offset,
+                                size_t size_memory) {
     if (i == 2 * size_memory) return i;
     if (i < size_memory) {
       return (i + offset) % size_memory;
     } else {
       utils::Assert(i < 2 * size_memory,
-                    "MapIndex: index exceed bound");
+                    "MapIndex: index exceeds bound, i=%lu", i);
       return (i + offset) % size_memory + size_memory;
     }
   }
@@ -419,7 +444,7 @@
     // number of iterations passed
     size_t num_iteration;
     // number of features in the solver
-    size_t num_feature;
+    size_t num_dim;
     // initialize objective value
     double init_objval;
     // history objective value
@@ -436,7 +461,7 @@
         weight(NULL), tempw(NULL) {
       size_memory = 10;
       num_iteration = 0;
-      num_feature = 0;
+      num_dim = 0;
      old_objval = 0.0;
    }
    ~GlobalState(void) {
@@ -461,25 +486,25 @@
    virtual void Load(rabit::IStream &fi) {
      fi.Read(&size_memory, sizeof(size_memory));
      fi.Read(&num_iteration, sizeof(num_iteration));
-      fi.Read(&num_feature, sizeof(num_feature));
+      fi.Read(&num_dim, sizeof(num_dim));
      fi.Read(&init_objval, sizeof(init_objval));
      fi.Read(&old_objval, sizeof(old_objval));
      fi.Read(&offset_, sizeof(offset_));
      fi.Read(&data);
      this->AllocSpace();
-      fi.Read(weight, sizeof(DType) * num_feature);
+      fi.Read(weight, sizeof(DType) * num_dim);
      obj->Load(fi);
    }
    // save the shift array
    virtual void Save(rabit::IStream &fo) const {
      fo.Write(&size_memory, sizeof(size_memory));
      fo.Write(&num_iteration, sizeof(num_iteration));
-      fo.Write(&num_feature, sizeof(num_feature));
+      fo.Write(&num_dim, sizeof(num_dim));
      fo.Write(&init_objval, sizeof(init_objval));
      fo.Write(&old_objval, sizeof(old_objval));
      fo.Write(&offset_, sizeof(offset_));
      fo.Write(data);
-      fo.Write(weight, sizeof(DType) * num_feature);
+      fo.Write(weight, sizeof(DType) * num_dim);
      obj->Save(fo);
    }
    inline void Shift(void) {
@@ -493,16 +518,18 @@
    // allocate sapce
    inline void AllocSpace(void) {
      if (grad == NULL) {
-        grad = new DType[num_feature];
-        weight = new DType[num_feature];
-        tempw = new DType[num_feature];
+        grad = new DType[num_dim];
+        weight = new DType[num_dim];
+        tempw = new DType[num_dim];
      }
    }
  };
  /*! \brief rolling array that carries history information */
  struct HistoryArray : public rabit::ISerializable {
   public:
-    HistoryArray(void) : dptr_(NULL) {}
+    HistoryArray(void) : dptr_(NULL) {
+      num_useful_ = 0;
+    }
    ~HistoryArray(void) {
      if (dptr_ != NULL) delete [] dptr_;
    }
@@ -516,7 +543,8 @@
      size_memory_ = size_memory;
      stride_ = num_col_;
      offset_ = 0;
-      dptr_ = new DType[num_col_ * stride_];
+      size_t n = size_memory * 2 + 1;
+      dptr_ = new DType[n * stride_];
    }
    // fetch element from rolling array
    inline const DType *operator[](size_t i) const {
@@ -541,7 +569,7 @@
    }
    // set number of useful memory
    inline void set_num_useful(size_t num_useful) {
-      utils::Assert(num_useful < size_memory_,
+      utils::Assert(num_useful <= size_memory_,
                    "num_useful exceed bound");
      num_useful_ = num_useful;
    }
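For context on the lbfgs.h changes above: each worker now owns a contiguous slice [range_begin_, range_end_) of the parameter vector, computed by splitting num_dim evenly over the world size and rounding the step up to a multiple of 8. A standalone sketch of that arithmetic with made-up sizes (not part of the patch):

// sketch_partition.cc, reproduces the range computation in LBFGSSolver::Init
#include <algorithm>
#include <cstdio>

int main() {
  size_t num_dim = 127;  // hypothetical total number of dimensions
  size_t nproc = 4;      // hypothetical world size (rabit::GetWorldSize())
  size_t step = (num_dim + nproc - 1) / nproc;  // ceil(num_dim / nproc)
  step = (step + 7) / 8 * 8;                    // upper-align to a multiple of 8
  for (size_t rank = 0; rank < nproc; ++rank) {
    size_t begin = std::min(rank * step, num_dim);
    size_t end = std::min((rank + 1) * step, num_dim);
    // prints: rank 0 owns [0, 32) ... rank 3 owns [96, 127)
    printf("rank %zu owns [%zu, %zu)\n", rank, begin, end);
  }
  return 0;
}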