Compare commits
15 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
56b1a3301f | ||
|
|
920f9f3565 | ||
|
|
c1a868e7ff | ||
|
|
63c4025656 | ||
|
|
4a622da67b | ||
|
|
b10efa2e4b | ||
|
|
0d6b977395 | ||
|
|
ca4b3b7541 | ||
|
|
4a98205ef1 | ||
|
|
982d16b2b6 | ||
|
|
fde318716f | ||
|
|
094d0a4497 | ||
|
|
d8b0edf133 | ||
|
|
bf5fcec8e8 | ||
|
|
278b788b34 |
@@ -6,6 +6,8 @@ Contributors: https://github.com/tqchen/xgboost/graphs/contributors
|
|||||||
|
|
||||||
Tutorial and Documentation: https://github.com/tqchen/xgboost/wiki
|
Tutorial and Documentation: https://github.com/tqchen/xgboost/wiki
|
||||||
|
|
||||||
|
Questions and Issues: [https://github.com/tqchen/xgboost/issues](https://github.com/tqchen/xgboost/issues?q=is%3Aissue+label%3Aquestion)
|
||||||
|
|
||||||
Features
|
Features
|
||||||
=======
|
=======
|
||||||
* Sparse feature format:
|
* Sparse feature format:
|
||||||
|
|||||||
@@ -43,7 +43,8 @@ for k, v in res:
|
|||||||
nhit += 1
|
nhit += 1
|
||||||
else:
|
else:
|
||||||
lb = 'b'
|
lb = 'b'
|
||||||
fo.write('%s,%d,%s\n' % ( k, rorder[k], lb ) )
|
# change output rank order to follow Kaggle convention
|
||||||
|
fo.write('%s,%d,%s\n' % ( k, len(rorder)+1-rorder[k], lb ) )
|
||||||
ntot += 1
|
ntot += 1
|
||||||
fo.close()
|
fo.close()
|
||||||
|
|
||||||
|
|||||||
@@ -39,4 +39,11 @@ pred = bst.predict( xg_test );
|
|||||||
|
|
||||||
print ('predicting, classification error=%f' % (sum( int(pred[i]) != test_Y[i] for i in range(len(test_Y))) / float(len(test_Y)) ))
|
print ('predicting, classification error=%f' % (sum( int(pred[i]) != test_Y[i] for i in range(len(test_Y))) / float(len(test_Y)) ))
|
||||||
|
|
||||||
|
# do the same thing again, but output probabilities
|
||||||
|
param['objective'] = 'multi:softprob'
|
||||||
|
bst = xgb.train(param, xg_train, num_round, watchlist );
|
||||||
|
# get prediction, this is in 1D array, need reshape to (nclass, ndata)
|
||||||
|
yprob = bst.predict( xg_test ).reshape( 6, test_Y.shape[0] )
|
||||||
|
ylabel = np.argmax( yprob, axis=0)
|
||||||
|
|
||||||
|
print ('predicting, classification error=%f' % (sum( int(ylabel[i]) != test_Y[i] for i in range(len(test_Y))) / float(len(test_Y)) ))
|
||||||
|
|||||||
@@ -121,6 +121,7 @@ class Booster:
|
|||||||
assert isinstance(d,DMatrix)
|
assert isinstance(d,DMatrix)
|
||||||
dmats = ( ctypes.c_void_p * len(cache) )(*[ d.handle for d in cache])
|
dmats = ( ctypes.c_void_p * len(cache) )(*[ d.handle for d in cache])
|
||||||
self.handle = ctypes.c_void_p( xglib.XGBoosterCreate( dmats, len(cache) ) )
|
self.handle = ctypes.c_void_p( xglib.XGBoosterCreate( dmats, len(cache) ) )
|
||||||
|
self.set_param( {'seed':0} )
|
||||||
self.set_param( params )
|
self.set_param( params )
|
||||||
def __del__(self):
|
def __del__(self):
|
||||||
xglib.XGBoosterFree(self.handle)
|
xglib.XGBoosterFree(self.handle)
|
||||||
@@ -186,7 +187,7 @@ class Booster:
|
|||||||
|
|
||||||
def train(params, dtrain, num_boost_round = 10, evals = [], obj=None):
|
def train(params, dtrain, num_boost_round = 10, evals = [], obj=None):
|
||||||
""" train a booster with given parameters """
|
""" train a booster with given parameters """
|
||||||
bst = Booster(params, [dtrain] )
|
bst = Booster(params, [dtrain]+[ d[0] for d in evals ] )
|
||||||
if obj == None:
|
if obj == None:
|
||||||
for i in range(num_boost_round):
|
for i in range(num_boost_round):
|
||||||
bst.update( dtrain )
|
bst.update( dtrain )
|
||||||
|
|||||||
@@ -130,6 +130,10 @@ namespace xgboost{
|
|||||||
xgboost::regrank::RegRankBoostLearner::LoadModel(fname);
|
xgboost::regrank::RegRankBoostLearner::LoadModel(fname);
|
||||||
this->init_model = true;
|
this->init_model = true;
|
||||||
}
|
}
|
||||||
|
inline void SetParam( const char *name, const char *val ){
|
||||||
|
if( !strcmp( name, "seed" ) ) random::Seed(atoi(val));
|
||||||
|
xgboost::regrank::RegRankBoostLearner::SetParam( name, val );
|
||||||
|
}
|
||||||
const float *Pred( const DMatrix &dmat, size_t *len, int bst_group ){
|
const float *Pred( const DMatrix &dmat, size_t *len, int bst_group ){
|
||||||
this->CheckInit();
|
this->CheckInit();
|
||||||
|
|
||||||
|
|||||||
@@ -27,6 +27,10 @@ namespace xgboost{
|
|||||||
obj_ = NULL;
|
obj_ = NULL;
|
||||||
name_obj_ = "reg:linear";
|
name_obj_ = "reg:linear";
|
||||||
}
|
}
|
||||||
|
/*! \brief destructor */
|
||||||
|
~RegRankBoostLearner(void){
|
||||||
|
if( obj_ != NULL ) delete obj_;
|
||||||
|
}
|
||||||
/*!
|
/*!
|
||||||
* \brief a regression booster associated with training and evaluating data
|
* \brief a regression booster associated with training and evaluating data
|
||||||
* \param mats array of pointers to matrix whose prediction result need to be cached
|
* \param mats array of pointers to matrix whose prediction result need to be cached
|
||||||
@@ -99,7 +103,7 @@ namespace xgboost{
|
|||||||
*/
|
*/
|
||||||
inline void InitTrainer(void){
|
inline void InitTrainer(void){
|
||||||
if( mparam.num_class != 0 ){
|
if( mparam.num_class != 0 ){
|
||||||
if( name_obj_ != "multi:softmax" ){
|
if( name_obj_ != "multi:softmax" && name_obj_ != "multi:softprob"){
|
||||||
name_obj_ = "multi:softmax";
|
name_obj_ = "multi:softmax";
|
||||||
printf("auto select objective=softmax to support multi-class classification\n" );
|
printf("auto select objective=softmax to support multi-class classification\n" );
|
||||||
}
|
}
|
||||||
@@ -134,6 +138,14 @@ namespace xgboost{
|
|||||||
inline void LoadModel(utils::IStream &fi){
|
inline void LoadModel(utils::IStream &fi){
|
||||||
base_gbm.LoadModel(fi);
|
base_gbm.LoadModel(fi);
|
||||||
utils::Assert(fi.Read(&mparam, sizeof(ModelParam)) != 0);
|
utils::Assert(fi.Read(&mparam, sizeof(ModelParam)) != 0);
|
||||||
|
// save name obj
|
||||||
|
size_t len;
|
||||||
|
if( fi.Read(&len, sizeof(len)) != 0 ){
|
||||||
|
name_obj_.resize( len );
|
||||||
|
if( len != 0 ){
|
||||||
|
utils::Assert( fi.Read(&name_obj_[0], len*sizeof(char)) != 0 );
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
/*!
|
/*!
|
||||||
* \brief DumpModel
|
* \brief DumpModel
|
||||||
@@ -159,6 +171,10 @@ namespace xgboost{
|
|||||||
inline void SaveModel(utils::IStream &fo) const{
|
inline void SaveModel(utils::IStream &fo) const{
|
||||||
base_gbm.SaveModel(fo);
|
base_gbm.SaveModel(fo);
|
||||||
fo.Write(&mparam, sizeof(ModelParam));
|
fo.Write(&mparam, sizeof(ModelParam));
|
||||||
|
// save name obj
|
||||||
|
size_t len = name_obj_.length();
|
||||||
|
fo.Write(&len, sizeof(len));
|
||||||
|
fo.Write(&name_obj_[0], len*sizeof(char));
|
||||||
}
|
}
|
||||||
/*!
|
/*!
|
||||||
* \brief save model into file
|
* \brief save model into file
|
||||||
@@ -202,7 +218,7 @@ namespace xgboost{
|
|||||||
fprintf(fo, "[%d]", iter);
|
fprintf(fo, "[%d]", iter);
|
||||||
for (size_t i = 0; i < evals.size(); ++i){
|
for (size_t i = 0; i < evals.size(); ++i){
|
||||||
this->PredictRaw(preds_, *evals[i]);
|
this->PredictRaw(preds_, *evals[i]);
|
||||||
obj_->PredTransform(preds_);
|
obj_->EvalTransform(preds_);
|
||||||
evaluator_.Eval(fo, evname[i].c_str(), preds_, evals[i]->info);
|
evaluator_.Eval(fo, evname[i].c_str(), preds_, evals[i]->info);
|
||||||
}
|
}
|
||||||
fprintf(fo, "\n");
|
fprintf(fo, "\n");
|
||||||
|
|||||||
@@ -41,6 +41,11 @@ namespace xgboost{
|
|||||||
* \param preds prediction values, saves to this vector as well
|
* \param preds prediction values, saves to this vector as well
|
||||||
*/
|
*/
|
||||||
virtual void PredTransform(std::vector<float> &preds){}
|
virtual void PredTransform(std::vector<float> &preds){}
|
||||||
|
/*!
|
||||||
|
* \brief transform prediction values; this is only called when Eval is called, and by default it redirects to PredTransform
|
||||||
|
* \param preds prediction values, saves to this vector as well
|
||||||
|
*/
|
||||||
|
virtual void EvalTransform(std::vector<float> &preds){ this->PredTransform(preds); }
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -114,8 +119,8 @@ namespace xgboost{
|
|||||||
if( !strcmp("reg:logistic", name ) ) return new RegressionObj( LossType::kLogisticNeglik );
|
if( !strcmp("reg:logistic", name ) ) return new RegressionObj( LossType::kLogisticNeglik );
|
||||||
if( !strcmp("binary:logistic", name ) ) return new RegressionObj( LossType::kLogisticClassify );
|
if( !strcmp("binary:logistic", name ) ) return new RegressionObj( LossType::kLogisticClassify );
|
||||||
if( !strcmp("binary:logitraw", name ) ) return new RegressionObj( LossType::kLogisticRaw );
|
if( !strcmp("binary:logitraw", name ) ) return new RegressionObj( LossType::kLogisticRaw );
|
||||||
if( !strcmp("multi:softmax", name ) ) return new SoftmaxMultiClassObj();
|
if( !strcmp("multi:softmax", name ) ) return new SoftmaxMultiClassObj(0);
|
||||||
if( !strcmp("rank:pairwise", name ) ) return new PairwiseRankObj();
|
if( !strcmp("multi:softprob", name ) ) return new SoftmaxMultiClassObj(1);
|
||||||
if( !strcmp("rank:pairwise", name ) ) return new PairwiseRankObj();
|
if( !strcmp("rank:pairwise", name ) ) return new PairwiseRankObj();
|
||||||
if( !strcmp("rank:softmax", name ) ) return new SoftmaxRankObj();
|
if( !strcmp("rank:softmax", name ) ) return new SoftmaxRankObj();
|
||||||
utils::Error("unknown objective function type");
|
utils::Error("unknown objective function type");
|
||||||
|
|||||||
@@ -112,7 +112,7 @@ namespace xgboost{
|
|||||||
// simple softmax multi-class classification
|
// simple softmax multi-class classification
|
||||||
class SoftmaxMultiClassObj : public IObjFunction{
|
class SoftmaxMultiClassObj : public IObjFunction{
|
||||||
public:
|
public:
|
||||||
SoftmaxMultiClassObj(void){
|
SoftmaxMultiClassObj(int output_prob):output_prob(output_prob){
|
||||||
nclass = 0;
|
nclass = 0;
|
||||||
}
|
}
|
||||||
virtual ~SoftmaxMultiClassObj(){}
|
virtual ~SoftmaxMultiClassObj(){}
|
||||||
@@ -156,6 +156,13 @@ namespace xgboost{
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
virtual void PredTransform(std::vector<float> &preds){
|
virtual void PredTransform(std::vector<float> &preds){
|
||||||
|
this->Transform(preds, output_prob);
|
||||||
|
}
|
||||||
|
virtual void EvalTransform(std::vector<float> &preds){
|
||||||
|
this->Transform(preds, 0);
|
||||||
|
}
|
||||||
|
private:
|
||||||
|
inline void Transform(std::vector<float> &preds, int prob){
|
||||||
utils::Assert( nclass != 0, "must set num_class to use softmax" );
|
utils::Assert( nclass != 0, "must set num_class to use softmax" );
|
||||||
utils::Assert( preds.size() % nclass == 0, "SoftmaxMultiClassObj: label size and pred size does not match" );
|
utils::Assert( preds.size() % nclass == 0, "SoftmaxMultiClassObj: label size and pred size does not match" );
|
||||||
const unsigned ndata = static_cast<unsigned>(preds.size()/nclass);
|
const unsigned ndata = static_cast<unsigned>(preds.size()/nclass);
|
||||||
@@ -168,16 +175,26 @@ namespace xgboost{
|
|||||||
for( int k = 0; k < nclass; ++ k ){
|
for( int k = 0; k < nclass; ++ k ){
|
||||||
rec[k] = preds[j + k * ndata];
|
rec[k] = preds[j + k * ndata];
|
||||||
}
|
}
|
||||||
|
if( prob == 0 ){
|
||||||
preds[j] = FindMaxIndex( rec );
|
preds[j] = FindMaxIndex( rec );
|
||||||
|
}else{
|
||||||
|
Softmax( rec );
|
||||||
|
for( int k = 0; k < nclass; ++ k ){
|
||||||
|
preds[j + k * ndata] = rec[k];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if( prob == 0 ){
|
||||||
preds.resize( ndata );
|
preds.resize( ndata );
|
||||||
}
|
}
|
||||||
|
}
|
||||||
virtual const char* DefaultEvalMetric(void) {
|
virtual const char* DefaultEvalMetric(void) {
|
||||||
return "merror";
|
return "merror";
|
||||||
}
|
}
|
||||||
private:
|
private:
|
||||||
int nclass;
|
int nclass;
|
||||||
|
int output_prob;
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@@ -158,7 +158,7 @@ public:
|
|||||||
this->info.labels.push_back( label );
|
this->info.labels.push_back( label );
|
||||||
// push back weight if any
|
// push back weight if any
|
||||||
if( fweight != NULL ){
|
if( fweight != NULL ){
|
||||||
this->info.labels.push_back( weight );
|
this->info.weights.push_back( weight );
|
||||||
}
|
}
|
||||||
this->data.AddRow( findex, fvalue );
|
this->data.AddRow( findex, fvalue );
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user