add Dart booster (#1220)

2016-06-09 06:04:01 +09:00
parent e034fdf74c
commit 949d1e3027
3 changed files with 332 additions and 3 deletions
--- a/tests/python/test_basic_models.py
+++ b/tests/python/test_basic_models.py
@@ -23,6 +23,51 @@ class TestModels(unittest.TestCase):
                  if int(preds[i] > 0.5) != labels[i]) / float(len(preds))
        assert err < 0.1

+    def test_dart(self):
+        dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
+        dtest = xgb.DMatrix(dpath + 'agaricus.txt.test')
+        param = {'max_depth': 5, 'objective': 'binary:logistic', 'booster': 'dart', 'silent': False}
+        # specify validations set to watch performance
+        watchlist = [(dtest, 'eval'), (dtrain, 'train')]
+        num_round = 2
+        bst = xgb.train(param, dtrain, num_round, watchlist)
+        # this is prediction
+        preds = bst.predict(dtest, ntree_limit=num_round)
+        labels = dtest.get_label()
+        err = sum(1 for i in range(len(preds)) if int(preds[i] > 0.5) != labels[i]) / float(len(preds))
+        # error must be smaller than 10%
+        assert err < 0.1
+
+        # save dmatrix into binary buffer
+        dtest.save_binary('dtest.buffer')
+        # save model
+        bst.save_model('xgb.model.dart')
+        # load model and data in
+        bst2 = xgb.Booster(params=param, model_file='xgb.model.dart')
+        dtest2 = xgb.DMatrix('dtest.buffer')
+        preds2 = bst2.predict(dtest2, ntree_limit=num_round)
+        # assert they are the same
+        assert np.sum(np.abs(preds2 - preds)) == 0
+
+        # check whether sample_type and normalize_type work
+        num_round = 50
+        param['silent'] = True
+        param['learning_rate'] = 0.1
+        param['rate_drop'] = 0.1
+        preds_list = []
+        for p in [[p0, p1] for p0 in ['uniform', 'weighted'] for p1 in ['tree', 'forest']]:
+            param['sample_type'] = p[0]
+            param['normalize_type'] = p[1]
+            bst = xgb.train(param, dtrain, num_round, watchlist)
+            preds = bst.predict(dtest, ntree_limit=num_round)
+            err = sum(1 for i in range(len(preds)) if int(preds[i] > 0.5) != labels[i]) / float(len(preds))
+            assert err < 0.1
+            preds_list.append(preds)
+
+        for ii in range(len(preds_list)):
+            for jj in range(ii + 1, len(preds_list)):
+                assert np.sum(np.abs(preds_list[ii] - preds_list[jj])) > 0
+
    def test_eta_decay(self):
        watchlist = [(dtest, 'eval'), (dtrain, 'train')]
        num_round = 4