Merge pull request #245 from dmlc/lite

Lite
2015-04-19 00:56:10 -07:00
parent 47ee5e7c14 5123b07d73
commit 54a78b87dc
34 changed files with 1464 additions and 890 deletions
--- a/demo/guide-python/README.md
+++ b/demo/guide-python/README.md
@@ -7,3 +7,5 @@ XGBoost Python Feature Walkthrough
 * [Generalized Linear Model](generalized_linear_model.py)
 * [Cross validation](cross_validation.py)
 * [Predicting leaf indices](predict_leaf_indices.py)
+* [Sklearn Wrapper](sklearn_example.py)
+* [External Memory](external_memory.py)
--- a/demo/guide-python/external_memory.py
+++ b/demo/guide-python/external_memory.py
@@ -0,0 +1,25 @@
+#!/usr/bin/python
+import numpy as np
+import scipy.sparse
+import xgboost as xgb
+
+### simple example of using the external memory version
+
+# this is the only difference: append a '#' followed by a cache prefix name to the file path
+# several cache files with that prefix will be generated
+# currently only conversion from a libsvm file is supported
+dtrain = xgb.DMatrix('../data/agaricus.txt.train#dtrain.cache')
+dtest = xgb.DMatrix('../data/agaricus.txt.test#dtest.cache')
+
+# specify training parameters; the validation sets used to watch performance are given in watchlist below
+param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic' }
+
+# performance notice: set nthread to the number of physical cpu cores
+# some cpus offer two threads per core (for example, a 4 core cpu with 8 threads); in such a case set nthread=4
+#param['nthread']=num_real_cpu
+
+watchlist  = [(dtest,'eval'), (dtrain,'train')]
+num_round = 2
+bst = xgb.train(param, dtrain, num_round, watchlist)
+
+