From 55e62a7120705b411f9314f40c0c3533012fd722 Mon Sep 17 00:00:00 2001
From: tqchen <tianqi.tchen@gmail.com>
Date: Wed, 19 Nov 2014 11:44:24 -0800
Subject: [PATCH] still need to test row merge

---
 multi-node/col-split/README.md                |  6 +++---
 .../{runexp-mpi.sh => mushroom-col.sh}        |  0
 multi-node/col-split/run-mushroom.sh          | 19 -------------------
 src/learner/learner-inl.hpp                   |  4 +++-
 4 files changed, 6 insertions(+), 23 deletions(-)
 rename multi-node/col-split/{runexp-mpi.sh => mushroom-col.sh} (100%)
 mode change 100644 => 100755
 delete mode 100755 multi-node/col-split/run-mushroom.sh
diff --git a/multi-node/col-split/README.md b/multi-node/col-split/README.md
index b3053080f..c0b9fef7c 100644
--- a/multi-node/col-split/README.md
+++ b/multi-node/col-split/README.md
@@ -1,14 +1,14 @@
-Column Split Version of XGBoost
+Distributed XGBoost: Column Split Version
 ====
 * run ```bash run-mushroom.sh```
 
-Steps to use column split version
+How to Use
 ====
 * First split the data by column, 
 * In the config, specify data file as containing a wildcard %d, where %d is the rank of the node, each node will load their part of data
 * Enable column split mode by ```dsplit=col```
 
-Note on the Column Split Version
+Notes
 ====
 * The code is multi-threaded, so you want to run one xgboost-mpi per node
 * The code will work correctly as long as union of each column subset is all the columns we are interested in.
diff --git a/multi-node/col-split/runexp-mpi.sh b/multi-node/col-split/mushroom-col.sh
old mode 100644
new mode 100755
similarity index 100%
rename from multi-node/col-split/runexp-mpi.sh
rename to multi-node/col-split/mushroom-col.sh
diff --git a/multi-node/col-split/run-mushroom.sh b/multi-node/col-split/run-mushroom.sh
deleted file mode 100755
index 5c4c06587..000000000
--- a/multi-node/col-split/run-mushroom.sh
+++ /dev/null
@@ -1,19 +0,0 @@
-#!/bin/bash
-if [[ $# -ne 1 ]]
-then
-    echo "Usage: nprocess"
-    exit -1
-fi
-
-rm -rf train.col*
-k=$1
-
-# split the lib svm file into k subfiles
-python splitsvm.py ../../demo/data/agaricus.txt.train train $k
-
-# run xgboost mpi
-mpirun -n $k ../../xgboost-mpi  mushroom-col.conf updater=distcol silent=0
-
-# the model can be directly loaded by single machine xgboost solver, as usuall
-../../xgboost mushroom-col.conf task=dump model_in=0002.model fmap=../../demo/data/featmap.txt name_dump=dump.nice.$k.txt
-cat dump.nice.$k.txt
diff --git a/src/learner/learner-inl.hpp b/src/learner/learner-inl.hpp
index b1a95dd96..70e71cf57 100644
--- a/src/learner/learner-inl.hpp
+++ b/src/learner/learner-inl.hpp
@@ -92,7 +92,7 @@ class BoostLearner {
     if (!strcmp(name, "silent")) silent = atoi(val);
     if (!strcmp(name, "dsplit")) {
       if (!strcmp(val, "col")) {
-        this->SetParam("updater", "distcol,prune");
+        this->SetParam("updater", "distcol");
         distributed_mode = 1;
       } else if (!strcmp(val, "row")) {
         this->SetParam("updater", "grow_histmaker,prune");
@@ -104,6 +104,8 @@ class BoostLearner {
     if (!strcmp(name, "part_load_col")) part_load_col = atoi(val);
     if (!strcmp(name, "prob_buffer_row")) {
       prob_buffer_row = static_cast<float>(atof(val));
+      utils::Check(distributed_mode == 0,
+                   "prob_buffer_row can only be used in single node mode so far");
       this->SetParam("updater", "grow_colmaker,refresh,prune");
     }
     if (!strcmp(name, "eval_metric")) evaluator_.AddEval(val);