recheck column mode

2014-11-19 11:21:07 -08:00
parent dffcbc838b
commit 54e2ed90d7
8 changed files with 51 additions and 93 deletions
--- a/demo/mpi/README.md
+++ b/demo/mpi/README.md
@@ -1,3 +0,0 @@
-This folder contains toy example script to run xgboost-mpi. 
-
-This is an experimental distributed version of xgboost
--- a/demo/mpi/mpi.conf
+++ b/demo/mpi/mpi.conf
@@ -1,36 +0,0 @@
-# General Parameters, see comment for each definition
-# choose the booster, can be gbtree or gblinear
-booster = gbtree
-# choose logistic regression loss function for binary classification
-objective = binary:logistic
-
-# Tree Booster Parameters
-# step size shrinkage
-eta = 1.0 
-# minimum loss reduction required to make a further partition
-gamma = 1.0 
-# minimum sum of instance weight(hessian) needed in a child
-min_child_weight = 1 
-# maximum depth of a tree
-max_depth = 3 
-
-# Task Parameters
-# the number of round to do boosting
-num_round = 2
-# 0 means do not save any model except the final round model
-save_period = 0 
-use_buffer = 0
-
-
-# The path of training data %d is the wildcard for the rank of the data
-# The idea is each process take a feature matrix with subset of columns
-#
-data = "train.col%d" 
-
-# The path of validation data, used to monitor training process, here [test] sets name of the validation set
-eval[test] = "../data/agaricus.txt.test" 
-# evaluate on training data as well each round
-eval_train = 1
-
-# The path of test data, need to use full data of test, try not use it, or keep an subsampled version
-test:data = "agaricus.txt.test"      
--- a/demo/mpi/runexp-mpi.sh
+++ b/demo/mpi/runexp-mpi.sh
@@ -1,19 +0,0 @@
-#!/bin/bash
-if [[ $# -ne 1 ]]
-then
-    echo "Usage: nprocess"
-    exit -1
-fi
-
-rm -rf train.col*
-k=$1
-
-# split the lib svm file into k subfiles
-python splitsvm.py ../data/agaricus.txt.train train $k
-
-# run xgboost mpi
-mpirun -n $k ../../xgboost-mpi  mpi.conf 
-
-# the model can be directly loaded by single machine xgboost solver, as usuall
-../../xgboost mpi.conf task=dump model_in=0002.model fmap=../data/featmap.txt name_dump=dump.nice.$k.txt
-cat dump.nice.$k.txt
--- a/demo/mpi/splitsvm.py
+++ b/demo/mpi/splitsvm.py
@@ -1,32 +0,0 @@
-#!/usr/bin/python
-import sys
-import random
-
-# split libsvm file into different subcolumns
-if len(sys.argv) < 4:
-    print ('Usage:<fin> <fo> k')
-    exit(0)
-
-random.seed(10)
-fmap = {}
-
-k = int(sys.argv[3])
-fi = open( sys.argv[1], 'r' )
-fos = []
-
-for i in range(k):
-    fos.append(open( sys.argv[2]+'.col%d' % i, 'w' ))
-    
-for l in open(sys.argv[1]):
-    arr = l.split()
-    for f in fos:
-        f.write(arr[0])
-    for it in arr[1:]:
-        fid = int(it.split(':')[0])
-        if fid not in fmap:
-            fmap[fid] = random.randint(0, k-1)
-        fos[fmap[fid]].write(' '+it)
-    for f in fos:
-        f.write('\n')
-for f in fos:    
-    f.close()