diff --git a/guide/README.md b/guide/README.md index a2ef94160..8d3aa1b29 100644 --- a/guide/README.md +++ b/guide/README.md @@ -197,13 +197,10 @@ Rabit is a portable library that can run on multiple platforms. * You can use [../tracker/rabit_hadoop.py](../tracker/rabit_hadoop.py) to run rabit programs on hadoop * This will start n rabit programs as mappers of MapReduce * Each program can read its portion of data from stdin -* Yarn is highly recommended, since Yarn allows specifying number of cpus and memory of each mapper: +* Yarn(Hadoop 2.0 or higher) is highly recommended, since Yarn allows specifying number of cpus and memory of each mapper: - This allows multi-threading programs in each node, which can be more efficient - An easy multi-threading solution could be to use OpenMP with rabit code -#### Running Rabit on Yarn -* To Be modified from [../tracker/rabit_hadoop.py](../tracker/rabit_hadoop.py) - #### Running Rabit using MPI * You can submit rabit programs to an MPI cluster using [../tracker/rabit_mpi.py](../tracker/rabit_mpi.py). * If you linked your code against librabit_mpi.a, then you can directly use mpirun to submit the job diff --git a/tracker/rabit_hadoop.py b/tracker/rabit_hadoop.py index 0d2850dcc..2883b1c47 100755 --- a/tracker/rabit_hadoop.py +++ b/tracker/rabit_hadoop.py @@ -89,8 +89,7 @@ assert out[0] == 'Hadoop', 'cannot parse hadoop version string' hadoop_version = out[1].split('.') use_yarn = int(hadoop_version[0]) >= 2 -if not use_yarn: - print 'Current Hadoop Version is %s' % out[1] +print 'Current Hadoop Version is %s' % out[1] def hadoop_streaming(nworker, worker_args, use_yarn): fset = set()