From c798fc2a2917c164cadcb235f2ed30f21318bad8 Mon Sep 17 00:00:00 2001 From: tqchen Date: Fri, 16 Jan 2015 10:45:54 -0800 Subject: [PATCH] change toolkit to rabitlearn --- README.md | 26 ++++++----------- rabit-learn/README.md | 17 +++++++++++ toolkit/Makefile => rabit-learn/common.mk | 28 ++++++------------- .../common}/toolkit_util.h | 0 rabit-learn/kmeans/Makefile | 16 +++++++++++ {toolkit => rabit-learn/kmeans}/README.md | 0 {toolkit => rabit-learn/kmeans}/kmeans.cc | 0 .../kmeans}/kmeans_hadoop.sh | 0 toolkit/.gitignore | 2 -- 9 files changed, 50 insertions(+), 39 deletions(-) create mode 100644 rabit-learn/README.md rename toolkit/Makefile => rabit-learn/common.mk (54%) rename {toolkit => rabit-learn/common}/toolkit_util.h (100%) create mode 100644 rabit-learn/kmeans/Makefile rename {toolkit => rabit-learn/kmeans}/README.md (100%) rename {toolkit => rabit-learn/kmeans}/kmeans.cc (100%) rename {toolkit => rabit-learn/kmeans}/kmeans_hadoop.sh (100%) delete mode 100644 toolkit/.gitignore diff --git a/README.md b/README.md index b035b7437..b8f3b785a 100644 --- a/README.md +++ b/README.md @@ -8,29 +8,19 @@ rabit is a light weight library that provides a fault tolerant interface of Allr Features ==== -* Portable library - - Rabit is a library instead of a framework, a program only needs to link the library to run. -* Flexibility in programming +All these features come from the facts about small rabbits :) +* Portable: rabit is light weight and runs everywhere + - Rabit is a library instead of a framework, a program only needs to link the library to run + - Rabit only relies on a mechanism to start programs, which is provided by most frameworks + - You can port rabit programs easily to many frameworks, including Hadoop and MPI, without changing your code +* Scalable and Flexible: rabit runs fast + * Rabit programs use Allreduce to communicate, and do not suffer the cost between iterations of the MapReduce abstraction. 
- Programs can call rabit functions in any order, as opposed to frameworks where callbacks are offered and called by the framework, i.e. inversion of control principle. - Programs persist over all the iterations, unless they fail and recover. -* Fault tolerance +* Fault Tolerant: rabit digs burrows to avoid disasters - Rabit programs can recover the model and results using synchronous function calls. -* MPI compatible - - Code that uses the rabit interface also compiles with existing MPI compilers - - Users can use MPI Allreduce with no code modification Use Rabit ==== * Type make in the root folder will compile the rabit library in lib folder * Add lib to the library path and include to the include path of compiler - -Design Notes -==== -* Rabit is designed for algorithms that replicate the same global model across nodes, while each node operates on a local partition of the data. -* The collection of global statistics is done using Allreduce - -Design Goals -==== -* rabit should run fast -* rabit should be light weight -* rabit should safely dig burrows to avoid disasters diff --git a/rabit-learn/README.md b/rabit-learn/README.md new file mode 100644 index 000000000..bd16ea826 --- /dev/null +++ b/rabit-learn/README.md @@ -0,0 +1,17 @@ +Rabit-Learn +==== +This folder contains implementations of distributed machine learning algorithms using rabit. +It also contains links to the machine learning packages that use rabit. + +* Contributions of toolkits, examples, and benchmarks are more than welcome! 
+ +Toolkits +==== +* [KMeans Clustering](kmeans) +* [XGBoost: eXtreme Gradient Boosting](https://github.com/tqchen/xgboost/tree/unity/multi-node) + - xgboost is a very fast boosted tree (also known as GBDT) library that can run more than + 10 times faster than existing packages + - Rabit carries xgboost to distributed environments, inheriting all the benefits of the xgboost + single node version, and scales it to even larger problems + + diff --git a/toolkit/Makefile b/rabit-learn/common.mk similarity index 54% rename from toolkit/Makefile rename to rabit-learn/common.mk index 3b74f9ba6..be73390c2 100644 --- a/toolkit/Makefile +++ b/rabit-learn/common.mk @@ -1,29 +1,19 @@ +# this is the common build script for rabit programs +# you do not have to use it export CC = gcc export CXX = g++ export MPICXX = mpicxx -export LDFLAGS= -pthread -lm -L../lib -export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -fPIC -I../include +export LDFLAGS= -pthread -lm -L../../lib +export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -fPIC -I../../include -I../common -# specify tensor path -BIN = kmeans.rabit -MOCKBIN= kmeans.mock -MPIBIN = kmeans.mpi -# objectives that makes up rabit library -OBJ = kmeans.o -.PHONY: clean all lib +.PHONY: clean all lib mpi +all: $(BIN) $(MOCKBIN) +mpi: $(MPIBIN) lib: - cd ..;make lib/librabit.a lib/librabit_mock.a; cd - + cd ../..;make lib/librabit.a lib/librabit_mock.a; cd - libmpi: - cd ..;make lib/librabit_mpi.a;cd - - -kmeans.o: kmeans.cc ../src/*.h - -# we can link against MPI version to get use MPI -kmeans.rabit: kmeans.o lib -kmeans.mock: kmeans.o lib -kmeans.mpi: kmeans.o libmpi + cd ../..;make lib/librabit_mpi.a;cd - $(BIN) : $(CXX) $(CFLAGS) -o $@ $(filter %.cpp %.o %.c %.cc, $^) $(LDFLAGS) -lrabit diff --git a/toolkit/toolkit_util.h b/rabit-learn/common/toolkit_util.h similarity index 100% rename from toolkit/toolkit_util.h rename to rabit-learn/common/toolkit_util.h diff --git a/rabit-learn/kmeans/Makefile 
b/rabit-learn/kmeans/Makefile new file mode 100644 index 000000000..244d9afdf --- /dev/null +++ b/rabit-learn/kmeans/Makefile @@ -0,0 +1,16 @@ +# specify tensor path +BIN = kmeans.rabit +MOCKBIN= kmeans.mock +MPIBIN = kmeans.mpi +# objectives that make up rabit library +OBJ = kmeans.o + +# common build script for programs +include ../common.mk + +# dependencies here +kmeans.rabit: kmeans.o lib +kmeans.mock: kmeans.o lib +kmeans.mpi: kmeans.o libmpi +kmeans.o: kmeans.cc ../../src/*.h + diff --git a/toolkit/README.md b/rabit-learn/kmeans/README.md similarity index 100% rename from toolkit/README.md rename to rabit-learn/kmeans/README.md diff --git a/toolkit/kmeans.cc b/rabit-learn/kmeans/kmeans.cc similarity index 100% rename from toolkit/kmeans.cc rename to rabit-learn/kmeans/kmeans.cc diff --git a/toolkit/kmeans_hadoop.sh b/rabit-learn/kmeans/kmeans_hadoop.sh similarity index 100% rename from toolkit/kmeans_hadoop.sh rename to rabit-learn/kmeans/kmeans_hadoop.sh diff --git a/toolkit/.gitignore b/toolkit/.gitignore deleted file mode 100644 index 5f8241b66..000000000 --- a/toolkit/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -kmeans -*.mpi