diff --git a/subtree/README.md b/subtree/README.md new file mode 100644 index 000000000..337e2c6b0 --- /dev/null +++ b/subtree/README.md @@ -0,0 +1 @@ +This folder contains git subtree projects of xgboost diff --git a/test/Makefile b/test/Makefile deleted file mode 100644 index a702d073f..000000000 --- a/test/Makefile +++ /dev/null @@ -1,35 +0,0 @@ -export CC = gcc -export CXX = g++ -export MPICXX = mpicxx -export LDFLAGS= -pthread -lm -export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -fPIC -I../src - -ifeq ($(no_omp),1) - CFLAGS += -DDISABLE_OPENMP -else - CFLAGS += -fopenmp -endif - -# specify tensor path -BIN = test_group_data test_quantile test_allreduce -OBJ = sync_tcp.o -.PHONY: clean all - -all: $(BIN) $(MPIBIN) - -sync_tcp.o: ../src/sync/sync_tcp.cpp ../src/utils/*.h - -test_group_data: test_group_data.cpp ../src/utils/*.h -test_quantile: test_quantile.cpp ../src/utils/*.h -test_allreduce: test_allreduce.cpp ../src/utils/*.h ../src/sync/sync.h sync_tcp.o -$(BIN) : - $(CXX) $(CFLAGS) $(LDFLAGS) -o $@ $(filter %.cpp %.o %.c, $^) - -$(OBJ) : - $(CXX) -c $(CFLAGS) -o $@ $(firstword $(filter %.cpp %.c, $^) ) - -$(MPIBIN) : - $(MPICXX) $(CFLAGS) $(LDFLAGS) -o $@ $(filter %.cpp %.o %.c, $^) - -clean: - $(RM) $(BIN) $(MPIBIN) *~ diff --git a/test/mkquantest.py b/test/mkquantest.py deleted file mode 100755 index 70c467b46..000000000 --- a/test/mkquantest.py +++ /dev/null @@ -1,42 +0,0 @@ -#!/usr/bin/python -import math -import sys -import random -import subprocess - -funcs = { - 'seq': 'lambda n: sorted([(x,1) for x in range(1,n+1)], key = lambda x:random.random())', - 'seqlogw': 'lambda n: sorted([(x, math.log(x)) for x in range(1,n+1)], key = lambda x:random.random())', - 'lots0': 'lambda n: sorted([(max(x - n*3/4,0), 1) for x in range(1,n+1)], key = lambda x:random.random())', - 'lots9': 'lambda n: sorted([(9 if x > n / 4 else x, 1) for x in range(1,n+1)], key = lambda x:random.random())', - 'lotsm': 'lambda n: sorted([(n/8 if x > n / 4 else x, 1) for x in range(1,n+1)], key = lambda x:random.random())', - 'lotsmr': 'lambda n: sorted([( x * 4 / n + n / 20 if x > n / 10 else x, 1) for x in range(1,n+1)], key = lambda x:random.random())', - 'lotsmr2': 'lambda n: sorted([( x * 10 / n + n / 20 if x > n / 10 else x, 1) for x in range(1,n+1)], key = lambda x:random.random())' -} - -if len(sys.argv) < 3: - print 'Usage: python mkquantest.py [generate-type] [ndata]|./test_quantile [solver]' - print 'test_quantile need to be compiled, solver can be gk(GK nonweight version), wq(weighted version), wx(weighthed version, with prune optimized for heavy hitter)' - print 'Possible generate-types:' - for k, v in funcs.items(): - print '\t%s: %s' % (k, v) - print 'Example: ./mkquantest.py 50000 0.3 lotsmr |./test_quantile wq' - exit(-1) -random.seed(0) -maxn = int(sys.argv[1]) -eps = float(sys.argv[2]) -if len(sys.argv) > 3: - method = sys.argv[3] - assert method in funcs, ('cannot find method %s' % method) -else: - method = 'seq' -if len(sys.argv) > 4: - ndata = int(sys.argv[4]) - assert ndata <= maxn, 'ndata must be smaller than maxn' -else: - ndata = maxn - -fo = sys.stdout -fo.write('%d\t%g\n' % (maxn, eps)) -for x, w in eval(funcs[method])(ndata): - fo.write(str(x)+'\t'+str(w)+'\n') diff --git a/test/test_allreduce.cpp b/test/test_allreduce.cpp deleted file mode 100644 index 4a47d7f55..000000000 --- a/test/test_allreduce.cpp +++ /dev/null @@ -1,124 +0,0 @@ -#include -#include -#include -#include -#include - -using namespace xgboost; - -inline void TestMax(size_t n) { - int rank = sync::GetRank(); - int nproc = sync::GetWorldSize(); - - std::vector ndata(n); - for (size_t i = 0; i < ndata.size(); ++i) { - ndata[i] = (i * (rank+1)) % 111; - } - sync::AllReduce(&ndata[0], ndata.size(), sync::kMax); - for (size_t i = 0; i < ndata.size(); ++i) { - float rmax = (i * 1) % 111; - for (int r = 0; r < nproc; ++r) { - rmax = std::max(rmax, (float)((i * (r+1)) % 111)); - } - utils::Check(rmax == ndata[i], "[%d] TestMax check failure", rank); - } -} - -inline void TestSum(size_t n) { - int rank = sync::GetRank(); - int nproc = sync::GetWorldSize(); - const int z = 131; - - std::vector ndata(n); - for (size_t i = 0; i < ndata.size(); ++i) { - ndata[i] = (i * (rank+1)) % z; - } - sync::AllReduce(&ndata[0], ndata.size(), sync::kSum); - for (size_t i = 0; i < ndata.size(); ++i) { - float rsum = 0.0f; - for (int r = 0; r < nproc; ++r) { - rsum += (float)((i * (r+1)) % z); - } - utils::Check(fabsf(rsum - ndata[i]) < 1e-5 , - "[%d] TestSum check failure, local=%g, allreduce=%g", rank, rsum, ndata[i]); - } -} - -struct Rec { - double rmax; - double rmin; - double rsum; - Rec() {} - Rec(double r) { - rmax = rmin = rsum = r; - } - inline void Reduce(const Rec &b) { - rmax = std::max(b.rmax, rmax); - rmin = std::max(b.rmin, rmin); - rsum += b.rsum; - } - inline void CheckSameAs(const Rec &b) { - if (rmax != b.rmax || rmin != b.rmin || fabs(rsum - b.rsum) > 1e-6) { - utils::Error("[%d] TestReducer check failure", sync::GetRank()); - } - } -}; - -inline void TestReducer(int n) { - int rank = sync::GetRank(); - int nproc = sync::GetWorldSize(); - const int z = 131; - sync::Reducer red; - std::vector ndata(n); - for (size_t i = 0; i < ndata.size(); ++i) { - ndata[i] = Rec((i * (rank+1)) % z); - } - red.AllReduce(&ndata[0], ndata.size()); - - for (size_t i = 0; i < ndata.size(); ++i) { - Rec rec((i * 1) % z); - for (int r = 1; r < nproc; ++r) { - rec.Reduce(Rec((i * (r+1)) % z)); - } - rec.CheckSameAs(ndata[i]); - } -} - - -inline void TestBcast(size_t n, int root) { - int rank = sync::GetRank(); - std::string s; s.resize(n); - for (size_t i = 0; i < n; ++i) { - s[i] = char(i % 126 + 1); - } - std::string res; - if (root == rank) { - res = s; - sync::Bcast(&res, root); - } else { - sync::Bcast(&res, root); - } - utils::Check(res == s, "[%d] TestBcast fail", rank); -} - -int main(int argc, char *argv[]) { - if (argc < 2) { - printf("Usage: \n"); - return 0; - } - int n = atoi(argv[1]); - sync::Init(argc, argv); - int rank = sync::GetRank(); - //int nproc = sync::GetWorldSize(); - std::string name = sync::GetProcessorName(); - printf("[%d] start at %s\n", rank, name.c_str()); - TestMax(n); - printf("[%d] TestMax pass\n", rank); - TestSum(n); - printf("[%d] TestSum pass\n", rank); - TestReducer(n); - printf("[%d] TestReducer pass\n", rank); - sync::Finalize(); - printf("[%d] all check pass\n", rank); - return 0; -} diff --git a/test/test_group_data.cpp b/test/test_group_data.cpp deleted file mode 100644 index 676d45e27..000000000 --- a/test/test_group_data.cpp +++ /dev/null @@ -1,84 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include - -using namespace xgboost::utils; -using namespace xgboost; - -int main(int argc, char *argv[]) { - if (argc < 3) { - printf("Usage: pnthread]\n"); - return 0; - } - if (argc > 3) { - omp_set_num_threads(atoi(argv[3])); - } - random::Seed(0); - unsigned nkey = static_cast(atoi(argv[1])); - size_t ndata = static_cast(atol(argv[2])); - - std::vector keys; - std::vector< std::pair > raw; - raw.reserve(ndata); keys.reserve(ndata); - for (size_t i = 0; i < ndata; ++i) { - unsigned key = random::NextUInt32(nkey); - utils::Check(key < nkey, "key exceed bound\n"); - raw.push_back(std::make_pair(key, i)); - keys.push_back(key); - } - printf("loading finish, start working\n"); - time_t start_t = time(NULL); - int nthread; - #pragma omp parallel - { - nthread = omp_get_num_threads(); - } - std::vector rptr; - std::vector data; - ParallelGroupBuilder builder(&rptr, &data); - builder.InitBudget(0, nthread); - - size_t nstep = (raw.size() +nthread-1)/ nthread; - #pragma omp parallel - { - int tid = omp_get_thread_num(); - size_t begin = tid * nstep; - size_t end = std::min((tid + 1) * nstep, raw.size()); - for (size_t i = begin; i < end; ++i) { - builder.AddBudget(raw[i].first, tid); - } - } - double first_cost = time(NULL) - start_t; - builder.InitStorage(); - - #pragma omp parallel - { - int tid = omp_get_thread_num(); - size_t begin = tid * nstep; - size_t end = std::min((tid + 1)* nstep, raw.size()); - for (size_t i = begin; i < end; ++i) { - builder.Push(raw[i].first, raw[i].second, tid); - } - } - - double second_cost = time(NULL) - start_t; - printf("all finish, phase1=%g sec, phase2=%g sec\n", first_cost, second_cost); - Check(rptr.size() <= nkey+1, "nkey exceed bound"); - Check(rptr.back() == ndata, "data shape inconsistent"); - for (size_t i = 0; i < rptr.size()-1; ++ i) { - Check(rptr[i] <= rptr[i+1], "rptr error"); - for (size_t j = rptr[i]; j < rptr[i+1]; ++j) { - unsigned pos = data[j]; - Check(pos < keys.size(), "invalid pos"); - Check(keys[pos] == i, "invalid key entry"); - } - } - printf("all check pass\n"); - return 0; -} diff --git a/test/test_quantile.cpp b/test/test_quantile.cpp deleted file mode 100644 index c1b85668d..000000000 --- a/test/test_quantile.cpp +++ /dev/null @@ -1,92 +0,0 @@ -#include -#include -#include -using namespace xgboost; - - -struct Entry { - double x, w, rmin; - inline bool operator<(const Entry &e) const { - return x < e.x; - } -}; - -inline void MakeQuantile(std::vector &dat) { - std::sort(dat.begin(), dat.end()); - size_t top = 0; - double wsum = 0.0; - for (size_t i = 0; i < dat.size();) { - size_t j = i + 1; - for (;j < dat.size() && dat[i].x == dat[j].x; ++j) { - dat[i].w += dat[j].w; - } - dat[top] = dat[i]; - dat[top].rmin = wsum; - wsum += dat[top].w; - ++top; - i = j; - } - dat.resize(top); -} - -template -inline void verifyWQ(std::vector &dat, Summary out) { - MakeQuantile(dat); - size_t j = 0; - double err = 0.0; - const double eps = 1e-4; - for (size_t i = 0; i < out.size; ++i) { - while (j < dat.size() && dat[j].x < out.data[i].value) ++j; - utils::Assert(j < dat.size() && fabs(dat[j].x - out.data[i].value) < eps, "bug"); - err = std::min(dat[j].rmin - out.data[i].rmin, err); - err = std::min(out.data[i].rmax - dat[j].rmin + dat[j].w, err); - err = std::min(dat[j].w - out.data[i].wmin, err); - } - if (err < 0.0) err = -err; - printf("verify correctness, max-constraint-violation=%g (0 means perfect, coubld be nonzero due to floating point)\n", err); -} - -template -inline typename Sketch::SummaryContainer test(std::vector &dat) { - Sketch sketch; - size_t n; - double wsum = 0.0; - float eps; - utils::Check(scanf("%lu%f", &n, &eps) == 2, "needs to start with n eps"); - sketch.Init(n, eps); - Entry e; - while (scanf("%lf%lf", &e.x, &e.w) == 2) { - dat.push_back(e); - wsum += e.w; - } - clock_t start = clock(); - for (size_t i = 0; i < dat.size(); ++i) { - sketch.Push(dat[i].x, dat[i].w); - } - double tcost = static_cast(clock() - start) / CLOCKS_PER_SEC; - typename Sketch::SummaryContainer out; - sketch.GetSummary(&out); - double maxerr = static_cast(out.MaxError()); - out.Print(); - printf("-------------------------\n"); - printf("timecost=%g sec\n", tcost); - printf("MaxError=%g/%g = %g\n", maxerr, wsum, maxerr / wsum); - printf("maxlevel = %lu, usedlevel=%lu, limit_size=%lu\n", sketch.nlevel, sketch.level.size(), sketch.limit_size); - return out; -} - -int main(int argc, char *argv[]) { - const char *method = "wq"; - if (argc > 1) method = argv[1]; - std::vector dat; - if (!strcmp(method, "wq")) { - verifyWQ(dat, test, float>(dat)); - } - if (!strcmp(method, "wx")) { - verifyWQ(dat, test, float>(dat)); - } - if (!strcmp(method, "gk")) { - test, unsigned>(dat); - } - return 0; -}