add subtree folder

2015-01-18 21:07:31 -08:00
parent 9695c51ce1
commit 07da390575
6 changed files with 1 additions and 377 deletions
--- a/subtree/README.md
+++ b/subtree/README.md
@@ -0,0 +1 @@
+This folder contains git subtree projects of xgboost
--- a/test/Makefile
+++ b/test/Makefile
@@ -1,35 +0,0 @@
-export CC  = gcc
-export CXX = g++
-export MPICXX = mpicxx
-export LDFLAGS= -pthread -lm 
-export CFLAGS = -Wall -O3 -msse2  -Wno-unknown-pragmas -fPIC -I../src
-
-ifeq ($(no_omp),1)
-	CFLAGS += -DDISABLE_OPENMP 
-else 
-	CFLAGS += -fopenmp
-endif
-
-# specify tensor path
-BIN = test_group_data test_quantile test_allreduce
-OBJ = sync_tcp.o
-.PHONY: clean all
-
-all: $(BIN) $(MPIBIN)
-
-sync_tcp.o: ../src/sync/sync_tcp.cpp ../src/utils/*.h
-
-test_group_data: test_group_data.cpp ../src/utils/*.h
-test_quantile: test_quantile.cpp ../src/utils/*.h
-test_allreduce: test_allreduce.cpp ../src/utils/*.h ../src/sync/sync.h sync_tcp.o
-$(BIN) : 
-	$(CXX) $(CFLAGS) $(LDFLAGS) -o $@ $(filter %.cpp %.o %.c, $^)
-
-$(OBJ) : 
-	$(CXX) -c $(CFLAGS) -o $@ $(firstword $(filter %.cpp %.c, $^) )
-
-$(MPIBIN) : 
-	$(MPICXX) $(CFLAGS) $(LDFLAGS) -o $@ $(filter %.cpp %.o %.c, $^)
-
-clean:
-	$(RM) $(BIN) $(MPIBIN) *~
--- a/test/mkquantest.py
+++ b/test/mkquantest.py
@@ -1,42 +0,0 @@
-#!/usr/bin/python
-import math
-import sys
-import random
-import subprocess
-
-funcs = {
-    'seq': 'lambda n: sorted([(x,1) for x in range(1,n+1)], key = lambda x:random.random())',
-    'seqlogw': 'lambda n: sorted([(x, math.log(x)) for x in range(1,n+1)], key = lambda x:random.random())',
-    'lots0': 'lambda n: sorted([(max(x - n*3/4,0), 1) for x in range(1,n+1)], key = lambda x:random.random())',
-    'lots9': 'lambda n: sorted([(9 if x > n / 4 else x, 1) for x in range(1,n+1)], key = lambda x:random.random())',
-    'lotsm': 'lambda n: sorted([(n/8 if x > n / 4 else x, 1) for x in range(1,n+1)], key = lambda x:random.random())',
-    'lotsmr': 'lambda n: sorted([( x * 4 / n + n / 20 if x > n / 10 else x, 1) for x in range(1,n+1)], key = lambda x:random.random())',
-    'lotsmr2': 'lambda n: sorted([( x * 10 / n + n / 20 if x > n / 10 else x, 1) for x in range(1,n+1)], key = lambda x:random.random())'
-}
-
-if len(sys.argv) < 3:
-    print 'Usage: python mkquantest.py <maxn> <eps> [generate-type] [ndata]|./test_quantile [solver]'
-    print 'test_quantile need to be compiled, solver can be gk(GK nonweight version), wq(weighted version), wx(weighthed version, with prune optimized for heavy hitter)'
-    print 'Possible generate-types:' 
-    for k, v in funcs.items():
-        print '\t%s: %s' % (k, v)
-    print 'Example: ./mkquantest.py 50000 0.3 lotsmr |./test_quantile wq'
-    exit(-1)
-random.seed(0)
-maxn = int(sys.argv[1])
-eps = float(sys.argv[2])
-if len(sys.argv) > 3:
-    method = sys.argv[3]
-    assert method in funcs, ('cannot find method %s' % method)
-else:
-    method = 'seq'
-if len(sys.argv) > 4:
-    ndata = int(sys.argv[4])
-    assert ndata <= maxn, 'ndata must be smaller than maxn'
-else:
-    ndata = maxn
-    
-fo = sys.stdout
-fo.write('%d\t%g\n' % (maxn, eps))
-for x, w in eval(funcs[method])(ndata):
-    fo.write(str(x)+'\t'+str(w)+'\n')
--- a/test/test_allreduce.cpp
+++ b/test/test_allreduce.cpp
@@ -1,124 +0,0 @@
-#include <sync/sync.h>
-#include <utils/utils.h>
-#include <cstdio>
-#include <cstdlib>
-#include <cmath>
-
-using namespace xgboost;
-
-inline void TestMax(size_t n) {
-  int rank = sync::GetRank();
-  int nproc = sync::GetWorldSize();
-  
-  std::vector<float> ndata(n);
-  for (size_t i = 0; i < ndata.size(); ++i) {
-    ndata[i] = (i * (rank+1)) % 111;
-  }
-  sync::AllReduce(&ndata[0], ndata.size(), sync::kMax);  
-  for (size_t i = 0; i < ndata.size(); ++i) {
-    float rmax = (i * 1) % 111;
-    for (int r = 0; r < nproc; ++r) {
-      rmax = std::max(rmax, (float)((i * (r+1)) % 111));
-    }
-    utils::Check(rmax == ndata[i], "[%d] TestMax check failure", rank);
-  }
-}
-
-inline void TestSum(size_t n) {
-  int rank = sync::GetRank();
-  int nproc = sync::GetWorldSize();
-  const int z = 131;
-
-  std::vector<float> ndata(n);
-  for (size_t i = 0; i < ndata.size(); ++i) {
-    ndata[i] = (i * (rank+1)) % z;
-  }
-  sync::AllReduce(&ndata[0], ndata.size(), sync::kSum);  
-  for (size_t i = 0; i < ndata.size(); ++i) {
-    float rsum = 0.0f;
-    for (int r = 0; r < nproc; ++r) {
-      rsum += (float)((i * (r+1)) % z);
-    }
-    utils::Check(fabsf(rsum - ndata[i]) < 1e-5 ,
-                 "[%d] TestSum check failure, local=%g, allreduce=%g", rank, rsum, ndata[i]);
-  }
-}
-
-struct Rec {
-  double rmax;
-  double rmin;
-  double rsum;
-  Rec() {}
-  Rec(double r) {
-    rmax = rmin = rsum = r;
-  }
-  inline void Reduce(const Rec &b) {
-    rmax = std::max(b.rmax, rmax);
-    rmin = std::max(b.rmin, rmin);
-    rsum += b.rsum;
-  }
-  inline void CheckSameAs(const Rec &b) {
-    if (rmax != b.rmax || rmin != b.rmin || fabs(rsum - b.rsum) > 1e-6) {
-      utils::Error("[%d] TestReducer check failure", sync::GetRank());
-    }
-  }
-};
-
-inline void TestReducer(int n) {
-  int rank = sync::GetRank();
-  int nproc = sync::GetWorldSize();
-  const int z = 131;
-  sync::Reducer<Rec> red;
-  std::vector<Rec> ndata(n);
-  for (size_t i = 0; i < ndata.size(); ++i) {
-    ndata[i] = Rec((i * (rank+1)) % z);
-  }
-  red.AllReduce(&ndata[0], ndata.size());  
-                
-  for (size_t i = 0; i < ndata.size(); ++i) {
-    Rec rec((i * 1) % z);
-    for (int r = 1; r < nproc; ++r) {
-      rec.Reduce(Rec((i * (r+1)) % z));
-    }
-    rec.CheckSameAs(ndata[i]);
-  }  
-}
-
-
-inline void TestBcast(size_t n, int root) {
-  int rank = sync::GetRank();
-  std::string s; s.resize(n);      
-  for (size_t i = 0; i < n; ++i) {
-    s[i] = char(i % 126 + 1);
-  }
-  std::string res;
-  if (root == rank) {
-    res = s;
-    sync::Bcast(&res, root);
-  } else {
-    sync::Bcast(&res, root);
-  }
-  utils::Check(res == s, "[%d] TestBcast fail", rank);
-}
-
-int main(int argc, char *argv[]) {
-  if (argc < 2) {
-    printf("Usage: <ndata>\n");
-    return 0;
-  }
-  int n = atoi(argv[1]);
-  sync::Init(argc, argv);
-  int rank = sync::GetRank();
-  //int nproc = sync::GetWorldSize();
-  std::string name = sync::GetProcessorName();
-  printf("[%d] start at %s\n", rank, name.c_str());
-  TestMax(n);
-  printf("[%d] TestMax pass\n", rank);
-  TestSum(n);
-  printf("[%d] TestSum pass\n", rank);
-  TestReducer(n);
-  printf("[%d] TestReducer pass\n", rank);
-  sync::Finalize();
-  printf("[%d] all check pass\n", rank);
-  return 0;
-}
--- a/test/test_group_data.cpp
+++ b/test/test_group_data.cpp
@@ -1,84 +0,0 @@
-#include <cstdio>
-#include <cstdlib>
-#include <vector>
-#include <utility>
-#include <ctime>
-#include <utils/group_data.h>
-#include <utils/random.h>
-#include <utils/omp.h>
-#include <utils/utils.h>
-
-using namespace xgboost::utils;
-using namespace xgboost;
-
-int main(int argc, char *argv[]) {
-  if (argc < 3) {
-    printf("Usage: <nkey> <ndata> pnthread]\n");
-    return 0;
-  }
-  if (argc > 3) {
-    omp_set_num_threads(atoi(argv[3]));
-  }
-  random::Seed(0);
-  unsigned nkey = static_cast<unsigned>(atoi(argv[1]));
-  size_t ndata = static_cast<size_t>(atol(argv[2]));
-  
-  std::vector<unsigned> keys;
-  std::vector< std::pair<unsigned, unsigned> > raw;
-  raw.reserve(ndata); keys.reserve(ndata);
-  for (size_t i = 0; i < ndata; ++i) {
-    unsigned key = random::NextUInt32(nkey);
-    utils::Check(key < nkey, "key exceed bound\n");
-    raw.push_back(std::make_pair(key, i));
-    keys.push_back(key);
-  }
-  printf("loading finish, start working\n");
-  time_t start_t = time(NULL);
-  int nthread;
-  #pragma omp parallel
-  {
-    nthread = omp_get_num_threads();
-  }
-  std::vector<size_t> rptr;
-  std::vector<unsigned> data;
-  ParallelGroupBuilder<unsigned> builder(&rptr, &data);
-  builder.InitBudget(0, nthread);
-
-  size_t nstep = (raw.size() +nthread-1)/ nthread;
-  #pragma omp parallel
-  {
-    int tid = omp_get_thread_num(); 
-    size_t begin = tid * nstep;
-    size_t end = std::min((tid + 1) * nstep, raw.size());
-    for (size_t i = begin; i < end; ++i) {
-      builder.AddBudget(raw[i].first, tid);
-    }
-  }
-  double first_cost = time(NULL) - start_t;
-  builder.InitStorage();  
-
-  #pragma omp parallel
-  {
-    int tid = omp_get_thread_num(); 
-    size_t begin = tid * nstep;
-    size_t end = std::min((tid + 1)* nstep, raw.size());
-    for (size_t i = begin; i < end; ++i) {
-      builder.Push(raw[i].first, raw[i].second, tid);
-    }
-  }
-
-  double second_cost = time(NULL) - start_t;
-  printf("all finish, phase1=%g sec, phase2=%g sec\n", first_cost, second_cost);
-  Check(rptr.size() <= nkey+1, "nkey exceed bound");
-  Check(rptr.back() == ndata, "data shape inconsistent");
-  for (size_t i = 0; i < rptr.size()-1; ++ i) {
-    Check(rptr[i] <= rptr[i+1], "rptr error");
-    for (size_t j = rptr[i]; j < rptr[i+1]; ++j) {
-      unsigned pos = data[j];
-      Check(pos < keys.size(), "invalid pos");
-      Check(keys[pos] == i, "invalid key entry");
-    }
-  }
-  printf("all check pass\n");
-  return 0;
-}
--- a/test/test_quantile.cpp
+++ b/test/test_quantile.cpp
@@ -1,92 +0,0 @@
-#include <vector>
-#include <utils/quantile.h>
-#include <ctime>
-using namespace xgboost;
-
-
-struct Entry {
-  double x, w, rmin;
-  inline bool operator<(const Entry &e) const {
-    return x < e.x;
-  }
-};
-
-inline void MakeQuantile(std::vector<Entry> &dat) {
-  std::sort(dat.begin(), dat.end());
-  size_t top = 0;
-  double wsum = 0.0;
-  for (size_t i = 0; i < dat.size();) {
-    size_t j = i + 1;
-    for (;j < dat.size() && dat[i].x == dat[j].x; ++j) {
-      dat[i].w += dat[j].w;
-    }
-    dat[top] = dat[i];
-    dat[top].rmin = wsum;
-    wsum += dat[top].w;
-    ++top;
-    i = j;
-  }
-  dat.resize(top);
-}
-
-template<typename Summary>
-inline void verifyWQ(std::vector<Entry> &dat, Summary out) {
- MakeQuantile(dat);
- size_t j = 0;
- double err = 0.0;
- const double eps = 1e-4;
- for (size_t i = 0; i < out.size; ++i) {
-   while (j < dat.size() && dat[j].x < out.data[i].value) ++j;
-   utils::Assert(j < dat.size() && fabs(dat[j].x - out.data[i].value) < eps, "bug");
-   err = std::min(dat[j].rmin - out.data[i].rmin, err);
-   err = std::min(out.data[i].rmax - dat[j].rmin + dat[j].w, err);
-   err = std::min(dat[j].w - out.data[i].wmin, err);
- }
- if (err < 0.0) err = -err;
- printf("verify correctness, max-constraint-violation=%g (0 means perfect, coubld be nonzero due to floating point)\n", err);
-}
-
-template<typename Sketch, typename RType>
-inline typename Sketch::SummaryContainer test(std::vector<Entry> &dat) {
-  Sketch sketch;
-  size_t n;
-  double wsum = 0.0;
-  float eps;
-  utils::Check(scanf("%lu%f", &n, &eps) == 2, "needs to start with n eps");
-  sketch.Init(n, eps);
-  Entry e;
-  while (scanf("%lf%lf", &e.x, &e.w) == 2) {
-    dat.push_back(e);
-    wsum += e.w;
-  }
-  clock_t start = clock();
-  for (size_t i = 0; i < dat.size(); ++i) {
-    sketch.Push(dat[i].x, dat[i].w);
-  }
-  double tcost = static_cast<double>(clock() - start) / CLOCKS_PER_SEC;
-  typename Sketch::SummaryContainer out;
-  sketch.GetSummary(&out); 
-  double maxerr = static_cast<double>(out.MaxError());
-  out.Print();
-  printf("-------------------------\n");
-  printf("timecost=%g sec\n", tcost);
-  printf("MaxError=%g/%g = %g\n", maxerr, wsum, maxerr / wsum);
-  printf("maxlevel = %lu, usedlevel=%lu, limit_size=%lu\n", sketch.nlevel, sketch.level.size(), sketch.limit_size);
-  return out;
-}
-
-int main(int argc, char *argv[]) {
-  const char *method = "wq";
-  if (argc > 1) method = argv[1];
-  std::vector<Entry> dat;
-  if (!strcmp(method, "wq")) {
-    verifyWQ(dat, test<utils::WQuantileSketch<float, float>, float>(dat));
-  }
-  if (!strcmp(method, "wx")) {
-    verifyWQ(dat, test<utils::WXQuantileSketch<float, float>, float>(dat));
-  }
-  if (!strcmp(method, "gk")) {
-    test<utils::GKQuantileSketch<float, unsigned>, unsigned>(dat);
-  }
-  return 0;
-}
				`@@ -0,0 +1 @@`
				`This folder contains git subtree projects of xgboost`