add kmeans example
This commit is contained in:
parent
76abd80cb7
commit
39504825d8
@ -7,12 +7,11 @@ export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -fPIC -I../include
|
||||
# specify tensor path
|
||||
BIN = kmeans.rabit
|
||||
MOCKBIN= kmeans.mock
|
||||
MPIBIN = kmeans.mpi
|
||||
# objects that make up the rabit library
|
||||
OBJ = kmeans.o
|
||||
MPIBIN = kmeans.mpi
|
||||
.PHONY: clean all lib libmpi
|
||||
|
||||
all: $(BIN) $(MOCKBIN)
|
||||
.PHONY: clean all lib
|
||||
all: $(BIN)
|
||||
|
||||
lib:
|
||||
cd ..;make lib/librabit.a lib/librabit_mock.a; cd -
|
||||
@ -38,4 +37,4 @@ $(MPIBIN) :
|
||||
$(MPICXX) $(CFLAGS) -o $@ $(filter %.cpp %.o %.c %.cc %.a, $^) $(LDFLAGS) -lrabit_mpi
|
||||
|
||||
clean:
|
||||
$(RM) $(OBJ) $(BIN) $(MPIBIN) *~ ../src/*~
|
||||
$(RM) $(OBJ) $(BIN) $(MPIBIN) $(MOCKBIN) *~ ../src/*~
|
||||
|
||||
@ -83,7 +83,9 @@ inline size_t GetCluster(const Matrix &centroids,
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
if (argc < 5) {
|
||||
printf("Usage: <data_dir> num_cluster max_iter <out_model>\n");
|
||||
if (rabit::GetRank() == 0) {
|
||||
rabit::TrackerPrintf("Usage: <data_dir> num_cluster max_iter <out_model>\n");
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
clock_t tStart = clock();
|
||||
|
||||
@ -6,4 +6,4 @@ then
|
||||
fi
|
||||
#set path to hadoop streaming jar here
|
||||
STREAMING_JAR=
|
||||
python ../tracker/rabit_hadoop.py -hs $STREAMING_JAR -s $1 -i $2 -m kmeans.rabit --args "stdin "$3" "$4" stdout" -o $5 --file kmeans.rabit
|
||||
python ../tracker/rabit_hadoop.py -hs $STREAMING_JAR -n $1 -i $2 -o $5 kmeans.rabit stdin $3 $4 stdout
|
||||
|
||||
@ -29,7 +29,7 @@ struct SparseMat {
|
||||
v.length = static_cast<unsigned>(row_ptr[i + 1]-row_ptr[i]);
|
||||
return v;
|
||||
}
|
||||
// load data from file
|
||||
// load data from LibSVM format
|
||||
inline void Load(const char *fname) {
|
||||
FILE *fi;
|
||||
if (!strcmp(fname, "stdin")) {
|
||||
@ -41,17 +41,25 @@ struct SparseMat {
|
||||
row_ptr.push_back(0);
|
||||
data.clear();
|
||||
feat_dim = 0;
|
||||
unsigned num_feat;
|
||||
while (fscanf(fi, "%u", &num_feat) == 1) {
|
||||
float label; bool init = true;
|
||||
char tmp[1024];
|
||||
while (fscanf(file, "%s", tmp) == 1) {
|
||||
Entry e;
|
||||
for (unsigned i = 0; i < num_feat; ++i) {
|
||||
utils::Check(fscanf(fi, "%u:%f", &e.findex, &e.fvalue) == 2,
|
||||
"invalid format");
|
||||
if (sscanf(tmp, "%u:%f", &e.findex, &e.fvalue) == 2) {
|
||||
data.push_back(e);
|
||||
feat_dim = std::max(e.findex, feat_dim);
|
||||
} else {
|
||||
if (!init) {
|
||||
labels.push_back(label);
|
||||
row_ptr.push_back(data.size());
|
||||
}
|
||||
utils::Check(sscanf(tmp, "%f", &label) == 1, "invalid LibSVM format");
|
||||
init = false;
|
||||
}
|
||||
row_ptr.push_back(data.size());
|
||||
}
|
||||
// last row
|
||||
labels.push_back(label);
|
||||
row_ptr.push_back(data.size());
|
||||
feat_dim += 1;
|
||||
// close the file
|
||||
if (fi != stdin) fclose(fi);
|
||||
@ -63,6 +71,7 @@ struct SparseMat {
|
||||
unsigned feat_dim;
|
||||
std::vector<size_t> row_ptr;
|
||||
std::vector<Entry> data;
|
||||
std::vector<float> labels;
|
||||
};
|
||||
// dense matrix
|
||||
struct Matrix {
|
||||
@ -85,7 +94,6 @@ struct Matrix {
|
||||
} else {
|
||||
fo = utils::FopenCheck(fname, "w");
|
||||
}
|
||||
fprintf(fo, "%lu %lu\n", nrow, ncol);
|
||||
for (size_t i = 0; i < data.size(); ++i) {
|
||||
fprintf(fo, "%g", data[i]);
|
||||
if ((i+1) % ncol == 0) {
|
||||
|
||||
@ -11,7 +11,7 @@ import subprocess
|
||||
import rabit_tracker as tracker
|
||||
|
||||
#!!! Set path to hadoop and hadoop streaming jar here
|
||||
hadoop_binary = None
|
||||
hadoop_binary = 'hadoop'
|
||||
hadoop_streaming_jar = None
|
||||
|
||||
# code
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user