add kmeans example
This commit is contained in:
parent
76abd80cb7
commit
39504825d8
@ -7,12 +7,11 @@ export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -fPIC -I../include
|
|||||||
# specify tensor path
|
# specify tensor path
|
||||||
BIN = kmeans.rabit
|
BIN = kmeans.rabit
|
||||||
MOCKBIN= kmeans.mock
|
MOCKBIN= kmeans.mock
|
||||||
|
MPIBIN = kmeans.mpi
|
||||||
# object files that make up the rabit library
|
# object files that make up the rabit library
|
||||||
OBJ = kmeans.o
|
OBJ = kmeans.o
|
||||||
MPIBIN = kmeans.mpi
|
.PHONY: clean all lib
|
||||||
.PHONY: clean all lib libmpi
|
all: $(BIN)
|
||||||
|
|
||||||
all: $(BIN) $(MOCKBIN)
|
|
||||||
|
|
||||||
lib:
|
lib:
|
||||||
cd ..;make lib/librabit.a lib/librabit_mock.a; cd -
|
cd ..;make lib/librabit.a lib/librabit_mock.a; cd -
|
||||||
@ -38,4 +37,4 @@ $(MPIBIN) :
|
|||||||
$(MPICXX) $(CFLAGS) -o $@ $(filter %.cpp %.o %.c %.cc %.a, $^) $(LDFLAGS) -lrabit_mpi
|
$(MPICXX) $(CFLAGS) -o $@ $(filter %.cpp %.o %.c %.cc %.a, $^) $(LDFLAGS) -lrabit_mpi
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
$(RM) $(OBJ) $(BIN) $(MPIBIN) *~ ../src/*~
|
$(RM) $(OBJ) $(BIN) $(MPIBIN) $(MOCKBIN) *~ ../src/*~
|
||||||
|
|||||||
@ -83,7 +83,9 @@ inline size_t GetCluster(const Matrix &centroids,
|
|||||||
|
|
||||||
int main(int argc, char *argv[]) {
|
int main(int argc, char *argv[]) {
|
||||||
if (argc < 5) {
|
if (argc < 5) {
|
||||||
printf("Usage: <data_dir> num_cluster max_iter <out_model>\n");
|
if (rabit::GetRank() == 0) {
|
||||||
|
rabit::TrackerPrintf("Usage: <data_dir> num_cluster max_iter <out_model>\n");
|
||||||
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
clock_t tStart = clock();
|
clock_t tStart = clock();
|
||||||
|
|||||||
@ -6,4 +6,4 @@ then
|
|||||||
fi
|
fi
|
||||||
#set path to hadoop streaming jar here
|
#set path to hadoop streaming jar here
|
||||||
STREAMING_JAR=
|
STREAMING_JAR=
|
||||||
python ../tracker/rabit_hadoop.py -hs $STREAMING_JAR -s $1 -i $2 -m kmeans.rabit --args "stdin "$3" "$4" stdout" -o $5 --file kmeans.rabit
|
python ../tracker/rabit_hadoop.py -hs $STREAMING_JAR -n $1 -i $2 -o $5 kmeans.rabit stdin $3 $4 stdout
|
||||||
|
|||||||
@ -29,7 +29,7 @@ struct SparseMat {
|
|||||||
v.length = static_cast<unsigned>(row_ptr[i + 1]-row_ptr[i]);
|
v.length = static_cast<unsigned>(row_ptr[i + 1]-row_ptr[i]);
|
||||||
return v;
|
return v;
|
||||||
}
|
}
|
||||||
// load data from file
|
// load data from LibSVM format
|
||||||
inline void Load(const char *fname) {
|
inline void Load(const char *fname) {
|
||||||
FILE *fi;
|
FILE *fi;
|
||||||
if (!strcmp(fname, "stdin")) {
|
if (!strcmp(fname, "stdin")) {
|
||||||
@ -41,17 +41,25 @@ struct SparseMat {
|
|||||||
row_ptr.push_back(0);
|
row_ptr.push_back(0);
|
||||||
data.clear();
|
data.clear();
|
||||||
feat_dim = 0;
|
feat_dim = 0;
|
||||||
unsigned num_feat;
|
float label; bool init = true;
|
||||||
while (fscanf(fi, "%u", &num_feat) == 1) {
|
char tmp[1024];
|
||||||
|
while (fscanf(file, "%s", tmp) == 1) {
|
||||||
Entry e;
|
Entry e;
|
||||||
for (unsigned i = 0; i < num_feat; ++i) {
|
if (sscanf(tmp, "%u:%f", &e.findex, &e.fvalue) == 2) {
|
||||||
utils::Check(fscanf(fi, "%u:%f", &e.findex, &e.fvalue) == 2,
|
|
||||||
"invalid format");
|
|
||||||
data.push_back(e);
|
data.push_back(e);
|
||||||
feat_dim = std::max(e.findex, feat_dim);
|
feat_dim = std::max(e.findex, feat_dim);
|
||||||
|
} else {
|
||||||
|
if (!init) {
|
||||||
|
labels.push_back(label);
|
||||||
|
row_ptr.push_back(data.size());
|
||||||
|
}
|
||||||
|
utils::Check(sscanf(tmp, "%f", &label) == 1, "invalid LibSVM format");
|
||||||
|
init = false;
|
||||||
}
|
}
|
||||||
row_ptr.push_back(data.size());
|
|
||||||
}
|
}
|
||||||
|
// last row
|
||||||
|
labels.push_back(label);
|
||||||
|
row_ptr.push_back(data.size());
|
||||||
feat_dim += 1;
|
feat_dim += 1;
|
||||||
// close the file
|
// close the file
|
||||||
if (fi != stdin) fclose(fi);
|
if (fi != stdin) fclose(fi);
|
||||||
@ -63,6 +71,7 @@ struct SparseMat {
|
|||||||
unsigned feat_dim;
|
unsigned feat_dim;
|
||||||
std::vector<size_t> row_ptr;
|
std::vector<size_t> row_ptr;
|
||||||
std::vector<Entry> data;
|
std::vector<Entry> data;
|
||||||
|
std::vector<float> labels;
|
||||||
};
|
};
|
||||||
// dense matrix
|
// dense matrix
|
||||||
struct Matrix {
|
struct Matrix {
|
||||||
@ -85,7 +94,6 @@ struct Matrix {
|
|||||||
} else {
|
} else {
|
||||||
fo = utils::FopenCheck(fname, "w");
|
fo = utils::FopenCheck(fname, "w");
|
||||||
}
|
}
|
||||||
fprintf(fo, "%lu %lu\n", nrow, ncol);
|
|
||||||
for (size_t i = 0; i < data.size(); ++i) {
|
for (size_t i = 0; i < data.size(); ++i) {
|
||||||
fprintf(fo, "%g", data[i]);
|
fprintf(fo, "%g", data[i]);
|
||||||
if ((i+1) % ncol == 0) {
|
if ((i+1) % ncol == 0) {
|
||||||
|
|||||||
@ -11,7 +11,7 @@ import subprocess
|
|||||||
import rabit_tracker as tracker
|
import rabit_tracker as tracker
|
||||||
|
|
||||||
#!!! Set path to hadoop and hadoop streaming jar here
|
#!!! Set path to hadoop and hadoop streaming jar here
|
||||||
hadoop_binary = None
|
hadoop_binary = 'hadoop'
|
||||||
hadoop_streaming_jar = None
|
hadoop_streaming_jar = None
|
||||||
|
|
||||||
# code
|
# code
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user