From 67229fd7a9e97a4c032921592f91bfc0d8730cd0 Mon Sep 17 00:00:00 2001 From: tqchen Date: Thu, 4 Dec 2014 09:05:48 -0800 Subject: [PATCH] change model --- toolkit/kmeans.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/toolkit/kmeans.cpp b/toolkit/kmeans.cpp index 24f784923..109b49826 100644 --- a/toolkit/kmeans.cpp +++ b/toolkit/kmeans.cpp @@ -73,13 +73,13 @@ inline size_t GetCluster(const Matrix &centroids, double dmin = Cos(centroids[0], v); for (size_t k = 1; k < centroids.nrow; ++k) { double dist = Cos(centroids[k], v); - if (dist < dmin) { + if (dist > dmin) { dmin = dist; imin = k; - } + } } return imin; } - + int main(int argc, char *argv[]) { if (argc < 5) { printf("Usage: num_cluster max_iter \n"); @@ -116,9 +116,11 @@ int main(int argc, char *argv[]) { for (size_t i = 0; i < ndata; ++i) { SparseMat::Vector v = data[i]; size_t k = GetCluster(model.centroids, v); + // temp[k] += v for (size_t j = 0; j < v.length; ++j) { temp[k][v[j].findex] += v[j].fvalue; } + // use last column to record counts temp[k][num_feat] += 1.0f; } // call allreduce @@ -126,6 +128,7 @@ int main(int argc, char *argv[]) { // set number for (int k = 0; k < num_cluster; ++k) { float cnt = temp[k][num_feat]; + utils::Check(cnt != 0.0f, "get zero sized cluster"); for (unsigned i = 0; i < num_feat; ++i) { model.centroids[k][i] = temp[k][i] / cnt; }