From 7fefd6865d861e7dd1b76cc6bd12c27c0b7f4d53 Mon Sep 17 00:00:00 2001
From: Philip Hyunsu Cho
Date: Sat, 4 Aug 2018 19:20:04 -0700
Subject: [PATCH] Fix #3402: wrong fid crashes distributed algorithm (#3535)

* Fix #3402: wrong fid crashes distributed algorithm

The bug was introduced by the recent DMatrix refactor (#3301). It was
partially fixed by #3408, but the example in #3402 was still failing;
it succeeds once this fix is applied.

* Explicitly specify "this" to prevent compile error

* Add regression test

* Add distributed test to Travis matrix

* Install kubernetes Python package as dependency of dmlc tracker

* Add Python dependencies

* Add compile step

* Reduce size of regression test case

* Further reduce size of test
---
 .travis.yml                         |  4 ++
 src/tree/updater_histmaker.cc       |  5 +-
 tests/distributed/runtests.sh       |  5 ++
 tests/distributed/test_issue3402.py | 77 +++++++++++++++++++++++++++++
 tests/travis/run_test.sh            |  7 +++
 tests/travis/setup.sh               |  4 ++
 6 files changed, 100 insertions(+), 2 deletions(-)
 create mode 100644 tests/distributed/test_issue3402.py

diff --git a/.travis.yml b/.travis.yml
index d607f8d6e..e86cfd048 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -26,6 +26,8 @@ env:
   - TASK=cmake_test
   # c++ test
   - TASK=cpp_test
+  # distributed test
+  - TASK=distributed_test
 
 matrix:
   exclude:
@@ -39,6 +41,8 @@ matrix:
       env: TASK=python_lightweight_test
     - os: osx
       env: TASK=cpp_test
+    - os: osx
+      env: TASK=distributed_test
 
 # dependent apt packages
 addons:
diff --git a/src/tree/updater_histmaker.cc b/src/tree/updater_histmaker.cc
index 3cc0a760b..638017355 100644
--- a/src/tree/updater_histmaker.cc
+++ b/src/tree/updater_histmaker.cc
@@ -725,9 +725,10 @@ class GlobalProposalHistMaker: public CQHistMaker {
       const auto nsize = static_cast<bst_omp_uint>(this->work_set_.size());
       #pragma omp parallel for schedule(dynamic, 1)
       for (bst_omp_uint i = 0; i < nsize; ++i) {
-        int offset = this->feat2workindex_[this->work_set_[i]];
+        int fid = this->work_set_[i];
+        int offset = this->feat2workindex_[fid];
         if (offset >= 0) {
-          this->UpdateHistCol(gpair, batch[i], info, tree,
+          this->UpdateHistCol(gpair, batch[fid], info, tree,
                               fset, offset,
                               &this->thread_hist_[omp_get_thread_num()]);
         }
diff --git a/tests/distributed/runtests.sh b/tests/distributed/runtests.sh
index 997fb1893..a798802c8 100755
--- a/tests/distributed/runtests.sh
+++ b/tests/distributed/runtests.sh
@@ -1,4 +1,9 @@
 #!/bin/bash
 
+echo "====== 1. Basic distributed test with Python ======"
 PYTHONPATH=../../python-package/ ../../dmlc-core/tracker/dmlc-submit --cluster=local --num-workers=3\
     python test_basic.py
+
+echo "====== 2. Regression test for issue #3402 ======"
+PYTHONPATH=../../python-package/ ../../dmlc-core/tracker/dmlc-submit --cluster=local --num-workers=2 --worker-cores=1\
+    python test_issue3402.py
diff --git a/tests/distributed/test_issue3402.py b/tests/distributed/test_issue3402.py
new file mode 100644
index 000000000..3dd895226
--- /dev/null
+++ b/tests/distributed/test_issue3402.py
@@ -0,0 +1,77 @@
+#!/usr/bin/python
+import xgboost as xgb
+
+xgb.rabit.init()
+
+X = [
+    [15.00,28.90,29.00,3143.70,0.00,0.10,69.90,90.00,13726.07,0.00,2299.70,0.00,0.05,
+     4327.03,0.00,24.00,0.18,3.00,0.41,3.77,0.00,0.00,4.00,0.00,150.92,0.00,2.00,0.00,
+     0.01,138.00,1.00,0.02,69.90,0.00,0.83,5.00,0.01,0.12,47.30,0.00,296.00,0.16,0.00,
+     0.00,27.70,7.00,7.25,4406.16,1.00,0.54,245.28,3.00,0.06,306.50,5143.00,29.00,23.74,
+     548.00,2.00,68.00,70.90,25.45,0.39,0.00,0.01,497.11,0.00,42.00,83.00,4.00,0.00,1.00,
+     0.00,104.35,94.12,0.03,79.23,237.69,1.00,0.04,0.01,0.02,2.00,108.81,7.00,12.00,0.46,
+     31.00,0.00,0.15,74.59,0.00,19.50,0.00,0.75,0.06,0.08,118.00,35.90,0.01,0.07,1.00,
+     0.03,81.18,13.33,0.00,0.00,0.00,0.00,0.00,0.41,0.00,0.15,57.00,0.00,22.00,449.68,
+     0.00,0.00,2.00,195.26,51.58,306.50,0.10,1.00,0.00,258.00,21.00,0.43,3.00,16.00,0.00,
+     0.00,0.00,0.00,1.00,74.51,4.00,0.02,35.90,30.00,8.69,0.00,0.36,5.00,2.00,3.00,0.26,
+     9.50,8.00,11.00,11918.15,0.00,258.00,13.00,9.04,0.14,604.65,0.92,74.59,0.00,0.00,
+     72.76,1.00,0.22,64.00,2.00,0.00,0.00,0.02,0.00,305.50,27.70,0.02,0.00,177.00,14.00,
+     0.00,0.05,90.00,0.03,0.00,1.00,0.43,4.00,0.05,0.09,431.00,0.00,2.00,0.00,0.00,1.00,
+     0.25,0.17,0.00,0.00,21.00,94.12,0.17,0.00,0.00,0.00,548.00,0.00,68.00,0.00,0.00,9.50,
+     25.45,1390.31,7.00,0.00,2.00,310.70,0.00,0.01,0.01,0.03,81.40,1.00,0.02,0.00,9.00,
+     6.00,0.00,175.76,36.00,0.00,20.75,2.00,0.00,0.00,0.00,0.22,74.16,0.10,56.81,0.00,
+     2197.03,0.00,197.66,0.00,55.00,20.00,367.18,22.00,0.00,0.01,1510.26,0.24,0.00,0.01,
+     0.00,11.00,278.10,61.70,278.10,0.00,0.08,0.57,1.00,0.65,255.60,0.00,0.86,0.25,70.95,
+     2299.70,0.23,0.05,92.70,1.00,38.00,0.00,0.00,56.81,21.85,0.00,23.74,0.00,2.00,0.03,
+     2.00,0.00,347.58,30.00,243.55,109.00,0.00,296.00,6.00,6.00,0.00,0.00,109.00,2299.70,
+     0.00,0.01,0.08,1.00,4745.09,4.00,0.18,0.00,0.17,0.02,0.00,1.00,147.13,71.07,2115.16,
+     0.00,0.26,0.00,43.00,604.90,49.44,4327.03,0.68,0.75,0.10,86.36,52.98,0.20,0.00,22.50,
+     305.50,0.00,1.00,0.00,7.00,0.78,0.00,296.00,22.50,0.00,5.00,2979.54,1.00,14.00,51.00,
+     0.42,0.11,0.00,1.00,0.00,0.00,70.90,37.84,0.02,548.40,0.00,46.35,5.00,1.66,0.29,0.00,
+     0.02,2255.69,160.53,790.64,6775.15,0.68,19.50,2299.70,79.87,6.00,0.00,60.00,0.27,
+     233.77,10.00,0.00,0.00,23.00,82.27,1.00,0.00,1.00,0.42,1.00,0.01,0.40,0.41,9.50,2299.70,
+     46.30,0.00,0.00,2299.70,3.00,0.00,0.00,83.00,1.00],
+    [48.00,80.89,69.90,11570.00,26.00,0.40,468.00,0.00,5739.46,0.00,1480.00,90.89,0.00,
+     14042.09,3600.08,120.00,0.09,31.00,0.25,2.36,0.00,7.00,22.00,0.00,257.59,0.00,6.00,
+     260.00,0.05,313.00,1.00,0.07,468.00,0.00,0.67,11.00,0.02,0.32,0.00,0.00,1387.61,0.34,
+     0.00,0.00,158.04,6.00,13.98,12380.05,0.00,0.16,122.74,3.00,0.18,291.33,7517.79,124.00,
+     45.08,900.00,1.00,0.00,577.25,79.75,0.39,0.00,0.00,244.62,0.00,57.00,178.00,19.00,
+     0.00,1.00,386.10,103.51,480.00,0.06,129.41,334.31,1.00,0.06,0.00,0.06,3.00,125.55,
+     0.00,76.00,0.14,30.00,0.00,0.03,411.29,791.33,55.00,0.12,3.80,0.07,0.01,188.00,221.11,
+     0.01,0.15,1.00,0.18,144.32,15.00,0.00,0.05,0.00,3.00,0.00,0.20,0.00,0.14,62.00,0.06,
+     55.00,239.35,0.00,0.00,2.00,534.20,747.50,400.57,0.40,0.00,0.00,219.98,30.00,0.25,
+     1.00,70.00,0.02,0.04,0.00,0.00,7.00,747.50,8.67,0.06,271.01,28.00,5.63,75.39,0.46,
+     11.00,3.00,19.00,0.38,131.74,23.00,39.00,30249.41,0.00,202.68,2.00,64.94,0.03,2787.68,
+     0.54,35.00,0.02,106.03,25.00,1.00,0.10,45.00,2.00,0.00,0.00,0.00,0.00,449.27,172.38,
+     0.05,0.00,550.00,130.00,2006.55,0.07,0.00,0.03,0.00,5.00,0.21,22.00,0.05,0.01,1011.40,
+     0.00,4.00,3600.08,0.00,1.00,1.00,1.00,0.00,3.00,9.00,270.00,0.12,0.03,0.00,0.00,820.00,
+     1827.50,0.00,100.33,0.00,131.74,53.16,9557.97,7.00,0.00,11.00,180.81,0.00,0.01,0.04,
+     0.02,1480.00,0.92,0.05,0.00,15.00,6.00,0.00,161.42,28.00,169.00,35.60,4.00,0.12,0.00,
+     0.00,0.27,230.56,0.42,171.90,0.00,28407.51,1.00,883.10,0.00,261.00,9.00,1031.67,38.00,
+     0.00,0.04,1607.68,0.32,791.33,0.04,1403.00,2.00,2260.50,88.08,2260.50,0.00,0.12,0.75,
+     3.00,0.00,1231.68,0.07,0.60,0.24,0.00,0.00,0.15,0.14,753.50,1.00,95.00,7.00,0.26,
+     77.63,38.45,0.00,42.65,0.00,14.00,0.07,6.00,0.00,1911.59,43.00,386.77,1324.80,0.00,
+     518.00,10.00,10.00,0.11,0.00,1324.80,0.00,0.00,0.02,0.16,1.00,10492.12,5.00,0.94,
+     5.00,0.08,0.10,1.00,0.92,3731.49,105.81,6931.39,0.00,0.43,0.00,118.00,5323.71,81.66,
+     14042.09,0.08,0.20,0.40,96.64,0.00,0.08,4.00,1028.82,353.00,0.00,2.00,32.00,43.00,
+     5.16,75.39,900.00,232.10,3.00,5.00,6049.88,1.00,126.00,46.00,0.59,0.15,0.00,8.00,
+     7.00,0.00,577.25,0.00,0.07,2415.10,0.00,83.72,9.00,1.76,0.20,0.00,0.17,3278.65,155.26,
+     4415.50,22731.62,1.00,55.00,0.00,499.94,22.00,0.58,67.00,0.21,341.72,16.00,0.00,965.07,
+     17.00,138.41,0.00,0.00,1.00,0.14,1.00,0.02,0.35,1.69,369.00,1300.00,25.00,0.00,0.01,
+     0.00,0.00,0.00,0.00,52.00,8.00]]
+y = [1, 0]
+
+dtrain = xgb.DMatrix(X, label=y)
+
+param = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic' }
+watchlist = [(dtrain,'train')]
+num_round = 2
+bst = xgb.train(param, dtrain, num_round, watchlist)
+
+if xgb.rabit.get_rank() == 0:
+    bst.save_model("test_issue3402.model")
+    xgb.rabit.tracker_print("Finished training\n")
+
+# Notify the tracker all training has been successful
+# This is only needed in distributed training.
+xgb.rabit.finalize()
diff --git a/tests/travis/run_test.sh b/tests/travis/run_test.sh
index 32caf1825..fa02e9266 100755
--- a/tests/travis/run_test.sh
+++ b/tests/travis/run_test.sh
@@ -147,3 +147,10 @@ if [ ${TASK} == "cpp_test" ]; then
     echo "GTEST_PATH="${CACHE_PREFIX} >> config.mk
     make cover
 fi
+
+if [ ${TASK} == "distributed_test" ]; then
+    set -e
+    make all || exit -1
+    cd tests/distributed
+    ./runtests.sh
+fi
diff --git a/tests/travis/setup.sh b/tests/travis/setup.sh
index 086ca6954..546e80344 100755
--- a/tests/travis/setup.sh
+++ b/tests/travis/setup.sh
@@ -27,3 +27,7 @@ if [ ${TASK} == "python_test" ] || [ ${TASK} == "python_lightweight_test" ]; the
     conda create -n python3 python=3.5
     conda create -n python2 python=2.7
 fi
+
+if [ ${TASK} == "distributed_test" ]; then
+    pip install --user kubernetes numpy scipy
+fi
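
Background on the core change in src/tree/updater_histmaker.cc: work_set_
stores feature IDs, and the column batch is indexed by feature ID, whereas
the loop counter i is only a position within work_set_. The two coincide
only when the work set covers every feature; GlobalProposalHistMaker works
over a proposed subset, so batch[i] read the wrong column, which is how the
wrong fid crashed the distributed algorithm. Below is a minimal standalone
C++ sketch of the indexing mistake; Column, columns, and work_set are
hypothetical names chosen for illustration, not xgboost's own.

    #include <cstdio>
    #include <vector>

    // Stand-in for one column of a column-major batch:
    // columns[fid] holds the data of feature fid.
    struct Column { int feature_id; };

    int main() {
      std::vector<Column> columns = {{0}, {1}, {2}, {3}};
      // A work set selecting a subset of features; note that
      // work_set[i] != i in general.
      std::vector<int> work_set = {1, 3};
      for (std::size_t i = 0; i < work_set.size(); ++i) {
        int fid = work_set[i];
        // The buggy pattern indexes by position (columns[i]);
        // the fix indexes by feature ID (columns[fid]).
        std::printf("i=%zu  buggy -> feature %d, fixed -> feature %d\n",
                    i, columns[i].feature_id, columns[fid].feature_id);
      }
      return 0;
    }

With work_set = {1, 3}, the buggy indexing reads features 0 and 1 instead of
1 and 3, which is the wrong-fid access that this patch removes.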