Save Scikit-Learn attributes into learner attributes. (#5245)

* Remove the recommendation for pickle.
* Save skl attributes in booster.attr.
* Test loading scikit-learn model with native booster.
2020-01-30 16:00:18 +08:00
parent c67163250e
commit 472ded549d
8 changed files with 194 additions and 57 deletions
--- a/src/common/io.h
+++ b/src/common/io.h
@@ -52,7 +52,7 @@ class PeekableInStream : public dmlc::Stream {
 class FixedSizeStream : public PeekableInStream {
 public:
  explicit FixedSizeStream(PeekableInStream* stream);
-  ~FixedSizeStream() = default;
+  ~FixedSizeStream() override = default;

  size_t Read(void* dptr, size_t size) override;
  size_t PeekRead(void* dptr, size_t size) override;
--- a/src/common/json.cc
+++ b/src/common/json.cc
@@ -1,6 +1,7 @@
 /*!
 * Copyright (c) by Contributors 2019
 */
+#include <cctype>
 #include <sstream>
 #include <limits>
 #include <cmath>
@@ -351,7 +352,9 @@ Json JsonReader::Parse() {
      return ParseObject();
    } else if ( c == '[' ) {
      return ParseArray();
-    } else if ( c == '-' || std::isdigit(c) ) {
+    } else if ( c == '-' || std::isdigit(c) ||
+                c == 'N' ) {
+      // For now we only accept `NaN`, not `nan`, as the latter violates LR(1) with `null`.
      return ParseNumber();
    } else if ( c == '\"' ) {
      return ParseString();
@@ -547,6 +550,13 @@ Json JsonReader::ParseNumber() {

  // TODO(trivialfis): Add back all the checks for number
  bool negative = false;
+  if (XGBOOST_EXPECT(*p == 'N', false)) {
+    GetChar('N');
+    GetChar('a');
+    GetChar('N');
+    return Json(static_cast<Number::Float>(std::numeric_limits<float>::quiet_NaN()));
+  }
+
  if ('-' == *p) {
    ++p;
    negative = true;
--- a/src/learner.cc
+++ b/src/learner.cc
@@ -661,13 +661,13 @@ class LearnerImpl : public Learner {
      CHECK(header == serialisation_header_)  // NOLINT
          << R"doc(

-If you are loading a serialized model (like pickle in Python) generated by older XGBoost,
-please export the model by calling `Booster.save_model` from that version first, then load
-it back in current version.  See:
+  If you are loading a serialized model (like pickle in Python) generated by older
+  XGBoost, please export the model by calling `Booster.save_model` from that version
+  first, then load it back in current version.  See:

-  https://xgboost.readthedocs.io/en/latest/tutorials/saving_model.html
+    https://xgboost.readthedocs.io/en/latest/tutorials/saving_model.html

-for more details about differences between saving model and serializing.
+  for more details about differences between saving model and serializing.

 )doc";
      int64_t json_offset {-1};