diff --git a/demo/binary_classification/mapfeat.py b/demo/binary_classification/mapfeat.py
index 74ca22d32..186af29e6 100755
--- a/demo/binary_classification/mapfeat.py
+++ b/demo/binary_classification/mapfeat.py
@@ -24,7 +24,7 @@ def loadfmap( fname ):
     return fmap, nmap
 
 def write_nmap( fo, nmap ):
-    for i in xrange( len(nmap) ):
+    for i in range( len(nmap) ):
         fo.write('%d\t%s\ti\n' % (i, nmap[i]) )
 
 # start here
@@ -41,7 +41,7 @@ for l in open( 'agaricus-lepiota.data' ):
     else:
         assert arr[0] == 'e'
         fo.write('0')
-    for i in xrange( 1,len(arr) ):
+    for i in range( 1,len(arr) ):
         fo.write( ' %d:1' % fmap[i][arr[i].strip()] )
     fo.write('\n')
 
diff --git a/demo/binary_classification/mknfold.py b/demo/binary_classification/mknfold.py
index 0f42c301d..a941f8609 100755
--- a/demo/binary_classification/mknfold.py
+++ b/demo/binary_classification/mknfold.py
@@ -3,7 +3,7 @@ import sys
 import random
 
 if len(sys.argv) < 2:
-    print 'Usage: [nfold = 5]'
+    print ('Usage: [nfold = 5]')
     exit(0)
 
 random.seed( 10 )
diff --git a/demo/kaggle-higgs/higgs-numpy.py b/demo/kaggle-higgs/higgs-numpy.py
index 2bf4a82a5..1cb7ec00c 100755
--- a/demo/kaggle-higgs/higgs-numpy.py
+++ b/demo/kaggle-higgs/higgs-numpy.py
@@ -1,9 +1,15 @@
 #!/usr/bin/python
 # this is the example script to use xgboost to train
+import inspect
+import os
 import sys
 import numpy as np
 # add path of xgboost python module
-sys.path.append('../../python/')
+code_path = os.path.join(
+    os.path.split(inspect.getfile(inspect.currentframe()))[0], "../../python")
+
+sys.path.append(code_path)
+
 import xgboost as xgb
 
 test_size = 550000
@@ -12,19 +18,19 @@ test_size = 550000
 dpath = 'data'
 
 # load in training data, directly use numpy
-dtrain = np.loadtxt( dpath+'/training.csv', delimiter=',', skiprows=1, converters={32: lambda x:int(x=='s') } )
-print 'finish loading from csv '
+dtrain = np.loadtxt( dpath+'/training.csv', delimiter=',', skiprows=1, converters={32: lambda x:int(x=='s'.encode('utf-8')) } )
+print ('finish loading from csv ')
 
 label = dtrain[:,32]
 data = dtrain[:,1:31]
 # rescale weight to make it same as test set
 weight = dtrain[:,31] * float(test_size) / len(label)
 
-sum_wpos = sum( weight[i] for i in xrange(len(label)) if label[i] == 1.0 )
-sum_wneg = sum( weight[i] for i in xrange(len(label)) if label[i] == 0.0 )
+sum_wpos = sum( weight[i] for i in range(len(label)) if label[i] == 1.0 )
+sum_wneg = sum( weight[i] for i in range(len(label)) if label[i] == 0.0 )
 
 # print weight statistics
-print 'weight statistics: wpos=%g, wneg=%g, ratio=%g' % ( sum_wpos, sum_wneg, sum_wneg/sum_wpos )
+print ('weight statistics: wpos=%g, wneg=%g, ratio=%g' % ( sum_wpos, sum_wneg, sum_wneg/sum_wpos ))
 
 # construct xgboost.DMatrix from numpy array, treat -999.0 as missing value
 xgmat = xgb.DMatrix( data, label=label, missing = -999.0, weight=weight )
@@ -43,14 +49,14 @@ param['silent'] = 1
 param['nthread'] = 16
 
 # you can directly throw param in, though we want to watch multiple metrics here
-plst = param.items()+[('eval_metric', 'ams@0.15')]
+plst = list(param.items())+[('eval_metric', 'ams@0.15')]
 watchlist = [ (xgmat,'train') ]
 # boost 120 tres
 num_round = 120
-print 'loading data end, start to boost trees'
+print ('loading data end, start to boost trees')
 bst = xgb.train( plst, xgmat, num_round, watchlist );
 
 # save out model
 bst.save_model('higgs.model')
 
-print 'finish training'
+print ('finish training')
diff --git a/demo/kaggle-higgs/higgs-pred.py b/demo/kaggle-higgs/higgs-pred.py
index 3fad9c217..a38f6d82a 100755
--- a/demo/kaggle-higgs/higgs-pred.py
+++ b/demo/kaggle-higgs/higgs-pred.py
@@ -19,13 +19,13 @@
 dtest = np.loadtxt( dpath+'/test.csv', delimiter=',', skiprows=1 )
 data = dtest[:,1:31]
 idx = dtest[:,0]
-print 'finish loading from csv '
+print ('finish loading from csv ')
 
 xgmat = xgb.DMatrix( data, missing = -999.0 )
 bst = xgb.Booster({'nthread':16})
 bst.load_model( modelfile )
 ypred = bst.predict( xgmat )
 
-res = [ ( int(idx[i]), ypred[i] ) for i in xrange(len(ypred)) ]
+res = [ ( int(idx[i]), ypred[i] ) for i in range(len(ypred)) ]
 rorder = {}
 for k, v in sorted( res, key = lambda x:-x[1] ):
@@ -47,7 +47,7 @@ for k, v in res:
     ntot += 1
 fo.close()
 
-print 'finished writing into prediction file'
+print ('finished writing into prediction file')
diff --git a/demo/kaggle-higgs/run.sh b/demo/kaggle-higgs/run.sh
index c69426c25..23cde394b 100755
--- a/demo/kaggle-higgs/run.sh
+++ b/demo/kaggle-higgs/run.sh
@@ -1,4 +1,14 @@
 #!/bin/bash
 
-python higgs-numpy.py
-python higgs-pred.py
\ No newline at end of file
+python -u higgs-numpy.py
+ret=$?
+if [[ $ret != 0 ]]; then
+    echo "ERROR in higgs-numpy.py"
+    exit $ret
+fi
+python -u higgs-pred.py
+ret=$?
+if [[ $ret != 0 ]]; then
+    echo "ERROR in higgs-pred.py"
+    exit $ret
+fi
diff --git a/demo/kaggle-higgs/speedtest.py b/demo/kaggle-higgs/speedtest.py
index 8bef29ff2..86d5e3a3c 100755
--- a/demo/kaggle-higgs/speedtest.py
+++ b/demo/kaggle-higgs/speedtest.py
@@ -14,18 +14,18 @@ dpath = 'data'
 
 # load in training data, directly use numpy
 dtrain = np.loadtxt( dpath+'/training.csv', delimiter=',', skiprows=1, converters={32: lambda x:int(x=='s') } )
-print 'finish loading from csv '
+print ('finish loading from csv ')
 
 label = dtrain[:,32]
 data = dtrain[:,1:31]
 # rescale weight to make it same as test set
 weight = dtrain[:,31] * float(test_size) / len(label)
 
-sum_wpos = sum( weight[i] for i in xrange(len(label)) if label[i] == 1.0 )
-sum_wneg = sum( weight[i] for i in xrange(len(label)) if label[i] == 0.0 )
+sum_wpos = sum( weight[i] for i in range(len(label)) if label[i] == 1.0 )
+sum_wneg = sum( weight[i] for i in range(len(label)) if label[i] == 0.0 )
 
 # print weight statistics
-print 'weight statistics: wpos=%g, wneg=%g, ratio=%g' % ( sum_wpos, sum_wneg, sum_wneg/sum_wpos )
+print ('weight statistics: wpos=%g, wneg=%g, ratio=%g' % ( sum_wpos, sum_wneg, sum_wneg/sum_wpos ))
 
 # construct xgboost.DMatrix from numpy array, treat -999.0 as missing value
 xgmat = xgb.DMatrix( data, label=label, missing = -999.0, weight=weight )
@@ -47,20 +47,20 @@ plst = param.items()+[('eval_metric', 'ams@0.15')]
 watchlist = [ (xgmat,'train') ]
 # boost 10 tres
 num_round = 10
-print 'loading data end, start to boost trees'
-print "training GBM from sklearn"
+print ('loading data end, start to boost trees')
+print ("training GBM from sklearn")
 tmp = time.time()
 gbm = GradientBoostingClassifier(n_estimators=num_round, max_depth=6, verbose=2)
 gbm.fit(data, label)
-print "sklearn.GBM costs: %s seconds" % str(time.time() - tmp)
+print ("sklearn.GBM costs: %s seconds" % str(time.time() - tmp))
 #raw_input()
-print "training xgboost"
+print ("training xgboost")
 threads = [1, 2, 4, 16]
 for i in threads:
     param['nthread'] = i
     tmp = time.time()
     plst = param.items()+[('eval_metric', 'ams@0.15')]
     bst = xgb.train( plst, xgmat, num_round, watchlist );
-    print "XGBoost with %d thread costs: %s seconds" % (i, str(time.time() - tmp))
+    print ("XGBoost with %d thread costs: %s seconds" % (i, str(time.time() - tmp)))
 
-print 'finish training'
+print ('finish training')
diff --git a/demo/multiclass_classification/train.py b/demo/multiclass_classification/train.py
index 38d818890..df5e112aa 100755
--- a/demo/multiclass_classification/train.py
+++ b/demo/multiclass_classification/train.py
@@ -37,6 +37,6 @@ bst = xgb.train(param, xg_train, num_round, watchlist );
 # get prediction
 pred = bst.predict( xg_test );
 
-print 'predicting, classification error=%f' % (sum( int(pred[i]) != test_Y[i] for i in xrange(len(test_Y))) / float(len(test_Y)) )
+print ('predicting, classification error=%f' % (sum( int(pred[i]) != test_Y[i] for i in range(len(test_Y))) / float(len(test_Y)) ))
 
diff --git a/demo/rank/trans_data.py b/demo/rank/trans_data.py
index 3c9865106..7282848c4 100644
--- a/demo/rank/trans_data.py
+++ b/demo/rank/trans_data.py
@@ -2,18 +2,18 @@ import sys
 
 def save_data(group_data,output_feature,output_group):
     if len(group_data) == 0:
-	return
+        return
     output_group.write(str(len(group_data))+"\n")
     for data in group_data:
         # only include nonzero features
         feats = [ p for p in data[2:] if float(p.split(':')[1]) != 0.0 ]
-	output_feature.write(data[0] + " " + " ".join(feats) + "\n")
+        output_feature.write(data[0] + " " + " ".join(feats) + "\n")
 
 if __name__ == "__main__":
     if len(sys.argv) != 4:
-	print "Usage: python trans_data.py [Ranksvm Format Input] [Output Feature File] [Output Group File]"
-	sys.exit(0)
+        print ("Usage: python trans_data.py [Ranksvm Format Input] [Output Feature File] [Output Group File]")
+        sys.exit(0)
 
     fi = open(sys.argv[1])
     output_feature = open(sys.argv[2],"w")
     output_group = open(sys.argv[3],"w")
@@ -22,16 +22,16 @@ if __name__ == "__main__":
     group_data = []
     group = ""
     for line in fi:
-	if not line:
-	    break
-	if "#" in line:
-	    line = line[:line.index("#")]
+        if not line:
+            break
+        if "#" in line:
+            line = line[:line.index("#")]
         splits = line.strip().split(" ")
-	if splits[1] != group:
-	    save_data(group_data,output_feature,output_group)
-	    group_data = []
-	    group = splits[1]
-	group_data.append(splits)
+        if splits[1] != group:
+            save_data(group_data,output_feature,output_group)
+            group_data = []
+            group = splits[1]
+        group_data.append(splits)
 
     save_data(group_data,output_feature,output_group)
diff --git a/demo/regression/mapfeat.py b/demo/regression/mapfeat.py
index 5ee494fb1..d86dca38a 100755
--- a/demo/regression/mapfeat.py
+++ b/demo/regression/mapfeat.py
@@ -7,7 +7,7 @@ fmap = {}
 for l in open( 'machine.data' ):
     arr = l.split(',')
     fo.write(arr[8])
-    for i in xrange( 0,6 ):
+    for i in range( 0,6 ):
         fo.write( ' %d:%s' %(i,arr[i+2]) )
 
     if arr[0] not in fmap:
@@ -24,9 +24,9 @@ fo = open('featmap.txt', 'w')
 # list from machine.names
 names = ['vendor','MYCT', 'MMIN', 'MMAX', 'CACH', 'CHMIN', 'CHMAX', 'PRP', 'ERP' ];
 
-for i in xrange(0,6):
+for i in range(0,6):
     fo.write( '%d\t%s\tint\n' % (i, names[i+1]))
 
-for v, k in sorted( fmap.iteritems(), key = lambda x:x[1] ):
+for v, k in sorted( fmap.items(), key = lambda x:x[1] ):
     fo.write( '%d\tvendor=%s\ti\n' % (k, v))
 fo.close()
diff --git a/demo/regression/mknfold.py b/demo/regression/mknfold.py
index 0f42c301d..a941f8609 100755
--- a/demo/regression/mknfold.py
+++ b/demo/regression/mknfold.py
@@ -3,7 +3,7 @@ import sys
 import random
 
 if len(sys.argv) < 2:
-    print 'Usage: [nfold = 5]'
+    print ('Usage: [nfold = 5]')
     exit(0)
 
 random.seed( 10 )
diff --git a/python/example/demo.py b/python/example/demo.py
index 5cf3fb5db..d6c91b5ea 100755
--- a/python/example/demo.py
+++ b/python/example/demo.py
@@ -22,7 +22,7 @@ bst = xgb.train( param, dtrain, num_round, evallist )
 # this is prediction
 preds = bst.predict( dtest )
 labels = dtest.get_label()
-print 'error=%f' % ( sum(1 for i in xrange(len(preds)) if int(preds[i]>0.5)!=labels[i]) /float(len(preds)))
+print ('error=%f' % ( sum(1 for i in range(len(preds)) if int(preds[i]>0.5)!=labels[i]) /float(len(preds))))
 bst.save_model('0001.model')
 # dump model
 bst.dump_model('dump.raw.txt')
@@ -32,7 +32,7 @@ bst.dump_model('dump.raw.txt','featmap.txt')
 ###
 # build dmatrix in python iteratively
 #
-print 'start running example of build DMatrix in python'
+print ('start running example of build DMatrix in python')
 dtrain = xgb.DMatrix()
 labels = []
 for l in open('agaricus.txt.train'):
@@ -50,7 +50,7 @@ bst = xgb.train( param, dtrain, num_round, evallist )
 
 ###
 # build dmatrix from scipy.sparse
-print 'start running example of build DMatrix from scipy.sparse'
+print ('start running example of build DMatrix from scipy.sparse')
 labels = []
 row = []; col = []; dat = []
 i = 0
@@ -68,7 +68,7 @@ dtrain.set_label(labels)
 evallist = [(dtest,'eval'), (dtrain,'train')]
 bst = xgb.train( param, dtrain, num_round, evallist )
 
-print 'start running example of build DMatrix from numpy array'
+print ('start running example of build DMatrix from numpy array')
 # NOTE: npymat is numpy array, we will convert it into scipy.sparse.csr_matrix in internal implementation,then convert to DMatrix
 npymat = csr.todense()
 dtrain = xgb.DMatrix( npymat )
@@ -79,7 +79,7 @@ bst = xgb.train( param, dtrain, num_round, evallist )
 
 ###
 # advanced: cutomsized loss function, set loss_type to 0, so that predict get untransformed score
 #
-print 'start running example to used cutomized objective function'
+print ('start running example to use a customized objective function')
 # note: set loss_type properly, loss_type=2 means the prediction will get logistic transformed
 # in most case, we may want to set loss_type = 0, to get untransformed score to compute gradient
diff --git a/python/xgboost.py b/python/xgboost.py
index f51ef7ea5..690d0d4ed 100644
--- a/python/xgboost.py
+++ b/python/xgboost.py
@@ -41,7 +41,7 @@ class DMatrix:
         if data == None:
             return
         if isinstance(data,str):
-            xglib.XGDMatrixLoad(self.handle, ctypes.c_char_p(data), 1)
+            xglib.XGDMatrixLoad(self.handle, ctypes.c_char_p(data.encode('utf-8')), 1)
         elif isinstance(data,scp.csr_matrix):
             self.__init_from_csr(data)
         elif isinstance(data, numpy.ndarray) and len(data.shape) == 2:
@@ -51,7 +51,7 @@ class DMatrix:
                 csr = scp.csr_matrix(data)
                 self.__init_from_csr(csr)
             except:
-                raise Exception, "can not intialize DMatrix from"+str(type(data))
+                raise Exception("cannot initialize DMatrix from "+str(type(data)))
         if label != None:
             self.set_label(label)
         if weight !=None:
@@ -76,10 +76,10 @@ class DMatrix:
         xglib.XGDMatrixFree(self.handle)
     # load data from file
    def load(self, fname, silent=True):
-        xglib.XGDMatrixLoad(self.handle, ctypes.c_char_p(fname), int(silent))
+        xglib.XGDMatrixLoad(self.handle, ctypes.c_char_p(fname.encode('utf-8')), int(silent))
     # load data from file
     def save_binary(self, fname, silent=True):
-        xglib.XGDMatrixSaveBinary(self.handle, ctypes.c_char_p(fname), int(silent))
+        xglib.XGDMatrixSaveBinary(self.handle, ctypes.c_char_p(fname.encode('utf-8')), int(silent))
     # set label of dmatrix
     def set_label(self, label):
         xglib.XGDMatrixSetLabel(self.handle, (ctypes.c_float*len(label))(*label), len(label) )
@@ -111,7 +111,7 @@ class DMatrix:
     def __getitem__(self, ridx):
         length = ctypes.c_ulong()
         row = xglib.XGDMatrixGetRow(self.handle, ridx, ctypes.byref(length) );
-        return [ (int(row[i].findex),row[i].fvalue) for i in xrange(length.value) ]
+        return [ (int(row[i].findex),row[i].fvalue) for i in range(length.value) ]
 
 class Booster:
"""learner class """ @@ -124,15 +124,21 @@ class Booster: self.set_param( params ) def __del__(self): xglib.XGBoosterFree(self.handle) - def set_param(self, params,pv=None): + def set_param(self, params, pv=None): if isinstance(params,dict): - for k, v in params.iteritems(): - xglib.XGBoosterSetParam( self.handle, ctypes.c_char_p(k), ctypes.c_char_p(str(v)) ) + for k, v in params.items(): + xglib.XGBoosterSetParam( + self.handle, ctypes.c_char_p(k.encode('utf-8')), + ctypes.c_char_p(str(v).encode('utf-8'))) elif isinstance(params,str) and pv != None: - xglib.XGBoosterSetParam( self.handle, ctypes.c_char_p(params), ctypes.c_char_p(str(pv)) ) + xglib.XGBoosterSetParam( + self.handle, ctypes.c_char_p(params.encode('utf-8')), + ctypes.c_char_p(str(pv).encode('utf-8')) ) else: for k, v in params: - xglib.XGBoosterSetParam( self.handle, ctypes.c_char_p(k), ctypes.c_char_p(str(v)) ) + xglib.XGBoosterSetParam( + self.handle, ctypes.c_char_p(k.encode('utf-8')), + ctypes.c_char_p(str(v).encode('utf-8')) ) def update(self, dtrain): """ update """ assert isinstance(dtrain, DMatrix) @@ -150,13 +156,15 @@ class Booster: assert isinstance(dtrain, DMatrix) if booster_index != None: self.set_param('interact:booster_index', str(booster_index)) - xglib.XGBoosterUpdateInteract( self.handle, dtrain.handle, ctypes.c_char_p(str(action)) ) + xglib.XGBoosterUpdateInteract( + self.handle, dtrain.handle, ctypes.c_char_p(str(action)) ) def eval_set(self, evals, it = 0): for d in evals: assert isinstance(d[0], DMatrix) assert isinstance(d[1], str) dmats = ( ctypes.c_void_p * len(evals) )(*[ d[0].handle for d in evals]) - evnames = ( ctypes.c_char_p * len(evals) )(*[ ctypes.c_char_p(d[1]) for d in evals]) + evnames = ( ctypes.c_char_p * len(evals) )( + *[ctypes.c_char_p(d[1].encode('utf-8')) for d in evals]) xglib.XGBoosterEvalOneIter( self.handle, it, dmats, evnames, len(evals) ) def eval(self, mat, name = 'eval', it = 0 ): self.eval_set( [(mat,name)], it) @@ -166,25 +174,27 @@ class Booster: return ctypes2numpy( preds, length.value ) def save_model(self, fname): """ save model to file """ - xglib.XGBoosterSaveModel( self.handle, ctypes.c_char_p(fname) ) + xglib.XGBoosterSaveModel(self.handle, ctypes.c_char_p(fname.encode('utf-8'))) def load_model(self, fname): """load model from file""" - xglib.XGBoosterLoadModel( self.handle, ctypes.c_char_p(fname) ) + xglib.XGBoosterLoadModel( self.handle, ctypes.c_char_p(fname.encode('utf-8')) ) def dump_model(self, fname, fmap=''): """dump model into text file""" - xglib.XGBoosterDumpModel( self.handle, ctypes.c_char_p(fname), ctypes.c_char_p(fmap) ) + xglib.XGBoosterDumpModel( + self.handle, ctypes.c_char_p(fname.encode('utf-8')), + ctypes.c_char_p(fmap.encode('utf-8'))) def train(params, dtrain, num_boost_round = 10, evals = [], obj=None): """ train a booster with given paramaters """ bst = Booster(params, [dtrain] ) if obj == None: - for i in xrange(num_boost_round): + for i in range(num_boost_round): bst.update( dtrain ) if len(evals) != 0: bst.eval_set( evals, i ) else: # try customized objective function - for i in xrange(num_boost_round): + for i in range(num_boost_round): pred = bst.predict( dtrain ) grad, hess = obj( pred, dtrain ) bst.boost( dtrain, grad, hess )