Loading sklearn model in Java. Model created with DNNClassifier in python

问题

The goal is to open in Java a model created/trained in python with tensorflow.contrib.learn.learn.DNNClassifier.

At the moment the main issue is to know the name of the "tensor" to give in java on the session runner method.

I have this test code in python :

    from __future__ import division, print_function, absolute_import
import tensorflow as tf
import pandas as pd
import tensorflow.contrib.learn as learn
import numpy as np
from sklearn import metrics
from sklearn.cross_validation import train_test_split
from tensorflow.contrib import layers
from tensorflow.contrib.learn.python.learn.utils import input_fn_utils
from tensorflow.python.ops import array_ops
from tensorflow.python.framework import dtypes
from tensorflow.python.util.compat import as_text

print(tf.VERSION)

df = pd.read_csv('../NNNormalizeData-out.csv')

inputs = []
target = []

y=0;    
for x in df.columns:
    if y != 35 :
        #print("added %d" %y)
        inputs.append(x)
    else :
        target.append(x)
    y+=1

total_inputs,total_output = df.as_matrix(inputs).astype(np.float32),df.as_matrix([target]).astype(np.int32)

train_inputs, test_inputs, train_output, test_output = train_test_split(total_inputs, total_output, test_size=0.2, random_state=42)

feature_columns = [tf.contrib.layers.real_valued_column("", dimension=train_inputs.shape[1],dtype=tf.float32)]
#target_column = [tf.contrib.layers.real_valued_column("output", dimension=train_output.shape[1])]

classifier = learn.DNNClassifier(hidden_units=[10, 20, 5], n_classes=5
                                 ,feature_columns=feature_columns)

classifier.fit(train_inputs, train_output, steps=100)

#Save Model into saved_model.pbtxt file (possible to Load in Java)
tfrecord_serving_input_fn = tf.contrib.learn.build_parsing_serving_input_fn(layers.create_feature_spec_for_parsing(feature_columns))  
classifier.export_savedmodel(export_dir_base="test", serving_input_fn = tfrecord_serving_input_fn,as_text=True)


# Measure accuracy
pred = list(classifier.predict(test_inputs, as_iterable=True))
score = metrics.accuracy_score(test_output, pred)
print("Final score: {}".format(score))

# test individual samples 
sample_1 = np.array( [[0.37671986791414125,0.28395908337619136,-0.0966095873607713,-1.0,0.06891621389763203,-0.09716678086712205,0.726029084013637,4.984689881073479E-4,-0.30296253267499107,-0.16192917054985334,0.04820256230479658,0.4951319883569152,0.5269983894210499,-0.2560313828048315,-0.3710980821053321,-0.4845867212612598,-0.8647234314469595,-0.6491591208322198,-1.0,-0.5004549422844073,-0.9880910165770813,0.5540293108747256,0.5625990251930839,0.7420121698556554,0.5445551415657979,0.4644276850235627,0.7316976292340245,0.636690006814346,0.16486621649984112,-0.0466018967678159,0.5261100063227044,0.6256168612312738,-0.544295484930702,0.379125782517193,0.6959368575211544]], dtype=float)
sample_2 = np.array( [[1.0,0.7982741870963959,1.0,-0.46270838239235024,0.040320274521029376,0.443451913224413,-1.0,1.0,1.0,-1.0,0.36689718911339564,-0.13577379160035796,-0.5162916256414466,-0.03373651520104648,1.0,1.0,1.0,1.0,0.786999801054777,-0.43856035121103853,-0.8199093927945158,1.0,-1.0,-1.0,-0.1134921695894473,-1.0,0.6420892436196663,0.7871737734493178,1.0,0.6501788845358409,1.0,1.0,1.0,-0.17586627413625022,0.8817194210401085]], dtype=float)

pred = list(classifier.predict(sample_2, as_iterable=True))
print("Prediction for sample_1 is:{} ".format(pred))

pred = list(classifier.predict_proba(sample_2, as_iterable=True))
print("Prediction for sample_2 is:{} ".format(pred))

A model_saved.pbtxt file is created.

I try to load this model in Java with the following code :

    public class HelloTF {
    public static void main(String[] args) throws Exception {
        SavedModelBundle bundle=SavedModelBundle.load("/java/workspace/APIJavaSampleCode/tfModels/dnn/ModelSave","serve");
        Session s = bundle.session();

        double[] inputDouble = {1.0,0.7982741870963959,1.0,-0.46270838239235024,0.040320274521029376,0.443451913224413,-1.0,1.0,1.0,-1.0,0.36689718911339564,-0.13577379160035796,-0.5162916256414466,-0.03373651520104648,1.0,1.0,1.0,1.0,0.786999801054777,-0.43856035121103853,-0.8199093927945158,1.0,-1.0,-1.0,-0.1134921695894473,-1.0,0.6420892436196663,0.7871737734493178,1.0,0.6501788845358409,1.0,1.0,1.0,-0.17586627413625022,0.8817194210401085};
        float [] inputfloat=new float[inputDouble.length];
        for(int i=0;i<inputfloat.length;i++)
        {
            inputfloat[i]=(float)inputDouble[i];
        }
        Tensor inputTensor = Tensor.create(new long[] {35}, FloatBuffer.wrap(inputfloat) );

        Tensor result = s.runner()
                .feed("input_example_tensor", inputTensor)
                .fetch("dnn/multi_class_head/predictions/probabilities")
                .run().get(0);


         float[] m = new float[5];
         float[] vector = result.copyTo(m);
         float maxVal = 0;
         int inc = 0;
         int predict = -1;
         for(float val : vector) 
         {
             System.out.println(val+"  ");
             if(val > maxVal) {
                 predict = inc;
                 maxVal = val;
             }
             inc++;
         }
         System.out.println(predict);



    }
}

I get the error on the .run().get(0); line :

Exception in thread "main" org.tensorflow.TensorFlowException: Output 0 of type float does not match declared output type string for node _recv_input_example_tensor_0 = _Recv[_output_shapes=[[-1]], client_terminated=true, recv_device="/job:localhost/replica:0/task:0/cpu:0", send_device="/job:localhost/replica:0/task:0/cpu:0", send_device_incarnation=3663984897684684554, tensor_name="input_example_tensor:0", tensor_type=DT_STRING, _device="/job:localhost/replica:0/task:0/cpu:0"]()
    at org.tensorflow.Session.run(Native Method)
    at org.tensorflow.Session.access$100(Session.java:48)
    at org.tensorflow.Session$Runner.runHelper(Session.java:285)
    at org.tensorflow.Session$Runner.run(Session.java:235)
    at tensorflow.HelloTF.main(HelloTF.java:35)

回答1:

Ok I finally Solve : the main problem was the name of the input to use in java that is ""dnn/input_from_feature_columns/input_from_feature_columns/concat" and not "input_example_tensor".

I have discover this using the graph navigation with: tensorboard --logdir=D:\python\Workspace\Autoencoder\src\dnn\ModelSave

here is the java code :

public class HelloTF {
public static void main(String[] args) throws Exception {
    SavedModelBundle bundle=SavedModelBundle.load("/java/workspace/APIJavaSampleCode/tfModels/dnn/ModelSave","serve");
    Session s = bundle.session();

    double[] inputDouble = {1.0,0.7982741870963959,1.0,-0.46270838239235024,0.040320274521029376,0.443451913224413,-1.0,1.0,1.0,-1.0,0.36689718911339564,-0.13577379160035796,-0.5162916256414466,-0.03373651520104648,1.0,1.0,1.0,1.0,0.786999801054777,-0.43856035121103853,-0.8199093927945158,1.0,-1.0,-1.0,-0.1134921695894473,-1.0,0.6420892436196663,0.7871737734493178,1.0,0.6501788845358409,1.0,1.0,1.0,-0.17586627413625022,0.8817194210401085};
    float [] inputfloat=new float[inputDouble.length];
    for(int i=0;i<inputfloat.length;i++)
    {
        inputfloat[i]=(float)inputDouble[i];
    }
FloatBuffer.wrap(inputfloat) );
    float[][] data= new float[1][35];
    data[0]=inputfloat;
    Tensor inputTensor=Tensor.create(data);


    Tensor result = s.runner()
            .feed("dnn/input_from_feature_columns/input_from_feature_columns/concat", inputTensor)
            //.feed("input_example_tensor", inputTensor)
            //.fetch("tensorflow/serving/classify")
            .fetch("dnn/multi_class_head/predictions/probabilities")
            //.fetch("dnn/zero_fraction_3/Cast")
            .run().get(0);


     float[][] m = new float[1][5];
     float[][] vector = result.copyTo(m);
     float maxVal = 0;
     int inc = 0;
     int predict = -1;
     for(float val : vector[0]) 
     {
         System.out.println(val+"  ");
         if(val > maxVal) {
             predict = inc;
             maxVal = val;
         }
         inc++;
     }
     System.out.println(predict);



}

}

I have tested the output :

phyton side :

Prediction for sample_2 is:[3] 
Prediction for sample_2 is:[array([ 0.17157166,  0.24475774,  0.16158019,  0.24648622,  0.17560424], dtype=float32)]

Java Side :

回答2:

The error message offers a clue: the tensor named "input_example_tensor" in the model expects to have string contents, whereas you provided float values.

Judging by the name of the tensor and your code, I'd guess that the tensor you're feeding is defined in input_fn_utils.py. This tensor is passed to the tf.parse_example() op, which expects a vector of tf.train.Example protocol buffers, serialized as strings.

回答3:

I got an error without feed("input_example_tensor", inputTensor) on Tensorflow 1.1.

But I found that example.proto can be fed as "input_example_tensor", although it took a lot of time to figure out how to create string tensors for serialized protocol buffer.

This is how I created inputTensor.

org.tensorflow.example.Example.Builder example = org.tensorflow.example.Example.newBuilder();   
/* set some features to example... */

Tensor exampleTensor = Tensor.create(example.build().toByteArray());
// Here, the shape of exampleTensor is not specified yet.

// Set the shape to feed this as "input_example_tensor"
Graph g = bundle.graph(); 
Output examplePlaceholder =
                  g.opBuilder("Placeholder", "example")
                  .setAttr("dtype", exampleTensor.dataType())                        
                      .build().output(0);
Tensor shapeTensor = Tensor.create(new long[]{1}, IntBuffer.wrap(new int[]{1}));                      
Output shapeConst = g.opBuilder("Const", "shape")
                      .setAttr("dtype", shapeTensor.dataType())
                      .setAttr("value", shapeTensor)
                      .build().output(0);
Output shaped = g.opBuilder("Reshape", "output").addInput(examplePlaceholder).addInput(shapeConst).build().output(0);


Tensor inputTensor = s.runner().feed(examplePlaceholder, exampleTensor).fetch(shaped).run().get(0);                   
// Now, inputTensor has shape of [1] and ready to feed.

回答4:

Your parameters in .feed() and .fetch() should be matching with your input and output datatype.

You can look at your savedmodel.pbtxt file. There are details about your paramaters and their input/output types.

For instance,

my java code

Tensor result = s.runner()
        .feed("ParseExample/ParseExample", inputTensor)
        .fetch("dnn/binary_logistic_head/predictions/probabilities")
        .run().get(0);

my savedModel.pbtxt (part of it)

node {
  name: "ParseExample/ParseExample"
  op: "ParseExample"
  input: "input_example_tensor"
  input: "ParseExample/ParseExample/names"
  input: "ParseExample/ParseExample/dense_keys_0"
  input: "ParseExample/Const"
  attr {
    key: "Ndense"
    value {
      i: 1
    }
  }
  attr {
    key: "Nsparse"
    value {
      i: 0
    }
  }
  attr {
    key: "Tdense"
    value {
      list {
        type: DT_FLOAT
      }
    }
  }
  attr {
    key: "_output_shapes"
    value {
      list {
        shape {
          dim {
            size: -1
          }
          dim {
            size: 2
          }
        }
      }
    }
  }
  attr {
    key: "dense_shapes"
    value {
      list {
        shape {
          dim {
            size: 2
          }
        }
      }
    }
  }
  attr {
    key: "sparse_types"
    value {
      list {
      }
    }
  }
}
  outputs {
    key: "scores"
    value {
      name: "dnn/binary_logistic_head/predictions/probabilities:0"
      dtype: DT_FLOAT
      tensor_shape {
        dim {
          size: -1
        }
        dim {
          size: 2
        }
      }
    }
  }

They both compatible with my datatype, float.

来源：https://stackoverflow.com/questions/43598953/loading-sklearn-model-in-java-model-created-with-dnnclassifier-in-python

标签

java

tensorflow

tensorflow-serving

sklearn-pandas