Example: Integrate with Multimedia Frameworks
In this section, we provide an example of integrating the Neuron Runtime API with GstInference.
The Prediction function shown below, which evaluates a frame buffer with the Neuron runtime, is extracted from GstInference.
Please refer to AI Demo App to get familiar with the user interface of GstInference.
For details of the Neuron Runtime API, please refer to the Neuron API Reference.
Create the Neuron runtime environment in the Start function of the inference engine.
This function is called only once after the application starts.
RuntimeError Engine::Start (const char *in_path) {
  RuntimeError error;
  // Create the Neuron runtime from the compiled DLA model and keep it for later inference.
  int err_code = (*fnNeuronRuntimeV2_create)(in_path, 1, &this->runtime, /* backlog */2048u);
  if (err_code != NEURONRUNTIME_NO_ERROR) {
    std::cerr << "Failed to create Neuron runtime." << std::endl;
    exit(3);
  }
  return error;
}
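The fnNeuronRuntimeV2_* names used in this example are function pointers into the Neuron runtime shared library. The following is a minimal sketch of how they could be resolved with dlopen/dlsym; the library name libneuron_runtime.so, the helper LoadNeuronFunctions, and the exact pointer signatures are assumptions inferred from the calls in this example, so please check them against the Neuron API Reference.
#include <dlfcn.h>
#include <cstddef>
#include <iostream>

// Function-pointer types inferred from how the symbols are called in this example.
typedef int  (*FnNeuronRuntimeV2_create)(const char *path, size_t nb_threads,
                                         void **runtime, size_t backlog);
typedef int  (*FnNeuronRuntimeV2_getInputNumber)(void *runtime, size_t *number);
typedef int  (*FnNeuronRuntimeV2_getOutputNumber)(void *runtime, size_t *number);
typedef void (*FnNeuronRuntimeV2_release)(void *runtime);

static FnNeuronRuntimeV2_create          fnNeuronRuntimeV2_create = nullptr;
static FnNeuronRuntimeV2_getInputNumber  fnNeuronRuntimeV2_getInputNumber = nullptr;
static FnNeuronRuntimeV2_getOutputNumber fnNeuronRuntimeV2_getOutputNumber = nullptr;
static FnNeuronRuntimeV2_release         fnNeuronRuntimeV2_release = nullptr;

// Hypothetical helper: resolve the NeuronRuntimeV2 entry points once at start-up.
static bool LoadNeuronFunctions() {
  void *handle = dlopen("libneuron_runtime.so", RTLD_LAZY);  // library name is an assumption
  if (handle == nullptr) {
    std::cerr << "Failed to open Neuron runtime library: " << dlerror() << std::endl;
    return false;
  }
  fnNeuronRuntimeV2_create = reinterpret_cast<FnNeuronRuntimeV2_create>(
      dlsym(handle, "NeuronRuntimeV2_create"));
  fnNeuronRuntimeV2_getInputNumber = reinterpret_cast<FnNeuronRuntimeV2_getInputNumber>(
      dlsym(handle, "NeuronRuntimeV2_getInputNumber"));
  fnNeuronRuntimeV2_getOutputNumber = reinterpret_cast<FnNeuronRuntimeV2_getOutputNumber>(
      dlsym(handle, "NeuronRuntimeV2_getOutputNumber"));
  fnNeuronRuntimeV2_release = reinterpret_cast<FnNeuronRuntimeV2_release>(
      dlsym(handle, "NeuronRuntimeV2_release"));
  return fnNeuronRuntimeV2_create && fnNeuronRuntimeV2_getInputNumber &&
         fnNeuronRuntimeV2_getOutputNumber && fnNeuronRuntimeV2_release;
}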
Now, let's take a deep dive into the Prediction phase.
RuntimeError Engine::Predict (std::shared_ptr<r2i::IFrame> in_frame,
    std::vector< std::shared_ptr<r2i::IPrediction> > &predictions,
    const char *delegate, const char *delegate_option, const int init_flag) {
  RuntimeError error;
  error = this->PredictAuxiliar(in_frame, delegate, delegate_option, init_flag);
  auto frame = std::dynamic_pointer_cast<Frame, IFrame> (in_frame);

  // Get the number of inputs and outputs of the model.
  size_t nbInput, nbOutput;
  fnNeuronRuntimeV2_getInputNumber(this->runtime, &nbInput);
  fnNeuronRuntimeV2_getOutputNumber(this->runtime, &nbOutput);

  // Prepare the required input buffers.
  std::vector<IOBuffer> inputs;
  uint8_t **input_char = new uint8_t*[nbInput];
  for (size_t idx = 0; idx < nbInput; idx++) {
    size_t size;
    // Get the required input buffer size.
    if (fnNeuronRuntimeV2_getInputSize(this->runtime, idx, &size) != NEURONRUNTIME_NO_ERROR) {
      std::cerr << "Failed to get single input size for network." << std::endl;
      exit(1);
    }
    float *input_data = reinterpret_cast<float*>(frame->GetData());
    input_char[idx] = new uint8_t[size];
    // The scale and zero_point of the input tensor are already known.
    ConvertArrayToFixedPoint<float, uint8_t>(input_data, input_char[idx], size,
                                             0.00787402, 128);
    inputs.emplace_back(input_char[idx], size, -1); // The idx-th input
  }

  // Prepare empty buffers for storing the outputs.
  std::vector<size_t> required_output_size;
  std::vector<IOBuffer> outputs;
  uint8_t **out_buf = new uint8_t*[nbOutput];
  for (size_t idx = 0; idx < nbOutput; idx++) {
    size_t size;
    // Get the required output buffer size.
    if (fnNeuronRuntimeV2_getOutputSize(this->runtime, idx, &size)
        != NEURONRUNTIME_NO_ERROR) { exit(3); }
    required_output_size.push_back(size);
    out_buf[idx] = new uint8_t[size];
    outputs.emplace_back(out_buf[idx], size, -1); // The idx-th output
  }

  // Run the inference.
  SyncInferenceRequest req { inputs.data(), outputs.data() };
  int err_code = (*fnNeuronRuntimeV2_run)(this->runtime, req);
  if (err_code != NEURONRUNTIME_NO_ERROR) {
    std::cerr << "Failed to run inference." << std::endl;
    exit(3);
  }

  // Convert the fixed-point outputs to floating-point, so that they can be passed
  // to the gst-inference interface in the same scale as the input frame.
  for (size_t idx = 0; idx < nbOutput; idx++) {
    std::vector<float> dest;
    dest.resize(required_output_size.at(idx));
    if (required_output_size.at(idx) == 7668) {
      ConvertArrayToFloatingPoint<float, uint8_t>(out_buf[idx], dest, required_output_size.at(idx),
                                                  0.06461, 173);
    } else if (required_output_size.at(idx) == 40257) {
      ConvertArrayToFloatingPoint<float, uint8_t>(out_buf[idx], dest, required_output_size.at(idx),
                                                  0.141151, 159);
    }
    auto prediction = std::make_shared<Prediction>();
    prediction->SetTensorValues(dest.data(), required_output_size.at(idx));
    predictions.push_back(prediction);
  }

  // Free the buffers that are no longer needed.
  for (size_t i = 0; i < nbInput; i++) {
    delete [] input_char[i];
  }
  delete [] input_char;
  for (size_t i = 0; i < nbOutput; i++) {
    delete [] out_buf[i];
  }
  delete [] out_buf;

  return error;
}
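When the engine shuts down, the runtime created in Start should be released so that its resources are returned to the system. The Stop method below is a hypothetical counterpart to Start; it assumes a NeuronRuntimeV2_release entry point resolved in the same way as the other fnNeuronRuntimeV2_* pointers, so please confirm the exact call against the Neuron API Reference.
RuntimeError Engine::Stop () {
  RuntimeError error;
  // Release the Neuron runtime created in Engine::Start.
  if (this->runtime != nullptr) {
    (*fnNeuronRuntimeV2_release)(this->runtime);
    this->runtime = nullptr;
  }
  return error;
}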
Note
The version of NeuronAPI currently supported on IoT Yocto is v5.0.1.
At the moment, there is no way for users to insert the meta information of the model when compiling the DLA model, and the model metadata cannot be read out with NeuronRuntime_getMetadata at runtime.
That is why, in the example above, the values of scale and zero_point passed to ConvertArrayToFixedPoint and ConvertArrayToFloatingPoint are constants.
Users must have a clear understanding of the model information to implement the inference correctly.
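As a rough sanity check of these constants (assuming the quantization formulas shown in the templates below), with the input scale 0.00787402 and zero_point 128, a normalized input value of 1.0 is quantized to 1.0 / 0.00787402 + 128 ≈ 255 and -1.0 to about 1, so the input roughly covers the full uint8 range. Conversely, an output byte of 255 with scale 0.06461 and zero_point 173 dequantizes to (255 - 173) * 0.06461 ≈ 5.30.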
Note
The related functions for reading model metadata will be supported in NeuronAPI v5.0.2.
Template for converting floating-point values to fixed-point.
template <typename T1, typename T2>
static T2 ConvertToFixedPoint(const T1 value, const float scale, const int zero_point) {
  return static_cast<T2>((value / scale) + zero_point);
}

template <typename T1, typename T2>
static void ConvertArrayToFixedPoint(const T1 *data, T2 *output_data, const int size, const float scale, const int zero_point) {
  for (int index = 0; index < size; index++) {
    output_data[index] = static_cast<T2>(ConvertToFixedPoint<T1, T2>(data[index], scale, zero_point));
  }
}
Template for converting fixed-point values back to floating-point.
template <typename T1, typename T2>
static T1 ConvertToFloatingPoint(const T2 value, const float scale, const int zero_point) {
  return static_cast<T1>((value - zero_point) * scale);
}

template <typename T1, typename T2>
static void ConvertArrayToFloatingPoint(const T2 *data, std::vector<T1> &output_data, const int size, const float scale, const int zero_point) {
  for (int index = 0; index < size; index++) {
    output_data[index] = ConvertToFloatingPoint<T1, T2>(data[index], scale, zero_point);
  }
}
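As an illustration of how the two templates fit together, the small, hypothetical test program below performs a float to uint8 to float round trip using the input quantization parameters from the example above. It assumes the templates are available in the same translation unit and is only a sketch for checking the conversions, not part of the GstInference backend.
#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  // Quantization parameters of the input tensor in the example above.
  const float scale = 0.00787402f;
  const int zero_point = 128;

  const int size = 3;
  float input[size] = {-1.0f, 0.0f, 1.0f};
  uint8_t quantized[size];
  std::vector<float> restored(size);

  // float -> fixed-point -> float round trip using the templates above.
  ConvertArrayToFixedPoint<float, uint8_t>(input, quantized, size, scale, zero_point);
  ConvertArrayToFloatingPoint<float, uint8_t>(quantized, restored, size, scale, zero_point);

  for (int i = 0; i < size; i++) {
    std::printf("%f -> %u -> %f\n", input[i],
                static_cast<unsigned>(quantized[i]), restored[i]);
  }
  return 0;
}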