Example: Integrate with Multimedia Frameworks
In this section, we provide an example of integrating the Neuron Runtime API with GstInference.
The Prediction function shown below, which evaluates a frame buffer with the Neuron runtime, is extracted from GstInference.
Please refer to AI Demo App to get familiar with the user interface of GstInference.
For details of the Neuron Runtime API, please refer to the Neuron API Reference.
Create the Neuron runtime environment in the Start function of the inference engine.
This function is called only once after the application starts.
RuntimeError Engine::Start (const char *in_path) {
  RuntimeError error;
  // Create the Neuron runtime from the compiled DLA model and keep it for later inference.
  int err_code = (*fnNeuronRuntimeV2_create)(in_path, 1, &this->runtime, /* backlog */2048u);
  if (err_code != NEURONRUNTIME_NO_ERROR) {
    std::cerr << "Failed to create Neuron runtime." << std::endl;
    exit(3);
  }
  return error;
}
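The fnNeuronRuntimeV2_* names used in this example are function pointers into the Neuron runtime shared library. The following is a minimal sketch of how they could be resolved with dlopen/dlsym; the library name libneuron_runtime.so, the helper LoadNeuronFunctions, and the exact pointer signatures are assumptions inferred from the calls in this example, so please check them against the Neuron API Reference.
#include <dlfcn.h>
#include <cstddef>
#include <iostream>

// Function-pointer types inferred from how the symbols are called in this example.
typedef int  (*FnNeuronRuntimeV2_create)(const char *path, size_t nb_threads,
                                         void **runtime, size_t backlog);
typedef int  (*FnNeuronRuntimeV2_getInputNumber)(void *runtime, size_t *number);
typedef int  (*FnNeuronRuntimeV2_getOutputNumber)(void *runtime, size_t *number);
typedef void (*FnNeuronRuntimeV2_release)(void *runtime);

static FnNeuronRuntimeV2_create          fnNeuronRuntimeV2_create = nullptr;
static FnNeuronRuntimeV2_getInputNumber  fnNeuronRuntimeV2_getInputNumber = nullptr;
static FnNeuronRuntimeV2_getOutputNumber fnNeuronRuntimeV2_getOutputNumber = nullptr;
static FnNeuronRuntimeV2_release         fnNeuronRuntimeV2_release = nullptr;

// Hypothetical helper: resolve the NeuronRuntimeV2 entry points once at start-up.
static bool LoadNeuronFunctions() {
  void *handle = dlopen("libneuron_runtime.so", RTLD_LAZY);  // library name is an assumption
  if (handle == nullptr) {
    std::cerr << "Failed to open Neuron runtime library: " << dlerror() << std::endl;
    return false;
  }
  fnNeuronRuntimeV2_create = reinterpret_cast<FnNeuronRuntimeV2_create>(
      dlsym(handle, "NeuronRuntimeV2_create"));
  fnNeuronRuntimeV2_getInputNumber = reinterpret_cast<FnNeuronRuntimeV2_getInputNumber>(
      dlsym(handle, "NeuronRuntimeV2_getInputNumber"));
  fnNeuronRuntimeV2_getOutputNumber = reinterpret_cast<FnNeuronRuntimeV2_getOutputNumber>(
      dlsym(handle, "NeuronRuntimeV2_getOutputNumber"));
  fnNeuronRuntimeV2_release = reinterpret_cast<FnNeuronRuntimeV2_release>(
      dlsym(handle, "NeuronRuntimeV2_release"));
  return fnNeuronRuntimeV2_create && fnNeuronRuntimeV2_getInputNumber &&
         fnNeuronRuntimeV2_getOutputNumber && fnNeuronRuntimeV2_release;
}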
Now, let's take a deep dive into the Prediction phase.
RuntimeError Engine::Predict (std::shared_ptr<r2i::IFrame> in_frame,
    std::vector< std::shared_ptr<r2i::IPrediction> > &predictions,
    const char *delegate, const char *delegate_option, const int init_flag) {
  RuntimeError error;
  error = this->PredictAuxiliar(in_frame, delegate, delegate_option, init_flag);
  auto frame = std::dynamic_pointer_cast<Frame, IFrame> (in_frame);

  // Get the number of inputs and outputs of the model.
  size_t nbInput, nbOutput;
  fnNeuronRuntimeV2_getInputNumber(this->runtime, &nbInput);
  fnNeuronRuntimeV2_getOutputNumber(this->runtime, &nbOutput);

  // Prepare the required input buffers.
  std::vector<IOBuffer> inputs;
  uint8_t **input_char = new uint8_t*[nbInput];
  for (size_t idx = 0; idx < nbInput; idx++) {
    size_t size;
    // Get the required input buffer size.
    if (fnNeuronRuntimeV2_getInputSize(this->runtime, idx, &size) != NEURONRUNTIME_NO_ERROR) {
      std::cerr << "Failed to get single input size for network." << std::endl;
      exit(1);
    }
    float *input_data = reinterpret_cast<float*>(frame->GetData());
    input_char[idx] = new uint8_t[size];
    // The scale and zero_point of the input tensor are already known.
    ConvertArrayToFixedPoint<float, uint8_t>(input_data, input_char[idx], size,
                                             0.00787402, 128);
    inputs.emplace_back(input_char[idx], size, -1); // The idx-th input
  }

  // Prepare empty buffers for storing the outputs.
  std::vector<size_t> required_output_size;
  std::vector<IOBuffer> outputs;
  uint8_t **out_buf = new uint8_t*[nbOutput];
  for (size_t idx = 0; idx < nbOutput; idx++) {
    size_t size;
    // Get the required output buffer size.
    if (fnNeuronRuntimeV2_getOutputSize(this->runtime, idx, &size)
        != NEURONRUNTIME_NO_ERROR) { exit(3); }
    required_output_size.push_back(size);
    out_buf[idx] = new uint8_t[size];
    outputs.emplace_back(out_buf[idx], size, -1); // The idx-th output
  }

  // Run the inference.
  SyncInferenceRequest req { inputs.data(), outputs.data() };
  int err_code = (*fnNeuronRuntimeV2_run)(this->runtime, req);
  if (err_code != NEURONRUNTIME_NO_ERROR) {
    std::cerr << "Failed to run inference." << std::endl;
    exit(3);
  }

  // Convert the fixed-point outputs to floating-point, so that they can be passed
  // to the gst-inference interface in the same scale as the input frame.
  for (size_t idx = 0; idx < nbOutput; idx++) {
    std::vector<float> dest;
    dest.resize(required_output_size.at(idx));
    if (required_output_size.at(idx) == 7668) {
      ConvertArrayToFloatingPoint<float, uint8_t>(out_buf[idx], dest, required_output_size.at(idx),
                                                  0.06461, 173);
    } else if (required_output_size.at(idx) == 40257) {
      ConvertArrayToFloatingPoint<float, uint8_t>(out_buf[idx], dest, required_output_size.at(idx),
                                                  0.141151, 159);
    }
    auto prediction = std::make_shared<Prediction>();
    prediction->SetTensorValues(dest.data(), required_output_size.at(idx));
    predictions.push_back(prediction);
  }

  // Free the buffers that are no longer needed.
  for (size_t i = 0; i < nbInput; i++) {
    delete [] input_char[i];
  }
  delete [] input_char;
  for (size_t i = 0; i < nbOutput; i++) {
    delete [] out_buf[i];
  }
  delete [] out_buf;

  return error;
}
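When the engine shuts down, the runtime created in Start should be released so that its resources are returned to the system. The Stop method below is a hypothetical counterpart to Start; it assumes a NeuronRuntimeV2_release entry point resolved in the same way as the other fnNeuronRuntimeV2_* pointers, so please confirm the exact call against the Neuron API Reference.
RuntimeError Engine::Stop () {
  RuntimeError error;
  // Release the Neuron runtime created in Engine::Start.
  if (this->runtime != nullptr) {
    (*fnNeuronRuntimeV2_release)(this->runtime);
    this->runtime = nullptr;
  }
  return error;
}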
Note
The version of NeuronAPI currently supported on IoT Yocto is v5.0.1.
At the moment, there is no way for users to insert the meta information of the model when compiling the DLA model, and the model metadata cannot be read out with NeuronRuntime_getMetadata at runtime.
That is why, in the example above, the values of scale and zero_point passed to ConvertArrayToFixedPoint and ConvertArrayToFloatingPoint are constants.
Users must have a clear understanding of the model information to implement the inference correctly.
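As a rough sanity check of these constants (assuming the quantization formulas shown in the templates below), with the input scale 0.00787402 and zero_point 128, a normalized input value of 1.0 is quantized to 1.0 / 0.00787402 + 128 ≈ 255 and -1.0 to about 1, so the input roughly covers the full uint8 range. Conversely, an output byte of 255 with scale 0.06461 and zero_point 173 dequantizes to (255 - 173) * 0.06461 ≈ 5.30.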
Note
The related functions for reading model metadata will be supported in NeuronAPI v5.0.2.
Template for converting floating-point values to fixed-point.
template <typename T1, typename T2>
static T2 ConvertToFixedPoint(const T1 value, const float scale, const int zero_point) {
  return static_cast<T2>((value / scale) + zero_point);
}

template <typename T1, typename T2>
static void ConvertArrayToFixedPoint(const T1 *data, T2 *output_data, const int size, const float scale, const int zero_point) {
  for (int index = 0; index < size; index++) {
    output_data[index] = static_cast<T2>(ConvertToFixedPoint<T1, T2>(data[index], scale, zero_point));
  }
}
Template for converting fixed-point values back to floating-point.
template <typename T1, typename T2>
static T1 ConvertToFloatingPoint(const T2 value, const float scale, const int zero_point) {
  return static_cast<T1>((value - zero_point) * scale);
}

template <typename T1, typename T2>
static void ConvertArrayToFloatingPoint(const T2 *data, std::vector<T1> &output_data, const int size, const float scale, const int zero_point) {
  for (int index = 0; index < size; index++) {
    output_data[index] = ConvertToFloatingPoint<T1, T2>(data[index], scale, zero_point);
  }
}
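As an illustration of how the two templates fit together, the small, hypothetical test program below performs a float to uint8 to float round trip using the input quantization parameters from the example above. It assumes the templates are available in the same translation unit and is only a sketch for checking the conversions, not part of the GstInference backend.
#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  // Quantization parameters of the input tensor in the example above.
  const float scale = 0.00787402f;
  const int zero_point = 128;

  const int size = 3;
  float input[size] = {-1.0f, 0.0f, 1.0f};
  uint8_t quantized[size];
  std::vector<float> restored(size);

  // float -> fixed-point -> float round trip using the templates above.
  ConvertArrayToFixedPoint<float, uint8_t>(input, quantized, size, scale, zero_point);
  ConvertArrayToFloatingPoint<float, uint8_t>(quantized, restored, size, scale, zero_point);

  for (int i = 0; i < size; i++) {
    std::printf("%f -> %u -> %f\n", input[i],
                static_cast<unsigned>(quantized[i]), restored[i]);
  }
  return 0;
}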