Skip to content
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
Set batched input
  • Loading branch information
sivanov-work committed Nov 27, 2023
commit ab0dc80bc6a1fb0bded10b1d01cc926cc2769c22
142 changes: 86 additions & 56 deletions demos/classification_benchmark_demo/cpp/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ static const char target_device_message[] = "Optional. Specify the target device
static const char num_threads_message[] = "Optional. Specify count of threads.";
static const char num_streams_message[] = "Optional. Specify count of streams.";
static const char num_inf_req_message[] = "Optional. Number of infer requests.";
static const char num_inf_req_per_batch_message[] = "Optional. Number of infer requests per batch.";
static const char image_grid_resolution_message[] = "Optional. Set image grid resolution in format WxH. "
"Default value is 1280x720.";
static const char ntop_message[] = "Optional. Number of top results. Default value is 5. Must be >= 1.";
Expand All @@ -75,6 +76,7 @@ DEFINE_string(d, "CPU", target_device_message);
DEFINE_uint32(nthreads, 0, num_threads_message);
DEFINE_string(nstreams, "", num_streams_message);
DEFINE_uint32(nireq, 0, num_inf_req_message);
DEFINE_uint32(nireq_per_batch, 2, num_inf_req_per_batch_message);
DEFINE_uint32(nt, 5, ntop_message);
DEFINE_string(res, "1280x720", image_grid_resolution_message);
DEFINE_bool(auto_resize, false, input_resizable_message);
Expand Down Expand Up @@ -265,6 +267,15 @@ int main(int argc, char* argv[]) {
std::size_t nextImageIndex = 0;
std::chrono::steady_clock::time_point startTime = std::chrono::steady_clock::now();

// batch setup
std::vector<InputData> inputDataVector {inputImages.begin(), inputImages.end()};
auto inputImagesBeginIt = inputDataVector.begin();
auto inputImagesEndIt = inputDataVector.begin();
std::advance(inputImagesEndIt, FLAGS_nireq_per_batch);

auto classIndicesBeginIt = classIndices.begin();
auto classIndicesEndIt = classIndicesBeginIt;
std::advance(classIndicesEndIt, FLAGS_nireq_per_batch);
while (keepRunning && elapsedSeconds < std::chrono::seconds(FLAGS_time)) {
if (elapsedSeconds >= testDuration - fpsCalculationDuration && framesNumOnCalculationStart == 0) {
framesNumOnCalculationStart = framesNum;
Expand All @@ -287,14 +298,30 @@ int main(int argc, char* argv[]) {
if (pipeline.isReadyToProcess()) {
auto imageStartTime = std::chrono::steady_clock::now();

pipeline.submitData(ImageInputData(inputImages[nextImageIndex]),
std::make_shared<ClassificationImageMetaData>(inputImages[nextImageIndex],
pipeline.submitData(inputImagesBeginIt, inputImagesEndIt,
std::make_shared<ClassificationImageMetaData>(inputImagesBeginIt, inputImagesEndIt
imageStartTime,
classIndices[nextImageIndex]));
nextImageIndex++;
if (nextImageIndex == imageNames.size()) {
nextImageIndex = 0;
classIndicesBeginIt, classIndicesEndIt));
//nextImageIndex++;
//if (nextImageIndex == imageNames.size()) {
//nextImageIndex = 0;
//}

++inputImagesBeginIt;
++inputImagesEndIt;
++classIndicesBeginIt;
++classIndicesEndIt;

if (inputImagesEndIt == inputDataVector.end()) {
inputImagesBeginIt = inputDataVector.begin();
inputImagesEndIt = inputImagesBeginIt;
std::advance(inputImagesEndIt, FLAGS_nireq_per_batch);

classIndicesBeginIt = classIndices.begin();
classIndicesEndIt = classIndicesBeginIt;
std::advance(classIndicesEndIt, FLAGS_nireq_per_batch);
}

}

//--- Waiting for free input slot or output data available. Function will return immediately if any of them
Expand All @@ -308,58 +335,61 @@ int main(int argc, char* argv[]) {
if (!classificationResult.metaData) {
throw std::invalid_argument("Renderer: metadata is null");
}
const ClassificationImageMetaData& classificationImageMetaData =
classificationResult.metaData->asRef<const ClassificationImageMetaData>();

auto outputImg = classificationImageMetaData.img;

if (outputImg.empty()) {
throw std::invalid_argument("Renderer: image provided in metadata is empty");
}
PredictionResult predictionResult = PredictionResult::Incorrect;
std::string label = classificationResult.topLabels.front().label;
if (!FLAGS_gt.empty()) {
for (size_t i = 0; i < FLAGS_nt; i++) {
unsigned predictedClass = classificationResult.topLabels[i].id;
if (predictedClass == classificationImageMetaData.groundTruthId) {
predictionResult = PredictionResult::Correct;
correctPredictionsCount++;
label = classificationResult.topLabels[i].label;
break;
}
const ClassificationImageBatchMetaData& classificationImageBatchMetaData =
classificationResult.metaData->asRef<const ClassificationImageBatchMetaData>();

//auto outputImg = classificationImageMetaData.img;
const std::vector<ClassificationImageMetaData> &outputImagesMD = classificationImageBatchMetaData.metadatas;
for (const ClassificationImageMetaData &classificationImageMetaData : outputImagesMD) {
auto outputImg = classificationImageMetaData.img;
if (outputImg.empty()) {
throw std::invalid_argument("Renderer: image provided in metadata is empty");
}
} else {
predictionResult = PredictionResult::Unknown;
}
framesNum++;
gridMat.updateMat(outputImg, label, predictionResult);
accuracy = static_cast<double>(correctPredictionsCount) / framesNum;
gridMat.textUpdate(metrics,
classificationResult.metaData->asRef<ImageMetaData>().timeStamp,
accuracy,
FLAGS_nt,
isTestMode,
!FLAGS_gt.empty(),
presenter);
renderMetrics.update(renderingStart);
elapsedSeconds = std::chrono::steady_clock::now() - startTime;
if (!FLAGS_no_show) {
cv::imshow("classification_demo", gridMat.outImg);
//--- Processing keyboard events
int key = cv::waitKey(1);
if (27 == key || 'q' == key || 'Q' == key) { // Esc
keepRunning = false;
} else if (32 == key || 'r' == key ||
'R' == key) { // press space or r to restart testing if needed
isTestMode = true;
framesNum = 0;
framesNumOnCalculationStart = 0;
correctPredictionsCount = 0;
accuracy = 0;
elapsedSeconds = std::chrono::steady_clock::duration(0);
startTime = std::chrono::steady_clock::now();
PredictionResult predictionResult = PredictionResult::Incorrect;
std::string label = classificationResult.topLabels.front().label;
if (!FLAGS_gt.empty()) {
for (size_t i = 0; i < FLAGS_nt; i++) {
unsigned predictedClass = classificationResult.topLabels[i].id;
if (predictedClass == classificationImageMetaData.groundTruthId) {
predictionResult = PredictionResult::Correct;
correctPredictionsCount++;
label = classificationResult.topLabels[i].label;
break;
}
}
} else {
presenter.handleKey(key);
predictionResult = PredictionResult::Unknown;
}
framesNum += 1;
gridMat.updateMat(outputImg, label, predictionResult);
accuracy = static_cast<double>(correctPredictionsCount) / framesNum;
gridMat.textUpdate(metrics,
classificationResult.metaData->asRef<ImageMetaData>().timeStamp,
accuracy,
FLAGS_nt,
isTestMode,
!FLAGS_gt.empty(),
presenter);
renderMetrics.update(renderingStart);
elapsedSeconds = std::chrono::steady_clock::now() - startTime;
if (!FLAGS_no_show) {
cv::imshow("classification_demo", gridMat.outImg);
//--- Processing keyboard events
int key = cv::waitKey(1);
if (27 == key || 'q' == key || 'Q' == key) { // Esc
keepRunning = false;
} else if (32 == key || 'r' == key ||
'R' == key) { // press space or r to restart testing if needed
isTestMode = true;
framesNum = 0;
framesNumOnCalculationStart = 0;
correctPredictionsCount = 0;
accuracy = 0;
elapsedSeconds = std::chrono::steady_clock::duration(0);
startTime = std::chrono::steady_clock::now();
} else {
presenter.handleKey(key);
}
}
}
}
Expand Down
4 changes: 3 additions & 1 deletion demos/common/cpp/models/include/models/image_model.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,9 @@ class ImageModel : public ModelBase {
ImageModel(const std::string& modelFileName, bool useAutoResize, const std::string& layout = "");

std::shared_ptr<InternalModelData> preprocess(const InputData& inputData, ov::InferRequest& request) override;

std::shared_ptr<InternalModelData> preprocess(std::vector<InputData>::iterator inputDataBegin,
std::vector<InputData>::iterator inputDataEnd,
ov::InferRequest& request) override;
protected:
bool useAutoResize;

Expand Down
3 changes: 3 additions & 0 deletions demos/common/cpp/models/include/models/model_base.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@ class ModelBase {
virtual ~ModelBase() {}

virtual std::shared_ptr<InternalModelData> preprocess(const InputData& inputData, ov::InferRequest& request) = 0;
virtual std::shared_ptr<InternalModelData> preprocess(std::vector<InputData>::iterator inputDataBegin,
std::vector<InputData>::iterator inputDataEnd,
ov::InferRequest& request) {};
virtual ov::CompiledModel compileModel(const ModelConfig& config, ov::Core& core);
virtual void onLoadCompleted(const std::vector<ov::InferRequest>& requests) {}
virtual std::unique_ptr<ResultBase> postprocess(InferenceResult& infResult) = 0;
Expand Down
54 changes: 54 additions & 0 deletions demos/common/cpp/models/src/image_model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,60 @@ ImageModel::ImageModel(const std::string& modelFileName, bool useAutoResize, con
: ModelBase(modelFileName, layout),
useAutoResize(useAutoResize) {}

std::shared_ptr<InternalModelData> ImageModel::preprocess(std::vector<InputData>::iterator inputDataBegin,
std::vector<InputData>::iterator inputDataEnd,
ov::InferRequest& request) {

const ov::Tensor& frameTensor = request.get_tensor(inputsNames[0]); // first input should be image
const ov::Shape& tensorShape = frameTensor.get_shape();
const ov::Layout layout("NHWC");
const size_t batch = tensorShape[ov::layout::batch_idx(layout)];
const size_t width = tensorShape[ov::layout::width_idx(layout)];
const size_t height = tensorShape[ov::layout::height_idx(layout)];
const size_t channels = tensorShape[ov::layout::channels_idx(layout)];

char* memoryBlob = nullptr;
size_t image_index = 0;
bool isMatFloat = false;
for (auto inputDataIt = inputDataBegin; inputDataIt != inputDataEnd; ++inputDataIt ) {
const auto& origImg = inputDataIt->asRef<ImageInputData>().inputImage;
auto img = inputTransform(origImg);

auto matType = mat.type() & CV_MAT_DEPTH_MASK;
if (matType != CV_8U && matType != CV_32F) {
throw std::runtime_error("Unsupported mat type for wrapping");
}
isMatFloat = matType == CV_32F;

if (!useAutoResize) {
// /* Resize and copy data from the image to the input tensor */

if (static_cast<size_t>(img.channels()) != channels) {
throw std::runtime_error(std::string("The number of channels for model input: ") +
std::to_string(channels) + " and image: " +
std::to_string(img.channels()) + " - must match");
}
if (channels != 1 && channels != 3) {
throw std::runtime_error("Unsupported number of channels");
}
img = resizeImageExt(img, width, height, resizeMode, interpolationMode);
}
size_t sizeInBytes = img.total() * img.elemSize();
if (!memoryBlob) {
memoryBlob = new char[sizeInBytes * batch]; // intended memory leak
}

// fill continuous batch
memcpy(memoryBlob + sizeInBytes * image_index, img.ptr(), sizeInBytes);
image_index++;
}

auto precision = isMatFloat ? ov::element::f32 : ov::element::u8;
auto batched_tensor = ov::Tensor(precision, ov::Shape{ batch, height, width, channels }, memoryBlob);
request.set_tensor(inputsNames[0],batched_tensor);
return std::make_shared<InternalImageModelData>(origImg.cols, origImg.rows);
}

std::shared_ptr<InternalModelData> ImageModel::preprocess(const InputData& inputData, ov::InferRequest& request) {
const auto& origImg = inputData.asRef<ImageInputData>().inputImage;
auto img = inputTransform(origImg);
Expand Down
3 changes: 3 additions & 0 deletions demos/common/cpp/pipelines/include/pipelines/async_pipeline.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,9 @@ class AsyncPipeline {
/// Otherwise returns unique sequential frame ID for this particular request. Same frame ID will be written in the
/// result structure.
virtual int64_t submitData(const InputData& inputData, const std::shared_ptr<MetaData>& metaData);
virtual int64_t submitData(std::vector<InputData>::iterator inputDataBegin,
std::vector<InputData>::iterator inputDataEnd,
const std::shared_ptr<MetaData>& metaData);

/// Gets available data from the queue
/// @param shouldKeepOrder if true, function will treat results as ready only if next sequential result (frame) is
Expand Down
23 changes: 23 additions & 0 deletions demos/common/cpp/pipelines/include/pipelines/metadata.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,26 @@ struct ClassificationImageMetaData : public ImageMetaData {
: ImageMetaData(img, timeStamp),
groundTruthId(groundTruthId) {}
};


struct ClassificationImageBatchMetaData : public MetaData {
std::vector<std::shared_ptr<ClassificationImageMetaData>> metadatas;

ClassificationImageMetaData(const std::vector<cv::Mat>::iterator imagesBeginIt,
const std::vector<cv::Mat>::iterator imagesEndIt,
std::chrono::steady_clock::time_point timeStamp,
std::vector<unsigned int>::iterator groundTruthIdsBeginIt,
const std::vector<unsigned int>::iterator groundTruthIdsEndIt)
: MetaData(){
size_t images_count = std::distance(imagesBeginIt, imagesEndIt);
size_t gt_count = std::distance(groundTruthIdsBeginIt, groundTruthIdsEndIt);
if (images_count != gt_count) {
throw std::runtime_error("images.size() != groundTruthIds.size()");
}

metadatas.reserve(images_count);
for (; imagesBeginIt != imagesEndIt;) {
metadatas.push_back(std::make_shared<ClassificationImageMetaData>(*it++, timeStamp, *groundTruthIdsBeginIt++));
}
}
};
54 changes: 54 additions & 0 deletions demos/common/cpp/pipelines/src/async_pipeline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,60 @@ void AsyncPipeline::waitForData(bool shouldKeepOrder) {
}
}

// Batched counterpart of submitData(const InputData&, ...): preprocesses the whole
// [inputDataBegin, inputDataEnd) range into a single infer request and starts it asynchronously.
// Returns the sequential frame ID assigned to this submission, or -1 when no idle request is
// available (the caller is expected to wait and retry).
int64_t AsyncPipeline::submitData(std::vector<InputData>::iterator inputDataBegin,
                                  std::vector<InputData>::iterator inputDataEnd,
                                  const std::shared_ptr<MetaData>& metaData) {
    auto frameID = inputFrameId;

    auto request = requestsPool->getIdleRequest();
    if (!request) {
        // Every infer request in the pool is busy; nothing was submitted.
        return -1;
    }

    auto startTime = std::chrono::steady_clock::now();
    auto internalModelData = model->preprocess(inputDataBegin, inputDataEnd, request);
    preprocessMetrics.update(startTime);

    // Completion callback: runs on an inference worker thread, so all bookkeeping on shared
    // state is done under the pipeline mutex. The lambda is mutable because metaData and
    // internalModelData are moved into the result.
    request.set_callback(
        [this, request, frameID, internalModelData, metaData, startTime](std::exception_ptr ex) mutable {
            {
                const std::lock_guard<std::mutex> lock(mtx);
                inferenceMetrics.update(startTime);
                try {
                    if (ex) {
                        std::rethrow_exception(ex);
                    }
                    InferenceResult result;

                    result.frameId = frameID;
                    result.metaData = std::move(metaData);
                    result.internalModelData = std::move(internalModelData);

                    // Snapshot every output tensor before the request is returned to the pool.
                    for (const auto& outName : model->getOutputsNames()) {
                        auto tensor = request.get_tensor(outName);
                        result.outputsData.emplace(outName, tensor);
                    }

                    completedInferenceResults.emplace(frameID, result);
                    requestsPool->setRequestIdle(request);
                } catch (...) {
                    // Keep only the first exception; it is surfaced to the consumer thread later.
                    if (!callbackException) {
                        callbackException = std::current_exception();
                    }
                }
            }
            // Wake any thread blocked waiting for results or a free slot.
            condVar.notify_one();
        });

    inputFrameId++;
    if (inputFrameId < 0)
        inputFrameId = 0;  // defensive reset if the frame counter ever goes negative

    request.start_async();

    return frameID;
}

int64_t AsyncPipeline::submitData(const InputData& inputData, const std::shared_ptr<MetaData>& metaData) {
auto frameID = inputFrameId;

Expand Down