Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -25,5 +25,10 @@ public enum class InputType {
* This preprocessing will scale pixels between 0 and 1,
* then will normalize each channel with respect to the ImageNet dataset.
*/
TORCH
TORCH,

/**
* This preprocessing only scales pixels between 0 and 1 (divides each value by 255), without any further normalization.
*/
CV
}
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ public fun preprocessInput(
InputType.TF -> floatArray.map { it / 127.5f - 1 }.toFloatArray()
InputType.CAFFE -> caffeStylePreprocessing(floatArray, tensorShape!!, channelsLast)
InputType.TORCH -> torchStylePreprocessing(floatArray, tensorShape!!, channelsLast)
InputType.CV -> floatArray.map { it / 255.0f }.toFloatArray()
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
package examples.onnx.objectdetection.yolov4

import examples.transferlearning.getFileFromResource
import org.jetbrains.kotlinx.dl.api.extension.argmax
import org.jetbrains.kotlinx.dl.api.inference.loaders.ONNXModelHub
import org.jetbrains.kotlinx.dl.api.inference.objectdetection.DetectedObject
import org.jetbrains.kotlinx.dl.api.inference.onnx.ONNXModels
import org.jetbrains.kotlinx.dl.api.inference.onnx.OnnxInferenceModel
import org.jetbrains.kotlinx.dl.dataset.handler.cocoCategories
import org.jetbrains.kotlinx.dl.dataset.image.ColorOrder
import org.jetbrains.kotlinx.dl.dataset.preprocessor.*
import org.jetbrains.kotlinx.dl.dataset.preprocessor.image.resize
import java.io.File
import kotlin.math.max
import kotlin.math.min

/**
 * Example that runs the pretrained YOLOv4 ONNX model over ten sample images
 * and prints the objects detected on each of them.
 */
object Yolov4 {
    /**
     * Loads the YOLOv4 model via [ONNXModelHub] (cache directory `cache/pretrainedModels`),
     * then for `datasets/vgg/image0.jpg` .. `image9.jpg` builds a preprocessing pipeline
     * (BGR colour order, resize to 416x416), runs detection and prints the detected objects.
     */
    fun predict() {
        val modelHub = ONNXModelHub(cacheDirectory = File("cache/pretrainedModels"))
        val modelType = ONNXModels.ObjectDetection.YOLOv4
        val model = modelHub.loadModel(modelType)
        model.use { detector ->
            println(detector)
            for (i in 0..9) {
                val preprocessing: Preprocessing = preprocess {
                    load {
                        pathToData = getFileFromResource("datasets/vgg/image$i.jpg")
                        imageShape = ImageShape(224, 224, 3)
                        colorMode = ColorOrder.BGR
                    }
                    transformImage {
                        resize {
                            outputHeight = 416
                            outputWidth = 416
                        }
                    }
                }

                val inputData = modelType.preprocessInput(preprocessing)
                val detectedObjects = detector.detectObjects(inputData)
                println(detectedObjects.toString())
            }
        }
    }

    /**
     * Runs the model on [inputData] and converts the raw output into [DetectedObject]s.
     *
     * Every innermost `FloatArray` of the raw output is read as one candidate box:
     * indices 0..3 are x-center, y-center, width and height, index 4 is the confidence
     * and indices 5..84 are the per-class scores.
     * NOTE(review): this layout is taken from the indexing below and the linked tutorial;
     * confirm it against the actual output signature of the ONNX model.
     *
     * Only candidates whose confidence is strictly greater than [minConfidence] are kept.
     * For each class label only the most confident candidate survives, and the survivors
     * are returned ordered by descending confidence.
     *
     * @param inputData preprocessed input passed to `predictRaw`.
     * @param topK maximum number of detections to return.
     * @param minConfidence confidence threshold a candidate has to exceed to be kept.
     * @return at most [topK] detections, most confident first, with at most one per class label.
     */
    private fun OnnxInferenceModel.detectObjects(
        inputData: FloatArray,
        topK: Int = 5,
        minConfidence: Double = 0.6
    ): List<DetectedObject> {
        // Following https://opencv-tutorial.readthedocs.io/en/latest/yolo/yolo.html
        val foundObjects = mutableListOf<DetectedObject>()
        val rawPredictions = this.predictRaw(inputData)
        for (colPredictions in rawPredictions) {
            for (rowPrediction in colPredictions) {
                val predictions = rowPrediction as Array<Array<Array<FloatArray>>>
                for (col in predictions) {
                    for (row in col) {
                        for (block in row) {
                            val conf = block[4]
                            if (conf > minConfidence) {
                                val xCenter = block[0]
                                val yCenter = block[1]
                                val halfWidth = block[2] / 2
                                val halfHeight = block[3] / 2
                                // The class scores are only inspected for boxes that pass the
                                // confidence check, so rejected boxes cost no slice/argmax work.
                                val idx = block.sliceArray(5..84).argmax()
                                foundObjects.add(
                                    DetectedObject(
                                        // Lookup is shifted by one; presumably cocoCategories
                                        // uses 1-based ids (verify against its definition).
                                        classLabel = cocoCategories[idx + 1] ?: "",
                                        probability = conf,
                                        // min/max keep the box well-formed even if the raw
                                        // width or height comes out negative.
                                        xMin = min(xCenter + halfWidth, xCenter - halfWidth),
                                        xMax = max(xCenter + halfWidth, xCenter - halfWidth),
                                        yMin = min(yCenter + halfHeight, yCenter - halfHeight),
                                        yMax = max(yCenter + halfHeight, yCenter - halfHeight)
                                    )
                                )
                            }
                        }
                    }
                }
            }
        }

        // Keep the best candidate per class, then rank the classes by confidence so that
        // `topK` really selects the most confident detections and not the first ones seen.
        return foundObjects
            .groupBy { it.classLabel }
            .values
            .mapNotNull { candidates -> candidates.maxByOrNull { it.probability } }
            .sortedByDescending { it.probability }
            .take(topK)
    }
}

/** Entry point: runs the YOLOv4 object detection example. */
fun main() {
    Yolov4.predict()
}
Original file line number Diff line number Diff line change
Expand Up @@ -199,11 +199,24 @@ public object ONNXModels {
/**
 * YOLOv4 object detection model descriptor; its resources are addressed by the relative
 * path `models/onnx/objectdetection/yolov4`.
 */
public object YOLOv4 :
    ObjectDetection<OnnxInferenceModel, OnnxInferenceModel>("models/onnx/objectdetection/yolov4") {
    /**
     * Transposes [data] with axes `(2, 0, 1)` and then applies [InputType.CV] preprocessing
     * with `channelsLast = false`.
     *
     * @param data flat image data; [tensorShape] is read as (width, height, channels).
     * @param tensorShape shape of [data] before the transposition; the first three entries are used.
     * @return the transposed and preprocessed data.
     */
    override fun preprocessInput(data: FloatArray, tensorShape: LongArray): FloatArray {
        val transposedData = Transpose(axes = intArrayOf(2, 0, 1)).apply(
            data,
            ImageShape(width = tensorShape[0], height = tensorShape[1], channels = tensorShape[2])
        )

        // TODO: the transposed shape should be returned by Transpose.apply instead of being rebuilt here
        val transposedShape = longArrayOf(tensorShape[2], tensorShape[0], tensorShape[1])

        return preprocessInput(
            transposedData,
            transposedShape,
            inputType = InputType.CV,
            channelsLast = false
        )
    }

    /**
     * Loads this model through the given [modelHub].
     */
    override fun pretrainedModel(modelHub: ModelHub): OnnxInferenceModel {
        return modelHub.loadModel(this)
    }
}
}
Expand Down