Kotlin · kokorins · Jun 14, 2021 · Jun 21, 2021 · Jun 21, 2021 · Jun 23, 2021
diff --git a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/loaders/InputType.kt b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/loaders/InputType.kt
@@ -25,5 +25,10 @@ public enum class InputType {
      * This preprocessing will scale pixels between 0 and 1,
      * then will normalize each channel with respect to the ImageNet dataset.
      */
-    TORCH
+    TORCH,
+
+    /**
+     * This preprocessing will only scale pixels between 0 and 1 (each value is divided by 255).
+     */
+    CV
 }
diff --git a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/loaders/LoadersUtil.kt b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/loaders/LoadersUtil.kt
@@ -27,6 +27,7 @@ public fun preprocessInput(
         InputType.TF -> floatArray.map { it / 127.5f - 1 }.toFloatArray()
         InputType.CAFFE -> caffeStylePreprocessing(floatArray, tensorShape!!, channelsLast)
         InputType.TORCH -> torchStylePreprocessing(floatArray, tensorShape!!, channelsLast)
+        InputType.CV -> floatArray.map { it / 255.0f }.toFloatArray()
     }
 }
 

diff --git a/examples/src/main/kotlin/examples/onnx/objectdetection/yolov4/Yolov4.kt b/examples/src/main/kotlin/examples/onnx/objectdetection/yolov4/Yolov4.kt
@@ -0,0 +1,84 @@
+package examples.onnx.objectdetection.yolov4
+
+import examples.transferlearning.getFileFromResource
+import org.jetbrains.kotlinx.dl.api.extension.argmax
+import org.jetbrains.kotlinx.dl.api.inference.loaders.ONNXModelHub
+import org.jetbrains.kotlinx.dl.api.inference.objectdetection.DetectedObject
+import org.jetbrains.kotlinx.dl.api.inference.onnx.ONNXModels
+import org.jetbrains.kotlinx.dl.api.inference.onnx.OnnxInferenceModel
+import org.jetbrains.kotlinx.dl.dataset.handler.cocoCategories
+import org.jetbrains.kotlinx.dl.dataset.image.ColorOrder
+import org.jetbrains.kotlinx.dl.dataset.preprocessor.*
+import org.jetbrains.kotlinx.dl.dataset.preprocessor.image.resize
+import java.io.File
+import kotlin.math.max
+import kotlin.math.min
+
+/** Example: runs the pretrained YOLOv4 ONNX model on ten sample images and prints the detected objects. */
+object Yolov4 {
+    /** Loads the model, then prints the detections for ten sample images, each resized to 416x416. */
+    fun predict() {
+        val modelHub = ONNXModelHub(cacheDirectory = File("cache/pretrainedModels"))
+        val modelType = ONNXModels.ObjectDetection.YOLOv4
+        modelHub.loadModel(modelType).use { model ->
+            println(model)
+            for (i in 0..9) {
+                val preprocessing: Preprocessing = preprocess {
+                    load {
+                        pathToData = getFileFromResource("datasets/vgg/image$i.jpg")
+                        imageShape = ImageShape(224, 224, 3)
+                        colorMode = ColorOrder.BGR
+                    }
+                    transformImage {
+                        resize {
+                            outputHeight = 416
+                            outputWidth = 416
+                        }
+                    }
+                }
+                val inputData = modelType.preprocessInput(preprocessing)
+                println(model.detectObjects(inputData))
+            }
+        }
+    }
+
+    /**
+     * Turns the raw model output into [DetectedObject]s, following
+     * https://opencv-tutorial.readthedocs.io/en/latest/yolo/yolo.html: boxes whose confidence exceeds 0.6 are
+     * reduced to the most confident box per class label, and the [topK] most confident of those are returned.
+     */
+    private fun OnnxInferenceModel.detectObjects(inputData: FloatArray, topK: Int = 5): List<DetectedObject> {
+        val foundObjects = mutableListOf<DetectedObject>()
+        for (colPredictions in predictRaw(inputData)) {
+            for (rowPrediction in colPredictions) {
+                val predictions = rowPrediction as Array<Array<Array<FloatArray>>>
+                for (col in predictions) {
+                    for (row in col) {
+                        for (block in row) {
+                            val conf = block[4]
+                            if (conf > 0.6) {
+                                val (xCenter, yCenter, w, h) = block
+                                // Only boxes that pass the threshold pay for the class-score slice and argmax.
+                                val idx = block.sliceArray(5..84).argmax()
+                                foundObjects += DetectedObject(
+                                    classLabel = cocoCategories[idx + 1] ?: "",
+                                    probability = conf,
+                                    xMin = min(xCenter + (w / 2), xCenter - (w / 2)),
+                                    xMax = max(xCenter + (w / 2), xCenter - (w / 2)),
+                                    yMin = min(yCenter + (h / 2), yCenter - (h / 2)),
+                                    yMax = max(yCenter + (h / 2), yCenter - (h / 2))
+                                )
+                            }
+                        }
+                    }
+                }
+            }
+        }
+        // Rank the per-class winners by confidence so that take(topK) returns the K best, not the K first seen.
+        return foundObjects.groupBy { it.classLabel }
+            .mapNotNull { (_, boxes) -> boxes.maxByOrNull { it.probability } }
+            .sortedByDescending { it.probability }.take(topK)
+    }
+}
+
+fun main(): Unit = Yolov4.predict() // Entry point: runs the YOLOv4 example.
diff --git a/onnx/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/onnx/ONNXModels.kt b/onnx/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/onnx/ONNXModels.kt
@@ -199,11 +199,24 @@ public object ONNXModels {
         public object YOLOv4 :
             ObjectDetection<OnnxInferenceModel, OnnxInferenceModel>("models/onnx/objectdetection/yolov4") {
             override fun preprocessInput(data: FloatArray, tensorShape: LongArray): FloatArray {
-                TODO("Not yet implemented")
+                // tensorShape is read positionally: entries 0 and 1 are passed on as width and height, entry 2 as channels.
+                val (dim0, dim1, channels) = tensorShape
+                val sourceShape = ImageShape(width = dim0, height = dim1, channels = channels)
+
+                // Move the channel axis (index 2) to the front.
+                val channelsFirstData = Transpose(axes = intArrayOf(2, 0, 1)).apply(data, sourceShape)
+
+                // TODO: the transposed shape should be returned by Transpose.apply instead of being rebuilt here
+                val channelsFirstShape = longArrayOf(channels, dim0, dim1)
+
+                // The data is channels-first at this point, hence channelsLast = false.
+                return preprocessInput(
+                    channelsFirstData, channelsFirstShape, inputType = InputType.CV, channelsLast = false
+                )
             }
 
             override fun pretrainedModel(modelHub: ModelHub): OnnxInferenceModel {
-                TODO("Not yet implemented")
+                return modelHub.loadModel(this)
             }
         }
     }