diff --git a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/loaders/InputType.kt b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/loaders/InputType.kt
index 70e5dfb6d..d247eff2f 100644
--- a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/loaders/InputType.kt
+++ b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/loaders/InputType.kt
@@ -25,5 +25,10 @@ public enum class InputType {
      * This preprocessing will scale pixels between 0 and 1,
      * then will normalize each channel with respect to the ImageNet dataset.
      */
-    TORCH
+    TORCH,
+
+    /**
+     * This preprocessing will scale pixels between 0 and 1.
+     */
+    CV
 }
diff --git a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/loaders/LoadersUtil.kt b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/loaders/LoadersUtil.kt
index bbe224cbb..f890f692c 100644
--- a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/loaders/LoadersUtil.kt
+++ b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/loaders/LoadersUtil.kt
@@ -27,6 +27,7 @@ public fun preprocessInput(
         InputType.TF -> floatArray.map { it / 127.5f - 1 }.toFloatArray()
         InputType.CAFFE -> caffeStylePreprocessing(floatArray, tensorShape!!, channelsLast)
         InputType.TORCH -> torchStylePreprocessing(floatArray, tensorShape!!, channelsLast)
+        InputType.CV -> floatArray.map { it / 255.0f }.toFloatArray()
     }
 }
 
diff --git a/examples/src/main/kotlin/examples/onnx/objectdetection/yolov4/Yolov4.kt b/examples/src/main/kotlin/examples/onnx/objectdetection/yolov4/Yolov4.kt
new file mode 100644
index 000000000..4ce22bc03
--- /dev/null
+++ b/examples/src/main/kotlin/examples/onnx/objectdetection/yolov4/Yolov4.kt
@@ -0,0 +1,84 @@
+package examples.onnx.objectdetection.yolov4
+
+import examples.transferlearning.getFileFromResource
+import org.jetbrains.kotlinx.dl.api.extension.argmax
+import org.jetbrains.kotlinx.dl.api.inference.loaders.ONNXModelHub
+import org.jetbrains.kotlinx.dl.api.inference.objectdetection.DetectedObject
+import org.jetbrains.kotlinx.dl.api.inference.onnx.ONNXModels
+import org.jetbrains.kotlinx.dl.api.inference.onnx.OnnxInferenceModel
+import org.jetbrains.kotlinx.dl.dataset.handler.cocoCategories
+import org.jetbrains.kotlinx.dl.dataset.image.ColorOrder
+import org.jetbrains.kotlinx.dl.dataset.preprocessor.*
+import org.jetbrains.kotlinx.dl.dataset.preprocessor.image.resize
+import java.io.File
+import kotlin.math.max
+import kotlin.math.min
+
+object Yolov4 {
+    fun predict() {
+        val modelHub = ONNXModelHub(cacheDirectory = File("cache/pretrainedModels"))
+        val modelType = ONNXModels.ObjectDetection.YOLOv4
+        val model = modelHub.loadModel(modelType)
+        model.use {
+            println(it)
+            for (i in 0..9) {
+                val preprocessing: Preprocessing = preprocess {
+                    load {
+                        pathToData = getFileFromResource("datasets/vgg/image$i.jpg")
+                        imageShape = ImageShape(224, 224, 3)
+                        colorMode = ColorOrder.BGR
+                    }
+                    transformImage {
+                        resize {
+                            outputHeight = 416
+                            outputWidth = 416
+                        }
+                    }
+                }
+
+                val inputData = modelType.preprocessInput(preprocessing)
+                val predict = it.detectObjects(inputData)
+                println(predict.toString())
+            }
+        }
+    }
+
+    private fun OnnxInferenceModel.detectObjects(inputData: FloatArray, topK: Int = 5): List<DetectedObject> {
+        // Following https://opencv-tutorial.readthedocs.io/en/latest/yolo/yolo.html
+        val foundObjects = mutableListOf<DetectedObject>()
+        val rawPredictions = this.predictRaw(inputData)
+        for (colPredictions in rawPredictions) {
+            for (rowPrediction in colPredictions) {
+                val predictions = rowPrediction as Array<Array<Array<FloatArray>>>
+                for (col in predictions) {
+                    for (row in col) {
+                        for (block in row) {
+                            val xCenter = block[0]
+                            val yCenter = block[1]
+                            val w = block[2]
+                            val h = block[3]
+                            val conf = block[4]
+                            val idx = block.sliceArray(5..84).argmax()
+                            if (conf > 0.6) {
+                                val element = DetectedObject(
+                                    classLabel = cocoCategories[idx + 1] ?: "",
+                                    probability = conf,
+                                    xMin = min(xCenter + (w / 2), xCenter - (w / 2)),
+                                    xMax = max(xCenter + (w / 2), xCenter - (w / 2)),
+                                    yMin = min(yCenter + (h / 2), yCenter - (h / 2)),
+                                    yMax = max(yCenter + (h / 2), yCenter - (h / 2))
+                                )
+                                foundObjects.add(element)
+                            }
+                        }
+                    }
+                }
+            }
+        }
+
+        return foundObjects.groupBy { it.classLabel }
+            .mapValues { it.value.maxByOrNull { it.probability } }.values.filterNotNull().take(topK)
+    }
+}
+
+fun main() = Yolov4.predict()
\ No newline at end of file
diff --git a/onnx/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/onnx/ONNXModels.kt b/onnx/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/onnx/ONNXModels.kt
index 867fcec76..fa4ecb5c7 100644
--- a/onnx/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/onnx/ONNXModels.kt
+++ b/onnx/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/onnx/ONNXModels.kt
@@ -199,11 +199,24 @@ public object ONNXModels {
         public object YOLOv4 :
             ObjectDetection("models/onnx/objectdetection/yolov4") {
             override fun preprocessInput(data: FloatArray, tensorShape: LongArray): FloatArray {
-                TODO("Not yet implemented")
+                val transposedData = Transpose(axes = intArrayOf(2, 0, 1)).apply(
+                    data,
+                    ImageShape(width = tensorShape[0], height = tensorShape[1], channels = tensorShape[2])
+                )
+
+                // TODO: the transposed shape should be returned by the Transpose apply method itself
+                val transposedShape = longArrayOf(tensorShape[2], tensorShape[0], tensorShape[1])
+
+                return preprocessInput(
+                    transposedData,
+                    transposedShape,
+                    inputType = InputType.CV,
+                    channelsLast = false
+                )
             }
 
             override fun pretrainedModel(modelHub: ModelHub): OnnxInferenceModel {
-                TODO("Not yet implemented")
+                return modelHub.loadModel(this)
             }
         }
 
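For reference, a minimal sketch of how the new `InputType.CV` branch behaves on its own. The pixel values and the 2x2x3 shape are made up for illustration; the call mirrors the parameter order used by the YOLOv4 `preprocessInput` call site in this patch.

```kotlin
import org.jetbrains.kotlinx.dl.api.inference.keras.loaders.InputType
import org.jetbrains.kotlinx.dl.api.inference.keras.loaders.preprocessInput

fun main() {
    // A fake 2x2 RGB image flattened to a FloatArray with pixel values in [0, 255].
    val pixels = floatArrayOf(
        0f, 63.75f, 127.5f,
        191.25f, 255f, 0f,
        63.75f, 127.5f, 191.25f,
        255f, 0f, 63.75f
    )

    // InputType.CV only rescales to [0, 1]; unlike CAFFE/TORCH it performs no mean
    // subtraction or per-channel normalization, which is what the YOLOv4 preprocessing
    // in this patch relies on.
    val scaled = preprocessInput(
        pixels,
        longArrayOf(2, 2, 3),
        inputType = InputType.CV,
        channelsLast = true
    )

    println(scaled.joinToString()) // 0.0, 0.25, 0.5, 0.75, 1.0, 0.0, ...
}
```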