From 78196a88018bad65df0adbf925ffc52862bd6abf Mon Sep 17 00:00:00 2001
From: Stan van der Bend <stanvdbend@gmail.com>
Date: Mon, 14 Jun 2021 21:16:31 +0200
Subject: [PATCH 1/3] Added missing saving functions for ReLU and ELU
 activation layers (JetBrains#78)

---
 .../dl/api/inference/keras/ModelSaver.kt      | 30 +++++++++++++++----
 1 file changed, 25 insertions(+), 5 deletions(-)

diff --git a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/ModelSaver.kt b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/ModelSaver.kt
index f4ee80356..78609616b 100644
--- a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/ModelSaver.kt
+++ b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/ModelSaver.kt
@@ -12,10 +12,7 @@ import org.jetbrains.kotlinx.dl.api.core.Sequential
 import org.jetbrains.kotlinx.dl.api.core.activation.Activations
 import org.jetbrains.kotlinx.dl.api.core.initializer.*
 import org.jetbrains.kotlinx.dl.api.core.layer.Layer
-import org.jetbrains.kotlinx.dl.api.core.layer.activation.PReLU
-import org.jetbrains.kotlinx.dl.api.core.layer.activation.LeakyReLU
-import org.jetbrains.kotlinx.dl.api.core.layer.activation.Softmax
-import org.jetbrains.kotlinx.dl.api.core.layer.activation.ThresholdedReLU
+import org.jetbrains.kotlinx.dl.api.core.layer.activation.*
 import org.jetbrains.kotlinx.dl.api.core.layer.convolutional.*
 import org.jetbrains.kotlinx.dl.api.core.layer.core.ActivationLayer
 import org.jetbrains.kotlinx.dl.api.core.layer.core.Dense
@@ -28,6 +25,7 @@ import org.jetbrains.kotlinx.dl.api.core.layer.reshaping.ZeroPadding2D
 import org.jetbrains.kotlinx.dl.api.core.regularizer.L2L1
 import org.jetbrains.kotlinx.dl.api.core.regularizer.Regularizer
 import org.jetbrains.kotlinx.dl.api.inference.keras.config.*
+import org.tensorflow.op.nn.Elu
 import java.io.File
 
 /**
@@ -86,6 +84,8 @@ private fun convertToKerasLayer(layer: Layer, isKerasFullyCompatible: Boolean, i
         is Input -> createKerasInput(layer)
         is BatchNorm -> createKerasBatchNorm(layer, isKerasFullyCompatible)
         is ActivationLayer -> createKerasActivationLayer(layer)
+        is ELU -> createKerasELU(layer)
+        is ReLU -> createKerasReLU(layer)
         is PReLU -> createKerasPReLULayer(layer, isKerasFullyCompatible)
         is LeakyReLU -> createKerasLeakyReLU(layer)
         is ThresholdedReLU -> createKerasThresholdedReLULayer(layer)
@@ -221,6 +221,26 @@ private fun createKerasActivationLayer(layer: ActivationLayer): KerasLayer {
     return KerasLayer(class_name = LAYER_ACTIVATION, config = configX)
 }
 
+private fun createKerasReLU(layer: ReLU): KerasLayer {
+    val configX = LayerConfig(
+        dtype = DATATYPE_FLOAT32,
+        max_value = layer.maxValue?.toDouble(),
+        negative_slope = layer.negativeSlope.toDouble(),
+        threshold = layer.threshold.toDouble(),
+        name = layer.name
+    )
+    return KerasLayer(class_name = LAYER_RELU, config = configX)
+}
+
+private fun createKerasELU(layer: ELU): KerasLayer {
+    val configX = LayerConfig(
+        dtype = DATATYPE_FLOAT32,
+        alpha = layer.alpha.toDouble(),
+        name = layer.name
+    )
+    return KerasLayer(class_name = LAYER_ELU, config = configX)
+}
+
 private fun createKerasPReLULayer(layer: PReLU, isKerasFullyCompatible: Boolean): KerasLayer {
     val configX = LayerConfig(
         dtype = DATATYPE_FLOAT32,
@@ -604,4 +624,4 @@ private fun createKerasZeroPadding2D(layer: ZeroPadding2D): KerasLayer {
         padding = KerasPadding.ZeroPadding2D(layer.padding)
     )
     return KerasLayer(class_name = LAYER_ZERO_PADDING_2D, config = configX)
-}
+}
\ No newline at end of file

From 6347d656d898fde3d2084e9b9489f6cddf1ed150 Mon Sep 17 00:00:00 2001
From: Stan van der Bend <stanvdbend@gmail.com>
Date: Mon, 14 Jun 2021 21:23:56 +0200
Subject: [PATCH 2/3] Reverted changes to the imports

---
 .../kotlinx/dl/api/inference/keras/ModelSaver.kt          | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/ModelSaver.kt b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/ModelSaver.kt
index 78609616b..9e9adcd7f 100644
--- a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/ModelSaver.kt
+++ b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/ModelSaver.kt
@@ -12,7 +12,12 @@ import org.jetbrains.kotlinx.dl.api.core.Sequential
 import org.jetbrains.kotlinx.dl.api.core.activation.Activations
 import org.jetbrains.kotlinx.dl.api.core.initializer.*
 import org.jetbrains.kotlinx.dl.api.core.layer.Layer
-import org.jetbrains.kotlinx.dl.api.core.layer.activation.*
+import org.jetbrains.kotlinx.dl.api.core.layer.activation.ELU
+import org.jetbrains.kotlinx.dl.api.core.layer.activation.ReLU
+import org.jetbrains.kotlinx.dl.api.core.layer.activation.PReLU
+import org.jetbrains.kotlinx.dl.api.core.layer.activation.LeakyReLU
+import org.jetbrains.kotlinx.dl.api.core.layer.activation.Softmax
+import org.jetbrains.kotlinx.dl.api.core.layer.activation.ThresholdedReLU
 import org.jetbrains.kotlinx.dl.api.core.layer.convolutional.*
 import org.jetbrains.kotlinx.dl.api.core.layer.core.ActivationLayer
 import org.jetbrains.kotlinx.dl.api.core.layer.core.Dense
@@ -25,7 +30,6 @@ import org.jetbrains.kotlinx.dl.api.core.layer.reshaping.ZeroPadding2D
 import org.jetbrains.kotlinx.dl.api.core.regularizer.L2L1
 import org.jetbrains.kotlinx.dl.api.core.regularizer.Regularizer
 import org.jetbrains.kotlinx.dl.api.inference.keras.config.*
-import org.tensorflow.op.nn.Elu
 import java.io.File
 
 /**

From bd6af53f16312815920aeaa0444114638a721add Mon Sep 17 00:00:00 2001
From: Stan van der Bend <stanvdbend@gmail.com>
Date: Thu, 8 Jul 2021 04:51:50 +0200
Subject: [PATCH 3/3] WIP Added AbstractSeparableConv.kt (see desc.) #125

- tests and the 1D variant still have to be written
- this also fixes a bug in WeightLoader: the default initializer for SeparableConv2D looked up the bias variable under the DepthwiseConv2D bias name, so the bias weights were not loaded correctly
---
 .../convolutional/AbstractSeparableConv.kt    | 207 ++++++++++++++++++
 .../layer/convolutional/SeparableConv2D.kt    | 179 ++++-----------
 .../dl/api/core/util/nameConventions.kt       |   6 +-
 .../dl/api/inference/keras/WeightLoader.kt    |  16 +-
 4 files changed, 258 insertions(+), 150 deletions(-)
 create mode 100644 api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/convolutional/AbstractSeparableConv.kt

diff --git a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/convolutional/AbstractSeparableConv.kt b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/convolutional/AbstractSeparableConv.kt
new file mode 100644
index 000000000..145786397
--- /dev/null
+++ b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/convolutional/AbstractSeparableConv.kt
@@ -0,0 +1,207 @@
+package org.jetbrains.kotlinx.dl.api.core.layer.convolutional
+
+import org.jetbrains.kotlinx.dl.api.core.KGraph
+import org.jetbrains.kotlinx.dl.api.core.activation.Activations
+import org.jetbrains.kotlinx.dl.api.core.initializer.Initializer
+import org.jetbrains.kotlinx.dl.api.core.layer.Layer
+import org.jetbrains.kotlinx.dl.api.core.regularizer.Regularizer
+import org.jetbrains.kotlinx.dl.api.core.shape.TensorShape
+import org.jetbrains.kotlinx.dl.api.core.shape.numElements
+import org.jetbrains.kotlinx.dl.api.core.shape.shapeFromDims
+import org.jetbrains.kotlinx.dl.api.core.util.getDType
+import org.tensorflow.Operand
+import org.tensorflow.Shape
+import org.tensorflow.op.Ops
+import org.tensorflow.op.core.Variable
+import kotlin.math.roundToInt
+
+/**
+ * Abstract N-D convolution layer with separable filters.
+ *
+ * This layer performs a depthwise convolution that acts separately on
+ * channels, followed by a pointwise convolution that mixes channels.
+ *
+ * If [useBiasInternal] is `true` and a [biasInitializerInternal] is provided,
+ * it adds a bias vector to the output.
+ * It then optionally applies an activation function to produce the final output.
+ *
+ * Note: layer attributes cannot be modified after the layer has been called once (except the `trainable` attribute).
+ *
+ * TODO: add rank for getting the channel axis?
+ * TODO: add docs for params?
+ * TODO: add trainable param?
+ */
+public abstract class AbstractSeparableConv(
+    protected val filtersInternal: Long,
+    protected val kernelSizeInternal: LongArray,
+    protected val stridesInternal: LongArray,
+    protected val dilationsInternal: LongArray,
+    protected val depthMulitplierInternal: Int = 1,
+    protected val activationInternal: Activations,
+    protected val depthwiseInitializerInternal: Initializer,
+    protected val pointwiseInitializerInternal: Initializer,
+    protected val biasInitializerInternal: Initializer,
+    protected val depthwiseRegularizerInternal: Regularizer?,
+    protected val pointwiseRegularizerInternal: Regularizer?,
+    protected val biasRegularizerInternal: Regularizer?,
+    protected val useBiasInternal: Boolean,
+    protected val depthwiseKernelVariableName: String,
+    protected val pointwiseKernelVariableName: String,
+    protected val biasVariableName: String,
+    name: String
+) : Layer(
+    name
+) {
+
+    init {
+        require(dilationsInternal.all { it == 1L } || stridesInternal.all { it == 1L }) { // reject only when both deviate from 1
+            "Specifying any dilations value != 1 is incompatible with specifying any stride value != 1"
+        }
+    }
+
+    /** Returns the shape of the depthwise kernel weights. */
+    public val depthwiseShapeArray: LongArray get() = TensorShape(depthwiseKernelShape).dims()
+
+    /** Returns the shape of the pointwise kernel weights. */
+    public val pointwiseShapeArray: LongArray get() = TensorShape(pointwiseKernelShape).dims()
+
+    /** Returns the shape of bias weights. */
+    public val biasShapeArray: LongArray get() = TensorShape(biasShape).dims()
+
+    override val hasActivation: Boolean get() = true
+
+    override val paramCount: Int
+        get() = (depthwiseKernelShape.numElements() + pointwiseKernelShape.numElements() + biasShape.numElements()).toInt()
+
+    override var weights: Map<String, Array<*>>
+        get() = extractDepthConvWeights()
+        set(value) = assignWeights(value)
+
+    // weight tensors
+    protected lateinit var depthwiseKernel: Variable<Float>
+    protected lateinit var pointwiseKernel: Variable<Float>
+    protected var bias: Variable<Float>? = null
+
+    // weight tensor shapes
+    protected lateinit var depthwiseKernelShape: Shape
+    protected lateinit var pointwiseKernelShape: Shape
+    protected lateinit var biasShape: Shape
+
+    override fun build(tf: Ops, kGraph: KGraph, inputShape: Shape) {
+
+        // Amount of channels should be the last value in the inputShape
+        val numberOfChannels = inputShape.size(inputShape.numDimensions() - 1)
+
+        // Compute shapes of kernel and bias matrices
+        computeMatricesShapes(numberOfChannels)
+
+        // fanIn and fanOut must be calculated before addWeight is called, because they are used in its calculation.
+        // TODO: rewrite addWeight to avoid this ordering dependency.
+        val inputDepth = getInputDepth(numberOfChannels) // number of input channels
+        val outputDepth = getOutputDepth(numberOfChannels) // number of output channels
+
+        fanIn = (inputDepth * multiply(*kernelSizeInternal)).toInt()
+        fanOut = ((outputDepth * multiply(*kernelSizeInternal)).toDouble() /
+                multiply(*stridesInternal).toDouble()).roundToInt()
+
+        val (depthwiseKernelVariableName, pointwiseKernelVariableName, biasVariableName) = defineVariableNames()
+
+        createSeparableConvVariables(
+            tf,
+            depthwiseKernelVariableName,
+            pointwiseKernelVariableName,
+            biasVariableName,
+            kGraph
+        )
+    }
+
+    override fun computeOutputShape(inputShape: Shape): Shape {
+        val shape = defineOutputShape(inputShape)
+        outputShape = TensorShape(shape)
+        return shape
+    }
+
+    override fun forward(
+        tf: Ops,
+        input: Operand<Float>,
+        isTraining: Operand<Boolean>,
+        numberOfLosses: Operand<Float>?
+    ): Operand<Float> {
+        var output = separableConvImplementation(tf, input)
+
+        if (useBiasInternal) {
+            output = tf.nn.biasAdd(output, bias)
+        }
+
+        return Activations.convert(activationInternal).apply(tf, output, name)
+    }
+
+    private fun defineVariableNames(): Triple<String, String, String> {
+        return if (name.isNotEmpty()) {
+            Triple(
+                depthwiseKernalVarName(name),
+                pointwiseKernelVarName(name),
+                biasVarName(name)
+            )
+        } else {
+            Triple(depthwiseKernelVariableName, pointwiseKernelVariableName, biasVariableName)
+        }
+    }
+
+    private fun createSeparableConvVariables(
+        tf: Ops,
+        depthwiseKernelVariableName: String,
+        pointwiseKernelVariableName: String,
+        biasVariableName: String,
+        kGraph: KGraph
+    ) {
+        depthwiseKernel = tf.withName(depthwiseKernelVariableName).variable(depthwiseKernelShape, getDType())
+        pointwiseKernel = tf.withName(pointwiseKernelVariableName).variable(pointwiseKernelShape, getDType())
+        if (useBiasInternal) bias = tf.withName(biasVariableName).variable(biasShape, getDType())
+
+        depthwiseKernel = addWeight(
+            tf,
+            kGraph,
+            depthwiseKernelVariableName,
+            depthwiseKernel,
+            depthwiseInitializerInternal,
+            depthwiseRegularizerInternal
+        )
+        pointwiseKernel = addWeight(
+            tf,
+            kGraph,
+            pointwiseKernelVariableName,
+            pointwiseKernel,
+            pointwiseInitializerInternal,
+            pointwiseRegularizerInternal
+        )
+        if (useBiasInternal)
+            bias = addWeight(tf, kGraph, biasVariableName, bias!!, biasInitializerInternal, biasRegularizerInternal)
+    }
+
+    protected open fun getInputDepth(numberOfChannels: Long): Long = numberOfChannels
+
+    protected open fun getOutputDepth(numberOfChannels: Long): Long = filtersInternal
+
+    private fun computeMatricesShapes(numberOfChannels: Long) {
+        depthwiseKernelShape = shapeFromDims(*kernelSizeInternal, numberOfChannels, depthMulitplierInternal.toLong())
+        pointwiseKernelShape = shapeFromDims(1, 1, numberOfChannels * depthMulitplierInternal, filtersInternal)
+        biasShape = Shape.make(filtersInternal)
+    }
+
+    private fun extractDepthConvWeights(): Map<String, Array<*>> {
+        return extractWeights(defineVariableNames().toList())
+    }
+
+    protected abstract fun depthwiseKernalVarName(name: String): String
+
+    protected abstract fun pointwiseKernelVarName(name: String): String
+
+    protected abstract fun biasVarName(name: String): String
+
+    protected abstract fun separableConvImplementation(tf: Ops, input: Operand<Float>): Operand<Float>
+
+    protected abstract fun defineOutputShape(inputShape: Shape): Shape
+}
+
+private fun multiply(vararg values: Long) = values.fold(1L, Long::times)
diff --git a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/convolutional/SeparableConv2D.kt b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/convolutional/SeparableConv2D.kt
index c0872e09d..ef1ac0dbf 100644
--- a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/convolutional/SeparableConv2D.kt
+++ b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/convolutional/SeparableConv2D.kt
@@ -5,31 +5,23 @@
 
 package org.jetbrains.kotlinx.dl.api.core.layer.convolutional
 
-import org.jetbrains.kotlinx.dl.api.core.KGraph
 import org.jetbrains.kotlinx.dl.api.core.activation.Activations
 import org.jetbrains.kotlinx.dl.api.core.initializer.HeNormal
 import org.jetbrains.kotlinx.dl.api.core.initializer.HeUniform
 import org.jetbrains.kotlinx.dl.api.core.initializer.Initializer
-import org.jetbrains.kotlinx.dl.api.core.layer.Layer
 import org.jetbrains.kotlinx.dl.api.core.layer.NoGradients
 import org.jetbrains.kotlinx.dl.api.core.layer.requireArraySize
 import org.jetbrains.kotlinx.dl.api.core.regularizer.Regularizer
-import org.jetbrains.kotlinx.dl.api.core.shape.TensorShape
 import org.jetbrains.kotlinx.dl.api.core.shape.convOutputLength
-import org.jetbrains.kotlinx.dl.api.core.shape.numElements
-import org.jetbrains.kotlinx.dl.api.core.shape.shapeFromDims
-import org.jetbrains.kotlinx.dl.api.core.util.getDType
-import org.jetbrains.kotlinx.dl.api.core.util.separableConv2dBiasVarName
-import org.jetbrains.kotlinx.dl.api.core.util.separableConv2dDepthwiseKernelVarName
-import org.jetbrains.kotlinx.dl.api.core.util.separableConv2dPointwiseKernelVarName
+import org.jetbrains.kotlinx.dl.api.core.util.separableConvBiasVarName
+import org.jetbrains.kotlinx.dl.api.core.util.separableConvDepthwiseKernelVarName
+import org.jetbrains.kotlinx.dl.api.core.util.separableConvPointwiseKernelVarName
 import org.tensorflow.Operand
 import org.tensorflow.Shape
 import org.tensorflow.op.Ops
-import org.tensorflow.op.core.Variable
 import org.tensorflow.op.nn.Conv2d
 import org.tensorflow.op.nn.DepthwiseConv2dNative
 import org.tensorflow.op.nn.DepthwiseConv2dNative.dilations
-import kotlin.math.roundToInt
 
 private const val DEPTHWISE_KERNEL_VARIABLE_NAME = "separable_conv2d_depthwise_kernel"
 private const val POINTWISE_KERNEL_VARIABLE_NAME = "separable_conv2d_pointwise_kernel"
@@ -84,117 +76,33 @@ public class SeparableConv2D(
     public val padding: ConvPadding = ConvPadding.SAME,
     public val useBias: Boolean = true,
     name: String = ""
-) : Layer(name), NoGradients {
-    // weight tensors
-    private lateinit var depthwiseKernel: Variable<Float>
-    private lateinit var pointwiseKernel: Variable<Float>
-    private var bias: Variable<Float>? = null
-
-    // weight tensor shapes
-    private lateinit var depthwiseKernelShape: Shape
-    private lateinit var pointwiseKernelShape: Shape
-    private lateinit var biasShape: Shape
+) : AbstractSeparableConv(
+    filtersInternal = filters,
+    kernelSizeInternal = kernelSize,
+    stridesInternal = strides,
+    dilationsInternal = dilations,
+    depthMulitplierInternal = depthMultiplier,
+    activationInternal = activation,
+    depthwiseInitializerInternal = depthwiseInitializer,
+    pointwiseInitializerInternal = pointwiseInitializer,
+    biasInitializerInternal = biasInitializer,
+    depthwiseRegularizerInternal = depthwiseRegularizer,
+    pointwiseRegularizerInternal = pointwiseRegularizer,
+    biasRegularizerInternal = biasRegularizer,
+    useBiasInternal = useBias,
+    depthwiseKernelVariableName = DEPTHWISE_KERNEL_VARIABLE_NAME,
+    pointwiseKernelVariableName = POINTWISE_KERNEL_VARIABLE_NAME,
+    biasVariableName = BIAS_VARIABLE_NAME,
+    name
+), NoGradients {
 
     init {
         requireArraySize(kernelSize, 2, "kernelSize")
         requireArraySize(strides, 4, "strides")
         requireArraySize(dilations, 4, "dilations")
-        isTrainable = false
-    }
-
-    override fun build(tf: Ops, kGraph: KGraph, inputShape: Shape) {
-        // Amount of channels should be the last value in the inputShape (make warning here)
-        val numberOfChannels = inputShape.size(inputShape.numDimensions() - 1)
-
-        // Compute shapes of kernel and bias matrices
-        depthwiseKernelShape = shapeFromDims(*kernelSize, numberOfChannels, this.depthMultiplier.toLong())
-        pointwiseKernelShape = shapeFromDims(1, 1, numberOfChannels * this.depthMultiplier, filters)
-        biasShape = Shape.make(filters)
-
-        // should be calculated before addWeight because it's used in calculation, need to rewrite addWEight to avoid strange behaviour
-        // calculate fanIn, fanOut
-        val inputDepth = numberOfChannels // amount of channels
-        val outputDepth = numberOfChannels * this.depthMultiplier // amount of channels for the next layer
-
-        fanIn = (inputDepth * kernelSize[0] * kernelSize[1]).toInt()
-        fanOut = ((outputDepth * kernelSize[0] * kernelSize[1] / (strides[0].toDouble() * strides[1])).roundToInt())
-
-        val (depthwiseKernelVariableName, pointwiseKernelVariableName, biasVariableName) = defineVariableNames()
-
-        createSeparableConv2DVariables(
-            tf,
-            depthwiseKernelVariableName,
-            pointwiseKernelVariableName,
-            biasVariableName,
-            kGraph
-        )
-    }
-
-    private fun defineVariableNames(): Triple<String, String, String> {
-        return if (name.isNotEmpty()) {
-            Triple(
-                separableConv2dDepthwiseKernelVarName(name),
-                separableConv2dPointwiseKernelVarName(name),
-                separableConv2dBiasVarName(name)
-            )
-        } else {
-            Triple(DEPTHWISE_KERNEL_VARIABLE_NAME, POINTWISE_KERNEL_VARIABLE_NAME, BIAS_VARIABLE_NAME)
-        }
-    }
-
-    private fun createSeparableConv2DVariables(
-        tf: Ops,
-        depthwiseKernelVariableName: String,
-        pointwiseKernelVariableName: String,
-        biasVariableName: String,
-        kGraph: KGraph
-    ) {
-        depthwiseKernel = tf.withName(depthwiseKernelVariableName).variable(depthwiseKernelShape, getDType())
-        pointwiseKernel = tf.withName(pointwiseKernelVariableName).variable(pointwiseKernelShape, getDType())
-        if (useBias) bias = tf.withName(biasVariableName).variable(biasShape, getDType())
-
-        depthwiseKernel = addWeight(
-            tf,
-            kGraph,
-            depthwiseKernelVariableName,
-            depthwiseKernel,
-            depthwiseInitializer,
-            depthwiseRegularizer
-        )
-        pointwiseKernel = addWeight(
-            tf,
-            kGraph,
-            pointwiseKernelVariableName,
-            pointwiseKernel,
-            pointwiseInitializer,
-            pointwiseRegularizer
-        )
-        if (useBias) bias = addWeight(tf, kGraph, biasVariableName, bias!!, biasInitializer, biasRegularizer)
-    }
-
-    override fun computeOutputShape(inputShape: Shape): Shape {
-        var rows = inputShape.size(1)
-        var cols = inputShape.size(2)
-        rows = convOutputLength(
-            rows, kernelSize[0].toInt(), padding,
-            strides[1].toInt(), dilations[1].toInt()
-        )
-        cols = convOutputLength(
-            cols, kernelSize[1].toInt(), padding,
-            strides[2].toInt(), dilations[2].toInt()
-        )
-
-        val shape = Shape.make(inputShape.size(0), rows, cols, filters)
-        outputShape = TensorShape(shape)
-        return shape
     }
 
-    override fun forward(
-        tf: Ops,
-        input: Operand<Float>,
-        isTraining: Operand<Boolean>,
-        numberOfLosses: Operand<Float>?
-    ): Operand<Float> {
+    override fun separableConvImplementation(tf: Ops, input: Operand<Float>): Operand<Float> {
         val paddingName = padding.paddingName
         val depthwiseConv2DOptions: DepthwiseConv2dNative.Options = dilations(dilations.toList()).dataFormat("NHWC")
 
@@ -210,37 +118,30 @@ public class SeparableConv2D(
         val pointwiseStrides = mutableListOf(1L, 1L, 1L, 1L)
 
         val conv2DOptions: Conv2d.Options = Conv2d.dataFormat("NHWC")
-        var output: Operand<Float> =
-            tf.nn.conv2d(depthwiseOutput, pointwiseKernel, pointwiseStrides, "VALID", conv2DOptions)
-
-        if (useBias) {
-            output = tf.nn.biasAdd(output, bias)
-        }
-
-        return Activations.convert(activation).apply(tf, output, name)
+        return tf.nn.conv2d(depthwiseOutput, pointwiseKernel, pointwiseStrides, "VALID", conv2DOptions)
     }
 
-    override var weights: Map<String, Array<*>>
-        get() = extractDepthConv2DWeights()
-        set(value) = assignWeights(value)
-
-    private fun extractDepthConv2DWeights(): Map<String, Array<*>> {
-        return extractWeights(defineVariableNames().toList())
-    }
+    override fun defineOutputShape(inputShape: Shape): Shape {
+        var rows = inputShape.size(1)
+        var cols = inputShape.size(2)
 
-    /** Returns the shape of kernel weights. */
-    public val depthwiseShapeArray: LongArray get() = TensorShape(depthwiseKernelShape).dims()
+        rows = convOutputLength(
+            rows, kernelSize[0].toInt(), padding,
+            strides[1].toInt(), dilations[1].toInt()
+        )
+        cols = convOutputLength(
+            cols, kernelSize[1].toInt(), padding,
+            strides[2].toInt(), dilations[2].toInt()
+        )
 
-    /** Returns the shape of kernel weights. */
-    public val pointwiseShapeArray: LongArray get() = TensorShape(pointwiseKernelShape).dims()
+        return Shape.make(inputShape.size(0), rows, cols, filters)
+    }
 
-    /** Returns the shape of bias weights. */
-    public val biasShapeArray: LongArray get() = TensorShape(biasShape).dims()
+    override fun depthwiseKernalVarName(name: String): String = separableConvDepthwiseKernelVarName(name, dim = 2)
 
-    override val hasActivation: Boolean get() = true
+    override fun pointwiseKernelVarName(name: String): String = separableConvPointwiseKernelVarName(name, dim = 2)
 
-    override val paramCount: Int
-        get() = (depthwiseKernelShape.numElements() + pointwiseKernelShape.numElements() + biasShape.numElements()).toInt()
+    override fun biasVarName(name: String): String = separableConvBiasVarName(name, dim = 2)
 
     override fun toString(): String =
         "SeparableConv2D(kernelSize=${kernelSize.contentToString()}, strides=${strides.contentToString()}, " +
diff --git a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/util/nameConventions.kt b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/util/nameConventions.kt
index 1366f8dae..94fd78ade 100644
--- a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/util/nameConventions.kt
+++ b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/util/nameConventions.kt
@@ -32,13 +32,13 @@ internal fun depthwiseConv2dBiasVarName(name: String) = name + "_" + "depthwise_
 internal fun depthwiseConv2dKernelVarName(name: String) = name + "_" + "depthwise_conv2d_kernel"
 
 /** Default SeparableConv2d bias variable name in TensorFlow graph, based on variable's name. */
-internal fun separableConv2dBiasVarName(name: String) = name + "_" + "separable_conv2d_bias"
+internal fun separableConvBiasVarName(name: String, dim: Int) = name + "_" + "separable_conv${dim}d_bias"
 
 /** Default SeparableConv2d depthwise kernel variable name in TensorFlow graph, based on variable's name. */
-internal fun separableConv2dDepthwiseKernelVarName(name: String) = name + "_" + "separable_conv2d_depthwise_kernel"
+internal fun separableConvDepthwiseKernelVarName(name: String, dim: Int) = name + "_" + "separable_conv${dim}d_depthwise_kernel"
 
 /** Default SeparableConv2d pointwise kernel variable name in TensorFlow graph, based on variable's name. */
-internal fun separableConv2dPointwiseKernelVarName(name: String) = name + "_" + "separable_conv2d_pointwise_kernel"
+internal fun separableConvPointwiseKernelVarName(name: String, dim: Int) = name + "_" + "separable_conv${dim}d_pointwise_kernel"
 
 /** Default Dense bias variable name in TensorFlow graph, based on variable's name. */
 internal fun denseBiasVarName(name: String) = name + "_" + "dense_bias"
diff --git a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/WeightLoader.kt b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/WeightLoader.kt
index 7ab6ed91e..9d67a3f3d 100644
--- a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/WeightLoader.kt
+++ b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/WeightLoader.kt
@@ -281,7 +281,7 @@ private fun fillSeparableConv2DVariablesFromKeras(
         val data = it.data
         when (it.name) {
             "depthwise_kernel:0" -> {
-                val kernelVariableName = separableConv2dDepthwiseKernelVarName(layerName)
+                val kernelVariableName = separableConvDepthwiseKernelVarName(layerName, dim = 2)
                 val kernelShape = (model.getLayer(layerName) as SeparableConv2D).depthwiseShapeArray
                 require(
                     kernelShape.map { e -> e.toInt() }.toIntArray().contentEquals(dims)
@@ -289,7 +289,7 @@ private fun fillSeparableConv2DVariablesFromKeras(
                 model.fillVariable(kernelVariableName, data)
             }
             "pointwise_kernel:0" -> {
-                val kernelVariableName = separableConv2dPointwiseKernelVarName(layerName)
+                val kernelVariableName = separableConvPointwiseKernelVarName(layerName, dim = 2)
                 val kernelShape = (model.getLayer(layerName) as SeparableConv2D).pointwiseShapeArray
                 require(
                     kernelShape.map { e -> e.toInt() }.toIntArray().contentEquals(dims)
@@ -297,7 +297,7 @@ private fun fillSeparableConv2DVariablesFromKeras(
                 model.fillVariable(kernelVariableName, data)
             }
             "depthwise_bias:0" -> {
-                val biasVariableName = separableConv2dBiasVarName(layerName)
+                val biasVariableName = separableConvBiasVarName(layerName, dim = 2)
                 val biasShape = (model.getLayer(layerName) as SeparableConv2D).biasShapeArray
                 require(
                     biasShape.map { e -> e.toInt() }.toIntArray().contentEquals(dims)
@@ -590,9 +590,9 @@ private fun initDepthwiseConv2DVariablesByDefaultInitializer(name: String, model
 }
 
 private fun initSeparableConv2DVariablesByDefaultInitializer(name: String, model: GraphTrainableModel) {
-    val depthwiseKernelVariableName = separableConv2dDepthwiseKernelVarName(name)
-    val pointwiseKernelVariableName = separableConv2dPointwiseKernelVarName(name)
-    val biasVariableName = depthwiseConv2dBiasVarName(name)
+    val depthwiseKernelVariableName = separableConvDepthwiseKernelVarName(name, dim = 2)
+    val pointwiseKernelVariableName = separableConvPointwiseKernelVarName(name, dim = 2)
+    val biasVariableName = separableConvBiasVarName(name, dim = 2)
     model.runAssignOpByVarName(depthwiseKernelVariableName)
     model.runAssignOpByVarName(pointwiseKernelVariableName)
     model.runAssignOpByVarName(biasVariableName)
@@ -786,11 +786,11 @@ private fun fillSeparableConv2DVariables(
 
     layerPaths as LayerConvOrDensePaths
     val depthwiseKernelData = hdfFile.getDatasetByPath(depthwiseKernelDataPathTemplate.format(name, name)).data
-    val depthwiseKernelVariableName = separableConv2dDepthwiseKernelVarName(name)
+    val depthwiseKernelVariableName = separableConvDepthwiseKernelVarName(name, dim = 2)
     model.fillVariable(depthwiseKernelVariableName, depthwiseKernelData)
 
     val pointwiseKernelData = hdfFile.getDatasetByPath(pointwiseKernelDataPathTemplate.format(name, name)).data
-    val pointwiseKernelVariableName = separableConv2dPointwiseKernelVarName(name)
+    val pointwiseKernelVariableName = separableConvPointwiseKernelVarName(name, dim = 2)
     model.fillVariable(pointwiseKernelVariableName, pointwiseKernelData)
 
     if (useBias) {