From 78196a88018bad65df0adbf925ffc52862bd6abf Mon Sep 17 00:00:00 2001 From: Stan van der Bend Date: Mon, 14 Jun 2021 21:16:31 +0200 Subject: [PATCH 1/3] Added missing saving functions for ReLU and ELU activation layers (JetBrains#78) --- .../dl/api/inference/keras/ModelSaver.kt | 30 +++++++++++++++---- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/ModelSaver.kt b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/ModelSaver.kt index f4ee80356..78609616b 100644 --- a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/ModelSaver.kt +++ b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/ModelSaver.kt @@ -12,10 +12,7 @@ import org.jetbrains.kotlinx.dl.api.core.Sequential import org.jetbrains.kotlinx.dl.api.core.activation.Activations import org.jetbrains.kotlinx.dl.api.core.initializer.* import org.jetbrains.kotlinx.dl.api.core.layer.Layer -import org.jetbrains.kotlinx.dl.api.core.layer.activation.PReLU -import org.jetbrains.kotlinx.dl.api.core.layer.activation.LeakyReLU -import org.jetbrains.kotlinx.dl.api.core.layer.activation.Softmax -import org.jetbrains.kotlinx.dl.api.core.layer.activation.ThresholdedReLU +import org.jetbrains.kotlinx.dl.api.core.layer.activation.* import org.jetbrains.kotlinx.dl.api.core.layer.convolutional.* import org.jetbrains.kotlinx.dl.api.core.layer.core.ActivationLayer import org.jetbrains.kotlinx.dl.api.core.layer.core.Dense @@ -28,6 +25,7 @@ import org.jetbrains.kotlinx.dl.api.core.layer.reshaping.ZeroPadding2D import org.jetbrains.kotlinx.dl.api.core.regularizer.L2L1 import org.jetbrains.kotlinx.dl.api.core.regularizer.Regularizer import org.jetbrains.kotlinx.dl.api.inference.keras.config.* +import org.tensorflow.op.nn.Elu import java.io.File /** @@ -86,6 +84,8 @@ private fun convertToKerasLayer(layer: Layer, isKerasFullyCompatible: Boolean, i is Input -> createKerasInput(layer) is BatchNorm -> 
createKerasBatchNorm(layer, isKerasFullyCompatible) is ActivationLayer -> createKerasActivationLayer(layer) + is ELU -> createKerasELU(layer) + is ReLU -> createKerasReLU(layer) is PReLU -> createKerasPReLULayer(layer, isKerasFullyCompatible) is LeakyReLU -> createKerasLeakyReLU(layer) is ThresholdedReLU -> createKerasThresholdedReLULayer(layer) @@ -221,6 +221,26 @@ private fun createKerasActivationLayer(layer: ActivationLayer): KerasLayer { return KerasLayer(class_name = LAYER_ACTIVATION, config = configX) } +private fun createKerasReLU(layer: ReLU): KerasLayer { + val configX = LayerConfig( + dtype = DATATYPE_FLOAT32, + max_value = layer.maxValue?.toDouble(), + negative_slope = layer.negativeSlope.toDouble(), + threshold = layer.threshold.toDouble(), + name = layer.name + ) + return KerasLayer(class_name = LAYER_RELU, config = configX) +} + +private fun createKerasELU(layer: ELU): KerasLayer { + val configX = LayerConfig( + dtype = DATATYPE_FLOAT32, + alpha = layer.alpha.toDouble(), + name = layer.name + ) + return KerasLayer(class_name = LAYER_ELU, config = configX) +} + private fun createKerasPReLULayer(layer: PReLU, isKerasFullyCompatible: Boolean): KerasLayer { val configX = LayerConfig( dtype = DATATYPE_FLOAT32, @@ -604,4 +624,4 @@ private fun createKerasZeroPadding2D(layer: ZeroPadding2D): KerasLayer { padding = KerasPadding.ZeroPadding2D(layer.padding) ) return KerasLayer(class_name = LAYER_ZERO_PADDING_2D, config = configX) -} +} \ No newline at end of file From 6347d656d898fde3d2084e9b9489f6cddf1ed150 Mon Sep 17 00:00:00 2001 From: Stan van der Bend Date: Mon, 14 Jun 2021 21:23:56 +0200 Subject: [PATCH 2/3] Reverted changes to the imports --- .../kotlinx/dl/api/inference/keras/ModelSaver.kt | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/ModelSaver.kt b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/ModelSaver.kt index 78609616b..9e9adcd7f 100644 
--- a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/ModelSaver.kt +++ b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/ModelSaver.kt @@ -12,7 +12,12 @@ import org.jetbrains.kotlinx.dl.api.core.Sequential import org.jetbrains.kotlinx.dl.api.core.activation.Activations import org.jetbrains.kotlinx.dl.api.core.initializer.* import org.jetbrains.kotlinx.dl.api.core.layer.Layer -import org.jetbrains.kotlinx.dl.api.core.layer.activation.* +import org.jetbrains.kotlinx.dl.api.core.layer.activation.ELU +import org.jetbrains.kotlinx.dl.api.core.layer.activation.ReLU +import org.jetbrains.kotlinx.dl.api.core.layer.activation.PReLU +import org.jetbrains.kotlinx.dl.api.core.layer.activation.LeakyReLU +import org.jetbrains.kotlinx.dl.api.core.layer.activation.Softmax +import org.jetbrains.kotlinx.dl.api.core.layer.activation.ThresholdedReLU import org.jetbrains.kotlinx.dl.api.core.layer.convolutional.* import org.jetbrains.kotlinx.dl.api.core.layer.core.ActivationLayer import org.jetbrains.kotlinx.dl.api.core.layer.core.Dense @@ -25,7 +30,6 @@ import org.jetbrains.kotlinx.dl.api.core.layer.reshaping.ZeroPadding2D import org.jetbrains.kotlinx.dl.api.core.regularizer.L2L1 import org.jetbrains.kotlinx.dl.api.core.regularizer.Regularizer import org.jetbrains.kotlinx.dl.api.inference.keras.config.* -import org.tensorflow.op.nn.Elu import java.io.File /** From bd6af53f16312815920aeaa0444114638a721add Mon Sep 17 00:00:00 2001 From: Stan van der Bend Date: Thu, 8 Jul 2021 04:51:50 +0200 Subject: [PATCH 3/3] WIP Added AbstractSeparableConv.kt (see desc.) 
#125 - still have to write tests, and do the 1D variant - this also fixed a bug in WeightLoader that did not load the bias weights correctly for SeparableConv2D.kt --- .../convolutional/AbstractSeparableConv.kt | 207 ++++++++++++++++++ .../layer/convolutional/SeparableConv2D.kt | 179 ++++----------- .../dl/api/core/util/nameConventions.kt | 6 +- .../dl/api/inference/keras/WeightLoader.kt | 16 +- 4 files changed, 258 insertions(+), 150 deletions(-) create mode 100644 api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/convolutional/AbstractSeparableConv.kt diff --git a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/convolutional/AbstractSeparableConv.kt b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/convolutional/AbstractSeparableConv.kt new file mode 100644 index 000000000..145786397 --- /dev/null +++ b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/convolutional/AbstractSeparableConv.kt @@ -0,0 +1,207 @@ +package org.jetbrains.kotlinx.dl.api.core.layer.convolutional + +import org.jetbrains.kotlinx.dl.api.core.KGraph +import org.jetbrains.kotlinx.dl.api.core.activation.Activations +import org.jetbrains.kotlinx.dl.api.core.initializer.Initializer +import org.jetbrains.kotlinx.dl.api.core.layer.Layer +import org.jetbrains.kotlinx.dl.api.core.regularizer.Regularizer +import org.jetbrains.kotlinx.dl.api.core.shape.TensorShape +import org.jetbrains.kotlinx.dl.api.core.shape.numElements +import org.jetbrains.kotlinx.dl.api.core.shape.shapeFromDims +import org.jetbrains.kotlinx.dl.api.core.util.getDType +import org.tensorflow.Operand +import org.tensorflow.Shape +import org.tensorflow.op.Ops +import org.tensorflow.op.core.Variable +import kotlin.math.roundToInt + +/** + * Abstract N-D convolution layer with separable filters. + * + * This layer performs a depthwise convolution that acts separately on + * channels, followed by a pointwise convolution that mixes channels. 
+ * + * If [useBiasInternal] is `true` and a [biasInitializerInternal] is provided, + * it adds a bias vector to the output. + * It then optionally applies an activation function to produce the final output. + * + * Note: layer attributes cannot be modified after the layer has been called once (except the `trainable` attribute). + * + * TODO: add rank for getting the channel axis? + * TODO: add docs for params? + * TODO: add trainable param? + */ +public abstract class AbstractSeparableConv( + protected val filtersInternal: Long, + protected val kernelSizeInternal: LongArray, + protected val stridesInternal: LongArray, + protected val dilationsInternal: LongArray, + protected val depthMulitplierInternal: Int = 1, + protected val activationInternal: Activations, + protected val depthwiseInitializerInternal: Initializer, + protected val pointwiseInitializerInternal: Initializer, + protected val biasInitializerInternal: Initializer, + protected val depthwiseRegularizerInternal: Regularizer?, + protected val pointwiseRegularizerInternal: Regularizer?, + protected val biasRegularizerInternal: Regularizer?, + protected val useBiasInternal: Boolean, + protected val depthwiseKernelVariableName: String, + protected val pointwiseKernelVariableName: String, + protected val biasVariableName: String, + name: String +) : Layer( + name +) { + + init { + require(dilationsInternal.all { it == 1L } || stridesInternal.all { it == 1L }) { + "Specifying any dilations value != 1 is incompatible with specifying any stride value != 1" + } + } + + /** Returns the shape of the depthwise kernel weights. */ + public val depthwiseShapeArray: LongArray get() = TensorShape(depthwiseKernelShape).dims() + + /** Returns the shape of the pointwise kernel weights. */ + public val pointwiseShapeArray: LongArray get() = TensorShape(pointwiseKernelShape).dims() + + /** Returns the shape of bias weights. 
*/ + public val biasShapeArray: LongArray get() = TensorShape(biasShape).dims() + + override val hasActivation: Boolean get() = true + + override val paramCount: Int + get() = (depthwiseKernelShape.numElements() + pointwiseKernelShape.numElements() + biasShape.numElements()).toInt() + + override var weights: Map> + get() = extractDepthConvWeights() + set(value) = assignWeights(value) + + // weight tensors + protected lateinit var depthwiseKernel: Variable + protected lateinit var pointwiseKernel: Variable + protected var bias: Variable? = null + + // weight tensor shapes + protected lateinit var depthwiseKernelShape: Shape + protected lateinit var pointwiseKernelShape: Shape + protected lateinit var biasShape: Shape + + override fun build(tf: Ops, kGraph: KGraph, inputShape: Shape) { + + // Amount of channels should be the last value in the inputShape + val numberOfChannels = inputShape.size(inputShape.numDimensions() - 1) + + // Compute shapes of kernel and bias matrices + computeMatricesShapes(numberOfChannels) + + // should be calculated before addWeight because it's used in calculation, + // need to rewrite addWeight to avoid strange behaviour calculate fanIn, fanOut + val inputDepth = getInputDepth(numberOfChannels) // number of input channels + val outputDepth = getOutputDepth(numberOfChannels) // number of output channels + + fanIn = (inputDepth * multiply(*kernelSizeInternal)).toInt() + fanOut = ((outputDepth * multiply(*kernelSizeInternal)).toDouble() / + multiply(*stridesInternal).toDouble()).roundToInt() + + val (depthwiseKernelVariableName, pointwiseKernelVariableName, biasVariableName) = defineVariableNames() + + createSeparableConvVariables( + tf, + depthwiseKernelVariableName, + pointwiseKernelVariableName, + biasVariableName, + kGraph + ) + } + + override fun computeOutputShape(inputShape: Shape): Shape { + val shape = defineOutputShape(inputShape) + outputShape = TensorShape(shape) + return shape + } + + override fun forward( + tf: Ops, + input: 
Operand, + isTraining: Operand, + numberOfLosses: Operand? + ): Operand { + var output = separableConvImplementation(tf, input) + + if (useBiasInternal) { + output = tf.nn.biasAdd(output, bias) + } + + return Activations.convert(activationInternal).apply(tf, output, name) + } + + private fun defineVariableNames(): Triple { + return if (name.isNotEmpty()) { + Triple( + depthwiseKernalVarName(name), + pointwiseKernelVarName(name), + biasVarName(name) + ) + } else { + Triple(depthwiseKernelVariableName, pointwiseKernelVariableName, biasVariableName) + } + } + + private fun createSeparableConvVariables( + tf: Ops, + depthwiseKernelVariableName: String, + pointwiseKernelVariableName: String, + biasVariableName: String, + kGraph: KGraph + ) { + depthwiseKernel = tf.withName(depthwiseKernelVariableName).variable(depthwiseKernelShape, getDType()) + pointwiseKernel = tf.withName(pointwiseKernelVariableName).variable(pointwiseKernelShape, getDType()) + if (useBiasInternal) bias = tf.withName(biasVariableName).variable(biasShape, getDType()) + + depthwiseKernel = addWeight( + tf, + kGraph, + depthwiseKernelVariableName, + depthwiseKernel, + depthwiseInitializerInternal, + depthwiseRegularizerInternal + ) + pointwiseKernel = addWeight( + tf, + kGraph, + pointwiseKernelVariableName, + pointwiseKernel, + pointwiseInitializerInternal, + pointwiseRegularizerInternal + ) + if (useBiasInternal) + bias = addWeight(tf, kGraph, biasVariableName, bias!!, biasInitializerInternal, biasRegularizerInternal) + } + + protected open fun getInputDepth(numberOfChannels: Long): Long = numberOfChannels + + protected open fun getOutputDepth(numberOfChannels: Long): Long = filtersInternal + + private fun computeMatricesShapes(numberOfChannels: Long) { + depthwiseKernelShape = shapeFromDims(*kernelSizeInternal, numberOfChannels, depthMulitplierInternal.toLong()) + pointwiseKernelShape = shapeFromDims(1, 1, numberOfChannels * depthMulitplierInternal, filtersInternal) + biasShape = 
Shape.make(filtersInternal) + } + + private fun extractDepthConvWeights(): Map> { + return extractWeights(defineVariableNames().toList()) + } + + protected abstract fun depthwiseKernalVarName(name: String): String + + protected abstract fun pointwiseKernelVarName(name: String): String + + protected abstract fun biasVarName(name: String): String + + protected abstract fun separableConvImplementation(tf: Ops, input: Operand): Operand + + protected abstract fun defineOutputShape(inputShape: Shape): Shape +} + +private fun multiply(vararg values: Long) = values.fold(1L, Long::times) diff --git a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/convolutional/SeparableConv2D.kt b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/convolutional/SeparableConv2D.kt index c0872e09d..ef1ac0dbf 100644 --- a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/convolutional/SeparableConv2D.kt +++ b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/convolutional/SeparableConv2D.kt @@ -5,31 +5,23 @@ package org.jetbrains.kotlinx.dl.api.core.layer.convolutional -import org.jetbrains.kotlinx.dl.api.core.KGraph import org.jetbrains.kotlinx.dl.api.core.activation.Activations import org.jetbrains.kotlinx.dl.api.core.initializer.HeNormal import org.jetbrains.kotlinx.dl.api.core.initializer.HeUniform import org.jetbrains.kotlinx.dl.api.core.initializer.Initializer -import org.jetbrains.kotlinx.dl.api.core.layer.Layer import org.jetbrains.kotlinx.dl.api.core.layer.NoGradients import org.jetbrains.kotlinx.dl.api.core.layer.requireArraySize import org.jetbrains.kotlinx.dl.api.core.regularizer.Regularizer -import org.jetbrains.kotlinx.dl.api.core.shape.TensorShape import org.jetbrains.kotlinx.dl.api.core.shape.convOutputLength -import org.jetbrains.kotlinx.dl.api.core.shape.numElements -import org.jetbrains.kotlinx.dl.api.core.shape.shapeFromDims -import org.jetbrains.kotlinx.dl.api.core.util.getDType -import 
org.jetbrains.kotlinx.dl.api.core.util.separableConv2dBiasVarName -import org.jetbrains.kotlinx.dl.api.core.util.separableConv2dDepthwiseKernelVarName -import org.jetbrains.kotlinx.dl.api.core.util.separableConv2dPointwiseKernelVarName +import org.jetbrains.kotlinx.dl.api.core.util.separableConvBiasVarName +import org.jetbrains.kotlinx.dl.api.core.util.separableConvDepthwiseKernelVarName +import org.jetbrains.kotlinx.dl.api.core.util.separableConvPointwiseKernelVarName import org.tensorflow.Operand import org.tensorflow.Shape import org.tensorflow.op.Ops -import org.tensorflow.op.core.Variable import org.tensorflow.op.nn.Conv2d import org.tensorflow.op.nn.DepthwiseConv2dNative import org.tensorflow.op.nn.DepthwiseConv2dNative.dilations -import kotlin.math.roundToInt private const val DEPTHWISE_KERNEL_VARIABLE_NAME = "separable_conv2d_depthwise_kernel" private const val POINTWISE_KERNEL_VARIABLE_NAME = "separable_conv2d_pointwise_kernel" @@ -84,117 +76,33 @@ public class SeparableConv2D( public val padding: ConvPadding = ConvPadding.SAME, public val useBias: Boolean = true, name: String = "" -) : Layer(name), NoGradients { - // weight tensors - private lateinit var depthwiseKernel: Variable - private lateinit var pointwiseKernel: Variable - private var bias: Variable? 
= null - - // weight tensor shapes - private lateinit var depthwiseKernelShape: Shape - private lateinit var pointwiseKernelShape: Shape - private lateinit var biasShape: Shape +) : AbstractSeparableConv( + filtersInternal = filters, + kernelSizeInternal = kernelSize, + stridesInternal = strides, + dilationsInternal = dilations, + depthMulitplierInternal = depthMultiplier, + activationInternal = activation, + depthwiseInitializerInternal = depthwiseInitializer, + pointwiseInitializerInternal = pointwiseInitializer, + biasInitializerInternal = biasInitializer, + depthwiseRegularizerInternal = depthwiseRegularizer, + pointwiseRegularizerInternal = pointwiseRegularizer, + biasRegularizerInternal = biasRegularizer, + useBiasInternal = useBias, + depthwiseKernelVariableName = DEPTHWISE_KERNEL_VARIABLE_NAME, + pointwiseKernelVariableName = POINTWISE_KERNEL_VARIABLE_NAME, + biasVariableName = BIAS_VARIABLE_NAME, + name +), NoGradients { init { requireArraySize(kernelSize, 2, "kernelSize") requireArraySize(strides, 4, "strides") requireArraySize(dilations, 4, "dilations") - isTrainable = false - } - - override fun build(tf: Ops, kGraph: KGraph, inputShape: Shape) { - // Amount of channels should be the last value in the inputShape (make warning here) - val numberOfChannels = inputShape.size(inputShape.numDimensions() - 1) - - // Compute shapes of kernel and bias matrices - depthwiseKernelShape = shapeFromDims(*kernelSize, numberOfChannels, this.depthMultiplier.toLong()) - pointwiseKernelShape = shapeFromDims(1, 1, numberOfChannels * this.depthMultiplier, filters) - biasShape = Shape.make(filters) - - // should be calculated before addWeight because it's used in calculation, need to rewrite addWEight to avoid strange behaviour - // calculate fanIn, fanOut - val inputDepth = numberOfChannels // amount of channels - val outputDepth = numberOfChannels * this.depthMultiplier // amount of channels for the next layer - - fanIn = (inputDepth * kernelSize[0] * 
kernelSize[1]).toInt() - fanOut = ((outputDepth * kernelSize[0] * kernelSize[1] / (strides[0].toDouble() * strides[1])).roundToInt()) - - val (depthwiseKernelVariableName, pointwiseKernelVariableName, biasVariableName) = defineVariableNames() - - createSeparableConv2DVariables( - tf, - depthwiseKernelVariableName, - pointwiseKernelVariableName, - biasVariableName, - kGraph - ) - } - - private fun defineVariableNames(): Triple { - return if (name.isNotEmpty()) { - Triple( - separableConv2dDepthwiseKernelVarName(name), - separableConv2dPointwiseKernelVarName(name), - separableConv2dBiasVarName(name) - ) - } else { - Triple(DEPTHWISE_KERNEL_VARIABLE_NAME, POINTWISE_KERNEL_VARIABLE_NAME, BIAS_VARIABLE_NAME) - } - } - - private fun createSeparableConv2DVariables( - tf: Ops, - depthwiseKernelVariableName: String, - pointwiseKernelVariableName: String, - biasVariableName: String, - kGraph: KGraph - ) { - depthwiseKernel = tf.withName(depthwiseKernelVariableName).variable(depthwiseKernelShape, getDType()) - pointwiseKernel = tf.withName(pointwiseKernelVariableName).variable(pointwiseKernelShape, getDType()) - if (useBias) bias = tf.withName(biasVariableName).variable(biasShape, getDType()) - - depthwiseKernel = addWeight( - tf, - kGraph, - depthwiseKernelVariableName, - depthwiseKernel, - depthwiseInitializer, - depthwiseRegularizer - ) - pointwiseKernel = addWeight( - tf, - kGraph, - pointwiseKernelVariableName, - pointwiseKernel, - pointwiseInitializer, - pointwiseRegularizer - ) - if (useBias) bias = addWeight(tf, kGraph, biasVariableName, bias!!, biasInitializer, biasRegularizer) - } - - override fun computeOutputShape(inputShape: Shape): Shape { - var rows = inputShape.size(1) - var cols = inputShape.size(2) - rows = convOutputLength( - rows, kernelSize[0].toInt(), padding, - strides[1].toInt(), dilations[1].toInt() - ) - cols = convOutputLength( - cols, kernelSize[1].toInt(), padding, - strides[2].toInt(), dilations[2].toInt() - ) - - val shape = 
Shape.make(inputShape.size(0), rows, cols, filters) - outputShape = TensorShape(shape) - return shape } - override fun forward( - tf: Ops, - input: Operand, - isTraining: Operand, - numberOfLosses: Operand? - ): Operand { + override fun separableConvImplementation(tf: Ops, input: Operand): Operand { val paddingName = padding.paddingName val depthwiseConv2DOptions: DepthwiseConv2dNative.Options = dilations(dilations.toList()).dataFormat("NHWC") @@ -210,37 +118,30 @@ public class SeparableConv2D( val pointwiseStrides = mutableListOf(1L, 1L, 1L, 1L) val conv2DOptions: Conv2d.Options = Conv2d.dataFormat("NHWC") - var output: Operand = - tf.nn.conv2d(depthwiseOutput, pointwiseKernel, pointwiseStrides, "VALID", conv2DOptions) - - if (useBias) { - output = tf.nn.biasAdd(output, bias) - } - - return Activations.convert(activation).apply(tf, output, name) + return tf.nn.conv2d(depthwiseOutput, pointwiseKernel, pointwiseStrides, "VALID", conv2DOptions) } - override var weights: Map> - get() = extractDepthConv2DWeights() - set(value) = assignWeights(value) - - private fun extractDepthConv2DWeights(): Map> { - return extractWeights(defineVariableNames().toList()) - } + override fun defineOutputShape(inputShape: Shape): Shape { + var rows = inputShape.size(1) + var cols = inputShape.size(2) - /** Returns the shape of kernel weights. */ - public val depthwiseShapeArray: LongArray get() = TensorShape(depthwiseKernelShape).dims() + rows = convOutputLength( + rows, kernelSize[0].toInt(), padding, + strides[1].toInt(), dilations[1].toInt() + ) + cols = convOutputLength( + cols, kernelSize[1].toInt(), padding, + strides[2].toInt(), dilations[2].toInt() + ) - /** Returns the shape of kernel weights. */ - public val pointwiseShapeArray: LongArray get() = TensorShape(pointwiseKernelShape).dims() + return Shape.make(inputShape.size(0), rows, cols, filters) + } - /** Returns the shape of bias weights. 
*/ - public val biasShapeArray: LongArray get() = TensorShape(biasShape).dims() + override fun depthwiseKernalVarName(name: String): String = separableConvDepthwiseKernelVarName(name, dim = 2) - override val hasActivation: Boolean get() = true + override fun pointwiseKernelVarName(name: String): String = separableConvPointwiseKernelVarName(name, dim = 2) - override val paramCount: Int - get() = (depthwiseKernelShape.numElements() + pointwiseKernelShape.numElements() + biasShape.numElements()).toInt() + override fun biasVarName(name: String): String = separableConvBiasVarName(name, dim = 2) override fun toString(): String = "SeparableConv2D(kernelSize=${kernelSize.contentToString()}, strides=${strides.contentToString()}, " + diff --git a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/util/nameConventions.kt b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/util/nameConventions.kt index 1366f8dae..94fd78ade 100644 --- a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/util/nameConventions.kt +++ b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/util/nameConventions.kt @@ -32,13 +32,13 @@ internal fun depthwiseConv2dBiasVarName(name: String) = name + "_" + "depthwise_ internal fun depthwiseConv2dKernelVarName(name: String) = name + "_" + "depthwise_conv2d_kernel" /** Default SeparableConv2d bias variable name in TensorFlow graph, based on variable's name. */ -internal fun separableConv2dBiasVarName(name: String) = name + "_" + "separable_conv2d_bias" +internal fun separableConvBiasVarName(name: String, dim: Int) = name + "_" + "separable_conv2d_bias" /** Default SeparableConv2d depthwise kernel variable name in TensorFlow graph, based on variable's name. 
*/ -internal fun separableConv2dDepthwiseKernelVarName(name: String) = name + "_" + "separable_conv2d_depthwise_kernel" +internal fun separableConvDepthwiseKernelVarName(name: String, dim: Int) = name + "_" + "separable_conv2d_depthwise_kernel" /** Default SeparableConv2d pointwise kernel variable name in TensorFlow graph, based on variable's name. */ -internal fun separableConv2dPointwiseKernelVarName(name: String) = name + "_" + "separable_conv2d_pointwise_kernel" +internal fun separableConvPointwiseKernelVarName(name: String, dim: Int) = name + "_" + "separable_conv2d_pointwise_kernel" /** Default Dense bias variable name in TensorFlow graph, based on variable's name. */ internal fun denseBiasVarName(name: String) = name + "_" + "dense_bias" diff --git a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/WeightLoader.kt b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/WeightLoader.kt index 7ab6ed91e..9d67a3f3d 100644 --- a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/WeightLoader.kt +++ b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/WeightLoader.kt @@ -281,7 +281,7 @@ private fun fillSeparableConv2DVariablesFromKeras( val data = it.data when (it.name) { "depthwise_kernel:0" -> { - val kernelVariableName = separableConv2dDepthwiseKernelVarName(layerName) + val kernelVariableName = separableConvDepthwiseKernelVarName(layerName, dim = 2) val kernelShape = (model.getLayer(layerName) as SeparableConv2D).depthwiseShapeArray require( kernelShape.map { e -> e.toInt() }.toIntArray().contentEquals(dims) @@ -289,7 +289,7 @@ private fun fillSeparableConv2DVariablesFromKeras( model.fillVariable(kernelVariableName, data) } "pointwise_kernel:0" -> { - val kernelVariableName = separableConv2dPointwiseKernelVarName(layerName) + val kernelVariableName = separableConvPointwiseKernelVarName(layerName, dim = 2) val kernelShape = (model.getLayer(layerName) as SeparableConv2D).pointwiseShapeArray require( 
kernelShape.map { e -> e.toInt() }.toIntArray().contentEquals(dims) @@ -297,7 +297,7 @@ private fun fillSeparableConv2DVariablesFromKeras( model.fillVariable(kernelVariableName, data) } "depthwise_bias:0" -> { - val biasVariableName = separableConv2dBiasVarName(layerName) + val biasVariableName = separableConvBiasVarName(layerName, dim = 2) val biasShape = (model.getLayer(layerName) as SeparableConv2D).biasShapeArray require( biasShape.map { e -> e.toInt() }.toIntArray().contentEquals(dims) @@ -590,9 +590,9 @@ private fun initDepthwiseConv2DVariablesByDefaultInitializer(name: String, model } private fun initSeparableConv2DVariablesByDefaultInitializer(name: String, model: GraphTrainableModel) { - val depthwiseKernelVariableName = separableConv2dDepthwiseKernelVarName(name) - val pointwiseKernelVariableName = separableConv2dPointwiseKernelVarName(name) - val biasVariableName = depthwiseConv2dBiasVarName(name) + val depthwiseKernelVariableName = separableConvDepthwiseKernelVarName(name, dim = 2) + val pointwiseKernelVariableName = separableConvPointwiseKernelVarName(name, dim = 2) + val biasVariableName = separableConvBiasVarName(name, dim = 2) model.runAssignOpByVarName(depthwiseKernelVariableName) model.runAssignOpByVarName(pointwiseKernelVariableName) model.runAssignOpByVarName(biasVariableName) @@ -786,11 +786,11 @@ private fun fillSeparableConv2DVariables( layerPaths as LayerConvOrDensePaths val depthwiseKernelData = hdfFile.getDatasetByPath(depthwiseKernelDataPathTemplate.format(name, name)).data - val depthwiseKernelVariableName = separableConv2dDepthwiseKernelVarName(name) + val depthwiseKernelVariableName = separableConvDepthwiseKernelVarName(name, dim = 2) model.fillVariable(depthwiseKernelVariableName, depthwiseKernelData) val pointwiseKernelData = hdfFile.getDatasetByPath(pointwiseKernelDataPathTemplate.format(name, name)).data - val pointwiseKernelVariableName = separableConv2dPointwiseKernelVarName(name) + val pointwiseKernelVariableName = 
separableConvPointwiseKernelVarName(name, dim = 2) model.fillVariable(pointwiseKernelVariableName, pointwiseKernelData) if (useBias) {