diff --git a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/convolutional/AbstractSeparableConv.kt b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/convolutional/AbstractSeparableConv.kt
new file mode 100644
index 000000000..145786397
--- /dev/null
+++ b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/convolutional/AbstractSeparableConv.kt
@@ -0,0 +1,224 @@
+package org.jetbrains.kotlinx.dl.api.core.layer.convolutional
+
+import org.jetbrains.kotlinx.dl.api.core.KGraph
+import org.jetbrains.kotlinx.dl.api.core.activation.Activations
+import org.jetbrains.kotlinx.dl.api.core.initializer.Initializer
+import org.jetbrains.kotlinx.dl.api.core.layer.Layer
+import org.jetbrains.kotlinx.dl.api.core.regularizer.Regularizer
+import org.jetbrains.kotlinx.dl.api.core.shape.TensorShape
+import org.jetbrains.kotlinx.dl.api.core.shape.numElements
+import org.jetbrains.kotlinx.dl.api.core.shape.shapeFromDims
+import org.jetbrains.kotlinx.dl.api.core.util.getDType
+import org.tensorflow.Operand
+import org.tensorflow.Shape
+import org.tensorflow.op.Ops
+import org.tensorflow.op.core.Variable
+import kotlin.math.roundToInt
+
+/**
+ * Abstract N-D convolution layer with separable filters.
+ *
+ * This layer performs a depthwise convolution that acts separately on
+ * channels, followed by a pointwise convolution that mixes channels.
+ *
+ * If [useBiasInternal] is `true`, a bias vector initialized by [biasInitializerInternal]
+ * is added to the output.
+ * The activation function [activationInternal] is then applied to produce the final output.
+ *
+ * Note: layer attributes cannot be modified after the layer has been called once (except the `trainable` attribute).
+ *
+ * TODO: add rank for getting the channel axis?
+ * TODO: add trainable param?
+ *
+ * @property filtersInternal Number of output filters: the last dimension of the pointwise kernel and the bias size.
+ * @property kernelSizeInternal Leading (window) dimensions of the depthwise kernel.
+ * @property stridesInternal Convolution strides; their product is used when estimating `fanOut`.
+ * @property dilationsInternal Dilation rates; must all be 1 if any stride differs from 1.
+ * @property depthMulitplierInternal Last dimension of the depthwise kernel (depthwise outputs per input channel).
+ * @property activationInternal Activation applied to the layer output in [forward].
+ * @property depthwiseInitializerInternal Initializer of the depthwise kernel.
+ * @property pointwiseInitializerInternal Initializer of the pointwise kernel.
+ * @property biasInitializerInternal Initializer of the bias vector.
+ * @property depthwiseRegularizerInternal Optional regularizer of the depthwise kernel.
+ * @property pointwiseRegularizerInternal Optional regularizer of the pointwise kernel.
+ * @property biasRegularizerInternal Optional regularizer of the bias vector.
+ * @property useBiasInternal Whether a bias variable is created and added to the output.
+ * @property depthwiseKernelVariableName Depthwise kernel variable name used when the layer name is empty.
+ * @property pointwiseKernelVariableName Pointwise kernel variable name used when the layer name is empty.
+ * @property biasVariableName Bias variable name used when the layer name is empty.
+ * @throws IllegalArgumentException if both a dilation value and a stride value differ from 1.
+ */
+public abstract class AbstractSeparableConv(
+    protected val filtersInternal: Long,
+    protected val kernelSizeInternal: LongArray,
+    protected val stridesInternal: LongArray,
+    protected val dilationsInternal: LongArray,
+    protected val depthMulitplierInternal: Int = 1,
+    protected val activationInternal: Activations,
+    protected val depthwiseInitializerInternal: Initializer,
+    protected val pointwiseInitializerInternal: Initializer,
+    protected val biasInitializerInternal: Initializer,
+    protected val depthwiseRegularizerInternal: Regularizer?,
+    protected val pointwiseRegularizerInternal: Regularizer?,
+    protected val biasRegularizerInternal: Regularizer?,
+    protected val useBiasInternal: Boolean,
+    protected val depthwiseKernelVariableName: String,
+    protected val pointwiseKernelVariableName: String,
+    protected val biasVariableName: String,
+    name: String
+) : Layer(
+    name
+) {
+
+    init {
+        // Strides and dilations must not both deviate from 1: the check passes when at least one
+        // of the two arrays consists of ones only (this includes the all-ones defaults).
+        require(dilationsInternal.all { it == 1L } || stridesInternal.all { it == 1L }) {
+            "Specifying any dilations value != 1 is incompatible with specifying any stride value != 1"
+        }
+    }
+
+    /** Returns the shape of depthwise kernel weights. */
+    public val depthwiseShapeArray: LongArray get() = TensorShape(depthwiseKernelShape).dims()
+
+    /** Returns the shape of pointwise kernel weights. */
+    public val pointwiseShapeArray: LongArray get() = TensorShape(pointwiseKernelShape).dims()
+
+    /** Returns the shape of bias weights. */
+    public val biasShapeArray: LongArray get() = TensorShape(biasShape).dims()
+
+    override val hasActivation: Boolean get() = true
+
+    override val paramCount: Int
+        get() = (depthwiseKernelShape.numElements() + pointwiseKernelShape.numElements() + biasShape.numElements()).toInt()
+
+    override var weights: Map<String, Array<*>>
+        get() = extractDepthConvWeights()
+        set(value) = assignWeights(value)
+
+    // weight tensors
+    protected lateinit var depthwiseKernel: Variable<Float>
+    protected lateinit var pointwiseKernel: Variable<Float>
+    protected var bias: Variable<Float>? = null
+
+    // weight tensor shapes
+    protected lateinit var depthwiseKernelShape: Shape
+    protected lateinit var pointwiseKernelShape: Shape
+    protected lateinit var biasShape: Shape
+
+    override fun build(tf: Ops, kGraph: KGraph, inputShape: Shape) {
+        // Amount of channels should be the last value in the inputShape
+        val numberOfChannels = inputShape.size(inputShape.numDimensions() - 1)
+
+        // Compute shapes of kernel and bias matrices
+        computeMatricesShapes(numberOfChannels)
+
+        // should be calculated before addWeight because it's used in calculation,
+        // need to rewrite addWeight to avoid strange behaviour calculate fanIn, fanOut
+        val inputDepth = getInputDepth(numberOfChannels) // number of input channels
+        val outputDepth = getOutputDepth(numberOfChannels) // number of output channels
+
+        fanIn = (inputDepth * multiply(*kernelSizeInternal)).toInt()
+        fanOut = ((outputDepth * multiply(*kernelSizeInternal)).toDouble() /
+                multiply(*stridesInternal).toDouble()).roundToInt()
+
+        val (depthwiseName, pointwiseName, biasName) = defineVariableNames()
+        createSeparableConvVariables(tf, depthwiseName, pointwiseName, biasName, kGraph)
+    }
+
+    override fun computeOutputShape(inputShape: Shape): Shape {
+        val shape = defineOutputShape(inputShape)
+        outputShape = TensorShape(shape)
+        return shape
+    }
+
+    override fun forward(
+        tf: Ops,
+        input: Operand<Float>,
+        isTraining: Operand<Boolean>,
+        numberOfLosses: Operand<Float>?
+    ): Operand<Float> {
+        var output = separableConvImplementation(tf, input)
+
+        if (useBiasInternal) {
+            output = tf.nn.biasAdd(output, bias)
+        }
+
+        return Activations.convert(activationInternal).apply(tf, output, name)
+    }
+
+    /** Returns (depthwise, pointwise, bias) variable names: layer-based when [name] is set, defaults otherwise. */
+    private fun defineVariableNames(): Triple<String, String, String> {
+        return if (name.isNotEmpty()) {
+            Triple(
+                depthwiseKernalVarName(name),
+                pointwiseKernelVarName(name),
+                biasVarName(name)
+            )
+        } else {
+            Triple(depthwiseKernelVariableName, pointwiseKernelVariableName, biasVariableName)
+        }
+    }
+
+    /** Creates the kernel variables (and the bias one if [useBiasInternal]) and registers them via `addWeight`. */
+    private fun createSeparableConvVariables(
+        tf: Ops,
+        depthwiseName: String,
+        pointwiseName: String,
+        biasName: String,
+        kGraph: KGraph
+    ) {
+        val depthwiseVariable = tf.withName(depthwiseName).variable(depthwiseKernelShape, getDType())
+        val pointwiseVariable = tf.withName(pointwiseName).variable(pointwiseKernelShape, getDType())
+        // Kept in a local so that no `!!` is needed when registering the weight below.
+        val biasVariable = if (useBiasInternal) tf.withName(biasName).variable(biasShape, getDType()) else null
+
+        depthwiseKernel = addWeight(
+            tf, kGraph, depthwiseName, depthwiseVariable,
+            depthwiseInitializerInternal, depthwiseRegularizerInternal
+        )
+        pointwiseKernel = addWeight(
+            tf, kGraph, pointwiseName, pointwiseVariable,
+            pointwiseInitializerInternal, pointwiseRegularizerInternal
+        )
+        bias = biasVariable?.let {
+            addWeight(tf, kGraph, biasName, it, biasInitializerInternal, biasRegularizerInternal)
+        }
+    }
+
+    /** Number of input channels used for the `fanIn` estimate; defaults to [numberOfChannels]. */
+    protected open fun getInputDepth(numberOfChannels: Long): Long = numberOfChannels
+
+    /** Number of output channels used for the `fanOut` estimate; defaults to [filtersInternal]. */
+    protected open fun getOutputDepth(numberOfChannels: Long): Long = filtersInternal
+
+    /** Fills in the depthwise kernel, pointwise kernel and bias shapes for [numberOfChannels] input channels. */
+    private fun computeMatricesShapes(numberOfChannels: Long) {
+        depthwiseKernelShape = shapeFromDims(*kernelSizeInternal, numberOfChannels, depthMulitplierInternal.toLong())
+        // NOTE(review): the pointwise kernel hard-codes two leading 1s (a 2-D window) - confirm for other ranks.
+        pointwiseKernelShape = shapeFromDims(1, 1, numberOfChannels * depthMulitplierInternal, filtersInternal)
+        biasShape = Shape.make(filtersInternal)
+    }
+
+    private fun extractDepthConvWeights(): Map<String, Array<*>> {
+        return extractWeights(defineVariableNames().toList())
+    }
+
+    /** Returns the depthwise kernel variable name for a layer called [name]. */
+    protected abstract fun depthwiseKernalVarName(name: String): String
+
+    /** Returns the pointwise kernel variable name for a layer called [name]. */
+    protected abstract fun pointwiseKernelVarName(name: String): String
+
+    /** Returns the bias variable name for a layer called [name]. */
+    protected abstract fun biasVarName(name: String): String
+
+    /** Builds the convolution output for [input]; bias and activation are applied afterwards in [forward]. */
+    protected abstract fun separableConvImplementation(tf: Ops, input: Operand<Float>): Operand<Float>
+
+    /** Computes the output shape for [inputShape]; [computeOutputShape] stores the result in `outputShape`. */
+    protected abstract fun defineOutputShape(inputShape: Shape): Shape
+}
+
+/** Product of [values]; 1 when no values are given. */
+private fun multiply(vararg values: Long) = values.fold(1L, Long::times)
diff --git a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/convolutional/SeparableConv2D.kt b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/convolutional/SeparableConv2D.kt index c0872e09d..ef1ac0dbf 100644 --- a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/convolutional/SeparableConv2D.kt +++ b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/layer/convolutional/SeparableConv2D.kt @@ -5,31 +5,23 @@ package org.jetbrains.kotlinx.dl.api.core.layer.convolutional -import org.jetbrains.kotlinx.dl.api.core.KGraph import org.jetbrains.kotlinx.dl.api.core.activation.Activations import org.jetbrains.kotlinx.dl.api.core.initializer.HeNormal import org.jetbrains.kotlinx.dl.api.core.initializer.HeUniform import org.jetbrains.kotlinx.dl.api.core.initializer.Initializer -import org.jetbrains.kotlinx.dl.api.core.layer.Layer import org.jetbrains.kotlinx.dl.api.core.layer.NoGradients import org.jetbrains.kotlinx.dl.api.core.layer.requireArraySize import org.jetbrains.kotlinx.dl.api.core.regularizer.Regularizer -import org.jetbrains.kotlinx.dl.api.core.shape.TensorShape import org.jetbrains.kotlinx.dl.api.core.shape.convOutputLength -import org.jetbrains.kotlinx.dl.api.core.shape.numElements -import org.jetbrains.kotlinx.dl.api.core.shape.shapeFromDims -import org.jetbrains.kotlinx.dl.api.core.util.getDType -import org.jetbrains.kotlinx.dl.api.core.util.separableConv2dBiasVarName -import
org.jetbrains.kotlinx.dl.api.core.util.separableConv2dDepthwiseKernelVarName -import org.jetbrains.kotlinx.dl.api.core.util.separableConv2dPointwiseKernelVarName +import org.jetbrains.kotlinx.dl.api.core.util.separableConvBiasVarName +import org.jetbrains.kotlinx.dl.api.core.util.separableConvDepthwiseKernelVarName +import org.jetbrains.kotlinx.dl.api.core.util.separableConvPointwiseKernelVarName import org.tensorflow.Operand import org.tensorflow.Shape import org.tensorflow.op.Ops -import org.tensorflow.op.core.Variable import org.tensorflow.op.nn.Conv2d import org.tensorflow.op.nn.DepthwiseConv2dNative import org.tensorflow.op.nn.DepthwiseConv2dNative.dilations -import kotlin.math.roundToInt private const val DEPTHWISE_KERNEL_VARIABLE_NAME = "separable_conv2d_depthwise_kernel" private const val POINTWISE_KERNEL_VARIABLE_NAME = "separable_conv2d_pointwise_kernel" @@ -84,117 +76,33 @@ public class SeparableConv2D( public val padding: ConvPadding = ConvPadding.SAME, public val useBias: Boolean = true, name: String = "" -) : Layer(name), NoGradients { - // weight tensors - private lateinit var depthwiseKernel: Variable - private lateinit var pointwiseKernel: Variable - private var bias: Variable? 
= null - - // weight tensor shapes - private lateinit var depthwiseKernelShape: Shape - private lateinit var pointwiseKernelShape: Shape - private lateinit var biasShape: Shape +) : AbstractSeparableConv( + filtersInternal = filters, + kernelSizeInternal = kernelSize, + stridesInternal = strides, + dilationsInternal = dilations, + depthMulitplierInternal = depthMultiplier, + activationInternal = activation, + depthwiseInitializerInternal = depthwiseInitializer, + pointwiseInitializerInternal = pointwiseInitializer, + biasInitializerInternal = biasInitializer, + depthwiseRegularizerInternal = depthwiseRegularizer, + pointwiseRegularizerInternal = pointwiseRegularizer, + biasRegularizerInternal = biasRegularizer, + useBiasInternal = useBias, + depthwiseKernelVariableName = DEPTHWISE_KERNEL_VARIABLE_NAME, + pointwiseKernelVariableName = POINTWISE_KERNEL_VARIABLE_NAME, + biasVariableName = BIAS_VARIABLE_NAME, + name +), NoGradients { init { requireArraySize(kernelSize, 2, "kernelSize") requireArraySize(strides, 4, "strides") requireArraySize(dilations, 4, "dilations") - isTrainable = false - } - - override fun build(tf: Ops, kGraph: KGraph, inputShape: Shape) { - // Amount of channels should be the last value in the inputShape (make warning here) - val numberOfChannels = inputShape.size(inputShape.numDimensions() - 1) - - // Compute shapes of kernel and bias matrices - depthwiseKernelShape = shapeFromDims(*kernelSize, numberOfChannels, this.depthMultiplier.toLong()) - pointwiseKernelShape = shapeFromDims(1, 1, numberOfChannels * this.depthMultiplier, filters) - biasShape = Shape.make(filters) - - // should be calculated before addWeight because it's used in calculation, need to rewrite addWEight to avoid strange behaviour - // calculate fanIn, fanOut - val inputDepth = numberOfChannels // amount of channels - val outputDepth = numberOfChannels * this.depthMultiplier // amount of channels for the next layer - - fanIn = (inputDepth * kernelSize[0] * 
kernelSize[1]).toInt() - fanOut = ((outputDepth * kernelSize[0] * kernelSize[1] / (strides[0].toDouble() * strides[1])).roundToInt()) - - val (depthwiseKernelVariableName, pointwiseKernelVariableName, biasVariableName) = defineVariableNames() - - createSeparableConv2DVariables( - tf, - depthwiseKernelVariableName, - pointwiseKernelVariableName, - biasVariableName, - kGraph - ) - } - - private fun defineVariableNames(): Triple { - return if (name.isNotEmpty()) { - Triple( - separableConv2dDepthwiseKernelVarName(name), - separableConv2dPointwiseKernelVarName(name), - separableConv2dBiasVarName(name) - ) - } else { - Triple(DEPTHWISE_KERNEL_VARIABLE_NAME, POINTWISE_KERNEL_VARIABLE_NAME, BIAS_VARIABLE_NAME) - } - } - - private fun createSeparableConv2DVariables( - tf: Ops, - depthwiseKernelVariableName: String, - pointwiseKernelVariableName: String, - biasVariableName: String, - kGraph: KGraph - ) { - depthwiseKernel = tf.withName(depthwiseKernelVariableName).variable(depthwiseKernelShape, getDType()) - pointwiseKernel = tf.withName(pointwiseKernelVariableName).variable(pointwiseKernelShape, getDType()) - if (useBias) bias = tf.withName(biasVariableName).variable(biasShape, getDType()) - - depthwiseKernel = addWeight( - tf, - kGraph, - depthwiseKernelVariableName, - depthwiseKernel, - depthwiseInitializer, - depthwiseRegularizer - ) - pointwiseKernel = addWeight( - tf, - kGraph, - pointwiseKernelVariableName, - pointwiseKernel, - pointwiseInitializer, - pointwiseRegularizer - ) - if (useBias) bias = addWeight(tf, kGraph, biasVariableName, bias!!, biasInitializer, biasRegularizer) - } - - override fun computeOutputShape(inputShape: Shape): Shape { - var rows = inputShape.size(1) - var cols = inputShape.size(2) - rows = convOutputLength( - rows, kernelSize[0].toInt(), padding, - strides[1].toInt(), dilations[1].toInt() - ) - cols = convOutputLength( - cols, kernelSize[1].toInt(), padding, - strides[2].toInt(), dilations[2].toInt() - ) - - val shape = 
Shape.make(inputShape.size(0), rows, cols, filters) - outputShape = TensorShape(shape) - return shape } - override fun forward( - tf: Ops, - input: Operand, - isTraining: Operand, - numberOfLosses: Operand? - ): Operand { + override fun separableConvImplementation(tf: Ops, input: Operand): Operand { val paddingName = padding.paddingName val depthwiseConv2DOptions: DepthwiseConv2dNative.Options = dilations(dilations.toList()).dataFormat("NHWC") @@ -210,37 +118,30 @@ public class SeparableConv2D( val pointwiseStrides = mutableListOf(1L, 1L, 1L, 1L) val conv2DOptions: Conv2d.Options = Conv2d.dataFormat("NHWC") - var output: Operand = - tf.nn.conv2d(depthwiseOutput, pointwiseKernel, pointwiseStrides, "VALID", conv2DOptions) - - if (useBias) { - output = tf.nn.biasAdd(output, bias) - } - - return Activations.convert(activation).apply(tf, output, name) + return tf.nn.conv2d(depthwiseOutput, pointwiseKernel, pointwiseStrides, "VALID", conv2DOptions) } - override var weights: Map> - get() = extractDepthConv2DWeights() - set(value) = assignWeights(value) - - private fun extractDepthConv2DWeights(): Map> { - return extractWeights(defineVariableNames().toList()) - } + override fun defineOutputShape(inputShape: Shape): Shape { + var rows = inputShape.size(1) + var cols = inputShape.size(2) - /** Returns the shape of kernel weights. */ - public val depthwiseShapeArray: LongArray get() = TensorShape(depthwiseKernelShape).dims() + rows = convOutputLength( + rows, kernelSize[0].toInt(), padding, + strides[1].toInt(), dilations[1].toInt() + ) + cols = convOutputLength( + cols, kernelSize[1].toInt(), padding, + strides[2].toInt(), dilations[2].toInt() + ) - /** Returns the shape of kernel weights. */ - public val pointwiseShapeArray: LongArray get() = TensorShape(pointwiseKernelShape).dims() + return Shape.make(inputShape.size(0), rows, cols, filters) + } - /** Returns the shape of bias weights. 
*/ - public val biasShapeArray: LongArray get() = TensorShape(biasShape).dims() + override fun depthwiseKernalVarName(name: String): String = separableConvDepthwiseKernelVarName(name, dim = 2) - override val hasActivation: Boolean get() = true + override fun pointwiseKernelVarName(name: String): String = separableConvPointwiseKernelVarName(name, dim = 2) - override val paramCount: Int - get() = (depthwiseKernelShape.numElements() + pointwiseKernelShape.numElements() + biasShape.numElements()).toInt() + override fun biasVarName(name: String): String = separableConvBiasVarName(name, dim = 2) override fun toString(): String = "SeparableConv2D(kernelSize=${kernelSize.contentToString()}, strides=${strides.contentToString()}, " +
diff --git a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/util/nameConventions.kt b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/util/nameConventions.kt
index 1366f8dae..94fd78ade 100644
--- a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/util/nameConventions.kt
+++ b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/util/nameConventions.kt
@@ -32,13 +32,13 @@ internal fun depthwiseConv2dBiasVarName(name: String) = name + "_" + "depthwise_
 internal fun depthwiseConv2dKernelVarName(name: String) = name + "_" + "depthwise_conv2d_kernel"
 
-/** Default SeparableConv2d bias variable name in TensorFlow graph, based on variable's name. */
-internal fun separableConv2dBiasVarName(name: String) = name + "_" + "separable_conv2d_bias"
+/** Default bias variable name in TensorFlow graph for a separable convolution of rank [dim], based on [name]. */
+internal fun separableConvBiasVarName(name: String, dim: Int): String = name + "_" + "separable_conv${dim}d_bias"
 
-/** Default SeparableConv2d depthwise kernel variable name in TensorFlow graph, based on variable's name. */
-internal fun separableConv2dDepthwiseKernelVarName(name: String) = name + "_" + "separable_conv2d_depthwise_kernel"
+/** Default depthwise kernel variable name in TensorFlow graph for a separable convolution of rank [dim], based on [name]. */
+internal fun separableConvDepthwiseKernelVarName(name: String, dim: Int): String = name + "_" + "separable_conv${dim}d_depthwise_kernel"
 
-/** Default SeparableConv2d pointwise kernel variable name in TensorFlow graph, based on variable's name. */
-internal fun separableConv2dPointwiseKernelVarName(name: String) = name + "_" + "separable_conv2d_pointwise_kernel"
+/** Default pointwise kernel variable name in TensorFlow graph for a separable convolution of rank [dim], based on [name]. */
+internal fun separableConvPointwiseKernelVarName(name: String, dim: Int): String = name + "_" + "separable_conv${dim}d_pointwise_kernel"
 
 /** Default Dense bias variable name in TensorFlow graph, based on variable's name. */
 internal fun denseBiasVarName(name: String) = name + "_" + "dense_bias"
diff --git a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/WeightLoader.kt b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/WeightLoader.kt index 7ab6ed91e..9d67a3f3d 100644 --- a/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/WeightLoader.kt +++ b/api/src/main/kotlin/org/jetbrains/kotlinx/dl/api/inference/keras/WeightLoader.kt @@ -281,7 +281,7 @@ private fun fillSeparableConv2DVariablesFromKeras( val data = it.data when (it.name) { "depthwise_kernel:0" -> { - val kernelVariableName = separableConv2dDepthwiseKernelVarName(layerName) + val kernelVariableName = separableConvDepthwiseKernelVarName(layerName, dim = 2) val kernelShape = (model.getLayer(layerName) as SeparableConv2D).depthwiseShapeArray require( kernelShape.map { e -> e.toInt() }.toIntArray().contentEquals(dims) @@ -289,7 +289,7 @@ private fun fillSeparableConv2DVariablesFromKeras( model.fillVariable(kernelVariableName, data) } "pointwise_kernel:0" -> { - val kernelVariableName = separableConv2dPointwiseKernelVarName(layerName) + val kernelVariableName = separableConvPointwiseKernelVarName(layerName, dim = 2) val kernelShape = (model.getLayer(layerName) as SeparableConv2D).pointwiseShapeArray require(
kernelShape.map { e -> e.toInt() }.toIntArray().contentEquals(dims) @@ -297,7 +297,7 @@ private fun fillSeparableConv2DVariablesFromKeras( model.fillVariable(kernelVariableName, data) } "depthwise_bias:0" -> { - val biasVariableName = separableConv2dBiasVarName(layerName) + val biasVariableName = separableConvBiasVarName(layerName, dim = 2) val biasShape = (model.getLayer(layerName) as SeparableConv2D).biasShapeArray require( biasShape.map { e -> e.toInt() }.toIntArray().contentEquals(dims) @@ -590,9 +590,9 @@ private fun initDepthwiseConv2DVariablesByDefaultInitializer(name: String, model } private fun initSeparableConv2DVariablesByDefaultInitializer(name: String, model: GraphTrainableModel) { - val depthwiseKernelVariableName = separableConv2dDepthwiseKernelVarName(name) - val pointwiseKernelVariableName = separableConv2dPointwiseKernelVarName(name) - val biasVariableName = depthwiseConv2dBiasVarName(name) + val depthwiseKernelVariableName = separableConvDepthwiseKernelVarName(name, dim = 2) + val pointwiseKernelVariableName = separableConvPointwiseKernelVarName(name, dim = 2) + val biasVariableName = separableConvBiasVarName(name, dim = 2) model.runAssignOpByVarName(depthwiseKernelVariableName) model.runAssignOpByVarName(pointwiseKernelVariableName) model.runAssignOpByVarName(biasVariableName) @@ -786,11 +786,11 @@ private fun fillSeparableConv2DVariables( layerPaths as LayerConvOrDensePaths val depthwiseKernelData = hdfFile.getDatasetByPath(depthwiseKernelDataPathTemplate.format(name, name)).data - val depthwiseKernelVariableName = separableConv2dDepthwiseKernelVarName(name) + val depthwiseKernelVariableName = separableConvDepthwiseKernelVarName(name, dim = 2) model.fillVariable(depthwiseKernelVariableName, depthwiseKernelData) val pointwiseKernelData = hdfFile.getDatasetByPath(pointwiseKernelDataPathTemplate.format(name, name)).data - val pointwiseKernelVariableName = separableConv2dPointwiseKernelVarName(name) + val pointwiseKernelVariableName = 
separableConvPointwiseKernelVarName(name, dim = 2) model.fillVariable(pointwiseKernelVariableName, pointwiseKernelData) if (useBias) {