From c1bc115496ccecef2cdff5cc57cb84b365b3fce8 Mon Sep 17 00:00:00 2001
From: Julia Beliaeva
Date: Wed, 13 Apr 2022 03:51:51 +0300
Subject: [PATCH 1/7] Return a list of variables from createSlots function

---
 .../kotlinx/dl/api/core/optimizer/AdaDelta.kt  | 13 ++++++-------
 .../kotlinx/dl/api/core/optimizer/AdaGrad.kt   | 10 ++++------
 .../kotlinx/dl/api/core/optimizer/AdaGradDA.kt | 13 ++++++-------
 .../kotlinx/dl/api/core/optimizer/Adam.kt      | 14 +++++++-------
 .../kotlinx/dl/api/core/optimizer/Adamax.kt    | 15 ++++++++-------
 .../kotlinx/dl/api/core/optimizer/Ftrl.kt      | 14 +++++++-------
 .../kotlinx/dl/api/core/optimizer/Momentum.kt  | 10 ++++------
 .../kotlinx/dl/api/core/optimizer/Optimizer.kt |  7 +++----
 .../kotlinx/dl/api/core/optimizer/RMSProp.kt   | 16 ++++++++--------
 9 files changed, 53 insertions(+), 59 deletions(-)

diff --git a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/AdaDelta.kt b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/AdaDelta.kt
index a41302c33..fbe9c90a2 100644
--- a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/AdaDelta.kt
+++ b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/AdaDelta.kt
@@ -95,22 +95,21 @@ public class AdaDelta(
         return targets
     }
 
-    private fun createAdaDeltaSlot(graph: KGraph, tf: Ops, v: Output<Float>) {
+    private fun createAdaDeltaSlot(graph: KGraph, tf: Ops, v: Output<Float>): Pair<Variable<Float>, Variable<Float>> {
         val accumInitializerName = defaultInitializerOpName(createName(v, ACCUMULATOR))
         val accumulatorInitializer = tf.withName(accumInitializerName)
             .fill(tf.shape(v), tf.dtypes.cast(tf.constant(0.0f), getDType()))
-        createSlot(graph, tf, v.asOutput(), ACCUMULATOR, accumulatorInitializer)
+        val accumulator = createSlot(graph, tf, v.asOutput(), ACCUMULATOR, accumulatorInitializer)
 
         val accumUpdateInitializerName = defaultInitializerOpName(createName(v, ACCUMULATOR_UPDATE))
         val updateInitializer: Operand<Float> = tf.withName(accumUpdateInitializerName)
             .fill(tf.shape(v), tf.dtypes.cast(tf.constant(0.0f), getDType()))
-        createSlot(graph, tf, v.asOutput(), ACCUMULATOR_UPDATE, updateInitializer)
+        val accumulatorUpdate = createSlot(graph, tf, v.asOutput(), ACCUMULATOR_UPDATE, updateInitializer)
+        return accumulator to accumulatorUpdate
     }
 
-    override fun createSlots(graph: KGraph, tf: Ops, variables: List<Variable<Float>>) {
-        for (v in variables) {
-            createAdaDeltaSlot(graph, tf, v.asOutput())
-        }
+    override fun createSlots(graph: KGraph, tf: Ops, variables: List<Variable<Float>>): List<Variable<Float>> {
+        return variables.flatMap { createAdaDeltaSlot(graph, tf, it.asOutput()).toList() }
     }
 
     override val optimizerName: String get() = "Adadelta"
diff --git a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/AdaGrad.kt b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/AdaGrad.kt
index cd6293b88..5b408daf3 100644
--- a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/AdaGrad.kt
+++ b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/AdaGrad.kt
@@ -84,18 +84,16 @@ public class AdaGrad(
         return targets
     }
 
-    private fun createAdaGradSlot(graph: KGraph, tf: Ops, v: Output<Float>) {
+    private fun createAdaGradSlot(graph: KGraph, tf: Ops, v: Output<Float>): Variable<Float> {
         val accumInitializerName = defaultInitializerOpName(createName(v, ACCUMULATOR))
 
         val initializer: Operand<Float> = tf.withName(accumInitializerName)
             .fill(tf.shape(v), tf.constant(initialAccumulatorValue))
-        createSlot(graph, tf, v.asOutput(), ACCUMULATOR, initializer)
+        return createSlot(graph, tf, v.asOutput(), ACCUMULATOR, initializer)
     }
 
-    override fun createSlots(graph: KGraph, tf: Ops, variables: List<Variable<Float>>) {
-        for (v in variables) {
-            createAdaGradSlot(graph, tf, v.asOutput())
-        }
+    override fun createSlots(graph: KGraph, tf: Ops, variables: List<Variable<Float>>): List<Variable<Float>> {
+        return variables.map { createAdaGradSlot(graph, tf, it.asOutput()) }
     }
 
     override val optimizerName: String get() = "Adagrad"
diff --git a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/AdaGradDA.kt b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/AdaGradDA.kt
index 310de4987..92fed203c 100644
--- a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/AdaGradDA.kt
+++ b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/AdaGradDA.kt
@@ -104,28 +104,27 @@ public class AdaGradDA(
         return targets
     }
 
-    private fun createAdaGradDASlot(graph: KGraph, tf: Ops, v: Output<Float>) {
+    private fun createAdaGradDASlot(graph: KGraph, tf: Ops, v: Output<Float>): Pair<Variable<Float>, Variable<Float>> {
         val accumulatorInitializerName = defaultInitializerOpName(createName(v, ACCUMULATOR))
         val accumInitializer: Operand<Float> = tf.withName(accumulatorInitializerName)
             .fill(tf.shape(v), tf.constant(0.0f))
-        createSlot(graph, tf, v.asOutput(), ACCUMULATOR, accumInitializer)
+        val accumulator = createSlot(graph, tf, v.asOutput(), ACCUMULATOR, accumInitializer)
 
         val squareAccumInitializerName = defaultInitializerOpName(createName(v, SQUARED_ACCUMULATOR))
         val sqInitializer: Operand<Float> = tf.withName(squareAccumInitializerName)
             .fill(tf.shape(v), tf.constant(initialAccumulatorValue))
 
-        createSlot(graph, tf, v.asOutput(), SQUARED_ACCUMULATOR, sqInitializer)
+        val squaredAccumulator = createSlot(graph, tf, v.asOutput(), SQUARED_ACCUMULATOR, sqInitializer)
+        return accumulator to squaredAccumulator
     }
 
-    override fun createSlots(graph: KGraph, tf: Ops, variables: List<Variable<Float>>) {
-        for (v in variables) {
-            createAdaGradDASlot(graph, tf, v.asOutput())
-        }
+    override fun createSlots(graph: KGraph, tf: Ops, variables: List<Variable<Float>>): List<Variable<Float>> {
         globalStep = tf.withName(GLOBAL_STEP).variable(Shape.scalar(), getDType())
         val globalStepAssignName = defaultAssignOpName(GLOBAL_STEP)
         val globalStepInit: Assign<*> = tf.withName(globalStepAssignName)
             .assign(globalStep, tf.withName(defaultInitializerOpName(GLOBAL_STEP)).constant(0.0f))
         graph.addOptimizerVariableInitializer(globalStepInit)
+        return variables.flatMap { createAdaGradDASlot(graph, tf, it.asOutput()).toList() }
     }
 
     override val optimizerName: String get() = "AdaGradDA"
diff --git a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Adam.kt b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Adam.kt
index b8a05f248..7b2587cf4 100644
--- a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Adam.kt
+++ b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Adam.kt
@@ -122,22 +122,20 @@ public class Adam(
         return targets
     }
 
-    private fun createAdamSlot(graph: KGraph, tf: Ops, v: Output<Float>) {
+    private fun createAdamSlot(graph: KGraph, tf: Ops, v: Output<Float>): Pair<Variable<Float>, Variable<Float>> {
         val firstMomentInitializerName = defaultInitializerOpName(createName(v, FIRST_MOMENT))
         val firstMomentInitializer =
             tf.withName(firstMomentInitializerName).fill(tf.shape(v), tf.constant(0.0f, getDType()))
-        createSlot(graph, tf, v.asOutput(), FIRST_MOMENT, firstMomentInitializer)
+        val firstMoment = createSlot(graph, tf, v.asOutput(), FIRST_MOMENT, firstMomentInitializer)
 
         val secondMomentInitializerName = defaultInitializerOpName(createName(v, SECOND_MOMENT))
         val secondMomentInitializer =
             tf.withName(secondMomentInitializerName).fill(tf.shape(v), tf.constant(0.0f, getDType()))
-        createSlot(graph, tf, v.asOutput(), SECOND_MOMENT, secondMomentInitializer)
+        val secondMoment = createSlot(graph, tf, v.asOutput(), SECOND_MOMENT, secondMomentInitializer)
+        return firstMoment to secondMoment
     }
 
-    override fun createSlots(graph: KGraph, tf: Ops, variables: List<Variable<Float>>) {
-        for (v in variables) {
-            createAdamSlot(graph, tf, v.asOutput())
-        }
+    override fun createSlots(graph: KGraph, tf: Ops, variables: List<Variable<Float>>): List<Variable<Float>> {
         betaOnePower = tf.withName(FIRST_BETA_POWER_NAME).variable(Shape.scalar(), getDType())
 
         val betaOnePowerAssignName = defaultAssignOpName(FIRST_BETA_POWER_NAME)
@@ -158,6 +156,8 @@ public class Adam(
             tf.withName(defaultInitializerOpName(SECOND_BETA_POWER_NAME)).constant(beta2, getDType())
         )
         graph.addOptimizerVariableInitializer(betaTwoPowerInit)
+
+        return variables.flatMap { createAdamSlot(graph, tf, it.asOutput()).toList() }
     }
 
     override val optimizerName: String get() = "Adam"
diff --git a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Adamax.kt b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Adamax.kt
index 33954e229..5a9f83c77 100644
--- a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Adamax.kt
+++ b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Adamax.kt
@@ -115,22 +115,21 @@ public class Adamax(
         return targets
     }
 
-    private fun createAdamaxSlot(graph: KGraph, tf: Ops, v: Output<Float>) {
+    private fun createAdamaxSlot(graph: KGraph, tf: Ops, v: Output<Float>): Pair<Variable<Float>, Variable<Float>> {
         val firstMomentInitializerName = defaultInitializerOpName(createName(v, FIRST_MOMENT))
         val firstMomentInitializer =
             tf.withName(firstMomentInitializerName).fill(tf.shape(v), tf.constant(0.0f, getDType()))
-        createSlot(graph, tf, v.asOutput(), FIRST_MOMENT, firstMomentInitializer)
+        val firstMoment = createSlot(graph, tf, v.asOutput(), FIRST_MOMENT, firstMomentInitializer)
 
         val secondMomentInitializerName = defaultInitializerOpName(createName(v, SECOND_MOMENT))
         val secondMomentInitializer = tf.withName(secondMomentInitializerName)
             .fill(tf.shape(v), tf.constant(0.0f, getDType()))
-        createSlot(graph, tf, v.asOutput(), SECOND_MOMENT, secondMomentInitializer)
+        val secondMoment = createSlot(graph, tf, v.asOutput(), SECOND_MOMENT, secondMomentInitializer)
+
+        return firstMoment to secondMoment
     }
 
-    override fun createSlots(graph: KGraph, tf: Ops, variables: List<Variable<Float>>) {
-        for (v in variables) {
-            createAdamaxSlot(graph, tf, v.asOutput())
-        }
+    override fun createSlots(graph: KGraph, tf: Ops, variables: List<Variable<Float>>): List<Variable<Float>> {
         betaOnePower = tf.withName(FIRST_BETA_POWER_NAME).variable(Shape.scalar(), getDType())
 
         val betaOnePowerAssignName = defaultAssignOpName(FIRST_BETA_POWER_NAME)
@@ -140,6 +139,8 @@ public class Adamax(
             tf.withName(defaultInitializerOpName(FIRST_BETA_POWER_NAME)).constant(beta1, getDType())
         )
         graph.addOptimizerVariableInitializer(betaOnePowerInit)
+
+        return variables.flatMap { createAdamaxSlot(graph, tf, it.asOutput()).toList() }
     }
 
     override val optimizerName: String get() = "Adamax"
diff --git a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Ftrl.kt b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Ftrl.kt
index 23eb07204..013af3a98 100644
--- a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Ftrl.kt
+++ b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Ftrl.kt
@@ -119,22 +119,22 @@ public class Ftrl(
         return targets
     }
 
-    private fun createFtrlSlot(graph: KGraph, tf: Ops, v: Output<Float>) {
+    private fun createFtrlSlot(graph: KGraph, tf: Ops, v: Output<Float>): Pair<Variable<Float>, Variable<Float>> {
         val accumInitializerName = defaultInitializerOpName(createName(v, ACCUMULATOR))
         val accumInitializer = tf.withName(accumInitializerName)
             .fill(tf.shape(v), tf.constant(initialAccumulatorValue))
-        createSlot(graph, tf, v.asOutput(), ACCUMULATOR, accumInitializer)
+        val accumulator = createSlot(graph, tf, v.asOutput(), ACCUMULATOR, accumInitializer)
 
         val linearAccumInitializerName = defaultInitializerOpName(createName(v, LINEAR_ACCUMULATOR))
         val linearAccumInitializer = tf.withName(linearAccumInitializerName)
             .fill(tf.shape(v), tf.constant(0.0f))
-        createSlot(graph, tf, v.asOutput(), LINEAR_ACCUMULATOR, linearAccumInitializer)
+        val linearAccumulator = createSlot(graph, tf, v.asOutput(), LINEAR_ACCUMULATOR, linearAccumInitializer)
+
+        return accumulator to linearAccumulator
     }
 
-    override fun createSlots(graph: KGraph, tf: Ops, variables: List<Variable<Float>>) {
-        for (v in variables) {
-            createFtrlSlot(graph, tf, v.asOutput())
-        }
+    override fun createSlots(graph: KGraph, tf: Ops, variables: List<Variable<Float>>): List<Variable<Float>> {
+        return variables.flatMap { createFtrlSlot(graph, tf, it.asOutput()).toList() }
     }
 
     override val optimizerName: String get() = "Ftrl"
diff --git a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Momentum.kt b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Momentum.kt
index 34dd2b934..39523dfee 100644
--- a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Momentum.kt
+++ b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Momentum.kt
@@ -70,17 +70,15 @@ public class Momentum(
         return targets
     }
 
-    private fun createMomentumSlot(graph: KGraph, tf: Ops, v: Output<Float>) {
+    private fun createMomentumSlot(graph: KGraph, tf: Ops, v: Output<Float>): Variable<Float> {
         val momentumInitializerName = defaultInitializerOpName(createName(v, MOMENTUM))
         val initializer: Operand<Float> = tf.withName(momentumInitializerName)
             .fill(tf.shape(v), tf.constant(0.0f))
-        createSlot(graph, tf, v.asOutput(), MOMENTUM, initializer)
+        return createSlot(graph, tf, v.asOutput(), MOMENTUM, initializer)
    }
 
-    override fun createSlots(graph: KGraph, tf: Ops, variables: List<Variable<Float>>) {
-        for (v in variables) {
-            createMomentumSlot(graph, tf, v.asOutput())
-        }
+    override fun createSlots(graph: KGraph, tf: Ops, variables: List<Variable<Float>>): List<Variable<Float>> {
+        return variables.map { createMomentumSlot(graph, tf, it.asOutput()) }
     }
 
     override val optimizerName: String get() = "Momentum"
diff --git a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Optimizer.kt b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Optimizer.kt
index 5cc95cfb7..cd4b79c2f 100644
--- a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Optimizer.kt
+++ b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Optimizer.kt
@@ -93,9 +93,7 @@ public abstract class Optimizer(public val clipGradient: ClipGradientAction) {
      *
      * @param variables The variables to create slots for.
      */
-    protected open fun createSlots(graph: KGraph, tf: Ops, variables: List<Variable<Float>>) {
-
-    }
+    protected open fun createSlots(graph: KGraph, tf: Ops, variables: List<Variable<Float>>): List<Variable<Float>> = emptyList()
 
     /** Returns optimizer name. */
     public abstract val optimizerName: String
@@ -116,7 +114,7 @@ public abstract class Optimizer(public val clipGradient: ClipGradientAction) {
         variable: Output<Float>,
         slotName: String,
         initializer: Operand<Float>
-    ) {
+    ): Variable<Float> {
         val createName: String = createName(variable, slotName)
         val slot: Variable<Float> = tf.withName(createName).variable(variable.shape(), getDType())
 
@@ -130,6 +128,7 @@ public abstract class Optimizer(public val clipGradient: ClipGradientAction) {
         val variables: MutableMap<String, Variable<Float>> = slots.computeIfAbsent(slotName) { mutableMapOf() }
         variables[varName] = slot
+        return slot
     }
 
     /**
diff --git a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/RMSProp.kt b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/RMSProp.kt
index 5fea565ee..878e80b42 100644
--- a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/RMSProp.kt
+++ b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/RMSProp.kt
@@ -107,17 +107,17 @@ public class RMSProp(
         return targets
     }
 
-    private fun createRMSPropSlot(graph: KGraph, tf: Ops, v: Output<Float>) {
+    private fun createRMSPropSlot(graph: KGraph, tf: Ops, v: Output<Float>): List<Variable<Float>> {
         val rmsInitializerName = defaultInitializerOpName(createName(v, RMS))
 
         val rmsInitializer: Operand<Float> = tf.withName(rmsInitializerName)
             .fill(tf.shape(v), tf.dtypes.cast(tf.constant(1.0f), getDType()))
-        createSlot(graph, tf, v.asOutput(), RMS, rmsInitializer)
+        val rms = createSlot(graph, tf, v.asOutput(), RMS, rmsInitializer)
 
         val momentumInitializerName = defaultInitializerOpName(createName(v, MOMENTUM))
         val momentumInitializer: Operand<Float> = tf.withName(momentumInitializerName)
             .fill(tf.shape(v), tf.dtypes.cast(tf.constant(0.0f), getDType()))
-        createSlot(graph, tf, v.asOutput(), MOMENTUM, momentumInitializer)
+        val momentum = createSlot(graph, tf, v.asOutput(), MOMENTUM, momentumInitializer)
 
         if (centered) {
             val mgInitializerName = defaultInitializerOpName(createName(v, MG))
@@ -126,14 +126,14 @@ public class RMSProp(
                 tf.shape(v),
                 tf.constant(0.0f)
             )
-            createSlot(graph, tf, v.asOutput(), MG, mgInitializer)
+            val mg = createSlot(graph, tf, v.asOutput(), MG, mgInitializer)
+            return listOf(rms, momentum, mg)
         }
+        return listOf(rms, momentum)
     }
 
-    override fun createSlots(graph: KGraph, tf: Ops, variables: List<Variable<Float>>) {
-        for (v in variables) {
-            createRMSPropSlot(graph, tf, v.asOutput())
-        }
+    override fun createSlots(graph: KGraph, tf: Ops, variables: List<Variable<Float>>): List<Variable<Float>> {
+        return variables.flatMap { createRMSPropSlot(graph, tf, it.asOutput()) }
     }
 
     override val optimizerName: String get() = "RMSProp"

From ccfc029d729161535a06c7777d6db1c9b79fba53 Mon Sep 17 00:00:00 2001
From: Julia Beliaeva
Date: Wed, 13 Apr 2022 03:54:07 +0300
Subject: [PATCH 2/7] Simplify converting variables to outputs
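A minimal before/after sketch of this conversion (names taken from the Optimizer.kt hunk below; it assumes a list of TensorFlow variables called weights). The manual indexed loop

    // Before: an indexed loop that builds the output list by hand.
    val variableOutputs: MutableList<Output<Float>> = mutableListOf()
    for (i in weights.indices) {
        variableOutputs.add(i, weights[i].asOutput())
    }

collapses into a single idiomatic map call:

    // After: the same conversion expressed with map.
    val variableOutputs = weights.map { it.asOutput() }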
---
 .../kotlinx/dl/api/core/optimizer/Optimizer.kt | 13 +------------
 1 file changed, 1 insertion(+), 12 deletions(-)

diff --git a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Optimizer.kt b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Optimizer.kt
index cd4b79c2f..8d84cc55a 100644
--- a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Optimizer.kt
+++ b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Optimizer.kt
@@ -47,22 +47,11 @@ public abstract class Optimizer(public val clipGradient: ClipGradientAction) {
 
         val gradients: Gradients = computeGradients(tf, loss, weights)
 
-        val variableOutputs = variablesToOutputs(weights)
-
-        createSlots(graph, tf, variableOutputs) // empty action if not overridden
+        createSlots(graph, tf, weights.map { it.asOutput() }) // empty action if not overridden
 
         return applyGradients(graph, tf, weights, gradients)
     }
 
-    private fun variablesToOutputs(variables: List<Variable<Float>>): List<Output<Float>> {
-        val variableOutputs: MutableList<Output<Float>> = mutableListOf()
-        for (i in variables.indices) {
-            variableOutputs.add(i, variables[i].asOutput())
-        }
-
-        return variableOutputs
-    }
-
     /**
      * Applies gradients to weights.
      *

From 0191501c9dc56eacea966e1655ff5e8060d2c51f Mon Sep 17 00:00:00 2001
From: Julia Beliaeva
Date: Wed, 13 Apr 2022 04:22:03 +0300
Subject: [PATCH 3/7] Simplify applyGradients

---
 .../jetbrains/kotlinx/dl/api/core/optimizer/AdaDelta.kt    | 7 +++----
 .../org/jetbrains/kotlinx/dl/api/core/optimizer/AdaGrad.kt | 6 ++----
 .../jetbrains/kotlinx/dl/api/core/optimizer/AdaGradDA.kt   | 7 +++----
 .../org/jetbrains/kotlinx/dl/api/core/optimizer/Adam.kt    | 7 ++-----
 .../org/jetbrains/kotlinx/dl/api/core/optimizer/Adamax.kt  | 6 ++----
 .../org/jetbrains/kotlinx/dl/api/core/optimizer/Ftrl.kt    | 7 ++-----
 .../jetbrains/kotlinx/dl/api/core/optimizer/Momentum.kt    | 7 ++-----
 .../org/jetbrains/kotlinx/dl/api/core/optimizer/RMSProp.kt | 6 ++----
 8 files changed, 18 insertions(+), 35 deletions(-)

diff --git a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/AdaDelta.kt b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/AdaDelta.kt
index fbe9c90a2..57ad7a749 100644
--- a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/AdaDelta.kt
+++ b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/AdaDelta.kt
@@ -67,14 +67,13 @@ public class AdaDelta(
         weights: List<Variable<Float>>,
         gradients: Gradients
     ): List<Operand<Float>> {
-        val targets: MutableList<Operand<Float>> =
-            ArrayList()
+        val targets = mutableListOf<Operand<Float>>()
+
         rhoConst = tf.constant(rho, getDType())
         learningRateConst = tf.constant(learningRate, getDType())
         epsilonConstant = tf.constant(epsilon, getDType())
 
-        for (i in weights.indices) {
-            val variable = weights[i]
+        for ((i, variable) in weights.withIndex()) {
             val varName = variable.ref().op().name()
 
             val accumSlot: Variable<Float> = getSlot(varName, ACCUMULATOR)
diff --git a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/AdaGrad.kt b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/AdaGrad.kt
index 5b408daf3..11a2ab7f8 100644
--- a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/AdaGrad.kt
+++ b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/AdaGrad.kt
@@ -58,14 +58,12 @@ public class AdaGrad(
         weights: List<Variable<Float>>,
         gradients: Gradients
     ): List<Operand<Float>> {
-        val targets: MutableList<Operand<Float>> =
-            ArrayList()
+        val targets = mutableListOf<Operand<Float>>()
 
         initialAccumulatorValueConstant = tf.constant(initialAccumulatorValue, getDType())
         learningRateConst = tf.constant(learningRate, getDType())
 
-        for (i in weights.indices) {
-            val variable = weights[i]
+        for ((i, variable) in weights.withIndex()) {
             val varName = variable.ref().op().name()
 
             val slot: Variable<Float> = getSlot(varName, ACCUMULATOR)
diff --git a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/AdaGradDA.kt b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/AdaGradDA.kt
index 92fed203c..1848f5785 100644
--- a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/AdaGradDA.kt
+++ b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/AdaGradDA.kt
@@ -70,14 +70,13 @@ public class AdaGradDA(
         weights: List<Variable<Float>>,
         gradients: Gradients
     ): List<Operand<Float>> {
-        val targets: MutableList<Operand<Float>> =
-            ArrayList()
+        val targets = mutableListOf<Operand<Float>>()
+
         learningRateConst = tf.constant(learningRate, getDType())
         l1StrengthConst = tf.constant(l1Strength, getDType())
         l2StrengthConst = tf.constant(l2Strength, getDType())
 
-        for (i in weights.indices) {
-            val variable = weights[i]
+        for ((i, variable) in weights.withIndex()) {
             val varName = variable.ref().op().name()
 
             val gradSlot: Variable<Float> = getSlot(varName, ACCUMULATOR)
diff --git a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Adam.kt b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Adam.kt
index 7b2587cf4..ef76865c0 100644
--- a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Adam.kt
+++ b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Adam.kt
@@ -73,17 +73,14 @@ public class Adam(
         weights: List<Variable<Float>>,
         gradients: Gradients
     ): List<Operand<Float>> {
-        val targets: MutableList<Operand<Float>> =
-            ArrayList()
+        val targets = mutableListOf<Operand<Float>>()
 
         betaOneConst = tf.constant(beta1, getDType())
         betaTwoConst = tf.constant(beta2, getDType())
         learningRateConst = tf.constant(learningRate, getDType())
         epsilonConstant = tf.constant(epsilon, getDType())
 
-        for (i in weights.indices) {
-
-            val variable = weights[i]
+        for ((i, variable) in weights.withIndex()) {
             val varName = variable.ref().op().name()
 
             val firstMomentSlot: Variable<Float> = getSlot(varName, FIRST_MOMENT)
diff --git a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Adamax.kt b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Adamax.kt
index 5a9f83c77..198b9df83 100644
--- a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Adamax.kt
+++ b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Adamax.kt
@@ -72,8 +72,7 @@ public class Adamax(
         weights: List<Variable<Float>>,
         gradients: Gradients
     ): List<Operand<Float>> {
-        val targets: MutableList<Operand<Float>> =
-            ArrayList()
+        val targets = mutableListOf<Operand<Float>>()
 
         betaOneConst = tf.constant(beta1, getDType())
         betaTwoConst = tf.constant(beta2, getDType())
@@ -82,8 +81,7 @@ public class Adamax(
 
         val scope = Scope(graph.tfGraph)
 
-        for (i in weights.indices) {
-            val variable = weights[i]
+        for ((i, variable) in weights.withIndex()) {
             val varName = variable.ref().op().name()
 
             val firstMomentSlot: Variable<Float> = getSlot(varName, FIRST_MOMENT)
diff --git a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Ftrl.kt b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Ftrl.kt
index 013af3a98..00b298979 100644
--- a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Ftrl.kt
+++ b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Ftrl.kt
@@ -82,8 +82,7 @@ public class Ftrl(
         weights: List<Variable<Float>>,
         gradients: Gradients
     ): List<Operand<Float>> {
-        val targets: MutableList<Operand<Float>> =
-            ArrayList()
+        val targets = mutableListOf<Operand<Float>>()
 
         l1RegularizationStrengthConst = tf.constant(l1RegularizationStrength, getDType())
         l2RegularizationStrengthConst = tf.constant(l2RegularizationStrength, getDType())
@@ -91,9 +90,7 @@ public class Ftrl(
         l2ShrinkageRegularizationStrengthConst = tf.constant(l2ShrinkageRegularizationStrength, getDType())
         learningRatePowerConst = tf.constant(learningRatePower, getDType())
 
-        for (i in weights.indices) {
-
-            val variable = weights[i]
+        for ((i, variable) in weights.withIndex()) {
             val varName = variable.ref().op().name()
 
             val accumSlot: Variable<Float> = getSlot(varName, ACCUMULATOR)
diff --git a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Momentum.kt b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Momentum.kt
index 39523dfee..62d0443b7 100644
--- a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Momentum.kt
+++ b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Momentum.kt
@@ -44,15 +44,12 @@ public class Momentum(
         weights: List<Variable<Float>>,
         gradients: Gradients
     ): List<Operand<Float>> {
-        val targets: MutableList<Operand<Float>> =
-            ArrayList()
+        val targets = mutableListOf<Operand<Float>>()
 
         learningRateConst = tf.constant(learningRate)
         momentumConst = tf.constant(momentum)
 
-        for (i in weights.indices) {
-            val variable = weights[i]
-
+        for ((i, variable) in weights.withIndex()) {
             val slot = getSlot(variable.ref().op().name(), MOMENTUM)
 
             targets.add(
diff --git a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/RMSProp.kt b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/RMSProp.kt
index 878e80b42..943152b7b 100644
--- a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/RMSProp.kt
+++ b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/RMSProp.kt
@@ -57,16 +57,14 @@ public class RMSProp(
         weights: List<Variable<Float>>,
         gradients: Gradients
     ): List<Operand<Float>> {
-        val targets: MutableList<Operand<Float>> =
-            ArrayList()
+        val targets = mutableListOf<Operand<Float>>()
 
         decayConst = tf.constant(decay, getDType())
         momentumConst = tf.constant(momentum, getDType())
         learningRateConst = tf.constant(learningRate, getDType())
         epsilonConstant = tf.constant(epsilon, getDType())
 
-        for (i in weights.indices) {
-            val variable = weights[i]
+        for ((i, variable) in weights.withIndex()) {
             val varName = variable.ref().op().name()
 
             val rmsSlot: Variable<Float> = getSlot(varName, RMS)

From 4741e397ee3aac7eb9ef0a253af6c94a46aa991b Mon Sep 17 00:00:00 2001
From: Julia Beliaeva
Date: Wed, 13 Apr 2022 04:23:42 +0300
Subject: [PATCH 4/7] Inline createSlots into applyGradients

Create optimizer variables directly at the place they are used.
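The shape of the change, sketched on AdaDelta (a condensed view of the hunks below, not a complete method): the separate createSlots pass followed by name-based lookups

    // Before: slots were created up front, then fetched again by slot name.
    val accumSlot: Variable<Float> = getSlot(varName, ACCUMULATOR)
    val accumUpdateSlot: Variable<Float> = getSlot(varName, ACCUMULATOR_UPDATE)

is replaced by creating the slot variables right where the training op consumes them:

    // After: the slot variables are created at the point of use.
    val (accumSlot, accumUpdateSlot) = createAdaDeltaSlot(graph, tf, variable.asOutput())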
---
 .../kotlinx/dl/api/core/optimizer/AdaDelta.kt |  9 +---
 .../kotlinx/dl/api/core/optimizer/AdaGrad.kt  |  8 +---
 .../dl/api/core/optimizer/AdaGradDA.kt        | 21 +++------
 .../kotlinx/dl/api/core/optimizer/Adam.kt     | 47 +++++++------------
 .../kotlinx/dl/api/core/optimizer/Adamax.kt   | 34 +++++---------
 .../kotlinx/dl/api/core/optimizer/Ftrl.kt     |  8 +---
 .../kotlinx/dl/api/core/optimizer/Momentum.kt |  6 +--
 .../dl/api/core/optimizer/Optimizer.kt        | 10 ----
 .../kotlinx/dl/api/core/optimizer/RMSProp.kt  | 13 ++---
 9 files changed, 45 insertions(+), 111 deletions(-)

diff --git a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/AdaDelta.kt b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/AdaDelta.kt
index 57ad7a749..a08b3f3c1 100644
--- a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/AdaDelta.kt
+++ b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/AdaDelta.kt
@@ -74,10 +74,7 @@ public class AdaDelta(
         epsilonConstant = tf.constant(epsilon, getDType())
 
         for ((i, variable) in weights.withIndex()) {
-            val varName = variable.ref().op().name()
-
-            val accumSlot: Variable<Float> = getSlot(varName, ACCUMULATOR)
-            val accumUpdateSlot: Variable<Float> = getSlot(varName, ACCUMULATOR_UPDATE)
+            val (accumSlot, accumUpdateSlot) = createAdaDeltaSlot(graph, tf, variable.asOutput())
 
             targets.add(
                 tf.train.applyAdadelta(
@@ -107,10 +104,6 @@ public class AdaDelta(
         return accumulator to accumulatorUpdate
     }
 
-    override fun createSlots(graph: KGraph, tf: Ops, variables: List<Variable<Float>>): List<Variable<Float>> {
-        return variables.flatMap { createAdaDeltaSlot(graph, tf, it.asOutput()).toList() }
-    }
-
     override val optimizerName: String get() = "Adadelta"
 
     override val isRunningOnGPU: Boolean get() = true
diff --git a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/AdaGrad.kt b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/AdaGrad.kt
index 11a2ab7f8..e021b57f1 100644
--- a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/AdaGrad.kt
+++ b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/AdaGrad.kt
@@ -64,9 +64,7 @@ public class AdaGrad(
         learningRateConst = tf.constant(learningRate, getDType())
 
         for ((i, variable) in weights.withIndex()) {
-            val varName = variable.ref().op().name()
-
-            val slot: Variable<Float> = getSlot(varName, ACCUMULATOR)
+            val slot = createAdaGradSlot(graph, tf, variable.asOutput())
 
             targets.add(
                 tf.train.applyAdagrad(
@@ -90,10 +88,6 @@ public class AdaGrad(
         return createSlot(graph, tf, v.asOutput(), ACCUMULATOR, initializer)
     }
 
-    override fun createSlots(graph: KGraph, tf: Ops, variables: List<Variable<Float>>): List<Variable<Float>> {
-        return variables.map { createAdaGradSlot(graph, tf, it.asOutput()) }
-    }
-
     override val optimizerName: String get() = "Adagrad"
 
     override val isRunningOnGPU: Boolean get() = true
diff --git a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/AdaGradDA.kt b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/AdaGradDA.kt
index 1848f5785..37644e30a 100644
--- a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/AdaGradDA.kt
+++ b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/AdaGradDA.kt
@@ -76,12 +76,14 @@ public class AdaGradDA(
         l1StrengthConst = tf.constant(l1Strength, getDType())
         l2StrengthConst = tf.constant(l2Strength, getDType())
 
-        for ((i, variable) in weights.withIndex()) {
-            val varName = variable.ref().op().name()
-
-            val gradSlot: Variable<Float> = getSlot(varName, ACCUMULATOR)
-            val gradSquaredSlot: Variable<Float> = getSlot(varName, SQUARED_ACCUMULATOR)
+        globalStep = tf.withName(GLOBAL_STEP).variable(Shape.scalar(), getDType())
+        val globalStepAssignName = defaultAssignOpName(GLOBAL_STEP)
+        val globalStepInit: Assign<*> = tf.withName(globalStepAssignName)
+            .assign(globalStep, tf.withName(defaultInitializerOpName(GLOBAL_STEP)).constant(0.0f))
+        graph.addOptimizerVariableInitializer(globalStepInit)
 
+        for ((i, variable) in weights.withIndex()) {
+            val (gradSlot, gradSquaredSlot) = createAdaGradDASlot(graph, tf, variable.asOutput())
             targets.add(
                 tf.train.applyAdagradDa(
                     variable,
@@ -117,15 +119,6 @@ public class AdaGradDA(
         return accumulator to squaredAccumulator
     }
 
-    override fun createSlots(graph: KGraph, tf: Ops, variables: List<Variable<Float>>): List<Variable<Float>> {
-        globalStep = tf.withName(GLOBAL_STEP).variable(Shape.scalar(), getDType())
-        val globalStepAssignName = defaultAssignOpName(GLOBAL_STEP)
-        val globalStepInit: Assign<*> = tf.withName(globalStepAssignName)
-            .assign(globalStep, tf.withName(defaultInitializerOpName(GLOBAL_STEP)).constant(0.0f))
-        graph.addOptimizerVariableInitializer(globalStepInit)
-        return variables.flatMap { createAdaGradDASlot(graph, tf, it.asOutput()).toList() }
-    }
-
     override val optimizerName: String get() = "AdaGradDA"
 
     override val isRunningOnGPU: Boolean get() = true
diff --git a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Adam.kt b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Adam.kt
index ef76865c0..83fd97f42 100644
--- a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Adam.kt
+++ b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Adam.kt
@@ -80,12 +80,26 @@ public class Adam(
         learningRateConst = tf.constant(learningRate, getDType())
         epsilonConstant = tf.constant(epsilon, getDType())
 
-        for ((i, variable) in weights.withIndex()) {
-            val varName = variable.ref().op().name()
+        betaOnePower = tf.withName(FIRST_BETA_POWER_NAME).variable(Shape.scalar(), getDType())
+        val betaOnePowerAssignName = defaultAssignOpName(FIRST_BETA_POWER_NAME)
+        val betaOnePowerInit: Assign<*> = tf.withName(betaOnePowerAssignName)
+            .assign(
+                betaOnePower,
+                tf.withName(defaultInitializerOpName(FIRST_BETA_POWER_NAME)).constant(beta1, getDType())
+            )
+        graph.addOptimizerVariableInitializer(betaOnePowerInit)
 
-            val firstMomentSlot: Variable<Float> = getSlot(varName, FIRST_MOMENT)
-            val secondMomentSlot: Variable<Float> = getSlot(varName, SECOND_MOMENT)
+        betaTwoPower = tf.withName(SECOND_BETA_POWER_NAME).variable(Shape.scalar(), getDType())
+        val betaTwoPowerAssignName = defaultAssignOpName(SECOND_BETA_POWER_NAME)
+        val betaTwoPowerInit: Assign<*> = tf.withName(betaTwoPowerAssignName)
+            .assign(
+                betaTwoPower,
+                tf.withName(defaultInitializerOpName(SECOND_BETA_POWER_NAME)).constant(beta2, getDType())
+            )
+        graph.addOptimizerVariableInitializer(betaTwoPowerInit)
 
+        for ((i, variable) in weights.withIndex()) {
+            val (firstMomentSlot, secondMomentSlot) = createAdamSlot(graph, tf, variable.asOutput())
             targets.add(
                 tf.train.applyAdam(
                     variable,
@@ -132,31 +146,6 @@ public class Adam(
         return firstMoment to secondMoment
     }
 
-    override fun createSlots(graph: KGraph, tf: Ops, variables: List<Variable<Float>>): List<Variable<Float>> {
-        betaOnePower = tf.withName(FIRST_BETA_POWER_NAME).variable(Shape.scalar(), getDType())
-
-        val betaOnePowerAssignName = defaultAssignOpName(FIRST_BETA_POWER_NAME)
-        val betaOnePowerInit: Assign<*> = tf.withName(betaOnePowerAssignName)
-            .assign(
-                betaOnePower,
-                tf.withName(defaultInitializerOpName(FIRST_BETA_POWER_NAME)).constant(beta1, getDType())
-            )
-        graph.addOptimizerVariableInitializer(betaOnePowerInit)
-
-
-        betaTwoPower = tf.withName(SECOND_BETA_POWER_NAME).variable(Shape.scalar(), getDType())
-
-        val betaTwoPowerAssignName = defaultAssignOpName(SECOND_BETA_POWER_NAME)
-        val betaTwoPowerInit: Assign<*> = tf.withName(betaTwoPowerAssignName)
-            .assign(
-                betaTwoPower,
-                tf.withName(defaultInitializerOpName(SECOND_BETA_POWER_NAME)).constant(beta2, getDType())
-            )
-        graph.addOptimizerVariableInitializer(betaTwoPowerInit)
-
-        return variables.flatMap { createAdamSlot(graph, tf, it.asOutput()).toList() }
-    }
-
     override val optimizerName: String get() = "Adam"
 
     override val isRunningOnGPU: Boolean get() = true
diff --git a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Adamax.kt b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Adamax.kt
index 198b9df83..8c5011d4d 100644
--- a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Adamax.kt
+++ b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Adamax.kt
@@ -79,14 +79,19 @@ public class Adamax(
         learningRateConst = tf.constant(learningRate, getDType())
         epsilonConstant = tf.constant(epsilon, getDType())
 
+        betaOnePower = tf.withName(FIRST_BETA_POWER_NAME).variable(Shape.scalar(), getDType())
+        val betaOnePowerAssignName = defaultAssignOpName(FIRST_BETA_POWER_NAME)
+        val betaOnePowerInit: Assign<*> = tf.withName(betaOnePowerAssignName)
+            .assign(
+                betaOnePower,
+                tf.withName(defaultInitializerOpName(FIRST_BETA_POWER_NAME)).constant(beta1, getDType())
+            )
+        graph.addOptimizerVariableInitializer(betaOnePowerInit)
+
         val scope = Scope(graph.tfGraph)
 
         for ((i, variable) in weights.withIndex()) {
-            val varName = variable.ref().op().name()
-
-            val firstMomentSlot: Variable<Float> = getSlot(varName, FIRST_MOMENT)
-            val secondMomentSlot: Variable<Float> = getSlot(varName, SECOND_MOMENT)
-
+            val (firstMomentSlot, secondMomentSlot) = createAdamaxSlot(graph, tf, variable.asOutput())
             targets.add(
                 ApplyAdaMax.create(
                     scope,
@@ -104,10 +109,9 @@ public class Adamax(
             )
         }
 
-        val betaOnePowerInit = tf
-            .assign(betaOnePower, tf.math.mul(betaOnePower, betaOneConst))
+        val betaOnePowerInit2 = tf.assign(betaOnePower, tf.math.mul(betaOnePower, betaOneConst))
 
-        graph.addOptimizerVariableInitializer(betaOnePowerInit)
+        graph.addOptimizerVariableInitializer(betaOnePowerInit2)
         graph.addOptimizerVariable(betaOnePower)
 
         return targets
@@ -127,20 +131,6 @@ public class Adamax(
         return firstMoment to secondMoment
     }
 
-    override fun createSlots(graph: KGraph, tf: Ops, variables: List<Variable<Float>>): List<Variable<Float>> {
-        betaOnePower = tf.withName(FIRST_BETA_POWER_NAME).variable(Shape.scalar(), getDType())
-        val betaOnePowerAssignName = defaultAssignOpName(FIRST_BETA_POWER_NAME)
-
-        val betaOnePowerInit: Assign<*> = tf.withName(betaOnePowerAssignName)
-            .assign(
-                betaOnePower,
-                tf.withName(defaultInitializerOpName(FIRST_BETA_POWER_NAME)).constant(beta1, getDType())
-            )
-        graph.addOptimizerVariableInitializer(betaOnePowerInit)
-
-        return variables.flatMap { createAdamaxSlot(graph, tf, it.asOutput()).toList() }
-    }
-
     override val optimizerName: String get() = "Adamax"
 
     override val isRunningOnGPU: Boolean get() = false
diff --git a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Ftrl.kt b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Ftrl.kt
index 00b298979..79aa7985e 100644
--- a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Ftrl.kt
+++ b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Ftrl.kt
@@ -91,10 +91,8 @@ public class Ftrl(
         learningRatePowerConst = tf.constant(learningRatePower, getDType())
 
         for ((i, variable) in weights.withIndex()) {
-            val varName = variable.ref().op().name()
+            val (accumSlot, linearSlot) = createFtrlSlot(graph, tf, variable.asOutput())
 
-            val accumSlot: Variable<Float> = getSlot(varName, ACCUMULATOR)
-            val linearSlot: Variable<Float> = getSlot(varName, LINEAR_ACCUMULATOR)
             val options = ApplyFtrl.useLocking(true)
 
             targets.add(
@@ -130,10 +128,6 @@ public class Ftrl(
         return accumulator to linearAccumulator
     }
 
-    override fun createSlots(graph: KGraph, tf: Ops, variables: List<Variable<Float>>): List<Variable<Float>> {
-        return variables.flatMap { createFtrlSlot(graph, tf, it.asOutput()).toList() }
-    }
-
     override val optimizerName: String get() = "Ftrl"
 
     override val isRunningOnGPU: Boolean get() = false
diff --git a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Momentum.kt b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Momentum.kt
index 62d0443b7..ba9d24f1c 100644
--- a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Momentum.kt
+++ b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Momentum.kt
@@ -50,7 +50,7 @@ public class Momentum(
         momentumConst = tf.constant(momentum)
 
         for ((i, variable) in weights.withIndex()) {
-            val slot = getSlot(variable.ref().op().name(), MOMENTUM)
+            val slot = createMomentumSlot(graph, tf, variable.asOutput())
 
             targets.add(
                 tf.train.applyMomentum(
@@ -74,10 +74,6 @@ public class Momentum(
         return createSlot(graph, tf, v.asOutput(), MOMENTUM, initializer)
     }
 
-    override fun createSlots(graph: KGraph, tf: Ops, variables: List<Variable<Float>>): List<Variable<Float>> {
-        return variables.map { createMomentumSlot(graph, tf, it.asOutput()) }
-    }
-
     override val optimizerName: String get() = "Momentum"
 
     override val isRunningOnGPU: Boolean get() = true
diff --git a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Optimizer.kt b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Optimizer.kt
index 8d84cc55a..06ba9e457 100644
--- a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Optimizer.kt
+++ b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Optimizer.kt
@@ -46,9 +46,6 @@ public abstract class Optimizer(public val clipGradient: ClipGradientAction) {
         slots = mutableMapOf()
 
         val gradients: Gradients = computeGradients(tf, loss, weights)
-
-        createSlots(graph, tf, weights.map { it.asOutput() }) // empty action if not overridden
-
         return applyGradients(graph, tf, weights, gradients)
     }
 
@@ -77,13 +74,6 @@ public abstract class Optimizer(public val clipGradient: ClipGradientAction) {
         return tf.gradients(loss, weights)
     }
 
-    /**
-     * No-op slot creation method.
-     *
-     * @param variables The variables to create slots for.
-     */
-    protected open fun createSlots(graph: KGraph, tf: Ops, variables: List<Variable<Float>>): List<Variable<Float>> = emptyList()
-
     /** Returns optimizer name. */
     public abstract val optimizerName: String
diff --git a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/RMSProp.kt b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/RMSProp.kt
index 943152b7b..6bfdc4584 100644
--- a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/RMSProp.kt
+++ b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/RMSProp.kt
@@ -65,13 +65,12 @@ public class RMSProp(
         epsilonConstant = tf.constant(epsilon, getDType())
 
         for ((i, variable) in weights.withIndex()) {
-            val varName = variable.ref().op().name()
-
-            val rmsSlot: Variable<Float> = getSlot(varName, RMS)
-            val momentumSlot: Variable<Float> = getSlot(varName, MOMENTUM)
+            val slots = createRMSPropSlot(graph, tf, variable.asOutput())
+            val rmsSlot: Variable<Float> = slots[0]
+            val momentumSlot: Variable<Float> = slots[1]
 
             if (centered) {
-                val mgSlot: Variable<Float> = getSlot(varName, MG)
+                val mgSlot: Variable<Float> = slots[2]
                 targets.add(
                     tf.train.applyCenteredRmsProp(
                         variable,
@@ -130,10 +129,6 @@ public class RMSProp(
         return listOf(rms, momentum)
     }
 
-    override fun createSlots(graph: KGraph, tf: Ops, variables: List<Variable<Float>>): List<Variable<Float>> {
-        return variables.flatMap { createRMSPropSlot(graph, tf, it.asOutput()) }
-    }
-
     override val optimizerName: String get() = "RMSProp"
 
     override val isRunningOnGPU: Boolean get() = true

From 752369e177e678b1c22a2f6cdc526e5e9d7a542e Mon Sep 17 00:00:00 2001
From: Julia Beliaeva
Date: Wed, 13 Apr 2022 04:28:34 +0300
Subject: [PATCH 5/7] Remove slots field
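Context for this removal, as visible in the hunks below: since the previous patch, createSlot hands the created variable straight back to its caller, so the name-keyed lookup table

    // The two-level map (slot name -> variable name -> slot) kept by the base class.
    private lateinit var slots: MutableMap<String, MutableMap<String, Variable<Float>>>

and its getSlot accessor no longer have any users; the field and the related bookkeeping inside createSlot can be deleted outright.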
@@ -103,28 +96,9 @@ public abstract class Optimizer(public val clipGradient: ClipGradientAction) { graph.addOptimizerVariableInitializer(slotInit) graph.addOptimizerVariable(slot) - val varName = variable.op().name() - - val variables: MutableMap> = slots.computeIfAbsent(slotName) { mutableMapOf() } - variables[varName] = slot return slot } - /** - * Gets the slot associated with the specified variable and slot name. - * - * @param [varName] The variable to lookup. - * @param [slotName] The slot name. - * @return The slot. - */ - protected fun getSlot( - varName: String, - slotName: String - ): Variable { - val variables: MutableMap> = slots[slotName]!! - return variables[varName]!! - } - /** * Creates name for [variable] used in slot with name [slotName]. */ From 5cc43ac41d2ccf7edb78ca235f4d14d97a9db4d8 Mon Sep 17 00:00:00 2001 From: Julia Beliaeva Date: Wed, 13 Apr 2022 04:50:07 +0300 Subject: [PATCH 6/7] Remove duplication in the code creating optimizer variables --- .../kotlinx/dl/api/core/optimizer/AdaDelta.kt | 19 ++------- .../kotlinx/dl/api/core/optimizer/AdaGrad.kt | 12 +----- .../dl/api/core/optimizer/AdaGradDA.kt | 19 ++------- .../kotlinx/dl/api/core/optimizer/Adam.kt | 18 ++------ .../kotlinx/dl/api/core/optimizer/Adamax.kt | 19 ++------- .../kotlinx/dl/api/core/optimizer/Ftrl.kt | 20 ++------- .../kotlinx/dl/api/core/optimizer/Momentum.kt | 11 +---- .../dl/api/core/optimizer/Optimizer.kt | 42 +++++++++---------- .../kotlinx/dl/api/core/optimizer/RMSProp.kt | 35 ++-------------- 9 files changed, 40 insertions(+), 155 deletions(-) diff --git a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/AdaDelta.kt b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/AdaDelta.kt index a08b3f3c1..feaa3886c 100644 --- a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/AdaDelta.kt +++ b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/AdaDelta.kt @@ -6,10 +6,8 @@ package org.jetbrains.kotlinx.dl.api.core.optimizer import org.jetbrains.kotlinx.dl.api.core.KGraph -import org.jetbrains.kotlinx.dl.api.core.util.defaultInitializerOpName import org.jetbrains.kotlinx.dl.api.core.util.getDType import org.tensorflow.Operand -import org.tensorflow.Output import org.tensorflow.op.Ops import org.tensorflow.op.core.Constant import org.tensorflow.op.core.Gradients @@ -74,7 +72,9 @@ public class AdaDelta( epsilonConstant = tf.constant(epsilon, getDType()) for ((i, variable) in weights.withIndex()) { - val (accumSlot, accumUpdateSlot) = createAdaDeltaSlot(graph, tf, variable.asOutput()) + val output = variable.asOutput() + val accumSlot = createSlot(ACCUMULATOR, output, tf, graph) + val accumUpdateSlot = createSlot(ACCUMULATOR_UPDATE, output, tf, graph) targets.add( tf.train.applyAdadelta( @@ -91,19 +91,6 @@ public class AdaDelta( return targets } - private fun createAdaDeltaSlot(graph: KGraph, tf: Ops, v: Output): Pair, Variable> { - val accumInitializerName = defaultInitializerOpName(createName(v, ACCUMULATOR)) - val accumulatorInitializer = tf.withName(accumInitializerName) - .fill(tf.shape(v), tf.dtypes.cast(tf.constant(0.0f), getDType())) - val accumulator = createSlot(graph, tf, v.asOutput(), ACCUMULATOR, accumulatorInitializer) - - val accumUpdateInitializerName = defaultInitializerOpName(createName(v, ACCUMULATOR_UPDATE)) - val updateInitializer: Operand = tf.withName(accumUpdateInitializerName) - .fill(tf.shape(v), tf.dtypes.cast(tf.constant(0.0f), getDType())) - val accumulatorUpdate = createSlot(graph, tf, 
v.asOutput(), ACCUMULATOR_UPDATE, updateInitializer) - return accumulator to accumulatorUpdate - } - override val optimizerName: String get() = "Adadelta" override val isRunningOnGPU: Boolean get() = true diff --git a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/AdaGrad.kt b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/AdaGrad.kt index e021b57f1..9c0cf2f0f 100644 --- a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/AdaGrad.kt +++ b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/AdaGrad.kt @@ -6,10 +6,8 @@ package org.jetbrains.kotlinx.dl.api.core.optimizer import org.jetbrains.kotlinx.dl.api.core.KGraph -import org.jetbrains.kotlinx.dl.api.core.util.defaultInitializerOpName import org.jetbrains.kotlinx.dl.api.core.util.getDType import org.tensorflow.Operand -import org.tensorflow.Output import org.tensorflow.op.Ops import org.tensorflow.op.core.Constant import org.tensorflow.op.core.Gradients @@ -64,7 +62,7 @@ public class AdaGrad( learningRateConst = tf.constant(learningRate, getDType()) for ((i, variable) in weights.withIndex()) { - val slot = createAdaGradSlot(graph, tf, variable.asOutput()) + val slot = createSlot(ACCUMULATOR, variable.asOutput(), tf, graph, initialValue = initialAccumulatorValue) targets.add( tf.train.applyAdagrad( @@ -80,14 +78,6 @@ public class AdaGrad( return targets } - private fun createAdaGradSlot(graph: KGraph, tf: Ops, v: Output): Variable { - val accumInitializerName = defaultInitializerOpName(createName(v, ACCUMULATOR)) - - val initializer: Operand = tf.withName(accumInitializerName) - .fill(tf.shape(v), tf.constant(initialAccumulatorValue)) - return createSlot(graph, tf, v.asOutput(), ACCUMULATOR, initializer) - } - override val optimizerName: String get() = "Adagrad" override val isRunningOnGPU: Boolean get() = true diff --git a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/AdaGradDA.kt b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/AdaGradDA.kt index 37644e30a..ca328fcea 100644 --- a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/AdaGradDA.kt +++ b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/AdaGradDA.kt @@ -11,7 +11,6 @@ import org.jetbrains.kotlinx.dl.api.core.util.defaultInitializerOpName import org.jetbrains.kotlinx.dl.api.core.util.defaultOptimizerVariableName import org.jetbrains.kotlinx.dl.api.core.util.getDType import org.tensorflow.Operand -import org.tensorflow.Output import org.tensorflow.Shape import org.tensorflow.op.Ops import org.tensorflow.op.core.Assign @@ -83,7 +82,9 @@ public class AdaGradDA( graph.addOptimizerVariableInitializer(globalStepInit) for ((i, variable) in weights.withIndex()) { - val (gradSlot, gradSquaredSlot) = createAdaGradDASlot(graph, tf, variable.asOutput()) + val output = variable.asOutput() + val gradSlot = createSlot(ACCUMULATOR, output, tf, graph) + val gradSquaredSlot = createSlot(SQUARED_ACCUMULATOR, output, tf, graph) targets.add( tf.train.applyAdagradDa( variable, @@ -105,20 +106,6 @@ public class AdaGradDA( return targets } - private fun createAdaGradDASlot(graph: KGraph, tf: Ops, v: Output): Pair, Variable> { - val accumulatorInitializerName = defaultInitializerOpName(createName(v, ACCUMULATOR)) - val accumInitializer: Operand = tf.withName(accumulatorInitializerName) - .fill(tf.shape(v), tf.constant(0.0f)) - val accumulator = createSlot(graph, tf, v.asOutput(), ACCUMULATOR, accumInitializer) - - val 
squareAccumInitializerName = defaultInitializerOpName(createName(v, SQUARED_ACCUMULATOR))
-        val sqInitializer: Operand<Float> = tf.withName(squareAccumInitializerName)
-            .fill(tf.shape(v), tf.constant(initialAccumulatorValue))
-
-        val squaredAccumulator = createSlot(graph, tf, v.asOutput(), SQUARED_ACCUMULATOR, sqInitializer)
-        return accumulator to squaredAccumulator
-    }
-
     override val optimizerName: String get() = "AdaGradDA"
 
     override val isRunningOnGPU: Boolean get() = true
diff --git a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Adam.kt b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Adam.kt
index 83fd97f42..a0aa0b4e7 100644
--- a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Adam.kt
+++ b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Adam.kt
@@ -11,7 +11,6 @@ import org.jetbrains.kotlinx.dl.api.core.util.defaultInitializerOpName
 import org.jetbrains.kotlinx.dl.api.core.util.defaultOptimizerVariableName
 import org.jetbrains.kotlinx.dl.api.core.util.getDType
 import org.tensorflow.Operand
-import org.tensorflow.Output
 import org.tensorflow.Shape
 import org.tensorflow.op.Ops
 import org.tensorflow.op.core.Assign
@@ -99,7 +98,9 @@ public class Adam(
         graph.addOptimizerVariableInitializer(betaTwoPowerInit)
 
         for ((i, variable) in weights.withIndex()) {
-            val (firstMomentSlot, secondMomentSlot) = createAdamSlot(graph, tf, variable.asOutput())
+            val output = variable.asOutput()
+            val firstMomentSlot = createSlot(FIRST_MOMENT, output, tf, graph)
+            val secondMomentSlot = createSlot(SECOND_MOMENT, output, tf, graph)
             targets.add(
                 tf.train.applyAdam(
                     variable,
@@ -133,19 +134,6 @@
         return targets
     }
 
-    private fun createAdamSlot(graph: KGraph, tf: Ops, v: Output<Float>): Pair<Variable<Float>, Variable<Float>> {
-        val firstMomentInitializerName = defaultInitializerOpName(createName(v, FIRST_MOMENT))
-        val firstMomentInitializer =
-            tf.withName(firstMomentInitializerName).fill(tf.shape(v), tf.constant(0.0f, getDType()))
-        val firstMoment = createSlot(graph, tf, v.asOutput(), FIRST_MOMENT, firstMomentInitializer)
-
-        val secondMomentInitializerName = defaultInitializerOpName(createName(v, SECOND_MOMENT))
-        val secondMomentInitializer =
-            tf.withName(secondMomentInitializerName).fill(tf.shape(v), tf.constant(0.0f, getDType()))
-        val secondMoment = createSlot(graph, tf, v.asOutput(), SECOND_MOMENT, secondMomentInitializer)
-        return firstMoment to secondMoment
-    }
-
     override val optimizerName: String get() = "Adam"
 
     override val isRunningOnGPU: Boolean get() = true
diff --git a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Adamax.kt b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Adamax.kt
index 8c5011d4d..449f93e1a 100644
--- a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Adamax.kt
+++ b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Adamax.kt
@@ -11,7 +11,6 @@ import org.jetbrains.kotlinx.dl.api.core.util.defaultInitializerOpName
 import org.jetbrains.kotlinx.dl.api.core.util.defaultOptimizerVariableName
 import org.jetbrains.kotlinx.dl.api.core.util.getDType
 import org.tensorflow.Operand
-import org.tensorflow.Output
 import org.tensorflow.Shape
 import org.tensorflow.op.Ops
 import org.tensorflow.op.Scope
@@ -91,7 +90,9 @@ public class Adamax(
         val scope = Scope(graph.tfGraph)
 
         for ((i, variable) in weights.withIndex()) {
-            val (firstMomentSlot, secondMomentSlot) = createAdamaxSlot(graph, tf, variable.asOutput())
+            val output = variable.asOutput()
+            val firstMomentSlot = createSlot(FIRST_MOMENT, output, tf, graph)
+            val secondMomentSlot = createSlot(SECOND_MOMENT, output, tf, graph)
             targets.add(
                 ApplyAdaMax.create(
                     scope,
@@ -117,20 +118,6 @@
         return targets
     }
 
-    private fun createAdamaxSlot(graph: KGraph, tf: Ops, v: Output<Float>): Pair<Variable<Float>, Variable<Float>> {
-        val firstMomentInitializerName = defaultInitializerOpName(createName(v, FIRST_MOMENT))
-        val firstMomentInitializer =
-            tf.withName(firstMomentInitializerName).fill(tf.shape(v), tf.constant(0.0f, getDType()))
-        val firstMoment = createSlot(graph, tf, v.asOutput(), FIRST_MOMENT, firstMomentInitializer)
-
-        val secondMomentInitializerName = defaultInitializerOpName(createName(v, SECOND_MOMENT))
-        val secondMomentInitializer = tf.withName(secondMomentInitializerName)
-            .fill(tf.shape(v), tf.constant(0.0f, getDType()))
-        val secondMoment = createSlot(graph, tf, v.asOutput(), SECOND_MOMENT, secondMomentInitializer)
-
-        return firstMoment to secondMoment
-    }
-
     override val optimizerName: String get() = "Adamax"
 
     override val isRunningOnGPU: Boolean get() = false
diff --git a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Ftrl.kt b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Ftrl.kt
index 79aa7985e..db8ea4b1e 100644
--- a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Ftrl.kt
+++ b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Ftrl.kt
@@ -6,10 +6,8 @@
 package org.jetbrains.kotlinx.dl.api.core.optimizer
 
 import org.jetbrains.kotlinx.dl.api.core.KGraph
-import org.jetbrains.kotlinx.dl.api.core.util.defaultInitializerOpName
 import org.jetbrains.kotlinx.dl.api.core.util.getDType
 import org.tensorflow.Operand
-import org.tensorflow.Output
 import org.tensorflow.op.Ops
 import org.tensorflow.op.core.Constant
 import org.tensorflow.op.core.Gradients
@@ -91,7 +89,9 @@
         learningRatePowerConst = tf.constant(learningRatePower, getDType())
 
         for ((i, variable) in weights.withIndex()) {
-            val (accumSlot, linearSlot) = createFtrlSlot(graph, tf, variable.asOutput())
+            val output = variable.asOutput()
+            val accumSlot = createSlot(ACCUMULATOR, output, tf, graph)
+            val linearSlot = createSlot(LINEAR_ACCUMULATOR, output, tf, graph)
 
             val options = ApplyFtrl.useLocking(true)
@@ -114,20 +114,6 @@
         return targets
     }
 
-    private fun createFtrlSlot(graph: KGraph, tf: Ops, v: Output<Float>): Pair<Variable<Float>, Variable<Float>> {
-        val accumInitializerName = defaultInitializerOpName(createName(v, ACCUMULATOR))
-        val accumInitializer = tf.withName(accumInitializerName)
-            .fill(tf.shape(v), tf.constant(initialAccumulatorValue))
-        val accumulator = createSlot(graph, tf, v.asOutput(), ACCUMULATOR, accumInitializer)
-
-        val linearAccumInitializerName = defaultInitializerOpName(createName(v, LINEAR_ACCUMULATOR))
-        val linearAccumInitializer = tf.withName(linearAccumInitializerName)
-            .fill(tf.shape(v), tf.constant(0.0f))
-        val linearAccumulator = createSlot(graph, tf, v.asOutput(), LINEAR_ACCUMULATOR, linearAccumInitializer)
-
-        return accumulator to linearAccumulator
-    }
-
     override val optimizerName: String get() = "Ftrl"
 
     override val isRunningOnGPU: Boolean get() = false
diff --git a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Momentum.kt b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Momentum.kt
index ba9d24f1c..964534626 100644
--- a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Momentum.kt
+++ b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Momentum.kt
@@ -6,9 +6,7 @@
 package org.jetbrains.kotlinx.dl.api.core.optimizer
 
 import org.jetbrains.kotlinx.dl.api.core.KGraph
-import org.jetbrains.kotlinx.dl.api.core.util.defaultInitializerOpName
 import org.tensorflow.Operand
-import org.tensorflow.Output
 import org.tensorflow.op.Ops
 import org.tensorflow.op.core.Constant
 import org.tensorflow.op.core.Gradients
@@ -50,7 +48,7 @@
         momentumConst = tf.constant(momentum)
 
         for ((i, variable) in weights.withIndex()) {
-            val slot = createMomentumSlot(graph, tf, variable.asOutput())
+            val slot = createSlot(MOMENTUM, variable.asOutput(), tf, graph)
 
             targets.add(
                 tf.train.applyMomentum(
@@ -67,13 +65,6 @@
         return targets
     }
 
-    private fun createMomentumSlot(graph: KGraph, tf: Ops, v: Output<Float>): Variable<Float> {
-        val momentumInitializerName = defaultInitializerOpName(createName(v, MOMENTUM))
-        val initializer: Operand<Float> = tf.withName(momentumInitializerName)
-            .fill(tf.shape(v), tf.constant(0.0f))
-        return createSlot(graph, tf, v.asOutput(), MOMENTUM, initializer)
-    }
-
     override val optimizerName: String get() = "Momentum"
 
     override val isRunningOnGPU: Boolean get() = true
diff --git a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Optimizer.kt b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Optimizer.kt
index b6742729d..ab0683dac 100644
--- a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Optimizer.kt
+++ b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Optimizer.kt
@@ -7,12 +7,12 @@ package org.jetbrains.kotlinx.dl.api.core.optimizer
 
 import org.jetbrains.kotlinx.dl.api.core.KGraph
 import org.jetbrains.kotlinx.dl.api.core.util.defaultAssignOpName
+import org.jetbrains.kotlinx.dl.api.core.util.defaultInitializerOpName
 import org.jetbrains.kotlinx.dl.api.core.util.defaultOptimizerVariableName
 import org.jetbrains.kotlinx.dl.api.core.util.getDType
 import org.tensorflow.Operand
 import org.tensorflow.Output
 import org.tensorflow.op.Ops
-import org.tensorflow.op.core.Assign
 import org.tensorflow.op.core.Gradients
 import org.tensorflow.op.core.Variable
 
@@ -74,38 +74,34 @@ public abstract class Optimizer(public val clipGradient: ClipGradientAction) {
      * Creates a slot in the graph for the specified variable with the specified name. Adds the slot's
      * initializer to the graph's initializers.
      *
-     * @param [graph] KGraph to be updated.
-     * @param [tf] TensorFlow graph API for building operations.
-     * @param [variable] The variable to create the slot for.
      * @param [slotName] The name of the slot.
-     * @param [initializer] The initializer for the slot.
+     * @param [variable] The variable to create the slot for.
+     * @param [tf] TensorFlow graph API for building operations.
+     * @param [graph] KGraph to be updated.
+     * @param [initialValue] The initial value to use.
      */
-    protected open fun createSlot(
-        graph: KGraph,
-        tf: Ops,
-        variable: Output<Float>,
-        slotName: String,
-        initializer: Operand<Float>
+    protected fun createSlot(slotName: String,
+                             variable: Output<Float>,
+                             tf: Ops,
+                             graph: KGraph,
+                             initialValue: Float = 0.0f
     ): Variable<Float> {
-        val createName: String = createName(variable, slotName)
-        val slot: Variable<Float> = tf.withName(createName).variable(variable.shape(), getDType())
+        val slotVariableName = defaultOptimizerVariableName(variable.op().name() + "-" + slotName)
+        val slot = tf.withName(slotVariableName).variable(variable.shape(), getDType())
+
+        val initializerOpName = defaultInitializerOpName(slotVariableName)
+        val initializerOp = tf.withName(initializerOpName)
+            .fill(tf.shape(variable), tf.dtypes.cast(tf.constant(initialValue), getDType()))
 
-        val assignName = defaultAssignOpName(createName(variable, slotName))
-        val slotInit: Assign<Float> = tf.withName(assignName).assign(slot, initializer)
+        val assignOpName = defaultAssignOpName(slotVariableName)
+        val assignOp = tf.withName(assignOpName).assign(slot, initializerOp)
 
-        graph.addOptimizerVariableInitializer(slotInit)
+        graph.addOptimizerVariableInitializer(assignOp)
         graph.addOptimizerVariable(slot)
 
         return slot
     }
 
-    /**
-     * Creates name for [variable] used in slot with name [slotName].
-     */
-    internal open fun createName(variable: Output<Float>, slotName: String): String {
-        return defaultOptimizerVariableName(variable.op().name() + "-" + slotName)
-    }
-
     /** True, if optimizer is implemented for GPU. */
     internal abstract val isRunningOnGPU: Boolean
 }
diff --git a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/RMSProp.kt b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/RMSProp.kt
index 6bfdc4584..b4837df18 100644
--- a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/RMSProp.kt
+++ b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/RMSProp.kt
@@ -6,10 +6,8 @@
 package org.jetbrains.kotlinx.dl.api.core.optimizer
 
 import org.jetbrains.kotlinx.dl.api.core.KGraph
-import org.jetbrains.kotlinx.dl.api.core.util.defaultInitializerOpName
 import org.jetbrains.kotlinx.dl.api.core.util.getDType
 import org.tensorflow.Operand
-import org.tensorflow.Output
 import org.tensorflow.op.Ops
 import org.tensorflow.op.core.Constant
 import org.tensorflow.op.core.Gradients
@@ -65,12 +63,12 @@
         epsilonConstant = tf.constant(epsilon, getDType())
 
         for ((i, variable) in weights.withIndex()) {
-            val slots = createRMSPropSlot(graph, tf, variable.asOutput())
-            val rmsSlot: Variable<Float> = slots[0]
-            val momentumSlot: Variable<Float> = slots[1]
+            val output = variable.asOutput()
+            val rmsSlot = createSlot(RMS, output, tf, graph)
+            val momentumSlot = createSlot(MOMENTUM, output, tf, graph)
 
             if (centered) {
-                val mgSlot: Variable<Float> = slots[2]
+                val mgSlot = createSlot(MG, output, tf, graph)
                 targets.add(
                     tf.train.applyCenteredRmsProp(
                         variable,
@@ -104,31 +102,6 @@
         return targets
     }
 
-    private fun createRMSPropSlot(graph: KGraph, tf: Ops, v: Output<Float>): List<Variable<Float>> {
-        val rmsInitializerName = defaultInitializerOpName(createName(v, RMS))
-
-        val rmsInitializer: Operand<Float> = tf.withName(rmsInitializerName)
-            .fill(tf.shape(v), tf.dtypes.cast(tf.constant(1.0f), getDType()))
-        val rms = createSlot(graph, tf, v.asOutput(), RMS, rmsInitializer)
-
-        val momentumInitializerName = defaultInitializerOpName(createName(v, MOMENTUM))
-        val momentumInitializer: Operand<Float> = tf.withName(momentumInitializerName)
-            .fill(tf.shape(v), tf.dtypes.cast(tf.constant(0.0f), getDType()))
-        val momentum = createSlot(graph, tf, v.asOutput(), MOMENTUM, momentumInitializer)
-
-        if (centered) {
-            val mgInitializerName = defaultInitializerOpName(createName(v, MG))
-            val mgInitializer: Operand<Float> = tf.withName(mgInitializerName)
-                .fill(
-                    tf.shape(v),
-                    tf.constant(0.0f)
-                )
-            val mg = createSlot(graph, tf, v.asOutput(), MG, mgInitializer)
-            return listOf(rms, momentum, mg)
-        }
-        return listOf(rms, momentum)
-    }
-
     override val optimizerName: String get() = "RMSProp"
 
     override val isRunningOnGPU: Boolean get() = true
From 96ed53df1a305bbc5bd9ab12adaeefb8e264d10d Mon Sep 17 00:00:00 2001
From: Julia Beliaeva
Date: Fri, 15 Apr 2022 21:57:16 +0300
Subject: [PATCH 7/7] Convert optimizer properties to local variables

---
 .../kotlinx/dl/api/core/optimizer/AdaDelta.kt | 10 +++-------
 .../kotlinx/dl/api/core/optimizer/AdaGrad.kt  |  6 +-----
 .../dl/api/core/optimizer/AdaGradDA.kt        | 13 ++++--------
 .../kotlinx/dl/api/core/optimizer/Adam.kt     | 20 ++++++-------------
 .../kotlinx/dl/api/core/optimizer/Adamax.kt   | 17 +++++-----------
 .../kotlinx/dl/api/core/optimizer/Ftrl.kt     | 17 +++++-----------
 .../kotlinx/dl/api/core/optimizer/Momentum.kt |  7 ++-----
 .../kotlinx/dl/api/core/optimizer/RMSProp.kt  | 14 ++++----------
 8 files changed, 30 insertions(+), 74 deletions(-)

diff --git a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/AdaDelta.kt b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/AdaDelta.kt
index feaa3886c..3bd1acb4d 100644
--- a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/AdaDelta.kt
+++ b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/AdaDelta.kt
@@ -9,7 +9,6 @@ import org.jetbrains.kotlinx.dl.api.core.KGraph
 import org.jetbrains.kotlinx.dl.api.core.util.getDType
 import org.tensorflow.Operand
 import org.tensorflow.op.Ops
-import org.tensorflow.op.core.Constant
 import org.tensorflow.op.core.Gradients
 import org.tensorflow.op.core.Variable
 import org.tensorflow.op.train.ApplyAdadelta
@@ -49,9 +48,6 @@
     private val epsilon: Float = 1e-8f,
     clipGradient: ClipGradientAction = NoClipGradient()
 ) : Optimizer(clipGradient) {
-    private lateinit var epsilonConstant: Constant<Float>
-    private lateinit var learningRateConst: Constant<Float>
-    private lateinit var rhoConst: Constant<Float>
 
     init {
         require(learningRate >= 0.0f) { "Learning rate $learningRate should be >= 0.0." }
@@ -67,9 +63,9 @@ public class AdaDelta(
     ): List<Operand<Float>> {
         val targets = mutableListOf<Operand<Float>>()
 
-        rhoConst = tf.constant(rho, getDType())
-        learningRateConst = tf.constant(learningRate, getDType())
-        epsilonConstant = tf.constant(epsilon, getDType())
+        val rhoConst = tf.constant(rho, getDType())
+        val learningRateConst = tf.constant(learningRate, getDType())
+        val epsilonConstant = tf.constant(epsilon, getDType())
 
         for ((i, variable) in weights.withIndex()) {
             val output = variable.asOutput()
diff --git a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/AdaGrad.kt b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/AdaGrad.kt
index 9c0cf2f0f..fa861af2d 100644
--- a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/AdaGrad.kt
+++ b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/AdaGrad.kt
@@ -9,7 +9,6 @@ import org.jetbrains.kotlinx.dl.api.core.KGraph
 import org.jetbrains.kotlinx.dl.api.core.util.getDType
 import org.tensorflow.Operand
 import org.tensorflow.op.Ops
-import org.tensorflow.op.core.Constant
 import org.tensorflow.op.core.Gradients
 import org.tensorflow.op.core.Variable
 import org.tensorflow.op.train.ApplyAdagrad
@@ -42,8 +41,6 @@ public class AdaGrad(
     private val initialAccumulatorValue: Float = 0.01f,
     clipGradient: ClipGradientAction = NoClipGradient()
 ) : Optimizer(clipGradient) {
-    private lateinit var initialAccumulatorValueConstant: Constant<Float>
-    private lateinit var learningRateConst: Constant<Float>
 
     init {
         require(learningRate >= 0.0f) { "Learning rate $learningRate should be >= 0.0." }
@@ -58,8 +55,7 @@
     ): List<Operand<Float>> {
         val targets = mutableListOf<Operand<Float>>()
 
-        initialAccumulatorValueConstant = tf.constant(initialAccumulatorValue, getDType())
-        learningRateConst = tf.constant(learningRate, getDType())
+        val learningRateConst = tf.constant(learningRate, getDType())
 
         for ((i, variable) in weights.withIndex()) {
             val slot = createSlot(ACCUMULATOR, variable.asOutput(), tf, graph, initialValue = initialAccumulatorValue)
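Note: the removal pattern is identical in every file of this patch. A lateinit field that is assigned and read only inside prepareTargets is a local variable in disguise; as a field it outlives the graph build, is silently reassigned on every call, and can throw UninitializedPropertyAccessException if read before the first build. A reduced, hypothetical illustration of the before/after shapes (names invented, not taken from the diff):

    class BeforeRefactor {
        private lateinit var scaleConst: String    // state that outlives the call
        fun prepare(scale: Float): String {
            scaleConst = "const($scale)"           // reassigned on every invocation
            return scaleConst
        }
    }

    class AfterRefactor {
        fun prepare(scale: Float): String {
            val scaleConst = "const($scale)"       // confined to this invocation
            return scaleConst
        }
    }

    fun main() {
        // Both produce the same result; only the lifetime of the value differs.
        check(BeforeRefactor().prepare(0.9f) == AfterRefactor().prepare(0.9f))
    }

AdaGrad also drops initialAccumulatorValueConstant entirely rather than localizing it: the constant was never consumed once the accumulator began to be initialized through createSlot's initialValue parameter.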
diff --git a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/AdaGradDA.kt b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/AdaGradDA.kt
index ca328fcea..3f0db4248 100644
--- a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/AdaGradDA.kt
+++ b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/AdaGradDA.kt
@@ -14,7 +14,6 @@ import org.tensorflow.Operand
 import org.tensorflow.Shape
 import org.tensorflow.op.Ops
 import org.tensorflow.op.core.Assign
-import org.tensorflow.op.core.Constant
 import org.tensorflow.op.core.Gradients
 import org.tensorflow.op.core.Variable
 import org.tensorflow.op.train.ApplyAdagradDa
@@ -51,10 +50,6 @@ public class AdaGradDA(
     private val l2Strength: Float = 0.01f,
     clipGradient: ClipGradientAction = NoClipGradient()
 ) : Optimizer(clipGradient) {
-    private lateinit var learningRateConst: Constant<Float>
-    private lateinit var l1StrengthConst: Constant<Float>
-    private lateinit var l2StrengthConst: Constant<Float>
-    private lateinit var globalStep: Variable<Float>
 
     init {
         require(learningRate >= 0.0f) { "Learning rate $learningRate should be >= 0.0." }
@@ -71,11 +66,11 @@
     ): List<Operand<Float>> {
         val targets = mutableListOf<Operand<Float>>()
 
-        learningRateConst = tf.constant(learningRate, getDType())
-        l1StrengthConst = tf.constant(l1Strength, getDType())
-        l2StrengthConst = tf.constant(l2Strength, getDType())
+        val learningRateConst = tf.constant(learningRate, getDType())
+        val l1StrengthConst = tf.constant(l1Strength, getDType())
+        val l2StrengthConst = tf.constant(l2Strength, getDType())
 
-        globalStep = tf.withName(GLOBAL_STEP).variable(Shape.scalar(), getDType())
+        val globalStep = tf.withName(GLOBAL_STEP).variable(Shape.scalar(), getDType())
         val globalStepAssignName = defaultAssignOpName(GLOBAL_STEP)
         val globalStepInit: Assign<*> = tf.withName(globalStepAssignName)
             .assign(globalStep, tf.withName(defaultInitializerOpName(GLOBAL_STEP)).constant(0.0f))
diff --git a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Adam.kt b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Adam.kt
index a0aa0b4e7..c11a83bb6 100644
--- a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Adam.kt
+++ b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Adam.kt
@@ -14,7 +14,6 @@ import org.tensorflow.Operand
 import org.tensorflow.Shape
 import org.tensorflow.op.Ops
 import org.tensorflow.op.core.Assign
-import org.tensorflow.op.core.Constant
 import org.tensorflow.op.core.Gradients
 import org.tensorflow.op.core.Variable
 import org.tensorflow.op.train.ApplyAdam
@@ -52,13 +51,6 @@ public class Adam(
     clipGradient: ClipGradientAction = NoClipGradient()
 ) : Optimizer(clipGradient) {
 
-    private lateinit var epsilonConstant: Constant<Float>
-    private lateinit var learningRateConst: Constant<Float>
-    private lateinit var betaOneConst: Constant<Float>
-    private lateinit var betaTwoConst: Constant<Float>
-    private lateinit var betaOnePower: Variable<Float>
-    private lateinit var betaTwoPower: Variable<Float>
-
     init {
         require(learningRate >= 0.0f) { "Learning rate $learningRate should be >= 0.0." }
         require(beta1 > 0.0f && beta1 < 1.0f) { "Beta1 $beta1 should be in range (0.0; 1.0)." }
@@ -74,12 +66,12 @@
     ): List<Operand<Float>> {
         val targets = mutableListOf<Operand<Float>>()
 
-        betaOneConst = tf.constant(beta1, getDType())
-        betaTwoConst = tf.constant(beta2, getDType())
-        learningRateConst = tf.constant(learningRate, getDType())
-        epsilonConstant = tf.constant(epsilon, getDType())
+        val betaOneConst = tf.constant(beta1, getDType())
+        val betaTwoConst = tf.constant(beta2, getDType())
+        val learningRateConst = tf.constant(learningRate, getDType())
+        val epsilonConstant = tf.constant(epsilon, getDType())
 
-        betaOnePower = tf.withName(FIRST_BETA_POWER_NAME).variable(Shape.scalar(), getDType())
+        val betaOnePower = tf.withName(FIRST_BETA_POWER_NAME).variable(Shape.scalar(), getDType())
         val betaOnePowerAssignName = defaultAssignOpName(FIRST_BETA_POWER_NAME)
         val betaOnePowerInit: Assign<*> = tf.withName(betaOnePowerAssignName)
             .assign(
@@ -88,7 +80,7 @@
         )
         graph.addOptimizerVariableInitializer(betaOnePowerInit)
 
-        betaTwoPower = tf.withName(SECOND_BETA_POWER_NAME).variable(Shape.scalar(), getDType())
+        val betaTwoPower = tf.withName(SECOND_BETA_POWER_NAME).variable(Shape.scalar(), getDType())
         val betaTwoPowerAssignName = defaultAssignOpName(SECOND_BETA_POWER_NAME)
         val betaTwoPowerInit: Assign<*> = tf.withName(betaTwoPowerAssignName)
             .assign(
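Note: Adam is the one optimizer here where not everything can become a plain local. betaOnePower and betaTwoPower are real TensorFlow graph variables, created with tf.withName(...).variable(Shape.scalar(), getDType()) and initialized through assign ops registered with KGraph, so they persist in the graph and are updated across training steps. The patch localizes only the Kotlin references to them; the graph state itself is unchanged.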
diff --git a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Adamax.kt b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Adamax.kt
index 449f93e1a..1d8cbfc58 100644
--- a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Adamax.kt
+++ b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Adamax.kt
@@ -15,7 +15,6 @@ import org.tensorflow.Shape
 import org.tensorflow.op.Ops
 import org.tensorflow.op.Scope
 import org.tensorflow.op.core.Assign
-import org.tensorflow.op.core.Constant
 import org.tensorflow.op.core.Gradients
 import org.tensorflow.op.core.Variable
 import org.tensorflow.op.train.ApplyAdaMax
@@ -52,12 +51,6 @@ public class Adamax(
     clipGradient: ClipGradientAction = NoClipGradient()
 ) : Optimizer(clipGradient) {
 
-    private lateinit var epsilonConstant: Constant<Float>
-    private lateinit var learningRateConst: Constant<Float>
-    private lateinit var betaOneConst: Constant<Float>
-    private lateinit var betaTwoConst: Constant<Float>
-    private lateinit var betaOnePower: Variable<Float>
-
     init {
         require(learningRate >= 0.0f) { "Learning rate $learningRate should be >= 0.0." }
         require(beta1 > 0.0f && beta1 < 1.0f) { "Beta1 $beta1 should be in range (0.0; 1.0)." }
@@ -73,12 +66,12 @@
     ): List<Operand<Float>> {
         val targets = mutableListOf<Operand<Float>>()
 
-        betaOneConst = tf.constant(beta1, getDType())
-        betaTwoConst = tf.constant(beta2, getDType())
-        learningRateConst = tf.constant(learningRate, getDType())
-        epsilonConstant = tf.constant(epsilon, getDType())
+        val betaOneConst = tf.constant(beta1, getDType())
+        val betaTwoConst = tf.constant(beta2, getDType())
+        val learningRateConst = tf.constant(learningRate, getDType())
+        val epsilonConstant = tf.constant(epsilon, getDType())
 
-        betaOnePower = tf.withName(FIRST_BETA_POWER_NAME).variable(Shape.scalar(), getDType())
+        val betaOnePower = tf.withName(FIRST_BETA_POWER_NAME).variable(Shape.scalar(), getDType())
         val betaOnePowerAssignName = defaultAssignOpName(FIRST_BETA_POWER_NAME)
         val betaOnePowerInit: Assign<*> = tf.withName(betaOnePowerAssignName)
             .assign(
diff --git a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Ftrl.kt b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Ftrl.kt
index db8ea4b1e..e003b68ea 100644
--- a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Ftrl.kt
+++ b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Ftrl.kt
@@ -9,7 +9,6 @@ import org.jetbrains.kotlinx.dl.api.core.KGraph
 import org.jetbrains.kotlinx.dl.api.core.util.getDType
 import org.tensorflow.Operand
 import org.tensorflow.op.Ops
-import org.tensorflow.op.core.Constant
 import org.tensorflow.op.core.Gradients
 import org.tensorflow.op.core.Variable
 import org.tensorflow.op.train.ApplyFtrl
@@ -58,12 +57,6 @@
     private var initialAccumulatorValue: Float = 0.0f,
     clipGradient: ClipGradientAction = NoClipGradient()
 ) : Optimizer(clipGradient) {
-    /** */
-    private lateinit var learningRatePowerConst: Constant<Float>
-    private lateinit var learningRateConst: Constant<Float>
-    private lateinit var l1RegularizationStrengthConst: Constant<Float>
-    private lateinit var l2RegularizationStrengthConst: Constant<Float>
-    private lateinit var l2ShrinkageRegularizationStrengthConst: Constant<Float>
 
     init {
         require(learningRate >= 0.0f) { "Learning rate $learningRate should be >= 0.0." }
@@ -82,11 +75,11 @@
     ): List<Operand<Float>> {
         val targets = mutableListOf<Operand<Float>>()
 
-        l1RegularizationStrengthConst = tf.constant(l1RegularizationStrength, getDType())
-        l2RegularizationStrengthConst = tf.constant(l2RegularizationStrength, getDType())
-        learningRateConst = tf.constant(learningRate, getDType())
-        l2ShrinkageRegularizationStrengthConst = tf.constant(l2ShrinkageRegularizationStrength, getDType())
-        learningRatePowerConst = tf.constant(learningRatePower, getDType())
+        val l1RegularizationStrengthConst = tf.constant(l1RegularizationStrength, getDType())
+        val l2RegularizationStrengthConst = tf.constant(l2RegularizationStrength, getDType())
+        val learningRateConst = tf.constant(learningRate, getDType())
+        val l2ShrinkageRegularizationStrengthConst = tf.constant(l2ShrinkageRegularizationStrength, getDType())
+        val learningRatePowerConst = tf.constant(learningRatePower, getDType())
 
         for ((i, variable) in weights.withIndex()) {
             val output = variable.asOutput()
diff --git a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Momentum.kt b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Momentum.kt
index 964534626..4596ca2b0 100644
--- a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Momentum.kt
+++ b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/Momentum.kt
@@ -8,7 +8,6 @@ package org.jetbrains.kotlinx.dl.api.core.optimizer
 import org.jetbrains.kotlinx.dl.api.core.KGraph
 import org.tensorflow.Operand
 import org.tensorflow.op.Ops
-import org.tensorflow.op.core.Constant
 import org.tensorflow.op.core.Gradients
 import org.tensorflow.op.core.Variable
 import org.tensorflow.op.train.ApplyMomentum
@@ -28,8 +27,6 @@
     private val useNesterov: Boolean = true,
     clipGradient: ClipGradientAction = NoClipGradient()
 ) : Optimizer(clipGradient) {
-    private lateinit var momentumConst: Constant<Float>
-    private lateinit var learningRateConst: Constant<Float>
 
     init {
         require(learningRate >= 0.0f) { "Learning rate $learningRate should be >= 0.0." }
@@ -44,8 +41,8 @@
     ): List<Operand<Float>> {
         val targets = mutableListOf<Operand<Float>>()
 
-        learningRateConst = tf.constant(learningRate)
-        momentumConst = tf.constant(momentum)
+        val learningRateConst = tf.constant(learningRate)
+        val momentumConst = tf.constant(momentum)
 
         for ((i, variable) in weights.withIndex()) {
             val slot = createSlot(MOMENTUM, variable.asOutput(), tf, graph)
diff --git a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/RMSProp.kt b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/RMSProp.kt
index b4837df18..f313d85ed 100644
--- a/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/RMSProp.kt
+++ b/tensorflow/src/main/kotlin/org/jetbrains/kotlinx/dl/api/core/optimizer/RMSProp.kt
@@ -9,7 +9,6 @@ import org.jetbrains.kotlinx.dl.api.core.KGraph
 import org.jetbrains.kotlinx.dl.api.core.util.getDType
 import org.tensorflow.Operand
 import org.tensorflow.op.Ops
-import org.tensorflow.op.core.Constant
 import org.tensorflow.op.core.Gradients
 import org.tensorflow.op.core.Variable
 import org.tensorflow.op.train.ApplyCenteredRmsProp
@@ -37,11 +36,6 @@
     clipGradient: ClipGradientAction = NoClipGradient()
 ) : Optimizer(clipGradient) {
 
-    private lateinit var epsilonConstant: Constant<Float>
-    private lateinit var learningRateConst: Constant<Float>
-    private lateinit var decayConst: Constant<Float>
-    private lateinit var momentumConst: Constant<Float>
-
     init {
         require(learningRate >= 0.0f) { "Learning rate $learningRate should be >= 0.0." }
         require(momentum >= 0.0f) { "Momentum $momentum should be >= 0.0." }
@@ -57,10 +51,10 @@
     ): List<Operand<Float>> {
         val targets = mutableListOf<Operand<Float>>()
 
-        decayConst = tf.constant(decay, getDType())
-        momentumConst = tf.constant(momentum, getDType())
-        learningRateConst = tf.constant(learningRate, getDType())
-        epsilonConstant = tf.constant(epsilon, getDType())
+        val decayConst = tf.constant(decay, getDType())
+        val momentumConst = tf.constant(momentum, getDType())
+        val learningRateConst = tf.constant(learningRate, getDType())
+        val epsilonConstant = tf.constant(epsilon, getDType())
 
         for ((i, variable) in weights.withIndex()) {
             val output = variable.asOutput()