Commit aafe7af

Add examples/xor.rs for a neural net trained entirely in Rust
1 parent 2b8637a commit aafe7af

File tree: 4 files changed (+268 −11 lines)
  Cargo.toml · examples/xor.rs · src/train.rs · test-all

Cargo.toml

Lines changed: 4 additions & 0 deletions

@@ -48,3 +48,7 @@ name = "regression_savedmodel"
 
 [[example]]
 name = "regression_checkpoint"
+
+[[example]]
+name = "xor"
+required-features = ["experimental_training"]
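
The new `[[example]]` entry gates the example behind the `experimental_training` feature, so Cargo skips it unless that feature is enabled. Assuming a local checkout of the crate, it can be run with the same invocation the test-all script gains below:

    cargo run --features=experimental_training --example xor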

examples/xor.rs

Lines changed: 151 additions & 0 deletions (new file)

use std::error::Error;
use std::result::Result;
use tensorflow::ops;
use tensorflow::train::AdadeltaOptimizer;
use tensorflow::train::MinimizeOptions;
use tensorflow::train::Optimizer;
use tensorflow::Code;
use tensorflow::DataType;
use tensorflow::Output;
use tensorflow::Scope;
use tensorflow::Session;
use tensorflow::SessionOptions;
use tensorflow::SessionRunArgs;
use tensorflow::Shape;
use tensorflow::Status;
use tensorflow::Tensor;
use tensorflow::Variable;

// Helper for building a layer.
//
// `activation` is a function which takes a tensor and applies an activation
// function such as tanh.
//
// Returns variables created and the layer output.
fn layer<O1: Into<Output>>(
    input: O1,
    input_size: u64,
    output_size: u64,
    activation: &dyn Fn(Output, &mut Scope) -> Result<Output, Status>,
    scope: &mut Scope,
) -> Result<(Vec<Variable>, Output), Status> {
    let mut scope = scope.new_sub_scope("layer");
    let scope = &mut scope;
    let w_shape = ops::constant(&[input_size as i64, output_size as i64][..], scope)?;
    let w = Variable::builder()
        .initial_value(ops::random_normal(w_shape, scope)?)
        .data_type(DataType::Float)
        .shape(Shape::from(&[input_size, output_size][..]))
        .build(&mut scope.with_op_name("w"))?;
    let b = Variable::builder()
        .const_initial_value(Tensor::<f32>::new(&[output_size]))
        .build(&mut scope.with_op_name("b"))?;
    Ok((
        vec![w.clone(), b.clone()],
        activation(
            ops::add(
                ops::mat_mul(input, w.output().clone(), scope)?,
                b.output().clone(),
                scope,
            )?
            .into(),
            scope,
        )?,
    ))
}

fn main() -> Result<(), Box<Error>> {
    // ================
    // Build the model.
    // ================
    let mut scope = Scope::new_root_scope();
    let scope = &mut scope;
    // Size of the hidden layer.
    // This is far more than is necessary, but makes it train more reliably.
    let hidden_size: u64 = 8;
    let input = ops::Placeholder::new()
        .data_type(DataType::Float)
        .shape(Shape::from(&[1u64, 2][..]))
        .build(&mut scope.with_op_name("input"))?;
    let label = ops::Placeholder::new()
        .data_type(DataType::Float)
        .shape(Shape::from(&[1u64][..]))
        .build(&mut scope.with_op_name("label"))?;
    // Hidden layer.
    let (vars1, layer1) = layer(
        input.clone(),
        2,
        hidden_size,
        &|x, scope| Ok(ops::tanh(x, scope)?.into()),
        scope,
    )?;
    // Output layer.
    let (vars2, layer2) = layer(layer1.clone(), hidden_size, 1, &|x, _| Ok(x), scope)?;
    let error = ops::subtract(layer2.clone(), label.clone(), scope)?;
    let error_squared = ops::multiply(error.clone(), error, scope)?;
    let mut optimizer = AdadeltaOptimizer::new();
    optimizer.set_learning_rate(ops::constant(1.0f32, scope)?);
    let mut variables = Vec::new();
    variables.extend(vars1);
    variables.extend(vars2);
    let (minimizer_vars, minimize) = optimizer.minimize(
        scope,
        error_squared.clone().into(),
        MinimizeOptions::default().with_variables(&variables),
    )?;

    // =========================
    // Initialize the variables.
    // =========================
    let options = SessionOptions::new();
    let g = scope.graph_mut();
    let session = Session::new(&options, &g)?;
    let mut run_args = SessionRunArgs::new();
    // Initialize variables we defined.
    for var in &variables {
        run_args.add_target(&var.initializer());
    }
    // Initialize variables the optimizer defined.
    for var in &minimizer_vars {
        run_args.add_target(&var.initializer());
    }
    session.run(&mut run_args)?;

    // ================
    // Train the model.
    // ================
    let mut input_tensor = Tensor::<f32>::new(&[1, 2]);
    let mut label_tensor = Tensor::<f32>::new(&[1]);
    // Helper that generates a training example from an integer, trains on that
    // example, and returns the error.
    let mut train = |i| -> Result<f32, Box<Error>> {
        input_tensor[0] = (i & 1) as f32;
        input_tensor[1] = ((i >> 1) & 1) as f32;
        label_tensor[0] = ((i & 1) ^ ((i >> 1) & 1)) as f32;
        let mut run_args = SessionRunArgs::new();
        run_args.add_target(&minimize);
        let error_squared_fetch = run_args.request_fetch(&error_squared, 0);
        run_args.add_feed(&input, 0, &input_tensor);
        run_args.add_feed(&label, 0, &label_tensor);
        session.run(&mut run_args)?;
        Ok(run_args.fetch::<f32>(error_squared_fetch)?[0])
    };
    for i in 0..10000 {
        train(i)?;
    }

    // ===================
    // Evaluate the model.
    // ===================
    for i in 0..4 {
        let error = train(i)?;
        println!("Error: {}", error);
        if error > 0.1 {
            return Err(Box::new(Status::new_set(
                Code::Internal,
                &format!("Error too high: {}", error),
            )?));
        }
    }
    Ok(())
}
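
The `layer` helper above is the piece most likely to be reused: it creates the weight and bias variables and applies the caller-supplied activation. As a hypothetical sketch (not part of this commit), a second hidden layer could be added inside `main` by chaining the same helper; the names `hidden1`, `hidden2`, and `vars3` are illustrative only:

    // Hypothetical variation: two tanh hidden layers chained via `layer`.
    let (vars1, hidden1) = layer(
        input.clone(),
        2,
        hidden_size,
        &|x, scope| Ok(ops::tanh(x, scope)?.into()),
        scope,
    )?;
    let (vars2, hidden2) = layer(
        hidden1.clone(),
        hidden_size,
        hidden_size,
        &|x, scope| Ok(ops::tanh(x, scope)?.into()),
        scope,
    )?;
    // Linear output layer, as in the example itself.
    let (vars3, output_layer) = layer(hidden2.clone(), hidden_size, 1, &|x, _| Ok(x), scope)?;
    // All three variable vectors would then be handed to the optimizer via
    // MinimizeOptions::default().with_variables(...).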

src/train.rs

Lines changed: 112 additions & 11 deletions

@@ -131,8 +131,10 @@ pub struct GradientDescentOptimizer {
 
 impl GradientDescentOptimizer {
     /// Creates a new optimizer with the given learning rate.
-    pub fn new(learning_rate: Output) -> Self {
-        Self { learning_rate }
+    pub fn new<T: Into<Output>>(learning_rate: T) -> Self {
+        Self {
+            learning_rate: learning_rate.into(),
+        }
     }
 }
 
@@ -216,15 +218,9 @@ fn create_zeros_slot(
     dtype: Option<DataType>,
 ) -> Result<Variable> {
     let dtype = dtype.unwrap_or_else(|| primary.dtype);
-    // TODO: use standard op
-    let zeros = {
-        let name = scope.get_unique_name_for_op("ZerosLike");
-        let mut graph = scope.graph_mut();
-        let mut nd = graph.new_operation("ZerosLike", &name)?;
-        nd.add_input(primary.output.clone());
-        nd.add_control_input(&primary.initializer);
-        nd.finish()?
-    };
+    let zeros = ops::ZerosLike::new()
+        .add_control_input(primary.initializer.clone())
+        .build(primary.output.clone(), scope)?;
     Variable::builder()
         .initial_value(zeros)
         .shape(primary.shape.clone())
@@ -276,9 +272,13 @@ impl Optimizer for AdadeltaOptimizer {
 #[cfg(test)]
 mod tests {
     use super::*;
+    use crate::ops;
+    use crate::Scope;
     use crate::Session;
     use crate::SessionOptions;
     use crate::SessionRunArgs;
+    use crate::Shape;
+    use crate::Tensor;
 
     #[test]
     fn simple_gradient_descent() {
@@ -403,4 +403,105 @@ mod tests {
             x_output[0]
         );
     }
+
+    #[test]
+    fn xor_nn() {
+        let mut scope = Scope::new_root_scope();
+        let scope = &mut scope;
+        let hidden_size: u64 = 4;
+        let input = ops::Placeholder::new()
+            .data_type(DataType::Float)
+            .shape(Shape::from(&[1u64, 2][..]))
+            .build(&mut scope.with_op_name("input"))
+            .unwrap();
+        let label = ops::Placeholder::new()
+            .data_type(DataType::Float)
+            .shape(Shape::from(&[1u64][..]))
+            .build(&mut scope.with_op_name("label"))
+            .unwrap();
+        let w_shape = ops::constant(&[2, hidden_size as i64][..], scope).unwrap();
+        let w_init = ops::random_normal(w_shape, scope).unwrap();
+        let w = Variable::builder()
+            .initial_value(w_init)
+            .data_type(DataType::Float)
+            .shape(Shape::from(&[2, hidden_size][..]))
+            .build(&mut scope.with_op_name("w"))
+            .unwrap();
+        let b = Variable::builder()
+            .const_initial_value(Tensor::<f32>::new(&[hidden_size]))
+            .build(&mut scope.with_op_name("b"))
+            .unwrap();
+        let layer1a = ops::MatMul::new()
+            .build(input.clone(), w.output.clone(), scope)
+            .unwrap();
+        let layer1b = ops::Add::new()
+            .build(layer1a, b.output.clone(), scope)
+            .unwrap();
+        let layer1 = ops::Tanh::new().build(layer1b, scope).unwrap();
+        let w2_shape = ops::constant(&[hidden_size as i64, 1][..], scope).unwrap();
+        let w2_init = ops::random_normal(w2_shape, scope).unwrap();
+        let w2 = Variable::builder()
+            .initial_value(w2_init)
+            .data_type(DataType::Float)
+            .shape(Shape::from(&[hidden_size, 1][..]))
+            .build(&mut scope.with_op_name("w2"))
+            .unwrap();
+        let b2 = Variable::builder()
+            .const_initial_value(Tensor::<f32>::new(&[1]))
+            .build(&mut scope.with_op_name("b2"))
+            .unwrap();
+        let layer2a = ops::mat_mul(layer1, w2.output.clone(), scope).unwrap();
+        let layer2b = ops::add(layer2a, b2.output.clone(), scope).unwrap();
+        let layer2 = layer2b;
+        let error = ops::subtract(layer2.clone(), label.clone(), scope).unwrap();
+        let error_squared = ops::multiply(error.clone(), error, scope).unwrap();
+        let sgd = GradientDescentOptimizer {
+            learning_rate: Output {
+                operation: ops::constant(0.1f32, scope).unwrap(),
+                index: 0,
+            },
+        };
+        let variables = vec![w.clone(), b.clone(), w2.clone(), b2.clone()];
+        let (minimizer_vars, minimize) = sgd
+            .minimize(
+                scope,
+                error_squared.clone().into(),
+                MinimizeOptions::default().with_variables(&variables),
+            )
+            .unwrap();
+        let options = SessionOptions::new();
+        let g = scope.graph_mut();
+        let session = Session::new(&options, &g).unwrap();
+
+        let mut run_args = SessionRunArgs::new();
+        for var in &variables {
+            run_args.add_target(&var.initializer);
+        }
+        for var in &minimizer_vars {
+            run_args.add_target(&var.initializer);
+        }
+        session.run(&mut run_args).unwrap();
+
+        let mut input_tensor = Tensor::<f32>::new(&[1, 2]);
+        let mut label_tensor = Tensor::<f32>::new(&[1]);
+        let mut train = |i| {
+            input_tensor[0] = (i & 1) as f32;
+            input_tensor[1] = ((i >> 1) & 1) as f32;
+            label_tensor[0] = ((i & 1) ^ ((i >> 1) & 1)) as f32;
+            let mut run_args = SessionRunArgs::new();
+            run_args.add_target(&minimize);
+            let error_squared_fetch = run_args.request_fetch(&error_squared, 0);
+            run_args.add_feed(&input, 0, &input_tensor);
+            run_args.add_feed(&label, 0, &label_tensor);
+            session.run(&mut run_args).unwrap();
+            run_args.fetch::<f32>(error_squared_fetch).unwrap()[0]
+        };
+        for i in 0..1000 {
+            train(i);
+        }
+        for i in 0..4 {
+            let error = train(i);
+            assert!(error < 0.01, "error = {}", error);
+        }
+    }
 }
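
A related change in this commit is that `GradientDescentOptimizer::new` is now generic over `T: Into<Output>`, so a learning-rate constant can be handed to it directly rather than building an `Output` by hand as the `xor_nn` test does. A minimal sketch, assuming (as the example above already relies on) that the result of `ops::constant` converts into an `Output`:

    // Hypothetical usage of the new generic constructor; the hand-built
    // Output { operation, index: 0 } in the test would no longer be needed.
    let sgd = GradientDescentOptimizer::new(ops::constant(0.1f32, scope)?);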

test-all

Lines changed: 1 addition & 0 deletions

@@ -43,6 +43,7 @@ cargo test -vv -j 2 --features tensorflow_unstable
 cargo test -vv -j 2 --features experimental_training
 cargo test -vv -j 2 --features tensorflow_unstable,experimental_training
 cargo run --example regression
+cargo run --features=experimental_training --example xor
 cargo run --features tensorflow_unstable --example expressions
 cargo doc -vv --features tensorflow_unstable,experimental_training
 # TODO(#66): Re-enable: (cd tensorflow-sys && cargo test -vv -j 1)
