diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 26803e88b..0a9c3e707 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -17,6 +17,7 @@ steps: Pkg.develop(path="src/ReinforcementLearningBase") Pkg.develop(path="src/ReinforcementLearningEnvironments") Pkg.develop(path="src/ReinforcementLearningCore") + Pkg.develop(path="src/ReinforcementLearningFarm") println("+++ :julia: Running tests") Pkg.test("ReinforcementLearningCore", coverage=true) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 016733c3f..0dcb7870a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -95,6 +95,7 @@ jobs: Pkg.develop(path="src/ReinforcementLearningBase") Pkg.develop(path="src/ReinforcementLearningCore") Pkg.develop(path="src/ReinforcementLearningEnvironments") + Pkg.develop(path="src/ReinforcementLearningFarm") Pkg.test("ReinforcementLearningCore", coverage=true)' - uses: julia-actions/julia-processcoverage@v1 with: diff --git a/src/ReinforcementLearningCore/Project.toml b/src/ReinforcementLearningCore/Project.toml index 04a3b01ee..f5e916fbe 100644 --- a/src/ReinforcementLearningCore/Project.toml +++ b/src/ReinforcementLearningCore/Project.toml @@ -37,6 +37,7 @@ Metal = "1.0" ProgressMeter = "1" Reexport = "1" ReinforcementLearningBase = "0.12" +ReinforcementLearningFarm = "0.0.1" ReinforcementLearningTrajectories = "0.3.7" Statistics = "1" StatsBase = "0.32, 0.33, 0.34" @@ -52,9 +53,10 @@ Metal = "dde4c033-4e86-420c-a63e-0dd931031962" Preferences = "21216c6a-2e73-6563-6e65-726566657250" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" ReinforcementLearningEnvironments = "25e41dd2-4622-11e9-1641-f1adca772921" +ReinforcementLearningFarm = "14eff660-7080-4cec-bba2-cfb12cd77ac3" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" cuDNN = "02a925ec-e4fe-4b08-9a7e-0d78e3d38ccd" [targets] -test = ["CommonRLInterface", "CUDA", "cuDNN", "DomainSets", "Metal", "Preferences", "ReinforcementLearningEnvironments", "Test", "UUIDs"] +test = ["CommonRLInterface", "CUDA", "cuDNN", "DomainSets", "Metal", "Preferences", "ReinforcementLearningEnvironments", "ReinforcementLearningFarm", "Test", "UUIDs"] diff --git a/src/ReinforcementLearningCore/src/policies/agent/agent_base.jl b/src/ReinforcementLearningCore/src/policies/agent/agent_base.jl index adbc0bb84..5769a9fa7 100644 --- a/src/ReinforcementLearningCore/src/policies/agent/agent_base.jl +++ b/src/ReinforcementLearningCore/src/policies/agent/agent_base.jl @@ -37,7 +37,7 @@ RLBase.optimise!(::SyncTrajectoryStyle, agent::AbstractAgent, stage::S) where {S # already spawn a task to optimise inner policy when initializing the agent RLBase.optimise!(::AsyncTrajectoryStyle, agent::AbstractAgent, stage::S) where {S<:AbstractStage} = nothing -#by default, optimise does nothing at all stage +#by default, optimise does nothing at all stages function RLBase.optimise!(policy::AbstractPolicy, stage::AbstractStage, trajectory::Trajectory) end Flux.@layer Agent trainable=(policy,) diff --git a/src/ReinforcementLearningCore/src/policies/explorers/epsilon_greedy_explorer.jl b/src/ReinforcementLearningCore/src/policies/explorers/epsilon_greedy_explorer.jl index 22c387487..7d8925e87 100644 --- a/src/ReinforcementLearningCore/src/policies/explorers/epsilon_greedy_explorer.jl +++ b/src/ReinforcementLearningCore/src/policies/explorers/epsilon_greedy_explorer.jl @@ -99,13 +99,13 @@ get_ϵ(s::EpsilonGreedyExplorer) = get_ϵ(s, s.step) `NaN` will be filtered unless all the values are 
`NaN`. In that case, a random one will be returned. """ -function RLBase.plan!(s::EpsilonGreedyExplorer{<:Any,true}, values::Vector{I}) where {I<:Real} +function RLBase.plan!(s::EpsilonGreedyExplorer{<:Any,true}, values::A) where {I<:Real, A<:AbstractArray{I}} ϵ = get_ϵ(s) s.step += 1 rand(s.rng) >= ϵ ? rand(s.rng, find_all_max(values)[2]) : rand(s.rng, 1:length(values)) end -function RLBase.plan!(s::EpsilonGreedyExplorer{<:Any,false}, values::Vector{I}) where {I<:Real} +function RLBase.plan!(s::EpsilonGreedyExplorer{<:Any,false}, values::A) where {I<:Real, A<:AbstractArray{I}} ϵ = get_ϵ(s) s.step += 1 rand(s.rng) >= ϵ ? findmax(values)[2] : rand(s.rng, 1:length(values)) @@ -113,17 +113,18 @@ end ##### -RLBase.plan!(s::EpsilonGreedyExplorer{<:Any,true}, x, mask::Trues) = RLBase.plan!(s, x) +RLBase.plan!(s::EpsilonGreedyExplorer{<:Any,true}, x::A, mask::Trues) where {I<:Real, A<:AbstractArray{I}} = RLBase.plan!(s, x) -function RLBase.plan!(s::EpsilonGreedyExplorer{<:Any,true}, values::Vector{I}, mask::M) where {I<:Real, M<:Union{BitVector, Vector{Bool}}} +function RLBase.plan!(s::EpsilonGreedyExplorer{<:Any,true}, values::A, mask::M) where {I<:Real, A<:AbstractArray{I}, M<:Union{BitVector, Vector{Bool}}} ϵ = get_ϵ(s) s.step += 1 rand(s.rng) >= ϵ ? rand(s.rng, find_all_max(values, mask)[2]) : rand(s.rng, findall(mask)) end -RLBase.plan!(s::EpsilonGreedyExplorer{<:Any,false}, x::Vector{I}, mask::Trues) where{I<:Real} = RLBase.plan!(s, x) -function RLBase.plan!(s::EpsilonGreedyExplorer{<:Any,false}, values::Vector{I}, mask::M) where {I<:Real, M<:Union{BitVector, Vector{Bool}}} +RLBase.plan!(s::EpsilonGreedyExplorer{<:Any,false}, x::A, mask::Trues) where{I<:Real, A<:AbstractArray{I}} = RLBase.plan!(s, x) + +function RLBase.plan!(s::EpsilonGreedyExplorer{<:Any,false}, values::A, mask::M) where {I<:Real, A<:AbstractArray{I}, M<:Union{BitVector, Vector{Bool}}} ϵ = get_ϵ(s) s.step += 1 rand(s.rng) >= ϵ ? findmax_masked(values, mask)[2] : rand(s.rng, findall(mask)) @@ -137,7 +138,7 @@ end Return the probability of selecting each action given the estimated `values` of each action. """ -function RLBase.prob(s::EpsilonGreedyExplorer{<:Any,true}, values) +function RLBase.prob(s::EpsilonGreedyExplorer{<:Any,true}, values::A) where {I<:Real, A<:AbstractArray{I}} ϵ, n = get_ϵ(s), length(values) probs = fill(ϵ / n, n) max_val_inds = find_all_max(values)[2] diff --git a/src/ReinforcementLearningCore/src/policies/learners/approximator.jl b/src/ReinforcementLearningCore/src/policies/learners/approximator.jl deleted file mode 100644 index 43fb9b955..000000000 --- a/src/ReinforcementLearningCore/src/policies/learners/approximator.jl +++ /dev/null @@ -1,45 +0,0 @@ -using Flux - -""" - Approximator(model, optimiser) - -Wraps a Flux trainable model and implements the `RLBase.optimise!(::Approximator, ::Gradient)` -interface. See the RLCore documentation for more information on proper usage. -""" -struct Approximator{M,O} <: AbstractLearner - model::M - optimiser_state::O -end - - -""" - Approximator(; model, optimiser, usegpu=false) - -Constructs an `Approximator` object for reinforcement learning. - -# Arguments -- `model`: The model used for approximation. -- `optimiser`: The optimizer used for updating the model. -- `usegpu`: A boolean indicating whether to use GPU for computation. Default is `false`. - -# Returns -An `Approximator` object. 
-"""
-function Approximator(; model, optimiser::Flux.Optimise.AbstractOptimiser, use_gpu=false)
-    optimiser_state = Flux.setup(optimiser, model)
-    if use_gpu # Pass model to GPU (if available) upon creation
-        return Approximator(gpu(model), gpu(optimiser_state))
-    else
-        return Approximator(model, optimiser_state)
-    end
-end
-
-Approximator(model, optimiser::Flux.Optimise.AbstractOptimiser; use_gpu=false) = Approximator(model=model, optimiser=optimiser, use_gpu=use_gpu)
-
-Flux.@layer Approximator trainable=(model,)
-
-forward(A::Approximator, args...; kwargs...) = A.model(args...; kwargs...)
-forward(A::Approximator, env::E) where {E <: AbstractEnv} = env |> state |> (x -> forward(A, x))
-
-RLBase.optimise!(A::Approximator, grad::NamedTuple) =
-    Flux.Optimise.update!(A.optimiser_state, A.model, grad.model)
diff --git a/src/ReinforcementLearningCore/src/policies/learners/flux_approximator.jl b/src/ReinforcementLearningCore/src/policies/learners/flux_approximator.jl
new file mode 100644
index 000000000..02bc20087
--- /dev/null
+++ b/src/ReinforcementLearningCore/src/policies/learners/flux_approximator.jl
@@ -0,0 +1,47 @@
+export FluxApproximator
+
+using Flux
+
+"""
+    FluxApproximator(model, optimiser)
+
+Wraps a Flux trainable model and implements the `RLBase.optimise!(::FluxApproximator, ::Gradient)`
+interface. See the RLCore documentation for more information on proper usage.
+"""
+struct FluxApproximator{M,O} <: AbstractLearner
+    model::M
+    optimiser_state::O
+end
+
+
+"""
+    FluxApproximator(; model, optimiser, use_gpu=false)
+
+Constructs a `FluxApproximator` object for reinforcement learning.
+
+# Arguments
+- `model`: The model used for approximation.
+- `optimiser`: The optimiser used for updating the model.
+- `use_gpu`: A boolean indicating whether to use the GPU for computation. Default is `false`.
+
+# Returns
+A `FluxApproximator` object.
+"""
+function FluxApproximator(; model, optimiser, use_gpu=false)
+    optimiser_state = Flux.setup(optimiser, model)
+    if use_gpu # Pass model to GPU (if available) upon creation
+        return FluxApproximator(gpu(model), gpu(optimiser_state))
+    else
+        return FluxApproximator(model, optimiser_state)
+    end
+end
+
+FluxApproximator(model, optimiser::Flux.Optimise.AbstractOptimiser; use_gpu=false) = FluxApproximator(model=model, optimiser=optimiser, use_gpu=use_gpu)
+
+Flux.@layer FluxApproximator trainable=(model,)
+
+forward(A::FluxApproximator, args...; kwargs...) = A.model(args...; kwargs...)
+forward(A::FluxApproximator, env::E) where {E <: AbstractEnv} = env |> state |> (x -> forward(A, x)) + +RLBase.optimise!(A::FluxApproximator, grad::NamedTuple) = + Flux.Optimise.update!(A.optimiser_state, A.model, grad.model) diff --git a/src/ReinforcementLearningCore/src/policies/learners/learners.jl b/src/ReinforcementLearningCore/src/policies/learners/learners.jl index 28fafdd9f..0290ea7bd 100644 --- a/src/ReinforcementLearningCore/src/policies/learners/learners.jl +++ b/src/ReinforcementLearningCore/src/policies/learners/learners.jl @@ -1,4 +1,5 @@ include("abstract_learner.jl") -include("approximator.jl") +include("flux_approximator.jl") include("tabular_approximator.jl") +include("td_learner.jl") include("target_network.jl") diff --git a/src/ReinforcementLearningCore/src/policies/learners/tabular_approximator.jl b/src/ReinforcementLearningCore/src/policies/learners/tabular_approximator.jl index ef76d2a7e..546660279 100644 --- a/src/ReinforcementLearningCore/src/policies/learners/tabular_approximator.jl +++ b/src/ReinforcementLearningCore/src/policies/learners/tabular_approximator.jl @@ -1,11 +1,14 @@ export TabularApproximator, TabularVApproximator, TabularQApproximator -const TabularApproximator = Approximator{A,O} where {A<:AbstractArray,O} -const TabularQApproximator = Approximator{A,O} where {A<:AbstractArray,O} -const TabularVApproximator = Approximator{A,O} where {A<:AbstractVector,O} +struct TabularApproximator{A} <: AbstractLearner where {A<:AbstractArray} + model::A +end + +const TabularQApproximator = TabularApproximator{A} where {A<:AbstractMatrix} +const TabularVApproximator = TabularApproximator{A} where {A<:AbstractVector} """ - TabularApproximator(table<:AbstractArray, opt) + TabularApproximator(table<:AbstractArray) For `table` of 1-d, it will serve as a state value approximator. For `table` of 2-d, it will serve as a state-action value approximator. @@ -13,34 +16,34 @@ For `table` of 2-d, it will serve as a state-action value approximator. !!! warning For `table` of 2-d, the first dimension is action and the second dimension is state. 
""" -function TabularApproximator(table::A, opt::O) where {A<:AbstractArray,O} +function TabularApproximator(table::A) where {A<:AbstractArray} n = ndims(table) n <= 2 || throw(ArgumentError("the dimension of table must be <= 2")) - TabularApproximator{A,O}(table, opt) + TabularApproximator{A}(table) end -TabularVApproximator(; n_state, init = 0.0, opt = InvDecay(1.0)) = - TabularApproximator(fill(init, n_state), opt) +TabularVApproximator(; n_state, init = 0.0) = + TabularApproximator(fill(init, n_state)) -TabularQApproximator(; n_state, n_action, init = 0.0, opt = InvDecay(1.0)) = - TabularApproximator(fill(init, n_action, n_state), opt) +TabularQApproximator(; n_state, n_action, init = 0.0) = + TabularApproximator(fill(init, n_action, n_state)) # Take Learner and Environment, get state, send to RLCore.forward(Learner, State) forward(L::TabularVApproximator, env::E) where {E <: AbstractEnv} = env |> state |> (x -> forward(L, x)) forward(L::TabularQApproximator, env::E) where {E <: AbstractEnv} = env |> state |> (x -> forward(L, x)) RLCore.forward( - app::TabularVApproximator{R,O}, + app::TabularVApproximator{R}, s::I, -) where {R<:AbstractVector,O,I} = @views app.model[s] +) where {R<:AbstractVector,I} = @views app.model[s] RLCore.forward( - app::TabularQApproximator{R,O}, + app::TabularQApproximator{R}, s::I, -) where {R<:AbstractArray,O,I} = @views app.model[:, s] +) where {R<:AbstractArray,I} = @views app.model[:, s] RLCore.forward( - app::TabularQApproximator{R,O}, + app::TabularQApproximator{R}, s::I1, a::I2, -) where {R<:AbstractArray,O,I1,I2} = @views app.model[a, s] +) where {R<:AbstractArray,I1,I2} = @views app.model[a, s] diff --git a/src/ReinforcementLearningCore/src/policies/learners/target_network.jl b/src/ReinforcementLearningCore/src/policies/learners/target_network.jl index 74003644c..7a3b8490a 100644 --- a/src/ReinforcementLearningCore/src/policies/learners/target_network.jl +++ b/src/ReinforcementLearningCore/src/policies/learners/target_network.jl @@ -1,14 +1,14 @@ -export Approximator, TargetNetwork, target, model +export TargetNetwork, target, model using Flux -target(ap::Approximator) = ap.model #see TargetNetwork -model(ap::Approximator) = ap.model #see TargetNetwork +target(ap::FluxApproximator) = ap.model #see TargetNetwork +model(ap::FluxApproximator) = ap.model #see TargetNetwork """ - TargetNetwork(network::Approximator; sync_freq::Int = 1, ρ::Float32 = 0f0) + TargetNetwork(network::FluxApproximator; sync_freq::Int = 1, ρ::Float32 = 0f0) -Wraps an Approximator to hold a target network that is updated towards the model of the +Wraps an FluxApproximator to hold a target network that is updated towards the model of the approximator. - `sync_freq` is the number of updates of `network` between each update of the `target`. - ρ (\rho) is "how much of the target is kept when updating it". @@ -21,11 +21,11 @@ Implements the `RLBase.optimise!(::TargetNetwork, ::Gradient)` interface to upda and the target with weights replacement or Polyak averaging. Note to developers: `model(::TargetNetwork)` will return the trainable Flux model -and `target(::TargetNetwork)` returns the target model and `target(::Approximator)` +and `target(::TargetNetwork)` returns the target model and `target(::FluxApproximator)` returns the non-trainable Flux model. See the RLCore documentation. """ mutable struct TargetNetwork{M} - network::Approximator{M} + network::FluxApproximator{M} target::M sync_freq::Int ρ::Float32 @@ -46,13 +46,13 @@ Constructs a target network for reinforcement learning. 
 # Returns
 A `TargetNetwork` object.
 """
-function TargetNetwork(network::Approximator; sync_freq = 1, ρ = 0f0, use_gpu = false)
+function TargetNetwork(network::FluxApproximator; sync_freq = 1, ρ = 0f0, use_gpu = false)
     @assert 0 <= ρ <= 1 "ρ must in [0,1]"
     ρ = Float32(ρ)
     if use_gpu
-        @assert typeof(gpu(network.model)) == typeof(network.model) "`Approximator` model is not on GPU. Please set `use_gpu=false`` or ensure model is on GPU, by setting `use_gpu=true` when constructing `Approximator`."
-        # NOTE: model is pushed to gpu in Approximator, need to transfer to cpu before deepcopy, then push target model to gpu
+        @assert typeof(gpu(network.model)) == typeof(network.model) "`FluxApproximator` model is not on GPU. Please set `use_gpu=false` or ensure the model is on the GPU by setting `use_gpu=true` when constructing `FluxApproximator`."
+        # NOTE: model is pushed to gpu in FluxApproximator, need to transfer to cpu before deepcopy, then push target model to gpu
         target = gpu(deepcopy(cpu(network.model)))
     else
         target = deepcopy(network.model)
diff --git a/src/ReinforcementLearningCore/src/policies/learners/td_learner.jl b/src/ReinforcementLearningCore/src/policies/learners/td_learner.jl
new file mode 100644
index 000000000..15cbaf3b9
--- /dev/null
+++ b/src/ReinforcementLearningCore/src/policies/learners/td_learner.jl
@@ -0,0 +1,93 @@
+export TDLearner
+
+using LinearAlgebra: dot
+using Distributions: pdf
+import Base.push!
+
+using ReinforcementLearningCore: AbstractLearner, TabularApproximator
+using Flux
+
+"""
+    TDLearner(approximator, method; γ=1.0, α=0.01, n=0)
+
+Use the temporal-difference method to estimate state value or state-action value.
+
+# Fields
+- `approximator` is `<:TabularApproximator`.
+- `γ=1.0`, discount rate.
+- `method`: only `:SARS` (Q-learning) is supported for the time being.
+- `n=0`: the number of time steps used minus 1.
+"""
+Base.@kwdef mutable struct TDLearner{M,A} <: AbstractLearner where {A<:TabularApproximator,M<:Symbol}
+    approximator::A
+    γ::Float64 = 1.0 # discount factor
+    α::Float64 = 0.01 # learning rate
+    n::Int = 0
+
+    function TDLearner(approximator::A, method::Symbol; γ=1.0, α=0.01, n=0) where {A<:TabularApproximator}
+        if method ∉ [:SARS]
+            @error "Method $method is not supported"
+        else
+            new{method, A}(approximator, γ, α, n)
+        end
+    end
+end
+
+RLCore.forward(L::TDLearner, s::Int) = RLCore.forward(L.approximator, s)
+RLCore.forward(L::TDLearner, s::Int, a::Int) = RLCore.forward(L.approximator, s, a)
+
+Q(app::TabularApproximator, s::Int, a::Int) = RLCore.forward(app, s, a)
+Q(app::TabularApproximator, s::Int) = RLCore.forward(app, s)
+
+"""
+    bellman_update!(approx::TabularApproximator, state::Int, next_state::Int, action::Int, reward::AbstractFloat, γ::Float64, α::Float64)
+
+Update the Q-value of the given state-action pair.
+""" +function bellman_update!( + approx::TabularApproximator, + state::I1, + next_state::I2, + action::I3, + reward::F1, + γ::Float64, # discount factor + α::Float64, # learning rate +) where {I1<:Integer,I2<:Integer,I3<:Integer,F1<:AbstractFloat} + # Q-learning formula following https://github.com/JuliaPOMDP/TabularTDLearning.jl/blob/25c4d3888e178c51ed1ff448f36b0fcaf7c1d8e8/src/q_learn.jl#LL63C26-L63C95 + # Terminology following https://en.wikipedia.org/wiki/Q-learning + estimate_optimal_future_value = maximum(Q(approx, next_state)) + current_value = Q(approx, state, action) + raw_q_value = (reward + γ * estimate_optimal_future_value - current_value) # Discount factor γ is applied here + approx.model[action, state] += α * raw_q_value + return Q(approx, state, action) +end + +function _optimise!( + n::I1, + γ::F, # discount factor + α::F, # learning rate + approx::TabularApproximator{Ar}, + state::I2, + next_state::I2, + action::I3, + reward::F, +) where {I1<:Number,I2<:Number,I3<:Number,Ar<:AbstractArray,F<:AbstractFloat} + bellman_update!(approx, state, next_state, action, reward, γ, α) +end + +function RLBase.optimise!( + L::TDLearner, + t::@NamedTuple{state::I1, next_state::I1, action::I2, reward::F2, terminal::Bool}, +) where {I1<:Number,I2<:Number,F2<:AbstractFloat} + _optimise!(L.n, L.γ, L.α, L.approximator, t.state, t.next_state, t.action, t.reward) +end + +function RLBase.optimise!(learner::TDLearner, stage::AbstractStage, trajectory::Trajectory) + for batch in trajectory.container + optimise!(learner, stage, batch) + end +end + +# TDLearner{:SARS} is optimized at the PostActStage +RLBase.optimise!(learner::TDLearner{:SARS}, stage::PostActStage, trace::NamedTuple) = RLBase.optimise!(learner, trace) + diff --git a/src/ReinforcementLearningCore/src/policies/q_based_policy.jl b/src/ReinforcementLearningCore/src/policies/q_based_policy.jl index b712c3144..53774e23c 100644 --- a/src/ReinforcementLearningCore/src/policies/q_based_policy.jl +++ b/src/ReinforcementLearningCore/src/policies/q_based_policy.jl @@ -10,7 +10,7 @@ action of an environment at its current state. It is typically a table or a neur QBasedPolicy can be queried for an action with `RLBase.plan!`, the explorer will affect the action selection accordingly. 
""" -Base.@kwdef mutable struct QBasedPolicy{L,E} <: AbstractPolicy +Base.@kwdef mutable struct QBasedPolicy{L<:TDLearner,E<:AbstractExplorer} <: AbstractPolicy "estimate the Q value" learner::L "select the action based on Q values calculated by the learner" @@ -19,16 +19,16 @@ end Flux.@layer QBasedPolicy trainable=(learner,) -function RLBase.plan!(p::QBasedPolicy{L,Ex}, env::E) where {Ex<:AbstractExplorer,L<:AbstractLearner,E<:AbstractEnv} - RLBase.plan!(p.explorer, p.learner, env) +function RLBase.plan!(policy::QBasedPolicy{L,Ex}, env::E) where {Ex<:AbstractExplorer,L<:TDLearner,E<:AbstractEnv} + RLBase.plan!(policy.explorer, policy.learner, env) end -function RLBase.plan!(p::QBasedPolicy{L,Ex}, env::E, player::Symbol) where {Ex<:AbstractExplorer,L<:AbstractLearner,E<:AbstractEnv} - RLBase.plan!(p.explorer, p.learner, env, player) +function RLBase.plan!(policy::QBasedPolicy{L,Ex}, env::E, player::Symbol) where {Ex<:AbstractExplorer,L<:TDLearner,E<:AbstractEnv} + RLBase.plan!(policy.explorer, policy.learner, env, player) end -RLBase.prob(p::QBasedPolicy{L,Ex}, env::AbstractEnv) where {L<:AbstractLearner,Ex<:AbstractExplorer} = - prob(p.explorer, forward(p.learner, env), legal_action_space_mask(env)) +RLBase.prob(policy::QBasedPolicy{L,Ex}, env::AbstractEnv) where {L<:TDLearner,Ex<:AbstractExplorer} = + prob(policy.explorer, forward(policy.learner, env), legal_action_space_mask(env)) #the internal learner defines the optimization stage. -RLBase.optimise!(p::QBasedPolicy, s::AbstractStage, trajectory::Trajectory) = RLBase.optimise!(p.learner, s, trajectory) +RLBase.optimise!(policy::QBasedPolicy, stage::AbstractStage, trajectory::Trajectory) = RLBase.optimise!(policy.learner, stage, trajectory) diff --git a/src/ReinforcementLearningCore/test/policies/learners/abstract_learner.jl b/src/ReinforcementLearningCore/test/policies/learners/abstract_learner.jl index 52700d3cd..fbf9ec09b 100644 --- a/src/ReinforcementLearningCore/test/policies/learners/abstract_learner.jl +++ b/src/ReinforcementLearningCore/test/policies/learners/abstract_learner.jl @@ -1,32 +1,30 @@ using Test using Flux +using ReinforcementLearningCore, ReinforcementLearningBase + +# Mock explorer, environment, and learner +struct MockExplorer <: AbstractExplorer end +struct MockEnv <: AbstractEnv end +struct MockLearner <: AbstractLearner end @testset "AbstractLearner Tests" begin @testset "Forward" begin - # Mock environment and learner - struct MockEnv <: AbstractEnv end - struct MockLearner <: AbstractLearner end - function RLCore.forward(::MockLearner, ::AbstractState) - return rand(2) + function RLCore.forward(::MockLearner, state::Int) + return [1.0, 2.0] end + RLBase.state(::MockEnv, ::Observation{Any}, ::DefaultPlayer) = 1 + env = MockEnv() learner = MockLearner() - output = forward(learner, env) - - @test typeof(output) == Array{Float64,1} - @test length(output) == 2 + output = RLCore.forward(learner, env) + @test output == Float64[1.0, 2.0] end @testset "Plan" begin - # Mock explorer, environment, and learner - struct MockExplorer <: AbstractExplorer end - struct MockEnv <: AbstractEnv end - struct MockLearner <: AbstractLearner end - - function RLBase.plan!(::MockExplorer, ::AbstractState, ::AbstractActionSpace) + function RLBase.plan!(::MockExplorer, learner::MockLearner, env::MockEnv) return rand(2) end @@ -42,11 +40,11 @@ using Flux @testset "Plan with Player" begin # Mock explorer, environment, and learner - struct MockExplorer <: AbstractExplorer end - struct MockEnv <: AbstractEnv end - struct MockLearner <: 
AbstractLearner end + function RLBase.action_space(::MockEnv, ::Symbol) + return [1, 2] + end - function RLBase.plan!(::MockExplorer, ::AbstractState, ::AbstractActionSpace) + function RLBase.plan!(::MockExplorer, learner::MockLearner, env::MockEnv, p::Symbol) return rand(2) end @@ -62,12 +60,11 @@ using Flux end @testset "optimise!" begin - struct MockLearner <: AbstractLearner end tr = Trajectory( CircularArraySARTSTraces(; capacity = 1_000), BatchSampler(1), InsertSampleRatioController(n_inserted = -1), ) - @test optimise!(MockLearner(), PreActStage(), tr) is nothing + @test optimise!(MockLearner(), PreActStage(), tr) == nothing end end diff --git a/src/ReinforcementLearningCore/test/policies/learners/approximator.jl b/src/ReinforcementLearningCore/test/policies/learners/flux_approximator.jl similarity index 73% rename from src/ReinforcementLearningCore/test/policies/learners/approximator.jl rename to src/ReinforcementLearningCore/test/policies/learners/flux_approximator.jl index 53bff4c60..b5fd19749 100644 --- a/src/ReinforcementLearningCore/test/policies/learners/approximator.jl +++ b/src/ReinforcementLearningCore/test/policies/learners/flux_approximator.jl @@ -1,13 +1,13 @@ using Test using Flux -@testset "Approximator Tests" begin +@testset "FluxApproximator Tests" begin @testset "Creation, with use_gpu = true toggle" begin model = Chain(Dense(10, 5, relu), Dense(5, 2)) optimiser = Adam() - approximator = Approximator(model=model, optimiser=optimiser, use_gpu=true) + approximator = FluxApproximator(model=model, optimiser=optimiser, use_gpu=true) - @test approximator isa Approximator + @test approximator isa FluxApproximator @test typeof(approximator.model) == typeof(gpu(model)) @test approximator.optimiser_state isa NamedTuple end @@ -15,7 +15,7 @@ using Flux @testset "Forward" begin model = Chain(Dense(10, 5, relu), Dense(5, 2)) optimiser = Adam() - approximator = Approximator(model=model, optimiser=optimiser, use_gpu=false) + approximator = FluxApproximator(model=model, optimiser=optimiser, use_gpu=false) input = rand(Float32, 10) output = RLCore.forward(approximator, input) @@ -27,9 +27,9 @@ using Flux @testset "Forward to environment" begin model = Chain(Dense(4, 5, relu), Dense(5, 2)) optimiser = Adam() - approximator = Approximator(model=model, optimiser=optimiser, use_gpu=false) + approximator = FluxApproximator(model=model, optimiser=optimiser, use_gpu=false) - env = CartPoleEnv() + env = CartPoleEnv(T=Float32) output = RLCore.forward(approximator, env) @test typeof(output) == Array{Float32,1} @test length(output) == 2 @@ -38,7 +38,7 @@ using Flux @testset "Optimise" begin model = Chain(Dense(10, 5, relu), Dense(5, 2)) optimiser = Adam() - approximator = Approximator(model=model, optimiser=optimiser) + approximator = FluxApproximator(model=model, optimiser=optimiser) input = rand(Float32, 10) diff --git a/src/ReinforcementLearningCore/test/policies/learners/learners.jl b/src/ReinforcementLearningCore/test/policies/learners/learners.jl index 94b6e0487..07deb8f48 100644 --- a/src/ReinforcementLearningCore/test/policies/learners/learners.jl +++ b/src/ReinforcementLearningCore/test/policies/learners/learners.jl @@ -1,5 +1,7 @@ @testset "approximators.jl" begin - include("approximator.jl") + include("abstract_learner.jl") + include("flux_approximator.jl") include("tabular_approximator.jl") include("target_network.jl") + include("td_learner.jl") end diff --git a/src/ReinforcementLearningCore/test/policies/learners/tabular_approximator.jl 
b/src/ReinforcementLearningCore/test/policies/learners/tabular_approximator.jl index 25c3b2e86..2876e99c1 100644 --- a/src/ReinforcementLearningCore/test/policies/learners/tabular_approximator.jl +++ b/src/ReinforcementLearningCore/test/policies/learners/tabular_approximator.jl @@ -4,11 +4,11 @@ using ReinforcementLearningCore using Flux @testset "Constructors" begin - @test TabularApproximator(fill(1, 10, 10), fill(1, 10)) isa TabularApproximator + @test TabularApproximator(fill(1, 10, 10)) isa TabularApproximator @test TabularVApproximator(n_state = 10) isa - TabularApproximator{Vector{Float64},InvDecay} + TabularApproximator{Vector{Float64}} @test TabularQApproximator(n_state = 10, n_action = 10) isa - TabularApproximator{Matrix{Float64},InvDecay} + TabularApproximator{Matrix{Float64}} end @testset "RLCore.forward" begin diff --git a/src/ReinforcementLearningCore/test/policies/learners/target_network.jl b/src/ReinforcementLearningCore/test/policies/learners/target_network.jl index 87c72c30b..e9182ddaa 100644 --- a/src/ReinforcementLearningCore/test/policies/learners/target_network.jl +++ b/src/ReinforcementLearningCore/test/policies/learners/target_network.jl @@ -1,17 +1,18 @@ using Test using Flux using ReinforcementLearningCore + @testset "TargetNetwork Tests" begin @testset "Creation" begin model = Chain(Dense(10, 5, relu), Dense(5, 2)) optimiser = Adam() if ((@isdefined CUDA) && CUDA.functional()) || ((@isdefined Metal) && Metal.functional()) - @test_throws "AssertionError: `Approximator` model is not on GPU." TargetNetwork(Approximator(model, optimiser), use_gpu=true) + @test_throws "AssertionError: `FluxApproximator` model is not on GPU." TargetNetwork(FluxApproximator(model, optimiser), use_gpu=true) end - @test TargetNetwork(Approximator(model=model, optimiser=optimiser, use_gpu=true), use_gpu=true) isa TargetNetwork - @test TargetNetwork(Approximator(model, optimiser, use_gpu=true), use_gpu=true) isa TargetNetwork + @test TargetNetwork(FluxApproximator(model=model, optimiser=optimiser, use_gpu=true), use_gpu=true) isa TargetNetwork + @test TargetNetwork(FluxApproximator(model, optimiser, use_gpu=true), use_gpu=true) isa TargetNetwork - approx = Approximator(model, optimiser, use_gpu=false) + approx = FluxApproximator(model, optimiser, use_gpu=false) target_network = TargetNetwork(approx, use_gpu=false) @@ -25,7 +26,7 @@ using ReinforcementLearningCore @testset "Forward" begin model = Chain(Dense(10, 5, relu), Dense(5, 2)) - target_network = TargetNetwork(Approximator(model, Adam())) + target_network = TargetNetwork(FluxApproximator(model, Adam())) input = rand(Float32, 10) output = RLCore.forward(target_network, input) @@ -37,7 +38,7 @@ using ReinforcementLearningCore @testset "Optimise" begin optimiser = Adam() model = Chain(Dense(10, 5, relu), Dense(5, 2)) - approximator = Approximator(model, optimiser) + approximator = FluxApproximator(model, optimiser) target_network = TargetNetwork(approximator) input = rand(Float32, 10) grad = Flux.Zygote.gradient(target_network) do model @@ -53,7 +54,7 @@ using ReinforcementLearningCore @testset "Sync" begin optimiser = Adam() - model = Approximator(Chain(Dense(10, 5, relu), Dense(5, 2)), optimiser) + model = FluxApproximator(Chain(Dense(10, 5, relu), Dense(5, 2)), optimiser) target_network = TargetNetwork(model, sync_freq=2, ρ=0.5) input = rand(Float32, 10) @@ -72,7 +73,7 @@ end @testset "TargetNetwork" begin m = Chain(Dense(4,1)) - app = Approximator(model = m, optimiser = Flux.Adam(), use_gpu=true) + app = FluxApproximator(model = m, 
optimiser = Flux.Adam(), use_gpu=true) tn = TargetNetwork(app, sync_freq = 3, use_gpu=true) @test typeof(model(tn)) == typeof(target(tn)) p1 = Flux.destructure(model(tn))[1] diff --git a/src/ReinforcementLearningCore/test/policies/learners/td_learner.jl b/src/ReinforcementLearningCore/test/policies/learners/td_learner.jl new file mode 100644 index 000000000..0379ad184 --- /dev/null +++ b/src/ReinforcementLearningCore/test/policies/learners/td_learner.jl @@ -0,0 +1,56 @@ +using Test +using Flux + +@testset "Test TDLearner creation" begin + approximator = TabularVApproximator(n_state=5) + @test TDLearner(approximator, :SARS, γ=0.95, n=0) isa TDLearner + + approximator = TabularQApproximator(n_state=5, n_action=3) + @test TDLearner(approximator, :SARS, γ=0.95, n=0) isa TDLearner +end + +# Test TDLearner struct +@testset "TDLearner struct" begin + approximator = TabularQApproximator(n_state=5, n_action=3) + learner = TDLearner(approximator, :SARS) + @test learner.approximator === approximator + @test learner.γ == 1.0 + @test learner.n == 0 +end + +# Test bellman_update! function +@testset "bellman_update! function" begin + learner = TDLearner(TabularQApproximator(n_state=5, n_action=3), :SARS) + approximator = learner.approximator + s = 1 + s_plus_one = 2 + a = 3 + α = learner.α + π_ = 5.0 + γ = 0.9 + approximator.model[2, s_plus_one] = 15 + approximator.model[a, s] = 2 + + # Following https://en.wikipedia.org/wiki/Q-learning#Algorithm + q_should_be = (1-α) * RLCore.Q(approximator, s, a) + α * (π_ + γ * maximum(RLCore.Q(approximator, s_plus_one))) + + @test RLCore.bellman_update!(approximator, s, s_plus_one, a, π_, γ, α) ≈ q_should_be + @test RLCore.Q(approximator, s, a) ≈ q_should_be +end + +# Test optimise! function +@testset "optimise! function" begin + learner = TDLearner(TabularQApproximator(n_state=5, n_action=3), :SARS) + approximator = learner.approximator + + t = (state=1, next_state=2, action=3, reward=5.0, terminal=false) + optimise!(learner, t) + @test learner.approximator.model[t.action, t.state] ≈ 0.05 + optimise!(learner, t) + @test learner.approximator.model[t.action, t.state] ≈ 0.0995 + + for i in 1:1000 + optimise!(learner, t) + end + @test approximator.model[t.action, t.state] ≈ t.reward atol=0.01 +end diff --git a/src/ReinforcementLearningCore/test/policies/policies.jl b/src/ReinforcementLearningCore/test/policies/policies.jl index 27d12ca7e..417358b1a 100644 --- a/src/ReinforcementLearningCore/test/policies/policies.jl +++ b/src/ReinforcementLearningCore/test/policies/policies.jl @@ -1,4 +1,4 @@ include("agent.jl") include("multi_agent.jl") include("learners/learners.jl") - +include("q_based_policy.jl") diff --git a/src/ReinforcementLearningCore/test/policies/q_based_policy.jl b/src/ReinforcementLearningCore/test/policies/q_based_policy.jl new file mode 100644 index 000000000..cbe3382ab --- /dev/null +++ b/src/ReinforcementLearningCore/test/policies/q_based_policy.jl @@ -0,0 +1,95 @@ +@testset "QBasedPolicy" begin + + @testset "constructor" begin + q_approx = TabularQApproximator(n_state = 5, n_action = 10) + explorer = EpsilonGreedyExplorer(0.1) + learner = TDLearner(q_approx, :SARS) + p = QBasedPolicy(learner, explorer) + @test p.learner == learner + @test p.explorer == explorer + end + + @testset "plan!" begin + @testset "plan! 
without player argument" begin + env = TicTacToeEnv() + q_approx = TabularQApproximator(n_state = 5, n_action = length(action_space(env))) + learner = TDLearner(q_approx, :SARS) + explorer = EpsilonGreedyExplorer(0.1) + policy = QBasedPolicy(learner, explorer) + @test 1 <= RLBase.plan!(policy, env) <= 9 + end + + @testset "plan! with player argument" begin + env = TicTacToeEnv() + q_approx = TabularQApproximator(n_state = 5, n_action = length(action_space(env))) + learner = TDLearner(q_approx, :SARS) + explorer = EpsilonGreedyExplorer(0.1) + policy = QBasedPolicy(learner, explorer) + player = :player1 + @test 1 <= RLBase.plan!(policy, env) <= 9 + end + end + + # Test prob function + @testset "prob" begin + env = TicTacToeEnv() + q_approx = TabularQApproximator(n_state = 5, n_action = length(action_space(env))) + learner = TDLearner(q_approx, :SARS) + explorer = EpsilonGreedyExplorer(0.1) + policy = QBasedPolicy(learner, explorer) + trajectory = Trajectory( + CircularArraySARTSTraces(; + capacity = 1, + state = Int64 => (), + action = Int64 => (), + reward = Float64 => (), + terminal = Bool => (), + ), + DummySampler(), + InsertSampleRatioController(), + ) + t = (state=2, action=3) + push!(trajectory, t) + next_state = 4 + t = (action=3, state=next_state, reward=5.0, terminal=false) + push!(trajectory, t) + optimise!(policy, PostActStage(), trajectory) + prob = RLBase.prob(policy, env) + @test prob.p == [0.9111111111111111, 0.011111111111111112, 0.011111111111111112, 0.011111111111111112, 0.011111111111111112, 0.011111111111111112, 0.011111111111111112, 0.011111111111111112, 0.011111111111111112] + end + + # Test optimise! function + @testset "optimise!" begin + env = TicTacToeEnv() + q_approx = TabularQApproximator(n_state = 5, n_action = length(action_space(env))) + explorer = EpsilonGreedyExplorer(0.1) + learner = TDLearner(q_approx, :SARS, γ=0.95, α=0.01, n=0) + policy = QBasedPolicy(learner, explorer) + trajectory = Trajectory( + CircularArraySARTSTraces(; + capacity = 1, + state = Int64 => (), + action = Int64 => (), + reward = Float64 => (), + terminal = Bool => (), + ), + DummySampler(), + InsertSampleRatioController(), + ) + t = (state=4, action=3) + push!(trajectory, t) + next_state = 4 + t = (action=3, state=next_state, reward=5.0, terminal=false) + push!(trajectory, t) + + RLBase.optimise!(policy, PostActStage(), trajectory) + @test policy.learner.approximator.model[t.action, t.state] ≈ 0.05 + RLBase.optimise!(policy, PostActStage(), trajectory) + @test policy.learner.approximator.model[t.action, t.state] ≈ 0.09997500000000001 + + for i in 1:100000 + RLBase.optimise!(policy, PostActStage(), trajectory) + end + @test policy.learner.approximator.model[t.action, t.state] ≈ t.reward / (1-policy.learner.γ) atol=0.01 + end +end diff --git a/src/ReinforcementLearningCore/test/utils/networks.jl b/src/ReinforcementLearningCore/test/utils/networks.jl index c995366ff..f070dc75c 100644 --- a/src/ReinforcementLearningCore/test/utils/networks.jl +++ b/src/ReinforcementLearningCore/test/utils/networks.jl @@ -5,7 +5,7 @@ import ReinforcementLearningBase: RLBase @testset "Approximators" begin #= These may need to be updated due to recent changes @testset "TabularApproximator" begin - A = TabularVApproximator(; n_state = 2, opt = InvDecay(1.0)) + A = TabularVApproximator(; n_state = 2) @test A(1) == 0.0 @test A(2) == 0.0 diff --git a/src/ReinforcementLearningFarm/Project.toml b/src/ReinforcementLearningFarm/Project.toml index ba93671d8..e72610565 100644 --- 
a/src/ReinforcementLearningFarm/Project.toml +++ b/src/ReinforcementLearningFarm/Project.toml @@ -3,22 +3,26 @@ uuid = "14eff660-7080-4cec-bba2-cfb12cd77ac3" version = "0.0.1" [deps] +Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c" +LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" ReinforcementLearningBase = "e575027e-6cd6-5018-9292-cdc6200d2b44" ReinforcementLearningCore = "de1b191a-4ae0-4afa-a27b-92d07f46b2d6" [compat] ReinforcementLearningBase = "0.12" ReinforcementLearningCore = "0.14" +ReinforcementLearningEnvironments = "0.8" julia = "1.9" [extras] CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" Metal = "dde4c033-4e86-420c-a63e-0dd931031962" Preferences = "21216c6a-2e73-6563-6e65-726566657250" +ReinforcementLearningEnvironments = "25e41dd2-4622-11e9-1641-f1adca772921" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" cuDNN = "02a925ec-e4fe-4b08-9a7e-0d78e3d38ccd" [targets] -test = ["CUDA", "Metal", "Preferences", "Test", "UUIDs", "cuDNN"] +test = ["CUDA", "Metal", "Preferences", "ReinforcementLearningEnvironments", "Test", "UUIDs", "cuDNN"] diff --git a/src/ReinforcementLearningFarm/README.md b/src/ReinforcementLearningFarm/README.md index 8c59fc295..8d23a3161 100644 --- a/src/ReinforcementLearningFarm/README.md +++ b/src/ReinforcementLearningFarm/README.md @@ -2,4 +2,4 @@ This project contains updated, tested algorithms compatible with ReinforcementLearning.jl v0.11+. -Unlike ReinforcementLearningZoo, the algorithms here have been domesticated. +Unlike `ReinforcementLearningZoo`, the algorithms here have been domesticated. diff --git a/src/ReinforcementLearningFarm/src/ReinforcementLearningFarm.jl b/src/ReinforcementLearningFarm/src/ReinforcementLearningFarm.jl index 8e22f7e58..7281421b4 100644 --- a/src/ReinforcementLearningFarm/src/ReinforcementLearningFarm.jl +++ b/src/ReinforcementLearningFarm/src/ReinforcementLearningFarm.jl @@ -5,4 +5,6 @@ using ReinforcementLearningCore const RLFarm = ReinforcementLearningFarm export RLFarm +include("algorithms/algorithms.jl") + end # module diff --git a/src/ReinforcementLearningFarm/src/algorithms/algorithms.jl b/src/ReinforcementLearningFarm/src/algorithms/algorithms.jl new file mode 100644 index 000000000..48fb131b9 --- /dev/null +++ b/src/ReinforcementLearningFarm/src/algorithms/algorithms.jl @@ -0,0 +1 @@ +include("tabular/tabular.jl") diff --git a/src/ReinforcementLearningFarm/src/algorithms/tabular/tabular.jl b/src/ReinforcementLearningFarm/src/algorithms/tabular/tabular.jl new file mode 100644 index 000000000..e69de29bb diff --git a/src/ReinforcementLearningFarm/test/algorithms/algorithms.jl b/src/ReinforcementLearningFarm/test/algorithms/algorithms.jl new file mode 100644 index 000000000..48fb131b9 --- /dev/null +++ b/src/ReinforcementLearningFarm/test/algorithms/algorithms.jl @@ -0,0 +1 @@ +include("tabular/tabular.jl") diff --git a/src/ReinforcementLearningFarm/test/algorithms/tabular/tabular.jl b/src/ReinforcementLearningFarm/test/algorithms/tabular/tabular.jl new file mode 100644 index 000000000..e69de29bb diff --git a/src/ReinforcementLearningFarm/test/runtests.jl b/src/ReinforcementLearningFarm/test/runtests.jl index e3819a6b7..6e4db5d12 100644 --- a/src/ReinforcementLearningFarm/test/runtests.jl +++ b/src/ReinforcementLearningFarm/test/runtests.jl @@ -12,6 +12,11 @@ else end using Test -@testset "ReinforcementLearningZoo.jl" begin +using ReinforcementLearningBase +using ReinforcementLearningCore +using 
ReinforcementLearningEnvironments +using ReinforcementLearningFarm +@testset "ReinforcementLearningFarm.jl" begin + include("algorithms/algorithms.jl") end