diff --git a/ouroboros-network/CHANGELOG.md b/ouroboros-network/CHANGELOG.md index dfd8b843850..a4fde1411a9 100644 --- a/ouroboros-network/CHANGELOG.md +++ b/ouroboros-network/CHANGELOG.md @@ -6,6 +6,12 @@ ### Non-breaking changes +## 0.19.0.3 -- 2025-03-11 + +### Non-breaking changes + +* `IOError` rethrow policy changed. + ## 0.19.0.2 -- 2025-02-03 ### Breaking changes diff --git a/ouroboros-network/ouroboros-network.cabal b/ouroboros-network/ouroboros-network.cabal index 478e418c9a9..a082b822db7 100644 --- a/ouroboros-network/ouroboros-network.cabal +++ b/ouroboros-network/ouroboros-network.cabal @@ -1,6 +1,6 @@ cabal-version: 3.0 name: ouroboros-network -version: 0.19.0.2 +version: 0.19.0.3 synopsis: A networking layer for the Ouroboros blockchain protocol description: A networking layer for the Ouroboros blockchain protocol. license: Apache-2.0 diff --git a/ouroboros-network/sim-tests-lib/Test/Ouroboros/Network/Testnet.hs b/ouroboros-network/sim-tests-lib/Test/Ouroboros/Network/Testnet.hs index 805be5cfecb..6315144978d 100644 --- a/ouroboros-network/sim-tests-lib/Test/Ouroboros/Network/Testnet.hs +++ b/ouroboros-network/sim-tests-lib/Test/Ouroboros/Network/Testnet.hs @@ -99,7 +99,6 @@ import Test.QuickCheck import Test.QuickCheck.Monoids import Test.Tasty import Test.Tasty.QuickCheck -import Ouroboros.Network.Diffusion.P2P (isFatal) tests :: TestTree tests = @@ -1576,7 +1575,6 @@ unit_4191 = testWithIOSim prop_diffusion_dns_can_recover 125000 absInfo script -- prop_connect_failure :: AbsIOError -> Property prop_connect_failure (AbsIOError ioerr) = - label (if isFatal (ioe_type ioerr) then "fatal IOError" else "non-fatal IOError") $ testWithIOSim (\trace _ -> let -- events generated by `nodeAddr` @@ -1594,18 +1592,10 @@ prop_connect_failure (AbsIOError ioerr) = evs = eventsToList (selectDiffusionSimulationTrace events) in counterexample (Trace.ppTrace show (ppSimEvent 0 0 0) . Trace.take noEvents $ trace) . counterexample (show evs) - . (if isFatal (ioe_type ioerr) - then -- verify that the node was killed by the right exception - any (\case - TrErrored e | Just (ExceptionInHandler _ e') <- fromException e - , Just e'' <- fromException e' - , e'' == ioerr - -> True - _ -> False) - else -- verify that the ndoe was not killed by the `ioerr` exception - all (\case - TrErrored {} -> False - _ -> True) + . ( -- verify that the node was not killed by the `IOError` + all (\case + TrErrored {} -> False + _ -> True) ) . map snd $ evs diff --git a/ouroboros-network/src/Ouroboros/Network/Diffusion/P2P.hs b/ouroboros-network/src/Ouroboros/Network/Diffusion/P2P.hs index f9243857d2e..15cd06fb02c 100644 --- a/ouroboros-network/src/Ouroboros/Network/Diffusion/P2P.hs +++ b/ouroboros-network/src/Ouroboros/Network/Diffusion/P2P.hs @@ -26,7 +26,6 @@ module Ouroboros.Network.Diffusion.P2P , Interfaces (..) , runM , NodeToNodePeerConnectionHandle - , isFatal -- * Re-exports , AbstractTransitionTrace , RemoteTransitionTrace @@ -56,7 +55,7 @@ import Data.Maybe (catMaybes, maybeToList) import Data.Proxy (Proxy (..)) import Data.Typeable (Typeable) import Data.Void (Void) -import GHC.IO.Exception (IOException (..), IOErrorType (..)) +import GHC.IO.Exception (IOException (..)) import System.Exit (ExitCode) import System.Random (StdGen, newStdGen, split) #ifdef POSIX @@ -737,11 +736,28 @@ runM Interfaces Just (_ :: IOManagerError) -> ShutdownNode Nothing -> mempty) <> + -- IOError rethrow-policy + -- + -- After a critical bug, we decided that `IOError` policy should only + -- kill the connection which thrown it. `IOError`s are not propagated. + -- There's a risk that one could arm an attack if one discovers + -- a mechanism to trigger fatal `IOError`s, e.g. through a kernel bug. + -- + -- It is responsibility for an SPO to monitor the node if it is making + -- progress and have enough resources to do so, e.g. if it has enough + -- memory, file descriptors. + -- + -- The `ouroboros-network` guarantees running on a fixed number of file + -- descriptors given a topology file, see + -- https://github.com/IntersectMBO/ouroboros-network/issues/4585#issuecomment-1591777447 + -- There's also a calculation for `ouroboros-consensus`, see + -- https://github.com/IntersectMBO/ouroboros-consensus/issues/20#issuecomment-1514554680 + -- File descriptors could be drained by the tracing system in + -- `cardano-node` (such a bug existed), or even an external process. + -- RethrowPolicy (\_ctx err -> - case fromException err of - Just IOError { ioe_type } -> - if isFatal ioe_type then ShutdownNode - else mempty + case fromException err :: Maybe IOException of + Just {} -> mempty Nothing -> mempty) <> RethrowPolicy (\ctx err -> case (ctx, fromException err) of @@ -1311,44 +1327,6 @@ run tracers tracersExtra args argsExtra apps appsExtra = do } tracers tracersExtra args argsExtra apps appsExtra --- --- Fatal Errors --- --- If we are out of file descriptors (either because we exhausted --- process or system limit) we should shut down the node and let the --- operator investigate. --- --- Refs: --- * https://hackage.haskell.org/package/ghc-internal-9.1001.0/docs/src/GHC.Internal.Foreign.C.Error.html#errnoToIOError --- * man socket.2 --- * man connect.2 --- * man accept.2 --- NOTE: many `connect` and `accept` exceptions are classified as --- `OtherError`, here we only distinguish fatal IO errors (e.g. --- ones that propagate to the main thread). --- NOTE: we don't use the rethrow policy for `accept` calls, where --- all but `ECONNABORTED` are fatal exceptions. --- --- NOTE: UnsupportedOperation error type is not considered fatal since it can --- happen on a race condition between the connect and accept call between two --- nodes that have each other as local roots. --- -isFatal :: IOErrorType -> Bool -isFatal ResourceExhausted = True - -- EAGAIN -- connect, accept - -- EMFILE -- socket, accept - -- ENFILE -- socket, accept - -- ENOBUFS -- socket, accept - -- ENOMEM -- socket, accept -isFatal InvalidArgument = True - -- EINVAL -- socket, accept - -- ENOTSOCK -- connect - -- EBADF -- connect, accept -isFatal ProtocolError = True - -- EPROTONOSUPPOPRT -- socket - -- EPROTO -- accept -isFatal _ = False - -- -- Data flow --