Optimize negation: avoid combinatorial explosion

aherlihy · guillembartrina · Oct 19, 2023 · Oct 19, 2023 · Oct 22, 2023 · Oct 23, 2023
commit 5a1807445cc24cc60e593ca40e27ea4a46c88a39
diff --git a/src/main/scala/datalog/execution/BytecodeCompiler.scala b/src/main/scala/datalog/execution/BytecodeCompiler.scala
@@ -102,10 +102,16 @@ class BytecodeCompiler(val storageManager: StorageManager)(using JITOptions) ext
             .constantInstruction(rId)
           emitSMCall(xb, meth, classOf[Int])
 
-        case ComplementOp(arity) =>
+        case GroundOfOp(cols) =>
           xb.aload(0)
-            .constantInstruction(arity)
-          emitSMCall(xb, "getComplement", classOf[Int])
+          emitCols(xb, cols)
+          emitSMCall(xb, "getGroundOf", classOf[Seq[?]])
+
+        case ZeroOutOp(child, cols) =>
+          xb.aload(0)
+          traverse(xb, child)
+          emitSeq(xb, cols.map(v => xxb => emitBoolean(xxb, v)))
+          emitSMCall(xb, "zeroOut", classOf[EDB], classOf[Seq[?]])
 
         case ScanEDBOp(rId) =>
           xb.aload(0)

diff --git a/src/main/scala/datalog/execution/BytecodeGenerator.scala b/src/main/scala/datalog/execution/BytecodeGenerator.scala
@@ -179,6 +179,12 @@ object BytecodeGenerator {
     else
       xb.constantInstruction(0)
 
+  /** Emit `Boolean.valueOf($value)`: pushes 1 or 0 for `value`, then invokes the static boxing method so a `java.lang.Boolean` results. */
+  def emitBoolean(xb: CodeBuilder, value: Boolean): Unit =
+    xb.constantInstruction(if value then 1 else 0) // the primitive boolean is pushed as the int constant 1 or 0
+      .invokestatic(clsDesc(classOf[java.lang.Boolean]), "valueOf",
+        MethodTypeDesc.of(clsDesc(classOf[java.lang.Boolean]), clsDesc(classOf[Boolean]))) // descriptor: (boolean) -> java.lang.Boolean
+
   def emitSeqInt(xb: CodeBuilder, value: Seq[Int]): Unit =
     emitSeq(xb, value.map(v => xxb => emitInteger(xxb, v)))
 
@@ -248,6 +254,17 @@ object BytecodeGenerator {
     }
   }
 
+  def emitEither[A, B](xb: CodeBuilder, either: Either[A, B], emitA: (CodeBuilder, A) => Unit, emitB: (CodeBuilder, B) => Unit): Unit =
+    either match // mirror the side of `either`; `emitA`/`emitB` are the callbacks that emit the wrapped value
+      case Left(value) =>
+        emitNew(xb, classOf[Left[A, B]], { xxb => // presumably emits `new Left(...)` — confirm against emitNew
+          emitA(xxb, value)
+        })
+      case Right(value) =>
+        emitNew(xb, classOf[Right[A, B]], { xxb => // presumably emits `new Right(...)` — confirm against emitNew
+          emitB(xxb, value)
+        })
+
   def emitProjIndexes(xb: CodeBuilder, value: Seq[(String, Constant)]): Unit =
     emitSeq(xb, value.map(v => xxb => emitStringConstantTuple2(xxb, v)))
 
@@ -268,6 +285,7 @@ object BytecodeGenerator {
   def emitCxns(xb: CodeBuilder, value: collection.mutable.Map[String, collection.mutable.Map[Int, Seq[String]]]): Unit =
     emitMap(xb, value.toSeq, emitString, emitCxnElement)
 
+  /*
   def emitJoinIndexes(xb: CodeBuilder, value: JoinIndexes): Unit =
     emitNew(xb, classOf[JoinIndexes], xxb =>
       emitVarIndexes(xxb, value.varIndexes)
@@ -277,7 +295,11 @@ object BytecodeGenerator {
 //      emitArrayAtoms(xxb, value.atoms)
       emitSeq(xb, value.atoms.map(a => xxb => emitAtom(xxb, a)))
       emitCxns(xxb, value.cxns)
-      emitBool(xxb, value.edb))
+      // TODO: Missing negationInfo!
+      emitBool(xxb, value.edb),
+      // TODO: Missing groupingInfos!
+      )
+  */
 
   def emitStorageAggOp(xb: CodeBuilder, sao: StorageAggOp): Unit =
     val enumCompanionCls = classOf[StorageAggOp.type]
@@ -315,6 +337,18 @@ object BytecodeGenerator {
       emitSeqInt(xxb, value.groupingIndexes)
       emitAggOpInfos(xxb, value.aggOpInfos))
 
+  def emitCols(xb: CodeBuilder, value: Seq[Either[Constant, Seq[(RelationId, Int)]]]): Unit = // emits a Seq with one Either per entry of `value`
+    emitSeq(xb, value.map(v => xxb =>
+      emitEither(xxb, v, emitConstant, (xxxb, s) => // Left side: a constant; Right side: a sequence of (RelationId, Int) pairs
+        emitSeq(xxxb, s.map(vv => xxxxb =>
+          emitNew(xxxxb, classOf[(Int, Int)], xxxxxb => // NOTE(review): assumes RelationId is an Int so each pair is a Tuple2 of Ints — confirm
+            emitInteger(xxxxxb, vv._1)
+            emitInteger(xxxxxb, vv._2)
+          )
+        ))
+      )
+    ))
+
   val CD_BoxedUnit = clsDesc(classOf[scala.runtime.BoxedUnit])
 
   /** Emit `BoxedUnit.UNIT`. */

diff --git a/src/main/scala/datalog/execution/JoinIndexes.scala b/src/main/scala/datalog/execution/JoinIndexes.scala
@@ -43,6 +43,7 @@ case class JoinIndexes(varIndexes: Seq[Seq[Int]],
                        deps: Seq[(PredicateType, RelationId)],
                        atoms: Seq[Atom],
                        cxns: mutable.Map[String, mutable.Map[Int, Seq[String]]],
+                       negationInfo: Map[String, Seq[Either[Constant, Seq[(RelationId, Int)]]]],
                        edb: Boolean = false,
                        groupingIndexes: Map[String, GroupingJoinIndexes] = Map.empty
                       ) {
@@ -54,6 +55,7 @@ case class JoinIndexes(varIndexes: Seq[Seq[Int]],
       ", deps:" + depsToString(ns) +
       ", edb:" + edb +
       ", cxn: " + cxnsToString(ns) +
+      ", negation: " + negationToString(ns) +
       " }"
 
   def varToString(): String = varIndexes.map(v => v.mkString("$", "==$", "")).mkString("[", ",", "]")
@@ -66,6 +68,13 @@ case class JoinIndexes(varIndexes: Seq[Seq[Int]],
         inCommon.map((count, hashs) =>
           count.toString + ": " + hashs.map(h => ns.hashToAtom(h)).mkString("", "|", "")
         ).mkString("", ", ", "")} }").mkString("[", ",\n", "]")
+  def negationToString(ns: NS): String = // renders one "{ atom => entry, entry, ... }" item per key of negationInfo
+    negationInfo.map((h, infos) =>
+      s"{ ${ns.hashToAtom(h)} => ${
+        infos.map{
+          case Left(value) => value // a constant term is printed as-is
+          case Right(value) => value.map((r, c) => s"(${ns(r)}, $c)").mkString("[ ", ", ", " ]") // (relation, column) pairs
+        }.mkString(", ")} }").mkString("[", ",\n", "]")
   val hash: String = atoms.map(a => a.hash).mkString("", "", "")
 }
 
@@ -137,6 +146,16 @@ object JoinIndexes {
       )).to(mutable.Map)
     )
 
+    // For every named (non-anonymous) variable: the (relation, column) positions where it occurs in non-negated body atoms.
+    val variables2 = body.filterNot(_.negated).flatMap(a => a.terms.zipWithIndex.collect{ case (v: Variable, i) if !v.anon => (v, (a.rId, i)) }).groupMap(_._1)(_._2)
+
+    val negationInfo = body.filter(_.negated).map(a =>
+      a.hash -> a.terms.map{
+        case c: Constant => Left(c)
+        case v: Variable => Right(if v.anon then Seq() else variables2(v))
+      }
+    ).toMap
+
     //groupings
     val groupingIndexes = precalculatedGroupingIndexes.getOrElse(
       body.collect{ case ga: GroupingAtom => ga }.map(ga =>
@@ -166,7 +185,7 @@ object JoinIndexes {
       ).toMap
     )
 
-    new JoinIndexes(bodyVars, constants.to(mutable.Map), projects, deps, rule, cxns, edb = false, groupingIndexes = groupingIndexes)
+    new JoinIndexes(bodyVars, constants.to(mutable.Map), projects, deps, rule, cxns, negationInfo, edb = false, groupingIndexes = groupingIndexes)
   }
 
   // used to approximate poor user-defined order

diff --git a/src/main/scala/datalog/execution/LambdaCompiler.scala b/src/main/scala/datalog/execution/LambdaCompiler.scala
@@ -148,8 +148,12 @@ class LambdaCompiler(val storageManager: StorageManager)(using JITOptions) exten
           }
       }
 
-    case ComplementOp(arity) =>
-      _.getComplement(arity)
+    case GroundOfOp(cols) =>
+      _.getGroundOf(cols)
+
+    case ZeroOutOp(child, cols) =>
+      val clh = compile(child)
+      sm => sm.zeroOut(clh(sm), cols)
 
     case ScanEDBOp(rId) =>
       if (storageManager.edbContains(rId))

diff --git a/src/main/scala/datalog/execution/NaiveExecutionEngine.scala b/src/main/scala/datalog/execution/NaiveExecutionEngine.scala
@@ -39,17 +39,11 @@ class NaiveExecutionEngine(val storageManager: StorageManager, stratified: Boole
     idbs.getOrElseUpdate(rId, mutable.ArrayBuffer[IndexedSeq[Atom]]()).addOne(rule.toIndexedSeq)
     val jIdx = getOperatorKey(rule)
     prebuiltOpKeys.getOrElseUpdate(rId, mutable.ArrayBuffer[JoinIndexes]()).addOne(jIdx)
-    storageManager.addConstantsToDomain(jIdx.constIndexes.values.toSeq)
-
-    // We need to add the constants occurring in the grouping predicates of the grouping atoms
-    rule.collect{ case ga: GroupingAtom => ga}.foreach(ga =>
-      storageManager.addConstantsToDomain(jIdx.groupingIndexes(ga.hash).constIndexes.values.toSeq)
-    )
   }
 
   def insertEDB(rule: Atom): Unit = {
     if (!storageManager.edbContains(rule.rId))
-      prebuiltOpKeys.getOrElseUpdate(rule.rId, mutable.ArrayBuffer[JoinIndexes]()).addOne(JoinIndexes(IndexedSeq(), mutable.Map(), IndexedSeq(), Seq((PredicateType.POSITIVE, rule.rId)), Seq(rule), mutable.Map.empty, true))
+      prebuiltOpKeys.getOrElseUpdate(rule.rId, mutable.ArrayBuffer[JoinIndexes]()).addOne(JoinIndexes(IndexedSeq(), mutable.Map(), IndexedSeq(), Seq((PredicateType.POSITIVE, rule.rId)), Seq(rule), mutable.Map.empty, Map.empty, true))
     storageManager.insertEDB(rule)
   }
 

diff --git a/src/main/scala/datalog/execution/QuoteCompiler.scala b/src/main/scala/datalog/execution/QuoteCompiler.scala
@@ -72,6 +72,7 @@ class QuoteCompiler(val storageManager: StorageManager)(using JITOptions) extend
           ${ Expr(x.deps) },
           ${ Expr(x.atoms) },
           ${ Expr(x.cxns) },
+          ${ Expr(x.negationInfo) },
           ${ Expr(x.edb) }
         )
       }
@@ -135,8 +136,12 @@ class QuoteCompiler(val storageManager: StorageManager)(using JITOptions) extend
             }
         }
 
-      case ComplementOp(arity) =>
-        '{ $stagedSM.getComplement(${ Expr(arity) }) }
+      case GroundOfOp(cols) =>
+        '{ $stagedSM.getGroundOf(${ Expr(cols) }) }
+
+      case ZeroOutOp(child, cols) =>
+        val clh = compileIRRelOp(child)
+        '{ $stagedSM.zeroOut($clh, ${ Expr(cols) }) }
 
       case ScanEDBOp(rId) =>
         if (storageManager.edbContains(rId))

diff --git a/src/main/scala/datalog/execution/StagedExecutionEngine.scala b/src/main/scala/datalog/execution/StagedExecutionEngine.scala
@@ -349,9 +349,12 @@ class StagedExecutionEngine(val storageManager: StorageManager, val defaultJITOp
       case op: ScanEDBOp =>
         op.run(storageManager)
 
-      case op: ComplementOp =>
+      case op: GroundOfOp =>
         op.run(storageManager)
 
+      case op: ZeroOutOp =>
+        op.run_continuation(storageManager, op.children.map(o => (sm: StorageManager) => jit(o)))
+
       case op: ProjectJoinFilterOp =>
         op.run_continuation(storageManager, op.children.map(o => (sm: StorageManager) => jit(o)))
 

diff --git a/src/main/scala/datalog/execution/StagedSnippetCompiler.scala b/src/main/scala/datalog/execution/StagedSnippetCompiler.scala
@@ -67,6 +67,7 @@ class StagedSnippetCompiler(val storageManager: StorageManager)(using val jitOpt
           ${ Expr(x.deps) },
           ${ Expr(x.atoms) },
           ${ Expr(x.cxns) },
+          ${ Expr(x.negationInfo) },
           ${ Expr(x.edb) },
         ) }
     }
@@ -127,8 +128,11 @@ class StagedSnippetCompiler(val storageManager: StorageManager)(using val jitOpt
             }
         }
 
-      case ComplementOp(arity) =>
-        '{ $stagedSM.getComplement(${ Expr(arity) }) }
+      case GroundOfOp(cols) =>
+        '{ $stagedSM.getGroundOf(${ Expr(cols) }) }
+
+      case ZeroOutOp(child, cols) =>
+        '{ $stagedSM.zeroOut($stagedFns.head($stagedSM), ${ Expr(cols) }) }
 
       case ScanEDBOp(rId) =>
         if (storageManager.edbContains(rId))

diff --git a/src/main/scala/datalog/execution/StagedSnippetExecutionEngine.scala b/src/main/scala/datalog/execution/StagedSnippetExecutionEngine.scala
@@ -115,9 +115,12 @@ class StagedSnippetExecutionEngine(override val storageManager: StorageManager,
       case op: DebugPeek =>
         op.run_continuation(storageManager, op.children.map(o => (sm: StorageManager) => jit(o)))
 
-      case op: ComplementOp =>
+      case op: GroundOfOp =>
         op.run(storageManager)
 
+      case op: ZeroOutOp =>
+        op.run_continuation(storageManager, op.children.map(o => (sm: StorageManager) => jit(o)))
+
       case _ => throw new Exception(s"Error: interpretRelOp called with unit operation: code=${irTree.code}")
     }
   }

diff --git a/src/main/scala/datalog/execution/ir/IROp.scala b/src/main/scala/datalog/execution/ir/IROp.scala
@@ -18,7 +18,7 @@ import scala.util.{Failure, Success}
 enum OpCode:
   case PROGRAM, SWAP_CLEAR, SEQ,
   SCAN, SCANEDB, SCAN_DISCOVERED,
-  COMPLEMENT,
+  GROUNDOF, ZEROOUT,
   SPJ, INSERT, UNION, DIFF,
   GROUPING,
   DEBUG, DEBUGP, DOWHILE, UPDATE_DISCOVERED,
@@ -196,16 +196,26 @@ case class InsertOp(rId: RelationId, db: DB, knowledge: KNOWLEDGE, override val
     }
 }
 
-case class ComplementOp(arity: Int)(using JITOptions) extends IROp[EDB] {
-  val code: OpCode = OpCode.COMPLEMENT
+case class GroundOfOp(cols: Seq[Either[Constant, Seq[(RelationId, Int)]]])(using JITOptions) extends IROp[EDB] { // takes over ComplementOp's place; `cols` is handed unchanged to the storage manager
+  val code: OpCode = OpCode.GROUNDOF
 
   override def run(storageManager: StorageManager): EDB =
-    storageManager.getComplement(arity)
+    storageManager.getGroundOf(cols) // all work is delegated to StorageManager.getGroundOf
 
   override def run_continuation(storageManager: StorageManager, opFns: Seq[CompiledFn[EDB]]): EDB =
     run(storageManager) // bc leaf node, no difference for continuation or run
 }
 
+case class ZeroOutOp(child: IROp[EDB], var cols: Seq[Boolean])(using JITOptions) extends IROp[EDB](child) { // unary op: passes `child`'s relation through StorageManager.zeroOut
+  val code: OpCode = OpCode.ZEROOUT
+
+  override def run(storageManager: StorageManager): EDB =
+    storageManager.zeroOut(child.run(storageManager), cols) // interpret the child first, then apply zeroOut; presumably one flag per column — confirm
+
+  override def run_continuation(storageManager: StorageManager, opFns: Seq[CompiledFn[EDB]]): EDB =
+    storageManager.zeroOut(opFns(0)(storageManager), cols) // opFns(0): continuation for the only child (callers build opFns from op.children)
+}
+
 case class ScanOp(rId: RelationId, db: DB, knowledge: KNOWLEDGE)(using JITOptions) extends IROp[EDB] {
   val code: OpCode = OpCode.SCAN
 

diff --git a/src/main/scala/datalog/execution/ir/IRTreeGenerator.scala b/src/main/scala/datalog/execution/ir/IRTreeGenerator.scala
@@ -68,9 +68,13 @@ class IRTreeGenerator(using val ctx: InterpreterContext)(using JITOptions) {
               val q = ScanOp(r, DB.Derived, KNOWLEDGE.Known)
               typ match
                 case PredicateType.NEGATED =>
-                  val arity = k.atoms(i + 1).terms.length
-                  val res = DiffOp(ComplementOp(arity), q)
-                  debug(s"found negated relation, rule=", () => s"${ctx.storageManager.printer.ruleToString(k.atoms)}\n\tarity=$arity")
+                  val nis = k.negationInfo(k.atoms(i + 1).hash)
+                  val cols = nis.map(_.exists(_.isEmpty))
+
+                  val compl = GroundOfOp(nis)
+                  val nq = ZeroOutOp(q, cols)
+                  val res = DiffOp(compl, nq)
+                  debug(s"found negated relation, rule=", () => s"${ctx.storageManager.printer.ruleToString(k.atoms)}")
                   res
                 case PredicateType.GROUPING =>
                   val ga = k.atoms(i + 1).asInstanceOf[GroupingAtom]
@@ -119,9 +123,13 @@ class IRTreeGenerator(using val ctx: InterpreterContext)(using JITOptions) {
                     ScanOp(r, DB.Derived, KNOWLEDGE.Known)
                   typ match
                     case PredicateType.NEGATED =>
-                      val arity = k.atoms(i + 1).terms.length
-                      val res = DiffOp(ComplementOp(arity), q)
-                      debug(s"found negated relation, rule=", () => s"${ctx.storageManager.printer.ruleToString(k.atoms)}\n\tarity=$arity")
+                      val nis = k.negationInfo(k.atoms(i + 1).hash)
+                      val cols = nis.map(_.exists(_.isEmpty))
+
+                      val compl = GroundOfOp(nis)
+                      val nq = ZeroOutOp(q, cols)
+                      val res = DiffOp(compl, nq)
+                      debug(s"found negated relation, rule=", () => s"${ctx.storageManager.printer.ruleToString(k.atoms)}")
                       res
                     case PredicateType.GROUPING =>
                       val ga = k.atoms(i + 1).asInstanceOf[GroupingAtom]