Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 8 additions & 6 deletions src/main/scala/xiangshan/XSCore.scala
Original file line number Diff line number Diff line change
Expand Up @@ -383,8 +383,8 @@ class XSCoreImp(outer: XSCore) extends LazyModuleImp(outer)
slowWakeUpOut = intBlockSlowWakeUp
))
val floatBlock = Module(new FloatBlock(
fastWakeUpIn = Seq(),
slowWakeUpIn = intExuConfigs.filter(_.writeFpRf) ++ loadExuConfigs,
intSlowWakeUpIn = intExuConfigs.filter(_.writeFpRf),
memSlowWakeUpIn = loadExuConfigs,
fastWakeUpOut = Seq(),
slowWakeUpOut = fpExuConfigs
))
Expand All @@ -411,9 +411,10 @@ class XSCoreImp(outer: XSCore) extends LazyModuleImp(outer)
ctrlBlock.io.toFpBlock <> floatBlock.io.fromCtrlBlock
ctrlBlock.io.toLsBlock <> memBlock.io.fromCtrlBlock

val memBlockWakeUpInt = memBlock.io.wakeUpOut.slow.map(x => intOutValid(x))
val memBlockWakeUpFp = memBlock.io.wakeUpOut.slow.map(x => fpOutValid(x))
memBlock.io.wakeUpOut.slow.foreach(_.ready := true.B)
val memBlockWakeUpInt = memBlock.io.wakeUpOutInt.slow.map(x => intOutValid(x))
val memBlockWakeUpFp = memBlock.io.wakeUpOutFp.slow.map(x => fpOutValid(x))
memBlock.io.wakeUpOutInt.slow.foreach(_.ready := true.B)
memBlock.io.wakeUpOutFp.slow.foreach(_.ready := true.B)

fpExuConfigs.zip(floatBlock.io.wakeUpOut.slow).filterNot(_._1.writeIntRf).map(_._2.ready := true.B)
val fpBlockWakeUpInt = fpExuConfigs
Expand All @@ -430,7 +431,8 @@ class XSCoreImp(outer: XSCore) extends LazyModuleImp(outer)
integerBlock.io.wakeUpIn.slow <> fpBlockWakeUpInt ++ memBlockWakeUpInt
integerBlock.io.toMemBlock <> memBlock.io.fromIntBlock

floatBlock.io.wakeUpIn.slow <> intBlockWakeUpFp ++ memBlockWakeUpFp
floatBlock.io.intWakeUpFp <> intBlockWakeUpFp
floatBlock.io.memWakeUpFp <> memBlockWakeUpFp
floatBlock.io.toMemBlock <> memBlock.io.fromFpBlock

val wakeUpMem = Seq(
Expand Down
11 changes: 2 additions & 9 deletions src/main/scala/xiangshan/backend/CtrlBlock.scala
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,6 @@ class CtrlBlock extends XSModule with HasCircularQueuePtrHelper {
val redirectGen = Module(new RedirectGenerator)

val roqWbSize = NRIntWritePorts + NRFpWritePorts + exuParameters.StuCnt

val roq = Module(new Roq(roqWbSize))

val backendRedirect = redirectGen.io.stage2Redirect
Expand All @@ -227,7 +226,7 @@ class CtrlBlock extends XSModule with HasCircularQueuePtrHelper {

redirectGen.io.exuMispredict.zip(io.fromIntBlock.exuRedirect).map({case (x, y) =>
val misPred = y.valid && y.bits.redirect.cfiUpdate.isMisPred
val killedByOlder = y.bits.uop.roqIdx.needFlush(backendRedirect, flush)
val killedByOlder = y.bits.uop.roqIdx.needFlush(backendRedirect, flushReg)
x.valid := RegNext(misPred && !killedByOlder, init = false.B)
x.bits := RegEnable(y.bits, y.valid)
})
Expand Down Expand Up @@ -327,13 +326,7 @@ class CtrlBlock extends XSModule with HasCircularQueuePtrHelper {
fpBusyTable.io.read <> dispatch.io.readFpState

roq.io.redirect <> backendRedirect
roq.io.exeWbResults.zip(
io.fromIntBlock.wbRegs ++ io.fromFpBlock.wbRegs ++ io.fromLsBlock.stOut
).foreach{
case(x, y) =>
x.bits := y.bits
x.valid := y.valid
}
roq.io.exeWbResults <> (io.fromIntBlock.wbRegs ++ io.fromFpBlock.wbRegs ++ io.fromLsBlock.stOut)

// TODO: is 'backendRedirect' necessary?
io.toIntBlock.redirect <> backendRedirect
Expand Down
107 changes: 61 additions & 46 deletions src/main/scala/xiangshan/backend/FloatBlock.scala
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import utils._
import xiangshan.backend.regfile.Regfile
import xiangshan.backend.exu._
import xiangshan.backend.issue.ReservationStation
import xiangshan.mem.HasLoadHelper
import xiangshan.mem.{HasFpLoadHelper, HasLoadHelper}


class FpBlockToCtrlIO extends XSBundle {
Expand All @@ -17,17 +17,18 @@ class FpBlockToCtrlIO extends XSBundle {

class FloatBlock
(
fastWakeUpIn: Seq[ExuConfig],
slowWakeUpIn: Seq[ExuConfig],
intSlowWakeUpIn: Seq[ExuConfig],
memSlowWakeUpIn: Seq[ExuConfig],
fastWakeUpOut: Seq[ExuConfig],
slowWakeUpOut: Seq[ExuConfig],
) extends XSModule with HasExeBlockHelper with HasLoadHelper {
) extends XSModule with HasExeBlockHelper with HasFpLoadHelper {
val io = IO(new Bundle {
val fromCtrlBlock = Flipped(new CtrlToFpBlockIO)
val toCtrlBlock = new FpBlockToCtrlIO
val toMemBlock = new FpBlockToMemBlockIO

val wakeUpIn = new WakeUpBundle(fastWakeUpIn.size, slowWakeUpIn.size)
val intWakeUpFp = Vec(intSlowWakeUpIn.size, Flipped(DecoupledIO(new ExuOutput)))
val memWakeUpFp = Vec(memSlowWakeUpIn.size, Flipped(DecoupledIO(new ExuOutput)))
val wakeUpOut = Flipped(new WakeUpBundle(fastWakeUpOut.size, slowWakeUpOut.size))

// from csr
Expand All @@ -37,24 +38,32 @@ class FloatBlock
val redirect = io.fromCtrlBlock.redirect
val flush = io.fromCtrlBlock.flush

require(fastWakeUpIn.isEmpty)
val wakeUpInReg = Wire(Flipped(new WakeUpBundle(fastWakeUpIn.size, slowWakeUpIn.size)))
wakeUpInReg.slow.zip(io.wakeUpIn.slow).foreach{
case (inReg, in) =>
PipelineConnect(in, inReg, inReg.fire(), in.bits.uop.roqIdx.needFlush(redirect, flush))
}
val wakeUpInRecode = WireInit(wakeUpInReg)
for(((rec, reg), cfg) <- wakeUpInRecode.slow.zip(wakeUpInReg.slow).zip(slowWakeUpIn)){
rec.bits.data := {
if(cfg == Exu.ldExeUnitCfg) fpRdataHelper(reg.bits.uop, reg.bits.data)
else Mux(reg.bits.uop.ctrl.fpu.typeTagOut === S,
recode(reg.bits.data(31, 0), S),
recode(reg.bits.data(63, 0), D)
val intWakeUpFpReg = Wire(Vec(intSlowWakeUpIn.size, Flipped(DecoupledIO(new ExuOutput))))
intWakeUpFpReg.zip(io.intWakeUpFp).foreach{
case (inReg, wakeUpIn) =>
val in = WireInit(wakeUpIn)
wakeUpIn.ready := in.ready
in.valid := wakeUpIn.valid && !wakeUpIn.bits.uop.roqIdx.needFlush(redirect, flush)
PipelineConnect(in, inReg,
inReg.fire() || inReg.bits.uop.roqIdx.needFlush(redirect, flush), false.B
)
}
}
val intRecoded = WireInit(intWakeUpFpReg)
for(((rec, reg), cfg) <- intRecoded.zip(intWakeUpFpReg).zip(intSlowWakeUpIn)){
rec.bits.data := Mux(reg.bits.uop.ctrl.fpu.typeTagOut === S,
recode(reg.bits.data(31, 0), S),
recode(reg.bits.data(63, 0), D)
)
rec.bits.redirectValid := false.B
reg.ready := rec.ready
reg.ready := rec.ready || !rec.valid
}
val memRecoded = WireInit(io.memWakeUpFp)
for((rec, reg) <- memRecoded.zip(io.memWakeUpFp)){
rec.bits.data := fpRdataHelper(reg.bits.uop, reg.bits.data)
rec.bits.redirectValid := false.B
reg.ready := true.B
}
val wakeUpInRecode = intRecoded ++ memRecoded

val fpRf = Module(new Regfile(
numReadPorts = NRFpReadPorts,
Expand Down Expand Up @@ -91,7 +100,7 @@ class FloatBlock
val fastPortsCnt = inBlockWbData.length

val inBlockListenPorts = exeUnits.filter(e => e.config.hasUncertainlatency).map(_.io.out)
val slowPorts = (inBlockListenPorts ++ wakeUpInRecode.slow).map(decoupledIOToValidIO)
val slowPorts = (inBlockListenPorts ++ wakeUpInRecode).map(decoupledIOToValidIO)
val slowPortsCnt = slowPorts.length

println(s"${i}: exu:${cfg.name} fastPortsCnt: ${fastPortsCnt} " +
Expand Down Expand Up @@ -139,48 +148,54 @@ class FloatBlock
rs.io.fastUopsIn <> inBlockUops
}

val (recodeOut, ieeeOutReg) = exeUnits.map(e => {
val rec = WireInit(e.io.out)
val recReg = Wire(DecoupledIO(new ExuOutput))
PipelineConnect(
rec, recReg, recReg.fire(),
rec.bits.uop.roqIdx.needFlush(redirect, flush)
)
val ieeeReg = WireInit(recReg)
recReg.ready := ieeeReg.ready
ieeeReg.bits.data := Mux(recReg.bits.uop.ctrl.fpWen, ieee(recReg.bits.data), recReg.bits.data)
ieeeReg.bits.redirectValid := false.B
(rec, ieeeReg)
}).unzip

io.wakeUpOut.slow <> ieeeOutReg

// read fp rf from ctrl block
fpRf.io.readPorts.zipWithIndex.map{ case (r, i) => r.addr := io.fromCtrlBlock.readRf(i) }
(0 until exuParameters.StuCnt).foreach(i =>
io.toMemBlock.readFpRf(i).data := RegNext(ieee(fpRf.io.readPorts(i + 12).data))
)
// write fp rf arbiter
val fpWbArbiter = Module(new Wb(
(exeUnits.map(_.config) ++ fastWakeUpIn ++ slowWakeUpIn),
exeUnits.map(_.config) ++ intSlowWakeUpIn ++ memSlowWakeUpIn,
NRFpWritePorts,
isFp = true
))
fpWbArbiter.io.in <> exeUnits.map(e =>
if(e.config.writeIntRf) WireInit(e.io.out) else e.io.out
) ++ wakeUpInRecode.slow

exeUnits.zip(recodeOut).zip(fpWbArbiter.io.in).filter(_._1._1.config.writeIntRf).foreach {
case ((exu, wInt), wFp) =>
exu.io.out.ready := wInt.fire() || wFp.fire()
fpWbArbiter.io.in.drop(exeUnits.length).zip(wakeUpInRecode).foreach(x => x._1 <> x._2)

for((exu, i) <- exeUnits.zipWithIndex){
val out, outReg = Wire(DecoupledIO(new ExuOutput))
out.bits := exu.io.out.bits
out.valid := exu.io.out.valid && !out.bits.uop.roqIdx.needFlush(redirect, flush)
PipelineConnect(out, outReg,
outReg.fire() || outReg.bits.uop.roqIdx.needFlush(redirect, flush), false.B
)
io.wakeUpOut.slow(i).valid := outReg.valid
io.wakeUpOut.slow(i).bits := outReg.bits
io.wakeUpOut.slow(i).bits.redirectValid := false.B
io.wakeUpOut.slow(i).bits.data := Mux(outReg.bits.uop.ctrl.fpWen,
ieee(outReg.bits.data),
outReg.bits.data
)
fpWbArbiter.io.in(i).valid := exu.io.out.valid && exu.io.out.bits.uop.ctrl.fpWen && outReg.ready
fpWbArbiter.io.in(i).bits := exu.io.out.bits
if(exu.config.writeIntRf){
outReg.ready := !outReg.valid || (
io.wakeUpOut.slow(i).ready && outReg.bits.uop.ctrl.rfWen
) || outReg.bits.uop.ctrl.fpWen
// don't consider flush in 'intFire'
val intFire = exu.io.out.valid && out.ready && out.bits.uop.ctrl.rfWen
exu.io.out.ready := intFire || fpWbArbiter.io.in(i).fire() || !exu.io.out.valid
} else {
outReg.ready := true.B
exu.io.out.ready := fpWbArbiter.io.in(i).fire() || !exu.io.out.valid
}
}

// set busytable and update roq
io.toCtrlBlock.wbRegs <> fpWbArbiter.io.out

fpRf.io.writePorts.zip(fpWbArbiter.io.out).foreach{
case (rf, wb) =>
rf.wen := wb.valid && wb.bits.uop.ctrl.fpWen
rf.wen := wb.valid
rf.addr := wb.bits.uop.pdest
rf.data := wb.bits.data
}
Expand Down
21 changes: 15 additions & 6 deletions src/main/scala/xiangshan/backend/IntegerBlock.scala
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@ package xiangshan.backend
import chisel3._
import chisel3.util._
import xiangshan._
import xiangshan.backend.exu.Exu.{ldExeUnitCfg, stExeUnitCfg}
import xiangshan.backend.exu.Exu.{jumpExeUnitCfg, ldExeUnitCfg, stExeUnitCfg}
import xiangshan.backend.exu._
import xiangshan.backend.issue.ReservationStation
import xiangshan.backend.fu.{FenceToSbuffer, CSRFileIO}
import xiangshan.backend.issue.{ReservationStation}
import xiangshan.backend.regfile.Regfile

class WakeUpBundle(numFast: Int, numSlow: Int) extends XSBundle {
Expand Down Expand Up @@ -246,12 +246,21 @@ class IntegerBlock
isFp = false
))
intWbArbiter.io.in <> exeUnits.map(e => {
if(e.config.writeFpRf) WireInit(e.io.out) else e.io.out
val w = WireInit(e.io.out)
val fpWen = if(e.config.writeFpRf) e.io.out.bits.uop.ctrl.fpWen else false.B
w.valid := e.io.out.valid && !fpWen
w
}) ++ io.wakeUpIn.slow

exeUnits.zip(intWbArbiter.io.in).filter(_._1.config.writeFpRf).zip(io.wakeUpIn.slow).foreach{
case ((exu, wInt), wFp) =>
exu.io.out.ready := wFp.fire() || wInt.fire()
exeUnits.zip(intWbArbiter.io.in).foreach{
case (exu, wInt) =>
if(exu.config.writeFpRf){
val wakeUpOut = io.wakeUpOut.slow(0) // jmpExeUnit
val writeFpReady = wakeUpOut.fire() && wakeUpOut.bits.uop.ctrl.fpWen
exu.io.out.ready := wInt.fire() || writeFpReady || !exu.io.out.valid
} else {
exu.io.out.ready := wInt.fire() || !exu.io.out.valid
}
}

// set busytable and update roq
Expand Down
38 changes: 32 additions & 6 deletions src/main/scala/xiangshan/backend/MemBlock.scala
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ import xiangshan.backend.issue.{ReservationStation}
import xiangshan.backend.regfile.RfReadPort

class LsBlockToCtrlIO extends XSBundle {
val stOut = Vec(exuParameters.StuCnt, ValidIO(new ExuOutput)) // write to roq
val stOut = Vec(exuParameters.StuCnt, ValidIO(new ExuOutput))
val numExist = Vec(exuParameters.LsExuCnt, Output(UInt(log2Ceil(IssQueSize).W)))
val replay = ValidIO(new Redirect)
}
Expand Down Expand Up @@ -48,6 +48,7 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
with HasXSLog
with HasFPUParameters
with HasExeBlockHelper
with HasFpLoadHelper
{

val fastWakeUpIn = outer.fastWakeUpIn
Expand All @@ -62,7 +63,8 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
val toCtrlBlock = new LsBlockToCtrlIO

val wakeUpIn = new WakeUpBundle(fastWakeUpIn.size, slowWakeUpIn.size)
val wakeUpOut = Flipped(new WakeUpBundle(fastWakeUpOut.size, slowWakeUpOut.size))
val wakeUpOutInt = Flipped(new WakeUpBundle(fastWakeUpOut.size, slowWakeUpOut.size))
val wakeUpOutFp = Flipped(new WakeUpBundle(fastWakeUpOut.size, slowWakeUpOut.size))

val ptw = new TlbPtwIO
val sfence = Input(new SfenceBundle)
Expand Down Expand Up @@ -117,6 +119,8 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
loadUnits.head.io.ldout.ready := ldOut0.ready

val exeWbReqs = ldOut0 +: loadUnits.tail.map(_.io.ldout)
// 'wakeUpFp' is 1 cycle later than 'exeWbReqs'
val wakeUpFp = Wire(Vec(exuParameters.LduCnt, Decoupled(new ExuOutput)))

val readPortIndex = Seq(0, 1, 2, 4)
io.fromIntBlock.readIntRf.foreach(_.addr := DontCare)
Expand All @@ -136,10 +140,22 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
.map(_._2.bits.data)
val wakeupCnt = fastDatas.length

val slowPorts = (exeWbReqs ++
val slowPorts = (
(if(cfg == Exu.stExeUnitCfg) wakeUpFp else exeWbReqs) ++
slowWakeUpIn.zip(io.wakeUpIn.slow)
.filter(x => (x._1.writeIntRf && readIntRf) || (x._1.writeFpRf && readFpRf))
.map(_._2)).map(decoupledIOToValidIO)
.map{
case (Exu.jumpExeUnitCfg, value) if cfg == Exu.stExeUnitCfg =>
val jumpOut = Wire(Flipped(DecoupledIO(new ExuOutput)))
jumpOut.bits := RegNext(value.bits)
jumpOut.valid := RegNext(
value.valid && !value.bits.uop.roqIdx.needFlush(redirect, io.fromCtrlBlock.flush)
)
jumpOut.ready := true.B
jumpOut
case (_, value) => value
}
).map(decoupledIOToValidIO)

val slowPortsCnt = slowPorts.length

Expand Down Expand Up @@ -179,7 +195,17 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
.map(_._2)
}

io.wakeUpOut.slow <> exeWbReqs
wakeUpFp.zip(exeWbReqs).foreach{
case(w, e) =>
val r = RegNext(e.bits)
w.bits := r
w.valid := RegNext(e.valid && !e.bits.uop.roqIdx.needFlush(redirect, io.fromCtrlBlock.flush))
e.ready := true.B
assert(w.ready === true.B)
}

io.wakeUpOutInt.slow <> exeWbReqs
io.wakeUpOutFp.slow <> wakeUpFp
io.wakeUpIn.slow.foreach(_.ready := true.B)

val dtlb = Module(new TLB(Width = DTLBWidth, isDtlb = true))
Expand Down Expand Up @@ -243,8 +269,8 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
lsq.io.mmioStout.ready := false.B
when (lsq.io.mmioStout.valid && !storeUnits(0).io.stout.valid) {
io.toCtrlBlock.stOut(0).valid := true.B
lsq.io.mmioStout.ready := true.B
io.toCtrlBlock.stOut(0).bits := lsq.io.mmioStout.bits
lsq.io.mmioStout.ready := true.B
}

// Lsq
Expand Down
6 changes: 3 additions & 3 deletions src/main/scala/xiangshan/backend/exu/Exu.scala
Original file line number Diff line number Diff line change
Expand Up @@ -209,13 +209,13 @@ object Exu {
val storeExuConfigs = Seq.fill(exuParameters.StuCnt)(stExeUnitCfg)

val intExuConfigs = jumpExeUnitCfg +: (
Seq.fill(exuParameters.AluCnt)(aluExeUnitCfg) ++
Seq.fill(exuParameters.MduCnt)(mulDivExeUnitCfg)
Seq.fill(exuParameters.MduCnt)(mulDivExeUnitCfg) ++
Seq.fill(exuParameters.AluCnt)(aluExeUnitCfg)
)

val fpExuConfigs =
Seq.fill(exuParameters.FmacCnt)(fmacExeUnitCfg) ++
Seq.fill(exuParameters.FmiscCnt)(fmiscExeUnitCfg)

val exuConfigs: Seq[ExuConfig] = intExuConfigs ++ fpExuConfigs
val exuConfigs: Seq[ExuConfig] = intExuConfigs ++ fpExuConfigs ++ loadExuConfigs ++ storeExuConfigs
}
2 changes: 1 addition & 1 deletion src/main/scala/xiangshan/backend/regfile/Regfile.scala
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ class Regfile

val useBlackBox = false
if (!useBlackBox) {
val mem = Mem(NRPhyRegs, UInt(len.W))
val mem = Reg(Vec(NRPhyRegs, UInt(len.W)))
for (r <- io.readPorts) {
val rdata = if (hasZero) Mux(r.addr === 0.U, 0.U, mem(r.addr)) else mem(r.addr)
r.data := RegNext(rdata)
Expand Down
Loading