From e0b6634a939ac5c4c70d0e054ab2be8903e70bda Mon Sep 17 00:00:00 2001 From: Sebastian Bugge Date: Tue, 12 Nov 2024 12:28:38 +0100 Subject: [PATCH 1/3] Add single cycle stall to branching. --- src/main/scala/IF.scala | 6 +++--- src/main/scala/IFBarrier.scala | 12 +----------- 2 files changed, 4 insertions(+), 14 deletions(-) diff --git a/src/main/scala/IF.scala b/src/main/scala/IF.scala index b4fd54a..a08d526 100644 --- a/src/main/scala/IF.scala +++ b/src/main/scala/IF.scala @@ -46,9 +46,9 @@ class InstructionFetch extends MultiIOModule { * * You should expand on or rewrite the code below. */ - io.PC := PC - IMEM.io.instructionAddress := PC - PC := Mux(io.branch, io.branchAddress, Mux(io.stall, PC, PC + 4.U)) + io.PC := Mux(io.branch, io.branchAddress, PC) + IMEM.io.instructionAddress := io.PC + PC := Mux(io.branch, io.branchAddress + 4.U, Mux(io.stall, PC, PC + 4.U)) val instruction = Wire(new Instruction) instruction := IMEM.io.instruction.asTypeOf(new Instruction) diff --git a/src/main/scala/IFBarrier.scala b/src/main/scala/IFBarrier.scala index 4b5d103..789f4f6 100644 --- a/src/main/scala/IFBarrier.scala +++ b/src/main/scala/IFBarrier.scala @@ -20,22 +20,12 @@ class IFBarrier extends MultiIOModule { val instruction = Reg(new Instruction) val replay = RegInit(Bool(), false.B) - val flushRemaining = RegInit(UInt(2.W), 0.U) - flushRemaining := Mux( - io.flush, - 1.U, - Mux( - flushRemaining === 0.U, - 0.U, - flushRemaining - 1.U - ) - ) replay := io.stall instruction := io.instructionIn io.instructionOut := Mux( - io.stall || io.flush || flushRemaining > 0.U, + io.stall || io.flush, Instruction.NOP, Mux( replay, From 3216c89dae0cc4d09e2a041a3504cffd996b1707 Mon Sep 17 00:00:00 2001 From: Sebastian Bugge Date: Tue, 12 Nov 2024 15:01:04 +0100 Subject: [PATCH 2/3] Move address calculation to ID. --- src/main/scala/CPU.scala | 10 ++++++++- src/main/scala/Decoder.scala | 2 +- src/main/scala/EX.scala | 1 - src/main/scala/EXBarrier.scala | 10 +++++++-- src/main/scala/ID.scala | 33 ++++++++++++++++++++++++++++ src/main/scala/IDBarrier.scala | 1 + src/main/scala/ToplevelSignals.scala | 1 - 7 files changed, 52 insertions(+), 6 deletions(-) diff --git a/src/main/scala/CPU.scala b/src/main/scala/CPU.scala index d5e17c7..31530e8 100644 --- a/src/main/scala/CPU.scala +++ b/src/main/scala/CPU.scala @@ -90,6 +90,7 @@ class CPU extends MultiIOModule { IDBarrier.in.r2Address := ID.io.r2Address IDBarrier.in.ALUop := ID.io.ALUOp IDBarrier.in.returnAddr := ID.io.returnAddr + IDBarrier.in.branchAddr := ID.io.branchAddr IDBarrier.in.jump := ID.io.jump IDBarrier.in.branchType := ID.io.branchType IDBarrier.in.writeEnable := ID.io.writeEnableOut @@ -109,6 +110,7 @@ class CPU extends MultiIOModule { EXBarrier.branchIn := EX.io.branch EXBarrier.in.jump := IDBarrier.out.jump EXBarrier.in.returnAddr := IDBarrier.out.returnAddr + EXBarrier.branchAddrIn := IDBarrier.out.branchAddr EXBarrier.in.writeEnable := IDBarrier.out.writeEnable EXBarrier.in.writeAddr := IDBarrier.out.writeAddr EXBarrier.in.memWrite := IDBarrier.out.memWrite @@ -126,6 +128,12 @@ class CPU extends MultiIOModule { MEMBarrier.in.writeEnable := EXBarrier.out.writeEnable MEMBarrier.in.writeAddr := EXBarrier.out.writeAddr + // Fast branching forwarding + ID.io.forwardEx := EXBarrier.forwardEx + ID.io.forwardMem := MEMBarrier.forwardMem + ID.io.forwardWb := MEMBarrier.forwardWb + ID.io.forwardId := MEMBarrier.forwardId + // Write back ID.io.writeData := MEMBarrier.out.data ID.io.writeEnableIn := MEMBarrier.out.writeEnable @@ -133,7 +141,7 @@ class CPU extends MultiIOModule { // Branching IF.io.branch := EXBarrier.branchOut - IF.io.branchAddress := EXBarrier.branchAddr + IF.io.branchAddress := EXBarrier.branchAddrOut // Stall IF.io.stall := IDBarrier.stall diff --git a/src/main/scala/Decoder.scala b/src/main/scala/Decoder.scala index 3f6dc46..71d324b 100644 --- a/src/main/scala/Decoder.scala +++ b/src/main/scala/Decoder.scala @@ -87,7 +87,7 @@ class Decoder() extends Module { // signal regWrite, memRead, memWrite, branch, jump, branchType, Op1Select, Op2Select, ImmSelect, ALUOp JAL -> List(Y, N, N, Y, Y, branchType.jump, PC, imm, ImmFormat.JTYPE, ALUOps.ADD ), - JALR -> List(Y, N, N, Y, Y, branchType.jump, rs1, imm, ImmFormat.ITYPE, ALUOps.ADDR ), + JALR -> List(Y, N, N, Y, Y, branchType.jump, rs1, imm, ImmFormat.ITYPE, ALUOps.ADD ), ) diff --git a/src/main/scala/EX.scala b/src/main/scala/EX.scala index a65bb61..539ae2d 100644 --- a/src/main/scala/EX.scala +++ b/src/main/scala/EX.scala @@ -31,7 +31,6 @@ class Execute extends MultiIOModule { ALUOps.SRA -> (io.op1 >> io.op2(4, 0)), ALUOps.SRL -> (io.op1.asUInt() >> io.op2(4, 0)).asSInt(), ALUOps.SLL -> (io.op1.asUInt() << io.op2(4, 0)).asSInt(), - ALUOps.ADDR -> ((io.op1 + io.op2) & -2.S), ALUOps.COPY_A -> io.op1, ALUOps.COPY_B -> io.op2, ) diff --git a/src/main/scala/EXBarrier.scala b/src/main/scala/EXBarrier.scala index 5e8687e..45f933e 100644 --- a/src/main/scala/EXBarrier.scala +++ b/src/main/scala/EXBarrier.scala @@ -20,9 +20,11 @@ class EXBarrier extends MultiIOModule { val in = Input(new EXBarrierIO) val out = Output(new EXBarrierIO) val flush = Output(Bool()) - val branchAddr = Output(UInt(32.W)) val branchIn = Input(Bool()) val branchOut = Output(Bool()) + val branchAddrIn = Input(UInt(32.W)) + val branchAddrOut = Output(UInt(32.W)) + val forwardEx = Output(new Forwarding) }) val delay = Reg(new EXBarrierIO) @@ -31,5 +33,9 @@ class EXBarrier extends MultiIOModule { io.flush := io.branchIn io.branchOut := io.branchIn - io.branchAddr := io.in.ALUResult + io.branchAddrOut := io.branchAddrIn + + io.forwardEx.write := io.in.writeEnable + io.forwardEx.writeAddr := io.in.writeAddr + io.forwardEx.writeData := Mux(io.in.jump, io.in.returnAddr, io.in.ALUResult) } diff --git a/src/main/scala/ID.scala b/src/main/scala/ID.scala index 1027e70..8d39e95 100644 --- a/src/main/scala/ID.scala +++ b/src/main/scala/ID.scala @@ -39,9 +39,39 @@ class InstructionDecode extends MultiIOModule { val branchType = Output(UInt(3.W)) val jump = Output(Bool()) val returnAddr = Output(UInt(32.W)) + val branchAddr = Output(UInt(32.W)) + + val forwardEx = Input(new Forwarding) + val forwardMem = Input(new Forwarding) + val forwardWb = Input(new Forwarding) + val forwardId = Input(new Forwarding) } ) + def forward(data: UInt, addr: UInt, useForward: Bool, ex: Forwarding, mem: Forwarding, wb: Forwarding, id: Forwarding): UInt = { + Mux( + !useForward, + data, + Mux( + ex.valid && ex.writeAddr === addr, + ex.writeData, + Mux( + mem.valid && mem.writeAddr === addr, + mem.writeData, + Mux( + wb.valid && wb.writeAddr === addr, + wb.writeData, + Mux( + id.valid && id.writeAddr === addr, + id.writeData, + data, + ) + ) + ) + ) + ) + } + val registers = Module(new Registers) val decoder = Module(new Decoder).io @@ -94,4 +124,7 @@ class InstructionDecode extends MultiIOModule { io.writeEnableOut := decoder.controlSignals.regWrite io.memRead := decoder.controlSignals.memRead io.memWrite := decoder.controlSignals.memWrite + + val op1 = forward(io.op1.asUInt(), io.r1Address, io.isOp1RValue, ex = io.forwardEx, mem = io.forwardMem, wb = io.forwardWb, id = io.forwardId).asSInt() + io.branchAddr := ((op1 + io.op2) & -2.S).asUInt() } diff --git a/src/main/scala/IDBarrier.scala b/src/main/scala/IDBarrier.scala index c924233..ec5ecfc 100644 --- a/src/main/scala/IDBarrier.scala +++ b/src/main/scala/IDBarrier.scala @@ -13,6 +13,7 @@ class IDBarrierIO extends Bundle { val r2Value = UInt(32.W) val r2Address = UInt(5.W) val returnAddr = UInt(32.W) + val branchAddr = UInt(32.W) val jump = Bool() val ALUop = UInt(4.W) val branchType = UInt(3.W) diff --git a/src/main/scala/ToplevelSignals.scala b/src/main/scala/ToplevelSignals.scala index e56b572..7fe70f5 100644 --- a/src/main/scala/ToplevelSignals.scala +++ b/src/main/scala/ToplevelSignals.scala @@ -119,7 +119,6 @@ object ALUOps { val SRA = 9.U(4.W) val COPY_A = 10.U(4.W) val COPY_B = 11.U(4.W) - val ADDR = 12.U(4.W) val DC = 15.U(4.W) } From bb89461843e8d5475e33650ff5a6e8b3aec5cf65 Mon Sep 17 00:00:00 2001 From: Sebastian Bugge Date: Thu, 14 Nov 2024 15:38:03 +0100 Subject: [PATCH 3/3] Simple one-bit branch-predictor. --- src/main/scala/CPU.scala | 21 ++++++++++++++------- src/main/scala/EX.scala | 13 +++++++++++-- src/main/scala/EXBarrier.scala | 9 --------- src/main/scala/ID.scala | 9 +++++++++ src/main/scala/IDBarrier.scala | 2 ++ src/test/scala/Manifest.scala | 2 +- 6 files changed, 37 insertions(+), 19 deletions(-) diff --git a/src/main/scala/CPU.scala b/src/main/scala/CPU.scala index 31530e8..eef93b7 100644 --- a/src/main/scala/CPU.scala +++ b/src/main/scala/CPU.scala @@ -91,6 +91,8 @@ class CPU extends MultiIOModule { IDBarrier.in.ALUop := ID.io.ALUOp IDBarrier.in.returnAddr := ID.io.returnAddr IDBarrier.in.branchAddr := ID.io.branchAddr + IDBarrier.in.nextOpAddr := ID.io.nextOpAddr + IDBarrier.in.branchPrediction := ID.io.branchPrediction IDBarrier.in.jump := ID.io.jump IDBarrier.in.branchType := ID.io.branchType IDBarrier.in.writeEnable := ID.io.writeEnableOut @@ -104,13 +106,14 @@ class CPU extends MultiIOModule { EX.io.branchType := IDBarrier.out.branchType EX.io.rs1ValueIn := forward(IDBarrier.out.r1Value, IDBarrier.out.r1Address, true.B, mem = MEMBarrier.forwardMem, wb = MEMBarrier.forwardWb, id = MEMBarrier.forwardId).asSInt() EX.io.rs2ValueIn := forward(IDBarrier.out.r2Value, IDBarrier.out.r2Address, true.B, mem = MEMBarrier.forwardMem, wb = MEMBarrier.forwardWb, id = MEMBarrier.forwardId).asSInt() + EX.io.branchAddr := IDBarrier.out.branchAddr + EX.io.nextOpAddr := IDBarrier.out.nextOpAddr + EX.io.branchPrediction := IDBarrier.out.branchPrediction EXBarrier.in.r2Value := EX.io.rs2ValueOut.asUInt() EXBarrier.in.ALUResult := EX.io.ALUResult.asUInt() - EXBarrier.branchIn := EX.io.branch EXBarrier.in.jump := IDBarrier.out.jump EXBarrier.in.returnAddr := IDBarrier.out.returnAddr - EXBarrier.branchAddrIn := IDBarrier.out.branchAddr EXBarrier.in.writeEnable := IDBarrier.out.writeEnable EXBarrier.in.writeAddr := IDBarrier.out.writeAddr EXBarrier.in.memWrite := IDBarrier.out.memWrite @@ -140,13 +143,17 @@ class CPU extends MultiIOModule { ID.io.writeAddrIn := MEMBarrier.out.writeAddr // Branching - IF.io.branch := EXBarrier.branchOut - IF.io.branchAddress := EXBarrier.branchAddrOut + IF.io.branch := ID.io.branchPrediction || EX.io.misprediction + IF.io.branchAddress := Mux(EX.io.misprediction, EX.io.actualBranchAddr, ID.io.branchAddr) + + // Flush + IFBarrier.flush := EX.io.misprediction + + // Update branch prediction + ID.io.hasBranchResult := EX.io.branchOperation + ID.io.branchResult := EX.io.branchTaken // Stall IF.io.stall := IDBarrier.stall IFBarrier.stall := IDBarrier.stall - - // Flush - IFBarrier.flush := EXBarrier.flush } diff --git a/src/main/scala/EX.scala b/src/main/scala/EX.scala index 539ae2d..b988558 100644 --- a/src/main/scala/EX.scala +++ b/src/main/scala/EX.scala @@ -14,7 +14,13 @@ class Execute extends MultiIOModule { val rs2ValueIn = Input(SInt(32.W)) val rs2ValueOut = Output(SInt(32.W)) val branchType = Input(UInt(3.W)) - val branch = Output(Bool()) + val branchPrediction = Input(Bool()) + val branchAddr = Input(UInt(32.W)) + val nextOpAddr = Input(UInt(32.W)) + val actualBranchAddr = Output(UInt(32.W)) + val branchTaken = Output(Bool()) + val branchOperation = Output(Bool()) + val misprediction = Output(Bool()) val ALUOp = Input(UInt(4.W)) val ALUResult = Output(SInt(32.W)) } @@ -46,6 +52,9 @@ class Execute extends MultiIOModule { ) io.rs2ValueOut := io.rs2ValueIn - io.branch := MuxLookup(io.branchType, false.B, BranchALUOpsMap) + io.branchTaken := MuxLookup(io.branchType, false.B, BranchALUOpsMap) + io.misprediction := io.branchOperation && (io.branchTaken =/= io.branchPrediction) + io.actualBranchAddr := Mux(io.branchTaken, io.branchAddr, io.nextOpAddr) + io.branchOperation := io.branchType =/= branchType.DC io.ALUResult := MuxLookup(io.ALUOp, 0.S(32.W), ALUOpsMap) } \ No newline at end of file diff --git a/src/main/scala/EXBarrier.scala b/src/main/scala/EXBarrier.scala index 45f933e..c0ea41b 100644 --- a/src/main/scala/EXBarrier.scala +++ b/src/main/scala/EXBarrier.scala @@ -19,11 +19,6 @@ class EXBarrier extends MultiIOModule { new Bundle { val in = Input(new EXBarrierIO) val out = Output(new EXBarrierIO) - val flush = Output(Bool()) - val branchIn = Input(Bool()) - val branchOut = Output(Bool()) - val branchAddrIn = Input(UInt(32.W)) - val branchAddrOut = Output(UInt(32.W)) val forwardEx = Output(new Forwarding) }) @@ -31,10 +26,6 @@ class EXBarrier extends MultiIOModule { delay := io.in io.out := delay - io.flush := io.branchIn - io.branchOut := io.branchIn - io.branchAddrOut := io.branchAddrIn - io.forwardEx.write := io.in.writeEnable io.forwardEx.writeAddr := io.in.writeAddr io.forwardEx.writeData := Mux(io.in.jump, io.in.returnAddr, io.in.ALUResult) diff --git a/src/main/scala/ID.scala b/src/main/scala/ID.scala index 8d39e95..c689f33 100644 --- a/src/main/scala/ID.scala +++ b/src/main/scala/ID.scala @@ -19,6 +19,8 @@ class InstructionDecode extends MultiIOModule { val io = IO( new Bundle { val instruction = Input(new Instruction) + val hasBranchResult = Input(Bool()) + val branchResult = Input(Bool()) val pc = Input(UInt(32.W)) val op1 = Output(SInt(32.W)) val isOp1RValue = Output(Bool()) @@ -37,9 +39,11 @@ class InstructionDecode extends MultiIOModule { val memWrite = Output(Bool()) val memRead = Output(Bool()) val branchType = Output(UInt(3.W)) + val branchPrediction = Output(Bool()) val jump = Output(Bool()) val returnAddr = Output(UInt(32.W)) val branchAddr = Output(UInt(32.W)) + val nextOpAddr = Output(UInt(32.W)) val forwardEx = Input(new Forwarding) val forwardMem = Input(new Forwarding) @@ -127,4 +131,9 @@ class InstructionDecode extends MultiIOModule { val op1 = forward(io.op1.asUInt(), io.r1Address, io.isOp1RValue, ex = io.forwardEx, mem = io.forwardMem, wb = io.forwardWb, id = io.forwardId).asSInt() io.branchAddr := ((op1 + io.op2) & -2.S).asUInt() + io.nextOpAddr := io.pc + 4.U + + val lastBranchWasTaken = RegInit(Bool(), true.B) + lastBranchWasTaken := Mux(io.hasBranchResult, io.branchResult, lastBranchWasTaken) + io.branchPrediction := Mux(io.branchType =/= branchType.DC, lastBranchWasTaken, false.B) } diff --git a/src/main/scala/IDBarrier.scala b/src/main/scala/IDBarrier.scala index ec5ecfc..23cdf90 100644 --- a/src/main/scala/IDBarrier.scala +++ b/src/main/scala/IDBarrier.scala @@ -14,6 +14,8 @@ class IDBarrierIO extends Bundle { val r2Address = UInt(5.W) val returnAddr = UInt(32.W) val branchAddr = UInt(32.W) + val nextOpAddr = UInt(32.W) + val branchPrediction = Bool() val jump = Bool() val ALUop = UInt(4.W) val branchType = UInt(3.W) diff --git a/src/test/scala/Manifest.scala b/src/test/scala/Manifest.scala index d10ed8a..6a3c731 100644 --- a/src/test/scala/Manifest.scala +++ b/src/test/scala/Manifest.scala @@ -19,7 +19,7 @@ import LogParser._ object Manifest { - val singleTest = "branch.s" + val singleTest = "square.s" val nopPadded = false