diff --git a/src/test/scala/RISCV/DataTypes.scala b/src/test/scala/RISCV/DataTypes.scala index 180e854..4cd7c61 100644 --- a/src/test/scala/RISCV/DataTypes.scala +++ b/src/test/scala/RISCV/DataTypes.scala @@ -39,7 +39,8 @@ object Data { // addr is the target address case class PcUpdateJALR(addr: Addr) extends ExecutionEvent case class PcUpdateJAL(addr: Addr) extends ExecutionEvent - case class PcUpdateB(addr: Addr) extends ExecutionEvent + case class PcUpdateBranch(addr: Addr) extends ExecutionEvent // branch taken: addr is the branch target; PcUpdateNoBranch (next line) instead carries the fall-through address pc + 4 + case class PcUpdateNoBranch(addr: Addr) extends ExecutionEvent case class PcUpdate(addr: Addr) extends ExecutionEvent case class ExecutionTraceEvent(pc: Addr, event: ExecutionEvent*){ override def toString(): String = s"$pc: " + event.toList.mkString(", ") } diff --git a/src/test/scala/RISCV/Ops.scala b/src/test/scala/RISCV/Ops.scala index 9cd7ded..f129986 100644 --- a/src/test/scala/RISCV/Ops.scala +++ b/src/test/scala/RISCV/Ops.scala @@ -110,25 +110,10 @@ object Ops { case class LUI(rd: Reg, imm: Imm) extends Op with UType case class AUIPC(rd: Reg, imm: Imm) extends Op with UType - - case class Store(rs2: Reg, rs1: Reg, offset: Imm, width: Int) extends Op with SType - object Store { - def sw(rs2: Int, rs1: Int, offset: Int) = Store(Reg(rs2), Reg(rs1), Imm(offset), 4) - def sh(rs2: Int, rs1: Int, offset: Int) = Store(Reg(rs2), Reg(rs1), Imm(offset), 2) - def sb(rs2: Int, rs1: Int, offset: Int) = Store(Reg(rs2), Reg(rs1), Imm(offset), 1) - } - - case class Load(rd: Reg, rs1: Reg, offset: Imm, width: Int, signed: Boolean) extends Op with IType - object Load { - def lw (rd: Int, rs1: Int, offset: Int) = Load(Reg(rd), Reg(rs1), Imm(offset), 4, true) - def lh (rd: Int, rs1: Int, offset: Int) = Load(Reg(rd), Reg(rs1), Imm(offset), 2, true) - def lb (rd: Int, rs1: Int, offset: Int) = Load(Reg(rd), Reg(rs1), Imm(offset), 1, true) - def lhu(rd: Int, rs1: Int, offset: Int) = Load(Reg(rd), Reg(rs1), Imm(offset), 2, false) - def lbu(rd: Int, rs1: Int, offset: Int) = 
Load(Reg(rd), Reg(rs1), Imm(offset), 1, false) - } - case class JALR(rd: Reg, rs1: Reg, dst: String) extends Op with IType case class JAL(rd: Reg, dst: String) extends Op with UType + case class SW(rs2: Reg, rs1: Reg, offset: Imm) extends Op with SType + case class LW(rd: Reg, rs1: Reg, offset: Imm) extends Op with IType object LUI { def apply(rd: Int, imm: Int): LUI = LUI(Reg(rd), Imm(imm)) } @@ -136,6 +121,8 @@ object Ops { object JAL{ def apply(rd: Int, dst: String): JAL = JAL(Reg(rd), dst) } object JALR{ def apply(rd: Int, rs1: Int, dst: String): JALR = JALR(Reg(rd), Reg(rs1), dst) } + object SW { def apply(rs2: Int, rs1: Int, offset: Int): SW = SW(Reg(rs2), Reg(rs1), Imm(offset)) } + object LW { def apply(rd: Int, rs1: Int, offset: Int): LW = LW(Reg(rd), Reg(rs1), Imm(offset)) } // This op should not be assembled, but will for the sake of simplicity be rendered as a NOP case object DONE extends Op with IType { val rd = Reg(0); val rs1 = Reg(0) } diff --git a/src/test/scala/RISCV/VM.scala b/src/test/scala/RISCV/VM.scala index 203161f..effaf6d 100644 --- a/src/test/scala/RISCV/VM.scala +++ b/src/test/scala/RISCV/VM.scala @@ -38,21 +38,19 @@ case class VM( } - private def executeBranch(op: Branch) = { getAddr(op.dst).map{ addr => val takeBranch = regs.compare(op.rs1, op.rs2, op.comp.run) if(takeBranch){ val nextVM = copy(pc = addr) - jump(nextVM, PcUpdateBranch(nextVM.pc)) + jump(nextVM, PcUpdateBranch(nextVM.pc)) } else { - step(this) + step(this, PcUpdateNoBranch(this.pc + Addr(4))) // branch not taken: log the fall-through address (pc + 4) so the trace records both outcomes } } } - /** * The weird :_* syntax is simply a way to pass a list to a varArgs function. 
* diff --git a/src/test/scala/RISCV/printUtils.scala b/src/test/scala/RISCV/printUtils.scala index 58adefa..980e52e 100644 --- a/src/test/scala/RISCV/printUtils.scala +++ b/src/test/scala/RISCV/printUtils.scala @@ -42,7 +42,8 @@ object PrintUtils { // addr is the target address case PcUpdateJALR(addr) => fansi.Color.Green(s"PC updated to ${addr.show} via JALR") case PcUpdateJAL(addr) => fansi.Color.Magenta(s"PC updated to ${addr.show} via JAL") - case PcUpdateB(addr) => fansi.Color.Yellow(s"PC updated to ${addr.show} via Branch") + case PcUpdateBranch(addr) => fansi.Color.Yellow(s"PC updated to ${addr.show} via Branch") + case PcUpdateNoBranch(addr) => fansi.Color.Yellow(s"PC updated to ${addr.show}, skipping a Branch") } } diff --git a/src/test/scala/RISCV/testRunner.scala b/src/test/scala/RISCV/testRunner.scala index 8ef1f1f..d51128f 100644 --- a/src/test/scala/RISCV/testRunner.scala +++ b/src/test/scala/RISCV/testRunner.scala @@ -100,4 +100,94 @@ object TestRunner { successful }.toOption.getOrElse(false) } + + def profileBranching(testOptions: TestOptions): Boolean = { // runs the test program, passes the sample predictor's mispredict count to say(); always returns true + + val testResults = for { + lines <- fileUtils.readTest(testOptions) + program <- FiveStage.Parser.parseProgram(lines, testOptions) + (binary, (trace, finalVM)) <- program.validate.map(x => (x._1, x._2.run)) + } yield { + + sealed trait BranchEvent + case class Taken(addr: Int) extends BranchEvent + case class NotTaken(addr: Int) extends BranchEvent + + val events: List[BranchEvent] = trace.flatMap(e => e.event.map(e.pc -> _)).collect{ // key by the trace event's pc, not the payload (the payload is the NEXT pc: target or pc + 4). NOTE(review): assumes the trace pc is the branch instruction's own address - confirm against VM.step/jump + case (pc, PcUpdateBranch(_)) => Taken(pc.value) + case (pc, PcUpdateNoBranch(_)) => NotTaken(pc.value) + } + + + /** + * This is a sample profiler for a rather unrealistic branch predictor which has an unlimited amount + * of slots. Returns the number of mispredicted branch events. + */ + def OneBitInfiniteSlots(events: List[BranchEvent]): Int = { + + // Helper inspects the next element of the event list. If the event is a mispredict the prediction table is updated + // to reflect this. 
+ // As long as there are remaining events the helper calls itself recursively on the remainder + def helper(events: List[BranchEvent], predictionTable: Map[Int, Boolean]): Int = { + events match { + + // Scala syntax for matching a list with a head element of some type and a tail + // `case h :: t =>` + // means we want to match a list with at least a head and a tail (tail can be Nil, so we + // essentially want to match a list with at least one element) + // h is the first element of the list, t is the remainder (which can be Nil, aka empty) + + // `case Constructor(arg1, arg2) :: t => ` + // means we want to match a list whose first element is of type Constructor, giving us access to its internal + // values. + + // `case Constructor(arg1, arg2) :: t if(p(arg1, arg2)) =>` + // means we want to match a list whose first element is of type Constructor while satisfying some predicate p, + // called an if guard. + case Taken(addr) :: t if( predictionTable(addr)) => helper(t, predictionTable) + case Taken(addr) :: t if(!predictionTable(addr)) => 1 + helper(t, predictionTable.updated(addr, true)) + case NotTaken(addr) :: t if( predictionTable(addr)) => 1 + helper(t, predictionTable.updated(addr, false)) + case NotTaken(addr) :: t if(!predictionTable(addr)) => helper(t, predictionTable) + case _ => 0 + } + } + + // Initially every possible branch is set to false since the initial state of the predictor is to assume branch not taken + val initState = events.map{ + case Taken(addr) => (addr, false) + case NotTaken(addr) => (addr, false) + }.toMap + + helper(events, initState) + } + + say(OneBitInfiniteSlots(events)) + + } + true + } + + + def profileCache(testOptions: TestOptions): Boolean = { // skeleton: extracts the memory read/write trace, the cache model itself is left to implement; always returns true + + val testResults = for { + lines <- fileUtils.readTest(testOptions) + program <- FiveStage.Parser.parseProgram(lines, testOptions) + (binary, (trace, finalVM)) <- program.validate.map(x => (x._1, x._2.run)) + } yield { + + sealed trait MemoryEvent + case class Write(addr: Int) 
extends MemoryEvent + case class Read(addr: Int) extends MemoryEvent + + val events: List[MemoryEvent] = trace.flatMap(_.event).collect{ + case MemWrite(x,_) => Write(x.value) + case MemRead(x,_) => Read(x.value) + } + + // Your cache here + + } + true + } } diff --git a/theory2.org b/theory2.org new file mode 100644 index 0000000..07cf86e --- /dev/null +++ b/theory2.org @@ -0,0 +1,101 @@ +* Question 1 - Benchmarking + In order to gauge the performance increase from adding branch predictors it is necessary to do some testing. + Rather than writing a test from scratch it is better to use the tester already in use in the test harness. + When running a program the VM outputs a log of all events, including which branches have been taken and which + haven't, which as it turns out is the only information we actually need to gauge the effectiveness of a branch + predictor! + + For this exercise you will write a program that parses a log of branch events. + + #+BEGIN_SRC scala + sealed trait BranchEvent + case class Taken(addr: Int) extends BranchEvent + case class NotTaken(addr: Int) extends BranchEvent + + + def profile(events: List[BranchEvent]): Int = ??? + #+END_SRC + + To help you get started, I have provided you with much of the necessary code. + In order to get an idea for how you should profile branch misses, consider the following profiler which calculates + misses for a processor with a 1 bit branch predictor with infinite memory: + + #+BEGIN_SRC scala + def OneBitInfiniteSlots(events: List[BranchEvent]): Int = { + + // Helper inspects the next element of the event list. If the event is a mispredict the prediction table is updated + // to reflect this. 
+ // As long as there are remaining events the helper calls itself recursively on the remainder + def helper(events: List[BranchEvent], predictionTable: Map[Int, Boolean]): Int = { + events match { + + // Scala syntax for matching a list with a head element of some type and a tail + // `case h :: t =>` + // means we want to match a list with at least a head and a tail (tail can be Nil, so we + // essentially want to match a list with at least one element) + // h is the first element of the list, t is the remainder (which can be Nil, aka empty) + + // `case Constructor(arg1, arg2) :: t => ` + // means we want to match a list whose first element is of type Constructor, giving us access to its internal + // values. + + // `case Constructor(arg1, arg2) :: t if(p(arg1, arg2)) =>` + // means we want to match a list whose first element is of type Constructor while satisfying some predicate p, + // called an if guard. + case Taken(addr) :: t if( predictionTable(addr)) => helper(t, predictionTable) + case Taken(addr) :: t if(!predictionTable(addr)) => 1 + helper(t, predictionTable.updated(addr, true)) + case NotTaken(addr) :: t if( predictionTable(addr)) => 1 + helper(t, predictionTable.updated(addr, false)) + case NotTaken(addr) :: t if(!predictionTable(addr)) => helper(t, predictionTable) + case _ => 0 + } + } + + // Initially every possible branch is set to false since the initial state of the predictor is to assume branch not taken + val initState = events.map{ + case Taken(addr) => (addr, false) + case NotTaken(addr) => (addr, false) + }.toMap + + helper(events, initState) + } + #+END_SRC + +** Your task + Your job is to implement a test that checks how many misses occur for a 2 bit branch predictor with 4 slots. + For this task it is probably smart to use something else than a ~Map[Int, Boolean]~ + + The skeleton code is located in ~testRunner.scala~ and can be run using ~testOnly FiveStage.ProfileTest~. 
+ If you do so now you will see how many misses the unrealistic prediction model yields (the figure 1449 predates the corrected sample profiler; re-run to get the current number). + + With a 2 bit 4 slot scheme, how many misses will you incur? + Answer with a number. + +* Question 2 - Cache profiling + Unlike our design, which has a very limited memory pool, real designs have access to vast amounts of memory, offset + by a steep cost in access latency. + To mitigate this, a modern processor features several caches where even the smallest, fastest cache has more memory than + your entire design. + In order to investigate how caches can alter performance it is therefore necessary to make some rather + unrealistic assumptions to see how different cache schemes impact performance. + + We will therefore assume the following: + + Reads from main memory take 5 cycles + + cache has a total storage of 32 words (1024 bits) + + cache reads work as they do now (i.e. no additional latency) + + For this exercise you will write a program that parses a log of memory events, similar to the previous task + #+BEGIN_SRC scala + sealed trait MemoryEvent + case class Write(addr: Int) extends MemoryEvent + case class Read(addr: Int) extends MemoryEvent + + + def profile(events: List[MemoryEvent]): Int = ??? + #+END_SRC + +** Your task + Your job is to implement a test that checks how many delay cycles will occur for a cache which: + + Follows a 2-way associative scheme + + Block size is 4 words (128 bits) + + Is write-through, write no-allocate + + Eviction policy is LRU (least recently used)