Add theory 2

This commit is contained in:
peteraa 2019-10-17 16:28:13 +02:00
parent 63b4447084
commit ec5089de8e
6 changed files with 201 additions and 23 deletions

View file

@ -39,7 +39,8 @@ object Data {
// addr is the target address // addr is the target address
case class PcUpdateJALR(addr: Addr) extends ExecutionEvent case class PcUpdateJALR(addr: Addr) extends ExecutionEvent
case class PcUpdateJAL(addr: Addr) extends ExecutionEvent case class PcUpdateJAL(addr: Addr) extends ExecutionEvent
case class PcUpdateB(addr: Addr) extends ExecutionEvent case class PcUpdateBranch(addr: Addr) extends ExecutionEvent
case class PcUpdateNoBranch(addr: Addr) extends ExecutionEvent
case class PcUpdate(addr: Addr) extends ExecutionEvent case class PcUpdate(addr: Addr) extends ExecutionEvent
case class ExecutionTraceEvent(pc: Addr, event: ExecutionEvent*){ override def toString(): String = s"$pc: " + event.toList.mkString(", ") } case class ExecutionTraceEvent(pc: Addr, event: ExecutionEvent*){ override def toString(): String = s"$pc: " + event.toList.mkString(", ") }

View file

@ -110,25 +110,10 @@ object Ops {
case class LUI(rd: Reg, imm: Imm) extends Op with UType case class LUI(rd: Reg, imm: Imm) extends Op with UType
case class AUIPC(rd: Reg, imm: Imm) extends Op with UType case class AUIPC(rd: Reg, imm: Imm) extends Op with UType
case class Store(rs2: Reg, rs1: Reg, offset: Imm, width: Int) extends Op with SType
object Store {
def sw(rs2: Int, rs1: Int, offset: Int) = Store(Reg(rs2), Reg(rs1), Imm(offset), 4)
def sh(rs2: Int, rs1: Int, offset: Int) = Store(Reg(rs2), Reg(rs1), Imm(offset), 2)
def sb(rs2: Int, rs1: Int, offset: Int) = Store(Reg(rs2), Reg(rs1), Imm(offset), 1)
}
case class Load(rd: Reg, rs1: Reg, offset: Imm, width: Int, signed: Boolean) extends Op with IType
object Load {
def lw (rd: Int, rs1: Int, offset: Int) = Load(Reg(rd), Reg(rs1), Imm(offset), 4, true)
def lh (rd: Int, rs1: Int, offset: Int) = Load(Reg(rd), Reg(rs1), Imm(offset), 2, true)
def lb (rd: Int, rs1: Int, offset: Int) = Load(Reg(rd), Reg(rs1), Imm(offset), 1, true)
def lhu(rd: Int, rs1: Int, offset: Int) = Load(Reg(rd), Reg(rs1), Imm(offset), 2, false)
def lbu(rd: Int, rs1: Int, offset: Int) = Load(Reg(rd), Reg(rs1), Imm(offset), 1, false)
}
case class JALR(rd: Reg, rs1: Reg, dst: String) extends Op with IType case class JALR(rd: Reg, rs1: Reg, dst: String) extends Op with IType
case class JAL(rd: Reg, dst: String) extends Op with UType case class JAL(rd: Reg, dst: String) extends Op with UType
case class SW(rs2: Reg, rs1: Reg, offset: Imm) extends Op with SType
case class LW(rd: Reg, rs1: Reg, offset: Imm) extends Op with IType
object LUI { def apply(rd: Int, imm: Int): LUI = LUI(Reg(rd), Imm(imm)) } object LUI { def apply(rd: Int, imm: Int): LUI = LUI(Reg(rd), Imm(imm)) }
@ -136,6 +121,8 @@ object Ops {
object JAL{ def apply(rd: Int, dst: String): JAL = JAL(Reg(rd), dst) } object JAL{ def apply(rd: Int, dst: String): JAL = JAL(Reg(rd), dst) }
object JALR{ def apply(rd: Int, rs1: Int, dst: String): JALR = JALR(Reg(rd), Reg(rs1), dst) } object JALR{ def apply(rd: Int, rs1: Int, dst: String): JALR = JALR(Reg(rd), Reg(rs1), dst) }
object SW { def apply(rs2: Int, rs1: Int, offset: Int): SW = SW(Reg(rs2), Reg(rs1), Imm(offset)) }
object LW { def apply(rd: Int, rs1: Int, offset: Int): LW = LW(Reg(rd), Reg(rs1), Imm(offset)) }
// This op should not be assembled, but will for the sake of simplicity be rendered as a NOP // This op should not be assembled, but will for the sake of simplicity be rendered as a NOP
case object DONE extends Op with IType { val rd = Reg(0); val rs1 = Reg(0) } case object DONE extends Op with IType { val rd = Reg(0); val rs1 = Reg(0) }

View file

@ -38,21 +38,19 @@ case class VM(
} }
private def executeBranch(op: Branch) = { private def executeBranch(op: Branch) = {
getAddr(op.dst).map{ addr => getAddr(op.dst).map{ addr =>
val takeBranch = regs.compare(op.rs1, op.rs2, op.comp.run) val takeBranch = regs.compare(op.rs1, op.rs2, op.comp.run)
if(takeBranch){ if(takeBranch){
val nextVM = copy(pc = addr) val nextVM = copy(pc = addr)
jump(nextVM, PcUpdateB(nextVM.pc)) jump(nextVM, PcUpdateBranch(nextVM.pc))
} }
else { else {
step(this) step(this, PcUpdateNoBranch(this.pc + Addr(4)))
} }
} }
} }
/** /**
* The weird :_* syntax is simply a way to pass a list to a varArgs function. * The weird :_* syntax is simply a way to pass a list to a varArgs function.
* *

View file

@ -42,7 +42,8 @@ object PrintUtils {
// addr is the target address // addr is the target address
case PcUpdateJALR(addr) => fansi.Color.Green(s"PC updated to ${addr.show} via JALR") case PcUpdateJALR(addr) => fansi.Color.Green(s"PC updated to ${addr.show} via JALR")
case PcUpdateJAL(addr) => fansi.Color.Magenta(s"PC updated to ${addr.show} via JAL") case PcUpdateJAL(addr) => fansi.Color.Magenta(s"PC updated to ${addr.show} via JAL")
case PcUpdateB(addr) => fansi.Color.Yellow(s"PC updated to ${addr.show} via Branch") case PcUpdateBranch(addr) => fansi.Color.Yellow(s"PC updated to ${addr.show} via Branch")
case PcUpdateNoBranch(addr) => fansi.Color.Yellow(s"PC updated to ${addr.show}, skipping a Branch")
} }
} }

View file

@ -100,4 +100,94 @@ object TestRunner {
successful successful
}.toOption.getOrElse(false) }.toOption.getOrElse(false)
} }
/**
  * Reads and parses the test program selected by `testOptions`, runs it, and prints (via `say`)
  * the number of mispredictions a one-bit branch predictor with unlimited slots would incur on
  * the branch events found in the execution trace.
  *
  * The outcome of the for-comprehension is discarded, so the method returns `true` regardless of
  * whether reading, parsing or validation succeeded.
  *
  * @param testOptions selects the test program and is also handed to the parser
  * @return always `true`
  */
def profileBranching(testOptions: TestOptions): Boolean = {

  val testResults = for {
    lines                      <- fileUtils.readTest(testOptions)
    program                    <- FiveStage.Parser.parseProgram(lines, testOptions)
    (binary, (trace, finalVM)) <- program.validate.map(x => (x._1, x._2.run))
  } yield {

    sealed trait BranchEvent
    case class Taken(addr: Int)    extends BranchEvent
    case class NotTaken(addr: Int) extends BranchEvent

    // NOTE(review): the PcUpdate* events carry the address the PC was updated to (VM.executeBranch
    // passes the branch target for PcUpdateBranch and pc + 4 for PcUpdateNoBranch), so one branch
    // instruction ends up under two different keys depending on its outcome. A predictor is normally
    // indexed by the address of the branch itself -- confirm whether ExecutionTraceEvent.pc should be
    // used as the key instead.
    val events: List[BranchEvent] = trace.flatMap(_.event).collect{
      case PcUpdateBranch(x)   => Taken(x.value)
      case PcUpdateNoBranch(x) => NotTaken(x.value)
    }

    /**
      * This is a sample profiler for a rather unrealistic branch predictor which has an unlimited
      * amount of slots. Each slot holds one bit: `true` means "predict taken", `false` means
      * "predict not taken". Returns the number of mispredictions for the given event list.
      */
    def OneBitInfiniteSlots(events: List[BranchEvent]): Int = {

      // Helper inspects the next element of the event list. If the event is a mispredict the miss
      // counter is incremented and the prediction table is updated to reflect the actual outcome.
      // As long as there are remaining events the helper calls itself recursively on the remainder.
      // The running count is passed along as an argument so that every recursive call is a tail
      // call: a long trace is then processed in constant stack space instead of one frame per event.
      @scala.annotation.tailrec
      def helper(events: List[BranchEvent], predictionTable: Map[Int, Boolean], misses: Int): Int = {
        events match {

          // Scala syntax for matching a list with a head element of some type and a tail
          // `case h :: t =>`
          // means we want to match a list with at least a head and a tail (tail can be Nil, so we
          // essentially want to match a list with at least one element)
          // h is the first element of the list, t is the remainder (which can be Nil, aka empty)

          // `case Constructor(arg1, arg2) :: t => `
          // means we want to match a list whose first element is of type Constructor, giving us
          // access to its internal values.

          // `case Constructor(arg1, arg2) :: t if(p(arg1, arg2)) =>`
          // means we want to match a list whose first element is of type Constructor while
          // satisfying some predicate p, called an if guard.

          // Predicted taken, was taken: correct prediction
          case Taken(addr)    :: t if( predictionTable(addr)) => helper(t, predictionTable, misses)
          // Predicted not taken, was taken: mispredict, flip the bit
          case Taken(addr)    :: t if(!predictionTable(addr)) => helper(t, predictionTable.updated(addr, true), misses + 1)
          // Predicted taken, was not taken: mispredict, flip the bit
          case NotTaken(addr) :: t if( predictionTable(addr)) => helper(t, predictionTable.updated(addr, false), misses + 1)
          // Predicted not taken, was not taken: correct prediction
          case NotTaken(addr) :: t if(!predictionTable(addr)) => helper(t, predictionTable, misses)
          // The guards above are complementary, so this is only reached for the empty list
          case _ => misses
        }
      }

      // Initially every possible branch is set to false since the initial state of the predictor is
      // to assume branch not taken. Every address occurring in the events gets an entry, which is
      // what makes the predictionTable(addr) lookups in helper safe.
      val initState: Map[Int, Boolean] = events.map{
        case Taken(addr)    => (addr, false)
        case NotTaken(addr) => (addr, false)
      }.toMap

      helper(events, initState, 0)
    }

    // NOTE(review): correcting the NotTaken cases changes the number printed here; the miss count
    // quoted in theory2.org should be regenerated.
    say(OneBitInfiniteSlots(events))
  }
  true
}
/**
  * Skeleton for the cache profiling exercise. Reads and parses the test program selected by
  * `testOptions`, runs it and extracts the memory events from the execution trace. No cache model
  * is applied to the events yet.
  *
  * The value produced by the for-comprehension is not inspected.
  *
  * @param testOptions selects the test program and is also handed to the parser
  * @return always `true`
  */
def profileCache(testOptions: TestOptions): Boolean = {

  val testResults = for {
    source                     <- fileUtils.readTest(testOptions)
    parsed                     <- FiveStage.Parser.parseProgram(source, testOptions)
    (binary, (trace, finalVM)) <- parsed.validate.map(validated => (validated._1, validated._2.run))
  } yield {

    sealed trait MemoryEvent
    case class Write(addr: Int) extends MemoryEvent
    case class Read(addr: Int)  extends MemoryEvent

    // Flatten the trace into one list of execution events, then keep only the memory accesses
    val allEvents = trace.flatMap(_.event)
    val events: List[MemoryEvent] = allEvents.collect{
      case MemWrite(x,_) => Write(x.value)
      case MemRead(x,_)  => Read(x.value)
    }

    // Your cache here

  }
  true
}
} }

101
theory2.org Normal file
View file

@ -0,0 +1,101 @@
* Question 1 - Benchmarking
In order to gauge the performance increase from adding branch predictors it is necessary to do some testing.
Rather than writing a test from scratch it is better to use the tester already in use in the test harness.
When running a program the VM outputs a log of all events, including which branches have been taken and which
haven't, which as it turns out is the only information we actually need to gauge the effectiveness of a branch
predictor!
For this exercise you will write a program that parses a log of branch events.
#+BEGIN_SRC scala
// One entry of the branch log: the outcome of a single executed branch.
sealed trait BranchEvent
// The branch was taken.
// NOTE(review): in the skeleton code addr is filled from the PcUpdateBranch event, i.e. the
// address the PC was updated to -- confirm that this is the intended key.
case class Taken(addr: Int) extends BranchEvent
// The branch was not taken.
// NOTE(review): in the skeleton code addr is filled from the PcUpdateNoBranch event -- confirm as above.
case class NotTaken(addr: Int) extends BranchEvent
// To be implemented (??? throws NotImplementedError when called).
// Intended to return the number of mispredictions for the given event log.
def profile(events: List[BranchEvent]): Int = ???
#+END_SRC
To help you get started, I have provided you with much of the necessary code.
In order to get an idea of how you should profile branch misses, consider the following profiler, which counts the
misses for a processor with a 1-bit branch predictor that has an unlimited number of slots:
#+BEGIN_SRC scala
// Counts the mispredictions of a one-bit branch predictor with one slot per address.
// A slot holding true means "predict taken", false means "predict not taken".
def OneBitInfiniteSlots(events: List[BranchEvent]): Int = {

  // Helper inspects the next element of the event list. If the event is a mispredict the miss
  // counter is incremented and the prediction table is updated to reflect the actual outcome.
  // As long as there are remaining events the helper calls itself recursively on the remainder.
  // The running count is passed along as an argument so that every recursive call is a tail call:
  // a long trace is then processed in constant stack space instead of one frame per event.
  @scala.annotation.tailrec
  def helper(events: List[BranchEvent], predictionTable: Map[Int, Boolean], misses: Int): Int = {
    events match {

      // Scala syntax for matching a list with a head element of some type and a tail
      // `case h :: t =>`
      // means we want to match a list with at least a head and a tail (tail can be Nil, so we
      // essentially want to match a list with at least one element)
      // h is the first element of the list, t is the remainder (which can be Nil, aka empty)

      // `case Constructor(arg1, arg2) :: t => `
      // means we want to match a list whose first element is of type Constructor, giving us
      // access to its internal values.

      // `case Constructor(arg1, arg2) :: t if(p(arg1, arg2)) =>`
      // means we want to match a list whose first element is of type Constructor while
      // satisfying some predicate p, called an if guard.

      // Predicted taken, was taken: correct prediction
      case Taken(addr)    :: t if( predictionTable(addr)) => helper(t, predictionTable, misses)
      // Predicted not taken, was taken: mispredict, flip the bit
      case Taken(addr)    :: t if(!predictionTable(addr)) => helper(t, predictionTable.updated(addr, true), misses + 1)
      // Predicted taken, was not taken: mispredict, flip the bit
      case NotTaken(addr) :: t if( predictionTable(addr)) => helper(t, predictionTable.updated(addr, false), misses + 1)
      // Predicted not taken, was not taken: correct prediction
      case NotTaken(addr) :: t if(!predictionTable(addr)) => helper(t, predictionTable, misses)
      // The guards above are complementary, so this is only reached for the empty list
      case _ => misses
    }
  }

  // Initially every possible branch is set to false since the initial state of the predictor is
  // to assume branch not taken. Every address occurring in the events gets an entry, which is
  // what makes the predictionTable(addr) lookups in helper safe.
  val initState: Map[Int, Boolean] = events.map{
    case Taken(addr)    => (addr, false)
    case NotTaken(addr) => (addr, false)
  }.toMap

  helper(events, initState, 0)
}
#+END_SRC
** Your task
Your job is to implement a test that checks how many misses occur for a 2 bit branch predictor with 4 slots.
For this task it is probably smart to use something other than a ~Map[Int, Boolean]~.
The skeleton code is located in ~testRunner.scala~ and can be run using testOnly FiveStage.ProfileTest.
If you do so now you will see that the unrealistic prediction model yields 1449 misses.
With a 2 bit 4 slot scheme, how many misses will you incur?
Answer with a number.
* Question 2 - Cache profiling
Unlike our design which has a very limited memory pool, real designs have access to vast amounts of memory, offset
by a steep cost in access latency.
To mitigate this, a modern processor features several caches, where even the smallest and fastest cache has more memory than
your entire design.
In order to investigate how caches can alter performance it is therefore necessary to make some rather
unrealistic assumptions to see how different cache schemes impact performance.
We will therefore assume the following:
+ Reads from main memory take 5 cycles
+ cache has a total storage of 32 words (1024 bits)
+ cache reads work as they do now (i.e. no additional latency)
For this exercise you will write a program that parses a log of memory events, similar to the previous task.
#+BEGIN_SRC scala
// One entry of the memory log: a single memory access made by the program.
sealed trait MemoryEvent
// A write access. In the skeleton code addr is taken from the first field of the MemWrite event.
case class Write(addr: Int) extends MemoryEvent
// A read access. In the skeleton code addr is taken from the first field of the MemRead event.
case class Read(addr: Int) extends MemoryEvent
// To be implemented (??? throws NotImplementedError when called).
// Intended to return the number of delay cycles incurred for the given event log.
def profile(events: List[MemoryEvent]): Int = ???
#+END_SRC
** Your task
Your job is to implement a test that checks how many delay cycles will occur for a cache which:
+ Follows a 2-way associative scheme
+ Block size is 4 words (128 bits)
+ Is write-through with no-write-allocate (a write miss does not load the block into the cache)
+ Eviction policy is LRU (least recently used)