TDT4255/src/test/scala/RISCV/Parser.scala

357 lines
13 KiB
Scala

package FiveStage
import atto._, Atto._, syntax.refined._
import eu.timepit.refined.numeric._
import fileUtils.say
import Ops._
import Data._
import cats._
import cats.data.{ Op => _ }
import cats.implicits._
object Parser {
/** Parses a `0x`-prefixed hexadecimal literal into an `Int`.
  *
  * The digits are interpreted as an unsigned 32-bit value, so `0xFFFFFFFF` yields `-1`.
  * A literal that does not fit in 32 bits makes the parser fail (and lets alternatives
  * be tried) instead of letting a `NumberFormatException` escape from the parse.
  */
def hex : Parser[Int] = string("0x") ~> many1(hexDigit).flatMap{ ds =>
  val digits = ds.toList.mkString
  // `digits` only contains hex digits, so the only way this can throw is overflow.
  scala.util.Try(Integer.parseUnsignedInt(digits, 16)).fold(
    _     => err[Int](s"hex literal 0x$digits does not fit in 32 bits"),
    value => ok(value)
  )
}
/** Parses a label definition: every character up to the first ':' is the label name,
  * and the ':' itself is consumed and dropped. Fails when the input contains no ':'.
  */
def labelDest : Parser[Label] = for {
  name <- takeWhile(c => c != ':')
  _    <- char(':')
} yield name
/** Parses a label reference: the (possibly empty) run of characters before the next space. */
def label : Parser[Label] = takeWhile(c => !(c == ' '))
/** Parses a register name and yields its register number.
  *
  * The name is everything up to the next ',' or ')'. No trimming is done, so any
  * whitespace inside that span is part of the name handed to `lookupReg`.
  *
  * `lookupReg` throws on unknown names. That exception is converted into an ordinary
  * parser failure so that a bad register shows up as a parse error for the line rather
  * than aborting the whole parse.
  */
def reg : Parser[Int] = takeWhile(x => (x != ',' && x != ')')).flatMap{ name =>
  scala.util.Try(lookupReg(name)).fold(
    _      => err[Int](s"unknown register: '$name'"),
    number => ok(number)
  )
}.attempt
/** Parses an operand separator: a ',' with optional whitespace on either side. */
def sep : Parser[Unit] = for {
  _ <- many(whitespace)
  _ <- char(',')
  _ <- many(whitespace)
} yield ()
/** Operand parsers for a two-register branch: `reg, reg, label`. Combined by callers with `mapN`. */
def branch : (Parser[Int], Parser[Int], Parser[String]) = {
  val regThenComma = reg <~ sep
  (regThenComma, regThenComma, label)
}
/** Operand parsers for a compare-with-zero branch: `reg, label`. Combined by callers with `mapN`. */
def branchZ : (Parser[Int], Parser[String]) = {
  val regThenComma = reg <~ sep
  (regThenComma, label)
}
/** Operand parsers for a register-register instruction: `reg, reg, reg`. Combined by callers with `mapN`. */
def arith : (Parser[Int], Parser[Int], Parser[Int]) = {
  val regThenComma = reg <~ sep
  (regThenComma, regThenComma, reg)
}
/** Operand parsers for a register-immediate instruction: `reg, reg, imm`.
  * The immediate is tried as a `0x` hex literal first, then as a decimal integer.
  */
def arithImm : (Parser[Int], Parser[Int], Parser[Int]) = {
  val regThenComma = reg <~ sep
  val immediate    = hex | int
  (regThenComma, regThenComma, immediate)
}
/** Parses the literal `s` with optional leading whitespace and at least one trailing
  * whitespace character, yielding the matched literal.
  *
  * The mandatory trailing whitespace is what keeps a mnemonic from matching a longer
  * one that merely starts with it.
  */
def stringWs(s: String) : Parser[String] = for {
  _       <- many(whitespace)
  keyword <- string(s)
  _       <- many1(whitespace)
} yield keyword
/** Parser for every source line that maps to exactly one `Op`.
  *
  * Each entry is `mnemonic ~> operands`, and the entries are combined with `|` in list
  * order, so the first alternative that succeeds wins. Mnemonics that take operands go
  * through `stringWs`, which requires whitespace after the mnemonic. The operand-less
  * forms (`ret`, `nop`, `done`) are matched with a bare `string`.
  */
val singleInstruction: Parser[Op] = List(
  ////////////////////////////////////////////
  //// Branches
  stringWs("beq")  ~> branch.mapN{Branch.beq},
  stringWs("bne")  ~> branch.mapN{Branch.bne},
  stringWs("blt")  ~> branch.mapN{Branch.blt},
  stringWs("bge")  ~> branch.mapN{Branch.bge},
  stringWs("bltu") ~> branch.mapN{Branch.bltu},
  // This entry used to be wired to Branch.bgtu, i.e. a strict unsigned greater-than.
  // `bgeu rs1, rs2` is by definition `bleu rs2, rs1`, so it is expressed through
  // Branch.bleu with the operands swapped.
  // NOTE(review): if Ops provides a direct Branch.bgeu constructor, prefer using it here.
  stringWs("bgeu") ~> branch.mapN{ (rs1, rs2, dst) => Branch.bleu(rs2, rs1, dst) },
  // pseudos:
  stringWs("ble")  ~> branch.mapN{Branch.ble},
  stringWs("bgt")  ~> branch.mapN{Branch.bgt},
  stringWs("bleu") ~> branch.mapN{Branch.bleu},
  stringWs("bgtu") ~> branch.mapN{Branch.bgtu},
  // Introduce zero
  stringWs("bnez") ~> branchZ.mapN{Branch.bnez},
  stringWs("beqz") ~> branchZ.mapN{Branch.beqz},
  stringWs("blez") ~> branchZ.mapN{Branch.blez},
  ////////////////////////////////////////////
  //// Arith
  stringWs("add")  ~> arith.mapN{Arith.add},
  stringWs("sub")  ~> arith.mapN{Arith.sub},
  stringWs("or")   ~> arith.mapN{Arith.or},
  stringWs("xor")  ~> arith.mapN{Arith.xor},
  stringWs("and")  ~> arith.mapN{Arith.and},
  stringWs("sll")  ~> arith.mapN{Arith.sll},
  stringWs("srl")  ~> arith.mapN{Arith.srl},
  stringWs("sra")  ~> arith.mapN{Arith.sra},
  stringWs("slt")  ~> arith.mapN{Arith.slt},
  stringWs("sltu") ~> arith.mapN{Arith.sltu},
  // pseudos
  // mv rd, rs => add rd, rs, zero
  stringWs("mv")   ~> (reg <~ sep, reg, ok(0)).mapN{Arith.add},
  // "nop" followed by whitespace; the bare "nop" form is handled near the end of the list.
  stringWs("nop")  ~> (ok(0), ok(0), ok(0)).mapN{Arith.add},
  // Check if rs1 is not equal to 0.
  // snez rd, rs1 => sltu rd, zero, rs1
  stringWs("snez") ~> (reg <~ sep, ok(0), reg).mapN{Arith.sltu},
  ////////////////////////////////////////////
  //// Arith Imm
  stringWs("addi")  ~> arithImm.mapN{ArithImm.add},
  stringWs("ori")   ~> arithImm.mapN{ArithImm.or},
  stringWs("xori")  ~> arithImm.mapN{ArithImm.xor},
  stringWs("andi")  ~> arithImm.mapN{ArithImm.and},
  stringWs("slli")  ~> arithImm.mapN{ArithImmShift.sll},
  stringWs("srli")  ~> arithImm.mapN{ArithImmShift.srl},
  stringWs("srai")  ~> arithImm.mapN{ArithImmShift.sra},
  stringWs("slti")  ~> arithImm.mapN{ArithImm.slt},
  stringWs("sltiu") ~> arithImm.mapN{ArithImm.sltu},
  // pseudos
  // not rd, rs => xori rd, rs, -1
  stringWs("not")  ~> (reg <~ sep, reg, ok(-1)).mapN{ArithImm.xor},
  // Check if rs1 is less than 1. Only 0 is less than 1 when using unsigned comparison
  // seqz rd, rs1 => sltiu rd, rs1, 1
  stringWs("seqz") ~> (reg <~ sep, reg, ok(1)).mapN{ArithImm.sltu},
  // li with an immediate that passes the nBitsS <= 12 check becomes one addi from zero.
  // Wider immediates fail the `collect` here, so this alternative does not match.
  stringWs("li")   ~> (reg ~ sep ~ int).collect{
    case((a, b), c) if (c.nBitsS <= 12) => ArithImm.add(a, 0, c)
  },
  ////////////////////////////////////////////
  //// Jumps
  stringWs("jalr") ~> (reg <~ sep, reg <~ sep, label).mapN{JALR.apply},
  stringWs("jal")  ~> (reg <~ sep, label).mapN{JAL.apply},
  // pseudos
  // JAL with ra as rd automatically chosen.
  stringWs("call") ~> label.map(label => JAL(regNames.ra, label)),
  // For jr we don't care about where we jumped from.
  stringWs("jr")   ~> reg.map(r => JALR(0, r, "zero")),
  // As jr, but with a label rather than a register.
  stringWs("j")    ~> label.map(label => JAL(0, label)),
  many(whitespace) ~> string("ret") ~> ok(JALR(0, regNames.ra, "zero")),
  ////////////////////////////////////////////
  //// load/store
  // Source syntax is `op reg, offset(base)`; the constructors take (reg, base, offset).
  stringWs("sw") ~> (reg <~ sep, int <~ char('('), reg <~ char(')')).mapN{case (rs2, offset, rs1) => SW(rs2, rs1, offset)},
  stringWs("lw") ~> (reg <~ sep, int <~ char('('), reg <~ char(')')).mapN{case (rd, offset, rs1) => LW(rd, rs1, offset)},
  ////////////////////////////////////////////
  //// others
  stringWs("auipc") ~> (reg <~ sep, int).mapN{AUIPC.apply},
  stringWs("lui")   ~> (reg <~ sep, int).mapN{LUI.apply},
  many(whitespace) ~> string("nop")  ~> ok(Arith.add(0, 0, 0)),
  many(whitespace) ~> string("done") ~> ok(DONE),
  // stringWs("done") ~> ok(DONE),
).map(_.widen[Op]).reduce(_|_)
// def getShiftsHalfWord(offset: Int): (Int, Int) = (offset % 4) match {
// case 0 => (16, 16)
// case 1 => (
// }
/** Parser for source lines that expand to several `Op`s.
  *
  * Every expansion is written in execution order and then reversed. `parseProgram`
  * prepends the returned list to an accumulator that is reversed once at the end, so the
  * list handed back here has to be newest-first.
  */
val multipleInstructions: Parser[List[Op]] = List(
  // li with an immediate too wide for a single addi: lui rd, hi ; addi rd, rd, lo.
  // The addi must accumulate onto rd (not zero), otherwise the upper part loaded by lui is lost.
  // NOTE(review): addi sign-extends its immediate; this assumes splitLoHi(12) already
  // compensates `hi` when bit 11 of `lo` is set -- confirm against its definition.
  stringWs("li") ~> (reg <~ sep, int.map(_.splitLoHi(12))).mapN{ case(rd, (lo, hi)) => List(
    LUI(rd, hi),
    ArithImm.add(rd, rd, lo)
  ).reverse }.map(_.widen[Op]),
  // NOTE: lh is a real RV32I instruction, not a pseudo-op; it is emulated here with
  // word loads and shifts.
  // NOTE: the word-straddling case borrows a scratch register and spills it to address 2048.
  //
  // The byte lane is `offset & 3` and the word-aligned offset is `offset & ~3`; both are
  // also correct for negative offsets, where `%` would yield a negative remainder.
  stringWs("lh") ~> (reg <~ sep, int <~ char('('), reg <~ char(')')).mapN{
    // Lane 3: the halfword straddles two words (low byte = top byte of word 0,
    // high byte = bottom byte of word 1).
    case (rd, offset, rs1) if ((offset & 3) == 3) => {
      val wordOffset = offset & ~3
      // The scratch register must differ from rd (it is added into rd at the end) and
      // from rs1 (rs1 is still needed as base for the second load).
      val placeHolder = List(Reg("a0").value, Reg("a1").value)
        .find(r => r != rd && r != rs1)
        .getOrElse(Reg("a2").value)
      List(
        SW(placeHolder, 0, 2048),                          // spill the scratch register
        LW(placeHolder, rs1.value, wordOffset),            // word holding the low byte
        LW(rd.value, rs1.value, wordOffset + 4),           // word holding the high byte
        // Logical shift: the low byte must be zero-extended, or a set bit 7 would
        // subtract 256 from the assembled halfword.
        ArithImmShift.srl(placeHolder, placeHolder, 24),
        ArithImmShift.sll(rd.value, rd.value, 24),
        ArithImmShift.sra(rd.value, rd.value, 16),         // high byte, sign-extended, at bits 15:8
        Arith.add(rd, rd, placeHolder),
        LW(placeHolder, 0, 2048)).reverse                  // restore the scratch register
    }
    // Lane 2: the halfword is the upper half of the word; one arithmetic shift suffices.
    case (rd, offset, rs1) if ((offset & 3) == 2) => {
      List(
        LW(rd, rs1, (offset & ~3)),
        ArithImmShift.sra(rd, rd, 16)
      ).reverse
    }
    // Lanes 0 and 1: shift the halfword up to the top of the word, then sign-extend down.
    case (rd, offset, rs1) => {
      val leftShift = if((offset & 3) == 0) 16 else 8
      List(
        LW(rd, rs1, (offset & ~3)),
        ArithImmShift.sll(rd, rd, leftShift),
        ArithImmShift.sra(rd, rd, 16),
      ).reverse
    }
  }.map(_.widen[Op]),
).reduce(_|_)
/** Parser for any instruction line, always yielding a list of ops.
  * Single-op forms are tried first and wrapped in a one-element list; multi-op
  * expansions are only tried when no single-op form matches.
  */
val instruction = singleInstruction.map(op => List(op)) | multipleInstructions
/** Parser for test-setting directives:
  *   `#regset <reg>, <value>`   and   `#memset <addr>, <value>`
  * where values and addresses are either `0x` hex literals or decimal integers.
  */
val setting = {
  val number = hex | int
  // '#', the directive name, then at least one whitespace character.
  def directive(name: String) = char('#') ~> string(name) ~> many1(whitespace)
  val regset = directive("regset") ~> (reg.map(Reg.apply) <~ sep, number).mapN{REGSET.apply}
  val memset = directive("memset") ~> (number.map(Addr.apply) <~ sep, number).mapN{MEMSET.apply}
  regset.widen[TestSetting] | memset.widen[TestSetting]
}
/** Parses an assembly program, one source line per list element.
  *
  * @param p           the source lines; empty lines are skipped
  * @param testOptions supplies `nopPadded` (insert four NOPs after every op) and
  *                    `testName` (used in the error report)
  * @return `Right(program)` when every line parsed, otherwise `Left` with one message per
  *         failing line. Line numbers in messages and source info are 0-based indices into `p`.
  */
def parseProgram(p: List[String], testOptions: TestOptions): Either[String, Program] = {
  // A line is a test setting, an instruction (one or more ops), or a label definition.
  val all = setting || (instruction || labelDest)
  /**
    * The FoldHelper represents a traversal through a RISC-V program.
    *
    * When it sees an op it records the operation and appends the source line and its location.
    * If it is in nopPad mode it will also insert NOPs between the parsed ops.
    * After appending ops the address counter is bumped accordingly.
    *
    * When it sees a label destination it checks what the current address counter is at and creates
    * a link to this address.
    *
    * When it sees a parse error it simply stores the error and keeps going, allowing you to get every error
    * (This works for an ASM program since each line is independent)
    *
    * Lastly, when it sees a test setting it appends that test setting.
    *
    * The reason everything is treated all-in-one is to make it easier to ensure that everything is parsed.
    * If there were separate parsers for ops, labels and settings it would be difficult to find out if errors
    * were simply of the wrong type or a legit error.
    * This is not set in stone, if you're re-architecting the code maybe it's better to separate parsers?
    * Or maybe have multiple passes? Up to you!
    *
    * `settings`, `ops` and `errors` are accumulated newest-first and reversed once in `program`.
    */
  case class FoldHelper(
    settings  : List[TestSetting],
    ops       : List[SourceInfo[Op]],
    labelMap  : Map[Label, Addr],
    errors    : List[String],
    addrCount : Int){
    def addSettings (t: TestSetting): FoldHelper = copy(settings = t :: settings)
    def addErrors   (t: String): FoldHelper      = copy(errors = t :: errors)
    def addLabelMap (t: Label): FoldHelper       = copy(labelMap = labelMap + (t -> Addr(addrCount)))
    /** Records the ops of one source line. `t` is expected newest-first (last executed op
      * at the head), the same orientation as the `ops` accumulator.
      */
    def addOps (t: List[SourceInfo[Op]]): FoldHelper = {
      if(testOptions.nopPadded){
        // The accumulator is newest-first, so each op's four NOPs go *before* it here;
        // after the final reversal they end up after it. Keeping `t`'s own order makes
        // multi-op expansions come out in the same order as in the unpadded branch.
        val padded = t.flatMap(x => List.fill(4)(SourceInfo("inserted NOP", NOP).widen[Op]) ::: List(x))
        copy(
          ops = padded ::: ops,
          addrCount = addrCount + t.size*4*5)
      }
      else {
        copy(ops = t ::: ops, addrCount = addrCount + t.size*4)
      }
    }
    def program: Either[String, (List[TestSetting], List[SourceInfo[Op]], Map[Label, Addr])] = {
      /**
        * There are two possible ways for a program to successfully terminate, either by explicitly executing
        * a DONE instruction, or by returning from the main method.
        * In the latter case it is necessary to preload the return address register such that the return instruction
        * jumps to a predetermined special done address
        */
      val hasDONEinstruction = ops.map(_.run._2).contains(DONE)
      // sp is always preset to 1024; accumulated lists are put back into source order here.
      val done = copy(settings = REGSET(Reg("sp"), 1024) :: settings, ops = ops.reverse, errors = errors.reverse)
      val withReturnAddress = if(hasDONEinstruction){
        done
      }
      else
        done.copy(settings = REGSET(Reg("ra"), 0xEB1CEB1C) :: done.settings) // now that's what I call EPIC
      Either.cond(errors.isEmpty, (withReturnAddress.settings, done.ops, labelMap), done.errors).left.map(errors =>
        s"Parser errors in ${testOptions.testName}:\n" + errors.mkString("\n"))
    }
  }
  @scala.annotation.tailrec
  def foldHelper(
    acc: FoldHelper,
    program: List[(Int, String)]): FoldHelper = program match {
    case Nil => acc
    case (lineNo, line) :: t => {
      if(line.isEmpty)
        foldHelper(acc, t)
      else {
        // Operand parsers may throw (e.g. on an unknown register name). Turn that into a
        // per-line error so that one bad line cannot abort the whole parse.
        val parsed = scala.util.Try(all.parse(line).done.either) match {
          case scala.util.Success(result) => result
          case scala.util.Failure(e)      => Left(s"${e.getClass.getSimpleName}: ${e.getMessage}")
        }
        val next = parsed match {
          case Left(parseError)           => acc.addErrors(f"$lineNo%3d" +s":$line\t$parseError")
          case Right(Left(setting))       => acc.addSettings(setting)
          case Right(Right(Right(label))) => acc.addLabelMap(label)
          case Right(Right(Left(ops)))    => acc.addOps(ops.map(op => SourceInfo(s"${lineNo.toString.padTo(3, ' ')}:\t$line", op)))
        }
        foldHelper(next, t)
      }
    }
  }
  // The label "zero" is predefined at address 0.
  val results = foldHelper(FoldHelper(Nil, Nil, Map("zero" -> Addr(0)), Nil, 0), p.zipWithIndex.map(_.swap))
  results.program.map{ case(settings, ops, labelMap) => Program(ops, settings, labelMap) }
}
/** Register-name table: `x0`..`x31`, the ABI names, and `fp` as an alias for `s0`.
  * Lazy so that it is built once, on first lookup, independent of initialisation order.
  */
private lazy val registerNumbers: Map[String, Int] = {
  // ABI names listed by register number: the position in this list is the number.
  val abiNames = List(
    "zero", "ra", "sp", "gp", "tp", "t0", "t1", "t2",
    "s0", "s1", "a0", "a1", "a2", "a3", "a4", "a5",
    "a6", "a7", "s2", "s3", "s4", "s5", "s6", "s7",
    "s8", "s9", "s10", "s11", "t3", "t4", "t5", "t6")
  val numericNames = (0 to 31).map(i => s"x$i" -> i)
  (numericNames ++ abiNames.zipWithIndex).toMap + ("fp" -> 8)
}

/** Translates a register name (`x0`..`x31` or an ABI name such as `sp`, `a0`, `fp`)
  * into its register number.
  *
  * @param s the register name; must match exactly (case-sensitive, no surrounding whitespace)
  * @return the register number, 0 to 31
  * @throws NoSuchElementException if `s` is not a known register name
  */
def lookupReg(s: String): Int =
  registerNumbers.getOrElse(s, throw new NoSuchElementException(s"unknown register name: '$s'"))
}