-
Notifications
You must be signed in to change notification settings - Fork 21
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #155 from kth-step/quotationLib
Quotation lib
- Loading branch information
Showing
3 changed files
with
1,092 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,202 @@ | ||
\documentclass{article} | ||
\usepackage{syntax} | ||
|
||
\begin{document} | ||
\section{BIR syntax} | ||
|
||
\begin{itemize} | ||
\item \syntax{<literal>} is a numeric literal, either in decimal notation (e.g. 123) | ||
or in hexadecimal (e.g. 0xabc). | ||
|
||
\item \syntax{<var>} is a variable name. | ||
|
||
\item Variables that begin with the prefix ``MEM'' will be parsed as memory | ||
variables of default memory type; these are the only allowed memory variables, | ||
and they cannot be treated as registers. Any keywords that appear in the | ||
grammar below are reserved and cannot occur as variables. The string ``__ite'' | ||
is also reserved and used internally by the parser. | ||
\end{itemize} | ||
|
||
\setlength{\grammarparsep}{4pt plus 1pt minus 1pt} % increase separation between rules | ||
\setlength{\grammarindent}{12em} % increase separation between LHS/RHS | ||
|
||
\begin{grammar} | ||
|
||
<bit-type> ::= Bit8 | Bit16 | Bit32 | Bit64 | ||
|
||
<cast-op> ::= ucast | scast | hcast | lcast | ||
|
||
<unary-op> ::= chsign | cls | clz | ||
|
||
<infix-op> ::= + | - | * | / | \% | $\ll$ | $\gg$ | == | ... | ||
|
||
<prefix-op> ::= sdiv | smod | memeq | srsh | slt | sle | ||
|
||
<expr> ::= <var> \alt <literal> | ||
\alt \textasciitilde <expr> | ||
\alt <unary-op> <expr> | ||
\alt <expr> <infix-op> <expr> | ||
\alt <prefix-op> <expr> <expr> | ||
\alt ld <expr> <expr> | ||
\alt ld\{8,16,32,64\} <expr> <expr> | ||
\alt st <expr> <expr> <expr> | ||
\alt st\{8,16,32,64\} <expr> <expr> <expr> | ||
\alt <cast-op> <expr> <bit-type> | ||
\alt if <expr> then <expr> else <expr> | ||
\alt <expr>:<bit-type> | ||
|
||
<statement> ::= assert <expr> | ||
\alt assume <expr> | ||
\alt observe <literal> <expr> <expr> | ||
\alt <var> = <expr> | ||
|
||
<end-statement> ::= halt <expr> | ||
\alt jmp <expr> | ||
\alt cjmp <expr> <expr> <expr> | ||
|
||
<stat-list> ::= <statement> `\\n' <stat-list> | <statement> | ||
|
||
<label> ::= <string> | <literal> | ||
|
||
<block> ::= <label>: `\\n' <stat-list> `\\n' <end-statement> | ||
|
||
<program> ::= <block> `\\n' <program> | <block> | ||
\end{grammar} | ||
|
||
\section{Usage notes} | ||
|
||
The quotation library can be found in \texttt{bir_quotationLib} in the HolBA | ||
shared heap. | ||
|
||
\subsection{Parser} | ||
|
||
\paragraph{Invoking the quotation parser} Two parsers are provided: | ||
\begin{enumerate} | ||
\item \texttt{BExp}, to parse a single BIR expression, e.g. \texttt{BExp`X1+4`}; and | ||
\item \texttt{BIR}, to parse an entire BIR program. | ||
\end{enumerate} | ||
|
||
Here is an example of the latter: | ||
|
||
\begin{verbatim} | ||
val prog = | ||
BIR‘ | ||
a: | ||
assert (X1 > 0) | ||
X1 = if X2 == 3 then 2 else 3 | ||
MEM = st MEM 0x4 (sdiv 100 8) | ||
jmp b | ||
b: | ||
halt X1 | ||
’; | ||
\end{verbatim} | ||
|
||
Single-line comments are supported in the \texttt{BIR} parser only and begin | ||
with \texttt{';'}. Any characters that follow will be ignored up to the next | ||
newline. Example: | ||
|
||
\begin{verbatim} | ||
val prog_with_comments = | ||
BIR‘ | ||
a: | ||
assert (X1 > 0) | ||
X1 = X2 + X3 ; this is a comment | ||
jmp b ;another comment | ||
b: | ||
; can also be stand-alone | ||
halt X1 | ||
’; | ||
\end{verbatim} | ||
|
||
|
||
\paragraph{Type annotations} The parser assumes all variables and literals to be | ||
of 64-bit word type (\syntax{Bit64}). To explicitly ascribe a different type, we | ||
can use operator \texttt{:} as follows: | ||
|
||
\begin{verbatim} | ||
> BExp`5 : Bit32`; | ||
val it = ``BExp_Const (Imm32 5w)``: term | ||
\end{verbatim} | ||
|
||
Without the explicit annotation we get | ||
|
||
\begin{verbatim} | ||
> BExp`5`; | ||
val it = ``BExp_Const (Imm64 5w)``: term | ||
\end{verbatim} | ||
|
||
Type annotations can be applied to an entire expression, causing all variables | ||
and literals to be interpreted at the explicitly-provided type. For instance | ||
|
||
\begin{verbatim} | ||
> BExp`(5+5):Bit32`; | ||
val it = ``BExp_BinExp BIExp_Plus | ||
(BExp_Const (Imm32 5w)) (BExp_Const (Imm32 5w))``: term | ||
\end{verbatim} | ||
|
||
|
||
Note that this operator is not a cast, just a hint to the parser. | ||
|
||
\paragraph{Memory operation defaults} All memory operations assume that the | ||
memory maps 64-bit addresses to 8-bit words, and they are also assumed to be | ||
little endian. The default memory sizes can be controlled with global flags | ||
\texttt{quotation_default_size} and | ||
\texttt{quotation_default_size_byte}. | ||
|
||
\paragraph{Load size} Loads default to 64-bit, i.e. loading 8 consecutive bytes | ||
from the specified address. This behaviour can be overridden by using the | ||
explicit type annotation operator \texttt{:}, or by using one of the forms with | ||
a specific size. For example, \texttt{ld16} will load a 16-bit value. | ||
|
||
\begin{verbatim} | ||
> BExp`ld16 MEM 0x4000` | ||
``BExp_Load (BExp_Den (BVar "MEM" (BType_Mem Bit64 Bit8))) | ||
(BExp_Const (Imm64 16384w)) BEnd_LittleEndian Bit16``: term | ||
\end{verbatim} | ||
|
||
\paragraph{Store size} | ||
Stores automatically use the size of the value being stored. If needed, it's | ||
possible to override the default size for parsing the value by using the special | ||
\texttt{st} forms with a specific size. For example, \texttt{st8 MEM 0x4000 42} | ||
will store a single byte, and it is equivalent to writing \texttt{st MEM 0x4000 | ||
(42:Bit8)}. | ||
|
||
\paragraph{Observations} Observations are assumed to be of \texttt{bir_val_t} | ||
type, and the observation function is fixed to \texttt{`HD`}. | ||
|
||
\paragraph{Whitespace} Extra whitespace between tokens generally doesn't matter. | ||
For example \texttt{BExp`1+1`} and \texttt{BExp`1 + 1`} are equivalent. | ||
|
||
\paragraph{Statement separators} Statements in a block are separated by | ||
newlines. The label of a block should also be on a separate line. | ||
|
||
\paragraph{Application syntax} All multi-operand forms use curried application | ||
syntax, e.g. \texttt{BExp`ld MEM (X2+0x10)`}, \texttt{BExp`ucast X1 Bit32`}. | ||
|
||
\newpage | ||
|
||
\subsection{Pretty-printer} | ||
|
||
\paragraph{Invoking the pretty printer} Two pretty printers are provided: | ||
\begin{enumerate} | ||
\item \texttt{expr_to_string : term -> string}, to pretty-print a single BIR expression, e.g. | ||
\texttt{expr_to_string (BExp`X1+4`) = "X1 + 4"}; and | ||
\item \texttt{prog_to_string : term -> string}, to pretty-print an entire BIR program. | ||
\end{enumerate} | ||
|
||
These functions should be ``inverses'' of \texttt{BExp} and \texttt{BIR} modulo | ||
whitespace, comments, unnecessary brackets, associativity of infix operators, | ||
canonical forms of arithmetic comparisons, and the base of numeric literals, | ||
which are always printed in decimal notation. This property has not been | ||
verified or tested thoroughly. | ||
|
||
Some examples where the inverse doesn't hold in the strict sense: | ||
|
||
\begin{verbatim} | ||
> expr_to_string (BExp `X1 > 0x1000`); | ||
val it = "4096 < X1": string | ||
> expr_to_string (BExp `(X1 + X2) + X3`); | ||
val it = "X1 + X2 + X3": string | ||
\end{verbatim} | ||
|
||
\end{document} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
(* | ||
BIR quotation library for HolBA | ||
=============================== | ||
This library is released under the standard BSD-3-Clause license. | ||
See .sml file for more docs. | ||
Copyright 2022 Pablo Buiras | ||
Redistribution and use in source and binary forms, with or without modification, | ||
are permitted provided that the following conditions are met: | ||
1. Redistributions of source code must retain the above copyright notice, this | ||
list of conditions and the following disclaimer. | ||
2. Redistributions in binary form must reproduce the above copyright notice, | ||
this list of conditions and the following disclaimer in the documentation and/or | ||
other materials provided with the distribution. | ||
3. Neither the name of the copyright holder nor the names of its contributors | ||
may be used to endorse or promote products derived from this software without | ||
specific prior written permission. | ||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND | ||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | ||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | ||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR | ||
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES | ||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | ||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON | ||
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | ||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
*) | ||
signature bir_quotationLib = | ||
sig | ||
include Abbrev | ||
|
||
(* Set the following refs to alter memory defaults *) | ||
(* default = 64 *) | ||
val quotation_default_size : int ref | ||
|
||
(* default = 8 *) | ||
val quotation_default_size_byte : int ref | ||
|
||
(* default=["MEM","_MEM"] *) | ||
val quotation_memory_prefixes : string list ref | ||
|
||
(* Parsers *) | ||
val BExp : 'a frag list -> term | ||
val BIR : 'a frag list -> term | ||
|
||
(* Pretty printers *) | ||
val expr_to_string : term -> string | ||
val prog_to_string : term -> string | ||
end |
Oops, something went wrong.