Skip to content

Commit

Permalink
Merge pull request #155 from kth-step/quotationLib
Browse files Browse the repository at this point in the history
Quotation lib
  • Loading branch information
didriklundberg authored Sep 29, 2022
2 parents 8ce6be6 + 04c4bd2 commit e4e3876
Show file tree
Hide file tree
Showing 3 changed files with 1,092 additions and 0 deletions.
202 changes: 202 additions & 0 deletions doc/bir_grammar/bir_grammar.tex
Original file line number Diff line number Diff line change
@@ -0,0 +1,202 @@
\documentclass{article}
\usepackage{syntax}

\begin{document}
\section{BIR syntax}

\begin{itemize}
\item \syntax{<literal>} is a numeric literal, either in decimal notation (e.g. 123)
or in hexadecimal (e.g. 0xabc).

\item \syntax{<var>} is a variable name.

\item Variables that begin with the prefix ``MEM'' will be parsed as memory
variables of default memory type; these are the only allowed memory variables,
and they cannot be treated as registers. Any keywords that appear in the
grammar below are reserved and cannot occur as variables. The string ``__ite''
is also reserved and used internally by the parser.
\end{itemize}

\setlength{\grammarparsep}{4pt plus 1pt minus 1pt} % increase separation between rules
\setlength{\grammarindent}{12em} % increase separation between LHS/RHS

\begin{grammar}

<bit-type> ::= Bit8 | Bit16 | Bit32 | Bit64

<cast-op> ::= ucast | scast | hcast | lcast

<unary-op> ::= chsign | cls | clz

<infix-op> ::= + | - | * | / | \% | $\ll$ | $\gg$ | == | ...

<prefix-op> ::= sdiv | smod | memeq | srsh | slt | sle

<expr> ::= <var> \alt <literal>
\alt \textasciitilde <expr>
\alt <unary-op> <expr>
\alt <expr> <infix-op> <expr>
\alt <prefix-op> <expr> <expr>
\alt ld <expr> <expr>
\alt ld\{8,16,32,64\} <expr> <expr>
\alt st <expr> <expr> <expr>
\alt st\{8,16,32,64\} <expr> <expr> <expr>
\alt <cast-op> <expr> <bit-type>
\alt if <expr> then <expr> else <expr>
\alt <expr>:<bit-type>

<statement> ::= assert <expr>
\alt assume <expr>
\alt observe <literal> <expr> <expr>
\alt <var> = <expr>

<end-statement> ::= halt <expr>
\alt jmp <expr>
\alt cjmp <expr> <expr> <expr>

<stat-list> ::= <statement> `\\n' <stat-list> | <statement>

<label> ::= <string> | <literal>

<block> ::= <label>: `\\n' <stat-list> `\\n' <end-statement>

<program> ::= <block> `\\n' <program> | <block>
\end{grammar}

\section{Usage notes}

The quotation library can be found in \texttt{bir_quotationLib} in the HolBA
shared heap.

\subsection{Parser}

\paragraph{Invoking the quotation parser} Two parsers are provided:
\begin{enumerate}
\item \texttt{BExp}, to parse a single BIR expression, e.g. \texttt{BExp`X1+4`}; and
\item \texttt{BIR}, to parse an entire BIR program.
\end{enumerate}

Here is an example of the latter:

\begin{verbatim}
val prog =
BIR‘
a:
assert (X1 > 0)
X1 = if X2 == 3 then 2 else 3
MEM = st MEM 0x4 (sdiv 100 8)
jmp b
b:
halt X1
’;
\end{verbatim}

Single-line comments are supported in the \texttt{BIR} parser only and begin
with \texttt{';'}. Any characters that follow will be ignored up to the next
newline. Example:

\begin{verbatim}
val prog_with_comments =
BIR‘
a:
assert (X1 > 0)
X1 = X2 + X3 ; this is a comment
jmp b ;another comment
b:
; can also be stand-alone
halt X1
’;
\end{verbatim}


\paragraph{Type annotations} The parser assumes all variables and literals to be
of 64-bit word type (\syntax{Bit64}). To explicitly ascribe a different type, we
can use operator \texttt{:} as follows:

\begin{verbatim}
> BExp`5 : Bit32`;
val it = ``BExp_Const (Imm32 5w)``: term
\end{verbatim}

Without the explicit annotation we get

\begin{verbatim}
> BExp`5`;
val it = ``BExp_Const (Imm64 5w)``: term
\end{verbatim}

Type annotations can be applied to an entire expression, causing all variables
and literals to be interpreted at the explicitly-provided type. For instance

\begin{verbatim}
> BExp`(5+5):Bit32`;
val it = ``BExp_BinExp BIExp_Plus
(BExp_Const (Imm32 5w)) (BExp_Const (Imm32 5w))``: term
\end{verbatim}


Note that this operator is not a cast, just a hint to the parser.

\paragraph{Memory operation defaults} All memory operations assume that the
memory maps 64-bit addresses to 8-bit words, and they are also assumed to be
little endian. The default memory sizes can be controlled with global flags
\texttt{quotation_default_size} and
\texttt{quotation_default_size_byte}.

\paragraph{Load size} Loads default to 64-bit, i.e. loading 8 consecutive bytes
from the specified address. This behaviour can be overridden by using the
explicit type annotation operator \texttt{:}, or by using one of the forms with
a specific size. For example, \texttt{ld16} will load a 16-bit value.

\begin{verbatim}
> BExp`ld16 MEM 0x4000`
``BExp_Load (BExp_Den (BVar "MEM" (BType_Mem Bit64 Bit8)))
(BExp_Const (Imm64 16384w)) BEnd_LittleEndian Bit16``: term
\end{verbatim}

\paragraph{Store size}
Stores automatically use the size of the value being stored. If needed, it's
possible to override the default size for parsing the value by using the special
\texttt{st} forms with a specific size. For example, \texttt{st8 MEM 0x4000 42}
will store a single byte, and it is equivalent to writing \texttt{st MEM 0x4000
(42:Bit8)}.

\paragraph{Observations} Observations are assumed to be of \texttt{bir_val_t}
type, and the observation function is fixed to \texttt{`HD`}.

\paragraph{Whitespace} Extra whitespace between tokens generally doesn't matter.
For example \texttt{BExp`1+1`} and \texttt{BExp`1 + 1`} are equivalent.

\paragraph{Statement separators} Statements in a block are separated by
newlines. The label of a block should also be on a separate line.

\paragraph{Application syntax} All multi-operand forms use curried application
syntax, e.g. \texttt{BExp`ld MEM (X2+0x10)`}, \texttt{BExp`ucast X1 Bit32`}.

\newpage

\subsection{Pretty-printer}

\paragraph{Invoking the pretty printer} Two pretty printers are provided:
\begin{enumerate}
\item \texttt{expr_to_string : term -> string}, to pretty-print a single BIR expression, e.g.
\texttt{expr_to_string (BExp`X1+4`) = "X1 + 4"}; and
\item \texttt{prog_to_string : term -> string}, to pretty-print an entire BIR program.
\end{enumerate}

These functions should be ``inverses'' of \texttt{BExp} and \texttt{BIR} modulo
whitespace, comments, unnecessary brackets, associativity of infix operators,
canonical forms of arithmetic comparisons, and the base of numeric literals,
which are always printed in decimal notation. This property has not been
verified or tested thoroughly.

Some examples where the inverse doesn't hold in the strict sense:

\begin{verbatim}
> expr_to_string (BExp `X1 > 0x1000`);
val it = "4096 < X1": string
> expr_to_string (BExp `(X1 + X2) + X3`);
val it = "X1 + X2 + X3": string
\end{verbatim}

\end{document}
55 changes: 55 additions & 0 deletions src/shared/bir_quotationLib.sig
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
(*
BIR quotation library for HolBA
===============================
This library is released under the standard BSD-3-Clause license.
See .sml file for more docs.
Copyright 2022 Pablo Buiras
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation and/or
other materials provided with the distribution.
3. Neither the name of the copyright holder nor the names of its contributors
may be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*)
signature bir_quotationLib =
sig
include Abbrev

(* Set the following refs to alter memory defaults *)
(* default = 64 *)
val quotation_default_size : int ref

(* default = 8 *)
val quotation_default_size_byte : int ref

(* default=["MEM","_MEM"] *)
val quotation_memory_prefixes : string list ref

(* Parsers *)
val BExp : 'a frag list -> term
val BIR : 'a frag list -> term

(* Pretty printers *)
val expr_to_string : term -> string
val prog_to_string : term -> string
end
Loading

0 comments on commit e4e3876

Please sign in to comment.