changeset 3945:a1aadf0c7383

added libgfx
author Alexandre Becoulet <alexandre.becoulet@free.fr>
date Sun, 27 May 2018 18:45:35 +0200
parents 7986589d0de4
children f0df16897b9f
files libgfx/Makefile libgfx/bc_custom_gfx.pm libgfx/bytecode.c libgfx/gfx.c libgfx/gfx.config libgfx/include/gfx/arc.t libgfx/include/gfx/blit.t libgfx/include/gfx/bytecode.h libgfx/include/gfx/circle.t libgfx/include/gfx/gfx.t libgfx/include/gfx/line.t libgfx/include/gfx/math.h libgfx/include/gfx/pixel.t libgfx/include/gfx/rect.t libgfx/test/Makefile libgfx/test/backslash.pl libgfx/test/bc_custom_gfx.pm libgfx/test/bytecode.c libgfx/test/bytecode.h libgfx/test/font_8x11.xbm libgfx/test/font_8x11_v.xbm libgfx/test/font_8x11_v_pow2.xbm libgfx/test/test.c libgfx/test/test_arc.bc libgfx/test/test_arith.bc libgfx/test/test_blit.bc libgfx/test/test_circle.bc libgfx/test/test_line.bc libgfx/test/test_rect.bc libgfx/test/test_scroll.bc libgfx/test/test_tile.bc
diffstat 31 files changed, 6620 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libgfx/Makefile	Sun May 27 18:45:35 2018 +0200
@@ -0,0 +1,8 @@
+# Collect the *.t and *.h files of include/gfx, then strip the source dir prefix.
+header_sources = $(wildcard $(LOCAL_SRC_DIR)/include/gfx/*.t)
+header_raw = $(wildcard $(LOCAL_SRC_DIR)/include/gfx/*.h)
+pre_headers = $(subst $(LOCAL_SRC_DIR),,$(header_sources:.t=.h))
+copy = $(subst $(LOCAL_SRC_DIR),,$(header_raw))
+# NOTE(review): bytecode.o is presumably built only when CONFIG_GFX_BYTECODE is set.
+objs = gfx.o
+objs-$(CONFIG_GFX_BYTECODE) += bytecode.o
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libgfx/bc_custom_gfx.pm	Sun May 27 18:45:35 2018 +0200
@@ -0,0 +1,372 @@
+package bc_custom_gfx;
+
+# *  gfx_disp       n                1000 1000 00nn 0000   : copy the drawable surface to the display at 0,0
+# *  gfx_disp_at    xy, n            1000 1000 01nn xxxx   : copy the drawable surface to the display at the specified position and size. size is in r15
+#
+# *  gfx_surface    p, d, n          1001 00nn pppp dddd   : setup surface `n'. `p' is the pointer to surface data and `d' is the descriptor
+# *  gfx_tilemap    d, n             1001 01-- --nn dddd   : setup the current tile map. `d' is the tile descriptor and `n' is the source surface
+# *  gfx_attr_l8    a                1001 1000 aaaa aaaa   : select the current drawing attribute from a 8 bit greyscale value
+#    gfx_attr_rgb8  a                1001 1001 aaaa aaaa   : select the current drawing color from a 8 bits RRRGGGBB value
+#    gfx_pick_at    xy               1001 1010 --nn xxxx   : pick the color from a surface and set the current attribute
+# *  gfx_swap       n, N             1001 1011 nnNN ----   : exchange index of surfaces n and N
+#    gfx_pick       x                1001 11nn xxxx xxxx   : pick the color from a surface and set the current attribute.
+#
+# *  gfx_circle     xy, q            1010 0-00 qqqq xxxx   : draw a circle on the current drawable. q selects the quadrants to draw, radius is r15
+# *  gfx_circle_f   xy, q            1010 0-01 qqqq xxxx   : draw a filled circle on the current drawable. q selects the quadrants to draw, radius is r15
+# *  gfx_circle_i   xy, q            1010 0-10 qqqq xxxx   : draw an inverted filled circle on the current drawable. q selects the quadrants to draw, radius is r15
+# *  gfx_arc_cw     xy, a            1010 10-0 xxxx aaaa   : draw an arc with center and radius on the current drawable. a holds the begin and end angles, radius is r15, direction (CW|CCW) is given by the mnemonic
+# *  gfx_arc_ccw    xy, a            1010 10-1 xxxx aaaa   : draw an arc with center and radius on the current drawable. a holds the begin and end angles, radius is r15, direction (CW|CCW) is given by the mnemonic
+#    gfx_xyarc_cw   xy, XY           1010 11-0 xxxx XXXX   : draw an arc between xy and XY on the current drawable. radius is r15, c is CW|CCW
+#    gfx_xyarc_ccw  xy, XY           1010 11-1 xxxx XXXX   : draw an arc between xy and XY on the current drawable. radius is r15, c is CW|CCW
+
+# *  gfx_line       xy, XY           1011 000- xxxx XXXX   : draw a line on the current drawable
+# *  gfx_point      xy               1011 000- xxxx xxxx   : draw a point (same as gfx_line xy, xy)
+# *  gfx_tile       xy, tile         1011 0010 xxxx tttt   : draw a single tile on the current drawable, xy is tile top left corner
+# *  gfx_tilec      xy, tile         1011 0011 xxxx tttt   : draw a single tile on the current drawable, xy is tile center
+# *  gfx_rect       xy, XY           1011 0100 xxxx XXXX   : draw a rectangle on the current drawable. f is filled
+# *  gfx_rect_r     xy, XY           1011 0110 xxxx XXXX   : draw a rectangle with round corners on the current drawable. f is filled, radius is r15
+# *  gfx_rect_f     xy, XY           1011 0101 xxxx XXXX   : draw a filled rectangle on the current drawable. f is filled
+# *  gfx_rect_fr    xy, XY           1011 0111 xxxx XXXX   : draw a filled rectangle with round corners on the current drawable. f is filled, radius is r15
+# *  gfx_tilestr    xy, str, r15, dir 1011 10dd xxxx ssss   : draw a string on the current drawable. Use tile map as font. string len is in r15.
+# *  gfx_tilestrc   xy, str, r15, dir 1011 11dd xxxx ssss   : draw a centered string on the current drawable. Use tile map as font. string len is in r15.
+#    gfx_plotx      xy, scale, buf
+#
+#    gfx_copy                        1100 000- ---- ----   : copy from source to drawable of the same size
+# *  gfx_clear_l8   attr             1100 0010 aaaa aaaa   : fill the current drawable with the specified pixel attribute
+#    gfx_clear_rgb8 attr             1100 0011 aaaa aaaa   : fill the current drawable with the specified pixel attribute
+#
+# *  gfx_blit       xy, XY, s        1100 10ss xxxx XXXX   : blit from source surface s at XY to drawable surface at xy. rectangle size is r15. Whole source when r15 is 0.
+# *  gfx_blit_o     xy, XY           1100 11-- xxxx XXXX   : blit from drawable surface at XY to drawable surface at xy. rectangle size is r15. Blit areas are allowed to overlap.
+#
+# *  gfx_addxi      xy, +/-v         1101 0svv vvvv vvxx   : add a signed constant value to the integral part of the x component of a vector
+# *  gfx_addyi      xy, +/-v         1101 1svv vvvv vvxx   : add a signed constant value to the integral part of y component of a vector
+# *  gfx_addv       xy, XY           1110 0000 xxxx XXXX   : add vectors and store result in xy
+# *  gfx_subv       xy, XY           1110 0001 xxxx XXXX   : subtract vectors and store result in xy
+# *  gfx_negx       XY               1110 0010 -001 XXXX   : compute (-x, y) of vector
+# *  gfx_negy       XY               1110 0010 -010 XXXX   : compute (x, -y) of vector
+# *  gfx_negv       XY               1110 0010 -011 XXXX   : compute (-x, -y) of vector
+# *  gfx_swpv       XY               1110 0010 -100 XXXX   : compute (y, x) of vector
+# *  gfx_negx_swpv  XY               1110 0010 -101 XXXX   : compute (y, -x) of vector
+# *  gfx_negy_swpv  XY               1110 0010 -110 XXXX   : compute (-y, x) of vector
+# *  gfx_negv_swpv  XY               1110 0010 -111 XXXX   : compute (-y, -x) of vector
+# *  gfx_mul        a, b             1110 0011 aaaa bbbb   : multiply signed Q27.5 values
+
+# *  gfx_mulxy      XY, r            1110 0100 rrrr XXXX   : multiply both x and y components by a Q27.5 value from register
+# *  gfx_addx       XY, rx           1110 0101 rrrr XXXX   : add a Q27.5 value from a register to the x component
+# *  gfx_addy       XY, ry           1110 0110 rrrr XXXX   : add a Q27.5 value from a register to the y component
+# *  gfx_divxy      XY, r            1110 0111 rrrr XXXX   : divide both x and y components by a Q27.5 value from register
+
+# *  gfx_unpack     rx, ry, XY       1110 1000 rrr0 XXXX   : move both x and y unsigned components to two contiguous registers
+# *  gfx_unpacks    rx, ry, XY       1110 1000 rrr1 XXXX   : move both x and y signed components to two contiguous registers
+# *  gfx_packx0     XY, rx           1110 1001 rrr0 XXXX   : set the x component from a register and clear the y component
+# *  gfx_packx      XY, rx           1110 1001 rrr1 XXXX   : set the x component from a register
+# *  gfx_pack0y     XY, ry           1110 1010 rrr0 XXXX   : set the y component from a register and clear the x component
+# *  gfx_packy      XY, ry           1110 1010 rrr1 XXXX   : set the y component from a register
+# *  gfx_pack       XY, rx, ry       1110 1011 rrr- XXXX   : set both the x and y components from two contiguous registers
+
+# *  gfx_size       xy, s            1110 1100 00nn xxxx   : get surface size
+# *  gfx_hypot      xy               1110 1100 0100 xxxx   : compute distance (Q27.5) from a signed vector
+# *  gfx_sqrt       a                1110 1100 0101 aaaa   : compute square root of a Q27.5 value
+# *  gfx_sincos     a, r15           1110 1100 0110 aaaa   : compute a sin,cos signed vector scaled by r15 (Q27.5) from angle (Q9.5)
+# *  gfx_sin        a, r15           1110 1100 1000 aaaa   : compute a sin (Q27.5) scaled by r15 (Q27.5) from angle (Q9.5)
+# *  gfx_cos        a, r15           1110 1100 1001 aaaa   : compute a cos (Q27.5) scaled by r15 (Q27.5) from angle (Q9.5)
+
+#    gfx_mreset                      1111 000- ---- ----   : reset the current transform matrix
+#    gfx_mpush                       1111 001- ---- ----   : push the current transform matrix (a 4 entries ring is used)
+#    gfx_mpop                        1111 010- ---- ----   : pop the current transform matrix
+#    gfx_translate  vect             1111 011- ---- vvvv   : apply a translation by r[v] to the current matrix
+#    gfx_rot        angle            1111 100- ---- aaaa   : apply a rotation of r[a] degrees to the current matrix
+#    gfx_rotc       angle            1111 101a aaaa aaaa   : apply a constant rotation to the current matrix in degrees
+#    gfx_scale      vect             1111 110- ---- vvvv   : apply a 2d scale to the current matrix by r[v]
+#    gfx_apply      xy, XY           1111 111- xxxx XXXX   : apply the current matrix to the position XY and store result in xy
+#
+#  vector register format:
+#
+#    xxxxxyyy yyXXXYYY YYYYYYYY XXXXXXXX
+#        |     |  |  |        |        \----- x integral low part
+#        |     |  |  |        \-------------- y integral low part
+#        |     |  |  \----------------------- y integral high part
+#        |     |  \-------------------------- x integral high part
+#        |     \----------------------------- y fractional part
+#        \----------------------------------- x fractional part
+#
+#  register format of a surface descriptor:
+#    wwwwwwww wwwwhhhh hhhhhhhh zzzfffff
+#                                  \--------- pixel format
+#                               \------------ compression (0:none, rle, lzo, ..)
+#                  \------------------------- height
+#    \--------------------------------------- width
+#
+#  register format of a tile descriptor:
+#    wwwwwwww wwwwhhhh hhhhhhhh oooooooo
+#                               \------------ index of first tile
+#                 \-------------------------- tile height
+#    \--------------------------------------- tile width
+
+# Encode an 8 bit constant (operand 0, range 0..255) in bits 7:0.
+sub parse_attr
+{
+    my ($op) = @_;
+    my $attr = main::check_num( $op, 0, 0, 255 );
+
+    $op->{code} |= $attr;
+}
+
+# Encode the register given as operand 0 in bits 3:0 of the opcode.
+sub parse_1reg
+{
+    my ($op) = @_;
+    my $reg = main::check_reg( $op, 0 );
+
+    $op->{code} |= $reg;
+}
+
+sub parse_1reg_r15
+{
+    my ($op) = @_;
+    main::check_reg( $op, 1, 15, 15 );    # operand 1: bounds 15..15, i.e. r15
+    return parse_1reg( $op );
+}
+
+# Encode two register operands: operand 0 in bits 7:4, operand 1 in bits 3:0.
+sub parse_2reg
+{
+    my ($op) = @_;
+    my $hi = main::check_reg( $op, 0 );
+    my $lo = main::check_reg( $op, 1 );
+
+    $op->{code} |= $lo | ($hi << 4);
+}
+
+# Encode the tile string operands: position register in bits 7:4, string
+# register in bits 3:0 and direction (0..3) in bits 9:8. Operand 2 is
+# the string length register and is only accepted when it is r15.
+sub parse_tilestr
+{
+    my ($thisop) = @_;
+    my $xy  = main::check_reg( $thisop, 0 );
+    my $str = main::check_reg( $thisop, 1 );
+    my $len = main::check_reg( $thisop, 2 );
+    my $dir = main::check_num( $thisop, 3, 0, 3 );
+
+    if ( $len != 15 ) { die "$thisop->{line}: string length must be r15\n"; }
+    $thisop->{code} |= $str | ($xy << 4) | ($dir << 8);
+}
+
+# Encode one register operand in both register fields (bits 7:4 and 3:0).
+sub parse_1reg_2
+{
+    my ($op) = @_;
+    my $reg = main::check_reg( $op, 0 );
+
+    $op->{code} |= $reg | ($reg << 4);
+}
+
+# Encode two registers (operand 0 in bits 7:4, operand 1 in bits 3:0)
+# followed by a 0..3 constant (operand 2) stored in bits 9:8.
+sub parse_2reg_s
+{
+    my ($op) = @_;
+    my $hi  = main::check_reg( $op, 0 );
+    my $lo  = main::check_reg( $op, 1 );
+    my $idx = main::check_num( $op, 2, 0, 3 );
+    $op->{code} |= $lo | ($hi << 4) | ($idx << 8);
+}
+
+# Encode a register limited to r0..r3 (bits 1:0) and a signed constant
+# in -256..255, kept as its low 9 bits in bits 10:2.
+sub parse_addi
+{
+    my ($op) = @_;
+    my $reg = main::check_reg( $op, 0, 0, 3 );
+    my $imm = main::check_num( $op, 1, -256, 255 );
+    $op->{code} |= $reg | (($imm & 0x1ff) << 2);
+}
+
+# Encode a 0..3 constant (operand 0) in bits 5:4.
+sub parse_s
+{
+    my ($op) = @_;
+    my $idx = main::check_num( $op, 0, 0, 3 );
+
+    $op->{code} |= $idx << 4;
+}
+
+# Encode two 0..3 constants: operand 0 in bits 5:4 and operand 1 in
+# bits 7:6.
+sub parse_swap
+{
+    my ($op) = @_;
+    my $first  = main::check_num( $op, 0, 0, 3 );
+    my $second = main::check_num( $op, 1, 0, 3 );
+    $op->{code} |= ($second << 6) | ($first << 4);
+}
+
+# Encode two registers and a 0..3 constant exactly like parse_2reg_s,
+# then reject the case where both register fields name the same register.
+sub parse_disp_at
+{
+    my $thisop = shift;    # was missing: $thisop was never taken from @_
+    parse_2reg_s( $thisop );
+    die "$thisop->{line}: same register can't be used for position and size\n"
+        if ((($thisop->{code} >> 4) & 15) == ($thisop->{code} & 15));
+}
+
+# Encode a register (operand 0) in bits 3:0 and a 0..3 constant
+# (operand 1) in bits 5:4.
+sub parse_1reg_s
+{
+    my ($op) = @_;
+    my $reg = main::check_reg( $op, 0 );
+    my $idx = main::check_num( $op, 1, 0, 3 );
+    $op->{code} |= $reg | ($idx << 4);
+}
+
+sub parse_1reg_s_r15
+{
+    my ($op) = @_;
+    main::check_reg( $op, 2, 15, 15 );    # operand 2: bounds 15..15, i.e. r15
+    return parse_1reg_s( $op );
+}
+
+sub parse_attr
+{
+    my $thisop = shift;
+    # NOTE(review): duplicates the parse_attr defined earlier in this file.
+    my $arg0 = main::check_num( $thisop, 0, 0, 255 );
+    # Merge the 0..255 constant into the low byte of the opcode.
+    $thisop->{code} |= $arg0;
+}
+
+# Encode the circle operands: center register in bits 3:0 and a
+# non-zero 4 bit selector (1..15) in bits 7:4.
+sub parse_circle
+{
+    my ($op) = @_;
+    my $center = main::check_reg( $op, 0 );
+    my $sel    = main::check_num( $op, 1, 1, 15 );
+    $op->{code} |= $center | ($sel << 4);
+}
+
+# Encode the unpack operands: operands 0 and 1 are the destination pair
+# (an even register followed by the next one), operand 2 is the vector.
+sub parse_unpack
+{
+    my ($thisop) = @_;
+    my $rx  = main::check_reg( $thisop, 0 );
+    my $ry  = main::check_reg( $thisop, 1 );
+    my $vec = main::check_reg( $thisop, 2 );
+    unless ( $ry - 1 == $rx && !( $rx & 1 ) ) {
+        die "$thisop->{line}: contiguous destination registers expected\n";
+    }
+    $thisop->{code} |= $vec | ($rx << 4);
+}
+
+# Encode the vector register in bits 3:0 and the odd source register,
+# with its low bit cleared, in bits 7:4.
+sub parse_packy
+{
+    my ($thisop) = @_;
+    my $vec = main::check_reg( $thisop, 0 );
+    my $src = main::check_reg( $thisop, 1 );
+
+    if ( !( $src & 1 ) ) { die "$thisop->{line}: odd source register expected\n"; }
+
+    $thisop->{code} |= $vec | (($src & 14) << 4);
+}
+
+# Encode the vector register in bits 3:0 and the even source register
+# in bits 7:4.
+sub parse_packx
+{
+    my ($thisop) = @_;
+    my $vec = main::check_reg( $thisop, 0 );
+    my $src = main::check_reg( $thisop, 1 );
+
+    unless ( !( $src & 1 ) ) { die "$thisop->{line}: even source register expected\n"; }
+
+    $thisop->{code} |= $vec | ($src << 4);
+}
+
+# Encode the pack operands: operand 0 is the vector register (bits 3:0),
+# operands 1 and 2 are the source pair: an even register then the next one.
+sub parse_pack
+{
+    my ($thisop) = @_;
+    my $vec = main::check_reg( $thisop, 0 );
+    my $rx  = main::check_reg( $thisop, 1 );
+    my $ry  = main::check_reg( $thisop, 2 );
+    unless ( $ry - 1 == $rx && !( $rx & 1 ) ) {
+        die "$thisop->{line}: contiguous source registers expected\n";
+    }
+    $thisop->{code} |= $vec | ($rx << 4);
+}
+
+
+main::custom_op('gfx_disp',      1, 0x0800, \&parse_s );
+main::custom_op('gfx_disp_at',   3, 0x0840, \&parse_1reg_s_r15 );   # 01nn per the opcode table; 0x0800 encoded like gfx_disp
+
+main::custom_op('gfx_surface',   3, 0x1000, \&parse_2reg_s );
+main::custom_op('gfx_tilemap',   2, 0x1400, \&parse_1reg_s );
+main::custom_op('gfx_attr_l8',   1, 0x1800, \&parse_attr );
+main::custom_op('gfx_attr_rgb8', 1, 0x1900, \&parse_attr );
+main::custom_op('gfx_swap',      2, 0x1b00, \&parse_swap );
+# NOTE(review): gfx_attr_rgb8 above (0x1900) has no case in gfx_bc_run (bytecode.c).
+main::custom_op('gfx_circle',    2, 0x2000, \&parse_circle );
+main::custom_op('gfx_circle_f',  2, 0x2100, \&parse_circle );
+main::custom_op('gfx_circle_i',  2, 0x2200, \&parse_circle );
+# NOTE(review): gfx_xyarc_* assemble here but their case in gfx_bc_run is under #if 0.
+main::custom_op('gfx_arc_cw',    2, 0x2800, \&parse_2reg );
+main::custom_op('gfx_arc_ccw',   2, 0x2900, \&parse_2reg );
+main::custom_op('gfx_xyarc_cw',  2, 0x2c00, \&parse_2reg );
+main::custom_op('gfx_xyarc_ccw', 2, 0x2d00, \&parse_2reg );
+main::custom_op('gfx_line',      2, 0x3000, \&parse_2reg );
+main::custom_op('gfx_point',     1, 0x3000, \&parse_1reg_2 );
+main::custom_op('gfx_tile',      2, 0x3200, \&parse_2reg );
+main::custom_op('gfx_tilec',     2, 0x3300, \&parse_2reg );
+main::custom_op('gfx_rect',      2, 0x3400, \&parse_2reg );
+main::custom_op('gfx_rect_r',    2, 0x3600, \&parse_2reg );
+main::custom_op('gfx_rect_f',    2, 0x3500, \&parse_2reg );
+main::custom_op('gfx_rect_fr',   2, 0x3700, \&parse_2reg );
+main::custom_op('gfx_tilestr',   4, 0x3800, \&parse_tilestr );
+main::custom_op('gfx_tilestrc',  4, 0x3c00, \&parse_tilestr );
+# Clear and blit. NOTE(review): the opcode table names 0x4200 gfx_clear_l8.
+main::custom_op('gfx_clear',     1, 0x4200, \&parse_attr );
+main::custom_op('gfx_blit',      3, 0x4800, \&parse_2reg_s );
+main::custom_op('gfx_blit_o',    2, 0x4c00, \&parse_2reg );
+# Vector arithmetic.
+main::custom_op('gfx_addxi',     2, 0x5000, \&parse_addi );
+main::custom_op('gfx_addyi',     2, 0x5800, \&parse_addi );
+main::custom_op('gfx_addv',      2, 0x6000, \&parse_2reg );
+main::custom_op('gfx_subv',      2, 0x6100, \&parse_2reg );
+main::custom_op('gfx_negx',      1, 0x6210, \&parse_1reg );
+main::custom_op('gfx_negy',      1, 0x6220, \&parse_1reg );
+main::custom_op('gfx_negv',      1, 0x6230, \&parse_1reg );
+main::custom_op('gfx_swpv',      1, 0x6240, \&parse_1reg );
+main::custom_op('gfx_negx_swpv', 1, 0x6250, \&parse_1reg );
+main::custom_op('gfx_negy_swpv', 1, 0x6260, \&parse_1reg );
+main::custom_op('gfx_negv_swpv', 1, 0x6270, \&parse_1reg );
+
+main::custom_op('gfx_mul',       2, 0x6300, \&parse_2reg );
+# Vector operations taking a Q27.5 value from a register (see the opcode table).
+main::custom_op('gfx_mulxy',     2, 0x6400, \&parse_2reg );
+main::custom_op('gfx_addx',      2, 0x6500, \&parse_2reg );
+main::custom_op('gfx_addy',      2, 0x6600, \&parse_2reg );
+main::custom_op('gfx_divxy',     2, 0x6700, \&parse_2reg );
+# Moves between vector components and plain registers.
+main::custom_op('gfx_unpack',    3, 0x6800, \&parse_unpack );
+main::custom_op('gfx_unpacks',   3, 0x6810, \&parse_unpack );
+main::custom_op('gfx_packx',     2, 0x6910, \&parse_packx );
+main::custom_op('gfx_packx0',    2, 0x6900, \&parse_packx );
+main::custom_op('gfx_packy',     2, 0x6a10, \&parse_packy );
+main::custom_op('gfx_pack0y',    2, 0x6a00, \&parse_packy );
+main::custom_op('gfx_pack',      3, 0x6b00, \&parse_pack );
+# Surface size and math helpers; the sub-opcode is in bits 7:4.
+main::custom_op('gfx_size',      2, 0x6c00, \&parse_1reg_s );
+main::custom_op('gfx_hypot',     1, 0x6c40, \&parse_1reg );
+main::custom_op('gfx_sqrt',      1, 0x6c50, \&parse_1reg );
+main::custom_op('gfx_sincos',    2, 0x6c60, \&parse_1reg_r15 );
+main::custom_op('gfx_sin',       2, 0x6c80, \&parse_1reg_r15 );
+main::custom_op('gfx_cos',       2, 0x6c90, \&parse_1reg_r15 );
+
+return 1;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libgfx/bytecode.c	Sun May 27 18:45:35 2018 +0200
@@ -0,0 +1,462 @@
+
+#include <gfx/pixel.h>
+#include <gfx/line.h>
+#include <gfx/circle.h>
+#include <gfx/arc.h>
+#include <gfx/rect.h>
+#include <gfx/blit.h>
+
+#include <gfx/bytecode.h>
+
+#ifdef __MUTEKH__
+#include <mutek/bytecode.h>
+#endif
+
+#include <string.h>
+
+/* Reset attr to 0, dummy-init the 4 surface slots, set a 1x1 tilemap on slot 0. */
+void gfx_bc_init(struct gfx_bc_context_s *ctx)
+{
+  struct gfx_surface_s *slot;
+  ctx->attr = 0;
+  for (slot = ctx->s; slot != ctx->s + 4; slot++)
+    gfx_surface_dummy(slot);
+  gfx_tilemap_init(&ctx->tilemap, ctx->s, 1, 1, 0);
+}
+
+error_t gfx_bc_run(struct bc_context_s *vm,
+		   struct gfx_bc_context_s *ctx,
+		   uint16_t op)
+{
+  gfx_pixel_t attr = ctx->attr;
+  struct gfx_surface_s *draw = &ctx->s[0];
+
+  switch (op & 0x7f00)
+    {
+    case 0x1000:
+    case 0x1100:
+    case 0x1200:
+    case 0x1300: {          /* gfx_surface */
+      uint32_t d = bc_get_reg(vm, op & 15);
+      bc_reg_t p = bc_get_reg(vm, (op >> 4) & 15);
+      uint_fast8_t n = (op >> 8) & 3;
+      gfx_pos_t w = (d >> 20) & 0xfff;
+      gfx_pos_t h = (d >> 8) & 0xfff;
+      enum gfx_surface_format fmt = d & 0x1f;
+
+      size_t s;
+      if (/* enforce 32 bits alignment */ (p & 3) ||
+	  gfx_surface_bytes(&s, w, h, fmt))
+	return -ERANGE;
+
+      void *data = bc_translate_addr(vm, p, s, 0);
+      if (!data || gfx_surface_init(ctx->s + n, data,
+				    s, w, h, fmt))
+	return -ERANGE;
+
+      if (!n)
+	ctx->attr = gfx_fmt_desc[fmt].pm;
+      break;
+    }
+
+    case 0x1400:
+    case 0x1500:
+    case 0x1600:
+    case 0x1700: {          /* gfx_tilemap */
+      uint32_t d = bc_get_reg(vm, op & 15);
+      uint_fast8_t n = (op >> 4) & 3;
+      gfx_pos_t tw = (d >> 20) & 0xfff;
+      gfx_pos_t th = (d >> 8) & 0xfff;
+
+      if (gfx_tilemap_init(&ctx->tilemap, &ctx->s[n], tw, th, d & 0xff))
+	return -ERANGE;
+
+      break;
+    }
+
+    case 0x1800: {          /* gfx_attr_l8 */
+      gfx_pixel_t attr = op & 0xff;
+      uint_fast8_t bpp = 1 << gfx_fmt_desc[draw->fmt].l2bpp;
+      if (bpp < 8)
+	attr >>= 8 - bpp;
+      else
+	attr *= 0x010101 & gfx_fmt_desc[draw->fmt].pm;
+      ctx->attr = attr;
+      break;
+    }
+
+    case 0x1b00: {          /* gfx_swap */
+      uint_fast8_t a = (op >> 6) & 3;
+      uint_fast8_t b = (op >> 4) & 3;
+      struct gfx_surface_s tmp = ctx->s[b];
+      ctx->s[b] = ctx->s[a];
+      ctx->s[a] = tmp;
+      break;
+    }
+
+    case 0x2000:
+    case 0x2100:
+    case 0x2200: {          /* gfx_circle* */
+      uint32_t xyc = bc_get_reg(vm, op & 15);
+      uint_fast16_t r = bc_get_reg(vm, 15) & 2047;
+      uint_fast16_t xc = gfx_vector_xint(xyc);
+      uint_fast16_t yc = gfx_vector_yint(xyc);
+      uint_fast8_t oct = (op >> 4) & 0xf;
+      switch (op & 0x0300)
+	{
+	case 0x0000:
+	  gfx_draw_circle_safe(draw, xc, yc, r, oct, attr);
+	  break;
+	case 0x0100:
+	  gfx_draw_circle_infill_safe(draw, xc, yc, r, oct, attr);
+	  break;
+	case 0x0200:
+	  gfx_draw_circle_outfill_safe(draw, xc, yc, r, oct, attr);
+	  break;
+	}
+      break;
+    }
+
+    case 0x2800:
+    case 0x2900: {          /* gfx_arc* */
+      uint32_t angles = bc_get_reg(vm, op & 15);
+      uint32_t xyc = bc_get_reg(vm, (op >> 4) & 15);
+      uint_fast16_t r = bc_get_reg(vm, 15) & 2047;
+      gfx_draw_arc_angles_safe(draw,
+			       gfx_vector_xint(xyc), gfx_vector_yint(xyc),
+			       gfx_vector_xint(angles), gfx_vector_yint(angles),
+			       r, (op >> 8) & 1, attr);
+      break;
+    }
+#if 0
+    case 0x2c00:
+    case 0x2d00: {          /* gfx_xyarc* */
+      uint32_t xy0 = bc_get_reg(vm, (op >> 4) & 15);
+      uint32_t xy1 = bc_get_reg(vm, op & 15);
+      uint_fast16_t r = bc_get_reg(vm, 15) & 2047;
+      gfx_draw_arc_xy_safe(draw,
+			   gfx_vector_xint(xy0), gfx_vector_yint(xy0),
+			   gfx_vector_xint(xy1), gfx_vector_yint(xy1),
+			   r, (op >> 8) & 1, attr);
+      break;
+    }
+#endif
+
+    case 0x3000: {          /* gfx_line / gfx_point */
+      uint32_t p0 = bc_get_reg(vm, op & 15);
+      uint32_t p1 = bc_get_reg(vm, (op >> 4) & 15);
+      uint_fast16_t x0 = gfx_vector_xint(p0);
+      uint_fast16_t y0 = gfx_vector_yint(p0);
+      if (p0 == p1)
+	{
+	  gfx_put_pixel_safe(draw, x0, y0, attr);
+	}
+      else
+	{
+	  uint_fast16_t x1 = gfx_vector_xint(p1);
+	  uint_fast16_t y1 = gfx_vector_yint(p1);
+	  gfx_draw_line_safe(draw, x0, y0, x1, y1, attr);
+	}
+      break;
+    }
+
+    case 0x3200:
+    case 0x3300: {          /* gfx_tile */
+      uint32_t s = bc_get_reg(vm, (op >> 4) & 15);
+      uint32_t t = bc_get_reg(vm, op & 15);
+      gfx_pos_t x = gfx_vector_xint(s);
+      gfx_pos_t y = gfx_vector_yint(s);
+
+      gfx_draw_tile(draw, &ctx->tilemap, t, x, y, !!(op & 0x100));
+      break;
+    }
+
+    case 0x3400:
+    case 0x3500:
+    case 0x3600:
+    case 0x3700: {          /* gfx_rect* */
+      uint32_t p0 = bc_get_reg(vm, op & 15);
+      uint32_t p1 = bc_get_reg(vm, (op >> 4) & 15);
+      uint_fast16_t x0 = gfx_vector_xint(p0);
+      uint_fast16_t y0 = gfx_vector_yint(p0);
+      uint_fast16_t x1 = gfx_vector_xint(p1);
+      uint_fast16_t y1 = gfx_vector_yint(p1);
+      bool_t filled = (op >> 8) & 1;
+      bool_t round = (op >> 9) & 1;
+      uint_fast16_t r = round ? bc_get_reg(vm, 15) & 2047 : 0;
+      if (filled)
+	gfx_draw_rect_fr_safe(draw, x0, y0, x1, y1, r, attr);
+      else
+	gfx_draw_rect_r_safe(draw, x0, y0, x1, y1, r, attr);
+      break;
+    }
+
+    case 0x3800:
+    case 0x3900:
+    case 0x3a00:
+    case 0x3b00:
+    case 0x3c00:
+    case 0x3d00:
+    case 0x3e00:
+    case 0x3f00:{          /* gfx_tilestr */
+      uint32_t len = bc_get_reg(vm, 15);
+
+      uint32_t s = bc_get_reg(vm, (op >> 4) & 15);
+      gfx_pos_t x = gfx_vector_xint(s);
+      gfx_pos_t y = gfx_vector_yint(s);
+
+      bc_reg_t p = bc_get_reg(vm, op & 15);
+      const uint8_t *str = bc_translate_addr(vm, p, len, 0);
+
+      enum gfx_direction_e dir = (op >> 8) & 3;
+      if (str != NULL)
+	gfx_draw_tile_string(draw, &ctx->tilemap, str, len,
+			     x, y, dir, !!(op & 0x400));
+      break;
+    }
+
+    case 0x4200: {          /* gfx_clear_l8 */
+      gfx_pixel_t attr = op & 0xff;
+      uint_fast8_t bpp = 1 << gfx_fmt_desc[draw->fmt].l2bpp;
+      if (bpp < 8)
+	attr >>= 8 - bpp;
+      else
+	attr *= 0x010101 & gfx_fmt_desc[draw->fmt].pm;
+      gfx_clear(draw, attr);
+#if 0
+      srand(0);
+      unsigned n = 10000;
+      while (n--)
+	((uint8_t*)draw->ptr)[rand() & draw->mask] = rand();
+#endif
+      break;
+    }
+
+    case 0x4800:
+    case 0x4900:
+    case 0x4a00:
+    case 0x4b00: {          /* gfx_blit */
+      struct gfx_surface_s *src = &ctx->s[(op >> 8) & 3];
+      uint32_t p0 = bc_get_reg(vm, op & 15);
+      uint32_t p1 = bc_get_reg(vm, (op >> 4) & 15);
+      uint32_t p15 = bc_get_reg(vm, 15);
+      uint_fast16_t x0 = gfx_vector_xint(p0);
+      uint_fast16_t y0 = gfx_vector_yint(p0);
+      uint_fast16_t x2 = gfx_vector_xint(p1);
+      uint_fast16_t y2 = gfx_vector_yint(p1);
+      gfx_pos_t w = gfx_vector_xint(p15);
+      gfx_pos_t h = gfx_vector_yint(p15);
+      gfx_blit_safe(draw, x2, y2,
+		    src, x0, y0,
+		    w ? w : gfx_width(src),
+		    h ? h : gfx_height(src));
+      break;
+    }
+
+    case 0x4c00: {          /* gfx_blit_overlap */
+      uint32_t p0 = bc_get_reg(vm, op & 15);
+      uint32_t p1 = bc_get_reg(vm, (op >> 4) & 15);
+      uint32_t p15 = bc_get_reg(vm, 15);
+      uint_fast16_t x0 = gfx_vector_xint(p0);
+      uint_fast16_t y0 = gfx_vector_yint(p0);
+      uint_fast16_t x2 = gfx_vector_xint(p1);
+      uint_fast16_t y2 = gfx_vector_yint(p1);
+      gfx_pos_t w = gfx_vector_xint(p15);
+      gfx_pos_t h = gfx_vector_yint(p15);
+      gfx_blit_overlap_safe(draw, x2, y2,
+			    x0, y0, w, h);
+      break;
+    }
+
+    case 0x5800:
+    case 0x5900:
+    case 0x5a00:
+    case 0x5b00:
+    case 0x5c00:
+    case 0x5d00:
+    case 0x5e00:
+    case 0x5f00:		/* gfx_addyi */
+    case 0x5000:
+    case 0x5100:
+    case 0x5200:
+    case 0x5300:
+    case 0x5400:
+    case 0x5500:
+    case 0x5600:
+    case 0x5700: {		/* gfx_addxi */
+      uint_fast8_t d = op & 3;		/* 2 bit register field: r0..r3 only */
+      uint32_t p0 = bc_get_reg(vm, d);
+      uint32_t x = gfx_vector_x(p0);
+      uint32_t y = gfx_vector_y(p0);
+      uint32_t i = (op << 3) & 0x3fe0;	/* 9 bit constant (op bits 10:2) moved to bits 13:5 */
+      i = (0x2000 ^ i) - 0x2000;	/* sign extend from bit 13 */
+      if (op & 0x0800)			/* bit 11 set: gfx_addyi, clear: gfx_addxi */
+	y += i;
+      else
+	x += i;
+      bc_set_reg(vm, d, gfx_vector_xy_2p(x, y));
+      break;
+    }
+
+    case 0x6000:		/* gfx_addv */
+    case 0x6100: {		/* gfx_subv */
+      uint_fast8_t d = (op >> 4) & 15;
+      uint32_t p1 = gfx_vector_p2xy(bc_get_reg(vm, d)) & 0xfffefffe;
+      uint32_t p0 = gfx_vector_p2xy(bc_get_reg(vm, op & 15));
+      if (op & 0x0100)
+	p0 = ~p0 + 0x00020002;
+      p0 &= 0xfffefffe;
+      bc_set_reg(vm, d, gfx_vector_xy2p((p1 + p0) & 0xfffefffe));
+      break;
+    }
+
+    case 0x6200: {		/* gfx_neg* gfx_swp* */
+      uint_fast8_t d = op & 15;
+      uint32_t p0 = bc_get_reg(vm, d);
+      gfx_pos_t x = gfx_vector_x(p0);
+      gfx_pos_t y = gfx_vector_y(p0);
+      if (op & 0x0010)
+	x = -x;
+      if (op & 0x0020)
+	y = -y;
+      if (op & 0x0040)
+	_GFX_SWAP(x, y);
+      bc_set_reg(vm, d, gfx_vector_xy_2p(x, y));
+      break;
+    }
+
+    case 0x6300: {		/* gfx_mul */
+      uint_fast8_t d = op & 15;
+      uint_fast8_t e = (op >> 4) & 15;
+      bc_set_reg(vm, e, ((int64_t)(int32_t)bc_get_reg(vm, d)
+			 * (int32_t)bc_get_reg(vm, e)) >> 5);
+      break;
+    }
+
+    case 0x6400: 		/* gfx_mulxy */
+    case 0x6500: 		/* gfx_addx */
+    case 0x6600: 		/* gfx_addy */
+    case 0x6700: {		/* gfx_divxy */
+      uint_fast8_t d = (op >> 4) & 15;	/* vector register, updated in place */
+      gfx_pos_t r = bc_get_reg(vm, op & 15);	/* scalar operand */
+      uint32_t p0 = bc_get_reg(vm, d);
+      gfx_pos_t x = gfx_vector_x(p0), y = gfx_vector_y(p0);
+      switch (op & 0x0300)
+	{
+	case 0x0000:		/* multiply, dropping 5 fractional bits */
+	  x = (x * r) >> 5;
+	  y = (y * r) >> 5;
+	  break;
+	case 0x0100:
+	  x += r;
+	  break;
+	case 0x0200:
+	  y += r;
+	  break;
+	case 0x0300:
+	  if (!r) return -ERANGE;	/* bytecode supplied a zero divisor */
+	  x = (x << 5) / r;
+	  y = (y << 5) / r;
+	  break;
+	}
+      bc_set_reg(vm, d, gfx_vector_xy_2p(x, y));
+      break;
+    }
+
+    case 0x6800: {		/* gfx_unpackxy */
+      uint_fast8_t d = (op >> 4) & 14;
+      uint32_t p0 = bc_get_reg(vm, op & 15);
+      uint32_t x = gfx_vector_x(p0);
+      uint32_t y = gfx_vector_y(p0);
+      if (op & 0x0010)
+	{
+	  x = (int32_t)(int16_t)x;
+	  y = (int32_t)(int16_t)y;
+	}
+      bc_set_reg(vm, d, x);
+      bc_set_reg(vm, d | 1, y);
+      break;
+    }
+
+    case 0x6900: 		/* gfx_packx */
+    case 0x6a00: 		/* gfx_packy */
+    case 0x6b00: {		/* gfx_pack */
+      uint_fast8_t d = (op >> 4) & 14;
+      uint_fast8_t e = op & 15;
+      uint32_t p0 = bc_get_reg(vm, e);
+      uint32_t x = 0, y = 0;
+      if (op & 0x0010)
+	{
+	  x = gfx_vector_x(p0);
+	  y = gfx_vector_y(p0);
+	}
+      if (op & 0x0100)
+	x = bc_get_reg(vm, d);
+      if (op & 0x0200)
+	y = bc_get_reg(vm, d | 1);
+      bc_set_reg(vm, e, gfx_vector_xy_2p(x, y));
+      break;
+    }
+
+    case 0x6c00: {		/* sub-opcode in bits 7:4, register in bits 3:0 */
+      uint_fast8_t d = op & 15;
+      switch (op & 0x00f0)
+	{
+	case 0x0000:
+	case 0x0010:
+	case 0x0020:
+	case 0x0030: {		/* gfx_size */
+	  uint_fast8_t n = (op >> 4) & 3;
+	  struct gfx_surface_s *s = ctx->s + n;
+	  bc_set_reg(vm, d, gfx_vector_xy_2p(gfx_width(s) << 5,
+					     gfx_height(s) << 5));
+	  break;
+	}
+
+	case 0x0040: {		/* gfx_hypot */
+	  uint32_t p0 = bc_get_reg(vm, d);
+	  gfx_pos_t x = (int16_t)gfx_vector_x(p0);
+	  gfx_pos_t y = (int16_t)gfx_vector_y(p0);
+	  x = (x * x) >> 2;
+	  y = (y * y) >> 2;
+	  bc_set_reg(vm, d, gfx_sqrt32(x + y) << 1);
+	  break;
+	}
+
+	case 0x0050: {		/* gfx_sqrt */
+	  bc_set_reg(vm, d, gfx_sqrt32(bc_get_reg(vm, d) << 1) << 2);
+	  break;
+	}
+
+	case 0x0060: {		/* gfx_sincos */
+	  int32_t a = bc_get_reg(vm, d);
+	  int32_t s = gfx_sin(a >> 5);
+	  int32_t c = gfx_cos(a >> 5);
+	  int64_t p1 = (int32_t)bc_get_reg(vm, 15);
+	  c = (c * p1) >> 30;
+	  s = (s * p1) >> 30;
+	  bc_set_reg(vm, d, gfx_vector_xy_2p(c, s));
+	  break;
+	}
+
+	case 0x0080: 		/* gfx_sin */
+	case 0x0090: {		/* gfx_cos */
+	  int32_t a = bc_get_reg(vm, d);
+	  /* bit 4 (gfx_cos) adds 0x80 to the angle before the sine lookup */
+	  int32_t s = gfx_sin((a >> 5) + ((op & 0x0010) << 3));
+	  int64_t p1 = (int32_t)bc_get_reg(vm, 15);
+	  s = (s * p1) >> 30;
+	  bc_set_reg(vm, d, s);
+	  break;
+	}
+	default:		/* undefined sub-opcode: report it like unknown opcodes */
+	  return -ENOTSUP;
+	}
+      break;
+    }
+
+    default:
+      return -ENOTSUP;
+    }
+
+  return 0;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libgfx/gfx.c	Sun May 27 18:45:35 2018 +0200
@@ -0,0 +1,392 @@
+/*
+    This file is part of MutekH.
+
+    MutekH is free software; you can redistribute it and/or modify it
+    under the terms of the GNU Lesser General Public License as
+    published by the Free Software Foundation; version 2.1 of the
+    License.
+
+    MutekH is distributed in the hope that it will be useful, but
+    WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public
+    License along with MutekH; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+    02110-1301 USA.
+
+    Copyright Alexandre Becoulet <alexandre.becoulet@free.fr> (c) 2017
+
+*/
+
+#include <gfx/pixel.h>
+#include <gfx/line.h>
+#include <gfx/circle.h>
+#include <gfx/arc.h>
+#include <gfx/rect.h>
+#include <gfx/blit.h>
+
+#include <assert.h>
+
+/*
+  Compute the storage size, in bits, of a `w' x `h' surface using
+  pixel format `fmt' and store it in `*bits_'.
+
+  Returns 0 on success. Returns -ENOTSUP when power of two surfaces
+  are required by the configuration and `w' or `h' is not a power of
+  two. Returns -ERANGE when the size or the row width fails the word
+  granularity tests below.
+*/
+error_t gfx_surface_bits(size_t *bits_, gfx_pos_t w, gfx_pos_t h,
+			 enum gfx_surface_format fmt)
+{
+  const uint_fast8_t bpp_shift = gfx_fmt_desc[fmt].l2bpp;
+  const uint_fast8_t ppw_shift = gfx_fmt_desc[fmt].l2ppw;
+  uint32_t total;
+
+#ifdef CONFIG_GFX_LOG2_SURFACE_SIZE
+  /* both dimensions must have a single bit set */
+  if ((w & (w - 1)) | (h & (h - 1)))
+    return -ENOTSUP;
+
+  const uint_fast8_t w_shift = bit_msb_index(w);
+  const uint_fast8_t h_shift = bit_msb_index(h);
+
+  /* size of the pixel data in bits */
+  total = 1 << (w_shift + h_shift + bpp_shift);
+#else
+  /* size of the pixel data in bits */
+  total = (w * h) << bpp_shift;
+#endif
+
+  /* reject sizes which have any of the low `l2ppw' bits set */
+  if (total & ((1 << ppw_shift) - 1))
+    return -ERANGE;
+
+#ifdef CONFIG_GFX_LOG2_SURFACE_SIZE
+  /* a row must hold at least as many pixels as a word does */
+  if (w_shift < ppw_shift)
+    return -ERANGE;
+#endif
+
+  *bits_ = total;
+  return 0;
+}
+
+/*
+  Compute the number of bytes needed to store a `w' x `h' surface of
+  format `fmt'. The bit count reported by gfx_surface_bits is rounded
+  up to a multiple of the configured word width before being
+  converted to bytes.
+
+  Returns 0 on success and -ERANGE when gfx_surface_bits rejects the
+  geometry, whatever error it reported.
+*/
+error_t gfx_surface_bytes(size_t *bytes, gfx_pos_t w, gfx_pos_t h,
+			  enum gfx_surface_format fmt)
+{
+  size_t nbits;
+  error_t err = gfx_surface_bits(&nbits, w, h, fmt);
+
+  if (!err)
+    {
+      *bytes = align_pow2_up(nbits, 8 << CONFIG_GFX_LOG2_WORD_WIDTH) >> 3;
+      return 0;
+    }
+
+  return -ERANGE;
+}
+
+/*
+  Initialize the surface descriptor `s' so that it refers to the
+  `bytes' bytes of storage at `data', interpreted as a `w' x `h'
+  surface of format `fmt'.
+
+  Returns 0 on success. Returns -ERANGE when gfx_surface_bits rejects
+  the geometry or when the storage is too small for the surface; the
+  descriptor is left untouched in that case.
+*/
+error_t gfx_surface_init(struct gfx_surface_s *s, gfx_word_t *data,
+                         size_t bytes, gfx_pos_t w, gfx_pos_t h,
+                         enum gfx_surface_format fmt)
+{
+  uint_fast8_t l2bpp = gfx_fmt_desc[fmt].l2bpp;
+  uint_fast8_t l2ppw = gfx_fmt_desc[fmt].l2ppw;
+
+  size_t bits;
+  if (gfx_surface_bits(&bits, w, h, fmt))
+    return -ERANGE;
+
+#ifdef CONFIG_GFX_LOG2_SURFACE_SIZE
+  /* gfx_surface_bits succeeded, so w and h are powers of two and a
+     row is at least one word wide (l2w >= l2ppw). These were
+     previously used here without being declared. */
+  uint_fast8_t l2w = bit_msb_index(w);
+  uint_fast8_t l2h = bit_msb_index(h);
+
+  /* word index mask: number of words in the surface minus one */
+  gfx_addr_t mask = (1 << (l2w + l2h - l2ppw)) - 1;
+
+  /* The storage must hold every bit of the surface. This mirrors the
+     test of the non power of two branch; comparing the word mask
+     against a byte count accepted undersized buffers whenever a word
+     is larger than a byte. */
+  if (bits > bytes * 8)
+    return -ERANGE;
+
+  s->mask = mask;
+  s->l2bw = l2w - l2ppw;
+  s->l2w = l2w;
+  s->l2h = l2h;
+
+# if 0
+  fprintf(stderr, "surface ptr:%x l2w:%u l2h:%u fmt:%u l2bpp:%u bits:%u l2bw:%u mask:%08x\n",
+          s->ptr, l2w, l2h, fmt, l2bpp, bits, s->l2bw, s->mask);
+# endif
+
+#else
+  /* row size in words, round up */
+  uint16_t bw = ((w - 1) >> l2ppw) + 1;
+  gfx_addr_t bsize = bw * h;
+
+  /* l2bpp + l2ppw is the log2 of the number of bits per word */
+  if (!bsize || (bsize << (l2bpp + l2ppw)) > bytes * 8)
+    return -ERANGE;
+
+  s->bsize = bsize;
+  s->bw = bw;
+  s->w = w;
+  s->h = h;
+
+# if 0
+  fprintf(stderr, "surface ptr:%x w:%u h:%u fmt:%u l2bpp:%u bw:%u bsize:%08x\n",
+          s->ptr, w, h, fmt, l2bpp, bw, bsize);
+# endif
+#endif
+
+  s->ptr = data;
+  s->fmt = fmt;
+
+  return 0;
+}
+
+/*
+  Make `s' a valid one pixel surface of the default format, backed by
+  a single word of storage shared by all dummy surfaces.
+*/
+void
+gfx_surface_dummy(struct gfx_surface_s * __restrict__ s)
+{
+  static gfx_word_t placeholder;
+
+  s->fmt = GFX_FMT_DEFAULT;
+
+#ifdef CONFIG_GFX_LOG2_SURFACE_SIZE
+  /* 2^0 x 2^0 pixels, a single word */
+  s->l2w = 0;
+  s->l2h = 0;
+  s->l2bw = 0;
+  s->mask = 0;
+#else
+  /* 1 x 1 pixels, a single word */
+  s->w = 1;
+  s->h = 1;
+  s->bw = 1;
+  s->bsize = 1;
+#endif
+
+  s->ptr = &placeholder;
+}
+
+/*
+  Setup the tilemap `t' so that it takes `tw' x `th' tiles from a copy
+  of the surface descriptor `s'. `first' is the index of the first
+  tile of the map.
+
+  Returns -ERANGE when a tile dimension is zero or when a single tile
+  does not fit in the surface, 0 otherwise.
+*/
+error_t gfx_tilemap_init(struct gfx_tilemap_s *t,
+			 const struct gfx_surface_s *s,
+			 gfx_pos_t tw, gfx_pos_t th,
+			 uint_fast8_t first)
+{
+  if (tw == 0 || th == 0)
+    return -ERANGE;
+
+  gfx_pos_t surf_w = gfx_width(s);
+  gfx_pos_t surf_h = gfx_height(s);
+
+  if (tw > surf_w || th > surf_h)
+    return -ERANGE;
+
+  /* log2 of the number of tiles per row; the most significant bit
+     index is used, so a count which is not a power of two is rounded
+     down */
+  t->l2tpr = bit_msb_index(surf_w / tw);
+  t->th = th;
+  t->tw = tw;
+  t->offset = first;
+  t->s = *s;
+
+  return 0;
+}
+
+/* `extern inline' declarations of helpers. In C99, such a declaration
+   placed in a translation unit which also sees the inline definition
+   makes this file provide the out of line copy of the function.
+   NOTE(review): the inline definitions are presumably provided by the
+   gfx headers included above; they are not visible from this file. */
+extern inline bool_t
+gfx_box_check(const struct gfx_surface_s * __restrict__ s,
+	      gfx_pos_t x0, gfx_pos_t y0,
+	      gfx_pos_t x1, gfx_pos_t y1);
+
+extern inline bool_t
+gfx_box_safe(const struct gfx_surface_s * __restrict__ s,
+	     gfx_pos_t *x0, gfx_pos_t *y0,
+	     gfx_pos_t *x1, gfx_pos_t *y1);
+
+extern inline int32_t gfx_sin(uint_fast16_t x);
+
+extern inline int32_t gfx_cos(uint_fast16_t x);
+
+extern inline uint32_t gfx_sqrt32(uint32_t x);
+
+/* Sine lookup table: 129 increasing entries going from 0 up to
+   0x40000000, which is 1.0 in Q2.30.
+   NOTE(review): this looks like a quarter period with 128 steps,
+   matching the 512 angle steps per turn used by the arc code; confirm
+   against gfx_sin in gfx/math.h. */
+const int32_t gfx_sin_table[129] = { /* sin table in Q2.30 fixed point */
+  0x00000000, 0x00c90e8f, 0x0192155f, 0x025b0cae, 0x0323ecbe, 0x03ecadcf, 0x04b54824, 0x057db402,
+  0x0645e9af, 0x070de171, 0x07d59395, 0x089cf867, 0x09640837, 0x0a2abb58, 0x0af10a22, 0x0bb6ecef,
+  0x0c7c5c1e, 0x0d415012, 0x0e05c135, 0x0ec9a7f2, 0x0f8cfcbd, 0x104fb80e, 0x1111d262, 0x11d3443f,
+  0x1294062e, 0x135410c2, 0x14135c94, 0x14d1e242, 0x158f9a75, 0x164c7ddd, 0x17088530, 0x17c3a931,
+  0x187de2a6, 0x19372a63, 0x19ef7943, 0x1aa6c82b, 0x1b5d1009, 0x1c1249d8, 0x1cc66e99, 0x1d79775b,
+  0x1e2b5d38, 0x1edc1952, 0x1f8ba4db, 0x2039f90e, 0x20e70f32, 0x2192e09a, 0x223d66a8, 0x22e69ac7,
+  0x238e7673, 0x2434f332, 0x24da0a99, 0x257db64b, 0x261feff9, 0x26c0b162, 0x275ff452, 0x27fdb2a6,
+  0x2899e64a, 0x29348937, 0x29cd9577, 0x2a650525, 0x2afad269, 0x2b8ef77c, 0x2c216eaa, 0x2cb2324b,
+  0x2d413ccc, 0x2dce88a9, 0x2e5a106f, 0x2ee3cebe, 0x2f6bbe44, 0x2ff1d9c6, 0x30761c17, 0x30f8801f,
+  0x317900d6, 0x31f79947, 0x32744493, 0x32eefde9, 0x3367c08f, 0x33de87de, 0x34534f40, 0x34c61236,
+  0x3536cc52, 0x35a5793c, 0x361214b0, 0x367c9a7d, 0x36e5068a, 0x374b54ce, 0x37af8158, 0x3811884c,
+  0x387165e3, 0x38cf1669, 0x392a9642, 0x3983e1e7, 0x39daf5e8, 0x3a2fcee8, 0x3a8269a2, 0x3ad2c2e7,
+  0x3b20d79e, 0x3b6ca4c4, 0x3bb6276d, 0x3bfd5cc4, 0x3c424209, 0x3c84d496, 0x3cc511d8, 0x3d02f756,
+  0x3d3e82ad, 0x3d77b191, 0x3dae81ce, 0x3de2f147, 0x3e14fdf7, 0x3e44a5ee, 0x3e71e758, 0x3e9cc076,
+  0x3ec52f9f, 0x3eeb3347, 0x3f0ec9f4, 0x3f2ff249, 0x3f4eaafe, 0x3f6af2e3, 0x3f84c8e1, 0x3f9c2bfa,
+  0x3fb11b47, 0x3fc395f9, 0x3fd39b5a, 0x3fe12acb, 0x3fec43c6, 0x3ff4e5df, 0x3ffb10c1, 0x3ffec42d,
+  0x40000000
+};
+
+/* Pixel, hline, vline and clear operations.
+   NOTE(review): _GFX_BPP_EXPAND presumably expands its argument once
+   per enabled pixel format; both macros are defined outside of this
+   file. The declarations which follow request out of line copies of
+   the format independent inline wrappers. */
+_GFX_BPP_EXPAND(_GFX_PIXEL_OPS);
+
+extern inline gfx_pixel_t
+gfx_get_pixel_safe(const struct gfx_surface_s * __restrict__ s,
+		   gfx_pos_t x, gfx_pos_t y);
+
+extern inline void
+gfx_put_pixel_safe(const struct gfx_surface_s * __restrict__ s,
+		   gfx_pos_t x, gfx_pos_t y, gfx_pixel_t a);
+
+extern inline void
+gfx_hline_safe(const struct gfx_surface_s * __restrict__ s,
+	       gfx_pos_t x0, gfx_pos_t x1, gfx_pos_t y, gfx_pixel_t a);
+
+extern inline void
+gfx_vline_safe(const struct gfx_surface_s * __restrict__ s,
+	       gfx_pos_t x, gfx_pos_t y0, gfx_pos_t y1, gfx_pixel_t a);
+
+extern inline void
+gfx_clear(const struct gfx_surface_s * __restrict__ s, gfx_pixel_t a);
+
+/* Circle drawing operations, followed by the out of line copies of
+   the format independent inline wrappers.
+   NOTE(review): _GFX_CIRCLE_OPS is presumably defined in
+   gfx/circle.h; not visible from this file. */
+_GFX_BPP_EXPAND(_GFX_CIRCLE_OPS);
+
+extern inline bool_t
+gfx_circle_check(const struct gfx_surface_s * __restrict__ s,
+                 gfx_pos_t xc, gfx_pos_t yc,
+                 gfx_pos_t r,
+                 uint8_t oct);
+
+extern inline void
+gfx_draw_circle_safe(const struct gfx_surface_s * __restrict__ s,
+                     gfx_pos_t xc, gfx_pos_t yc,
+                     gfx_pos_t r, uint8_t oct, gfx_pixel_t a);
+
+extern inline void
+gfx_draw_circle_infill_safe(const struct gfx_surface_s * __restrict__ s,
+                            gfx_pos_t xc, gfx_pos_t yc,
+                            gfx_pos_t r, uint8_t oct, gfx_pixel_t a);
+
+extern inline void
+gfx_draw_circle_outfill_safe(const struct gfx_surface_s * __restrict__ s,
+                             gfx_pos_t xc, gfx_pos_t yc,
+                             gfx_pos_t r, uint8_t oct, gfx_pixel_t a);
+
+/* Line drawing operations, followed by the out of line copy of the
+   format independent inline wrapper.
+   NOTE(review): _GFX_LINE_OPS is presumably defined in gfx/line.h;
+   not visible from this file. */
+_GFX_BPP_EXPAND(_GFX_LINE_OPS);
+
+extern inline void
+gfx_draw_line_safe(const struct gfx_surface_s * __restrict__ s,
+		   gfx_pos_t x0, gfx_pos_t y0,
+		   gfx_pos_t x1, gfx_pos_t y1, gfx_pixel_t a);
+
+/* Arc drawing operations: _GFX_ARC_OPS defines gfx_draw_arc_<l2bpp>.
+   The declarations which follow request out of line copies of the
+   format independent inline wrappers from gfx/arc.h. */
+_GFX_BPP_EXPAND(_GFX_ARC_OPS);
+
+extern inline void
+gfx_draw_arc_angles_safe(const struct gfx_surface_s *s,
+                         uint_fast16_t xc, uint_fast16_t yc,
+                         uint_fast16_t a0, uint_fast16_t a1,
+                         uint_fast16_t r, uint32_t ccw, gfx_pixel_t a);
+
+extern inline void
+gfx_draw_arc_xy_safe(const struct gfx_surface_s *s,
+                     int32_t x0, int32_t y0,
+                     int32_t x1, int32_t y1,
+                     uint_fast16_t r, uint32_t ccw, gfx_pixel_t a);
+
+/* Rectangle drawing operations, followed by the out of line copies
+   of the format independent inline wrappers.
+   NOTE(review): _GFX_RECT_OPS is presumably defined in gfx/rect.h;
+   not visible from this file. */
+_GFX_BPP_EXPAND(_GFX_RECT_OPS);
+
+extern inline void
+gfx_draw_rect_safe(const struct gfx_surface_s * __restrict__ s,
+		   gfx_pos_t x0, gfx_pos_t y0,
+		   gfx_pos_t x1, gfx_pos_t y1, gfx_pixel_t a);
+
+extern inline void
+gfx_draw_rect_r_safe(const struct gfx_surface_s * __restrict__ s,
+		     gfx_pos_t x0, gfx_pos_t y0,
+		     gfx_pos_t x1, gfx_pos_t y1, gfx_pos_t r, gfx_pixel_t a);
+
+extern inline void
+gfx_draw_rect_f_safe(const struct gfx_surface_s * __restrict__ s,
+		     gfx_pos_t x0, gfx_pos_t y0,
+		     gfx_pos_t x1, gfx_pos_t y1, gfx_pixel_t a);
+
+extern inline void
+gfx_draw_rect_fr_safe(const struct gfx_surface_s * __restrict__ s,
+		      gfx_pos_t x0, gfx_pos_t y0,
+		      gfx_pos_t x1, gfx_pos_t y1, gfx_pos_t r, gfx_pixel_t a);
+
+/* Blit operations: _GFX_BLIT_OPS defines gfx_blit_nc_<l2bpp> and
+   gfx_blit_overlap_nc_<l2bpp>. The declarations which follow request
+   out of line copies of the format independent inline wrappers from
+   gfx/blit.h. */
+_GFX_BPP_EXPAND(_GFX_BLIT_OPS);
+
+extern inline void
+gfx_blit_nc(const struct gfx_surface_s * __restrict__ d,
+	    gfx_pos_t x2, gfx_pos_t y2, /* dest */
+	    const struct gfx_surface_s * __restrict__ s,
+	    gfx_pos_t x0, gfx_pos_t y0, /* src */
+	    gfx_pos_t w, gfx_pos_t h);
+
+extern inline void
+gfx_blit_safe(const struct gfx_surface_s * __restrict__ d,
+	      gfx_pos_t x2, gfx_pos_t y2, /* dest */
+	      const struct gfx_surface_s * __restrict__ s,
+	      gfx_pos_t x0, gfx_pos_t y0, /* src */
+	      gfx_pos_t w, gfx_pos_t h);
+
+extern inline void
+gfx_blit_overlap_nc(const struct gfx_surface_s * __restrict__ d,
+		    gfx_pos_t x2, gfx_pos_t y2, /* dest */
+		    gfx_pos_t x0, gfx_pos_t y0, /* src */
+		    gfx_pos_t w, gfx_pos_t h);
+
+extern inline void
+gfx_blit_overlap_safe(const struct gfx_surface_s * __restrict__ d,
+		      gfx_pos_t x2, gfx_pos_t y2, /* dest */
+		      gfx_pos_t x0, gfx_pos_t y0, /* src */
+		      gfx_pos_t w, gfx_pos_t h);
+
+/*
+  Copy tile number `tile' of the tilemap `t' to the surface `s' at
+  position (x, y). When `center' is set, (x, y) designates the middle
+  of the tile instead of its top left corner.
+
+  Returns 1 when the tile has been drawn and 0 when either the tile
+  lies below the last row of the tilemap surface or the destination
+  box does not pass gfx_box_check.
+*/
+bool_t
+gfx_draw_tile(const struct gfx_surface_s * __restrict__ s,
+              const struct gfx_tilemap_s * __restrict__ t,
+              uint_fast16_t tile, gfx_pos_t x, gfx_pos_t y,
+              bool_t center)
+{
+  const struct gfx_surface_s * __restrict__ map = &t->s;
+  uint_fast8_t tile_w = t->tw;
+  uint_fast8_t tile_h = t->th;
+  uint_fast8_t row_shift = t->l2tpr;
+
+  /* index of the tile relative to the first tile of the map */
+  tile -= t->offset;
+
+  /* position of the tile in the tilemap surface: the low bits of the
+     index select the column, the other bits select the row */
+  uint32_t column_mask = (1 << row_shift) - 1;
+  gfx_pos_t src_x = (tile & column_mask) * tile_w;
+  gfx_pos_t src_y = (tile >> row_shift) * tile_h;
+
+  if (center)
+    {
+      x -= tile_w / 2;
+      y -= tile_h / 2;
+    }
+
+  /* the last line of the tile must exist in the tilemap */
+  if (!gfx_ycheck(map, src_y + tile_h - 1))
+    return 0;
+
+  /* FIXME perform a single check when called from gfx_draw_tile_string */
+  if (!gfx_box_check(s, x, y, x + tile_w - 1, y + tile_h - 1))
+    return 0;
+
+  gfx_blit_nc(s, x, y, map, src_x, src_y, tile_w, tile_h);
+  return 1;
+}
+
+/*
+  Draw the `size' tiles whose indices are stored in `str' on the
+  surface `s', starting at (x, y) and moving by one tile width or
+  height in direction `dir' after each tile.
+
+  When `center' is set, the whole string is centered on (x, y): the
+  start position is moved back by half of the distance between the
+  first and last tiles, and each tile is centered on its position.
+
+  Tiles rejected by gfx_draw_tile are silently skipped.
+*/
+void
+gfx_draw_tile_string(const struct gfx_surface_s * __restrict__ s,
+                     const struct gfx_tilemap_s * __restrict__ t,
+                     const uint8_t *str, uint_fast16_t size,
+                     gfx_pos_t x, gfx_pos_t y, enum gfx_direction_e dir,
+                     bool_t center)
+{
+  gfx_pos_t xd = 0, yd = 0;
+
+  /* nothing to draw */
+  if (!size)
+    return;
+
+  /* per tile displacement */
+  switch (dir)
+    {
+    case GFX_DIR_LEFT:
+      xd = -t->tw;
+      break;
+    case GFX_DIR_RIGHT:
+      xd = t->tw;
+      break;
+    case GFX_DIR_UP:
+      yd = -t->th;
+      break;
+    case GFX_DIR_DOWN:
+      yd = t->th;
+      break;
+    }
+
+  if (center)
+    {
+      /* The tile count must be signed here. Multiplying the signed
+	 step by an unsigned count made the product unsigned, so a
+	 negative step (LEFT and UP) was shifted as a huge positive
+	 value; this yields a wrong offset on targets where
+	 uint_fast16_t is 32 bits wide. */
+      int32_t sm1 = (int32_t)size - 1;
+      x -= (xd * sm1) >> 1;
+      y -= (yd * sm1) >> 1;
+    }
+
+  while (size--)
+    {
+      gfx_draw_tile(s, t, *str++, x, y, center);
+      x += xd;
+      y += yd;
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libgfx/gfx.config	Sun May 27 18:45:35 2018 +0200
@@ -0,0 +1,68 @@
+
+%config CONFIG_GFX
+  desc Enable 2d graphic library
+  module libgfx Libraries::Gfx Lightweight 2d graphic library
+  flags auto
+%config end
+
+%config CONFIG_GFX_DEFAULT_L2BPP
+  parent CONFIG_GFX
+  desc specifies log2 of default surface bits per pixel
+  flags value
+%config end
+
+%config CONFIG_GFX_LOG2_WORD_WIDTH
+  parent CONFIG_GFX
+  desc Specifies the log 2 word width in bytes used for performing memory access on surfaces.
+  desc The storage size of a single surface row will be aligned on a word boundary.
+  flags value
+  default 2
+%config end
+
+%config CONFIG_GFX_BPP0
+  parent CONFIG_GFX
+  desc enable support for 1 bit per pixel surfaces
+%config end
+
+%config CONFIG_GFX_BPP1
+  parent CONFIG_GFX
+  desc enable support for 2 bit per pixel surfaces
+%config end
+
+%config CONFIG_GFX_BPP2
+  parent CONFIG_GFX
+  desc enable support for 4 bit per pixel surfaces
+%config end
+
+%config CONFIG_GFX_BPP3
+  parent CONFIG_GFX
+  desc enable support for 8 bit per pixel surfaces
+%config end
+
+%config CONFIG_GFX_BPP4
+  parent CONFIG_GFX
+  desc enable support for 16 bit per pixel surfaces
+  require CONFIG_GFX_LOG2_WORD_WIDTH>=1
+%config end
+
+%config CONFIG_GFX_BPP5
+  parent CONFIG_GFX
+  desc enable support for 32 bit per pixel surfaces
+  require CONFIG_GFX_LOG2_WORD_WIDTH>=2
+%config end
+
+%config CONFIG_GFX_UNROLL
+  parent CONFIG_GFX
+  default defined
+  desc generate faster but larger code
+%config end
+
+%config CONFIG_GFX_BYTECODE
+  parent CONFIG_GFX
+  desc enable support for graphic bytecode interpreter
+%config end
+
+%config CONFIG_GFX_LOG2_SURFACE_SIZE
+  parent CONFIG_GFX
+  desc optimize by only supporting surfaces with power of 2 width and height
+%config end
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libgfx/include/gfx/arc.t	Sun May 27 18:45:35 2018 +0200
@@ -0,0 +1,315 @@
+/*  -*- c -*-
+
+    This file is part of MutekH.
+
+    MutekH is free software; you can redistribute it and/or modify it
+    under the terms of the GNU Lesser General Public License as
+    published by the Free Software Foundation; version 2.1 of the
+    License.
+
+    MutekH is distributed in the hope that it will be useful, but
+    WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public
+    License along with MutekH; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+    02110-1301 USA.
+
+    Copyright Alexandre Becoulet <alexandre.becoulet@free.fr> (c) 2017
+
+*/
+
+#ifndef _GFX_ARC_H_
+#define _GFX_ARC_H_
+
+#include <gfx/gfx.h>
+
+/*
+
+  This code draws an arc with center at (xc, yc), starting at (x,
+  y) and ending at (xt, yt) with radius r. The function never
+  terminates when xt and yt are both greater than radius / sqrt(2).
+
+  The algorithm works by splitting the drawing plane in 8 octants:
+
+   \         |         /
+     \  o5   | o6    /
+       \     |     /
+   o4    \   |   /   o7
+           \ | /
+------------ X -----------> x
+           / | \
+   o3    /   |   \   o0
+       /     |     \
+     /  o2   | o1    \
+   /         |         \
+             v y
+
+  The difference between squared arc radius and squared distance
+  from current pixel center to arc center is computed:
+
+  err = x*x + y*y - r*r
+
+  The algorithm chooses the next pixel so that the error remains
+  small. The error is updated when moving to the next pixel.
+
+  In order to simplify computations of derr_x, the sign of the squared
+  difference is reversed between o0, o1, o4, o5 and o2, o3, o6,
+  o7. This is possible because the error is always 0 when x=0 or y=0,
+  that is when switching from o7, o1, o3, o5 to o0, o2, o4, o6. The
+  following formula is used to update the error:
+
+  derr_x = (2*x + dx)
+  derr_y = (2*y * dy*dx + dx)
+
+In CW direction:
+
+  o0, o1: dx=-1, dy=+1, derr_x=2x-1, derr_y=-2y-1
+  o2, o3: dx=-1, dy=-1, derr_x=2x-1, derr_y=+2y-1
+  o4, o5: dx=+1, dy=-1, derr_x=2x+1, derr_y=-2y+1
+  o6, o7: dx=+1, dy=+1, derr_x=2x+1, derr_y=+2y+1
+
+*/
+
+/*
+  Body of the arc drawing functions. Draws pixels from (x, y) up to
+  (xt, yt), both relative to the arc center (xc, yc), by invoking
+  `_put_pixel(x, yaddr)' for each retained pixel. `r2' is the squared
+  radius and `ccw' the direction boolean.
+
+  The macro expands to a compound statement which returns from the
+  enclosing function; it modifies its `x', `y' and `ccw' arguments,
+  which must therefore be plain variables. Nothing is drawn when `r2'
+  is zero.
+*/
+/* backslash-region-begin */
+#define _GFX_ARC_DRAW(_put_pixel, s, xc, yc, x, y, xt, yt, r2, ccw)
+{
+
+  if (!r2)
+    return;
+
+  /* squared error */
+  int32_t m = r2 - x*x - y*y;
+
+  if ((y > 0) ^ ccw)
+    m = -m;
+
+  /* direction: turn the boolean into an all ones or all zeros mask */
+  ccw = !ccw - 1;
+
+  int32_t dx, dy, dxdy, e, be, bm, t;
+
+  while (1)
+    {
+      /* compute the x stop value. when yt and y have different sign,
+	 the value of t is so large that it never matches. */
+      t = xt ^ ((yt ^ y) & 0x80000000);
+
+      while (1)			/* used in o1, o2, o5, o6 */
+        {
+          _put_pixel(xc + x, gfx_yaddr(s, yc + y));
+
+          if (x == t)		/* check stop condition */
+            return;
+
+	  /* initially use dx and dy variables as masks for abs() */
+          dx = y >> 31;
+          dy = x >> 31;
+
+	  /* switch to other loop */
+          if ((x ^ dy) - dy > (y ^ dx) - dx)	  /* if abs(x) > abs(y) */
+            break;
+
+	  /* compute actual values of dx and dy */
+          dxdy = dx ^ dy;
+          dx ^= ccw;
+          dy ^= ~ccw;
+          dx = dx | 1;  /* make dx and dy either 1 or -1 */
+          dy = dy | 1;
+
+	  /* update the error with move along x axis (apply derr_x) */
+          m = m + (x << 1) + dx;
+	  /* compute the error when we move along y too (apply derr_y) */
+          e = m - (y << 1 ^ dxdy) + dxdy + dx;
+
+	  /* retained move depends on lowest resulting error */
+          bm = m >> 31;
+          be = e >> 31;
+          if ((e ^ be) - be < (m ^ bm) - bm)	  /* if (abs(e) < abs(m)) */
+            {
+	      /* keep smallest error and move y */
+              m = e;
+              y += dy;
+            }
+	  /* move x */
+          x += dx;
+        }
+
+      /* same as above with the roles of x and y exchanged */
+      t = yt ^ ((xt ^ x) & 0x80000000);
+
+      while (1)			/* used in o0, o3, o4, o7 */
+        {
+          _put_pixel(xc + x, gfx_yaddr(s, yc + y));
+
+          if (y == t)
+            return;
+
+          dx = y >> 31;
+          dy = x >> 31;
+
+          if ((x ^ dy) - dy < (y ^ dx) - dx)
+            break;
+
+          dxdy = dx ^ dy;
+          dx ^= ccw;
+          dy ^= ~ccw;
+          dx = dx | 1;
+          dy = dy | 1;
+
+          m = m - (y << 1 ^ dxdy) + dxdy + dx;
+          e = m + (x << 1) + dx;
+
+          bm = m >> 31;
+          be = e >> 31;
+          if ((e ^ be) - be < (m ^ bm) - bm)
+            {
+              m = e;
+              x += dx;
+            }
+          y += dy;
+        }
+    }
+}
+/* backslash-region-end */
+
+/* Prototype of the arc drawing function of a single pixel format;
+   the function name is suffixed with the `l2bpp' macro argument. */
+/* backslash-region-begin */
+#define _GFX_ARC_PROTO(bpp, ppw, l2bpp, l2ppw, pm, ps, word_t)
+
+/** @internal */
+void
+gfx_draw_arc_##l2bpp(const struct gfx_surface_s * __restrict__ s,
+		     gfx_pos_t xc, gfx_pos_t yc, /* arc center xy */
+		     gfx_pos_t x, gfx_pos_t y, /* arc start xy relative to center */
+		     gfx_pos_t xt, gfx_pos_t yt, /* arc end xy relative to center */
+		     uint32_t r2,		 /* squared radius */
+		     uint32_t ccw,               /* direction boolean */
+		     gfx_pixel_t a);
+/* backslash-region-end */
+
+_GFX_BPP_EXPAND(_GFX_ARC_PROTO);
+
+
+/* Definition of the arc drawing function of a single pixel format.
+   The body is _GFX_ARC_DRAW, with the range checked pixel write of
+   the format as pixel operation.
+   NOTE(review): _GFX_PUT_PIXEL_ARGS is presumably a function like
+   macro which adds the surface and pixel value arguments; it is
+   defined outside of this file. */
+/* backslash-region-begin */
+#define _GFX_ARC_OPS(bpp, ppw, l2bpp, l2ppw, pm, ps, word_t)
+
+void
+gfx_draw_arc_##l2bpp(const struct gfx_surface_s * __restrict__ s,
+		     gfx_pos_t xc, gfx_pos_t yc,
+		     gfx_pos_t x, gfx_pos_t y,
+		     gfx_pos_t xt, gfx_pos_t yt,
+		     uint32_t r2, uint32_t ccw, gfx_pixel_t a)
+{
+  _GFX_ARC_DRAW(gfx_put_pixel_safe_##l2bpp _GFX_PUT_PIXEL_ARGS, s, xc, yc, x, y, xt, yt, r2, ccw);
+}
+/* backslash-region-end */
+
+/** @This draws an arc of radius @tt r centered on (xc, yc), from
+    angle @tt a0 to angle @tt a1. Angles are taken modulo 512.
+    Nothing is drawn when the radius is greater than 4096. */
+inline void
+gfx_draw_arc_angles_safe(const struct gfx_surface_s *s,
+                         uint_fast16_t xc, uint_fast16_t yc,
+                         uint_fast16_t a0, uint_fast16_t a1,
+                         uint_fast16_t r, uint32_t ccw, gfx_pixel_t a)
+{
+  if (r > 4096)
+    return;
+
+  uint_fast16_t begin = a0 & 511;
+  uint_fast16_t end = a1 & 511;
+
+  /* start point relative to the center; the Q2.30 result of
+     gfx_cos and gfx_sin is scaled by the radius */
+  int32_t bx = ((int64_t)gfx_cos(begin) * r) >> 30;
+  int32_t by = ((int64_t)gfx_sin(begin) * r) >> 30;
+
+  /* end point relative to the center */
+  int32_t ex = ((int64_t)gfx_cos(end) * r) >> 30;
+  int32_t ey = ((int64_t)gfx_sin(end) * r) >> 30;
+
+  uint32_t squared = r*r;
+
+  switch (s->fmt)
+    _GFX_FMT_SWITCH(gfx_draw_arc, (s, xc, yc, bx, by, ex, ey, squared, ccw, a));
+}
+
+/** @This is currently a no-op: the whole body is disabled by
+    @tt {#if 0}, so nothing is drawn whatever the arguments are.
+
+    The disabled code is an unfinished attempt at finding the arc
+    center from two end points and a radius. It contains debug printf
+    calls and uses the @tt ##l2bpp token pasting operator outside of a
+    macro, so it can not be enabled as is. */
+inline void
+gfx_draw_arc_xy_safe(const struct gfx_surface_s *s,
+                     int32_t x0, int32_t y0,
+                     int32_t x1, int32_t y1,
+                     uint_fast16_t r, uint32_t ccw, gfx_pixel_t a)
+{
+#if 0
+  if (r > 4096)
+    return;
+
+  int32_t r2 = r*r;
+
+  /* middle point */
+  int32_t xm = (x0 + x1) >> 1;
+  int32_t ym = (y0 + y1) >> 1;
+
+  /* vector from middle point to possible arc center. we have to
+     rescale this vector to match the radius. */
+  int32_t xp = ym - y0;
+  int32_t yp = x0 - xm;
+
+  int32_t dx = xp;
+  int32_t dy = yp;
+  int32_t d2 = dx*dx + dy*dy;
+
+  uint_fast8_t k = __builtin_clz(dx | dy) - 1;
+  dx <<= k;
+  dy <<= k;
+
+  printf("dx %u dx %u k %u\n", dx, dy, k);
+
+  unsigned n = 50;
+  while (n--)
+    {
+      /* we want to rescale (xp, yp) so that m is small */
+      int32_t m = r2 - d2 - xp*xp - yp*yp;
+
+      printf("m %-5i %-5u %-5u %u\n", m, xp, yp, n);
+      gfx_put_pixel_safe_##l2bpp(s, xm + xp, ym + yp, a);
+
+      int32_t kx, ky;
+      if (m > 0)
+	{
+	  int_fast8_t l = 32 - (32 - __builtin_clz(m)) / 2;
+	  printf(" l %i\n", l);
+	  kx = dx >> l;
+	  ky = dy >> l;
+	}
+      else
+	{
+	  m = -m;
+	  int_fast8_t l = 32 - (32 - __builtin_clz(m)) / 2;
+	  printf(" l %i\n", l);
+	  kx = -dx >> l;
+	  ky = -dy >> l;
+	}
+      if (!kx && !ky)
+	break;
+      printf(" x %i %i\n", xp, kx);
+      printf(" y %i %i\n", yp, ky);
+      xp += kx;
+      yp += ky;
+    }
+
+  gfx_put_pixel_safe_##l2bpp(s, x0, y0, 0xff);
+  gfx_put_pixel_safe_##l2bpp(s, x1, y1, 0xff);
+
+  int32_t xc = xm + xp;
+  int32_t yc = ym + yp;
+
+  printf("r2 %u\n", r2);
+
+  r2 = xp*xp + yp*yp;
+
+  switch (s->fmt)
+    _GFX_FMT_SWITCH(gfx_draw_arc, (s, xc, yc,
+				   x0 - xc, y0 - yc,
+				   x1 - xc, y1 - yc,
+				   r2, ccw, a));
+#endif
+}
+
+#endif
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libgfx/include/gfx/blit.t	Sun May 27 18:45:35 2018 +0200
@@ -0,0 +1,450 @@
+/*  -*- c -*-
+
+    This file is part of MutekH.
+
+    MutekH is free software; you can redistribute it and/or modify it
+    under the terms of the GNU Lesser General Public License as
+    published by the Free Software Foundation; version 2.1 of the
+    License.
+
+    MutekH is distributed in the hope that it will be useful, but
+    WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public
+    License along with MutekH; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+    02110-1301 USA.
+
+    Copyright Alexandre Becoulet <alexandre.becoulet@free.fr> (c) 2017
+
+*/
+
+#ifndef _GFX_BLIT_H_
+#define _GFX_BLIT_H_
+
+#include <gfx/gfx.h>
+
+/*
+  Copy a single row, walking the source and destination words by
+  increasing index.
+
+  Besides its arguments, the macro uses the following names of the
+  expansion site: the word cursors x0j (source) and x2j (destination),
+  which it modifies, and x1i, x3i, x3f, k0, k1, m1, m2 and m3 as
+  computed by _GFX_BLIT_INIT_FWD.
+
+  get_src, get_dst and put_dst access a word by index and `blend'
+  combines the new word with the current destination word. `e' and
+  `d' are the right and left shift amounts applied to source words;
+  the left shifted part is only merged when `k2' is true.
+*/
+/* backslash-region-begin */
+#define _GFX_BLIT_ROW_FWD(word_t, get_src, get_dst, put_dst, blend, k2, e, d)
+{
+  word_t v, o, p;
+
+  /* keep the destination bits which are not covered by m1 */
+  o = get_dst(x2j) & ~m1;
+
+  if (k1)
+    {
+      v = get_src(x0j++);
+      o |= (v >> e) & m1;
+    }
+
+  if (k0)
+    {
+      /* first destination word */
+      v = get_src(x0j++);
+      if (k2)
+	o |= (v << d) & m1;
+      o = blend(o, get_dst(x2j));
+      put_dst(x2j++, o);
+      o = v >> e;
+
+      /* words fully covered by the source */
+      while (x0j < x1i)
+	{
+	  v = get_src(x0j++);
+	  if (k2)
+	    o |= v << d;
+	  o = blend(o, get_dst(x2j));
+	  put_dst(x2j++, o);
+	  o = v >> e;
+	}
+    }
+
+  v = get_src(x0j);
+  if (k2)
+    o |= (v << d) & m2;
+
+  if (x3i > x2j)
+    {
+      o = blend(o, get_dst(x2j));
+      put_dst(x2j++, o);
+      o = v >> e;
+    }
+
+  /* partial last word: only the bits selected by m3 are replaced */
+  if (x3f)
+    {
+      p = get_dst(x2j);
+      o = blend(o, p);
+      put_dst(x2j, (o & m3) | (p & ~m3));
+    }
+}
+/* backslash-region-end */
+
+/*
+  Copy a single row, walking the source and destination words by
+  decreasing index. This mirrors _GFX_BLIT_ROW_FWD: the shift
+  directions are exchanged, the cursors are decremented and k3 is
+  tested where the forward variant tests k1.
+
+  Besides its arguments, the macro uses the following names of the
+  expansion site: the word cursors x0j (source) and x2j (destination),
+  which it modifies, and x1i, x3i, x3f, k0, k3, m1, m2 and m3 as
+  computed by _GFX_BLIT_INIT_BWD.
+*/
+/* backslash-region-begin */
+#define _GFX_BLIT_ROW_BWD(word_t, get_src, get_dst, put_dst, blend, k2, e, d)
+{
+  /* NOTE(review): the self initialization of v is presumably there to
+     silence an uninitialized variable warning; v is not assigned
+     before use when k3 and k0 are false and x0j is negative. */
+  word_t v = v, o, p;
+
+  /* keep the destination bits which are not covered by m1 */
+  o = get_dst(x2j) & ~m1;
+
+  if (k3)
+    {
+      v = get_src(x0j--);
+      o |= (v << e) & m1;
+    }
+
+  if (k0)
+    {
+      /* last destination word of the row, written first */
+      v = get_src(x0j--);
+      if (k2)
+	o |= (v >> d) & m1;
+      o = blend(o, get_dst(x2j));
+      put_dst(x2j--, o);
+      o = v << e;
+
+      /* words fully covered by the source */
+      while (x0j > x1i)
+	{
+	  v = get_src(x0j--);
+	  if (k2)
+	    o |= v >> d;
+	  o = blend(o, get_dst(x2j));
+	  put_dst(x2j--, o);
+	  o = v << e;
+	}
+    }
+
+  /* do not read before the first word of the row */
+  if (x0j >= 0)
+    v = get_src(x0j);
+
+  if (k2)
+    o |= (v >> d) & m2;
+
+  if (x3i < x2j)
+    {
+      o = blend(o, get_dst(x2j));
+      put_dst(x2j--, o);
+      o = v << e;
+    }
+
+  /* partial word: only the bits selected by m3 are replaced */
+  if (x3f)
+    {
+      p = get_dst(x2j);
+      o = blend(o, p);
+      put_dst(x2j, (o & m3) | (p & ~m3));
+    }
+}
+/* backslash-region-end */
+
+/*
+  Row loop of the blit functions: applies _GFX_BLIT_ROW_<dir> to each
+  row until the source row pointer `ys' reaches `yse', advancing the
+  `ys' and `yd' row pointers by `ysw' and `ydw' words. All these
+  names, as well as x0i, x2i, e and d, come from the expansion site.
+
+  With CONFIG_GFX_UNROLL, the loop is duplicated so that the copy
+  where e is zero gets constant zero shift arguments and a constant
+  false k2, which allows the merging code to be discarded at compile
+  time.
+*/
+#ifdef CONFIG_GFX_UNROLL
+/* backslash-region-begin */
+# define _GFX_BLIT_ROWS(dir, word_t, get_src, get_dst, put_dst, blend)
+if (e)
+  {
+    while (ys != yse)
+      {
+	int32_t x0j = x0i, x2j = x2i;
+	_GFX_BLIT_ROW_##dir(word_t, get_src, get_dst, put_dst, blend, 1, e, d);
+	yd += ydw;
+	ys += ysw;
+      }
+  }
+else
+  {
+    while (ys != yse)
+      {
+	int32_t x0j = x0i, x2j = x2i;
+	_GFX_BLIT_ROW_##dir(word_t, get_src, get_dst, put_dst, blend, 0, 0, 0);
+	yd += ydw;
+	ys += ysw;
+      }
+  }
+/* backslash-region-end */
+#else
+/* backslash-region-begin */
+# define _GFX_BLIT_ROWS(dir, word_t, get_src, get_dst, put_dst, blend)
+while (ys != yse)
+  {
+    int32_t x0j = x0i, x2j = x2i;
+    _GFX_BLIT_ROW_##dir(word_t, get_src, get_dst, put_dst, blend, e, e, d);
+    yd += ydw;
+    ys += ysw;
+  }
+/* backslash-region-end */
+#endif
+
+/*
+  Declare and compute the per blit constants used by
+  _GFX_BLIT_ROW_FWD. The expansion site must provide x0 and x1, the
+  first and one past last source columns, and x2 and x3, the same for
+  the destination.
+
+  Declared names: bpw, word indices x0i..x3i, bit offsets x0f..x3f,
+  shift amounts d and e, flags k0 and k1, word masks m1, m2 and m3.
+*/
+/* backslash-region-begin */
+#define _GFX_BLIT_INIT_FWD(word_t, l2bpp, l2ppw)
+
+  /* bits per word */
+  uint_fast8_t bpw = 8 * sizeof(word_t);
+
+  /* compute word index in row */
+  int32_t x0i = x0 >> l2ppw;
+  int32_t x1i = x1 >> l2ppw;
+  int32_t x2i = x2 >> l2ppw;
+  int32_t x3i = x3 >> l2ppw;
+  /* true when the destination spans more than one word index */
+  bool_t k0 = x3i != x2i;
+
+  /* compute bit index in word */
+  uint_fast8_t x0f, x1f, x2f, x3f;
+  uint_fast8_t d, e;
+  bool_t k1;
+
+  if (l2ppw > 0)
+    {
+      x0f = (x0 << l2bpp) & (bpw - 1);
+      x1f = (x1 << l2bpp) & (bpw - 1);
+      x2f = (x2 << l2bpp) & (bpw - 1);
+      x3f = (x3 << l2bpp) & (bpw - 1);
+
+      /* e and d are complementary shift amounts: e + d == bpw */
+      k1 = x2f <= x0f;
+      if (k1)
+	{
+	  e = x0f - x2f;
+	  d = bpw - e;
+	}
+      else
+	{
+	  d = x2f - x0f;
+	  e = bpw - d;
+	}
+    }
+  else
+    {
+      /* one pixel per word: no bit offset, no shift */
+      x0f = x1f = x2f = x3f = 0;
+      k1 = 1;
+      d = e = 0;
+    }
+
+  /* precompute word masks */
+  /* m1: bits of the first destination word at or above x2f */
+  word_t m1 = (word_t)0xffffffffU;
+  m1 = m1 << x2f;
+
+  word_t m2 = m1;
+  if (k1 || k0)
+    m2 = (word_t)0xffffffffU;
+
+  /* m3: bits of the last destination word below x3f, all bits when
+     x3f is zero */
+  word_t m3 = (word_t)0xffffffffU;
+  m3 = m3 >> ((bpw - x3f) & (bpw - 1));
+/* backslash-region-end */
+
+/*
+  Declare and compute the per blit constants used by
+  _GFX_BLIT_ROW_BWD. This mirrors _GFX_BLIT_INIT_FWD: word indices are
+  those of the column preceding x0..x3 and the mask shift directions
+  are exchanged.
+
+  Declared names: bpw, word indices x0i..x3i, bit offsets x0f..x3f,
+  shift amounts d and e, flags k0, k1 and k3, word masks m1, m2, m3.
+*/
+/* backslash-region-begin */
+#define _GFX_BLIT_INIT_BWD(word_t, l2bpp, l2ppw)
+
+  /* bits per word */
+  uint_fast8_t bpw = 8 * sizeof(word_t);
+
+  /* compute word index in row */
+  int32_t x0i = (x0 - 1) >> l2ppw;
+  int32_t x1i = (x1 - 1) >> l2ppw;
+  int32_t x2i = (x2 - 1) >> l2ppw;
+  int32_t x3i = (x3 - 1) >> l2ppw;
+  /* true when the destination spans more than one word index */
+  bool_t k0 = x3i != x2i;
+
+  /* compute bit index in word */
+  uint32_t x0f, x1f, x2f, x3f;
+  uint_fast8_t d, e;
+  bool_t k1, k3;
+
+  if (l2ppw > 0)
+    {
+      x0f = (x0 << l2bpp) & (bpw - 1);
+      x1f = (x1 << l2bpp) & (bpw - 1);
+      x2f = (x2 << l2bpp) & (bpw - 1);
+      x3f = (x3 << l2bpp) & (bpw - 1);
+
+      k1 = x2f >= x0f;
+      /* unsigned compare: an offset of 0 wraps to the largest value,
+	 so it ranks above any other offset */
+      k3 = (x2f - 1) >= (x0f - 1);
+      /* e and d are complementary shift amounts: e + d == bpw */
+      if (k1)
+	{
+	  e = x2f - x0f;
+	  d = bpw - e;
+	}
+      else
+	{
+	  d = x0f - x2f;
+	  e = bpw - d;
+	}
+    }
+  else
+    {
+      /* one pixel per word: no bit offset, no shift */
+      x0f = x1f = x2f = x3f = 0;
+      k3 = k1 = 1;
+      d = e = 0;
+    }
+
+  /* precompute word masks */
+  /* m1: bits below x2f, all bits when x2f is zero */
+  word_t m1 = (word_t)0xffffffffU;
+  m1 = m1 >> ((bpw - x2f) & (bpw - 1));
+
+  word_t m2 = m1;
+  if (k1 || k0)
+    m2 = (word_t)0xffffffffU;
+
+  /* m3: bits at or above x3f */
+  word_t m3 = (word_t)0xffffffffU;
+  m3 = m3 << x3f;
+/* backslash-region-end */
+
+/* Prototypes of the blit functions of a single pixel format; the
+   function names are suffixed with the `l2bpp' macro argument. The
+   overlap variant copies inside a single surface. */
+/* backslash-region-begin */
+#define _GFX_BLIT_PROTO(bpp, ppw, l2bpp, l2ppw, pm, ps, word_t)
+
+/** @internal */
+void
+gfx_blit_nc_##l2bpp(const struct gfx_surface_s * __restrict__ d,
+		    gfx_pos_t x2, gfx_pos_t y2, /* dest */
+		    const struct gfx_surface_s * __restrict__ s,
+		    gfx_pos_t x0, gfx_pos_t y0, /* src */
+		    gfx_pos_t w, gfx_pos_t h);
+
+/** @internal */
+void
+gfx_blit_overlap_nc_##l2bpp(const struct gfx_surface_s * __restrict__ d,
+			    gfx_pos_t x2, gfx_pos_t y2, /* dest */
+			    gfx_pos_t x0, gfx_pos_t y0, /* src */
+			    gfx_pos_t w, gfx_pos_t h);
+/* backslash-region-end */
+
+_GFX_BPP_EXPAND(_GFX_BLIT_PROTO);
+
+
+/* Word accessors and blend operation passed to _GFX_BLIT_ROWS by the
+   plain copy functions: words are indexed from the current source
+   (ys) and destination (yd) row pointers, and blending keeps the new
+   word while ignoring the old destination word. */
+#define _GFX_BLIT_GET_SRC(x) ys[x]
+#define _GFX_BLIT_GET_DST(x) yd[x]
+#define _GFX_BLIT_PUT_DST(x, v) (yd[x] = (v))
+#define _GFX_BLIT_BLEND(v, old) (v)
+
+/*
+  Definitions of the blit functions of a single pixel format. None of
+  them check the boxes against the surface bounds.
+
+  gfx_blit_nc_<l2bpp> copies a w x h box from (x0, y0) in surface `s'
+  to (x2, y2) in surface `d'.
+
+  gfx_blit_overlap_nc_<l2bpp> copies a w x h box from (x0, y0) to
+  (x2, y2) inside the surface `d'; the source and destination boxes
+  may overlap.
+*/
+/* backslash-region-begin */
+#define _GFX_BLIT_OPS(bpp, ppw, l2bpp, l2ppw, pm, ps, word_t)
+
+void
+gfx_blit_nc_##l2bpp(const struct gfx_surface_s * __restrict__ d,
+		    gfx_pos_t x2, gfx_pos_t y2, /* dest */
+		    const struct gfx_surface_s * __restrict__ s,
+		    gfx_pos_t x0, gfx_pos_t y0, /* src */
+		    gfx_pos_t w, gfx_pos_t h)
+{
+  gfx_pos_t x1 = x0 + w;
+  gfx_pos_t y1 = y0 + h;
+  gfx_pos_t x3 = x2 + w;
+
+  /* first and one past last source rows, first destination row */
+  const word_t *ys = (word_t*)s->ptr + gfx_yaddr(s, y0);
+  const word_t *yse = (word_t*)s->ptr + gfx_yaddr(s, y1);
+  word_t *yd = (word_t*)d->ptr + gfx_yaddr(d, y2);
+  /* row strides in words */
+  gfx_addr_t ysw = gfx_yaddr(s, 1);
+  gfx_addr_t ydw = gfx_yaddr(d, 1);
+
+  {
+    _GFX_BLIT_INIT_FWD(word_t, l2bpp, l2ppw);
+    _GFX_BLIT_ROWS(FWD, word_t, _GFX_BLIT_GET_SRC,
+		   _GFX_BLIT_GET_DST, _GFX_BLIT_PUT_DST, _GFX_BLIT_BLEND);
+  }
+}
+
+void
+gfx_blit_overlap_nc_##l2bpp(const struct gfx_surface_s * __restrict__ d,
+			    gfx_pos_t x2, gfx_pos_t y2, /* dest */
+			    gfx_pos_t x0, gfx_pos_t y0, /* src */
+			    gfx_pos_t w, gfx_pos_t h)
+{
+  gfx_pos_t x1 = x0 + w;
+  gfx_pos_t y1 = y0 + h;
+  gfx_pos_t x3 = x2 + w;
+  gfx_pos_t y3 = y2 + h;
+
+  const word_t *ys, *yse;
+  word_t *yd;
+  gfx_pos_t ysw, ydw;
+
+  if (y2 > y0)
+    {
+      /* moving down: process rows from bottom to top so that source
+	 rows are read before being overwritten */
+      ysw = ydw = -gfx_yaddr(d, 1);
+      ys = (word_t*)d->ptr + gfx_yaddr(d, y1) + ysw;
+      yse = (word_t*)d->ptr + gfx_yaddr(d, y0) + ysw;
+      yd = (word_t*)d->ptr + gfx_yaddr(d, y3) + ydw;
+    }
+  else
+    {
+      /* moving up or staying on the same rows: top to bottom */
+      ysw = ydw = gfx_yaddr(d, 1);
+      ys = (word_t*)d->ptr + gfx_yaddr(d, y0);
+      yse = (word_t*)d->ptr + gfx_yaddr(d, y1);
+      yd = (word_t*)d->ptr + gfx_yaddr(d, y2);
+    }
+
+  /* The direction used inside a row only matters when source and
+     destination rows are the same. In that case, moving to the right
+     requires walking the words backward so that source words are
+     read before being overwritten. The former test compared x0 with
+     x1 = x0 + w, which is always true for a non negative width, so
+     the backward code was never used. */
+  if (y2 != y0 || x2 <= x0)
+    {
+      _GFX_BLIT_INIT_FWD(word_t, l2bpp, l2ppw);
+      _GFX_BLIT_ROWS(FWD, word_t, _GFX_BLIT_GET_SRC,
+		     _GFX_BLIT_GET_DST, _GFX_BLIT_PUT_DST, _GFX_BLIT_BLEND);
+    }
+  else
+    {
+      /* the backward code starts from the end columns */
+      _GFX_SWAP(x0, x1);
+      _GFX_SWAP(x3, x2);
+      _GFX_BLIT_INIT_BWD(word_t, l2bpp, l2ppw);
+      _GFX_BLIT_ROWS(BWD, word_t, _GFX_BLIT_GET_SRC,
+		     _GFX_BLIT_GET_DST, _GFX_BLIT_PUT_DST, _GFX_BLIT_BLEND);
+    }
+}
+/* backslash-region-end */
+
+/** @This copies a w x h box from (x0, y0) in surface @tt s to
+    (x2, y2) in surface @tt d, without checking the boxes. Nothing is
+    copied when the two surfaces do not have the same format. */
+inline void
+gfx_blit_nc(const struct gfx_surface_s * __restrict__ d,
+	    gfx_pos_t x2, gfx_pos_t y2, /* dest */
+	    const struct gfx_surface_s * __restrict__ s,
+	    gfx_pos_t x0, gfx_pos_t y0, /* src */
+	    gfx_pos_t w, gfx_pos_t h)
+{
+  /* FIXME format conversion is not supported yet */
+  if (d->fmt == s->fmt)
+    {
+      switch (d->fmt)
+	_GFX_FMT_SWITCH(gfx_blit_nc, (d, x2, y2, s, x0, y0, w, h));
+    }
+}
+
+/** @This copies a w x h box from (x0, y0) in surface @tt s to
+    (x2, y2) in surface @tt d. The width and height are truncated to
+    16 bits. Nothing is copied unless both the source and destination
+    boxes pass @ref gfx_box_check. */
+inline void
+gfx_blit_safe(const struct gfx_surface_s * __restrict__ d,
+	      gfx_pos_t x2, gfx_pos_t y2, /* dest */
+	      const struct gfx_surface_s * __restrict__ s,
+	      gfx_pos_t x0, gfx_pos_t y0, /* src */
+	      gfx_pos_t w, gfx_pos_t h)
+{
+  w &= 0xffff;
+  h &= 0xffff;
+
+  /* last column and row of the source box */
+  gfx_pos_t src_right = x0 + w - 1;
+  gfx_pos_t src_bottom = y0 + h - 1;
+
+  if (!gfx_box_check(s, x0, y0, src_right, src_bottom))
+    return;
+
+  /* last column and row of the destination box */
+  gfx_pos_t dst_right = x2 + w - 1;
+  gfx_pos_t dst_bottom = y2 + h - 1;
+
+  if (!gfx_box_check(d, x2, y2, dst_right, dst_bottom))
+    return;
+
+  gfx_blit_nc(d, x2, y2, s, x0, y0, w, h);
+}
+
+/** @This copies a w x h box from (x0, y0) to (x2, y2) inside the
+    surface @tt d, without checking the boxes. The call is dispatched
+    to the function matching the format of the surface. */
+inline void
+gfx_blit_overlap_nc(const struct gfx_surface_s * __restrict__ d,
+		    gfx_pos_t x2, gfx_pos_t y2, /* dest */
+		    gfx_pos_t x0, gfx_pos_t y0, /* src */
+		    gfx_pos_t w, gfx_pos_t h)
+{
+  switch (d->fmt)
+    _GFX_FMT_SWITCH(gfx_blit_overlap_nc, (d, x2, y2, x0, y0, w, h));
+}
+
+/** @This copies a w x h box from (x0, y0) to (x2, y2) inside the
+    surface @tt d. The width and height are truncated to 16 bits.
+    Nothing is copied unless both the source and destination boxes
+    pass @ref gfx_box_check. */
+inline void
+gfx_blit_overlap_safe(const struct gfx_surface_s * __restrict__ d,
+		      gfx_pos_t x2, gfx_pos_t y2, /* dest */
+		      gfx_pos_t x0, gfx_pos_t y0, /* src */
+		      gfx_pos_t w, gfx_pos_t h)
+{
+  w &= 0xffff;
+  h &= 0xffff;
+
+  /* last column and row of the source box */
+  gfx_pos_t src_right = x0 + w - 1;
+  gfx_pos_t src_bottom = y0 + h - 1;
+
+  if (!gfx_box_check(d, x0, y0, src_right, src_bottom))
+    return;
+
+  /* last column and row of the destination box */
+  gfx_pos_t dst_right = x2 + w - 1;
+  gfx_pos_t dst_bottom = y2 + h - 1;
+
+  if (!gfx_box_check(d, x2, y2, dst_right, dst_bottom))
+    return;
+
+  gfx_blit_overlap_nc(d, x2, y2, x0, y0, w, h);
+}
+
+#endif
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libgfx/include/gfx/bytecode.h	Sun May 27 18:45:35 2018 +0200
@@ -0,0 +1,196 @@
+/*  -*- c -*-
+
+    This file is part of MutekH.
+
+    MutekH is free software; you can redistribute it and/or modify it
+    under the terms of the GNU Lesser General Public License as
+    published by the Free Software Foundation; version 2.1 of the
+    License.
+
+    MutekH is distributed in the hope that it will be useful, but
+    WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public
+    License along with MutekH; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+    02110-1301 USA.
+
+    Copyright Alexandre Becoulet <alexandre.becoulet@free.fr> (c) 2017
+
+*/
+
+#ifndef _GFX_BYTECODE_H_
+#define _GFX_BYTECODE_H_
+
+#include <gfx/gfx.h>
+
+struct bc_context_s;
+
+typedef uint32_t gfx_bc_surface_desc_t;
+typedef uint32_t gfx_bc_tilemap_desc_t;
+typedef uint32_t gfx_2dvector_t;
+
+/** @This packs 11 bits integer coordinates as a 32 bits value.
+    The fractional bits of the packed value are left to 0.
+@code r
+    in:                         XXX XXXXXXXX
+                                YYY YYYYYYYY
+    out: xxxxxyyy yyXXXYYY YYYYYYYY XXXXXXXX
+@end code
+*/
+#define GFX_XY(x, y) ( ((x) & 0x700) << 11 | ((y) & 0x7ff) << 8 | ((x) & 0xff) )
+
+/** @This packs Q11.5 fixed point coordinates as a 32 bits value
+for use in a bytecode program.
+@code r
+    in:                     XXXXXXXXXXXxxxxx
+                            YYYYYYYYYYYyyyyy
+    out: xxxxxyyy yyXXXYYY YYYYYYYY XXXXXXXX
+@end code
+*/
+#define GFX_XYF(x, y) (((x) & 0x1f) << 27 | ((x) & 0x1fe0) >> 5 | ((x) & 0xe000) << 6 \
+                     | ((y) & 0x1f) << 22 | ((y) & 0xffe0) << 3)
+
+/** @This packs surface attributes as a 32 bits value for use in a
+    bytecode program. The format is stored in the low bits, the
+    height is shifted by 8 bits and the width by 20 bits. The
+    arguments are not masked: the caller must ensure that each one
+    fits in its field. */
+#define GFX_SURFACE(width, height, fmt) \
+	((width) << 20 | (height) << 8 | (fmt))
+
+/** @This packs tilemap attributes as a 32 bits value for use in a
+    bytecode program. The offset is stored in the low bits, the
+    height is shifted by 8 bits and the width by 20 bits. The
+    arguments are not masked: the caller must ensure that each one
+    fits in its field. */
+#define GFX_TILEMAP(width, height, offset)              \
+	((width) << 20 | (height) << 8 | (offset))
+
+/** @internal @This converts a packed vector to a pair of Q11.5 fixed
+    point values, y in the upper half and x in the lower half.
+@code r
+    in:  xxxxxyyy yyXXXYYY YYYYYYYY XXXXXXXX
+    out: YYYYYYYY YYYyyyyy XXXXXXXX XXXxxxxx
+@end code
+*/
+ALWAYS_INLINE gfx_2dvector_t gfx_vector_p2xy(gfx_2dvector_t p)
+{
+  /* 11 integer bits of y: bits 8-18 go to bits 21-31 */
+  gfx_2dvector_t y_int = (p & 0x7ff00) << 13;
+  /* 8 low integer bits of x: bits 0-7 go to bits 5-12 */
+  gfx_2dvector_t x_low = (p & 0xff) << 5;
+  /* 3 high integer bits of x and 5 fractional bits of y are
+     adjacent in both layouts: bits 19-26 go to bits 13-20 */
+  gfx_2dvector_t x_high_y_frac = (p >> 6) & 0x1fe000;
+  /* 5 fractional bits of x: bits 27-31 go to bits 0-4 */
+  gfx_2dvector_t x_frac = (p >> 27) & 0x1f;
+
+  return y_int | x_low | x_high_y_frac | x_frac;
+}
+
+/** @internal @This converts a pair of Q11.5 fixed point values, y in
+    the upper half and x in the lower half, to a packed vector.
+@code r
+    in:  YYYYYYYY YYYyyyyy XXXXXXXX XXXxxxxx
+    out: xxxxxyyy yyXXXYYY YYYYYYYY XXXXXXXX
+@end code
+*/
+ALWAYS_INLINE gfx_2dvector_t gfx_vector_xy2p(gfx_2dvector_t p)
+{
+  /* 11 integer bits of y: bits 21-31 go to bits 8-18 */
+  gfx_2dvector_t y_int = (p >> 13) & 0x7ff00;
+  /* 8 low integer bits of x: bits 5-12 go to bits 0-7 */
+  gfx_2dvector_t x_low = (p >> 5) & 0xff;
+  /* 3 high integer bits of x and 5 fractional bits of y are
+     adjacent in both layouts: bits 13-20 go to bits 19-26 */
+  gfx_2dvector_t x_high_y_frac = (p & 0x1fe000) << 6;
+  /* 5 fractional bits of x: bits 0-4 go to bits 27-31 */
+  gfx_2dvector_t x_frac = (p & 0x1f) << 27;
+
+  return y_int | x_low | x_high_y_frac | x_frac;
+}
+
+/** @internal @This packs two separate Q11.5 fixed point coordinates
+    as a vector. Only the 16 low bits of @em x and @em y are used.
+@code r
+    in:                     XXXXXXXXXXXxxxxx
+                            YYYYYYYYYYYyyyyy
+    out: xxxxxyyy yyXXXYYY YYYYYYYY XXXXXXXX
+@end code
+*/
+ALWAYS_INLINE gfx_2dvector_t gfx_vector_xy_2p(gfx_pos_t x, gfx_pos_t y)
+{
+  /* Perform the bit shuffling on unsigned values: gfx_pos_t is
+     signed and moving the fractional bits of x up to bit 31 would
+     overflow a signed integer, which is undefined behavior in C.
+     The conversion keeps the low bits unchanged. */
+  gfx_2dvector_t ux = x;
+  gfx_2dvector_t uy = y;
+
+  return (ux & 0x1f) << 27 | (ux & 0x1fe0) >> 5 | (ux & 0xe000) << 6
+    |    (uy & 0x1f) << 22 | (uy & 0xffe0) << 3;
+}
+
+/** @internal @This extracts the 11 bits integer part of the x
+    coordinate from a packed vector.
+@code r
+    in:  xxxxxyyy yyXXXYYY YYYYYYYY XXXXXXXX
+    out:                        XXX XXXXXXXX
+@end code
+*/
+ALWAYS_INLINE uint_fast16_t gfx_vector_xint(gfx_2dvector_t p)
+{
+  /* bits 19-21 hold the 3 high bits, bits 0-7 the 8 low bits */
+  gfx_2dvector_t high = (p >> 11) & 0x700;
+  gfx_2dvector_t low = p & 0xff;
+
+  return high | low;
+}
+
+/** @internal @This extracts the x coordinate from a packed vector as
+    a Q11.5 fixed point value.
+@code r
+    in:  xxxxxyyy yyXXXYYY YYYYYYYY XXXXXXXX
+    out:                  XXX XXXXXXXX xxxxx
+@end code
+*/
+ALWAYS_INLINE uint_fast16_t gfx_vector_x(gfx_2dvector_t p)
+{
+  /* 3 high integer bits: bits 19-21 go to bits 13-15 */
+  gfx_2dvector_t int_high = (p >> 6) & 0xe000;
+  /* 8 low integer bits: bits 0-7 go to bits 5-12 */
+  gfx_2dvector_t int_low = (p & 0xff) << 5;
+  /* 5 fractional bits: bits 27-31 go to bits 0-4 */
+  gfx_2dvector_t frac = (p >> 27) & 0x1f;
+
+  return int_high | int_low | frac;
+}
+
+/** @internal @This packs an 11 bits integer x coordinate as a vector.
+    All other fields of the vector are 0.
+@code r
+    in:                         XXX XXXXXXXX
+    out: xxxxxyyy yyXXXYYY YYYYYYYY XXXXXXXX
+@end code
+*/
+ALWAYS_INLINE gfx_2dvector_t gfx_vector_xint2p(uint_fast16_t x)
+{
+  /* the 3 high bits go to bits 19-21, the 8 low bits stay in place */
+  uint_fast16_t high = x & 0x700;
+  uint_fast16_t low = x & 0xff;
+
+  return (high << 11) | low;
+}
+
+/** @internal @This extracts the 11 bits integer part of the y
+    coordinate from a packed vector.
+@code r
+    in:  xxxxxyyy yyXXXYYY YYYYYYYY XXXXXXXX
+    out:                        YYY YYYYYYYY
+@end code
+*/
+ALWAYS_INLINE uint_fast16_t gfx_vector_yint(gfx_2dvector_t p)
+{
+  /* the integer part of y is stored in bits 8-18 */
+  gfx_2dvector_t shifted = p >> 8;
+
+  return shifted & 0x7ff;
+}
+
+/** @internal @This extracts the y coordinate from a packed vector as
+    a Q11.5 fixed point value.
+@code r
+    in:  xxxxxyyy yyXXXYYY YYYYYYYY XXXXXXXX
+    out:                  YYY YYYYYYYY yyyyy
+@end code
+*/
+ALWAYS_INLINE uint_fast16_t gfx_vector_y(gfx_2dvector_t p)
+{
+  /* 11 integer bits: bits 8-18 go to bits 5-15 */
+  gfx_2dvector_t int_part = (p >> 3) & 0xffe0;
+  /* 5 fractional bits: bits 22-26 go to bits 0-4 */
+  gfx_2dvector_t frac = (p >> 22) & 0x1f;
+
+  return int_part | frac;
+}
+
+/** @internal @This packs an 11 bits integer y coordinate as a vector.
+    All other fields of the vector are 0.
+@code r
+    in:                         YYY YYYYYYYY
+    out: xxxxxyyy yyXXXYYY YYYYYYYY XXXXXXXX
+@end code
+*/
+ALWAYS_INLINE gfx_2dvector_t gfx_vector_yint2p(uint_fast16_t y)
+{
+  /* the integer part of y is stored in bits 8-18 */
+  uint_fast16_t y_int = y & 0x7ff;
+
+  return y_int << 8;
+}
+
+/** @This contains the four surfaces and the single tilemap available
+    to the bytecode program, along with the current drawing
+    attribute. */
+struct gfx_bc_context_s
+{
+  /** surface pool */
+  struct gfx_surface_s s[4];
+
+  /** currently selected tilemap */
+  struct gfx_tilemap_s tilemap;
+
+  /** current drawing attribute */
+  gfx_pixel_t attr;
+};
+
+/** @This initializes a gfx bytecode context. */
+void gfx_bc_init(struct gfx_bc_context_s *ctx);
+
+/** @This executes a single gfx bytecode operation. The @em op
+    parameter is the 16 bits opcode to execute, @em vm is the
+    bytecode virtual machine context and @em ctx is the gfx context. */
+error_t gfx_bc_run(struct bc_context_s *vm,
+		   struct gfx_bc_context_s *ctx,
+		   uint16_t op);
+
+/** @This tests if the return status of @ref bc_run is a gfx opcode.
+    This is true when the 4 most significant bits of the 16 bits
+    value are in the range 0x9 to 0xf. */
+#define GFX_BC_IS_GFX_OP(op) (((op) & 0xf000) > 0x8000)
+
+#endif
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libgfx/include/gfx/circle.t	Sun May 27 18:45:35 2018 +0200
@@ -0,0 +1,310 @@
+/*  -*- c -*-
+
+    This file is part of MutekH.
+
+    MutekH is free software; you can redistribute it and/or modify it
+    under the terms of the GNU Lesser General Public License as
+    published by the Free Software Foundation; version 2.1 of the
+    License.
+
+    MutekH is distributed in the hope that it will be useful, but
+    WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public
+    License along with MutekH; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+    02110-1301 USA.
+
+    Copyright Alexandre Becoulet <alexandre.becoulet@free.fr> (c) 2017
+
+*/
+
+#ifndef _GFX_CIRCLE_H_
+#define _GFX_CIRCLE_H_
+
+#include <gfx/gfx.h>
+
+/* Template code which draws the outline of a circle of radius r
+   centered at (xc, yc) on surface s.
+
+   _put_pixel is invoked as _put_pixel(x, row) where row is the value
+   returned by gfx_yaddr for the pixel row. The loop walks from
+   (x = r, y = 0) while x >= y and plots two points per selected
+   quadrant on each iteration, using the x/y symmetry to cover the
+   whole quadrant. Bits of oct select quadrants relative to the
+   center: 1 is (+x, +y), 2 is (-x, +y), 4 is (-x, -y) and 8 is
+   (+x, -y).
+
+   m is the running error term. On each step, e is the error obtained
+   when x is also decremented; the candidate with the smallest
+   absolute value is retained. The shift by 31 relies on gfx_pos_t
+   being 32 bits wide. No bounds checking is performed. */
+/* backslash-region-begin */
+#define _GFX_CIRCLE_DRAW(_put_pixel, s, xc, yc, r, oct)
+{
+  gfx_pos_t x = r;
+  gfx_pos_t y = 0;
+  gfx_pos_t m = 0;
+  gfx_pos_t e, be, bm;
+
+  while (x >= y)
+    {
+      gfx_addr_t ypx = gfx_yaddr(s, yc + x);
+      gfx_addr_t ymx = gfx_yaddr(s, yc - x);
+      gfx_addr_t ypy = gfx_yaddr(s, yc + y);
+      gfx_addr_t ymy = gfx_yaddr(s, yc - y);
+
+      if (oct & 1)
+        {
+          _put_pixel(xc + x, ypy);
+          _put_pixel(xc + y, ypx);
+        }
+      if (oct & 2)
+        {
+          _put_pixel(xc - x, ypy);
+          _put_pixel(xc - y, ypx);
+        }
+      if (oct & 4)
+        {
+          _put_pixel(xc - x, ymy);
+          _put_pixel(xc - y, ymx);
+        }
+      if (oct & 8)
+        {
+          _put_pixel(xc + x, ymy);
+          _put_pixel(xc + y, ymx);
+        }
+
+      m = m - (y << 1) - 1;
+      e = m + (x << 1) - 1;
+      bm = m >> 31;
+      be = e >> 31;
+      if ((e ^ be) - be < (m ^ bm) - bm)          /* if (abs(e) < abs(m)) */
+        {
+          m = e;
+          x--;
+        }
+      y++;
+    }
+}
+/* backslash-region-end */
+
+/* Template code which fills the inside of a circle of radius r
+   centered at (xc, yc) on surface s using horizontal lines.
+
+   _hline is invoked as _hline(x_first, x_last, row) where row is the
+   value returned by gfx_yaddr. Bits of oct select quadrants relative
+   to the center: 1 is (+x, +y), 2 is (-x, +y), 4 is (-x, -y) and 8
+   is (+x, -y). In each selected quadrant, a line is drawn between
+   the vertical axis of the circle and its edge.
+
+   The first loop handles rows while x >= y: y advances on every
+   iteration, so one line is drawn per iteration. The second loop
+   handles the remaining rows, where x is decremented on every
+   iteration and a line is only drawn when y advances. It is entered
+   at the dr label so that the row reached at the end of the first
+   loop is drawn before stepping further. */
+/* backslash-region-begin */
+#define _GFX_CIRCLE_INFILL_DRAW(_hline, s, xc, yc, r, oct)
+{
+  gfx_pos_t x = r, y = 0, m = 0;
+  gfx_pos_t e, be, bm;
+
+  while (x >= y)
+    {
+      gfx_addr_t ypy = gfx_yaddr(s, yc + y);
+      gfx_addr_t ymy = gfx_yaddr(s, yc - y);
+
+      if (oct & 1)
+        _hline(xc, xc + x, ypy);
+      if (oct & 2)
+        _hline(xc - x, xc, ypy);
+      if (oct & 4)
+        _hline(xc - x, xc, ymy);
+      if (oct & 8)
+        _hline(xc, xc + x, ymy);
+
+      m = m - (y << 1) - 1;
+      e = m + (x << 1) - 1;
+      bm = m >> 31;
+      be = e >> 31;
+      if ((e ^ be) - be < (m ^ bm) - bm)          /* if (abs(e) < abs(m)) */
+        {
+          m = e;
+          x--;
+        }
+      y++;
+    }
+
+  goto dr;
+  while (x >= 0)
+    {
+      m = m + (x << 1) - 1;
+      e = m - (y << 1) - 1;
+      bm = m >> 31;
+      be = e >> 31;
+      x--;
+      if ((e ^ be) - be < (m ^ bm) - bm)          /* if (abs(e) < abs(m)) */
+        {
+          m = e;
+          y++;
+        dr:;
+          gfx_addr_t ypy = gfx_yaddr(s, yc + y);
+          gfx_addr_t ymy = gfx_yaddr(s, yc - y);
+
+          if (oct & 1)
+            _hline(xc, xc + x, ypy);
+          if (oct & 2)
+            _hline(xc - x, xc, ypy);
+          if (oct & 4)
+            _hline(xc - x, xc, ymy);
+          if (oct & 8)
+            _hline(xc, xc + x, ymy);
+        }
+    }
+}
+/* backslash-region-end */
+
+/* Template code which fills the area located between a circle of
+   radius r centered at (xc, yc) and its bounding square, using
+   horizontal lines on surface s.
+
+   _hline is invoked as _hline(x_first, x_last, row) where row is the
+   value returned by gfx_yaddr. Bits of oct select quadrants relative
+   to the center: 1 is (+x, +y), 2 is (-x, +y), 4 is (-x, -y) and 8
+   is (+x, -y). In each selected quadrant, a line is drawn from the
+   pixel next to the circle edge up to distance r from the center.
+   Rows where the edge is at distance r (x == r) are skipped because
+   there is nothing to fill.
+
+   The two loops and the jump to the dr label follow the same scheme
+   as in _GFX_CIRCLE_INFILL_DRAW: one row per iteration while x >= y,
+   then one row each time y advances while x goes down to 0. */
+/* backslash-region-begin */
+#define _GFX_CIRCLE_OUTFILL_DRAW(_hline, s, xc, yc, r, oct)
+{
+  gfx_pos_t x = r, y = 0, m = 0;
+  gfx_pos_t e, be, bm;
+
+  while (x >= y)
+    {
+      gfx_addr_t ypy = gfx_yaddr(s, yc + y);
+      gfx_addr_t ymy = gfx_yaddr(s, yc - y);
+
+      if (x < r)
+        {
+          if (oct & 1)
+            _hline(xc + x + 1, xc + r, ypy);
+          if (oct & 2)
+            _hline(xc - r, xc - x - 1, ypy);
+          if (oct & 4)
+            _hline(xc - r, xc - x - 1, ymy);
+          if (oct & 8)
+            _hline(xc + x + 1, xc + r, ymy);
+        }
+
+      m = m - (y << 1) - 1;
+      e = m + (x << 1) - 1;
+      bm = m >> 31;
+      be = e >> 31;
+      if ((e ^ be) - be < (m ^ bm) - bm)          /* if (abs(e) < abs(m)) */
+        {
+          m = e;
+          x--;
+        }
+      y++;
+    }
+
+  goto dr;
+  while (x >= 0)
+    {
+      m = m + (x << 1) - 1;
+      e = m - (y << 1) - 1;
+      bm = m >> 31;
+      be = e >> 31;
+      x--;
+      if ((e ^ be) - be < (m ^ bm) - bm)          /* if (abs(e) < abs(m)) */
+        {
+          m = e;
+          y++;
+        dr:;
+          gfx_addr_t ypy = gfx_yaddr(s, yc + y);
+          gfx_addr_t ymy = gfx_yaddr(s, yc - y);
+
+          if (x < r)
+            {
+              if (oct & 1)
+                _hline(xc + x + 1, xc + r, ypy);
+              if (oct & 2)
+                _hline(xc - r, xc - x - 1, ypy);
+              if (oct & 4)
+                _hline(xc - r, xc - x - 1, ymy);
+              if (oct & 8)
+                _hline(xc + x + 1, xc + r, ymy);
+            }
+        }
+    }
+}
+/* backslash-region-end */
+
+
+/* Template which declares the circle drawing functions for a single
+   pixel format. It is expanded once per enabled format by
+   _GFX_BPP_EXPAND; the l2bpp argument is used as the function name
+   suffix. The other template arguments are not used here. */
+/* backslash-region-begin */
+#define _GFX_CIRCLE_PROTO(bpp, ppw, l2bpp, l2ppw, pm, ps, word_t)
+
+/** @internal */
+void
+gfx_draw_circle_nc_##l2bpp(const struct gfx_surface_s * __restrict__ s,
+                           gfx_pos_t xc, gfx_pos_t yc,
+                           gfx_pos_t r, uint8_t oct, gfx_pixel_t a);
+
+/** @internal */
+void
+gfx_draw_circle_infill_nc_##l2bpp(const struct gfx_surface_s * __restrict__ s,
+                                  gfx_pos_t xc, gfx_pos_t yc,
+                                  gfx_pos_t r, uint8_t oct, gfx_pixel_t a);
+
+/** @internal */
+void
+gfx_draw_circle_outfill_nc_##l2bpp(const struct gfx_surface_s * __restrict__ s,
+                                   gfx_pos_t xc, gfx_pos_t yc,
+                                   gfx_pos_t r, uint8_t oct, gfx_pixel_t a);
+/* backslash-region-end */
+
+_GFX_BPP_EXPAND(_GFX_CIRCLE_PROTO);
+
+
+/* Template which defines the circle drawing functions for a single
+   pixel format. It is not expanded in this header.
+
+   Each function instantiates one of the drawing templates with the
+   pixel or horizontal line primitive of the format; the
+   _GFX_PUT_PIXEL_ARGS macro, defined elsewhere, is appended to the
+   primitive name.
+
+   NOTE(review): despite their _nc suffix, the infill and outfill
+   variants call gfx_circle_check and draw nothing when the check
+   fails, whereas the outline variant performs no check. The _safe
+   wrappers perform the same check before dispatching. */
+/* backslash-region-begin */
+#define _GFX_CIRCLE_OPS(bpp, ppw, l2bpp, l2ppw, pm, ps, word_t)
+
+void
+gfx_draw_circle_nc_##l2bpp(const struct gfx_surface_s * __restrict__ s,
+                           gfx_pos_t xc, gfx_pos_t yc,
+                           gfx_pos_t r, uint8_t oct, gfx_pixel_t a)
+{
+    _GFX_CIRCLE_DRAW(gfx_put_pixel_nc_##l2bpp _GFX_PUT_PIXEL_ARGS, s, xc, yc, r, oct);
+}
+
+void
+gfx_draw_circle_infill_nc_##l2bpp(const struct gfx_surface_s * __restrict__ s,
+                                  gfx_pos_t xc, gfx_pos_t yc,
+                                  gfx_pos_t r, uint8_t oct, gfx_pixel_t a)
+{
+  if (gfx_circle_check(s, xc, yc, r, oct))
+    _GFX_CIRCLE_INFILL_DRAW(gfx_hline_nc_##l2bpp _GFX_PUT_PIXEL_ARGS, s, xc, yc, r, oct);
+}
+
+void
+gfx_draw_circle_outfill_nc_##l2bpp(const struct gfx_surface_s * __restrict__ s,
+                                   gfx_pos_t xc, gfx_pos_t yc,
+                                   gfx_pos_t r, uint8_t oct, gfx_pixel_t a)
+{
+  if (gfx_circle_check(s, xc, yc, r, oct))
+    _GFX_CIRCLE_OUTFILL_DRAW(gfx_hline_nc_##l2bpp _GFX_PUT_PIXEL_ARGS, s, xc, yc, r, oct);
+}
+/* backslash-region-end */
+
+
+/** @This checks that the selected quadrants of a circle fit in a
+    surface. The bounding box only extends by @em r from the center
+    on the sides where a quadrant is selected in @em oct: bit 0 is
+    (+x, +y), bit 1 is (-x, +y), bit 2 is (-x, -y) and bit 3 is
+    (+x, -y). @This returns false if a corner of the bounding box
+    falls outside the surface. */
+inline bool_t
+gfx_circle_check(const struct gfx_surface_s * __restrict__ s,
+                 gfx_pos_t xc, gfx_pos_t yc, /* circle center xy */
+                 gfx_pos_t r,                /* radius */
+                 uint8_t oct)                /* quadrant mask */
+{
+  /* start with a box reduced to the center */
+  gfx_pos_t left = xc, right = xc;
+  gfx_pos_t top = yc, bottom = yc;
+
+  if (oct & 6)                  /* a -x quadrant is selected */
+    left = xc - r;
+  if (oct & 12)                 /* a -y quadrant is selected */
+    top = yc - r;
+  if (oct & 9)                  /* a +x quadrant is selected */
+    right = xc + r;
+  if (oct & 3)                  /* a +y quadrant is selected */
+    bottom = yc + r;
+
+  if (!gfx_xcheck(s, left) || !gfx_xcheck(s, right))
+    return 0;
+  if (!gfx_ycheck(s, top))
+    return 0;
+  return gfx_ycheck(s, bottom);
+}
+
+/** @This draws the outline of the selected quadrants of a circle
+    after checking that they fit in the surface. Nothing is drawn
+    when the check fails. */
+inline void
+gfx_draw_circle_safe(const struct gfx_surface_s * __restrict__ s,
+                     gfx_pos_t xc, gfx_pos_t yc,
+                     gfx_pos_t r, uint8_t oct, gfx_pixel_t a)
+{
+  if (!gfx_circle_check(s, xc, yc, r, oct))
+    return;
+
+  /* dispatch to the implementation for the surface pixel format */
+  switch (s->fmt)
+    _GFX_FMT_SWITCH(gfx_draw_circle_nc, (s, xc, yc, r, oct, a));
+}
+
+/** @This fills the inside of the selected quadrants of a circle
+    after checking that they fit in the surface. Nothing is drawn
+    when the check fails. */
+inline void
+gfx_draw_circle_infill_safe(const struct gfx_surface_s * __restrict__ s,
+                            gfx_pos_t xc, gfx_pos_t yc,
+                            gfx_pos_t r, uint8_t oct, gfx_pixel_t a)
+{
+  if (!gfx_circle_check(s, xc, yc, r, oct))
+    return;
+
+  /* dispatch to the implementation for the surface pixel format */
+  switch (s->fmt)
+    _GFX_FMT_SWITCH(gfx_draw_circle_infill_nc, (s, xc, yc, r, oct, a));
+}
+
+/** @This fills the area around the selected quadrants of a circle
+    after checking that they fit in the surface. Nothing is drawn
+    when the check fails. */
+inline void
+gfx_draw_circle_outfill_safe(const struct gfx_surface_s * __restrict__ s,
+                             gfx_pos_t xc, gfx_pos_t yc,
+                             gfx_pos_t r, uint8_t oct, gfx_pixel_t a)
+{
+  if (!gfx_circle_check(s, xc, yc, r, oct))
+    return;
+
+  /* dispatch to the implementation for the surface pixel format */
+  switch (s->fmt)
+    _GFX_FMT_SWITCH(gfx_draw_circle_outfill_nc, (s, xc, yc, r, oct, a));
+}
+
+#endif
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libgfx/include/gfx/gfx.t	Sun May 27 18:45:35 2018 +0200
@@ -0,0 +1,551 @@
+/*  -*- c -*-
+
+    This file is part of MutekH.
+
+    MutekH is free software; you can redistribute it and/or modify it
+    under the terms of the GNU Lesser General Public License as
+    published by the Free Software Foundation; version 2.1 of the
+    License.
+
+    MutekH is distributed in the hope that it will be useful, but
+    WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public
+    License along with MutekH; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+    02110-1301 USA.
+
+    Copyright Alexandre Becoulet <alexandre.becoulet@free.fr> (c) 2017
+
+*/
+
+#ifndef _GFX_H_
+#define _GFX_H_
+
+#include <stdint.h>
+
+/* When not built as part of MutekH, provide the few helpers and
+   configuration tokens the library relies on. */
+#ifndef __MUTEK__		/* mkdoc:skip */
+#include <errno.h>
+#include <stddef.h>
+#include <stdio.h>
+/* round x up to a multiple of b, b must be a power of 2 */
+#define align_pow2_up(x, b) ((((x) - 1) | ((b) - 1)) + 1)
+/* round x down to a multiple of b, b must be a power of 2 */
+#define align_pow2_down(x, b) ((x) & ~((b) - 1))
+/* index of the most significant bit set in x; __builtin_clz is
+   undefined for 0 */
+#define bit_msb_index(x) (sizeof(int) * 8 - 1 - __builtin_clz(x))
+# define ALWAYS_INLINE __attribute__((always_inline)) inline
+/* assertions are disabled in standalone builds */
+#define GFX_ASSERT(...)
+typedef int8_t error_t;
+typedef uint8_t bool_t;
+
+/* specifies log2 of default surface bits per pixel */
+#define CONFIG_GFX_DEFAULT_L2BPP 0
+
+/* enable support for 1 bit per pixel surfaces */
+#define CONFIG_GFX_BPP0
+/* enable support for 2 bit per pixel surfaces */
+#define CONFIG_GFX_BPP1
+/* enable support for 4 bit per pixel surfaces */
+#define CONFIG_GFX_BPP2
+/* enable support for 8 bit per pixel surfaces */
+#define CONFIG_GFX_BPP3
+/* enable support for 16 bit per pixel surfaces */
+#define CONFIG_GFX_BPP4
+/* enable support for 32 bit per pixel surfaces */
+#define CONFIG_GFX_BPP5
+
+/* log2 of the size in bytes of a storage word, see gfx_word_t */
+#define CONFIG_GFX_LOG2_WORD_WIDTH 2
+
+/* when defined, surface sizes are stored as log2 values, see
+   struct gfx_surface_s */
+//#define CONFIG_GFX_LOG2_SURFACE_SIZE
+#define CONFIG_GFX_UNROLL
+
+#else                            /* MutekH */
+#define GFX_ASSERT(...) assert(__VA_ARGS__)
+#include <hexo/error.h>
+#include <hexo/types.h>
+#include <hexo/bit.h>
+#endif
+
+/** An integer type large enough to hold a pixel value in any format. */
+typedef uint32_t gfx_pixel_t;
+/** A signed integer type large enough to hold a pixel coordinate. */
+typedef int32_t  gfx_pos_t;
+/** An integer type large enough to address a word in the data storage of a surface. */
+typedef uint32_t gfx_addr_t;
+
+#include <gfx/math.h>
+
+/** @This specifies the various implemented pixel formats. The values
+    are used as indices in the @ref gfx_fmt_desc table. Support for
+    each format depends on the matching CONFIG_GFX_BPPn token. */
+enum gfx_surface_format
+{
+  /** see CONFIG_GFX_DEFAULT_L2BPP */
+  GFX_FMT_DEFAULT,
+  /** 1 bit per pixel */
+  GFX_FMT_1BIT,
+  /** 2 bits per pixel */
+  GFX_FMT_2BIT,
+  /** 4 bits per pixel */
+  GFX_FMT_4BIT,
+  /** 8 bits per pixel */
+  GFX_FMT_8BIT,
+  /** 16 bits per pixel */
+  GFX_FMT_16BIT,
+  /** 32 bits per pixel */
+  GFX_FMT_32BIT,
+};
+
+/** @This specifies the various implemented surface compression
+    formats. NOTE(review): no user of this enum is visible in this
+    header. */
+enum gfx_surface_compress
+{
+  /** surface data is not compressed */
+  GFX_CMP_NONE,
+  GFX_CMP_RLE,
+  GFX_CMP_LZO,
+};
+
+/** @This specifies directions. It is used as the drawing direction
+    parameter of @ref gfx_draw_tile_string. */
+enum gfx_direction_e
+{
+  GFX_DIR_LEFT  = 0,
+  GFX_DIR_RIGHT = 1,
+  GFX_DIR_UP    = 2,
+  GFX_DIR_DOWN  = 3,
+};
+
+/** @internalmembers
+    @This is a surface descriptor.
+
+    A surface object has an associated user provided buffer used to
+    store the image data. The actual data format depends on the pixel
+    format and library configuration.
+
+    The image data is stored as an array of @ref gfx_word_t. Native
+    endianness is used. The word width used is defined by the @ref
+    #CONFIG_GFX_LOG2_WORD_WIDTH token.
+
+    When the @ref #CONFIG_GFX_LOG2_SURFACE_SIZE token is defined, the
+    sizes are stored as log2 values so that the width, height and
+    row size are always powers of 2.
+
+    @see gfx_surface_init
+*/
+struct gfx_surface_s
+{
+  /** pointer to surface data */
+  void *ptr;
+#ifdef CONFIG_GFX_LOG2_SURFACE_SIZE
+  /** ptr index mask */
+  gfx_addr_t mask;
+  /** log2 of row size in words */
+  uint8_t l2bw;
+  /** log2 of width in pixels */
+  uint8_t l2w;
+  /** log2 of height in pixels */
+  uint8_t l2h;
+#else
+  /** size in words */
+  gfx_addr_t bsize;
+  /** row size in words */
+  uint16_t bw;
+  /** width in pixels */
+  uint16_t w;
+  /** height in pixels */
+  uint16_t h;
+#endif
+  /** pixel format */
+  enum gfx_surface_format fmt;
+};
+
+/** @internalmembers
+    @This contains a @ref gfx_surface_s object along with information
+    about the set of tiles it contains. This allows extracting a single
+    tile for drawing on another surface.
+
+    This is typically used to implement bitmap fonts.
+
+    @see gfx_tilemap_init
+*/
+struct gfx_tilemap_s
+{
+  /* surface containing the tile set image */
+  struct gfx_surface_s s;
+  /* log2 tiles per row */
+  uint8_t l2tpr;
+  /* tile width */
+  uint8_t tw;
+  /* tile height */
+  uint8_t th;
+  /* index of first tile in strings */
+  uint8_t offset;
+};
+
+/** @This initializes a tilemap object from a surface containing the
+    tile set. You need to specify the resolution of a single tile.
+
+    The number of tiles on a single row of the tile set image is the
+    largest power of 2 that fits. There may be padding on the right if
+    you have some surface resolution constraints.
+
+    Example: You have configured the library to only support surfaces
+    with power of two resolution. You have a tile set image with a
+    resolution of 256x128 pixels and the resolution of a single tile
+    is 14x24. You then need to design your tile set image with 16
+    tiles per row and 32 pixels of padding at the end of each row.
+
+    The @em tw and @em th parameters are the width and height of a
+    tile in pixels. NOTE(review): @em first is presumably the index
+    of the first tile in strings, as stored in the @tt offset field
+    of @ref gfx_tilemap_s; confirm against the implementation.
+*/
+error_t gfx_tilemap_init(struct gfx_tilemap_s *t,
+			 const struct gfx_surface_s *s,
+			 gfx_pos_t tw, gfx_pos_t th,
+			 uint_fast8_t first);
+
+/** @This returns the address of the row of pixels at position @em y
+    in a surface. The address is an offset in words from the start of
+    the surface data: @em y multiplied by the row size. The @em y
+    coordinate is not checked. */
+ALWAYS_INLINE gfx_addr_t
+gfx_yaddr(const struct gfx_surface_s * __restrict__ s,
+	  gfx_pos_t y)
+{
+#ifdef CONFIG_GFX_LOG2_SURFACE_SIZE
+  return y << s->l2bw;
+#else
+  return y * s->bw;
+#endif
+}
+
+/** @This returns the height of a surface in pixels. The height is
+    computed from its log2 when @ref #CONFIG_GFX_LOG2_SURFACE_SIZE is
+    defined. */
+ALWAYS_INLINE gfx_pos_t
+gfx_height(const struct gfx_surface_s * __restrict__ s)
+{
+#ifdef CONFIG_GFX_LOG2_SURFACE_SIZE
+  return 1 << s->l2h;
+#else
+  return s->h;
+#endif
+}
+
+/** @This performs a modulus on the @em y coordinate in a surface.
+
+    This is a bit mask when @ref #CONFIG_GFX_LOG2_SURFACE_SIZE is
+    defined. The function is marked as deprecated otherwise because
+    it then relies on the C remainder operator: the result is
+    negative for a negative @em y and the surface height must not
+    be 0. */
+#ifndef CONFIG_GFX_LOG2_SURFACE_SIZE
+__attribute__((deprecated))
+#endif
+ALWAYS_INLINE gfx_pos_t
+gfx_ymod(const struct gfx_surface_s * __restrict__ s,
+	 gfx_pos_t y)
+{
+#ifdef CONFIG_GFX_LOG2_SURFACE_SIZE
+  return y & ((1 << s->l2h) - 1);
+#else
+  return y % s->h;
+#endif
+}
+
+/** @This returns the width of a surface in pixels. The width is
+    computed from its log2 when @ref #CONFIG_GFX_LOG2_SURFACE_SIZE is
+    defined. */
+ALWAYS_INLINE gfx_pos_t
+gfx_width(const struct gfx_surface_s * __restrict__ s)
+{
+#ifdef CONFIG_GFX_LOG2_SURFACE_SIZE
+  return 1 << s->l2w;
+#else
+  return s->w;
+#endif
+}
+
+/** @This performs a modulus on the @em x coordinate in a surface.
+
+    This is a bit mask when @ref #CONFIG_GFX_LOG2_SURFACE_SIZE is
+    defined. The function is marked as deprecated otherwise because
+    it then relies on the C remainder operator: the result is
+    negative for a negative @em x and the surface width must not
+    be 0. */
+#ifndef CONFIG_GFX_LOG2_SURFACE_SIZE
+__attribute__((deprecated))
+#endif
+ALWAYS_INLINE gfx_pos_t
+gfx_xmod(const struct gfx_surface_s * __restrict__ s,
+	 gfx_pos_t x)
+{
+#ifdef CONFIG_GFX_LOG2_SURFACE_SIZE
+  return x & ((1 << s->l2w) - 1);
+#else
+  return x % s->w;
+#endif
+}
+
+/** @This returns false if the x coordinate falls outside the surface.
+    Negative coordinates are rejected as well: the comparison is
+    performed on unsigned values so that a negative @em x appears as
+    a large value. */
+ALWAYS_INLINE bool_t
+gfx_xcheck(const struct gfx_surface_s * __restrict__ s,
+	   gfx_pos_t x)
+{
+#ifdef CONFIG_GFX_LOG2_SURFACE_SIZE
+  /* any bit set above the log2 width means out of range */
+  return !(x >> s->l2w);
+#else
+  return (gfx_addr_t)x < (gfx_addr_t)s->w;
+#endif
+}
+
+/** @This returns false if the y coordinate falls outside the surface.
+    Negative coordinates are rejected as well: the comparison is
+    performed on unsigned values so that a negative @em y appears as
+    a large value. */
+ALWAYS_INLINE bool_t
+gfx_ycheck(const struct gfx_surface_s * __restrict__ s,
+	   gfx_pos_t y)
+{
+#ifdef CONFIG_GFX_LOG2_SURFACE_SIZE
+  /* any bit set above the log2 height means out of range */
+  return !(y >> s->l2h);
+#else
+  return (gfx_addr_t)y < (gfx_addr_t)s->h;
+#endif
+}
+
+/** @internal @This forces a word address to always fall in the
+    surface storage. The address is masked when surface sizes are
+    powers of 2; otherwise an address past the end of the storage is
+    replaced by 0. */
+ALWAYS_INLINE gfx_addr_t
+gfx_xymod(const struct gfx_surface_s * __restrict__ s,
+          gfx_addr_t a)
+{
+#ifndef CONFIG_GFX_LOG2_SURFACE_SIZE
+  if (a >= s->bsize)
+    return 0;
+  return a;
+#else
+  return a & s->mask;
+#endif
+}
+
+/** @This returns false if any corner of the box falls outside the
+    surface. The (x0, y0) and (x1, y1) corners are both inclusive;
+    their relative order is not checked. */
+inline bool_t
+gfx_box_check(const struct gfx_surface_s * __restrict__ s,
+	      gfx_pos_t x0, gfx_pos_t y0,
+	      gfx_pos_t x1, gfx_pos_t y1)
+{
+  /* horizontal extent first, then vertical extent */
+  if (!gfx_xcheck(s, x0) || !gfx_xcheck(s, x1))
+    return 0;
+  if (!gfx_ycheck(s, y0))
+    return 0;
+  return gfx_ycheck(s, y1);
+}
+
+/** @This prevents the corners of a box from falling outside a
+    surface. When surface sizes are powers of 2, the coordinates are
+    wrapped in place and the function always succeeds. Otherwise the
+    coordinates are left untouched and the function returns false if
+    the box does not fit. */
+__attribute__((warn_unused_result))
+inline bool_t
+gfx_box_safe(const struct gfx_surface_s * __restrict__ s,
+             gfx_pos_t *x0, gfx_pos_t *y0,
+             gfx_pos_t *x1, gfx_pos_t *y1)
+{
+#ifndef CONFIG_GFX_LOG2_SURFACE_SIZE
+  return gfx_box_check(s, *x0, *y0, *x1, *y1);
+#else
+  const gfx_pos_t wmask = (1 << s->l2w) - 1;
+  const gfx_pos_t hmask = (1 << s->l2h) - 1;
+
+  *x0 &= wmask;
+  *x1 &= wmask;
+  *y0 &= hmask;
+  *y1 &= hmask;
+  return 1;
+#endif
+}
+
+/* identity macro */
+#define _GFX_CPP1(a) a
+/* pastes three tokens together, arguments are not macro expanded */
+#define _GFX_CPP3(a, b, c) a##b##c
+/* same as _GFX_CPP3 with arguments macro expanded before pasting */
+#define __GFX_CPP3(a, b, c) _GFX_CPP3(a, b, c)
+
+/* swaps the values of two lvalues of the same type, relies on the
+   typeof extension */
+#define _GFX_SWAP(a, b) do { typeof(a) _t = (a); (a) = (b); (b) = _t; } while (0)
+
+/* The _GFX_BPPn(...) macros expand to their arguments when support
+   for the format with log2(bits per pixel) == n is enabled and to
+   nothing otherwise. Disabling the format selected as default by
+   CONFIG_GFX_DEFAULT_L2BPP is reported as an error. */
+
+#ifdef CONFIG_GFX_BPP0
+# define _GFX_BPP0(...) __VA_ARGS__
+#elif CONFIG_GFX_DEFAULT_L2BPP == 0
+# error CONFIG_GFX_DEFAULT_L2BPP == 0 requires CONFIG_GFX_BPP0
+#else
+# define _GFX_BPP0(...)
+#endif
+
+#ifdef CONFIG_GFX_BPP1
+# define _GFX_BPP1(...) __VA_ARGS__
+#elif CONFIG_GFX_DEFAULT_L2BPP == 1
+# error CONFIG_GFX_DEFAULT_L2BPP == 1 requires CONFIG_GFX_BPP1
+#else
+# define _GFX_BPP1(...)
+#endif
+
+#ifdef CONFIG_GFX_BPP2
+# define _GFX_BPP2(...) __VA_ARGS__
+#elif CONFIG_GFX_DEFAULT_L2BPP == 2
+# error CONFIG_GFX_DEFAULT_L2BPP == 2 requires CONFIG_GFX_BPP2
+#else
+# define _GFX_BPP2(...)
+#endif
+
+#ifdef CONFIG_GFX_BPP3
+# define _GFX_BPP3(...) __VA_ARGS__
+#elif CONFIG_GFX_DEFAULT_L2BPP == 3
+# error CONFIG_GFX_DEFAULT_L2BPP == 3 requires CONFIG_GFX_BPP3
+#else
+# define _GFX_BPP3(...)
+#endif
+
+#ifdef CONFIG_GFX_BPP4
+# define _GFX_BPP4(...) __VA_ARGS__
+#elif CONFIG_GFX_DEFAULT_L2BPP == 4
+# error CONFIG_GFX_DEFAULT_L2BPP == 4 requires CONFIG_GFX_BPP4
+#else
+# define _GFX_BPP4(...)
+#endif
+
+#ifdef CONFIG_GFX_BPP5
+# define _GFX_BPP5(...) __VA_ARGS__
+#elif CONFIG_GFX_DEFAULT_L2BPP == 5
+# error CONFIG_GFX_DEFAULT_L2BPP == 5 requires CONFIG_GFX_BPP5
+#else
+# define _GFX_BPP5(...)
+#endif
+
+/* Body of a switch statement on a surface format which calls the
+   variant of function `name' matching the format and returns its
+   result. The variant name is `name' followed by an underscore and
+   the log2 of the number of bits per pixel; `args' is the
+   parenthesized argument list. Formats which are not enabled, as
+   well as GFX_FMT_DEFAULT, are handled by the variant selected by
+   CONFIG_GFX_DEFAULT_L2BPP.
+
+   Because every case returns, no code placed after the switch in
+   the calling function is executed. */
+/* backslash-region-begin */
+#define _GFX_FMT_SWITCH(name, args)
+      {
+	_GFX_BPP0( case GFX_FMT_1BIT:
+		   return name##_0 args; );
+	_GFX_BPP1( case GFX_FMT_2BIT:
+		   return name##_1 args; );
+	_GFX_BPP2( case GFX_FMT_4BIT:
+		   return name##_2 args; );
+	_GFX_BPP3( case GFX_FMT_8BIT:
+		   return name##_3 args; );
+	_GFX_BPP4( case GFX_FMT_16BIT:
+		   return name##_4 args; );
+	_GFX_BPP5( case GFX_FMT_32BIT:
+		   return name##_5 args; );
+      default:
+	return __GFX_CPP3(name, _, CONFIG_GFX_DEFAULT_L2BPP) args;
+      }
+/* backslash-region-end */
+
+/*  The _GFX_BPP_EXPAND macro expands a macro containing some template
+    code which depends on the number of bits per pixel.
+
+    The @em ops macro must take the following arguments:
+      bits per pixel:  bpp
+      pixels per word: ppw
+      log2(bpp):       l2bpp
+      log2(ppw):       l2ppw
+      pixel mask:      pm
+      pixs=1 in word:  ps
+      word type:       word_t
+*/
+
+/* Select the storage word type and define _GFX_BPP_EXPAND for the
+   configured word width. Formats with pixels wider than a word can
+   not be supported: the matching _GFX_BPPn macros are redefined as
+   empty. These redefinitions must stay outside of the backslash
+   regions, otherwise the directives would end up in the body of the
+   _GFX_BPP_EXPAND macro. */
+#if CONFIG_GFX_LOG2_WORD_WIDTH == 0 /* mkdoc:skip */
+typedef uint8_t gfx_word_t;
+/* backslash-region-begin */
+#define _GFX_BPP_EXPAND(ops)
+_GFX_BPP0(ops(1, 8, 0, 3, 0x01, 0xff, gfx_word_t))
+_GFX_BPP1(ops(2, 4, 1, 2, 0x03, 0x55, gfx_word_t))
+_GFX_BPP2(ops(4, 2, 2, 1, 0x0f, 0x11, gfx_word_t))
+_GFX_BPP3(ops(8, 1, 3, 0, 0xff, 0x01, gfx_word_t))
+/* backslash-region-end */
+# undef _GFX_BPP4
+# define _GFX_BPP4(...)
+# undef _GFX_BPP5
+# define _GFX_BPP5(...)
+
+#elif CONFIG_GFX_LOG2_WORD_WIDTH == 1 /* mkdoc:skip */
+typedef uint16_t gfx_word_t;
+/* backslash-region-begin */
+#define _GFX_BPP_EXPAND(ops)
+_GFX_BPP0(ops(1,  16, 0, 4, 0x0001, 0xffff, gfx_word_t))
+_GFX_BPP1(ops(2,  8,  1, 3, 0x0003, 0x5555, gfx_word_t))
+_GFX_BPP2(ops(4,  4,  2, 2, 0x000f, 0x1111, gfx_word_t))
+_GFX_BPP3(ops(8,  2,  3, 1, 0x00ff, 0x0101, gfx_word_t))
+_GFX_BPP4(ops(16, 1,  4, 0, 0xffff, 0x0001, gfx_word_t))
+/* backslash-region-end */
+# undef _GFX_BPP5
+# define _GFX_BPP5(...)
+
+#elif CONFIG_GFX_LOG2_WORD_WIDTH == 2 /* mkdoc:skip */
+typedef uint32_t gfx_word_t;
+/* NOTE(review): the pixel mask of the 32 bits format below is
+   0x00ffffff whereas other formats use a mask covering all bits of
+   the pixel; confirm that this is intended. */
+/* backslash-region-begin */
+#define _GFX_BPP_EXPAND(ops)
+_GFX_BPP0(ops(1,  32, 0, 5, 0x00000001, 0xffffffff, gfx_word_t))
+_GFX_BPP1(ops(2,  16, 1, 4, 0x00000003, 0x55555555, gfx_word_t))
+_GFX_BPP2(ops(4,  8,  2, 3, 0x0000000f, 0x11111111, gfx_word_t))
+_GFX_BPP3(ops(8,  4,  3, 2, 0x000000ff, 0x01010101, gfx_word_t))
+_GFX_BPP4(ops(16, 2,  4, 1, 0x0000ffff, 0x00010001, gfx_word_t))
+_GFX_BPP5(ops(32, 1,  5, 0, 0x00ffffff, 0x00000001, gfx_word_t))
+/* backslash-region-end */
+
+#else
+# error word width not supported
+#endif
+
+/** @internal @This contains properties of a pixel format.
+    @see gfx_fmt_desc */
+struct gfx_fmt_desc_s
+{
+  /* log2 bits per pixel */
+  uint8_t l2bpp;
+  /* log2 pixels per word */
+  uint8_t l2ppw;
+  /* pixel value mask */
+  gfx_pixel_t pm;
+};
+
+/** @This initializes a surface with the specified width, height and
+    format. The surface data is located at @tt data and @tt bytes
+    is the size of the storage.
+
+    NOTE(review): this comment previously mentioned log2 sizes as
+    well as @tt offset and @tt size parameters which are not part of
+    the prototype. Whether @em w and @em h must be powers of 2 when
+    @ref #CONFIG_GFX_LOG2_SURFACE_SIZE is defined and how the storage
+    size is checked need to be confirmed against the implementation. */
+error_t gfx_surface_init(struct gfx_surface_s *s, gfx_word_t *data,
+                         size_t bytes, gfx_pos_t w, gfx_pos_t h,
+                         enum gfx_surface_format fmt);
+
+/** @This computes the size in bits needed to store the surface data.
+    This may not be byte aligned. The result is stored in @tt {*bits}.
+    @see gfx_surface_bytes */
+error_t gfx_surface_bits(size_t *bits, gfx_pos_t w, gfx_pos_t h,
+			 enum gfx_surface_format fmt);
+
+/** @This computes the size in bytes needed to store the surface data.
+    The result is stored in @tt {*bytes}. */
+error_t gfx_surface_bytes(size_t *bytes, gfx_pos_t w, gfx_pos_t h,
+			  enum gfx_surface_format fmt);
+
+/** @This initializes a surface with a dummy single pixel storage. */
+void
+gfx_surface_dummy(struct gfx_surface_s * __restrict__ s);
+
+/** @This returns a pointer to the surface storage, as an array of
+    @ref gfx_word_t. */
+ALWAYS_INLINE gfx_word_t *
+gfx_surface_data(const struct gfx_surface_s * __restrict__ s)
+{
+  return s->ptr;
+}
+
+/* Template which defines, for each enabled pixel format, the
+   _GFX_BPPn_L2PPW and _GFX_BPPn_PM constants holding the log2 of the
+   number of pixels per word and the pixel mask of the format. These
+   constants are used to initialize the gfx_fmt_desc table. */
+/* backslash-region-begin */
+#define _GFX_BPP_L2PPW(bpp, ppw, l2bpp, l2ppw, pm, ps, word_t)
+enum {
+  _GFX_BPP##l2bpp##_L2PPW = l2ppw,
+  _GFX_BPP##l2bpp##_PM = pm
+};
+/* backslash-region-end */
+_GFX_BPP_EXPAND(_GFX_BPP_L2PPW);
+
+/** @internal @This contains properties of all supported pixel
+    formats. The table is indexed by @ref gfx_surface_format value;
+    the first entry describes the default format. Entries of formats
+    which are not enabled are left empty, as are the entries past
+    the last format. */
+static const struct gfx_fmt_desc_s gfx_fmt_desc[16] = {
+  {
+    .l2bpp = CONFIG_GFX_DEFAULT_L2BPP,
+    .l2ppw = __GFX_CPP3(_GFX_BPP, CONFIG_GFX_DEFAULT_L2BPP, _L2PPW),
+    .pm = __GFX_CPP3(_GFX_BPP, CONFIG_GFX_DEFAULT_L2BPP, _PM),
+  },
+  {
+    _GFX_BPP0( .l2bpp = 0,
+               .l2ppw = _GFX_BPP0_L2PPW,
+               .pm = _GFX_BPP0_PM )
+  },
+  {
+    _GFX_BPP1( .l2bpp = 1,
+               .l2ppw = _GFX_BPP1_L2PPW,
+               .pm = _GFX_BPP1_PM )
+  },
+  {
+    _GFX_BPP2( .l2bpp = 2,
+               .l2ppw = _GFX_BPP2_L2PPW,
+               .pm = _GFX_BPP2_PM )
+  },
+  {
+    _GFX_BPP3( .l2bpp = 3,
+               .l2ppw = _GFX_BPP3_L2PPW,
+               .pm = _GFX_BPP3_PM )
+  },
+  {
+    _GFX_BPP4( .l2bpp = 4,
+               .l2ppw = _GFX_BPP4_L2PPW,
+               .pm = _GFX_BPP4_PM )
+  },
+  {
+    _GFX_BPP5( .l2bpp = 5,
+               .l2ppw = _GFX_BPP5_L2PPW,
+               .pm = _GFX_BPP5_PM )
+  },
+};
+
+/** @This draws a single tile from a tilemap on a surface.
+    The @em tile parameter selects the tile in tilemap @em t and
+    (x, y) is the position on surface @em s.
+    NOTE(review): the meaning of the returned value and of the
+    @em center flag is not visible from this header; presumably the
+    tile is centered on the position when @em center is set. */
+bool_t
+gfx_draw_tile(const struct gfx_surface_s * __restrict__ s,
+              const struct gfx_tilemap_s * __restrict__ t,
+              uint_fast16_t tile, gfx_pos_t x, gfx_pos_t y, bool_t center);
+
+/** @This draws multiple tiles from a tilemap on a surface.
+    Tile indices are taken from the @em size bytes of the string
+    @em str. The @em dir parameter specifies the drawing direction.
+    @see gfx_draw_tile */
+void
+gfx_draw_tile_string(const struct gfx_surface_s * __restrict__ s,
+                     const struct gfx_tilemap_s * __restrict__ t,
+                     const uint8_t *str, uint_fast16_t size,
+                     gfx_pos_t x, gfx_pos_t y, enum gfx_direction_e dir,
+                     bool_t center);
+
+#endif
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libgfx/include/gfx/line.t	Sun May 27 18:45:35 2018 +0200
@@ -0,0 +1,114 @@
+/*  -*- c -*-
+
+    This file is part of MutekH.
+
+    MutekH is free software; you can redistribute it and/or modify it
+    under the terms of the GNU Lesser General Public License as
+    published by the Free Software Foundation; version 2.1 of the
+    License.
+
+    MutekH is distributed in the hope that it will be useful, but
+    WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public
+    License along with MutekH; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+    02110-1301 USA.
+
+    Copyright Alexandre Becoulet <alexandre.becoulet@free.fr> (c) 2017
+
+*/
+
+#ifndef _GFX_LINE_H_
+#define _GFX_LINE_H_
+
+#include <gfx/gfx.h>
+
+/* backslash-region-begin */
+#define _GFX_LINE_DRAW(_put_pixel, s, x0, x1, y0, y1)
+/* Integer line rasterizer. Invokes _put_pixel(x, yw) for every pixel
+   from (x0, y0) to (x1, y1) inclusive, where yw is a row address
+   obtained from gfx_yaddr(). The per step updates use sign masks
+   instead of conditionals. No range checking is performed here. */
+{
+  const uint_fast8_t bits = sizeof(gfx_pos_t) * 8 - 1;
+  gfx_pos_t dx = x1 - x0;
+  gfx_pos_t dy = y1 - y0;
+  gfx_pos_t sx = dx >> bits; /* sx = dx < 0 ? -1 : 0 */
+  gfx_pos_t sy = dy >> bits;
+  dx = ((2 * dx) ^ sx) - sx;    /* dx = dx < 0 ? 2 * -dx : 2 * dx */
+  dy = ((2 * dy) ^ sy) - sy;
+  sx |= 1;                      /* if (sx != -1) sx = 1 */
+  /* sy becomes the signed address step of one row: gfx_yaddr of 1 or -1 */
+  sy = gfx_yaddr(s, sy | 1);
+
+  gfx_pos_t x = x0;
+  gfx_addr_t yw = gfx_yaddr(s, y0);
+
+  if (dx > dy)
+    {
+      /* x is the major axis: one column per iteration, stop at x1 */
+      /* NOTE(review): dx and dy are already doubled at this point, so
+         this is twice the textbook start value 2*dy - dx relative to
+         the increments used below - confirm this bias is intended. */
+      gfx_pos_t d = 2 * dy - dx;
+      while (1)
+	{
+	  _put_pixel(x, yw);
+	  if (x == x1)
+	    break;
+	  /* m = d < 0 ? 0 : -1, selects whether the minor axis moves */
+	  gfx_pos_t m = ~(d >> bits);
+	  d += dy - (dx & m);
+	  x += sx;
+	  yw += sy & m;
+	}
+    }
+  else
+    {
+      /* y is the major axis: one row per iteration, stop at the row
+         address of y1 */
+      gfx_pos_t d = 2 * dx - dy;
+      gfx_addr_t ye = gfx_yaddr(s, y1);
+      while (1)
+	{
+	  _put_pixel(x, yw);
+	  if (yw == ye)
+	    break;
+	  /* m = d < 0 ? 0 : -1, selects whether the minor axis moves */
+	  gfx_pos_t m = ~(d >> bits);
+	  d += dx - (dy & m);
+	  x += sx & m;
+	  yw += sy;
+	}
+    }
+}
+/* backslash-region-end */
+
+/* backslash-region-begin */
+#define _GFX_LINE_PROTO(bpp, ppw, l2bpp, l2ppw, pm, ps, word_t)
+
+/** @internal Draw a line from (x0, y0) to (x1, y1) with pixel value
+    a on a surface of the depth selected by l2bpp. Coordinates are
+    not checked; see gfx_draw_line_safe for the checked entry point. */
+void
+gfx_draw_line_nc_##l2bpp(const struct gfx_surface_s * __restrict__ s,
+			 gfx_pos_t x0, gfx_pos_t y0,
+			 gfx_pos_t x1, gfx_pos_t y1, gfx_pixel_t a);
+/* backslash-region-end */
+
+_GFX_BPP_EXPAND(_GFX_LINE_PROTO);
+
+
+/* backslash-region-begin */
+#define _GFX_LINE_OPS(bpp, ppw, l2bpp, l2ppw, pm, ps, word_t)
+
+/* Line drawing body for one pixel depth: instantiates _GFX_LINE_DRAW
+   with the unchecked pixel writer of the same depth. The
+   _GFX_PUT_PIXEL_ARGS suffix supplies s and a to each pixel call. */
+void
+gfx_draw_line_nc_##l2bpp(const struct gfx_surface_s * __restrict__ s,
+			   gfx_pos_t x0, gfx_pos_t y0,
+			   gfx_pos_t x1, gfx_pos_t y1, gfx_pixel_t a)
+{
+  _GFX_LINE_DRAW(gfx_put_pixel_nc_##l2bpp _GFX_PUT_PIXEL_ARGS,
+		 s, x0, x1, y0, y1);
+}
+/* backslash-region-end */
+
+
+/** @This draws a line from (x0, y0) to (x1, y1) with pixel value a.
+    Nothing is drawn when gfx_box_safe() rejects the coordinates;
+    otherwise the call is dispatched on the surface format to the
+    matching gfx_draw_line_nc variant. */
+inline void
+gfx_draw_line_safe(const struct gfx_surface_s * __restrict__ s,
+		   gfx_pos_t x0, gfx_pos_t y0,
+		   gfx_pos_t x1, gfx_pos_t y1, gfx_pixel_t a)
+{
+  if (!gfx_box_safe(s, &x0, &y0, &x1, &y1))
+    return;
+
+  switch (s->fmt)
+    _GFX_FMT_SWITCH(gfx_draw_line_nc, (s, x0, y0, x1, y1, a));
+}
+
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libgfx/include/gfx/math.h	Sun May 27 18:45:35 2018 +0200
@@ -0,0 +1,73 @@
+/*  -*- c -*-
+
+    This file is part of MutekH.
+
+    MutekH is free software; you can redistribute it and/or modify it
+    under the terms of the GNU Lesser General Public License as
+    published by the Free Software Foundation; version 2.1 of the
+    License.
+
+    MutekH is distributed in the hope that it will be useful, but
+    WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public
+    License along with MutekH; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+    02110-1301 USA.
+
+    Copyright Alexandre Becoulet <alexandre.becoulet@free.fr> (c) 2017
+
+*/
+
+#ifndef _GFX_MATH_H_
+#define _GFX_MATH_H_
+
+#include <stdint.h>
+
+#define GFX_SQRT2_2_Q0_32 0xb504f334 /* sqrt(2) / 2 in Q0.32 fixed point */
+
+/** Quarter period sine lookup table read by gfx_sin(), 129 entries
+    for indices 0 to 128. Declared extern so that including this
+    header from several translation units does not create a tentative
+    definition of the array in each of them. */
+extern const int32_t gfx_sin_table[129];
+
+/** @This returns a Q2.30 signed fixed point. angle period is 512.
+    Bit 8 of the angle selects the negated half period and bit 7
+    selects the mirrored quarter, so only indices 0 to 128 of
+    gfx_sin_table are read. */
+inline int32_t gfx_sin(uint_fast16_t x)
+{
+  const uint32_t negate = (x >> 8) & 1;
+  const uint32_t mirror = (x >> 7) & 1;
+  uint_fast8_t idx = x & 127;
+
+  if (mirror)
+    idx = 128 - idx;
+
+  uint32_t v = gfx_sin_table[idx];
+
+  /* negate in unsigned arithmetic, as a two's complement wrap */
+  if (negate)
+    v = 0u - v;
+
+  return (int32_t)v;
+}
+
+/** @This returns the cosine of the angle, computed as the sine of the
+    angle advanced by 128, which is a quarter of the 512 period.
+    @see gfx_sin */
+inline int32_t gfx_cos(uint_fast16_t x)
+{
+  return gfx_sin(x + 128);
+}
+
+/** @This returns the square root of x rounded to the nearest
+    integer, using a bit by bit integer method. A zero input yields
+    zero. */
+inline uint32_t gfx_sqrt32(uint32_t x)
+{
+  /* __builtin_clz() has an undefined result for a zero argument */
+  if (x == 0)
+    return 0;
+
+  /* even bit position at or just below the most significant set bit */
+  uint_fast8_t n = (31 - __builtin_clz(x)) & ~1;
+  uint32_t t, r = 0;
+
+  /* t walks down the powers of four; x holds the running remainder */
+  for (t = (uint32_t)1 << n; t; t = t >> 2)
+    {
+      uint32_t q = r + t;
+      r = r >> 1;
+      if (x >= q)
+	{
+	  x -= q;
+	  r += t;
+	}
+    }
+
+  /* rounding: a remainder above r means the root is past r + 1/2 */
+  if (x > r)
+    r++;
+
+  return r;
+}
+
+#endif
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libgfx/include/gfx/pixel.t	Sun May 27 18:45:35 2018 +0200
@@ -0,0 +1,298 @@
+/*  -*- c -*-
+
+    This file is part of MutekH.
+
+    MutekH is free software; you can redistribute it and/or modify it
+    under the terms of the GNU Lesser General Public License as
+    published by the Free Software Foundation; version 2.1 of the
+    License.
+
+    MutekH is distributed in the hope that it will be useful, but
+    WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public
+    License along with MutekH; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+    02110-1301 USA.
+
+    Copyright Alexandre Becoulet <alexandre.becoulet@free.fr> (c) 2017
+
+*/
+
+#ifndef _GFX_PIXEL_H_
+#define _GFX_PIXEL_H_
+
+#include <string.h>
+#include <gfx/gfx.h>
+
+/** @internal Wrap caller supplied arguments with the surface s in
+    front and the pixel value a behind. Written right after a function
+    name in a macro argument, it turns a later `f(args)' into
+    `f(s, args, a)'; both s and a must be in scope where it expands. */
+#define _GFX_PUT_PIXEL_ARGS(...) (s, __VA_ARGS__, a)
+
+/** @internal Declare and precompute variables for horizontal line loop */
+/* backslash-region-begin */
+#define _GFX_HLINE_VARS(word_t, l2bpp, l2ppw, x0, x1, x0i, x1i, k0, m0, m1)
+/* Declares x0i, x1i, m0, m1 and k0 in the enclosing scope and fills
+   them for the pixel span x0 to x1 inclusive:
+     x0i  index of the word holding pixel x0
+     x1i  index of the word holding pixel x1 + 1
+     m0   mask of the low order bits located before pixel x0
+     m1   mask of the low order bits located before pixel x1 + 1
+     k0   true when the span reaches past word x0i */
+gfx_addr_t x0i, x1i;
+word_t m0, m1;
+bool_t k0;
+{
+  uint_fast8_t bpw = 8 * sizeof(word_t);
+  gfx_pos_t x1_ = x1 + 1;
+
+  /* bit offset of the first pixel inside its word */
+  gfx_addr_t x0f = (x0 << l2bpp) & (bpw - 1);
+  x0i = x0 >> l2ppw;
+  m0 = (1ULL << x0f) - 1;
+
+  /* bit offset of the pixel following the span inside its word */
+  gfx_addr_t x1f = (x1_ << l2bpp) & (bpw - 1);
+  x1i = x1_ >> l2ppw;
+  m1 = (1ULL << x1f) - 1;
+
+  k0 = x1i > x0i;
+  /* span within a single word: keep only the bits between both ends */
+  if (!k0)
+    m1 = m1 & ~m0;
+}
+/* backslash-region-end */
+
+/** @internal Horizontal line loop code */
+/* backslash-region-begin */
+#define _GFX_HLINE_LOOP(d, x0i, x1i, k0, m0, m1, v)
+/* Writes the word pattern v over one row d using the indexes and
+   masks prepared by _GFX_HLINE_VARS. */
+{
+  gfx_addr_t x0j = x0i;
+  if (k0)
+    {
+      /* partial first word: bits under m0 are preserved */
+      if (m0)
+        {
+          d[x0j] = (d[x0j] & m0) | (v & ~m0);
+          x0j++;
+        }
+
+      /* whole words up to, but not including, word x1i */
+      while (x0j < x1i)
+        d[x0j++] = v;
+    }
+
+  /* last word, or the only word when k0 is false: only the bits
+     under m1 are replaced */
+  if (m1)
+    d[x0j] = (d[x0j] & ~m1) | (v & m1);
+}
+/* backslash-region-end */
+
+/* backslash-region-begin */
+#define _GFX_PIXEL_PROTO(bpp, ppw, l2bpp, l2ppw, pm, ps, word_t)
+
+/** @internal Change a pixel on a bpp bits surface, wrap pixel address
+    on position overflow */
+void
+gfx_put_pixel_safe_##l2bpp(const struct gfx_surface_s * __restrict__ s,
+                           gfx_pos_t x, gfx_addr_t yw, gfx_pixel_t a);
+
+/** @internal Change a pixel on a bpp bits surface, generate bad
+    memory access on position overflow */
+void
+gfx_put_pixel_nc_##l2bpp(const struct gfx_surface_s * __restrict__ s,
+                         gfx_pos_t x, gfx_addr_t yw,
+			 gfx_pixel_t a);
+
+/** @internal Get pixel value on a bpp bits surface. Wrap pixel
+    address on position overflow. */
+gfx_pixel_t
+gfx_get_pixel_safe_##l2bpp(const struct gfx_surface_s * __restrict__ s,
+                           gfx_pos_t x, gfx_addr_t yw);
+
+/** @internal Get pixel value on a bpp bits surface. Generate bad
+    memory access on position overflow. */
+gfx_pixel_t
+gfx_get_pixel_nc_##l2bpp(const struct gfx_surface_s * __restrict__ s,
+                         gfx_pos_t x, gfx_addr_t yw);
+
+/** @internal Change pixels along a vertical line on a bpp bits surface.
+    NOTE(review): no definition of this variant is visible next to the
+    other pixel operations - confirm it is provided elsewhere. */
+void
+gfx_vline_safe_##l2bpp(const struct gfx_surface_s * __restrict__ s,
+                       gfx_pos_t x, gfx_addr_t y0w, gfx_addr_t y1w,
+		       gfx_addr_t ybw, gfx_pixel_t a);
+
+/** @internal Change pixels along a horizontal line on a bpp bits
+    surface. Generate bad memory access on position overflow. */
+void
+gfx_hline_nc_##l2bpp(const struct gfx_surface_s * __restrict__ s,
+                     gfx_pos_t x0, gfx_pos_t x1,
+		     gfx_addr_t yw, gfx_pixel_t a);
+
+/** @internal Change pixels along a vertical line on a bpp bits
+    surface. Generate bad memory access on position overflow. */
+void
+gfx_vline_nc_##l2bpp(const struct gfx_surface_s * __restrict__ s,
+		     gfx_pos_t x, gfx_addr_t y0w, gfx_addr_t y1w,
+		     uint_fast32_t ybw, gfx_pixel_t a);
+
+/** @internal Fill a rectangular area.
+    Generate bad memory access on position overflow. */
+void
+gfx_frect_nc_##l2bpp(const struct gfx_surface_s * __restrict__ s,
+		     gfx_pos_t x0, gfx_pos_t x1,
+                     gfx_pos_t y0w, gfx_pos_t y1w,
+                     gfx_pos_t ybw, gfx_pixel_t a);
+
+/** @internal Set every word of a bpp bits surface from pixel value a. */
+void
+gfx_clear_##l2bpp(const struct gfx_surface_s * __restrict__ s, gfx_pixel_t a);
+/* backslash-region-end */
+
+_GFX_BPP_EXPAND(_GFX_PIXEL_PROTO);
+
+
+/* backslash-region-begin */
+#define _GFX_PIXEL_OPS(bpp, ppw, l2bpp, l2ppw, pm, ps, word_t)
+
+/* Store pixel value a at column x of the row at word offset yw. The
+   word index goes through gfx_xymod() before it is used. The value a
+   is not masked with pm here. */
+extern inline void
+gfx_put_pixel_safe_##l2bpp(const struct gfx_surface_s * __restrict__ s,
+                           gfx_pos_t x, gfx_addr_t yw, gfx_pixel_t a)
+{
+  word_t * __restrict__ d = s->ptr;
+  /* bit offset of the pixel inside its word */
+  uint_fast8_t sh = (x & (ppw - 1)) << l2bpp;
+  word_t mask = (pm << sh);
+  gfx_addr_t i = (x >> l2ppw) + yw;
+  word_t *p = d + gfx_xymod(s, i);
+  *p = (*p & ~mask) | (a << sh);
+}
+
+/* Same as the safe variant but the word index is used as is. */
+extern inline void
+gfx_put_pixel_nc_##l2bpp(const struct gfx_surface_s * __restrict__ s,
+                         gfx_pos_t x, gfx_addr_t yw,
+			 gfx_pixel_t a)
+{
+  word_t * __restrict__ d = s->ptr;
+  /* bit offset of the pixel inside its word */
+  uint_fast8_t sh = (x & (ppw - 1)) << l2bpp;
+  word_t mask = (pm << sh);
+  gfx_addr_t i = (x >> l2ppw) + yw;
+  word_t *p = d + i;
+  *p = (*p & ~mask) | (a << sh);
+}
+
+/* Return the pixel at column x of the row at word offset yw. The
+   word index goes through gfx_xymod() before it is used. */
+extern inline gfx_pixel_t
+gfx_get_pixel_safe_##l2bpp(const struct gfx_surface_s * __restrict__ s,
+                           gfx_pos_t x, gfx_addr_t yw)
+{
+  const word_t *d = s->ptr;
+  uint_fast8_t sh = (x & (ppw - 1)) << l2bpp;
+  gfx_addr_t i = (x >> l2ppw) + yw;
+  const word_t *p = d + gfx_xymod(s, i);
+  return (*p >> sh) & pm;
+}
+
+/* Same as the safe variant but the word index is used as is. */
+extern inline gfx_pixel_t
+gfx_get_pixel_nc_##l2bpp(const struct gfx_surface_s * __restrict__ s,
+                         gfx_pos_t x, gfx_addr_t yw)
+{
+  const word_t *d = s->ptr;
+  uint_fast8_t sh = (x & (ppw - 1)) << l2bpp;
+  gfx_addr_t i = (x >> l2ppw) + yw;
+  const word_t *p = d + i;
+  return (*p >> sh) & pm;
+}
+
+/* Write pixels x0 to x1 inclusive on the row at word offset yw.
+   Requires x0 <= x1 (asserted). */
+extern inline void
+gfx_hline_nc_##l2bpp(const struct gfx_surface_s * __restrict__ s,
+		     gfx_pos_t x0, gfx_pos_t x1,
+		     gfx_addr_t yw, gfx_pixel_t a)
+{
+  GFX_ASSERT(x0 <= x1);
+  word_t * __restrict__ d = (word_t*)s->ptr + yw;
+  /* word pattern written by the loop; presumably ps replicates the
+     pixel into every pixel slot of a word - confirm against the
+     format template */
+  word_t v = a * ps;
+  _GFX_HLINE_VARS(word_t, l2bpp, l2ppw, x0, x1, x0i, x1i, k0, m0, m1);
+  _GFX_HLINE_LOOP(d, x0i, x1i, k0, m0, m1, v);
+}
+
+/* Write one pixel at column x on every row address from y0w to y1w
+   inclusive, advancing by ybw. Nothing is written when y0w > y1w. */
+extern inline void
+gfx_vline_nc_##l2bpp(const struct gfx_surface_s * __restrict__ s,
+		     gfx_pos_t x, gfx_addr_t y0w, gfx_addr_t y1w,
+		     uint_fast32_t ybw, gfx_pixel_t a)
+{
+  while (y0w <= y1w)
+    {
+      gfx_put_pixel_nc_##l2bpp(s, x, y0w, a);
+      y0w += ybw;
+    }
+}
+
+/* Fill columns x0 to x1 inclusive on every row address from y0w to
+   y1w inclusive, advancing by ybw. The span masks are computed once
+   and reused for each row. */
+extern inline void
+gfx_frect_nc_##l2bpp(const struct gfx_surface_s * __restrict__ s,
+		     gfx_pos_t x0, gfx_pos_t x1,
+                     gfx_pos_t y0w, gfx_pos_t y1w,
+                     gfx_pos_t ybw, gfx_pixel_t a)
+{
+  word_t * __restrict__ d = s->ptr;
+  word_t v = a * ps;
+  _GFX_HLINE_VARS(word_t, l2bpp, l2ppw, x0, x1, x0i, x1i, k0, m0, m1);
+
+  while (y0w <= y1w)
+    {
+      word_t *e = d + y0w;
+      _GFX_HLINE_LOOP(e, x0i, x1i, k0, m0, m1, v);
+      y0w += ybw;
+    }
+}
+
+/* Overwrite the gfx_yaddr(s, gfx_height(s)) words of the surface with
+   the pattern a * ps. */
+void
+gfx_clear_##l2bpp(const struct gfx_surface_s * __restrict__ s, gfx_pixel_t a)
+{
+  word_t v = a * ps;
+  word_t *d = s->ptr;
+  size_t c = gfx_yaddr(s, gfx_height(s));
+
+  if (bpp <= 8)
+    {
+      /* memset only uses the low byte of v; this assumes all bytes of
+         v are identical for depths up to 8 bits - TODO confirm */
+      memset(d, v, sizeof(word_t) * c);
+    }
+  else
+    {
+      word_t *e = d + c;
+      while (d < e)
+        *d++ = v;
+    }
+}
+/* backslash-region-end */
+
+/** @This returns the pixel at (x, y). The row address is computed
+    with gfx_yaddr() and the call is dispatched on the surface format
+    to the matching gfx_get_pixel_safe variant. */
+inline gfx_pixel_t
+gfx_get_pixel_safe(const struct gfx_surface_s * __restrict__ s,
+		   gfx_pos_t x, gfx_pos_t y)
+{
+  const gfx_addr_t yw = gfx_yaddr(s, y);
+
+  switch (s->fmt)
+    _GFX_FMT_SWITCH(gfx_get_pixel_safe, (s, x, yw));
+}
+
+/** @This stores pixel value a at (x, y). The row address is computed
+    with gfx_yaddr() and the call is dispatched on the surface format
+    to the matching gfx_put_pixel_safe variant. */
+inline void
+gfx_put_pixel_safe(const struct gfx_surface_s * __restrict__ s,
+		   gfx_pos_t x, gfx_pos_t y, gfx_pixel_t a)
+{
+  const gfx_addr_t yw = gfx_yaddr(s, y);
+
+  switch (s->fmt)
+    _GFX_FMT_SWITCH(gfx_put_pixel_safe, (s, x, yw, a));
+}
+
+/** @This draws a horizontal line from x0 to x1 on row y with pixel
+    value a. Nothing is drawn unless x0, x1 and y all pass the surface
+    range checks; the gfx_hline_nc variant matching the surface format
+    does the actual work. */
+inline void
+gfx_hline_safe(const struct gfx_surface_s * __restrict__ s,
+	       gfx_pos_t x0, gfx_pos_t x1, gfx_pos_t y, gfx_pixel_t a)
+{
+  if (!gfx_xcheck(s, x0) || !gfx_xcheck(s, x1) || !gfx_ycheck(s, y))
+    return;
+
+  switch (s->fmt)
+    _GFX_FMT_SWITCH(gfx_hline_nc, (s, x0, x1, gfx_yaddr(s, y), a));
+}
+
+/** @This draws a vertical line at column x from row y0 to row y1
+    with pixel value a. Nothing is drawn unless x, y0 and y1 all pass
+    the surface range checks; the gfx_vline_nc variant matching the
+    surface format does the actual work. */
+inline void
+gfx_vline_safe(const struct gfx_surface_s * __restrict__ s,
+	       gfx_pos_t x, gfx_pos_t y0, gfx_pos_t y1, gfx_pixel_t a)
+{
+  /* validate the requested column; checking a constant 0 would let
+     any out of range x reach the unchecked writer */
+  if (gfx_xcheck(s, x) && gfx_ycheck(s, y0) && gfx_ycheck(s, y1))
+    switch (s->fmt)
+      _GFX_FMT_SWITCH(gfx_vline_nc,
+                      (s, x, gfx_yaddr(s, y0),
+                       gfx_yaddr(s, y1), gfx_yaddr(s, 1), a));
+}
+
+/** @This clears the surface using pixel value a by dispatching on
+    the surface format to the matching gfx_clear variant. */
+inline void
+gfx_clear(const struct gfx_surface_s * __restrict__ s, gfx_pixel_t a)
+{
+  switch (s->fmt)
+    _GFX_FMT_SWITCH(gfx_clear, (s, a));
+}
+
+#endif
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libgfx/include/gfx/rect.t	Sun May 27 18:45:35 2018 +0200
@@ -0,0 +1,284 @@
+/*  -*- c -*-
+
+    This file is part of MutekH.
+
+    MutekH is free software; you can redistribute it and/or modify it
+    under the terms of the GNU Lesser General Public License as
+    published by the Free Software Foundation; version 2.1 of the
+    License.
+
+    MutekH is distributed in the hope that it will be useful, but
+    WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public
+    License along with MutekH; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+    02110-1301 USA.
+
+    Copyright Alexandre Becoulet <alexandre.becoulet@free.fr> (c) 2017
+
+*/
+
+#include <gfx/gfx.h>
+
+#ifndef _GFX_RECT_H_
+#define _GFX_RECT_H_
+
+/* backslash-region-begin */
+#define _GFX_RECT_DRAW(_hline, _vline)
+/* Outline of the rectangle with corners (x0, y0) and (x1, y1), which
+   must be variables of the enclosing function together with s. y0 and
+   y1 are overwritten with their row addresses. */
+{
+  y0 = gfx_yaddr(s, y0);
+  y1 = gfx_yaddr(s, y1);
+  /* address step between two consecutive rows */
+  gfx_addr_t dy = gfx_yaddr(s, 1);
+
+  _hline(x0, x1, y0);
+  _hline(x0, x1, y1);
+  _vline(x0, y0, y1, dy);
+  _vline(x1, y0, y1, dy);
+}
+/* backslash-region-end */
+
+/* backslash-region-begin */
+#define _GFX_RECT_R_DRAW(_hline, _vline, _put_pixel)
+/* Outline of a rectangle with rounded corners of radius r. Uses the
+   variables s, x0, y0, x1, y1 and r of the enclosing function; r, y0
+   and y1 are modified. With r equal to zero only the four straight
+   edges are drawn. */
+{
+  if (r)
+    {
+      gfx_pos_t w = x1 - x0;
+      gfx_pos_t h = y1 - y0;
+
+      /* clip corner radius */
+      if (r > (w >> 1))
+        r = w >> 1;
+      if (r > (h >> 1))
+        r = h >> 1;
+
+      gfx_pos_t x = r, y = 0, m = 0;
+      gfx_pos_t e, be, bm;
+      gfx_pos_t xa, xb, ya, yb;
+
+      /* each iteration plots the (x, y) offset and its (y, x) mirror
+         in all four corners, eight pixels in total */
+      while (y <= x)
+        {
+          xa = x0 + r - x;
+          xb = x1 - r + x;
+          ya = gfx_yaddr(s, y0 + r - y);
+          yb = gfx_yaddr(s, y1 - r + y);
+          _put_pixel(xa, ya);
+          _put_pixel(xb, ya);
+          _put_pixel(xa, yb);
+          _put_pixel(xb, yb);
+          xa = x0 + r - y;
+          xb = x1 - r + y;
+          ya = gfx_yaddr(s, y0 + r - x);
+          yb = gfx_yaddr(s, y1 - r + x);
+          _put_pixel(xa, ya);
+          _put_pixel(xb, ya);
+          _put_pixel(xa, yb);
+          _put_pixel(xb, yb);
+
+          /* m is the error after moving y only, e after also moving x */
+          m = m - (y << 1) - 1;
+          e = m + (x << 1) - 1;
+          /* sign masks used for the abs() below.
+             NOTE(review): the shift count is hard-coded to 31 whereas
+             _GFX_LINE_DRAW derives it from sizeof(gfx_pos_t) - confirm
+             gfx_pos_t is never wider than 32 bits. */
+          bm = m >> 31;
+          be = e >> 31;
+          if ((e ^ be) - be < (m ^ bm) - bm)          /* if (abs(e) < abs(m)) */
+            {
+              m = e;
+              x--;
+            }
+          y++;
+        }
+    }
+
+  /* straight edges between the corners */
+  gfx_addr_t dy = gfx_yaddr(s, 1);
+  gfx_pos_t y2 = gfx_yaddr(s, y0 + r);
+  gfx_pos_t y3 = gfx_yaddr(s, y1 - r);
+  y0 = gfx_yaddr(s, y0);
+  y1 = gfx_yaddr(s, y1);
+
+  _hline(x0 + r, x1 - r, y0);
+  _hline(x0 + r, x1 - r, y1);
+  _vline(x0, y2, y3, dy);
+  _vline(x1, y2, y3, dy);
+}
+/* backslash-region-end */
+
+/* backslash-region-begin */
+#define _GFX_RECT_FR_DRAW(_frect, _hline)
+/* Filled rectangle with rounded corners of radius r. Uses the
+   variables s, x0, y0, x1, y1 and r of the enclosing function; r, y0
+   and y1 are modified. The rounded bands are emitted as pairs of
+   horizontal spans, the rest as one filled rectangle. */
+{
+  if (r)
+    {
+      gfx_pos_t w = x1 - x0;
+      gfx_pos_t h = y1 - y0;
+
+      /* clip corner radius */
+      if (r > (w >> 1))
+        r = w >> 1;
+      if (r > (h >> 1))
+        r = h >> 1;
+
+      gfx_pos_t x = r, y = 0, m = 0;
+      gfx_pos_t e, be, bm;
+
+      /* first part: y advances on every iteration, one span pair per
+         iteration */
+      while (x >= y)
+        {
+          gfx_pos_t xa = x0 - x + r;
+          gfx_pos_t xb = x1 - r + x;
+          _hline(xa, xb, gfx_yaddr(s, y1 - r + y));
+          _hline(xa, xb, gfx_yaddr(s, y0 + r - y));
+
+          m = m - (y << 1) - 1;
+          e = m + (x << 1) - 1;
+          /* NOTE(review): hard-coded 31 assumes gfx_pos_t is at most
+             32 bits wide - confirm */
+          bm = m >> 31;
+          be = e >> 31;
+          if ((e ^ be) - be < (m ^ bm) - bm)          /* if (abs(e) < abs(m)) */
+            {
+              m = e;
+              x--;
+            }
+          y++;
+        }
+
+      /* second part: x decreases on every iteration and a span pair
+         is emitted only when y advances. The goto enters the loop at
+         the drawing code so that the current (x, y) is drawn first. */
+      goto dr;
+      while (x >= 0)
+        {
+          m = m + (x << 1) - 1;
+          e = m - (y << 1) - 1;
+          bm = m >> 31;
+          be = e >> 31;
+          x--;
+          if ((e ^ be) - be < (m ^ bm) - bm)          /* if (abs(e) < abs(m)) */
+            {
+              m = e;
+              y++;
+            dr:;
+              gfx_pos_t xa = x0 - x + r;
+              gfx_pos_t xb = x1 - r + x;
+              _hline(xa, xb, gfx_yaddr(s, y1 - r + y));
+              _hline(xa, xb, gfx_yaddr(s, y0 + r - y));
+            }
+        }
+
+      /* shrink the vertical range by the corner radius before filling
+         the remaining band */
+      y0 += r + 1;
+      y1 -= r;
+    }
+
+  /* raw filled rect */
+  _frect(x0, x1, gfx_yaddr(s, y0), gfx_yaddr(s, y1), gfx_yaddr(s, 1));
+}
+/* backslash-region-end */
+
+/* backslash-region-begin */
+#define _GFX_RECT_PROTO(bpp, ppw, l2bpp, l2ppw, pm, ps, word_t)
+
+/** @internal Draw the outline of a rectangle, coordinates are not
+    checked. */
+void
+gfx_draw_rect_nc_##l2bpp(const struct gfx_surface_s * __restrict__ s,
+                         gfx_pos_t x0, gfx_pos_t y0,
+                         gfx_pos_t x1, gfx_pos_t y1, gfx_pixel_t a);
+
+/** @internal Draw the outline of a rectangle with rounded corners of
+    radius r, coordinates are not checked. */
+void
+gfx_draw_rect_r_nc_##l2bpp(const struct gfx_surface_s * __restrict__ s,
+                           gfx_pos_t x0, gfx_pos_t y0,
+                           gfx_pos_t x1, gfx_pos_t y1, gfx_pos_t r, gfx_pixel_t a);
+
+/** @internal Draw a filled rectangle, coordinates are not checked. */
+void
+gfx_draw_rect_f_nc_##l2bpp(const struct gfx_surface_s * __restrict__ s,
+			   gfx_pos_t x0, gfx_pos_t y0,
+			   gfx_pos_t x1, gfx_pos_t y1, gfx_pixel_t a);
+
+/** @internal Draw a filled rectangle with rounded corners of radius
+    r, coordinates are not checked. */
+void
+gfx_draw_rect_fr_nc_##l2bpp(const struct gfx_surface_s * __restrict__ s,
+                            gfx_pos_t x0, gfx_pos_t y0,
+                            gfx_pos_t x1, gfx_pos_t y1, gfx_pos_t r, gfx_pixel_t a);
+/* backslash-region-end */
+
+_GFX_BPP_EXPAND(_GFX_RECT_PROTO);
+
+
+/* backslash-region-begin */
+#define _GFX_RECT_OPS(bpp, ppw, l2bpp, l2ppw, pm, ps, word_t)
+
+/* Rectangle outline: _GFX_RECT_DRAW bound to the unchecked line
+   writers of this depth. */
+void
+gfx_draw_rect_nc_##l2bpp(const struct gfx_surface_s * __restrict__ s,
+			   gfx_pos_t x0, gfx_pos_t y0,
+			   gfx_pos_t x1, gfx_pos_t y1, gfx_pixel_t a)
+{
+  _GFX_RECT_DRAW(gfx_hline_nc_##l2bpp _GFX_PUT_PIXEL_ARGS,
+		 gfx_vline_nc_##l2bpp _GFX_PUT_PIXEL_ARGS);
+}
+
+/* Rounded rectangle outline: _GFX_RECT_R_DRAW bound to the unchecked
+   line and pixel writers of this depth. */
+void
+gfx_draw_rect_r_nc_##l2bpp(const struct gfx_surface_s * __restrict__ s,
+			     gfx_pos_t x0, gfx_pos_t y0,
+			     gfx_pos_t x1, gfx_pos_t y1, gfx_pos_t r, gfx_pixel_t a)
+{
+  _GFX_RECT_R_DRAW(gfx_hline_nc_##l2bpp _GFX_PUT_PIXEL_ARGS,
+		   gfx_vline_nc_##l2bpp _GFX_PUT_PIXEL_ARGS,
+		   gfx_put_pixel_nc_##l2bpp _GFX_PUT_PIXEL_ARGS);
+}
+
+/* Filled rectangle: converts the rows to addresses and calls the
+   unchecked fill routine of this depth. */
+void
+gfx_draw_rect_f_nc_##l2bpp(const struct gfx_surface_s * __restrict__ s,
+			   gfx_pos_t x0, gfx_pos_t y0,
+			   gfx_pos_t x1, gfx_pos_t y1, gfx_pixel_t a)
+{
+  gfx_frect_nc_##l2bpp(s, x0, x1,
+                       gfx_yaddr(s, y0), gfx_yaddr(s, y1), gfx_yaddr(s, 1), a);
+}
+
+/* Filled rounded rectangle: _GFX_RECT_FR_DRAW bound to the unchecked
+   fill and horizontal line writers of this depth. */
+void
+gfx_draw_rect_fr_nc_##l2bpp(const struct gfx_surface_s * __restrict__ s,
+			      gfx_pos_t x0, gfx_pos_t y0,
+			      gfx_pos_t x1, gfx_pos_t y1, gfx_pos_t r, gfx_pixel_t a)
+{
+  _GFX_RECT_FR_DRAW(gfx_frect_nc_##l2bpp _GFX_PUT_PIXEL_ARGS,
+		    gfx_hline_nc_##l2bpp _GFX_PUT_PIXEL_ARGS);
+}
+/* backslash-region-end */
+
+/** @This draws the outline of the rectangle with corners (x0, y0)
+    and (x1, y1) using pixel value a. Nothing is drawn when
+    gfx_box_safe() rejects the coordinates. */
+inline void
+gfx_draw_rect_safe(const struct gfx_surface_s * __restrict__ s,
+		   gfx_pos_t x0, gfx_pos_t y0,
+		   gfx_pos_t x1, gfx_pos_t y1, gfx_pixel_t a)
+{
+  if (!gfx_box_safe(s, &x0, &y0, &x1, &y1))
+    return;
+
+  switch (s->fmt)
+    _GFX_FMT_SWITCH(gfx_draw_rect_nc, (s, x0, y0, x1, y1, a));
+}
+
+/** @This draws the outline of a rectangle with rounded corners of
+    radius r using pixel value a. Nothing is drawn when
+    gfx_box_safe() rejects the coordinates. */
+inline void
+gfx_draw_rect_r_safe(const struct gfx_surface_s * __restrict__ s,
+		     gfx_pos_t x0, gfx_pos_t y0,
+		     gfx_pos_t x1, gfx_pos_t y1, gfx_pos_t r, gfx_pixel_t a)
+{
+  if (!gfx_box_safe(s, &x0, &y0, &x1, &y1))
+    return;
+
+  switch (s->fmt)
+    _GFX_FMT_SWITCH(gfx_draw_rect_r_nc, (s, x0, y0, x1, y1, r, a));
+}
+
+/** @This draws a filled rectangle with corners (x0, y0) and (x1, y1)
+    using pixel value a. Nothing is drawn when gfx_box_safe() rejects
+    the coordinates. */
+inline void
+gfx_draw_rect_f_safe(const struct gfx_surface_s * __restrict__ s,
+		     gfx_pos_t x0, gfx_pos_t y0,
+		     gfx_pos_t x1, gfx_pos_t y1, gfx_pixel_t a)
+{
+  if (!gfx_box_safe(s, &x0, &y0, &x1, &y1))
+    return;
+
+  switch (s->fmt)
+    _GFX_FMT_SWITCH(gfx_draw_rect_f_nc, (s, x0, y0, x1, y1, a));
+}
+
+/** @This draws a filled rectangle with rounded corners of radius r
+    using pixel value a. Nothing is drawn when gfx_box_safe() rejects
+    the coordinates. */
+inline void
+gfx_draw_rect_fr_safe(const struct gfx_surface_s * __restrict__ s,
+		      gfx_pos_t x0, gfx_pos_t y0,
+		      gfx_pos_t x1, gfx_pos_t y1, gfx_pos_t r, gfx_pixel_t a)
+{
+  if (!gfx_box_safe(s, &x0, &y0, &x1, &y1))
+    return;
+
+  switch (s->fmt)
+    _GFX_FMT_SWITCH(gfx_draw_rect_fr_nc, (s, x0, y0, x1, y1, r, a));
+}
+
+#endif
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libgfx/test/Makefile	Sun May 27 18:45:35 2018 +0200
@@ -0,0 +1,19 @@
+
+# Standalone build of the libgfx test program and of its bytecode blobs.
+# MUTEKH may be set to the MutekH source tree, with a trailing slash; it
+# is used to locate the bytecode assembler scripts.
+#MUTEKH=~/mutekh/
+CSRC=bytecode.c ../gfx.c ../bytecode.c
+TSRC=../include/gfx/gfx.t ../include/gfx/pixel.t ../include/gfx/line.t ../include/gfx/rect.t ../include/gfx/circle.t ../include/gfx/arc.t ../include/gfx/blit.t
+HSRC=../include/gfx/bytecode.h ../include/gfx/math.h
+
+# `all' and `clean' do not produce files of the same name; declaring
+# them phony keeps stray files called `all' or `clean' from masking
+# the rules.
+.PHONY: all clean
+
+all: test_tile.out test_circle.out test_line.out test_rect.out test_arc.out test_blit.out test_scroll.out test_arith.out test
+
+# Generate a header from its .t template
+%.h: %.t
+	perl backslash.pl $< $@
+
+# Preprocess and assemble a bytecode test program
+%.out: %.bc bc_custom_gfx.pm
+	cpp -I ../include $< | perl $(MUTEKH)scripts/decl_filter.pl | perl $(MUTEKH)scripts/bc_asm.pl -w 2 -b blob -p . -o $@
+
+test: test.c $(CSRC) $(TSRC:.t=.h) $(HSRC)
+	gcc -Wall -Wsign-compare -include bytecode.h -I ../include -ggdb -O test.c $(CSRC) -lSDL -o test
+
+clean:
+	rm -f *.out *.out.adr test $(TSRC:.t=.h)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libgfx/test/backslash.pl	Sun May 27 18:45:35 2018 +0200
@@ -0,0 +1,160 @@
+#!/usr/bin/perl
+
+# Copyright (C) 2009 Alexandre Becoulet
+# 
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+use strict;
+use integer;
+
+# State of the line loop further down
+my $region_state = 0;	# true between backslash-region-begin and -end
+my $block_state = 0;	# true from backslash-block up to the next empty line
+my $count = 0;		# lines seen since the opening marker, starts at 1
+my $loc = 1;		# current line number, used in diagnostics
+
+# Command line: input template, output file. Single elements are read
+# with $ARGV[n]; the former @ARGV[n] form is a one element slice.
+my $fname_in = $ARGV[0];
+my $fname_out = $ARGV[1];
+
+open ( INFILE, "<".$fname_in ) or die "error: unable to open `$fname_in' input file\n";
+
+# Slurp the whole input at once
+my $content = do { local $/; <INFILE> };
+
+close ( INFILE );
+
+open ( OUTFILE, ">".$fname_out ) or die "error: unable to open `$fname_out' output file\n";
+
+# Abort with the given message. The partially written output file is
+# closed and removed first.
+sub error
+{
+    my $msg = shift;
+
+    close ( OUTFILE );
+    unlink( $fname_out );
+    die $msg;
+}
+
+# Expand one fornum block: returns $text repeated for every value from
+# $begin up to, but not including, $end, with each %VAR% occurrence
+# replaced by the current value. Returns undef when the range is empty.
+sub process_fornum
+{
+    my ( $var, $begin, $end, $text ) = @_;
+    my $out;
+    my $n = $begin;
+
+    while ($n < $end)
+    {
+	(my $copy = $text) =~ s/%$var%/$n/g;
+	$out .= $copy;
+	$n++;
+    }
+
+    return $out;
+}
+
+# Expand one for block: returns $text repeated for every item of
+# $list, with each %VAR% occurrence replaced by the item. Items are
+# separated by whitespace, commas or backslashes; the item `##' stands
+# for the empty string.
+sub process_for
+{
+    my ( $var, $list, $text ) = @_;
+    my $out;
+
+    for my $item (split(/[\s,\\]+/, $list))
+    {
+	my $value = ($item eq '##') ? '' : $item;
+	(my $copy = $text) =~ s/%$var%/$value/g;
+	$out .= $copy;
+    }
+
+    return $out;
+}
+
+# Expand fornum blocks (variable name, first value, end value) through
+# process_fornum. Body lines may not begin another for or endfor
+# directive, so only blocks without nested directives match; the
+# substitution is repeated until nothing matches any more.
+while ($content =~ s/\#\s*fornum \s+ (\w+) \s+ (\d+) \s+ (\d+)\s*?\n
+              ((?:(?!\#\s*for)(?!\#\s*endfor)[^\n]*\n)*)
+              \#\s*endfornum\s*?\n
+             /&process_fornum($1,$2,$3,$4)/gesx){}
+
+# Same for the list based for blocks (variable name, item list),
+# expanded through process_for.
+while ($content =~ s/\#\s*for \s+ (\w+) \s+ ([#\w\s\\]+)(?!\\)\n
+              ((?:(?!\#\s*for)(?!\#\s*endfor)[^\n]*\n)*)
+              \#\s*endfor\s*?\n
+             /&process_for($1,$2,$3)/gesx){}
+
+# Banner of the generated file, followed by a line marker that points
+# back to the input file.
+print OUTFILE
+"
+/* -*- buffer-read-only: 1 ; -*- */
+
+/*
+ * Generated file. ANY CHANGES WILL BE LOST!
+ */
+
+# 1 \"$fname_in\"
+";
+
+# Copy the expanded content line by line. A backslash is appended to
+# every line located inside a backslash region, or inside a backslash
+# block, which ends at the first empty line.
+foreach my $line ( split ( "\n", $content ) )
+{
+    # the end marker itself is not continued
+    if ($line =~ /^\/\*\s*backslash-region-end\b/)
+    {
+	error( "$fname_in:$loc: error: no backslash region to terminate\n" ) if (not $region_state);
+	$region_state = 0;
+    }
+
+    # strip trailing blanks, then expand tabs to 8 column stops
+    $line =~ s/\s*$//;
+    $line =~ s/(^|[^\t]+)(\t+)/$1." " x (length($2) * 8 - (length($1) & 7))/ge;
+
+    $block_state = 0 if ($line eq "");
+
+    if ($region_state || $block_state)
+    {
+	# drop any backslash already present
+	$line =~ s/[\s\\]*$//;
+
+	if ( $line !~ /^\s*\#\s*define\b/ ) {
+	    print STDERR "$fname_in:$loc: warning: backslash region doesn't start with #define\n"
+		if ( $count == 1 );
+	} else {
+	    print STDERR "$fname_in:$loc: warning: #define in backslash region\n"
+		if ( $count > 1 );
+	}
+
+	# tabs needed to reach column 72; integer division is in effect
+	my $tab_count = 9 - (length($line) / 8);
+
+	if (length($line) < 72)
+	{
+	    $line .= "\t" x $tab_count . "\\";
+	}
+	else
+	{
+	    $line .= " \\";
+	}
+
+	$count++;
+    }
+
+    # opening markers take effect from the next line on
+    if ($line =~ /^\/\*\s*backslash-region-begin/)
+    {
+	error( "$fname_in:$loc: error: unterminated backslash region\n" ) if ($region_state || $block_state);
+	$region_state = 1;
+	$count = 1;
+    } 
+
+    elsif ($line =~ /^\/\*\s*backslash-block/)
+    {
+	error( "$fname_in:$loc: error: unterminated backslash region\n" ) if ($region_state || $block_state);
+	$block_state = 1;
+	$count = 1;
+    }
+
+    print OUTFILE $line."\n";
+    $loc++;
+}
+
+error( "$fname_in:$loc: error: unterminated backslash region at end of file\n" ) if ($region_state);
+
+close ( OUTFILE );
+
+exit 0;
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libgfx/test/bc_custom_gfx.pm	Sun May 27 18:45:35 2018 +0200
@@ -0,0 +1,1 @@
+../bc_custom_gfx.pm
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libgfx/test/bytecode.c	Sun May 27 18:45:35 2018 +0200
@@ -0,0 +1,1003 @@
+/*
+ * This file is part of MutekH.
+ * 
+ * MutekH is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; version 2.1 of the License.
+ * 
+ * MutekH is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ * 
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with MutekH; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ *
+ * Alexandre Becoulet <alexandre.becoulet@free.fr>
+ */
+
+#include "bytecode.h"
+
+#include <stdlib.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <stdio.h>
+
+/* Assemble a 16 bits value from the two bytes found at x, the first
+   byte being the least significant one.  The result does not depend on
+   the byte order of the host. */
+static inline uint16_t load16_le(const uint16_t *x)
+{
+  const uint8_t *bytes = (void*)x;
+  const uint8_t lo = bytes[0];
+  const uint8_t hi = bytes[1];
+
+  return lo | hi << 8;
+}
+
+/* Exchange the two bytes of a 16 bits value. */
+static inline uint16_t endian_swap16(uint16_t x)
+{
+  const uint16_t to_high = x << 8;
+  const uint16_t to_low = x >> 8;
+
+  return to_low | to_high;
+}
+
+/* Reverse the order of the four bytes of a 32 bits value. */
+static inline uint32_t endian_swap32(uint32_t x)
+{
+  uint32_t reversed = 0;
+  uint_fast8_t n;
+
+  /* move the lowest byte of x to the bottom of the result, pushing the
+     previously moved bytes toward the top */
+  for (n = 0; n < 4; n++)
+    {
+      reversed = (reversed << 8) | (x & 0xff);
+      x >>= 8;
+    }
+
+  return reversed;
+}
+
+/* Load the registers selected by mask from the argument list.  Bit n of
+   mask selects register n; one uintptr_t argument is consumed for each
+   bit set, lowest register first. */
+void
+bc_set_regs_va(struct bc_context_s *ctx, uint16_t mask, va_list ap)
+{
+  uint_fast8_t reg;
+
+  for (reg = 0; mask != 0; reg++, mask >>= 1)
+    if (mask & 1)
+      ctx->v[reg] = va_arg(ap, uintptr_t);
+}
+
+/* Variadic front end of bc_set_regs_va: the values of the registers
+   selected by mask are passed as extra arguments. */
+void
+bc_set_regs(struct bc_context_s *ctx, uint16_t mask, ...)
+{
+  va_list values;
+
+  va_start(values, mask);
+  bc_set_regs_va(ctx, mask, values);
+  va_end(values);
+}
+
+/* Fill a descriptor for bytecode located at code, to be executed by the
+   interpreter.  The flags field of the descriptor receives flags or-ed
+   with len.  The code pointer must be 16 bits aligned.  Always
+   returns 0. */
+bc_error_t
+bc_desc_init(struct bc_descriptor_s *desc,
+             const void *code, size_t len,
+             enum bc_flags_s flags)
+{
+  assert(((uintptr_t)code & 1) == 0);
+
+  desc->code = code;
+  desc->run = &bc_run_vm;
+  desc->flags = flags | len;
+
+  return 0;
+}
+
+/* Initialize a descriptor from a loadable blob.
+
+   The blob starts with a 4 bytes little endian header word which is
+   used as the descriptor flags; its BC_FLAGS_SIZEMASK bits give the
+   size in bytes of the code that follows the header.
+
+   Returns -ENOTSUP when the blob is not 16 bits aligned, -EINVAL when
+   len is too small to hold the header and the announced code size, and
+   the result of bc_desc_init otherwise. */
+bc_error_t
+bc_load(struct bc_descriptor_s *desc,
+        const uint8_t *blob, size_t len)
+{
+  if ((uintptr_t)blob & 1)
+    return -ENOTSUP;
+
+  if (len < /* header */ 4)
+    return -EINVAL;
+
+  /* Widen every byte before shifting: a plain blob[3] << 24 is
+     evaluated on a promoted int and overflows into the sign bit when
+     the byte is 0x80 or above. */
+  uint32_t flags = (uint32_t)blob[0]
+                 | (uint32_t)blob[1] << 8
+                 | (uint32_t)blob[2] << 16
+                 | (uint32_t)blob[3] << 24;
+  size_t bytes = flags & BC_FLAGS_SIZEMASK;
+
+  if (len < /* header */ 4 + bytes)
+    return -EINVAL;
+
+  /* the code size is already part of flags, hence the 0 length */
+  return bc_desc_init(desc, blob + 4, 0, flags);
+}
+
+/* Prepare a context for non sandboxed execution of the bytecode
+   described by desc, starting at the beginning of the code.  The
+   registers are not modified. */
+void
+bc_init(struct bc_context_s *ctx,
+        const struct bc_descriptor_s *desc)
+{
+  /* program and resume address */
+  ctx->desc = desc;
+  ctx->vpc = desc->code;
+  ctx->mode = 0;
+
+  /* not sandboxed; -1 is stored in the unsigned cycle counter */
+  ctx->sandbox = 0;
+  ctx->max_cycles = -1;
+
+  /* tracing disabled */
+  ctx->trace_regs = 0;
+  ctx->trace = 0;
+
+#if CONFIG_MUTEK_BYTECODE_BREAKPOINTS > 0
+  ctx->bp_skip = 0;
+  ctx->bp_mask = 0;
+#endif
+}
+
+/* Prepare a context for sandboxed execution of the bytecode described
+   by desc, starting at the beginning of the code.
+
+   data_base and data_addr_bits describe the writable data segment: when
+   data_addr_bits is not 0 it must be at least 3 and data_base must be
+   8 bytes aligned; the segment then spans 1 << data_addr_bits bytes.
+   When data_addr_bits is 0, an internal 8 bytes dummy buffer is used
+   instead.
+
+   max_cycles is the cycle budget stored in the context.  Its type
+   matches the prototype in bytecode.h; a different type here makes the
+   definition conflict with the declaration on platforms where
+   uint_fast16_t and uint_fast32_t are distinct types. */
+void
+bc_init_sandbox(struct bc_context_s *ctx, const struct bc_descriptor_s *desc,
+                void *data_base, uint_fast8_t data_addr_bits,
+                uint_fast16_t max_cycles)
+{
+  ctx->vpc = desc->code;
+  ctx->mode = 0;
+  ctx->desc = desc;
+  ctx->sandbox = 1;
+  ctx->max_cycles = max_cycles;
+  if (data_addr_bits)
+    {
+      assert(data_addr_bits >= 3);
+      assert(((uintptr_t)data_base & 7) == 0);
+      ctx->data_base = (uintptr_t)data_base;
+      /* shift in the type of the mask rather than in int, which
+         overflows for a 31 bits segment */
+      ctx->data_addr_mask = ((uintptr_t)1 << data_addr_bits) - 1;
+    }
+  else
+    {
+      /* no data segment requested: point to a scratch 8 bytes area so
+         that masked accesses always land in valid memory */
+      static uint64_t dummy;
+      ctx->data_base = (uintptr_t)&dummy;
+      ctx->data_addr_mask = 7;
+    }
+  ctx->trace = 0;
+  ctx->trace_regs = 0;
+#if CONFIG_MUTEK_BYTECODE_BREAKPOINTS > 0
+  ctx->bp_mask = 0;
+  ctx->bp_skip = 0;
+#endif
+}
+
+/* Return a printable name for an opcode word.
+
+   The table is scanned in order and the first entry whose masked opcode
+   matches wins, so more specific masks must come before the generic
+   ones; the last entry matches anything and yields "invalid".  When an
+   entry has a second name, it is used for the encodings where both
+   register fields (bits 0-3 and bits 4-7) hold the same value. */
+static const char * bc_opname(uint16_t op)
+{
+  struct op_s
+  {
+    uint16_t   mask;
+    uint16_t   op;
+    const char *name, *name1;
+  };
+  static const struct op_s ops[] =
+  {
+    { 0x8000, 0x8000, "custom" },
+    { 0xffff, 0x0000, "end" },
+    { 0xffff, 0x0001, "dump" },
+    { 0xffff, 0x0002, "abort" },
+    { 0xffff, 0x0003, "die" },
+    { 0xfffc, 0x0008, "trace" },
+    { 0xfff0, 0x0000, "nop" },
+    { 0xf000, 0x0000, "add8" },
+    { 0xf000, 0x1000, "cst8" },
+    { 0xfff0, 0x2000, "ret" },
+    { 0xf00f, 0x2000, "jmp8" },
+    { 0xf000, 0x2000, "call8" },
+    { 0xf800, 0x3000, "loop" },
+    { 0xf800, 0x3800, "(un)pack/swap" },
+    { 0xff00, 0x4000, "(n)eq", "eq0" },
+    { 0xff00, 0x4100, "mov", "neq0" },
+    { 0xff00, 0x4200, "lt", "exts8" },
+    { 0xff00, 0x4300, "lts", "exts16" },
+    { 0xff00, 0x4400, "lteq", "exts32" },
+    { 0xff00, 0x4500, "lteqs" },
+    { 0xff00, 0x4600, "add" },
+    { 0xff00, 0x4700, "sub", "neg" },
+    { 0xff00, 0x4800, "or", "rand" },
+    { 0xff00, 0x4900, "xor", "ccall" },
+    { 0xff00, 0x4a00, "and" },
+    { 0xff00, 0x4b00, "sha", "not" },
+    { 0xff00, 0x4c00, "shl" },
+    { 0xff00, 0x4d00, "shr" },
+    { 0xff00, 0x4e00, "mul" },
+    { 0xff00, 0x4f00, "div", "msbs" },
+    { 0xfe00, 0x5000, "tstc" },
+    { 0xfe00, 0x5200, "tsts" },
+    { 0xfe00, 0x5400, "bitc" },
+    { 0xfe00, 0x5600, "bits" },
+    { 0xfe00, 0x5800, "shil" },
+    { 0xfe00, 0x5a00, "shir" },
+    { 0xfe00, 0x5c00, "shia" },
+    { 0xfe00, 0x5e00, "extz" },
+    { 0xf300, 0x6000, "ld" },
+    { 0xf300, 0x6100, "ldi" },
+    /* bit 9 is the store flag and bit 8 the increment flag: a plain
+       store is 0x6200, not 0x6100 which is already taken by ldi */
+    { 0xf300, 0x6200, "st" },
+    { 0xf300, 0x6300, "sti" },
+    { 0xf3f0, 0x7010, "mode" },
+    { 0xf3f0, 0x7000, "gaddr" },
+    { 0xf380, 0x7080, "cst" },
+    { 0xf3a0, 0x7020, "laddr" },
+    { 0xf3f0, 0x7040, "jmp" },
+    { 0xf3f0, 0x7050, "call" },
+    { 0xf300, 0x7200, "std" },
+    { 0xf300, 0x7100, "lde" },
+    { 0xf300, 0x7300, "ste" },
+    { 0x0000, 0x0000, "invalid" },
+  };
+  uint_fast8_t i;
+  /* no end test needed: the catch-all last entry always terminates */
+  for (i = 0; ; i++)
+    if ((op & ops[i].mask) == ops[i].op)
+      return !ops[i].name1 || ((op ^ (op >> 4)) & 15) ? ops[i].name : ops[i].name1;
+  return NULL;
+}
+
+/* Print the given pc, its offset in 16 bits words from the start of the
+   code and the current mode on stderr.  The opcode and its name are
+   printed as well when pc is aligned and lies inside the code. */
+static void bc_dump_op(const struct bc_context_s *ctx, const uint16_t *pc)
+{
+  const void *code = ctx->desc->code;
+  size_t size = ctx->desc->flags & BC_FLAGS_SIZEMASK;
+
+  /* %p expects a pointer to void */
+  fprintf(stderr, "bytecode: pc=%p (%u)", (const void*)pc,
+          (unsigned)((pc - (uint16_t*)code)));
+
+  /* only fetch the opcode when the address is known to be readable */
+  if (pc >= (uint16_t*)code &&
+      (uint8_t*)pc < (uint8_t*)code + size &&
+      !((uintptr_t)pc & 1))
+    {
+      uint16_t op = load16_le(pc);
+      fprintf(stderr, ", opcode=%04x (%s)", op, bc_opname(op));
+    }
+
+  fprintf(stderr, ", mode=%u\n", ctx->mode);
+}
+
+/* Print the 16 registers on stderr, four per line. */
+static void bc_dump_regs(const struct bc_context_s *ctx)
+{
+  uint_fast8_t reg = 0;
+
+  while (reg < 16)
+    {
+      /* end the line after every fourth register */
+      const char sep = (reg & 3) == 3 ? '\n' : ' ';
+
+      fprintf(stderr, "r%02u=%" BC_REG_FORMAT "%c", reg, ctx->v[reg], sep);
+      reg++;
+    }
+}
+
+/* Print the instruction found at pc, then all registers, on stderr. */
+static void bc_dump_pc(const struct bc_context_s *ctx, const uint16_t *pc)
+{
+  /* instruction line first */
+  bc_dump_op(ctx, pc);
+
+  /* then the register file */
+  bc_dump_regs(ctx);
+}
+
+/* Print the instruction at the resume address of the context on stderr,
+   followed by the registers when regs is not 0. */
+void bc_dump(const struct bc_context_s *ctx, bc_bool_t regs)
+{
+  /* bit 0 of the saved pc is the skip flag, not part of the address */
+  const uintptr_t insn_addr = ctx->pc & (intptr_t)-2;
+
+  bc_dump_op(ctx, (void*)insn_addr);
+
+  if (!regs)
+    return;
+
+  bc_dump_regs(ctx);
+}
+
+/* BC_PACK(n) defines bc_pack<n>(t, c), which converts in place the c
+   register sized values found at t to an array of c values of n bits.
+   Elements are processed from the first to the last. */
+#define BC_PACK(n)				\
+static void bc_pack##n(void *t, uint_fast8_t c)	\
+{						\
+  const bc_reg_t *s = t;                        \
+  uint##n##_t *d = t;				\
+  uint_fast8_t i;				\
+  for (i = 0; i < c; i++)			\
+    d[i] = s[i];				\
+}
+
+/* BC_UNPACK(n) defines bc_unpack<n>(t, c), which converts in place the
+   array of c values of n bits found at t to c register sized values.
+   Elements are processed from the last to the first. */
+#define BC_UNPACK(n)					\
+static void bc_unpack##n(void *t, uint_fast8_t c)	\
+{							\
+  const uint##n##_t *s = t;				\
+  bc_reg_t *d = t;                                      \
+  uint_fast8_t i;					\
+  for (i = c; i--; )					\
+    d[i] = s[i];					\
+}
+
+BC_PACK(8);
+BC_UNPACK(8);
+BC_PACK(16);
+BC_UNPACK(16);
+/* The 32 bits variants are only generated for the configurations
+   selected below; otherwise bc_pack32 and bc_unpack32 expand to
+   nothing. */
+#if INT_REG_SIZE > 32 || defined(CONFIG_MUTEK_BYTECODE_VM64)
+BC_PACK(32);
+BC_UNPACK(32);
+#else
+# define bc_pack32(...)
+# define bc_unpack32(...)
+#endif
+
+/* Replace each of the c registers found at t by the byte swapped value
+   of its 16 low order bits. */
+static void bc_swap16(void *t, uint_fast8_t c)
+{
+  bc_reg_t *reg = t;
+  bc_reg_t *const end = reg + c;
+
+  for (; reg != end; reg++)
+    *reg = endian_swap16(*reg);
+}
+
+/* Replace each of the c registers found at t by the byte swapped value
+   of its 32 low order bits. */
+static void bc_swap32(void *t, uint_fast8_t c)
+{
+  bc_reg_t *reg = t;
+  bc_reg_t *const end = reg + c;
+
+  for (; reg != end; reg++)
+    *reg = endian_swap32(*reg);
+}
+
+/* Execute a pack, unpack or swap operation on c registers starting at
+   register r.  op is the operation selector, one of the BC_OP_*
+   values tested below.
+
+   The work is done in three successive stages, each of which only
+   acts for the relevant selectors: widening of packed values to
+   registers, byte swapping, then narrowing of registers to packed
+   values. */
+__attribute__((noinline))
+static void bc_run_packing(struct bc_context_s *ctx,
+                           uint_fast8_t c, uint_fast8_t r,
+                           uint_fast8_t op)
+{
+  bc_reg_t *t = ctx->v + r;
+
+  /* stage 1: unpack operations first expand the packed array */
+  switch (op)
+    {
+    case BC_OP_UNPACK8:
+      bc_unpack8(t, c);
+      break;
+    case BC_OP_UNPACK16LE:
+    case BC_OP_UNPACK16BE:
+      bc_unpack16(t, c);
+      break;
+#if INT_REG_SIZE > 32 || defined(CONFIG_MUTEK_BYTECODE_VM64)
+    case BC_OP_UNPACK32LE:
+    case BC_OP_UNPACK32BE:
+      bc_unpack32(t, c);
+      break;
+#endif
+    }
+
+  /* stage 2: byte swap.  The unconditional swap operations always
+     swap; the LE/BE flavors only swap when the requested byte order is
+     not the one selected by CONFIG_CPU_ENDIAN_BIG. */
+  switch (op)
+    {
+    case BC_OP_SWAP16:
+#if !defined (CONFIG_CPU_ENDIAN_BIG)
+    case BC_OP_SWAP16BE:
+    case BC_OP_UNPACK16BE:
+    case BC_OP_PACK16BE:
+#else
+    case BC_OP_SWAP16LE:
+    case BC_OP_UNPACK16LE:
+    case BC_OP_PACK16LE:
+#endif
+      bc_swap16(t, c);
+      break;
+
+    case BC_OP_SWAP32:
+#if !defined (CONFIG_CPU_ENDIAN_BIG)
+    case BC_OP_SWAP32BE:
+    case BC_OP_UNPACK32BE:
+    case BC_OP_PACK32BE:
+#else
+    case BC_OP_SWAP32LE:
+    case BC_OP_UNPACK32LE:
+    case BC_OP_PACK32LE:
+#endif
+      bc_swap32(t, c);
+      break;
+    }
+
+  /* stage 3: pack operations finally narrow the registers */
+  switch (op)
+    {
+    case BC_OP_PACK8:
+      bc_pack8(t, c);
+      break;
+    case BC_OP_PACK16LE:
+    case BC_OP_PACK16BE:
+      bc_pack16(t, c);
+      break;
+#if INT_REG_SIZE > 32 || defined(CONFIG_MUTEK_BYTECODE_VM64)
+    case BC_OP_PACK32LE:
+    case BC_OP_PACK32BE:
+      bc_pack32(t, c);
+      break;
+#endif
+    }
+}
+
+/* Computed goto helpers, relying on the address-of-label operator.
+   BC_DISPATCH(name) is the distance between the dispatch_<name> label
+   and the dispatch_begin label of the enclosing function, and
+   BC_DISPATCH_GOTO(index) jumps to the label whose distance is stored
+   at dispatch[index].  Distances are stored as bs_dispatch_t. */
+#define BC_DISPATCH(name) ((&&dispatch_##name - &&dispatch_begin))
+#define BC_DISPATCH_GOTO(index) goto *(&&dispatch_begin + dispatch[index])
+typedef int16_t bs_dispatch_t;
+
+/* BC_CLAMP32(x) truncates x to 32 bits when the context is sandboxed.
+   It expects a variable named ctx in scope and expands to nothing when
+   the condition below is false.
+   NOTE(review): the visible part of bytecode.h defines INT_PTR_SIZE,
+   not INT_REG_SIZE; if INT_REG_SIZE is not defined elsewhere, clamping
+   is disabled on 64 bits hosts -- confirm. */
+#if (INT_REG_SIZE > 32 || defined(CONFIG_MUTEK_BYTECODE_VM64))
+# define BC_CLAMP32(x) do { if (ctx->sandbox) (x) = (uint32_t)(x); } while (0)
+#else
+# define BC_CLAMP32(x) do { } while (0)
+#endif
+
+/* Set the resume address of a sandboxed context.  pc is a byte offset
+   from the start of the code; it must be even and lower than the code
+   size, otherwise -ERANGE is returned and the context is left
+   unchanged.  Returns 0 on success. */
+bc_error_t bc_set_sandbox_pc(struct bc_context_s *ctx, uint32_t pc)
+{
+  assert(ctx->sandbox);
+
+  const struct bc_descriptor_s *d = ctx->desc;
+  const size_t code_bytes = d->flags & BC_FLAGS_SIZEMASK;
+
+  if (pc >= code_bytes)
+    return -ERANGE;
+  if (pc & 1)
+    return -ERANGE;
+
+#if CONFIG_MUTEK_BYTECODE_BREAKPOINTS > 0
+  ctx->bp_skip = 0;
+#endif
+
+  const uint8_t *base = (uint8_t*)d->code;
+  ctx->vpc = base + pc;
+
+  return 0;
+}
+
+/* Translate a virtual machine address to a host pointer.
+
+   When the context is not sandboxed, the address is returned as is.
+   When it is sandboxed, the range of size bytes starting at addr_ is
+   checked and translated:
+
+   - addresses with bit 31 set designate the writable data segment; the
+     address is masked with data_addr_mask and the whole range must fit
+     in the segment;
+   - other addresses designate the code segment, which can not be
+     requested writable and whose size is given by the descriptor.
+
+   Returns NULL when the range is not valid. */
+void *
+bc_translate_addr(struct bc_context_s *ctx,
+                  bc_reg_t addr_, size_t size,
+                  bc_bool_t writable)
+{
+  const struct bc_descriptor_s * __restrict__ desc = ctx->desc;
+  uintptr_t addr = addr_;
+  uint32_t end = addr + size;
+
+  if (ctx->sandbox)
+    {
+      /* reject ranges that do not fit in the 32 bits address space */
+      if (end < addr)
+        return NULL;
+
+      if (addr & 0x80000000)    /* rw data segment */
+        {
+          uintptr_t m = ctx->data_addr_mask;
+
+          addr &= m;
+          /* m is the highest valid offset of the segment, so up to
+             m - addr + 1 bytes are available from addr.  Comparing
+             sizes instead of end addresses lets a range end on the
+             last byte of the segment and can not wrap around. */
+          if (size > m - addr + 1)
+            return NULL;
+
+          addr += ctx->data_base;
+        }
+      else                      /* code segment */
+        {
+          if (writable)
+            return NULL;
+
+          if (end > (desc->flags & BC_FLAGS_SIZEMASK))
+            return NULL;
+
+          addr += (uintptr_t)desc->code;
+        }
+    }
+
+  return (void*)addr;
+}
+
+/* Execute a load or store instruction.
+
+   op is the opcode word: bits 0-3 select the data register, bits 4-7
+   the address register, bit 8 the increment / extended-offset flag,
+   bit 9 the store flag, bits 10-11 the log2 of the access width in
+   bytes and bit 12 the alternate addressing forms.  pc points to the
+   word used as offset by the LDnE/STnE forms (the caller passes the
+   address of the word following the opcode).
+
+   Returns 0 on success and 1 when the access is refused in sandbox
+   mode (store to the code segment, access past the end of the code,
+   unaligned access, 64 bits access). */
+__attribute__((noinline))
+static uint_fast8_t bc_run_ldst(const struct bc_descriptor_s * __restrict__ desc,
+                                struct bc_context_s *ctx, const uint16_t *pc,
+                                uint16_t op)
+{
+ /* reference label for the BC_DISPATCH offsets */
+ dispatch_begin:;
+  bc_bool_t sandbox = ctx->sandbox;
+  bc_reg_t *dst = &ctx->v[op & 0xf], d = *dst;
+  op >>= 4;
+  bc_reg_t *addrp = &ctx->v[op & 0xf];
+  uintptr_t addr = *addrp;
+  op >>= 4;
+  /* from here op holds bits 8-15 of the original opcode */
+  uint_fast8_t inc = op & 1;
+  uint_fast8_t w = 1 << ((op >> 2) & 3);
+
+  if (op & 16)
+    {
+      if (inc)                  /* BC_LDnE/BC_STnE */
+        {
+          /* the 16 bits offset is zero extended by load16_le */
+          addr += (intptr_t)load16_le(pc);
+        }
+      else                      /* BC_STnD */
+        {
+          /* pre-decrement: the access uses the updated address */
+          *addrp -= w;
+          BC_CLAMP32(*addrp);
+          addr = *addrp;
+        }
+    }
+  else if (inc)              /* BC_LDnI/BC_STnI */
+    {
+      /* post-increment: addr still holds the previous address */
+      *addrp += w;
+      BC_CLAMP32(*addrp);
+    }
+
+  if (sandbox)
+    {
+      if (addr & 0x80000000)    /* rw data segment */
+        {
+          addr &= ctx->data_addr_mask;
+          addr += ctx->data_base;
+        }
+      else                      /* code segment */
+        {
+          if (op & 2 /* store */)
+            return 1;
+
+          size_t s = desc->flags & BC_FLAGS_SIZEMASK;
+          if (addr + w > s)
+            return 1;
+
+          /* address translation */
+          addr += (uintptr_t)desc->code;
+        }
+
+      /* the test is performed on the translated host address */
+      if (addr & (w - 1))       /* not aligned */
+        return 1;
+    }
+
+  do {
+    /* index is the store flag in bit 0 and the log2 width in bits 1-2 */
+    static const bs_dispatch_t dispatch[8] = {
+      BC_DISPATCH(LD8),      BC_DISPATCH(ST8),
+      BC_DISPATCH(LD16),     BC_DISPATCH(ST16),
+      BC_DISPATCH(LD32),     BC_DISPATCH(ST32),
+      BC_DISPATCH(LD64),     BC_DISPATCH(ST64),
+    };
+    BC_DISPATCH_GOTO((op >> 1) & 7);
+
+    /* loads leave the loop with `break' so that the loaded value is
+       written back to the register below; stores return directly */
+  dispatch_LD8:
+    d = *(uint8_t*)addr;
+    break;
+  dispatch_LD16:
+    d = *(uint16_t*)addr;
+#if defined (CONFIG_CPU_ENDIAN_BIG)
+    if (sandbox)
+      d = endian_swap16(d);
+#endif
+    break;
+  dispatch_LD32:
+    d = *(uint32_t*)addr;
+#if defined (CONFIG_CPU_ENDIAN_BIG)
+    if (sandbox)
+      d = endian_swap32(d);
+#endif
+    break;
+  dispatch_LD64:
+    if (sandbox)
+      return 1;
+    d = *(uint64_t*)addr;
+    break;
+  dispatch_ST8:
+    *(uint8_t*)addr = d;
+    return 0;
+  dispatch_ST16:
+#if defined (CONFIG_CPU_ENDIAN_BIG)
+    if (sandbox)
+      d = endian_swap16(d);
+#endif
+    *(uint16_t*)addr = d;
+    return 0;
+  dispatch_ST32:
+#if defined (CONFIG_CPU_ENDIAN_BIG)
+    if (sandbox)
+      d = endian_swap32(d);
+#endif
+    *(uint32_t*)addr = d;
+    return 0;
+  dispatch_ST64:
+    if (sandbox)
+      return 1;
+    *(uint64_t*)addr = d;
+    return 0;
+  } while (0);
+
+  *dst = d;
+  return 0;
+}
+
+/* Execute a two registers ALU or compare instruction.
+
+   op is the opcode word: bits 0-3 select register a (destination),
+   bits 4-7 register b (source) and bits 8-11 the operation.  Most
+   operations have an alternate meaning when both register fields hold
+   the same value.
+
+   The return value is used by the caller as the `skip next
+   instruction' flag: compare operations return non zero when the next
+   instruction must be skipped, all other operations return 0. */
+__attribute__((noinline))
+static bc_bool_t bc_run_alu(struct bc_context_s *ctx, uint16_t op)
+{
+  /* reference label for the BC_DISPATCH offsets */
+  dispatch_begin:;
+  bc_bool_t sandbox = ctx->sandbox;
+  uint8_t a = op & 15;
+  op >>= 4;
+  uint8_t b = op & 15;
+  bc_reg_t *dstp = &ctx->v[a];
+  bc_reg_t dst = *dstp;
+  bc_reg_t *srcp = &ctx->v[b];
+  bc_reg_t src = *srcp;
+
+  do {
+    /* even entries: regular operation; odd entries: variant used when
+       both register fields are equal */
+    static const bs_dispatch_t dispatch[32] = {
+      BC_DISPATCH(EQ),         BC_DISPATCH(EQ0),
+      BC_DISPATCH(MOV),        BC_DISPATCH(NEQ0),
+      BC_DISPATCH(LT),         BC_DISPATCH(EXTS8),
+      BC_DISPATCH(LTS),        BC_DISPATCH(EXTS16),
+      BC_DISPATCH(LTEQ),       BC_DISPATCH(EXTS32),
+      BC_DISPATCH(LTEQS),      BC_DISPATCH(RES),
+      BC_DISPATCH(ADD),        BC_DISPATCH(RES),
+      BC_DISPATCH(SUB),        BC_DISPATCH(NEG),
+      BC_DISPATCH(OR),         BC_DISPATCH(RAND),
+      BC_DISPATCH(XOR),        BC_DISPATCH(CCALL),
+      BC_DISPATCH(AND),        BC_DISPATCH(RES),
+      BC_DISPATCH(SHA),        BC_DISPATCH(NOT),
+      BC_DISPATCH(SHL),        BC_DISPATCH(RES),
+      BC_DISPATCH(SHR),        BC_DISPATCH(RES),
+      BC_DISPATCH(MUL),        BC_DISPATCH(MUL),
+      BC_DISPATCH(DIV),        BC_DISPATCH(MSBS)
+    };
+    /* op has already been shifted by 4: operation in bits 1-4 of the
+       index, `same register' flag in bit 0 */
+    BC_DISPATCH_GOTO(((op >> 3) & 0x01e) | (a == b));
+
+  dispatch_EQ:
+    /* the order of the register numbers selects the polarity: skip
+       when the values differ if a > b, skip when they are equal if
+       a < b */
+    return (dst != src) ^ (a < b);
+  dispatch_EQ0:
+    return dst != 0;
+  dispatch_MOV:
+    dst = src;
+    break;
+  dispatch_NEQ0:
+    return dst == 0;
+  dispatch_LT:
+    return dst >= src;
+  dispatch_EXTS8:
+    dst = (bc_sreg_t)(int8_t)src;
+    break;
+  dispatch_LTS:
+    return (bc_sreg_t)dst >= (bc_sreg_t)src;
+  dispatch_EXTS16:
+    dst = (bc_sreg_t)(int16_t)src;
+    break;
+  dispatch_LTEQ:
+    return dst > src;
+  dispatch_EXTS32:
+    dst = (bc_sreg_t)(int32_t)src;
+    break;
+  dispatch_LTEQS:
+    return (bc_sreg_t)dst > (bc_sreg_t)src;
+  dispatch_ADD:
+    dst += src;
+    break;
+  dispatch_NEG:
+    /* negation is 0 - src: clear dst and fall through to SUB */
+    dst = 0;
+  dispatch_SUB:
+    dst -= src;
+    break;
+  dispatch_OR:
+    dst = (uint32_t)(dst | src);
+    break;
+  dispatch_RAND:
+    dst = (uint32_t)rand();
+    break;
+  dispatch_XOR:
+    dst = (uint32_t)(dst ^ src);
+    break;
+  dispatch_CCALL:
+    /* the register holds the address of a C function; ignored when
+       sandboxed */
+    if (!sandbox)
+      ((bc_ccall_function_t*)(uintptr_t)src)(ctx);
+    break;
+  dispatch_AND:
+    dst = (uint32_t)(dst & src);
+    break;
+  dispatch_SHA:
+    dst = (uint32_t)((int32_t)dst >> src);
+    break;
+  dispatch_NOT:
+    dst = (uint32_t)~src;
+    break;
+  dispatch_SHL:
+    dst = (uint32_t)(dst << src);
+    break;
+  dispatch_SHR:
+    dst = (uint32_t)(dst >> src);
+    break;
+  dispatch_MUL:
+    dst = (uint32_t)(dst * src);
+    break;
+  dispatch_DIV:
+    /* division by 0 is silently ignored when sandboxed; it is not
+       guarded otherwise */
+    if (src == 0 && sandbox)
+      return 0;
+    /* quotient goes to the destination register, remainder to the
+       source register */
+    *dstp = (uint32_t)dst / (uint32_t)src;
+    *srcp = (uint32_t)dst - (uint32_t)*dstp * (uint32_t)src;
+    return 0;
+  dispatch_MSBS:
+    /* index of the most significant bit set */
+    dst = sizeof(int) * 8 - 1 - __builtin_clz((uint32_t)dst);
+    break;
+  dispatch_RES:
+    /* reserved encoding: no operation */
+    return 0;
+  } while (0);
+
+  /* common exit of the operations which update the destination */
+  BC_CLAMP32(dst);
+  *dstp = dst;
+  return 0;
+}
+
+/* Bytecode interpreter main loop.
+
+   Execution resumes at the address stored in ctx->pc; bit 0 of that
+   value requests that the first instruction be skipped.  The function
+   returns one of the BC_RUN_STATUS_* codes or, when an instruction
+   with bit 15 set (custom operation) is met, the opcode word itself
+   after storing the address of the following word in the context.
+
+   BC_RUN_STATUS_FAULT is returned without executing anything when the
+   descriptor has the BC_FLAGS_NATIVE flag or when its BC_FLAGS_SANDBOX
+   flag does not match the sandbox state of the context. */
+bc_opcode_t bc_run_vm(struct bc_context_s *ctx)
+{
+  const struct bc_descriptor_s * __restrict__ desc = ctx->desc;
+  const uint16_t *pc = (void*)(ctx->pc & (intptr_t)-2);
+  bc_bool_t skip = ctx->pc & 1;
+  uint16_t op = 0;
+
+  int_fast16_t max_cycles = ctx->max_cycles;
+  bc_bool_t sandbox = ctx->sandbox;
+
+  if (desc->flags & BC_FLAGS_NATIVE)
+    return BC_RUN_STATUS_FAULT;
+  if (!!(desc->flags & BC_FLAGS_SANDBOX) ^ sandbox)
+    return BC_RUN_STATUS_FAULT;
+
+  const size_t size = desc->flags & BC_FLAGS_SIZEMASK;
+  const uint16_t *code_end = (uint16_t*)desc->code + (size >> 1);
+  /* addresses stored in registers are relative to the start of the
+     code when sandboxed and absolute otherwise */
+  const uintptr_t code_offset = sandbox ? (uintptr_t)desc->code : 0;
+
+  /* pc is advanced by the loop itself: handlers which `break' out of
+     the dispatch block continue with the next word, and jump handlers
+     leave pc one word before their target */
+  for (;; pc++)
+    {
+      /* check pc upper bound */
+      if (pc + 1 > code_end)
+        goto err_pc;
+
+      op = load16_le(pc);
+
+      /* get number of extra words in the instruction */
+      uint_fast8_t cst_len = 0;
+      if ((op & 0xf000) == 0x7000)
+        {
+          uint_fast8_t s = (op >> 5) & 31;
+
+          if ((0xff00fffe >> s) & 1)
+            {
+              cst_len = 1 + ((op & 0x0700) == 0x0400);
+
+	      /* check upper bound again with extra words */
+	      if (sandbox && pc + 1 + cst_len > code_end)
+		goto err_pc;
+            }
+        }
+
+      if (skip)
+        {
+          /* skip embedded constant value words if any */
+          pc += cst_len;
+          skip = 0;
+          continue;
+        }
+
+#if CONFIG_MUTEK_BYTECODE_BREAKPOINTS > 0
+      /* stop on an enabled breakpoint, unless execution is resuming
+         right after a break */
+      uint16_t bp_mask = ctx->bp_mask;
+      uint16_t bp_skip = ctx->bp_skip;
+      ctx->bp_skip = 0;
+      if (bp_mask && !bp_skip)
+        {
+          uint_fast8_t i;
+          for (i = 0; bp_mask && i < CONFIG_MUTEK_BYTECODE_BREAKPOINTS; i++)
+            {
+              uint16_t m = 1 << i;
+              uintptr_t bp = ctx->bp_list[i];
+              if (bp_mask & m)
+                {
+                  if ((uintptr_t)pc == bp)
+                    {
+                      ctx->vpc = pc;
+                      ctx->bp_skip = 1;
+                      return BC_RUN_STATUS_BREAK;
+                    }
+                  bp_mask ^= m;
+                }
+            }
+        }
+#endif
+
+      /* the cycle budget only applies to sandboxed execution */
+      if (sandbox)
+        {
+          if (max_cycles == 0)
+            {
+              ctx->vpc = pc;
+              ctx->max_cycles = 0;
+              return BC_RUN_STATUS_CYCLES;
+            }
+          max_cycles--;
+        }
+
+      if (ctx->trace)
+        {
+	  if (ctx->trace_regs)
+	    bc_dump_regs(ctx);
+	  bc_dump_op(ctx, pc);
+	}
+
+      /* custom op */
+      if (op & 0x8000)
+        {
+          ctx->vpc = pc + 1;
+	  if (sandbox)
+	    ctx->max_cycles = max_cycles;
+          return op;
+        }
+
+      bc_reg_t *dstp = &ctx->v[op & 0xf];
+
+      do {
+	/* first level decoding on bits 12-14 of the opcode */
+	static const bs_dispatch_t dispatch[8] = {
+	  BC_DISPATCH(add8),
+          BC_DISPATCH(cst8),
+	  BC_DISPATCH(jmp),
+          BC_DISPATCH(loop_pack),
+	  BC_DISPATCH(alu),
+          BC_DISPATCH(fmt2),
+	  BC_DISPATCH(ldst),
+          BC_DISPATCH(cstn_call),
+	};
+	BC_DISPATCH_GOTO((op >> 12) & 0x7);
+
+      dispatch_begin:
+      dispatch_add8: {
+          int8_t x = (op >> 4) & 0xff;
+          if (x)
+            {
+              *dstp += (bc_sreg_t)x;
+              BC_CLAMP32(*dstp);
+              break;
+            }
+          /* a zero immediate encodes the control operations */
+          if (op == 0)
+            {
+              ctx->vpc = pc;
+	      if (sandbox)
+		ctx->max_cycles = max_cycles;
+              return BC_RUN_STATUS_END;
+            }
+          else if (op == 1)
+            bc_dump_pc(ctx, pc);
+          else if (op & 8)
+            {
+              ctx->trace = op & 1;
+              ctx->trace_regs = (op & 2) >> 1;
+            }
+          else if (op & 2)
+            {
+              if ((op & 1) && !sandbox)   /* abort */
+                abort();
+              goto err_die;   /* die */
+            }
+          break;
+        }
+
+      dispatch_cst8:
+	*dstp = (bc_reg_t)((op >> 4) & 0xff);
+	break;
+
+      dispatch_jmp: {
+          int8_t disp = op >> 4;
+          if (disp)             /* jmp* */
+            {
+              /* the saved address is the one of the call instruction;
+                 the loop increment makes `ret' resume after it */
+              if (op & 0xf)     /* call8 */
+                *dstp = (uintptr_t)pc - code_offset;
+              pc += (intptr_t)disp;
+            }
+          else                  /* ret */
+            {
+              pc = (void*)(uintptr_t)(code_offset + *dstp);
+            }
+          goto check_pc;
+        }
+
+      dispatch_loop_pack: {
+          if (op & 0x800)         /* packing */
+            {
+              uint_fast8_t c = ((op >> 8) & 0x7) + 1;
+              uint_fast8_t r = (op & 0xf);
+              /* a register range past r15 is ignored when sandboxed */
+	      if (sandbox && r + c > 16)
+		break;
+              assert(r + c <= 16);
+              bc_run_packing(ctx, c, r, (op >> 4) & 0xf);
+              break;
+            }
+
+          /* loop */
+          /* sign extend the 7 bits displacement found in bits 4-10 */
+	  int8_t d = op >> 3;
+          d >>= 1;
+	  if (d < 0)
+	    {
+              /* backward: decrement, jump unless the counter reached 0 */
+	      if (!--(*dstp))
+		break;
+              BC_CLAMP32(*dstp);
+	    }
+	  else if (*dstp > 0)
+	    {
+              /* forward: decrement and do not jump unless already 0 */
+	      (*dstp)--;
+	      break;
+	    }
+	  pc += d;
+          goto check_pc;
+	}
+
+      dispatch_alu:
+	skip = bc_run_alu(ctx, op);
+	break;
+
+      dispatch_fmt2: {
+	  uint_fast8_t bit = (op >> 4) & 0x1f;
+          bc_reg_t dst = *dstp;
+	  if (op & 0x0800)
+            {
+              if (op & 0x0400)
+                {
+                  if (op & 0x0200) /* BC_EXTZ */
+                    dst &= 0xffffffff >> bit;
+                  else
+                    dst = (uint32_t)((int32_t)dst >> bit); /* BC_SHIA */
+                }
+              else
+                {
+                  dst = op & 0x0200 ? (uint32_t)(dst >> bit)
+                                   : (uint32_t)(dst << bit); /* BC_SHI* */
+                }
+            }
+          else
+            {
+              bc_reg_t mask = 1U << bit;
+              bc_reg_t vmask = op & 0x0200 ? mask : 0;
+              if (op & 0x0400)
+                dst = (dst & ~mask) | vmask; /* BC_BIT* */
+              else
+                skip = ((dst ^ vmask) >> bit) & 1; /* BC_TST* */
+            }
+          BC_CLAMP32(dst);
+          *dstp = dst;
+	  break;
+	}
+
+      dispatch_cstn_call: {
+	  if ((op & 0x0300) == 0x0000) /* not ld/st */
+	    {
+              if ((op & 0x00e0) == 0x0000)
+                {
+                  if (op & 0x0010) /* mode */
+                    {
+                      ctx->mode = ((op & 0x0c00) >> 6) | (op & 15);
+                    }
+                  else /* gaddr */
+                    {
+		      goto err_ret;
+                    }
+                  break;
+                }
+
+              /* offset in bytes of this instruction, used as base of
+                 the pc relative forms */
+              uintptr_t rpc = (void*)pc - desc->code;
+
+              /* fetch constant */
+              bc_reg_t x = load16_le(++pc);
+              if (op & 0x0400)
+                x |= (uint32_t)load16_le(++pc) << 16;
+
+              if (op & 0x0080)  /* cst16, cst32 */
+                {
+                  if (op & 0x0800)  /* set high */
+                    {
+                      if (op & 0x0400)
+                        x |= 0xffffffff00000000ULL;
+                      else
+                        x |= 0xffffffffffff0000ULL;
+                    }
+
+                  *dstp = x << ((op & 0x0070) >> 1); /* byte shift */
+                  break;
+                }
+
+              if (op & 0x0800)  /* pc relative */
+                {
+                  if (op & 0x0400)  /* sign extend */
+                    x = (bc_sreg_t)(int32_t)x;
+                  else
+                    x = (bc_sreg_t)(int16_t)x;
+                  x += rpc;
+                }
+
+              if (op & 0x0020)  /* laddr */
+                {
+                    if (sandbox)
+                      x |= (op & 0x40) << 25; /* bit 31 */
+                    else
+		      x += (uintptr_t)desc->code;
+                  *dstp = x;
+                  break;
+                }
+
+              /* call/jmp */
+              if (op & 0x0010)     /* save return address */
+                *dstp = (uintptr_t)pc - code_offset;
+
+              pc = (const uint16_t*)(desc->code + x);
+              goto check_pc;
+	    }
+          /* the extended load/store forms fall through to ldst */
+        }
+
+      dispatch_ldst:
+	if (bc_run_ldst(desc, ctx, pc + 1, op))
+          goto err_ret;
+	break;
+
+      check_pc:
+        /* lower bound and alignment of a jump target; pc + 1 is tested
+           because of the pending loop increment, the upper bound is
+           checked at the top of the loop */
+        if (pc + 1 < (uint16_t*)desc->code || ((uintptr_t)pc & 1))
+          goto err_pc;
+	break;
+
+      } while (0);
+
+    }
+
+ /* pc out of the code segment: a fault when sandboxed, an assertion
+    failure otherwise */
+ err_pc:
+  if (sandbox)
+    goto err_ret;
+  assert(!"bytecode pc out of range");
+
+ err_die:
+  fprintf(stderr, "bytecode: die %p\n", pc);
+  bc_dump_pc(ctx, pc);
+
+ err_ret:
+  ctx->vpc = pc;
+  if (sandbox)
+    ctx->max_cycles = max_cycles;
+  return BC_RUN_STATUS_FAULT;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libgfx/test/bytecode.h	Sun May 27 18:45:35 2018 +0200
@@ -0,0 +1,360 @@
+/*
+ * This file is part of MutekH.
+ * 
+ * MutekH is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; version 2.1 of the License.
+ * 
+ * MutekH is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ * 
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with MutekH; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ *
+ * Copyright Alexandre Becoulet <alexandre.becoulet@free.fr> (c) 2013
+ */
+
+#ifndef MUTEK_BYTECODE_H_
+#define MUTEK_BYTECODE_H_
+
+#include <stdarg.h>
+#include <assert.h>
+#include <inttypes.h>
+#include <stdint.h>
+#include <stddef.h>
+
+typedef uint16_t bc_opcode_t;
+typedef int8_t bc_error_t;
+typedef int8_t bc_bool_t;
+
+#define CONFIG_MUTEK_BYTECODE_SANDBOX
+#define CONFIG_MUTEK_BYTECODE_DEBUG
+#define CONFIG_MUTEK_BYTECODE_TRACE
+#define CONFIG_MUTEK_BYTECODE_VM
+
+#define BC_ALWAYS_INLINE static inline __attribute__((always_inline))
+
+#if __UINTPTR_MAX__ <= 0xffffffff
+typedef uint32_t bc_reg_t;
+typedef int32_t bc_sreg_t;
+# define BC_REG_FORMAT "08" PRIx32
+# define INT_PTR_SIZE 32
+#else
+typedef uint64_t bc_reg_t;
+typedef int64_t bc_sreg_t;
+# define BC_REG_FORMAT "016" PRIx64
+# define INT_PTR_SIZE 64
+#endif
+
+/** @internal */
+enum bc_flags_s
+{
+  BC_FLAGS_NATIVE   = 0x01000000,
+  BC_FLAGS_SANDBOX  = 0x02000000,
+  BC_FLAGS_SIZEMASK = 0x00ffffff,
+};
+
+struct bc_context_s;
+
+/** @This can be used to declare bytecode entry points. @see bc_set_pc */
+typedef struct bytecode_entry_s bytecode_entry_t;
+
+/** @This specifies status codes returned by the @ref bc_run function.
+    @see #BC_STATUS_CUSTOM */
+enum bc_run_status_e
+{
+  BC_RUN_STATUS_END = 0,
+  BC_RUN_STATUS_CYCLES = 1,
+  BC_RUN_STATUS_BREAK = 2,
+  BC_RUN_STATUS_FAULT = 3,
+};
+
+/** @This tests if the return status of @ref bc_run is a custom opcode
+    or a value specified in @ref bc_run_status_e */
+#define BC_STATUS_CUSTOM(op) ((op) & 0x8000)
+
+/** @internal */
+typedef bc_opcode_t (bc_run_t)(struct bc_context_s *ctx);
+
+/** @This is the bytecode descriptor header */
+struct bc_descriptor_s
+{
+  const void *code;
+  bc_run_t *run;
+  uint32_t flags;
+};
+
+/** @This defines the virtual machine context.
+    @internalcontent */
+struct bc_context_s
+{
+  bc_reg_t v[16];
+  union {
+    /** Bytecode resume execution pointer. For native code, this is a
+        pointer to the machine code instruction. For vm bytecode, this
+        is a 16 bits aligned pointer to the next instruction word with
+        the bit 0 indicating if the next instruction must be skipped
+        on resume. */
+    uintptr_t pc;
+    const void *vpc;
+  };
+
+  const struct bc_descriptor_s *desc;
+  uint8_t mode;
+
+  /** address of writable data segment when sandboxed */
+  uintptr_t data_base;
+  /** mask address of writable data segment when sandboxed */
+  uintptr_t data_addr_mask;
+  /** @see bc_init_sandbox */
+  bc_bool_t sandbox;
+  /** maximum number of executed cycles by a single call to @ref bc_run_vm */
+  uint16_t max_cycles;
+  bc_bool_t trace;
+  bc_bool_t trace_regs;
+#if CONFIG_MUTEK_BYTECODE_BREAKPOINTS > 0
+  uintptr_t bp_list[CONFIG_MUTEK_BYTECODE_BREAKPOINTS];
+  uint16_t bp_mask;
+  bool_t BITFIELD(bp_skip,1);
+#endif
+};
+
+/** @This initializes the virtual machine. The initial value of the
+    registers is undefined. */
+void
+bc_init(struct bc_context_s *ctx,
+        const struct bc_descriptor_s *desc);
+
+/** @This initializes the virtual machine in sandbox mode. When
+    working in sandbox mode, address are translated and the following
+    checks are performed:
+
+    @list
+      @item Execution of instructions are not allowed outside of the
+        code segment specified in the bytecode descriptor. Code base
+        address inside the virtual machine is 0.
+      @item Load and store instructions addresses are translated from
+        0x80000000 to the @tt data_base address and the address is
+        masked according to @tt data_addr_bits. Loads
+        below 0x80000000 are translated to the code segment.
+      @item The @tt ccall instruction can not be used.
+      @item The @tt abort instruction is equivalent to @tt die.
+    @end list
+
+    When the @tt data_addr_bits parameter is not 0, it must be at
+    least 3 (a data segment of 8 bytes) and @tt data_base must point
+    to an 8 bytes aligned buffer.
+
+    When in sandbox mode on a 64 bits target, instructions wont touch
+    registers above bit 31. This makes the sandbox a 32 bits virtual
+    machine.
+
+    The @tt max_cycles parameter specifies the maximum number of
+    executed cycles by a single call to @ref bc_run_vm
+*/
+void bc_init_sandbox(struct bc_context_s *ctx, const struct bc_descriptor_s *desc,
+                     void *data_base, uint_fast8_t data_addr_bits,
+                     uint_fast16_t max_cycles);
+
+void *
+bc_translate_addr(struct bc_context_s *ctx,
+                  bc_reg_t addr_, size_t size,
+                  bc_bool_t writable);
+
+/** @This updates the remaining number of cycles before the @ref bc_run
+    function returns @ref BC_RUN_STATUS_CYCLES. */
+BC_ALWAYS_INLINE void
+bc_set_cycles(struct bc_context_s *ctx, uint_fast16_t cycles)
+{
+  assert(ctx->sandbox);
+  ctx->max_cycles = cycles;
+}
+
+/** @see bc_set_cycles */
+BC_ALWAYS_INLINE uint_fast16_t
+bc_get_cycles(const struct bc_context_s *ctx)
+{
+  assert(ctx->sandbox);
+  return ctx->max_cycles;
+}
+
+/** @This changes the program counter of a sandboxed virtual machine. */
+bc_error_t bc_set_sandbox_pc(struct bc_context_s *ctx, uint32_t pc);
+
+/** @This returns the program counter of a sandboxed virtual machine. */
+BC_ALWAYS_INLINE uint32_t bc_get_sandbox_pc(const struct bc_context_s *ctx)
+{
+  assert(ctx->sandbox);
+  return ctx->vpc - ctx->desc->code;
+}
+
+/** @This initializes a bytecode descriptor from a bytecode loadable
+    blob. The format of the blob is:
+    @list
+      @item a 32 bits header word in little endian representation,
+        with the size of the code in bytes in bits 0 to 23 and the
+        descriptor flags in bits 24 to 31
+      @item instruction words
+    @end list
+    The @tt blob pointer must be 16 bits aligned.
+*/
+bc_error_t
+bc_load(struct bc_descriptor_s *desc,
+        const uint8_t *blob, size_t len);
+
+
+/** @see bc_set_regs */
+void
+bc_set_regs_va(struct bc_context_s *ctx, uint16_t mask, va_list ap);
+
+/** @This sets the value of multiple registers of the virtual
+    machine. The @tt mask parameter specifies which registers must be
+    initialized. An additional value of type @ref uintptr_t must be
+    passed for each bit set in @tt mask. */
+void
+bc_set_regs(struct bc_context_s *ctx, uint16_t mask, ...);
+
+/** @This returns the value of one of the 16 virtual machine registers */
+BC_ALWAYS_INLINE uintptr_t
+bc_get_reg(struct bc_context_s *ctx, uint_fast8_t i)
+{
+  return ctx->v[i];
+}
+
+/** @This returns a pointer to a packed array of bytes stored in virtual
+    machine register storage. See the @tt pack and @tt unpack instructions. */
+BC_ALWAYS_INLINE uint8_t *
+bc_get_bytepack(struct bc_context_s *ctx, uint_fast8_t i)
+{
+  return (uint8_t*)(ctx->v + i);
+}
+
+/** @This returns a pointer to a packed array of bytes stored in
+    virtual machine register storage. When the index of the register
+    is too high for the specified number of bytes, a pointer to
+    register 0 is returned instead. */
+BC_ALWAYS_INLINE uint8_t *
+bc_get_bytepack_safe(struct bc_context_s *ctx, uint_fast8_t i,
+                     size_t bytes)
+{
+  size_t reg_count = (((bytes - 1) | 3) + 1) >> 2; /* bytes rounded up to a multiple of 4, in 4-byte units */
+  int32_t m = i + reg_count - 17;   /* negative when the range fits: i + reg_count <= 16 */
+  i &= m >> 31;   /* keeps i when m is negative, else forces 0; assumes >> sign-extends */
+  return (uint8_t*)(ctx->v + i);
+}
+
+/** @This sets the value of one of the 16 virtual machine registers */
+BC_ALWAYS_INLINE void
+bc_set_reg(struct bc_context_s *ctx, uint_fast8_t i, uintptr_t value)
+{
+  ctx->v[i] = value;
+}
+
+/** @This returns the value of the virtual machine pc */
+BC_ALWAYS_INLINE const void *
+bc_get_pc(struct bc_context_s *ctx)
+{
+  return ctx->vpc;
+}
+
+/** @This sets the value of the virtual machine pc */
+BC_ALWAYS_INLINE void
+bc_set_pc(struct bc_context_s *ctx, const void *pc)
+{
+  ctx->vpc = pc;
+}
+
+/** @This function enables or disables the bytecode execution trace
+    debug output. If the @ref #CONFIG_MUTEK_BYTECODE_TRACE token is
+    not defined, this function has no effect. The @tt trace
+    instruction can be used to enable and disable trace output. */
+BC_ALWAYS_INLINE void
+bc_set_trace(struct bc_context_s *ctx, bc_bool_t enabled, bc_bool_t regs)
+{
+  ctx->trace = enabled;
+  ctx->trace_regs = regs;
+}
+
+/** @This skips the next instruction. This can only be called if the
+    execution has stopped on a conditional custom instruction. */
+BC_ALWAYS_INLINE void
+bc_skip(struct bc_context_s *ctx)
+{
+  ctx->pc |= 1;
+}
+
+/** @This returns the current bytecode execution mode */
+BC_ALWAYS_INLINE uint_fast8_t bc_get_mode(const struct bc_context_s *ctx)
+{
+  return ctx->mode;
+}
+
+/** @This sets the current bytecode execution mode */
+BC_ALWAYS_INLINE void bc_set_mode(struct bc_context_s *ctx, uint_fast8_t mode)
+{
+  ctx->mode = mode & 63;
+}
+
+/** @This dumps the virtual machine state. If the @ref
+    #CONFIG_MUTEK_BYTECODE_DEBUG token is not defined, this
+    function has no effect. */
+void bc_dump(const struct bc_context_s *ctx, bc_bool_t regs);
+
+/** This function starts or resumes execution of the bytecode. It
+    stops when an instruction which is not handled is encountered and
+    returns its opcode. Instructions words with the most significant
+    bit set are custom instructions and must be handled by the caller
+    before resuming execution of the bytecode.
+
+    If the end of bytecode instruction has been reached, this function
+    returns 0. Other return values less than 32768 indicate an error
+    condition.
+
+    This function will either run the vm or jump to the machine
+    compiled bytecode. The type of bytecode is guessed from the
+    descriptor.
+*/
+BC_ALWAYS_INLINE bc_opcode_t bc_run(struct bc_context_s *ctx)
+{
+  return ctx->desc->run(ctx);
+}
+
+/** This function starts or resumes executions of the bytecode using
+    the virtual machine. This function does not work if the bytecode
+    is compiled in machine code.
+
+    When sandboxed, at most @tt max_cycles instructions are
+    executed. This function returns 1 when this limit is reached. It
+    will return 3 if an error occurs. */
+bc_opcode_t bc_run_vm(struct bc_context_s *ctx);
+
+/** @internal @This specifies packing and byteswap opcode operations */
+enum bc_opcode_pack_e
+{
+  BC_OP_PACK8       = 0,
+  BC_OP_PACK16LE    = 1,
+  BC_OP_PACK16BE    = 2,
+  BC_OP_UNPACK16LE  = 3,
+  BC_OP_UNPACK16BE  = 4,
+  BC_OP_SWAP16LE    = 5,
+  BC_OP_SWAP16BE    = 6,
+  BC_OP_SWAP16      = 7,
+  BC_OP_UNPACK8     = 8,
+  BC_OP_PACK32LE    = 9,
+  BC_OP_PACK32BE    = 10,
+  BC_OP_UNPACK32LE  = 11,
+  BC_OP_UNPACK32BE  = 12,
+  BC_OP_SWAP32LE    = 13,
+  BC_OP_SWAP32BE    = 14,
+  BC_OP_SWAP32      = 15,
+};
+
+/** @see bc_ccall_function_t */
+#define BC_CCALL_FUNCTION(n) void (n)(struct bc_context_s *ctx)
+/** C function type invoked by the @tt ccall instruction. @see #BC_CCALL_FUNCTION */
+typedef BC_CCALL_FUNCTION(bc_ccall_function_t);
+
+#endif
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libgfx/test/font_8x11.xbm	Sun May 27 18:45:35 2018 +0200
@@ -0,0 +1,91 @@
+#define font_8x11_width 32
+#define font_8x11_height 264
+static unsigned char font_8x11_bits[] = {
+   0x00, 0x10, 0x28, 0x00, 0x00, 0x10, 0x28, 0x24, 0x00, 0x10, 0x28, 0x24,
+   0x00, 0x10, 0x00, 0x7e, 0x00, 0x10, 0x00, 0x24, 0x00, 0x10, 0x00, 0x24,
+   0x00, 0x10, 0x00, 0x7e, 0x00, 0x00, 0x00, 0x24, 0x00, 0x10, 0x00, 0x24,
+   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0c, 0x10,
+   0x3c, 0x86, 0x12, 0x10, 0x4a, 0x49, 0x12, 0x10, 0x0a, 0x29, 0x12, 0x00,
+   0x3c, 0x16, 0x0c, 0x00, 0x50, 0x68, 0x52, 0x00, 0x52, 0x94, 0x22, 0x00,
+   0x3c, 0x92, 0x22, 0x00, 0x10, 0x61, 0x5c, 0x00, 0x00, 0x00, 0x00, 0x00,
+   0x00, 0x00, 0x00, 0x00, 0x20, 0x08, 0x00, 0x00, 0x10, 0x10, 0x00, 0x00,
+   0x10, 0x10, 0x00, 0x10, 0x08, 0x20, 0x24, 0x10, 0x08, 0x20, 0x18, 0x10,
+   0x08, 0x20, 0x7e, 0xfe, 0x10, 0x10, 0x18, 0x10, 0x10, 0x10, 0x24, 0x10,
+   0x20, 0x08, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+   0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x10,
+   0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x08, 0x00, 0x3e, 0x00, 0x08,
+   0x00, 0x00, 0x00, 0x04, 0x20, 0x00, 0x00, 0x04, 0x20, 0x00, 0x18, 0x02,
+   0x20, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x38, 0x10, 0x3c, 0x3c,
+   0x44, 0x18, 0x42, 0x42, 0x64, 0x14, 0x42, 0x40, 0x54, 0x10, 0x20, 0x40,
+   0x54, 0x10, 0x10, 0x38, 0x54, 0x10, 0x08, 0x40, 0x4c, 0x10, 0x04, 0x40,
+   0x44, 0x10, 0x02, 0x42, 0x38, 0x7c, 0x7e, 0x3c, 0x00, 0x00, 0x00, 0x00,
+   0x00, 0x00, 0x00, 0x00, 0x20, 0x7e, 0x3c, 0x7e, 0x30, 0x02, 0x42, 0x40,
+   0x28, 0x02, 0x02, 0x40, 0x24, 0x02, 0x02, 0x20, 0x22, 0x3e, 0x3e, 0x20,
+   0x7e, 0x40, 0x42, 0x10, 0x20, 0x40, 0x42, 0x10, 0x20, 0x42, 0x42, 0x08,
+   0x20, 0x3c, 0x3c, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+   0x3c, 0x3c, 0x00, 0x00, 0x42, 0x42, 0x00, 0x00, 0x42, 0x42, 0x18, 0x00,
+   0x42, 0x42, 0x18, 0x00, 0x3c, 0x7c, 0x00, 0x00, 0x42, 0x40, 0x00, 0x0c,
+   0x42, 0x40, 0x00, 0x00, 0x42, 0x42, 0x18, 0x00, 0x3c, 0x3c, 0x18, 0x08,
+   0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x3c,
+   0x00, 0x00, 0x00, 0x42, 0x20, 0x00, 0x04, 0x42, 0x10, 0x7e, 0x08, 0x40,
+   0x08, 0x00, 0x10, 0x20, 0x04, 0x00, 0x20, 0x10, 0x08, 0x7e, 0x10, 0x10,
+   0x10, 0x00, 0x08, 0x00, 0x20, 0x00, 0x04, 0x10, 0x00, 0x00, 0x00, 0x00,
+   0x00, 0x00, 0x00, 0x00, 0x1c, 0x18, 0x3e, 0x3c, 0x22, 0x24, 0x42, 0x42,
+   0x49, 0x42, 0x42, 0x02, 0x55, 0x42, 0x42, 0x02, 0x51, 0x7e, 0x3e, 0x02,
+   0x5d, 0x42, 0x42, 0x02, 0x55, 0x42, 0x42, 0x02, 0x5d, 0x42, 0x42, 0x42,
+   0x31, 0x42, 0x3e, 0x3c, 0x02, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
+   0x3e, 0x7e, 0x7e, 0x3c, 0x42, 0x02, 0x02, 0x42, 0x42, 0x02, 0x02, 0x02,
+   0x42, 0x02, 0x02, 0x02, 0x42, 0x1e, 0x1e, 0x72, 0x42, 0x02, 0x02, 0x42,
+   0x42, 0x02, 0x02, 0x42, 0x42, 0x02, 0x02, 0x42, 0x3e, 0x7e, 0x02, 0x3c,
+   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x42, 0x7c, 0x7c, 0x42,
+   0x42, 0x10, 0x20, 0x42, 0x42, 0x10, 0x20, 0x22, 0x42, 0x10, 0x20, 0x12,
+   0x7e, 0x10, 0x20, 0x0e, 0x42, 0x10, 0x20, 0x12, 0x42, 0x10, 0x20, 0x22,
+   0x42, 0x10, 0x22, 0x42, 0x42, 0x7c, 0x1c, 0x42, 0x00, 0x00, 0x00, 0x00,
+   0x00, 0x00, 0x00, 0x00, 0x02, 0x42, 0x42, 0x3c, 0x02, 0x66, 0x46, 0x42,
+   0x02, 0x66, 0x46, 0x42, 0x02, 0x5a, 0x4a, 0x42, 0x02, 0x42, 0x4a, 0x42,
+   0x02, 0x42, 0x52, 0x42, 0x02, 0x42, 0x52, 0x42, 0x02, 0x42, 0x62, 0x42,
+   0x7e, 0x42, 0x42, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+   0x3e, 0x3c, 0x3e, 0x3c, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x42, 0x02,
+   0x42, 0x42, 0x42, 0x02, 0x3e, 0x42, 0x3e, 0x3c, 0x02, 0x42, 0x0a, 0x40,
+   0x02, 0x52, 0x12, 0x40, 0x02, 0x62, 0x22, 0x42, 0x02, 0x7c, 0x42, 0x3c,
+   0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0x42, 0x42, 0x82,
+   0x10, 0x42, 0x42, 0x82, 0x10, 0x42, 0x42, 0x82, 0x10, 0x42, 0x24, 0x44,
+   0x10, 0x42, 0x24, 0x54, 0x10, 0x42, 0x24, 0x54, 0x10, 0x42, 0x18, 0x28,
+   0x10, 0x42, 0x18, 0x28, 0x10, 0x3c, 0x18, 0x28, 0x00, 0x00, 0x00, 0x00,
+   0x00, 0x00, 0x00, 0x00, 0x42, 0x42, 0x7e, 0x38, 0x42, 0x42, 0x40, 0x08,
+   0x24, 0x24, 0x20, 0x08, 0x24, 0x24, 0x10, 0x08, 0x18, 0x18, 0x08, 0x08,
+   0x24, 0x08, 0x04, 0x08, 0x24, 0x08, 0x02, 0x08, 0x42, 0x04, 0x02, 0x08,
+   0x42, 0x04, 0x7e, 0x38, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+   0x04, 0x38, 0x10, 0x00, 0x04, 0x20, 0x28, 0x00, 0x08, 0x20, 0x44, 0x00,
+   0x08, 0x20, 0x00, 0x00, 0x10, 0x20, 0x00, 0x00, 0x10, 0x20, 0x00, 0x00,
+   0x20, 0x20, 0x00, 0x00, 0x20, 0x20, 0x00, 0x00, 0x40, 0x38, 0x00, 0x00,
+   0x00, 0x00, 0x00, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00,
+   0x08, 0x00, 0x02, 0x00, 0x10, 0x00, 0x02, 0x00, 0x20, 0x3c, 0x3e, 0x3c,
+   0x00, 0x40, 0x42, 0x42, 0x00, 0x7c, 0x42, 0x02, 0x00, 0x42, 0x42, 0x02,
+   0x00, 0x62, 0x42, 0x42, 0x00, 0x5c, 0x3e, 0x3c, 0x00, 0x00, 0x00, 0x00,
+   0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x38, 0x00, 0x40, 0x00, 0x04, 0x00,
+   0x40, 0x00, 0x04, 0x00, 0x7c, 0x3c, 0x04, 0x3c, 0x42, 0x42, 0x1c, 0x42,
+   0x42, 0x7e, 0x04, 0x42, 0x42, 0x02, 0x04, 0x42, 0x42, 0x42, 0x04, 0x7c,
+   0x7c, 0x3c, 0x04, 0x40, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, 0x3c,
+   0x02, 0x00, 0x00, 0x02, 0x02, 0x10, 0x20, 0x02, 0x02, 0x00, 0x00, 0x02,
+   0x02, 0x18, 0x30, 0x02, 0x3a, 0x10, 0x20, 0x62, 0x46, 0x10, 0x20, 0x1a,
+   0x42, 0x10, 0x20, 0x06, 0x42, 0x10, 0x20, 0x1a, 0x42, 0x38, 0x20, 0x62,
+   0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x18, 0x00, 0x18, 0x00, 0x00, 0x00,
+   0x10, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x10, 0x6c, 0x38, 0x3c,
+   0x10, 0x92, 0x44, 0x42, 0x10, 0x92, 0x44, 0x42, 0x10, 0x92, 0x44, 0x42,
+   0x10, 0x92, 0x44, 0x42, 0x38, 0x92, 0x44, 0x3c, 0x00, 0x00, 0x00, 0x00,
+   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+   0x00, 0x00, 0x00, 0x00, 0x3e, 0x7c, 0x3a, 0x3c, 0x42, 0x42, 0x46, 0x42,
+   0x42, 0x42, 0x02, 0x0c, 0x42, 0x42, 0x02, 0x30, 0x3e, 0x7c, 0x02, 0x42,
+   0x02, 0x40, 0x02, 0x3c, 0x02, 0x40, 0x00, 0x00, 0x02, 0x40, 0x00, 0x00,
+   0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
+   0x1c, 0x42, 0x22, 0x82, 0x04, 0x42, 0x22, 0x82, 0x04, 0x42, 0x14, 0x54,
+   0x04, 0x42, 0x14, 0x54, 0x04, 0x42, 0x08, 0x28, 0x38, 0x3c, 0x08, 0x28,
+   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30,
+   0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x08, 0x42, 0x42, 0x7e, 0x08,
+   0x24, 0x42, 0x20, 0x0c, 0x18, 0x42, 0x10, 0x08, 0x18, 0x42, 0x08, 0x08,
+   0x24, 0x7c, 0x04, 0x08, 0x42, 0x40, 0x7e, 0x30, 0x00, 0x42, 0x00, 0x00,
+   0x00, 0x3c, 0x00, 0x00, 0x10, 0x18, 0x00, 0x00, 0x10, 0x20, 0x4c, 0x00,
+   0x10, 0x20, 0x32, 0x00, 0x10, 0x20, 0x00, 0x24, 0x10, 0x60, 0x00, 0x24,
+   0x10, 0x20, 0x00, 0x00, 0x10, 0x20, 0x00, 0x00, 0x10, 0x20, 0x00, 0x42,
+   0x10, 0x18, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libgfx/test/font_8x11_v.xbm	Sun May 27 18:45:35 2018 +0200
@@ -0,0 +1,99 @@
+#define font_8x11_v_width 44
+#define font_8x11_v_height 192
+static unsigned char font_8x11_v_bits[] = {
+   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x01,
+   0x00, 0x00, 0x00, 0x00, 0xf8, 0x07, 0x00, 0x00, 0x00, 0xc0, 0x21, 0x01,
+   0x00, 0xa0, 0x3f, 0x00, 0x20, 0x01, 0x00, 0x00, 0x00, 0xc0, 0xf9, 0x07,
+   0x00, 0x00, 0x00, 0x00, 0x20, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+   0x00, 0x20, 0x0c, 0x00, 0x00, 0x00, 0x90, 0x41, 0x12, 0xee, 0x00, 0x00,
+   0x48, 0x82, 0x12, 0x11, 0x01, 0x00, 0xc8, 0x07, 0x0d, 0x11, 0x01, 0x00,
+   0x7c, 0xc2, 0x02, 0xe9, 0x00, 0x0e, 0x48, 0x22, 0x05, 0x06, 0x00, 0x00,
+   0x30, 0x21, 0x09, 0x09, 0x00, 0x00, 0x00, 0xc0, 0x10, 0x00, 0x00, 0x00,
+   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x40, 0x00,
+   0x00, 0x00, 0x00, 0x2a, 0x40, 0x00, 0xe0, 0x20, 0x20, 0x1c, 0x40, 0x00,
+   0x18, 0xc3, 0x18, 0x1c, 0xf8, 0x03, 0x04, 0x04, 0x07, 0x2a, 0x40, 0x00,
+   0x00, 0x00, 0x00, 0x08, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00,
+   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x08, 0x00,
+   0x00, 0x00, 0x01, 0x00, 0x30, 0x00, 0x00, 0x00, 0x01, 0x01, 0xc0, 0x00,
+   0x01, 0x00, 0x01, 0x01, 0x00, 0x03, 0x0e, 0x00, 0x01, 0x00, 0x00, 0x0c,
+   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0x10, 0x04,
+   0xf8, 0x23, 0x08, 0x05, 0x09, 0x08, 0x14, 0x24, 0x10, 0x09, 0x89, 0x08,
+   0xe4, 0xe4, 0x3f, 0x11, 0x89, 0x08, 0x04, 0x25, 0x00, 0x21, 0x89, 0x08,
+   0xf8, 0x23, 0x00, 0xc1, 0x70, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x40, 0x3e, 0xfe, 0x00, 0x08,
+   0xa0, 0x20, 0x22, 0x11, 0x01, 0x08, 0x20, 0x21, 0x22, 0x11, 0x19, 0x08,
+   0x20, 0x22, 0x22, 0x11, 0x61, 0x08, 0xfc, 0x27, 0x22, 0x11, 0x81, 0x09,
+   0x20, 0xc0, 0x21, 0x8e, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xb8, 0x43, 0x1c, 0x00, 0x00, 0x00,
+   0x44, 0x24, 0x22, 0x00, 0x42, 0x00, 0x44, 0x24, 0x22, 0x63, 0x4c, 0x00,
+   0x44, 0x24, 0x22, 0x63, 0x00, 0x00, 0x44, 0x24, 0x22, 0x00, 0x00, 0x00,
+   0xb8, 0xc3, 0x1f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x04, 0x00, 0x00, 0x06,
+   0x20, 0x80, 0x04, 0x41, 0x00, 0x08, 0x50, 0x80, 0x04, 0x22, 0x00, 0x08,
+   0x88, 0x80, 0x04, 0x14, 0x68, 0x08, 0x04, 0x81, 0x04, 0x08, 0x80, 0x08,
+   0x00, 0x80, 0x04, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+   0xfc, 0x01, 0x00, 0x00, 0x00, 0x00, 0x02, 0xe2, 0x0f, 0xff, 0xf1, 0x07,
+   0xb9, 0x04, 0x12, 0x11, 0x09, 0x08, 0x29, 0x05, 0x22, 0x11, 0x09, 0x08,
+   0xfd, 0x04, 0x22, 0x11, 0x09, 0x08, 0x04, 0x02, 0x12, 0x11, 0x09, 0x08,
+   0xf8, 0xe1, 0x0f, 0xee, 0x10, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfc, 0xe7, 0x3f, 0xff, 0xf1, 0x07,
+   0x04, 0x24, 0x22, 0x10, 0x09, 0x08, 0x04, 0x24, 0x22, 0x10, 0x09, 0x08,
+   0x04, 0x24, 0x22, 0x10, 0x89, 0x08, 0x04, 0x24, 0x20, 0x00, 0x89, 0x08,
+   0xf8, 0x23, 0x20, 0x00, 0xf1, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfc, 0x07, 0x00, 0x02, 0xf8, 0x0f,
+   0x40, 0x20, 0x20, 0x01, 0x81, 0x00, 0x40, 0x20, 0x20, 0x01, 0x81, 0x00,
+   0x40, 0xe0, 0x3f, 0x01, 0x41, 0x01, 0x40, 0x20, 0x20, 0xfe, 0x21, 0x02,
+   0xfc, 0x27, 0x20, 0x00, 0x19, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfc, 0xe7, 0x3f, 0xff, 0xf1, 0x07,
+   0x04, 0x00, 0x18, 0xc0, 0x08, 0x08, 0x04, 0x00, 0x04, 0x30, 0x08, 0x08,
+   0x04, 0x00, 0x04, 0x0c, 0x08, 0x08, 0x04, 0x00, 0x18, 0x02, 0x08, 0x08,
+   0x04, 0xe0, 0x3f, 0xff, 0xf1, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfc, 0xc7, 0x1f, 0xff, 0x11, 0x07,
+   0x40, 0x24, 0x20, 0x10, 0x89, 0x08, 0x40, 0x24, 0x20, 0x18, 0x89, 0x08,
+   0x40, 0xa4, 0x20, 0x14, 0x89, 0x08, 0x40, 0x64, 0x20, 0x12, 0x89, 0x08,
+   0x80, 0xe3, 0x1f, 0xe1, 0x70, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00,
+   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x3f, 0xc0, 0x01, 0x0e,
+   0x00, 0x24, 0x00, 0x38, 0xc0, 0x01, 0x00, 0x24, 0x00, 0x07, 0x38, 0x00,
+   0xfc, 0x27, 0x00, 0x07, 0xc0, 0x00, 0x00, 0x24, 0x00, 0x38, 0x38, 0x00,
+   0x00, 0xc4, 0x3f, 0xc0, 0xc1, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e,
+   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x06, 0x30, 0x07, 0x01, 0x00,
+   0xb0, 0x61, 0x0c, 0x09, 0x01, 0x00, 0x40, 0x80, 0x03, 0x11, 0xf9, 0x0f,
+   0x40, 0x00, 0x02, 0x21, 0x09, 0x08, 0xb0, 0x01, 0x0c, 0x41, 0x09, 0x08,
+   0x0c, 0x06, 0x30, 0x81, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00,
+   0x00, 0x06, 0x00, 0x40, 0x04, 0x00, 0x80, 0x21, 0x20, 0x80, 0x04, 0x00,
+   0x60, 0x20, 0x20, 0x00, 0x05, 0x00, 0x18, 0xe0, 0x3f, 0x80, 0x04, 0x00,
+   0x04, 0x00, 0x00, 0x40, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x00, 0xff, 0xf1, 0x00,
+   0x00, 0x20, 0x05, 0x21, 0x08, 0x01, 0x00, 0x22, 0x05, 0x21, 0x08, 0x01,
+   0x00, 0x21, 0x05, 0x21, 0x08, 0x01, 0x80, 0x40, 0x05, 0x21, 0x08, 0x01,
+   0x00, 0xe0, 0x03, 0x1e, 0x90, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x78, 0xc0, 0x03, 0x00, 0xe4, 0x00,
+   0x84, 0x20, 0x05, 0xff, 0x12, 0x01, 0x84, 0x20, 0x05, 0x10, 0x13, 0x01,
+   0x84, 0x20, 0x05, 0x10, 0x13, 0x01, 0x84, 0x20, 0x05, 0x00, 0x13, 0x01,
+   0xfc, 0x47, 0x03, 0x00, 0xfc, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfc, 0x07, 0x00, 0x00, 0xf8, 0x0f,
+   0x20, 0x00, 0x80, 0x00, 0x20, 0x00, 0x40, 0x20, 0x44, 0x00, 0x50, 0x00,
+   0x40, 0xe0, 0x57, 0x20, 0x50, 0x00, 0x40, 0x20, 0x80, 0xbf, 0x88, 0x00,
+   0x3c, 0x00, 0x00, 0x00, 0x88, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe0, 0x03, 0x00, 0xf0, 0x00,
+   0x00, 0x00, 0x04, 0x1f, 0x08, 0x01, 0x04, 0x04, 0x04, 0x20, 0x08, 0x01,
+   0xfc, 0xe7, 0x03, 0x20, 0x08, 0x01, 0x04, 0x00, 0x04, 0x20, 0x08, 0x01,
+   0x00, 0x00, 0x04, 0x1f, 0xf0, 0x00, 0x00, 0xe0, 0x03, 0x00, 0x00, 0x00,
+   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x80, 0x03, 0x3f, 0x90, 0x00,
+   0x88, 0x40, 0x04, 0x10, 0x48, 0x01, 0x88, 0x40, 0x04, 0x20, 0x48, 0x01,
+   0x88, 0x40, 0x04, 0x20, 0x28, 0x01, 0x88, 0x40, 0x04, 0x20, 0x28, 0x01,
+   0x70, 0xf8, 0x07, 0x10, 0x90, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x07, 0x30, 0x80, 0x01,
+   0xf8, 0x27, 0x00, 0x0c, 0x60, 0x00, 0x84, 0x20, 0x00, 0x03, 0x18, 0x00,
+   0x84, 0x20, 0x00, 0x0c, 0x60, 0x00, 0x04, 0x20, 0x00, 0x30, 0x18, 0x00,
+   0x00, 0xc0, 0x07, 0x00, 0x60, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x01,
+   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x84, 0x90, 0x07, 0x21, 0x00, 0x00,
+   0x48, 0x48, 0x00, 0x23, 0x80, 0x00, 0x30, 0x48, 0x00, 0x25, 0xf0, 0x07,
+   0x30, 0x48, 0x00, 0x29, 0x08, 0x08, 0x48, 0x48, 0x00, 0x31, 0x08, 0x08,
+   0x84, 0xf0, 0x07, 0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x10, 0x00,
+   0x00, 0x00, 0x00, 0x80, 0x88, 0x01, 0x00, 0x20, 0x20, 0x80, 0x08, 0x00,
+   0xfc, 0x27, 0x20, 0x40, 0x08, 0x00, 0x00, 0xc0, 0x1f, 0x40, 0x88, 0x01,
+   0x00, 0x00, 0x02, 0x80, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libgfx/test/font_8x11_v_pow2.xbm	Sun May 27 18:45:35 2018 +0200
@@ -0,0 +1,4 @@
+#define font3_width 64
+#define font3_height 192
+static unsigned char font3_bits[] = {
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf8, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x21, 0x01, 0x00, 0x00, 0x00, 0xa0, 0x3f, 0x00, 0x20, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0xf9, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x90, 0x41, 0x12, 0xee, 0x00, 0x00, 0x00, 0x00, 0x48, 0x82, 0x12, 0x11, 0x01, 0x00, 0x00, 0x00, 0xc8, 0x07, 0x0d, 0x11, 0x01, 0x00, 0x00, 0x00, 0x7c, 0xc2, 0x02, 0xe9, 0x00, 0x0e, 0x00, 0x00, 0x48, 0x22, 0x05, 0x06, 0x00, 0x00, 0x00, 0x00, 0x30, 0x21, 0x09, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2a, 0x40, 0x00, 0x00, 0x00, 0xe0, 0x20, 0x20, 0x1c, 0x40, 0x00, 0x00, 0x00, 0x18, 0xc3, 0x18, 0x1c, 0xf8, 0x03, 0x00, 0x00, 0x04, 0x04, 0x07, 0x2a, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0xc0, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0x01, 0x00, 0x03, 0x00, 0x00, 0x0e, 0x00, 0x01, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0x10, 0x04, 0x00, 0x00, 0xf8, 0x23, 0x08, 0x05, 0x09, 0x08, 0x00, 0x00, 0x14, 0x24, 0x10, 0x09, 0x89, 0x08, 0x00, 0x00, 0xe4, 0xe4, 0x3f, 0x11, 0x89, 0x08, 0x00, 0x00, 0x04, 0x25, 0x00, 0x21, 0x89, 0x08, 0x00, 0x00, 0xf8, 0x23, 0x00, 0xc1, 0x70, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x40, 0x3e, 0xfe, 0x00, 
0x08, 0x00, 0x00, 0xa0, 0x20, 0x22, 0x11, 0x01, 0x08, 0x00, 0x00, 0x20, 0x21, 0x22, 0x11, 0x19, 0x08, 0x00, 0x00, 0x20, 0x22, 0x22, 0x11, 0x61, 0x08, 0x00, 0x00, 0xfc, 0x27, 0x22, 0x11, 0x81, 0x09, 0x00, 0x00, 0x20, 0xc0, 0x21, 0x8e, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xb8, 0x43, 0x1c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x24, 0x22, 0x00, 0x42, 0x00, 0x00, 0x00, 0x44, 0x24, 0x22, 0x63, 0x4c, 0x00, 0x00, 0x00, 0x44, 0x24, 0x22, 0x63, 0x00, 0x00, 0x00, 0x00, 0x44, 0x24, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0xb8, 0xc3, 0x1f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x04, 0x00, 0x00, 0x06, 0x00, 0x00, 0x20, 0x80, 0x04, 0x41, 0x00, 0x08, 0x00, 0x00, 0x50, 0x80, 0x04, 0x22, 0x00, 0x08, 0x00, 0x00, 0x88, 0x80, 0x04, 0x14, 0x68, 0x08, 0x00, 0x00, 0x04, 0x81, 0x04, 0x08, 0x80, 0x08, 0x00, 0x00, 0x00, 0x80, 0x04, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfc, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xe2, 0x0f, 0xff, 0xf1, 0x07, 0x00, 0x00, 0xb9, 0x04, 0x12, 0x11, 0x09, 0x08, 0x00, 0x00, 0x29, 0x05, 0x22, 0x11, 0x09, 0x08, 0x00, 0x00, 0xfd, 0x04, 0x22, 0x11, 0x09, 0x08, 0x00, 0x00, 0x04, 0x02, 0x12, 0x11, 0x09, 0x08, 0x00, 0x00, 0xf8, 0xe1, 0x0f, 0xee, 0x10, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfc, 0xe7, 0x3f, 0xff, 0xf1, 0x07, 0x00, 0x00, 0x04, 0x24, 0x22, 0x10, 0x09, 0x08, 0x00, 0x00, 0x04, 0x24, 0x22, 0x10, 0x09, 0x08, 0x00, 0x00, 0x04, 0x24, 0x22, 0x10, 0x89, 0x08, 0x00, 0x00, 0x04, 0x24, 0x20, 0x00, 0x89, 0x08, 0x00, 0x00, 0xf8, 0x23, 0x20, 0x00, 0xf1, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfc, 0x07, 0x00, 0x02, 0xf8, 0x0f, 0x00, 0x00, 0x40, 0x20, 0x20, 0x01, 0x81, 0x00, 0x00, 0x00, 0x40, 0x20, 
0x20, 0x01, 0x81, 0x00, 0x00, 0x00, 0x40, 0xe0, 0x3f, 0x01, 0x41, 0x01, 0x00, 0x00, 0x40, 0x20, 0x20, 0xfe, 0x21, 0x02, 0x00, 0x00, 0xfc, 0x27, 0x20, 0x00, 0x19, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfc, 0xe7, 0x3f, 0xff, 0xf1, 0x07, 0x00, 0x00, 0x04, 0x00, 0x18, 0xc0, 0x08, 0x08, 0x00, 0x00, 0x04, 0x00, 0x04, 0x30, 0x08, 0x08, 0x00, 0x00, 0x04, 0x00, 0x04, 0x0c, 0x08, 0x08, 0x00, 0x00, 0x04, 0x00, 0x18, 0x02, 0x08, 0x08, 0x00, 0x00, 0x04, 0xe0, 0x3f, 0xff, 0xf1, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfc, 0xc7, 0x1f, 0xff, 0x11, 0x07, 0x00, 0x00, 0x40, 0x24, 0x20, 0x10, 0x89, 0x08, 0x00, 0x00, 0x40, 0x24, 0x20, 0x18, 0x89, 0x08, 0x00, 0x00, 0x40, 0xa4, 0x20, 0x14, 0x89, 0x08, 0x00, 0x00, 0x40, 0x64, 0x20, 0x12, 0x89, 0x08, 0x00, 0x00, 0x80, 0xe3, 0x1f, 0xe1, 0x70, 0x04, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x3f, 0xc0, 0x01, 0x0e, 0x00, 0x00, 0x00, 0x24, 0x00, 0x38, 0xc0, 0x01, 0x00, 0x00, 0x00, 0x24, 0x00, 0x07, 0x38, 0x00, 0x00, 0x00, 0xfc, 0x27, 0x00, 0x07, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x24, 0x00, 0x38, 0x38, 0x00, 0x00, 0x00, 0x00, 0xc4, 0x3f, 0xc0, 0xc1, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x06, 0x30, 0x07, 0x01, 0x00, 0x00, 0x00, 0xb0, 0x61, 0x0c, 0x09, 0x01, 0x00, 0x00, 0x00, 0x40, 0x80, 0x03, 0x11, 0xf9, 0x0f, 0x00, 0x00, 0x40, 0x00, 0x02, 0x21, 0x09, 0x08, 0x00, 0x00, 0xb0, 0x01, 0x0c, 0x41, 0x09, 0x08, 0x00, 0x00, 0x0c, 0x06, 0x30, 0x81, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x40, 0x04, 0x00, 0x00, 0x00, 0x80, 0x21, 0x20, 0x80, 0x04, 0x00, 0x00, 0x00, 0x60, 0x20, 0x20, 0x00, 0x05, 0x00, 0x00, 
0x00, 0x18, 0xe0, 0x3f, 0x80, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x40, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x00, 0xff, 0xf1, 0x00, 0x00, 0x00, 0x00, 0x20, 0x05, 0x21, 0x08, 0x01, 0x00, 0x00, 0x00, 0x22, 0x05, 0x21, 0x08, 0x01, 0x00, 0x00, 0x00, 0x21, 0x05, 0x21, 0x08, 0x01, 0x00, 0x00, 0x80, 0x40, 0x05, 0x21, 0x08, 0x01, 0x00, 0x00, 0x00, 0xe0, 0x03, 0x1e, 0x90, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x78, 0xc0, 0x03, 0x00, 0xe4, 0x00, 0x00, 0x00, 0x84, 0x20, 0x05, 0xff, 0x12, 0x01, 0x00, 0x00, 0x84, 0x20, 0x05, 0x10, 0x13, 0x01, 0x00, 0x00, 0x84, 0x20, 0x05, 0x10, 0x13, 0x01, 0x00, 0x00, 0x84, 0x20, 0x05, 0x00, 0x13, 0x01, 0x00, 0x00, 0xfc, 0x47, 0x03, 0x00, 0xfc, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfc, 0x07, 0x00, 0x00, 0xf8, 0x0f, 0x00, 0x00, 0x20, 0x00, 0x80, 0x00, 0x20, 0x00, 0x00, 0x00, 0x40, 0x20, 0x44, 0x00, 0x50, 0x00, 0x00, 0x00, 0x40, 0xe0, 0x57, 0x20, 0x50, 0x00, 0x00, 0x00, 0x40, 0x20, 0x80, 0xbf, 0x88, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x88, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe0, 0x03, 0x00, 0xf0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x1f, 0x08, 0x01, 0x00, 0x00, 0x04, 0x04, 0x04, 0x20, 0x08, 0x01, 0x00, 0x00, 0xfc, 0xe7, 0x03, 0x20, 0x08, 0x01, 0x00, 0x00, 0x04, 0x00, 0x04, 0x20, 0x08, 0x01, 0x00, 0x00, 0x00, 0x00, 0x04, 0x1f, 0xf0, 0x00, 0x00, 0x00, 0x00, 0xe0, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x80, 0x03, 0x3f, 0x90, 0x00, 0x00, 0x00, 0x88, 0x40, 0x04, 0x10, 0x48, 0x01, 0x00, 0x00, 0x88, 0x40, 0x04, 0x20, 0x48, 0x01, 0x00, 0x00, 0x88, 0x40, 0x04, 0x20, 0x28, 0x01, 0x00, 0x00, 0x88, 0x40, 0x04, 0x20, 0x28, 0x01, 0x00, 0x00, 0x70, 0xf8, 0x07, 0x10, 
0x90, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x07, 0x30, 0x80, 0x01, 0x00, 0x00, 0xf8, 0x27, 0x00, 0x0c, 0x60, 0x00, 0x00, 0x00, 0x84, 0x20, 0x00, 0x03, 0x18, 0x00, 0x00, 0x00, 0x84, 0x20, 0x00, 0x0c, 0x60, 0x00, 0x00, 0x00, 0x04, 0x20, 0x00, 0x30, 0x18, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x07, 0x00, 0x60, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x84, 0x90, 0x07, 0x21, 0x00, 0x00, 0x00, 0x00, 0x48, 0x48, 0x00, 0x23, 0x80, 0x00, 0x00, 0x00, 0x30, 0x48, 0x00, 0x25, 0xf0, 0x07, 0x00, 0x00, 0x30, 0x48, 0x00, 0x29, 0x08, 0x08, 0x00, 0x00, 0x48, 0x48, 0x00, 0x31, 0x08, 0x08, 0x00, 0x00, 0x84, 0xf0, 0x07, 0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x88, 0x01, 0x00, 0x00, 0x00, 0x20, 0x20, 0x80, 0x08, 0x00, 0x00, 0x00, 0xfc, 0x27, 0x20, 0x40, 0x08, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x1f, 0x40, 0x88, 0x01, 0x00, 0x00, 0x00, 0x00, 0x02, 0x80, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, };
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libgfx/test/test.c	Sun May 27 18:45:35 2018 +0200
@@ -0,0 +1,200 @@
+
+#include <SDL/SDL.h>
+#include <stdint.h>
+
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+
+#include "bytecode.h"
+#include <gfx/gfx.h>
+#include <gfx/pixel.h>
+#include <gfx/bytecode.h>
+
+#include "font_8x11.xbm"
+/* Test harness: runs a gfx bytecode blob and shows its surfaces in an SDL window. */
+int main(int argc, char **argv)
+{
+  if (argc <= 1)
+    {
+      fprintf(stderr, "usage: %s bytecode.out\n", argv[0]);
+      return -1;
+    }
+
+  int fd = open(argv[1], O_RDONLY);
+  if (fd < 0)
+    {
+      fprintf(stderr, "bytecode file opened failed\n");
+      return -1;
+    }
+
+  /* Start SDL */
+  SDL_Init(SDL_INIT_EVERYTHING);
+
+  /* Set up screen: the SDL depth follows CONFIG_GFX_DEFAULT_L2BPP */
+  SDL_Surface *screen = NULL;
+  switch (CONFIG_GFX_DEFAULT_L2BPP)
+    {
+    case 0 ... 3:
+      screen = SDL_SetVideoMode(512, 256, 8, SDL_SWSURFACE);
+      assert(screen && screen->format->BitsPerPixel == 8);
+      break;
+    case 4:
+      screen = SDL_SetVideoMode(512, 256, 16, SDL_SWSURFACE);
+      assert(screen && screen->format->BitsPerPixel == 16);
+      break;
+    case 5:
+      screen = SDL_SetVideoMode(512, 256, 32, SDL_SWSURFACE);
+      assert(screen && screen->format->BitsPerPixel == 32);
+      break;
+    }
+  assert(screen != NULL);       /* no case matched CONFIG_GFX_DEFAULT_L2BPP */
+  int quit = 0;
+
+  struct stat s;
+  if (fstat(fd, &s) < 0)
+    {
+      fprintf(stderr, "bytecode file stat failed\n");
+      return -1;
+    }
+
+  size_t blob_len = s.st_size;
+  void *blob = mmap(NULL, blob_len, PROT_READ, MAP_SHARED, fd, 0);
+  if (blob == MAP_FAILED)
+    {
+      fprintf(stderr, "bytecode mmap failed: %s\n", strerror(errno));
+      return -1;
+    }
+
+  struct bc_descriptor_s desc;
+
+  if (bc_load(&desc, blob, blob_len))
+    {
+      fprintf(stderr, "bytecode loading failed\n");
+      return -1;
+    }
+
+  struct bc_context_s vm;
+  bc_init(&vm, &desc);
+  bc_set_trace(&vm, 1, 1);
+
+  static struct gfx_bc_context_s ctx;
+  size_t size = 0x100000;
+  void *data = malloc(size);
+  gfx_bc_init(&ctx);
+  assert(data != NULL);         /* the font is copied into this buffer below */
+  memcpy(data, font_8x11_bits, font_8x11_width * font_8x11_height / 8);
+  bc_set_reg(&vm, 0, (uintptr_t)data);
+
+  while (!quit)
+    {
+      SDL_Event event;
+      if (SDL_PollEvent(&event))
+	{
+	  switch (event.type)
+	    {
+	    case SDL_KEYDOWN:
+	      if (event.key.keysym.sym != SDLK_ESCAPE)
+		break;
+	    case SDL_QUIT:              /* also reached by fall through when ESC is pressed */
+	      quit = 1;
+	    }
+	}
+
+      uint16_t op = bc_run(&vm);
+
+      if (!BC_STATUS_CUSTOM(op))
+	{
+	  switch (op)
+	    {
+	    case BC_RUN_STATUS_FAULT:
+	    case BC_RUN_STATUS_END:
+	      SDL_Delay(3000 /* ms */);
+	      quit = 1;                 /* fall through */
+	    case BC_RUN_STATUS_BREAK:
+	    case BC_RUN_STATUS_CYCLES:
+	      break;
+	    }
+	  continue;
+	}
+
+      if (GFX_BC_IS_GFX_OP(op))
+	{
+	  error_t err = gfx_bc_run(&vm, &ctx, op);
+	  if (err)
+	    {
+	      fprintf(stderr, "gfx bytecode %04x error %i, before:\n", op, err);
+	      bc_dump(&vm, 1);
+	      quit = 1;
+	      continue;
+	    }
+	}
+
+      switch (op & 0x7c00)
+        {
+        case 0x0800: {            /* display */
+          uint_fast8_t n = (op >> 4) & 3;
+
+          struct gfx_surface_s *s = ctx.s + n;
+
+          SDL_LockSurface(screen);
+          uint8_t *pixels = (uint8_t*)screen->pixels;
+          uint_fast16_t bytes = screen->format->BytesPerPixel;
+
+          uint_fast32_t ys = 0, ye = screen->h;
+          uint_fast32_t xs = 0, xe = screen->w;
+
+          if (op & 0x0040)        /* position given in a register, size in r15 */
+            {
+              uint32_t p0 = bc_get_reg(&vm, op & 15);
+              ys = gfx_vector_yint(p0);
+              xs = gfx_vector_xint(p0);
+
+              uint32_t p1 = bc_get_reg(&vm, 15);
+              ye = ys + gfx_vector_yint(p1);
+              if (ye > (uint_fast16_t)screen->h)
+                ye = screen->h;
+
+              xe = xs + gfx_vector_xint(p1);
+              if (xe > (uint_fast16_t)screen->w)
+                xe = screen->w;
+            }
+
+          for (uint_fast16_t y = ys; y < ye; y++)
+            {
+              uint8_t *pdst = pixels + (size_t)y * screen->pitch + (size_t)xs * bytes;
+              for (uint_fast16_t x = xs; x < xe; x++)
+                {
+                  gfx_pixel_t p = gfx_get_pixel_safe(s, x - xs, y - ys);
+
+                  switch (CONFIG_GFX_DEFAULT_L2BPP)
+                    {
+                    case 0 ... 3: {
+                      uint32_t h = 1 << gfx_fmt_desc[s->fmt].l2bpp;
+                      *pdst = (0x100 * p) >> h;   /* rescale an h-bit value to 8 bits */
+                      break;
+                    }
+                    case 4:
+                      *(uint16_t*)pdst = p;
+                      break;
+                    case 5:
+                      *(uint32_t*)pdst = p;
+                      break;
+                    }
+                  pdst += bytes;
+                }
+            }
+          SDL_UnlockSurface(screen);
+          SDL_Flip(screen);
+          SDL_Delay(30 /* ms */);
+          break;
+        }
+
+        }
+    }
+
+  SDL_Quit();
+
+  return 0;
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libgfx/test/test_arc.bc	Sun May 27 18:45:35 2018 +0200
@@ -0,0 +1,36 @@
+// Arc demo: redraws one clockwise and one counter-clockwise arc on surface 0 forever.
+.name test
+.custom gfx
+
+#include <gfx/bytecode.h>
+
+test_bytecode_entry:
+// %0 is the entry argument; gfx_surface below takes it as the surface data pointer.
+	.entry %0
+
+	cst32	    %1, GFX_SURFACE(512, 256, 0), 0
+	gfx_surface %0, %1, 0	// surface 0: data pointer %0, descriptor %1
+
+	cst32       %0, GFX_XY(128, 128), 0	// first arc operand, constant for the whole run
+	cst32       %2, GFX_XY(0, 40), 0	// second arc operand, advanced by %3 every frame
+	cst32       %3, GFX_XY(1, 2), 0	// per-frame increment applied to %2
+	cst32	    %14, 0x500000, 0	// counter; only bits 16 and up are copied to %15
+
+loop:
+	gfx_clear 0
+ 	mov	    %15, %14
+	shi32r      %15, 16	// %15 = %14 >> 16
+	gfx_arc_cw    %0, %2
+	// NOTE(review): %15 is reloaded before each arc op, presumably an implicit operand; confirm in bc_custom_gfx.pm
+ 	mov	    %15, %14
+	shi32r      %15, 16
+	add8        %15, 5	// same value as for the first arc, plus 5
+	gfx_arc_ccw %0, %2
+
+	gfx_disp    0	// copy surface 0 to the display
+	gfx_addv    %2, %3	// %2 += %3 as packed vectors
+	add8        %14, 127	// advance the counter that feeds %15
+
+	jmp8 loop
+	// not reached: the jmp8 above always branches back to loop
+	end
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libgfx/test/test_arith.bc	Sun May 27 18:45:35 2018 +0200
@@ -0,0 +1,305 @@
+// Self-checking test of the gfx vector arithmetic opcodes: a failed check runs die.
+.name test
+.custom gfx
+
+#include <gfx/bytecode.h>
+
+test_bytecode_entry:
+// %0 is the entry argument, used as the data pointer of surface 0.
+    .entry %0
+    cst32	    %1, GFX_SURFACE(512, 256, 0), 0
+    gfx_surface     %0, %1, 0
+
+    // gfx_subv with GFX_XY constants: (0x555, 0x555) - (-1, 2) must give (0x556, 0x553)
+    cst32       %0, GFX_XY(0x555, 0x555), 0
+    cst32       %1, GFX_XY(-1, 2), 0
+    gfx_subv     %0, %1
+    cst32       %2, GFX_XY(0x556, 0x553), 0
+    neq %0, %2
+     die	// meant to run only when %0 differs from the expected %2
+
+    // gfx_addv: adding %1 back must restore the initial (0x555, 0x555)
+    gfx_addv     %0, %1
+    cst32       %2, GFX_XY(0x555, 0x555), 0
+    neq %0, %2
+     die
+
+    // gfx_unpack / gfx_unpacks on GFX_XYF constants: split a packed vector into two registers
+    cst32       %0, GFX_XYF(0x1234, 0x5678), 0
+    gfx_unpack  %2, %3, %0
+    cst16       %1, 0x1234, 0
+    neq         %2, %1
+     die
+    cst16       %1, 0x5678, 0
+    neq         %3, %1
+     die
+    // gfx_unpack with bit 15 set in both halves: the plain 16 bit values are expected
+    cst32       %0, GFX_XYF(0x9234, 0xa678), 0
+    gfx_unpack  %2, %3, %0
+    cst16       %1, 0x9234, 0
+    neq         %2, %1
+     die
+    cst16       %1, 0xa678, 0
+    neq         %3, %1
+     die
+    // gfx_unpacks on halves with bit 15 clear: same results as gfx_unpack
+    cst32       %0, GFX_XYF(0x1234, 0x5678), 0
+    gfx_unpacks %2, %3, %0
+    cst16       %1, 0x1234, 0
+    neq         %2, %1
+     die
+    cst16       %1, 0x5678, 0
+    neq         %3, %1
+     die
+    // gfx_unpacks with bit 15 set: the halves are expected sign extended to 32 bits
+    cst32       %0, GFX_XYF(0x9234, 0xa678), 0
+    gfx_unpacks %2, %3, %0
+    cst32       %1, 0xffff9234, 0
+    neq         %2, %1
+     die
+    cst32       %1, 0xffffa678, 0
+    neq         %3, %1
+     die
+
+    // gfx_pack: repacking %2 and %3 left by the previous check must reproduce %0
+    gfx_pack    %5, %2, %3
+    neq         %5, %0
+     die
+
+    // gfx_packx: replaces the first component of %0 with %2, the second one is kept
+    cst32       %0, GFX_XYF(0x1234, 0x5678), 0
+    cst32       %1, GFX_XYF(0xabcd, 0x5678), 0	// expected result
+    cst16       %2, 0xabcd, 0
+    gfx_packx   %0, %2
+    neq         %1, %0
+     die
+
+    // gfx_packx0: first component taken from %2, second component expected to be 0
+    cst32       %0, GFX_XYF(0x1234, 0x5678), 0
+    cst32       %1, GFX_XYF(0xabcd, 0), 0	// expected result
+    cst16       %2, 0xabcd, 0
+    gfx_packx0  %0, %2
+    neq         %1, %0
+     die
+
+    // gfx_packy: replaces the second component of %0 with %3, the first one is kept
+    cst32       %0, GFX_XYF(0x1234, 0x5678), 0
+    cst32       %1, GFX_XYF(0x1234, 0xabcd), 0	// expected result
+    cst16       %3, 0xabcd, 0
+    gfx_packy   %0, %3
+    neq         %1, %0
+     die
+
+    // gfx_pack0y: second component taken from %3, first component expected to be 0
+    cst32       %0, GFX_XYF(0x1234, 0x5678), 0
+    cst32       %1, GFX_XYF(0, 0xabcd), 0	// expected result
+    cst16       %3, 0xabcd, 0
+    gfx_pack0y  %0, %3
+    neq         %1, %0
+     die
+
+    // gfx_addv / gfx_subv on random vectors, checked against scalar add / sub of the components
+    cst16       %8, 0x7ffe, 0	// component mask; two masked values still sum to less than 0x10000
+    cst8        %9, 255	// iteration counter consumed by the loop instruction below
+l1:
+    rand32      %0
+    and32       %0, %8
+    rand32      %1
+    and32       %1, %8
+    gfx_pack    %4, %0, %1	// %4 = first operand, components %0 and %1
+
+    rand32      %2
+    and32       %2, %8
+    rand32      %3
+    and32       %3, %8
+    gfx_pack    %5, %2, %3	// %5 = second operand, components %2 and %3
+    // vector add, then the same add done per component and repacked into %6
+    gfx_addv    %5, %4
+    add         %2, %0
+    add         %3, %1
+    gfx_pack    %6, %2, %3
+
+    neq         %5, %6
+     die
+    // vector subtract of the same operand, mirrored by scalar subtracts
+    gfx_subv    %5, %4
+    sub         %2, %0
+    sub         %3, %1
+    gfx_pack    %6, %2, %3
+
+    neq         %5, %6
+     die
+
+    loop        %9, l1
+
+    // gfx_addx, gfx_addy, gfx_mulxy on random signed operands, checked against scalar code
+    cst8        %9, 255	// iteration counter consumed by the loop instruction below
+l2:
+    rand32      %0
+    exts        %0, 15	// sign extend the random value from bit 15
+    rand32      %1
+    exts        %1, 15
+    gfx_pack    %4, %0, %1	// %4 = vector under test, components %0 and %1
+
+    rand32      %2
+    exts        %2, 15	// %2 = scalar operand
+    // gfx_addx must match adding %2 to the first component only
+    gfx_addx    %4, %2
+    add         %0, %2
+    gfx_pack    %6, %0, %1
+
+    neq         %4, %6
+     die
+    // gfx_addy must match adding %2 to the second component only
+    gfx_addy    %4, %2
+    add         %1, %2
+    gfx_pack    %6, %0, %1
+
+    neq         %4, %6
+     die
+    // gfx_mulxy must match (component * %2) >> 5 computed on each unpacked component
+    gfx_unpack  %0, %1, %4
+    gfx_mulxy   %4, %2
+    mul32       %0, %2
+    shi32r      %0, 5
+    mul32       %1, %2
+    shi32r      %1, 5
+    gfx_pack    %6, %0, %1
+
+    neq         %4, %6
+     die
+
+    loop        %9, l2
+
+    // gfx_addxi, gfx_addyi: add an immediate to one component of a packed vector.
+    // Every step is mirrored by scalar arithmetic on the components (%0, %1) and
+    // compared after repacking; an immediate of N is expected to add N * 32.
+    cst8        %9, 255
+l3:
+    rand32      %0
+    exts        %0, 15
+    rand32      %1
+    exts        %1, 15
+    gfx_pack    %2, %0, %1
+
+    gfx_addxi   %2, 42
+    cst16       %5, 42 * 32, 0
+    add         %0, %5
+    gfx_pack    %6, %0, %1
+
+    neq         %2, %6
+     die
+
+    gfx_addxi   %2, -41
+    cst16       %5, 41 * 32, 0
+    sub         %0, %5
+    gfx_pack    %6, %0, %1
+
+    // check the negative x immediate on its own, before the y component is touched
+    neq         %2, %6
+     die
+
+    gfx_addyi   %2, 42
+    cst16       %5, 42 * 32, 0
+    add         %1, %5
+    gfx_pack    %6, %0, %1
+
+    neq         %2, %6
+     die
+
+    gfx_addyi   %2, -41
+    cst16       %5, 41 * 32, 0
+    sub         %1, %5
+    gfx_pack    %6, %0, %1
+
+    neq         %2, %6
+     die
+
+    loop        %9, l3
+
+    // gfx_negv: both components negated, (27, 55) becomes (-27, -55)
+    cst32       %2, GFX_XY(27, 55), 0
+    cst32       %3, GFX_XY(-27, -55), 0
+    gfx_negv    %2
+    neq         %2, %3
+     die
+
+    // gfx_negx_swpv: (27, 55) is expected to become (55, -27)
+    cst32       %2, GFX_XY(27, 55), 0
+    cst32       %3, GFX_XY(55, -27), 0
+    gfx_negx_swpv %2
+    neq         %2, %3
+     die
+
+    // gfx_mul: scalar multiply of values carrying 5 fractional bits, 27 * 41
+    cst32       %1, 27<<5, 0
+    cst32       %2, 41<<5, 0
+    cst32       %3, (27*41)<<5, 0
+    gfx_mul     %1, %2
+    neq         %1, %3
+     die
+    // same product with a negated first operand, expecting the negated result
+    cst32       %1, 27<<5, 0
+    neg         %1
+    cst32       %2, 41<<5, 0
+    cst32       %3, (27*41)<<5, 0
+    neg         %3
+    gfx_mul     %1, %2
+    neq         %1, %3
+     die
+
+    // gfx_size: surface 0 was set up as 512 x 256 at the top of this test
+    gfx_size    %2, 0
+    cst32       %3, GFX_XY(512, 256), 0
+    neq         %2, %3
+     die
+
+    // gfx_hypot: length of (-40, 30) must be 50, with 5 fractional bits
+    cst32       %2, GFX_XY(-40, 30), 0
+    gfx_hypot   %2
+    cst32       %1, 50<<5, 0
+    neq         %2, %1
+     die
+
+    // gfx_sqrt: square root of 81 must be 9, both with 5 fractional bits
+    cst32       %2, 81<<5, 0
+    gfx_sqrt    %2
+    cst32       %1, 9<<5, 0
+    neq         %2, %1
+     die
+
+    // gfx_sincos then gfx_hypot: the length of the result must stay within 2 of %15
+    cst16       %15, 100 << 5, 0	// reference magnitude, also reused by the l5 loop
+    cst8        %9, 255	// iteration counter consumed by the loop instruction below
+    cst8        %8, 2	// largest accepted absolute error
+l4:
+    rand32      %1	// random input for gfx_sincos
+    gfx_sincos  %1, %15
+    // %1 = length of the produced vector minus the reference magnitude
+    gfx_hypot   %1
+    sub         %1, %15
+
+    tst32s      %1, 31   /* abs */
+     neg        %1
+
+    lt          %8, %1	// error above the bound held in %8
+     die
+    loop        %9, l4
+
+    // gfx_cos and gfx_sin on the same random input, packed and checked like the l4 loop
+    cst8        %9, 255	// iteration counter consumed by the loop instruction below
+    cst8        %8, 2	// largest accepted absolute error
+l5:
+    rand32      %4
+    mov         %5, %4	// same input for both functions
+    gfx_cos     %4, %15	// %15 still holds the 100 << 5 loaded before l4
+    gfx_sin     %5, %15
+    gfx_pack    %1, %4, %5
+    // %1 = length of the packed vector minus the reference magnitude
+    gfx_hypot   %1
+    sub         %1, %15
+
+    tst32s      %1, 31   /* abs */
+     neg        %1
+
+    lt          %8, %1	// error above the bound held in %8
+     die
+    loop        %9, l5
+
+    end
+//    dump
+//    die
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libgfx/test/test_blit.bc	Sun May 27 18:45:35 2018 +0200
@@ -0,0 +1,89 @@
+// Blit demo: random circles are drawn once, then blitted while positions and size change.
+.name test
+.custom gfx
+
+#include <gfx/bytecode.h>
+
+test_bytecode_entry:
+// %0 is the entry argument, used as the data pointer of the first surface.
+	.entry %0
+
+	cst32	    %1, GFX_SURFACE(512, 256, 0), 0
+	gfx_surface %0, %1, 0	// surface 0: data pointer %0, descriptor %1
+
+	gfx_clear   0
+
+	/* fill surface with random bubbles */
+	cst8	    %5, 255	// iteration counter for the loop instruction below
+r:
+	rand32      %2
+	cst32       %7, GFX_XY(511, 255), 0
+	and32	    %7, %2	// random bits masked by the GFX_XY(511, 255) pattern
+
+	rand32      %2
+	cst8        %15, 15
+	and32	    %15, %2	// %15 = random value in 0..15, presumably read by gfx_circle; confirm
+	gfx_circle  %7, 15
+
+	loop	    %5, r
+	// NOTE(review): presumably exchanges surfaces 0 and 1 so the circles become blit source 1; confirm
+	gfx_swap    0, 1
+
+loop:
+	cst32	    %10, 0x20000, 0
+	add         %10, %0	// %10 = entry pointer + 0x20000
+	gfx_surface %10, %1, 0	// surface 0 now uses the memory at %10, same descriptor %1
+
+	cst32       %3, GFX_XY(1, 1), 0	// per-frame step
+	cst32       %7, GFX_XY(0, 0), 0	// first blit operand
+	cst32       %2, GFX_XY(250, 0), 0	// second blit operand
+	cst32       %15, GFX_XY(100, 100), 0	// presumably the blit size, also the rectangle extent
+
+        cst8        %5, 64
+loop1:
+	gfx_clear 0
+	gfx_blit    %7, %2, 1
+	// outline from %7 to %7 + %15
+	mov 	    %4, %7
+	gfx_addv    %4, %15
+	gfx_rect    %4, %7
+
+	gfx_disp    0
+
+	gfx_addv    %7, %3	// this phase moves %7 by (1, 1) per frame
+
+        loop %5, loop1
+
+        cst8        %5, 64
+loop2:
+	gfx_clear 0
+	gfx_blit    %7, %2, 1
+	// outline from %7 to %7 + %15
+	mov 	    %4, %7
+	gfx_addv    %4, %15
+	gfx_rect    %4, %7
+
+	gfx_disp    0
+
+	gfx_addv    %2, %3	// this phase moves %2 by (1, 1) per frame, %7 stays put
+
+        loop %5, loop2
+
+loop3:	// last phase: %15 is reduced by %3 every frame until it reaches zero
+	gfx_clear 0
+	gfx_blit    %7, %2, 1
+
+	mov 	    %4, %7
+	gfx_addv    %4, %15	// packed vector add, same as in loop1 and loop2
+	gfx_rect    %4, %7
+
+ 	gfx_disp    0
+
+	gfx_subv    %15, %3
+
+	neq0	    %15
+	 jmp8 loop3
+
+ 	gfx_disp    1	// finally show surface 1
+//	jmp8	loop
+	end
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libgfx/test/test_circle.bc	Sun May 27 18:45:35 2018 +0200
@@ -0,0 +1,50 @@
+// Circle demo: draws circles around two fixed centres while %15 sweeps up and down.
+.name test
+.custom gfx
+
+#include <gfx/bytecode.h>
+
+test_bytecode_entry:
+// %0 is the entry argument, used as the data pointer of surface 0.
+	.entry %0
+
+	cst32	    %1, GFX_SURFACE(512, 256, 0), 0
+	gfx_surface %0, %1, 0
+
+	cst32       %0, GFX_XY(128, 128), 0	// first centre
+	cst32       %6, GFX_XY(384, 128), 0	// second centre
+
+	cst8        %15, 50	// animated value, presumably the radius read by the circle ops; confirm
+	cst8	    %14, 1	// step added to %15 each frame, sign flipped at loop
+
+loop:
+	neg	%14
+loop2:
+	gfx_clear 0
+	// NOTE(review): the second operand looks like a bit mask (1, 2, 4, 8, then 5 and 10); confirm its meaning
+	gfx_circle_i  %0, 1
+	gfx_circle_f  %0, 2
+	gfx_circle_i  %0, 4
+	gfx_circle_f  %0, 8
+	gfx_circle_f  %6, 1
+	gfx_circle_i  %6, 2
+	gfx_circle_f  %6, 4
+	gfx_circle_i  %6, 8
+	// %15 is offset by +20, then by -20, and restored before the frame ends
+	add8        %15, 20
+	gfx_circle  %0, 10
+	gfx_circle  %6, 5
+	add8        %15, -40
+	gfx_circle  %6, 10
+	gfx_circle  %0, 5
+	add8        %15, 20
+
+	gfx_disp 0
+
+	add	%15, %14
+	tst32c  %15, 7	// keep the current direction while bit 7 of %15 is clear
+	 jmp8 loop2
+
+	jmp8 loop
+	// not reached: the jmp8 above always branches back to loop
+	end
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libgfx/test/test_line.bc	Sun May 27 18:45:35 2018 +0200
@@ -0,0 +1,72 @@
+// Line demo: the end points %2 and %3 start on opposite corners of a square
+.name test
+.custom gfx
+
+#include <gfx/bytecode.h>
+
+test_bytecode_entry:
+
+    .entry %0
+
+    cst32       %1, GFX_SURFACE(512, 256, 0), 0
+    gfx_surface %0, %1, 0
+
+line_restart:
+    // %3 = far end of the line, %13 = stop value for %2, %2 = near end
+    cst32       %3, GFX_XY(180, 180), 0
+    cst32       %13, GFX_XY(20, 180), 0
+    cst32       %2, GFX_XY(20, 20), 0
+
+    // phase 1: %2 steps +1 in y and %3 steps -1 in y until %2 equals %13
+line_inc_y:
+    gfx_clear   0
+    gfx_line    %2, %3
+    gfx_disp    0
+
+    gfx_addyi   %2, 1
+    gfx_addyi   %3, -1
+
+    neq         %2, %13
+     jmp8       line_inc_y
+
+    cst32       %13, GFX_XY(180, 180), 0
+
+    // phase 2: %2 steps +1 in x and %3 steps -1 in x
+line_inc_x:
+    gfx_clear   0
+    gfx_line    %2, %3
+    gfx_disp    0
+
+    gfx_addxi   %2, 1
+    gfx_addxi   %3, -1
+
+    neq         %2, %13
+     jmp8       line_inc_x
+
+    cst32       %13, GFX_XY(180, 20), 0
+
+    // phase 3: %2 steps -1 in y and %3 steps +1 in y
+line_dec_y:
+    gfx_clear   0
+    gfx_line    %2, %3
+    gfx_disp    0
+
+    gfx_addyi   %2, -1
+    gfx_addyi   %3, 1
+
+    neq         %2, %13
+     jmp8       line_dec_y
+
+    cst32       %13, GFX_XY(20, 20), 0
+
+    // phase 4: %2 steps -1 in x and %3 steps +1 in x, back to the start corner
+line_dec_x:
+    gfx_clear   0
+    gfx_line    %2, %3
+    gfx_disp    0
+
+    gfx_addxi   %2, -1
+    gfx_addxi   %3, 1
+
+    neq         %2, %13
+     jmp8       line_dec_x
+
+    jmp8        line_restart
+
+    end
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libgfx/test/test_rect.bc	Sun May 27 18:45:35 2018 +0200
@@ -0,0 +1,43 @@
+// Rectangle demo: gfx_rect_r while %15 counts up, then gfx_rect_fr while it counts down
+.name test
+.custom gfx
+
+#include <gfx/bytecode.h>
+
+test_bytecode_entry:
+
+    .entry %0
+
+    cst32       %1, GFX_SURFACE(512, 256, 0), 0
+    gfx_surface %0, %1, 0
+
+//	gfx_attr_l8    255
+
+rect_restart:
+    // the two corners handed to the rectangle ops
+    cst32       %7, GFX_XY(300, 230), 0
+    cst32       %0, GFX_XY(100, 100), 0
+    // registers below are loaded but not named by any instruction in this file
+    cst32       %14, 0x500000, 0
+    cst32       %3, GFX_XY(1, 2), 0
+    cst32       %2, GFX_XY(0, 40), 0
+    cst32       %6, GFX_XY(384, 128), 0
+    cst8        %15, 0
+
+    // count %15 up by one per frame until bit 6 becomes set
+rect_ramp_up:
+    gfx_clear   100
+    gfx_rect_r  %7, %0
+    gfx_disp    0
+    add8        %15, 1
+    tst32c      %15, 6
+     jmp8       rect_ramp_up
+
+    // count %15 back down by one per frame until it reaches zero
+rect_ramp_down:
+    add8        %15, -1
+    gfx_clear   100
+    gfx_rect_fr %7, %0
+    gfx_disp    0
+    neq0        %15
+     jmp8       rect_ramp_down
+
+    jmp8        rect_restart
+
+    end
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libgfx/test/test_scroll.bc	Sun May 27 18:45:35 2018 +0200
@@ -0,0 +1,110 @@
+// Scroll demo: fills surface 0 with random circles, then scrolls it in four directions.
+.name test
+.custom gfx
+
+#include <gfx/bytecode.h>
+
+#define W 512
+#define H 256
+#define A 3     // scroll step
+
+test_bytecode_entry:
+// %0 is the entry argument, used as the data pointer of surface 0.
+    .entry %0
+
+    cst32	    %1, GFX_SURFACE(W, H, 0), 0
+    gfx_surface %0, %1, 0
+
+    gfx_clear   0
+
+    /* fill surface with random bubbles */
+    cst8	    %5, 255	// iteration counter for the loop instruction below
+r:
+    rand32      %2
+    cst32       %7, GFX_XY(511, 255), 0
+    and32	    %7, %2	// random bits masked by the GFX_XY(511, 255) pattern
+
+    rand32      %2
+    cst8        %15, 15
+    and32	    %15, %2	// %15 = random value in 0..15, presumably read by gfx_circle; confirm
+    gfx_circle  %7, 15
+
+    loop	    %5, r
+
+d:
+    // first phase, repeated with %5 as down counter starting at 32
+    cst8	    %5, 32
+d0:
+    gfx_disp    0
+    // NOTE(review): the strip written by the wrap blit lies inside the area named by the next blit; confirm intended
+    // wrap
+    cst32       %15, GFX_XY(A, H), 0	// presumably the blit size: A columns, full height
+    cst32       %1, GFX_XY(W-A, 0), 0
+    cst32       %2, GFX_XY(0, 0), 0
+    gfx_blit    %1, %2, 0
+
+    // scroll left
+    cst32       %15, GFX_XY(W-A, H), 0	// presumably the blit size: everything but A columns
+    cst32       %1, GFX_XY(0, 0), 0
+    cst32       %2, GFX_XY(A, 0), 0
+    gfx_blit_o  %1, %2
+
+    loop        %5, d0
+
+    cst8	%5, 32	// second phase, same frame count as the first
+d2:
+    gfx_disp    0
+
+    // wrap
+    cst32       %15, GFX_XY(W, A), 0	// presumably the blit size: full width, A rows
+    cst32       %1, GFX_XY(0, 0), 0
+    cst32       %2, GFX_XY(0, H-A), 0
+    gfx_blit    %1, %2, 0
+
+    // scroll down
+    cst32       %15, GFX_XY(W, H-A), 0	// presumably the blit size: everything but A rows
+    cst32       %1, GFX_XY(0, A), 0
+    cst32       %2, GFX_XY(0, 0), 0
+    gfx_blit_o  %1, %2
+
+    loop        %5, d2
+
+    cst8	%5, 32	// third phase, same frame count as the others
+d1:
+    gfx_disp    0
+
+    // wrap
+    cst32       %15, GFX_XY(A, H), 0	// presumably the blit size: A columns, full height
+    cst32       %1, GFX_XY(0, 0), 0
+    cst32       %2, GFX_XY(W-A, 0), 0
+    gfx_blit    %1, %2, 0
+
+    // scroll right
+    cst32       %15, GFX_XY(W-A, H), 0	// presumably the blit size: everything but A columns
+    cst32       %1, GFX_XY(A, 0), 0
+    cst32       %2, GFX_XY(0, 0), 0
+    gfx_blit_o  %1, %2
+
+    loop        %5, d1
+
+    cst8	%5, 32	// fourth phase, same frame count as the others
+d3:
+    gfx_disp    0
+
+    // wrap
+    cst32       %15, GFX_XY(W, A), 0	// presumably the blit size: full width, A rows
+    cst32       %1, GFX_XY(0, H-A), 0
+    cst32       %2, GFX_XY(0, 0), 0
+    gfx_blit    %1, %2, 0
+
+    // scroll up
+    cst32       %15, GFX_XY(W, H-A), 0	// presumably the blit size: everything but A rows
+    cst32       %1, GFX_XY(0, 0), 0
+    cst32       %2, GFX_XY(0, A), 0
+    gfx_blit_o  %1, %2
+
+    loop        %5, d3
+    // start over with the first phase
+    jmp8        d
+    // not reached: the jmp8 above always branches back to d
+    end
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libgfx/test/test_tile.bc	Sun May 27 18:45:35 2018 +0200
@@ -0,0 +1,85 @@
+// Tile demo: draws guide lines, then text and single characters from a tile map.
+.name test
+.custom gfx
+
+#include <gfx/bytecode.h>
+
+test_bytecode_entry:
+// %0 is the entry argument; it becomes the data pointer of surface 1 below.
+    .entry %0
+
+    cst32	    %1, GFX_SURFACE(512, 256, 0), 0
+    cst32	    %10, 0x20000, 0
+    add         %10, %0	// %10 = entry pointer + 0x20000
+    gfx_surface %10, %1, 0	// surface 0 (the one drawn to and displayed) uses the memory at %10
+
+    cst32	    %1, GFX_SURFACE(32, 256, 0), 0
+    gfx_surface %0, %1, 1	// surface 1 uses the memory at the entry pointer
+
+    cst32	    %2, GFX_TILEMAP(8, 11, 32), 0
+    gfx_tilemap %2, 1	// current tile map: descriptor %2, source surface 1
+
+    // Guide lines. Packed GFX_XY constants carry one component in each 16 bit
+    // half of the register, so they are always loaded with cst32.
+    cst32	    %4, GFX_XY(0, 128), 0
+    cst32	    %5, GFX_XY(511, 128), 0
+    gfx_line %4, %5
+
+    cst32	    %4, GFX_XY(128, 0), 0
+    cst32	    %5, GFX_XY(128, 255), 0
+    gfx_line %4, %5
+
+    cst32	    %4, GFX_XY(0, 64), 0
+    cst32	    %5, GFX_XY(511, 64), 0
+    gfx_line %4, %5
+
+    cst32	    %4, GFX_XY(64, 0), 0
+    cst32	    %5, GFX_XY(64, 255), 0
+    gfx_line %4, %5
+
+    // Strings: %3 points to the characters, %15 holds the length byte.
+    cst32	    %4, GFX_XY(64, 64), 0
+    laddr16     %3, str
+
+    ld8i %15, %3   /* read and skip the len byte */
+
+    // NOTE(review): the last operand (0..3) presumably selects the text direction; confirm
+    gfx_tilestr %4, %3, %15, 0
+    gfx_tilestr %4, %3, %15, 1
+    gfx_tilestr %4, %3, %15, 2
+    gfx_tilestr %4, %3, %15, 3
+
+    cst32	    %4, GFX_XY(128, 128), 0
+    gfx_tilestrc %4, %3, %15, 0
+
+    cst32	    %4, GFX_XY(128, 140), 0
+    gfx_tilestrc %4, %3, %15, 1
+
+    cst32	    %4, GFX_XY(128, 128), 0
+    gfx_tilestrc %4, %3, %15, 2
+
+    cst32	    %4, GFX_XY(140, 128), 0
+    gfx_tilestrc %4, %3, %15, 3
+
+    // Single tiles, each drawn where two guide lines cross.
+    cst32	    %4, GFX_XY(0, 32), 0
+    cst32	    %5, GFX_XY(64, 32), 0
+    gfx_line %4, %5
+
+    cst32	    %4, GFX_XY(32, 0), 0
+    cst32	    %5, GFX_XY(32, 255), 0
+    gfx_line %4, %5
+
+    cst8 %0, '?'
+    cst32	    %4, GFX_XY(32, 32), 0
+    gfx_tile %4, %0
+
+    cst32	    %4, GFX_XY(0, 96), 0
+    cst32	    %5, GFX_XY(64, 96), 0
+    gfx_line %4, %5
+
+    cst8 %0, '?'
+    cst32	    %4, GFX_XY(32, 96), 0
+    gfx_tilec %4, %0
+
+    gfx_disp    0
+
+    end
+
+str:
+	strp "Miaou miaou !!!"  /* pascal type string */