View source with formatted comments or as raw
    1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2007-2022, University of Amsterdam
    7                              VU University Amsterdam
    8                              SWI-Prolog Solutions b.v.
    9    All rights reserved.
   10
   11    Redistribution and use in source and binary forms, with or without
   12    modification, are permitted provided that the following conditions
   13    are met:
   14
   15    1. Redistributions of source code must retain the above copyright
   16       notice, this list of conditions and the following disclaimer.
   17
   18    2. Redistributions in binary form must reproduce the above copyright
   19       notice, this list of conditions and the following disclaimer in
   20       the documentation and/or other materials provided with the
   21       distribution.
   22
   23    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   24    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   25    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   26    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   27    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   28    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   29    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   30    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   31    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   32    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   33    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   34    POSSIBILITY OF SUCH DAMAGE.
   35*/
   36
   37:- module(base64,
   38          [ base64_encoded/3,		% ?Plain, ?Encoded, +Options
   39            base64_encoded//2,          % ?Plain, +Options
   40
   41            base64/2,                   % ?PlainText, ?Encoded
   42            base64//1,                  % ?PlainText
   43
   44            base64url/2,                % ?PlainText, ?Encoded
   45            base64url//1                % ?PlainText
   46          ]).   47:- autoload(library(error),
   48	    [instantiation_error/1,must_be/2,syntax_error/1]).   49:- autoload(library(option),[option/3]).   50
   51:- encoding(utf8).
   52
   53/** <module> Base64 encoding and decoding
   54
   55Prolog-based base64 encoding using  DCG   rules.  Encoding  according to
   56rfc2045. For example:
   57
   58==
   591 ?- base64('Hello World', X).
   60X = 'SGVsbG8gV29ybGQ='.
   61
   622 ?- base64(H, 'SGVsbG8gV29ybGQ=').
   63H = 'Hello World'.
   64==
   65
   66The Base64URL encoding provides a URL and file name friendly alternative
   67to base64. Base64URL encoded strings do not contain white space.
   68
   69@tbd    Stream I/O
   70@tbd    White-space introduction and parsing
   71@tbd	Encoding support (notably UTF-8)
   72@bug	Base64 only works with _bytes_.  The grammars do not check
   73        the input to be in the range 0..255.
   74*/
   75
   76%!  base64_encoded(+Plain, -Encoded, +Options) is det.
   77%!  base64_encoded(-Plain, +Encoded, +Options) is det.
   78%
   79%   General the base64 encoding and   decoding.  This predicate subsumes
   80%   base64/2  and  base64url/2,  providing  control  over  padding,  the
   81%   characters used for encoding and the output type. Options:
   82%
   83%     - charset(+Charset)
   84%     Define the encoding character set to use.  The (default) `classic`
   85%     uses the classical rfc2045 characters.  The value `url` uses URL
   86%     and file name friendly characters.  See base64url/2.
   87%     - padding(+Boolean)
   88%     If `true` (default), the output is padded with `=` characters.
   89%     - as(+Type)
   90%     Defines the type of the output.  One of `string` (default) or
   91%     `atom`.
   92%     - encoding(+Encoding)
   93%     Encoding to use for translation between (Unicode) text and
   94%     _bytes_ (Base64 is an encoding for bytes).  Default is `utf8`.
   95%
   96%   @arg Plain is an atom or string containing the unencoded (plain)
   97%   text.
   98%   @arg Encoded is an atom or string containing the base64 encoded
   99%   version of Plain.
  100
  101base64_encoded(Plain, Encoded, Options) :-
  102    option(charset(CharSet), Options, classic),
  103    option(padding(Padding), Options, true),
  104    option(as(As), Options, string),
  105    option(encoding(Enc), Options, utf8),
  106    (   nonvar(Plain)
  107    ->  string_bytes(Plain, PlainBytes, Enc),
  108        phrase(base64(Padding, PlainBytes, CharSet), EncCodes),
  109        as(As, Encoded, EncCodes, iso_latin_1)
  110    ;   nonvar(Encoded)
  111    ->  string_bytes(Encoded, EncCodes, iso_latin_1),
  112        phrase(base64(Padding, PlainBytes, CharSet), EncCodes),
  113        as(As, Plain, PlainBytes, Enc)
  114    ;   instantiation_error(base64(Plain, Encoded))
  115    ).
  116
  117as(atom, Atom, Codes, Enc) :-
  118    !,
  119    string_bytes(String, Codes, Enc),
  120    atom_string(Atom, String).
  121as(string, String, Codes, Enc) :-
  122    !,
  123    string_bytes(String, Codes, Enc).
  124as(As, _, _, _) :-
  125    must_be(oneof([atom,string]), As).
  126
  127%!  base64(+Plain, -Encoded) is det.
  128%!  base64(-Plain, +Encoded) is det.
  129%
  130%   Equivalent  to  base64_encoded/3  using  the  options  as(atom)  and
  131%   encoding(iso_latin_1).
  132%
  133%   @deprecated  New  code  should  use  base64_encoded/3.  Notably  the
  134%   `iso_latin_1` should be `utf8` in most today's applications.
  135
  136base64(Plain, Encoded) :-
  137    base64_encoded(Plain, Encoded, [ as(atom), encoding(iso_latin_1) ]).
  138
  139%!  base64url(+Plain, -Encoded) is det.
  140%!  base64url(-Plain, +Encoded) is det.
  141%
  142%   Translates between plaintext  and  base64url   encoded  atom  or
  143%   string. Base64URL encoded values can safely  be used as URLs and
  144%   file names. The use "-" instead of   "+", "_" instead of "/" and
  145%   do not use padding. This implies   that the encoded value cannot
  146%   be embedded inside a longer string.
  147%
  148%   Equivalent  to  base64_encoded/3   using    the   options  as(atom),
  149%   encoding(utf8) and charset(url).
  150
  151base64url(Plain, Encoded) :-
  152    base64_encoded(Plain, Encoded,
  153                   [ as(atom),
  154                     encoding(utf8),
  155                     charset(url)
  156                   ]).
  157
  158%!  base64_encoded(+PlainText, +Options)// is det.
  159%!  base64_encoded(-PlainText, +Options)// is det.
  160
  161base64_encoded(PlainText, Options) -->
  162    { option(charset(CharSet), Options, classic),
  163      option(padding(Padding), Options, true)
  164    },
  165    base64(Padding, PlainText, CharSet).
  166
  167
  168%!  base64(+PlainText)// is det.
  169%!  base64(-PlainText)// is det.
  170%
  171%   Encode/decode list of character codes using _base64_.  See also
  172%   base64/2.
  173
  174base64(PlainText) -->
  175    base64(true, PlainText, classic).
  176
  177%!  base64url(+PlainText)// is det.
  178%!  base64url(-PlainText)// is det.
  179%
  180%   Encode/decode list of character codes  using Base64URL. See also
  181%   base64url/2.
  182
  183base64url(PlainText) -->
  184    base64(false, PlainText, url).
  185
  186base64(Padded, Input, Charset) -->
  187    { nonvar(Input) },
  188    !,
  189    encode(Padded, Input, Charset).
  190base64(Padded, Output, Charset) -->
  191    decode(Padded, Output, Charset).
  192
  193                 /*******************************
  194                 *            ENCODING          *
  195                 *******************************/
  196
  197%!  encode(+Padded, +PlainText, +Charset)//
  198
  199encode(Padded, [I0, I1, I2|Rest], Charset) -->
  200    !,
  201    [O0, O1, O2, O3],
  202    { A is (I0<<16)+(I1<<8)+I2,
  203      O00 is (A>>18) /\ 0x3f,
  204      O01 is (A>>12) /\ 0x3f,
  205      O02 is  (A>>6) /\ 0x3f,
  206      O03 is       A /\ 0x3f,
  207      base64_char(Charset, O00, O0),
  208      base64_char(Charset, O01, O1),
  209      base64_char(Charset, O02, O2),
  210      base64_char(Charset, O03, O3)
  211    },
  212    encode(Padded, Rest, Charset).
  213encode(true, [I0, I1], Charset) -->
  214    !,
  215    [O0, O1, O2, 0'=],
  216    { A is (I0<<16)+(I1<<8),
  217      O00 is (A>>18) /\ 0x3f,
  218      O01 is (A>>12) /\ 0x3f,
  219      O02 is  (A>>6) /\ 0x3f,
  220      base64_char(Charset, O00, O0),
  221      base64_char(Charset, O01, O1),
  222      base64_char(Charset, O02, O2)
  223    }.
  224encode(true, [I0], Charset) -->
  225    !,
  226    [O0, O1, 0'=, 0'=],
  227    { A is (I0<<16),
  228      O00 is (A>>18) /\ 0x3f,
  229      O01 is (A>>12) /\ 0x3f,
  230      base64_char(Charset, O00, O0),
  231      base64_char(Charset, O01, O1)
  232    }.
  233encode(false, [I0, I1], Charset) -->
  234    !,
  235    [O0, O1, O2],
  236    { A is (I0<<16)+(I1<<8),
  237      O00 is (A>>18) /\ 0x3f,
  238      O01 is (A>>12) /\ 0x3f,
  239      O02 is  (A>>6) /\ 0x3f,
  240      base64_char(Charset, O00, O0),
  241      base64_char(Charset, O01, O1),
  242      base64_char(Charset, O02, O2)
  243    }.
  244encode(false, [I0], Charset) -->
  245    !,
  246    [O0, O1],
  247    { A is (I0<<16),
  248      O00 is (A>>18) /\ 0x3f,
  249      O01 is (A>>12) /\ 0x3f,
  250      base64_char(Charset, O00, O0),
  251      base64_char(Charset, O01, O1)
  252    }.
  253encode(_, [], _) -->
  254    [].
  255
  256
  257                 /*******************************
  258                 *            DECODE            *
  259                 *******************************/
  260
  261%!  decode(+Padded, -PlainText, +Charset)//
  262
  263decode(true, Text, Charset) -->
  264    [C0, C1, C2, C3],
  265    !,
  266    { base64_char(Charset, B0, C0),
  267      base64_char(Charset, B1, C1)
  268    },
  269    !,
  270    {   C3 == 0'=
  271    ->  (   C2 == 0'=
  272        ->  A is (B0<<18) + (B1<<12),
  273            I0 is (A>>16) /\ 0xff,
  274            Text = [I0|Rest]
  275        ;   base64_char(Charset, B2, C2)
  276        ->  A is (B0<<18) + (B1<<12) + (B2<<6),
  277            I0 is (A>>16) /\ 0xff,
  278            I1 is  (A>>8) /\ 0xff,
  279            Text = [I0,I1|Rest]
  280        )
  281    ;   base64_char(Charset, B2, C2),
  282        base64_char(Charset, B3, C3)
  283    ->  A is (B0<<18) + (B1<<12) + (B2<<6) + B3,
  284        I0 is (A>>16) /\ 0xff,
  285        I1 is  (A>>8) /\ 0xff,
  286        I2 is      A  /\ 0xff,
  287        Text = [I0,I1,I2|Rest]
  288    },
  289    decode(true, Rest, Charset).
  290decode(false, Text, Charset) -->
  291    [C0, C1, C2, C3],
  292    !,
  293    { base64_char(Charset, B0, C0),
  294      base64_char(Charset, B1, C1),
  295      base64_char(Charset, B2, C2),
  296      base64_char(Charset, B3, C3),
  297      A is (B0<<18) + (B1<<12) + (B2<<6) + B3,
  298      I0 is (A>>16) /\ 0xff,
  299      I1 is  (A>>8) /\ 0xff,
  300      I2 is      A  /\ 0xff,
  301      Text = [I0,I1,I2|Rest]
  302    },
  303    decode(false, Rest, Charset).
  304decode(false, Text, Charset) -->
  305    [C0, C1, C2],
  306    !,
  307    { base64_char(Charset, B0, C0),
  308      base64_char(Charset, B1, C1),
  309      base64_char(Charset, B2, C2),
  310      A is (B0<<18) + (B1<<12) + (B2<<6),
  311      I0 is (A>>16) /\ 0xff,
  312      I1 is  (A>>8) /\ 0xff,
  313      Text = [I0,I1]
  314    }.
  315decode(false, Text, Charset) -->
  316    [C0, C1],
  317    !,
  318    { base64_char(Charset, B0, C0),
  319      base64_char(Charset, B1, C1),
  320      A is (B0<<18) + (B1<<12),
  321      I0 is (A>>16) /\ 0xff,
  322      Text = [I0]
  323    }.
  324decode(_, [], _) -->
  325    [].
  326
  327
  328
  329                 /*******************************
  330                 *   BASIC CHARACTER ENCODING   *
  331                 *******************************/
  332
  333base64_char(00, 0'A).
  334base64_char(01, 0'B).
  335base64_char(02, 0'C).
  336base64_char(03, 0'D).
  337base64_char(04, 0'E).
  338base64_char(05, 0'F).
  339base64_char(06, 0'G).
  340base64_char(07, 0'H).
  341base64_char(08, 0'I).
  342base64_char(09, 0'J).
  343base64_char(10, 0'K).
  344base64_char(11, 0'L).
  345base64_char(12, 0'M).
  346base64_char(13, 0'N).
  347base64_char(14, 0'O).
  348base64_char(15, 0'P).
  349base64_char(16, 0'Q).
  350base64_char(17, 0'R).
  351base64_char(18, 0'S).
  352base64_char(19, 0'T).
  353base64_char(20, 0'U).
  354base64_char(21, 0'V).
  355base64_char(22, 0'W).
  356base64_char(23, 0'X).
  357base64_char(24, 0'Y).
  358base64_char(25, 0'Z).
  359base64_char(26, 0'a).
  360base64_char(27, 0'b).
  361base64_char(28, 0'c).
  362base64_char(29, 0'd).
  363base64_char(30, 0'e).
  364base64_char(31, 0'f).
  365base64_char(32, 0'g).
  366base64_char(33, 0'h).
  367base64_char(34, 0'i).
  368base64_char(35, 0'j).
  369base64_char(36, 0'k).
  370base64_char(37, 0'l).
  371base64_char(38, 0'm).
  372base64_char(39, 0'n).
  373base64_char(40, 0'o).
  374base64_char(41, 0'p).
  375base64_char(42, 0'q).
  376base64_char(43, 0'r).
  377base64_char(44, 0's).
  378base64_char(45, 0't).
  379base64_char(46, 0'u).
  380base64_char(47, 0'v).
  381base64_char(48, 0'w).
  382base64_char(49, 0'x).
  383base64_char(50, 0'y).
  384base64_char(51, 0'z).
  385base64_char(52, 0'0).
  386base64_char(53, 0'1).
  387base64_char(54, 0'2).
  388base64_char(55, 0'3).
  389base64_char(56, 0'4).
  390base64_char(57, 0'5).
  391base64_char(58, 0'6).
  392base64_char(59, 0'7).
  393base64_char(60, 0'8).
  394base64_char(61, 0'9).
  395base64_char(62, 0'+).
  396base64_char(63, 0'/).
  397
  398base64url_char_x(62, 0'-).
  399base64url_char_x(63, 0'_).
  400
  401base64_char(classic, Value, Char) :-
  402    (   base64_char(Value, Char)
  403    ->  true
  404    ;   syntax_error(base64_char(Value, Char))
  405    ).
  406base64_char(url, Value, Char) :-
  407    (   base64url_char_x(Value, Char)
  408    ->  true
  409    ;   base64_char(Value, Char)
  410    ->  true
  411    ;   syntax_error(base64_char(Value, Char))
  412    ).
  413
  414
  415                 /*******************************
  416                 *            MESSAGES          *
  417                 *******************************/
  418
  419:- multifile prolog:error_message//1.  420
  421prolog:error_message(syntax_error(base64_char(_D,E))) -->
  422    { nonvar(E) },
  423    !,
  424    [ 'Illegal Base64 character: "~c"'-[E] ]