View source with formatted comments or as raw
    1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2002-2022, University of Amsterdam
    7                              VU University Amsterdam
    8                              CWI, Amsterdam
    9                              SWI-Prolog Solutions b.v.
   10    All rights reserved.
   11
   12    Redistribution and use in source and binary forms, with or without
   13    modification, are permitted provided that the following conditions
   14    are met:
   15
   16    1. Redistributions of source code must retain the above copyright
   17       notice, this list of conditions and the following disclaimer.
   18
   19    2. Redistributions in binary form must reproduce the above copyright
   20       notice, this list of conditions and the following disclaimer in
   21       the documentation and/or other materials provided with the
   22       distribution.
   23
   24    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   25    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   26    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   27    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   28    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   29    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   30    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   31    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   32    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   33    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   34    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   35    POSSIBILITY OF SUCH DAMAGE.
   36*/
   37
   38:- module(http_open,
   39          [ http_open/3,                % +URL, -Stream, +Options
   40            http_set_authorization/2,   % +URL, +Authorization
   41            http_close_keep_alive/1     % +Address
   42          ]).   43:- autoload(library(aggregate),[aggregate_all/3]).   44:- autoload(library(apply),[foldl/4,include/3]).   45:- autoload(library(base64),[base64/3]).   46:- autoload(library(debug),[debug/3,debugging/1]).   47:- autoload(library(error),
   48	    [ domain_error/2, must_be/2, existence_error/2, instantiation_error/1
   49	    ]).   50:- autoload(library(lists),[last/2,member/2]).   51:- autoload(library(option),
   52	    [ meta_options/3, option/2, select_option/4, merge_options/3,
   53	      option/3, select_option/3
   54	    ]).   55:- autoload(library(readutil),[read_line_to_codes/2]).   56:- autoload(library(uri),
   57	    [ uri_resolve/3, uri_components/2, uri_data/3,
   58              uri_authority_components/2, uri_authority_data/3,
   59	      uri_encoded/3, uri_query_components/2, uri_is_global/1
   60	    ]).   61:- autoload(library(http/http_header),
   62            [ http_parse_header/2, http_post_data/3 ]).   63:- autoload(library(http/http_stream),[stream_range_open/3]).   64:- if(exists_source(library(ssl))).   65:- autoload(library(ssl), [ssl_upgrade_legacy_options/2]).   66:- endif.   67:- use_module(library(socket)).   68
   69
   70/** <module> HTTP client library
   71
   72This library defines http_open/3, which opens an URL as a Prolog stream.
   73The functionality of the  library  can   be  extended  by  loading two
   74additional modules that act as plugins:
   75
   76    * library(http/http_ssl_plugin)
   77    Loading this library causes http_open/3 to handle HTTPS connections.
   78    Relevant options for SSL certificate handling are handed to
   79    ssl_context/3. This plugin is loaded automatically if the scheme
   80    `https` is requested using a default SSL context. See the plugin for
   81    additional information regarding security.
   82
   83    * library(zlib)
   84    Loading this library supports the `gzip` transfer encoding.  This
   85    plugin is lazily loaded if a connection is opened that claims this
   86    transfer encoding.
   87
   88    * library(http/http_cookie)
   89    Loading this library adds tracking cookies to http_open/3. Returned
   90    cookies are collected in the Prolog database and supplied for
   91    subsequent requests.
   92
   93    * library(http/http_stream)
   94    This library adds support for _chunked_ encoding and makes the
   95    http_open/3 advertise itself as HTTP/1.1 instead of HTTP/1.0.
   96
   97
   98Here is a simple example to fetch a web-page:
   99
  100```
  101?- http_open('http://www.google.com/search?q=prolog', In, []),
  102   copy_stream_data(In, user_output),
  103   close(In).
  104<!doctype html><head><title>prolog - Google Search</title><script>
  105...
  106```
  107
  108The example below fetches the modification time of a web-page. Note that
  109=|Modified|= is =|''|= (the empty atom) if the  web-server does not provide a
  110time-stamp for the resource. See also parse_time/2.
  111
  112```
  113modified(URL, Stamp) :-
  114       http_open(URL, In,
  115                 [ method(head),
  116                   header(last_modified, Modified)
  117                 ]),
  118       close(In),
  119       Modified \== '',
  120       parse_time(Modified, Stamp).
  121```
  122
  123Then next example uses Google search. It exploits library(uri) to manage
  124URIs, library(sgml) to load  an  HTML   document  and  library(xpath) to
  125navigate the parsed HTML. Note that  you   may  need to adjust the XPath
  126queries if the data returned by Google changes (this example indeed
  127no longer works and currently fails at the first xpath/3 call)
  128
  129```
  130:- use_module(library(http/http_open)).
  131:- use_module(library(xpath)).
  132:- use_module(library(sgml)).
  133:- use_module(library(uri)).
  134
  135google(For, Title, HREF) :-
  136        uri_encoded(query_value, For, Encoded),
  137        atom_concat('http://www.google.com/search?q=', Encoded, URL),
  138        http_open(URL, In, []),
  139        call_cleanup(
  140            load_html(In, DOM, []),
  141            close(In)),
  142        xpath(DOM, //h3(@class=r), Result),
  143        xpath(Result, //a(@href=HREF0, text), Title),
  144        uri_components(HREF0, Components),
  145        uri_data(search, Components, Query),
  146        uri_query_components(Query, Parts),
  147        memberchk(q=HREF, Parts).
  148```
  149
  150An example query is below:
  151
  152```
  153?- google(prolog, Title, HREF).
  154Title = 'SWI-Prolog',
  155HREF = 'http://www.swi-prolog.org/' ;
  156Title = 'Prolog - Wikipedia',
  157HREF = 'https://nl.wikipedia.org/wiki/Prolog' ;
  158Title = 'Prolog - Wikipedia, the free encyclopedia',
  159HREF = 'https://en.wikipedia.org/wiki/Prolog' ;
  160Title = 'Pro-Log is logistiek dienstverlener m.b.t. vervoer over water.',
  161HREF = 'http://www.pro-log.nl/' ;
  162Title = 'Learn Prolog Now!',
  163HREF = 'http://www.learnprolognow.org/' ;
  164Title = 'Free Online Version - Learn Prolog
  165...
  166```
  167
  168@see load_html/3 and xpath/3 can be used to parse and navigate HTML
  169     documents.
  170@see http_get/3 and http_post/4 provide an alternative interface that
  171     convert the reply depending on the =|Content-Type|= header.
  172*/
  173
  174:- multifile
  175    http:encoding_filter/3,           % +Encoding, +In0, -In
  176    http:current_transfer_encoding/1, % ?Encoding
  177    http:disable_encoding_filter/1,   % +ContentType
  178    http:http_protocol_hook/5,        % +Protocol, +Parts, +StreamPair,
  179                                      % -NewStreamPair, +Options
  180    http:open_options/2,              % +Parts, -Options
  181    http:write_cookies/3,             % +Out, +Parts, +Options
  182    http:update_cookies/3,            % +CookieLine, +Parts, +Options
  183    http:authenticate_client/2,       % +URL, +Action
  184    http:http_connection_over_proxy/6.  185
  186:- meta_predicate
  187    http_open(+,-,:).  188
  189:- predicate_options(http_open/3, 3,
  190                     [ authorization(compound),
  191                       final_url(-atom),
  192                       header(+atom, -atom),
  193                       headers(-list),
  194                       raw_headers(-list(string)),
  195                       connection(+atom),
  196                       method(oneof([delete,get,put,head,post,patch,options])),
  197                       size(-integer),
  198                       status_code(-integer),
  199                       output(-stream),
  200                       timeout(number),
  201                       unix_socket(+atom),
  202                       proxy(atom, integer),
  203                       proxy_authorization(compound),
  204                       bypass_proxy(boolean),
  205                       request_header(any),
  206                       user_agent(atom),
  207                       version(-compound),
  208        % The option below applies if library(http/http_header) is loaded
  209                       post(any),
  210        % The options below apply if library(http/http_ssl_plugin)) is loaded
  211                       pem_password_hook(callable),
  212                       cacert_file(atom),
  213                       cert_verify_hook(callable)
  214                     ]).  215
  216%!  user_agent(-Agent) is det.
  217%
  218%   Default value for =|User-Agent|=,  can   be  overruled using the
  219%   option user_agent(Agent) of http_open/3.
  220
  221user_agent('SWI-Prolog').
  222
  223%!  http_open(+URL, -Stream, +Options) is det.
  224%
  225%   Open the data at the HTTP  server   as  a  Prolog stream. URL is
  226%   either an atom  specifying  a  URL   or  a  list  representing a
  227%   broken-down  URL  as  specified  below.   After  this  predicate
  228%   succeeds the data can be read from Stream. After completion this
  229%   stream must be  closed  using   the  built-in  Prolog  predicate
  230%   close/1. Options provides additional options:
  231%
  232%     * authenticate(+Boolean)
  233%     If `false` (default `true`), do _not_ try to automatically
  234%     authenticate the client if a 401 (Unauthorized) status code
  235%     is received.
  236%
  237%     * authorization(+Term)
  238%     Send authorization. See also http_set_authorization/2. Supported
  239%     schemes:
  240%
  241%       - basic(+User, +Password)
  242%       HTTP Basic authentication.
  243%       - bearer(+Token)
  244%       HTTP Bearer authentication.
  245%       - digest(+User, +Password)
  246%       HTTP Digest authentication.  This option is only provided
  247%       if the plugin library(http/http_digest) is also loaded.
  248%
  249%     * unix_socket(+Path)
  250%     Connect to the given Unix domain socket.  In this scenario
  251%     the host name and port or ignored.  If the server replies
  252%     with a _redirect_ message and the host differs from the
  253%     original host as normal TCP connection is used to handle
  254%     the redirect.  This option is inspired by curl(1)'s option
  255%     `--unix-socket`.
  256%
  257%     * connection(+Connection)
  258%     Specify the =Connection= header.  Default is =close=.  The
  259%     alternative is =|Keep-alive|=.  This maintains a pool of
  260%     available connections as determined by keep_connection/1.
  261%     The library(http/websockets) uses =|Keep-alive, Upgrade|=.
  262%     Keep-alive connections can be closed explicitly using
  263%     http_close_keep_alive/1. Keep-alive connections may
  264%     significantly improve repetitive requests on the same server,
  265%     especially if the IP route is long, HTTPS is used or the
  266%     connection uses a proxy.
  267%
  268%     * final_url(-FinalURL)
  269%     Unify FinalURL with the final   destination. This differs from
  270%     the  original  URL  if  the  returned  head  of  the  original
  271%     indicates an HTTP redirect (codes 301,  302 or 303). Without a
  272%     redirect, FinalURL is the same as URL if  URL is an atom, or a
  273%     URL constructed from the parts.
  274%
  275%     * header(Name, -AtomValue)
  276%     If provided, AtomValue is  unified  with   the  value  of  the
  277%     indicated  field  in  the  reply    header.  Name  is  matched
  278%     case-insensitive and the underscore  (_)   matches  the hyphen
  279%     (-). Multiple of these options  may   be  provided  to extract
  280%     multiple  header  fields.  If  the  header  is  not  available
  281%     AtomValue is unified to the empty atom ('').
  282%
  283%     * headers(-List)
  284%     If provided, List is unified with  a list of Name(Value) pairs
  285%     corresponding to fields in the reply   header.  Name and Value
  286%     follow the same conventions  used   by  the header(Name,Value)
  287%     option.  See also raw_headers(-List) which provides the entire
  288%     HTTP reply header in unparsed representation.
  289%
  290%     * method(+Method)
  291%     One of =get= (default), =head=, =delete=, =post=,   =put=   or
  292%     =patch=.
  293%     The  =head= message can be
  294%     used in combination with  the   header(Name,  Value) option to
  295%     access information on the resource   without actually fetching
  296%     the resource itself.  The  returned   stream  must  be  closed
  297%     immediately.
  298%
  299%     If post(Data) is provided, the default is =post=.
  300%
  301%     * size(-Size)
  302%     Size is unified with the   integer value of =|Content-Length|=
  303%     in the reply header.
  304%
  305%     * version(-Version)
  306%     Version is a _pair_ `Major-Minor`, where `Major` and `Minor`
  307%     are integers representing the HTTP version in the reply header.
  308%
  309%     * range(+Range)
  310%     Ask for partial content. Range   is  a term _|Unit(From,To)|_,
  311%     where `From` is an integer and `To`   is  either an integer or
  312%     the atom `end`. HTTP 1.1 only   supports Unit = `bytes`. E.g.,
  313%     to   ask   for    bytes    1000-1999,     use    the    option
  314%     range(bytes(1000,1999))
  315%
  316%     * raw_encoding(+Encoding)
  317%     Do not install a decoding filter for Encoding.  For example,
  318%     using raw_encoding('applocation/gzip') the system will not
  319%     decompress the stream if it is compressed using `gzip`.
  320%
  321%     * raw_headers(-Lines)
  322%     Unify Lines with a list of strings that represents the complete
  323%     reply header returned by the server.  See also headers(-List).
  324%
  325%     * redirect(+Boolean)
  326%     If `false` (default `true`), do _not_ automatically redirect
  327%     if a 3XX code is received.  Must be combined with
  328%     status_code(Code) and one of the header options to read the
  329%     redirect reply. In particular, without status_code(Code) a
  330%     redirect is mapped to an exception.
  331%
  332%     * status_code(-Code)
  333%     If this option is  present  and   Code  unifies  with the HTTP
  334%     status code, do *not* translate errors (4xx, 5xx) into an
  335%     exception. Instead, http_open/3 behaves as if 2xx (success) is
  336%     returned, providing the application to read the error document
  337%     from the returned stream.
  338%
  339%     * output(-Out)
  340%     Unify the output stream with Out and do not close it. This can
  341%     be used to upgrade a connection.
  342%
  343%     * timeout(+Timeout)
  344%     If provided, set a timeout on   the stream using set_stream/2.
  345%     With this option if no new data arrives within Timeout seconds
  346%     the stream raises an exception.  Default   is  to wait forever
  347%     (=infinite=).
  348%
  349%     * post(+Data)
  350%     Issue a =POST= request on the HTTP server.  Data is
  351%     handed to http_post_data/3.
  352%
  353%     * proxy(+Host:Port)
  354%     Use an HTTP proxy to connect to the outside world.  See also
  355%     socket:proxy_for_url/3.  This option overrules the proxy
  356%     specification defined by socket:proxy_for_url/3.
  357%
  358%     * proxy(+Host, +Port)
  359%     Synonym for proxy(+Host:Port).  Deprecated.
  360%
  361%     * proxy_authorization(+Authorization)
  362%     Send authorization to the proxy.  Otherwise   the  same as the
  363%     =authorization= option.
  364%
  365%     * bypass_proxy(+Boolean)
  366%     If =true=, bypass proxy hooks.  Default is =false=.
  367%
  368%     * request_header(Name = Value)
  369%     Additional  name-value  parts  are  added   in  the  order  of
  370%     appearance to the HTTP request   header.  No interpretation is
  371%     done.
  372%
  373%     * max_redirect(+Max)
  374%     Sets the maximum length of a redirection chain.  This is needed
  375%     for some IRIs that redirect indefinitely to other IRIs without
  376%     looping (e.g., redirecting to IRIs with a random element in them).
  377%     Max must be either a non-negative integer or the atom `infinite`.
  378%     The default value is `10`.
  379%
  380%     * user_agent(+Agent)
  381%     Defines the value of the  =|User-Agent|=   field  of  the HTTP
  382%     header. Default is =SWI-Prolog=.
  383%
  384%   The hook http:open_options/2 can  be   used  to  provide default
  385%   options   based   on   the   broken-down     URL.   The   option
  386%   status_code(-Code)  is  particularly  useful   to  query  *REST*
  387%   interfaces that commonly return status   codes  other than `200`
  388%   that need to be be processed by the client code.
  389%
  390%   @param URL is either an atom or string (url) or a list of _parts_.
  391%
  392%               When provided, this list may contain the fields
  393%               =scheme=, =user=, =password=, =host=, =port=, =path=
  394%               and either =query_string= (whose argument is an atom)
  395%               or =search= (whose argument is a list of
  396%               =|Name(Value)|= or =|Name=Value|= compound terms).
  397%               Only =host= is mandatory.  The example below opens the
  398%               URL =|http://www.example.com/my/path?q=Hello%20World&lang=en|=.
  399%               Note that values must *not* be quoted because the
  400%               library inserts the required quotes.
  401%
  402%               ```
  403%               http_open([ host('www.example.com'),
  404%                           path('/my/path'),
  405%                           search([ q='Hello world',
  406%                                    lang=en
  407%                                  ])
  408%                         ])
  409%               ```
  410%
  411%   @throws error(existence_error(url, Id),Context) is raised if the
  412%   HTTP result code is not in the range 200..299. Context has the
  413%   shape context(Message, status(Code, TextCode)), where `Code` is the
  414%   numeric HTTP code and `TextCode` is the textual description thereof
  415%   provided by the server. `Message` may provide additional details or
  416%   may be unbound.
  417%
  418%   @see ssl_context/3 for SSL related options if
  419%   library(http/http_ssl_plugin) is loaded.
  420
  421:- multifile
  422    socket:proxy_for_url/3.           % +URL, +Host, -ProxyList
  423
  424http_open(URL, Stream, QOptions) :-
  425    meta_options(is_meta, QOptions, Options0),
  426    (   atomic(URL)
  427    ->  parse_url_ex(URL, Parts)
  428    ;   Parts = URL
  429    ),
  430    autoload_https(Parts),
  431    upgrade_ssl_options(Parts, Options0, Options),
  432    add_authorization(Parts, Options, Options1),
  433    findall(HostOptions, hooked_options(Parts, HostOptions), AllHostOptions),
  434    foldl(merge_options_rev, AllHostOptions, Options1, Options2),
  435    (   option(bypass_proxy(true), Options)
  436    ->  try_http_proxy(direct, Parts, Stream, Options2)
  437    ;   term_variables(Options2, Vars2),
  438        findall(Result-Vars2,
  439                try_a_proxy(Parts, Result, Options2),
  440                ResultList),
  441        last(ResultList, Status-Vars2)
  442    ->  (   Status = true(_Proxy, Stream)
  443        ->  true
  444        ;   throw(error(proxy_error(tried(ResultList)), _))
  445        )
  446    ;   try_http_proxy(direct, Parts, Stream, Options2)
  447    ).
  448
  449try_a_proxy(Parts, Result, Options) :-
  450    parts_uri(Parts, AtomicURL),
  451    option(host(Host), Parts),
  452    (   option(unix_socket(Path), Options)
  453    ->  Proxy = unix_socket(Path)
  454    ;   (   option(proxy(ProxyHost:ProxyPort), Options)
  455        ;   is_list(Options),
  456            memberchk(proxy(ProxyHost,ProxyPort), Options)
  457        )
  458    ->  Proxy = proxy(ProxyHost, ProxyPort)
  459    ;   socket:proxy_for_url(AtomicURL, Host, Proxy)
  460    ),
  461    debug(http(proxy),
  462          'http_open: Connecting via ~w to ~w', [Proxy, AtomicURL]),
  463    (   catch(try_http_proxy(Proxy, Parts, Stream, Options), E, true)
  464    ->  (   var(E)
  465        ->  !, Result = true(Proxy, Stream)
  466        ;   Result = error(Proxy, E)
  467        )
  468    ;   Result = false(Proxy)
  469    ),
  470    debug(http(proxy), 'http_open: ~w: ~p', [Proxy, Result]).
  471
  472try_http_proxy(Method, Parts, Stream, Options0) :-
  473    option(host(Host), Parts),
  474    proxy_request_uri(Method, Parts, RequestURI),
  475    select_option(visited(Visited0), Options0, OptionsV, []),
  476    Options = [visited([Parts|Visited0])|OptionsV],
  477    parts_scheme(Parts, Scheme),
  478    default_port(Scheme, DefPort),
  479    url_part(port(Port), Parts, DefPort),
  480    host_and_port(Host, DefPort, Port, HostPort),
  481    (   option(connection(Connection), Options0),
  482        keep_alive(Connection),
  483        get_from_pool(Host:Port, StreamPair),
  484        debug(http(connection), 'Trying Keep-alive to ~p using ~p',
  485              [ Host:Port, StreamPair ]),
  486        catch(send_rec_header(StreamPair, Stream, HostPort,
  487                              RequestURI, Parts, Options),
  488              error(E,_),
  489              keep_alive_error(E))
  490    ->  true
  491    ;   http:http_connection_over_proxy(Method, Parts, Host:Port,
  492                                        SocketStreamPair, Options, Options1),
  493        (   catch(http:http_protocol_hook(Scheme, Parts,
  494                                          SocketStreamPair,
  495                                          StreamPair, Options),
  496                  Error,
  497                  ( close(SocketStreamPair, [force(true)]),
  498                    throw(Error)))
  499        ->  true
  500        ;   StreamPair = SocketStreamPair
  501        ),
  502        send_rec_header(StreamPair, Stream, HostPort,
  503                        RequestURI, Parts, Options1)
  504    ),
  505    return_final_url(Options).
  506
  507proxy_request_uri(direct, Parts, RequestURI) :-
  508    !,
  509    parts_request_uri(Parts, RequestURI).
  510proxy_request_uri(unix_socket(_), Parts, RequestURI) :-
  511    !,
  512    parts_request_uri(Parts, RequestURI).
  513proxy_request_uri(_, Parts, RequestURI) :-
  514    parts_uri(Parts, RequestURI).
  515
  516http:http_connection_over_proxy(unix_socket(Path), _, _,
  517                                StreamPair, Options, Options) :-
  518    !,
  519    unix_domain_socket(Socket),
  520    tcp_connect(Socket, Path),
  521    tcp_open_socket(Socket, In, Out),
  522    stream_pair(StreamPair, In, Out).
  523http:http_connection_over_proxy(direct, _, Host:Port,
  524                                StreamPair, Options, Options) :-
  525    !,
  526    open_socket(Host:Port, StreamPair, Options).
  527http:http_connection_over_proxy(proxy(ProxyHost, ProxyPort), Parts, _,
  528                                StreamPair, Options, Options) :-
  529    \+ ( memberchk(scheme(Scheme), Parts),
  530         secure_scheme(Scheme)
  531       ),
  532    !,
  533    % We do not want any /more/ proxy after this
  534    open_socket(ProxyHost:ProxyPort, StreamPair,
  535                [bypass_proxy(true)|Options]).
  536http:http_connection_over_proxy(socks(SocksHost, SocksPort), _Parts, Host:Port,
  537                                StreamPair, Options, Options) :-
  538    !,
  539    tcp_connect(SocksHost:SocksPort, StreamPair, [bypass_proxy(true)]),
  540    catch(negotiate_socks_connection(Host:Port, StreamPair),
  541          Error,
  542          ( close(StreamPair, [force(true)]),
  543            throw(Error)
  544          )).
  545
  546%!  hooked_options(+Parts, -Options) is nondet.
  547%
  548%   Calls  http:open_options/2  and  if  necessary    upgrades  old  SSL
  549%   cacerts_file(File) option to a cacerts(List) option to ensure proper
  550%   merging of options.
  551
  552hooked_options(Parts, Options) :-
  553    http:open_options(Parts, Options0),
  554    upgrade_ssl_options(Parts, Options0, Options).
  555
  556:- if(current_predicate(ssl_upgrade_legacy_options/2)).  557upgrade_ssl_options(Parts, Options0, Options) :-
  558    requires_ssl(Parts),
  559    !,
  560    ssl_upgrade_legacy_options(Options0, Options).
  561:- endif.  562upgrade_ssl_options(_, Options, Options).
  563
  564merge_options_rev(Old, New, Merged) :-
  565    merge_options(New, Old, Merged).
  566
  567is_meta(pem_password_hook).             % SSL plugin callbacks
  568is_meta(cert_verify_hook).
  569
  570
  571http:http_protocol_hook(http, _, StreamPair, StreamPair, _).
  572
  573default_port(https, 443) :- !.
  574default_port(wss,   443) :- !.
  575default_port(_,     80).
  576
  577host_and_port(Host, DefPort, DefPort, Host) :- !.
  578host_and_port(Host, _,       Port,    Host:Port).
  579
  580%!  autoload_https(+Parts) is det.
  581%
  582%   If the requested scheme is https or wss, load the HTTPS plugin.
  583
  584autoload_https(Parts) :-
  585    requires_ssl(Parts),
  586    memberchk(scheme(S), Parts),
  587    \+ clause(http:http_protocol_hook(S, _, StreamPair, StreamPair, _),_),
  588    exists_source(library(http/http_ssl_plugin)),
  589    !,
  590    use_module(library(http/http_ssl_plugin)).
  591autoload_https(_).
  592
  593requires_ssl(Parts) :-
  594    memberchk(scheme(S), Parts),
  595    secure_scheme(S).
  596
  597secure_scheme(https).
  598secure_scheme(wss).
  599
  600%!  send_rec_header(+StreamPair, -Stream,
  601%!                  +Host, +RequestURI, +Parts, +Options) is det.
  602%
  603%   Send header to Out and process reply.  If there is an error or
  604%   failure, close In and Out and return the error or failure.
  605
  606send_rec_header(StreamPair, Stream, Host, RequestURI, Parts, Options) :-
  607    (   catch(guarded_send_rec_header(StreamPair, Stream,
  608                                      Host, RequestURI, Parts, Options),
  609              E, true)
  610    ->  (   var(E)
  611        ->  (   option(output(StreamPair), Options)
  612            ->  true
  613            ;   true
  614            )
  615        ;   close(StreamPair, [force(true)]),
  616            throw(E)
  617        )
  618    ;   close(StreamPair, [force(true)]),
  619        fail
  620    ).
  621
  622guarded_send_rec_header(StreamPair, Stream, Host, RequestURI, Parts, Options) :-
  623    user_agent(Agent, Options),
  624    method(Options, MNAME),
  625    http_version(Version),
  626    option(connection(Connection), Options, close),
  627    debug(http(send_request), "> ~w ~w HTTP/~w", [MNAME, RequestURI, Version]),
  628    debug(http(send_request), "> Host: ~w", [Host]),
  629    debug(http(send_request), "> User-Agent: ~w", [Agent]),
  630    debug(http(send_request), "> Connection: ~w", [Connection]),
  631    format(StreamPair,
  632           '~w ~w HTTP/~w\r\n\c
  633               Host: ~w\r\n\c
  634               User-Agent: ~w\r\n\c
  635               Connection: ~w\r\n',
  636           [MNAME, RequestURI, Version, Host, Agent, Connection]),
  637    parts_uri(Parts, URI),
  638    x_headers(Options, URI, StreamPair),
  639    write_cookies(StreamPair, Parts, Options),
  640    (   option(post(PostData), Options)
  641    ->  http_post_data(PostData, StreamPair, [])
  642    ;   format(StreamPair, '\r\n', [])
  643    ),
  644    flush_output(StreamPair),
  645                                    % read the reply header
  646    read_header(StreamPair, Parts, ReplyVersion, Code, Comment, Lines),
  647    update_cookies(Lines, Parts, Options),
  648    reply_header(Lines, Options),
  649    do_open(ReplyVersion, Code, Comment, Lines, Options, Parts, Host,
  650            StreamPair, Stream).
  651
  652
  653%!  http_version(-Version:atom) is det.
  654%
  655%   HTTP version we publish. We  can  only   use  1.1  if we support
  656%   chunked encoding.
  657
  658http_version('1.1') :-
  659    http:current_transfer_encoding(chunked),
  660    !.
  661http_version('1.0').
  662
  663method(Options, MNAME) :-
  664    option(post(_), Options),
  665    !,
  666    option(method(M), Options, post),
  667    (   map_method(M, MNAME0)
  668    ->  MNAME = MNAME0
  669    ;   domain_error(method, M)
  670    ).
  671method(Options, MNAME) :-
  672    option(method(M), Options, get),
  673    (   map_method(M, MNAME0)
  674    ->  MNAME = MNAME0
  675    ;   map_method(_, M)
  676    ->  MNAME = M
  677    ;   domain_error(method, M)
  678    ).
  679
  680%!  map_method(+MethodID, -Method)
  681%
  682%   Support additional ``METHOD`` keywords.  Default   are  the official
  683%   HTTP methods as defined by the various RFCs.
  684
  685:- multifile
  686    map_method/2.  687
  688map_method(delete,  'DELETE').
  689map_method(get,     'GET').
  690map_method(head,    'HEAD').
  691map_method(post,    'POST').
  692map_method(put,     'PUT').
  693map_method(patch,   'PATCH').
  694map_method(options, 'OPTIONS').
  695
  696%!  x_headers(+Options, +URI, +Out) is det.
  697%
  698%   Emit extra headers from   request_header(Name=Value)  options in
  699%   Options.
  700%
  701%   @tbd Use user/password fields
  702
  703x_headers(Options, URI, Out) :-
  704    x_headers_(Options, [url(URI)|Options], Out).
  705
  706x_headers_([], _, _).
  707x_headers_([H|T], Options, Out) :-
  708    x_header(H, Options, Out),
  709    x_headers_(T, Options, Out).
  710
  711x_header(request_header(Name=Value), _, Out) :-
  712    !,
  713    debug(http(send_request), "> ~w: ~w", [Name, Value]),
  714    format(Out, '~w: ~w\r\n', [Name, Value]).
  715x_header(proxy_authorization(ProxyAuthorization), Options, Out) :-
  716    !,
  717    auth_header(ProxyAuthorization, Options, 'Proxy-Authorization', Out).
  718x_header(authorization(Authorization), Options, Out) :-
  719    !,
  720    auth_header(Authorization, Options, 'Authorization', Out).
  721x_header(range(Spec), _, Out) :-
  722    !,
  723    Spec =.. [Unit, From, To],
  724    (   To == end
  725    ->  ToT = ''
  726    ;   must_be(integer, To),
  727        ToT = To
  728    ),
  729    debug(http(send_request), "> Range: ~w=~d-~w", [Unit, From, ToT]),
  730    format(Out, 'Range: ~w=~d-~w\r\n', [Unit, From, ToT]).
  731x_header(_, _, _).
  732
  733%!  auth_header(+AuthOption, +Options, +HeaderName, +Out)
  734
  735auth_header(basic(User, Password), _, Header, Out) :-
  736    !,
  737    format(codes(Codes), '~w:~w', [User, Password]),
  738    phrase(base64(Codes), Base64Codes),
  739    debug(http(send_request), "> ~w: Basic ~s", [Header, Base64Codes]),
  740    format(Out, '~w: Basic ~s\r\n', [Header, Base64Codes]).
  741auth_header(bearer(Token), _, Header, Out) :-
  742    !,
  743    debug(http(send_request), "> ~w: Bearer ~w", [Header,Token]),
  744    format(Out, '~w: Bearer ~w\r\n', [Header, Token]).
  745auth_header(Auth, Options, _, Out) :-
  746    option(url(URL), Options),
  747    add_method(Options, Options1),
  748    http:authenticate_client(URL, send_auth_header(Auth, Out, Options1)),
  749    !.
  750auth_header(Auth, _, _, _) :-
  751    domain_error(authorization, Auth).
  752
  753user_agent(Agent, Options) :-
  754    (   option(user_agent(Agent), Options)
  755    ->  true
  756    ;   user_agent(Agent)
  757    ).
  758
  759add_method(Options0, Options) :-
  760    option(method(_), Options0),
  761    !,
  762    Options = Options0.
  763add_method(Options0, Options) :-
  764    option(post(_), Options0),
  765    !,
  766    Options = [method(post)|Options0].
  767add_method(Options0, [method(get)|Options0]).
  768
  769%!  do_open(+HTTPVersion, +HTTPStatusCode, +HTTPStatusComment, +Header,
  770%!          +Options, +Parts, +Host, +In, -FinalIn) is det.
  771%
  772%   Handle the HTTP status once available. If   200-299, we are ok. If a
  773%   redirect, redo the open,  returning  a   new  stream.  Else issue an
  774%   error.
  775%
  776%   @error  existence_error(url, URL)
  777
  778                                        % Redirections
  779do_open(_, Code, _, Lines, Options0, Parts, _, In, Stream) :-
  780    redirect_code(Code),
  781    option(redirect(true), Options0, true),
  782    location(Lines, RequestURI),
  783    !,
  784    debug(http(redirect), 'http_open: redirecting to ~w', [RequestURI]),
  785    close(In),
  786    parts_uri(Parts, Base),
  787    uri_resolve(RequestURI, Base, Redirected),
  788    parse_url_ex(Redirected, RedirectedParts),
  789    (   redirect_limit_exceeded(Options0, Max)
  790    ->  format(atom(Comment), 'max_redirect (~w) limit exceeded', [Max]),
  791        throw(error(permission_error(redirect, http, Redirected),
  792                    context(_, Comment)))
  793    ;   redirect_loop(RedirectedParts, Options0)
  794    ->  throw(error(permission_error(redirect, http, Redirected),
  795                    context(_, 'Redirection loop')))
  796    ;   true
  797    ),
  798    redirect_options(Parts, RedirectedParts, Options0, Options),
  799    http_open(RedirectedParts, Stream, Options).
  800                                        % Need authentication
  801do_open(_Version, Code, _Comment, Lines, Options0, Parts, _Host, In0, Stream) :-
  802    authenticate_code(Code),
  803    option(authenticate(true), Options0, true),
  804    parts_uri(Parts, URI),
  805    parse_headers(Lines, Headers),
  806    http:authenticate_client(
  807             URI,
  808             auth_reponse(Headers, Options0, Options)),
  809    !,
  810    close(In0),
  811    http_open(Parts, Stream, Options).
  812                                        % Accepted codes
  813do_open(Version, Code, _, Lines, Options, Parts, Host, In0, In) :-
  814    (   option(status_code(Code), Options),
  815        Lines \== []
  816    ->  true
  817    ;   successful_code(Code)
  818    ),
  819    !,
  820    parts_uri(Parts, URI),
  821    parse_headers(Lines, Headers),
  822    return_version(Options, Version),
  823    return_size(Options, Headers),
  824    return_fields(Options, Headers),
  825    return_headers(Options, Headers),
  826    consider_keep_alive(Lines, Parts, Host, In0, In1, Options),
  827    transfer_encoding_filter(Lines, In1, In, Options),
  828                                    % properly re-initialise the stream
  829    set_stream(In, file_name(URI)),
  830    set_stream(In, record_position(true)).
  831do_open(_, _, _, [], Options, _, _, _, _) :-
  832    option(connection(Connection), Options),
  833    keep_alive(Connection),
  834    !,
  835    throw(error(keep_alive(closed),_)).
  836                                        % report anything else as error
  837do_open(_Version, Code, Comment, _,  _, Parts, _, _, _) :-
  838    parts_uri(Parts, URI),
  839    (   map_error_code(Code, Error)
  840    ->  Formal =.. [Error, url, URI]
  841    ;   Formal = existence_error(url, URI)
  842    ),
  843    throw(error(Formal, context(_, status(Code, Comment)))).
  844
  845
  846successful_code(Code) :-
  847    between(200, 299, Code).
  848
  849%!  redirect_limit_exceeded(+Options:list(compound), -Max:nonneg) is semidet.
  850%
  851%   True if we have exceeded the maximum redirection length (default 10).
  852
  853redirect_limit_exceeded(Options, Max) :-
  854    option(visited(Visited), Options, []),
  855    length(Visited, N),
  856    option(max_redirect(Max), Options, 10),
  857    (Max == infinite -> fail ; N > Max).
  858
  859
  860%!  redirect_loop(+Parts, +Options) is semidet.
  861%
  862%   True if we are in  a  redirection   loop.  Note  that some sites
  863%   redirect once to the same place using  cookies or similar, so we
  864%   allow for two tries. In fact,   we  should probably test whether
  865%   authorization or cookie headers have changed.
  866
  867redirect_loop(Parts, Options) :-
  868    option(visited(Visited), Options, []),
  869    include(==(Parts), Visited, Same),
  870    length(Same, Count),
  871    Count > 2.
  872
  873
  874%!  redirect_options(+Parts, +RedirectedParts, +Options0, -Options) is det.
  875%
  876%   A redirect from a POST should do  a   GET  on the returned URI. This
  877%   means we must remove the method(post)   and  post(Data) options from
  878%   the original option-list.
  879%
  880%   If we are connecting over a Unix   domain socket we drop this option
  881%   if the redirect host does not match the initial host.
  882
  883redirect_options(Parts, RedirectedParts, Options0, Options) :-
  884    select_option(unix_socket(_), Options0, Options1),
  885    memberchk(host(Host), Parts),
  886    memberchk(host(RHost), RedirectedParts),
  887    debug(http(redirect), 'http_open: redirecting AF_UNIX ~w to ~w',
  888          [Host, RHost]),
  889    Host \== RHost,
  890    !,
  891    redirect_options(Options1, Options).
  892redirect_options(_, _, Options0, Options) :-
  893    redirect_options(Options0, Options).
  894
  895redirect_options(Options0, Options) :-
  896    (   select_option(post(_), Options0, Options1)
  897    ->  true
  898    ;   Options1 = Options0
  899    ),
  900    (   select_option(method(Method), Options1, Options),
  901        \+ redirect_method(Method)
  902    ->  true
  903    ;   Options = Options1
  904    ).
  905
  906redirect_method(delete).
  907redirect_method(get).
  908redirect_method(head).
  909
  910
  911%!  map_error_code(+HTTPCode, -PrologError) is semidet.
  912%
  913%   Map HTTP error codes to Prolog errors.
  914%
  915%   @tbd    Many more maps. Unfortunately many have no sensible Prolog
  916%           counterpart.
  917
  918map_error_code(401, permission_error).
  919map_error_code(403, permission_error).
  920map_error_code(404, existence_error).
  921map_error_code(405, permission_error).
  922map_error_code(407, permission_error).
  923map_error_code(410, existence_error).
  924
  925redirect_code(301).                     % Moved Permanently
  926redirect_code(302).                     % Found (previously "Moved Temporary")
  927redirect_code(303).                     % See Other
  928redirect_code(307).                     % Temporary Redirect
  929
  930authenticate_code(401).
  931
  932%!  open_socket(+Address, -StreamPair, +Options) is det.
  933%
  934%   Create and connect a client socket to Address.  Options
  935%
  936%       * timeout(+Timeout)
  937%       Sets timeout on the stream, *after* connecting the
  938%       socket.
  939%
  940%   @tbd    Make timeout also work on tcp_connect/4.
  941%   @tbd    This is the same as do_connect/4 in http_client.pl
  942
  943open_socket(Address, StreamPair, Options) :-
  944    debug(http(open), 'http_open: Connecting to ~p ...', [Address]),
  945    tcp_connect(Address, StreamPair, Options),
  946    stream_pair(StreamPair, In, Out),
  947    debug(http(open), '\tok ~p ---> ~p', [In, Out]),
  948    set_stream(In, record_position(false)),
  949    (   option(timeout(Timeout), Options)
  950    ->  set_stream(In, timeout(Timeout))
  951    ;   true
  952    ).
  953
  954
  955return_version(Options, Major-Minor) :-
  956    option(version(Major-Minor), Options, _).
  957
  958return_size(Options, Headers) :-
  959    (   memberchk(content_length(Size), Headers)
  960    ->  option(size(Size), Options, _)
  961    ;   true
  962    ).
  963
  964return_fields([], _).
  965return_fields([header(Name, Value)|T], Headers) :-
  966    !,
  967    (   Term =.. [Name,Value],
  968        memberchk(Term, Headers)
  969    ->  true
  970    ;   Value = ''
  971    ),
  972    return_fields(T, Headers).
  973return_fields([_|T], Lines) :-
  974    return_fields(T, Lines).
  975
  976return_headers(Options, Headers) :-
  977    option(headers(Headers), Options, _).
  978
  979%!  parse_headers(+Lines, -Headers:list(compound)) is det.
  980%
  981%   Parse the header lines for   the  headers(-List) option. Invalid
  982%   header   lines   are   skipped,   printing   a   warning   using
  983%   pring_message/2.
  984
  985parse_headers([], []) :- !.
  986parse_headers([Line|Lines], Headers) :-
  987    catch(http_parse_header(Line, [Header]), Error, true),
  988    (   var(Error)
  989    ->  Headers = [Header|More]
  990    ;   print_message(warning, Error),
  991        Headers = More
  992    ),
  993    parse_headers(Lines, More).
  994
  995
  996%!  return_final_url(+Options) is semidet.
  997%
  998%   If Options contains final_url(URL), unify URL with the final
  999%   URL after redirections.
 1000
 1001return_final_url(Options) :-
 1002    option(final_url(URL), Options),
 1003    var(URL),
 1004    !,
 1005    option(visited([Parts|_]), Options),
 1006    parts_uri(Parts, URL).
 1007return_final_url(_).
 1008
 1009
 1010%!  transfer_encoding_filter(+Lines, +In0, -In, +Options) is det.
 1011%
 1012%   Install filters depending on the transfer  encoding. If In0 is a
 1013%   stream-pair, we close the output   side. If transfer-encoding is
 1014%   not specified, the content-encoding is  interpreted as a synonym
 1015%   for transfer-encoding, because many   servers incorrectly depend
 1016%   on  this.  Exceptions  to  this   are  content-types  for  which
 1017%   disable_encoding_filter/1 holds.
 1018
 1019transfer_encoding_filter(Lines, In0, In, Options) :-
 1020    transfer_encoding(Lines, Encoding),
 1021    !,
 1022    transfer_encoding_filter_(Encoding, In0, In, Options).
 1023transfer_encoding_filter(Lines, In0, In, Options) :-
 1024    content_encoding(Lines, Encoding),
 1025    content_type(Lines, Type),
 1026    \+ http:disable_encoding_filter(Type),
 1027    !,
 1028    transfer_encoding_filter_(Encoding, In0, In, Options).
 1029transfer_encoding_filter(_, In, In, _Options).
 1030
 1031transfer_encoding_filter_(Encoding, In0, In, Options) :-
 1032    option(raw_encoding(Encoding), Options),
 1033    !,
 1034    In = In0.
 1035transfer_encoding_filter_(Encoding, In0, In, _Options) :-
 1036    stream_pair(In0, In1, Out),
 1037    (   nonvar(Out)
 1038    ->  close(Out)
 1039    ;   true
 1040    ),
 1041    (   http:encoding_filter(Encoding, In1, In)
 1042    ->  true
 1043    ;   autoload_encoding(Encoding),
 1044        http:encoding_filter(Encoding, In1, In)
 1045    ->  true
 1046    ;   domain_error(http_encoding, Encoding)
 1047    ).
 1048
 1049:- multifile
 1050    autoload_encoding/1. 1051
 1052:- if(exists_source(library(zlib))). 1053autoload_encoding(gzip) :-
 1054    use_module(library(zlib)).
 1055:- endif. 1056
 1057content_type(Lines, Type) :-
 1058    member(Line, Lines),
 1059    phrase(field('content-type'), Line, Rest),
 1060    !,
 1061    atom_codes(Type, Rest).
 1062
 1063%!  http:disable_encoding_filter(+ContentType) is semidet.
 1064%
 1065%   Do not use  the   =|Content-encoding|=  as =|Transfer-encoding|=
 1066%   encoding for specific values of   ContentType. This predicate is
 1067%   multifile and can thus be extended by the user.
 1068
 1069http:disable_encoding_filter('application/x-gzip').
 1070http:disable_encoding_filter('application/x-tar').
 1071http:disable_encoding_filter('x-world/x-vrml').
 1072http:disable_encoding_filter('application/zip').
 1073http:disable_encoding_filter('application/x-gzip').
 1074http:disable_encoding_filter('application/x-zip-compressed').
 1075http:disable_encoding_filter('application/x-compress').
 1076http:disable_encoding_filter('application/x-compressed').
 1077http:disable_encoding_filter('application/x-spoon').
 1078
 1079%!  transfer_encoding(+Lines, -Encoding) is semidet.
 1080%
 1081%   True if Encoding  is  the   value  of  the =|Transfer-encoding|=
 1082%   header.
 1083
 1084transfer_encoding(Lines, Encoding) :-
 1085    what_encoding(transfer_encoding, Lines, Encoding).
 1086
 1087what_encoding(What, Lines, Encoding) :-
 1088    member(Line, Lines),
 1089    phrase(encoding_(What, Debug), Line, Rest),
 1090    !,
 1091    atom_codes(Encoding, Rest),
 1092    debug(http(What), '~w: ~p', [Debug, Rest]).
 1093
 1094encoding_(content_encoding, 'Content-encoding') -->
 1095    field('content-encoding').
 1096encoding_(transfer_encoding, 'Transfer-encoding') -->
 1097    field('transfer-encoding').
 1098
 1099%!  content_encoding(+Lines, -Encoding) is semidet.
 1100%
 1101%   True if Encoding is the value of the =|Content-encoding|=
 1102%   header.
 1103
 1104content_encoding(Lines, Encoding) :-
 1105    what_encoding(content_encoding, Lines, Encoding).
 1106
 1107%!  read_header(+In:stream, +Parts, -Version, -Code:int,
 1108%!  -Comment:atom, -Lines:list) is det.
 1109%
 1110%   Read the HTTP reply-header.  If the reply is completely empty
 1111%   an existence error is thrown.  If the replied header is
 1112%   otherwise invalid a 500 HTTP error is simulated, having the
 1113%   comment =|Invalid reply header|=.
 1114%
 1115%   @param Parts    A list of compound terms that describe the
 1116%                   parsed request URI.
 1117%   @param Version  HTTP reply version as Major-Minor pair
 1118%   @param Code     Numeric HTTP reply-code
 1119%   @param Comment  Comment of reply-code as atom
 1120%   @param Lines    Remaining header lines as code-lists.
 1121%
 1122%   @error existence_error(http_reply, Uri)
 1123
 1124read_header(In, Parts, Major-Minor, Code, Comment, Lines) :-
 1125    read_line_to_codes(In, Line),
 1126    (   Line == end_of_file
 1127    ->  parts_uri(Parts, Uri),
 1128        existence_error(http_reply,Uri)
 1129    ;   true
 1130    ),
 1131    Line \== end_of_file,
 1132    phrase(first_line(Major-Minor, Code, Comment), Line),
 1133    debug(http(open), 'HTTP/~d.~d ~w ~w', [Major, Minor, Code, Comment]),
 1134    read_line_to_codes(In, Line2),
 1135    rest_header(Line2, In, Lines),
 1136    !,
 1137    (   debugging(http(open))
 1138    ->  forall(member(HL, Lines),
 1139               debug(http(open), '~s', [HL]))
 1140    ;   true
 1141    ).
 1142read_header(_, _, 1-1, 500, 'Invalid reply header', []).
 1143
 1144rest_header([], _, []) :- !.            % blank line: end of header
 1145rest_header(L0, In, [L0|L]) :-
 1146    read_line_to_codes(In, L1),
 1147    rest_header(L1, In, L).
 1148
 1149%!  content_length(+Header, -Length:int) is semidet.
 1150%
 1151%   Find the Content-Length in an HTTP reply-header.
 1152
 1153content_length(Lines, Length) :-
 1154    member(Line, Lines),
 1155    phrase(content_length(Length0), Line),
 1156    !,
 1157    Length = Length0.
 1158
 1159location(Lines, RequestURI) :-
 1160    member(Line, Lines),
 1161    phrase(atom_field(location, RequestURI), Line),
 1162    !.
 1163
 1164connection(Lines, Connection) :-
 1165    member(Line, Lines),
 1166    phrase(atom_field(connection, Connection0), Line),
 1167    !,
 1168    Connection = Connection0.
 1169
 1170first_line(Major-Minor, Code, Comment) -->
 1171    "HTTP/", integer(Major), ".", integer(Minor),
 1172    skip_blanks,
 1173    integer(Code),
 1174    skip_blanks,
 1175    rest(Comment).
 1176
 1177atom_field(Name, Value) -->
 1178    field(Name),
 1179    rest(Value).
 1180
 1181content_length(Len) -->
 1182    field('content-length'),
 1183    integer(Len).
 1184
 1185field(Name) -->
 1186    { atom_codes(Name, Codes) },
 1187    field_codes(Codes).
 1188
 1189field_codes([]) -->
 1190    ":",
 1191    skip_blanks.
 1192field_codes([H|T]) -->
 1193    [C],
 1194    { match_header_char(H, C)
 1195    },
 1196    field_codes(T).
 1197
 1198match_header_char(C, C) :- !.
 1199match_header_char(C, U) :-
 1200    code_type(C, to_lower(U)),
 1201    !.
 1202match_header_char(0'_, 0'-).
 1203
 1204
 1205skip_blanks -->
 1206    [C],
 1207    { code_type(C, white)
 1208    },
 1209    !,
 1210    skip_blanks.
 1211skip_blanks -->
 1212    [].
 1213
 1214%!  integer(-Int)//
 1215%
 1216%   Read 1 or more digits and return as integer.
 1217
 1218integer(Code) -->
 1219    digit(D0),
 1220    digits(D),
 1221    { number_codes(Code, [D0|D])
 1222    }.
 1223
 1224digit(C) -->
 1225    [C],
 1226    { code_type(C, digit)
 1227    }.
 1228
 1229digits([D0|D]) -->
 1230    digit(D0),
 1231    !,
 1232    digits(D).
 1233digits([]) -->
 1234    [].
 1235
 1236%!  rest(-Atom:atom)//
 1237%
 1238%   Get rest of input as an atom.
 1239
 1240rest(Atom) --> call(rest_(Atom)).
 1241
 1242rest_(Atom, L, []) :-
 1243    atom_codes(Atom, L).
 1244
 1245
 1246%!  reply_header(+Lines, +Options) is det.
 1247%
 1248%   Return the entire reply header as  a   list  of strings to te option
 1249%   reply_headers(-Headers).
 1250
 1251reply_header(Lines, Options) :-
 1252    option(raw_headers(Headers), Options),
 1253    !,
 1254    maplist(string_codes, Headers, Lines).
 1255reply_header(_, _).
 1256
 1257
 1258                 /*******************************
 1259                 *   AUTHORIZATION MANAGEMENT   *
 1260                 *******************************/
 1261
 1262%!  http_set_authorization(+URL, +Authorization) is det.
 1263%
 1264%   Set user/password to supply with URLs   that have URL as prefix.
 1265%   If  Authorization  is  the   atom    =|-|=,   possibly   defined
 1266%   authorization is cleared.  For example:
 1267%
 1268%   ```
 1269%   ?- http_set_authorization('http://www.example.com/private/',
 1270%                             basic('John', 'Secret'))
 1271%   ```
 1272%
 1273%   @tbd    Move to a separate module, so http_get/3, etc. can use this
 1274%           too.
 1275
 1276:- dynamic
 1277    stored_authorization/2,
 1278    cached_authorization/2. 1279
 1280http_set_authorization(URL, Authorization) :-
 1281    must_be(atom, URL),
 1282    retractall(stored_authorization(URL, _)),
 1283    (   Authorization = (-)
 1284    ->  true
 1285    ;   check_authorization(Authorization),
 1286        assert(stored_authorization(URL, Authorization))
 1287    ),
 1288    retractall(cached_authorization(_,_)).
 1289
 1290check_authorization(Var) :-
 1291    var(Var),
 1292    !,
 1293    instantiation_error(Var).
 1294check_authorization(basic(User, Password)) :-
 1295    must_be(atom, User),
 1296    must_be(text, Password).
 1297check_authorization(digest(User, Password)) :-
 1298    must_be(atom, User),
 1299    must_be(text, Password).
 1300
 1301%!  authorization(+URL, -Authorization) is semidet.
 1302%
 1303%   True if Authorization must be supplied for URL.
 1304%
 1305%   @tbd    Cleanup cache if it gets too big.
 1306
 1307authorization(_, _) :-
 1308    \+ stored_authorization(_, _),
 1309    !,
 1310    fail.
 1311authorization(URL, Authorization) :-
 1312    cached_authorization(URL, Authorization),
 1313    !,
 1314    Authorization \== (-).
 1315authorization(URL, Authorization) :-
 1316    (   stored_authorization(Prefix, Authorization),
 1317        sub_atom(URL, 0, _, _, Prefix)
 1318    ->  assert(cached_authorization(URL, Authorization))
 1319    ;   assert(cached_authorization(URL, -)),
 1320        fail
 1321    ).
 1322
 1323add_authorization(_, Options, Options) :-
 1324    option(authorization(_), Options),
 1325    !.
 1326add_authorization(Parts, Options0, Options) :-
 1327    url_part(user(User), Parts),
 1328    url_part(password(Passwd), Parts),
 1329    !,
 1330    Options = [authorization(basic(User,Passwd))|Options0].
 1331add_authorization(Parts, Options0, Options) :-
 1332    stored_authorization(_, _) ->   % quick test to avoid work
 1333    parts_uri(Parts, URL),
 1334    authorization(URL, Auth),
 1335    !,
 1336    Options = [authorization(Auth)|Options0].
 1337add_authorization(_, Options, Options).
 1338
 1339
 1340%!  parse_url_ex(+URL, -Parts)
 1341%
 1342%   Parts:  Scheme,  Host,  Port,    User:Password,  RequestURI  (no
 1343%   fragment).
 1344
 1345parse_url_ex(URL, [uri(URL)|Parts]) :-
 1346    uri_components(URL, Components),
 1347    phrase(components(Components), Parts),
 1348    (   option(host(_), Parts)
 1349    ->  true
 1350    ;   domain_error(url, URL)
 1351    ).
 1352
 1353components(Components) -->
 1354    uri_scheme(Components),
 1355    uri_path(Components),
 1356    uri_authority(Components),
 1357    uri_request_uri(Components).
 1358
 1359uri_scheme(Components) -->
 1360    { uri_data(scheme, Components, Scheme), nonvar(Scheme) },
 1361    !,
 1362    [ scheme(Scheme)
 1363    ].
 1364uri_scheme(_) --> [].
 1365
 1366uri_path(Components) -->
 1367    { uri_data(path, Components, Path0), nonvar(Path0),
 1368      (   Path0 == ''
 1369      ->  Path = (/)
 1370      ;   Path = Path0
 1371      )
 1372    },
 1373    !,
 1374    [ path(Path)
 1375    ].
 1376uri_path(_) --> [].
 1377
 1378uri_authority(Components) -->
 1379    { uri_data(authority, Components, Auth), nonvar(Auth),
 1380      !,
 1381      uri_authority_components(Auth, Data)
 1382    },
 1383    [ authority(Auth) ],
 1384    auth_field(user, Data),
 1385    auth_field(password, Data),
 1386    auth_field(host, Data),
 1387    auth_field(port, Data).
 1388uri_authority(_) --> [].
 1389
 1390auth_field(Field, Data) -->
 1391    { uri_authority_data(Field, Data, EncValue), nonvar(EncValue),
 1392      !,
 1393      (   atom(EncValue)
 1394      ->  uri_encoded(query_value, Value, EncValue)
 1395      ;   Value = EncValue
 1396      ),
 1397      Part =.. [Field,Value]
 1398    },
 1399    [ Part ].
 1400auth_field(_, _) --> [].
 1401
 1402uri_request_uri(Components) -->
 1403    { uri_data(path, Components, Path0),
 1404      uri_data(search, Components, Search),
 1405      (   Path0 == ''
 1406      ->  Path = (/)
 1407      ;   Path = Path0
 1408      ),
 1409      uri_data(path, Components2, Path),
 1410      uri_data(search, Components2, Search),
 1411      uri_components(RequestURI, Components2)
 1412    },
 1413    [ request_uri(RequestURI)
 1414    ].
 1415
 1416%!  parts_scheme(+Parts, -Scheme) is det.
 1417%!  parts_uri(+Parts, -URI) is det.
 1418%!  parts_request_uri(+Parts, -RequestURI) is det.
 1419%!  parts_search(+Parts, -Search) is det.
 1420%!  parts_authority(+Parts, -Authority) is semidet.
 1421
 1422parts_scheme(Parts, Scheme) :-
 1423    url_part(scheme(Scheme), Parts),
 1424    !.
 1425parts_scheme(Parts, Scheme) :-          % compatibility with library(url)
 1426    url_part(protocol(Scheme), Parts),
 1427    !.
 1428parts_scheme(_, http).
 1429
 1430parts_authority(Parts, Auth) :-
 1431    url_part(authority(Auth), Parts),
 1432    !.
 1433parts_authority(Parts, Auth) :-
 1434    url_part(host(Host), Parts, _),
 1435    url_part(port(Port), Parts, _),
 1436    url_part(user(User), Parts, _),
 1437    url_part(password(Password), Parts, _),
 1438    uri_authority_components(Auth,
 1439                             uri_authority(User, Password, Host, Port)).
 1440
 1441parts_request_uri(Parts, RequestURI) :-
 1442    option(request_uri(RequestURI), Parts),
 1443    !.
 1444parts_request_uri(Parts, RequestURI) :-
 1445    url_part(path(Path), Parts, /),
 1446    ignore(parts_search(Parts, Search)),
 1447    uri_data(path, Data, Path),
 1448    uri_data(search, Data, Search),
 1449    uri_components(RequestURI, Data).
 1450
 1451parts_search(Parts, Search) :-
 1452    option(query_string(Search), Parts),
 1453    !.
 1454parts_search(Parts, Search) :-
 1455    option(search(Fields), Parts),
 1456    !,
 1457    uri_query_components(Search, Fields).
 1458
 1459
 1460parts_uri(Parts, URI) :-
 1461    option(uri(URI), Parts),
 1462    !.
 1463parts_uri(Parts, URI) :-
 1464    parts_scheme(Parts, Scheme),
 1465    ignore(parts_authority(Parts, Auth)),
 1466    parts_request_uri(Parts, RequestURI),
 1467    uri_components(RequestURI, Data),
 1468    uri_data(scheme, Data, Scheme),
 1469    uri_data(authority, Data, Auth),
 1470    uri_components(URI, Data).
 1471
 1472parts_port(Parts, Port) :-
 1473    parts_scheme(Parts, Scheme),
 1474    default_port(Scheme, DefPort),
 1475    url_part(port(Port), Parts, DefPort).
 1476
 1477url_part(Part, Parts) :-
 1478    Part =.. [Name,Value],
 1479    Gen =.. [Name,RawValue],
 1480    option(Gen, Parts),
 1481    !,
 1482    Value = RawValue.
 1483
 1484url_part(Part, Parts, Default) :-
 1485    Part =.. [Name,Value],
 1486    Gen =.. [Name,RawValue],
 1487    (   option(Gen, Parts)
 1488    ->  Value = RawValue
 1489    ;   Value = Default
 1490    ).
 1491
 1492
 1493                 /*******************************
 1494                 *            COOKIES           *
 1495                 *******************************/
 1496
 1497write_cookies(Out, Parts, Options) :-
 1498    http:write_cookies(Out, Parts, Options),
 1499    !.
 1500write_cookies(_, _, _).
 1501
 1502update_cookies(_, _, _) :-
 1503    predicate_property(http:update_cookies(_,_,_), number_of_clauses(0)),
 1504    !.
 1505update_cookies(Lines, Parts, Options) :-
 1506    (   member(Line, Lines),
 1507        phrase(atom_field('set_cookie', CookieData), Line),
 1508        http:update_cookies(CookieData, Parts, Options),
 1509        fail
 1510    ;   true
 1511    ).
 1512
 1513
 1514                 /*******************************
 1515                 *           OPEN ANY           *
 1516                 *******************************/
 1517
 1518:- multifile iostream:open_hook/6. 1519
 1520%!  iostream:open_hook(+Spec, +Mode, -Stream, -Close,
 1521%!                     +Options0, -Options) is semidet.
 1522%
 1523%   Hook implementation that makes  open_any/5   support  =http= and
 1524%   =https= URLs for =|Mode == read|=.
 1525
 1526iostream:open_hook(URL, read, Stream, Close, Options0, Options) :-
 1527    (atom(URL) -> true ; string(URL)),
 1528    uri_is_global(URL),
 1529    uri_components(URL, Components),
 1530    uri_data(scheme, Components, Scheme),
 1531    http_scheme(Scheme),
 1532    !,
 1533    Options = Options0,
 1534    Close = close(Stream),
 1535    http_open(URL, Stream, Options0).
 1536
 1537http_scheme(http).
 1538http_scheme(https).
 1539
 1540
 1541                 /*******************************
 1542                 *          KEEP-ALIVE          *
 1543                 *******************************/
 1544
 1545%!  consider_keep_alive(+HeaderLines, +Parts, +Host,
 1546%!                      +Stream0, -Stream,
 1547%!                      +Options) is det.
 1548
 1549consider_keep_alive(Lines, Parts, Host, StreamPair, In, Options) :-
 1550    option(connection(Asked), Options),
 1551    keep_alive(Asked),
 1552    connection(Lines, Given),
 1553    keep_alive(Given),
 1554    content_length(Lines, Bytes),
 1555    !,
 1556    stream_pair(StreamPair, In0, _),
 1557    connection_address(Host, Parts, HostPort),
 1558    debug(http(connection),
 1559          'Keep-alive to ~w (~D bytes)', [HostPort, Bytes]),
 1560    stream_range_open(In0, In,
 1561                      [ size(Bytes),
 1562                        onclose(keep_alive(StreamPair, HostPort))
 1563                      ]).
 1564consider_keep_alive(_, _, _, Stream, Stream, _).
 1565
 1566connection_address(Host, _, Host) :-
 1567    Host = _:_,
 1568    !.
 1569connection_address(Host, Parts, Host:Port) :-
 1570    parts_port(Parts, Port).
 1571
 1572keep_alive(keep_alive) :- !.
 1573keep_alive(Connection) :-
 1574    downcase_atom(Connection, 'keep-alive').
 1575
 1576:- public keep_alive/4. 1577
 1578keep_alive(StreamPair, Host, _In, 0) :-
 1579    !,
 1580    debug(http(connection), 'Adding connection to ~p to pool', [Host]),
 1581    add_to_pool(Host, StreamPair).
 1582keep_alive(StreamPair, Host, In, Left) :-
 1583    Left < 100,
 1584    debug(http(connection), 'Reading ~D left bytes', [Left]),
 1585    read_incomplete(In, Left),
 1586    add_to_pool(Host, StreamPair),
 1587    !.
 1588keep_alive(StreamPair, _, _, _) :-
 1589    debug(http(connection),
 1590          'Closing connection due to excessive unprocessed input', []),
 1591    (   debugging(http(connection))
 1592    ->  catch(close(StreamPair), E,
 1593              print_message(warning, E))
 1594    ;   close(StreamPair, [force(true)])
 1595    ).
 1596
 1597%!  read_incomplete(+In, +Left) is semidet.
 1598%
 1599%   If we have not all input from  a Keep-alive connection, read the
 1600%   remainder if it is short. Else, we fail and close the stream.
 1601
 1602read_incomplete(In, Left) :-
 1603    catch(setup_call_cleanup(
 1604              open_null_stream(Null),
 1605              copy_stream_data(In, Null, Left),
 1606              close(Null)),
 1607          _,
 1608          fail).
 1609
 1610:- dynamic
 1611    connection_pool/4,              % Hash, Address, Stream, Time
 1612    connection_gc_time/1. 1613
 1614add_to_pool(Address, StreamPair) :-
 1615    keep_connection(Address),
 1616    get_time(Now),
 1617    term_hash(Address, Hash),
 1618    assertz(connection_pool(Hash, Address, StreamPair, Now)).
 1619
 1620get_from_pool(Address, StreamPair) :-
 1621    term_hash(Address, Hash),
 1622    retract(connection_pool(Hash, Address, StreamPair, _)).
 1623
 1624%!  keep_connection(+Address) is semidet.
 1625%
 1626%   Succeeds if we want to keep   the  connection open. We currently
 1627%   keep a maximum of 10 connections  waiting   and  a  maximum of 2
 1628%   waiting for the same address. Connections   older than 2 seconds
 1629%   are closed.
 1630
 1631keep_connection(Address) :-
 1632    close_old_connections(2),
 1633    predicate_property(connection_pool(_,_,_,_), number_of_clauses(C)),
 1634    C =< 10,
 1635    term_hash(Address, Hash),
 1636    aggregate_all(count, connection_pool(Hash, Address, _, _), Count),
 1637    Count =< 2.
 1638
 1639close_old_connections(Timeout) :-
 1640    get_time(Now),
 1641    Before is Now - Timeout,
 1642    (   connection_gc_time(GC),
 1643        GC > Before
 1644    ->  true
 1645    ;   (   retractall(connection_gc_time(_)),
 1646            asserta(connection_gc_time(Now)),
 1647            connection_pool(Hash, Address, StreamPair, Added),
 1648            Added < Before,
 1649            retract(connection_pool(Hash, Address, StreamPair, Added)),
 1650            debug(http(connection),
 1651                  'Closing inactive keep-alive to ~p', [Address]),
 1652            close(StreamPair, [force(true)]),
 1653            fail
 1654        ;   true
 1655        )
 1656    ).
 1657
 1658
 1659%!  http_close_keep_alive(+Address) is det.
 1660%
 1661%   Close all keep-alive connections matching Address. Address is of
 1662%   the  form  Host:Port.  In  particular,  http_close_keep_alive(_)
 1663%   closes all currently known keep-alive connections.
 1664
 1665http_close_keep_alive(Address) :-
 1666    forall(get_from_pool(Address, StreamPair),
 1667           close(StreamPair, [force(true)])).
 1668
 1669%!  keep_alive_error(+Error)
 1670%
 1671%   Deal with an error from reusing  a keep-alive connection. If the
 1672%   error is due to an I/O error   or end-of-file, fail to backtrack
 1673%   over get_from_pool/2. Otherwise it is a   real error and we thus
 1674%   re-raise it.
 1675
 1676keep_alive_error(keep_alive(closed)) :-
 1677    !,
 1678    debug(http(connection), 'Keep-alive connection was closed', []),
 1679    fail.
 1680keep_alive_error(io_error(_,_)) :-
 1681    !,
 1682    debug(http(connection), 'IO error on Keep-alive connection', []),
 1683    fail.
 1684keep_alive_error(Error) :-
 1685    throw(Error).
 1686
 1687
 1688                 /*******************************
 1689                 *     HOOK DOCUMENTATION       *
 1690                 *******************************/
 1691
 1692%!  http:open_options(+Parts, -Options) is nondet.
 1693%
 1694%   This hook is used by the HTTP   client library to define default
 1695%   options based on the the broken-down request-URL.  The following
 1696%   example redirects all trafic, except for localhost over a proxy:
 1697%
 1698%       ```
 1699%       :- multifile
 1700%           http:open_options/2.
 1701%
 1702%       http:open_options(Parts, Options) :-
 1703%           option(host(Host), Parts),
 1704%           Host \== localhost,
 1705%           Options = [proxy('proxy.local', 3128)].
 1706%       ```
 1707%
 1708%   This hook may return multiple   solutions.  The returned options
 1709%   are  combined  using  merge_options/3  where  earlier  solutions
 1710%   overrule later solutions.
 1711
 1712%!  http:write_cookies(+Out, +Parts, +Options) is semidet.
 1713%
 1714%   Emit a =|Cookie:|= header for the  current connection. Out is an
 1715%   open stream to the HTTP server, Parts is the broken-down request
 1716%   (see uri_components/2) and Options is the list of options passed
 1717%   to http_open.  The predicate is called as if using ignore/1.
 1718%
 1719%   @see complements http:update_cookies/3.
 1720%   @see library(http/http_cookie) implements cookie handling on
 1721%   top of these hooks.
 1722
 1723%!  http:update_cookies(+CookieData, +Parts, +Options) is semidet.
 1724%
 1725%   Update the cookie database.  CookieData  is   the  value  of the
 1726%   =|Set-Cookie|= field, Parts is  the   broken-down  request  (see
 1727%   uri_components/2) and Options is the list   of options passed to
 1728%   http_open.
 1729%
 1730%   @see complements http:write_cookies
 1731%   @see library(http/http_cookies) implements cookie handling on
 1732%   top of these hooks.