%%% BEGIN openflax/http/parse.erl %%%
%%%
%%% openflax - Open Source web server for Erlang/OTP
%%% Copyright (c)2004 Cat's Eye Technologies. All rights reserved.
%%%
%%% Redistribution and use in source and binary forms, with or without
%%% modification, are permitted provided that the following conditions
%%% are met:
%%%
%%% Redistributions of source code must retain the above copyright
%%% notice, this list of conditions and the following disclaimer.
%%%
%%% Redistributions in binary form must reproduce the above copyright
%%% notice, this list of conditions and the following disclaimer in
%%% the documentation and/or other materials provided with the
%%% distribution.
%%%
%%% Neither the name of Cat's Eye Technologies nor the names of its
%%% contributors may be used to endorse or promote products derived
%%% from this software without specific prior written permission.
%%%
%%% THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
%%% CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
%%% INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
%%% MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
%%% DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE
%%% LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
%%% OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
%%% PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
%%% OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
%%% ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
%%% OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
%%% OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
%%% POSSIBILITY OF SUCH DAMAGE.
%% @doc Routines for parsing HTTP requests.
%%
%% @end
-module(openflax.http.parse).
-vsn('$Id: parse.erl 31 2004-04-23 07:00:11Z catseye $').
-author('catseye@catseye.mb.ca').
-copyright('Copyright (c)2004 Cat`s Eye Technologies. All rights reserved.').
-export([request_line/1, http_version/1, uri/1, hostport/1]).
-export([header_field/1, body/1]).
-export([unescape_hex/1]).
-import(lists).
-import(string).
%% @spec request_line(string()) ->
%%   {Host, Port, Line, Major, Minor, Method, URI, Root, Args, ArgList}
%% @doc Parses a well-formed HTTP request line of the form
%% "Method URI HTTP-Version". The line is chomped first and then split
%% on spaces; anything other than exactly three space-separated parts
%% makes the match fail. Line is the chomped request line, Major and
%% Minor come from http_version/1, and Host, Port, Root, Args and
%% ArgList come from uri/1 applied to the URI part.
request_line(Request) ->
    Line = openflax.string:chomp(Request),
    [Method, URI, Version] = string:tokens(Line, " "),
    {Major, Minor} = http_version(Version),
    {Host, Port, Root, Args, ArgList} = uri(URI),
    {Host, Port, Line, Major, Minor, Method, URI, Root, Args, ArgList}.
%% @spec hostport(string()) -> {string(), string()}
%% @doc Splits a combined host and port such as "www.foo.bar:1234"
%% at its first colon and returns the host and the port as separate
%% strings. When there is no colon the port defaults to "80". The
%% host is always returned in lowercase.
hostport(HostPort) ->
    case string:chr(HostPort, $:) of
        0 ->
            {openflax.string:to_lower(HostPort), "80"};
        ColonPosition ->
            %% Everything before the colon is the host; the colon itself
            %% is dropped and the remainder is the port.
            {Host, [$: | Port]} = lists:split(ColonPosition - 1, HostPort),
            {openflax.string:to_lower(Host), Port}
    end.
%% @spec http_version(string()) -> {string(), string()}
%% @doc Given an HTTP version string such as "HTTP/1.1", returns the
%% major and minor version numbers as separate strings. Leading zeros
%% are dropped, and a component that is missing or consists only of
%% zeros is reported as "0", so "HTTP/0.9" yields {"0", "9"} and
%% "HTTP/2" yields {"2", "0"}. Anything after the minor component is
%% ignored. Fails with function_clause when the string does not start
%% with "HTTP/" or a component contains a non-digit character.
http_version("HTTP/" ++ HTTPVersion) ->
    {Major, AfterMajor} = parse_http_version_digits(HTTPVersion, []),
    {Minor, _Ignored} = parse_http_version_digits(AfterMajor, []),
    {Major, Minor}.

%% Reads one dot-terminated run of digits from the front of the input.
%% Acc holds the digits seen so far in reverse order. Returns
%% {Digits, Remainder}, where Remainder is whatever follows the dot
%% (or "" at the end of the input).
parse_http_version_digits("", "") ->
    %% Nothing but (possibly stripped) zeros before the end of input.
    {"0", ""};
parse_http_version_digits("", Acc) ->
    {lists:reverse(Acc), ""};
parse_http_version_digits([$. | Tail], "") ->
    %% Nothing but (possibly stripped) zeros before the dot: report
    %% "0" rather than an empty string, matching the end-of-input case.
    {"0", Tail};
parse_http_version_digits([$. | Tail], Acc) ->
    {lists:reverse(Acc), Tail};
parse_http_version_digits([$0 | Tail], []) ->
    %% Skip leading zeros; zeros after a significant digit are kept by
    %% the clause below.
    parse_http_version_digits(Tail, []);
parse_http_version_digits([Digit | Tail], Acc) when Digit >= $0, Digit =< $9 ->
    parse_http_version_digits(Tail, [Digit | Acc]).
%% @spec header_field(string()) ->
%%   {atom(), string() | [{string(), string()}]}
%% @doc Parses a single, well-formed HTTP header line.
%% The line is chomped and split into name and value at ":". The
%% returned key is an atom built from "req_" followed by the name as
%% converted by openflax.string:from_header/1. The value is scanned as
%% Value;key=value;key=value, with double-quoted strings supported.
%% When the scan yields exactly one entry with an empty value, only
%% that entry's text is returned; otherwise the whole list of pairs is
%% returned. Most of the headers we are concerned with look enough
%% like this for now. TODO write different parsers for different kinds
%% of headers.
header_field(Line) ->
    Chomped = openflax.string:chomp(Line),
    {Name, RawValue} = openflax.string:keyvalue(Chomped, ":"),
    %% NOTE(review): the header name comes from the request, and atoms
    %% are never garbage collected, so list_to_atom/1 here lets a client
    %% grow the atom table without bound. Left as is because callers
    %% receive atom keys - confirm before changing.
    Key = list_to_atom("req_" ++ openflax.string:from_header(Name)),
    case scan_header_field(RawValue) of
        [{Single, ""}] -> {Key, Single};
        Pairs -> {Key, Pairs}
    end.
%% Scans a header value into tokens and pairs them up; the result is
%% a list of {Key, Value} tuples in the order they appear in the value.
scan_header_field(F) ->
    Tokens = scan_header_field(F, "", []),
    %% convert/2 accumulates its pairs back to front.
    ReversedPairs = convert(Tokens, []),
    lists:reverse(ReversedPairs).
%% Turns the flat token list produced by scan_header_field/3 into
%% {Key, Value} pairs, prepended to Acc (so the result is in reverse
%% order). Empty tokens are skipped. A token ending in "=" is a key:
%% its last character is removed, it is stripped of blanks, and it is
%% paired with the token that follows. Any other token is paired with
%% an empty value.
convert([], Acc) ->
    Acc;
convert(["" | Rest], Acc) ->
    convert(Rest, Acc);
convert([Token | Rest], Acc) ->
    case lists:suffix("=", Token) of
        true ->
            %% The token after a key is consumed as that key's value.
            Remaining = tl(Rest),
            Key = string:strip(openflax.string:truncate(Token)),
            convert(Remaining, [{Key, hd(Rest)} | Acc]);
        false ->
            convert(Rest, [{Token, []} | Acc])
    end.
%% Splits a header value into tokens. Current is the token being
%% built (characters in reverse order) and Done is the list of
%% finished tokens (most recent first). "=" ends the current token and
%% is kept as its last character; ";" ends the current token and is
%% dropped; a double quote switches to scan_header_string/3. The
%% token in progress is always emitted at the end of input, even when
%% it is empty.
scan_header_field([], Current, Done) ->
    lists:reverse([lists:reverse(Current) | Done]);
scan_header_field([$= | Rest], Current, Done) ->
    scan_header_field(Rest, [], [lists:reverse([$= | Current]) | Done]);
scan_header_field([$; | Rest], Current, Done) ->
    scan_header_field(Rest, [], [lists:reverse(Current) | Done]);
scan_header_field([$\" | Rest], Current, Done) ->
    scan_header_string(Rest, Current, Done);
scan_header_field([Char | Rest], Current, Done) ->
    scan_header_field(Rest, [Char | Current], Done).

%% Scans the inside of a double-quoted string: every character,
%% including "=" and ";", is added to the current token until the
%% closing quote hands control back to scan_header_field/3. The quote
%% characters themselves are not kept. An unterminated string simply
%% ends at the end of input.
scan_header_string([], Current, Done) ->
    lists:reverse([lists:reverse(Current) | Done]);
scan_header_string([$\" | Rest], Current, Done) ->
    scan_header_field(Rest, Current, Done);
scan_header_string([Char | Rest], Current, Done) ->
    scan_header_string(Rest, [Char | Current], Done).
%% @spec unescape_hex(string()) -> string()
%% @doc Unescapes URI and/or form data: each "+" becomes a space and
%% each "%" followed by two characters is replaced by the result of
%% openflax.string:decode_hex/1 on those two characters. A "%" with
%% fewer than two characters after it is copied through unchanged.
unescape_hex(H) ->
    unescape_hex(H, []).

%% Worker for unescape_hex/1; Out collects the output in reverse.
unescape_hex([$+ | Rest], Out) ->
    unescape_hex(Rest, [$\s | Out]);
unescape_hex([$%, Hi, Lo | Rest], Out) ->
    unescape_hex(Rest, [openflax.string:decode_hex([Hi, Lo]) | Out]);
unescape_hex([Char | Rest], Out) ->
    unescape_hex(Rest, [Char | Out]);
unescape_hex([], Out) ->
    lists:reverse(Out).
%% @spec uri(string()) ->
%%   {string(), string(), string(), string(), [{string(), string()}]}
%% @doc Returns basic request and arguments given a URI, as
%% {Host, Port, Root, ArgString, Args}.
%%
%% The URI is split at "?" into the resource and the raw query string.
%% For an absolute "http://" URI, Host and Port are taken from the
%% authority part via hostport/1 and Root is whatever follows the
%% first slash after it; an absolute URI with no path, such as
%% "http://www.foo.bar", gives an empty Root. For any other URI, Host
%% and Port are empty strings and Root is the resource without its
%% leading slash. Root and ArgString are returned hex-unescaped. Args
%% holds one unescaped {Key, Value} pair for every "&"-separated
%% argument of the form key=value; arguments of any other shape are
%% dropped.
uri(URI) ->
    %% Only the first token is the resource; any further "?" characters
    %% belong to the query string and are put back.
    [Resource | QueryParts] = string:tokens(URI, "?"),
    Args = openflax.string:join("?", QueryParts),
    {Host, Port, Root} = uri_location(Resource),
    ArgTuples = uri_args(string:tokens(Args, "&")),
    {Host, Port, unescape_hex(Root), unescape_hex(Args), ArgTuples}.

%% Splits the resource part of a URI into {Host, Port, Root}.
uri_location("http://" ++ DomainAndRoot) ->
    case string:chr(DomainAndRoot, $/) of
        0 ->
            %% No slash at all: the whole remainder is the authority.
            {Host, Port} = hostport(DomainAndRoot),
            {Host, Port, ""};
        SlashPos ->
            {Host, Port} = hostport(string:left(DomainAndRoot, SlashPos - 1)),
            %% Drop the authority and the slash; this is "" when the
            %% slash is the last character.
            {Host, Port, lists:nthtail(SlashPos, DomainAndRoot)}
    end;
uri_location([$/ | Root]) ->
    {"", "", Root};
uri_location(Root) ->
    {"", "", Root}.

%% Converts "key=value" argument strings into unescaped {Key, Value}
%% pairs, keeping their order and skipping arguments that do not split
%% into exactly one key and one value.
uri_args(ArgList) ->
    [{unescape_hex(Key), unescape_hex(Val)}
     || Arg <- ArgList, [Key, Val] <- [string:tokens(Arg, "=")]].
%% @spec body(string()) -> [{string(), string()}]
%% @doc Parses an HTTP request body, assuming it is in form POST
%% format. The body (which may be a nested list of characters) is
%% flattened and parsed as the query string of a dummy "/" URI by
%% uri/1; only the resulting list of {Key, Value} pairs is returned.
body(Body) ->
    FormURI = "/?" ++ lists:flatten(Body),
    {_Host, _Port, _Root, _ArgString, ArgList} = uri(FormURI),
    ArgList.
%%% END of openflax/http/parse.erl %%%