%%% BEGIN openflax/http/parse.erl %%% %%% %%% openflax - Open Source web server for Erlang/OTP %%% Copyright (c)2004 Cat's Eye Technologies. All rights reserved. %%% %%% Redistribution and use in source and binary forms, with or without %%% modification, are permitted provided that the following conditions %%% are met: %%% %%% Redistributions of source code must retain the above copyright %%% notice, this list of conditions and the following disclaimer. %%% %%% Redistributions in binary form must reproduce the above copyright %%% notice, this list of conditions and the following disclaimer in %%% the documentation and/or other materials provided with the %%% distribution. %%% %%% Neither the name of Cat's Eye Technologies nor the names of its %%% contributors may be used to endorse or promote products derived %%% from this software without specific prior written permission. %%% %%% THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND %%% CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, %%% INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF %%% MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE %%% DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE %%% LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, %%% OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, %%% PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, %%% OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON %%% ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, %%% OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY %%% OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE %%% POSSIBILITY OF SUCH DAMAGE. %% @doc Routines for parsing HTTP requests. %% %% @end -module(openflax.http.parse). -vsn('$Id: parse.erl 31 2004-04-23 07:00:11Z catseye $'). -author('catseye@catseye.mb.ca'). -copyright('Copyright (c)2004 Cat`s Eye Technologies. All rights reserved.'). 
-export([request_line/1, http_version/1, uri/1, hostport/1]).
-export([header_field/1, body/1]).
-export([unescape_hex/1]).

-import(lists).
-import(string).

%% @spec request_line(string()) ->
%%   {Host, Port, Request0, Major, Minor, Method, URI, Root, Args, ArgList}
%% @doc Parses a well-formed HTTP request line.
%% The line is chomped, then split on spaces into exactly three parts
%% (method, URI and HTTP version); any other number of parts fails the
%% match.  <code>Request0</code> is the chomped request line itself;
%% the remaining elements come from <code>http_version/1</code> and
%% <code>uri/1</code>.

request_line(Request) ->
    Request0 = openflax.string:chomp(Request),
    [Method, URI, HTTPVersion] = string:tokens(Request0, " "),
    {Major, Minor} = http_version(HTTPVersion),
    {Host, Port, Root, Args, ArgList} = uri(URI),
    {Host, Port, Request0, Major, Minor, Method, URI, Root, Args, ArgList}.

%% @spec hostport(string()) -> {string(), string()}
%% @doc Given a host and port both in a string like
%% "www.foo.bar:1234", returns the host and
%% port as separate strings.  The host is always returned
%% in lowercase.  When the string contains no colon, the port
%% defaults to "80".

hostport(HostPort) ->
    {Host, Port} =
        case string:chr(HostPort, $:) of
            0 ->
                {HostPort, "80"};
            ColonPosition ->
                %% Everything before the first colon is the host,
                %% everything after it is the port.
                {string:left(HostPort, ColonPosition - 1),
                 string:right(HostPort, length(HostPort) - ColonPosition)}
        end,
    {openflax.string:to_lower(Host), Port}.

%% @spec http_version(string()) -> {string(), string()}
%% @doc Given an HTTP version string, returns the major and
%% minor version numbers as separate strings.
%% Leading zeros are dropped and a missing or all-zero number is
%% returned as "0", so "HTTP/1.0" gives {"1", "0"} and "HTTP/0.9"
%% gives {"0", "9"}.  Only strings starting with "HTTP/" are accepted.

http_version("HTTP/" ++ HTTPVersion) ->
    {Major, AfterMajor} = parse_http_version_digits(HTTPVersion, []),
    {Minor, _Rest} = parse_http_version_digits(AfterMajor, []),
    {Major, Minor}.

%% Reads one run of decimal digits, stopping at a '.' or at the end of
%% the input, and returns {Digits, Remainder}.  Acc holds the digits
%% read so far in reverse order.  A run with no significant digits is
%% reported as "0".  Any character other than a digit or '.' matches
%% no clause.
parse_http_version_digits("", "") ->
    {"0", ""};
parse_http_version_digits("", Acc) ->
    {lists:reverse(Acc), ""};
parse_http_version_digits([$. | Tail], []) ->
    %% Nothing but (stripped) zeros before the dot, e.g. "0.9".
    {"0", Tail};
parse_http_version_digits([$. | Tail], Acc) ->
    {lists:reverse(Acc), Tail};
parse_http_version_digits("0" ++ Tail, []) ->
    %% Leading zero: skip it.
    parse_http_version_digits(Tail, []);
parse_http_version_digits([Digit | Tail], Acc) when Digit >= $0, Digit =< $9 ->
    parse_http_version_digits(Tail, [Digit | Acc]).

%% @spec header_field(string()) ->
%%   {atom(), string() | [{string(), string()}]}
%% @doc Parses a single, well-formed HTTP header.
%% Actually this is only for the kinds that work like
%% Key: Value;key=value;key=value, also supporting
%% quoted strings.  However, most of the headers we're concerned
%% with parsing look enough like this for now.  TODO write
%% different parsers for different kinds of headers.
%%
%% The returned key is an atom made of "req_" followed by the
%% converted header name.  The returned value is the bare string when
%% the header consists of a single item with no parameter value, and
%% a list of {Item, Value} pairs otherwise.

header_field(Line) ->
    {Key, Value} =
        openflax.string:keyvalue(openflax.string:chomp(Line), ":"),
    %% NOTE(review): the header name is supplied by the client and
    %% list_to_atom/1 creates atoms that are never garbage-collected,
    %% so arbitrary header names can exhaust the atom table.  Left
    %% as-is because callers presumably match on these atoms; consider
    %% restricting this to a known set of headers.
    Key0 = list_to_atom("req_" ++ openflax.string:from_header(Key)),
    Value0 =
        case scan_header_field(Value) of
            [{V, ""}] -> V;
            Else -> Else
        end,
    {Key0, Value0}.

%% Scans a header value and returns its items, in order, as
%% {Item, Value} pairs; an item without "=value" gets the value [].
scan_header_field(F) ->
    Tokens = scan_header_field(F, "", []),
    lists:reverse(convert(Tokens, [])).

%% Turns the token list from the scanner into {Item, Value} pairs,
%% accumulated in reverse order.  A key token consumes the token that
%% follows it as its value; empty text tokens are dropped.  The scanner
%% always emits a final token after the last '=', so a key token is
%% never the last element.
convert([], Acc) ->
    Acc;
convert([{text, ""} | Tail], Acc) ->
    convert(Tail, Acc);
convert([{key, Raw}, {_Tag, Value} | Tail], Acc) ->
    %% Raw still carries its trailing "=", which truncate is used to
    %% remove before surrounding blanks are stripped.
    Name = string:strip(openflax.string:truncate(Raw)),
    convert(Tail, [{Name, Value} | Acc]);
convert([{text, Text} | Tail], Acc) ->
    convert(Tail, [{Text, []} | Acc]).

%% Scanner, unquoted state.  String holds the characters of the
%% current token in reverse; Stack holds the finished tokens, newest
%% first.  Tokens are tagged: {key, Chars ++ "="} when ended by '=',
%% {text, Chars} when ended by ';' or by the end of the input.
%% Tagging, rather than inspecting the last character later, keeps a
%% quoted string that merely ends in '=' from being taken for a key.
scan_header_field("", String, Stack) ->
    lists:reverse([{text, lists:reverse(String)} | Stack]);
scan_header_field("=" ++ Tail, String, Stack) ->
    scan_header_field(Tail, "", [{key, lists:reverse(String) ++ "="} | Stack]);
scan_header_field(";" ++ Tail, String, Stack) ->
    scan_header_field(Tail, "", [{text, lists:reverse(String)} | Stack]);
scan_header_field("\"" ++ Tail, String, Stack) ->
    scan_header_string(Tail, String, Stack);
scan_header_field([Char | Tail], String, Stack) ->
    scan_header_field(Tail, [Char | String], Stack).

%% Scanner, inside a double-quoted string.  Every character up to the
%% closing quote, including '=' and ';', is added to the current token;
%% the quotes themselves are dropped.  An unterminated quoted string
%% ends the scan at the end of the input.
scan_header_string("", String, Stack) ->
    lists:reverse([{text, lists:reverse(String)} | Stack]);
scan_header_string("\"" ++ Tail, String, Stack) ->
    scan_header_field(Tail, String, Stack);
scan_header_string([Char | Tail], String, Stack) ->
    scan_header_string(Tail, [Char | String], Stack).

%% @spec unescape_hex(string()) -> string()
%% @doc Unescapes the hex sequences in URI and/or form data.
%% A "+" becomes a space and "%" followed by two characters is
%% replaced by the result of decoding those two characters as hex.

unescape_hex(H) ->
    lists:reverse(unescape_hex(H, [])).
%% Worker for unescape_hex/1: builds the unescaped string in reverse
%% in Acc.  A '%' that is not followed by two more characters is
%% copied through unchanged.
unescape_hex([], Acc) ->
    Acc;
unescape_hex([$+ | T], Acc) ->
    unescape_hex(T, [$\s | Acc]);
unescape_hex([$%, A, B | T], Acc) ->
    unescape_hex(T, [openflax.string:decode_hex([A, B]) | Acc]);
unescape_hex([H | T], Acc) ->
    unescape_hex(T, [H | Acc]).

%% @spec uri(string()) ->
%%   {string(), string(), string(), string(), [{string(), string()}]}
%% @doc Returns basic request and arguments given a URI.
%% {Host, Port, Root, ArgString, Args}.
%%
%% Host and Port are empty strings unless the URI starts with
%% "http://".  Root is the unescaped path without its leading slash,
%% ArgString is the unescaped text after the first "?", and Args is
%% the list of unescaped {Key, Value} pairs found in the argument
%% string.  Arguments that do not split into exactly one key and one
%% value on "=" are left out of Args.

uri(URI) ->
    {Root, Args} = split_query(URI),
    {Host, Port, Root1} =
        case Root of
            "http://" ++ DomainAndRoot -> split_authority(DomainAndRoot);
            [$/ | TruncRoot] -> {"", "", TruncRoot};
            R -> {"", "", R}
        end,
    %% The comprehension keeps the arguments in their original order;
    %% the pattern generator silently skips malformed ones.
    ArgTuples = [{unescape_hex(Key), unescape_hex(Val)}
                 || Arg <- string:tokens(Args, "&"),
                    [Key, Val] <- [string:tokens(Arg, "=")]],
    {Host, Port, unescape_hex(Root1), unescape_hex(Args), ArgTuples}.

%% Splits a URI at "?" into {Root, ArgString}.  A URI with no
%% argument part yields an empty argument string; further "?"-separated
%% parts are rejoined with "?".
split_query(URI) ->
    case string:tokens(URI, "?") of
        [Root] -> {Root, ""};
        [Root | Rest] -> {Root, openflax.string:join("?", Rest)}
    end.

%% Splits what follows "http://" in an absolute URI into
%% {Host, Port, Root}, where Root is the part after the first slash.
split_authority(DomainAndRoot) ->
    Length = length(DomainAndRoot),
    case string:chr(DomainAndRoot, $/) of
        0 ->
            %% No slash at all, e.g. "http://www.foo.bar": the whole
            %% remainder is the host and port, and the root is empty.
            {Host, Port} = hostport(DomainAndRoot),
            {Host, Port, ""};
        Length ->
            %% The only slash is the final character.
            {Host, Port} = hostport(openflax.string:truncate(DomainAndRoot)),
            {Host, Port, ""};
        SlashPos ->
            {Host, Port} = hostport(string:left(DomainAndRoot, SlashPos - 1)),
            {Host, Port, string:right(DomainAndRoot, Length - SlashPos)}
    end.

%% @spec body(string()) -> [{string(), string()}]
%% @doc Parses an HTTP request body, assuming it is in form POST format.
%% The body is flattened and handed to <code>uri/1</code> as the
%% argument part of a URI; only the resulting argument pairs are
%% returned.

body(Body) ->
    {_Host, _Port, _Root, _ArgString, ArgList} =
        uri("/?" ++ lists:flatten(Body)),
    ArgList.

%%% END of openflax/http/parse.erl %%%