1+ //! Types/fns concerning URLs (see RFC 3986)
2+
3+ import map;
4+ import map:: * ;
5+
6+ export url, userinfo, query, from_str, to_str;
7+
8+ type url = {
9+ scheme : ~str ,
10+ user : option < userinfo > ,
11+ host : ~str ,
12+ path: ~str ,
13+ query : query ,
14+ fragment : option < ~str >
15+ } ;
16+
17+ type userinfo = {
18+ user : ~str ,
19+ pass : option < ~str >
20+ } ;
21+
22+ type query = map:: hashmap < ~str , ~str > ;
23+
24+ fn url ( -scheme : ~str , -user : option < userinfo > , -host : ~str ,
25+ -path : ~str , -query : query , -fragment : option < ~str > ) -> url {
26+ { scheme: scheme, user: user, host: host,
27+ path: path, query: query, fragment: fragment }
28+ }
29+
30+ fn userinfo ( -user : ~str , -pass : option < ~str > ) -> userinfo {
31+ { user: user, pass: pass}
32+ }
33+
34+ fn split_char_first ( s : ~str , c : char ) -> ( ~str , ~str ) {
35+ let mut v = str:: splitn_char ( s, c, 1 ) ;
36+ if v. len ( ) == 1 {
37+ ret ( s, ~"") ;
38+ } else {
39+ ret ( vec:: shift ( v) , vec:: pop ( v) ) ;
40+ }
41+ }
42+
43+ fn userinfo_from_str ( uinfo : ~str ) -> userinfo {
44+ let ( user, p) = split_char_first ( uinfo, ':' ) ;
45+ let pass = if str:: len ( p) == 0 {
46+ option:: none
47+ } else {
48+ option:: some ( p)
49+ } ;
50+ ret userinfo( user, pass) ;
51+ }
52+
53+ fn userinfo_to_str ( -userinfo : userinfo ) -> ~str {
54+ if option:: is_some ( userinfo. pass ) {
55+ ret str:: concat ( ~[ copy userinfo. user , ~": ",
56+ option:: unwrap ( copy userinfo. pass ) ,
57+ ~"@"] ) ;
58+ } else {
59+ ret str:: concat ( ~[ copy userinfo. user , ~"@"] ) ;
60+ }
61+ }
62+
63+ fn query_from_str ( rawquery : ~str ) -> query {
64+ let query: query = map:: str_hash ( ) ;
65+ if str:: len ( rawquery) != 0 {
66+ for str:: split_char( rawquery, '&' ) . each |p| {
67+ let ( k, v) = split_char_first ( p, '=' ) ;
68+ query. insert ( k, v) ;
69+ } ;
70+ }
71+ ret query;
72+ }
73+
74+ fn query_to_str ( query : query ) -> ~str {
75+ let mut strvec = ~[ ] ;
76+ for query. each |k, v| {
77+ strvec += ~[ #fmt ( "%s=%s" , k, v) ] ;
78+ } ;
79+ ret str:: connect ( strvec, ~"& ");
80+ }
81+
82+ fn get_scheme(rawurl: ~str) -> option::option<(~str, ~str)> {
83+ for str::each_chari(rawurl) |i,c| {
84+ if char::is_alphabetic(c) {
85+ again;
86+ } else if c == ':' && i != 0 {
87+ ret option::some((rawurl.slice(0,i),
88+ rawurl.slice(i+3,str::len(rawurl))));
89+ } else {
90+ ret option::none;
91+ }
92+ };
93+ ret option::none;
94+ }
95+
96+ /**
97+ * Parse a `str` to a `url`
98+ *
99+ * # Arguments
100+ *
101+ * `rawurl` - a string representing a full url, including scheme.
102+ *
103+ * # Returns
104+ *
105+ * a `url` that contains the parsed representation of the url.
106+ *
107+ */
108+
109+ fn from_str(rawurl: ~str) -> result::result<url, ~str> {
110+ let mut schm = get_scheme(rawurl);
111+ if option::is_none(schm) {
112+ ret result::err(~" invalid scheme") ;
113+ }
114+ let ( scheme, rest) = option:: unwrap ( schm) ;
115+ let ( u, rest) = split_char_first ( rest, '@' ) ;
116+ let user = if str:: len ( rest) == 0 {
117+ option:: none
118+ } else {
119+ option:: some ( userinfo_from_str ( u) )
120+ } ;
121+ let rest = if str:: len ( rest) == 0 {
122+ u
123+ } else {
124+ rest
125+ } ;
126+ let ( rest, frag) = split_char_first ( rest, '#' ) ;
127+ let fragment = if str:: len ( frag) == 0 {
128+ option:: none
129+ } else {
130+ option:: some ( frag)
131+ } ;
132+ let ( rest, query) = split_char_first ( rest, '?' ) ;
133+ let query = query_from_str ( query) ;
134+ let ( host, pth) = split_char_first ( rest, '/' ) ;
135+ let mut path = pth;
136+ if str:: len ( path) != 0 {
137+ str:: unshift_char ( path, '/' ) ;
138+ }
139+
140+ ret result:: ok ( url ( scheme, user, host, path, query, fragment) ) ;
141+ }
142+
143+ /**
144+ * Format a `url` as a string
145+ *
146+ * # Arguments
147+ *
148+ * `url` - a url.
149+ *
150+ * # Returns
151+ *
152+ * a `str` that contains the formatted url. Note that this will usually
153+ * be an inverse of `from_str` but might strip out unneeded separators.
154+ * for example, "http://somehost.com?", when parsed and formatted, will
155+ * result in just "http://somehost.com".
156+ *
157+ */
158+ fn to_str( url : url ) -> ~str {
159+ let user = if option:: is_some ( url. user ) {
160+ userinfo_to_str ( option:: unwrap ( copy url. user ) )
161+ } else {
162+ ~""
163+ } ;
164+ let query = if url. query . size ( ) == 0 {
165+ ~""
166+ } else {
167+ str:: concat ( ~[ ~"?", query_to_str ( url. query ) ] )
168+ } ;
169+ let fragment = if option:: is_some ( url. fragment ) {
170+ str:: concat( ~[ ~"#", option::unwrap(copy url.fragment)])
171+ } else {
172+ ~" "
173+ } ;
174+
175+ ret str:: concat ( ~[ copy url. scheme ,
176+ ~": //",
177+ user,
178+ copy url. host ,
179+ copy url. path ,
180+ query,
181+ fragment] ) ;
182+ }
183+
184+ #[ cfg ( test) ]
185+ mod tests {
186+ #[ test]
187+ fn test_full_url_parse_and_format( ) {
188+ let url = ~"
http : //user:[email protected] /doc?s=v#something"; 189+ assert to_str ( result:: unwrap ( from_str ( url) ) ) == url;
190+ }
191+
192+ #[ test]
193+ fn test_userless_url_parse_and_format ( ) {
194+ let url = ~"http: //rust-lang.org/doc?s=v#something";
195+ assert to_str ( result:: unwrap ( from_str ( url) ) ) == url;
196+ }
197+
198+ #[ test]
199+ fn test_queryless_url_parse_and_format ( ) {
200+ let url = ~"http
: //user:[email protected] /doc#something"; 201+ assert to_str ( result:: unwrap ( from_str ( url) ) ) == url;
202+ }
203+
204+ #[ test]
205+ fn test_empty_query_url_parse_and_format ( ) {
206+ let url = ~"http
: //user:[email protected] /doc?#something"; 207+ let
should_be = ~"http
: //user:[email protected] /doc#something"; 208+ assert to_str ( result:: unwrap ( from_str ( url) ) ) == should_be;
209+ }
210+
211+ #[ test]
212+ fn test_fragmentless_url_parse_and_format ( ) {
213+ let url = ~"http
: //user:[email protected] /doc?q=v"; 214+ assert to_str ( result:: unwrap ( from_str ( url) ) ) == url;
215+ }
216+
217+ #[ test]
218+ fn test_minimal_url_parse_and_format ( ) {
219+ let url = ~"http: //rust-lang.org/doc";
220+ assert to_str ( result:: unwrap ( from_str ( url) ) ) == url;
221+ }
222+
223+ #[ test]
224+ fn test_scheme_host_only_url_parse_and_format ( ) {
225+ let url = ~"http: //rust-lang.org";
226+ assert to_str ( result:: unwrap ( from_str ( url) ) ) == url;
227+ }
228+
229+ #[ test]
230+ fn test_pathless_url_parse_and_format ( ) {
231+ let url = ~"http
: //user:[email protected] ?q=v#something"; 232+ assert to_str ( result:: unwrap ( from_str ( url) ) ) == url;
233+ }
234+
235+ #[ test]
236+ fn test_scheme_host_fragment_only_url_parse_and_format ( ) {
237+ let url = ~"http: //rust-lang.org#something";
238+ assert to_str ( result:: unwrap ( from_str ( url) ) ) == url;
239+ }
240+
241+ }
0 commit comments