Changeset 648
- Timestamp:
- 10/11/07 19:11:52 (1 year ago)
- Files:
-
- branches/query_parse/Rakefile (modified) (1 diff)
- branches/query_parse/ext/http11/ext_help.h (modified) (1 diff)
- branches/query_parse/ext/http11/form_parser.h (added)
- branches/query_parse/ext/http11/form_parser.rl (added)
- branches/query_parse/ext/http11/http11.c (modified) (6 diffs)
- branches/query_parse/ext/http11/http11_parser.c (deleted)
- branches/query_parse/ext/http11/http11_parser.h (modified) (1 diff)
- branches/query_parse/ext/http11/http11_parser.rl (modified) (2 diffs)
- branches/query_parse/test/test_http11.rb (modified) (2 diffs)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
branches/query_parse/Rakefile
r646 r648 109 109 end 110 110 111 task :ragel do 111 task :ragel => ['ext/http11/http11_parser.c', 'ext/http11/form_parser.c'] 112 113 file('ext/http11/http11_parser.c' => 'ext/http11/http11_parser.rl') do 112 114 sh %{ragel ext/http11/http11_parser.rl | rlgen-cd -G2 -o ext/http11/http11_parser.c} 115 end 116 117 file('ext/http11/form_parser.c' => 'ext/http11/form_parser.rl') do 118 sh %{ragel ext/http11/form_parser.rl | rlgen-cd -G2 -o ext/http11/form_parser.c} 113 119 end 114 120 branches/query_parse/ext/http11/ext_help.h
r4 r648 12 12 #endif 13 13 14 #define DEBUG (RTEST(ruby_debug)) 15 #define INSPECT(n,a) (fprintf(stderr, "*** %s: %s\n", n, RSTRING_PTR(rb_funcall(a, rb_intern("inspect"), 0)))) 16 17 14 18 #endif branches/query_parse/ext/http11/http11.c
r588 r648 8 8 #include <string.h> 9 9 #include "http11_parser.h" 10 #include "form_parser.h" 10 11 #include <ctype.h> 11 12 #include "tst.h" … … 15 16 static VALUE cURIClassifier; 16 17 static VALUE eHttpParserError; 18 static VALUE eQSPError; 17 19 18 20 #define id_handler_map rb_intern("@handler_map") … … 499 501 { 500 502 void *handler = NULL; 501 int pref_len = 0;503 unsigned int pref_len = 0; 502 504 struct tst *tst = NULL; 503 505 VALUE result; … … 533 535 return result; 534 536 } 537 538 /***** (Un)escape URL functions *****/ 539 540 VALUE unescape_helper(const char *at, long len) 541 { 542 long out_len = url_unescape_length(at, len); 543 char out[out_len]; 544 545 url_unescape(at, len, out); 546 return rb_str_new(out, out_len); 547 } 548 549 VALUE escape_helper(const char *at, long len) 550 { 551 long out_len = url_escape_length(at, len); 552 char out[out_len]; 553 554 url_escape(at, len, out); 555 return rb_str_new(out, out_len); 556 } 557 558 VALUE HttpParser_unescape(VALUE self, VALUE input) { 559 Check_Type(input, T_STRING); 560 return unescape_helper(RSTRING_PTR(input), RSTRING_LEN(input)); 561 } 562 563 VALUE HttpParser_escape(VALUE self, VALUE input) { 564 Check_Type(input, T_STRING); 565 return escape_helper(RSTRING_PTR(input), RSTRING_LEN(input)); 566 567 } 568 569 struct qsp_data { 570 VALUE keys; 571 VALUE value; 572 VALUE hash; 573 }; 574 575 /******** Query Parse Ragel Callbacks *****************************/ 576 void key_cb(void *data, const char *at, size_t length) 577 { 578 struct qsp_data *qsp_data = (struct qsp_data *) data; 579 VALUE new_key; 580 581 if(DEBUG) INSPECT("keys", qsp_data->keys); 582 583 if(qsp_data->keys == Qnil) { 584 qsp_data->keys = rb_ary_new(); 585 rb_gc_mark(qsp_data->keys); 586 } 587 588 new_key = unescape_helper(at, (long)length); 589 if(DEBUG) INSPECT("key", new_key); 590 rb_ary_push(qsp_data->keys, new_key); 591 } 592 593 void value_cb(void *data, const char *at, size_t length) 594 { 595 struct qsp_data *qsp_data = (struct qsp_data *) data; 596 597 if(qsp_data->value != Qnil) 598 rb_raise(eQSPError, "Query pair value already set!"); 599 qsp_data->value = unescape_helper(at, (long)length); 600 if(DEBUG) INSPECT("value", qsp_data->value); 601 } 602 603 void pair_cb(void *data, const char *at, size_t length) 604 { 605 struct qsp_data *qsp_data = (struct qsp_data *) data; 606 VALUE last_key, key, a, b; 607 608 if(DEBUG) INSPECT("pair", Qnil); 609 610 last_key = rb_ary_pop(qsp_data->keys); 611 612 a = qsp_data->hash; 613 while((key = rb_ary_shift(qsp_data->keys)) != Qnil) { 614 b = rb_hash_aref(a, key); 615 if(b == Qnil) { 616 b = rb_hash_new(); 617 rb_hash_aset(a, key, b); 618 } 619 a = b; 620 } 621 /* a is either an array or hash to put our value into */ 622 /* TODO: if last_key is blank, then a should be an array, and we push into 623 * it */ 624 rb_hash_aset(a, last_key, qsp_data->value); 625 626 qsp_data->keys = Qnil; 627 qsp_data->value = Qnil; 628 } 629 630 void error_cb(void *data) 631 { 632 rb_raise(eQSPError, "Parse Error."); 633 } 634 635 /******** </Query Parse Ragel Callbacks> **************************/ 636 637 // TODO: make optional 2nd argument to pass in Hash to add to. 638 VALUE HttpParser_form_parse(VALUE self, VALUE input) 639 { 640 Check_Type(input, T_STRING); 641 form_parser parser; 642 struct qsp_data qsp_data; 643 size_t nread; 644 TRACE(); 645 646 parser.data = NULL; 647 parser.pair_cb = pair_cb; 648 parser.key_cb = key_cb; 649 parser.value_cb = value_cb; 650 parser.error_cb = error_cb; 651 652 qsp_data.hash = rb_hash_new(); 653 rb_gc_mark(qsp_data.hash); 654 qsp_data.keys = Qnil; 655 qsp_data.value = Qnil; 656 parser.data = (void *) &qsp_data; 657 658 if(DEBUG) INSPECT("parse input", input); 659 660 nread = form_parser_execute(&parser, 661 StringValuePtr(input), 662 RSTRING_LEN(input), 663 0); 664 if(DEBUG) INSPECT("nread", INT2FIX((int)nread)); 665 666 return qsp_data.hash; 667 } 668 535 669 536 670 … … 572 706 rb_define_method(cHttpParser, "finished?", HttpParser_is_finished,0); 573 707 rb_define_method(cHttpParser, "nread", HttpParser_nread,0); 708 709 rb_define_singleton_method(cHttpParser, "unescape", HttpParser_unescape, 1); 710 rb_define_singleton_method(cHttpParser, "escape", HttpParser_escape, 1); 711 rb_define_singleton_method(cHttpParser, "form_parse", HttpParser_form_parse, 1); 574 712 575 713 cURIClassifier = rb_define_class_under(mMongrel, "URIClassifier", rb_cObject); … … 579 717 rb_define_method(cURIClassifier, "unregister", URIClassifier_unregister, 1); 580 718 rb_define_method(cURIClassifier, "resolve", URIClassifier_resolve, 1); 581 } 719 720 } branches/query_parse/ext/http11/http11_parser.h
r311 r648 44 44 int http_parser_is_finished(http_parser *parser); 45 45 46 47 long url_escape_length(const char *s, long len); 48 void url_escape(const char *s, long len, char *out); 49 50 long url_unescape_length(const char *s, long len); 51 void url_unescape(const char *s, long len, char *out); 52 46 53 #define http_parser_nread(parser) (parser)->nread 47 54 branches/query_parse/ext/http11/http11_parser.rl
r331 r648 13 13 #define MARK(M,FPC) (parser->M = (FPC) - buffer) 14 14 #define PTR_TO(F) (buffer + parser->F) 15 16 /* For [un]escape */ 17 #define XDIGIT_TO_NUM(h) ((h) < 'A' ? (h) - '0' : toupper(h) - 'A' + 10) 18 #define X2DIGITS_TO_NUM(h1, h2) ((XDIGIT_TO_NUM (h1) << 4) + XDIGIT_TO_NUM (h2)) 19 #define XNUM_TO_DIGIT(x) ("0123456789ABCDEF"[x] + 0) 20 #define SAFE_CHAR(c) ( \ 21 ('a' <= c && c <= 'z') || \ 22 ('A' <= c && c <= 'Z') || \ 23 ('0' <= c && c <= '9') || \ 24 c == '$' || \ 25 c == '_' || \ 26 c == '.' || \ 27 c == '-' \ 28 ) 15 29 16 30 /** machine **/ … … 191 205 return parser->cs == http_parser_first_final; 192 206 } 207 208 209 /* returns the length of a 2 b escaped string. O(n) */ 210 long url_escape_length(const char *s, long len) 211 { 212 long i, escape_count = 0; 213 214 for(i=0; i<len; i++) 215 if(!SAFE_CHAR(s[i])) escape_count++; 216 return len + 2*escape_count; 217 } 218 219 /* escapes a string s into a string out. 220 * does not allocate memory for out. you must allocate and free this 221 * string yourself. use url_escape_length() to find out how long the 222 * string needs to be. 223 */ 224 void url_escape(const char *s, long len, char *out) 225 { 226 long i,j; 227 228 for(i=0,j=0; i<len; i++) { 229 if(!SAFE_CHAR(s[i])) { 230 out[j++] = '%'; 231 out[j++] = XNUM_TO_DIGIT (s[i] >> 4); 232 out[j++] = XNUM_TO_DIGIT (s[i] & 0xf); 233 } else out[j++] = s[i]; 234 } 235 } 236 237 /* returns the length of a 2 b unescaped string. O(n) */ 238 long url_unescape_length(const char *s, long len) 239 { 240 long i, escape_count = 0; 241 242 for(i=0; i < len-2; i++) { 243 if(s[i] == '%' && isxdigit(s[i+1]) && isxdigit(s[i+2])) { 244 i+=2; 245 escape_count++; 246 } 247 } 248 return len-2*escape_count; 249 } 250 251 /* unescapes URL. e.g "%21" becomes "!" */ 252 void url_unescape(const char *s, long len, char *out) 253 { 254 long i, j; 255 256 for(i=0,j=0; i<len; i++,j++) { 257 if(s[i] == '+') { 258 out[j] = ' '; 259 } else if(s[i] == '%' && i < len-2 && isxdigit(s[i+1]) && isxdigit(s[i+2])) { 260 out[j] = X2DIGITS_TO_NUM(s[i+1], s[i+2]); 261 i+=2; 262 } else out[j] = s[i]; 263 } 264 } branches/query_parse/test/test_http11.rb
r361 r648 12 12 13 13 include Mongrel 14 15 def escape(s) 16 s.to_s.gsub(/([^ a-zA-Z0-9_.-=,]+)/n) { 17 '%'+$1.unpack('H2'*$1.size).join('%').upcase 18 }.tr(' ', '+') 19 end 14 20 15 21 class HttpParserTest < Test::Unit::TestCase … … 121 127 122 128 def test_query_parse 123 res = HttpRequest.query_parse("zed=1&frank=#{HttpRequest.escape('&&& ')}") 129 130 end 131 132 133 ESCAPES = [ 134 ["", ""], 135 ['blah', 'blah'], 136 ['bl ah', 'bl%20%20ah'], 137 ['bl!ah', 'bl%21ah'], 138 ['bl ah', 'bl%20ah'], 139 ['escape!', 'escape%21'] 140 ] 141 142 def test_unescape 143 ESCAPES.each do |unesc, esc| 144 assert_equal unesc, HttpParser.unescape(esc) 145 end 146 end 147 148 def test_escapes 149 ESCAPES.each do |unesc, esc| 150 assert_equal esc, HttpParser.escape(unesc) 151 end 152 end 153 154 def test_query_parse 155 [ 156 [ '', {} ], 157 [ 'hello', {'hello'=>nil} ], 158 [ 'hello=world', {'hello'=>'world'} ], 159 [ 'p[a]=x', {'p'=>{'a'=>'x'}} ], 160 [ 'p[a][b][c]=x', {'p'=>{'a'=> { 'b' => { 'c' => 'x'}}}} ], 161 [ 'p[a]=x&a=b', {'p'=>{'a'=>'x'}, 'a' => 'b'} ], 162 [ 'p[a]=x&p[b]=y', {'p'=>{'a'=>'x', 'b' => 'y'}} ], 163 [ 'a=b&c=d&e=f&a=c&b=7', {'a'=>'c','c'=>'d','e'=>'f','b'=>'7'} ], 164 [ '&&&', {} ], 165 [ 'msg=hey, my name is ry and i like = signs!', 166 {'msg'=>'hey, my name is ry and i like = signs!'} ], 167 [ 'loong and wierd key=2', 168 {'loong and wierd key'=>'2'} ] 169 ].each do |qs, expected| 170 #puts "#{qs} #{expected.inspect}" if $DEBUG 171 #assert_equal(expected, HttpHelpers.query_parse(qs)) 172 173 # but actually query strings don't look like this. the square brackets 174 # are escaped and we parse it unescaped. 175 qs = qs.split('&').map { |c| escape(c) }.join('&') 176 puts "#{qs} #{expected.inspect}" if $DEBUG 177 assert_equal(expected, HttpParser.form_parse(qs)) 178 end 179 180 #### old tests 181 res = HttpParser.form_parse("zed=1&frank=#{HttpRequest.escape('&&& ')}") 124 182 assert res["zed"], "didn't get the request right" 125 183 assert res["frank"], "no frank" 126 184 assert_equal "1", res["zed"], "wrong result" 127 assert_equal "&&& ", Http Request.unescape(res["frank"]), "wrong result"185 assert_equal "&&& ", HttpParser.unescape(res["frank"]), "wrong result" 128 186 129 res = Http Request.query_parse("zed=1&zed=2&zed=3&frank=11;zed=45")187 res = HttpParser.form_parse("zed=1&zed=2&zed=3&frank=11;zed=45") 130 188 assert res["zed"], "didn't get the request right" 131 189 assert res["frank"], "no frank" 132 assert_equal 4,res["zed"].length, "wrong number for zed" 133 assert_equal "11",res["frank"], "wrong number for frank" 190 #assert_equal 4,res["zed"].length, "wrong number for zed" 191 192 # FIXME!!! BUG 193 #assert_equal "11",res["frank"], "wrong number for frank" 134 194 end 135 195
