SQL語義分析指紋識別
來自專欄信息安全入門筆記5 人贊了文章
在上一篇中我們介紹了SQL簡單語義分析概述
ailx10:SQL簡單語義分析概述本篇將和大家聊一聊SQL語義引擎中的指紋識別演算法,這樣也特別的繞,比XSS狀態機還要繞,感興趣的同學可以畫畫流程圖,反正我是沒有再畫圖了(不傻),採用GDB來分析的。
核心函數:int libinjection_sqli_fold(struct libinjection_sqli_state * sf)
疑難局部變數:size_t pos = 0; //下一個token的位置
left是已經摺疊或處理的token數量的計數
我通讀下來還是感覺有點困惑,所以準備寫一篇GDB跟蹤過程,歡迎提建議
step1:跳過最初的注釋|左括弧|一元運算符
第一次計算token
token.type取值TYPE_KEYWORD
;TYPE_UNION
;TYPE_GROUP
;TYPE_EXPRESSION
;TYPE_FUNCTION
;TYPE_BAREWORD
;TYPE_NUMBER
;TYPE_VARIABLE
;TYPE_STRING
;TYPE_OPERATOR
;TYPE_LOGIC_OPERATOR
;TYPE_COLLATE
;TYPE_RIGHTPARENS
;TYPE_LEFTBRACE
;TYPE_RIGHTBRACE
;TYPE_DOT
;TYPE_COMMA
;TYPE_COLON
;TYPE_SEMICOLON
;TYPE_TSQL
;TYPE_UNKNOWN
;TYPE_EVIL
;TYPE_FINGERPRINT
;TYPE_BACKSLASH
進入break分支,跳出當前循環。
token.type取值TYPE_NONE
直接跳出當前循環。
sf->current = &(sf->tokenvec[0]); while (more) { more = libinjection_sqli_tokenize(sf); if ( ! (sf->current->type == TYPE_COMMENT || //注釋c sf->current->type == TYPE_LEFTPARENS || //左括弧( sf->current->type == TYPE_SQLTYPE || //t st_is_unary_op(sf->current))) { //一元運算符 + - ! ~ !! NOT break; } }
step2:準備處理下一個token
if (! more) { /* 如果全是注釋 左括弧 一元運算符 ,那麼直接退出*/ return 0; } else { /* 注意這裡的 pos 是局部變數pos ,表示下一個 token 的位置*/ pos += 1; }
step3.1:while(1)
如果pos>=5
並且滿足一定條件:
pos>5
令tokenvec[5]
覆蓋tokenvec[1]
再使得pos=2
;left=0
pos=5
只令pos=1
;left=0
如果pos<5
或 pos>=5
但是不滿足一定條件 直接跳過這段處理。
if (pos >= LIBINJECTION_SQLI_MAX_TOKENS) {//5 if ( ( sf->tokenvec[0].type == TYPE_NUMBER &&//1 --> 1os1) | 1,s1) ) || ( --> no(n) | no(1) ) || ( --> 1),(1 ) || ( --> n)o(n )) { if (pos > LIBINJECTION_SQLI_MAX_TOKENS) { st_copy(&(sf->tokenvec[1]), &(sf->tokenvec[LIBINJECTION_SQLI_MAX_TOKENS])); pos = 2; left = 0; } else { pos = 1; left = 0; } } }
step3.2:
if (! more || left >= LIBINJECTION_SQLI_MAX_TOKENS) { left = pos; break; }
step4:第二次計算token
上一個token.type不等於TYPE_NONE
;並且pos<=5
;並且pos - left < 2
條件成立,sf->current
指針指向第pos
個token
,然後計算 token
token.type不等於TYPE_NONE
,
如果 token.type等於TYPE_COMMENT
,用token覆蓋last_comment,
如果不等於,last_comment.type置為NULL
;pos +=1
,準備計算下一個token
while (more && pos <= LIBINJECTION_SQLI_MAX_TOKENS && (pos - left) < 2) { sf->current = &(sf->tokenvec[pos]); more = libinjection_sqli_tokenize(sf); if (more) { if (sf->current->type == TYPE_COMMENT) { st_copy(&last_comment, sf->current); } else { last_comment.type = CHAR_NULL; pos += 1; } } }
step5.1:
if (pos - left < 2) { left = pos; continue; }
step5.2:第1次摺疊
if ( ss ) { pos -= 1; sf->stats_folds += 1; continue; } else if ( ;; ) { pos -= 1; sf->stats_folds += 1; continue; } else if ( [o &] && (st_is_unary_op(&sf->tokenvec[left+1]) || sf->tokenvec[left+1].type == TYPE_SQLTYPE)) { pos -= 1; sf->stats_folds += 1; left = 0; continue; } else if ( ( && st_is_unary_op(&sf->tokenvec[left+1])) { pos -= 1; sf->stats_folds += 1; if (left > 0) { left -= 1; } continue; } else if (syntax_merge_words(sf, &sf->tokenvec[left], &sf->tokenvec[left+1])) { pos -= 1; sf->stats_folds += 1; if (left > 0) { left -= 1; } continue; } else if ( ;f && (sf->tokenvec[left+1].val[0] == I || sf->tokenvec[left+1].val[0] == i ) && (sf->tokenvec[left+1].val[1] == F || sf->tokenvec[left+1].val[1] == f )) { /* IF通常是函數,但是在Transact-SQL中可以用作獨立的控制流操作符 形如; IF 1=1 需要將 f 轉換為 T */ sf->tokenvec[left+1].type = TYPE_TSQL; /* left += 2; */ continue; /* 重新排列 我們可能需要左移 left 和 pos */ } else if ((sf->tokenvec[left].type == TYPE_BAREWORD || sf->tokenvec[left].type == TYPE_VARIABLE) && sf->tokenvec[left+1].type == TYPE_LEFTPARENS && ( /* TSQL functions but common enough to be column names */ cstrcasecmp("USER_ID", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 || cstrcasecmp("USER_NAME", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 || /* Function in MYSQL */ cstrcasecmp("DATABASE", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 || cstrcasecmp("PASSWORD", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 || cstrcasecmp("USER", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 || /* Mysql words that act as a variable and are a function */ /* TSQL current_users is fake-variable */ /* http://msdn.microsoft.com/en-us/library/ms176050.aspx */ cstrcasecmp("CURRENT_USER", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 || cstrcasecmp("CURRENT_DATE", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 || cstrcasecmp("CURRENT_TIME", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 || cstrcasecmp("CURRENT_TIMESTAMP", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 || cstrcasecmp("LOCALTIME", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 || cstrcasecmp("LOCALTIMESTAMP", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 )) { /* pos is the same * other conversions need to go here... for instance * password CAN be a function, coalesce CAN be a function */ sf->tokenvec[left].type = TYPE_FUNCTION; continue; } else if (sf->tokenvec[left].type == TYPE_KEYWORD && ( cstrcasecmp("IN", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 || cstrcasecmp("NOT IN", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 )) { if (sf->tokenvec[left+1].type == TYPE_LEFTPARENS) { /* got .... IN ( ... (or NOT IN) * its an operator */ sf->tokenvec[left].type = TYPE_OPERATOR; } else { /* * its a nothing */ sf->tokenvec[left].type = TYPE_BAREWORD; } /* "IN" can be used as "IN BOOLEAN MODE" for mysql * in which case merging of words can be done later * other wise it acts as an equality operator __ IN (values..) * * here we got "IN" "(" so its an operator. * also back track to handle "NOT IN" * might need to do the same with like * two use cases "foo" LIKE "BAR" (normal operator) * "foo" = LIKE(1,2) */ continue; } else if ((sf->tokenvec[left].type == TYPE_OPERATOR) && ( cstrcasecmp("LIKE", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0 || cstrcasecmp("NOT LIKE", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0)) { if (sf->tokenvec[left+1].type == TYPE_LEFTPARENS) { /* SELECT LIKE(... * its a function */ sf->tokenvec[left].type = TYPE_FUNCTION; } } else if (sf->tokenvec[left].type == TYPE_SQLTYPE && (sf->tokenvec[left+1].type == TYPE_BAREWORD || sf->tokenvec[left+1].type == TYPE_NUMBER || sf->tokenvec[left+1].type == TYPE_SQLTYPE || sf->tokenvec[left+1].type == TYPE_LEFTPARENS || sf->tokenvec[left+1].type == TYPE_FUNCTION || sf->tokenvec[left+1].type == TYPE_VARIABLE || sf->tokenvec[left+1].type == TYPE_STRING)) { st_copy(&sf->tokenvec[left], &sf->tokenvec[left+1]); pos -= 1; sf->stats_folds += 1; left = 0; continue; } else if (sf->tokenvec[left].type == TYPE_COLLATE && sf->tokenvec[left+1].type == TYPE_BAREWORD) { /* * there are too many collation types.. so if the bareword has a "_" * then its TYPE_SQLTYPE */ if (strchr(sf->tokenvec[left+1].val, _) != NULL) { sf->tokenvec[left+1].type = TYPE_SQLTYPE; left = 0; } } else if (sf->tokenvec[left].type == TYPE_BACKSLASH) { if (st_is_arithmetic_op(&(sf->tokenvec[left+1]))) { /* very weird case in TSQL where \%1 is parsed as 0 % 1, etc */ sf->tokenvec[left].type = TYPE_NUMBER; } else { /* just ignore it.. Again T-SQL seems to parse 1 as "1" */ st_copy(&sf->tokenvec[left], &sf->tokenvec[left+1]); pos -= 1; sf->stats_folds += 1; } left = 0; continue; } else if (sf->tokenvec[left].type == TYPE_LEFTPARENS && sf->tokenvec[left+1].type == TYPE_LEFTPARENS) { pos -= 1; left = 0; sf->stats_folds += 1; continue; } else if (sf->tokenvec[left].type == TYPE_RIGHTPARENS && sf->tokenvec[left+1].type == TYPE_RIGHTPARENS) { pos -= 1; left = 0; sf->stats_folds += 1; continue; } else if (sf->tokenvec[left].type == TYPE_LEFTBRACE && sf->tokenvec[left+1].type == TYPE_BAREWORD) { /* * MySQL Degenerate case -- * * select { ``.``.id }; -- valid !!! * select { ``.``.``.id }; -- invalid * select ``.``.id; -- invalid * select { ``.id }; -- invalid * * so it appears {``.``.id} is a magic case * I suspect this is "current database, current table, field id" * * The folding code cant look at more than 3 tokens, and * I dont want to make two passes. * * Since "{ ``" so rare, we are just going to blacklist it. * * Highly likely this will need revisiting! * * CREDIT @rsalgado 2013-11-25 */ if (sf->tokenvec[left+1].len == 0) { sf->tokenvec[left+1].type = TYPE_EVIL; return (int)(left+2); } /* weird ODBC / MYSQL {foo expr} --> expr * but for this rule we just strip away the "{ foo" part */ left = 0; pos -= 2; sf->stats_folds += 2; continue; } else if (sf->tokenvec[left+1].type == TYPE_RIGHTBRACE) { pos -= 1; left = 0; sf->stats_folds += 1; continue; }
step6:摺疊之後,第三次計算token,同第二次處理
while (more && pos <= LIBINJECTION_SQLI_MAX_TOKENS && pos - left < 3) { sf->current = &(sf->tokenvec[pos]); more = libinjection_sqli_tokenize(sf); if (more) { if (sf->current->type == TYPE_COMMENT) { st_copy(&last_comment, sf->current); } else { last_comment.type = CHAR_NULL; pos += 1; } } }
step7.1:
if (pos -left < 3) { left = pos; continue; }
step7.2:第2次摺疊
if (sf->tokenvec[left].type == TYPE_NUMBER && sf->tokenvec[left+1].type == TYPE_OPERATOR && sf->tokenvec[left+2].type == TYPE_NUMBER) { pos -= 2; left = 0; continue; } else if ( o(o ) { left = 0; pos -= 2; continue; } else if ( & & ) { pos -= 2; left = 0; continue; } else if ( vov vo1 von ) { pos -= 2; left = 0; continue; } else if ( no1 non 1o1 1on ) { pos -= 2; left = 0; continue; } else if ( not vot sot && streq(sf->tokenvec[left+1].val, "::")) { pos -= 2; left = 0; sf->stats_folds += 2; continue; } else if ( [n 1 s v] , [1 n s v] ) { pos -= 2; left = 0; continue; } else if ( [E , B] && st_is_unary_op(&sf->tokenvec[left+1]) && ( ) { /* 形如 SELECT + (, LIMIT + ( 移除一元運算符 */ st_copy(&sf->tokenvec[left+1], &sf->tokenvec[left+2]); pos -= 1; left = 0; continue; } else if ( [k E B] && st_is_unary_op(&sf->tokenvec[left+1]) && [1 n v s f]) { /* 形如 select - 1 移除一元運算符 */ st_copy(&sf->tokenvec[left+1], &sf->tokenvec[left+2]); pos -= 1; left = 0; continue; } else if ( , && st_is_unary_op(&sf->tokenvec[left+1]) && [1 n v s]) { /* 形如", -1" --> ",1" 形如"1,-1" --> "1" */ st_copy(&sf->tokenvec[left+1], &sf->tokenvec[left+2]); left = 0; assert(pos >= 3); /* pos is >= 3 so this is safe */ pos -= 3; continue; } else if ( , && st_is_unary_op(&sf->tokenvec[left+1]) && f ) { /* 形如 1,-sin(1) --> 1,sin(1) 移除一元運算符 */ st_copy(&sf->tokenvec[left+1], &sf->tokenvec[left+2]); pos -= 1; left = 0; continue; } else if ( n.n ) { /* 形如 databasename.table 忽略.n */ assert(pos >= 3); pos -= 2; left = 0; continue; } else if ( E.n ) { /* 形如 select . `foo` --> select `foo` */ st_copy(&sf->tokenvec[left+1], &sf->tokenvec[left+2]); pos -= 1; left = 0; continue; } else if ( f( && (sf->tokenvec[left+2].type != TYPE_RIGHTPARENS)) { /* USER() 是函數 User(foo) 不是函數 */ if (cstrcasecmp("USER", sf->tokenvec[left].val, sf->tokenvec[left].len) == 0) { sf->tokenvec[left].type = TYPE_BAREWORD; } }
step8:
left += 1; } /* while(1) step3.1 --> step8 */
step9:
if (left < LIBINJECTION_SQLI_MAX_TOKENS && last_comment.type == TYPE_COMMENT) { st_copy(&sf->tokenvec[left], &last_comment); left += 1; } /* 有時候獲取第6個token 來確認第5個token */ if (left > LIBINJECTION_SQLI_MAX_TOKENS) { left = LIBINJECTION_SQLI_MAX_TOKENS; } return (int)left;
如何計算token?
int libinjection_sqli_tokenize(struct libinjection_sqli_state * sf){ pt2Function fnptr; size_t *pos = &sf->pos; stoken_t *current = sf->current; const char *s = sf->s; const size_t slen = sf->slen; if (slen == 0) { return FALSE; } st_clear(current); sf->current = current; /* * if we are at beginning of string * and in single-quote or double quote mode * then pretend the input starts with a quote */ if (*pos == 0 && (sf->flags & (FLAG_QUOTE_SINGLE | FLAG_QUOTE_DOUBLE))) { *pos = parse_string_core(s, slen, 0, current, flag2delim(sf->flags), 0); sf->stats_tokens += 1; return TRUE; } while (*pos < slen) { const unsigned char ch = (unsigned char) (s[*pos]); fnptr = char_parse_map[ch]; *pos = (*fnptr) (sf); // 回調函數 這裡很複雜 if (current->type != CHAR_NULL) { sf->stats_tokens += 1; return TRUE; } } return FALSE;}
下回gdb跟蹤 ~
推薦閱讀:
※實證紅芯瀏覽器泄漏私鑰
※防火牆內 OAuth 2.0 的使用
※安全狗參展2018·全國檢察機關科技裝備展
※少看那些網頁,安全三大隱患你中了幾條?
※有融網安全待解:存管行否認合作 擔保存疑點