深入PHP内核之in_array,php内核in_array
深入PHP内核之in_array,php内核in_array
无意中看到一段代码
1、a.php
测试
[root@dev tmp]# time php a.php real 0m0.101s user 0m0.080s sys 0m0.013s
2、b.php
测试
[root@dev tmp]# time php b.php real 0m9.517s user 0m4.486s sys 0m0.015s
b.php 需要 9 秒多,而 a.php 只需要 0.1 秒
对于b.php 有严重的效率问题,跟踪测试一下好了
[root@dev tmp]# ltrace -c php b.php % time seconds usecs/call calls function ------ ----------- ----------- --------- -------------------- 52.92 175.486683 224 780155 strtol 33.34 110.550182 55275091 2 __libc_start_main 6.33 20.999979 177 118479 memcpy 3.56 11.819558 234 50394 __ctype_b_loc 1.66 5.510564 285 19294 free 1.42 4.704605 156820 30 dlopen 0.43 1.416750 76 18430 malloc 0.13 0.422050 76 5510 strlen 0.08 0.258620 76 3375 __ctype_tolower_loc 0.04 0.137245 77 1770 strrchr 0.02 0.067158 76 880 strcasecmp 0.02 0.059222 76 776 calloc 0.01 0.034591 17295 2 getprotobyname 0.01 0.031099 76 407 realloc 0.01 0.021277 77 276 memset 0.00 0.011502 383 30 dlclose 0.00 0.010671 85 125 0.00 0.006806 73 93 fileno 0.00 0.006095 76 80 strncasecmp 0.00 0.005397 77 70 strchr 0.00 0.005139 160 32 ftell 0.00 0.004405 137 32 fclose 0.00 0.003644 104 35 __fxstat 0.00 0.003432 107 32 fopen 0.00 0.003103 107 29 munmap 0.00 0.003062 78 39 getenv 0.00 0.003031 104 29 mmap 0.00 0.003027 104 29 isatty 0.00 0.002825 104 27 __xstat 0.00 0.002468 82 30 dlsym 0.00 0.002167 74 29 sysconf 0.00 0.001956 75 26 _setjmp 0.00 0.001419 109 13 __lxstat 0.00 0.001294 76 17 memchr 0.00 0.001125 1125 1 SYS_clone 0.00 0.001099 1099 1 tzset 0.00 0.001098 1098 1 exit 0.00 0.001075 1075 1 ERR_load_crypto_strings 0.00 0.000986 986 1 using_history 0.00 0.000837 837 1 SYS_exit_group 0.00 0.000606 75 8 __strdup 0.00 0.000596 74 8 strcmp 0.00 0.000591 73 8 __memcpy_chk 0.00 0.000457 76 6 fflush 0.00 0.000326 108 3 getcwd 0.00 0.000309 77 4 __errno_location 0.00 0.000271 271 1 setlocale 0.00 0.000271 271 1 scandir 0.00 0.000238 79 3 __sprintf_chk 0.00 0.000222 111 2 signal 0.00 0.000206 206 1 SSL_library_init 0.00 0.000186 186 1 fgetc 0.00 0.000155 155 1 ERR_load_ERR_strings 0.00 0.000154 77 2 xmlSetGenericErrorFunc 0.00 0.000151 75 2 __strtok_r 0.00 0.000151 75 2 xmlParserInputBufferCreateFilenameDefault 0.00 0.000148 148 1 xmlInitParser 0.00 0.000147 73 2 xmlOutputBufferCreateFilenameDefault 0.00 0.000130 130 1 
OpenSSL_add_all_ciphers 0.00 0.000122 122 1 EVP_cleanup 0.00 0.000112 112 1 access 0.00 0.000109 109 1 rewind 0.00 0.000106 106 1 OPENSSL_add_all_algorithms_noconf 0.00 0.000105 105 1 sigprocmask 0.00 0.000093 93 1 gnu_get_libc_version 0.00 0.000090 90 1 OpenSSL_add_all_digests 0.00 0.000084 84 1 xmlCleanupParser 0.00 0.000081 81 1 xmlRelaxNGCleanupTypes 0.00 0.000079 79 1 xmlSetStructuredErrorFunc 0.00 0.000078 78 1 SSL_get_ex_new_index 0.00 0.000077 77 1 __gmp_set_memory_functions 0.00 0.000076 76 1 ERR_load_EVP_strings 0.00 0.000076 76 1 pcre_version 0.00 0.000075 75 1 X509_get_default_cert_area 0.00 0.000075 75 1 strstr 0.00 0.000074 74 1 sigemptyset 0.00 0.000074 74 1 __xmlParserVersion 0.00 0.000074 74 1 time 0.00 0.000074 74 1 xmlResetLastError 0.00 0.000074 74 1 strncmp 0.00 0.000073 73 1 sigaddset ------ ----------- ----------- --------- -------------------- 100.00 331.614442 1000662 total
我们发现 strtol 占用了大量的时间(共调用 780155 次,约占总耗时的 52.92%)
查一下库函数
/*
函数名: strtol
功 能: 将串转换为长整数
用 法: long strtol(char *str, char **endptr, int base);
程序例:
*/
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
    char *string = "87654321", *endptr;
    long lnumber;

    /* strtol converts string to long integer */
    lnumber = strtol(string, &endptr, 10);
    printf("string = %s long = %ld\n", string, lnumber);
    return 0;
}
所以应该是源代码中有大量的类型转换
关于in_array
in_array是这个样子的
bool in_array ( mixed $needle , array $haystack [, bool $strict = FALSE ] )
在 haystack 中搜索 needle,如果没有设置 strict 则使用宽松的比较。
needle:待搜索的值。如果 needle 是字符串,则比较是区分大小写的。
haystack:被搜索的数组。
strict:如果第三个参数 strict 的值为 TRUE,则 in_array() 函数还会检查 needle 的类型是否和 haystack 中的相同。
那么我们看一下源代码
第一步 在ext/standard/array.c 文件中
/* }}} */ /* {{{ proto bool in_array(mixed needle, array haystack [, bool strict]) Checks if the given value exists in the array */ PHP_FUNCTION(in_array) { php_search_array(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0); } /* }}} */ /* {{{ proto mixed array_search(mixed needle, array haystack [, bool strict]) Searches the array for a given value and returns the corresponding key if successful */ PHP_FUNCTION(array_search) { php_search_array(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1); } /* }}} */顺便看到了array_search,原来和in_array的内部实现基本一致
其中函数的参数 在./zend.h中
#define INTERNAL_FUNCTION_PARAM_PASSTHRU ht, return_value, return_value_ptr, this_ptr, return_value_used TSRMLS_CC
第二步 在ext/standard/array.c 文件中 查看php_search_array原型
/* void php_search_array(INTERNAL_FUNCTION_PARAMETERS, int behavior) * 0 = return boolean * 1 = return key */ static void php_search_array(INTERNAL_FUNCTION_PARAMETERS, int behavior) /* {{{ */ { zval *value, /* value to check for */ *array, /* array to check in */ **entry, /* pointer to array entry */ res; /* comparison result */ HashPosition pos; /* hash iterator */ zend_bool strict = 0; /* strict comparison or not */ ulong num_key; uint str_key_len; char *string_key; int (*is_equal_func)(zval *, zval *, zval * TSRMLS_DC) = is_equal_function; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "za|b", &value, &array, &strict) == FAILURE) { return; } if (strict) { is_equal_func = is_identical_function; } zend_hash_internal_pointer_reset_ex(Z_ARRVAL_P(array), &pos); while (zend_hash_get_current_data_ex(Z_ARRVAL_P(array), (void **)&entry, &pos) == SUCCESS) { is_equal_func(&res, value, *entry TSRMLS_CC); if (Z_LVAL(res)) { if (behavior == 0) { RETURN_TRUE; } else { /* Return current key */ switch (zend_hash_get_current_key_ex(Z_ARRVAL_P(array), &string_key, &str_key_len, &num_key, 0, &pos)) { case HASH_KEY_IS_STRING: RETURN_STRINGL(string_key, str_key_len - 1, 1); break; case HASH_KEY_IS_LONG: RETURN_LONG(num_key); break; } } } zend_hash_move_forward_ex(Z_ARRVAL_P(array), &pos); } RETURN_FALSE; } /* }}} */ /* {{{ proto bool in_array(mixed needle, array haystack [, bool strict]) Checks if the given value exists in the array */我们发现 strict 这个值的不同有两种比较方式,看一下两个函数的不同之处
is_identical_function 检查类型是否相同
ZEND_API int is_identical_function(zval *result, zval *op1, zval *op2 TSRMLS_DC) /* {{{ */ { Z_TYPE_P(result) = IS_BOOL; if (Z_TYPE_P(op1) != Z_TYPE_P(op2)) { Z_LVAL_P(result) = 0; return SUCCESS; } switch (Z_TYPE_P(op1)) { case IS_NULL: Z_LVAL_P(result) = 1; break; case IS_BOOL: case IS_LONG: case IS_RESOURCE: Z_LVAL_P(result) = (Z_LVAL_P(op1) == Z_LVAL_P(op2)); break; case IS_DOUBLE: Z_LVAL_P(result) = (Z_DVAL_P(op1) == Z_DVAL_P(op2)); break; case IS_STRING: Z_LVAL_P(result) = ((Z_STRLEN_P(op1) == Z_STRLEN_P(op2)) && (!memcmp(Z_STRVAL_P(op1), Z_STRVAL_P(op2), Z_STRLEN_P(op1)))); break; case IS_ARRAY: Z_LVAL_P(result) = (Z_ARRVAL_P(op1) == Z_ARRVAL_P(op2) || zend_hash_compare(Z_ARRVAL_P(op1), Z_ARRVAL_P(op2), (compare_func_t) hash_zval_identical_function, 1 TSRMLS_CC)==0); break; case IS_OBJECT: if (Z_OBJ_HT_P(op1) == Z_OBJ_HT_P(op2)) { Z_LVAL_P(result) = (Z_OBJ_HANDLE_P(op1) == Z_OBJ_HANDLE_P(op2)); } else { Z_LVAL_P(result) = 0; } break; default: Z_LVAL_P(result) = 0; return FAILURE; } return SUCCESS; } /* }}} */
is_equal_function 不检查类型是否相同,所以需要隐式转换
ZEND_API int is_equal_function(zval *result, zval *op1, zval *op2 TSRMLS_DC) /* {{{ */ { if (compare_function(result, op1, op2 TSRMLS_CC) == FAILURE) { return FAILURE; } ZVAL_BOOL(result, (Z_LVAL_P(result) == 0)); return SUCCESS; } /* }}} */
==》compare_function
ZEND_API int compare_function(zval *result, zval *op1, zval *op2 TSRMLS_DC) /* {{{ */ { int ret; int converted = 0; zval op1_copy, op2_copy; zval *op_free; while (1) { switch (TYPE_PAIR(Z_TYPE_P(op1), Z_TYPE_P(op2))) { case TYPE_PAIR(IS_LONG, IS_LONG): ZVAL_LONG(result, Z_LVAL_P(op1)>Z_LVAL_P(op2)?1:(Z_LVAL_P(op1)<Z_LVAL_P(op2)?-1:0)); return SUCCESS; /* ……(转载时此处丢失了若干 case 分支:其中 TYPE_PAIR(IS_ARRAY, IS_ARRAY) 分支调用 zend_compare_arrays(result, op1, op2 TSRMLS_CC),TYPE_PAIR(IS_STRING, IS_STRING) 分支调用 zendi_smart_strcmp(result, op1, op2),完整内容请以 Zend/zend_operators.c 为准)…… */ case TYPE_PAIR(IS_OBJECT, IS_OBJECT): if (Z_OBJ_HANDLER_P(op1,compare_objects) == Z_OBJ_HANDLER_P(op2,compare_objects)) { ZVAL_LONG(result, Z_OBJ_HT_P(op1)->compare_objects(op1, op2 TSRMLS_CC)); return SUCCESS; } /* break missing intentionally */ default: if (Z_TYPE_P(op1) == IS_OBJECT) { if (Z_OBJ_HT_P(op1)->get) { op_free = Z_OBJ_HT_P(op1)->get(op1 TSRMLS_CC); ret = compare_function(result, op_free, op2 TSRMLS_CC); zend_free_obj_get_result(op_free TSRMLS_CC); return ret; } else if (Z_TYPE_P(op2) != IS_OBJECT && Z_OBJ_HT_P(op1)->cast_object) { ALLOC_INIT_ZVAL(op_free); if (Z_OBJ_HT_P(op1)->cast_object(op1, op_free, Z_TYPE_P(op2) TSRMLS_CC) == FAILURE) { ZVAL_LONG(result, 1); zend_free_obj_get_result(op_free TSRMLS_CC); return SUCCESS; } ret = compare_function(result, op_free, op2 TSRMLS_CC); zend_free_obj_get_result(op_free TSRMLS_CC); return ret; } } if (Z_TYPE_P(op2) == IS_OBJECT) { if (Z_OBJ_HT_P(op2)->get) { op_free = Z_OBJ_HT_P(op2)->get(op2 TSRMLS_CC); ret = compare_function(result, op1, op_free TSRMLS_CC); zend_free_obj_get_result(op_free TSRMLS_CC); return ret; } else if (Z_TYPE_P(op1) != IS_OBJECT && Z_OBJ_HT_P(op2)->cast_object) { ALLOC_INIT_ZVAL(op_free); if (Z_OBJ_HT_P(op2)->cast_object(op2, op_free, Z_TYPE_P(op1) TSRMLS_CC) == FAILURE) { ZVAL_LONG(result, -1); zend_free_obj_get_result(op_free TSRMLS_CC); return SUCCESS; } ret = compare_function(result, op1, op_free TSRMLS_CC); zend_free_obj_get_result(op_free TSRMLS_CC); return ret; } else if (Z_TYPE_P(op1) == IS_OBJECT) { ZVAL_LONG(result, 1); return SUCCESS; } } if (!converted) { if (Z_TYPE_P(op1) == IS_NULL) { zendi_convert_to_boolean(op2, op2_copy, result); ZVAL_LONG(result, Z_LVAL_P(op2) ? -1 : 0); return SUCCESS; } else if (Z_TYPE_P(op2) == IS_NULL) { zendi_convert_to_boolean(op1, op1_copy, result); ZVAL_LONG(result, Z_LVAL_P(op1) ? 1 : 0); return SUCCESS; } else if (Z_TYPE_P(op1) == IS_BOOL) { zendi_convert_to_boolean(op2, op2_copy, result); ZVAL_LONG(result, ZEND_NORMALIZE_BOOL(Z_LVAL_P(op1) - Z_LVAL_P(op2))); return SUCCESS; } else if (Z_TYPE_P(op2) == IS_BOOL) { zendi_convert_to_boolean(op1, op1_copy, result); ZVAL_LONG(result, ZEND_NORMALIZE_BOOL(Z_LVAL_P(op1) - Z_LVAL_P(op2))); return SUCCESS; } else { zendi_convert_scalar_to_number(op1, op1_copy, result); zendi_convert_scalar_to_number(op2, op2_copy, result); converted = 1; } } else if (Z_TYPE_P(op1)==IS_ARRAY) { ZVAL_LONG(result, 1); return SUCCESS; } else if (Z_TYPE_P(op2)==IS_ARRAY) { ZVAL_LONG(result, -1); return SUCCESS; } else if (Z_TYPE_P(op1)==IS_OBJECT) { ZVAL_LONG(result, 1); return SUCCESS; } else if (Z_TYPE_P(op2)==IS_OBJECT) { ZVAL_LONG(result, -1); return SUCCESS; } else { ZVAL_LONG(result, 0); return FAILURE; } } } } /* }}} */
接着看一下array 和string 怎么比较的
==》zend_hash_compare 在Zend/zend_hash.c中
ZEND_API int zend_hash_compare(HashTable *ht1, HashTable *ht2, compare_func_t compar, zend_bool ordered TSRMLS_DC) { Bucket *p1, *p2 = NULL; int result; void *pData2; IS_CONSISTENT(ht1); IS_CONSISTENT(ht2); HASH_PROTECT_RECURSION(ht1); HASH_PROTECT_RECURSION(ht2); result = ht1->nNumOfElements - ht2->nNumOfElements; if (result!=0) { HASH_UNPROTECT_RECURSION(ht1); HASH_UNPROTECT_RECURSION(ht2); return result; } p1 = ht1->pListHead; if (ordered) { p2 = ht2->pListHead; } while (p1) { if (ordered && !p2) { HASH_UNPROTECT_RECURSION(ht1); HASH_UNPROTECT_RECURSION(ht2); return 1; /* That's not supposed to happen */ } if (ordered) { if (p1->nKeyLength==0 && p2->nKeyLength==0) { /* numeric indices */ result = p1->h - p2->h; if (result!=0) { HASH_UNPROTECT_RECURSION(ht1); HASH_UNPROTECT_RECURSION(ht2); return result; } } else { /* string indices */ result = p1->nKeyLength - p2->nKeyLength; if (result!=0) { HASH_UNPROTECT_RECURSION(ht1); HASH_UNPROTECT_RECURSION(ht2); return result; } result = memcmp(p1->arKey, p2->arKey, p1->nKeyLength); if (result!=0) { HASH_UNPROTECT_RECURSION(ht1); HASH_UNPROTECT_RECURSION(ht2); return result; } } pData2 = p2->pData; } else { if (p1->nKeyLength==0) { /* numeric index */ if (zend_hash_index_find(ht2, p1->h, &pData2)==FAILURE) { HASH_UNPROTECT_RECURSION(ht1); HASH_UNPROTECT_RECURSION(ht2); return 1; } } else { /* string index */ if (zend_hash_quick_find(ht2, p1->arKey, p1->nKeyLength, p1->h, &pData2)==FAILURE) { HASH_UNPROTECT_RECURSION(ht1); HASH_UNPROTECT_RECURSION(ht2); return 1; } } } result = compar(p1->pData, pData2 TSRMLS_CC); if (result!=0) { HASH_UNPROTECT_RECURSION(ht1); HASH_UNPROTECT_RECURSION(ht2); return result; } p1 = p1->pListNext; if (ordered) { p2 = p2->pListNext; } } HASH_UNPROTECT_RECURSION(ht1); HASH_UNPROTECT_RECURSION(ht2); return 0; }
==》还有一个 zendi_smart_strcmp 在Zend/zend_operators.c中
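ZEND_API void zendi_smart_strcmp(zval *result, zval *s1, zval *s2) /* {{{ */
{
    int ret1, ret2;
    int oflow1, oflow2;
    long lval1, lval2;
    double dval1, dval2;

    if ((ret1=is_numeric_string_ex(Z_STRVAL_P(s1), Z_STRLEN_P(s1), &lval1, &dval1, 0, &oflow1)) &&
        (ret2=is_numeric_string_ex(Z_STRVAL_P(s2), Z_STRLEN_P(s2), &lval2, &dval2, 0, &oflow2))) {
#if ULONG_MAX == 0xFFFFFFFF
        if (oflow1 != 0 && oflow1 == oflow2 && dval1 - dval2 == 0. &&
            ((oflow1 == 1 && dval1 > 9007199254740991. /*0x1FFFFFFFFFFFFF*/)
            || (oflow1 == -1 && dval1 < -9007199254740991.))) {
#else
        if (oflow1 != 0 && oflow1 == oflow2 && dval1 - dval2 == 0.) {
#endif
            /* both values are integers overflown to the same side, do string comparison */
            goto string_cmp;
        }
        if ((ret1==IS_DOUBLE) || (ret2==IS_DOUBLE)) {
            if (ret1!=IS_DOUBLE) {
                if (oflow2) {
                    /* 2nd operand is integer > LONG_MAX (oflow2==1) or < LONG_MIN (-1) */
                    ZVAL_LONG(result, -1 * oflow2);
                    return;
                }
                dval1 = (double) lval1;
            } else if (ret2!=IS_DOUBLE) {
                if (oflow1) {
                    ZVAL_LONG(result, oflow1);
                    return;
                }
                dval2 = (double) lval2;
            } else if (dval1 == dval2 && !zend_finite(dval1)) {
                /* Both values overflowed and have the same sign,
                 * so a numeric comparison would be inaccurate */
                goto string_cmp;
            }
            Z_DVAL_P(result) = dval1 - dval2;
            ZVAL_LONG(result, ZEND_NORMALIZE_BOOL(Z_DVAL_P(result)));
        } else { /* they both have to be long's */
            ZVAL_LONG(result, lval1 > lval2 ? 1 : (lval1 < lval2 ? -1 : 0));
        }
    } else {
string_cmp:
        Z_LVAL_P(result) = zend_binary_zval_strcmp(s1, s2);
        ZVAL_LONG(result, ZEND_NORMALIZE_BOOL(Z_LVAL_P(result)));
    }
}
/* }}} */
其中用到的类型转换函数 is_numeric_string_ex 定义在 Zend/zend_operators.h 中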
继续跟踪
==》is_numeric_string_ex
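static inline zend_uchar is_numeric_string_ex(const char *str, int length, long *lval, double *dval, int allow_errors, int *oflow_info)
{
    const char *ptr;
    int base = 10, digits = 0, dp_or_e = 0;
    double local_dval;
    zend_uchar type;

    if (!length) {
        return 0;
    }

    if (oflow_info != NULL) {
        *oflow_info = 0;
    }

    /* Skip any whitespace
     * This is much faster than the isspace() function */
    while (*str == ' ' || *str == '\t' || *str == '\n' || *str == '\r' || *str == '\v' || *str == '\f') {
        str++;
        length--;
    }
    ptr = str;

    if (*ptr == '-' || *ptr == '+') {
        ptr++;
    }

    if (ZEND_IS_DIGIT(*ptr)) {
        /* Handle hex numbers
         * str is used instead of ptr to disallow signs and keep old behavior */
        if (length > 2 && *str == '0' && (str[1] == 'x' || str[1] == 'X')) {
            base = 16;
            ptr += 2;
        }

        /* Skip any leading 0s */
        while (*ptr == '0') {
            ptr++;
        }

        /* Count the number of digits. If a decimal point/exponent is found,
         * it's a double. Otherwise, if there's a dval or no need to check for
         * a full match, stop when there are too many digits for a long */
        for (type = IS_LONG; !(digits >= MAX_LENGTH_OF_LONG && (dval || allow_errors == 1)); digits++, ptr++) {
check_digits:
            if (ZEND_IS_DIGIT(*ptr) || (base == 16 && ZEND_IS_XDIGIT(*ptr))) {
                continue;
            } else if (base == 10) {
                if (*ptr == '.' && dp_or_e < 1) {
                    goto process_double;
                } else if ((*ptr == 'e' || *ptr == 'E') && !dp_or_e) {
                    const char *e = ptr + 1;

                    if (*e == '-' || *e == '+') {
                        ptr = e++;
                    }
                    if (ZEND_IS_DIGIT(*e)) {
                        goto process_double;
                    }
                }
            }

            break;
        }

        if (base == 10) {
            if (digits >= MAX_LENGTH_OF_LONG) {
                if (oflow_info != NULL) {
                    *oflow_info = *str == '-' ? -1 : 1;
                }
                dp_or_e = -1;
                goto process_double;
            }
        } else if (!(digits < SIZEOF_LONG * 2 || (digits == SIZEOF_LONG * 2 && ptr[-digits] <= '7'))) {
            if (dval) {
                local_dval = zend_hex_strtod(str, &ptr);
            }
            if (oflow_info != NULL) {
                *oflow_info = 1;
            }
            type = IS_DOUBLE;
        }
    } else if (*ptr == '.' && ZEND_IS_DIGIT(ptr[1])) {
process_double:
        type = IS_DOUBLE;

        /* If there's a dval, do the conversion; else continue checking
         * the digits if we need to check for a full match */
        if (dval) {
            local_dval = zend_strtod(str, &ptr);
        } else if (allow_errors != 1 && dp_or_e != -1) {
            dp_or_e = (*ptr++ == '.') ? 1 : 2;
            goto check_digits;
        }
    } else {
        return 0;
    }

    if (ptr != str + length) {
        if (!allow_errors) {
            return 0;
        }
        if (allow_errors == -1) {
            zend_error(E_NOTICE, "A non well formed numeric value encountered");
        }
    }

    if (type == IS_LONG) {
        if (digits == MAX_LENGTH_OF_LONG - 1) {
            int cmp = strcmp(&ptr[-digits], long_min_digits);

            if (!(cmp < 0 || (cmp == 0 && *str == '-'))) {
                if (dval) {
                    *dval = zend_strtod(str, NULL);
                }
                if (oflow_info != NULL) {
                    *oflow_info = *str == '-' ? -1 : 1;
                }

                return IS_DOUBLE;
            }
        }

        if (lval) {
            *lval = strtol(str, NULL, base);
        }

        return IS_LONG;
    } else {
        if (dval) {
            *dval = local_dval;
        }

        return IS_DOUBLE;
    }
}
在 type == IS_LONG 时果然有这个代码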
截取这一段代码
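……
if (type == IS_LONG) {
    if (digits == MAX_LENGTH_OF_LONG - 1) {
        int cmp = strcmp(&ptr[-digits], long_min_digits);

        if (!(cmp < 0 || (cmp == 0 && *str == '-'))) {
            if (dval) {
                *dval = zend_strtod(str, NULL);
            }
            if (oflow_info != NULL) {
                *oflow_info = *str == '-' ? -1 : 1;
            }

            return IS_DOUBLE;
        }
    }

    if (lval) {
        *lval = strtol(str, NULL, base);
    }

    return IS_LONG;
}
……
原来如此 strtol 在这里的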