今天一同事在使用php trie_filter跑脚本,循环匹配关键词,发现cpu利用率达到了99%。通过strace命令跟踪,发现进程在反复地打开、读取和关闭.tree文件,定位到trie_filter_load这个方法被反复调用。按正常的情况,这个文件应该一次性读到内存中,然后在内存中查询匹配关键词。为了证实这一点,我追踪到扩展源码如下:
PHP_FUNCTION(trie_filter_load)
{
Trie *trie;
char *path;
int path_len;ui
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s",
&path, &path_len) == FAILURE) {
RETURN_NULL();
}进程
trie = trie_new_from_file(path);
if (!trie) {
php_error_docref(NULL TSRMLS_CC, E_WARNING,
"Unable to load %s", path);
RETURN_NULL();
}内存
ZEND_REGISTER_RESOURCE(return_value, trie, le_trie_filter);
}源码
trie变量保存的是一个Trie *结构体指针,仅凭这一点还不能确定文件内容已读取到内存中,于是继续打开libdatrie源码,找到datrie/trie.c:
/*
 * Create a trie from the contents of the file at `path`.
 *
 * The file is opened, passed to trie_fread(), and closed again before
 * returning, so no file handle is kept by the returned object.
 *
 * Returns the value produced by trie_fread(), or NULL if the file cannot be
 * opened.
 */
Trie *
trie_new_from_file (const char *path)
{
    Trie *trie;
    FILE *trie_file;

    /* The trie file is raw binary data; "rb" prevents text-mode newline
     * translation on platforms that distinguish text and binary streams. */
    trie_file = fopen (path, "rb");
    if (!trie_file)
        return NULL;

    trie = trie_fread (trie_file);
    fclose (trie_file);

    return trie;
}
粗看之下,trie结构体读取并保存了文件内容,再继续定位到trie_fread:
/*
 * Build a Trie from an already opened stream.
 *
 * Three parts are read from `file` in order: the alphabet map
 * (alpha_map_fread_bin), the `da` member (da_fread) and the `tail` member
 * (tail_fread). If any of them fails, the parts loaded so far are released
 * in reverse order, the Trie itself is freed, and NULL is returned.
 *
 * On success the returned Trie has is_dirty set to FALSE.
 */
Trie *
trie_fread (FILE *file)
{
    Trie *loaded = malloc (sizeof *loaded);

    if (NULL == loaded)
        return NULL;

    loaded->alpha_map = alpha_map_fread_bin (file);
    if (loaded->alpha_map != NULL) {
        loaded->da = da_fread (file);
        if (loaded->da != NULL) {
            loaded->tail = tail_fread (file);
            if (loaded->tail != NULL) {
                /* Everything was read successfully. */
                loaded->is_dirty = FALSE;
                return loaded;
            }
            /* tail failed: undo the da part. */
            da_free (loaded->da);
        }
        /* da or tail failed: undo the alphabet map. */
        alpha_map_free (loaded->alpha_map);
    }

    free (loaded);
    return NULL;
}
其中通过alpha_map_fread_bin方法把二进制文件的内容读取到trie->alpha_map:
AlphaMap *
alpha_map_fread_bin (FILE *file)
{
long save_pos;
uint32 sig;
int32 total, i;
AlphaMap *alpha_map;
/* check signature */
save_pos = ftell (file);
if (!file_read_int32 (file, (int32 *) &sig) || ALPHAMAP_SIGNATURE != sig)
goto exit_file_read;
if (NULL == (alpha_map = alpha_map_new ()))
goto exit_file_read;
/* read number of ranges */
if (!file_read_int32 (file, &total))
goto exit_map_created;
/* read character ranges */
for (i = 0; i < total; i++) {
int32 b, e;
if (!file_read_int32 (file, &b) || !file_read_int32 (file, &e))
goto exit_map_created;
alpha_map_add_range (alpha_map, b, e);
}
return alpha_map;
exit_map_created:
alpha_map_free (alpha_map);
exit_file_read:
fseek (file, save_pos, SEEK_SET);
return NULL;
}
/*
 * Read one 32-bit value stored most-significant byte first from `file`.
 *
 * On success the decoded value is stored in *o_val and TRUE is returned.
 * If four bytes cannot be read, *o_val is left untouched and FALSE is
 * returned.
 */
Bool
file_read_int32 (FILE *file, int32 *o_val)
{
    unsigned char buff[4];

    if (fread (buff, 4, 1, file) == 1) {
        /* Assemble in an unsigned type: shifting a promoted (signed) int
         * left by 24 is undefined when the top byte is >= 0x80.
         * unsigned long is at least 32 bits wide on every implementation. */
        unsigned long word = ((unsigned long) buff[0] << 24)
                           | ((unsigned long) buff[1] << 16)
                           | ((unsigned long) buff[2] << 8)
                           |  (unsigned long) buff[3];

        *o_val = (int32) word;
        return TRUE;
    }

    return FALSE;
}
至此已确定trie_filter_load方法在打开tree文件后会把内容读取到内存,所以只要将trie_filter_load的返回值赋给静态变量,就不用反复打开、读取、关闭tree文件了。修改后再跟踪进程,已恢复正常。