| 订阅 | 在线投稿
分享
 
 
 

分析PHP的similar_text函数

来源:互联网  宽屏版  评论
2008-12-22 08:09:22

/*
 * Find the longest run of bytes that occurs in both txt1[0..len1) and
 * txt2[0..len2).
 *
 * Outputs:
 *   *max   length of the longest common run (0 when nothing matches)
 *   *pos1  offset of that run inside txt1
 *   *pos2  offset of that run inside txt2
 *
 * When several runs share the maximum length, the first one encountered
 * while scanning txt1 (outer loop) and then txt2 (inner loop) is reported,
 * because only a strictly longer run replaces the current best.
 *
 * All three outputs are always written: if the inputs have no byte in
 * common, *max, *pos1 and *pos2 are all 0, so a caller never reads an
 * indeterminate offset.
 *
 * The comparison is byte-wise; multi-byte characters are not decoded.
 */
static void php_similar_str(const char *txt1, int len1, const char *txt2, int len2, int *pos1, int *pos2, int *max)
{
    const char *end1 = txt1 + len1;
    const char *end2 = txt2 + len2;
    const char *p, *q;
    int l;

    *max = 0;
    *pos1 = 0;
    *pos2 = 0;

    /* Try every pair of starting positions (p in txt1, q in txt2). */
    for (p = txt1; p < end1; p++) {
        for (q = txt2; q < end2; q++) {
            /* l = length of the common prefix of p.. and q.., bounded by both ends. */
            for (l = 0; (p + l < end1) && (q + l < end2) && (p[l] == q[l]); l++) {
            }

            if (l > *max) {
                /* Strictly longer than anything seen so far: remember it. */
                *max = l;
                *pos1 = (int) (p - txt1);
                *pos2 = (int) (q - txt2);
            }
        }
    }
}

/*
 * Recursively count how many bytes txt1[0..len1) and txt2[0..len2) have in
 * common.
 *
 * The longest common run is located with php_similar_str(); its length is
 * added to the result, and the same procedure is then applied to the parts
 * of both strings lying to the left of that run and to the parts lying to
 * the right of it.  Returns 0 when the two strings share no byte.
 */
static int php_similar_char(const char *txt1, int len1, const char *txt2, int len2)
{
    int start1, start2, run;
    int after1, after2;
    int total;

    php_similar_str(txt1, len1, txt2, len2, &start1, &start2, &run);

    total = run;
    if (run == 0) {
        /* Nothing in common: start1/start2 carry no information, stop here. */
        return total;
    }

    /* Left side: only worth recursing when both strings have a prefix before the run. */
    if (start1 != 0 && start2 != 0) {
        total += php_similar_char(txt1, start1, txt2, start2);
    }

    /* Right side: only worth recursing when both strings have a suffix after the run. */
    after1 = start1 + run;
    after2 = start2 + run;
    if (after1 < len1 && after2 < len2) {
        total += php_similar_char(txt1 + after1, len1 - after1,
                                  txt2 + after2, len2 - after2);
    }

    /* Number of matching bytes found at this level and below. */
    return total;
}

/*
 * PHP userland entry point for similar_text(first, second [, percent]).
 *
 * Accepts two or three arguments.  The first two are converted to strings;
 * the optional third is converted to a double and receives the similarity
 * as a percentage, computed as sim * 200.0 / (len1 + len2).  The function's
 * return value is the number of matching bytes reported by
 * php_similar_char().  When both strings are empty, the percentage (if
 * requested) is set to 0 and 0 is returned, which also avoids dividing by
 * zero.
 */
PHP_FUNCTION(similar_text)
{
    zval **first, **second, **pct;
    int nargs = ZEND_NUM_ARGS();
    int wants_percent = (nargs > 2);
    int total_len;
    int matched;

    if (nargs < 2 || nargs > 3) {
        WRONG_PARAM_COUNT;
    }
    if (zend_get_parameters_ex(nargs, &first, &second, &pct) == FAILURE) {
        WRONG_PARAM_COUNT;
    }

    /* Conversion order is kept: both strings first, then the percent slot. */
    convert_to_string_ex(first);
    convert_to_string_ex(second);
    if (wants_percent) {
        convert_to_double_ex(pct);
    }

    total_len = Z_STRLEN_PP(first) + Z_STRLEN_PP(second);

    if (total_len == 0) {
        /* Two empty strings: nothing to compare. */
        if (wants_percent) {
            Z_DVAL_PP(pct) = 0;
        }
        RETURN_LONG(0);
    }

    matched = php_similar_char(Z_STRVAL_PP(first), Z_STRLEN_PP(first),
                               Z_STRVAL_PP(second), Z_STRLEN_PP(second));

    if (wants_percent) {
        /* 2 * matched / (len1 + len2), expressed as a percentage. */
        Z_DVAL_PP(pct) = matched * 200.0 / total_len;
    }

    RETURN_LONG(matched);
}

由此可以看出,similar_text 是按字节(单字节字符)逐个比较的,既不识别多字节字符,也不考虑词组的问题。

测试了一下,结果如下:

/*
 * Small test driver: runs php_similar_str() and php_similar_char() on one
 * pair of sample strings and prints what each of them reports.
 */
int main(int argc, char *argv[])
{
    char *left = "weican wuxin";
    char *right = "wuxin weican";
    int left_len = (int) strlen(left);
    int right_len = (int) strlen(right);
    int off_left, off_right, best;
    int similar;

    /* Longest common run: offsets in each string and its length. */
    php_similar_str(left, left_len, right, right_len, &off_left, &off_right, &best);
    printf("php_similar_str:%d,%d=%d\n", off_left, off_right, best);

    /* Total number of matching bytes found by the recursive comparison. */
    similar = php_similar_char(left, left_len, right, right_len);
    printf("php_similar_char:%d\n", similar);

    /* Keeps the console window open (Windows shell command). */
    system("PAUSE");

    return 0;
}

php_similar_str:0,6=6

php_similar_char:6

char *str1="TCP协议通讯工作原a理";

char *str2="TCPa协议通讯工作原理";

php_similar_str:3,4=14

php_similar_char:19

//比较字串,返回两个字串第一个相同字符的区域 static void php_similar_str(const char *txt1, int len1, const char *txt2, int len2, int *pos1, int *pos2, int *max) { char *p, *q; char *end1 = (char *) txt1 + len1; char *end2 = (char *) txt2 + len2; int l; *max = 0; //遍历字串 for (p = (char *) txt1; p < end1; p++) { for (q = (char *) txt2; q < end2; q++) { for (l = 0; (p + l < end1) && (q + l < end2) && (p[l] == q[l]); l++); if (l > *max) { //保存相同区域信息 *max = l; *pos1 = p - txt1; *pos2 = q - txt2; } } } } //递归函数,比较txt1和txt2的相同字符数量 static int php_similar_char(const char *txt1, int len1, const char *txt2, int len2) { int sum; int pos1, pos2, max; php_similar_str(txt1, len1, txt2, len2, &pos1, &pos2, &max); if ((sum = max)) {//这样写有什么意义?????!!!!! //递归上次不同部分的前部 if (pos1 && pos2) { sum += php_similar_char(txt1, pos1, txt2, pos2); } //递归上次不同部分的后部 if ((pos1 + max < len1) && (pos2 + max < len2)) { sum += php_similar_char(txt1 + pos1 + max, len1 - pos1 - max, txt2 + pos2 + max, len2 - pos2 - max); } } //返回本次比较后的相同字符数量 return sum; } //PHP函数本身,一堆宏,做了些串转换和返回值换算,主功能由上面两个函数做了。 PHP_FUNCTION(similar_text) { zval **t1, **t2, **percent; int ac = ZEND_NUM_ARGS(); int sim; if (ac < 2 || ac > 3 || zend_get_parameters_ex(ac, &t1, &t2, &percent) == FAILURE) { WRONG_PARAM_COUNT; } convert_to_string_ex(t1); convert_to_string_ex(t2); if (ac > 2) { convert_to_double_ex(percent); } if (Z_STRLEN_PP(t1) + Z_STRLEN_PP(t2) == 0) { if (ac > 2) { Z_DVAL_PP(percent) = 0; } RETURN_LONG(0); } sim = php_similar_char(Z_STRVAL_PP(t1), Z_STRLEN_PP(t1), Z_STRVAL_PP(t2), Z_STRLEN_PP(t2)); if (ac > 2) { Z_DVAL_PP(percent) = sim * 200.0 / (Z_STRLEN_PP(t1) + Z_STRLEN_PP(t2)); } RETURN_LONG(sim); } 所有看出similar_text是根据ASCII做比较的,是不考虑词组问题的。 测试了一下,结果如下: int main(int argc, char *argv[]) { char *str1="weican wuxin"; char *str2="wuxin weican"; int pos1,pos2,max; php_similar_str(str1,strlen(str1),str2,strlen(str2),&pos1,&pos2,&max); printf("php_similar_str:%d,%d=%d\n",pos1,pos2,max); max=php_similar_char(str1,strlen(str1),str2,strlen(str2)); 
printf("php_similar_char:%d\n",max); system("PAUSE"); return 0; } php_similar_str:0,6=6 php_similar_char:6 char *str1="TCP协议通讯工作原a理"; char *str2="TCPa协议通讯工作原理"; php_similar_str:3,4=14 php_similar_char:19
󰈣󰈤
 
 
 
>>返回首页<<
 
 热帖排行
 
 
王朝网络微信公众号
微信扫码关注本站公众号wangchaonetcn
 
 
静静地坐在废墟上,四周的荒凉一望无际,忽然觉得,凄凉也很美
©2005- 王朝网络 版权所有