Make the MIME types that trigger mbstring output conversion configurable.
Adds the INI directive mbstring.http_output_content_type_regex (default
regex: ^(text/|application/xhtml\+xml) ), compiles it with oniguruma or PCRE,
and matches it against SG(sapi_headers).mimetype in place of the hard-coded
"text/" prefix test.

Index: ext/mbstring/config.m4
===================================================================
RCS file: /repository/php-src/ext/mbstring/config.m4,v
retrieving revision 1.58.2.4.2.11.2.5
diff -u -r1.58.2.4.2.11.2.5 config.m4
--- ext/mbstring/config.m4	16 Jul 2008 02:30:48 -0000	1.58.2.4.2.11.2.5
+++ ext/mbstring/config.m4	22 Jul 2008 15:25:17 -0000
@@ -111,6 +111,7 @@
       AC_DEFINE([HAVE_STDARG_PROTOTYPES], [1], [Define to 1 if you have the <stdarg.h> header file.])
     ], [])
     AC_DEFINE([PHP_ONIG_BUNDLED], [1], [Define to 1 if the bundled oniguruma is used])
+    AC_DEFINE([HAVE_ONIG], [1], [Define to 1 if the oniguruma library is available])
     PHP_MBSTRING_ADD_CFLAG([-DNOT_RUBY])
     PHP_MBSTRING_ADD_BUILD_DIR([oniguruma])
     PHP_MBSTRING_ADD_BUILD_DIR([oniguruma/enc])
@@ -170,6 +171,7 @@
 
       PHP_CHECK_LIBRARY(onig, onig_init, [
         PHP_ADD_LIBRARY_WITH_PATH(onig, $PHP_ONIG/$PHP_LIBDIR, MBSTRING_SHARED_LIBADD)
+        AC_DEFINE([HAVE_ONIG], [1], [Define to 1 if the oniguruma library is available])
       ],[
         AC_MSG_ERROR([Problem with oniguruma. Please check config.log for more information.])
       ], [
Index: ext/mbstring/mbstring.c
===================================================================
RCS file: /repository/php-src/ext/mbstring/mbstring.c,v
retrieving revision 1.224.2.22.2.25.2.19
diff -u -r1.224.2.22.2.25.2.19 mbstring.c
--- ext/mbstring/mbstring.c	17 Jul 2008 20:04:05 -0000	1.224.2.22.2.25.2.19
+++ ext/mbstring/mbstring.c	22 Jul 2008 15:25:18 -0000
@@ -81,9 +81,16 @@
 #include "zend_multibyte.h"
 #endif /* ZEND_MULTIBYTE */
 
-#if HAVE_MBSTRING
+#if HAVE_ONIG
+#include <oniguruma.h>
+#undef UChar
+#elif HAVE_PCRE || HAVE_BUNDLED_PCRE
+#include <pcre.h>
+#endif
 /* }}} */
 
+#if HAVE_MBSTRING
+
 /* {{{ prototypes */
 ZEND_DECLARE_MODULE_GLOBALS(mbstring)
 static PHP_GINIT_FUNCTION(mbstring);
@@ -900,6 +907,102 @@
 }
 /* }}} */
 
+static void *_php_mb_compile_regex(const char *pattern TSRMLS_DC);
+static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len);
+static void _php_mb_free_regex(void *opaque);
+
+#if HAVE_ONIG
+/* {{{ _php_mb_compile_regex
+ * Compiles `pattern` (ASCII, Perl syntax, case-insensitive, non-capturing).
+ * Returns an opaque handle for _php_mb_match_regex()/_php_mb_free_regex(),
+ * or NULL after raising an E_WARNING when the pattern does not compile. */
+static void *_php_mb_compile_regex(const char *pattern TSRMLS_DC)
+{
+	OnigRegex retval;
+	OnigErrorInfo err_info;
+	int err_code;
+
+	if ((err_code = onig_new(&retval,
+			(const OnigUChar *)pattern,
+			(const OnigUChar *)pattern + strlen(pattern),
+			ONIG_OPTION_IGNORECASE | ONIG_OPTION_DONT_CAPTURE_GROUP,
+			ONIG_ENCODING_ASCII, &OnigSyntaxPerl, &err_info))) {
+		OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
+		/* the error info is passed by address, not by value */
+		onig_error_code_to_str(err_str, err_code, &err_info);
+		php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s: %s", pattern, err_str);
+		retval = NULL;
+	}
+	return retval;
+}
+/* }}} */
+
+/* {{{ _php_mb_match_regex
+ * Returns 1 if the compiled regex matches anywhere within the first str_len
+ * bytes of str, 0 otherwise. A NULL handle (no pattern set) never matches. */
+static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len)
+{
+	const OnigUChar *start = (const OnigUChar *)str;
+	const OnigUChar *end = start + str_len;
+
+	if (!opaque) {
+		return 0;
+	}
+	/* search the whole subject, as pcre_exec() does in the PCRE build;
+	 * onig_match() would only try the pattern at offset 0 */
+	return onig_search((OnigRegex)opaque, start, end, start, end,
+			NULL, ONIG_OPTION_NONE) >= 0;
+}
+/* }}} */
+
+/* {{{ _php_mb_free_regex
+ * Releases a handle obtained from _php_mb_compile_regex(). */
+static void _php_mb_free_regex(void *opaque)
+{
+	onig_free((OnigRegex)opaque);
+}
+/* }}} */
+#elif HAVE_PCRE || HAVE_BUNDLED_PCRE
+/* {{{ _php_mb_compile_regex
+ * Compiles `pattern` as a case-insensitive PCRE pattern. Returns an opaque
+ * handle for _php_mb_match_regex()/_php_mb_free_regex(), or NULL after
+ * raising an E_WARNING when the pattern does not compile. */
+static void *_php_mb_compile_regex(const char *pattern TSRMLS_DC)
+{
+	pcre *retval;
+	const char *err_str;
+	int err_offset;
+
+	if (!(retval = pcre_compile(pattern,
+			PCRE_CASELESS, &err_str, &err_offset, NULL))) {
+		php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s (offset=%d): %s", pattern, err_offset, err_str);
+	}
+	return retval;
+}
+/* }}} */
+
+/* {{{ _php_mb_match_regex
+ * Returns 1 if the compiled regex matches anywhere within the first str_len
+ * bytes of str, 0 otherwise. A NULL handle (no pattern set) never matches. */
+static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len)
+{
+	if (!opaque) {
+		return 0;
+	}
+	return pcre_exec((pcre *)opaque, NULL, str, (int)str_len, 0,
+			0, NULL, 0) >= 0;
+}
+/* }}} */
+
+/* {{{ _php_mb_free_regex
+ * Releases a handle obtained from _php_mb_compile_regex(). */
+static void _php_mb_free_regex(void *opaque)
+{
+	pcre_free(opaque);
+}
+/* }}} */
+#endif
+
 /* {{{ php_mb_nls_get_default_detect_order_list */
 static int php_mb_nls_get_default_detect_order_list(enum mbfl_no_language lang, enum mbfl_no_encoding **plist, int* plist_size)
 {
@@ -920,6 +1023,7 @@
 /* }}} */
 
 /* {{{ php.ini directive handler */
+/* {{{ static PHP_INI_MH(OnUpdate_mbstring_language) */
 static PHP_INI_MH(OnUpdate_mbstring_language)
 {
 	enum mbfl_no_language no_language;
@@ -1105,26 +1209,73 @@
 }
 /* }}} */
 
+/* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output_content_type_regex)
+ * Compiles the whitespace-trimmed INI value into the regex that is matched
+ * against the response MIME type; an empty value clears the regex.
+ * Returns FAILURE (leaving the current regex in place) on a bad pattern. */
+static PHP_INI_MH(OnUpdate_mbstring_http_output_content_type_regex)
+{
+	zval tmp;
+	void *re = NULL;
+
+	if (!new_value) {
+		return SUCCESS;
+	}
+	php_trim(new_value, new_value_length, NULL, 0, &tmp, 3 TSRMLS_CC);
+
+	if (Z_STRLEN(tmp) > 0 &&
+			!(re = _php_mb_compile_regex(Z_STRVAL(tmp) TSRMLS_CC))) {
+		/* the trimmed copy must be released on the error path too */
+		zval_dtor(&tmp);
+		return FAILURE;
+	}
+	zval_dtor(&tmp);
+
+	/* OnUpdateString() stores the pointer it is given, so hand it
+	 * new_value itself rather than the temporary freed above */
+	OnUpdateString(entry, new_value, new_value_length,
+		mh_arg1, mh_arg2, mh_arg3, stage TSRMLS_CC);
+
+	/* free the previously compiled pattern before replacing it */
+	if (MBSTRG(http_output_content_type_regex).re) {
+		_php_mb_free_regex(MBSTRG(http_output_content_type_regex).re);
+	}
+	MBSTRG(http_output_content_type_regex).re = re;
+
+	return SUCCESS;
+}
+/* }}} */
+/* }}} */
+
 /* {{{ php.ini directive registration */
 PHP_INI_BEGIN()
-	PHP_INI_ENTRY("mbstring.language", "neutral", PHP_INI_SYSTEM | PHP_INI_PERDIR, OnUpdate_mbstring_language)
-	PHP_INI_ENTRY("mbstring.detect_order", NULL, PHP_INI_ALL, OnUpdate_mbstring_detect_order)
-	PHP_INI_ENTRY("mbstring.http_input", "pass", PHP_INI_ALL, OnUpdate_mbstring_http_input)
-	PHP_INI_ENTRY("mbstring.http_output", "pass", PHP_INI_ALL, OnUpdate_mbstring_http_output)
-	PHP_INI_ENTRY("mbstring.internal_encoding", NULL, PHP_INI_ALL, OnUpdate_mbstring_internal_encoding)
+	PHP_INI_ENTRY("mbstring.language", "neutral", PHP_INI_SYSTEM | PHP_INI_PERDIR, OnUpdate_mbstring_language)
+	PHP_INI_ENTRY("mbstring.detect_order", NULL, PHP_INI_ALL, OnUpdate_mbstring_detect_order)
+	PHP_INI_ENTRY("mbstring.http_input", "pass", PHP_INI_ALL, OnUpdate_mbstring_http_input)
+	PHP_INI_ENTRY("mbstring.http_output", "pass", PHP_INI_ALL, OnUpdate_mbstring_http_output)
+	PHP_INI_ENTRY("mbstring.internal_encoding", NULL, PHP_INI_ALL, OnUpdate_mbstring_internal_encoding)
 #ifdef ZEND_MULTIBYTE
-	PHP_INI_ENTRY("mbstring.script_encoding", NULL, PHP_INI_ALL, OnUpdate_mbstring_script_encoding)
+	PHP_INI_ENTRY("mbstring.script_encoding", NULL, PHP_INI_ALL, OnUpdate_mbstring_script_encoding)
 #endif /* ZEND_MULTIBYTE */
-	PHP_INI_ENTRY("mbstring.substitute_character", NULL, PHP_INI_ALL, OnUpdate_mbstring_substitute_character)
-	STD_PHP_INI_ENTRY("mbstring.func_overload", "0",
-	PHP_INI_SYSTEM, OnUpdateLong, func_overload, zend_mbstring_globals, mbstring_globals)
-
-	STD_PHP_INI_BOOLEAN("mbstring.encoding_translation", "0",
-	PHP_INI_SYSTEM | PHP_INI_PERDIR, OnUpdate_mbstring_encoding_translation,
-	encoding_translation, zend_mbstring_globals, mbstring_globals)
-
-	STD_PHP_INI_BOOLEAN("mbstring.strict_detection", "0",
-	PHP_INI_ALL, OnUpdateLong, strict_detection, zend_mbstring_globals, mbstring_globals)
+	PHP_INI_ENTRY("mbstring.substitute_character", NULL, PHP_INI_ALL, OnUpdate_mbstring_substitute_character)
+	STD_PHP_INI_ENTRY("mbstring.func_overload", "0",
+	PHP_INI_SYSTEM, OnUpdateLong, func_overload, zend_mbstring_globals, mbstring_globals)
+
+	STD_PHP_INI_BOOLEAN("mbstring.encoding_translation", "0",
+		PHP_INI_SYSTEM | PHP_INI_PERDIR,
+		OnUpdate_mbstring_encoding_translation,
+		encoding_translation, zend_mbstring_globals, mbstring_globals)
+	STD_PHP_INI_ENTRY("mbstring.http_output_content_type_regex",
+		"^(text/|application/xhtml\\+xml)",
+		PHP_INI_SYSTEM | PHP_INI_PERDIR,
+		OnUpdate_mbstring_http_output_content_type_regex,
+		http_output_content_type_regex.str,
+		zend_mbstring_globals, mbstring_globals)
+
+	STD_PHP_INI_BOOLEAN("mbstring.strict_detection", "0",
+		PHP_INI_ALL,
+		OnUpdateLong,
+		strict_detection, zend_mbstring_globals, mbstring_globals)
 PHP_INI_END()
 /* }}} */
 
@@ -1163,6 +1314,8 @@
 	mbstring_globals->encoding_translation = 0;
 	mbstring_globals->strict_detection = 0;
 	mbstring_globals->outconv = NULL;
+	mbstring_globals->http_output_content_type_regex.str = NULL;
+	mbstring_globals->http_output_content_type_regex.re = NULL;
 #if HAVE_MBREGEX
 	mbstring_globals->mb_regex_globals = php_mb_regex_globals_alloc(TSRMLS_C);
 #endif
@@ -1402,9 +1555,7 @@
 	php_info_print_table_start();
 	php_info_print_table_row(2, "Multibyte Support", "enabled");
 	php_info_print_table_row(2, "Multibyte string engine", "libmbfl");
-	if (MBSTRG(encoding_translation)) {
-		php_info_print_table_row(2, "HTTP input encoding translation", "enabled");
-	}
+	php_info_print_table_row(2, "HTTP input encoding translation", MBSTRG(encoding_translation) ? "enabled": "disabled");
 	php_info_print_table_end();
 
 	php_info_print_table_start();
@@ -1839,8 +1990,11 @@
 	}
 
 	/* analyze mime type */
-	if (SG(sapi_headers).mimetype &&
-		strncmp(SG(sapi_headers).mimetype, "text/", 5) == 0) {
+	if (SG(sapi_headers).mimetype &&
+		_php_mb_match_regex(
+			MBSTRG(http_output_content_type_regex).re,
+			SG(sapi_headers).mimetype,
+			strlen(SG(sapi_headers).mimetype))) {
 		if ((s = strchr(SG(sapi_headers).mimetype,';')) == NULL){
 			mimetype = estrdup(SG(sapi_headers).mimetype);
 		} else {
Index: ext/mbstring/mbstring.h
===================================================================
RCS file: /repository/php-src/ext/mbstring/mbstring.h,v
retrieving revision 1.66.2.4.2.5.2.3
diff -u -r1.66.2.4.2.5.2.3 mbstring.h
--- ext/mbstring/mbstring.h	17 Jul 2008 16:08:37 -0000	1.66.2.4.2.5.2.3
+++ ext/mbstring/mbstring.h	22 Jul 2008 15:25:18 -0000
@@ -193,6 +193,10 @@
 	long strict_detection;
 	long illegalchars;
 	mbfl_buffer_converter *outconv;
+	struct {
+		char *str; /* INI value as stored by OnUpdateString() */
+		void *re;  /* handle from _php_mb_compile_regex(), or NULL */
+	} http_output_content_type_regex;
 #if HAVE_MBREGEX
 	struct _zend_mb_regex_globals *mb_regex_globals;
 #endif

NOTE(review): in every new .phpt file below, the bodies of the --SKIPIF-- and
--FILE-- sections were lost when this patch was extracted (each survives only
as one empty "+" line). The hunk headers still carry the original line counts,
so these hunks will not apply until the bodies are restored from the original
patch.

Index: ext/mbstring/tests/mb_output_handler_pattern-01.phpt
===================================================================
RCS file: ext/mbstring/tests/mb_output_handler_pattern-01.phpt
diff -N ext/mbstring/tests/mb_output_handler_pattern-01.phpt
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ ext/mbstring/tests/mb_output_handler_pattern-01.phpt	22 Jul 2008 15:25:18 -0000
@@ -0,0 +1,18 @@
+--TEST--
+mb_output_handler() patterns
+--SKIPIF--
+
+--INI--
+mbstring.internal_encoding=UTF-8
+--FILE--
+
+--EXPECT--
+string(12) "a5c6a5b9a5c8"
Index: ext/mbstring/tests/mb_output_handler_pattern-02.phpt
===================================================================
RCS file: ext/mbstring/tests/mb_output_handler_pattern-02.phpt
diff -N ext/mbstring/tests/mb_output_handler_pattern-02.phpt
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ ext/mbstring/tests/mb_output_handler_pattern-02.phpt	22 Jul 2008 15:25:18 -0000
@@ -0,0 +1,18 @@
+--TEST--
+mb_output_handler() patterns
+--SKIPIF--
+
+--INI--
+mbstring.internal_encoding=UTF-8
+--FILE--
+
+--EXPECT--
+string(12) "a5c6a5b9a5c8"
Index: ext/mbstring/tests/mb_output_handler_pattern-03.phpt
===================================================================
RCS file: ext/mbstring/tests/mb_output_handler_pattern-03.phpt
diff -N ext/mbstring/tests/mb_output_handler_pattern-03.phpt
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ ext/mbstring/tests/mb_output_handler_pattern-03.phpt	22 Jul 2008 15:25:18 -0000
@@ -0,0 +1,18 @@
+--TEST--
+mb_output_handler() patterns
+--SKIPIF--
+
+--INI--
+mbstring.internal_encoding=UTF-8
+--FILE--
+
+--EXPECT--
+string(12) "a5c6a5b9a5c8"
Index: ext/mbstring/tests/mb_output_handler_pattern-04.phpt
===================================================================
RCS file: ext/mbstring/tests/mb_output_handler_pattern-04.phpt
diff -N ext/mbstring/tests/mb_output_handler_pattern-04.phpt
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ ext/mbstring/tests/mb_output_handler_pattern-04.phpt	22 Jul 2008 15:25:18 -0000
@@ -0,0 +1,18 @@
+--TEST--
+mb_output_handler() patterns
+--SKIPIF--
+
+--INI--
+mbstring.internal_encoding=UTF-8
+--FILE--
+
+--EXPECT--
+string(18) "e38386e382b9e38388"
Index: ext/mbstring/tests/mb_output_handler_pattern-05.phpt
===================================================================
RCS file: ext/mbstring/tests/mb_output_handler_pattern-05.phpt
diff -N ext/mbstring/tests/mb_output_handler_pattern-05.phpt
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ ext/mbstring/tests/mb_output_handler_pattern-05.phpt	22 Jul 2008 15:25:18 -0000
@@ -0,0 +1,17 @@
+--TEST--
+mb_output_handler() patterns
+--SKIPIF--
+
+--INI--
+mbstring.internal_encoding=UTF-8
+--FILE--
+
+--EXPECT--
+string(12) "a5c6a5b9a5c8"
Index: ext/mbstring/tests/mb_output_handler_pattern-06.phpt
===================================================================
RCS file: ext/mbstring/tests/mb_output_handler_pattern-06.phpt
diff -N ext/mbstring/tests/mb_output_handler_pattern-06.phpt
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ ext/mbstring/tests/mb_output_handler_pattern-06.phpt	22 Jul 2008 15:25:18 -0000
@@ -0,0 +1,18 @@
+--TEST--
+mb_output_handler() patterns
+--SKIPIF--
+
+--INI--
+mbstring.internal_encoding=UTF-8
+--FILE--
+
+--EXPECT--
+string(12) "a5c6a5b9a5c8"
Index: ext/mbstring/tests/mb_output_handler_pattern-07.phpt
===================================================================
RCS file: ext/mbstring/tests/mb_output_handler_pattern-07.phpt
diff -N ext/mbstring/tests/mb_output_handler_pattern-07.phpt
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ ext/mbstring/tests/mb_output_handler_pattern-07.phpt	22 Jul 2008 15:25:18 -0000
@@ -0,0 +1,19 @@
+--TEST--
+mb_output_handler() patterns
+--SKIPIF--
+
+--INI--
+mbstring.internal_encoding=UTF-8
+mbstring.http_output_content_type_regex=html
+--FILE--
+
+--EXPECT--
+string(12) "a5c6a5b9a5c8"
Index: ext/mbstring/tests/mb_output_handler_pattern-08.phpt
===================================================================
RCS file: ext/mbstring/tests/mb_output_handler_pattern-08.phpt
diff -N ext/mbstring/tests/mb_output_handler_pattern-08.phpt
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ ext/mbstring/tests/mb_output_handler_pattern-08.phpt	22 Jul 2008 15:25:18 -0000
@@ -0,0 +1,19 @@
+--TEST--
+mb_output_handler() patterns
+--SKIPIF--
+
+--INI--
+mbstring.internal_encoding=UTF-8
+mbstring.http_output_content_type_regex=html
+--FILE--
+
+--EXPECT--
+string(18) "e38386e382b9e38388"
Index: ext/mbstring/tests/mb_output_handler_pattern-09.phpt
===================================================================
RCS file: ext/mbstring/tests/mb_output_handler_pattern-09.phpt
diff -N ext/mbstring/tests/mb_output_handler_pattern-09.phpt
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ ext/mbstring/tests/mb_output_handler_pattern-09.phpt	22 Jul 2008 15:25:18 -0000
@@ -0,0 +1,19 @@
+--TEST--
+mb_output_handler() patterns
+--SKIPIF--
+
+--INI--
+mbstring.internal_encoding=UTF-8
+mbstring.http_output_content_type_regex=html
+--FILE--
+
+--EXPECT--
+string(12) "a5c6a5b9a5c8"
Index: ext/mbstring/tests/mb_output_handler_pattern-10.phpt
===================================================================
RCS file: ext/mbstring/tests/mb_output_handler_pattern-10.phpt
diff -N ext/mbstring/tests/mb_output_handler_pattern-10.phpt
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ ext/mbstring/tests/mb_output_handler_pattern-10.phpt	22 Jul 2008 15:25:18 -0000
@@ -0,0 +1,19 @@
+--TEST--
+mb_output_handler() patterns
+--SKIPIF--
+
+--INI--
+mbstring.internal_encoding=UTF-8
+mbstring.http_output_content_type_regex=html
+--FILE--
+
+--EXPECT--
+string(18) "e38386e382b9e38388"
Index: ext/mbstring/tests/mb_output_handler_pattern-11.phpt
===================================================================
RCS file: ext/mbstring/tests/mb_output_handler_pattern-11.phpt
diff -N ext/mbstring/tests/mb_output_handler_pattern-11.phpt
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ ext/mbstring/tests/mb_output_handler_pattern-11.phpt	22 Jul 2008 15:25:18 -0000
@@ -0,0 +1,17 @@
+--TEST--
+mb_output_handler() patterns
+--SKIPIF--
+
+--INI--
+mbstring.internal_encoding=UTF-8
+--FILE--
+
+--EXPECT--
+string(12) "a5c6a5b9a5c8"
Index: ext/mbstring/tests/mb_output_handler_pattern-12.phpt
===================================================================
RCS file: ext/mbstring/tests/mb_output_handler_pattern-12.phpt
diff -N ext/mbstring/tests/mb_output_handler_pattern-12.phpt
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ ext/mbstring/tests/mb_output_handler_pattern-12.phpt	22 Jul 2008 15:25:18 -0000
@@ -0,0 +1,18 @@
+--TEST--
+mb_output_handler() patterns
+--SKIPIF--
+
+--INI--
+mbstring.internal_encoding=UTF-8
+--FILE--
+
+--EXPECT--
+string(12) "a5c6a5b9a5c8"