Skip to content

Commit 4fe3d10

Browse files
committed
Don't create a new array for empty/null match every time
If PREG_OFFSET_CAPTURE is used, unmatched subpatterns will be either [null, -1] or ['', -1], depending on whether PREG_UNMATCHED_AS_NULL mode is enabled. Instead of creating a new array like this every time, cache it inside a global (per-request -- could make it immutable though). Additionally, check whether the subpattern is an empty string or a single-character string and use an existing interned string in that case. Empty / single-char subpatterns are common, so let's avoid allocating strings for them.
1 parent 38b1627 commit 4fe3d10

File tree

2 files changed

+62
-19
lines changed

2 files changed

+62
-19
lines changed

ext/pcre/php_pcre.c

Lines changed: 59 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,8 @@ static PHP_GINIT_FUNCTION(pcre) /* {{{ */
259259
pcre_globals->backtrack_limit = 0;
260260
pcre_globals->recursion_limit = 0;
261261
pcre_globals->error_code = PHP_PCRE_NO_ERROR;
262+
ZVAL_UNDEF(&pcre_globals->unmatched_null_pair);
263+
ZVAL_UNDEF(&pcre_globals->unmatched_empty_pair);
262264
#ifdef HAVE_PCRE_JIT_SUPPORT
263265
pcre_globals->jit = 1;
264266
#endif
@@ -460,6 +462,15 @@ static PHP_RINIT_FUNCTION(pcre)
460462
/* }}} */
461463
#endif
462464

465+
/* Request shutdown handler for the pcre module.
 *
 * Releases the two cached "unmatched subpattern" pairs held in the module
 * globals and resets both slots to UNDEF. add_offset_pair() tests the slots
 * with Z_ISUNDEF() and rebuilds a pair on first use, so the caches are
 * per-request. Always returns SUCCESS. */
static PHP_RSHUTDOWN_FUNCTION(pcre)
466+
{
467+
zval_ptr_dtor(&PCRE_G(unmatched_null_pair)); /* release the cached null pair */
468+
zval_ptr_dtor(&PCRE_G(unmatched_empty_pair)); /* release the cached empty-string pair */
469+
ZVAL_UNDEF(&PCRE_G(unmatched_null_pair)); /* mark as "not built" for the lazy-init check */
470+
ZVAL_UNDEF(&PCRE_G(unmatched_empty_pair));
471+
return SUCCESS;
472+
}
473+
463474
/* {{{ static pcre_clean_cache */
464475
static int pcre_clean_cache(zval *data, void *arg)
465476
{
@@ -937,26 +948,57 @@ PHPAPI void php_pcre_free_match_data(pcre2_match_data *match_data)
937948
}
938949
}/*}}}*/
939950

951+
/* Build the cached [null, -1] pair in PCRE_G(unmatched_null_pair).
 *
 * Called from add_offset_pair() when the global is still UNDEF; the result is
 * the (match, offset) pair handed out for unmatched subpatterns when
 * unmatched_as_null is set. */
static void init_unmatched_null_pair() {
952+
zval tmp;
953+
zval *pair = &PCRE_G(unmatched_null_pair);
954+
array_init_size(pair, 2); /* the pair holds exactly two elements */
955+
ZVAL_NULL(&tmp); /* element 0: the "match" value, null */
956+
zend_hash_next_index_insert_new(Z_ARRVAL_P(pair), &tmp);
957+
ZVAL_LONG(&tmp, -1); /* element 1: the offset, -1 */
958+
zend_hash_next_index_insert_new(Z_ARRVAL_P(pair), &tmp);
959+
}
960+
961+
/* Build the cached ['', -1] pair in PCRE_G(unmatched_empty_pair).
 *
 * Called from add_offset_pair() when the global is still UNDEF; the result is
 * the (match, offset) pair handed out for unmatched subpatterns when
 * unmatched_as_null is not set. */
static void init_unmatched_empty_pair() {
962+
zval tmp;
963+
zval *pair = &PCRE_G(unmatched_empty_pair);
964+
array_init_size(pair, 2); /* the pair holds exactly two elements */
965+
ZVAL_EMPTY_STRING(&tmp); /* element 0: the "match" value, an empty string */
966+
zend_hash_next_index_insert_new(Z_ARRVAL_P(pair), &tmp);
967+
ZVAL_LONG(&tmp, -1); /* element 1: the offset, -1 */
968+
zend_hash_next_index_insert_new(Z_ARRVAL_P(pair), &tmp);
969+
}
970+
940971
/* {{{ add_offset_pair */
941972
static inline void add_offset_pair(zval *result, char *str, size_t len, PCRE2_SIZE offset, zend_string *name, uint32_t unmatched_as_null)
942973
{
943974
zval match_pair, tmp;
944975

945-
array_init_size(&match_pair, 2);
946-
947976
/* Add (match, offset) to the return value */
948977
if (PCRE2_UNSET == offset) {
949978
if (unmatched_as_null) {
950-
ZVAL_NULL(&tmp);
979+
if (Z_ISUNDEF(PCRE_G(unmatched_null_pair))) {
980+
init_unmatched_null_pair();
981+
}
982+
ZVAL_COPY(&match_pair, &PCRE_G(unmatched_null_pair));
951983
} else {
952-
ZVAL_EMPTY_STRING(&tmp);
984+
if (Z_ISUNDEF(PCRE_G(unmatched_empty_pair))) {
985+
init_unmatched_empty_pair();
986+
}
987+
ZVAL_COPY(&match_pair, &PCRE_G(unmatched_empty_pair));
953988
}
954989
} else {
955-
ZVAL_STRINGL(&tmp, str, len);
990+
array_init_size(&match_pair, 2);
991+
if (len == 0) {
992+
ZVAL_EMPTY_STRING(&tmp);
993+
} else if (len == 1) {
994+
ZVAL_INTERNED_STR(&tmp, ZSTR_CHAR((unsigned char) *str));
995+
} else {
996+
ZVAL_STRINGL(&tmp, str, len);
997+
}
998+
zend_hash_next_index_insert_new(Z_ARRVAL(match_pair), &tmp);
999+
ZVAL_LONG(&tmp, offset);
1000+
zend_hash_next_index_insert_new(Z_ARRVAL(match_pair), &tmp);
9561001
}
957-
zend_hash_next_index_insert_new(Z_ARRVAL(match_pair), &tmp);
958-
ZVAL_LONG(&tmp, offset);
959-
zend_hash_next_index_insert_new(Z_ARRVAL(match_pair), &tmp);
9601002

9611003
if (name) {
9621004
Z_ADDREF(match_pair);
@@ -975,6 +1017,10 @@ static inline void populate_match_value(
9751017
} else {
9761018
ZVAL_EMPTY_STRING(val);
9771019
}
1020+
} else if (start_offset == end_offset) {
1021+
ZVAL_EMPTY_STRING(val);
1022+
} else if (start_offset + 1 == end_offset) {
1023+
ZVAL_INTERNED_STR(val, ZSTR_CHAR((unsigned char) subject[start_offset]));
9781024
} else {
9791025
ZVAL_STRINGL(val, subject + start_offset, end_offset - start_offset);
9801026
}
@@ -1223,16 +1269,10 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str,
12231269
}
12241270
} else {
12251271
for (i = 0; i < count; i++) {
1226-
if (PCRE2_UNSET == offsets[i<<1]) {
1227-
if (unmatched_as_null) {
1228-
add_next_index_null(&match_sets[i]);
1229-
} else {
1230-
add_next_index_str(&match_sets[i], ZSTR_EMPTY_ALLOC());
1231-
}
1232-
} else {
1233-
add_next_index_stringl(&match_sets[i], subject + offsets[i<<1],
1234-
offsets[(i<<1)+1] - offsets[i<<1]);
1235-
}
1272+
zval val;
1273+
populate_match_value(
1274+
&val, subject, offsets[2*i], offsets[2*i+1], unmatched_as_null);
1275+
zend_hash_next_index_insert_new(Z_ARRVAL(match_sets[i]), &val);
12361276
}
12371277
}
12381278
mark = pcre2_get_mark(match_data);
@@ -2955,7 +2995,7 @@ zend_module_entry pcre_module_entry = {
29552995
#else
29562996
NULL,
29572997
#endif
2958-
NULL,
2998+
PHP_RSHUTDOWN(pcre),
29592999
PHP_MINFO(pcre),
29603000
PHP_PCRE_VERSION,
29613001
PHP_MODULE_GLOBALS(pcre),

ext/pcre/php_pcre.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,9 @@ ZEND_BEGIN_MODULE_GLOBALS(pcre)
7575
zend_bool jit;
7676
#endif
7777
int error_code;
78+
/* Used for unmatched subpatterns in OFFSET_CAPTURE mode */
79+
zval unmatched_null_pair;
80+
zval unmatched_empty_pair;
7881
ZEND_END_MODULE_GLOBALS(pcre)
7982

8083
PHPAPI ZEND_EXTERN_MODULE_GLOBALS(pcre)

0 commit comments

Comments
 (0)