ruk·si

PHP
Regex

Updated at 2013-01-30 19:40

This file contains notes about PHP regular expressions.

Do not use regex for:

  • Parsing XML or HTML: use DOMDocument::loadHTML.
  • Validating form input: use native PHP data filtering (print_r(filter_list()); lists the available filters).
  • Parsing time: use strtotime().

Comment your regex.

preg_match('/^
    (1[-\s.])?  # optional 1-, 1. or 1
    ( \( )?     # optional opening parenthesis
    \d{3}       # the area code
    (?(2) \) )  # if there was opening parenthesis, close it
    [-\s.]?     # followed by - or . or space
    \d{3}       # first 3 digits
    [-\s.]?     # followed by - or . or space
    \d{4}       # last 4 digits
    $/x',
    $number
);

// The x modifier makes whitespace in the pattern be ignored, unless it
// is escaped or inside a character class; use \s to match whitespace.

Learn to use callbacks.

// This will call my_callback() once for every [placeholder] in $template.
// The quantifier is lazy (.*?) so that each pair of brackets is matched
// separately; a greedy .* would swallow everything from the first '[' to
// the last ']' on the same line.
$template = preg_replace_callback('/\[(.*?)\]/', 'my_callback', $template);

/**
 * Replacement callback for preg_replace_callback().
 *
 * @param array $matches $matches[0] is the whole match including the
 *                       brackets, $matches[1] is the text between them.
 * @return string The value of $data[$matches[1]] when that key is set,
 *                otherwise the original match, unchanged.
 */
function my_callback($matches) {
    // A function does not see variables of the calling scope; without
    // this declaration $data would always be undefined in here.
    global $data;

    // $matches[1] now contains the string between the brackets
    if (isset($data[$matches[1]])) {
        // return the replacement string
        return $data[$matches[1]];
    }
    else {
        // unknown placeholder: leave the text as it was
        return $matches[0];
    }
}

When a regex is greedy, the quantifiers (such as * or +) match as many characters as possible.

// If you add a question mark after the quantifier
// (.*?) it becomes ungreedy.

$html = '<a href="http://net.tutsplus.com/hello">Hello</a> '
    .'<a href="http://net.tutsplus.com/world">World!</a>';

// Note the ?'s after the *'s
if ( preg_match_all('/<a.*?>.*?<\/a>/', $html, $matches) ) {
    print_r($matches);
}

/* output:
Array
(
    [0] => Array
        (
            [0] => <a href="http://net.tutsplus.com/hello">Hello</a>
            [1] => <a href="http://net.tutsplus.com/world">World!</a>
        )
)
*/

Lookahead and lookbehind assertions.

// The following pattern first matches for 'foo', and then it checks to see
// if it is followed by 'bar':
$pattern = '/foo(?=bar)/';
preg_match($pattern, 'Hello foo'); // false
preg_match($pattern, 'Hello foobar'); // true

// The following example matches 'foo', only if it is NOT followed
// by 'bar'
$pattern = '/foo(?!bar)/';
preg_match($pattern, 'Hello foo'); // true
preg_match($pattern, 'Hello foobar'); // false
preg_match($pattern, 'Hello foobaz'); // true

// The following pattern matches 'bar', only if it is NOT preceded
// by 'foo' (negative lookbehind).
$pattern = '/(?<!foo)bar/';
preg_match($pattern, 'Hello bar'); // true
preg_match($pattern, 'Hello foobar'); // false
preg_match($pattern, 'Hello bazbar'); // true

Conditionals

// For example we can use this to check for opening and closing angle
// brackets:

$pattern = '/^(<)?[a-z]+(?(1)>)$/';
preg_match($pattern, '<test>'); // true
preg_match($pattern, '<foo'); // false
preg_match($pattern, 'bar>'); // false
preg_match($pattern, 'hello'); // true

// If it begins with 'q', it must begin with 'qu'
// else it must begin with 'f'
$pattern = '/^(?(?=q)qu|f)/';
preg_match($pattern, 'quake'); // true
preg_match($pattern, 'qwerty'); // false
preg_match($pattern, 'foo'); // true
preg_match($pattern, 'bar'); // false

Filtering

// In the following example we use a string that contains a special
// character (*).
$word = '*world*';
$text = 'Hello *world*!';
preg_match('/'.$word.'/', $text); // causes a warning
// preg_quote() escapes the regex special characters. Pass the pattern
// delimiter as the second argument so that it gets escaped too;
// otherwise a '/' inside $word would end the pattern early.
preg_match('/'.preg_quote($word, '/').'/', $text); // true

Non-capturing subpatterns

// Subpatterns, enclosed by parentheses, get captured into an array so
// that we can use them later if needed.
preg_match('/(f.*)(b.*)/', 'Hello foobar', $matches);
echo "f* => " . $matches[1]; // prints 'f* => foo'
echo "b* => " . $matches[2]; // prints 'b* => bar'

// Now let's make a small change by adding another subpattern (H.*)
// to the front:
preg_match('/(H.*) (f.*)(b.*)/', 'Hello foobar', $matches);
echo "f* => " . $matches[1]; // prints 'f* => Hello'
echo "b* => " . $matches[2]; // prints 'b* => foo'

// Non-capturing
preg_match('/(?:H.*) (f.*)(b.*)/', 'Hello foobar', $matches);
echo "f* => " . $matches[1]; // prints 'f* => foo'
echo "b* => " . $matches[2]; // prints 'b* => bar'

Named Subpatterns

preg_match('/(?P<fstar>f.*)(?P<bstar>b.*)/', 'Hello foobar', $matches);
echo "f* => " . $matches['fstar']; // prints 'f* => foo'
echo "b* => " . $matches['bstar']; // prints 'b* => bar'
// Now we can add another subpattern, without disturbing the existing
// matches in the $matches array:
preg_match(
    '/(?P<hi>H.*) (?P<fstar>f.*)(?P<bstar>b.*)/',
    'Hello foobar',
    $matches
);
echo "f* => " . $matches['fstar']; // prints 'f* => foo'
echo "b* => " . $matches['bstar']; // prints 'b* => bar'
echo "h* => " . $matches['hi']; // prints 'h* => Hello'