PHP - Regex
Updated at 2013-01-30 19:40
This file contains notes about PHP regular expressions.
Do not use regex for:
- Parsing XML or HTML; use DOMDocument::loadHTML instead.
- Validating form input; use native PHP data filtering instead. To list the available filters:
print_r(filter_list());
- Parsing dates and times; use strtotime() instead.
Comment your regex.
// Match $number against a phone-number pattern: an optional leading 1,
// a three-digit area code (optionally in parentheses), then 3 + 4 digits.
// The pattern is written across several lines with the x modifier so that
// every part can carry its own "#" comment.
preg_match('/^
(1[-\s.])? # optional 1-, 1. or 1
( \( )? # optional opening parenthesis
\d{3} # the area code
(?(2) \) ) # if there was opening parenthesis, close it
[-\s.]? # followed by - or . or space
\d{3} # first 3 digits
[-\s.]? # followed by - or . or space
\d{4} # last 4 digits
$/x',
$number
);
// With the x modifier, whitespace inside the pattern is ignored unless it
// is escaped or matched explicitly (for example with \s).
Learn to use callbacks.
// Call my_callback() once for every [placeholder] found in $template.
// The quantifier is lazy (.*?) so that each pair of brackets is matched on
// its own; a greedy .* would run from the first "[" to the last "]" on a
// line and invoke the callback only once for all of them.
$template = preg_replace_callback('/\[(.*?)\]/', 'my_callback', $template);
/**
 * Replacement callback for preg_replace_callback().
 *
 * Looks up the text captured between the brackets in the global $data
 * array and returns the stored value; placeholders without an entry are
 * returned unchanged.
 *
 * @param array $matches $matches[0] is the whole bracketed match,
 *                       $matches[1] is the string between the brackets.
 * @return mixed The entry from $data, or the original match when there
 *               is no such entry.
 */
function my_callback($matches) {
    // $data is defined outside this function. Without importing it, $data
    // would be an undefined local variable and the lookup below could
    // never succeed.
    global $data;

    if (isset($data[$matches[1]])) {
        // return the replacement string
        return $data[$matches[1]];
    }

    // No replacement known: leave the placeholder as it was.
    return $matches[0];
}
By default regex quantifiers (such as * or +) are greedy: they match as many characters as possible.
// Adding a question mark after a quantifier, as in (.*?), makes it
// ungreedy: it then matches as few characters as possible.
// The sample string below contains two links.
$html = '<a href="http://net.tutsplus.com/hello">Hello</a> '
.'<a href="http://net.tutsplus.com/world">World!</a>';
// Note the ?'s after the *'s: each .*? stops at the first place where the
// rest of the pattern can match, so the two links are found separately.
if ( preg_match_all('/<a.*?>.*?<\/a>/', $html, $matches) ) {
print_r($matches);
}
/* output:
Array
(
[0] => Array
(
[0] => <a href="http://net.tutsplus.com/hello">Hello</a>
[1] => <a href="http://net.tutsplus.com/world">World!</a>
)
)
*/
Lookahead and lookbehind patterns.
// Positive lookahead (?=...): the pattern matches 'foo' and then checks
// that it is followed by 'bar', without making 'bar' part of the match.
$pattern = '/foo(?=bar)/';
preg_match($pattern, 'Hello foo'); // no match
preg_match($pattern, 'Hello foobar'); // match
// Negative lookahead (?!...): matches 'foo' only if it is NOT followed
// by 'bar'.
$pattern = '/foo(?!bar)/';
preg_match($pattern, 'Hello foo'); // match
preg_match($pattern, 'Hello foobar'); // no match
preg_match($pattern, 'Hello foobaz'); // match
// Negative lookbehind (?<!...): matches 'bar' only if it is NOT
// preceded by 'foo'.
$pattern = '/(?<!foo)bar/';
preg_match($pattern, 'Hello bar'); // match
preg_match($pattern, 'Hello foobar'); // no match
preg_match($pattern, 'Hello bazbar'); // match
Conditionals
// Conditional on a capture group: (?(1)>) requires a closing '>' only
// when group 1 (the optional opening '<') took part in the match. This
// checks that angle brackets are either both present or both absent:
$pattern = '/^(<)?[a-z]+(?(1)>)$/';
preg_match($pattern, '<test>'); // match
preg_match($pattern, '<foo'); // no match
preg_match($pattern, 'bar>'); // no match
preg_match($pattern, 'hello'); // match
// Conditional on a lookahead: if the string begins with 'q', it must
// begin with 'qu'; otherwise it must begin with 'f'.
$pattern = '/^(?(?=q)qu|f)/';
preg_match($pattern, 'quake'); // match
preg_match($pattern, 'qwerty'); // no match
preg_match($pattern, 'foo'); // match
preg_match($pattern, 'bar'); // no match
Filtering
// In the following example we use a string that contains a special
// character (*). Such a string must be escaped before it is embedded in
// a pattern.
$word = '*world*';
$text = 'Hello *world*!';
// Unescaped, the leading * is a quantifier with nothing to repeat.
preg_match('/'.$word.'/', $text); // causes a warning
// preg_quote() escapes the regex metacharacters. The second argument names
// the pattern delimiter so that a '/' inside $word would be escaped too;
// without it, a slash in the input would end the pattern early.
preg_match('/' . preg_quote($word, '/') . '/', $text); // matches
Non-capturing subpatterns
// Each parenthesised subpattern is captured into $matches, in order of
// appearance, so it can be used after the match.
preg_match('/(f.*)(b.*)/', 'Hello foobar', $matches);
echo "f* => {$matches[1]}"; // prints 'f* => foo'
echo "b* => {$matches[2]}"; // prints 'b* => bar'

// Putting another capturing subpattern (H.*) in front shifts every index
// by one, so the same echo statements now print different captures:
preg_match('/(H.*) (f.*)(b.*)/', 'Hello foobar', $matches);
echo "f* => {$matches[1]}"; // prints 'f* => Hello'
echo "b* => {$matches[2]}"; // prints 'b* => foo'

// Written as a non-capturing group (?:...), the new subpattern still
// takes part in the match but is not stored, so the indexes stay put.
preg_match('/(?:H.*) (f.*)(b.*)/', 'Hello foobar', $matches);
echo "f* => {$matches[1]}"; // prints 'f* => foo'
echo "b* => {$matches[2]}"; // prints 'b* => bar'
Named Subpatterns
// Named subpatterns: (?P<name>...) makes the capture available in
// $matches under the key 'name'.
preg_match('/(?P<fstar>f.*)(?P<bstar>b.*)/', 'Hello foobar', $matches);
echo "f* => " . $matches['fstar']; // prints 'f* => foo'
echo "b* => " . $matches['bstar']; // prints 'b* => bar'
// Now we can add another subpattern, without disturbing the existing
// matches in the $matches array:
preg_match(
'/(?P<hi>H.*) (?P<fstar>f.*)(?P<bstar>b.*)/',
'Hello foobar',
$matches
);
echo "f* => " . $matches['fstar']; // prints 'f* => foo'
echo "b* => " . $matches['bstar']; // prints 'b* => bar'
echo "h* => " . $matches['hi']; // prints 'h* => Hello'