N'abend,
Ab Seite 300 seines Buchs 'Reguläre Ausdrücke' beschreibt
Jeffrey Friedl die Schwierigkeiten einer korrekten
Überprüfung einer E-Mail-Adresse. Auszugehen ist dabei von
http://www.faqs.org/rfcs/rfc822.html
wie immer, wenn man's genau machen möchte.
Jeffrey Friedl sagt, die Aufgabe sei prinzipiell nicht
zu lösen.
Mit gewissen Einschränkungen ist es aber dennoch machbar,
führt im Endergebnis allerdings zu einer Regex mit rund
5000 Zeichen. Das Programm, das diese Regex konstruiert,
findest Du hier:
http://public.yahoo.com/~jfriedl/regex/email-opt.txt
Es ist in Perl geschrieben, und weil's einfach sonderschön ist,
füge ich diese unglaubliche Regex hier mit an (optimierte Fassung):
[\040\t]* # Nab whitespace.
(?:
\( # (
[^\\\x80-\xff\n\015()] * # normal*
(?: # (
(?: \\ [^\x80-\xff] |
\( # (
[^\\\x80-\xff\n\015()] * # normal*
(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
\) # )
) # special
[^\\\x80-\xff\n\015()] * # normal*
)* # )*
\) # )
[\040\t]* )* # If comment found, allow more spaces.
# optional leading comment
(?:
(?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
# Atom
| # or
" # "
[^\\\x80-\xff\n\015"] * # normal
(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015"] * )* # ( special normal* )*
" # "
# Quoted string
)
[\040\t]* # Nab whitespace.
(?:
\( # (
[^\\\x80-\xff\n\015()] * # normal*
(?: # (
(?: \\ [^\x80-\xff] |
\( # (
[^\\\x80-\xff\n\015()] * # normal*
(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
\) # )
) # special
[^\\\x80-\xff\n\015()] * # normal*
)* # )*
\) # )
[\040\t]* )* # If comment found, allow more spaces.
(?:
\.
[\040\t]* # Nab whitespace.
(?:
\( # (
[^\\\x80-\xff\n\015()] * # normal*
(?: # (
(?: \\ [^\x80-\xff] |
\( # (
[^\\\x80-\xff\n\015()] * # normal*
(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
\) # )
) # special
[^\\\x80-\xff\n\015()] * # normal*
)* # )*
\) # )
[\040\t]* )* # If comment found, allow more spaces.
(?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
# Atom
| # or
" # "
[^\\\x80-\xff\n\015"] * # normal
(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015"] * )* # ( special normal* )*
" # "
# Quoted string
)
[\040\t]* # Nab whitespace.
(?:
\( # (
[^\\\x80-\xff\n\015()] * # normal*
(?: # (
(?: \\ [^\x80-\xff] |
\( # (
[^\\\x80-\xff\n\015()] * # normal*
(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
\) # )
) # special
[^\\\x80-\xff\n\015()] * # normal*
)* # )*
\) # )
[\040\t]* )* # If comment found, allow more spaces.
# additional words
)*
@
[\040\t]* # Nab whitespace.
(?:
\( # (
[^\\\x80-\xff\n\015()] * # normal*
(?: # (
(?: \\ [^\x80-\xff] |
\( # (
[^\\\x80-\xff\n\015()] * # normal*
(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
\) # )
) # special
[^\\\x80-\xff\n\015()] * # normal*
)* # )*
\) # )
[\040\t]* )* # If comment found, allow more spaces.
(?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
\[ # [
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
\] # ]
)
[\040\t]* # Nab whitespace.
(?:
\( # (
[^\\\x80-\xff\n\015()] * # normal*
(?: # (
(?: \\ [^\x80-\xff] |
\( # (
[^\\\x80-\xff\n\015()] * # normal*
(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
\) # )
) # special
[^\\\x80-\xff\n\015()] * # normal*
)* # )*
\) # )
[\040\t]* )* # If comment found, allow more spaces.
# optional trailing comments
(?:
\.
[\040\t]* # Nab whitespace.
(?:
\( # (
[^\\\x80-\xff\n\015()] * # normal*
(?: # (
(?: \\ [^\x80-\xff] |
\( # (
[^\\\x80-\xff\n\015()] * # normal*
(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
\) # )
) # special
[^\\\x80-\xff\n\015()] * # normal*
)* # )*
\) # )
[\040\t]* )* # If comment found, allow more spaces.
(?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
\[ # [
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
\] # ]
)
[\040\t]* # Nab whitespace.
(?:
\( # (
[^\\\x80-\xff\n\015()] * # normal*
(?: # (
(?: \\ [^\x80-\xff] |
\( # (
[^\\\x80-\xff\n\015()] * # normal*
(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
\) # )
) # special
[^\\\x80-\xff\n\015()] * # normal*
)* # )*
\) # )
[\040\t]* )* # If comment found, allow more spaces.
# optional trailing comments
)*
# address
| # or
(?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
# Atom
| # or
" # "
[^\\\x80-\xff\n\015"] * # normal
(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015"] * )* # ( special normal* )*
" # "
# Quoted string
)
# leading word
[^()<>@,;:".\\\[\]\x80-\xff\000-\010\012-\037] * # "normal" atoms and/or spaces
(?:
(?:
\( # (
[^\\\x80-\xff\n\015()] * # normal*
(?: # (
(?: \\ [^\x80-\xff] |
\( # (
[^\\\x80-\xff\n\015()] * # normal*
(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
\) # )
) # special
[^\\\x80-\xff\n\015()] * # normal*
)* # )*
\) # )
|
" # "
[^\\\x80-\xff\n\015"] * # normal
(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015"] * )* # ( special normal* )*
" # "
) # "special" comment or quoted string
[^()<>@,;:".\\\[\]\x80-\xff\000-\010\012-\037] * # more "normal"
)*
<
[\040\t]* # Nab whitespace.
(?:
\( # (
[^\\\x80-\xff\n\015()] * # normal*
(?: # (
(?: \\ [^\x80-\xff] |
\( # (
[^\\\x80-\xff\n\015()] * # normal*
(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
\) # )
) # special
[^\\\x80-\xff\n\015()] * # normal*
)* # )*
\) # )
[\040\t]* )* # If comment found, allow more spaces.
# <
(?:
@
[\040\t]* # Nab whitespace.
(?:
\( # (
[^\\\x80-\xff\n\015()] * # normal*
(?: # (
(?: \\ [^\x80-\xff] |
\( # (
[^\\\x80-\xff\n\015()] * # normal*
(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
\) # )
) # special
[^\\\x80-\xff\n\015()] * # normal*
)* # )*
\) # )
[\040\t]* )* # If comment found, allow more spaces.
(?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
\[ # [
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
\] # ]
)
[\040\t]* # Nab whitespace.
(?:
\( # (
[^\\\x80-\xff\n\015()] * # normal*
(?: # (
(?: \\ [^\x80-\xff] |
\( # (
[^\\\x80-\xff\n\015()] * # normal*
(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
\) # )
) # special
[^\\\x80-\xff\n\015()] * # normal*
)* # )*
\) # )
[\040\t]* )* # If comment found, allow more spaces.
# optional trailing comments
(?:
\.
[\040\t]* # Nab whitespace.
(?:
\( # (
[^\\\x80-\xff\n\015()] * # normal*
(?: # (
(?: \\ [^\x80-\xff] |
\( # (
[^\\\x80-\xff\n\015()] * # normal*
(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
\) # )
) # special
[^\\\x80-\xff\n\015()] * # normal*
)* # )*
\) # )
[\040\t]* )* # If comment found, allow more spaces.
(?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
\[ # [
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
\] # ]
)
[\040\t]* # Nab whitespace.
(?:
\( # (
[^\\\x80-\xff\n\015()] * # normal*
(?: # (
(?: \\ [^\x80-\xff] |
\( # (
[^\\\x80-\xff\n\015()] * # normal*
(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
\) # )
) # special
[^\\\x80-\xff\n\015()] * # normal*
)* # )*
\) # )
[\040\t]* )* # If comment found, allow more spaces.
# optional trailing comments
)*
(?: ,
[\040\t]* # Nab whitespace.
(?:
\( # (
[^\\\x80-\xff\n\015()] * # normal*
(?: # (
(?: \\ [^\x80-\xff] |
\( # (
[^\\\x80-\xff\n\015()] * # normal*
(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
\) # )
) # special
[^\\\x80-\xff\n\015()] * # normal*
)* # )*
\) # )
[\040\t]* )* # If comment found, allow more spaces.
@
[\040\t]* # Nab whitespace.
(?:
\( # (
[^\\\x80-\xff\n\015()] * # normal*
(?: # (
(?: \\ [^\x80-\xff] |
\( # (
[^\\\x80-\xff\n\015()] * # normal*
(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
\) # )
) # special
[^\\\x80-\xff\n\015()] * # normal*
)* # )*
\) # )
[\040\t]* )* # If comment found, allow more spaces.
(?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
\[ # [
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
\] # ]
)
[\040\t]* # Nab whitespace.
(?:
\( # (
[^\\\x80-\xff\n\015()] * # normal*
(?: # (
(?: \\ [^\x80-\xff] |
\( # (
[^\\\x80-\xff\n\015()] * # normal*
(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
\) # )
) # special
[^\\\x80-\xff\n\015()] * # normal*
)* # )*
\) # )
[\040\t]* )* # If comment found, allow more spaces.
# optional trailing comments
(?:
\.
[\040\t]* # Nab whitespace.
(?:
\( # (
[^\\\x80-\xff\n\015()] * # normal*
(?: # (
(?: \\ [^\x80-\xff] |
\( # (
[^\\\x80-\xff\n\015()] * # normal*
(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
\) # )
) # special
[^\\\x80-\xff\n\015()] * # normal*
)* # )*
\) # )
[\040\t]* )* # If comment found, allow more spaces.
(?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
\[ # [
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
\] # ]
)
[\040\t]* # Nab whitespace.
(?:
\( # (
[^\\\x80-\xff\n\015()] * # normal*
(?: # (
(?: \\ [^\x80-\xff] |
\( # (
[^\\\x80-\xff\n\015()] * # normal*
(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
\) # )
) # special
[^\\\x80-\xff\n\015()] * # normal*
)* # )*
\) # )
[\040\t]* )* # If comment found, allow more spaces.
# optional trailing comments
)*
)* # additional domains
:
[\040\t]* # Nab whitespace.
(?:
\( # (
[^\\\x80-\xff\n\015()] * # normal*
(?: # (
(?: \\ [^\x80-\xff] |
\( # (
[^\\\x80-\xff\n\015()] * # normal*
(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
\) # )
) # special
[^\\\x80-\xff\n\015()] * # normal*
)* # )*
\) # )
[\040\t]* )* # If comment found, allow more spaces.
# optional trailing comments
)? # optional route
(?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
# Atom
| # or
" # "
[^\\\x80-\xff\n\015"] * # normal
(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015"] * )* # ( special normal* )*
" # "
# Quoted string
)
[\040\t]* # Nab whitespace.
(?:
\( # (
[^\\\x80-\xff\n\015()] * # normal*
(?: # (
(?: \\ [^\x80-\xff] |
\( # (
[^\\\x80-\xff\n\015()] * # normal*
(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
\) # )
) # special
[^\\\x80-\xff\n\015()] * # normal*
)* # )*
\) # )
[\040\t]* )* # If comment found, allow more spaces.
(?:
\.
[\040\t]* # Nab whitespace.
(?:
\( # (
[^\\\x80-\xff\n\015()] * # normal*
(?: # (
(?: \\ [^\x80-\xff] |
\( # (
[^\\\x80-\xff\n\015()] * # normal*
(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
\) # )
) # special
[^\\\x80-\xff\n\015()] * # normal*
)* # )*
\) # )
[\040\t]* )* # If comment found, allow more spaces.
(?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
# Atom
| # or
" # "
[^\\\x80-\xff\n\015"] * # normal
(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015"] * )* # ( special normal* )*
" # "
# Quoted string
)
[\040\t]* # Nab whitespace.
(?:
\( # (
[^\\\x80-\xff\n\015()] * # normal*
(?: # (
(?: \\ [^\x80-\xff] |
\( # (
[^\\\x80-\xff\n\015()] * # normal*
(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
\) # )
) # special
[^\\\x80-\xff\n\015()] * # normal*
)* # )*
\) # )
[\040\t]* )* # If comment found, allow more spaces.
# additional words
)*
@
[\040\t]* # Nab whitespace.
(?:
\( # (
[^\\\x80-\xff\n\015()] * # normal*
(?: # (
(?: \\ [^\x80-\xff] |
\( # (
[^\\\x80-\xff\n\015()] * # normal*
(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
\) # )
) # special
[^\\\x80-\xff\n\015()] * # normal*
)* # )*
\) # )
[\040\t]* )* # If comment found, allow more spaces.
(?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
\[ # [
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
\] # ]
)
[\040\t]* # Nab whitespace.
(?:
\( # (
[^\\\x80-\xff\n\015()] * # normal*
(?: # (
(?: \\ [^\x80-\xff] |
\( # (
[^\\\x80-\xff\n\015()] * # normal*
(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
\) # )
) # special
[^\\\x80-\xff\n\015()] * # normal*
)* # )*
\) # )
[\040\t]* )* # If comment found, allow more spaces.
# optional trailing comments
(?:
\.
[\040\t]* # Nab whitespace.
(?:
\( # (
[^\\\x80-\xff\n\015()] * # normal*
(?: # (
(?: \\ [^\x80-\xff] |
\( # (
[^\\\x80-\xff\n\015()] * # normal*
(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
\) # )
) # special
[^\\\x80-\xff\n\015()] * # normal*
)* # )*
\) # )
[\040\t]* )* # If comment found, allow more spaces.
(?:
[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
|
\[ # [
(?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
\] # ]
)
[\040\t]* # Nab whitespace.
(?:
\( # (
[^\\\x80-\xff\n\015()] * # normal*
(?: # (
(?: \\ [^\x80-\xff] |
\( # (
[^\\\x80-\xff\n\015()] * # normal*
(?: \\ [^\x80-\xff] [^\\\x80-\xff\n\015()] * )* # (special normal*)*
\) # )
) # special
[^\\\x80-\xff\n\015()] * # normal*
)* # )*
\) # )
[\040\t]* )* # If comment found, allow more spaces.
# optional trailing comments
)*
# address spec
> # >
# name and address
)
Wenn man sich mit solcherlei befasst, sind eben diese Seiten
300ff - wie immer - außerordentlich empfehlenswert.
Jeffrey E.F. Friedl
Reguläre Ausdrücke
O'Reilly
ISBN 3-930673-62-2
Gruß
matho