J
joosteto
/*
I'd like to search for several regexes in a (large) String, walking
through the string.
In order not to copy the String all the time, I thought I'd use
matcherObject.find(position), where
position is set to matcherObject.end() whenever a regex is found.
For example, search for the regexes:
ABLEWORD: \b\S*able\b
FULWORD: \b\S*ful\b
ANYWORD: \b\S+\b
SPACE: \s+
The only way I found was to create a Pattern and a Matcher for each
regex I want to search for, and use \\G
to make the matcherObject.find(position) start at position (not the
"previous match" as the documentation
claims), as I do in the code below.
Now, my question is: does it really have to be this clumsy?
(declaring two objects for each regex, having to copy end position
from last match, etc)
And does "\G" really mean "match from the start index" for
matcherObject.find(index), and not "match from the end
of the previous match", as the documentation claims?
See http://java.sun.com/docs/books/tutorial/essential/regex/bounds.html
*/
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Walks through a string and splits it into consecutive tokens, printing each
 * token together with the label of the first rule that recognises it.
 *
 * <p>Rules are tried in priority order at the current position: words ending in
 * "able", words ending in "ful", any other run of non-whitespace, and finally a
 * run of whitespace. The input is never copied; one {@link Matcher} per rule is
 * reused and simply re-aimed at the current position.
 */
class Scan {
    /** Labels printed for each kind of token, in priority order (first hit wins). */
    private static final String[] LABELS = {"ABLE", "FUL", "ANY", "SPACE"};

    /**
     * Patterns parallel to {@link #LABELS}; group 1 is the token text.
     * No {@code \G} is needed because {@code lookingAt()} already anchors the
     * attempt at the start of the matcher's region.
     */
    private static final Pattern[] PATTERNS = {
        Pattern.compile("\\b(\\S*able)\\b"),
        Pattern.compile("\\b(\\S*ful)\\b"),
        Pattern.compile("(\\S+)"),
        Pattern.compile("(\\s+)")
    };

    public Scan() {
    }

    /**
     * Tokenises a fixed sample sentence and prints the result to standard output.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String s = "a beautiful string with matchable words";
        scan(s);
    }

    /**
     * Prints {@code LABEL: "token", } for every token of {@code text}, left to right.
     * If no rule makes progress at some position, prints the unmatched remainder
     * on its own line and stops.
     *
     * @param text the input to tokenise
     */
    private static void scan(String text) {
        Matcher[] matchers = new Matcher[PATTERNS.length];
        for (int i = 0; i < PATTERNS.length; i++) {
            // Transparent bounds let \b look at the character just before the
            // region start, so a word boundary is judged against the real text
            // and not against the artificial region edge.
            matchers[i] = PATTERNS[i].matcher(text).useTransparentBounds(true);
        }

        int end = text.length();
        int pos = 0;
        while (pos < end) {
            int hit = -1;
            for (int i = 0; i < matchers.length && hit < 0; i++) {
                // lookingAt() tries exactly one start position (the region start);
                // find(pos) would go on probing every later index after a miss.
                Matcher candidate = matchers[i].region(pos, end);
                // Require progress so an empty match can never stall the loop.
                if (candidate.lookingAt() && candidate.end() > pos) {
                    hit = i;
                }
            }

            if (hit < 0) {
                System.out.println("No match found at: \"" + text.substring(pos) + "\"");
                break;
            }

            Matcher matched = matchers[hit];
            System.out.print(LABELS[hit] + ": \"" + matched.group(1) + "\", ");
            pos = matched.end();
        }
    }
}
/*
I'd like to search for several regexes in a (large) String, walking
through the string.
In order not to copy the String all the time, I thought I'd use
matcherObject.find(position), where
position is set to matcherObject.end() whenever a regex is found.
For example, search for the regexes:
ABLEWORD: \b\S*able\b
FULWORD: \b\S*ful\b
ANYWORD: \b\S+\b
SPACE: \s+
The only way I found was to create a Pattern and a Matcher for each
regex I want to search for, and use \\G
to make the matcherObject.find(position) start at position (not the
"previous match" as the documentation
claims), as I do in the code below.
Now, my question is: does it really have to be this clumsy?
(declaring two objects for each regex, having to copy end position
from last match, etc)
And does "\G" really mean "match from the start index" for
matcherObject.find(index), and not "match from the end
of the previous match", as the documentation claims?
See http://java.sun.com/docs/books/tutorial/essential/regex/bounds.html
*/
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Walks through a string and splits it into consecutive tokens, printing each
 * token together with the label of the first rule that recognises it.
 *
 * <p>Rules are tried in priority order at the current position: words ending in
 * "able", words ending in "ful", any other run of non-whitespace, and finally a
 * run of whitespace. The input is never copied; one {@link Matcher} per rule is
 * reused and simply re-aimed at the current position.
 */
class Scan {
    /** Labels printed for each kind of token, in priority order (first hit wins). */
    private static final String[] LABELS = {"ABLE", "FUL", "ANY", "SPACE"};

    /**
     * Patterns parallel to {@link #LABELS}; group 1 is the token text.
     * No {@code \G} is needed because {@code lookingAt()} already anchors the
     * attempt at the start of the matcher's region.
     */
    private static final Pattern[] PATTERNS = {
        Pattern.compile("\\b(\\S*able)\\b"),
        Pattern.compile("\\b(\\S*ful)\\b"),
        Pattern.compile("(\\S+)"),
        Pattern.compile("(\\s+)")
    };

    public Scan() {
    }

    /**
     * Tokenises a fixed sample sentence and prints the result to standard output.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String s = "a beautiful string with matchable words";
        scan(s);
    }

    /**
     * Prints {@code LABEL: "token", } for every token of {@code text}, left to right.
     * If no rule makes progress at some position, prints the unmatched remainder
     * on its own line and stops.
     *
     * @param text the input to tokenise
     */
    private static void scan(String text) {
        Matcher[] matchers = new Matcher[PATTERNS.length];
        for (int i = 0; i < PATTERNS.length; i++) {
            // Transparent bounds let \b look at the character just before the
            // region start, so a word boundary is judged against the real text
            // and not against the artificial region edge.
            matchers[i] = PATTERNS[i].matcher(text).useTransparentBounds(true);
        }

        int end = text.length();
        int pos = 0;
        while (pos < end) {
            int hit = -1;
            for (int i = 0; i < matchers.length && hit < 0; i++) {
                // lookingAt() tries exactly one start position (the region start);
                // find(pos) would go on probing every later index after a miss.
                Matcher candidate = matchers[i].region(pos, end);
                // Require progress so an empty match can never stall the loop.
                if (candidate.lookingAt() && candidate.end() > pos) {
                    hit = i;
                }
            }

            if (hit < 0) {
                System.out.println("No match found at: \"" + text.substring(pos) + "\"");
                break;
            }

            Matcher matched = matchers[hit];
            System.out.print(LABELS[hit] + ": \"" + matched.group(1) + "\", ");
            pos = matched.end();
        }
    }
}