If what you need is all you state,
this code should fix up your xml.
It's restricted to just a single tag-attribute pair.
It works by parsing exclusionary and specific markup.
The advantage here is that nothing else changes in the
original markup, only the string content of Id is changed
via the replacement side of the regex.
This avoids formatting headaches with some writers.
The regex may look simple for a parser; that's because it
is custom to the specific task.
The markup interaction is correct.
With a slight modification, multiple attr-val pairs can be handled
within a single tag. Of course this includes some re-eval
fringe code (?{}) and a conditional (?() | ), but it does the
same search and replace on multiple attributes.
Cheers!
-sln
Some output:
--------------------------
Id = "/Local/App/App1", (valnew = "App1")
Id2 = "/Local/App/App2", (valnew = "App2")
Id = '/Dummy/Test/iii', (valnew = 'iii')
Id = "/testing", (valnew = "testing")
Id = "/Dum
my/Test/iii
", (valnew = "iii")
Id = "/Dat/Inp/Out", (valnew = "Out")
Id = "/Local/App/App1", (valnew = "App1")
Id = "/Dummy/Test/iii", (valnew = "iii")
Id = "/Dat/Inp/Out", (valnew = "Out")
Tt = "TT/tt hello", (valnew = "tt hello")
Id = "/he llo", (valnew = "he llo")
-----------------------------
# -------------------------------------------
# rx_html_fixval2.pl
# -sln, 5/5/2010
#
# Util to search/replace attribute/val's from
# xml/html
# -------------------------------------------
use strict;
use warnings;
## Initialization
##
# Both strings below are interpolated into $Rxmarkup, which is compiled with
# /x, so the blanks inside them are not part of what gets matched.
my $rxopen = "(?: Application )"; # Open tag names we seek, cannot be an empty alternation
my $rxattr = "(?: Id.?|Tt )"; # Attribute names we seek (Id plus one optional character, or Tt), cannot have an empty alternation
# Alternative for $rxattr that accepts any attribute name: "(?: \\w+ )";
# The pattern below mixes interpolated variables with (?{ }) code blocks;
# 're eval' permits code blocks in patterns built that way.
use re 'eval';
# Scanner state shared with the pattern's code blocks: set to 1 once a wanted
# open tag has been matched, set back to 0 when no further attribute-value
# pair is found.
my $topen = 0;
# $Rxmarkup - stateful pattern meant to be applied repeatedly (s///g).
#
# The outer (?(?{$topen}) YES | NO) conditional selects a branch from the
# current value of $topen:
#
#   YES ($topen is true):
#     - Try to match the next wanted attribute (ATTR) and its quoted value
#       (VAL), requiring the tag's closing '>' to follow. The \K in front of
#       VAL drops everything matched so far from the reported match, so only
#       the quoted value is the text a substitution replaces.
#     - If no such pair is found, the empty alternative runs and resets
#       $topen to 0.
#
#   NO ($topen is false):
#     - Skip text up to the next '<', then either consume a whole
#       comment / CDATA / <![...[ ... ]]> section (so markup inside it is
#       never examined), or match '<' followed by a wanted tag name (OPEN)
#       and set $topen to 1.
#     - Otherwise step over a single '<'.
#     Every alternative here ends in \K, so the reported match is empty and
#     VAL does not take part in it.
#
# Flags: /x allows the layout and comments inside the pattern; /s lets '.'
# match newlines, so quoted values and comment bodies may span lines.
my $Rxmarkup = qr
{
(?(?{$topen}) # Begin Conditional
# Have <OPEN> ?
(?:
# Try to match next attr-val pair
\s+[^>]*? (?<=\s) (?<ATTR> $rxattr) \s*=\s* \K(?<VAL> ".+?"|'.+?')
(?= [^>]*? \s* /? > )
|
# No more attr-value pairs
(?{$topen = 0})
)
|
# Look for new <OPEN>
(?:
[^<]*
(?:
# Things that hide markup:
# - Comments/CDATA
(?: <! (?: \[CDATA\[.*?\]\] | --.*?-- | \[[A-Z][A-Z\ ]*\[.*?\]\] ) > ) \K
|
# Specific markup we seek:
# - OPEN tag
(?: < (?<OPEN> $rxopen \K) )
(?{$topen = 1})
)
|
< \K
)
) # End Conditional
}xs;
## Main
##
# Read the entire embedded sample document (everything after __DATA__)
# into a single string.
my $html = do { local $/; <DATA> };

# Run the stateful pattern across the document. Each match is replaced by
# whatever fixval() returns for the captured attribute name and quoted value.
$html =~ s{$Rxmarkup}{ fixval( $+{ATTR}, $+{VAL} ) }eg;

# Emit the rewritten document after the per-attribute trace lines.
print "\n", $html;
exit 0;
## Subs
##
# fixval( $attr, $quoted )
#
# Replacement callback: receives an attribute name and its quoted value
# (quotes included) and returns the text to put in place of the value.
#
#   - $quoted undefined      -> returns '' and prints nothing.
#   - value ends in "/xxx"   -> returns only the last path segment, trimmed
#                               of surrounding whitespace and re-wrapped in
#                               the value's closing quote character.
#   - anything else          -> returns $quoted unchanged.
#
# A trace line describing the decision is printed to STDOUT for every
# defined value.
sub fixval {
    my ($attr, $quoted) = @_;

    return '' if !defined $quoted;

    print "$attr = $quoted, ";

    # List-context match: yields (segment, quote) on success, () on failure.
    my ($segment, $quote) =
        $quoted =~ / \/ \s* (?<val>[^\/]+?) \s* (?<delim>["']) $/x;

    # No "/segment" right before the closing quote: leave the value alone.
    if (!defined $segment) {
        print "(val unchanged)\n";
        return $quoted;
    }

    my $valnew = join '', $quote, $segment, $quote;
    print "(valnew = $valnew)\n";
    return $valnew;
}
__DATA__
<?xml version="1.0" encoding="UTF-8" standalone="no" ?>
<Profile xmlns="xxxxxxxxx" name="" version="1.1" xmlns:xsi="http://
www.w3.org/2001/XMLSchema-instance">
<Application Name="App1" Id="/Local/App/App1"
Id2="/Local/App/App2" Services="1" policy=""
StartApp="" Bal="5" sessInt="500" WaterMark="1.0"/>
<AppProfileGuid>586e3456dt</AppProfileGuid>
</Profile>
<Application
Name="App99" Id='/Dummy/Test/iii' Services="3"
policy="99" StartApp="2" Bal="7" sessInt="27"
WaterMark="4.3" />
<Application Id="/testing"
Name="App100" Id="/Dum
my/Test/iii
" Services="4"
policy="99" StartApp="2" Bal="7" sessInt="27"
WaterMark="4.3"/>
<Application
Name="Yyee" Id="/Dat/Inp/Out" Services="5"
policy="88" StartApp="" Bal="1" sessInt="8"
WaterMark="2.1"/>
<![INCLUDE CDATA [ <Application Name="App99" Id="//Test/can't see me"/> ]]>
<?xml version="1.0" encoding="UTF-8" standalone="no" ?>
<Profile
xmlns="xxxxxxxxx"
name=""
version="1.1"
xmlns:xsi="
http://www.w3.org/2001/XMLSchema-instance">
<Application
Name="App1" Id="/Local/App/App1" Services="1"
policy="" StartApp="" Bal="5" sessInt="500"
WaterMark="1.0"/>
<Application
Name="App99" Id="/Dummy/Test/iii" Services="3"
policy="99" StartApp="2" Bal="7" sessInt="27"
WaterMark="4.3"/>
<Application
Name="Yyee" Id="/Dat/Inp/Out" Services="5"
policy="88" StartApp="" Bal="1" sessInt="8"
WaterMark="2.1" Tt = "TT/tt hello"/>
<Application
Name="Yyee" Id="/he llo" Services="5"
policy="88" StartApp="" Bal="1" sessInt="8"
WaterMark="2.1"/>
<AppProfileGuid>586e3456dt</AppProfileGuid>
<AppProfileGuid>a46y2hktt7</AppProfileGuid>
<AppProfileGuid>mi6j77mae6</AppProfileGuid>
</Profile>