S
sln
First installment.
This was inspired by some other post on here.
I was wondering if I could get a review of my preliminary.
I need constructive critque's.
Thank you!
- sln
## ===============================================
## C_FunctionParser_v1.pl
## -------------------------------
## C/C++ Style Function Parser
## Idea - To parse out C/C++ style functions
## that have parenthetical closures (some don't).
## (Could be a package some day, dunno, maybe ..)
## - sln *** @ 2/6/09
## ===============================================
my $VERSION = 1.0;
$|=1;
use strict;
use warnings;
# Prototype's
sub Find_Function(\$\@);
# File-scoped variables
my ($FxParse,$FName,$Preamble);
# Set default function name
SetFunctionName();
## ----------------------
## Main (user play area)
## ----------------------
# Source file
my $Source = join '', <DATA>;
# Extended, possibly non-compliant, function name - pattern examples:
# SetFunctionName(qr/_T/);
# SetFunctionName(qr/\(\s*void\s*\)\s*function/);
# SetFunctionName("\\(\\s*void\\s*\\)\\s*function");
# Parse some functions
# func ...
my @Funct = ();
Find_Function($Source, @Funct);
# func2 ...
my @Funct2 = ();
SetFunctionName(qr/_T/);
Find_Function($Source, @Funct2);
# Print @Funct functions found
# Note that segments can be modified and collated.
if (!@Funct) {
print "Function name pattern: '$FName' not found!\n";
} else {
print "\nFound ".@Funct." matches.\nFunction pattern: '$FName' \n";
}
for my $ref (@Funct) {
printf "\n\@: %6d - %s\n", $$ref[3], substr($Source, $$ref[0], $$ref[2] - $$ref[0]);
}
## ----------
## End Main
## ----------
# ---------------------------------------------------------
# Set the parser's function regex pattern
#
sub SetFunctionName
{
if (!@_) {
$FName = "_*[a-zA-Z][\\w]*"; # Matches all compliant function names (default)
} else {
$FName = shift;
}
$Preamble = "\\s*\\(";
# Compile function parser regular expression
# Regex condensed:
# $FxParse = qr!/{2}.*?\n|/\*.*?\*/|\\.|'["()]'|(")|($FName$Preamble)|(\()|(\))!s;
# | | |1 1|2 2|3 3|4 4
# Note - Non-Captured, matching items, are meant to consume!
# -----------------------------------------------------------
# Regex /xpanded (with commentary):
$FxParse = # Regex Precedence (items MUST be in this order):
qr! # -----------------------------------------------
/{2}.*?\n | # comment - // + anything + end of line
/\*.*?\*/ | # comment - /* + anything + */
\\. | # escaped char - backslash + ANY character
'["()]' | # single quote char - quote then one of ", (, or ), then quote
(") | # capture $1 - double quote as a flag
($FName$Preamble) | # capture $2 - $FName + $Preamble
(\() | # capture $3 - ( as a flag
(\)) # capture $4 - ) as a flag
!xs;
}
# Recursive procedure that finds C/C++ style functions
# (the engine)
# Notes:
# - This is not a syntax checker !!!
# - Nested functions are found recursively, but the search is still streamed and single pass.
# - Parenthetical closures are determined via counter that persists in recursion.
# - This precedence avoids all ambigous paranthetical open/close conditions:
# 1. Dual comment styles.
# 2. Escapes.
# 3. Single quoted characters.
# 4. Double quotes, fip-flopped to determine closure.
# - Improper closures are reported, index removed from the stack and processing continues
# (this would be a syntax error, ie: the code won't complie, but it is reported as a closure error).
#
sub Find_Function(\$\@)
{
my ($src,$Funct,$offset,$pos,$Ndx,$Lines) = @_;
my ($closure,$dquotes,$aref,$misc) = (1,0,[],1);
$pos = 0 if (!defined $pos);
$offset = 0 if (!defined $offset);
$Ndx = $aref if (!defined $Ndx);
$Lines = \$misc if (!defined $Lines);
pos($$src) = $pos;
while ($$src =~ /$FxParse/gc)
{
if (defined $1) # double quote "
{
$dquotes = !$dquotes;
}
next if ($dquotes);
if (defined $2) # 'function name'
{
# ------------------------------------
# Placeholder for exclusions......
# ------------------------------------
# Cache the current function index
push @$Ndx, scalar(@$Funct);
my ($funcpos, $parampos) = ( $-[0], pos($$src) );
# Get newlines since last function
$$Lines += substr ($$src, $offset, $funcpos - $offset) =~ tr/\n//;
# print $$Lines,"\n";
# Save positions: function( parms )
push @$Funct , [$funcpos, $parampos, 0, $$Lines];
# Recurse this procedure
($offset, pos($$src)) = &Find_Function ( $src, $Funct, $funcpos, $parampos, $Ndx, $Lines );
}
elsif (defined $3) # '('
{
++$closure;
}
elsif (defined $4) # ')'
{
--$closure;
if ($closure <= 0)
{
$closure = 0;
if (@$Ndx)
{
# Pop index stack, assign closure, return function/closure positions
$$Funct[pop @$Ndx][2] = pos($$src);
return ($offset, pos($$src));
}
}
}
}
# To test an error, either take off the closure of a function in its source,
# or force it this way (but is pseudo error, make sure you have data in @$Funct):
# push @$Ndx, 1;
# Should only get here once.
# Its an error if index stack has elements
if (@$Ndx)
{
## BAD RETURN ... take this one off, try to recover
my $func_index = pop @$Ndx;
my $ref = $$Funct[$func_index];
$$ref[2] = $$ref[1];
print STDERR "** Bad return, index = $func_index\n";
print "** Error! Unclosed function [$func_index], line ".
$$ref[3].": '".substr ($$src, $$ref[0], $$ref[2] - $$ref[0] )."'\n";
}
return ($offset, pos($$src));
}
__DATA__
This was inspired by some other post on here.
I was wondering if I could get a review of my preliminary.
I need constructive critque's.
Thank you!
- sln
## ===============================================
## C_FunctionParser_v1.pl
## -------------------------------
## C/C++ Style Function Parser
## Idea - To parse out C/C++ style functions
## that have parenthetical closures (some don't).
## (Could be a package some day, dunno, maybe ..)
## - sln *** @ 2/6/09
## ===============================================
my $VERSION = 1.0;
$|=1;
use strict;
use warnings;
# Prototype's
sub Find_Function(\$\@);
# File-scoped variables
my ($FxParse,$FName,$Preamble);
# Set default function name
SetFunctionName();
## ----------------------
## Main (user play area)
## ----------------------
# Source file
my $Source = join '', <DATA>;
# Extended, possibly non-compliant, function name - pattern examples:
# SetFunctionName(qr/_T/);
# SetFunctionName(qr/\(\s*void\s*\)\s*function/);
# SetFunctionName("\\(\\s*void\\s*\\)\\s*function");
# Parse some functions
# func ...
my @Funct = ();
Find_Function($Source, @Funct);
# func2 ...
my @Funct2 = ();
SetFunctionName(qr/_T/);
Find_Function($Source, @Funct2);
# Print @Funct functions found
# Note that segments can be modified and collated.
if (!@Funct) {
print "Function name pattern: '$FName' not found!\n";
} else {
print "\nFound ".@Funct." matches.\nFunction pattern: '$FName' \n";
}
for my $ref (@Funct) {
printf "\n\@: %6d - %s\n", $$ref[3], substr($Source, $$ref[0], $$ref[2] - $$ref[0]);
}
## ----------
## End Main
## ----------
# ---------------------------------------------------------
# Set the parser's function regex pattern
#
sub SetFunctionName
{
if (!@_) {
$FName = "_*[a-zA-Z][\\w]*"; # Matches all compliant function names (default)
} else {
$FName = shift;
}
$Preamble = "\\s*\\(";
# Compile function parser regular expression
# Regex condensed:
# $FxParse = qr!/{2}.*?\n|/\*.*?\*/|\\.|'["()]'|(")|($FName$Preamble)|(\()|(\))!s;
# | | |1 1|2 2|3 3|4 4
# Note - Non-Captured, matching items, are meant to consume!
# -----------------------------------------------------------
# Regex /xpanded (with commentary):
$FxParse = # Regex Precedence (items MUST be in this order):
qr! # -----------------------------------------------
/{2}.*?\n | # comment - // + anything + end of line
/\*.*?\*/ | # comment - /* + anything + */
\\. | # escaped char - backslash + ANY character
'["()]' | # single quote char - quote then one of ", (, or ), then quote
(") | # capture $1 - double quote as a flag
($FName$Preamble) | # capture $2 - $FName + $Preamble
(\() | # capture $3 - ( as a flag
(\)) # capture $4 - ) as a flag
!xs;
}
# Recursive procedure that finds C/C++ style functions
# (the engine)
# Notes:
# - This is not a syntax checker !!!
# - Nested functions are found recursively, but the search is still streamed and single pass.
# - Parenthetical closures are determined via counter that persists in recursion.
# - This precedence avoids all ambigous paranthetical open/close conditions:
# 1. Dual comment styles.
# 2. Escapes.
# 3. Single quoted characters.
# 4. Double quotes, fip-flopped to determine closure.
# - Improper closures are reported, index removed from the stack and processing continues
# (this would be a syntax error, ie: the code won't complie, but it is reported as a closure error).
#
sub Find_Function(\$\@)
{
my ($src,$Funct,$offset,$pos,$Ndx,$Lines) = @_;
my ($closure,$dquotes,$aref,$misc) = (1,0,[],1);
$pos = 0 if (!defined $pos);
$offset = 0 if (!defined $offset);
$Ndx = $aref if (!defined $Ndx);
$Lines = \$misc if (!defined $Lines);
pos($$src) = $pos;
while ($$src =~ /$FxParse/gc)
{
if (defined $1) # double quote "
{
$dquotes = !$dquotes;
}
next if ($dquotes);
if (defined $2) # 'function name'
{
# ------------------------------------
# Placeholder for exclusions......
# ------------------------------------
# Cache the current function index
push @$Ndx, scalar(@$Funct);
my ($funcpos, $parampos) = ( $-[0], pos($$src) );
# Get newlines since last function
$$Lines += substr ($$src, $offset, $funcpos - $offset) =~ tr/\n//;
# print $$Lines,"\n";
# Save positions: function( parms )
push @$Funct , [$funcpos, $parampos, 0, $$Lines];
# Recurse this procedure
($offset, pos($$src)) = &Find_Function ( $src, $Funct, $funcpos, $parampos, $Ndx, $Lines );
}
elsif (defined $3) # '('
{
++$closure;
}
elsif (defined $4) # ')'
{
--$closure;
if ($closure <= 0)
{
$closure = 0;
if (@$Ndx)
{
# Pop index stack, assign closure, return function/closure positions
$$Funct[pop @$Ndx][2] = pos($$src);
return ($offset, pos($$src));
}
}
}
}
# To test an error, either take off the closure of a function in its source,
# or force it this way (but is pseudo error, make sure you have data in @$Funct):
# push @$Ndx, 1;
# Should only get here once.
# Its an error if index stack has elements
if (@$Ndx)
{
## BAD RETURN ... take this one off, try to recover
my $func_index = pop @$Ndx;
my $ref = $$Funct[$func_index];
$$ref[2] = $$ref[1];
print STDERR "** Bad return, index = $func_index\n";
print "** Error! Unclosed function [$func_index], line ".
$$ref[3].": '".substr ($$src, $$ref[0], $$ref[2] - $$ref[0] )."'\n";
}
return ($offset, pos($$src));
}
__DATA__