# W
# William James
## Read, parse, and create csv records.
# The program conforms to the csv specification at this site:
# http://www.creativyst.com/Doc/Articles/CSV/CSV01.htm
# The only extra is that you can change the field-separator.
# For a field-separator other than a comma, for example
# a semicolon:
# Csv.fs=";"
#
# After a record has been read and parsed,
# Csv.string contains the record in raw string format.
#
# Reads, parses, and writes CSV records using class-level state.
#
# The field separator defaults to "," and can be changed with Csv.fs=.
# After Csv.get_rec has read and parsed a record, Csv.string returns the
# raw text of that record.
#
# NOTE(review): this file contains more than one `class Csv` definition
# with the same methods; whichever definition is evaluated last wins.
class Csv
  # Current field separator; Csv.fs= only accepts values whose size is 1.
  @@fs = ","
  # Raw text of the record most recently read by get_rec (nil until then).
  @@string = nil

  # Replaces every doubled quote ("") with a single quote in each field.
  #
  # array:: Array of String fields.
  # Returns a new Array of Strings; the input array is not modified.
  def self.unescape( array )
    array.map { |field| field.gsub( '""', '"' ) }
  end

  # Rebuilds the regexps used by parse and get_rec from the current
  # field separator. Called once at load time and again by Csv.fs=.
  def self.make_regexp
    # The separator is interpolated into character classes below, so it is
    # escaped to make characters such as ], \, ^ and - match literally.
    fs = Regexp.escape( @@fs )

    # Matches one field plus the separator that follows it.
    # Assumes embedded quotes are escaped as "".
    @@regexp =
      %r{
        \G          ## Anchor at end of previous match.
        [ \t]*      ## Leading spaces or tabs are discarded.
        (?:
          ## For a field in quotes.
          " ( [^"]* (?: "" [^"]* )* ) " |
          ## For a field not in quotes.
          ( [^"\n#{fs}]*? )
        )
        [ \t]*
        [#{fs}]
      }mx

    # When get_rec finds after reading a line that the record isn't
    # complete, this regexp is used to decide whether to read another
    # line or to raise an exception. It matches zero or more complete
    # fields followed by a quoted field that has not been closed yet.
    @@reading_regexp =
      %r{
        \A          # Anchor at beginning of string.
        (?:
          [ \t]*
          (?:
            " [^"]* (?: "" [^"]* )* " |
            [^"\n#{fs}]*?
          )
          [ \t]*
          [#{fs}]
        )*
        [ \t]*
        " [^"]* (?: "" [^"]* )*
        \Z          # Anchor at end of string.
      }mx
  end # def make_regexp

  Csv.make_regexp

  # Splits one raw record into its fields.
  #
  # s:: String holding a single record, without the trailing newline.
  # Returns an Array of Strings with surrounding quotes removed and
  # doubled quotes collapsed.
  # Raises RuntimeError when the record cannot be consumed completely.
  def self.parse( s )
    # A separator is appended so that every field, including the last one,
    # is followed by a separator as the regexp requires.
    fields = (s + @@fs).scan( @@regexp )
    # After scan, the last match must reach the end of the string;
    # anything left over (or no match at all) means a malformed record.
    last = Regexp.last_match
    raise "\nBad csv record:\n#{s}\n" if last.nil? || !last.post_match.empty?
    # Each match yields [quoted, unquoted] with one of them nil.
    Csv.unescape( fields.flatten.compact )
  end

  # Reads the next record from +file+ and parses it.
  #
  # file:: object responding to eof? and gets.
  # Returns an Array of field Strings, or nil when +file+ is at EOF.
  # Raises RuntimeError for a malformed record or when the input ends
  # inside a quoted field.
  def self.get_rec( file )
    return nil if file.eof?
    @@string = ""
    loop do
      unless @@string.empty?
        # A previous line left the record incomplete: make sure what we
        # have so far can still become a valid record before continuing.
        raise "\nBad record:\n#{@@string}\n" if
          @@string !~ @@reading_regexp
        raise "\nPremature end of csv file." if file.eof?
      end
      @@string += file.gets
      # An even number of quotes means no quoted field is still open.
      break if @@string.count( '"' ).even?
    end
    @@string.chomp!
    Csv.parse( @@string )
  end

  # Returns the raw text of the record most recently read by get_rec.
  def self.string
    @@string
  end

  # Sets the field separator and rebuilds the parsing regexps.
  #
  # s:: the new separator; its size must be 1.
  # Raises RuntimeError otherwise.
  def self.fs=( s )
    raise "\nCsv.fs must be a single character.\n" if s.size != 1
    @@fs = s
    Csv.make_regexp
  end

  # Returns the current field separator.
  def self.fs
    @@fs
  end

  # Builds one CSV record from +array+.
  #
  # array:: Array of items; each is converted with to_s.
  # Returns a String with the fields joined by the field separator.
  def self.to_csv( array )
    array.map { |item|
      str = item.to_s
      # Quote the string if it contains the field-separator or
      # a " or a newline or a carriage-return, or if it has leading or
      # trailing whitespace.
      if str.include?( @@fs ) || str =~ /\A\s|["\r\n]|\s\z/
        str = '"' + str.gsub( '"', '""' ) + '"'
      end
      str
    }.join( @@fs )
  end
end # class Csv
# The program conforms to the csv specification at this site:
# http://www.creativyst.com/Doc/Articles/CSV/CSV01.htm
# The only extra is that you can change the field-separator.
# For a field-separator other than a comma, for example
# a semicolon:
# Csv.fs=";"
#
# After a record has been read and parsed,
# Csv.string contains the record in raw string format.
#
# Reads, parses, and writes CSV records using class-level state.
#
# The field separator defaults to "," and can be changed with Csv.fs=.
# After Csv.get_rec has read and parsed a record, Csv.string returns the
# raw text of that record.
#
# NOTE(review): `class Csv` is also defined earlier in this file with the
# same methods; this later definition re-opens the class and replaces them.
class Csv
  # Current field separator; Csv.fs= only accepts values whose size is 1.
  @@fs = ","
  # Raw text of the record most recently read by get_rec (nil until then).
  @@string = nil

  # Replaces every doubled quote ("") with a single quote in each field.
  #
  # array:: Array of String fields.
  # Returns a new Array of Strings; the input array is not modified.
  def self.unescape( array )
    array.map { |field| field.gsub( '""', '"' ) }
  end

  # Rebuilds the regexps used by parse and get_rec from the current
  # field separator. Called once at load time and again by Csv.fs=.
  def self.make_regexp
    # The separator is interpolated into character classes below, so it is
    # escaped to make characters such as ], \, ^ and - match literally.
    fs = Regexp.escape( @@fs )

    # Matches one field plus the separator that follows it.
    # Assumes embedded quotes are escaped as "".
    @@regexp =
      %r{
        \G          ## Anchor at end of previous match.
        [ \t]*      ## Leading spaces or tabs are discarded.
        (?:
          ## For a field in quotes.
          " ( [^"]* (?: "" [^"]* )* ) " |
          ## For a field not in quotes.
          ( [^"\n#{fs}]*? )
        )
        [ \t]*
        [#{fs}]
      }mx

    # When get_rec finds after reading a line that the record isn't
    # complete, this regexp is used to decide whether to read another
    # line or to raise an exception. It matches zero or more complete
    # fields followed by a quoted field that has not been closed yet.
    @@reading_regexp =
      %r{
        \A          # Anchor at beginning of string.
        (?:
          [ \t]*
          (?:
            " [^"]* (?: "" [^"]* )* " |
            [^"\n#{fs}]*?
          )
          [ \t]*
          [#{fs}]
        )*
        [ \t]*
        " [^"]* (?: "" [^"]* )*
        \Z          # Anchor at end of string.
      }mx
  end # def make_regexp

  Csv.make_regexp

  # Splits one raw record into its fields.
  #
  # s:: String holding a single record, without the trailing newline.
  # Returns an Array of Strings with surrounding quotes removed and
  # doubled quotes collapsed.
  # Raises RuntimeError when the record cannot be consumed completely.
  def self.parse( s )
    # A separator is appended so that every field, including the last one,
    # is followed by a separator as the regexp requires.
    fields = (s + @@fs).scan( @@regexp )
    # After scan, the last match must reach the end of the string;
    # anything left over (or no match at all) means a malformed record.
    last = Regexp.last_match
    raise "\nBad csv record:\n#{s}\n" if last.nil? || !last.post_match.empty?
    # Each match yields [quoted, unquoted] with one of them nil.
    Csv.unescape( fields.flatten.compact )
  end

  # Reads the next record from +file+ and parses it.
  #
  # file:: object responding to eof? and gets.
  # Returns an Array of field Strings, or nil when +file+ is at EOF.
  # Raises RuntimeError for a malformed record or when the input ends
  # inside a quoted field.
  def self.get_rec( file )
    return nil if file.eof?
    @@string = ""
    loop do
      unless @@string.empty?
        # A previous line left the record incomplete: make sure what we
        # have so far can still become a valid record before continuing.
        raise "\nBad record:\n#{@@string}\n" if
          @@string !~ @@reading_regexp
        raise "\nPremature end of csv file." if file.eof?
      end
      @@string += file.gets
      # An even number of quotes means no quoted field is still open.
      break if @@string.count( '"' ).even?
    end
    @@string.chomp!
    Csv.parse( @@string )
  end

  # Returns the raw text of the record most recently read by get_rec.
  def self.string
    @@string
  end

  # Sets the field separator and rebuilds the parsing regexps.
  #
  # s:: the new separator; its size must be 1.
  # Raises RuntimeError otherwise.
  def self.fs=( s )
    raise "\nCsv.fs must be a single character.\n" if s.size != 1
    @@fs = s
    Csv.make_regexp
  end

  # Returns the current field separator.
  def self.fs
    @@fs
  end

  # Builds one CSV record from +array+.
  #
  # array:: Array of items; each is converted with to_s.
  # Returns a String with the fields joined by the field separator.
  def self.to_csv( array )
    array.map { |item|
      str = item.to_s
      # Quote the string if it contains the field-separator or
      # a " or a newline or a carriage-return, or if it has leading or
      # trailing whitespace.
      if str.include?( @@fs ) || str =~ /\A\s|["\r\n]|\s\z/
        str = '"' + str.gsub( '"', '""' ) + '"'
      end
      str
    }.join( @@fs )
  end
end # class Csv