K
Kenneth McDonald
I'd very much like to use REXML's XPath features to extract info from
Google's financial info pages, but find that REXML chokes on the
JavaScript. Here's the result of trying to read in a page with this
bit of code:
require "rexml/document"
require 'net/http'
Net::HTTP.start('finance.google.com') do |http|
response = http.get('/finance?fstype=ii&q=NYSE:WAT')
rdoc = REXML::Document.new(response.body)
end
==========
Output:
/usr/local/lib/ruby/1.8/rexml/parsers/treeparser.rb:92:in `parse':
#<RuntimeError: Illegal character '&' in raw string
" (REXML::ParseException)
(function(){
var d=navigator.userAgent.toLowerCase().indexOf("msie")!=-1;function
e(){var b=document.styleSheets;for(var a=b.length-1;a>=0;--a){var
c=b[a].href;if(c)if(c.indexOf("styles/finance_")!=-1||
c.indexOf("styles_")!=-1)return b[a]}return null}function f(){var
b=e();if(b){var a=b.rules;return
a.length>0&&a[a.length-1].selectorText==".lastFinanceRule"}return false}
function g(){if(document.scripts)for(var b=0;b">
/usr/local/lib/ruby/1.8/rexml/text.rb:91:in `initialize'
Google's financial info pages, but find that REXML chokes on the
JavaScript. Here's the result of trying to read in a page with this
bit of code:
require "rexml/document"
require 'net/http'
Net::HTTP.start('finance.google.com') do |http|
response = http.get('/finance?fstype=ii&q=NYSE:WAT')
rdoc = REXML::Document.new(response.body)
end
==========
Output:
/usr/local/lib/ruby/1.8/rexml/parsers/treeparser.rb:92:in `parse':
#<RuntimeError: Illegal character '&' in raw string
" (REXML::ParseException)
(function(){
var d=navigator.userAgent.toLowerCase().indexOf("msie")!=-1;function
e(){var b=document.styleSheets;for(var a=b.length-1;a>=0;--a){var
c=b[a].href;if(c)if(c.indexOf("styles/finance_")!=-1||
c.indexOf("styles_")!=-1)return b[a]}return null}function f(){var
b=e();if(b){var a=b.rules;return
a.length>0&&a[a.length-1].selectorText==".lastFinanceRule"}return false}
function g(){if(document.scripts)for(var b=0;b">
/usr/local/lib/ruby/1.8/rexml/text.rb:91:in `initialize'