K
Ken Fine
I have a portion of a web page that I am scraping via .NET's WebRequest
object. Code and page URL is below. Some characters are being mis-rendered
when the string representing the page portion is returned: these are various
entity characters that do not translate correctly into renderable HTML.Can
someone suggest a systemic way that is built into the .NET framework's Text
classes to fix this so it renders correctly on a web page?
Thanks,
-KF
public partial class UweekHome : System.Web.UI.Page
{
protected void Page_Load(object sender, EventArgs e)
{
litHTMLfromScrapedPage.Text = GetHtmlPage("http://uweek.org");
}
public String GetHtmlPage(string strURL)
{
// the html retrieved from the page
String strResult;
WebResponse objResponse;
WebRequest objRequest = System.Net.HttpWebRequest.Create(strURL);
objResponse = objRequest.GetResponse();
using (StreamReader sr =
new StreamReader(objResponse.GetResponseStream()))
{
strResult = sr.ReadToEnd();
int pos1 = strResult.IndexOf("<slstart>", 0);
int pos2 = strResult.IndexOf("<storylist>", pos1);
int pos3 = strResult.IndexOf("</storylist>", pos2);
strResult = strResult.Substring(pos2 + 11, pos3 - pos2 + 11);
sr.Close();
}
return strResult;
}
}
object. Code and page URL is below. Some characters are being mis-rendered
when the string representing the page portion is returned: these are various
entity characters that do not translate correctly into renderable HTML.Can
someone suggest a systemic way that is built into the .NET framework's Text
classes to fix this so it renders correctly on a web page?
Thanks,
-KF
public partial class UweekHome : System.Web.UI.Page
{
protected void Page_Load(object sender, EventArgs e)
{
litHTMLfromScrapedPage.Text = GetHtmlPage("http://uweek.org");
}
public String GetHtmlPage(string strURL)
{
// the html retrieved from the page
String strResult;
WebResponse objResponse;
WebRequest objRequest = System.Net.HttpWebRequest.Create(strURL);
objResponse = objRequest.GetResponse();
using (StreamReader sr =
new StreamReader(objResponse.GetResponseStream()))
{
strResult = sr.ReadToEnd();
int pos1 = strResult.IndexOf("<slstart>", 0);
int pos2 = strResult.IndexOf("<storylist>", pos1);
int pos3 = strResult.IndexOf("</storylist>", pos2);
strResult = strResult.Substring(pos2 + 11, pos3 - pos2 + 11);
sr.Close();
}
return strResult;
}
}