G
Generic Usenet Account
Sometime back I wrote a simple Java program that reads a list of URLs
from a file and stores their contents on the local file system. I
have no problems with normal (i.e., HTML) pages, but I am not able to
download ASP files. They are stored as zero-length files.
I would greatly appreciate if someone could suggest a way out.
-- Bhat
My source code follows:
/////////////////// Source code begin /////////////////////
// This program reads a list of URLs to access and store on the local
// file system from a file. The name of the file is passed as the
// first command line argument. Each URL is on a separate line.
// Lines beginning with the '#' character are treated as blanks and
// are skipped.
//
import java.io.*;
import java.net.*;
import java.security.*;
// Downloads each URL listed in an input file and saves its contents to a
// local file whose name is derived from the URL (filesystem-unsafe
// characters replaced with replaceChar).
class WebsiteLoader
{
    // Character substituted for filesystem-unsafe characters in URLs.
    public static char replaceChar = '~';

    // Derives a safe local file name from a URL by replacing each of the
    // characters \ / : * ? " < > | with replaceChar.
    static String sanitizeFileName(String name)
    {
        char[] unsafe = { '\\', '/', ':', '*', '?', '"', '<', '>', '|' };
        String safe = name;
        for(int i = 0; i < unsafe.length; i++)
            safe = safe.replace(unsafe[i], replaceChar);
        return safe;
    }

    // argv[0] names a file containing one URL per line. Blank lines and
    // lines beginning with '#' are skipped.
    //
    // NOTE: the original code registered com.sun.net.ssl.internal.ssl.Provider
    // (JavaWorld tip 96) to enable HTTPS. JSSE is built into the platform
    // since Java 1.4, so that registration is no longer needed.
    public static void main(String argv[]) throws IOException
    {
        if(argv.length == 0)
            return;
        BufferedReader br = new BufferedReader(new FileReader(argv[0]));
        try
        {
            String origName;
            while((origName = br.readLine()) != null)
            {
                origName = origName.trim();
                // Skip blank lines and '#' comment lines.
                if(origName.length() == 0 || origName.charAt(0) == '#')
                    continue;
                // A constructor never returns null; a bad URL throws
                // MalformedURLException instead, so no null check is needed.
                URL url = new URL(origName);
                String modName = sanitizeFileName(origName);
                System.out.println("Writing contents of " + origName + " to " +
                    "the following file: " + modName);
                BufferedReader bufRdr = new BufferedReader(new
                    InputStreamReader(url.openStream()));
                try
                {
                    FileWriter fWriter = new FileWriter(modName);
                    try
                    {
                        String thisLine;
                        while((thisLine = bufRdr.readLine()) != null)
                        {
                            fWriter.write(thisLine);
                            // readLine() strips the line terminator; restore
                            // one so the saved file keeps its line structure.
                            fWriter.write(System.getProperty("line.separator"));
                        }
                    }
                    finally
                    {
                        // Closing flushes buffered output. The original never
                        // closed the writer, which is why some downloads were
                        // saved as zero-length files.
                        fWriter.close();
                    }
                }
                finally
                {
                    bufRdr.close();
                }
            }
        }
        finally
        {
            br.close();
        }
    }
}
from a file and stores their contents on the local file system. I
have no problems with normal (i.e., HTML) pages, but I am not able to
download ASP files. They are stored as zero-length files.
I would greatly appreciate if someone could suggest a way out.
-- Bhat
My source code follows:
/////////////////// Source code begin /////////////////////
// This program reads a list of URLs to access and store on the local
// file system from a file. The name of the file is passed as the
// first command line argument. Each URL is on a separate line.
// Lines beginning with the '#' character are treated as blanks and
// are skipped.
//
import java.io.*;
import java.net.*;
import java.security.*;
// Downloads each URL listed in an input file and saves its contents to a
// local file whose name is derived from the URL (filesystem-unsafe
// characters replaced with replaceChar).
class WebsiteLoader
{
    // Character substituted for filesystem-unsafe characters in URLs.
    public static char replaceChar = '~';

    // Derives a safe local file name from a URL by replacing each of the
    // characters \ / : * ? " < > | with replaceChar.
    static String sanitizeFileName(String name)
    {
        char[] unsafe = { '\\', '/', ':', '*', '?', '"', '<', '>', '|' };
        String safe = name;
        for(int i = 0; i < unsafe.length; i++)
            safe = safe.replace(unsafe[i], replaceChar);
        return safe;
    }

    // argv[0] names a file containing one URL per line. Blank lines and
    // lines beginning with '#' are skipped.
    //
    // NOTE: the original code registered com.sun.net.ssl.internal.ssl.Provider
    // (JavaWorld tip 96) to enable HTTPS. JSSE is built into the platform
    // since Java 1.4, so that registration is no longer needed.
    public static void main(String argv[]) throws IOException
    {
        if(argv.length == 0)
            return;
        BufferedReader br = new BufferedReader(new FileReader(argv[0]));
        try
        {
            String origName;
            while((origName = br.readLine()) != null)
            {
                origName = origName.trim();
                // Skip blank lines and '#' comment lines.
                if(origName.length() == 0 || origName.charAt(0) == '#')
                    continue;
                // A constructor never returns null; a bad URL throws
                // MalformedURLException instead, so no null check is needed.
                URL url = new URL(origName);
                String modName = sanitizeFileName(origName);
                System.out.println("Writing contents of " + origName + " to " +
                    "the following file: " + modName);
                BufferedReader bufRdr = new BufferedReader(new
                    InputStreamReader(url.openStream()));
                try
                {
                    FileWriter fWriter = new FileWriter(modName);
                    try
                    {
                        String thisLine;
                        while((thisLine = bufRdr.readLine()) != null)
                        {
                            fWriter.write(thisLine);
                            // readLine() strips the line terminator; restore
                            // one so the saved file keeps its line structure.
                            fWriter.write(System.getProperty("line.separator"));
                        }
                    }
                    finally
                    {
                        // Closing flushes buffered output. The original never
                        // closed the writer, which is why some downloads were
                        // saved as zero-length files.
                        fWriter.close();
                    }
                }
                finally
                {
                    bufRdr.close();
                }
            }
        }
        finally
        {
            br.close();
        }
    }
}