I'm trying to read HTML code from a URL connection. In one case the HTML file I'm trying to read includes 5 line breaks before the actual doctype declaration. In this cas
This works fine:
package url;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.net.URL;
/**
 * UrlReader
 * <p>
 * Small utility that reads the text served at a URL into a single {@link String}.
 * It can be used as a library through {@link #readContents(String)} or run from the
 * command line with one or more URLs as arguments.
 *
 * @author Michael
 * @since 3/20/11
 */
public class UrlReader
{
    /**
     * Reads each URL given on the command line and prints its contents to standard output.
     * A failure on one URL prints a stack trace and does not prevent the remaining
     * URLs from being processed.
     *
     * @param args the URLs to read, one per argument
     */
    public static void main(String[] args)
    {
        UrlReader urlReader = new UrlReader();
        for (String url : args)
        {
            try
            {
                String contents = urlReader.readContents(url);
                System.out.printf("url: %s contents: %s\n", url, contents);
            }
            catch (Exception e)
            {
                // Top-level boundary: report the failure and carry on with the next URL.
                e.printStackTrace();
            }
        }
    }

    /**
     * Reads the resource at the given address line by line and returns the lines
     * concatenated into one string.
     * <p>
     * Line terminators are not retained: {@link BufferedReader#readLine()} strips them
     * and the lines are appended back to back, so blank lines (for example leading line
     * breaks before a doctype declaration) contribute nothing to the result. The bytes
     * are decoded with the platform default charset, because no charset is passed to
     * the {@link InputStreamReader}.
     *
     * @param address the URL to read, in a form accepted by {@link URL#URL(String)}
     * @return the concatenated lines of the resource; an empty string if the resource is empty
     * @throws IOException if the address is malformed, the stream cannot be opened,
     *                     or reading fails
     */
    public String readContents(String address) throws IOException
    {
        StringBuilder contents = new StringBuilder(2048);
        BufferedReader br = null;
        try
        {
            URL url = new URL(address);
            br = new BufferedReader(new InputStreamReader(url.openStream()));
            String line;
            // Test the end-of-stream sentinel BEFORE appending: appending a null String
            // to a StringBuilder writes the literal text "null" into the result.
            while ((line = br.readLine()) != null)
            {
                contents.append(line);
            }
        }
        finally
        {
            // Always release the underlying stream, even when reading failed.
            close(br);
        }
        return contents.toString();
    }

    /**
     * Closes the given reader on a best-effort basis. A {@code null} reader is ignored,
     * and any exception raised while closing is printed rather than propagated so that
     * it cannot mask an exception thrown by the read itself.
     *
     * @param br the reader to close; may be {@code null}
     */
    private static void close(Reader br)
    {
        try
        {
            if (br != null)
            {
                br.close();
            }
        }
        catch (Exception e)
        {
            e.printStackTrace();
        }
    }
}