/*
* TitleURL : Extract the title of a webpage (URL)
* and print it to standard output.
*
* Parke Godfrey
* 2009 October 27
*/
import java.io.IOException;
import java.io.PrintStream;
import java.util.Scanner;
import java.net.MalformedURLException;
import java.net.URL;
/**
 * Console program that asks for a web address on standard input, downloads
 * the page, and prints the text found between its {@code <title>} and
 * {@code </title>} tags. Prints "no title" when the page has no complete
 * title element.
 */
public class TitleURL
{
    // Title is between <title> start-tag and </title> end-tag
    // in the HTML document (webpage).
    private static final String TITLE_START = "<title>";
    private static final String TITLE_END = "</title>";

    /**
     * Program entry point.
     *
     * Exits with status -1 if the address cannot be turned into a URL or
     * the page cannot be opened.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args)
    {
        // Not closed on purpose: closing it would also close System.in.
        Scanner input = new Scanner(System.in);
        PrintStream output = System.out;

        // Ask the user for a URL.
        output.println("Enter a URL:");
        String path = input.nextLine().trim();

        // We make an object of type URL to handle our reading of the page.
        // Only prepend "http://" when the user did not supply a scheme;
        // otherwise an input such as "https://host" would become invalid.
        URL page = null;
        try
        {
            page = new URL(hasScheme(path) ? path : "http://" + path);
        }
        catch (MalformedURLException e)
        {
            output.println("Malformed URL.");
            System.exit(-1); // Exit app if we could not create page.
        }

        // Read the whole webpage. Lines are joined without a separator.
        StringBuilder content = new StringBuilder();
        Scanner reader = null;
        try
        {
            reader = new Scanner(page.openStream());
            while (reader.hasNextLine())
            {
                content.append(reader.nextLine());
            }
        }
        catch (IOException e)
        {
            output.println("Problem opening the URL.");
            System.exit(-1);
        }
        finally
        {
            // Closing the scanner also closes the underlying page stream.
            if (reader != null)
            {
                reader.close();
            }
        }

        // Find where the title is in the document, if there is one.
        String title = extractTitle(content.toString());
        if (title != null)
        {
            output.println(title);
        }
        else
        {
            output.println("no title");
        }
    }

    /**
     * Returns the text between the first start-tag and the first end-tag
     * that follows it, or null when either tag is missing. Tags are
     * matched without regard to letter case.
     */
    private static String extractTitle(String html)
    {
        int start = indexOfIgnoreCase(html, TITLE_START, 0);
        if (start < 0)
        {
            return null;
        }
        int titleBegin = start + TITLE_START.length();
        // Search only after the start tag so the substring range is valid.
        int end = indexOfIgnoreCase(html, TITLE_END, titleBegin);
        if (end < 0)
        {
            return null;
        }
        return html.substring(titleBegin, end);
    }

    /**
     * Case-insensitive indexOf. Works on the original text, so returned
     * indices are always valid for it and do not depend on the default
     * locale (unlike searching a toLowerCase() copy).
     */
    private static int indexOfIgnoreCase(String text, String needle, int from)
    {
        int last = text.length() - needle.length();
        for (int i = from; i <= last; i++)
        {
            if (text.regionMatches(true, i, needle, 0, needle.length()))
            {
                return i;
            }
        }
        return -1;
    }

    /**
     * Reports whether the address starts with a scheme followed by "://",
     * for example "https://" or "file://".
     */
    private static boolean hasScheme(String address)
    {
        int separator = address.indexOf("://");
        if (separator <= 0 || !Character.isLetter(address.charAt(0)))
        {
            return false;
        }
        for (int i = 1; i < separator; i++)
        {
            char c = address.charAt(i);
            if (!Character.isLetterOrDigit(c) && c != '+' && c != '-' && c != '.')
            {
                return false;
            }
        }
        return true;
    }
}