/*
 * TitleURL : Extract the title of a webpage (URL).
 *
 * Parke Godfrey
 * 2009 October 27
 */

import java.io.IOException;
import java.io.PrintStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Scanner;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Prompts for a web address on standard input, downloads the page and
 * prints the text found between its {@code <title>} and {@code </title>}
 * tags, or {@code no title} when the page has no complete title element.
 */
public class TitleURL {

    /**
     * Matches a title element. Group 1 is the text between the start-tag
     * and the end-tag. Matching is ASCII case-insensitive and is done on the
     * original text, so no lower-cased copy (whose length may differ from
     * the original) is needed to compute positions.
     */
    private static final Pattern TITLE = Pattern.compile(
            "<title(?:\\s[^>]*)?>(.*?)</title\\s*>",
            Pattern.CASE_INSENSITIVE | Pattern.DOTALL);

    /** Matches a leading URL scheme such as {@code http://} or {@code https://}. */
    private static final Pattern SCHEME =
            Pattern.compile("[A-Za-z][A-Za-z0-9+.-]*://");

    /**
     * Program entry point: asks for a URL, fetches it and prints its title.
     * Exits with status -1 when the address is malformed or the page cannot
     * be read.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        Scanner input = new Scanner(System.in);
        PrintStream output = System.out;

        // Ask the user for a URL.
        output.println("Enter a URL:");
        if (!input.hasNextLine()) {
            // Standard input was closed before an address was supplied.
            output.println("Malformed URL.");
            System.exit(-1);
            return;
        }
        String path = input.nextLine();

        // Creating the URL object fails when the address is not well formed.
        URL page;
        try {
            page = new URL(normalizeAddress(path));
        } catch (MalformedURLException e) {
            output.println("Malformed URL.");
            System.exit(-1); // Exit app if we could not create page.
            return;          // Unreachable; lets the compiler see page is assigned below.
        }

        // Opening or reading the page can fail (unknown host, no network, ...).
        String content;
        try {
            content = readPage(page);
        } catch (IOException e) {
            output.println("Problem opening the URL.");
            System.exit(-1);
            return;
        }

        // Report the title, if there is one.
        String title = extractTitle(content);
        output.println(title != null ? title : "no title");
    }

    /**
     * Turns user input into an absolute URL string. Surrounding whitespace
     * is removed, and {@code http://} is prepended unless the input already
     * starts with a scheme.
     *
     * @param path the address as typed by the user
     * @return the address with a scheme in front
     */
    static String normalizeAddress(String path) {
        String trimmed = path.trim();
        return SCHEME.matcher(trimmed).lookingAt() ? trimmed : "http://" + trimmed;
    }

    /**
     * Finds the first title element in an HTML document.
     *
     * @param html the document text; may contain line terminators
     * @return the title text with runs of whitespace collapsed to single
     *         spaces and the ends trimmed, or {@code null} when there is no
     *         start-tag followed by an end-tag
     */
    static String extractTitle(String html) {
        Matcher m = TITLE.matcher(html);
        if (!m.find()) {
            return null;
        }
        return m.group(1).replaceAll("\\s+", " ").trim();
    }

    /**
     * Downloads the whole page as text, decoding it as UTF-8.
     *
     * @param page the URL to read
     * @return the page content, each line terminated by {@code '\n'}
     * @throws IOException if the stream cannot be opened or a read fails
     */
    private static String readPage(URL page) throws IOException {
        Scanner reader = new Scanner(page.openStream(), "UTF-8");
        try {
            StringBuilder content = new StringBuilder();
            while (reader.hasNextLine()) {
                // Keep a separator so words on adjacent lines do not merge.
                content.append(reader.nextLine()).append('\n');
            }
            // Scanner hides read errors and just stops; surface them instead
            // of silently working on a truncated page.
            IOException failure = reader.ioException();
            if (failure != null) {
                throw failure;
            }
            return content.toString();
        } finally {
            reader.close(); // Also closes the underlying network stream.
        }
    }
}