/******************************************************************************
CountedInput extends the BufferedInputStream by providing character and line
counting plus a few additional methods to scan some input structures.
----------------------------------------------------
Copyright (c) Gunnar Gotshalks. All Rights Reserved.

Permission to use, copy, modify, and distribute this software
and its documentation for NON-COMMERCIAL purposes and
without fee is hereby granted.
******************************************************************************/

package FlexOr.io;

import java.io.*;

/**
CountedInput is meant to be used to process ASCII files. It provides a base
from which specific scanning files can be constructed. To help programmers
provide some input statistics in application programs we provide charCount to
count the total number of characters input from a file, charNumber to give the
position of the character within the current line, and lineNumber of the
current line. Scan and skip operations for whitespace and the rest of the
current line are provided.

<p>The Java file input API read method always returns a character, including
an unbounded number of EOF characters when the end of file is reached.
Consequently, all input files are implicitly unbounded in length giving the
following logical structure for a file.

<pre>
    file = input[0..N-1 , N, ...]
</pre>

where a file contains the N data characters input[0..N-1] and an unbounded
number of EOF characters in input[N, N+1, ...].

<p>The EOL character (end-of-line) always counts as one data character,
whether the file represents it as \r, \n or \r\n. See the method read().

<p>The scan and skip methods all start from the current character, that is the
result of the most recent call to read(). Before the first call to read() the
current character is EOF, so a scan method called at that point sees an empty
input; call read() once to prime the scanner.

<p>Only the single character read() method maintains the counters; the bulk
read methods inherited from BufferedInputStream are not counted.

<p>Class invariant:

<pre>
    charCount = min(markCharCount + #userReadCalls, #input)
    lineNumber = markLineNumber + #linesReadSinceMark
    0 &lt;= position
</pre>

@author Gunnar Gotshalks
@version 1.0 1999 Jan 10
*/
public class CountedInput extends BufferedInputStream {

  /** Byte value of carriage return, the Macintosh end-of-line. */
  private static final int CR = 13;

  /** Byte value of line feed, the Unix end-of-line. */
  private static final int LF = 10;

  /******************************************************************************
  Creating an instance of CountedInput opens the file named by the String.
  There is no open required by the user.

  <pre>
  Requires: True
  Ensures:  charCount = 0 and lineNumber = 1
            and position = 0 and charNumber = 0
  </pre>

  @param fileName the name of the file to read.
  @exception FileNotFoundException when the file named by fileName cannot
             be opened.
  */
  public CountedInput (String fileName) throws FileNotFoundException {
    super(new FileInputStream(fileName));
  }

  /******************************************************************************
  To help programmers provide input statistics in application programs we
  provide charCount to count the total number of characters input from a file,
  charNumber to provide the position of the current character in the current
  line, and lineNumber to count the input lines.
  ---------------------------*/

  /** Net number of data characters -- exclusive of backtracking -- read from
  the file. End-of-line counts as one character even if \r\n is its
  representation. End-of-file is not a data character. */
  protected int theCharCount = 0;

  /** Position of the current character in the current line. */
  protected int theCharNumber = 0;

  /** One plus the net number of end-of-line characters -- exclusive of
  backtracking -- read from the file. End-of-line counts as one even if \r\n is
  its representation. End-of-file is not an end-of-line character. */
  protected int theLineNumber = 1;

  /** Return the net number of data characters read from the file.

  <pre>
  Requires: True
  </pre>

  @return charCount */
  public int charCount() {
    return theCharCount;
  }

  /** Return the position of the current character in the current line.

  <pre>
  Requires: True
  </pre>

  @return charNumber */
  public int charNumber() {
    return theCharNumber;
  }

  /** Return the current line number -- one plus the net number of
  end-of-line characters read from the file.

  <pre>
  Requires: True
  </pre>

  @return lineNumber */
  public int lineNumber() {
    return theLineNumber;
  }

  /******************************************************************************
  Need to keep the previous read character to handle the case of the '\r\n'
  sequence which is counted as one character. To speed up the read operation
  we have the current character global for the object.
  ---------------------------*/

  /** Value is the actual data character read at the time of the previous call
  to read. If \r was the actual data character read on the previous call it
  was returned to the user as Char.EOL. */
  protected int prevChar = Char.EOF;

  /** Value is the result returned by the most recent call to read. If Char.EOL
  was returned on the previous read, the actual data character read may have
  been a carriage return (\r). */
  protected int currentChar = Char.EOF;

  /** Return the current character.

  <pre>
  Requires: True
  </pre>

  @return currentChar */
  public char currentChar() {
    return (char) currentChar;
  }

  /******************************************************************************
  read()

  No need to test for EOF before reading as Java returns EOF an unbounded
  number of times.

  Need to have different cases depending upon how end-of-line is represented
  but end-of-line always counts as 1 character. In a Macintosh file it is
  '\r' (13), in a Unix file it is '\n' (10), in a Windows file it is '\r\n'.
  The last is the case that requires either a special look ahead of one
  character or keeping track of the previous character. It requires that
  after recognizing '\r' we must skip a following '\n'.

  EOF does not count as a data character.
  */

  /** Read the next character in the file.

  <p>An I/O failure is reported on System.err and then treated as end of file:
  the current character becomes EOF and EOF is returned. Returning the stale
  previous character instead would hand the caller a character that was never
  read and would keep every scanning loop of this class spinning forever.

  <pre>
  Requires: True
  Ensures:  position = 1 + old position
            (   input[old position] = EOF
                and charCount = old charCount
                and lineNumber = old lineNumber
             or input[old position] = EOL
                and charCount = 1 + old charCount
                and charNumber = 0
                and lineNumber = 1 + old lineNumber
             or input[old position] notin {EOL, EOF}
                and charCount = 1 + old charCount
                and charNumber = 1 + old charNumber
                and lineNumber = old lineNumber )
  </pre>

  @return input[position-1] */
  public int read() {
    try {
      currentChar = super.read();
      switch (currentChar) {
        case CR :
          // Macintosh end-of-line, or the first half of a Windows \r\n pair.
          // Either way it is the end of the line and is delivered as EOL.
          prevChar = currentChar;
          currentChar = Char.EOL;
          countEndOfLine();
          break;
        case LF :
          if (prevChar == CR) {
            // Second half of a \r\n pair: the line end was already counted
            // when \r was read, so skip it and deliver the next character.
            prevChar = currentChar;
            currentChar = read();
          } else {
            // Unix end-of-line.
            prevChar = currentChar;
            countEndOfLine();
          }
          break;
        case -1 :
          // End of the underlying stream; counters and prevChar are unchanged.
          currentChar = Char.EOF;
          break;
        default :
          prevChar = currentChar;
          theCharCount++;
          theCharNumber++;
      }
    } catch (IOException e) {
      System.err.println("Trouble reading" + e);
      // Treat the failure as end of file so callers and scan loops terminate.
      currentChar = Char.EOF;
    }
    return currentChar;
  }

  /** Update the counters for one end-of-line: it is one data character, it
  starts a new line and it resets the position within the line. */
  private void countEndOfLine() {
    theCharCount++;
    theCharNumber = 0;
    theLineNumber++;
  }

  /** Return true when the current character is a whitespace character as
  defined by Character.isWhitespace. EOF is never whitespace; it is tested
  explicitly rather than relying on what the EOF value casts to. */
  private boolean atWhitespace() {
    return (currentChar != Char.EOF)
        && Character.isWhitespace((char) currentChar);
  }

  /*****************************************************************************
  Reads the rest of the current line, starting with the current character.
  The next read will read the first character on the next line, if any, or
  read EOF. If the last read character was EOL or EOF then no characters are
  read -- rest of line is empty.

  <pre>
  Requires: True
  Ensures:  input[old position .. position-2] intersect {EOF, EOL} = null
            input[position-1] in {EOF, EOL}
  </pre>

  @return input[old position .. position-2] */
  public String readLine() {
    // 100 is only the initial capacity; the buffer grows for longer lines.
    StringBuffer buffer = new StringBuffer(100);
    while ((currentChar != Char.EOF) && (currentChar != Char.EOL)) {
      buffer.append((char) currentChar);
      read();
    }
    return buffer.toString();
  }

  /*****************************************************************************
  Skip characters until a non whitespace character is read. EOF is not a
  whitespace character. Using whitespace definition in Character.isWhitespace

  <pre>
  Requires: True
  Ensures:  input[old position .. position-1] intersect nonWhitespace = null
  </pre>

  @return the first non whitespace character, which is also the current
          character, or EOF. */
  public int skipWhitespace() {
    while (atWhitespace()) {
      read();
    }
    return currentChar;
  }

  /*****************************************************************************
  Skip to and return the first character on the next line.

  <pre>
  Requires: True
  Ensures:  input[old position .. position-1] intersect {EOL, EOF} = null
  </pre>

  @return the first character on the next line, or EOF. */
  public int skipToNextLine() {
    while ((currentChar != Char.EOL) && (currentChar != Char.EOF)) {
      read();
    }
    return read();
  }

  /*****************************************************************************
  Span whitespace, starting with the current character. Stops at the first
  non whitespace character or at EOF.

  <pre>
  Requires: True
  Ensures:  input[old position .. position-1] intersect nonWhiteSpace = null
  </pre>

  @return the whitespace characters spanned. */
  public String spanWhitespace() {
    StringBuffer sb = new StringBuffer();
    while (atWhitespace()) {
      sb.append((char) currentChar);
      read();
    }
    return sb.toString();
  }

  /*****************************************************************************
  Span non whitespace, starting with the current character. Stops at the first
  whitespace character or at EOF. EOF must stop the span explicitly: it is not
  whitespace and read() returns it an unbounded number of times, so without
  the test the span would never end.

  <pre>
  Requires: True
  Ensures:  input[old position .. position-1] intersect whiteSpace = null
            and EOF notin input[old position .. position-1]
  </pre>

  @return the non whitespace characters spanned. */
  public String spanNonWhitespace() {
    StringBuffer sb = new StringBuffer();
    while ((currentChar != Char.EOF)
           && !Character.isWhitespace((char) currentChar)) {
      sb.append((char) currentChar);
      read();
    }
    return sb.toString();
  }

  /*****************************************************************************
  Objects and methods for backtracking.
  --------------------------*/

  /** The maximum lookahead for the last mark call. */
  protected int marklimit;

  /** The net number of characters read at the time of the last mark call. */
  protected int markCharCount;

  /** The character position at the time of the last mark call. */
  protected int markCharNumber;

  /** The line number at the time of the last mark call. */
  protected int markLineNumber;

  /** The value of prevChar at the time of the last mark call. Needed so that
  a '\n' read after reset is still recognized as the second half of a \r\n
  pair exactly when it was one at the mark point. */
  protected int markPrevChar = Char.EOF;

  /** The value of currentChar at the time of the last mark call. Needed
  because every scan method starts from the current character. */
  protected int markCurrentChar = Char.EOF;

  /** Mark position for backtracking with reset. Marklimit is the maximum read
  ahead before reset cannot be used.

  <pre>
  Requires: True
  Ensures:  position = old position and charNumber = old charNumber
            charCount = old charCount and lineNumber = old lineNumber
            markPosition = position and markCharNumber = charNumber
            markCharCount = charCount and markLineNumber = lineNumber
            markPrevChar = prevChar and markCurrentChar = currentChar
  </pre>

  @param marklimit the maximum number of bytes that may be read before the
         mark becomes invalid. */
  public void mark(int marklimit) {
    super.mark(marklimit);
    // This class's marklimit field hides the superclass field of the same
    // name, so it has to be recorded here to hold what its comment promises.
    this.marklimit = marklimit;
    markCharCount = theCharCount;
    markCharNumber = theCharNumber;
    markLineNumber = theLineNumber;
    markPrevChar = prevChar;
    markCurrentChar = currentChar;
  }

  /*****************************************************************************
  Reset position to last mark point. The counters, the previous character and
  the current character are all restored to their values at the mark point.
  If the underlying reset fails nothing is changed.

  <pre>
  Requires: mark(marklimit) to have been called
  Ensures:  markPosition = old markPosition
            markCharCount = old markCharCount
            markCharNumber = old markCharNumber
            markLineNumber = old markLineNumber
            markPosition = position
            markCharCount = charCount and markLineNumber = lineNumber
            markCharNumber = charNumber
            prevChar = markPrevChar and currentChar = markCurrentChar
  </pre>

  @exception IOException When position-markPosition > marklimit or
             mark(marklimit) has not been called.
  */
  public void reset() throws IOException {
    super.reset();
    theCharCount = markCharCount;
    theLineNumber = markLineNumber;
    theCharNumber = markCharNumber;
    prevChar = markPrevChar;
    currentChar = markCurrentChar;
  }
}