001    package org.LiveGraph.dataFile.read;
002    
003    import java.io.BufferedReader;
004    import java.io.Closeable;
005    import java.io.IOException;
006    import java.io.InputStream;
007    import java.io.InputStreamReader;
008    import java.util.ArrayList;
009    import java.util.Collections;
010    import java.util.HashMap;
011    import java.util.List;
012    import java.util.Map;
013    
014    import org.LiveGraph.dataFile.common.DataFormatException;
015    
016    import com.softnetConsult.utils.exceptions.Bug;
017    
018    
019    import static org.LiveGraph.dataFile.common.DataFormatTools.*;
020    
021    
022    /**
023     * A reader for a data stream (usually, a CSV file). This reader
024     * will parse the data stream and extract the file information, the data
025     * series headings and the actual data.<br />
026     * <br />
027     * The information extracted from the data stream is passed to the application
028     * using an observer pattern: after a line was parsed, the appropriate 
029     * {@code notifyXXXX(...)}-method of this class is called with the extracted
030     * information. The {@code notifyXXXX(...)}-methods dispatch appropriate
031     * notifications to all {@link DataStreamObserver}-objects registered with this
032     * {@code DataStreamReader}-instance.<br />
 * If required, an application may also override the {@code notifyXXXX(...)}-methods
 * to handle data read events.<br />
035     * <br />
036     * See {@link org.LiveGraph.dataFile.write.DataStreamWriter} for the details of the
037     * data file format.<br />
038     * <br />
039     * Note, that this class has a different role than it did in version 1.01 of the
040     * LiveGraph API. The {@code DataStreamReader} class from version 1.01 is replaced by
041     * {@link org.LiveGraph.dataCache.DataStreamToCacheReader}.
042     * 
043     * <p><strong>LiveGraph</strong> (http://www.live-graph.org).</p>
044     * <p>Copyright (c) 2007 by G. Paperin.</p>
045     * <p>File: DataStreamReader.java</p> 
046     * <p style="font-size:smaller;">Redistribution and use in source and binary forms, with or
047     *    without modification, are permitted provided that the following terms and conditions are met:
048     * </p>
049     * <p style="font-size:smaller;">1. Redistributions of source code must retain the above
050     *    acknowledgement of the LiveGraph project and its web-site, the above copyright notice,
051     *    this list of conditions and the following disclaimer.<br />
052     *    2. Redistributions in binary form must reproduce the above acknowledgement of the
053     *    LiveGraph project and its web-site, the above copyright notice, this list of conditions
054     *    and the following disclaimer in the documentation and/or other materials provided with
055     *    the distribution.<br />
056     *    3. All advertising materials mentioning features or use of this software or any derived
057     *    software must display the following acknowledgement:<br />
058     *    <em>This product includes software developed by the LiveGraph project and its
059     *    contributors.<br />(http://www.live-graph.org)</em><br />
060     *    4. All advertising materials distributed in form of HTML pages or any other technology
061     *    permitting active hyper-links that mention features or use of this software or any
062     *    derived software must display the acknowledgment specified in condition 3 of this
063     *    agreement, and in addition, include a visible and working hyper-link to the LiveGraph
064     *    homepage (http://www.live-graph.org).
065     * </p>
066     * <p style="font-size:smaller;">THIS SOFTWARE IS PROVIDED &quot;AS IS&quot;, WITHOUT WARRANTY
067     *    OF ANY KIND, EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
068     *    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND  NONINFRINGEMENT. IN NO EVENT SHALL
069     *    THE AUTHORS, CONTRIBUTORS OR COPYRIGHT  HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
070     *    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING  FROM, OUT OF OR
071     *    IN CONNECTION WITH THE SOFTWARE OR THE USE OR  OTHER DEALINGS IN THE SOFTWARE.
072     * </p>
073     * 
074     * @author Greg Paperin (http://www.paperin.org)
075     * @version {@value org.LiveGraph.LiveGraph#version}
076     * @see DataStreamObserver
077     * @see DataStreamObserverAdapter
078     * @see org.LiveGraph.dataCache.DataStreamToCacheReader
079     */
080    public class DataStreamReader implements Closeable {
081    
082    /**
083     * Data stream reader.
084     */
085    private BufferedReader in = null;
086    
087    /**
088     * Data values separator.
089     */
090    private String separator = DefaultSeparator;
091    
092    /**
093     * Whether the data values separator was already finalised. 
094     */
095    private boolean separatorSet = false;
096    
097    /**
098     * Whether the data series headings are already set-up.
099     */
100    private boolean labelsSet = false;
101    
102    /**
103     * The data stream index of the next data record.
104     */
105    private int nextDatasetFileIndex = -1;
106    
107    /**
108     * Observers who want to know what's on the data stream.
109     */
110    private List<DataStreamObserver> observers = null; 
111    
112    
113    /**
114     * Creates a data reader on the specified stream.
115     * 
116     * @param is The stream from which to read. 
117     */
118    public DataStreamReader(InputStream is) {
119            
120            if (null == is)
121                    throw new NullPointerException("Cannot read from a null stream.");
122            
123            this.in = new BufferedReader(new InputStreamReader(is));        
124            this.separator = DefaultSeparator;
125            this.separatorSet = false;
126            this.labelsSet = false;
127            this.nextDatasetFileIndex = -1;
128            this.observers = new ArrayList<DataStreamObserver>(); 
129    }
130    
131    /**
132     * Creates a data reader on the specified stream and add one initial observer.
133     * 
134     * @param is The stream from which to read. 
135     * @param observer An observer for the data stream contents.
136     */
137    public DataStreamReader(InputStream is, DataStreamObserver observer) {
138            this(is);
139            addObserver(observer);
140    }
141    
142    
143    /**
144     * Tells whether this reader's underlying data stream is ready to be read.
145     * 
146     * @return {@code true} if the next {@code readFromStream()} is guaranteed not to block for input,
147     * {@code false} otherwise. Note that returning {@code false} does not guarantee that the next read
148     * will block.
149     * @throws IOException If an I/O error occurs.
150     */
151    public boolean ready() throws IOException {
152            return in.ready();
153    }
154    
155    /**
156     * Closes the underlying data stream. Further reading is not possible after calling this method.
157     * @throws IOException If an I/O error occurs.
158     */
159    public void close() throws IOException {
160            in.close();
161    }
162    
163    /**
164     * Reads as many data lines from the underlying stream as there are available and parses them.
165     *  
166     * @return The number on non-empty data lines read.
167     * @throws IOException If an I/O error occurs.
168     * @throws DataFormatException If the data stream contents do not conform with the expected data
169     * stream format.
170     * @see org.LiveGraph.dataFile.write.DataStreamWriter
171     * @see #readFromStream(int)
172     */
173    public int readFromStream() throws IOException, DataFormatException {   
174            return readFromStream(-1);      
175    }
176    
177    /**
178     * Reads up to a specified number of data lines from the underlying stream, and parses the lines.
179     * Reading is stopped when the specified number of lines in reached or if no more lines are available.
180     * 
181     * @param maxLines The maximum number of data lines to read (empty lines are ignored and not counted,
182     * but all other lines including comment lines are counted). If negative, all available lines will
183     * be read.
184     * @return The number on non-empty data lines read.
185     * @throws IOException If an I/O error occurs.
186     * @throws DataFormatException If the data stream contents do not conform with the expected data
187     * stream format.
188     * @see org.LiveGraph.dataFile.write.DataStreamWriter
189     */
190    public int readFromStream(int maxLines) throws IOException, DataFormatException {
191            
192            int linesRead = 0;
193            String line = null;
194            while (ready() && (0 > maxLines || linesRead < maxLines) ) {
195                    line = in.readLine();
196                    line = line.trim();
197                    if (line.length() > 0) {
198                            processLine(line);
199                            linesRead++;
200                    }
201            }
202            return linesRead;
203    }
204    
205    /**
206     * Notifies observers regestered with this parser of a "data values separator set"-event.
207     * 
208     * @param separator New data separator to be passed to the observers.
209     */
210    protected void notifySeparatorSet(String separator) {
211            for (DataStreamObserver observer : observers)
212                    observer.eventSeparatorSet(separator, this);
213    }
214    
215    /**
216     * Notifies observers regestered with this parser of a "comment line parsed"-event.
217     * 
218     * @param comment The parsed comment line to be passed to the observers.
219     */
220    protected void notifyCommentLine(String comment) {
221            for (DataStreamObserver observer : observers)
222                    observer.eventCommentLine(comment, this);
223    }
224    
225    /**
226     * Notifies observers regestered with this parser of a "file info line parsed"-event.
227     * 
228     * @param info The parsed file info to be passed to the observers.
229     */
230    protected void notifyFileInfoLine(String info) {
231            for (DataStreamObserver observer : observers)
232                    observer.eventFileInfoLine(info, this);
233    }
234    
235    /**
236     * Notifies observers regestered with this parser of a "data series labels parsed"-event.
237     * 
238     * @param labels The parsed data series labels to be passed to the observers.
239     */
240    protected void notifyLabelsSet(List<String> labels) {
241            for (DataStreamObserver observer : observers)
242                    observer.eventLabelsSet(labels, this);
243    }
244    
245    /**
246     * Notifies observers regestered with this parser of a "dataset parsed"-event.
247     * 
248     * @param dataTokens The parsed data tokens to be passed to the observers.
249     * @param datasetIndex The file index of the parsed dataset to be passed to the observers.
250     */
251    protected void notifyDataLineRead(List<String> dataTokens, int datasetIndex) {
252            for (DataStreamObserver observer : observers)
253                    observer.eventDataLineRead(dataTokens, datasetIndex, this);
254    }
255    
256    /**
257     * Adds an observer to this parser.
258     * 
259     * @param observer The observer to add.
260     * @return {@code if the specified observer cound not be added because it was already registered},
261     * {@code true otherwise}.
262     */
263    public boolean addObserver(DataStreamObserver observer) {
264            if (null == observer || hasObserver(observer))
265                    return false;
266            return observers.add(observer);
267    }
268    
269    /**
270     * Checks whether the specified observer is registered with this parser.
271     *  
272     * @param observer An observer.
273     * @return {@code true} if the specified {@code observer} is not {@code null} and is regestered
274     * with this parser, {@code false} otherwise.
275     */
276    public boolean hasObserver(DataStreamObserver observer) {
277            if (null == observer)
278                    return false;
279            return observers.contains(observer);    
280    }
281    
282    /**
283     * De-registeres the specified observer from this parser.
284     * 
285     * @param observer An observer.
286     * @return {@code true} if the specified observer is not {@code null} and was on the 
287     * list of registered observers and is now removed from this list, {@code false} otherwise. 
288     */
289    public boolean removeObserver(DataStreamObserver observer) {
290            if (null == observer)
291                    return false;
292            return observers.remove(observer);
293    }
294    
295    /**
296     * Counts this parser's observers.
297     * 
298     * @return The number of observers registered with this parser.
299     */
300    public int countObservers() {
301            return observers.size();
302    }
303    
304    /**
305     * This static utility method converts a list of {@code String} tokens (presumably just parsed
306     * from a data line) to a list of {@code Double} objects containing the tokens' values; tokens
307     * that cannot be parsed to a {@code Double} are represented by {@code null}-objects in the
308     * resulting list.
309     * 
310     * @param tokens A list of data tokens.
311     * @return A list of the double values of the specified tokens.
312     */
313    public static List<Double> convertTokensToDoubles(List<String> tokens) {
314            
315            if (null == tokens)
316                    return Collections.emptyList();
317            
318            List<Double> doubles = new ArrayList<Double>(tokens.size());
319            for (String tok : tokens) {
320                    
321                    if (null == tok)
322                            continue;
323                    
324                    tok = tok.trim();
325                    
326                    Double val = null;
327                    if (null != tok && 0 < tok.length()) {
328                            try { val = Double.valueOf(tok); }
329                            catch (NumberFormatException e) { val = null; }
330                    }
331                    
332                    doubles.add(val);               
333            }
334            return doubles;
335    }
336    
337    
338    /**
339     * This static utility method converts a list of strings (presumably representing a list of
340     * labels just parsed from the data file) to a list of strings where each string is unique
341     * in respect to its {@code equals} method (case sensitive); this happens by attaching 
342     * counters to repreated strings: for instance, {@code ["boo", "foo", "boo"]} it converted to
343     * {@code ["boo (1)", "foo", "boo (2)"]}.  
344     *  
345     * @param rawLabels The list of labels to convert.
346     * @param allowEmptyLabels If this is {@code false}, all empty strings ({@code ""}) are converted
347     * to underscores ({@code "_"}) before possibly applying the counters.
348     * @return A list of unique data series labels based on the specified list.
349     */
350    public static List<String> createUniqueLabels(List<String> rawLabels, boolean allowEmptyLabels) {
351            
352            List<String> uniqueLabels = new ArrayList<String>();
353            Map<String, Integer> labelCounts = new HashMap<String, Integer>();
354                    
355            // Mark labels which occure more than once:
356            for (String rawLabel : rawLabels) {
357                    
358                    rawLabel = rawLabel.trim();
359                    if (!allowEmptyLabels && rawLabel.length() == 0)
360                            rawLabel = "_";
361                    
362                    if (!labelCounts.containsKey(rawLabel)) {
363                            
364                            labelCounts.put(rawLabel, 1);
365                            
366                    } else {
367            
368                            int c = labelCounts.get(rawLabel);                      
369                            labelCounts.put(rawLabel, ++c);
370                            rawLabel = rawLabel + " (" + c + ")";
371                    }
372                                                    
373                    uniqueLabels.add(rawLabel);
374            }
375            
376            // Change first occurence of "label" into "label (1)" for the labels which appear more than once:
377            for (String label : labelCounts.keySet()) {
378                    int c = labelCounts.get(label); 
379                    if (1 < c) {
380                            int p = uniqueLabels.indexOf(label);
381                            uniqueLabels.set(p, label + " (1)");
382                    }
383            }
384            
385            // Done:        
386            return uniqueLabels;
387    }
388    
389    
390    /**
391     * Examines a data line and dispatches to a specialised parsing routine.
392     * 
393     * @param line A data line.
394     * @throws DataFormatException If the data stream contents do not conform with the expected data
395     * stream format.
396     */
397    private void processLine(String line) throws DataFormatException {
398    
399            if (!separatorSet && line.startsWith(TAGSepDefinition) && line.endsWith(TAGSepDefinition)) {
400                    processSeparatorDefinitionLine(line);
401                    return;
402            }
403            
404            if (line.startsWith(TAGComment)) {
405                    processCommentLine(line);
406                    return;
407            }
408            
409            if (line.startsWith(TAGFileInfo)) {
410                    processFileInfoLine(line);
411                    return;
412            }
413            
414            if (!labelsSet) {
415                    processSeriesLabelsLine(line);
416                    return;
417            }
418            
419            if (true) {
420                    processDataLine(line);
421                    return;
422            }
423            
424            throw new Bug("The program should never get to this line!");            
425    }
426    
427    /**
428     * Parses a data values separator definition line.
429     * 
430     * @param line Data line to parse.
431     * @throws DataFormatException If the data line contents are not in the expected format.
432     */
433    private void processSeparatorDefinitionLine(String line) throws DataFormatException {
434            
435            if (line.length() < TAGSepDefinition.length() * 2)
436                    throw new DataFormatException("Illegal separator definition: \"" + line + "\"");
437            
438            if (line.length() == TAGSepDefinition.length() * 2)
439                    throw new DataFormatException("Illegal separator definition: separator may not be an empty string");
440            
441            String sep = line.substring(TAGSepDefinition.length(), line.length() - TAGSepDefinition.length());
442            
443            String problem = isValidSeparator(sep);
444            if (null != problem)
445                    throw new DataFormatException("Illegal separator definition: " + problem);
446            
447            separator = sep;
448            separatorSet = true;
449            notifySeparatorSet(separator);
450    }
451    
452    /**
453     * Parses a comments line.
454     * 
455     * @param line Data line to parse.
456     * @throws DataFormatException If the data line contents are not in the expected format.
457     */
458    private void processCommentLine(String line) throws DataFormatException {
459            String comment = "";
460            if (line.length() > TAGComment.length())
461                    comment = line.substring(TAGComment.length()).trim();
462            
463            separatorSet = true;
464            notifyCommentLine(comment);
465    }
466    
467    /**
468     * Parses a file information line.
469     * 
470     * @param line Data line to parse.
471     * @throws DataFormatException If the data line contents are not in the expected format.
472     */
473    private void processFileInfoLine(String line) throws DataFormatException {
474            String info = "";
475            if (line.length() > TAGFileInfo.length())
476                    info = line.substring(TAGFileInfo.length()).trim();
477            
478            separatorSet = true;
479            notifyFileInfoLine(info);
480    }
481    
482    /**
483     * Parses a data series headings line.
484     * 
485     * @param line Data line to parse.
486     * @throws DataFormatException If the data line contents are not in the expected format.
487     */
488    private void processSeriesLabelsLine(String line) throws DataFormatException {
489            
490            DataLineTokenizer tok = new DataLineTokenizer(line, separator);         
491            nextDatasetFileIndex = 0;
492            labelsSet = true;
493            separatorSet = true;
494            notifyLabelsSet(Collections.unmodifiableList(tok.getTokens()));
495    }
496    
497    /**
498     * Parses a data line.
499     * 
500     * @param line Data line to parse.
501     * @throws DataFormatException If the data line contents are not in the expected format.
502     */
503    private void processDataLine(String line) throws DataFormatException {
504            
505            DataLineTokenizer tok = new DataLineTokenizer(line, separator);
506            separatorSet = true;
507            notifyDataLineRead(tok.getTokens(), nextDatasetFileIndex++);
508    }
509    
510    }