Columns Class

Show Documentation
package javaxt.express.utils;
import java.util.HashMap;
import java.util.ArrayList;

//******************************************************************************
//**  CSV
//******************************************************************************
/**
 *   Provides static methods used to parse tabular data stored in plain text
 *   where records (aka rows) are separated with a line break and columns are
 *   delimited with a character (comma, tab, pipe, etc). A CSV file is an
 *   example of such tabular data, using commas to separate the values in a
 *   row.
 * 
 *   <p>
 *   Here's an example of how to parse a CSV file using the static methods
 *   found in this class:
 *   </p>
 <pre>
    javaxt.io.File csvFile = new javaxt.io.File("/temp/employees.csv");
    try (java.io.BufferedReader br = csvFile.getBufferedReader("UTF-8")){


      //Read header
        String header = CSV.readLine(br);

      //Remove the Byte Order Mark (BOM) if there is one
        int bom = CSV.getByteOrderMark(header);
        if (bom>-1) header = header.substring(bom);


      //Parse header
        ArrayList<String> headers = new ArrayList<>();
        for (javaxt.utils.Value col : CSV.getColumns(header, ",")){
            headers.add(col.toString());
        }


      //Read rows
        String row;
        while (!(row=CSV.readLine(br)).isEmpty()){


          //Parse row
            CSV.Columns columns = CSV.getColumns(row, ",");
            for (int i=0; i<columns.length(); i++){
                String colName = headers.get(i);
                String colValue = columns.get(i).toString();
                System.out.println(colName + ": " + colValue);
            }

            System.out.println("---------------------------");
        }

    }
 </pre>
 *
 *
 ******************************************************************************/

public class CSV {

    /** Delimiter string consisting of a single tab character. */
    public static final String TAB_DELIMITER = "\t";
    /** Delimiter string consisting of a single comma. */
    public static final String COMMA_DELIMITER = ",";


  //**************************************************************************
  //** Columns
  //**************************************************************************
  /** Class used to encapsulate the columns (values) found in a single row.
   *  Values can be retrieved by position or, once a header has been assigned
   *  via setHeader(), by column name. Name lookups are case-insensitive.
   *  Neither get() method throws for an unknown index or name; both return a
   *  Value wrapping null instead.
   */
    public static class Columns implements Iterable<javaxt.utils.Value> {

        private final ArrayList<javaxt.utils.Value> cols;
        private HashMap<String, Integer> header;

        public Columns(){
            cols = new ArrayList<>();
        }

      /** Appends a value to the end of the row.
       */
        public void add(javaxt.utils.Value col){
            cols.add(col);
        }

      /** Returns the value at the given position, or a Value wrapping null if
       *  the index is out of range.
       */
        public javaxt.utils.Value get(int idx){
            if (idx<0 || idx>=cols.size()) return new javaxt.utils.Value(null);
            return cols.get(idx);
        }

      /** Returns the value for the given column name (case-insensitive).
       *  Returns a Value wrapping null if no header has been set or if the
       *  name is not found in the header.
       */
        public javaxt.utils.Value get(String key){

          //No header assigned yet: nothing can be resolved by name
            if (header==null) return new javaxt.utils.Value(null);

            Integer idx = header.get(normalize(key));
            if (idx==null) return new javaxt.utils.Value(null);
            return get(idx);
        }

      /** Assigns column names to this row so that values can be retrieved by
       *  name. The position of each entry in the given header is used as the
       *  column index. If a name appears more than once, the last occurrence
       *  wins. A null header is ignored and leaves any existing header intact.
       */
        public void setHeader(Columns header){
            if (header==null) return;
            this.header = new HashMap<>();
            int x = 0;
            for (javaxt.utils.Value val : header){
                this.header.put(normalize(val.toString()), x);
                x++;
            }
        }

      /** Returns the number of values in this row.
       */
        public int length(){
            return cols.size();
        }

        @Override
        public java.util.Iterator<javaxt.utils.Value> iterator() {
            return cols.iterator();
        }

      /** Lower-cases a column name using a fixed locale so that lookups do
       *  not depend on the default locale of the JVM. Null is passed through
       *  unchanged (HashMap accepts a null key).
       */
        private static String normalize(String name){
            return name==null ? null : name.toLowerCase(java.util.Locale.ROOT);
        }
    }


  //**************************************************************************
  //** getColumns
  //**************************************************************************
  /** Splits a row into column values using the given delimiter. Each raw
   *  column is converted into a value via getValue(). When the delimiter is a
   *  comma, delimiters found between double quotes do not split the row.
   *  Only a delimiter that is exactly one character long can match; any
   *  other delimiter results in a single column holding the entire row.
   */
    public static Columns getColumns(String row, String delimiter){
        Columns columns = new Columns();

      //Double quotes only shield delimiters when splitting on commas
        boolean honorQuotes = delimiter.equals(",");

      //The row is compared one character at a time so only a one-character
      //delimiter can ever match
        boolean canSplit = delimiter.length()==1;
        char separator = canSplit ? delimiter.charAt(0) : '\0';

        boolean quoted = false;
        StringBuilder buf = new StringBuilder();
        int len = row.length();
        int pos = 0;

        while (pos<len){
            char ch = row.charAt(pos++);

            if (honorQuotes && ch=='"') quoted = !quoted;

            if (canSplit && !quoted && ch==separator){
                columns.add(getValue(buf));
                buf = new StringBuilder();
                continue;
            }

            buf.append(ch);
        }

      //Whatever remains in the buffer is the last column
        columns.add(getValue(buf));

        return columns;
    }


  //**************************************************************************
  //** readLine
  //**************************************************************************
  /** Returns the leading portion of the given text, up to (but excluding) the
   *  first carriage return or line feed that is not enclosed in double
   *  quotes. Leading line breaks are not skipped, so text that starts with a
   *  line break yields an empty string.
   */
    public static String readLine(String data){

        int len = data.length();
        int end = 0;
        boolean quoted = false;

        while (end<len){
            char ch = data.charAt(end);
            if (ch=='"'){
                quoted = !quoted;
            }
            else if (!quoted && (ch=='\r' || ch=='\n')){
                break;
            }
            end++;
        }

        return data.substring(0, end);
    }


  //**************************************************************************
  //** readLine
  //**************************************************************************
  /** Reads one row of data from an InputStream. Bytes are consumed one at a
   *  time until a line break is found that is not inside double quotes, or
   *  until the stream is exhausted. Line breaks encountered before any other
   *  content are skipped. Each byte is cast directly to a char; no character
   *  set decoding is performed. An empty string is returned when there is
   *  nothing left to read. Depending on the source of the InputStream, this
   *  method may be significantly slower than the readLine() method that uses
   *  a BufferedReader. Example usage:
    <pre>

      //Create an input stream
        java.io.InputStream is = ...

      //Read header
        String header = CSV.readLine(is);
        int bom = CSV.getByteOrderMark(header);
        if (bom>-1) header = header.substring(bom);
        console.log(header);

      //Read rows
        String row;
        while (!(row=CSV.readLine(is)).isEmpty()){
            console.log(row);
        }

    </pre>
   */
    public static String readLine(java.io.InputStream is) throws java.io.IOException {

        StringBuilder line = new StringBuilder();
        boolean quoted = false;

        for (int b=is.read(); b!=-1; b=is.read()){
            char ch = (char) b;

            if (ch=='\r' || ch=='\n'){

              //Ignore line breaks that precede the row
                if (line.length()==0) continue;

              //An unquoted line break terminates the row
                if (!quoted) break;
            }
            else if (ch=='"'){
                quoted = !quoted;
            }

            line.append(ch);
        }

        return line.toString();
    }


  //**************************************************************************
  //** readLine
  //**************************************************************************
  /** Reads one row of data from a BufferedReader. Unlike the BufferedReader
   *  readLine() method, a line break found inside double quotes does not end
   *  the row. Line breaks encountered before any other content are skipped.
   *  An empty string is returned when there is nothing left to read. Note
   *  that a BufferedReader is significantly faster than an InputStream when
   *  reading files. Example usage:
    <pre>

      //Open input stream from an javaxt.io.File
        try (java.io.BufferedReader is = file.getBufferedReader("UTF-8")){

          //Read header
            String header = CSV.readLine(is);
            int bom = CSV.getByteOrderMark(header);
            if (bom>-1) header = header.substring(bom);
            console.log(header);

          //Read rows
            String row;
            while (!(row=CSV.readLine(is)).isEmpty()){
                console.log(row);
            }

        }
    </pre>
   */
    public static String readLine(java.io.BufferedReader reader) throws java.io.IOException {

        StringBuilder line = new StringBuilder();
        boolean quoted = false;

        while (true){
            int next = reader.read();
            if (next==-1) break;

            char ch = (char) next;
            boolean lineBreak = (ch=='\r' || ch=='\n');

          //Ignore line breaks that precede the row
            if (lineBreak && line.length()==0) continue;

          //An unquoted line break terminates the row
            if (lineBreak && !quoted) break;

            if (ch=='"') quoted = !quoted;

            line.append(ch);
        }

        return line.toString();
    }


  //**************************************************************************
  //** parseHeader
  //**************************************************************************
  /** Splits a header row (e.g. first row in a file) into columns using the
   *  given delimiter. If getByteOrderMark() reports a Byte Order Mark (BOM)
   *  at the start of the header, it is removed before the header is split.
   */
    public static Columns parseHeader(String header, String delimiter){
        int offset = CSV.getByteOrderMark(header);
        String text = (offset>-1) ? header.substring(offset) : header;
        return CSV.getColumns(text, delimiter);
    }


  //**************************************************************************
  //** parseHeader
  //**************************************************************************
  /** Reads the next row from the given InputStream via readLine() and parses
   *  it as a header using the String variant of parseHeader(), which removes
   *  the Byte Order Mark (BOM) as needed.
   */
    public static Columns parseHeader(java.io.InputStream is, String delimiter) throws java.io.IOException {
        String firstLine = CSV.readLine(is);
        return parseHeader(firstLine, delimiter);
    }


  //**************************************************************************
  //** parseHeader
  //**************************************************************************
  /** Reads the next row from the given BufferedReader via readLine() and
   *  parses it as a header using the String variant of parseHeader(), which
   *  removes the Byte Order Mark (BOM) as needed.
   */
    public static Columns parseHeader(java.io.BufferedReader reader, String delimiter) throws java.io.IOException {
        String firstLine = CSV.readLine(reader);
        return parseHeader(firstLine, delimiter);
    }


  //**************************************************************************
  //** getByteOrderMark
  //**************************************************************************
  /** Returns the end position of the Byte Order Mark (BOM) at the start of
   *  the given string, i.e. the number of leading characters to remove. It
   *  returns -1 if the string is null, empty, or does not start with a
   *  recognized BOM. A decoded BOM character (U+FEFF) yields 1. Otherwise the
   *  leading characters are compared against the raw BOM byte values of
   *  UTF-16, UTF-8, UTF-7, UTF-1 and UTF-32. Example usage:
    <pre>
        int bom = CSV.getByteOrderMark(header);
        if (bom>-1) header = header.substring(bom);
    </pre>
   */
    public static int getByteOrderMark(String str){

        if (str==null) return -1;

      //BOM that was decoded into a single character
        if (str.startsWith("\uFEFF")) return 1;

        int len = str.length();
        if (len<2) return -1;

      //Characters that are not present are left at -1 so they never match
        int a = str.charAt(0);
        int b = str.charAt(1);
        int c = len>2 ? str.charAt(2) : -1;
        int d = len>3 ? str.charAt(3) : -1;

      //The 4 character marks are tested first because the UTF-32 (LE) mark
      //starts with the same 2 characters as the UTF-16 (LE) mark
        if (a==0 && b==0 && c==254 && d==255) return 4; //UTF-32 (BE)
        if (a==255 && b==254 && c==0 && d==0) return 4; //UTF-32 (LE)

        if (a==239 && b==187 && c==191) return 3; //UTF-8
        if (a==43 && b==47 && c==118) return 3; //UTF-7
        if (a==247 && b==100 && c==76) return 3; //UTF-1

        if (a==254 && b==255) return 2; //UTF-16 (BE)
        if (a==255 && b==254) return 2; //UTF-16 (LE)

        return -1;
    }


  //**************************************************************************
  //** startsWithByteOrderMark
  //**************************************************************************
  /** Returns true if getByteOrderMark() finds a Byte Order Mark (BOM) at the
   *  start of the given string.
   */
    public static boolean startsWithByteOrderMark(String str){
        int end = getByteOrderMark(str);
        return end>=0;
    }


  //**************************************************************************
  //** getValue
  //**************************************************************************
  /** Converts the raw text of a column into a value. Leading and trailing
   *  whitespace is removed. If the text is enclosed in double quotes, the
   *  enclosing quotes are removed, escaped quotes (two consecutive double
   *  quotes) are replaced with a single double quote, and the result is
   *  trimmed again. Text that ends up empty is returned as a Value wrapping
   *  null.
   */
    private static javaxt.utils.Value getValue(StringBuilder str){
        String col = str.toString().trim();

        if (col.startsWith("\"") && col.endsWith("\"")){
            if (col.length()>1){

              //Remove the enclosing quotes, then unescape any doubled quotes
              //found inside the quoted text
                col = col.substring(1, col.length()-1);
                col = col.replace("\"\"", "\"").trim();
            }
            else{

              //A lone double quote carries no content
                col = "";
            }
        }

        if (col.isEmpty()) col = null;
        return new javaxt.utils.Value(col);
    }
}