Source

src/csvToJson.js

/* globals FileOperationError */
"use strict";

const fileUtils = require('./util/fileUtils');
const stringUtils = require('./util/stringUtils');
const jsonUtils = require('./util/jsonUtils');
const {
    ConfigurationError,
    CsvFormatError,
    JsonValidationError
} = require('./core/errors');
const Configurable = require('./core/configurable');
const ParserConfig = require('./core/parserConfig');

const DEFAULT_FIELD_DELIMITER = ",";
const QUOTE_CHAR = '"';
const CRLF = '\r\n';
const LF = '\n';
const CR = '\r';

/**
 * Main CSV to JSON converter class
 * Provides chainable API for configuring and converting CSV data
 * @category 2-Sync 
 */
class CsvToJson extends Configurable {

  /**
   * Parse CSV content using a frozen configuration snapshot.
   * @param {string} parsedCsv - Raw CSV content as string
   * @param {ParserConfig} config - Frozen parser configuration
   * @returns {Array<object>} Parsed JSON array
   */
  csvToJsonWithConfig(parsedCsv, config) {
    this.validateInputConfig(config);

    const records = this.parseRecords(parsedCsv);
    const fieldDelimiter = this.getFieldDelimiter(config);
    let index = this.getIndexHeader(config);
    let headers;

    while (index < records.length) {
      headers = this.getFields(records[index], config, fieldDelimiter);
      if (stringUtils.hasContent(headers)) {
        break;
      }
      index++;
    }

    if (!headers) {
      throw CsvFormatError.missingHeader();
    }

    const jsonResult = [];
    for (let i = index + 1; i < records.length; i++) {
      const currentLine = this.getFields(records[i], config, fieldDelimiter);

      if (stringUtils.hasContent(currentLine)) {
        let row = this.buildJsonResult(headers, currentLine, config);
        if (config.rowMapper) {
          row = config.rowMapper(row, i - (index + 1));
          if (row != null) {
            jsonResult.push(row);
          }
        } else {
          jsonResult.push(row);
        }
      }
    }

    return jsonResult;
  }

  /**
   * Read a CSV file and write the parsed JSON to an output file
   * @param {string} fileInputName - Path to input CSV file
   * @param {string} fileOutputName - Path to output JSON file
   * @throws {FileOperationError} If file read or write fails
   * @throws {CsvFormatError} If CSV is malformed
   */
  generateJsonFileFromCsv(fileInputName, fileOutputName) {
    let jsonStringified = this.getJsonFromCsvStringified(fileInputName);
    fileUtils.writeFile(jsonStringified, fileOutputName);
  }

  /**
   * Read a CSV file and return parsed data as stringified JSON
   * @param {string} fileInputName - Path to input CSV file
   * @returns {string} JSON stringified array of objects
   * @throws {FileOperationError} If file read fails
   * @throws {CsvFormatError} If CSV is malformed
   * @throws {JsonValidationError} If JSON generation fails
   * @example
   * const csvToJson = require('convert-csv-to-json');
   * const jsonString = csvToJson.getJsonFromCsvStringified('resource/input.csv');
   * console.log(jsonString);
   */
  getJsonFromCsvStringified(fileInputName) {
    let json = this.getJsonFromCsv(fileInputName);
    let jsonStringified = JSON.stringify(json, undefined, 1);
    jsonUtils.validateJson(jsonStringified);
    return jsonStringified;
  }

  /**
   * Read a CSV file and return parsed data as JSON array of objects
   * @param {string} fileInputName - Path to input CSV file
   * @returns {Array<object>} Array of objects representing CSV rows
   * @throws {FileOperationError} If file read fails
   * @throws {CsvFormatError} If CSV is malformed
   * @example
   * const csvToJson = require('convert-csv-to-json');
   * const rows = csvToJson.getJsonFromCsv('resource/input.csv');
   * console.log(rows);
   */
  getJsonFromCsv(fileInputName) {
    const config = this.getParserConfig();
    const parsedCsv = fileUtils.readFile(fileInputName, config.encoding || 'utf8');
    return this.csvToJson(parsedCsv);
  }

  /**
   * Parse CSV string content and return as JSON array of objects
   * @param {string} csvString - CSV content as string
   * @returns {Array<object>} Array of objects representing CSV rows
   * @throws {CsvFormatError} If CSV is malformed
   * @example
   * const csvToJson = require('convert-csv-to-json');
   * const rows = csvToJson.csvStringToJson('name,age\nAlice,30');
   * console.log(rows); // [{ name: 'Alice', age: '30' }]
   */
  csvStringToJson(csvString) {
    return this.csvToJson(csvString);
  }

  /**
   * Parse CSV string content and return as stringified JSON
   * @param {string} csvString - CSV content as string
   * @returns {string} JSON stringified array of objects
   * @throws {CsvFormatError} If CSV is malformed
   * @throws {JsonValidationError} If JSON generation fails
   * @example
   * const csvToJson = require('convert-csv-to-json');
   * const jsonString = csvToJson.csvStringToJsonStringified('name,age\nAlice,30');
   * console.log(jsonString);
   */
  csvStringToJsonStringified(csvString) {
    let json = this.csvStringToJson(csvString);
    let jsonStringified = JSON.stringify(json, undefined, 1);
    jsonUtils.validateJson(jsonStringified);
    return jsonStringified;
  }

  /**
   * Core CSV parsing logic - converts CSV string to JSON array
   * Handles quoted fields per RFC 4180 when configured
   * Applies row mapping and filtering when configured
   * @param {string} parsedCsv - Raw CSV content as string
   * @returns {Array<object>} Array of objects with CSV data
   * @private
   */
  csvToJson(parsedCsv) {
    return this.csvToJsonWithConfig(parsedCsv, this.getParserConfig());
  }

  /**
   * Parse CSV content into individual records, respecting quoted fields that may span multiple lines.
   * RFC 4180 compliant parsing - handles quoted fields that may contain newlines.
   * @param {string} csvContent - The raw CSV content
   * @returns {string[]} Array of record strings
   */
  parseRecords(csvContent) {
    let records = [];
    let currentRecord = '';
    let insideQuotes = false;
    let i = 0;
    
    while (i < csvContent.length) {
      let char = csvContent[i];
      
      // Handle quote characters
      if (char === QUOTE_CHAR) {
        if (insideQuotes && i + 1 < csvContent.length && csvContent[i + 1] === QUOTE_CHAR) {
          // Escaped quote: two consecutive quotes = single quote representation
          currentRecord += QUOTE_CHAR + QUOTE_CHAR;
          i += 2;
        } else {
          // Toggle quote state
          insideQuotes = !insideQuotes;
          currentRecord += char;
          i++;
        }
        continue;
      }
      
      // Handle line endings (only outside quoted fields)
      if (!insideQuotes) {
        let lineEndingLength = this.getLineEndingLength(csvContent, i);
        if (lineEndingLength > 0) {
          records.push(currentRecord);
          currentRecord = '';
          i += lineEndingLength;
          continue;
        }
      }
      
      // Regular character
      currentRecord += char;
      i++;
    }
    
    // Add the last record if not empty
    if (currentRecord.length > 0) {
      records.push(currentRecord);
    }
    
    // Validate matching quotes
    if (insideQuotes) {
      throw CsvFormatError.mismatchedQuotes('CSV');
    }
    
    return records;
  }

  /**
   * Get the length of line ending at current position (CRLF=2, LF=1, CR=1, or 0)
   * @param {string} content - CSV content
   * @param {number} index - Current index to check
   * @returns {number} Length of line ending (0 if none)
   */
  getLineEndingLength(content, index) {
    if (content.slice(index, index + 2) === CRLF) {
      return 2;
    }
    if (content[index] === LF) {
      return 1;
    }
    if (content[index] === CR && content[index + 1] !== LF) {
      return 1;
    }
    return 0;
  }

  /**
   * Get the configured field delimiter, or default if not set
   * @param {ParserConfig} [config] - Parser configuration
   * @returns {string} Field delimiter character
   * @private
   */
  getFieldDelimiter(config = this.config) {
    if (config.delimiter) {
      return config.delimiter;
    }
    return DEFAULT_FIELD_DELIMITER;
  }

  /**
   * Get the configured header row index, or default (0) if not set
   * @param {ParserConfig} [config] - Parser configuration
   * @returns {number} Header row index
   * @private
   */
  getIndexHeader(config = this.config) {
    if (config.indexHeaderValue !== null && !isNaN(config.indexHeaderValue)) {
      return config.indexHeaderValue;
    }
    return 0;
  }

  /**
   * Get fields for a CSV record line according to configured parser rules.
   * Uses quoted-field splitting when RFC 4180 support is enabled.
   * @param {string} line - CSV record line
   * @param {ParserConfig} [config] - Parser configuration
   * @param {string} [fieldDelimiter] - Field delimiter
   * @returns {string[]} Parsed fields
   * @private
   */
  getFields(line, config = this.config, fieldDelimiter = this.getFieldDelimiter(config)) {
    if (config.isSupportQuotedField) {
      return this.split(line, config);
    }
    return line.split(fieldDelimiter);
  }

  /**
   * Build a JSON object from headers and field values
   * Applies type formatting and sub-array parsing as configured
   * @param {string[]} headers - Array of header field names
   * @param {string[]} currentLine - Array of field values
   * @param {ParserConfig} [config] - Frozen parser configuration
   * @returns {object} JSON object with header names as keys
   * @private
   */
  buildJsonResult(headers, currentLine, config = this.config) {
    let jsonObject = {};
    const ignoredIndexes = config.indexesToIgnore ? new Set(config.indexesToIgnore) : new Set();
    for (let j = 0; j < headers.length; j++) {
      if (ignoredIndexes.has(j)) {
        continue;
      }

      let propertyName = stringUtils.trimPropertyName(config.isTrimHeaderFieldWhiteSpace, headers[j]);
      let value = currentLine[j];

      if (this.isParseSubArray(value, config)) {
        value = this.buildJsonSubArray(value, config);
      }

      if (config.printValueFormatByType && !Array.isArray(value)) {
        value = stringUtils.getValueFormatByType(currentLine[j]);
      }

      jsonObject[propertyName] = value;
    }
    return jsonObject;
  }

  /**
   * Parse a field value into a sub-array using configured delimiter and separator
   * @param {string} value - Field value to parse
   * @param {ParserConfig} [config] - Frozen parser configuration
   * @returns {Array<string|number|boolean>} Array of parsed values
   * @private
   */
  buildJsonSubArray(value, config = this.config) {
    let extractedValues = value.substring(
        value.indexOf(config.parseSubArrayDelimiter) + 1,
        value.lastIndexOf(config.parseSubArrayDelimiter)
    );
    extractedValues.trim();
    value = extractedValues.split(config.parseSubArraySeparator);
    if (config.printValueFormatByType) {
      for (let i = 0; i < value.length; i++) {
        value[i] = stringUtils.getValueFormatByType(value[i]);
      }
    }
    return value;
  }

  /**
   * Check if a field value should be parsed as a sub-array
   * @param {string} value - Field value to check
   * @param {ParserConfig} [config] - Frozen parser configuration
   * @returns {boolean} True if value is bracketed with sub-array delimiter
   * @private
   */
  isParseSubArray(value, config = this.config) {
    if (config.parseSubArrayDelimiter) {
      if (value && (value.indexOf(config.parseSubArrayDelimiter) === 0 && value.lastIndexOf(config.parseSubArrayDelimiter) === (value.length - 1))) {
        return true;
      }
    }
    return false;
  }

  /**
   * Validate configuration for conflicts and incompatibilities
   * @param {ParserConfig} [config] - Parser configuration
   * @throws {ConfigurationError} If incompatible options are set
   * @private
   */
  validateInputConfig(config = this.config) {
    if (config.isSupportQuotedField) {
      if (this.getFieldDelimiter(config) === '"') {
        throw ConfigurationError.quotedFieldConflict('fieldDelimiter', '"');
      }
      if (config.parseSubArraySeparator === '"') {
        throw ConfigurationError.quotedFieldConflict('parseSubArraySeparator', '"');
      }
      if (config.parseSubArrayDelimiter === '"') {
        throw ConfigurationError.quotedFieldConflict('parseSubArrayDelimiter', '"');
      }
    }
  }

  /**
   * Check if a line contains quote characters
   * @param {string} line - Line to check
   * @returns {boolean} True if line contains quotes
   * @private
   */
  hasQuotes(line) {
    return line.includes('"');
  }

  /**
   * Split a CSV record line into fields, respecting quoted fields per RFC 4180.
   * Handles:
   * - Quoted fields with embedded delimiters and newlines
   * - Escaped quotes (double quotes within quoted fields)
   * - Empty quoted fields
   * @param {string} line - A single CSV record line
   * @param {ParserConfig} [config] - Parser configuration
   * @returns {string[]} Array of field values
   */
  split(line, config = this.config) {
    if (line.length === 0) {
      return [];
    }
    
    let fields = [];
    let currentField = '';
    let insideQuotes = false;
    let delimiter = this.getFieldDelimiter(config);
    
    for (let i = 0; i < line.length; i++) {
      let char = line[i];
      
      // Handle quote character
      if (char === QUOTE_CHAR) {
        if (this.isEscapedQuote(line, i, insideQuotes)) {
          // Two consecutive quotes inside quoted field = escaped quote
          currentField += QUOTE_CHAR;
          i++; // Skip next quote
        } else if (this.isEmptyQuotedField(line, i, insideQuotes, currentField, delimiter)) {
          // Empty quoted field: "" at field start before delimiter/end
          i++; // Skip closing quote
        } else {
          // Regular quote: toggle quoted state
          insideQuotes = !insideQuotes;
        }
      } else if (char === delimiter && !insideQuotes) {
        // Delimiter outside quotes marks field boundary
        fields.push(currentField);
        currentField = '';
      } else {
        // Regular character (including embedded newlines in quoted fields)
        currentField += char;
      }
    }
    
    // Add final field
    fields.push(currentField);
    
    // Validate matching quotes
    if (insideQuotes) {
      throw CsvFormatError.mismatchedQuotes('row');
    }
    
    return fields;
  }

  /**
   * Check if character at index is an escaped quote (double quote)
   * Escaped quotes appear as "" within quoted fields per RFC 4180
   * @param {string} line - Line being parsed
   * @param {number} index - Character index to check
   * @param {boolean} insideQuoted - Whether currently inside a quoted field
   * @returns {boolean} True if character is an escaped quote
   * @private
   */
  isEscapedQuote(line, index, insideQuoted) {
    return insideQuoted && 
           index + 1 < line.length && 
           line[index + 1] === QUOTE_CHAR;
  }

  /**
   * Check if this is an empty quoted field: "" before delimiter or end of line
   * @param {string} line - Line being parsed
   * @param {number} index - Character index to check
   * @param {boolean} insideQuoted - Whether currently inside a quoted field
   * @param {string} currentField - Current field accumulation
   * @param {string} delimiter - Field delimiter character
   * @returns {boolean} True if this represents an empty quoted field
   * @private
   */
  isEmptyQuotedField(line, index, insideQuoted, currentField, delimiter) {
    if (insideQuoted || currentField !== '' || index + 1 >= line.length) {
      return false;
    }
    
    let nextChar = line[index + 1];
    if (nextChar !== QUOTE_CHAR) {
      return false; // Not a quote pair
    }
    
    let afterQuotes = index + 2;
    return afterQuotes === line.length || line[afterQuotes] === delimiter;
  }
}

module.exports = new CsvToJson();
module.exports.CsvToJson = CsvToJson;