var getDelimiter = require("./getDelimiter"); var filterRow=require("./filterRow"); /** * Convert a line of string to csv columns according to its delimiter * the param._header may not be ready when this is called. * @param {[type]} rowStr [description] * @param {[type]} param [Converter param] * @return {[type]} {cols:["a","b","c"],closed:boolean} the closed field indicate if the row is a complete row */ module.exports = function rowSplit(rowStr, param) { if (rowStr === "") { return { cols: [], closed: true }; } var quote = param.quote; var trim = param.trim; var escape = param.escape; if (param.delimiter instanceof Array || param.delimiter.toLowerCase() === "auto") { param.delimiter = getDelimiter(rowStr, param); } var delimiter = param.delimiter; var rowArr = rowStr.split(delimiter); if (quote === "off") { return { cols: rowArr, closed: true }; } var row = []; var inquote = false; var quoteBuff = ''; for (var i = 0, rowLen = rowArr.length; i < rowLen; i++) { var e = rowArr[i]; if (!inquote && trim) { e = e.trim(); } var len = e.length; if (!inquote) { if (isQuoteOpen(e, param)) { //quote open e = e.substr(1); if (isQuoteClose(e, param)) { //quote close e = e.substring(0, e.length - 1); e = _escapeQuote(e, quote, escape); row.push(e); continue; } else { inquote = true; quoteBuff += e; continue; } } else { row.push(e); continue; } } else { //previous quote not closed if (isQuoteClose(e, param)) { //close double quote inquote = false; e = e.substr(0, len - 1); quoteBuff += delimiter + e; quoteBuff = _escapeQuote(quoteBuff, quote, escape); if (trim) { quoteBuff = quoteBuff.replace(/\s+$/, ""); } row.push(quoteBuff); quoteBuff = ""; } else { quoteBuff += delimiter + e; } } } if (!inquote && param._needFilterRow) { row = filterRow(row, param); } return { cols: row, closed: !inquote }; // if (param.workerNum<=1){ // }else{ // if (inquote && quoteBuff.length>0){//for multi core, quote will be closed at the end of line // quoteBuff=_escapeQuote(quoteBuff,quote,escape);; // if (trim){ // quoteBuff=quoteBuff.trimRight(); // } // row.push(quoteBuff); // } // return {cols:row,closed:true}; // } }; function isQuoteOpen(str, param) { var quote = param.quote; var escape = param.escape; return str[0] === quote && ( str[1] !== quote || str[1] === escape && (str[2] === quote || str.length === 2)); } function isQuoteClose(str, param) { var quote = param.quote; var count = 0; var idx = str.length - 1; var escape = param.escape; while (str[idx] === quote || str[idx] === escape) { idx--; count++; } return count % 2 !== 0; } function twoDoubleQuote(str, quote) { var twoQuote = quote + quote; var curIndex = -1; while ((curIndex = str.indexOf(twoQuote, curIndex)) > -1) { str = str.substring(0, curIndex) + str.substring(++curIndex); } return str; } var cachedRegExp = {}; function _escapeQuote(segment, quote, escape) { var key = "es|" + quote + "|" + escape; if (cachedRegExp[key] === undefined) { // if (escape === "\\") { // escape = "\\\\"; // } cachedRegExp[key] = new RegExp('\\'+escape + '\\'+quote, 'g'); } var regExp = cachedRegExp[key]; // console.log(regExp,segment); return segment.replace(regExp, quote); }