1

I'm currently exporting data (on the client side) to CSV, with an explicit UTF-8 encoding

// Build a CSV payload as a data: URI. NOTE(review): `arr` is defined elsewhere;
// per the prose it holds objects, so `infoArray.join(",")` presumably relies on
// each element actually being an array of values — confirm against the caller.
var csvContent = "data:text/csv;charset=utf-8,";

arr.forEach(function(infoArray, index){

    var dataString = infoArray.join(",");
    // Fixed off-by-one: `index < arr.length` was always true, so every row —
    // including the last — received a trailing newline.
    csvContent += index < arr.length - 1 ? dataString + "\n" : dataString;

});

// encodeURI leaves '#' unescaped; inside a data: URI an unescaped '#' starts a
// fragment and silently truncates the payload, so escape it explicitly.
var encodedUri = encodeURI(csvContent).replace(/#/g, "%23");


var link = document.createElement("a");
link.setAttribute("href", encodedUri);
link.setAttribute("download", "data.csv");
document.body.appendChild(link);

link.click();

// Remove the temporary anchor so repeated exports don't accumulate nodes.
document.body.removeChild(link);

The data is in arr and looks like :

[{'firstname':'John', 'surname':'Doe', 'city':'Paris'}, ... , {'firstname':'Johnny', 'surname':'Done', 'city':'Paris'}]

It works pretty well except that when I'm importing the resulting file in Excel I have encoding errors (É -> É for example), but when I open the file in Sublime Text everything looks fine.

kwn
  • 819
  • 1
  • 9
  • 22

1 Answer

0

Looks like you need to include the UTF-8 BOM (Byte-Order Mark) after the comma and before the start of the data. Three-byte sequence: [0xEF, 0xBB, 0xBF].

Microsoft compilers and interpreters, and many pieces of software on Microsoft Windows such as Notepad treat the BOM as a required magic number rather than use heuristics. These tools add a BOM when saving text as UTF-8, and cannot interpret UTF-8 unless the BOM is present, or the file contains only ASCII bytes.

Check out these articles/posts for more info.

// Sample records to export as CSV.
const people = [
  { firstname: 'John',  surname: 'Doe',   city: 'Paris' },
  // ... ,
  { firstname: 'James', surname: 'Brown', city: 'Barnwell' }
];

// Serialize to a BOM-prefixed CSV data URI, then attach a download link.
const csvUri = jsonToCsv({ data: people });
const anchor = createDownloadLink(csvUri, 'people.csv');

document.body.appendChild(anchor);

/**
 * Builds a detached <a download> element pointing at a data URI.
 * @param {string} content  Full data URI (e.g. "data:text/csv;charset=utf-8,...").
 * @param {string} filename Suggested filename for the downloaded file.
 * @param {string} [text]   Visible link label; defaults to the filename.
 * @returns {HTMLAnchorElement} Anchor element — the caller appends it to the DOM.
 */
function createDownloadLink(content, filename, text) {
  text = text || filename;
  var link = document.createElement('a');
  // encodeURI leaves '#' unescaped; inside a data: URI an unescaped '#' starts
  // a fragment and silently truncates the payload, so escape it explicitly.
  link.setAttribute('href', encodeURI(content).replace(/#/g, '%23'));
  link.setAttribute('download', filename);
  // textContent, not innerHTML: the label is plain text, so this avoids
  // accidental HTML injection if the filename ever comes from user input.
  link.textContent = text;
  return link;
}

/**
 * Converts an array of flat objects into a CSV data URI, prefixed with a
 * UTF-8 BOM so spreadsheet apps (Excel) detect the encoding correctly.
 * @param {Object}   opts
 * @param {Object[]} [opts.data]    Records to serialize (default []).
 * @param {string[]} [opts.columns] Column order; defaults to the keys of the first record.
 * @param {string}   [opts.delim]   Field delimiter (default ',').
 * @param {string[]} [opts.headers] MIME parts for the URI (default ['text/csv', 'charset=utf-8']).
 * @returns {string} "data:text/csv;charset=utf-8,<BOM><header row>\n<data rows>"
 */
function jsonToCsv(opts) {
  const BOM = '\uFEFF'; // byte-order mark: lets Excel recognize UTF-8
  // Read defaults into locals instead of mutating the caller's opts object.
  const data = opts.data || [];
  const delim = opts.delim || ',';
  const headers = opts.headers || ['text/csv', 'charset=utf-8'];
  let columns = opts.columns || [];
  if (columns.length < 1 && data.length > 0) {
    columns = Object.keys(data[0]);
  }
  // RFC 4180 escaping (missing in the original): quote any field containing
  // the delimiter, a quote, or a newline, doubling embedded quotes; otherwise
  // such values would corrupt the row structure.
  const escapeField = (value) => {
    const str = value == null ? '' : String(value);
    return str.includes(delim) || /["\r\n]/.test(str)
      ? '"' + str.replace(/"/g, '""') + '"'
      : str;
  };
  const body = data
    .map((rec) => columns.map((col) => escapeField(rec[col])).join(delim))
    .join('\n');
  return 'data:' + headers.join(';') + ',' + BOM +
    [columns.map(escapeField).join(delim), body].join('\n');
}
Community
  • 1
  • 1
Mr. Polywhirl
  • 31,606
  • 11
  • 65
  • 114
  • thanks for the snippet but even with the UTF-8 BOM there is still an encoding issue ("É" are now "√â"). Further investigation seems to point out that Excel on OSX is awfully bad at detecting file encoding. I tried using various tools (such as text-encoding and FileSaver.js ) or Blop objects but no luck so far. – kwn Jan 16 '17 at 12:51