Duolingo Fandom Japanese vocab CSV extractor in JS

I’m casually working through the Japanese course on Duolingo, and found a handy series of wiki pages on the Duolingo Fandom site that lists all of the course’s Japanese vocab.

I wanted an easier way to import it into Anki, so I put together this little pasteable JS script that exports the vocab on a single page as a CSV file:

/**
 * Console-pasteable exporter: collects the vocab list items on the current
 * page, converts every line that matches `pattern` into a CSV row, and
 * triggers a download of the resulting CSV through a temporary link.
 */
(function () {
  // One vocab line: <Japanese> (<romaji>) = <English> [(<reading>)]
  // Capture groups used below: 1 = Japanese, 2 = text inside the first
  // parentheses, 3 = text after "=", 5 = optional trailing parenthesised
  // Japanese text. Group 4 only wraps the optional tail.
  const pattern =
    /^([\p{sc=Han}\p{sc=Hiragana}\p{sc=Katakana}?、,。]+)\s*\(([a-z\p{P}-]+)\)\s*=\s*([a-z\p{P}\(\) ?]+)(\s*\(([\p{sc=Han}\p{sc=Hiragana}\p{sc=Katakana}?、,。]+)\))?$/iu;

  /**
   * Formats one captured group as a quoted CSV field.
   * @param {string | undefined} value - Capture group text (undefined when
   *   the optional group did not participate in the match).
   * @returns {string} The field wrapped in double quotes.
   */
  const toCsvField = (value) => {
    const text = (value || '')
      // Global regex: a string pattern would replace only the first comma.
      .replace(/,/g, ' /')
      .trim()
      // Double embedded quotes so the surrounding quotes stay balanced.
      .replace(/"/g, '""');
    return `"${text}"`;
  };

  // Header order defines the column order of every row pushed below.
  const csvRows = ['日本語,ふりがな,ローマ字,英語'];

  document
    .querySelectorAll('h3 ~ ul:not([class]) > li:not([class])')
    .forEach((li) => {
      const innerText = String(li.innerText);
      const match = innerText.match(pattern);
      // Logged for every item so lines the pattern rejects are visible.
      console.log({ innerText, match });
      if (!match) {
        return;
      }
      // Group 5 comes second to line up with the header's second column.
      const csvRow = [match[1], match[5], match[2], match[3]]
        .map(toCsvField)
        .join(',');
      console.log(csvRow);
      csvRows.push(csvRow);
    });

  const csv = csvRows.join('\n');

  // Temporary anchor with a data: URL; clicking it starts the download.
  const download = document.createElement('a');
  download.setAttribute(
    'href',
    'data:text/csv;charset=utf-8,' + encodeURIComponent(csv)
  );
  // File name is the last path segment of the current page URL.
  download.setAttribute('download', String(window.location).split('/').pop());
  document.body.appendChild(download);
  download.click();
  document.body.removeChild(download);
})();

You can paste this into your browser devtools console on one of those pages, and it will trigger a download of the vocab there as a CSV.

You can then import the CSV file into your Anki deck.


Tech mentioned