diff options
Diffstat (limited to 'js-ver.org')
-rw-r--r-- | js-ver.org | 513 |
1 files changed, 513 insertions, 0 deletions
diff --git a/js-ver.org b/js-ver.org new file mode 100644 index 0000000..fea26bd --- /dev/null +++ b/js-ver.org @@ -0,0 +1,513 @@ +#+startup: showall + +* http server +** guix +#+begin_src shell + guix shell -m manifest.scm -- make run +#+end_src + +#+RESULTS: + +** by hand +to build the page and serve it over http: + +1. to compile this org document, call ~org-babel-tangle~ (=C-c C-v t=), which will produce =index.html=. alternatively, you can just call =make= (a =manifest.scm= is provided for guix). + +2. start an http server quickly in python on port 8000 to work around cors issues with file uris: +#+begin_src shell + python3 -m http.server -b :: 8000 +#+end_src + +3. then navigate to http://localhost:8000/ + +* html skeleton +#+name: skeleton +#+begin_src html :noweb yes :tangle index.html + <!DOCTYPE html> + <html lang="en"> + <head> + <meta charset="utf-8"> + <meta http-equiv="X-UA-Compatible" content="IE=edge"> + <meta name="viewport" content="width=device-width, initial-scale=1"> + <link rel='stylesheet' href='style.css'> + <link rel="apple-touch-icon" sizes="180x180" href="/apple-touch-icon.png"> + <link rel="icon" type="image/png" sizes="32x32" href="/favicon-32x32.png"> + <link rel="icon" type="image/png" sizes="16x16" href="/favicon-16x16.png"> + <link rel="manifest" href="/site.webmanifest"> + <title>functional annotation for varscan</title> + </head> + <body> + <h1>functional annotation for varscan</h1> + + <form id='varscan'> + <label for='reference-genome'> + reference genome: + </label> + <select id='reference-genome' name='reference-genome'> + <option>phi6 RefWT_from Lele.txt</option> + </select> + + <label for='protein-coding-regions-select'> + protein coding regions: + </label> + <select id='protein-coding-regions-select' name='protein-coding-regions'> + <option>phi6 wt protein start stops.csv</option> + </select> + + <label for='variants-data'> + variants file (csv): + </label> + <input type='file' id='variants-data' name='variants-data'> + + <button 
type='submit'>go</button>
    </form>

    <div id='log'>
    </div>

    <div id='results' hidden>
      <div id='output'>
        <a id='download'>download</a>
        <table>
          <thead>
            <tr>
              <td>name</td>
              <td>nucleotide change</td>
              <td>protein</td>
              <td>amino acid change</td>
            </tr>
          </thead>
          <tbody>
          </tbody>
        </table>
      </div>
    </div>
    <script src='main.mjs' type='module'></script>
  </body>
  </html>
#+end_src

* javascript
** entry point
set up the javascript code by loading direct module dependencies and running the analysis when the form is submitted.

#+begin_src javascript :noweb yes :tangle main.mjs
  import codon2AA from './codon2AminoAcid.mjs';
  import aa2Code from './aminoAcid2Code.mjs';
  import process from './varscan.mjs';
  import Log from './logging.mjs';

  /**
   * Wire up the #varscan form: run the analysis on submit and keep the
   * submit button disabled until a variants file has been chosen.
   */
  function init() {
    console.info('initializing');

    const varscanForm = document.querySelector('#varscan');
    const variantsInput = varscanForm.querySelector('input[type="file"]');
    const submitButton = varscanForm.querySelector('button');

    // the button is only usable while the file input has a value.
    const syncSubmitButton = () => {
      submitButton.disabled = variantsInput.value === "";
    };

    varscanForm.onsubmit = (event) => {
      // cancel the native submission first: if submitForm throws, the
      // browser must not fall back to submitting (and reloading) the page.
      event.preventDefault();
      submitForm(varscanForm);
    };
    variantsInput.onchange = () => {
      Log.info("file uploaded");
      syncSubmitButton();
    };

    syncSubmitButton();
  }

  init();
#+end_src

*** form submission
when the form is submitted, load the reference genome, its protein coding regions, the uploaded variant data, and spit out the changed proteins.
#+name: submit-form
#+begin_src javascript :noweb yes :tangle main.mjs
  /**
   * Run the analysis for the submitted form and publish the results.
   *
   * Starts loading the uploaded variants, the reference genome and the
   * protein coding regions (the three noweb snippets below), hands them
   * to process(), then fills the results table and the download link.
   * Failures are reported through Log.error.
   *
   * @param {HTMLFormElement} form - the form holding the two selects
   *   and the variants file input.
   */
  function submitForm(form) {
    Log.clear();

    const variantsFile =
          form.querySelector('input[name="variants-data"]').files[0];
    if (!variantsFile) {
      Log.error("no variants file selected");
      return;
    }

    <<upload-variant>>
    <<reference-genome>>
    <<protein-coding-region>>

    // name the download after the upload: "x.csv" becomes "x-varscan.csv".
    // lastIndexOf returns -1 when the name has no extension; slicing with
    // -1 would put "-varscan" in front of the name, so append it instead.
    const filename = variantsFile.name;
    const extIndex = filename.lastIndexOf('.');
    const resultsFilename = extIndex === -1
          ? `${filename}-varscan`
          : `${filename.substring(0, extIndex)}-varscan${filename.substring(extIndex)}`;

    Promise.all([variantsPromise, genomePromise, protein2PosPromise])
      .then(([variants, genome, protein2Pos]) => {
        const results = process(codon2AA, aa2Code, genome, protein2Pos, variants);
        updateDownloadLink(resultsFilename, results);
        fillTable(results);
      })
      // covers both a failed input and an exception thrown while
      // processing, so neither ends up as an unhandled rejection.
      .catch((err) => Log.error("analysis failed:", err));
  }
#+end_src

*** TODO variants upload
we don't need to upload the file, but we're going to pretend we do for ux reasons. this just returns a promise for the “uploaded” file's csv data.
- [ ] verify file format is csv without (or with) headers
#+name: upload-variant
#+begin_src javascript
  const variantsPromise = new Promise((resolve, reject) => {
    const file = form.querySelector('input[name="variants-data"]').files[0];
    const reader = new FileReader();
    reader.onload = (event) => {
      resolve(event.target.result);
    };
    // without an error handler a failed read leaves the promise pending
    // forever and the form appears to hang.
    reader.onerror = () => {
      Log.error("couldn't read variants file:", reader.error);
      reject(reader.error);
    };
    reader.readAsText(file);
  });
#+end_src

*** reference genome promise
load the reference genome from the server based on the filename in the form.

#+name: reference-genome
#+begin_src javascript
  const referenceGenomeFile =
        form.querySelector('select[name="reference-genome"]').value;

  const genomePromise = fetch(referenceGenomeFile)
        .then((response) => {
          // fetch only rejects on network errors; a 404 still resolves.
          if (!response.ok) {
            throw new Error(`${response.status} ${response.statusText}`);
          }
          return response.text();
        })
        // replace() with a string pattern removes only the first match,
        // which left line breaks inside a multi-line genome and shifted
        // every position after them. strip all of them.
        .then((text) => text.replace(/[\r\n]/g, ''))
        .catch((err) => {
          Log.error("couldn't load reference genome:", err);
          // rethrow so the analysis does not run with an undefined genome.
          throw err;
        });
#+end_src

*** TODO protein coding regions promise
load the protein coding regions from the server based on the filename in the form.

- [ ] this should probably be tied to the reference genome

#+name: protein-coding-region
#+begin_src javascript
  const proteinCodingRegionsFile =
        form.querySelector('select[name="protein-coding-regions"]').value;

  // each csv row is "name,start,stop"; the promise resolves to an object
  // mapping name to [start, stop].
  const protein2PosPromise = fetch(proteinCodingRegionsFile)
        .then((response) => {
          // fetch only rejects on network errors; a 404 still resolves.
          if (!response.ok) {
            throw new Error(`${response.status} ${response.statusText}`);
          }
          return response.text();
        })
        .then((text) => {
          const toPosition = (field) =>
                (field === undefined || field.trim() === '') ? NaN : Number(field);
          return text
            .split('\n')
            .reduce(
              (acc, rawLine) => {
                // strip the CR on every row; replace('\r', '') on the
                // whole text only removed the first one.
                const [name, start, stop] = rawLine.replace(/\r/g, '').split(',');
                const range = [toPosition(start), toPosition(stop)];
                // blank rows and rows without numeric positions (such as
                // a header) can never contain a variant, so leave them out.
                if (name && range.every(Number.isFinite)) {
                  acc[name] = range;
                }
                return acc;
              },
              {});
        })
        .catch((err) => {
          Log.error("couldn't load protein coding regions:", err);
          // rethrow: continuing with no regions reports every variant
          // as non-coding.
          throw err;
        });
#+end_src

*** display results
take the results and stuff them in the output table
#+begin_src javascript :noweb yes :tangle main.mjs
  /**
   * Show the result rows in the table inside #results.
   *
   * @param {string[][]} results - rows as returned by process().
   */
  function fillTable(results) {
    const outputDiv = document.querySelector('#results');
    outputDiv.hidden = false;

    const tbody = outputDiv.querySelector('table tbody');
    // clear the table
    while (tbody.firstChild) {
      tbody.removeChild(tbody.firstChild);
    }

    results.forEach((row) => {
      const tr = document.createElement('tr');
      row.forEach((col) => {
        const td = document.createElement('td');
        // the cells contain text taken from the uploaded csv; insert
        // it as text so it is never parsed as markup.
        td.textContent = col;
        tr.appendChild(td);
      });
      tbody.appendChild(tr);
    });
  }
#+end_src

create a download link
#+begin_src javascript :noweb yes :tangle main.mjs
  /**
   * Point the #download link at a csv rendering of the results.
   *
   * @param {string} filename - name offered for the downloaded file.
   * @param {string[][]} results - rows as returned by process().
   * @returns {HTMLAnchorElement} the updated anchor.
   */
  function updateDownloadLink(filename, results) {
    const anchor = document.querySelector('#download');
    // release the blob behind the previous link. the object url lives
    // in href; the old check looked at anchor.url, which is never set,
    // so nothing was ever revoked.
    if (anchor.href.startsWith('blob:')) {
      window.URL.revokeObjectURL(anchor.href);
    }

    const data = results.map((row) => row.join(",") + "\r\n");
    const blob = new Blob(
      data,
      { type: 'text/csv' });
    anchor.href = window.URL.createObjectURL(blob);
    anchor.download = filename;

    return anchor;
  }
#+end_src

** variant to codon whatever i need to name this
#+begin_src javascript :tangle varscan.mjs
  import Log from './logging.mjs';

  /**
   * Annotate each variant with the amino acid change it causes.
   *
   * @param {Object} codon2AA - codon ('GCT') to amino acid name ('Ala').
   * @param {Object} aa2Code - amino acid name to one-letter code.
   * @param {string} genome - reference genome, without line breaks.
   * @param {Object} protein2Pos - protein name to [start, stop], 1-based
   *   and inclusive.
   * @param {string} variants - csv text, one variant per line:
   *   name,reference,position,original nucleotide,new nucleotide.
   *   an empty name reuses the name of the previous row.
   * @returns {string[][]} one [name, nucleotide change, protein,
   *   amino acid change] row per variant.
   */
  function process(codon2AA, aa2Code, genome, protein2Pos, variants) {
    let name = 'unnamed-change';
    const rows = [];

    variants.split('\n').forEach((rawLine, index) => {
      const lineno = index + 1;
      // strings are immutable, so the stripped copy has to be kept.
      // the old code discarded it, which left the "\r" of CRLF files on
      // the new nucleotide and broke the codon lookup.
      const line = rawLine.replace(/\r/g, '');
      if (line.trim() === "") {
        return;
      }

      // the second column (reference name) is not used.
      const [nameMaybe, , posStr, origRaw, newRaw] = line.split(',');
      const pos = Number(posStr);
      if (newRaw === undefined || posStr.trim() === '' || !Number.isInteger(pos)) {
        // short rows and header rows used to throw or produce junk rows.
        Log.warn(`${nameMaybe || name} (line ${lineno}): expected “name,reference,position,original,new”; skipping “${line}”.`);
        return;
      }
      name = nameMaybe || name;

      // the codon table is keyed in upper case; normalise so that
      // lower-case input is looked up and compared correctly.
      const origNucleotide = origRaw.trim().toUpperCase();
      const newNucleotide = newRaw.trim().toUpperCase();

      const [protein, proteinStart, aaIndex] =
            findProtein(pos, protein2Pos);
      if (!protein) {
        rows.push([
          name,
          `${origNucleotide.toLowerCase()}${pos}${newNucleotide.toLowerCase()}`,
          '',
          "non-coding"
        ]);
        return;
      }

      // pos points to the change within the genome (1-index)
      // proteinStart points to the start of the changed protein within the genome (1-index)
      // changedStart points to the changed nucleotide within the protein (0-index)
      // codonStart points to the start of the changed codon within the protein (0-index)
      // offset points to the changed nucleotide within the codon (0-index)
      const changedStart = pos - proteinStart;
      const offset = changedStart % 3;
      const codonStart = changedStart - offset;

      // position of codon within entire genome (0-index)
      const absCodonStart = proteinStart + codonStart - 1;

      const origCodon =
            genome.substring(absCodonStart, absCodonStart + 3).toUpperCase();
      const origAA = aa2Code[codon2AA[origCodon]];
      const newCodon = origCodon.substring(0, offset) + newNucleotide + origCodon.substring(offset+1);
      const newAA = aa2Code[codon2AA[newCodon]];

      // check the change against the reference genome.
      const checkNucleotide = origCodon[offset];
      if (checkNucleotide !== origNucleotide) {
        const checkCodon =
              origCodon.substring(0, offset)
              + origNucleotide
              + origCodon.substring(offset+1);
        const checkAA = aa2Code[codon2AA[checkCodon]];
        const aaChange = `${checkAA}${aaIndex}${newAA}`;
        Log.warn(`${name} (line ${lineno}): nucleotide at position ${pos} is “${checkNucleotide.toLowerCase()}” in the reference, but “${origNucleotide.toLowerCase()}” was supplied. If the supplied nucleotide is correct, then the amino acid change is ${aaChange}.`);
      }

      // the reported change always starts from the reference nucleotide.
      const nucleotideChange = `${checkNucleotide.toLowerCase()}${pos}${newNucleotide.toLowerCase()}`;
      const aaChange = `${origAA}${aaIndex}${newAA}`;
      // push instead of concat: concat copied the whole result array
      // for every row.
      rows.push([
        name,
        nucleotideChange,
        protein,
        aaChange
      ]);
    });

    return rows;
  }

  // pos is 1-based index
  //
  // returns protein name, 1-index of start of protein in genome, and
  // 1-index of the amino acid (codon) within the protein that contains
  // `pos`. returns an empty array when no protein covers `pos`; when
  // regions overlap, the first match in iteration order wins.
  function findProtein(pos, protein2Pos) {
    for (const name in protein2Pos) {
      const [start, end] = protein2Pos[name];
      if (start <= pos && pos <= end) {
        // normal people count from 1, not 0
        const index = Math.floor((pos - start) / 3) + 1;
        return [name, start, index];
      }
    }
    return [];
  }
  export default process
#+end_src

** codon to amino acid table
:PROPERTIES:
:VISIBILITY: folded
:END:

#+begin_src javascript :noweb yes :tangle codon2AminoAcid.mjs
  const codon2AA = {
    'GCT': 'Ala',
    'GCC': 'Ala',
    'GCA': 'Ala',
    'GCG': 'Ala',
    'CGT': 'Arg',
    'CGC': 'Arg',
    'CGA': 'Arg',
    'CGG': 'Arg',
    'AGA': 'Arg',
    'AGG': 'Arg',
    'AAT': 'Asn',
    'AAC': 'Asn',
    'GAT': 'Asp',
    'GAC': 'Asp',
    'TGT': 'Cys',
    'TGC': 'Cys',
    'CAA': 'Gln',
    'CAG': 'Gln',
    'GAA': 'Glu',
    'GAG': 'Glu',
    'GGT': 'Gly',
    'GGC': 'Gly',
    'GGA': 'Gly',
    'GGG': 'Gly',
    'CAT': 'His',
    'CAC': 'His',
    'ATT': 'Ile',
    'ATC': 'Ile',
    'ATA': 'Ile',
    'CTT': 'Leu',
    'CTC': 'Leu',
    'CTA': 'Leu',
    'CTG': 'Leu',
    'TTA': 'Leu',
    'TTG': 'Leu',
    'AAA': 'Lys',
    'AAG': 'Lys',
    'ATG': 'Met',
    'TTT': 'Phe',
    'TTC': 'Phe',
    'CCT': 'Pro',
    'CCC': 'Pro',
    'CCA': 'Pro',
    'CCG': 'Pro',
    'TCT': 'Ser',
    'TCC': 'Ser',
    'TCA': 'Ser',
    'TCG': 'Ser',
    'AGT': 'Ser',
    'AGC': 'Ser',
    'ACT': 'Thr',
    'ACC': 'Thr',
    'ACA': 'Thr',
    'ACG': 'Thr',
    'TGG': 'Trp',
    'TAT': 'Tyr',
    'TAC': 'Tyr',
    'GTT': 'Val',
    'GTC': 'Val',
    'GTA': 'Val',
    'GTG': 'Val',
    'TAA': 'STOP',
    'TGA': 'STOP',
    'TAG': 'STOP'
  }

  export default codon2AA
#+end_src

** amino acid to letter table
:PROPERTIES:
:VISIBILITY: folded
:END:

#+begin_src javascript :noweb yes :tangle aminoAcid2Code.mjs
  const aa2Code = {
    'Ala': 'A',
    'Arg': 'R',
    'Asn': 'N',
    'Asp': 'D',
    'Cys': 'C',
    'Gln': 'Q',
    'Glu': 'E',
    'Gly': 'G',
    'His': 'H',
    'Ile': 'I',
    'Leu': 'L',
    'Lys': 'K',
    'Met': 'M',
    'Phe': 'F',
    'Pro': 'P',
    'Ser': 'S',
    'Thr': 'T',
    'Trp': 'W',
    'Tyr': 'Y',
    'Val': 'V',
    'STOP': '*'
  }

  export default aa2Code
#+end_src

** logging
#+begin_src javascript :noweb yes :tangle logging.mjs
  // logger that writes errors and warnings into the page's #log element
  // as well as to the browser console; info and debug messages go to the
  // console only.
  class Log {
    // the #log element, created and appended to <body> if the page
    // does not have one.
    get elt() {
      let elt = document.querySelector('#log');
      if (!elt) {
        elt = document.createElement('div');
        elt.id = 'log';
        document.body.appendChild(elt);
      }
      return elt;
    }

    // remove every message currently shown in the page.
    clear() {
      while (this.elt.firstChild) {
        this.elt.removeChild(this.elt.firstChild);
      }
    }

    // append one <ul> holding an <li class=level> per item.
    logAt(level, items) {
      const list = document.createElement('ul');
      items.forEach((item) => {
        const msg = document.createElement('li');
        msg.setAttribute('class', level);
        msg.innerText = item;
        list.appendChild(msg);
      });
      this.elt.appendChild(list);
    }

    error(...items) {
      this.logAt('error', items);
      console.error(...items);
    }

    warn(...items) {
      this.logAt('warn', items);
      console.warn(...items);
    }

    info(...items) {
      console.info(...items);
    }

    debug(...items) {
      console.debug(...items);
    }
  }
  // a shared instance, exposed through static-style helpers so callers
  // can write Log.warn(...) without creating a logger.
  Log.logger = new Log();
  Log.clear = Log.logger.clear.bind(Log.logger);
  Log.error = Log.logger.error.bind(Log.logger);
  Log.warn = Log.logger.warn.bind(Log.logger);
  Log.info = Log.logger.info.bind(Log.logger);
  Log.debug = Log.logger.debug.bind(Log.logger);

  export default Log
#+end_src