diff options
-rw-r--r-- | CRESSdna.html | 156 | ||||
-rw-r--r-- | CRESSresults.html | 51 | ||||
-rw-r--r-- | cgi-bin/SVM_linear_aa_clf.pkl | bin | 0 -> 187597 bytes | |||
-rw-r--r-- | cgi-bin/UniqRepsGemys_6089_StSCALER.pkl | bin | 0 -> 980 bytes | |||
-rwxr-xr-x | cgi-bin/classifier.py | 52 | ||||
-rw-r--r-- | index.html | 3 |
6 files changed, 261 insertions, 1 deletions
diff --git a/CRESSdna.html b/CRESSdna.html new file mode 100644 index 0000000..f037c56 --- /dev/null +++ b/CRESSdna.html @@ -0,0 +1,156 @@ +<!DOCTYPE html> + +<html> +<head> +<style> +* {box-sizing: border-box} +body {font-family: "Lato", sans-serif;} + +/* Style the tab */ +div.tab { + float: left; + border: 1px solid #ccc; + background-color: #f1f1f1; + width: 20%; + height: 250px; +} + +/* Style the buttons inside the tab */ +div.tab button { + display: block; + background-color: inherit; + color: black; + padding: 22px 16px; + width: 100%; + border: none; + outline: none; + text-align: left; + cursor: pointer; + transition: 0.3s; + font-size: 17px; +} + +/* Change background color of buttons on hover */ +div.tab button:hover { + background-color: #ddd; +} + +/* Create an active/current "tab button" class */ +div.tab button.active { + background-color: #1acefc; +} + +/* Style the tab content */ +.tabcontent { + float: left; + padding: 0px 12px; + border: 1px solid #ccc; + width: 80%; + min-height: 250px; +} +</style> +</head> +<body> + +<p>Welcome to CRESSdna.org</p> + +<div class="tab"> + <button class="tablinks" onclick="openTab(event, 'Home')" id="defaultOpen">Home</button> + <button class="tablinks" onclick="openTab(event, 'Taxonomy')">Taxonomy</button> + <button class="tablinks" onclick="openTab(event, 'Contact')">Contact</button> + <button class="tablinks" onclick="openTab(event, 'Results')">Results</button> + </div> + +<div id="Home" class="tabcontent"> + <h3>Home</h3> + <p>Part of the <a href='http://www.nsf.gov/pubs/2010/nsf10513/nsf10513.htm'>National Science Foundation's Assembling the Tree of Life</a>.</p> + <img src='nsf1.jpg' alt='Sponsored with a Grant from the National Science Foundation'> +</div> + +<div id="Taxonomy" class="tabcontent"> + <h3>Taxonomy</h3> + <p>Please enter only one word as the name(no space) and only one Rep sequence</p> + <form action="./cgi-bin/classifier.py" method="post"><br> + <input type="text" name="seqname" 
value="seqID"><br> + <textarea rows="4" cols="50" name="fasta"> +Enter ONE Rep protein sequence here...</textarea> + <br> + <input type="reset"> + <input type="submit"> +</form> + <p> + <ul> + <li>This classifier requires Rep protein sequence to be:</li> + <ul> + <li>Complete</li> + <li>Unaligned</li> + <li>in FASTA format</li> + </ul> + <p>And has been trained on the following Genera:</p> + <li>Circoviridae</li> + <ul> + <li>Circovirus</li> + <li>Cyclovirus</li> + </ul> + <li>Nanoviridae</li> + <ul> + <li>Babuvirus</li> + <li>Nanovirus</li> + </ul> + <li>Genomoviridae</li> + <ul> + <li>Gemycircularvirus</li> + <li>Gemygorvirus</li> + <li>Gemykibivirus</li> + <li>Gemykolovirus</li> + <li>Gemykrogvirus</li> + <li>Gemyvongvirus</li> + </ul> + <li>Geminiviridae</li> + <ul> + <li>Becurtovirus</li> + <li>Begomovirus</li> + <li>Capulavirus</li> + <li>Curtovirus</li> + <li>Eragrovirus</li> + <li>Grablovirus</li> + <li>Mastrevirus</li> + <li>Turncurtovirus</li> + </ul> + <li>Smacovirus</li> +</ul> </p> +</div> + + +<div id="Contact" class="tabcontent"> + <h3>Contact</h3> + <p>Questions or comments? 
Send us an email:</p> + <p>email At domain Dot something</p> +</div> + +<div id="Results" class="tabcontent"> + <h3>Results</h3> + <p>Results from Taxonomy prediction</p> +</div> + +<script> +function openTab(evt, tabTitle) { + var i, tabcontent, tablinks; + tabcontent = document.getElementsByClassName("tabcontent"); + for (i = 0; i < tabcontent.length; i++) { + tabcontent[i].style.display = "none"; + } + tablinks = document.getElementsByClassName("tablinks"); + for (i = 0; i < tablinks.length; i++) { + tablinks[i].className = tablinks[i].className.replace(" active", ""); + } + document.getElementById(tabTitle).style.display = "block"; + evt.currentTarget.className += " active"; +} + +// Get the element with id="defaultOpen" and click on it +document.getElementById("defaultOpen").click(); +</script> + +</body> +</html> diff --git a/CRESSresults.html b/CRESSresults.html new file mode 100644 index 0000000..47c4227 --- /dev/null +++ b/CRESSresults.html @@ -0,0 +1,51 @@ +<!DOCTYPE html> + +<html> +<head> + +</head> +<body> + +<h3>Taxonomy Prediction Results</h3> + <p>Results as Name, predicted Genus, length of sequence: </p><br> + <input type="text" readonly="true" value="{prediction}" size="40"/><br><br> + <p>This classifier will return the best fit of the submitted sequence to the training data.<br> +Currently included in the training data:<br> +<li>Circoviridae</li> + <ul> + <li>Circovirus</li> + <li>Cyclovirus</li> + </ul> + <li>Nanoviridae</li> + <ul> + <li>Babuvirus</li> + <li>Nanovirus</li> + </ul> + <li>Genomoviridae</li> + <ul> + <li>Gemycircularvirus</li> + <li>Gemygorvirus</li> + <li>Gemykibivirus</li> + <li>Gemykolovirus</li> + <li>Gemykrogvirus</li> + <li>Gemyvongvirus</li> + </ul> + <li>Geminiviridae</li> + <ul> + <li>Becurtovirus</li> + <li>Begomovirus</li> + <li>Capulavirus</li> + <li>Curtovirus</li> + <li>Eragrovirus</li> + <li>Grablovirus</li> + <li>Mastrevirus</li> + <li>Turncurtovirus</li> + </ul> + <li>Smacovirus</li> + + <a 
href="./www/html/CRESSdna.html">Return to CRESSdna.org</a> +</p> + +</body> +</html> + diff --git a/cgi-bin/SVM_linear_aa_clf.pkl b/cgi-bin/SVM_linear_aa_clf.pkl Binary files differnew file mode 100644 index 0000000..1afce0a --- /dev/null +++ b/cgi-bin/SVM_linear_aa_clf.pkl diff --git a/cgi-bin/UniqRepsGemys_6089_StSCALER.pkl b/cgi-bin/UniqRepsGemys_6089_StSCALER.pkl Binary files differnew file mode 100644 index 0000000..3a098bd --- /dev/null +++ b/cgi-bin/UniqRepsGemys_6089_StSCALER.pkl diff --git a/cgi-bin/classifier.py b/cgi-bin/classifier.py new file mode 100755 index 0000000..fee11b9 --- /dev/null +++ b/cgi-bin/classifier.py @@ -0,0 +1,52 @@ +#!/home/erik/bin/python3.6m + +#import packages to be used +from sklearn.svm import SVC +from sklearn.feature_extraction.text import CountVectorizer +from sklearn.preprocessing import StandardScaler +from sklearn.externals import joblib +import cgi, cgitb + +cgitb.enable() +form=cgi.FieldStorage() +if form.getvalue('fasta'): + alignment = form.getvalue('fasta') + alignment=[alignment] + name=form.getvalue('seqname') + size=len(alignment[0]) +else: + alignment = ["MPSKKSGPQPHKRWVFTLNNPSEEEKNKIRELPISLFDYFVCGEEGLEEGRTAHLQGFANFAKKQTFNKVKWYFGARCHIEKAKGTDQQNKEYCSKEGHILIECGAPRNQGKRSDLSTAYFDYQQSGPPGMVLLNCCPSCRSSLSEDYYFAILEDCWRTINGGTRRPI"] + name='demo' + size=len(alignment[0]) + +html = open("./var/www/html/CRESSresults.html") +page=html.read() + + +AAs=['a','c','d','e','f','g','h','i','k','l','m','n','p','q','r','s','t','v','w','y'] +clf=joblib.load("./cgi-bin/SVM_linear_aa_clf.pkl") +StSc=joblib.load("./cgi-bin/UniqRepsGemys_6089_StSCALER.pkl") +cv=CountVectorizer(analyzer='char',ngram_range=(1,1),vocabulary=AAs) + + +#initialize text data vectorizer + +dataVect=cv.transform(alignment) + +#Scale the data to the training set +X=StSc.transform(dataVect.astype("float64")) + +#make predictions for the original dataset +results=",".join([name,clf.predict(X)[0]]) +results=",".join([results,str(size)]) +#for i in results: + 
#print(i[0],"\t",i[1]) + +output = page.format(prediction=results) +"""f=open('test.html','w') +f.write(output) +f.close()""" +print (output) + + +quit()
\ No newline at end of file @@ -3,7 +3,8 @@ <head> <title>CRESSDNA</title> </head> - +<*> +</*> <body> <h1>Circular Rep-Encoding Single Stranded DNA Viruses</h1> <p>Part of the <a href='http://www.nsf.gov/pubs/2010/nsf10513/nsf10513.htm'>National Science Foundation's Assembling the Tree of Life</a>.</p> |