From 228f8f203eac1b5881d890e266ac10d46bb1b024 Mon Sep 17 00:00:00 2001 From: elavington Date: Thu, 27 Jul 2017 15:33:17 -0400 Subject: Add files via upload --- CRESSdna.html | 156 ++++++++++++++++++++++++++++++++ CRESSresults.html | 51 +++++++++++ cgi-bin/SVM_linear_aa_clf.pkl | Bin 0 -> 187597 bytes cgi-bin/UniqRepsGemys_6089_StSCALER.pkl | Bin 0 -> 980 bytes cgi-bin/classifier.py | 52 +++++++++++ 5 files changed, 259 insertions(+) create mode 100644 CRESSdna.html create mode 100644 CRESSresults.html create mode 100644 cgi-bin/SVM_linear_aa_clf.pkl create mode 100644 cgi-bin/UniqRepsGemys_6089_StSCALER.pkl create mode 100644 cgi-bin/classifier.py diff --git a/CRESSdna.html b/CRESSdna.html new file mode 100644 index 0000000..a97d1c1 --- /dev/null +++ b/CRESSdna.html @@ -0,0 +1,156 @@ + + + + + + + + +

Welcome to CRESSdna.org

+ +

+ + + + +

+ +

Home

Part of the National Science Foundation's Assembling the Tree of Life.

Sponsored by a grant from the National Science Foundation

+ +

Taxonomy

Please enter a single-word name (no spaces) and only one Rep sequence.

+ +

This classifier requires the Rep protein sequence to be:

Complete
Unaligned
in FASTA format

The classifier has been trained on the following genera:

Circoviridae

Circovirus
Cyclovirus

Nanoviridae

Babuvirus
Nanovirus

Genomoviridae

Gemycircularvirus
Gemygorvirus
Gemykibivirus
Gemykolovirus
Gemykrogvirus
Gemyvongvirus

Geminiviridae

Becurtovirus
Begomovirus
Capulavirus
Curtovirus
Eragrovirus
Grablovirus
Mastrevirus
Turncurtovirus

Smacovirus

+ + +

Contact

Questions or comments? Send us an email:

email At domain Dot something

+ +

Results

Results from Taxonomy prediction

+ + + + + diff --git a/CRESSresults.html b/CRESSresults.html new file mode 100644 index 0000000..8a78bcd --- /dev/null +++ b/CRESSresults.html @@ -0,0 +1,51 @@ + + + + + + + + +

Taxonomy Prediction Results

Results as Name, predicted Genus, length of sequence:

+

+

This classifier will return the best fit of the submitted sequence to the training data.
+Currently included in the training data:
+

Circoviridae

Circovirus
Cyclovirus

Nanoviridae

Babuvirus
Nanovirus

Genomoviridae

Gemycircularvirus
Gemygorvirus
Gemykibivirus
Gemykolovirus
Gemykrogvirus
Gemyvongvirus

Geminiviridae

Becurtovirus
Begomovirus
Capulavirus
Curtovirus
Eragrovirus
Grablovirus
Mastrevirus
Turncurtovirus

Smacovirus

+ + Return to CRESSdna.org +

#!/usr/bin/python
"""CGI endpoint that predicts the genus of a submitted Rep protein sequence.

Reads the ``fasta`` and ``seqname`` form fields, counts how often each of the
20 amino-acid letters occurs in the sequence, scales those counts with the
pickled scaler, and classifies them with the pickled classifier.  The string
``name,genus,length`` is substituted for the ``{prediction}`` placeholder of
the results template and the page is written to stdout as a CGI response.
When no usable sequence is submitted, a built-in demo sequence is classified
instead.
"""

# import packages to be used
from sklearn.svm import SVC
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.preprocessing import StandardScaler
from sklearn.externals import joblib
import cgi
import cgitb

try:
    from html import escape  # Python 3
except ImportError:
    from cgi import escape  # Python 2

# Paths are relative to the working directory the web server starts the script in.
TEMPLATE_PATH = "./www.html/CRESSresults.html"
CLASSIFIER_PATH = "./cgi-bin/SVM_linear_aa_clf.pkl"
SCALER_PATH = "./cgi-bin/UniqRepsGemys_6089_StSCALER.pkl"

# Feature vocabulary: one column per amino-acid letter, in this order.
AMINO_ACIDS = ['a', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'k', 'l',
               'm', 'n', 'p', 'q', 'r', 's', 't', 'v', 'w', 'y']

# Used when the request carries no sequence.
DEMO_NAME = 'demo'
DEMO_SEQUENCE = "MPSKKSGPQPHKRWVFTLNNPSEEEKNKIRELPISLFDYFVCGEEGLEEGRTAHLQGFANFAKKQTFNKVKWYFGARCHIEKAKGTDQQNKEYCSKEGHILIECGAPRNQGKRSDLSTAYFDYQQSGPPGMVLLNCCPSCRSSLSEDYYFAILEDCWRTINGGTRRPI"

# Shown when a sequence is submitted without a name.
DEFAULT_NAME = 'unnamed'


def _strip_fasta(text):
    """Return the residues in *text* with FASTA headers and whitespace removed.

    Lines starting with ``>`` are dropped so that the letters of a header are
    not counted as amino acids; all remaining whitespace (including line
    breaks) is removed so that it does not inflate the reported length.
    """
    residues = []
    for line in text.splitlines():
        line = line.strip()
        if line.startswith(">"):
            continue
        residues.append("".join(line.split()))
    return "".join(residues)


def _read_submission(form):
    """Return ``(name, sequence)`` taken from the submitted form.

    Falls back to the demo name and sequence when the ``fasta`` field is
    absent or contains no residues.  A missing or blank ``seqname`` is
    replaced by ``DEFAULT_NAME`` so the result line can always be built.
    """
    raw = form.getfirst('fasta')
    sequence = _strip_fasta(raw) if raw else ""
    if not sequence:
        return DEMO_NAME, DEMO_SEQUENCE
    name = (form.getfirst('seqname') or "").strip() or DEFAULT_NAME
    return name, sequence


def _predict_genus(sequence):
    """Return the label the pickled classifier predicts for *sequence*."""
    classifier = joblib.load(CLASSIFIER_PATH)
    scaler = joblib.load(SCALER_PATH)
    # Character-level counts restricted to the 20 amino-acid letters.
    vectorizer = CountVectorizer(analyzer='char', ngram_range=(1, 1),
                                 vocabulary=AMINO_ACIDS)
    counts = vectorizer.transform([sequence])
    # Scale the data to the training set.
    scaled = scaler.transform(counts.astype("float64"))
    return str(classifier.predict(scaled)[0])


def main():
    """Handle one CGI request and print the filled-in results page."""
    cgitb.enable()
    name, sequence = _read_submission(cgi.FieldStorage())

    with open(TEMPLATE_PATH) as template:
        page = template.read()

    genus = _predict_genus(sequence)
    # The name comes from the client, so escape it before it reaches the HTML.
    results = ",".join([escape(name, True), escape(genus, True),
                        str(len(sequence))])

    # A CGI response must start with a header block ended by a blank line.
    print("Content-Type: text/html")
    print("")
    print(page.format(prediction=results))


if __name__ == "__main__":
    main()