From 1475085bf6cc046260e570cd53cc590201f66b40 Mon Sep 17 00:00:00 2001 From: elavington Date: Mon, 25 Sep 2017 12:50:03 -0400 Subject: Remove unwanted files --- CRESSdna.html | 156 --------------------------- CRESSresults.html | 51 --------- classifier.py | 313 ------------------------------------------------------ 3 files changed, 520 deletions(-) delete mode 100644 CRESSdna.html delete mode 100644 CRESSresults.html delete mode 100644 classifier.py diff --git a/CRESSdna.html b/CRESSdna.html deleted file mode 100644 index f037c56..0000000 --- a/CRESSdna.html +++ /dev/null @@ -1,156 +0,0 @@ - - - - - - - - -

Welcome to CRESSdna.org

- -

- - - - -

- -

Home

Part of the National Science Foundation's Assembling the Tree of Life.

Sponsored with a Grant from the National Science Foundation

- -

Taxonomy

Please enter only one word as the name (no spaces) and only one Rep sequence.

- -

This classifier requires the Rep protein sequence to be:

Complete
Unaligned
In FASTA format

It has been trained on the following genera:

Circoviridae

Circovirus
Cyclovirus

Nanoviridae

Babuvirus
Nanovirus

Genomoviridae

Gemycircularvirus
Gemygorvirus
Gemykibivirus
Gemykolovirus
Gemykrogvirus
Gemyvongvirus

Geminiviridae

Becurtovirus
Begomovirus
Capulavirus
Curtovirus
Eragrovirus
Grablovirus
Mastrevirus
Turncurtovirus

Smacovirus

- - -

Contact

Questions or comments? Send us an email:

email At domain Dot something

- -

Results

Results from Taxonomy prediction

- - - - - diff --git a/CRESSresults.html b/CRESSresults.html deleted file mode 100644 index 47c4227..0000000 --- a/CRESSresults.html +++ /dev/null @@ -1,51 +0,0 @@ - - - - - - - - -

Taxonomy Prediction Results

Results as Name, predicted Genus, length of sequence:

-

-

This classifier will return the best fit of the submitted sequence to the training data.
-Currently included in the training data:
-

Circoviridae

Circovirus
Cyclovirus

Nanoviridae

Babuvirus
Nanovirus

Genomoviridae

Gemycircularvirus
Gemygorvirus
Gemykibivirus
Gemykolovirus
Gemykrogvirus
Gemyvongvirus

Geminiviridae

Becurtovirus
Begomovirus
Capulavirus
Curtovirus
Eragrovirus
Grablovirus
Mastrevirus
Turncurtovirus

Smacovirus

- - Return to CRESSdna.org -

- - - - diff --git a/classifier.py b/classifier.py deleted file mode 100644 index ecf7c15..0000000 --- a/classifier.py +++ /dev/null @@ -1,313 +0,0 @@ -#!/home/erik/bin/python3.6 - -#import packages to be used -from sklearn.svm import SVC -from sklearn.feature_extraction.text import CountVectorizer -from sklearn.preprocessing import StandardScaler -from sklearn.externals import joblib -import cgi, cgitb - -#----------------------------------------------\ -# Parse the web-form information to variables \ -# \_______________________________________________________ -# | -cgitb.enable() -form=cgi.FieldStorage() -alignment = form.getvalue('fasta') -if alignment.startswith(">"): #naive check for FASTA format - list=alignment.split(">") - book={} - for a in list: - tempList=a.splitlines() - nameLine=tempList.pop(0) - name=nameLine.split(" ")[0] - seq="".join(tempList) - book[name]=seq - seqList=[] - lenList=[] - nameList=[] - for i in book: - nameList.append(i) - seqList.append(book[i]) - lenList.append(str(len(book[i]))) - - if len(seqList)=0: #check for empty sequence list - seqList = ["MPSKKSGPQPHKRWVFTLNNPSEEEKNKIRELPISLFDYFVCGEEGLEEGRTAHLQGFANFAKKQTFNKVKWYFGARCHIEKAKGTDQQNKEYCSKEGHILIECGAPRNQGKRSDLSTAYFDYQQSGPPGMVLLNCCPSCRSSLSEDYYFAILEDCWRTINGGTRRPI"] - nameList=['demo'] - lenList=[str(len(alignment[0]))] - -else: - seqList = ["MPSKKSGPQPHKRWVFTLNNPSEEEKNKIRELPISLFDYFVCGEEGLEEGRTAHLQGFANFAKKQTFNKVKWYFGARCHIEKAKGTDQQNKEYCSKEGHILIECGAPRNQGKRSDLSTAYFDYQQSGPPGMVLLNCCPSCRSSLSEDYYFAILEDCWRTINGGTRRPI"] - nameList=['demo'] - lenList=[str(len(alignment[0]))] - -#--------------------------------------------------------------------------------------------------------+ - -#----------------------------------------------\ -# predict genus of input sequences \ -# \_______________________________________________________ -# | -#list of amino acids as vocabulary for the CountVectorizer -AAs=['a','c','d','e','f','g','h','i','k','l','m','n','p','q','r','s','t','v','w','y'] - -#load the 
classifier and scaler -clf=joblib.load("./cgi-bin/SVM_linear_aa_clf.pkl") -StSc=joblib.load("./cgi-bin/UniqRepsGemys_6089_StSCALER.pkl") -cv=CountVectorizer(analyzer='char',ngram_range=(1,1),vocabulary=AAs) - -#initialize text data vectorizer -dataVect=cv.transform(seqList) - -#Scale the data to the training set -X=StSc.transform(dataVect.astype("float64")) - -#make predictions for the original dataset -predictions=clf.predict(X) - - -#----------------------------------------------\ -# Build HTML table of results \ -# \_______________________________________________________ -# | -results="""""" -for k in len(seqList): - results+="""{0}{1}{2}""".format(nameList[k],lenList[k],predictions[k]) -if "demo" in nameList: - results+="""

There seems to have been an error.
If you are expecting more than one prediction or - do not see the name you entered please try the submission form again, making sure that the input is in FASTA format.""" - -#----------------------------------------------\ -# Build output page \ -# \_______________________________________________________ -# | -#build output page parts -#Header and CSS Style bits -header=""" - - - - - - -""" - -#Page contents, first part -body1=""" - - -