-
-
-
-
diff --git a/classifier.py b/classifier.py
deleted file mode 100644
index ecf7c15..0000000
--- a/classifier.py
+++ /dev/null
@@ -1,313 +0,0 @@
-#!/home/erik/bin/python3.6
-
-#import packages to be used
-from sklearn.svm import SVC
-from sklearn.feature_extraction.text import CountVectorizer
-from sklearn.preprocessing import StandardScaler
-from sklearn.externals import joblib
-import cgi, cgitb
-
-#----------------------------------------------\
-# Parse the web-form information to variables \
-# \_______________________________________________________
-# |
-cgitb.enable()
-
-# Built-in demo sequence. It is classified whenever the submission is missing
-# or is not FASTA; the name 'demo' is what later triggers the error note that
-# is appended to the results.
-DEMO_SEQUENCE = "MPSKKSGPQPHKRWVFTLNNPSEEEKNKIRELPISLFDYFVCGEEGLEEGRTAHLQGFANFAKKQTFNKVKWYFGARCHIEKAKGTDQQNKEYCSKEGHILIECGAPRNQGKRSDLSTAYFDYQQSGPPGMVLLNCCPSCRSSLSEDYYFAILEDCWRTINGGTRRPI"
-
-
-def _parse_fasta(text):
-    """Return a {name: sequence} dict for the FASTA records found in *text*.
-
-    The name is the first space-delimited word of each '>' header line and the
-    sequence is the concatenation of the lines that follow it. When a name
-    occurs more than once, the last record with that name wins.
-    """
-    records = {}
-    for chunk in text.split(">"):
-        lines = chunk.splitlines()
-        if not lines:
-            # split(">") always yields an empty chunk before the first '>'
-            # (and after a trailing one); it carries no record.
-            continue
-        records[lines[0].split(" ")[0]] = "".join(lines[1:])
-    return records
-
-
-form = cgi.FieldStorage()
-alignment = form.getvalue('fasta')
-
-# getvalue() gives None when the field is absent and a list when it is
-# repeated, so make sure it is a string before the naive FASTA check.
-if isinstance(alignment, str) and alignment.startswith(">"):
-    book = _parse_fasta(alignment)
-else:
-    book = {}
-
-# Parallel lists consumed by the prediction and results sections.
-nameList = [name for name in book]
-seqList = [book[name] for name in nameList]
-lenList = [str(len(seq)) for seq in seqList]
-
-if not seqList:
-    # Nothing usable was submitted: fall back to the demo sequence and report
-    # its real length.
-    seqList = [DEMO_SEQUENCE]
-    nameList = ['demo']
-    lenList = [str(len(DEMO_SEQUENCE))]
-
-
-#--------------------------------------------------------------------------------------------------------+
-
-#----------------------------------------------\
-# predict genus of input sequences \
-# \_______________________________________________________
-# |
-# Vocabulary handed to the CountVectorizer: the 20 lower-case amino-acid letters.
-AAs = [residue for residue in "acdefghiklmnpqrstvwy"]
-
-# Restore the pickled classifier and scaler from the cgi-bin directory.
-clf = joblib.load("./cgi-bin/SVM_linear_aa_clf.pkl")
-StSc = joblib.load("./cgi-bin/UniqRepsGemys_6089_StSCALER.pkl")
-
-# Character-level, single-letter counting restricted to the vocabulary above.
-cv = CountVectorizer(
-    vocabulary=AAs,
-    ngram_range=(1, 1),
-    analyzer='char',
-)
-
-# Turn every submitted sequence into its vector of letter counts.
-dataVect = cv.transform(seqList)
-
-# Cast the counts to float64 and run them through the loaded scaler.
-X = StSc.transform(dataVect.astype("float64"))
-
-# One predicted label per input sequence.
-predictions = clf.predict(X)
-
-
-#----------------------------------------------\
-# Build HTML table of results \
-# \_______________________________________________________
-# |
-import html
-
-
-def _safe(value):
-    """Return *value* as text that is safe to place inside the output page.
-
-    HTML metacharacters are escaped because sequence names come straight from
-    the submitted form. Braces are doubled because the assembled page is later
-    passed through str.format(), which turns '{{' back into '{' and would
-    otherwise raise on a lone brace.
-    """
-    return html.escape(str(value)).replace("{", "{{").replace("}", "}}")
-
-
-# One results entry (name, length, prediction) per classified sequence.
-results = ""
-for name, length, label in zip(nameList, lenList, predictions):
-    results += """
{0}
{1}
{2}
""".format(_safe(name), length, _safe(label))
-
-# The name 'demo' only appears when the submission could not be used, so tell
-# the user that the fallback sequence was classified instead.
-if "demo" in nameList:
-    results += """
There seems to have been an error. If you are expecting more than one prediction or
- do not see the name you entered please try the submission form again, making sure that the input is in FASTA format."""
-
-#----------------------------------------------\
-# Build output page \
-# \_______________________________________________________
-# |
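-#build the static parts of the output page (they are joined into `page` further down)
-#the joined page is passed through str.format(), so a literal brace in any of these strings must be doubled
-#header: opening part of the page, holding the header and CSS style section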
-header="""
-
-
-
-
-
-
-"""
-
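-#Page contents, first part: everything shown before the result rows (usage notes, input requirements, trained genera, contact details, results table headings)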
-body1="""
-
-
-
Please enter only one word as the name(no space) and only one Rep sequence
-
-
-
-
This classifier requires Rep protein sequence to be:
-
-
Complete
-
Unaligned
-
in FASTA format
-
-
And has been trained on the following Genera:
-
Circoviridae
-
-
Circovirus
-
Cyclovirus
-
-
Nanoviridae
-
-
Babuvirus
-
Nanovirus
-
-
Genomoviridae
-
-
Gemycircularvirus
-
Gemygorvirus
-
Gemykibivirus
-
Gemykolovirus
-
Gemykrogvirus
-
Gemyvongvirus
-
-
Geminiviridae
-
-
Becurtovirus
-
Begomovirus
-
Capulavirus
-
Curtovirus
-
Eragrovirus
-
Grablovirus
-
Mastrevirus
-
Turncurtovirus
-
-
Smacovirus
-
-
-
-
Contact
-
Questions or comments? Send us an email:
-
email At domain Dot something
-
-
-
-
Results
-
Results from Taxonomy prediction
-
-
-
Sequence Name
-
Length
-
Prediction
-
-"""
-
-#Page contents, second part: shown after the result rows (the results are inserted between body1 and body2)
-body2="""
-
-
This classifier will return the best fit of the submitted sequence to the training data.
-Currently included in the training data:
-
Circoviridae
-
-
Circovirus
-
Cyclovirus
-
-
Nanoviridae
-
-
Babuvirus
-
Nanovirus
-
-
Genomoviridae
-
-
Gemycircularvirus
-
Gemygorvirus
-
Gemykibivirus
-
Gemykolovirus
-
Gemykrogvirus
-
Gemyvongvirus
-
-
Geminiviridae
-
-
Becurtovirus
-
Begomovirus
-
Capulavirus
-
Curtovirus
-
Eragrovirus
-
Grablovirus
-
Mastrevirus
-
Turncurtovirus
-
-
Smacovirus
-
-
-
-
-
-
-"""
-
-#footer: closing part of the page, appended after body2
-footer="""
-
-"""
-
-# Assemble the page: static header and first body part, then the generated
-# results, then the second body part and the footer.
-page = "".join((header, body1, results, body2, footer))
-
-# str.format() with no arguments only collapses doubled braces ('{{' -> '{');
-# any other brace left in the page makes this call raise.
-output = page.format()
-
-# NOTE(review): no HTTP header line (e.g. Content-Type) is printed in this
-# block -- confirm that one is emitted elsewhere before the page body.
-print(output)
-
-# End the script explicitly. quit() is a helper the site module installs for
-# interactive sessions, so raise SystemExit directly instead.
-raise SystemExit
\ No newline at end of file
--
cgit v1.2.3