From 00fec851a688f0f3b65e8619391b17a96d16799a Mon Sep 17 00:00:00 2001 From: elavington Date: Wed, 30 Aug 2017 15:47:56 -0400 Subject: Commit changes --- cgi-bin/classifier.py | 377 +++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 313 insertions(+), 64 deletions(-) (limited to 'cgi-bin/classifier.py') diff --git a/cgi-bin/classifier.py b/cgi-bin/classifier.py index aa97555..ecf7c15 100755 --- a/cgi-bin/classifier.py +++ b/cgi-bin/classifier.py @@ -1,64 +1,313 @@ -<<<<<<< HEAD -#!/home/erik/bin/python3.6m -======= -#!/home/erik/bin/python3.6 ->>>>>>> f7d7849fcd6ce02a59db8c5fadc29d1962476493 - -#import packages to be used -from sklearn.svm import SVC -from sklearn.feature_extraction.text import CountVectorizer -from sklearn.preprocessing import StandardScaler -from sklearn.externals import joblib -import cgi, cgitb - -cgitb.enable() -form=cgi.FieldStorage() -if form.getvalue('fasta'): - alignment = form.getvalue('fasta') - alignment=[alignment] - name=form.getvalue('seqname') - size=len(alignment[0]) -else: - alignment = ["MPSKKSGPQPHKRWVFTLNNPSEEEKNKIRELPISLFDYFVCGEEGLEEGRTAHLQGFANFAKKQTFNKVKWYFGARCHIEKAKGTDQQNKEYCSKEGHILIECGAPRNQGKRSDLSTAYFDYQQSGPPGMVLLNCCPSCRSSLSEDYYFAILEDCWRTINGGTRRPI"] - name='demo' - size=len(alignment[0]) - -<<<<<<< HEAD -html = open("./var/www/html/CRESSresults.html") -======= -html = open("./www.html/CRESSresults.html") ->>>>>>> f7d7849fcd6ce02a59db8c5fadc29d1962476493 -page=html.read() - - -AAs=['a','c','d','e','f','g','h','i','k','l','m','n','p','q','r','s','t','v','w','y'] -clf=joblib.load("./cgi-bin/SVM_linear_aa_clf.pkl") -StSc=joblib.load("./cgi-bin/UniqRepsGemys_6089_StSCALER.pkl") -cv=CountVectorizer(analyzer='char',ngram_range=(1,1),vocabulary=AAs) - - -#initialize text data vectorizer - -dataVect=cv.transform(alignment) - -#Scale the data to the training set -X=StSc.transform(dataVect.astype("float64")) - -#make predictions for the original dataset -results=",".join([name,clf.predict(X)[0]]) 
#!/home/erik/bin/python3.6

# CGI front end for the CRESS taxonomy classifier.
# This part of the script turns the submitted web form into three parallel
# lists used by the prediction and HTML-table code further down:
#   seqList  - sequences, one string per record
#   nameList - record names (first word of each FASTA header)
#   lenList  - sequence lengths, already converted to strings for display

#demo record used whenever the submission is missing or is not usable FASTA
DEMO_NAME = 'demo'
DEMO_SEQUENCE = "MPSKKSGPQPHKRWVFTLNNPSEEEKNKIRELPISLFDYFVCGEEGLEEGRTAHLQGFANFAKKQTFNKVKWYFGARCHIEKAKGTDQQNKEYCSKEGHILIECGAPRNQGKRSDLSTAYFDYQQSGPPGMVLLNCCPSCRSSLSEDYYFAILEDCWRTINGGTRRPI"


def parse_fasta(text):
    """Split FASTA-formatted text into a list of (name, sequence) tuples.

    Anything before the first ">" is ignored. The name is the first
    whitespace-delimited word of the header line ("" when the header is
    blank). All whitespace is removed from the sequence lines. Records
    without any sequence characters are dropped because there is nothing
    to classify. Records are returned in input order, and records that
    share a name are all kept.
    """
    records = []
    #element 0 of the split is whatever preceded the first ">" (normally "")
    for chunk in text.split(">")[1:]:
        lines = chunk.splitlines()
        if not lines:
            #empty chunk, e.g. from ">>" - no header line to read
            continue
        header_words = lines[0].split()
        name = header_words[0] if header_words else ""
        sequence = "".join("".join(line.split()) for line in lines[1:])
        if sequence:
            records.append((name, sequence))
    return records


def sequences_from_submission(raw):
    """Build (seqList, nameList, lenList) from the raw 'fasta' form value.

    raw is whatever form.getvalue('fasta') returned. A missing field
    (None), a non-text value, text that does not start with ">" (naive
    check for FASTA format), or FASTA with no usable record all fall back
    to the single demo record, so the caller always gets at least one
    sequence to classify.
    """
    records = []
    if isinstance(raw, str):
        text = raw.strip()  #tolerate blank lines/spaces before the first ">"
        if text.startswith(">"):
            records = parse_fasta(text)
    if not records:
        records = [(DEMO_NAME, DEMO_SEQUENCE)]
    sequences = [sequence for _, sequence in records]
    names = [name for name, _ in records]
    lengths = [str(len(sequence)) for sequence in sequences]
    return sequences, names, lengths


#----------------------------------------------\
# Parse the web-form information to variables   \
#                                                \_______________________________________________________
#                                                                                                        |
# The web server always runs this file as __main__. Keeping the imports and
# the CGI calls under the guard lets the pure helpers above be imported
# without scikit-learn or a CGI environment.
if __name__ == "__main__":
    #import packages to be used
    from sklearn.svm import SVC
    from sklearn.feature_extraction.text import CountVectorizer
    from sklearn.preprocessing import StandardScaler
    # NOTE(review): sklearn.externals.joblib was removed in scikit-learn 0.23;
    # kept as-is for the installed version - confirm before upgrading.
    from sklearn.externals import joblib
    import cgi
    import cgitb

    cgitb.enable()
    form=cgi.FieldStorage()
    alignment = form.getvalue('fasta')
    # NOTE(review): nameList holds user-controlled text; it should be
    # HTML-escaped wherever it is written into the results page.
    seqList, nameList, lenList = sequences_from_submission(alignment)

#--------------------------------------------------------------------------------------------------------+

#----------------------------------------------\
# predict genus of input sequences              \
#                                                \_______________________________________________________
#                                                                                                        |
#list of amino
acids as vocabulary for the CountVectorizer +AAs=['a','c','d','e','f','g','h','i','k','l','m','n','p','q','r','s','t','v','w','y'] + +#load the classifier and scaler +clf=joblib.load("./cgi-bin/SVM_linear_aa_clf.pkl") +StSc=joblib.load("./cgi-bin/UniqRepsGemys_6089_StSCALER.pkl") +cv=CountVectorizer(analyzer='char',ngram_range=(1,1),vocabulary=AAs) + +#initialize text data vectorizer +dataVect=cv.transform(seqList) + +#Scale the data to the training set +X=StSc.transform(dataVect.astype("float64")) + +#make predictions for the original dataset +predictions=clf.predict(X) + + +#----------------------------------------------\ +# Build HTML table of results \ +# \_______________________________________________________ +# | +results="""""" +for k in len(seqList): + results+="""{0}{1}{2}""".format(nameList[k],lenList[k],predictions[k]) +if "demo" in nameList: + results+="""

There seems to have been an error.
If you are expecting more than one prediction or + do not see the name you entered please try the submission form again, making sure that the input is in FASTA format.""" + +#----------------------------------------------\ +# Build output page \ +# \_______________________________________________________ +# | +#build output page parts +#Header and CSS Style bits +header=""" + + + + + + +""" + +#Page contents, first part +body1=""" + + +

Welcome to CRESSdna.org

+ +
+ + + + +
+ +
+

Home

+

Part of the National Science Foundation's Assembling the Tree of Life.

+ Sponsored with a Grant from the National Science Foundation +
+ +
+

Taxonomy

+

Please enter only one word as the name(no space) and only one Rep sequence

+

+
+ +
+ + +
+

+

+
+
+

Contact

+

Questions or comments? Send us an email:

+

email At domain Dot something

+
+ +
+

Results

+

Results from Taxonomy prediction

+ + + + + + +""" + +#Page contents, second part (results fit between body1 and body2) +body2=""" +
Sequence NameLengthPrediction
+

This classifier will return the best fit of the submitted sequence to the training data.
+Currently included in the training data:
+

  • Circoviridae
  • + +
  • Nanoviridae
  • + +
  • Genomoviridae
  • + +
  • Geminiviridae
  • + +
  • Smacovirus
  • +

    +

    +
    + + + +""" + +#close the Page +footer=""" + +""" + +#build the output page +page=header+body1+results+body2+footer + +#send the output as html +output = page.format() +print (output) + +quit() \ No newline at end of file -- cgit v1.2.3