From 47b146536121acc6ac8e3d847be2152500fe3167 Mon Sep 17 00:00:00 2001 From: Brian Cully Date: Fri, 22 Sep 2017 17:12:26 -0400 Subject: Fixup various CGI problems. * Rename cgi-bin to bin, to bypass default cgi-bin alias. * Change assignment to equality check in classifier. * Add .htaccess file to bin dir to allow CGI execution. * Point index.html form to bin. --- cgi-bin/SVM_linear_aa_clf.pkl | Bin 187597 -> 0 bytes cgi-bin/UniqRepsGemys_6089_StSCALER.pkl | Bin 980 -> 0 bytes cgi-bin/classifier.py | 313 -------------------------------- 3 files changed, 313 deletions(-) delete mode 100644 cgi-bin/SVM_linear_aa_clf.pkl delete mode 100644 cgi-bin/UniqRepsGemys_6089_StSCALER.pkl delete mode 100755 cgi-bin/classifier.py (limited to 'cgi-bin') diff --git a/cgi-bin/SVM_linear_aa_clf.pkl b/cgi-bin/SVM_linear_aa_clf.pkl deleted file mode 100644 index 1afce0a..0000000 Binary files a/cgi-bin/SVM_linear_aa_clf.pkl and /dev/null differ diff --git a/cgi-bin/UniqRepsGemys_6089_StSCALER.pkl b/cgi-bin/UniqRepsGemys_6089_StSCALER.pkl deleted file mode 100644 index 3a098bd..0000000 Binary files a/cgi-bin/UniqRepsGemys_6089_StSCALER.pkl and /dev/null differ diff --git a/cgi-bin/classifier.py b/cgi-bin/classifier.py deleted file mode 100755 index ecf7c15..0000000 --- a/cgi-bin/classifier.py +++ /dev/null @@ -1,313 +0,0 @@ -#!/home/erik/bin/python3.6 - -#import packages to be used -from sklearn.svm import SVC -from sklearn.feature_extraction.text import CountVectorizer -from sklearn.preprocessing import StandardScaler -from sklearn.externals import joblib -import cgi, cgitb - -#----------------------------------------------\ -# Parse the web-form information to variables \ -# \_______________________________________________________ -# | -cgitb.enable() -form=cgi.FieldStorage() -alignment = form.getvalue('fasta') -if alignment.startswith(">"): #naive check for FASTA format - list=alignment.split(">") - book={} - for a in list: - tempList=a.splitlines() - nameLine=tempList.pop(0) - name=nameLine.split(" ")[0] - seq="".join(tempList) - book[name]=seq - seqList=[] - lenList=[] - nameList=[] - for i in book: - nameList.append(i) - seqList.append(book[i]) - lenList.append(str(len(book[i]))) - - if len(seqList)=0: #check for empty sequence list - seqList = ["MPSKKSGPQPHKRWVFTLNNPSEEEKNKIRELPISLFDYFVCGEEGLEEGRTAHLQGFANFAKKQTFNKVKWYFGARCHIEKAKGTDQQNKEYCSKEGHILIECGAPRNQGKRSDLSTAYFDYQQSGPPGMVLLNCCPSCRSSLSEDYYFAILEDCWRTINGGTRRPI"] - nameList=['demo'] - lenList=[str(len(alignment[0]))] - -else: - seqList = ["MPSKKSGPQPHKRWVFTLNNPSEEEKNKIRELPISLFDYFVCGEEGLEEGRTAHLQGFANFAKKQTFNKVKWYFGARCHIEKAKGTDQQNKEYCSKEGHILIECGAPRNQGKRSDLSTAYFDYQQSGPPGMVLLNCCPSCRSSLSEDYYFAILEDCWRTINGGTRRPI"] - nameList=['demo'] - lenList=[str(len(alignment[0]))] - -#--------------------------------------------------------------------------------------------------------+ - -#----------------------------------------------\ -# predict genus of input sequences \ -# \_______________________________________________________ -# | -#list of amino acids as vocabulary for the CountVectorizer -AAs=['a','c','d','e','f','g','h','i','k','l','m','n','p','q','r','s','t','v','w','y'] - -#load the classifier and scaler -clf=joblib.load("./cgi-bin/SVM_linear_aa_clf.pkl") -StSc=joblib.load("./cgi-bin/UniqRepsGemys_6089_StSCALER.pkl") -cv=CountVectorizer(analyzer='char',ngram_range=(1,1),vocabulary=AAs) - -#initialize text data vectorizer -dataVect=cv.transform(seqList) - -#Scale the data to the training set -X=StSc.transform(dataVect.astype("float64")) - -#make predictions for the original dataset -predictions=clf.predict(X) - - -#----------------------------------------------\ -# Build HTML table of results \ -# \_______________________________________________________ -# | -results="""""" -for k in len(seqList): - results+="""{0}{1}{2}""".format(nameList[k],lenList[k],predictions[k]) -if "demo" in nameList: - results+="""

There seems to have been an error.
If you are expecting more than one prediction or - do not see the name you entered please try the submission form again, making sure that the input is in FASTA format.""" - -#----------------------------------------------\ -# Build output page \ -# \_______________________________________________________ -# | -#build output page parts -#Header and CSS Style bits -header=""" - - - - - - -""" - -#Page contents, first part -body1=""" - - -

Welcome to CRESSdna.org

- -
- - - - -
- -
-

Home

-

Part of the National Science Foundation's Assembling the Tree of Life.

- Sponsored with a Grant from the National Science Foundation -
- -
-

Taxonomy

-

Please enter only one word as the name(no space) and only one Rep sequence

-

-
- -
- - -
-

-

-
-
-

Contact

-

Questions or comments? Send us an email:

-

email At domain Dot something

-
- -
-

Results

-

Results from Taxonomy prediction

- - - - - - -""" - -#Page contents, second part (results fit between body1 and body2) -body2=""" -
Sequence NameLengthPrediction
-

This classifier will return the best fit of the submitted sequence to the training data.
-Currently included in the training data:
-

  • Circoviridae
  • - -
  • Nanoviridae
  • - -
  • Genomoviridae
  • - -
  • Geminiviridae
  • - -
  • Smacovirus
  • -

    -

    -
    - - - -""" - -#close the Page -footer=""" - -""" - -#build the output page -page=header+body1+results+body2+footer - -#send the output as html -output = page.format() -print (output) - -quit() \ No newline at end of file -- cgit v1.2.3