From 47b146536121acc6ac8e3d847be2152500fe3167 Mon Sep 17 00:00:00 2001 From: Brian Cully Date: Fri, 22 Sep 2017 17:12:26 -0400 Subject: Fixup various CGI problems. * Rename cgi-bin to bin, to bypass default cgi-bin alias. * Change assignment to equality check in classifier. * Add .htaccess file to bin dir to allow CGI execution. * Point index.html form to bin. --- SVM_linear_aa_clf.pkl | Bin 187597 -> 0 bytes UniqRepsGemys_6089_StSCALER.pkl | Bin 980 -> 0 bytes bin/.htaccess | 2 + bin/SVM_linear_aa_clf.pkl | Bin 0 -> 187597 bytes bin/UniqRepsGemys_6089_StSCALER.pkl | Bin 0 -> 980 bytes bin/classifier.py | 313 ++++++++++++++++++++++++++++++++ cgi-bin/SVM_linear_aa_clf.pkl | Bin 187597 -> 0 bytes cgi-bin/UniqRepsGemys_6089_StSCALER.pkl | Bin 980 -> 0 bytes cgi-bin/classifier.py | 313 -------------------------------- index.html | 2 +- 10 files changed, 316 insertions(+), 314 deletions(-) delete mode 100644 SVM_linear_aa_clf.pkl delete mode 100644 UniqRepsGemys_6089_StSCALER.pkl create mode 100644 bin/.htaccess create mode 100644 bin/SVM_linear_aa_clf.pkl create mode 100644 bin/UniqRepsGemys_6089_StSCALER.pkl create mode 100755 bin/classifier.py delete mode 100644 cgi-bin/SVM_linear_aa_clf.pkl delete mode 100644 cgi-bin/UniqRepsGemys_6089_StSCALER.pkl delete mode 100755 cgi-bin/classifier.py diff --git a/SVM_linear_aa_clf.pkl b/SVM_linear_aa_clf.pkl deleted file mode 100644 index 1afce0a..0000000 Binary files a/SVM_linear_aa_clf.pkl and /dev/null differ diff --git a/UniqRepsGemys_6089_StSCALER.pkl b/UniqRepsGemys_6089_StSCALER.pkl deleted file mode 100644 index 3a098bd..0000000 Binary files a/UniqRepsGemys_6089_StSCALER.pkl and /dev/null differ diff --git a/bin/.htaccess b/bin/.htaccess new file mode 100644 index 0000000..698afb4 --- /dev/null +++ b/bin/.htaccess @@ -0,0 +1,2 @@ +Options +ExecCGI +SetHandler cgi-script diff --git a/bin/SVM_linear_aa_clf.pkl b/bin/SVM_linear_aa_clf.pkl new file mode 100644 index 0000000..1afce0a Binary files /dev/null and b/bin/SVM_linear_aa_clf.pkl differ diff --git a/bin/UniqRepsGemys_6089_StSCALER.pkl b/bin/UniqRepsGemys_6089_StSCALER.pkl new file mode 100644 index 0000000..3a098bd Binary files /dev/null and b/bin/UniqRepsGemys_6089_StSCALER.pkl differ diff --git a/bin/classifier.py b/bin/classifier.py new file mode 100755 index 0000000..0ae13b5 --- /dev/null +++ b/bin/classifier.py @@ -0,0 +1,313 @@ +#!/home/erik/bin/python3.6 + +#import packages to be used +from sklearn.svm import SVC +from sklearn.feature_extraction.text import CountVectorizer +from sklearn.preprocessing import StandardScaler +from sklearn.externals import joblib +import cgi, cgitb + +#----------------------------------------------\ +# Parse the web-form information to variables \ +# \_______________________________________________________ +# | +cgitb.enable() +form=cgi.FieldStorage() +alignment = form.getvalue('fasta') +if alignment.startswith(">"): #naive check for FASTA format + list=alignment.split(">") + book={} + for a in list: + tempList=a.splitlines() + nameLine=tempList.pop(0) + name=nameLine.split(" ")[0] + seq="".join(tempList) + book[name]=seq + seqList=[] + lenList=[] + nameList=[] + for i in book: + nameList.append(i) + seqList.append(book[i]) + lenList.append(str(len(book[i]))) + + if len(seqList)==0: #check for empty sequence list + seqList = ["MPSKKSGPQPHKRWVFTLNNPSEEEKNKIRELPISLFDYFVCGEEGLEEGRTAHLQGFANFAKKQTFNKVKWYFGARCHIEKAKGTDQQNKEYCSKEGHILIECGAPRNQGKRSDLSTAYFDYQQSGPPGMVLLNCCPSCRSSLSEDYYFAILEDCWRTINGGTRRPI"] + nameList=['demo'] + lenList=[str(len(alignment[0]))] + +else: + seqList = ["MPSKKSGPQPHKRWVFTLNNPSEEEKNKIRELPISLFDYFVCGEEGLEEGRTAHLQGFANFAKKQTFNKVKWYFGARCHIEKAKGTDQQNKEYCSKEGHILIECGAPRNQGKRSDLSTAYFDYQQSGPPGMVLLNCCPSCRSSLSEDYYFAILEDCWRTINGGTRRPI"] + nameList=['demo'] + lenList=[str(len(alignment[0]))] + +#--------------------------------------------------------------------------------------------------------+ + +#----------------------------------------------\ +# predict genus of input sequences \ +# \_______________________________________________________ +# | +#list of amino acids as vocabulary for the CountVectorizer +AAs=['a','c','d','e','f','g','h','i','k','l','m','n','p','q','r','s','t','v','w','y'] + +#load the classifier and scaler +clf=joblib.load("./cgi-bin/SVM_linear_aa_clf.pkl") +StSc=joblib.load("./cgi-bin/UniqRepsGemys_6089_StSCALER.pkl") +cv=CountVectorizer(analyzer='char',ngram_range=(1,1),vocabulary=AAs) + +#initialize text data vectorizer +dataVect=cv.transform(seqList) + +#Scale the data to the training set +X=StSc.transform(dataVect.astype("float64")) + +#make predictions for the original dataset +predictions=clf.predict(X) + + +#----------------------------------------------\ +# Build HTML table of results \ +# \_______________________________________________________ +# | +results="""""" +for k in len(seqList): + results+="""{0}{1}{2}""".format(nameList[k],lenList[k],predictions[k]) +if "demo" in nameList: + results+="""

There seems to have been an error.
If you are expecting more than one prediction or + do not see the name you entered please try the submission form again, making sure that the input is in FASTA format.""" + +#----------------------------------------------\ +# Build output page \ +# \_______________________________________________________ +# | +#build output page parts +#Header and CSS Style bits +header=""" + + + + + + +""" + +#Page contents, first part +body1=""" + + +

Welcome to CRESSdna.org

+ +
+ + + + +
+ +
+

Home

+

Part of the National Science Foundation's Assembling the Tree of Life.

+ Sponsored with a Grant from the National Science Foundation +
+ +
+

Taxonomy

+

Please enter only one word as the name(no space) and only one Rep sequence

+

+
+ +
+ + +
+

+

+
+
+

Contact

+

Questions or comments? Send us an email:

+

email At domain Dot something

+
+ +
+

Results

+

Results from Taxonomy prediction

+ + + + + + +""" + +#Page contents, second part (results fit between body1 and body2) +body2=""" +
Sequence NameLengthPrediction
+

This classifier will return the best fit of the submitted sequence to the training data.
+Currently included in the training data:
+

  • Circoviridae
  • + +
  • Nanoviridae
  • + +
  • Genomoviridae
  • + +
  • Geminiviridae
  • + +
  • Smacovirus
  • +

    +

    +
    + + + +""" + +#close the Page +footer=""" + +""" + +#build the output page +page=header+body1+results+body2+footer + +#send the output as html +output = page.format() +print (output) + +quit() diff --git a/cgi-bin/SVM_linear_aa_clf.pkl b/cgi-bin/SVM_linear_aa_clf.pkl deleted file mode 100644 index 1afce0a..0000000 Binary files a/cgi-bin/SVM_linear_aa_clf.pkl and /dev/null differ diff --git a/cgi-bin/UniqRepsGemys_6089_StSCALER.pkl b/cgi-bin/UniqRepsGemys_6089_StSCALER.pkl deleted file mode 100644 index 3a098bd..0000000 Binary files a/cgi-bin/UniqRepsGemys_6089_StSCALER.pkl and /dev/null differ diff --git a/cgi-bin/classifier.py b/cgi-bin/classifier.py deleted file mode 100755 index ecf7c15..0000000 --- a/cgi-bin/classifier.py +++ /dev/null @@ -1,313 +0,0 @@ -#!/home/erik/bin/python3.6 - -#import packages to be used -from sklearn.svm import SVC -from sklearn.feature_extraction.text import CountVectorizer -from sklearn.preprocessing import StandardScaler -from sklearn.externals import joblib -import cgi, cgitb - -#----------------------------------------------\ -# Parse the web-form information to variables \ -# \_______________________________________________________ -# | -cgitb.enable() -form=cgi.FieldStorage() -alignment = form.getvalue('fasta') -if alignment.startswith(">"): #naive check for FASTA format - list=alignment.split(">") - book={} - for a in list: - tempList=a.splitlines() - nameLine=tempList.pop(0) - name=nameLine.split(" ")[0] - seq="".join(tempList) - book[name]=seq - seqList=[] - lenList=[] - nameList=[] - for i in book: - nameList.append(i) - seqList.append(book[i]) - lenList.append(str(len(book[i]))) - - if len(seqList)=0: #check for empty sequence list - seqList = ["MPSKKSGPQPHKRWVFTLNNPSEEEKNKIRELPISLFDYFVCGEEGLEEGRTAHLQGFANFAKKQTFNKVKWYFGARCHIEKAKGTDQQNKEYCSKEGHILIECGAPRNQGKRSDLSTAYFDYQQSGPPGMVLLNCCPSCRSSLSEDYYFAILEDCWRTINGGTRRPI"] - nameList=['demo'] - lenList=[str(len(alignment[0]))] - -else: - seqList = ["MPSKKSGPQPHKRWVFTLNNPSEEEKNKIRELPISLFDYFVCGEEGLEEGRTAHLQGFANFAKKQTFNKVKWYFGARCHIEKAKGTDQQNKEYCSKEGHILIECGAPRNQGKRSDLSTAYFDYQQSGPPGMVLLNCCPSCRSSLSEDYYFAILEDCWRTINGGTRRPI"] - nameList=['demo'] - lenList=[str(len(alignment[0]))] - -#--------------------------------------------------------------------------------------------------------+ - -#----------------------------------------------\ -# predict genus of input sequences \ -# \_______________________________________________________ -# | -#list of amino acids as vocabulary for the CountVectorizer -AAs=['a','c','d','e','f','g','h','i','k','l','m','n','p','q','r','s','t','v','w','y'] - -#load the classifier and scaler -clf=joblib.load("./cgi-bin/SVM_linear_aa_clf.pkl") -StSc=joblib.load("./cgi-bin/UniqRepsGemys_6089_StSCALER.pkl") -cv=CountVectorizer(analyzer='char',ngram_range=(1,1),vocabulary=AAs) - -#initialize text data vectorizer -dataVect=cv.transform(seqList) - -#Scale the data to the training set -X=StSc.transform(dataVect.astype("float64")) - -#make predictions for the original dataset -predictions=clf.predict(X) - - -#----------------------------------------------\ -# Build HTML table of results \ -# \_______________________________________________________ -# | -results="""""" -for k in len(seqList): - results+="""{0}{1}{2}""".format(nameList[k],lenList[k],predictions[k]) -if "demo" in nameList: - results+="""

    There seems to have been an error.
    If you are expecting more than one prediction or - do not see the name you entered please try the submission form again, making sure that the input is in FASTA format.""" - -#----------------------------------------------\ -# Build output page \ -# \_______________________________________________________ -# | -#build output page parts -#Header and CSS Style bits -header=""" - - - - - - -""" - -#Page contents, first part -body1=""" - - -

    Welcome to CRESSdna.org

    - -
    - - - - -
    - -
    -

    Home

    -

    Part of the National Science Foundation's Assembling the Tree of Life.

    - Sponsored with a Grant from the National Science Foundation -
    - -
    -

    Taxonomy

    -

    Please enter only one word as the name(no space) and only one Rep sequence

    -

    -
    - -
    - - -
    -

    -

    -
    -
    -

    Contact

    -

    Questions or comments? Send us an email:

    -

    email At domain Dot something

    -
    - -
    -

    Results

    -

    Results from Taxonomy prediction

    - - - - - - -""" - -#Page contents, second part (results fit between body1 and body2) -body2=""" -
    Sequence NameLengthPrediction
    -

    This classifier will return the best fit of the submitted sequence to the training data.
    -Currently included in the training data:
    -

  • Circoviridae
  • - -
  • Nanoviridae
  • - -
  • Genomoviridae
  • - -
  • Geminiviridae
  • - -
  • Smacovirus
  • -

    -

    -
    - - - -""" - -#close the Page -footer=""" - -""" - -#build the output page -page=header+body1+results+body2+footer - -#send the output as html -output = page.format() -print (output) - -quit() \ No newline at end of file diff --git a/index.html b/index.html index b90789a..6fbf4a8 100644 --- a/index.html +++ b/index.html @@ -64,7 +64,7 @@ div.tab button.active {

    Taxonomy

    -

    +
    -- cgit v1.2.3