From 1475085bf6cc046260e570cd53cc590201f66b40 Mon Sep 17 00:00:00 2001 From: elavington Date: Mon, 25 Sep 2017 12:50:03 -0400 Subject: Remove unwanted files --- CRESSdna.html | 156 --------------------------- CRESSresults.html | 51 --------- classifier.py | 313 ------------------------------------------------------ 3 files changed, 520 deletions(-) delete mode 100644 CRESSdna.html delete mode 100644 CRESSresults.html delete mode 100644 classifier.py diff --git a/CRESSdna.html b/CRESSdna.html deleted file mode 100644 index f037c56..0000000 --- a/CRESSdna.html +++ /dev/null @@ -1,156 +0,0 @@ - - - - - - - - -

Welcome to CRESSdna.org

- -
- - - - -
- -
-

Home

-

Part of the National Science Foundation's Assembling the Tree of Life.

- Sponsored with a Grant from the National Science Foundation -
- -
-

Taxonomy

-

Please enter only one word as the name(no space) and only one Rep sequence

-

-
- -
- - -
-

-

-
- - -
-

Contact

-

Questions or comments? Send us an email:

-

email At domain Dot something

-
- -
-

Results

-

Results from Taxonomy prediction

-
- - - - - diff --git a/CRESSresults.html b/CRESSresults.html deleted file mode 100644 index 47c4227..0000000 --- a/CRESSresults.html +++ /dev/null @@ -1,51 +0,0 @@ - - - - - - - - -

Taxonomy Prediction Results

-

Results as Name, predicted Genus, length of sequence:


-

-

This classifier will return the best fit of the submitted sequence to the training data.
-Currently included in the training data:
-

  • Circoviridae
  • - -
  • Nanoviridae
  • - -
  • Genomoviridae
  • - -
  • Geminiviridae
  • - -
  • Smacovirus
  • - - Return to CRESSdna.org -

    - - - - diff --git a/classifier.py b/classifier.py deleted file mode 100644 index ecf7c15..0000000 --- a/classifier.py +++ /dev/null @@ -1,313 +0,0 @@ -#!/home/erik/bin/python3.6 - -#import packages to be used -from sklearn.svm import SVC -from sklearn.feature_extraction.text import CountVectorizer -from sklearn.preprocessing import StandardScaler -from sklearn.externals import joblib -import cgi, cgitb - -#----------------------------------------------\ -# Parse the web-form information to variables \ -# \_______________________________________________________ -# | -cgitb.enable() -form=cgi.FieldStorage() -alignment = form.getvalue('fasta') -if alignment.startswith(">"): #naive check for FASTA format - list=alignment.split(">") - book={} - for a in list: - tempList=a.splitlines() - nameLine=tempList.pop(0) - name=nameLine.split(" ")[0] - seq="".join(tempList) - book[name]=seq - seqList=[] - lenList=[] - nameList=[] - for i in book: - nameList.append(i) - seqList.append(book[i]) - lenList.append(str(len(book[i]))) - - if len(seqList)=0: #check for empty sequence list - seqList = ["MPSKKSGPQPHKRWVFTLNNPSEEEKNKIRELPISLFDYFVCGEEGLEEGRTAHLQGFANFAKKQTFNKVKWYFGARCHIEKAKGTDQQNKEYCSKEGHILIECGAPRNQGKRSDLSTAYFDYQQSGPPGMVLLNCCPSCRSSLSEDYYFAILEDCWRTINGGTRRPI"] - nameList=['demo'] - lenList=[str(len(alignment[0]))] - -else: - seqList = ["MPSKKSGPQPHKRWVFTLNNPSEEEKNKIRELPISLFDYFVCGEEGLEEGRTAHLQGFANFAKKQTFNKVKWYFGARCHIEKAKGTDQQNKEYCSKEGHILIECGAPRNQGKRSDLSTAYFDYQQSGPPGMVLLNCCPSCRSSLSEDYYFAILEDCWRTINGGTRRPI"] - nameList=['demo'] - lenList=[str(len(alignment[0]))] - -#--------------------------------------------------------------------------------------------------------+ - -#----------------------------------------------\ -# predict genus of input sequences \ -# \_______________________________________________________ -# | -#list of amino acids as vocabulary for the CountVectorizer -AAs=['a','c','d','e','f','g','h','i','k','l','m','n','p','q','r','s','t','v','w','y'] - -#load the classifier and scaler -clf=joblib.load("./cgi-bin/SVM_linear_aa_clf.pkl") -StSc=joblib.load("./cgi-bin/UniqRepsGemys_6089_StSCALER.pkl") -cv=CountVectorizer(analyzer='char',ngram_range=(1,1),vocabulary=AAs) - -#initialize text data vectorizer -dataVect=cv.transform(seqList) - -#Scale the data to the training set -X=StSc.transform(dataVect.astype("float64")) - -#make predictions for the original dataset -predictions=clf.predict(X) - - -#----------------------------------------------\ -# Build HTML table of results \ -# \_______________________________________________________ -# | -results="""""" -for k in len(seqList): - results+="""{0}{1}{2}""".format(nameList[k],lenList[k],predictions[k]) -if "demo" in nameList: - results+="""

    There seems to have been an error.
    If you are expecting more than one prediction or - do not see the name you entered please try the submission form again, making sure that the input is in FASTA format.""" - -#----------------------------------------------\ -# Build output page \ -# \_______________________________________________________ -# | -#build output page parts -#Header and CSS Style bits -header=""" - - - - - - -""" - -#Page contents, first part -body1=""" - - -

    Welcome to CRESSdna.org

    - -
    - - - - -
    - -
    -

    Home

    -

    Part of the National Science Foundation's Assembling the Tree of Life.

    - Sponsored with a Grant from the National Science Foundation -
    - -
    -

    Taxonomy

    -

    Please enter only one word as the name(no space) and only one Rep sequence

    -

    -
    - -
    - - -
    -

    -

    -
    -
    -

    Contact

    -

    Questions or comments? Send us an email:

    -

    email At domain Dot something

    -
    - -
    -

    Results

    -

    Results from Taxonomy prediction

    - - - - - - -""" - -#Page contents, second part (results fit between body1 and body2) -body2=""" -
    Sequence NameLengthPrediction
    -

    This classifier will return the best fit of the submitted sequence to the training data.
    -Currently included in the training data:
    -

  • Circoviridae
  • - -
  • Nanoviridae
  • - -
  • Genomoviridae
  • - -
  • Geminiviridae
  • - -
  • Smacovirus
  • -

    -

    -
    - - - -""" - -#close the Page -footer=""" - -""" - -#build the output page -page=header+body1+results+body2+footer - -#send the output as html -output = page.format() -print (output) - -quit() \ No newline at end of file -- cgit v1.2.3