From 228f8f203eac1b5881d890e266ac10d46bb1b024 Mon Sep 17 00:00:00 2001
From: elavington
Date: Thu, 27 Jul 2017 15:33:17 -0400
Subject: Add files via upload
---
CRESSdna.html | 156 ++++++++++++++++++++++++++++++++
CRESSresults.html | 51 +++++++++++
cgi-bin/SVM_linear_aa_clf.pkl | Bin 0 -> 187597 bytes
cgi-bin/UniqRepsGemys_6089_StSCALER.pkl | Bin 0 -> 980 bytes
cgi-bin/classifier.py | 52 +++++++++++
5 files changed, 259 insertions(+)
create mode 100644 CRESSdna.html
create mode 100644 CRESSresults.html
create mode 100644 cgi-bin/SVM_linear_aa_clf.pkl
create mode 100644 cgi-bin/UniqRepsGemys_6089_StSCALER.pkl
create mode 100644 cgi-bin/classifier.py
diff --git a/CRESSdna.html b/CRESSdna.html
new file mode 100644
index 0000000..a97d1c1
--- /dev/null
+++ b/CRESSdna.html
@@ -0,0 +1,156 @@
+
+
+
+
+
+
+
+
+Welcome to CRESSdna.org
+
+
+
+
+
+
+
+
+
+
+
+
Taxonomy
+
Please enter a single-word name (no spaces) and only one Rep sequence
+
+
+
+
+
+
+
+
Contact
+
Questions or comments? Send us an email:
+
email At domain Dot something
+
+
+
+
Results
+
Results from Taxonomy prediction
+
+
+
+
+
+
diff --git a/CRESSresults.html b/CRESSresults.html
new file mode 100644
index 0000000..8a78bcd
--- /dev/null
+++ b/CRESSresults.html
@@ -0,0 +1,51 @@
+
+
+
+
+
+
+
+
+Taxonomy Prediction Results
+ Results as Name, predicted Genus, length of sequence:
+
+ This classifier will return the best fit of the submitted sequence to the training data.
+Currently included in the training data:
+
Circoviridae
+
+ - Circovirus
+ - Cyclovirus
+
+ Nanoviridae
+
+ - Babuvirus
+ - Nanovirus
+
+ Genomoviridae
+
+ - Gemycircularvirus
+ - Gemygorvirus
+ - Gemykibivirus
+ - Gemykolovirus
+ - Gemykrogvirus
+ - Gemyvongvirus
+
+ Geminiviridae
+
+ - Becurtovirus
+ - Begomovirus
+ - Capulavirus
+ - Curtovirus
+ - Eragrovirus
+ - Grablovirus
+ - Mastrevirus
+ - Turncurtovirus
+
+ Smacovirus
+
+ Return to CRESSdna.org
+
+
+
+
+
diff --git a/cgi-bin/SVM_linear_aa_clf.pkl b/cgi-bin/SVM_linear_aa_clf.pkl
new file mode 100644
index 0000000..1afce0a
Binary files /dev/null and b/cgi-bin/SVM_linear_aa_clf.pkl differ
diff --git a/cgi-bin/UniqRepsGemys_6089_StSCALER.pkl b/cgi-bin/UniqRepsGemys_6089_StSCALER.pkl
new file mode 100644
index 0000000..3a098bd
Binary files /dev/null and b/cgi-bin/UniqRepsGemys_6089_StSCALER.pkl differ
diff --git a/cgi-bin/classifier.py b/cgi-bin/classifier.py
new file mode 100644
index 0000000..ec2b634
--- /dev/null
+++ b/cgi-bin/classifier.py
@@ -0,0 +1,52 @@
+#!/usr/bin/python
+
+#import packages to be used
+from sklearn.svm import SVC
+from sklearn.feature_extraction.text import CountVectorizer
+from sklearn.preprocessing import StandardScaler
+from sklearn.externals import joblib
+import cgi, cgitb
+
+cgitb.enable()
+form=cgi.FieldStorage()
+if form.getvalue('fasta'):
+ alignment = form.getvalue('fasta')
+ alignment=[alignment]
+ name=form.getvalue('seqname')
+ size=len(alignment[0])
+else:
+ alignment = ["MPSKKSGPQPHKRWVFTLNNPSEEEKNKIRELPISLFDYFVCGEEGLEEGRTAHLQGFANFAKKQTFNKVKWYFGARCHIEKAKGTDQQNKEYCSKEGHILIECGAPRNQGKRSDLSTAYFDYQQSGPPGMVLLNCCPSCRSSLSEDYYFAILEDCWRTINGGTRRPI"]
+ name='demo'
+ size=len(alignment[0])
+
+html = open("./www.html/CRESSresults.html")
+page=html.read()
+
+
+AAs=['a','c','d','e','f','g','h','i','k','l','m','n','p','q','r','s','t','v','w','y']
+clf=joblib.load("./cgi-bin/SVM_linear_aa_clf.pkl")
+StSc=joblib.load("./cgi-bin/UniqRepsGemys_6089_StSCALER.pkl")
+cv=CountVectorizer(analyzer='char',ngram_range=(1,1),vocabulary=AAs)
+
+
+#initialize text data vectorizer
+
+dataVect=cv.transform(alignment)
+
+#Scale the data to the training set
+X=StSc.transform(dataVect.astype("float64"))
+
+#make predictions for the original dataset
+results=",".join([name,clf.predict(X)[0]])
+results=",".join([results,str(size)])
+#for i in results:
+ #print(i[0],"\t",i[1])
+
+output = page.format(prediction=results)
+"""f=open('test.html','w')
+f.write(output)
+f.close()"""
+print (output)
+
+
+quit()
\ No newline at end of file
--
cgit v1.2.3
From e73bbde58c22e50cb77b2e8b0eb2193dc55fd8fa Mon Sep 17 00:00:00 2001
From: elavington <27739361+elavington@users.noreply.github.com>
Date: Mon, 7 Aug 2017 09:35:07 -0400
Subject: Rename index.html to index_placeholder.html
---
index.html | 12 ------------
index_placeholder.html | 12 ++++++++++++
2 files changed, 12 insertions(+), 12 deletions(-)
delete mode 100644 index.html
create mode 100644 index_placeholder.html
diff --git a/index.html b/index.html
deleted file mode 100644
index 2957612..0000000
--- a/index.html
+++ /dev/null
@@ -1,12 +0,0 @@
-
-
-
- CRESSDNA
-
-
-
- Circular Rep-Encoding Single Stranded DNA Viruses
- Part of the National Science Foundation's Assembling the Tree of Life.
-
-
-
diff --git a/index_placeholder.html b/index_placeholder.html
new file mode 100644
index 0000000..2957612
--- /dev/null
+++ b/index_placeholder.html
@@ -0,0 +1,12 @@
+
+
+
+ CRESSDNA
+
+
+
+ Circular Rep-Encoding Single Stranded DNA Viruses
+ Part of the National Science Foundation's Assembling the Tree of Life.
+
+
+
--
cgit v1.2.3
From 44b05977b9cdd1d7c66f52cb81f988177d4ec391 Mon Sep 17 00:00:00 2001
From: elavington <27739361+elavington@users.noreply.github.com>
Date: Mon, 7 Aug 2017 09:36:08 -0400
Subject: Rename CRESSdna.html to index.html
---
CRESSdna.html | 156 ----------------------------------------------------------
index.html | 156 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 156 insertions(+), 156 deletions(-)
delete mode 100644 CRESSdna.html
create mode 100644 index.html
diff --git a/CRESSdna.html b/CRESSdna.html
deleted file mode 100644
index a97d1c1..0000000
--- a/CRESSdna.html
+++ /dev/null
@@ -1,156 +0,0 @@
-
-
-
-
-
-
-
-
-Welcome to CRESSdna.org
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Contact
-
Questions or comments? Send us an email:
-
email At domain Dot something
-
-
-
-
Results
-
Results from Taxonomy prediction
-
-
-
-
-
-
diff --git a/index.html b/index.html
new file mode 100644
index 0000000..a97d1c1
--- /dev/null
+++ b/index.html
@@ -0,0 +1,156 @@
+
+
+
+
+
+
+
+
+Welcome to CRESSdna.org
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Contact
+
Questions or comments? Send us an email:
+
email At domain Dot something
+
+
+
+
Results
+
Results from Taxonomy prediction
+
+
+
+
+
+
--
cgit v1.2.3
From ced7b0fadf5f113e6e040a1e97e7199a2cc65e05 Mon Sep 17 00:00:00 2001
From: elavington <27739361+elavington@users.noreply.github.com>
Date: Mon, 7 Aug 2017 09:36:41 -0400
Subject: Update CRESSresults.html
---
CRESSresults.html | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/CRESSresults.html b/CRESSresults.html
index 8a78bcd..09c552a 100644
--- a/CRESSresults.html
+++ b/CRESSresults.html
@@ -43,7 +43,7 @@ Currently included in the training data:
Smacovirus
- Return to CRESSdna.org
+ Return to CRESSdna.org
- Circular Rep-Encoding Single Stranded DNA Viruses
- Part of the National Science Foundation's Assembling the Tree of Life.
-
-
--
cgit v1.2.3
From f1528dc908f0ebc97f2d3864db57ee8417cfccb4 Mon Sep 17 00:00:00 2001
From: elavington <27739361+elavington@users.noreply.github.com>
Date: Mon, 7 Aug 2017 09:38:22 -0400
Subject: Update classifier.py
---
cgi-bin/classifier.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/cgi-bin/classifier.py b/cgi-bin/classifier.py
index ec2b634..8f8450b 100644
--- a/cgi-bin/classifier.py
+++ b/cgi-bin/classifier.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/home/erik/bin/python3.6
#import packages to be used
from sklearn.svm import SVC
@@ -49,4 +49,4 @@ f.close()"""
print (output)
-quit()
\ No newline at end of file
+quit()
--
cgit v1.2.3
From f7d7849fcd6ce02a59db8c5fadc29d1962476493 Mon Sep 17 00:00:00 2001
From: elavington <27739361+elavington@users.noreply.github.com>
Date: Wed, 9 Aug 2017 14:34:42 -0400
Subject: Delete index_placeholder.html
could not create file on server via pull request
---
index_placeholder.html | 12 ------------
1 file changed, 12 deletions(-)
delete mode 100644 index_placeholder.html
diff --git a/index_placeholder.html b/index_placeholder.html
deleted file mode 100644
index 2957612..0000000
--- a/index_placeholder.html
+++ /dev/null
@@ -1,12 +0,0 @@
-
-
-