aboutsummaryrefslogtreecommitdiffstats
path: root/cgi-bin/classifier.py
diff options
context:
space:
mode:
author: elavington <elavington@hotmail.com> 2017-07-27 15:33:17 -0400
committer: GitHub <noreply@github.com> 2017-07-27 15:33:17 -0400
commit: 228f8f203eac1b5881d890e266ac10d46bb1b024 (patch)
tree: 6ee415846c8889e94480676c991581b92fa44de6 /cgi-bin/classifier.py
parent: 3e5406bbd3e05ac59c1cc954de7bf8187baf9a39 (diff)
download: cressdna-228f8f203eac1b5881d890e266ac10d46bb1b024.tar.gz
cressdna-228f8f203eac1b5881d890e266ac10d46bb1b024.zip
Add files via upload
backup
Diffstat (limited to 'cgi-bin/classifier.py')
-rw-r--r-- cgi-bin/classifier.py 52
1 files changed, 52 insertions, 0 deletions
diff --git a/cgi-bin/classifier.py b/cgi-bin/classifier.py
new file mode 100644
index 0000000..ec2b634
--- /dev/null
+++ b/cgi-bin/classifier.py
@@ -0,0 +1,52 @@
+#!/usr/bin/python
+
+#import packages to be used
+from sklearn.svm import SVC
+from sklearn.feature_extraction.text import CountVectorizer
+from sklearn.preprocessing import StandardScaler
+from sklearn.externals import joblib
+import cgi, cgitb
+
+cgitb.enable()
+form=cgi.FieldStorage()
+if form.getvalue('fasta'):
+ alignment = form.getvalue('fasta')
+ alignment=[alignment]
+ name=form.getvalue('seqname')
+ size=len(alignment[0])
+else:
+ alignment = ["MPSKKSGPQPHKRWVFTLNNPSEEEKNKIRELPISLFDYFVCGEEGLEEGRTAHLQGFANFAKKQTFNKVKWYFGARCHIEKAKGTDQQNKEYCSKEGHILIECGAPRNQGKRSDLSTAYFDYQQSGPPGMVLLNCCPSCRSSLSEDYYFAILEDCWRTINGGTRRPI"]
+ name='demo'
+ size=len(alignment[0])
+
+html = open("./www.html/CRESSresults.html")
+page=html.read()
+
+
+AAs=['a','c','d','e','f','g','h','i','k','l','m','n','p','q','r','s','t','v','w','y']
+clf=joblib.load("./cgi-bin/SVM_linear_aa_clf.pkl")
+StSc=joblib.load("./cgi-bin/UniqRepsGemys_6089_StSCALER.pkl")
+cv=CountVectorizer(analyzer='char',ngram_range=(1,1),vocabulary=AAs)
+
+
+#initialize text data vectorizer
+
+dataVect=cv.transform(alignment)
+
+#Scale the data to the training set
+X=StSc.transform(dataVect.astype("float64"))
+
+#make predictions for the original dataset
+results=",".join([name,clf.predict(X)[0]])
+results=",".join([results,str(size)])
+#for i in results:
+ #print(i[0],"\t",i[1])
+
+output = page.format(prediction=results)
+"""f=open('test.html','w')
+f.write(output)
+f.close()"""
+print (output)
+
+
+quit() \ No newline at end of file