#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""A program to parse the DrReposER webserver.

Description of the DrReposER webserver. It consists of 3 interfaces:

    interface1 = Search for amino acid arrangements similar to known drug
                 binding sites in known target structures / protein-drug
                 complexes
    interface2 = Search for drug binding interfaces in protein-drug complexes
    interface3 = Search a protein structure for amino acid residue
                 arrangements similar to a known drug binding site

--------------------------------------------------------
FIRST_INTERFACE (Similar patterns of amino acids)
--------------------------------------------------------
1) Search by PDB ID ('PDB_ID'):
    General view page-->
        http://27.126.156.175/drreposed/get_by_assam.php?query_id=PDB_ID&query_assam=1eqc&submit=Submit
    Results in tabulated form-->
        http://27.126.156.175/drreposed/drreposer_man_assam.php?myresult=PDB_ID,1eqc
2) Search by Ligand ID ('HETATM_RESIDUES'):
    General view page-->
        http://27.126.156.175/drreposed/get_by_assam.php?query_id=HETATM_RESIDUES&query_assam=CTS&submit=Submit
    Results in tabulated form-->
        http://27.126.156.175/drreposed/drreposer_man_assam.php?myresult=HETATM_RESIDUES,CTS

--------------------------------------------------------
SECOND_INTERFACE (Known binding sites)
--------------------------------------------------------
1) Search by PDB ID ('pdb'):
    General view page-->
        http://27.126.156.175/drreposed/get_by_pdbid.php?pdbid=1mxd&submit=Submit
    Results in tabulated form-->
        http://27.126.156.175/drreposed/drreposer_man_pdb.php?myresult=1mxd
2) Search by Ligand ID ('ligand'):
    General view page-->
        http://27.126.156.175/drreposed/get_by_ligandid.php?ligandid=ACR&submit=Submit
    Results in tabulated form-->
        http://27.126.156.175/drreposed/drreposer_man_ligand.php?myresult=ACR
3) Search by Drug ('drug'):
    General view page-->
        http://27.126.156.175/drreposed/get_by_drugbankid.php?drugbankid=acarbose&submit=Submit
    Results in tabulated form-->
        http://27.126.156.175/drreposed/drreposer_man_drug.php?myresult=ACARBOSE
4) Search by Keyword ('keywords'), results in tabulated form:
    a) Drug indication:
        http://27.126.156.175/drreposed/drreposer_man_keywords.php?myresult=query_indication,diabetes
    b) Source organism:
        http://27.126.156.175/drreposed/drreposer_man_keywords.php?myresult=query_organism,Aspergillus%20awamori
    c) Macromolecule name:
        http://27.126.156.175/drreposed/drreposer_man_keywords.php?myresult=query_macromoleculename,GLUCOAMYLASE-471
    d) Pfam annotation:
        http://27.126.156.175/drreposed/drreposer_man_keywords.php?myresult=query_pfamannotation,7tm_1

--------------------------------------------------------
THIRD_INTERFACE (Search for potential motifs in a protein structure)
--------------------------------------------------------
1) Upload a PDB file
2) Or enter a 4-letter PDB ID
"""
import sys, os, re, requests, ast, optparse

# Root of the DrReposER web application; every request below targets it.
BASE_URL = "http://27.126.156.175/drreposed"

# Pattern used by get_results_list() to locate the exported result data in
# the returned page.
# NOTE(review): the pattern is an empty string in this copy of the file. With
# an empty pattern the first match is always '' and the parsing step in
# get_results_list() raises IndexError, so that function always returns its
# error message. The intended pattern is not recoverable from this file --
# TODO restore it from the upstream source.
EXPORT_DATA_PATTERN = r""

# Help text shared by the SECOND_INTERFACE error message and the CLI usage.
_SECOND_SEARCH_HELP = (
    "Search for PDB ID: 'pdb,$query'\n"
    "Search for Ligand ID: 'ligand,$query'\n"
    "Search for Drug: 'drug,$query'\n"
    "Search for Keywords:\n"
    " Drug indication-->'keywords,indication,$query'\n"
    " Organism-->'keywords,organism,$query'\n"
    " Macromolecule-->'keywords,macromoleculename,$query'\n"
    " Pfam annotation-->'keywords,pfamannotation,$query'"
)

USAGE = (
    "--USAGE: python template_drreposer.py -i interface (first,second,third) "
    "-a args -o output file (optional)\n\n"
    "1) First interface requires two arguments joined by comma. "
    "e.g. 'PDB_ID,1eqc' or 'HETATM_RESIDUES,cts'\n"
    "Search for PDB ID: 'PDB_ID,$query' || "
    "Search for Ligand ID: 'HETATM_RESIDUES,$query'\n\n\n"
    "2) Second interface requires two or three arguments joined by comma. "
    "e.g. 'pdb,1mxd' or 'ligand,ACR' or 'drug,acarbose' or "
    "'keywords,indication,diabetes'\n"
    + _SECOND_SEARCH_HELP + "\n\n\n"
    "3) Third interface requires a PDB ID in four-letter code. e.g. '11gs'"
)


def _native_str(text):
    """Return *text* as the running interpreter's native ``str`` type.

    On Python 2 a ``unicode`` object is encoded to a UTF-8 byte string (what
    the original code produced with ``.encode('utf-8')``); on Python 3 the
    text is returned unchanged so that ``re`` and ``ast`` can consume it.
    """
    if isinstance(text, str):
        return text
    return text.encode('utf-8')


def get_html_text(urll):
    """Fetch *urll* with an HTTP GET and return the response body as ``str``."""
    r = requests.get(urll)
    return _native_str(r.text)


def get_results_list(urll):
    """Download a tabulated-results page and parse its exported data.

    The first match of ``EXPORT_DATA_PATTERN`` is reduced to the text between
    its first ``>`` and the following ``<``; every ``null`` in that text is
    quoted so that ``ast.literal_eval`` can evaluate it as a Python literal.

    Returns the evaluated literal on success, or an error message string when
    the download, the pattern lookup or the evaluation fails.
    """
    try:
        page = get_html_text(urll)
        matches = re.findall(EXPORT_DATA_PATTERN, page)
        payload = matches[0].split(">")[1].split("<")[0]
        # 'null' is not a Python literal; quote it so literal_eval accepts it.
        payload = payload.replace('null', '"null"')
        return ast.literal_eval(payload)
    except Exception:
        # Best-effort boundary: network, lookup and parse failures are all
        # reported the same way. KeyboardInterrupt/SystemExit still propagate.
        return "Error! Either no matches found or could not generate results. "


def FIRST_INTERFACE(args):
    """Search for similar amino acid patterns (first interface).

    *args* is ``'<query_id>,<query_term>'`` where ``query_id`` is ``PDB_ID``
    or ``HETATM_RESIDUES``, e.g. ``'PDB_ID,1eqc'`` or ``'HETATM_RESIDUES,CTS'``.

    Returns whatever ``get_results_list`` returns for the query, or an error
    message string when *args* is malformed.
    """
    parts = args.split(",")
    if len(parts) != 2:
        return ("Error! First interface requires two arguments joined by comma. "
                "e.g. 'PDB_ID,1eqc' or 'HETATM_RESIDUES,cts'\n"
                "Search for PDB ID: 'PDB_ID,$query'\n"
                "Search for Ligand ID: 'HETATM_RESIDUES,$query'")
    query_id, query_term = parts
    if query_id not in ("PDB_ID", "HETATM_RESIDUES"):
        return "Error! First argument must be 'PDB_ID' or HETATM_RESIDUES."
    urll = "{}/drreposer_man_assam.php?myresult={},{}".format(
        BASE_URL, query_id, query_term)
    return get_results_list(urll)


def SECOND_INTERFACE(args):
    """Search the known binding sites (second interface).

    *args* is either ``'<query_id>,<query_term>'`` with ``query_id`` one of
    ``pdb``, ``ligand`` or ``drug`` (e.g. ``'pdb,1mxd'``, ``'ligand,ACR'``,
    ``'drug,acarbose'``), or ``'keywords,<category>,<query_term>'``
    (e.g. ``'keywords,indication,diabetes'``).

    Returns whatever ``get_results_list`` returns for the query, or an error
    message string when *args* is malformed.
    """
    parts = args.split(",")
    if len(parts) == 2:
        query_id, query_term = parts
        if query_id in ("pdb", "ligand", "drug"):
            urll = "{}/drreposer_man_{}.php?myresult={}".format(
                BASE_URL, query_id, query_term)
            return get_results_list(urll)
        if query_id == "keywords":
            return ("Search by keywords requires three arguments joined by comma. "
                    "e.g. 'keywords,indication,diabetes'")
        return "First argument must be 'pdb', 'ligand', 'drug' or 'keywords'."
    if len(parts) == 3:
        query_id, query_category, query_term = parts
        if query_id != "keywords":
            return "First argument for 'Search by keywords' must be 'keywords'"
        urll = "{}/drreposer_man_keywords.php?myresult=query_{},{}".format(
            BASE_URL, query_category, query_term)
        return get_results_list(urll)
    return ("error! Second interface requires two or three arguments joined by comma. "
            "e.g. 'pdb,1mxd' or 'ligand,ACR' or 'drug,acarbose' or "
            "'keywords,indication,diabetes'\n" + _SECOND_SEARCH_HELP)


def THIRD_INTERFACE(args):
    """Submit a four-character PDB ID to the motif search (third interface).

    The ID is POSTed to ``sprite_res/execute_sprite.php``.

    Returns a ``(result_page_url, results_txt_url)`` tuple when both links
    are found in the response, the response text itself when it is at most
    300 characters long, or an error message string otherwise.
    """
    if len(args) != 4:
        return "Error! PDB ID must be a four-letter code."
    urll = BASE_URL + "/sprite_res/execute_sprite.php"
    response = requests.post(urll, {'get': args}, allow_redirects=True)
    rposttext = _native_str(response.text)
    # Short responses are handed back verbatim -- presumably they are
    # error/status messages rather than a results page (TODO confirm); the
    # 300 cutoff comes from the original code.
    if len(rposttext) <= 300:
        return rposttext
    page_links = re.findall(
        r"http://27.126.156.175/drreposed/sprite_res/result_sprite.php[?]id[=].*",
        rposttext)
    file_links = re.findall(
        r"http://27.126.156.175/drreposed/sprite_res/result/.*/results.txt",
        rposttext)
    if not page_links or not file_links:
        # Previously an unguarded [0] raised IndexError here.
        return "Error! Could not find the result links in the server response."
    # The first pattern runs to the end of the line; cut at the next tag.
    return page_links[0].split("<")[0], file_links[0]


# Maps the value of the -i/--interface option to the function serving it.
_INTERFACES = {
    "first": FIRST_INTERFACE,
    "second": SECOND_INTERFACE,
    "third": THIRD_INTERFACE,
}


def _report(results, stream):
    """Write list results to *stream* one per line; anything else to stdout.

    Non-list results (error messages, the THIRD_INTERFACE tuple) always go to
    standard output, even when *stream* is an output file.
    """
    if isinstance(results, list):
        for res in results:
            stream.write("{0}\n".format(res))
    else:
        sys.stdout.write("{0}\n".format(results))


def main(argv=None):
    """Parse the command line, run the requested interface and report results.

    *argv* defaults to ``sys.argv[1:]`` when ``None``.
    """
    # Parse command line arguments.
    parser = optparse.OptionParser(usage=USAGE)
    parser.add_option("-i", "--interface", dest="interface", type="string",
                      help="Interface")
    parser.add_option("-a", "--args", dest="args", type="string",
                      help="Arguments")
    parser.add_option("-o", "--outputfile", dest="output", type="string",
                      help="Output text file")
    (options, _unused) = parser.parse_args(argv)

    if not (options.interface and options.args):
        sys.stdout.write("Not enough arguments supplied\n")
        sys.stdout.write(USAGE + "\n")
        sys.exit()

    handler = _INTERFACES.get(options.interface)
    if handler is None:
        # An unknown interface used to be ignored silently.
        sys.stdout.write("Error! Interface must be 'first', 'second' or 'third'.\n")
        sys.stdout.write(USAGE + "\n")
        sys.exit()

    if options.output:
        # Text mode so the same code runs on Python 2 and 3; the context
        # manager guarantees the file is closed (it was leaked before).
        with open(options.output, "w") as output:
            _report(handler(options.args), output)
    else:
        _report(handler(options.args), sys.stdout)


if __name__ == '__main__':
    main()

# Example calls:
#print FIRST_INTERFACE('PDB_ID,1eqc')
#print FIRST_INTERFACE('HETATM_RESIDUES,CTS')
#print SECOND_INTERFACE('pdb,1mxd')
#print SECOND_INTERFACE('ligand,ACR')
#print SECOND_INTERFACE('drug,acarbose')
#print SECOND_INTERFACE('keywords,indication,parkinson')
#print SECOND_INTERFACE('keywords,organism,candida albicans')
#print SECOND_INTERFACE('keywords,macromoleculename,phosphodiesterase')
#print SECOND_INTERFACE('keywords,pfamannotation,PDEase_I')
#print THIRD_INTERFACE('11gs')