The Buyables links for each source (EM, MC, SA, LN) can be updated by running the script below.
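This demo script converts each molecule's SMILES string to its CAS number and uses it to build the product search link for the source SA; when no CAS number can be resolved, the SMILES string itself is used in the link.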
Note: As a prerequisite, you must have already downloaded the buyables_ori.json file.
import json
import urllib
import urllib.parse

import requests
from tqdm import tqdm
CACTUS_CAS_URL = "https://cactus.nci.nih.gov/chemical/structure/{}/cas"
SIGMA_SEARCH_URL = "https://www.sigmaaldrich.com/US/en/search/{term}?term={term}&type=product"


def _lookup_cas(session, smi):
    """Return the first CAS number the CACTUS resolver gives for *smi*.

    Args:
        session: ``requests.Session`` used to issue the HTTP request.
        smi: SMILES string of the molecule.

    Returns:
        The first line of the resolver response, or ``None`` when the
        request fails, the status is not 200, the response is empty, or
        the response contains "DOCTYPE".
    """
    # SMILES can contain '#', '?' or '%', which would otherwise be parsed
    # as URL syntax (fragment, query, escape) and change the structure
    # that is actually queried.
    quoted_smi = urllib.parse.quote(smi, safe="/")
    try:
        response = session.get(CACTUS_CAS_URL.format(quoted_smi), timeout=3)
    except requests.RequestException:
        # Best effort: any network problem means "no CAS number available".
        return None
    if response.status_code != 200:
        return None
    # Only the first line of the response is used.
    cas = response.text.split("\n")[0].strip()
    # NOTE(review): "DOCTYPE" presumably marks an HTML error page returned
    # with status 200 - confirm against the resolver's behaviour.
    if not cas or "DOCTYPE" in cas:
        return None
    return cas


def _sigma_aldrich_url(session, smi):
    """Build the Sigma-Aldrich search link for the molecule *smi*.

    The link searches by CAS number when one can be resolved; otherwise it
    searches by the SMILES string with ``focus=buildingblocks`` appended.
    """
    cas = _lookup_cas(session, smi)
    if cas is not None:
        term = urllib.parse.quote(cas, safe="/")
        return SIGMA_SEARCH_URL.format(term=term)
    term = urllib.parse.quote(smi, safe="/")
    return SIGMA_SEARCH_URL.format(term=term) + "&focus=buildingblocks"


# Read the input completely before doing anything else, so that a missing or
# malformed input file does not leave a truncated output file behind.
with open("buyables_ori.json", "r", encoding="utf-8") as json_input:
    buyables = json.load(json_input)

new_buyables = []
# One session is shared by all lookups and closed when the loop finishes.
with requests.Session() as s:
    for entry in tqdm(buyables, total=len(buyables)):
        source = entry["source"]
        if source != "SA":
            continue
        smi = entry["smiles"]
        new_buyables.append(
            {
                "smiles": smi,
                "ppg": entry["ppg"],
                "source": source,
                "lead_time": "",
                "properties": [
                    {"link": _sigma_aldrich_url(s, smi)},
                    {"availability": ""},
                ],
            }
        )

# The output is written only after every entry was processed successfully.
with open("sa_buyables_ori_links_new.json", "w", encoding="utf-8") as out:
    json.dump(new_buyables, out)