1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192 |
- import json
- from time import sleep
- from bs4 import BeautifulSoup
- from requests import request
# Shared accumulator filled by the get_* functions and dumped by main().
# Keyed by the city prefix of the region code; get_village/get_town nest
# further by county code (and town code) below that.
all_res: dict = {}
def get_village(url: "str", direct: "bool" = True):
    """Scrape the village rows of a single page.

    The page at ``url`` is fetched (2 s timeout), decoded as UTF-8 and parsed
    with lxml; every ``tr.villagetr`` row contributes one entry keyed by the
    text of its first cell, holding the second cell as ``"type"`` and the
    third cell as ``"name"``.

    Args:
        url: Page address. The nine characters before the last five
            (``url[-14:-5]``) are used as the town code, so the URL is
            presumably expected to end in ``<9-char code>.html``.
        direct: When true, the result is stored in the module-level
            ``all_res`` under ``[code[:4]][code[:6]][code]`` and nothing is
            returned. When false, the result dict is returned and ``all_res``
            is left untouched.

    Returns:
        The ``{first cell: {"type": ..., "name": ...}}`` dict when ``direct``
        is false, otherwise ``None``.

    Raises:
        AssertionError: If the page contains no ``tr.villagetr`` rows.
    """
    town = url[-14:-5]  # town: 9L
    resp = request("get", url, timeout=2)
    dom = BeautifulSoup(resp.content.decode("utf-8"), "lxml")
    targets = dom.select("tr.villagetr")
    size = len(targets)
    # Raised explicitly rather than via `assert`, which is stripped under
    # `python -O` and would let an empty page be recorded silently.
    if size == 0:
        raise AssertionError(f"no 'tr.villagetr' rows found at {url}")

    res = {}
    for village in targets:
        infos = village.select("td")
        res[infos[0].text] = {"type": infos[1].text, "name": infos[2].text}
    print(f"|----{size} villages")

    if not direct:
        return res

    city, county = town[0:4], town[0:6]
    # Create the intermediate city/county levels on demand.
    all_res.setdefault(city, {}).setdefault(county, {})[town] = res
    return None
def get_town(url: "str"):
    """Scrape every town linked from one page, including each town's villages.

    The page at ``url`` is fetched (2 s timeout), decoded as UTF-8 and parsed
    with lxml. Each ``tr.towntr > td:last-child > a`` link yields one entry
    keyed by ``link[3:12]`` with the link text as ``"name"`` and the result of
    ``get_village`` on the linked page as ``"children"``. The collected dict is
    stored in the module-level ``all_res`` under ``[code[:4]][code]``.

    Args:
        url: Page address. ``url[-11:-5]`` is used as the county code and
            ``url[:-11]`` as the base for child links, so the URL is
            presumably expected to end in ``<6-char code>.html``.

    Raises:
        AssertionError: If the page contains no matching town links (or a
            linked page contains no village rows).
    """
    county = url[-11:-5]  # cid: 6L
    base = url[:-11]
    resp = request("get", url, timeout=2)
    dom = BeautifulSoup(resp.content.decode("utf-8"), "lxml")
    targets = dom.select("tr.towntr > td:last-child > a")
    size = len(targets)
    # Raised explicitly rather than via `assert`, which is stripped under
    # `python -O` and would let an empty page be recorded silently.
    if size == 0:
        raise AssertionError(f"no 'tr.towntr' links found at {url}")

    res = {}
    for i, town in enumerate(targets):
        name, link = town.text, town["href"]  # link: 2/9.html
        child_url = f"{base}/{link}"
        print(f"|--town [{i + 1:02}/{size:02}] from {child_url}")
        res[link[3:12]] = {"name": name, "children": get_village(child_url, False)}
        sleep(1)  # pause between child requests

    city = county[0:4]
    # Create the city level on demand, then replace this county's entry.
    all_res.setdefault(city, {})[county] = res
def get_county(url: "str"):
    """Scrape every entry linked from one city-level page, with its villages.

    The page at ``url`` is fetched (2 s timeout), decoded as UTF-8 and parsed
    with lxml. Each ``tr.towntr > td:last-child > a`` link yields one entry
    keyed by ``link[3:12]`` with the link text as ``"name"`` and the result of
    ``get_village`` on the linked page as ``"children"``. The collected dict
    replaces ``all_res[city]`` in the module-level accumulator.

    NOTE(review): the selector and the direct ``get_village`` call are the same
    as in ``get_town``, i.e. the page is read as a list of towns. Presumably
    this targets city pages that link straight to towns -- confirm.

    Args:
        url: Page address. ``url[-9:-5]`` is used as the city code and
            ``url[:-9]`` as the base for child links, so the URL is presumably
            expected to end in ``<4-char code>.html``.

    Raises:
        AssertionError: If the page contains no matching links (or a linked
            page contains no village rows).
    """
    city = url[-9:-5]  # city: 4L
    base = url[:-9]
    resp = request("get", url, timeout=2)
    dom = BeautifulSoup(resp.content.decode("utf-8"), "lxml")
    targets = dom.select("tr.towntr > td:last-child > a")
    size = len(targets)
    # Raised explicitly rather than via `assert`, which is stripped under
    # `python -O` and would let an empty page be recorded silently.
    if size == 0:
        raise AssertionError(f"no 'tr.towntr' links found at {url}")

    res = {}
    for i, town in enumerate(targets):
        name, link = town.text, town["href"]  # link: 2/9.html
        child_url = f"{base}/{link}"
        print(f"|--county [{i + 1:02}/{size:02}] from {child_url}")
        res[link[3:12]] = {"name": name, "children": get_village(child_url, False)}
        sleep(1)  # pause between child requests

    all_res[city] = res
def main():
    """Re-scrape every entry listed in ``error.json`` and write ``deal.json``.

    Each entry must provide ``"type"`` and ``"url"``. ``"county"`` entries go
    to ``get_county``, ``"town"`` entries to ``get_town``, and anything else to
    ``get_village``. Once all entries are processed, the module-level
    ``all_res`` is dumped to ``deal.json`` as indented, non-ASCII-escaped JSON.
    """
    with open("error.json", encoding="utf-8") as src:
        pending = json.load(src)

    for entry in pending:
        kind = entry["type"]
        handler = (
            get_county if kind == "county"
            else get_town if kind == "town"
            else get_village
        )
        handler(entry["url"])

    with open("deal.json", "w", encoding="utf-8") as dst:
        json.dump(all_res, dst, ensure_ascii=False, indent=2)
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
- """
- type enum('111', '112', '121', '122', '123', '210', '220')
- level enum('A1+', 'A2+', 'A3+', 'A4+', 'A5-', 'B1+', 'B2-')
- """
|