diff --git a/soup.py b/soup.py
index 0413a94..349b0ee 100644
--- a/soup.py
+++ b/soup.py
@@ -7,9 +7,24 @@ from bs4 import BeautifulSoup
 
 
-def re_link(link):
-    urls = re.findall(r'href=[\'"]?([^\'" >]+)', link)
-    return urls
+def get_node_link(node_contend: str):
+    """Return the link targets of the internal-link anchors in a node's HTML.
+
+    Args:
+        node_contend: HTML markup of a single node.
+
+    Returns:
+        A set holding the ``href`` value of every ``<a>`` element carrying
+        the ``internal-link`` class; anchors with a missing or empty
+        ``href`` are left out.
+    """
+    node_soup = BeautifulSoup(node_contend, "lxml")
+    anchors = node_soup.find_all(name='a', class_='internal-link')
+
+    # Tag.get() yields None when the attribute is absent, so filter on
+    # truthiness to drop missing hrefs as well as empty ones.
+    hrefs = (anchor.get("href") for anchor in anchors)
+    return {href for href in hrefs if href}
 
 
 def new_section(soup):
 
@@ -59,7 +74,7 @@ def soup_link():
                 break
 
             node_id_link = '/' + file_name + '/#' +node_element["id"]
-            node_dict["links"] = re_link(node_content)
+            node_dict["links"] = get_node_link(node_content)
             node_dict["id-link"] = node_id_link
             node_dict["backlinks"] = []
             nodes_dict[node_element["id"]] = node_dict