如何将 str 转换为 BeautifulSoup 标签（Tag）

# Merge the <ol> lists of two HTML documents section by section (a section is
# identified by the text of its <h3> heading), write the merged markup to
# check.html, and convert that file to test.docx with pandoc.
soup = BeautifulSoup(html_string, "html.parser")
h3_tags = soup.find_all('h3')
ol_tags = [each_h3.find_next('ol') for each_h3 in h3_tags]

soup = BeautifulSoup(html_string_new, "html.parser")
h3_tags_new = soup.find_all('h3')
ol_tags_new = [each_h3.find_next('ol') for each_h3 in h3_tags_new]

countries_old = [h3.text for h3 in h3_tags]
countries_new = [h3.text for h3 in h3_tags_new]

# Heading text -> index of its first occurrence in the old document.
old_index = {}
for position, name in enumerate(countries_old):
    old_index.setdefault(name, position)


def _inner_html(ol_tag):
    """Return the markup inside an <ol> tag, or "" when no list was found."""
    # decode_contents() yields the children only, so it keeps working when the
    # <ol> carries attributes (slicing str(tag)[4:-5] would not).
    return ol_tag.decode_contents() if ol_tag is not None else ""


html_parts = []
# NOTE: only headings present in the new document are emitted; sections that
# exist solely in the old document are left out, as in the original loop.
for idx_new, country in enumerate(countries_new):
    items_new = _inner_html(ol_tags_new[idx_new])
    idx = old_index.get(country)
    if idx is None:
        # Heading exists only in the new document: keep it as it is.
        heading = h3_tags_new[idx_new]
        merged = "<ol>" + items_new + "</ol>"
    else:
        heading = h3_tags[idx]
        merged = "<ol>" + _inner_html(ol_tags[idx]) + items_new + "</ol>"
    # A Tag cannot be concatenated to a str directly; serialize it first.
    html_parts.append(str(heading))
    html_parts.append(merged)
html_new = "".join(html_parts)

# The with-statement closes the file; no explicit close() is needed.
with open("check.html", "w", encoding="utf8") as html_file:
    html_file.write(html_new)

import pypandoc

# convert_file() is the file-based replacement for the deprecated convert().
# NOTE(review): "-M2GB" sits before "+RTS", so pandoc may read it as a metadata
# flag rather than a runtime heap limit -- confirm the intended order.
output = pypandoc.convert_file('check.html', 'docx', format='html', outputfile='test.docx', extra_args=["-M2GB", "+RTS", "-K64m", "-RTS"])

1条回答

网友

1楼 · 发布于 2024-06-09 08:43:49

尝试：

from bs4 import BeautifulSoup

# Base document: two <h3> sections, each followed by an <ol> holding one <li>.
html1 = """
<h3>
First heading 
</h3>
<ol>
<li>
hi
</li>
</ol>
<h3>
Second 
</h3>
<ol>
<li>
second
</li>
</ol>
"""

# Second document with the same <h3> headings as html1; its <li> items are the
# ones inserted into html1's lists by the loop further down.
html2 = """
<h3>
First heading 
</h3>
<ol>
<li>
hello
</li>
</ol>
<h3>
Second 
</h3>
<ol>
<li>
second to second
</li>
</ol>
"""

# Parse both documents. soup1 is modified in place: it receives the <li> items
# found in soup2.
soup1 = BeautifulSoup(html1, "html.parser")
soup2 = BeautifulSoup(html2, "html.parser")

# For every <li> of an <ol> that directly follows an <h3> in soup2, find the
# <h3> in soup1 whose text contains the same heading and put the <li> at the
# front of the <ol> that follows it.
for li in soup2.select("h3 + ol > li"):
    h3_text = li.find_previous("h3").get_text(strip=True)
    # `string=` is the current name of the deprecated `text=` argument. The
    # filter can receive None (an <h3> without a single string child), so guard
    # before the substring test.
    h3_soup1 = soup1.find(
        "h3", string=lambda t: t is not None and h3_text in t
    )
    if not h3_soup1:
        continue
    target_ol = h3_soup1.find_next("ol")
    if target_ol is None:
        # Matching heading has no list after it; nothing to insert into.
        continue
    target_ol.insert(0, li)

print(soup1.prettify())

输出：

<h3>
 First heading
</h3>
<ol>
 <li>
  hello
 </li>
 <li>
  hi
 </li>
</ol>
<h3>
 Second
</h3>
<ol>
 <li>
  second to second
 </li>
 <li>
  second
 </li>
</ol>

相关问题更多 >

编程相关推荐

热门问题

热门文章