使用Pygments过滤空格和换行符
我一直在尝试给我的Django网站添加语法高亮。问题是我发现像 &nbsp; 和 <br /> 这些字符也被格式化了。有没有办法保留这些字符呢?这是我使用的代码:
from BeautifulSoup import BeautifulSoup
from django import template
from django.template.defaultfilters import stringfilter
import pygments
import pygments.formatters
import pygments.lexers
# Template library instance on which this module's filter is registered.
register = template.Library()
@register.filter
@stringfilter
def pygmentized(html):
    """Syntax-highlight every ``<code>`` element of *html* that has a class.

    The markup is parsed with BeautifulSoup.  For each ``<code>`` tag that
    carries a ``class`` attribute, the tag's children are serialized back to
    text, highlighted by Pygments using the attribute value as the lexer
    name, and the tag's contents are replaced with the highlighted markup.
    ``<code>`` tags without a ``class`` attribute are left untouched.

    Returns the whole document serialized as a unicode string.  Exceptions
    raised while highlighting (e.g. by ``get_lexer_by_name``) are not
    handled here and propagate to the caller.
    """
    soup = BeautifulSoup(html)
    # The formatter is created with default options, so one instance can
    # serve every block instead of being rebuilt per iteration.
    formatter = pygments.formatters.HtmlFormatter()
    for block in soup.findAll('code'):
        # NOTE(review): has_key is kept on purpose -- `block` is a
        # BeautifulSoup Tag, not a dict, and `'class' in block` presumably
        # tests child nodes rather than attributes; verify before changing.
        if not block.has_key('class'):
            continue
        # The children are re-serialized as markup, so entities and nested
        # tags reach the lexer as literal text.
        code = ''.join(unicode(item) for item in block.contents)
        lexer = pygments.lexers.get_lexer_by_name(block['class'], stripall=True)
        code_hl = pygments.highlight(code, lexer, formatter)
        block.contents = [BeautifulSoup(code_hl)]
        block.name = 'code'
    return unicode(soup)
1 个回答
1
嗯,Petri说得对,pre是用来表示代码块的。在他指出这一点之前,我写了一个函数来清理第一次的输出,虽然有点乱,但也许有些人只是想从最终输出中去掉某些东西,可能会觉得这个方法还不错:
from BeautifulSoup import BeautifulSoup
from django import template
from django.template.defaultfilters import stringfilter
import pygments
import pygments.formatters
import pygments.lexers
# Template library instance on which this module's filter is registered.
register = template.Library()
# Maps the exact text of a highlighted <span> to the literal markup that
# replaces the whole span; an empty string replaces the span with nothing.
wanted = {
    'br': '<br />',
    'BR': '<BR />',
    # The entity must stay spelled out: a decoded space here would not put
    # the non-breaking-space entity back into the output.
    'nbsp': '&nbsp;',
    'NBSP': '&NBSP;',
    '/>': '',
}
def uglyfilter(html):
    """Replace ``<span>`` elements of *html* whose text is a key of ``wanted``.

    The markup is parsed with BeautifulSoup.  For every ``<span>``, all of
    its text nodes are concatenated; when the result is a key of the
    module-level ``wanted`` mapping, the span is replaced by the mapped
    string.  Spans with any other text are left as they are.

    Returns the resulting document serialized as a unicode string.
    """
    content = BeautifulSoup(html)
    for node in content.findAll('span'):
        data = ''.join(node.findAll(text=True))
        if data in wanted:
            node.replaceWith(wanted[data])
    return unicode(content)
@register.filter
@stringfilter
def pygmentized(html):
    """Syntax-highlight every ``<pre>`` element of *html* that has a class.

    The markup is parsed with BeautifulSoup.  For each ``<pre>`` tag that
    carries a ``class`` attribute, the tag's children are serialized back to
    text, highlighted by Pygments using the attribute value as the lexer
    name, passed through ``uglyfilter``, and the tag's contents are replaced
    with the result.  ``<pre>`` tags without a ``class`` attribute are left
    untouched.

    Returns the whole document serialized as a unicode string.  Exceptions
    raised while highlighting (e.g. by ``get_lexer_by_name``) are not
    handled here and propagate to the caller.
    """
    soup = BeautifulSoup(html)
    # The formatter is created with default options, so one instance can
    # serve every block instead of being rebuilt per iteration.
    formatter = pygments.formatters.HtmlFormatter()
    for block in soup.findAll('pre'):
        # NOTE(review): has_key is kept on purpose -- `block` is a
        # BeautifulSoup Tag, not a dict, and `'class' in block` presumably
        # tests child nodes rather than attributes; verify before changing.
        if not block.has_key('class'):
            continue
        # The children are re-serialized as markup, so entities and nested
        # tags reach the lexer as literal text.
        code = ''.join(unicode(item) for item in block.contents)
        lexer = pygments.lexers.get_lexer_by_name(block['class'], stripall=True)
        code_hl = pygments.highlight(code, lexer, formatter)
        # Post-process the highlighted markup before re-parsing it.
        clean = uglyfilter(code_hl)
        block.contents = [BeautifulSoup(clean)]
        block.name = 'pre'
    return unicode(soup)