Python比D快?IO操作似乎让D变得很慢...这是怎么回事?
我在上课的时候(nand2tetris 课程)写了一个把虚拟机语言翻译成汇编语言的翻译器。最开始我是用Python写的,但因为我正在学D语言,所以想把它移植到D。D语言在语法上和Python挺像的,所以转换起来并不太难。我原以为D语言作为一种注重性能的编译型语言,速度应该至少和Python一样快,处理大文件时会更快。但结果正好相反!尽管两个程序的算法完全相同,当我处理一个非常大的文件时,D版本总是比Python版本慢一点。在一个大约有500000行的文件上,Python大约花了2.6秒,而D则花了3秒。虽然这个差距不算太大,但Python竟然会更快,这点还是挺值得注意的。
我并不是说我天真到认为Python整体上比D快;不过在这个例子中,至少看起来D并没有直观上更快。我希望能得到一些关于我D代码性能下降可能原因的建议。我觉得瓶颈可能在输入输出操作上,但我不太确定。
下面是源代码。具体细节不太重要;主要是分配了一些汇编语言的模板,然后对虚拟机语言进行线性遍历,把每条指令翻译成对应的汇编代码块。
编辑:在使用 dmd -O -release -inline -m64 重新编译D代码后,D版本反而胜出,处理同样的输入只用了2.20秒。不过问题依然存在:为什么在代码几乎相同、且没有开启这些优化选项的情况下,D的表现会比Python慢。
编辑2:根据下面的建议,我把简单的字符串列表换成了 appender!string()
,这样时间明显改善了。不过值得一提的是,如果你在 appender
中有很多字符串,不要用类似下面的命令把它们写入文件:
auto outputfile = File("foo.txt","w");
foreach(str; my_appender.data)
outputfile.write(str);
而是应该写成:
auto outputfile = File("foo.txt","w");
outputfile.write(my_appender.data);
第二种写法会比使用简单的 string[] 有小幅度的性能提升。但使用第一种写法时,我的执行时间翻倍,性能下降很严重(appender!string() 的 data 是一整个字符串,对它做 foreach 会逐个字符地调用 write)。
改用 appender!string() 后,翻译前面提到的大文件大约花了2.75秒(Python是2.8秒),而原来的版本大约花了3秒。这样做,再加上在 dmd 中使用优化标志,总的运行时间降到了 1.98 秒!:)
Python:
#!/usr/bin/python
import sys
# VM arithmetic/logic command -> text substituted into the templates below:
# an operator for binary/unary commands, a jump mnemonic for comparisons.
operations_dict = {
    "add": "+", "sub": "-",
    "and": "&", "or": "|",
    "not": "!", "neg": "-",
    "lt": "JLT", "gt": "JGT",
    "eq": "JEQ", "leq": "JLE",
    "geq": "JGE",
}

# Segment name -> (symbol, register); both values are substituted into
# push_var_template / pop_template by compile_memory.
vars_dict = {
    "this": ("THIS", "M"),
    "that": ("THAT", "M"),
    "argument": ("ARG", "M"),
    "local": ("LCL", "M"),
    "static": ("f.%d", "M"),
    "temp": ("TEMP", "A"),
}

# Shared prologue/epilogue fragments reused by several templates.
start = "@SP\nAM=M-1\n"
end = "@SP\nM=M+1\n"

# Binary command: one %s placeholder for the operator.
binary_template = start + (
    "D=M\n"
    "@SP\n"
    "AM=M-1\n"
    "M=M%sD\n"
) + end

# Unary command: one %s placeholder for the operator.
unary_template = start + "M=%sM\n" + end

# Comparison command: placeholders are (id, jump, id, id, id, id, id), where
# id is the value used to build the COMP.<id>.* labels.
comp_template = start + (
    "D=M\n"
    "@SP\n"
    "AM=M-1\n"
    "D=M-D\n"
    "@COMP.%d.TRUE\n"
    "D;%s\n"
    "@COMP.%d.FALSE\n"
    "0;JMP\n"
    "(COMP.%d.TRUE)\n"
    "@SP\n"
    "A=M\n"
    "M=-1\n"
    "@SP\n"
    "M=M+1\n"
    "@COMP.%d.END\n"
    "0;JMP\n"
    "(COMP.%d.FALSE)\n"
    "@SP\n"
    "A=M\n"
    "M=0\n"
) + end + "(COMP.%d.END)\n"

# Common tail of every push template.
push_tail_template = (
    "@SP\n"
    "A=M\n"
    "M=D\n"
    "@SP\n"
    "M=M+1\n"
)

push_const_template = "@%d\nD=A\n" + push_tail_template

# Placeholders: (index, symbol, register).
push_var_template = (
    "@%d\n"
    "D=A\n"
    "@%s\n"
    "A=%s+D\n"
    "D=M\n"
) + push_tail_template

push_staticpointer_template = "@%s\nD=M\n" + push_tail_template

# Placeholders: (index, symbol, register).
pop_template = (
    "@%d\n"
    "D=A\n"
    "@%s\n"
    "D=%s+D\n"
    "@R13\n"
    "M=D\n"
    "@SP\n"
    "AM=M-1\n"
    "D=M\n"
    "@R13\n"
    "A=M\n"
    "M=D\n"
)

# Ends with a newline like every other template; without it the text emitted
# next (the following command's header, or the final "(END)" label) would be
# glued onto the "M=D" line.
pop_staticpointer_template = (
    "@SP\n"
    "AM=M-1\n"
    "D=M\n"
    "@%s\n"
    "M=D\n"
)

# VM command -> category used by compile_operation to pick a translator.
type_dict = {
    "add": "arithmetic", "sub": "arithmetic",
    "and": "arithmetic", "or": "arithmetic",
    "not": "arithmetic", "neg": "arithmetic",
    "lt": "arithmetic", "gt": "arithmetic",
    "eq": "arithmetic", "leq": "arithmetic",
    "geq": "arithmetic",
    "push": "memory", "pop": "memory",
}

binary_ops = ["add", "sub", "and", "or"]
unary_ops = ["not", "neg"]
comp_ops = ["lt", "gt", "eq", "leq", "geq"]

# Mutable translation state: op_count numbers the comparison labels,
# line_count numbers translated commands, output accumulates generated text.
op_count = 0
line_count = 0
output = ["// Assembly file generated by my awesome VM compiler\n"]
def compile_operation(op):
    """Translate one VM source line into commented Hack assembly.

    Comment lines (starting with ``//``) and blank lines produce an empty
    string.  Any other line gets a ``// '<op>' (line N)`` header followed by
    the translation chosen through ``type_dict``; ``line_count`` is advanced
    once per translated line.
    """
    global line_count
    tokens = op.split()
    if op[:2] == "//" or not tokens:
        return ""
    # print "input: " + op
    command = tokens[0]
    header = "// '%s' (line %s)\n" % (op, line_count)
    line_count += 1
    category = type_dict[command]
    if category == "arithmetic":
        return header + compile_arithmetic(op)
    if category == "memory":
        return header + compile_memory(op)
    return None
def compile_arithmetic(op):
    """Return the assembly text for an arithmetic, logic or comparison command.

    Comparison commands consume the current ``op_count`` as their label id
    and then increment it.  Unary commands use ``unary_template``; everything
    else is treated as a binary command.
    """
    global op_count
    if op in comp_ops:
        label_id = op_count
        jump = operations_dict[op]
        # Template order: id, jump mnemonic, then the id five more times.
        text = comp_template % ((label_id, jump) + (label_id,) * 5)
        op_count += 1
        return text
    template = unary_template if op in unary_ops else binary_template
    return template % operations_dict[op]
def compile_memory(op):
    """Translate one ``push``/``pop`` VM command into Hack assembly.

    ``op`` is the command text, e.g. ``"push local 2"``: a command word, a
    segment name and an integer index separated by whitespace.

    Returns the assembly text.  ``pop constant N`` has no destination, so it
    yields an empty string (the same result as the D version of this
    translator) rather than falling through and returning ``None``, which
    made the caller's ``header + compile_memory(op)`` raise ``TypeError``.

    Raises ``ValueError`` if the command word is neither ``push`` nor ``pop``,
    ``IndexError``/``ValueError`` if the line is malformed, and ``KeyError``
    for a segment missing from ``vars_dict``.
    """
    instructions = op.split()
    inst = instructions[0]
    argtype = instructions[1]
    val = int(instructions[2])

    if inst == "push":
        if argtype == "constant":
            return push_const_template % val
        if argtype == "static":
            return push_staticpointer_template % ("f." + str(val))
        if argtype == "pointer":
            # Index 0 selects THIS; any other index selects THAT.
            return push_staticpointer_template % ("THIS" if val == 0 else "THAT")
        return push_var_template % (val, vars_dict[argtype][0], vars_dict[argtype][1])

    if inst == "pop":
        if argtype == "constant":
            return ""
        if argtype == "static":
            return pop_staticpointer_template % ("f." + str(val))
        if argtype == "pointer":
            return pop_staticpointer_template % ("THIS" if val == 0 else "THAT")
        return pop_template % (val, vars_dict[argtype][0], vars_dict[argtype][1])

    raise ValueError("not a memory command: %r" % op)
def main():
    """Translate a VM file into a Hack ``.asm`` file next to it.

    The input path is ``sys.argv[1]``, defaulting to ``test.txt`` when no
    argument is given.  The output path is the input path with its final
    extension replaced by ``.asm``.  Every input line is passed through
    ``compile_operation`` and the results are appended to the module-level
    ``output`` list, which is then written out followed by a closing
    ``(END)`` infinite loop.
    """
    global output
    # Local import: only ``sys`` is imported at file level.
    import os

    inputfname = sys.argv[1] if len(sys.argv) > 1 else "test.txt"
    # splitext strips only the last extension, so "./prog.vm" becomes
    # "./prog.asm"; splitting on the first "." turned it into ".asm".
    outputfname = os.path.splitext(inputfname)[0] + ".asm"

    # ``with`` closes both files even if translation raises part-way through.
    with open(inputfname) as inputf:
        output += ["// Input filename: %s\n" % inputfname]
        for line in inputf:
            output.append(compile_operation(line.strip()))

    with open(outputfname, 'w') as outputf:
        outputf.writelines(output)
        outputf.write("(END)\n@END\n0;JMP")

    # Parenthesised single-argument form prints the same on Python 2 and 3.
    print("Output written to " + outputfname)


if __name__ == "__main__":
    main()
D:
import std.stdio, std.string, std.conv, std.format, std.c.stdlib;
// Lookup tables filled in by build_dictionaries(): command -> operator or
// jump mnemonic, and command -> category ("arithmetic" or "memory").
string[string] operations_dict, type_dict;
// Segment name -> two strings substituted into the push/pop templates.
string[][string] vars_dict;
// comp_ops, unary_ops and binary_ops are assigned in build_dictionaries();
// output collects the generated text.
// NOTE(review): arithmetic, memory and lines are not referenced in the
// visible code -- confirm before removing.
string[] arithmetic, memory, comp_ops, unary_ops, binary_ops, lines, output;
// Assembly text templates (format strings) assigned in build_strings().
string start, end, binary_template, unary_template,
comp_template, push_tail_template, push_const_template,
push_var_template, push_staticpointer_template,
pop_template, pop_staticpointer_template;
// op_count is bumped after each comparison command (it numbers the COMP
// labels); line_count is bumped once per translated command.
int op_count, line_count;
// Populates the module-level lookup tables and command lists.
void build_dictionaries() {
    // Command groups consulted by compile_arithmetic via is_in().
    comp_ops = ["lt", "gt", "eq", "leq", "geq"];
    unary_ops = ["not", "neg"];
    binary_ops = ["add", "sub", "and", "or"];

    // Command -> operator (binary/unary) or jump mnemonic (comparison).
    operations_dict = [
        "add": "+",
        "sub": "-",
        "and": "&",
        "or": "|",
        "not": "!",
        "neg": "-",
        "lt": "JLT",
        "gt": "JGT",
        "eq": "JEQ",
        "leq": "JLE",
        "geq": "JGE"
    ];

    // Command -> category used by compile_operation to pick a translator.
    type_dict = [
        "add": "arithmetic",
        "sub": "arithmetic",
        "and": "arithmetic",
        "or": "arithmetic",
        "not": "arithmetic",
        "neg": "arithmetic",
        "lt": "arithmetic",
        "gt": "arithmetic",
        "eq": "arithmetic",
        "leq": "arithmetic",
        "geq": "arithmetic",
        "push": "memory",
        "pop": "memory"
    ];

    // Segment name -> [symbol, register] substituted into the templates.
    vars_dict = [
        "this": ["THIS", "M"],
        "that": ["THAT", "M"],
        "argument": ["ARG", "M"],
        "local": ["LCL", "M"],
        "static": ["f.%d", "M"],
        "temp": ["TEMP", "A"]
    ];
}
// Returns true when s equals at least one element of list.
bool is_in(string s, string[] list) {
    bool found = false;
    // Stop scanning as soon as a match has been seen.
    for (size_t i = 0; i < list.length && !found; ++i)
        found = (list[i] == s);
    return found;
}
// Assigns every module-level assembly template.  All pieces are joined with
// an explicit `~`: relying on adjacent string literals being concatenated
// implicitly is deprecated in D.
void build_strings() {
    // Shared prologue/epilogue fragments reused by several templates.
    start = "@SP\nAM=M-1\n";
    end = "@SP\nM=M+1\n";

    // Binary command: one %s placeholder for the operator.
    binary_template = start
        ~ "D=M\n"
        ~ "@SP\n"
        ~ "AM=M-1\n"
        ~ "M=M%sD\n"
        ~ end;

    // Unary command: one %s placeholder for the operator.
    unary_template = start ~ "M=%sM\n" ~ end;

    // Comparison command: placeholders are (id, jump, id, id, id, id, id).
    comp_template = start
        ~ "D=M\n"
        ~ "@SP\n"
        ~ "AM=M-1\n"
        ~ "D=M-D\n"
        ~ "@COMP.%s.TRUE\n"
        ~ "D;%s\n"
        ~ "@COMP.%s.FALSE\n"
        ~ "0;JMP\n"
        ~ "(COMP.%s.TRUE)\n"
        ~ "@SP\n"
        ~ "A=M\n"
        ~ "M=-1\n"
        ~ "@SP\n"
        ~ "M=M+1\n"
        ~ "@COMP.%s.END\n"
        ~ "0;JMP\n"
        ~ "(COMP.%s.FALSE)\n"
        ~ "@SP\n"
        ~ "A=M\n"
        ~ "M=0\n"
        ~ end
        ~ "(COMP.%s.END)\n";

    // Common tail of every push template.
    push_tail_template = "@SP\n"
        ~ "A=M\n"
        ~ "M=D\n"
        ~ "@SP\n"
        ~ "M=M+1\n";

    push_const_template = "@%s\nD=A\n" ~ push_tail_template;

    // Placeholders: (index, symbol, register).
    push_var_template = "@%s\n"
        ~ "D=A\n"
        ~ "@%s\n"
        ~ "A=%s+D\n"
        ~ "D=M\n"
        ~ push_tail_template;

    push_staticpointer_template = "@%s\nD=M\n" ~ push_tail_template;

    // Placeholders: (index, symbol, register).
    pop_template = "@%s\n"
        ~ "D=A\n"
        ~ "@%s\n"
        ~ "D=%s+D\n"
        ~ "@R13\n"
        ~ "M=D\n"
        ~ "@SP\n"
        ~ "AM=M-1\n"
        ~ "D=M\n"
        ~ "@R13\n"
        ~ "A=M\n"
        ~ "M=D\n";

    // Ends with a newline like every other template; without it the text
    // emitted next (the following header or the final "(END)" label) would
    // be glued onto the "M=D" line.
    pop_staticpointer_template = "@SP\n"
        ~ "AM=M-1\n"
        ~ "D=M\n"
        ~ "@%s\n"
        ~ "M=D\n";
}
// Resets the translator state: rebuilds the tables and templates, restarts
// both counters and seeds the output with the file banner.
void init() {
    build_dictionaries();
    build_strings();
    output = ["// Assembly file generated by my awesome VM compiler\n"];
    line_count = 0;
    op_count = 0;
}
// Translates one VM source line into commented Hack assembly.
//
// Blank or whitespace-only lines and lines starting with "//" yield an empty
// string.  Any other line gets a "// '<op>' (line N)" header followed by the
// output of compile_arithmetic or compile_memory, chosen through type_dict;
// line_count is advanced once per translated line.
string compile_operation(string op) {
    string[] tokens = op.split();
    // The length check keeps the slice in bounds for one-character lines,
    // and the token check covers lines that are not already stripped.
    if (tokens.length == 0 || (op.length >= 2 && op[0..2] == "//"))
        return "";
    string operation = tokens[0];
    string header = "// '" ~ op ~ "' (line " ~ to!string(line_count) ~ ")\n";
    ++line_count;
    if (type_dict[operation] == "arithmetic")
        return header ~ compile_arithmetic(op);
    else
        return header ~ compile_memory(op);
}
// Returns the assembly text for an arithmetic, logic or comparison command.
// Comparison commands consume the current op_count as their label id and
// then increment it; unary commands use unary_template and everything else
// is treated as a binary command.
string compile_arithmetic(string op) {
    if (!is_in(op, comp_ops)) {
        string tmpl = is_in(op, unary_ops) ? unary_template : binary_template;
        return format(tmpl, operations_dict[op]);
    }
    int id = op_count;
    // Template order: id, jump mnemonic, then the id five more times.
    string text = format(comp_template, id, operations_dict[op], id,
                         id, id, id, id);
    ++op_count;
    return text;
}
// Translates one push/pop command ("<cmd> <segment> <index>") into assembly.
// Any command word other than "push" is handled as a pop; "pop constant"
// produces an empty string.
string compile_memory(string op) {
    string[] parts = op.split();
    string inst = parts[0];
    string segment = parts[1];
    int val = to!int(parts[2]);
    bool isPush = (inst == "push");

    if (segment == "constant")
        return isPush ? format(push_const_template, val) : "";

    if (segment == "static" || segment == "pointer") {
        // static addresses the symbol f.<index>; pointer selects THIS for
        // index 0 and THAT for any other index.
        string target = (segment == "static")
            ? ("f." ~ to!string(val))
            : (val == 0 ? "THIS" : "THAT");
        return format(isPush ? push_staticpointer_template
                             : pop_staticpointer_template, target);
    }

    // Remaining segments are resolved through vars_dict.
    string[] entry = vars_dict[segment];
    return format(isPush ? push_var_template : pop_template,
                  val, entry[0], entry[1]);
}
// Entry point: translates the VM file named by args[1] into an .asm file
// whose name is the input path with its extension replaced.  Prints a usage
// line and returns when no file name is given.
void main(string[] args) {
    // Function-local import so the file-level import list stays untouched.
    import std.path : setExtension;

    init();
    if (args.length < 2) {
        writefln("usage: %s <filename>", args[0]);
        // Returning from main ends the program with status 0, as exit(0)
        // did, without needing the C library binding.
        return;
    }
    string inputfname = args[1];
    // setExtension replaces only the last extension, so "./prog.vm" becomes
    // "./prog.asm"; splitting on the first "." turned it into ".asm".
    string outputfname = inputfname.setExtension("asm");

    auto inputf = File(inputfname, "r");
    output ~= format("// Input filename: %s\n", inputfname);
    foreach (line; inputf.byLine) {
        // byLine reuses its buffer between iterations, so each line is
        // copied into a string before it is stored.
        output ~= compile_operation(to!string(line).strip);
    }
    inputf.close();

    auto outputf = File(outputfname, "w");
    foreach (outl; output)
        outputf.write(outl);
    outputf.write("(END)\n@END\n0;JMP");
    outputf.close();
    writeln("Compilation successful. Output written to " ~ outputfname);
}
2 个回答
你可以试着在主函数中使用 std.array.appender
来输出,而不是直接把数组连接在一起。因为 appender
可以减少内存分配的次数,而且通常在添加元素时表现得更好。你也可以试试 reserve
这个方法。
//Note: untested code
import std.array;
// Collects all generated text in one growing buffer instead of a string[].
auto output = appender!string();
void init() {
...
output.put("// Assembly file generated by my awesome VM compiler\n");
...
}
void main() {
...
output.put(format("// Input filename: %s\n", inputfname));
foreach (line; inputf.byLine) {
output.put(compile_operation(to!string(line).strip));
}
...
// output.data is a single string, so write it in one call; a foreach over
// it would call write once per character.
outputf.write(output.data);
...
}
对于 output
变量,可以使用 Appender
(文档):
import std.array : appender;
// Sketch of main() that accumulates the generated text in an appender;
// the "//..." lines stand for code omitted from this fragment.
void main() {
// The appender starts out holding the banner line.
auto output = appender!string("// Assembly file generated by my awesome VM compiler\n");
//...
output.put(format("// Input filename: %s\n", inputfname));
foreach (line; inputf.byLine) {
output.put(compile_operation(line.to!string().strip()));
}
//...
// The whole buffer is written with a single call.
outputf.write(output.data());
//...
}
另外,我建议你把 type_dict
改成类似 int[string]
的形式,并用整数常量来使用它。
// Command -> category table keyed by integer constants instead of strings.
int[string] type_dict;
// Category codes stored in type_dict.
const TYPE_ARITHMETIC = 0,
TYPE_MEMORY = 1;
//...
type_dict = ["add": TYPE_ARITHMETIC, "push": TYPE_MEMORY]; // etc
//...
//...
// The category check becomes an integer comparison.
if (type_dict[operation] == TYPE_ARITHMETIC) {
//...
}
//...