为什么Python在数字计算上如此慢?
考虑一下下面这段代码,它是解决“圆形刺穿问题”的一种方法,这个问题是计算几何中的一个经典问题,主要是找出一条能与最多不相交的圆相交的直线。
这里的主要问题是,为什么这段Python代码比C++代码慢了42倍。是因为Python的某些使用不当吗?还是说Python在数学运算上本身就比C++慢?有没有什么办法可以让它快一点呢?
首先,来看一下Python代码:
from __future__ import division
from math import sqrt, atan2, pi
from sys import stdin
__author__ = "Sahand Saba"
EPS = 1e-6
class Point(object):
__slots__ = ['x', 'y']
def __init__(self, xx=0.0, yy=0.0):
self.x = float(xx)
self.y = float(yy)
def angle(self):
"""
Returns the angle the point makes with the x-axis,
counter-clockwise. Result is in the [0, 2*pi) range.
"""
a = atan2(self.y, self.x)
if a < 0:
a = 2 * pi + a
return a
class Circle(object):
__slots__ = ['center', 'radius']
def __init__(self, center, radius):
self.center = center
self.radius = float(radius)
def common_tangents(self, other):
"""
Returns [[p1,p2],[q1,q2]] with p1, p2, q1, q2 all Point objects
representing points on C1 such that the tangent lines to C1 at p1, p2,
q1, q2 are tangent to C2 as well. Further more, p1 and p2 represent
external tangent lines, while q1 and q2 represent internal ones. It is
also guaranteed that p1 and q1 are both on the left-side of the line
connecting C1.center to C2.center, and p2 and q2 are on the right-side
of it.
"""
C1, C2 = self, other
mu = C1.center.x - C2.center.x
eta = C1.center.y - C2.center.y
r1 = C1.radius
r2 = C2.radius
r1r1 = r1 * r1
r1r2 = r1 * r2
delta1 = r1r1 - r1r2
delta2 = r1r1 + r1r2
mumu = mu * mu
etaeta = eta * eta
D = etaeta + mumu
result = [[], []]
if abs(D) < EPS:
return result
if abs(eta) < EPS:
# In this case there is symmetry along the x-axis and we can
# not divide by eta. Use x^2 + y^2 = r^2 to find y.
dmu = -1 if mu < 0 else 1
x = (-delta1 * mu) / D
y = -dmu * sqrt(r1r1 - x * x)
result[0].append(Point(x, y))
result[0].append(Point(x, -y))
x = (-delta2 * mu) / D
y = -dmu * sqrt(r1r1 - x * x)
result[1].append(Point(x, y))
result[1].append(Point(x, -y))
else:
# Here, the symmetry is along the line connecting the two centers.
# Use the equation eta*y + mu *x + r1^2 - r1 * r2 = 0 to derive y
# since we can divide by eta.
dd1 = delta1 * delta1
dd2 = delta2 * delta2
Delta1 = sqrt(dd1 * mumu - D*(dd1 - etaeta * r1r1))
Delta2 = sqrt(dd2 * mumu - D*(dd2 - etaeta * r1r1))
deta = -1 if eta < 0 else 1
x = (-delta1 * mu + deta * Delta1) / D
result[0].append(Point(x, -(mu*x + delta1)/eta))
x = (-delta1 * mu - deta * Delta1) / D
result[0].append(Point(x, -(mu*x + delta1)/eta))
x = (-delta2 * mu + deta * Delta2) / D
result[1].append(Point(x, -(mu*x + delta2)/eta))
x = (-delta2 * mu - deta * Delta2) / D
result[1].append(Point(x, -(mu*x + delta2)/eta))
return result
def add_events(A, p, q):
start = p.angle()
end = q.angle()
A.append((start, 1, p))
A.append((end, -1, q))
return 1 if start > end else 0
def max_intersecting_line(C):
"""
Given a list of circles, returns (m, c, p) where m is the maximal number of
circles in C any line can intersect, and p is a point on a circle c in C
such that the tangent line to c at p intersects m circles in C.
"""
global_max = 1
solution_circle = C[0]
solution_point = Point(C[0].radius, 0.0)
for c1 in C:
local_max = 1
A = []
for c2 in (c for c in C if c is not c1):
Q = c1.common_tangents(c2)
t1 = add_events(A, Q[1][0], Q[0][0])
t2 = add_events(A, Q[0][1], Q[1][1])
local_max += max(t1, t2)
if local_max > global_max:
global_max = local_max
solution_point = Point(c1.radius, 0.0)
solution_circle = c1
A.sort(key=lambda a: a[0])
for a in A:
local_max += a[1]
if local_max > global_max:
global_max = local_max
solution_point = Point(c1.center.x + a[2].x,
c1.center.y + a[2].y)
solution_circle = c1
return global_max, solution_circle, solution_point
if __name__ == '__main__':
T = int(stdin.readline())
for __ in xrange(T):
n = int(stdin.readline())
C = []
for i in xrange(n):
x, y, r = tuple(stdin.readline().split(' '))
C.append(Circle(Point(x, y), r))
print max_intersecting_line(C)[0]
接下来是几乎逐行对应的C++代码:
#include <iostream>
#include <vector>
#include <algorithm>
#include <cmath>
using namespace std;
double EPS = 1e-6;
class Point {
public:
double x, y;
Point(double x=0.0, double y=0.0) : x(x), y(y) {}
double angle() const {
double a = atan2(y, x);
if (a < 0) {
a = atan(1) * 8.0 + a;
}
return a;
}
};
class Event {
public:
double angle;
double count;
Event(double angle = 0, int count = 1) : angle(angle), count(count) {}
bool operator<(const Event &o) const {
return angle < o.angle;
}
};
struct CircleCircleTangents {
public:
Point external[2];
Point internal[2];
};
class Circle {
public:
Point center;
double radius;
Circle(double x=0.0, double y=0.0, double r=1.0) : radius(r), center(x,y) {}
// external[0] and internal[0] are guaranteed to be on the left-side of
// the directed line contennting C1.center to C2.center
CircleCircleTangents commonTangents(const Circle& C2) const {
const Circle& C1 = *this;
double mu = C1.center.x - C2.center.x;
double eta = C1.center.y - C2.center.y;
double r1 = C1.radius;
double r2 = C2.radius;
double r1r1 = r1 * r1;
double r1r2 = r1 * r2;
double delta1 = r1r1 - r1r2;
double delta2 = r1r1 + r1r2;
double D = eta*eta + mu*mu;
CircleCircleTangents result;
if (abs(eta) < EPS){
// Do not divide by eta! Use x^2 + y^2 = r^2 to find y.
double dmu = mu < 0? -1 : 1;
double x = (-delta1 * mu) / D;
double y = -dmu * sqrt(r1r1 - x * x);
result.external[0].x = x;
result.external[0].y = y;
result.external[1].x = x;
result.external[1].y = -y;
x = (-delta2 * mu) / D;
y = -dmu * sqrt(r1r1 - x * x);
result.internal[0].x = x;
result.internal[0].y = y;
result.internal[1].x = x;
result.internal[1].y = -y;
} else {
// Dividing by eta is ok. Use mu*x + eta*y + delta = 0 to find y.
double mumu = mu * mu;
double etaeta = eta * eta;
double dd1 = delta1 * delta1;
double dd2 = delta2 * delta2;
double deta = eta < 0? -1 : 1;
double Delta1 = deta * sqrt(dd1 * mumu - D*(dd1 - etaeta * r1r1));
double Delta2 = deta * sqrt(dd2 * mumu - D*(dd2 - etaeta * r1r1));
double x = (-delta1 * mu + Delta1) / D;
result.external[0].x = x;
result.external[0].y = -(mu*x + delta1)/eta;
x = (-delta1 * mu - Delta1) / D;
result.external[1].x = x;
result.external[1].y = -(mu*x + delta1)/eta;
x = (-delta2 * mu + Delta2) / D;
result.internal[0].x = x;
result.internal[0].y = -(mu*x + delta2)/eta;
x = (-delta2 * mu - Delta2) / D;
result.internal[1].x = x;
result.internal[1].y = -(mu*x + delta2)/eta;
}
return result;
}
};
bool add_events(vector<Event>& A, const Point& p, const Point& q) {
double start = p.angle();
double end = q.angle();
A.push_back(Event(start, 1));
A.push_back(Event(end, -1));
return start > end;
}
// Given a list of circles, returns (m, c, p) where m is the maximal number of
// circles in C any line can intersect, and p is a point on a circle c in C
// such that the tangent line to c at p intersects m circles in C.
int max_intersecting_line(const Circle* C, int n) {
int global_max = 1;
vector<Event> A;
for(int i = 0; i < n; i++) {
const Circle& c1 = C[i];
A.clear();
int local_max = 1;
for(int j = 0; j < n; j++) {
if(j == i) continue;
const Circle& c2 = C[j];
CircleCircleTangents Q = c1.commonTangents(c2);
bool t1 = add_events(A, Q.internal[0], Q.external[0]);
bool t2 = add_events(A, Q.external[1], Q.internal[1]);
if(t1 || t2) {
local_max++;
}
}
if (local_max > global_max) {
global_max = local_max;
}
sort(A.begin(), A.end());
for(int i = 0; i < A.size(); i++) {
local_max += A[i].count;
if(local_max > global_max) {
global_max = local_max;
}
}
}
return global_max;
}
int main() {
Circle C[2000];
int T;
cin >> T;
for (int t = 0; t < T; t++) {
int n;
cin >> n;
for (int i = 0; i < n; i++) {
cin >> C[i].center.x >> C[i].center.y >> C[i].radius;
}
cout << max_intersecting_line(C, n) << endl;
}
return 0;
}
然后是它们的性能差异:
$ time ./janeway < io/Janeway.in > /dev/null
real 0m8.436s
user 0m8.430s
sys 0m0.003s
$ time python janeway.py < io/Janeway.in > /dev/null
real 5m57.899s
user 5m57.217s
sys 0m0.165s
正如你所看到的,C++代码的执行速度大约快了42倍。
(测试输入来自2013年ACM ICPC区域赛的问题。详情请见http://www.acmicpc-pacnw.org/results.htm中的“Janeway”问题。)
补充:这是Python代码的cProfile输出:
799780565 function calls in 507.293 seconds
Ordered by: standard name
ncalls tottime percall cumtime percall filename:lineno(function)
1 0.000 0.000 0.000 0.000 __future__.py:48(<module>)
1 0.000 0.000 0.000 0.000 __future__.py:74(_Feature)
7 0.000 0.000 0.000 0.000 __future__.py:75(__init__)
1 0.047 0.047 507.293 507.293 janeway.py:1(<module>)
25 63.703 2.548 507.207 20.288 janeway.py:103(max_intersecting_line)
1 0.000 0.000 0.000 0.000 janeway.py:11(Point)
24250014 5.671 0.000 5.671 0.000 janeway.py:116(<genexpr>)
96926032 9.343 0.000 9.343 0.000 janeway.py:127(<lambda>)
96955733 57.902 0.000 57.902 0.000 janeway.py:14(__init__)
96926032 46.840 0.000 63.156 0.000 janeway.py:18(angle)
1 0.000 0.000 0.000 0.000 janeway.py:29(Circle)
18506 0.009 0.000 0.009 0.000 janeway.py:32(__init__)
24231508 167.128 0.000 245.945 0.000 janeway.py:36(common_tangents)
48463016 59.402 0.000 129.139 0.000 janeway.py:95(add_events)
48463016 4.106 0.000 4.106 0.000 {abs}
96926032 16.315 0.000 16.315 0.000 {math.atan2}
48463016 4.908 0.000 4.908 0.000 {math.sqrt}
24231508 9.483 0.000 9.483 0.000 {max}
193870570 18.503 0.000 18.503 0.000 {method 'append' of 'list' objects}
1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}
18532 0.009 0.000 0.009 0.000 {method 'readline' of 'file' objects}
18506 43.918 0.002 53.261 0.003 {method 'sort' of 'list' objects}
18506 0.007 0.000 0.007 0.000 {method 'split' of 'str' objects}
3 个回答
Python是一种解释型语言,而C++是一种编译型语言。简单来说,解释型语言是逐行执行代码的,而编译型语言则是先把代码翻译成机器能理解的语言,然后再运行。为了让Python代码运行得更快,你可以试试PyPy、Cython或者Shedskin这些工具。
Python是一种解释型语言,而C++是编译型语言。简单来说,当你写一个像“1 + 2”这样的算式时,Python会在内存中创建三个对象:一个代表数字“1”,一个代表数字“2”,还有一个代表结果“3”。而C++在编译后,这些操作会变得简单很多,直接变成更底层的机器指令。因此,在处理数字计算时,Python和C++的性能差距是很正常的。
在某些情况下,使用numpy数组和numpy表达式可以让计算速度变得更快。想了解更多细节,可以查看这个链接:http://wiki.scipy.org/PerformancePython
在C++中,编译器通常可以把一个数学运算直接变成一条处理器指令,这样执行起来就很快。
而在Python中,就麻烦了。因为Python是解释型语言,这意味着每次操作都会有额外的开销,速度会慢一些。不仅如此,Python还不能直接假设你用的对象是数字,它必须先检查这些对象,才能知道该执行什么操作。比如,你可以用+
把两个数字相加,也可以用+
把两个字符串连接在一起。Python在运行之前并不知道这些变量是数字还是字符串。