Python 分布式计算(有效)

7 投票
2 回答
1923 浏览
提问于 2025-04-16 18:49

我借用这个旧帖子来发布新的代码,要解决的还是同一个问题。怎样的 pickle 才算是安全的?像下面这样可以吗?

sock.py

from socket import socket
from socket import AF_INET
from socket import SOCK_STREAM
from socket import gethostbyname
from socket import gethostname

class SocketServer:
  """Minimal TCP server that hands one pre-packed message to each client."""

  def __init__(self, port):
    """Create the (not yet bound) listening socket and remember *port*."""
    self.sock = socket(AF_INET, SOCK_STREAM)
    self.port = port

  def listen(self, data):
    """Serve every ``(size_header, payload)`` pair in *data*, one per connection.

    Binds to 127.0.0.1 on the configured port and accepts one client per
    pending item. Items are taken from the END of *data* with ``pop()``, so
    the caller's list is emptied as a side effect.

    The listening socket is closed when serving finishes or fails, so a
    server object can be used for a single ``listen`` call only.
    """
    try:
      self.sock.bind(("127.0.0.1", self.port))
      self.sock.listen(len(data))
      while data:
        conn = self.sock.accept()[0]
        try:
          siz, dat = data.pop()
          # sendall() keeps writing until the whole buffer is out; a bare
          # send() is allowed to stop after a partial write.
          conn.sendall(siz)
          conn.sendall(dat)
        finally:
          # Release the connection even when sending fails.
          conn.close()
    finally:
      self.sock.close()

class Socket:
  """Client-side TCP connection with a read-until-complete ``recv``."""

  def __init__(self, host, port):
    """Open a TCP connection to ``(host, port)``."""
    self.sock = socket(AF_INET, SOCK_STREAM)
    try:
      self.sock.connect((host, port))
    except Exception:
      # Do not leak the descriptor when the connection attempt fails.
      self.sock.close()
      raise

  def recv(self, size):
    """Return *size* bytes from the peer.

    The underlying ``recv`` may deliver fewer bytes than requested, so this
    keeps reading until *size* bytes have arrived. If the peer closes the
    connection first, the bytes received so far (possibly none) are returned.
    """
    chunks = []
    remaining = size
    while remaining > 0:
      # Cap each request so a huge *size* never asks for one giant buffer.
      chunk = self.sock.recv(min(remaining, 65536))
      if not chunk:
        break  # peer closed the connection
      chunks.append(chunk)
      remaining -= len(chunk)
    return b"".join(chunks)

  def close(self):
    """Close the underlying socket."""
    self.sock.close()

pack.py

#http://stackoverflow.com/questions/6234586/we-need-to-pickle-any-sort-of-callable
from marshal import dumps as marshal_dumps
from pickle import dumps as pickle_dumps
from struct import pack as struct_pack

class packer:
  """Serialises plain Python functions into a length-prefixed byte payload."""

  def __init__(self):
    # Holds the payload produced by the most recent successful pack() call.
    self.f = []

  def pack(self, what):
    """Pack the function *what* for transmission.

    Returns a ``(header, payload)`` tuple, or ``None`` when *what* is not a
    plain Python function. ``payload`` is a pickled list of four byte
    strings: the marshalled code object, then the pickled name, defaults and
    closure. ``header`` is ``len(payload)`` packed with struct format 'Q'.

    The 'Q' format uses the packing machine's native byte order and size, so
    the receiver must unpack it with the same format on a compatible host.
    """
    if not isinstance(what, type(lambda: None)):
      return None
    # The dunder attribute names exist on Python 2.6+ and on Python 3; the
    # func_* aliases are Python 2 only.
    # NOTE(review): a non-None __closure__ holds cell objects, which the
    # standard pickler is expected to reject - confirm before packing closures.
    parts = [
      marshal_dumps(what.__code__),
      pickle_dumps(what.__name__),
      pickle_dumps(what.__defaults__),
      pickle_dumps(what.__closure__),
    ]
    # Assign only after every part serialised, so self.f is never half-built.
    self.f = pickle_dumps(parts)
    return (struct_pack('Q', len(self.f)), self.f)

unpack.py

from types import FunctionType
from pickle import loads as pickle_loads
from marshal import loads as marshal_loads
from struct import unpack as struct_unpack
from struct import calcsize

#http://stackoverflow.com/questions/6234586/we-need-to-pickle-any-sort-of-callable

class unpacker:
  """Rebuilds functions from a length-prefixed payload read off a socket."""

  def __init__(self):
    # Last decoded list of serialised parts (code, name, defaults, closure).
    self.f = []
    # Last function rebuilt by unpack(); a no-op until the first call.
    self.fcompiled = lambda:None
    # Byte length of the 'Q' size header that precedes every payload.
    self.sizeofsize = calcsize('Q')

  def _recv_exact(self, sock, size):
    """Read exactly *size* bytes from *sock*; raise EOFError if it ends early."""
    chunks = []
    remaining = size
    while remaining > 0:
      # recv() may return fewer bytes than asked for, so keep asking.
      chunk = sock.recv(min(remaining, 65536))
      if not chunk:
        raise EOFError(
          "connection closed with %d of %d bytes missing" % (remaining, size))
      chunks.append(chunk)
      remaining -= len(chunk)
    return b"".join(chunks)

  def unpack(self, sock):
    """Read one packed function from *sock* and return it as a callable.

    *sock* only needs a ``recv(size)`` method. The stream must carry a 'Q'
    size header followed by a pickled list of four parts: marshalled code
    object, pickled name, pickled defaults, pickled closure. The rebuilt
    function uses this module's globals as its global namespace.

    Raises EOFError when the connection ends before a full message arrives.

    SECURITY: pickle and marshal data can execute arbitrary code when
    loaded. Only use this with a peer you fully trust.
    """
    size = struct_unpack('Q', self._recv_exact(sock, self.sizeofsize))[0]
    self.f = pickle_loads(self._recv_exact(sock, size))
    code = marshal_loads(self.f[0])
    name = pickle_loads(self.f[1])
    defaults = pickle_loads(self.f[2])
    closure = pickle_loads(self.f[3])
    self.fcompiled = FunctionType(code, globals(), name, defaults, closure)
    return self.fcompiled

test.py

from unpack import unpacker
from pack import packer
from sock import SocketServer
from sock import Socket
from threading import Thread
from time import sleep

# Number of packed copies of f the server offers, and of fetches the client makes.
count = 2
# TCP port used by both the server thread and the client connections.
port = 4446

def f():
  """Sample function that gets packed and shipped: prints the number 42."""
  # The parenthesised form prints the same text on Python 2 and is also
  # valid Python 3, unlike the bare print statement.
  print(42)

def server():
  """Pack ``f`` ``count`` times and serve those payloads on ``port``."""
  listener = SocketServer(port)
  serializer = packer()
  payloads = []
  for _ in range(count):
    # Each entry is the (size header, payload) pair returned by pack().
    payloads.append(serializer.pack(f))
  listener.listen(payloads)

if __name__ == "__main__":
  # Run the server in a background thread of this same process.
  Thread(target=server).start()
  # NOTE(review): presumably gives the server thread time to start listening
  # before the first client connects - a fixed delay, not a real handshake.
  sleep(1)
  unpack = unpacker()
  for nothing in range(count):
    # One fresh connection per function. print(...) with a single argument
    # prints the same text on Python 2 and is valid Python 3.
    print(unpack.unpack(Socket("127.0.0.1", port)))

输出:

<function f at 0x12917d0>
<function f at 0x12915f0>

2 个回答

4

我觉得进程对象并不是为了在网络上传输而设计的。你可以看看multiprocessing/process.py文件的第256行。

# We subclass bytes to avoid accidental transmission of auth keys over network.

这听起来是有原因的。如果你想进行分布式计算,或许应该找一个专门为此设计的库来使用。

2

当你遇到 ValueError: insecure string pickle 这个错误时,说明你的数据包坏掉了。你确定你在一次 sock.recv() 调用中收到了完整的打包对象吗?(在 unpack.py 文件中)

补充说明:为了避免这个问题,不管数据大小如何,你可以这样做(你的 Socket 类需要支持带有缓冲区大小参数的 recv 调用,也就是:

 class Socket:
    # Interface sketch: recv takes the buffer size to request and forwards
    # it unchanged to the wrapped socket's recv().
    def recv(self, bufsize):
        return self.sock.recv(bufsize)

):

import struct

# Build the fixed-size length header for the payload.
# NOTE(review): pickled_list is not defined in this snippet; it is assumed to
# hold the already-pickled bytes - confirm against the sending code.
struct.pack('Q', len(pickled_list))
# Send it, and then send the pickled list.

在接收程序中:

import struct

def recv_exact(sock, size):
    """Read exactly *size* bytes from *sock*; raise EOFError if it ends early.

    A single recv() call may return fewer bytes than requested, so the
    header and the payload both have to be read in a loop.
    """
    chunks = []
    remaining = size
    while remaining > 0:
        chunk = sock.recv(min(remaining, 65536))
        if not chunk:
            raise EOFError(
                "connection closed with %d of %d bytes missing" % (remaining, size))
        chunks.append(chunk)
        remaining -= len(chunk)
    return b"".join(chunks)

# Read the fixed-size length header first, then exactly that many payload bytes.
length = struct.unpack('Q', recv_exact(sock, struct.calcsize('Q')))[0]
pickled_list = recv_exact(sock, length)

'Q' 表示 unsigned long long 类型。其他格式字符请参见 struct 模块的文档。

撰写回答