[Python] Multiprocessing AttributeError: Can't pickle local object

I recently wrote a small program that searches every file on my disk. It works, but a full scan takes 5 to 6 minutes, which is far from ideal. The obvious way to speed it up is multithreading or multiprocessing, and that is where things got strange. I am using the official CPython interpreter, and under CPython multiprocessing turned out to be much faster than multithreading, more than twice as fast in my test. The reason is the GIL (Global Interpreter Lock): all threads in a process share a single GIL, so CPU-bound threads never truly run in parallel, whereas each child process gets its own interpreter and its own GIL, which is why multiprocessing is so much more efficient here.

Then a new problem appeared: the multiprocessing version crashed with AttributeError: Can't pickle local object.

Example code (for testing):

from threading import Thread
from multiprocessing import Process, Queue as mQueue
from queue import Queue
import time


def single_test():
    my_sum = 0
    for i in range(1, 10000000):
        my_sum += i
    print("单线程结果:", my_sum)


def thread_test():

    def sum_func(q, start, end):  # a closure is fine for threads: the target is never pickled
        my_sum = 0
        for i in range(start, end):
            my_sum += i
        q.put(my_sum)

    def run_thread():
        q = Queue()
        t1 = Thread(target=sum_func, args=(q, 1, 5000000))
        t2 = Thread(target=sum_func, args=(q, 5000000, 10000000))
        t1.start()
        t2.start()
        t1.join()
        t2.join()
        my_sum = 0
        while not q.empty():
            my_sum += q.get()
        print("多线程结果:", my_sum)

    run_thread()


def process_test():

    # sum_process_func is defined inside process_test(), so it is a "local
    # object"; this nested definition is what multiprocessing cannot pickle.
    def sum_process_func(q, start, end):
        my_sum = 0
        for i in range(start, end):
            my_sum += i
        q.put(my_sum)

    def run_process():
        q = mQueue()
        p1 = Process(target=sum_process_func, args=(q, 1, 5000000))
        p2 = Process(target=sum_process_func, args=(q, 5000000, 10000000))
        p1.start()
        p2.start()
        p1.join()
        p2.join()
        my_sum = 0
        while not q.empty():
            my_sum += q.get()
        print("多进程结果:", my_sum)

    run_process()


if __name__ == "__main__":
    t0 = time.time()
    single_test()
    t1 = time.time()
    thread_test()
    t2 = time.time()
    process_test()
    t3 = time.time()
    print(f"单线程耗时:{t1-t0}s")
    print(f"多线程耗时:{t2-t1}s")
    print(f"多进程耗时:{t3-t2}s")

Solution 1 (avoid the closure: define the process target at module level):

from threading import Thread
from multiprocessing import Process, Queue as mQueue
from queue import Queue
import time


def single_test():
    my_sum = 0
    for i in range(1, 10000000):
        my_sum += i
    print("单线程结果:", my_sum)


def thread_test():

    def sum_func(q, start, end):
        my_sum = 0
        for i in range(start, end):
            my_sum += i
        q.put(my_sum)

    def run_thread():
        q = Queue()
        t1 = Thread(target=sum_func, args=(q, 1, 5000000))
        t2 = Thread(target=sum_func, args=(q, 5000000, 10000000))
        t1.start()
        t2.start()
        t1.join()
        t2.join()
        my_sum = 0
        while not q.empty():
            my_sum += q.get()
        print("多线程结果:", my_sum)

    run_thread()


# Moved to module level: a top-level function is pickled by reference
# (module name + qualified name), so Process(target=sum_process_func, ...)
# can now send it to the child process.
def sum_process_func(q, start, end):
    my_sum = 0
    for i in range(start, end):
        my_sum += i
    q.put(my_sum)

def process_test():

    def run_process():
        q = mQueue()
        p1 = Process(target=sum_process_func, args=(q, 1, 5000000))
        p2 = Process(target=sum_process_func, args=(q, 5000000, 10000000))
        p1.start()
        p2.start()
        p1.join()
        p2.join()
        my_sum = 0
        while not q.empty():
            my_sum += q.get()
        print("多进程结果:", my_sum)

    run_process()


if __name__ == "__main__":
    t0 = time.time()
    single_test()
    t1 = time.time()
    thread_test()
    t2 = time.time()
    process_test()
    t3 = time.time()
    print(f"单线程耗时:{t1-t0}s")
    print(f"多线程耗时:{t2-t1}s")
    print(f"多进程耗时:{t3-t2}s")

Solution 2 (the pathos module):

Python closures (functions defined inside another function) cannot be pickled (serialized), and multiprocessing needs the target function to be picklable so it can be sent to the child process.

1. Swap the serializer: pathos is built on dill, which can serialize locally defined functions where the standard pickle module cannot. Consider the following import:

from pathos.multiprocessing import ProcessingPool as Pool

Project page: https://github.com/uqfoundation/pathos
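
Below is a minimal sketch of the process test rewritten with pathos (assuming pathos is installed, e.g. via pip install pathos; the helper name sum_bounds and the nodes=2 pool size are illustrative, not from the original code). Because pathos serializes with dill rather than pickle, the locally defined worker no longer triggers the error:

from pathos.multiprocessing import ProcessingPool as Pool


def process_test_pathos():

    # Still a local function: dill, used by pathos, can serialize it.
    def sum_bounds(bounds):
        start, end = bounds
        my_sum = 0
        for i in range(start, end):
            my_sum += i
        return my_sum

    pool = Pool(nodes=2)
    results = pool.map(sum_bounds, [(1, 5000000), (5000000, 10000000)])
    pool.close()
    pool.join()
    print("Multi-process (pathos) result:", sum(results))


if __name__ == "__main__":
    process_test_pathos()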
