1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
|
from tqdm.contrib.concurrent import process_map
from multiprocessing import Pool, Process, Manager, Lock
# Shared objects are created through a Manager so the worker processes can
# all reach the same state via proxies instead of private copies.
manager = Manager()
# Disabled: manager-backed copies of the saved-postcode / no-data lists.
# m_saved_postcode_lst = manager.list(saved_postcode_lst)
# m_no_data_lst = manager.list(no_data_lst)

# Three independent manager-backed locks, created in the same order as they
# are later passed to the workers.
m_chromelock, m_nodatalock, m_savedpstcdlock = (
    manager.Lock() for _ in range(3)
)

# Manager-backed copy of the profile paths, shared across workers.
m_profile_path_list = manager.list(profile_path_list)
# %%
# Cut the postcode list into consecutive slices of at most CHUNK_SIZE items;
# the final slice may be shorter.
postcode_list_chunks = [
    postcode_list[start:start + CHUNK_SIZE]
    for start in range(0, len(postcode_list), CHUNK_SIZE)
]
# %%
# Every task receives its own chunk followed by the same shared tail: the
# three locks, the shared profile-path list, the logging settings and the
# data index.
_shared_task_tail = (
    m_chromelock,
    m_nodatalock,
    m_savedpstcdlock,
    m_profile_path_list,
    loggername,
    loggingfile,
    data_index,
)
args = [(chunk, *_shared_task_tail) for chunk in postcode_list_chunks]
# Run helperf over every argument tuple in a pool of PROCESS_NUM workers and
# collect the results in submission order, reporting progress via logging_tqdm.
try:
    # Leaving the `with` block terminates the pool, also on error.
    with mp.Pool(PROCESS_NUM) as pool:
        results = list(
            logging_tqdm(
                # chunksize=1: each task (one postcode chunk) is dispatched
                # to a worker individually.
                pool.imap(helperf, args, chunksize=1),
                total=len(args),
                loggername=loggername,
                loggingfile=loggingfile,
                desc="Progress Status in Main Process",
            )
        )
except Exception as e:
    # Log with the full traceback, then re-raise the original exception
    # unchanged so the caller still sees the failure.
    logger.exception(e)
    raise
finally:
    # Forcefully stop any child process that is still alive, whether the run
    # succeeded or failed.
    # NOTE(review): active_children() returns every live child of this
    # process, which presumably includes the Manager server started earlier;
    # its proxies would be unusable afterwards. Confirm this is intended.
    for p in mp.active_children():
        p.kill()
        p.join()
    logger.info("Killing all processes")
# IMPORTANT: always assign a complete dict to a key of the manager dict.
# Updating a nested dict in place does not work with multiprocessing: the
# inner dict is a plain object, so a change made to it is not sent back
# through the manager and the other processes never see it.
# NOTE(review): presumably postcode_info_dict is a manager.dict() proxy; its
# creation is not shown here - confirm.
temp = {
"United States": "New York",
"Italy": "Naples",
"England": "London"
}
postcode_info_dict[postcode] = temp # OK: whole-value assignment goes through the proxy
postcode_info_dict[postcode]['United States'] = "New York" # BAD: mutates only a local copy of the nested dict
|