在遇到多任务执行的时候，可以考虑并行计算，具体做法是利用 Python 的 multiprocessing 包，示例如下：
import os
import pandas as pd
from multiprocessing import Pool
# Directory paths used by this script.
# Root of the raw data tree; main() lists it two levels deep
# (first-level directory -> per-stock entry).
raw_data_dir = '/data/stockData/detailRecords2'
# NOTE(review): presumably the output location for preprocessed data; it is
# not referenced anywhere in the visible code -- confirm before relying on it.
preprocessed_data_dir = '/data/preStockData/tick2one'
def process_stock_data(directory_path, stock_code):
    """Worker task executed in a pool process for one stock entry.

    This is a placeholder implementation: it only prints a marker line and
    does not read ``directory_path`` or ``stock_code`` yet.

    Args:
        directory_path: Path of the entry to process.
        stock_code: Name of that entry, used as the stock code.

    Returns:
        Always ``0``.
    """
    print("Inside function process_stock_data")
    return 0
# Iterate over the directories in the raw data directory
def main():
    """Submit one ``process_stock_data`` task per stock entry and wait.

    ``raw_data_dir`` is walked two levels deep: every first-level directory
    is listed, and each of its children is submitted to a worker pool with
    the child's name passed as the stock code.  The call returns only after
    every submitted task has finished.

    Raises:
        Exception: The first exception raised inside a worker task is
            re-raised here, after all tasks have completed.
    """
    pending = []
    with Pool() as pool:
        for directory in os.listdir(raw_data_dir):
            directory1_path = os.path.join(raw_data_dir, directory)
            # A stray file at the top level cannot be listed; skip it
            # instead of crashing with NotADirectoryError.
            if not os.path.isdir(directory1_path):
                continue
            for stock_code in os.listdir(directory1_path):
                directory_path = os.path.join(directory1_path, stock_code)
                pending.append(
                    pool.apply_async(
                        process_stock_data,
                        args=(directory_path, stock_code),
                    )
                )
        # close() and join() must run inside the ``with`` block: leaving the
        # block calls terminate(), which would stop tasks still in flight.
        pool.close()
        pool.join()
    # apply_async keeps a worker's exception on its result object; without
    # get() a failed task would go completely unnoticed.
    for task in pending:
        task.get()
# The guard is required for multiprocessing: with the "spawn" start method
# each worker re-imports this module, and an unguarded main() call would make
# every worker try to start its own pool.
if __name__ == '__main__':
    main()