본문 바로가기
Python/selenium

Python) 멀티프로세싱을 이용한 selenium 웹크롤링

by 유노파이 2022. 2. 3.

 

# -*- coding: utf-8 -*-

import sys
import os
import subprocess
import time
import multiprocessing

from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.alert import Alert
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC


#------------------------------------------------------------
def do_multi(code):
    """Open Naver in a dedicated Chrome session and submit one search.

    Meant to be used as a multiprocessing worker: every call starts its own
    browser and always shuts it down again before returning.

    Args:
        code: Selects the search keyword. ``1`` searches for "google"; any
            other value searches for "daum".

    Returns:
        None.

    Raises:
        selenium.common.exceptions.TimeoutException: If the search form does
            not appear, or the browser does not leave the landing page after
            the search is submitted, within the wait timeout.
    """
    wait_seconds = 10

    print(str(code) + "번 프로세스 실행")  # printed once per worker (1, 2)

    chrome_options = Options()
    # Optional switches, left disabled:
    # chrome_options.add_argument('headless')
    # chrome_options.add_argument('window-size=1920x1080')
    # chrome_options.add_argument('--blink-settings=imagesEnabled=false')
    chrome_options.add_argument("disable-gpu")
    chrome_options.add_argument("user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) "
                                "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36")

    # NOTE(review): the chromedriver path is passed positionally; newer
    # Selenium releases may expect a Service object instead -- confirm against
    # the installed Selenium version before changing this call.
    driver = webdriver.Chrome(r'C:\Python27\chromedriver.exe',
                              options=chrome_options)
    try:
        wait = WebDriverWait(driver, wait_seconds)

        # Open Naver and wait for the search form instead of sleeping for a
        # fixed time, so a slow page load cannot make querySelector return null.
        driver.get('http://www.naver.com/')
        wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#query")))
        wait.until(EC.presence_of_element_located(
            (By.CSS_SELECTOR, "#search_btn > span.ico_search_submit")))

        # code 1 searches for "google"; everything else searches for "daum".
        keyword = "google" if code == 1 else "daum"

        # Hand the keyword over as a script argument rather than splicing it
        # into the JavaScript source.
        driver.execute_script(
            'document.querySelector("#query").value = arguments[0]', keyword)

        # Submit by clicking the search button, then wait until the browser
        # has navigated away so quitting does not cut the search short.
        landing_url = driver.current_url
        driver.execute_script('document.querySelector("#search_btn > span.ico_search_submit").click()')
        wait.until(EC.url_changes(landing_url))
    finally:
        # Always release the browser and the chromedriver process, even when
        # one of the steps above raised.
        driver.quit()
    
#------------------------------------------------------------          
def main():
    """Run one ``do_multi`` browser job per search code in parallel processes.

    Blocks until every job has finished. The worker pool is closed and joined
    even if a job raises, so no worker processes are left behind.
    """
    code_list = [1, 2]

    print('--- start _multiprocessing')

    # Reported for information only; it does not influence the pool size.
    cpu_count = multiprocessing.cpu_count()
    print('--- cpu_count ', cpu_count)

    # One worker per job so that all browser sessions run at the same time.
    pool = multiprocessing.Pool(len(code_list))
    try:
        # Blocks until every job has returned (or re-raises a job's exception).
        pool.map(do_multi, code_list)
    finally:
        # Stop accepting work and wait for the workers to exit.
        pool.close()
        pool.join()
        
        
# Start the crawl only when this file is executed as a script. multiprocessing
# may import this module again inside child processes (e.g. with the spawn
# start method), and those imports must not launch the crawl a second time.
if __name__ == "__main__":
    main()

댓글