์น ์๋ํ
Selenium์ ์ฌ์ฉํ๋ฉด ์น ๋ธ๋ผ์ฐ์ ๋ฅผ ํ๋ก๊ทธ๋๋ฐ ๋ฐฉ์์ผ๋ก ์ ์ดํ ์ ์์ต๋๋ค. ๋ฐ๋ณต์ ์ธ ์น ์์ , ํ ์คํธ, ๋ฐ์ดํฐ ์์ง ๋ฑ์ ์๋ํํด๋ณด๊ฒ ์ต๋๋ค.
설치하기
Selenium 설치
# Selenium ์ค์น
pip install selenium
# WebDriver Manager (๋๋ผ์ด๋ฒ ์๋ ๊ด๋ฆฌ)
pip install webdriver-manager
๋ธ๋ผ์ฐ์ ๋๋ผ์ด๋ฒโ
Selenium์ ๋ธ๋ผ์ฐ์ ๋ฅผ ์ ์ดํ๊ธฐ ์ํด ๋๋ผ์ด๋ฒ๊ฐ ํ์ํฉ๋๋ค.
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.firefox.service import Service as FirefoxService
from webdriver_manager.chrome import ChromeDriverManager
from webdriver_manager.firefox import GeckoDriverManager

# Chrome: install a matching driver automatically and start the browser.
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))

# Or Firefox: geckodriver must be wrapped in the Firefox Service class,
# not the Chrome one imported above.
driver = webdriver.Firefox(service=FirefoxService(GeckoDriverManager().install()))
기본 사용법
브라우저 열고 닫기
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
# ๋ธ๋ผ์ฐ์ ์คํ
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
# ์น ํ์ด์ง ์ด๊ธฐ
driver.get("https://www.example.com")
# ํ์ด์ง ์ ๋ชฉ ์ถ๋ ฅ
print(driver.title)
# ํ์ฌ URL ํ์ธ
print(driver.current_url)
# ๋ธ๋ผ์ฐ์ ๋ซ๊ธฐ
driver.quit() # ๋ชจ๋ ์ฐฝ ๋ซ๊ธฐ
# driver.close() # ํ์ฌ ์ฐฝ๋ง ๋ซ๊ธฐ
요소 찾기
from selenium.webdriver.common.by import By

# Find by ID
element = driver.find_element(By.ID, "username")
# Find by class name
element = driver.find_element(By.CLASS_NAME, "btn-primary")
# Find by tag name
element = driver.find_element(By.TAG_NAME, "h1")
# Find by CSS selector
element = driver.find_element(By.CSS_SELECTOR, "div.container > p")
# Find by XPath
element = driver.find_element(By.XPATH, "//button[@type='submit']")
# Find by link text (exact, then partial match)
element = driver.find_element(By.LINK_TEXT, "로그인")
element = driver.find_element(By.PARTIAL_LINK_TEXT, "자세히")
# Find multiple elements (returns a list)
elements = driver.find_elements(By.CLASS_NAME, "product-item")
요소와 상호작용
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

# Type text
search_box = driver.find_element(By.NAME, "q")
search_box.send_keys("Python Selenium")
# Press the Enter key
search_box.send_keys(Keys.RETURN)
# Click a button
button = driver.find_element(By.ID, "submit-btn")
button.click()
# Read the element's text
text = element.text
# Read attributes
href = element.get_attribute("href")
class_name = element.get_attribute("class")
# Clear an input field
search_box.clear()
# Check whether a checkbox is selected.
# NOTE(review): the original used `checkbox` without defining it; this
# locator is a placeholder - adjust it to the page under test.
checkbox = driver.find_element(By.CSS_SELECTOR, "input[type='checkbox']")
is_selected = checkbox.is_selected()
# Check whether the element is displayed
is_displayed = element.is_displayed()
# Check whether the element is enabled
is_enabled = element.is_enabled()
대기 처리
암묵적 대기
# ํ์ด์ง ๋ก๋ฉ์ ์ต๋ 10์ด๊น์ง ๊ธฐ๋ค๋ฆผ
driver.implicitly_wait(10)
# ์ดํ ๋ชจ๋ ์์ ์ฐพ๊ธฐ์ ์ ์ฉ๋จ
element = driver.find_element(By.ID, "dynamic-content")
명시적 대기
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By

# Wait up to 10 seconds for a given condition to hold
wait = WebDriverWait(driver, 10)
# Wait until the element is present
element = wait.until(
    EC.presence_of_element_located((By.ID, "dynamic-element"))
)
# Wait until the element is clickable
element = wait.until(
    EC.element_to_be_clickable((By.ID, "submit-btn"))
)
# Wait until the element is visible
element = wait.until(
    EC.visibility_of_element_located((By.CLASS_NAME, "popup"))
)
# Wait until the title contains the given text
wait.until(EC.title_contains("검색 결과"))
# Wait until an alert appears
wait.until(EC.alert_is_present())
명시적 대기 조건
from selenium.webdriver.support import expected_conditions as EC

# Frequently used conditions. The names are referenced without being called so
# this listing runs as-is: calling the locator/text-based ones with no
# arguments raises TypeError. Call each one with its arguments when passing it
# to wait.until(), e.g. EC.title_contains("text").
EC.presence_of_element_located  # element exists in the DOM
EC.visibility_of_element_located  # element is visible
EC.element_to_be_clickable  # element is clickable
EC.invisibility_of_element_located  # element is not visible
EC.text_to_be_present_in_element  # element contains the given text
EC.title_contains  # title contains the text
EC.title_is  # title matches exactly
EC.url_contains  # URL contains the text
EC.alert_is_present  # an alert is open
EC.frame_to_be_available_and_switch_to_it  # frame can be switched to
고급 기능
스크롤
# ํ์ด์ง ๋๊น์ง ์คํฌ๋กค
driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
# ํน์ ์์น๋ก ์คํฌ๋กค
driver.execute_script("window.scrollTo(0, 500);")
# ํน์ ์์๊น์ง ์คํฌ๋กค
element = driver.find_element(By.ID, "footer")
driver.execute_script("arguments[0].scrollIntoView();", element)
# ๋ฌดํ ์คํฌ๋กค ์ฒ๋ฆฌ
import time
last_height = driver.execute_script("return document.body.scrollHeight")
while True:
# ๋๊น์ง ์คํฌ๋กค
driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
time.sleep(2)
# ์๋ก์ด ๋์ด ๊ณ์ฐ
new_height = driver.execute_script("return document.body.scrollHeight")
if new_height == last_height:
break
last_height = new_height
드롭다운 선택
from selenium.webdriver.support.ui import Select
# Select ๊ฐ์ฒด ์์ฑ
select_element = driver.find_element(By.ID, "country")
select = Select(select_element)
# ์ธ๋ฑ์ค๋ก ์ ํ
select.select_by_index(2)
# ๊ฐ์ผ๋ก ์ ํ
select.select_by_value("kr")
# ๋ณด์ด๋ ํ
์คํธ๋ก ์ ํ
select.select_by_visible_text("๋ ํ๋ฏผ๊ตญ")
# ํ์ฌ ์ ํ๋ ์ต์
selected_option = select.first_selected_option
print(selected_option.text)
# ๋ชจ๋ ์ต์
๊ฐ์ ธ์ค๊ธฐ
all_options = select.options
for option in all_options:
print(option.text)
창/탭 관리
# ํ์ฌ ์ฐฝ ํธ๋ค
current_window = driver.current_window_handle
# ๋ชจ๋ ์ฐฝ ํธ๋ค
all_windows = driver.window_handles
# ์ ํญ ์ด๊ธฐ
driver.execute_script("window.open('https://www.example.com');")
# ์ ์ฐฝ์ผ๋ก ์ ํ
driver.switch_to.window(driver.window_handles[1])
# ์๋ ์ฐฝ์ผ๋ก ๋์๊ฐ๊ธฐ
driver.switch_to.window(current_window)
# ํ์ฌ ์ฐฝ ๋ซ๊ธฐ
driver.close()
iframe 처리
# iframe์ผ๋ก ์ ํ
iframe = driver.find_element(By.ID, "iframe-id")
driver.switch_to.frame(iframe)
# ๋๋ ์ธ๋ฑ์ค๋ก
driver.switch_to.frame(0)
# iframe ๋ด๋ถ ์์ ์ ๊ทผ
element = driver.find_element(By.ID, "element-in-iframe")
# ๊ธฐ๋ณธ ์ปจํ
์ธ ๋ก ๋์๊ฐ๊ธฐ
driver.switch_to.default_content()
알림창 처리
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Wait for the alert to appear
wait = WebDriverWait(driver, 10)
alert = wait.until(EC.alert_is_present())
# Read the alert text
alert_text = alert.text
print(alert_text)
# NOTE(review): the calls below read as alternatives for one open dialog
# (accept / dismiss / answer a prompt); presumably only one of them applies
# per dialog - confirm before running them back to back.
# Accept the alert (OK button)
alert.accept()
# Dismiss the alert (Cancel button)
alert.dismiss()
# Type text into a prompt
alert.send_keys("입력할 텍스트")
alert.accept()
스크린샷
# ์ ์ฒด ํ์ด์ง ์คํฌ๋ฆฐ์ท
driver.save_screenshot("screenshot.png")
# ํน์ ์์๋ง ์คํฌ๋ฆฐ์ท
element = driver.find_element(By.ID, "logo")
element.screenshot("element.png")
# ๋ฐ์ด๋๋ฆฌ ๋ฐ์ดํฐ๋ก ๊ฐ์ ธ์ค๊ธฐ
screenshot = driver.get_screenshot_as_png()
쿠키 관리
# ๋ชจ๋ ์ฟ ํค ๊ฐ์ ธ์ค๊ธฐ
cookies = driver.get_cookies()
print(cookies)
# ํน์ ์ฟ ํค ๊ฐ์ ธ์ค๊ธฐ
cookie = driver.get_cookie("session_id")
# ์ฟ ํค ์ถ๊ฐ
driver.add_cookie({
"name": "test_cookie",
"value": "test_value"
})
# ์ฟ ํค ์ญ์
driver.delete_cookie("cookie_name")
# ๋ชจ๋ ์ฟ ํค ์ญ์
driver.delete_all_cookies()
실전 예제
1. 로그인 자동화
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
import time
def auto_login(url, username, password):
    """Log in to a website through a freshly started Chrome browser.

    Opens ``url``, fills the ``username`` and ``password`` fields, clicks the
    submit button and waits (up to 10 seconds) for the URL to change.

    Args:
        url: Address of the login page.
        username: Value typed into the element with id "username".
        password: Value typed into the element with id "password".

    Returns:
        The live WebDriver when the post-login URL contains "dashboard" or
        "home" (the caller is responsible for quitting it). Otherwise None;
        in that case the browser has already been closed.
    """
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
    try:
        # Open the login page
        driver.get(url)
        print(f"접속: {url}")

        wait = WebDriverWait(driver, 10)

        # Username: wait for the field, since the page may still be loading
        username_field = wait.until(
            EC.presence_of_element_located((By.ID, "username"))
        )
        username_field.clear()
        username_field.send_keys(username)
        print("아이디 입력 완료")

        # Password
        password_field = driver.find_element(By.ID, "password")
        password_field.clear()
        password_field.send_keys(password)
        print("비밀번호 입력 완료")

        # Submit the form
        login_button = driver.find_element(By.CSS_SELECTOR, "button[type='submit']")
        login_button.click()
        print("로그인 버튼 클릭")

        # A URL change is taken as the sign that the form was processed
        wait.until(EC.url_changes(url))

        landing_url = driver.current_url
        if "dashboard" in landing_url or "home" in landing_url:
            print("로그인 성공!")
            return driver
        print("로그인 실패 - URL 확인 필요")
    except Exception as e:
        print(f"오류 발생: {e}")

    # Failure paths (unexpected landing page or any error) must not leak the
    # browser process: the caller only receives None and cannot close it.
    driver.quit()
    return None
# ์ฌ์ฉ ์์
driver = auto_login(
url="https://example.com/login",
username="user@example.com",
password="password123"
)
if driver:
# ๋ก๊ทธ์ธ ํ ์์
์ํ
time.sleep(3)
driver.quit()
2. 폼 자동 작성
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import Select
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
import time
def fill_registration_form(form_data):
    """Fill in the sign-up form at https://example.com/register without submitting it.

    A screenshot of the filled form is saved to "form_filled.png" and the
    browser is always closed afterwards.

    Args:
        form_data: Mapping with the keys 'name', 'email', 'phone', 'password',
            'gender' ('male' clicks the male radio button, any other value
            the female one), 'birth_year', 'birth_month', 'birth_day' (option
            values of the matching drop-downs) and 'interests' (iterable of
            checkbox ``value`` attributes).
    """
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
    try:
        # Open the sign-up page
        driver.get("https://example.com/register")

        # Basic information
        driver.find_element(By.ID, "name").send_keys(form_data['name'])
        driver.find_element(By.ID, "email").send_keys(form_data['email'])
        driver.find_element(By.ID, "phone").send_keys(form_data['phone'])

        # Password and its confirmation
        driver.find_element(By.ID, "password").send_keys(form_data['password'])
        driver.find_element(By.ID, "confirm_password").send_keys(form_data['password'])

        # Gender (radio buttons)
        if form_data['gender'] == 'male':
            driver.find_element(By.ID, "male").click()
        else:
            driver.find_element(By.ID, "female").click()

        # Date of birth (drop-downs)
        year_select = Select(driver.find_element(By.ID, "birth_year"))
        year_select.select_by_value(form_data['birth_year'])
        month_select = Select(driver.find_element(By.ID, "birth_month"))
        month_select.select_by_value(form_data['birth_month'])
        day_select = Select(driver.find_element(By.ID, "birth_day"))
        day_select.select_by_value(form_data['birth_day'])

        # Interests (checkboxes): only click boxes that are not ticked yet
        for interest in form_data['interests']:
            checkbox = driver.find_element(By.CSS_SELECTOR, f"input[value='{interest}']")
            if not checkbox.is_selected():
                checkbox.click()

        # Accept the terms; the click is issued through JavaScript
        terms_checkbox = driver.find_element(By.ID, "terms")
        driver.execute_script("arguments[0].click();", terms_checkbox)

        # Save a screenshot of the filled form
        driver.save_screenshot("form_filled.png")
        print("폼 작성 완료 - 스크린샷 저장됨")

        # Submit click is left disabled on purpose: the example never submits
        # driver.find_element(By.ID, "submit").click()
        time.sleep(2)
    finally:
        driver.quit()
# ์ฌ์ฉ ์์
form_data = {
'name': 'ํ๊ธธ๋',
'email': 'hong@example.com',
'phone': '010-1234-5678',
'password': 'SecurePass123!',
'gender': 'male',
'birth_year': '1990',
'birth_month': '5',
'birth_day': '15',
'interests': ['sports', 'music', 'travel']
}
fill_registration_form(form_data)
3. 웹 스크래핑 (동적 페이지)
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
import time
import csv
def scrape_product_list(url, max_pages=3):
    """Scrape product entries from a paginated listing using headless Chrome.

    Pages are requested as ``{url}?page=N`` for N in 1..max_pages. A product
    whose fields cannot be read or parsed is reported and skipped.

    Args:
        url: Base URL of the product listing.
        max_pages: Number of pages to visit, starting from page 1.

    Returns:
        A list of dicts with the keys 'name' (str), 'price' (int),
        'rating' (float) and 'link' (str).
    """
    # Headless mode (no browser window)
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    options.add_argument('--disable-gpu')
    options.add_argument('--no-sandbox')
    driver = webdriver.Chrome(
        service=Service(ChromeDriverManager().install()),
        options=options
    )
    products = []
    try:
        # One wait object serves every page
        wait = WebDriverWait(driver, 10)
        for page in range(1, max_pages + 1):
            print(f"페이지 {page} 스크래핑 중...")

            # Load the page and wait for the product items to exist
            driver.get(f"{url}?page={page}")
            wait.until(
                EC.presence_of_all_elements_located((By.CLASS_NAME, "product-item"))
            )

            # Scroll to the bottom so lazily loaded images get requested
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(2)

            collected_before = len(products)
            for element in driver.find_elements(By.CLASS_NAME, "product-item"):
                try:
                    name = element.find_element(By.CLASS_NAME, "product-name").text
                    # Strip the currency suffix and thousands separators
                    price = element.find_element(By.CLASS_NAME, "product-price").text
                    price = price.replace('원', '').replace(',', '')
                    rating = element.find_element(By.CLASS_NAME, "rating").get_attribute("data-rating")
                    link = element.find_element(By.TAG_NAME, "a").get_attribute("href")
                    products.append({
                        'name': name,
                        'price': int(price),
                        'rating': float(rating),
                        'link': link
                    })
                except Exception as e:
                    # Best effort: one malformed item must not abort the page
                    print(f"상품 추출 오류: {e}")

            # Report what was actually collected, not just what was found
            print(f"페이지 {page}: {len(products) - collected_before}개 상품 수집")

            # Pause before requesting the next page
            time.sleep(1)
    finally:
        driver.quit()
    return products
def save_to_csv(products, filename):
    """Write product dicts to a CSV file and print a summary line.

    Args:
        products: Iterable of dicts with the keys 'name', 'price', 'rating'
            and 'link'.
        filename: Path of the CSV file to create or overwrite.
    """
    # utf-8-sig writes a BOM at the start of the file
    with open(filename, 'w', newline='', encoding='utf-8-sig') as f:
        writer = csv.DictWriter(f, fieldnames=['name', 'price', 'rating', 'link'])
        writer.writeheader()
        writer.writerows(products)
    print(f"\n총 {len(products)}개 상품 저장: {filename}")
# ์ฌ์ฉ ์์
products = scrape_product_list("https://example.com/products", max_pages=3)
save_to_csv(products, "products.csv")
# ๊ฐ๊ฒฉ ์์ผ๋ก ์ ๋ ฌ
products_sorted = sorted(products, key=lambda x: x['price'])
print("\n์ต์ ๊ฐ ์ํ TOP 5:")
for product in products_sorted[:5]:
print(f"{product['name']}: {product['price']:,}์ (ํ์ : {product['rating']})")
4. 파일 다운로드 자동화
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
import os
import time
def download_files(url, download_dir):
    """Click every ``a[download]`` link on a page and list the downloaded files.

    Args:
        url: Page containing the download links.
        download_dir: Directory to download into; created if missing.
    """
    # Resolve to an absolute path before handing it to Chrome; a relative
    # path is not reliably honoured by the download preference.
    download_dir = os.path.abspath(download_dir)
    os.makedirs(download_dir, exist_ok=True)

    # Chrome options: download silently into download_dir
    options = webdriver.ChromeOptions()
    prefs = {
        "download.default_directory": download_dir,
        "download.prompt_for_download": False,
        "download.directory_upgrade": True,
        "safebrowsing.enabled": True
    }
    options.add_experimental_option("prefs", prefs)
    driver = webdriver.Chrome(
        service=Service(ChromeDriverManager().install()),
        options=options
    )
    try:
        driver.get(url)
        print(f"접속: {url}")

        # Find the download links
        wait = WebDriverWait(driver, 10)
        download_links = wait.until(
            EC.presence_of_all_elements_located((By.CSS_SELECTOR, "a[download]"))
        )
        print(f"{len(download_links)}개의 다운로드 링크 발견")

        # Download each file
        for i, link in enumerate(download_links, 1):
            filename = link.get_attribute("download") or f"file_{i}"
            print(f"{i}. 다운로드 중: {filename}")
            link.click()
            time.sleep(2)  # give the download time to start

        # Fixed grace period for the downloads to finish
        print("\n다운로드 완료 대기 중...")
        time.sleep(5)

        # Report what ended up in the directory
        downloaded_files = os.listdir(download_dir)
        print(f"\n다운로드 완료: {len(downloaded_files)}개 파일")
        for file in downloaded_files:
            file_path = os.path.join(download_dir, file)
            size_mb = os.path.getsize(file_path) / (1024 * 1024)
            print(f" - {file} ({size_mb:.2f} MB)")
    finally:
        driver.quit()
# ์ฌ์ฉ ์์
download_files(
url="https://example.com/downloads",
download_dir="/Users/username/Downloads/auto_downloads"
)
5. 반복 작업 스케줄링
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
import time
from datetime import datetime
def check_stock_availability(url, product_name):
    """Search a shop for a product and report whether it is in stock.

    Args:
        url: Page that contains the search box (an input named "q").
        product_name: Text to search for.

    Returns:
        True when the stock status text signals availability (a notification
        is sent first), False when it does not, and None when the status
        element could not be read.
    """
    # Imported locally so the block does not depend on a file-level import
    from selenium.common.exceptions import WebDriverException

    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    driver = webdriver.Chrome(
        service=Service(ChromeDriverManager().install()),
        options=options
    )
    try:
        driver.get(url)

        # Search for the product
        search_box = driver.find_element(By.NAME, "q")
        search_box.send_keys(product_name)
        search_box.submit()
        time.sleep(2)

        # Taken before the lookup so the failure branch can use it as well
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        try:
            stock_status = driver.find_element(By.CLASS_NAME, "stock-status").text
        except WebDriverException:
            print(f"[{timestamp}] ? {product_name}: 상태 확인 불가")
            return None

        if "재고 있음" in stock_status or "구매가능" in stock_status:
            print(f"[{timestamp}] ✓ {product_name}: 재고 있음!")
            # Send a notification (e.g. e-mail, message)
            send_notification(product_name)
            return True
        print(f"[{timestamp}] ✗ {product_name}: 품절")
        return False
    finally:
        driver.quit()
def send_notification(product_name):
    """Announce that ``product_name`` is back in stock (example: prints only)."""
    print(f"🔔 알림: {product_name} 재고 확보!")
    # A real implementation would notify via e-mail, SMS, a messenger, etc.
def monitor_stock(url, product_name, interval=60):
    """Poll the stock status until the product is available.

    Args:
        url: Passed through to check_stock_availability.
        product_name: Product to look for.
        interval: Seconds to sleep between checks.
    """
    print(f"재고 모니터링 시작: {product_name}")
    print(f"확인 주기: {interval}초\n")
    while True:
        in_stock = check_stock_availability(url, product_name)
        # False (sold out) and None (status unknown) both keep polling
        if in_stock:
            print("\n재고 확인 완료 - 모니터링 종료")
            break
        # Wait until the next check
        time.sleep(interval)
# ์ฌ์ฉ ์์
monitor_stock(
url="https://example.com",
product_name="์ธ๊ธฐ ์ํ",
interval=60 # 60์ด๋ง๋ค ํ์ธ
)
Headless 모드
브라우저 창을 띄우지 않고 백그라운드에서 실행합니다.
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
# Chrome Headless ๋ชจ๋
options = webdriver.ChromeOptions()
options.add_argument('--headless')
options.add_argument('--disable-gpu')
options.add_argument('--no-sandbox')
options.add_argument('--window-size=1920,1080')
driver = webdriver.Chrome(
service=Service(ChromeDriverManager().install()),
options=options
)
# ์ฌ์ฉ
driver.get("https://www.example.com")
print(driver.title)
driver.quit()
User Agent ๋ณ๊ฒฝโ
๋ด ํ์ง๋ฅผ ํผํ๊ธฐ ์ํด User Agent๋ฅผ ๋ณ๊ฒฝํ ์ ์์ต๋๋ค.
options = webdriver.ChromeOptions()
# Set the User Agent
user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
options.add_argument(f'user-agent={user_agent}')
# Reduce automation fingerprints
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option('useAutomationExtension', False)
driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=options
)
# Hide the navigator.webdriver property. The script is registered through CDP
# so it is evaluated on every new document; execute_script would only patch
# the page that is currently loaded and be lost on the next navigation.
driver.execute_cdp_cmd('Page.addScriptToEvaluateOnNewDocument', {
    'source': "Object.defineProperty(navigator, 'webdriver', {get: () => undefined})"
})
์์ฃผ ๋ฌป๋ ์ง๋ฌธโ
Q1. Selenium์ด ๋๋ฆฐ๋ฐ ๋ ๋น ๋ฅด๊ฒ ํ ์ ์๋์?โ
A: Headless ๋ชจ๋๋ฅผ ์ฌ์ฉํ๊ณ , ์ด๋ฏธ์ง ๋ก๋ฉ์ ๋นํ์ฑํํ์ธ์.
options = webdriver.ChromeOptions()
options.add_argument('--headless')
# ์ด๋ฏธ์ง ๋ก๋ฉ ๋นํ์ฑํ
prefs = {'profile.default_content_setting_values': {'images': 2}}
options.add_experimental_option('prefs', prefs)
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
Q2. 요소를 찾을 수 없다는 오류가 자주 나요.
A: 명시적 대기를 사용하고, 올바른 선택자를 사용하세요.
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# ๋๊ธฐ ์๊ฐ ์ฆ๊ฐ
wait = WebDriverWait(driver, 20)
# ์์๊ฐ ํด๋ฆญ ๊ฐ๋ฅํ ๋๊น์ง ๋๊ธฐ
element = wait.until(EC.element_to_be_clickable((By.ID, "button")))
element.click()
Q3. Selenium vs BeautifulSoup ์ด๋ค ๊ฒ์ ์ฌ์ฉํด์ผ ํ๋์?โ
A: ์ ์ ํ์ด์ง๋ BeautifulSoup, ๋์ ํ์ด์ง๋ Selenium์ ์ฌ์ฉํ์ธ์.
# ์ ์ ํ์ด์ง (๋น ๋ฆ)
import requests
from bs4 import BeautifulSoup
response = requests.get(url)
soup = BeautifulSoup(response.text, 'html.parser')
# ๋์ ํ์ด์ง (JavaScript ์คํ ํ์)
from selenium import webdriver
driver = webdriver.Chrome()
driver.get(url)
Q4. 웹사이트가 봇을 차단하는 것 같아요.
A: 다음 방법들을 시도해보세요:
import random
import time

options = webdriver.ChromeOptions()
# Change the User Agent
options.add_argument('user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36')
# Remove the automation flags
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option('useAutomationExtension', False)
driver = webdriver.Chrome(options=options)
# Hide the navigator.webdriver property on every new document
driver.execute_cdp_cmd('Page.addScriptToEvaluateOnNewDocument', {
    'source': '''
        Object.defineProperty(navigator, 'webdriver', {
            get: () => undefined
        })
    '''
})
# Random pause between requests (`time` was used here without being imported)
time.sleep(random.uniform(1, 3))
Q5. 여러 브라우저를 동시에 실행할 수 있나요?
A: 네, 멀티스레딩이나 멀티프로세싱을 사용하세요.
from concurrent.futures import ThreadPoolExecutor
from selenium import webdriver
def scrape_page(url):
    """Open ``url`` in its own Chrome instance and return the page title.

    The browser is closed even when loading the page fails, so worker threads
    do not leave stray Chrome processes behind.
    """
    driver = webdriver.Chrome()
    try:
        driver.get(url)
        # Do the per-page work here
        return driver.title
    finally:
        driver.quit()
# ๋์์ 5๊ฐ์ ๋ธ๋ผ์ฐ์ ์คํ
urls = ['url1', 'url2', 'url3', 'url4', 'url5']
with ThreadPoolExecutor(max_workers=5) as executor:
results = executor.map(scrape_page, urls)
for result in results:
print(result)
๋ค์ ๋จ๊ณโ
์น ์๋ํ๋ฅผ ๋ฐฐ์ ๋ค๋ฉด, ๋ค์ ์ฃผ์ ๋ก ๋์ด๊ฐ์ธ์:
- ์์ ์ค์ผ์ค๋ง: ์ ๊ธฐ์ ์ธ ์น ํฌ๋กค๋ง ์๋ํํ๊ธฐ
- API ํ์ฉ: requests๋ก ํจ์จ์ ์ธ ๋ฐ์ดํฐ ์์งํ๊ธฐ
- ๋ฐ์ดํฐ ์ ์ฅ: ์์งํ ๋ฐ์ดํฐ๋ฅผ ๋ฐ์ดํฐ๋ฒ ์ด์ค์ ์ ์ฅํ๊ธฐ
์น ์๋ํ๋ ๋ฐ์ดํฐ ์์ง, ํ ์คํธ, ๋ฐ๋ณต ์์ ์๋ํ์ ๋งค์ฐ ์ ์ฉํ ๊ธฐ์ ์ ๋๋ค. ์ค์ ํ๋ก์ ํธ์ ์ ์ฉํด๋ณด์ธ์!