Zum Hauptinhalt springen

웹 자동화

Selenium을 사용하면 웹 브라우저를 프로그래밍 방식으로 제어할 수 있습니다. 반복적인 웹 작업, 테스트, 데이터 수집 등을 자동화해보겠습니다.

설치하기

Selenium 설치

# Selenium ์„ค์น˜
pip install selenium

# WebDriver Manager (๋“œ๋ผ์ด๋ฒ„ ์ž๋™ ๊ด€๋ฆฌ)
pip install webdriver-manager

๋ธŒ๋ผ์šฐ์ € ๋“œ๋ผ์ด๋ฒ„โ€‹

Selenium์€ ๋ธŒ๋ผ์šฐ์ €๋ฅผ ์ œ์–ดํ•˜๊ธฐ ์œ„ํ•ด ๋“œ๋ผ์ด๋ฒ„๊ฐ€ ํ•„์š”ํ•ฉ๋‹ˆ๋‹ค.

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager

# Chrome ๋“œ๋ผ์ด๋ฒ„ ์ž๋™ ์„ค์น˜ ๋ฐ ์‹คํ–‰
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))

# Or Firefox. GeckoDriver must be wrapped in Firefox's own Service class;
# the Chrome Service imported above is the wrong type for webdriver.Firefox.
from selenium.webdriver.firefox.service import Service as FirefoxService
from webdriver_manager.firefox import GeckoDriverManager
driver = webdriver.Firefox(service=FirefoxService(GeckoDriverManager().install()))

기본 사용법

브라우저 열고 닫기

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager

# ๋ธŒ๋ผ์šฐ์ € ์‹คํ–‰
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))

# ์›น ํŽ˜์ด์ง€ ์—ด๊ธฐ
driver.get("https://www.example.com")

# ํŽ˜์ด์ง€ ์ œ๋ชฉ ์ถœ๋ ฅ
print(driver.title)

# ํ˜„์žฌ URL ํ™•์ธ
print(driver.current_url)

# ๋ธŒ๋ผ์šฐ์ € ๋‹ซ๊ธฐ
driver.quit() # ๋ชจ๋“  ์ฐฝ ๋‹ซ๊ธฐ
# driver.close() # ํ˜„์žฌ ์ฐฝ๋งŒ ๋‹ซ๊ธฐ

요소 찾기

from selenium.webdriver.common.by import By

# ID๋กœ ์ฐพ๊ธฐ
element = driver.find_element(By.ID, "username")

# ํด๋ž˜์Šค๋ช…์œผ๋กœ ์ฐพ๊ธฐ
element = driver.find_element(By.CLASS_NAME, "btn-primary")

# ํƒœ๊ทธ๋ช…์œผ๋กœ ์ฐพ๊ธฐ
element = driver.find_element(By.TAG_NAME, "h1")

# CSS ์„ ํƒ์ž๋กœ ์ฐพ๊ธฐ
element = driver.find_element(By.CSS_SELECTOR, "div.container > p")

# XPath๋กœ ์ฐพ๊ธฐ
element = driver.find_element(By.XPATH, "//button[@type='submit']")

# ๋งํฌ ํ…์ŠคํŠธ๋กœ ์ฐพ๊ธฐ
element = driver.find_element(By.LINK_TEXT, "๋กœ๊ทธ์ธ")
element = driver.find_element(By.PARTIAL_LINK_TEXT, "์ž์„ธํžˆ")

# ์—ฌ๋Ÿฌ ์š”์†Œ ์ฐพ๊ธฐ (๋ฆฌ์ŠคํŠธ ๋ฐ˜ํ™˜)
elements = driver.find_elements(By.CLASS_NAME, "product-item")

요소와 상호작용

from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

# ํ…์ŠคํŠธ ์ž…๋ ฅ
search_box = driver.find_element(By.NAME, "q")
search_box.send_keys("Python Selenium")

# Enter ํ‚ค ์ž…๋ ฅ
search_box.send_keys(Keys.RETURN)

# ๋ฒ„ํŠผ ํด๋ฆญ
button = driver.find_element(By.ID, "submit-btn")
button.click()

# ํ…์ŠคํŠธ ๊ฐ€์ ธ์˜ค๊ธฐ
text = element.text

# ์†์„ฑ ๊ฐ€์ ธ์˜ค๊ธฐ
href = element.get_attribute("href")
class_name = element.get_attribute("class")

# ์ž…๋ ฅ ํ•„๋“œ ๋น„์šฐ๊ธฐ
search_box.clear()

# ์ฒดํฌ๋ฐ•์Šค ์„ ํƒ ์—ฌ๋ถ€ ํ™•์ธ
is_selected = checkbox.is_selected()

# ์š”์†Œ ํ‘œ์‹œ ์—ฌ๋ถ€ ํ™•์ธ
is_displayed = element.is_displayed()

# ์š”์†Œ ํ™œ์„ฑํ™” ์—ฌ๋ถ€ ํ™•์ธ
is_enabled = element.is_enabled()

대기 처리

암묵적 대기

# ํŽ˜์ด์ง€ ๋กœ๋”ฉ์„ ์ตœ๋Œ€ 10์ดˆ๊นŒ์ง€ ๊ธฐ๋‹ค๋ฆผ
driver.implicitly_wait(10)

# ์ดํ›„ ๋ชจ๋“  ์š”์†Œ ์ฐพ๊ธฐ์— ์ ์šฉ๋จ
element = driver.find_element(By.ID, "dynamic-content")

명시적 대기

from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By

# ํŠน์ • ์กฐ๊ฑด์ด ๋งŒ์กฑ๋  ๋•Œ๊นŒ์ง€ ์ตœ๋Œ€ 10์ดˆ ๋Œ€๊ธฐ
wait = WebDriverWait(driver, 10)

# ์š”์†Œ๊ฐ€ ๋‚˜ํƒ€๋‚  ๋•Œ๊นŒ์ง€ ๋Œ€๊ธฐ
element = wait.until(
EC.presence_of_element_located((By.ID, "dynamic-element"))
)

# ์š”์†Œ๊ฐ€ ํด๋ฆญ ๊ฐ€๋Šฅํ•  ๋•Œ๊นŒ์ง€ ๋Œ€๊ธฐ
element = wait.until(
EC.element_to_be_clickable((By.ID, "submit-btn"))
)

# ์š”์†Œ๊ฐ€ ๋ณด์ผ ๋•Œ๊นŒ์ง€ ๋Œ€๊ธฐ
element = wait.until(
EC.visibility_of_element_located((By.CLASS_NAME, "popup"))
)

# ์ œ๋ชฉ์— ํŠน์ • ํ…์ŠคํŠธ๊ฐ€ ํฌํ•จ๋  ๋•Œ๊นŒ์ง€ ๋Œ€๊ธฐ
wait.until(EC.title_contains("๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ"))

# ์•Œ๋ฆผ์ฐฝ์ด ๋‚˜ํƒ€๋‚  ๋•Œ๊นŒ์ง€ ๋Œ€๊ธฐ
wait.until(EC.alert_is_present())

명시적 대기 조건

from selenium.webdriver.support import expected_conditions as EC

# ์ž์ฃผ ์‚ฌ์šฉ๋˜๋Š” ์กฐ๊ฑด๋“ค
EC.presence_of_element_located() # ์š”์†Œ๊ฐ€ DOM์— ์กด์žฌ
EC.visibility_of_element_located() # ์š”์†Œ๊ฐ€ ๋ณด์ž„
EC.element_to_be_clickable() # ์š”์†Œ๊ฐ€ ํด๋ฆญ ๊ฐ€๋Šฅ
EC.invisibility_of_element_located() # ์š”์†Œ๊ฐ€ ์•ˆ ๋ณด์ž„
EC.text_to_be_present_in_element() # ์š”์†Œ์— ํŠน์ • ํ…์ŠคํŠธ ์กด์žฌ
EC.title_contains() # ์ œ๋ชฉ์— ํ…์ŠคํŠธ ํฌํ•จ
EC.title_is() # ์ œ๋ชฉ์ด ์ •ํ™•ํžˆ ์ผ์น˜
EC.url_contains() # URL์— ํ…์ŠคํŠธ ํฌํ•จ
EC.alert_is_present() # ์•Œ๋ฆผ์ฐฝ ์กด์žฌ
EC.frame_to_be_available_and_switch_to_it() # ํ”„๋ ˆ์ž„ ์ „ํ™˜ ๊ฐ€๋Šฅ

고급 기능

스크롤

# ํŽ˜์ด์ง€ ๋๊นŒ์ง€ ์Šคํฌ๋กค
driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

# ํŠน์ • ์œ„์น˜๋กœ ์Šคํฌ๋กค
driver.execute_script("window.scrollTo(0, 500);")

# ํŠน์ • ์š”์†Œ๊นŒ์ง€ ์Šคํฌ๋กค
element = driver.find_element(By.ID, "footer")
driver.execute_script("arguments[0].scrollIntoView();", element)

# ๋ฌดํ•œ ์Šคํฌ๋กค ์ฒ˜๋ฆฌ
import time

last_height = driver.execute_script("return document.body.scrollHeight")

while True:
# ๋๊นŒ์ง€ ์Šคํฌ๋กค
driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
time.sleep(2)

# ์ƒˆ๋กœ์šด ๋†’์ด ๊ณ„์‚ฐ
new_height = driver.execute_script("return document.body.scrollHeight")

if new_height == last_height:
break

last_height = new_height

드롭다운 선택

from selenium.webdriver.support.ui import Select

# Select ๊ฐ์ฒด ์ƒ์„ฑ
select_element = driver.find_element(By.ID, "country")
select = Select(select_element)

# ์ธ๋ฑ์Šค๋กœ ์„ ํƒ
select.select_by_index(2)

# ๊ฐ’์œผ๋กœ ์„ ํƒ
select.select_by_value("kr")

# ๋ณด์ด๋Š” ํ…์ŠคํŠธ๋กœ ์„ ํƒ
select.select_by_visible_text("๋Œ€ํ•œ๋ฏผ๊ตญ")

# ํ˜„์žฌ ์„ ํƒ๋œ ์˜ต์…˜
selected_option = select.first_selected_option
print(selected_option.text)

# ๋ชจ๋“  ์˜ต์…˜ ๊ฐ€์ ธ์˜ค๊ธฐ
all_options = select.options
for option in all_options:
print(option.text)

창/탭 관리

# ํ˜„์žฌ ์ฐฝ ํ•ธ๋“ค
current_window = driver.current_window_handle

# ๋ชจ๋“  ์ฐฝ ํ•ธ๋“ค
all_windows = driver.window_handles

# ์ƒˆ ํƒญ ์—ด๊ธฐ
driver.execute_script("window.open('https://www.example.com');")

# ์ƒˆ ์ฐฝ์œผ๋กœ ์ „ํ™˜
driver.switch_to.window(driver.window_handles[1])

# ์›๋ž˜ ์ฐฝ์œผ๋กœ ๋Œ์•„๊ฐ€๊ธฐ
driver.switch_to.window(current_window)

# ํ˜„์žฌ ์ฐฝ ๋‹ซ๊ธฐ
driver.close()

iframe 처리

# iframe์œผ๋กœ ์ „ํ™˜
iframe = driver.find_element(By.ID, "iframe-id")
driver.switch_to.frame(iframe)

# ๋˜๋Š” ์ธ๋ฑ์Šค๋กœ
driver.switch_to.frame(0)

# iframe ๋‚ด๋ถ€ ์š”์†Œ ์ ‘๊ทผ
element = driver.find_element(By.ID, "element-in-iframe")

# ๊ธฐ๋ณธ ์ปจํ…์ธ ๋กœ ๋Œ์•„๊ฐ€๊ธฐ
driver.switch_to.default_content()

알림창 처리

from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# ์•Œ๋ฆผ์ฐฝ ๋Œ€๊ธฐ
wait = WebDriverWait(driver, 10)
alert = wait.until(EC.alert_is_present())

# ์•Œ๋ฆผ์ฐฝ ํ…์ŠคํŠธ ๊ฐ€์ ธ์˜ค๊ธฐ
alert_text = alert.text
print(alert_text)

# ์•Œ๋ฆผ์ฐฝ ์ˆ˜๋ฝ (OK ๋ฒ„ํŠผ)
alert.accept()

# ์•Œ๋ฆผ์ฐฝ ์ทจ์†Œ (Cancel ๋ฒ„ํŠผ)
alert.dismiss()

# Prompt์— ํ…์ŠคํŠธ ์ž…๋ ฅ
alert.send_keys("์ž…๋ ฅํ•  ํ…์ŠคํŠธ")
alert.accept()

스크린샷

# ์ „์ฒด ํŽ˜์ด์ง€ ์Šคํฌ๋ฆฐ์ƒท
driver.save_screenshot("screenshot.png")

# ํŠน์ • ์š”์†Œ๋งŒ ์Šคํฌ๋ฆฐ์ƒท
element = driver.find_element(By.ID, "logo")
element.screenshot("element.png")

# ๋ฐ”์ด๋„ˆ๋ฆฌ ๋ฐ์ดํ„ฐ๋กœ ๊ฐ€์ ธ์˜ค๊ธฐ
screenshot = driver.get_screenshot_as_png()

쿠키 관리

# ๋ชจ๋“  ์ฟ ํ‚ค ๊ฐ€์ ธ์˜ค๊ธฐ
cookies = driver.get_cookies()
print(cookies)

# ํŠน์ • ์ฟ ํ‚ค ๊ฐ€์ ธ์˜ค๊ธฐ
cookie = driver.get_cookie("session_id")

# ์ฟ ํ‚ค ์ถ”๊ฐ€
driver.add_cookie({
"name": "test_cookie",
"value": "test_value"
})

# ์ฟ ํ‚ค ์‚ญ์ œ
driver.delete_cookie("cookie_name")

# ๋ชจ๋“  ์ฟ ํ‚ค ์‚ญ์ œ
driver.delete_all_cookies()

실전 예제

1. 로그인 자동화

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
import time

def auto_login(url, username, password):
    """Log in to a website automatically.

    Opens ``url`` in Chrome, fills the fields whose IDs are ``username`` and
    ``password``, clicks the first ``button[type='submit']`` and waits up to
    10 seconds for the URL to change.

    Args:
        url: Address of the login page.
        username: Text typed into the ``username`` field.
        password: Text typed into the ``password`` field.

    Returns:
        The running Chrome driver when the post-login URL contains
        "dashboard" or "home"; the caller is then responsible for calling
        ``quit()`` on it. ``None`` on any failure, in which case the browser
        has already been closed here.
    """
    # Launch the browser
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))

    try:
        # Open the login page
        driver.get(url)
        print(f"์ ‘์†: {url}")

        wait = WebDriverWait(driver, 10)

        # Type the user ID once the field exists in the DOM
        username_field = wait.until(
            EC.presence_of_element_located((By.ID, "username"))
        )
        username_field.clear()
        username_field.send_keys(username)
        print("์•„์ด๋”” ์ž…๋ ฅ ์™„๋ฃŒ")

        # Type the password
        password_field = driver.find_element(By.ID, "password")
        password_field.clear()
        password_field.send_keys(password)
        print("๋น„๋ฐ€๋ฒˆํ˜ธ ์ž…๋ ฅ ์™„๋ฃŒ")

        # Click the login button
        login_button = driver.find_element(By.CSS_SELECTOR, "button[type='submit']")
        login_button.click()
        print("๋กœ๊ทธ์ธ ๋ฒ„ํŠผ ํด๋ฆญ")

        # A URL change is taken as the sign that the form was processed
        wait.until(EC.url_changes(url))

        # Decide success from the page we landed on
        landed_on = driver.current_url
        if "dashboard" in landed_on or "home" in landed_on:
            print("๋กœ๊ทธ์ธ ์„ฑ๊ณต!")
            return driver

        print("๋กœ๊ทธ์ธ ์‹คํŒจ - URL ํ™•์ธ ํ•„์š”")

    except Exception as e:
        # Deliberately broad: any failure (timeout, missing element, ...)
        # is reported and turned into a None result.
        print(f"์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}")

    # Every failure path ends here. The caller only receives None and has no
    # handle to the browser, so it must be closed now to avoid leaking it.
    driver.quit()
    return None

# ์‚ฌ์šฉ ์˜ˆ์ œ
driver = auto_login(
url="https://example.com/login",
username="user@example.com",
password="password123"
)

if driver:
# ๋กœ๊ทธ์ธ ํ›„ ์ž‘์—… ์ˆ˜ํ–‰
time.sleep(3)
driver.quit()

2. 폼 자동 작성

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import Select
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
import time

def fill_registration_form(form_data):
    """Fill in the sign-up form automatically.

    Opens https://example.com/register in Chrome and enters the values from
    ``form_data`` (keys read: name, email, phone, password, gender,
    birth_year, birth_month, birth_day, interests). The form is not
    submitted; a screenshot is saved to ``form_filled.png`` and the browser
    is always closed afterwards.
    """
    browser = webdriver.Chrome(service=Service(ChromeDriverManager().install()))

    try:
        # Open the sign-up page
        browser.get("https://example.com/register")

        # Plain text inputs as (element id, form_data key); the password is
        # typed twice, once into the confirmation field.
        text_inputs = (
            ("name", 'name'),
            ("email", 'email'),
            ("phone", 'phone'),
            ("password", 'password'),
            ("confirm_password", 'password'),
        )
        for element_id, data_key in text_inputs:
            browser.find_element(By.ID, element_id).send_keys(form_data[data_key])

        # Gender radio button: anything other than 'male' selects "female"
        gender_id = "male" if form_data['gender'] == 'male' else "female"
        browser.find_element(By.ID, gender_id).click()

        # Date of birth drop-downs; element id and form_data key are the same
        for part in ("birth_year", "birth_month", "birth_day"):
            Select(browser.find_element(By.ID, part)).select_by_value(form_data[part])

        # Interest checkboxes: tick only the ones not already selected
        for interest in form_data['interests']:
            box = browser.find_element(By.CSS_SELECTOR, f"input[value='{interest}']")
            if box.is_selected():
                continue
            box.click()

        # Accept the terms; clicked through JavaScript rather than a native click
        terms_box = browser.find_element(By.ID, "terms")
        browser.execute_script("arguments[0].click();", terms_box)

        # Keep a screenshot of the completed form
        browser.save_screenshot("form_filled.png")
        print("ํผ ์ž‘์„ฑ ์™„๋ฃŒ - ์Šคํฌ๋ฆฐ์ƒท ์ €์žฅ๋จ")

        # Submit click (left disabled - the form is not actually submitted)
        # browser.find_element(By.ID, "submit").click()

        time.sleep(2)

    finally:
        browser.quit()

# ์‚ฌ์šฉ ์˜ˆ์ œ
form_data = {
'name': 'ํ™๊ธธ๋™',
'email': 'hong@example.com',
'phone': '010-1234-5678',
'password': 'SecurePass123!',
'gender': 'male',
'birth_year': '1990',
'birth_month': '5',
'birth_day': '15',
'interests': ['sports', 'music', 'travel']
}

fill_registration_form(form_data)

3. 웹 스크래핑 (동적 페이지)

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
import time
import csv

def scrape_product_list(url, max_pages=3):
    """Scrape a paginated product list.

    Loads ``{url}?page=N`` for N = 1..``max_pages`` in headless Chrome and
    extracts name, price, rating and link from every ``.product-item``.

    Args:
        url: Base address of the listing; ``?page=N`` is appended to it.
        max_pages: Number of pages to visit, starting at page 1.

    Returns:
        A list of dicts with keys ``name`` (str), ``price`` (int),
        ``rating`` (float) and ``link``. Items whose fields cannot be read
        or converted are reported and skipped.

    Raises:
        Whatever the 10-second wait raises when a page shows no
        ``.product-item`` in time; results collected so far are then lost.
    """
    # Headless mode (no browser window)
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    options.add_argument('--disable-gpu')
    options.add_argument('--no-sandbox')

    driver = webdriver.Chrome(
        service=Service(ChromeDriverManager().install()),
        options=options
    )

    products = []

    try:
        # One wait object serves every page; no need to rebuild it per loop
        wait = WebDriverWait(driver, 10)

        for page in range(1, max_pages + 1):
            print(f"ํŽ˜์ด์ง€ {page} ์Šคํฌ๋ž˜ํ•‘ ์ค‘...")

            # Load the page and wait for the product cards to appear
            driver.get(f"{url}?page={page}")
            wait.until(
                EC.presence_of_all_elements_located((By.CLASS_NAME, "product-item"))
            )

            # Scroll to the bottom, then give the page time to load more content
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(2)

            # Remember where this page starts so the log below can report
            # what was actually stored, not merely how many cards were found.
            collected_before = len(products)

            for element in driver.find_elements(By.CLASS_NAME, "product-item"):
                try:
                    # Product name
                    name = element.find_element(By.CLASS_NAME, "product-name").text

                    # Price: drop the currency suffix and thousands separators
                    price = element.find_element(By.CLASS_NAME, "product-price").text
                    price = price.replace('์›', '').replace(',', '')

                    # Rating is read from the data-rating attribute
                    rating = element.find_element(By.CLASS_NAME, "rating").get_attribute("data-rating")

                    # Link
                    link = element.find_element(By.TAG_NAME, "a").get_attribute("href")

                    products.append({
                        'name': name,
                        'price': int(price),
                        'rating': float(rating),
                        'link': link
                    })

                except Exception as e:
                    # Best effort: one malformed card must not abort the page
                    print(f"์ƒํ’ˆ ์ถ”์ถœ ์˜ค๋ฅ˜: {e}")
                    continue

            print(f"ํŽ˜์ด์ง€ {page}: {len(products) - collected_before}๊ฐœ ์ƒํ’ˆ ์ˆ˜์ง‘")

            # Short pause before requesting the next page
            time.sleep(1)

    finally:
        driver.quit()

    return products

def save_to_csv(products, filename):
    """Write the product dictionaries to ``filename`` as CSV.

    The file gets a header row with the columns name, price, rating and
    link, followed by one row per entry in ``products``. A one-line summary
    is printed once the file has been written.
    """
    columns = ['name', 'price', 'rating', 'link']

    # utf-8-sig puts a BOM at the start of the file; newline='' leaves
    # line-ending handling to the csv module.
    with open(filename, 'w', newline='', encoding='utf-8-sig') as out:
        sheet = csv.DictWriter(out, fieldnames=columns)
        sheet.writeheader()
        for row in products:
            sheet.writerow(row)

    summary = f"\n์ด {len(products)}๊ฐœ ์ƒํ’ˆ ์ €์žฅ: {filename}"
    print(summary)

# ์‚ฌ์šฉ ์˜ˆ์ œ
products = scrape_product_list("https://example.com/products", max_pages=3)
save_to_csv(products, "products.csv")

# ๊ฐ€๊ฒฉ ์ˆœ์œผ๋กœ ์ •๋ ฌ
products_sorted = sorted(products, key=lambda x: x['price'])
print("\n์ตœ์ €๊ฐ€ ์ƒํ’ˆ TOP 5:")
for product in products_sorted[:5]:
print(f"{product['name']}: {product['price']:,}์› (ํ‰์ : {product['rating']})")

4. 파일 다운로드 자동화

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
import os
import time

def download_files(url, download_dir, timeout=60):
    """Download every ``<a download>`` link found on a page.

    Args:
        url: Page that contains the download links.
        download_dir: Folder the files are saved to; created if missing.
            A relative path is resolved against the current working
            directory before it is handed to Chrome.
        timeout: Maximum number of seconds to wait, after the last click,
            for Chrome's in-progress ``.crdownload`` files to disappear.

    Returns:
        None. Progress and the final directory listing are printed.
    """
    # Chrome's download.default_directory preference expects an absolute
    # path, so normalise first and create the folder.
    download_dir = os.path.abspath(download_dir)
    os.makedirs(download_dir, exist_ok=True)

    # Chrome options: save silently into download_dir
    options = webdriver.ChromeOptions()
    prefs = {
        "download.default_directory": download_dir,
        "download.prompt_for_download": False,
        "download.directory_upgrade": True,
        "safebrowsing.enabled": True
    }
    options.add_experimental_option("prefs", prefs)

    driver = webdriver.Chrome(
        service=Service(ChromeDriverManager().install()),
        options=options
    )

    try:
        driver.get(url)
        print(f"์ ‘์†: {url}")

        # Find the download links
        wait = WebDriverWait(driver, 10)
        download_links = wait.until(
            EC.presence_of_all_elements_located((By.CSS_SELECTOR, "a[download]"))
        )

        print(f"{len(download_links)}๊ฐœ์˜ ๋‹ค์šด๋กœ๋“œ ๋งํฌ ๋ฐœ๊ฒฌ")

        # Start each download
        for i, link in enumerate(download_links, 1):
            filename = link.get_attribute("download") or f"file_{i}"
            print(f"{i}. ๋‹ค์šด๋กœ๋“œ ์ค‘: {filename}")

            link.click()
            time.sleep(2)  # give the download time to start

        # Wait until no partial (.crdownload) file is left instead of
        # sleeping for a fixed time; give up after `timeout` seconds.
        print("\n๋‹ค์šด๋กœ๋“œ ์™„๋ฃŒ ๋Œ€๊ธฐ ์ค‘...")
        deadline = time.monotonic() + timeout
        while time.monotonic() < deadline:
            pending = [
                entry for entry in os.listdir(download_dir)
                if entry.endswith(".crdownload")
            ]
            if not pending:
                break
            time.sleep(0.5)

        # Report what ended up in the folder
        downloaded_files = os.listdir(download_dir)
        print(f"\n๋‹ค์šด๋กœ๋“œ ์™„๋ฃŒ: {len(downloaded_files)}๊ฐœ ํŒŒ์ผ")

        for entry in downloaded_files:
            file_path = os.path.join(download_dir, entry)
            size_mb = os.path.getsize(file_path) / (1024 * 1024)
            print(f" - {entry} ({size_mb:.2f} MB)")

    finally:
        driver.quit()

# ์‚ฌ์šฉ ์˜ˆ์ œ
download_files(
url="https://example.com/downloads",
download_dir="/Users/username/Downloads/auto_downloads"
)

5. 반복 작업 스케줄링

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
import time
from datetime import datetime

def check_stock_availability(url, product_name):
    """Check whether a product is in stock.

    Opens ``url`` in headless Chrome, searches for ``product_name`` through
    the input named ``q`` and reads the text of the ``.stock-status`` element.

    Args:
        url: Page that hosts the search box.
        product_name: Search term; also used in the printed messages.

    Returns:
        True when the status text contains one of the in-stock markers (a
        notification is sent first), False when it does not, and None when
        no ``.stock-status`` element is found.
    """
    # Imported here so the block does not depend on a file-level import
    from selenium.common.exceptions import NoSuchElementException

    options = webdriver.ChromeOptions()
    options.add_argument('--headless')

    driver = webdriver.Chrome(
        service=Service(ChromeDriverManager().install()),
        options=options
    )

    try:
        driver.get(url)

        # Search for the product
        search_box = driver.find_element(By.NAME, "q")
        search_box.send_keys(product_name)
        search_box.submit()

        time.sleep(2)

        # Taken BEFORE the lookup: the "status unavailable" branch prints it
        # too, so it must exist even when the element is missing.
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

        # Only the lookup itself is guarded; errors raised later (for
        # example by send_notification) are no longer reported as
        # "status unavailable".
        try:
            stock_status = driver.find_element(By.CLASS_NAME, "stock-status").text
        except NoSuchElementException:
            print(f"[{timestamp}] ? {product_name}: ์ƒํƒœ ํ™•์ธ ๋ถˆ๊ฐ€")
            return None

        if "์žฌ๊ณ ์žˆ์Œ" in stock_status or "๊ตฌ๋งค๊ฐ€๋Šฅ" in stock_status:
            print(f"[{timestamp}] โœ“ {product_name}: ์žฌ๊ณ  ์žˆ์Œ!")
            # Send the alert (e.g. e-mail, message, ...)
            send_notification(product_name)
            return True

        print(f"[{timestamp}] โœ— {product_name}: ํ’ˆ์ ˆ")
        return False

    finally:
        driver.quit()

def send_notification(product_name):
    """Send an alert for ``product_name`` (example: it only prints)."""
    # A real implementation would deliver this by e-mail, SMS, messenger, ...
    message = f"๐Ÿ”” ์•Œ๋ฆผ: {product_name} ์žฌ๊ณ  ํ™•๋ณด!"
    print(message)

def monitor_stock(url, product_name, interval=60):
    """Check the stock status repeatedly until the product is available.

    Calls ``check_stock_availability`` every ``interval`` seconds and returns
    once it reports a truthy result. There is no retry limit.
    """
    print(f"์žฌ๊ณ  ๋ชจ๋‹ˆํ„ฐ๋ง ์‹œ์ž‘: {product_name}")
    print(f"ํ™•์ธ ์ฃผ๊ธฐ: {interval}์ดˆ\n")

    # Both False (sold out) and None (status unknown) mean "keep polling";
    # the pause happens only after an unsuccessful check.
    while not check_stock_availability(url, product_name):
        time.sleep(interval)

    print("\n์žฌ๊ณ  ํ™•์ธ ์™„๋ฃŒ - ๋ชจ๋‹ˆํ„ฐ๋ง ์ข…๋ฃŒ")

# ์‚ฌ์šฉ ์˜ˆ์ œ
monitor_stock(
url="https://example.com",
product_name="์ธ๊ธฐ ์ƒํ’ˆ",
interval=60 # 60์ดˆ๋งˆ๋‹ค ํ™•์ธ
)

Headless 모드

브라우저 창을 띄우지 않고 백그라운드에서 실행합니다.

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager

# Chrome Headless ๋ชจ๋“œ
options = webdriver.ChromeOptions()
options.add_argument('--headless')
options.add_argument('--disable-gpu')
options.add_argument('--no-sandbox')
options.add_argument('--window-size=1920,1080')

driver = webdriver.Chrome(
service=Service(ChromeDriverManager().install()),
options=options
)

# ์‚ฌ์šฉ
driver.get("https://www.example.com")
print(driver.title)
driver.quit()

User Agent 변경

봇 탐지를 피하기 위해 User Agent를 변경할 수 있습니다.

options = webdriver.ChromeOptions()

# User Agent ์„ค์ •
user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
options.add_argument(f'user-agent={user_agent}')

# ์ž๋™ํ™” ํƒ์ง€ ๋ฐฉ์ง€
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option('useAutomationExtension', False)

driver = webdriver.Chrome(
service=Service(ChromeDriverManager().install()),
options=options
)

# Hide the WebDriver property. A plain execute_script() call only patches the
# document that is open right now and is lost on the next navigation, so the
# script is registered through CDP to run at the start of every new document.
driver.execute_cdp_cmd('Page.addScriptToEvaluateOnNewDocument', {
    'source': "Object.defineProperty(navigator, 'webdriver', {get: () => undefined})"
})

์ž์ฃผ ๋ฌป๋Š” ์งˆ๋ฌธโ€‹

Q1. Selenium์ด ๋А๋ฆฐ๋ฐ ๋” ๋น ๋ฅด๊ฒŒ ํ•  ์ˆ˜ ์žˆ๋‚˜์š”?โ€‹

A: Headless ๋ชจ๋“œ๋ฅผ ์‚ฌ์šฉํ•˜๊ณ , ์ด๋ฏธ์ง€ ๋กœ๋”ฉ์„ ๋น„ํ™œ์„ฑํ™”ํ•˜์„ธ์š”.

options = webdriver.ChromeOptions()
options.add_argument('--headless')

# ์ด๋ฏธ์ง€ ๋กœ๋”ฉ ๋น„ํ™œ์„ฑํ™”
prefs = {'profile.default_content_setting_values': {'images': 2}}
options.add_experimental_option('prefs', prefs)

driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

Q2. 요소를 찾을 수 없다는 오류가 자주 나요.

A: 명시적 대기를 사용하고, 올바른 선택자를 사용하세요.

from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# ๋Œ€๊ธฐ ์‹œ๊ฐ„ ์ฆ๊ฐ€
wait = WebDriverWait(driver, 20)

# ์š”์†Œ๊ฐ€ ํด๋ฆญ ๊ฐ€๋Šฅํ•  ๋•Œ๊นŒ์ง€ ๋Œ€๊ธฐ
element = wait.until(EC.element_to_be_clickable((By.ID, "button")))
element.click()

Q3. Selenium vs BeautifulSoup 어떤 것을 사용해야 하나요?

A: 정적 페이지는 BeautifulSoup, 동적 페이지는 Selenium을 사용하세요.

# ์ •์  ํŽ˜์ด์ง€ (๋น ๋ฆ„)
import requests
from bs4 import BeautifulSoup

response = requests.get(url)
soup = BeautifulSoup(response.text, 'html.parser')

# ๋™์  ํŽ˜์ด์ง€ (JavaScript ์‹คํ–‰ ํ•„์š”)
from selenium import webdriver
driver = webdriver.Chrome()
driver.get(url)

Q4. 웹사이트가 봇을 차단하는 것 같아요.

A: 다음 방법들을 시도해보세요:

options = webdriver.ChromeOptions()

# User Agent ๋ณ€๊ฒฝ
options.add_argument('user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36')

# ์ž๋™ํ™” ํ”Œ๋ž˜๊ทธ ์ œ๊ฑฐ
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option('useAutomationExtension', False)

driver = webdriver.Chrome(options=options)

# WebDriver ์†์„ฑ ์ˆจ๊ธฐ๊ธฐ
driver.execute_cdp_cmd('Page.addScriptToEvaluateOnNewDocument', {
'source': '''
Object.defineProperty(navigator, 'webdriver', {
get: () => undefined
})
'''
})

# ์š”์ฒญ ์‚ฌ์ด์— ๋žœ๋ค ๋Œ€๊ธฐ
import random
time.sleep(random.uniform(1, 3))

Q5. 여러 브라우저를 동시에 실행할 수 있나요?

A: 네, 멀티스레딩이나 멀티프로세싱을 사용하세요.

from concurrent.futures import ThreadPoolExecutor
from selenium import webdriver

def scrape_page(url):
    """Open ``url`` in its own Chrome instance and return the page title.

    The browser is closed in ``finally`` so that a failing ``get()`` does not
    leave a Chrome process behind; this matters when many of these run at
    once in a thread pool.
    """
    driver = webdriver.Chrome()
    try:
        driver.get(url)
        # Do the actual work here
        return driver.title
    finally:
        driver.quit()

# ๋™์‹œ์— 5๊ฐœ์˜ ๋ธŒ๋ผ์šฐ์ € ์‹คํ–‰
urls = ['url1', 'url2', 'url3', 'url4', 'url5']

with ThreadPoolExecutor(max_workers=5) as executor:
results = executor.map(scrape_page, urls)

for result in results:
print(result)

๋‹ค์Œ ๋‹จ๊ณ„โ€‹

์›น ์ž๋™ํ™”๋ฅผ ๋ฐฐ์› ๋‹ค๋ฉด, ๋‹ค์Œ ์ฃผ์ œ๋กœ ๋„˜์–ด๊ฐ€์„ธ์š”:

  • ์ž‘์—… ์Šค์ผ€์ค„๋ง: ์ •๊ธฐ์ ์ธ ์›น ํฌ๋กค๋ง ์ž๋™ํ™”ํ•˜๊ธฐ
  • API ํ™œ์šฉ: requests๋กœ ํšจ์œจ์ ์ธ ๋ฐ์ดํ„ฐ ์ˆ˜์ง‘ํ•˜๊ธฐ
  • ๋ฐ์ดํ„ฐ ์ €์žฅ: ์ˆ˜์ง‘ํ•œ ๋ฐ์ดํ„ฐ๋ฅผ ๋ฐ์ดํ„ฐ๋ฒ ์ด์Šค์— ์ €์žฅํ•˜๊ธฐ

์›น ์ž๋™ํ™”๋Š” ๋ฐ์ดํ„ฐ ์ˆ˜์ง‘, ํ…Œ์ŠคํŠธ, ๋ฐ˜๋ณต ์ž‘์—… ์ž๋™ํ™”์— ๋งค์šฐ ์œ ์šฉํ•œ ๊ธฐ์ˆ ์ž…๋‹ˆ๋‹ค. ์‹ค์ œ ํ”„๋กœ์ ํŠธ์— ์ ์šฉํ•ด๋ณด์„ธ์š”!