Saltar al contenido principal

정규표현식

En Python, ์ •๊ทœํ‘œํ˜„์‹(Regular Expression)์„ ์‚ฌ์šฉํ•˜๋Š” ๋ฐฉ๋ฒ•, aprendamos. ์ •๊ทœํ‘œํ˜„์‹์€ ๋ฌธ์ž์—ด ํŒจํ„ด์„ ์ฐพ๊ณ  ์กฐ์ž‘ํ•˜๋Š” ๊ฐ•๋ ฅํ•œ ๋„๊ตฌ์ž…๋‹ˆ๋‹ค.

정규표현식이란? 🔍

정규표현식(regex)은 특정 패턴의 문자열을 검색하고 조작하기 위한 형식 언어입니다.

import re

# Simple example: find a phone number (3-4-4 digit groups) inside a sentence.
text = '내 전화번호는 010-1234-5678입니다.'
pattern = r'\d{3}-\d{4}-\d{4}'

# re.search() scans the whole string and returns the first match, or None.
match = re.search(pattern, text)
if match:
    print(match.group())  # 010-1234-5678

re 모듈 기본 함수

match(): 문자열 시작 부분 매칭

import re

text = 'Python is great'

# re.match() only succeeds when the pattern matches at the START of the string;
# on success it returns a Match object.
match = re.match(r'Python', text)
if match:
    print('매칭됨:', match.group())  # Python

# The string does not start with 'Java', so re.match() returns None.
match = re.match(r'Java', text)
if match:
    print('매칭됨')
else:
    print('매칭 안됨')

search(): 문자열 전체에서 첫 번째 매칭

import re

text = 'I love Python programming'

# re.search() returns a Match object for the first occurrence anywhere in the string.
match = re.search(r'Python', text)
if match:
    print('찾음:', match.group())  # Python
    # start() is inclusive, end() is exclusive.
    print('위치:', match.start(), '-', match.end())  # 7 - 13

findall(): 모든 매칭 찾기

import re

text = '전화번호: 010-1234-5678, 010-9876-5432'
pattern = r'\d{3}-\d{4}-\d{4}'

# re.findall() returns every non-overlapping match as a list of strings.
matches = re.findall(pattern, text)
print(matches)  # ['010-1234-5678', '010-9876-5432']

finditer(): ๋ชจ๋“  ๋งค์นญ์„ iterator๋กœโ€‹

import re

text = '가격: 1000원, 2000원, 3000원'
pattern = r'\d+'

# re.finditer() yields Match objects lazily, so position info is available per match.
for match in re.finditer(pattern, text):
    print(f'{match.group()} (위치: {match.start()}-{match.end()})')
# 1000 (위치: 4-8)
# 2000 (위치: 11-15)
# 3000 (위치: 18-22)

sub(): 패턴 치환

import re

text = '내 전화번호는 010-1234-5678입니다.'
pattern = r'\d{3}-\d{4}-\d{4}'

# Replace every match of the pattern with a fixed string.
result = re.sub(pattern, '***-****-****', text)
print(result)  # 내 전화번호는 ***-****-****입니다.

# Replacement via a function: re.sub() calls it once per match and inserts
# whatever it returns.
def mask_phone(match):
    """Return the matched phone number with its middle part masked.

    Args:
        match: A Match object whose full match is a phone number.

    Returns:
        The first three characters, the literal '-****-', and the last four
        characters of the matched text.
    """
    phone = match.group()
    return phone[:3] + '-****-' + phone[-4:]

result = re.sub(pattern, mask_phone, text)
print(result) # the sentence with the phone number rewritten as 010-****-5678

split(): 패턴으로 분리

import re

text = 'apple,banana;orange:grape'

# Split on any one of several delimiter characters.
parts = re.split(r'[,;:]', text)
print(parts)  # ['apple', 'banana', 'orange', 'grape']

# Split on whitespace; \s+ treats a run of spaces/tabs as a single separator.
text2 = 'hello   world \t python'
words = re.split(r'\s+', text2)
print(words)  # ['hello', 'world', 'python']

기본 패턴 📝

리터럴 문자

import re

# Exact (literal) string matching
print(re.search(r'hello', 'hello world')) # match
print(re.search(r'hello', 'Hello world')) # None (matching is case-sensitive)

# Ignore case
print(re.search(r'hello', 'Hello world', re.IGNORECASE)) # match

메타 문자

๋ฌธ์ž์˜๋ฏธ์˜ˆ์ œ
.์ž„์˜์˜ ๋ฌธ์ž 1๊ฐœa.c โ†’ abc, a1c, a c
^๋ฌธ์ž์—ด ์‹œ์ž‘^hello โ†’ hello๋กœ ์‹œ์ž‘
$๋ฌธ์ž์—ด ๋world$ โ†’ world๋กœ ๋๋‚จ
*0๋ฒˆ ์ด์ƒ ๋ฐ˜๋ณตab*c โ†’ ac, abc, abbc
+1๋ฒˆ ์ด์ƒ ๋ฐ˜๋ณตab+c โ†’ abc, abbc
?0๋ฒˆ ๋˜๋Š” 1๋ฒˆab?c โ†’ ac, abc
|ORcat|dog โ†’ cat ๋˜๋Š” dog
()๊ทธ๋ฃน(ab)+ โ†’ ab, abab
[]๋ฌธ์ž ํด๋ž˜์Šค[abc] โ†’ a, b, c ์ค‘ ํ•˜๋‚˜
{}๋ฐ˜๋ณต ํšŸ์ˆ˜a{3} โ†’ aaa
import re

# ์ (.)์€ ์ค„๋ฐ”๊ฟˆ ์ œ์™ธํ•œ ๋ชจ๋“  ๋ฌธ์ž
print(re.search(r'a.c', 'abc').group()) # abc
print(re.search(r'a.c', 'a1c').group()) # a1c

# ๋ณ„ํ‘œ(*): 0๋ฒˆ ์ด์ƒ
print(re.search(r'ab*c', 'ac').group()) # ac
print(re.search(r'ab*c', 'abc').group()) # abc
print(re.search(r'ab*c', 'abbc').group()) # abbc

# ํ”Œ๋Ÿฌ์Šค(+): 1๋ฒˆ ์ด์ƒ
print(re.search(r'ab+c', 'abc').group()) # abc
print(re.search(r'ab+c', 'ac')) # None

# ๋ฌผ์Œํ‘œ(?): 0๋ฒˆ ๋˜๋Š” 1๋ฒˆ
print(re.search(r'colou?r', 'color').group()) # color
print(re.search(r'colou?r', 'colour').group()) # colour

문자 클래스

import re

# Character set
print(re.findall(r'[aeiou]', 'hello')) # ['e', 'o']

# Ranges
print(re.findall(r'[a-z]', 'Hello123')) # ['e', 'l', 'l', 'o']
print(re.findall(r'[A-Z]', 'Hello123')) # ['H']
print(re.findall(r'[0-9]', 'Hello123')) # ['1', '2', '3']

# Negation (^)
print(re.findall(r'[^0-9]', 'Hello123')) # ['H', 'e', 'l', 'l', 'o']

# Shorthand character classes
print(re.findall(r'\d', 'abc123')) # ['1', '2', '3'] (digits)
print(re.findall(r'\D', 'abc123')) # ['a', 'b', 'c'] (non-digits)
print(re.findall(r'\w', 'a_1 !')) # ['a', '_', '1'] (word characters)
print(re.findall(r'\W', 'a_1 !')) # [' ', '!'] (non-word characters)
print(re.findall(r'\s', 'a b\tc')) # [' ', '\t'] (whitespace)
print(re.findall(r'\S', 'a b')) # ['a', 'b'] (non-whitespace)

๋ฐ˜๋ณต ์ง€์ •โ€‹

import re

# {n}: exactly n times
print(re.search(r'a{3}', 'aaa').group()) # aaa
print(re.search(r'a{3}', 'aa')) # None

# {n,}: n or more times
print(re.search(r'a{2,}', 'aaa').group()) # aaa

# {n,m}: between n and m times (greedy, so it takes the maximum of 4 here)
print(re.search(r'a{2,4}', 'aaaaa').group()) # aaaa

# Phone number pattern
phone = re.search(r'\d{3}-\d{4}-\d{4}', '010-1234-5678')
print(phone.group()) # 010-1234-5678

그룹과 캡처 🎯

기본 그룹

import re

text = '이름: 홍길동, 나이: 30세'

# Parentheses capture the text they match; groups are numbered from 1.
pattern = r'이름: (\w+), 나이: (\d+)세'
match = re.search(pattern, text)

if match:
    print('전체:', match.group(0))  # 이름: 홍길동, 나이: 30세
    print('이름:', match.group(1))  # 홍길동
    print('나이:', match.group(2))  # 30
    print('모든 그룹:', match.groups())  # ('홍길동', '30')

이름 있는 그룹

import re

text = '2025-11-27'
# (?P<name>...) gives a group a name usable with group('name') and groupdict().
pattern = r'(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})'

match = re.search(pattern, text)
if match:
    print('연도:', match.group('year'))  # 2025
    print('월:', match.group('month'))  # 11
    print('일:', match.group('day'))  # 27
    print('딕셔너리:', match.groupdict())  # {'year': '2025', 'month': '11', 'day': '27'}

비캡처 그룹

import re

# (?:...) groups a sub-pattern without capturing it, so it gets no group number.
text = 'http://www.example.com'
pattern = r'(?:http|https)://(\w+\.\w+\.\w+)'

match = re.search(pattern, text)
if match:
    print('전체:', match.group(0))  # http://www.example.com
    print('도메인:', match.group(1))  # www.example.com
    # There is no group(2): the scheme alternative is non-capturing.

컴파일과 플래그

패턴 컴파일

import re

# Compile once and reuse the pattern object when the same pattern is applied
# several times.
pattern = re.compile(r'\d{3}-\d{4}-\d{4}')

text1 = '전화: 010-1234-5678'
text2 = '연락처: 010-9876-5432'

print(pattern.search(text1).group())  # 010-1234-5678
print(pattern.search(text2).group())  # 010-9876-5432

플래그

import re

text = 'Hello\nWorld'

# IGNORECASE: case-insensitive matching
print(re.findall(r'hello', text, re.IGNORECASE))  # ['Hello']

# MULTILINE: ^ and $ also match at the start/end of each line
print(re.findall(r'^World', text, re.MULTILINE))  # ['World']

# DOTALL: '.' also matches a newline (prints the Match object)
print(re.search(r'Hello.World', text, re.DOTALL))  # match

# VERBOSE: whitespace and '#' comments inside the pattern are ignored,
# so a pattern can be laid out and annotated for readability.
pattern = re.compile(r'''
    \d{3}   # 지역번호
    -       # 하이픈
    \d{4}   # 국번
    -       # 하이픈
    \d{4}   # 번호
''', re.VERBOSE)

print(pattern.search('010-1234-5678').group())  # 010-1234-5678

Ejemplos Prácticos 💡

예제 1: 이메일 검증

import re

def validate_email(email):
    """Return True if *email* looks like a syntactically valid address.

    The check is a pragmatic shape test (local part, '@', domain with at
    least one dot and an alphabetic TLD of two or more letters); it is not a
    full RFC 5322 validator.

    Args:
        email: The candidate address as a string.

    Returns:
        True when the whole string matches the pattern, otherwise False.
    """
    pattern = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}'
    # fullmatch() instead of match() with '^...$': '$' also matches just
    # before a trailing newline, which would accept 'user@example.com\n'.
    return re.fullmatch(pattern, email) is not None

# Test data: the first three addresses are valid, the last three are not.
emails = [
    'user@example.com',         # valid
    'user.name@example.co.kr',  # valid
    'user+tag@example.com',     # valid
    'invalid@',                 # invalid: no domain
    '@example.com',             # invalid: no local part
    'user@example',             # invalid: no top-level domain
]

for email in emails:
    result = '✓' if validate_email(email) else '✗'
    print(f'{result} {email}')

예제 2: 전화번호 검증 및 포맷팅

import re

class PhoneValidator:
    """Validate and format phone numbers written in several common layouts."""

    # Accepted layouts. validate() requires one of them to match the WHOLE input.
    PATTERNS = [
        r'010-\d{4}-\d{4}',        # 010-1234-5678
        r'010\d{8}',               # 01012345678
        r'\d{2,3}-\d{3,4}-\d{4}',  # 02-1234-5678, 031-123-4567
    ]

    @classmethod
    def extract_numbers(cls, phone):
        """Return only the digit characters of *phone*."""
        return re.sub(r'\D', '', phone)

    @classmethod
    def format_phone(cls, phone):
        """Return *phone* in hyphenated form, or unchanged if it cannot be formatted.

        Handled shapes (after stripping non-digits):
          * 11 digits starting with '010' -> 010-1234-5678
          * 9 or 10 digits starting with '02' -> 02-123-4567 / 02-1234-5678
          * any other 10 digits -> 031-123-4567
        """
        numbers = cls.extract_numbers(phone)

        if len(numbers) == 11 and numbers.startswith('010'):
            return f'{numbers[:3]}-{numbers[3:7]}-{numbers[7:]}'
        if numbers.startswith('02') and len(numbers) in (9, 10):
            # Two-digit area code; the middle group is whatever remains
            # between the area code and the last four digits.
            return f'{numbers[:2]}-{numbers[2:-4]}-{numbers[-4:]}'
        if len(numbers) == 10:
            return f'{numbers[:3]}-{numbers[3:6]}-{numbers[6:]}'

        return phone  # not a recognised shape: leave the input untouched

    @classmethod
    def validate(cls, phone):
        """Return True if *phone* matches one of PATTERNS in its entirety."""
        # fullmatch(): re.match() only anchors the start, so inputs such as
        # '010-1234-5678abc' would otherwise be accepted.
        return any(re.fullmatch(pattern, phone) for pattern in cls.PATTERNS)

# Usage example
phones = [
    '010-1234-5678',
    '01012345678',
    '02-1234-5678',
    '031-123-4567',
]

for phone in phones:
    formatted = PhoneValidator.format_phone(phone)
    is_valid = PhoneValidator.validate(phone)
    print(f'{phone} → {formatted} (유효: {is_valid})')

예제 3: URL 파싱

import re

# Compiled once at import time; VERBOSE allows the layout and inline comments.
_URL_PATTERN = re.compile(
    r'''
    (?P<protocol>https?://)?        # scheme (optional)
    (?P<subdomain>(?:[\w-]+\.)+)?   # every leading label, each with its dot (optional)
    (?P<domain>[\w-]+)              # label directly before the TLD
    \.(?P<tld>[a-z]{2,})            # top-level domain
    (?::(?P<port>\d+))?             # port (optional)
    (?P<path>/[^\s?\#]*)?           # path (optional), ends at ? or at the fragment marker
    (?:\?(?P<query>[^\s\#]*))?      # query string (optional)
    (?:\#(?P<fragment>\S*))?        # fragment (optional)
    ''',
    re.VERBOSE | re.IGNORECASE,
)


def parse_url(url):
    """Split *url* into its components.

    Args:
        url: The URL string; the scheme is optional.

    Returns:
        A dict with the keys 'protocol', 'subdomain', 'domain', 'tld',
        'port', 'path', 'query' and 'fragment' (missing parts are None), or
        None when the start of *url* does not look like a host name.
        'subdomain' holds all leading labels including their trailing dots
        (e.g. 'www.' or 'blog.example.').

    Note:
        Matching is anchored at the start only, so text after the URL is
        ignored rather than rejected.
    """
    match = _URL_PATTERN.match(url)
    if match is None:
        return None
    return match.groupdict()

# Test: print every component that is present for each sample URL.
urls = [
    'https://www.example.com:8080/path/to/page?key=value#section',
    'http://blog.example.co.kr/posts',
    'example.com/page',
]

for url in urls:
    print(f'\nURL: {url}')
    parts = parse_url(url)
    if parts:
        for key, value in parts.items():
            if value:  # skip components that are missing (None) or empty
                print(f' {key}: {value}')

예제 4: 로그 파일 파싱

import re
from datetime import datetime

class LogParser:
    """Parse log files whose lines look like '[YYYY-MM-DD HH:MM:SS] LEVEL: message'."""

    # Example line: [2025-11-27 14:30:45] ERROR: Something went wrong
    LOG_PATTERN = r'\[(?P<timestamp>[\d-]+ [\d:]+)\] (?P<level>\w+): (?P<message>.*)'

    def __init__(self, log_file):
        """Remember the path of the log file and compile the line pattern."""
        self.log_file = log_file
        self.pattern = re.compile(self.LOG_PATTERN)

    def parse_line(self, line):
        """Parse one log line.

        Returns:
            A dict with 'timestamp' (a datetime), 'level' and 'message', or
            None when the line does not have the expected shape or its
            timestamp is not a valid date/time.
        """
        match = self.pattern.match(line)
        if match is None:
            return None

        data = match.groupdict()
        try:
            data['timestamp'] = datetime.strptime(
                data['timestamp'],
                '%Y-%m-%d %H:%M:%S',
            )
        except ValueError:
            # The regex only checks for digits and separators, so something
            # like '2025-13-45 99:99:99' reaches this point. Treat it like
            # any other unparseable line instead of aborting the whole parse.
            return None
        return data

    def parse_file(self):
        """Parse the whole file and return the list of successfully parsed lines."""
        logs = []
        with open(self.log_file, 'r', encoding='utf-8') as f:
            for line in f:
                parsed = self.parse_line(line.strip())
                if parsed:
                    logs.append(parsed)
        return logs

    def filter_by_level(self, level):
        """Return the parsed entries whose level equals *level* (re-reads the file)."""
        return [log for log in self.parse_file() if log['level'] == level]

# Create a sample log file
sample_logs = [
    '[2025-11-27 14:30:45] INFO: Server started',
    '[2025-11-27 14:30:46] ERROR: Connection failed',
    '[2025-11-27 14:30:47] WARNING: Memory usage high',
    '[2025-11-27 14:30:48] ERROR: Database timeout',
]

# NOTE: this writes 'app.log' into the current working directory.
with open('app.log', 'w', encoding='utf-8') as f:
    f.write('\n'.join(sample_logs))

# Parse the file and keep only the ERROR entries
parser = LogParser('app.log')
errors = parser.filter_by_level('ERROR')

print(f'총 {len(errors)}개의 에러 발견:')
for error in errors:
    print(f" {error['timestamp']}: {error['message']}")

예제 5: 비밀번호 강도 검증

import re

class PasswordValidator:
    """Rate password strength from five simple checks."""

    # User-facing hint for each failed check, in reporting order.
    # '{min_length}' is filled in from the instance.
    _FEEDBACK = (
        ('length', '최소 {min_length}자 이상이어야 합니다'),
        ('uppercase', '대문자를 포함해야 합니다'),
        ('lowercase', '소문자를 포함해야 합니다'),
        ('digit', '숫자를 포함해야 합니다'),
        ('special', '특수문자를 포함해야 합니다'),
    )

    def __init__(self, min_length=8):
        """Store the minimum acceptable password length."""
        self.min_length = min_length

    def validate(self, password):
        """Return a dict mapping each check name to True (passed) or False."""
        return {
            'length': len(password) >= self.min_length,
            'uppercase': bool(re.search(r'[A-Z]', password)),
            'lowercase': bool(re.search(r'[a-z]', password)),
            'digit': bool(re.search(r'\d', password)),
            # Any punctuation or symbol counts, not just a fixed list:
            # [^\w\s] is "neither letter/digit/underscore nor whitespace",
            # and '_' is added back because \w includes it.
            'special': bool(re.search(r'[^\w\s]|_', password)),
        }

    def get_strength(self, password):
        """Return the strength label for *password*.

        A password shorter than ``min_length`` is always rated '약함',
        however many character classes it mixes. Otherwise the label depends
        on the number of passed checks: 5 -> '매우 강함', 4 -> '강함',
        3 -> '보통', fewer -> '약함'.
        """
        checks = self.validate(password)
        if not checks['length']:
            return '약함'

        score = sum(checks.values())
        if score == 5:
            return '매우 강함'
        if score >= 4:
            return '강함'
        if score >= 3:
            return '보통'
        return '약함'

    def get_feedback(self, password):
        """Return a list of improvement hints, one per failed check."""
        checks = self.validate(password)
        return [
            message.format(min_length=self.min_length)
            for name, message in self._FEEDBACK
            if not checks[name]
        ]

# Usage example
validator = PasswordValidator()

passwords = [
    'password',
    'Password1',
    'Password1!',
    'P@ssw0rd',
]

for pwd in passwords:
    strength = validator.get_strength(pwd)
    feedback = validator.get_feedback(pwd)

    print(f'\n비밀번호: {pwd}')
    print(f'강도: {strength}')
    if feedback:
        print('개선사항:')
        for fb in feedback:
            print(f' - {fb}')

예제 6: HTML 태그 제거

import re

def remove_html_tags(text):
    """Strip HTML tags from *text* and decode character references.

    This is a lightweight regex approach for simple markup, not an HTML
    parser: the text content of elements such as <script> is kept.

    Args:
        text: A string containing HTML markup.

    Returns:
        The text with '<...>' tags removed, entities such as '&amp;',
        '&lt;', '&quot;' or '&#39;' decoded, non-breaking spaces turned into
        ordinary spaces, and surrounding whitespace stripped.
    """
    # Imported locally so the function does not depend on a module-level
    # name 'html', which example code may reuse as a variable.
    from html import unescape

    without_tags = re.sub(r'<[^>]+>', '', text)
    # unescape() decodes each reference exactly once ('&amp;lt;' -> '&lt;').
    # '&nbsp;' decodes to U+00A0, which is mapped to a plain space.
    return unescape(without_tags).replace('\xa0', ' ').strip()

# Test: a small HTML fragment with nested tags
html = '''
<div class="content">
    <h1>제목</h1>
    <p>이것은 <strong>중요한</strong> 내용입니다.</p>
    <a href="http://example.com">링크</a>
</div>
'''

clean_text = remove_html_tags(html)
print(clean_text)

예제 7: 텍스트에서 정보 추출

import re

class TextExtractor:
    """Extract e-mail addresses, URLs, phone numbers, dates and hashtags from text."""

    @staticmethod
    def extract_emails(text):
        """Return all e-mail addresses found in *text*."""
        # The TLD class is [A-Za-z]; writing [A-Z|a-z] would make the literal
        # '|' a valid TLD character and glue neighbouring addresses together.
        pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b'
        return re.findall(pattern, text)

    @staticmethod
    def extract_urls(text):
        """Return all http/https URLs found in *text*."""
        pattern = r'https?://[^\s<>"{}|\\^`\[\]]+'
        return re.findall(pattern, text)

    @staticmethod
    def extract_phone_numbers(text):
        """Return phone numbers such as 010-1234-5678, 02.123.4567 or 01012345678."""
        # The digit look-arounds keep the pattern from matching a slice of a
        # longer digit run (for example an account or card number).
        pattern = r'(?<!\d)\d{2,3}[-.]?\d{3,4}[-.]?\d{4}(?!\d)'
        return re.findall(pattern, text)

    @staticmethod
    def extract_dates(text):
        """Return dates written as YYYY-MM-DD or YYYY/MM/DD."""
        pattern = r'(?<!\d)\d{4}[-/]\d{2}[-/]\d{2}(?!\d)'
        return re.findall(pattern, text)

    @staticmethod
    def extract_hashtags(text):
        """Return hashtags ('#' followed by word characters)."""
        pattern = r'#\w+'
        return re.findall(pattern, text)

# Usage example
text = '''
연락처: user@example.com, 010-1234-5678
웹사이트: https://www.example.com
날짜: 2025-11-27
소셜: #python #regex #coding
'''

extractor = TextExtractor()

print('이메일:', extractor.extract_emails(text))
print('URL:', extractor.extract_urls(text))
print('전화번호:', extractor.extract_phone_numbers(text))
print('날짜:', extractor.extract_dates(text))
print('해시태그:', extractor.extract_hashtags(text))

예제 8: 문자열 정규화

import re

class TextNormalizer:
    """Normalise free-form text, phone numbers and e-mail addresses."""

    @staticmethod
    def normalize_whitespace(text):
        """Collapse every run of whitespace to one space and trim both ends."""
        return re.sub(r'\s+', ' ', text).strip()

    @staticmethod
    def remove_special_chars(text):
        """Remove everything except ASCII letters, digits, Hangul syllables and whitespace."""
        # 가-힣 is the Hangul syllable range U+AC00..U+D7A3. It must be written
        # with the real characters: a mis-encoded range makes re.sub() raise
        # re.error ("bad character range").
        return re.sub(r'[^a-zA-Z0-9가-힣\s]', '', text)

    @staticmethod
    def normalize_phone(phone):
        """Return an 11-digit number as 010-1234-5678; other inputs are returned unchanged."""
        # Keep digits only
        numbers = re.sub(r'\D', '', phone)

        # 11 digits: 3-4-4 grouping (010-xxxx-xxxx)
        if len(numbers) == 11:
            return f'{numbers[:3]}-{numbers[3:7]}-{numbers[7:]}'

        return phone

    @staticmethod
    def normalize_email(email):
        """Return *email* lower-cased with all whitespace removed."""
        email = email.lower().strip()
        # Remove any whitespace left inside the address
        email = re.sub(r'\s', '', email)
        return email

# Usage example
normalizer = TextNormalizer()

# Irregular runs of whitespace collapse to single spaces.
print(normalizer.normalize_whitespace('hello    world  !'))
# 'hello world !'

print(normalizer.remove_special_chars('Hello, World! 123'))
# 'Hello World 123'

print(normalizer.normalize_phone('010 1234 5678'))
# '010-1234-5678'

print(normalizer.normalize_email(' User@Example.COM '))
# 'user@example.com'

예제 9: 민감한 정보 마스킹

import re

class DataMasker:
    """Mask sensitive values (e-mail, phone, card and resident numbers) inside text."""

    @staticmethod
    def mask_email(email):
        """Mask the local part of each address, keeping its first character: u***@example.com."""
        # The look-behind pins the kept character to the real start of the
        # local part. Without it, an address beginning with a non-letter
        # ('123user@...') would be masked from its first letter and leak the
        # leading characters ('123u***@...').
        pattern = (
            r'(?<![a-zA-Z0-9._%+-])([a-zA-Z0-9._%+-])[a-zA-Z0-9._%+-]*'
            r'(@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,})'
        )
        return re.sub(pattern, r'\1***\2', email)

    @staticmethod
    def mask_phone(phone):
        """Mask the middle digit group of a phone number: 010-****-5678."""
        pattern = r'(\d{2,3})[-.]?(\d{3,4})[-.]?(\d{4})'
        return re.sub(pattern, r'\1-****-\3', phone)

    @staticmethod
    def mask_card(card_number):
        """Mask all but the last four digits of a 16-digit card number: ****-****-****-1234."""
        pattern = r'(\d{4})[-\s]?(\d{4})[-\s]?(\d{4})[-\s]?(\d{4})'
        return re.sub(pattern, r'****-****-****-\4', card_number)

    @staticmethod
    def mask_ssn(ssn):
        """Mask the last seven digits of a 6+7 digit resident number: 123456-*******."""
        pattern = r'(\d{6})[-]?(\d{7})'
        return re.sub(pattern, r'\1-*******', ssn)

# Usage example
masker = DataMasker()

print(masker.mask_email('user123@example.com'))
# u***@example.com

print(masker.mask_phone('010-1234-5678'))
# 010-****-5678

print(masker.mask_card('1234-5678-9012-3456'))
# ****-****-****-3456

print(masker.mask_ssn('123456-1234567'))
# 123456-*******

예제 10: 코드 하이라이팅 (간단한 예제)

import re

class SimpleHighlighter:
    """Minimal regex-based highlighter that renders Python code as HTML spans."""

    # One pattern per token type. ORDER MATTERS: at each position the first
    # alternative that matches wins, so comments and strings come first
    # (their contents must not be highlighted again) and keywords come
    # before the generic function-name pattern.
    PATTERNS = {
        'comment': r'#[^\n]*',
        'string': r'"(?:\\.|[^"\\\n])*"|\'(?:\\.|[^\'\\\n])*\'',
        'keyword': r'\b(?:def|class|if|else|elif|for|while|return|import|from)\b',
        'number': r'\b\d+\b',
        'function': r'\b[a-zA-Z_]\w*(?=\s*\()',
    }

    # All token patterns combined into one alternation of named groups, so
    # the source is scanned exactly once.
    _TOKEN_RE = re.compile(
        '|'.join(f'(?P<{name}>{regex})' for name, regex in PATTERNS.items())
    )

    def highlight(self, code):
        """Return *code* as HTML with each token wrapped in <span class="TYPE">.

        The code is tokenised in a single pass. Running one re.sub() per
        token type over already-substituted output would match the markup
        inserted by earlier passes (for example the "keyword" attribute
        value looks like a string literal) and corrupt the HTML.

        '&', '<' and '>' in the source are HTML-escaped; quotes are left as
        they are.
        """
        # Local import keeps the class self-contained.
        from html import escape

        pieces = []
        cursor = 0
        for match in self._TOKEN_RE.finditer(code):
            # Plain text between the previous token and this one
            pieces.append(escape(code[cursor:match.start()], quote=False))
            # match.lastgroup is the name of the alternative that matched
            token = escape(match.group(), quote=False)
            pieces.append(f'<span class="{match.lastgroup}">{token}</span>')
            cursor = match.end()
        pieces.append(escape(code[cursor:], quote=False))
        return ''.join(pieces)

# Usage example
highlighter = SimpleHighlighter()
code = 'def hello(name): return "Hello, " + name'
print(highlighter.highlight(code))

Preguntas Frecuentes ❓

Q1: 탐욕적(greedy) vs 비탐욕적(non-greedy) 매칭은?

import re

text = '<div>content1</div><div>content2</div>'

# Greedy: matches as much as possible
greedy = re.search(r'<div>.*</div>', text).group()
print(greedy)
# <div>content1</div><div>content2</div>

# Non-greedy: matches as little as possible
non_greedy = re.search(r'<div>.*?</div>', text).group()
print(non_greedy)
# <div>content1</div>

Q2: 역슬래시를 매칭하려면?

import re

# Use a raw string: the regex engine receives the two characters \\ (one escaped backslash)
pattern = r'\\'
print(re.search(pattern, 'a\\b').group()) # \

# Or escape twice in a normal string literal (four backslashes in source)
pattern = '\\\\'
print(re.search(pattern, 'a\\b').group()) # \

Q3: 정규표현식을 디버그하는 방법은?

import re

# When a pattern is complex, build it up and test it piece by piece.
pattern = r'\d{3}-\d{4}-\d{4}'

# Step-by-step check. .group() prints the matched text; printing the Match
# object itself would show its repr instead.
print(re.search(r'\d{3}', '010').group())  # 010
print(re.search(r'\d{3}-', '010-').group())  # 010-
print(re.search(r'\d{3}-\d{4}', '010-1234').group())  # 010-1234
print(re.search(pattern, '010-1234-5678').group())  # 010-1234-5678

# Online tools: regex101.com, regexr.com

Q4: 같은 패턴이 여러 번 나올 때 어떻게 처리하나요?

import re

text = 'Price: 1000원, 2000원, 3000원'

# findall(): collect every match as a string
prices = re.findall(r'\d+', text)
print(prices)  # ['1000', '2000', '3000']

# finditer(): iterate over Match objects to get positions as well
for match in re.finditer(r'\d+', text):
    print(f'{match.group()} at {match.start()}-{match.end()}')

Q5: 정규표현식 성능을 개선하는 방법은?

import re

# 1. Compile the pattern once when it is used repeatedly.
texts = ['order 66', 'no digits here', 'room 101']  # sample inputs
pattern = re.compile(r'\d+')
for text in texts:
    pattern.search(text)

# 2. Use non-capturing groups when the grouped text is not needed.
# Bad:  r'(http|https)://...'
# Good: r'(?:http|https)://...'

# 3. Limit greedy matching.
# Bad:  r'.*'
# Good: r'[^>]*' or r'.*?'

정규표현식 치트 시트 📋

자주 사용하는 패턴

patterns = {
'์ด๋ฉ”์ผ': r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$',
'์ „ํ™”๋ฒˆํ˜ธ': r'^\d{3}-\d{3,4}-\d{4}$',
'URL': r'^https?://[^\s]+$',
'IP์ฃผ์†Œ': r'^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$',
'๋‚ ์งœ(YYYY-MM-DD)': r'^\d{4}-\d{2}-\d{2}$',
'์‹œ๊ฐ„(HH:MM)': r'^([01]\d|2[0-3]):([0-5]\d)$',
'ํ•œ๊ธ€': r'[๊ฐ€-ํžฃ]+',
'์˜๋ฌธ': r'[a-zA-Z]+',
'์ˆซ์ž': r'\d+',
'๊ณต๋ฐฑ์ œ๊ฑฐ': r'\s+',
}

Próximos Pasos

  • ํŒŒ์ผ ์ž…์ถœ๋ ฅ: ๋กœ๊ทธ ํŒŒ์ผ ํŒŒ์‹ฑ์— ์ •๊ทœํ‘œํ˜„์‹ ํ™œ์šฉ
  • JSON๊ณผ CSV: ๋ฐ์ดํ„ฐ ๊ฒ€์ฆ์— ์ •๊ทœํ‘œํ˜„์‹ ์‚ฌ์šฉ
  • ์›น ์Šคํฌ๋ž˜ํ•‘: BeautifulSoup๊ณผ ์ •๊ทœํ‘œํ˜„์‹ ์กฐํ•ฉ
  • ๊ณ ๊ธ‰ ์ •๊ทœํ‘œํ˜„์‹: lookahead, lookbehind ํŒจํ„ด