Datenvisualisierung ๐
Datenvisualisierung ist der Prozess der Darstellung komplexer Daten in leicht verstรคndlichen Grafiken. Matplotlib์ ํ์ด์ฌ์์ ๊ฐ์ฅ ๋๋ฆฌ ์ฌ์ฉ๋๋ ์๊ฐํ ๋ผ์ด๋ธ๋ฌ๋ฆฌ์ ๋๋ค.
Was ist Matplotlib?โ
Matplotlib ist eine Bibliothek zum Zeichnen von 2D-Diagrammen und Grafiken in Python. MATLAB๊ณผ ์ ์ฌํ ์ธํฐํ์ด์ค๋ฅผ ์ ๊ณตํฉ๋๋ค.
Hauptmerkmaleโ
- ๋ค์ํ ์ฐจํธ: ์ , ๋ง๋, ์ฐ์ ๋, ํ์คํ ๊ทธ๋จ ๋ฑ
- ์ปค์คํฐ๋ง์ด์ง: ์ธ๋ฐํ ์คํ์ผ ์กฐ์ ๊ฐ๋ฅ
- Pandas ํตํฉ: DataFrame๊ณผ ์ฝ๊ฒ ์ฐ๋
- ์ถํ ํ์ง: ๋ ผ๋ฌธ/๋ณด๊ณ ์์ฉ ๊ทธ๋ํ ์์ฑ
Installation
pip install matplotlib
import platform

import matplotlib
import matplotlib.pyplot as plt
import numpy as np

# Choose a Hangul-capable font for the current operating system so the Korean
# titles and labels used in the examples can be rendered.
_HANGUL_FONTS = {
    'Darwin': 'AppleGothic',     # macOS
    'Windows': 'Malgun Gothic',  # Windows
}
# Every other platform is treated like Linux and gets 'NanumGothic'.
plt.rcParams['font.family'] = _HANGUL_FONTS.get(platform.system(), 'NanumGothic')

# Keep the minus sign from rendering as a broken glyph with these fonts
plt.rcParams['axes.unicode_minus'] = False

# The version string is an attribute of the top-level package;
# matplotlib.pyplot does not define __version__.
print(f"Matplotlib 버전: {matplotlib.__version__}")
Grundlegende Diagramme
Liniendiagramm (Line Plot)
import matplotlib.pyplot as plt
import numpy as np

# Prepare the data: 100 evenly spaced samples on [0, 10]
x = np.linspace(0, 10, 100)
y = np.sin(x)

# Draw a single curve
plt.plot(x, y)
plt.title('사인 함수')
plt.xlabel('x')
plt.ylabel('sin(x)')
plt.grid(True)
plt.show()

# Draw several curves on one figure; the labels feed the legend
plt.figure(figsize=(10, 6))
plt.plot(x, np.sin(x), label='sin(x)')
plt.plot(x, np.cos(x), label='cos(x)')
plt.title('삼각함수')
plt.xlabel('x')
plt.ylabel('y')
plt.legend()
plt.grid(True)
plt.show()
Stiloptionen
x = np.linspace(0, 10, 20)
y = x ** 2

# Line styles given through the compact format string
plt.plot(x, y, 'r--', linewidth=2)    # red dashed line
plt.plot(x, y, 'bo-', markersize=8)   # blue circles joined by a line
plt.plot(x, y, 'g^:', alpha=0.5)      # green triangles with a dotted line (semi-transparent)

# The same kind of styling, spelled out more explicitly with keyword arguments
plt.plot(x, y,
         color='blue',              # line color
         linestyle='--',            # line style
         linewidth=2,               # line width
         marker='o',                # marker shape
         markersize=8,              # marker size
         markerfacecolor='red',     # marker fill color
         markeredgecolor='black',   # marker edge color
         alpha=0.7,                 # transparency
         label='데이터'             # legend entry
         )
Balkendiagramm (Bar Chart)
Vertikales Balkendiagramm
import matplotlib.pyplot as plt

# Data: one value per category
categories = ['A', 'B', 'C', 'D', 'E']
values = [23, 45, 56, 78, 32]

# Bar chart with a single fill color and horizontal grid lines only
plt.figure(figsize=(10, 6))
plt.bar(categories, values, color='skyblue', edgecolor='black')
plt.title('제품별 판매량')
plt.xlabel('제품')
plt.ylabel('판매량')
plt.grid(axis='y', alpha=0.3)
plt.show()

# A different color per bar: pass one color for each category
colors = ['red', 'green', 'blue', 'orange', 'purple']
plt.bar(categories, values, color=colors)
plt.show()
Horizontales Balkendiagramm
# Horizontal bars: categories go on the y axis, values on the x axis
plt.figure(figsize=(10, 6))
plt.barh(categories, values, color='lightcoral')
plt.title('제품별 판매량')
plt.xlabel('판매량')
plt.ylabel('제품')
plt.show()
Gruppiertes Balkendiagramm
import numpy as np

# Data: two series measured over the same categories
categories = ['A', 'B', 'C', 'D']
values1 = [23, 45, 56, 78]
values2 = [34, 55, 48, 70]
x = np.arange(len(categories))  # numeric group positions
width = 0.35                    # width of a single bar

# Grouped bars: shift each series by half a bar width around the group position
fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(x - width/2, values1, width, label='2023년', color='skyblue')
ax.bar(x + width/2, values2, width, label='2024년', color='lightcoral')
ax.set_xlabel('제품')
ax.set_ylabel('판매량')
ax.set_title('연도별 제품 판매 비교')
# Put the ticks on the group positions and show the category names there
ax.set_xticks(x)
ax.set_xticklabels(categories)
ax.legend()
ax.grid(axis='y', alpha=0.3)
plt.tight_layout()
plt.show()
Gestapeltes Balkendiagramm
# ๋์ ๋ง๋
categories = ['Q1', 'Q2', 'Q3', 'Q4']
product_a = [20, 35, 30, 35]
product_b = [25, 32, 34, 20]
product_c = [15, 20, 25, 30]
plt.figure(figsize=(10, 6))
plt.bar(categories, product_a, label='์ ํ A')
plt.bar(categories, product_b, bottom=product_a, label='์ ํ B')
plt.bar(categories, product_c, bottom=np.array(product_a)+np.array(product_b), label='์ ํ C')
plt.xlabel('๋ถ๊ธฐ')
plt.ylabel('ํ๋งค๋')
plt.title('๋ถ๊ธฐ๋ณ ์ ํ ํ๋งค')
plt.legend()
plt.show()
Streudiagramm (Scatter Plot)
import matplotlib.pyplot as plt
import numpy as np

# Data: 100 random points plus a random size and color value per point
np.random.seed(42)  # fixed seed so the example is reproducible
x = np.random.rand(100) * 100
y = np.random.rand(100) * 100
sizes = np.random.rand(100) * 100
colors = np.random.rand(100)

# Basic scatter plot
plt.figure(figsize=(10, 6))
plt.scatter(x, y)
plt.title('기본 산점도')
plt.xlabel('X')
plt.ylabel('Y')
plt.grid(True, alpha=0.3)
plt.show()

# Apply per-point size (s) and color (c, mapped through the colormap)
plt.figure(figsize=(10, 6))
scatter = plt.scatter(x, y, s=sizes, c=colors, cmap='viridis', alpha=0.6)
plt.colorbar(scatter, label='값')
plt.title('크기와 색상을 가진 산점도')
plt.xlabel('X')
plt.ylabel('Y')
plt.grid(True, alpha=0.3)
plt.show()
Korrelationsvisualisierung
# Correlation between height and weight (synthetic data:
# weight is 0.8 * height plus normally distributed noise)
height = np.random.normal(170, 10, 100)
weight = height * 0.8 + np.random.normal(0, 5, 100)
plt.figure(figsize=(10, 6))
plt.scatter(height, weight, alpha=0.5)
plt.title('키와 몸무게 상관관계')
plt.xlabel('키 (cm)')
plt.ylabel('몸무게 (kg)')

# Add a trend line: least-squares fit of a degree-1 polynomial
z = np.polyfit(height, weight, 1)
p = np.poly1d(z)
# Evaluate the fit on sorted x values. Plotting over the unsorted sample makes
# the line run back and forth over itself, which smears the dash pattern.
height_sorted = np.sort(height)
plt.plot(height_sorted, p(height_sorted), "r--", alpha=0.8, label='추세선')
plt.legend()
plt.grid(True, alpha=0.3)
plt.show()
Histogramm
import matplotlib.pyplot as plt
import numpy as np

# Normally distributed data: mean 100, standard deviation 15, 1000 values
data = np.random.normal(100, 15, 1000)

# Histogram with the sample mean marked by a vertical line
plt.figure(figsize=(10, 6))
plt.hist(data, bins=30, color='skyblue', edgecolor='black', alpha=0.7)
plt.title('시험 점수 분포')
plt.xlabel('점수')
plt.ylabel('빈도')
plt.axvline(data.mean(), color='red', linestyle='--', linewidth=2, label=f'평균: {data.mean():.1f}')
plt.legend()
plt.grid(axis='y', alpha=0.3)
plt.show()

# Comparing several histograms
data1 = np.random.normal(100, 15, 1000)
data2 = np.random.normal(110, 10, 1000)
# Use one common set of 30 bin edges for both samples. With bins=30 passed to
# each call separately, every sample gets its own bin width and the bar
# heights of the two histograms cannot be compared.
shared_bins = np.histogram_bin_edges(np.concatenate([data1, data2]), bins=30)
plt.figure(figsize=(10, 6))
plt.hist(data1, bins=shared_bins, alpha=0.5, label='A반', color='blue')
plt.hist(data2, bins=shared_bins, alpha=0.5, label='B반', color='red')
plt.title('반별 시험 점수 분포')
plt.xlabel('점수')
plt.ylabel('빈도')
plt.legend()
plt.grid(axis='y', alpha=0.3)
plt.show()
Kreisdiagramm (Pie Chart)
# Data: one share, color and offset per label
labels = ['Python', 'Java', 'JavaScript', 'C++', 'Others']
sizes = [35, 25, 20, 10, 10]
colors = ['#ff9999', '#66b3ff', '#99ff99', '#ffcc99', '#ff99cc']
explode = (0.1, 0, 0, 0, 0)  # pull the first wedge (Python) out to emphasize it

# Pie chart with percentage labels on the wedges
plt.figure(figsize=(10, 8))
plt.pie(sizes, explode=explode, labels=labels, colors=colors,
        autopct='%1.1f%%', shadow=True, startangle=90)
plt.title('프로그래밍 언어 사용 비율')
plt.axis('equal')
plt.show()

# Donut chart
plt.figure(figsize=(10, 8))
plt.pie(sizes, labels=labels, colors=colors,
        autopct='%1.1f%%', startangle=90,
        wedgeprops={'width': 0.5})  # a wedge width below the radius leaves the hole
plt.title('프로그래밍 언어 사용 비율 (도넛)')
plt.axis('equal')
plt.show()
Boxplot
import numpy as np

# Data: three normally distributed samples of 200 values each
np.random.seed(42)  # fixed seed so the example is reproducible
data1 = np.random.normal(100, 10, 200)
data2 = np.random.normal(90, 15, 200)
data3 = np.random.normal(110, 5, 200)
class_names = ['A반', 'B반', 'C반']

# Box plot; patch_artist=True is needed so the boxes can be filled
plt.figure(figsize=(10, 6))
plt.boxplot([data1, data2, data3],
            patch_artist=True,
            boxprops=dict(facecolor='lightblue', alpha=0.5),
            medianprops=dict(color='red', linewidth=2))
# Name the boxes through the x ticks instead of boxplot's `labels=` keyword,
# which is deprecated since Matplotlib 3.9 (renamed to `tick_labels`).
# Boxes are placed at x = 1..N by default.
plt.xticks(list(range(1, len(class_names) + 1)), class_names)
plt.title('반별 시험 점수 분포')
plt.ylabel('점수')
plt.grid(axis='y', alpha=0.3)
plt.show()
Unterdiagramme (Subplots)
Grundlegende Unterdiagramme
# 2x2 grid of subplots; `axes` is indexed as axes[row, column]
fig, axes = plt.subplots(2, 2, figsize=(12, 10))

# Top left: line plot
x = np.linspace(0, 10, 100)
axes[0, 0].plot(x, np.sin(x))
axes[0, 0].set_title('선 그래프')
axes[0, 0].grid(True)

# Top right: bar chart
categories = ['A', 'B', 'C', 'D']
values = [23, 45, 56, 78]
axes[0, 1].bar(categories, values, color='skyblue')
axes[0, 1].set_title('막대 그래프')

# Bottom left: scatter plot
x = np.random.rand(50)
y = np.random.rand(50)
axes[1, 0].scatter(x, y, alpha=0.5)
axes[1, 0].set_title('산점도')

# Bottom right: histogram
data = np.random.normal(0, 1, 1000)
axes[1, 1].hist(data, bins=30, color='lightcoral')
axes[1, 1].set_title('히스토그램')

plt.tight_layout()
plt.show()
Verschiedene Layouts
# Subplots of unequal size on a single figure
fig = plt.figure(figsize=(12, 8))

# Cells of different sizes: the top row uses positions 1 and 2 of a 2x2 grid
ax1 = plt.subplot(2, 2, 1)
squares_x, squares_y = [1, 2, 3], [1, 4, 9]
ax1.plot(squares_x, squares_y)
ax1.set_title('Plot 1')

ax2 = plt.subplot(2, 2, 2)
bar_names, bar_heights = ['A', 'B', 'C'], [10, 20, 15]
ax2.bar(bar_names, bar_heights)
ax2.set_title('Plot 2')

# Position 2 of a 2x1 grid, i.e. the whole bottom half
ax3 = plt.subplot(2, 1, 2)
noise = np.random.rand(100)
ax3.plot(noise)
ax3.set_title('Plot 3 (Wide)')

plt.tight_layout()
plt.show()