Skip to main content

NumPy Basics 📊

NumPy (Numerical Python) is the core library for scientific computing in Python. It provides powerful multidimensional array objects and fast computation capabilities.

What is NumPy?

NumPy supports large multidimensional arrays and matrix operations, and includes a library of mathematical functions.

Key Features

  • Fast Performance: Implemented in C, 10-100x faster than pure Python
  • Memory Efficiency: Uses contiguous memory blocks
  • Broadcasting: Operations between arrays of different sizes
  • Vectorization: Operations on entire arrays without loops

Installation

pip install numpy
import numpy as np

# Check the installed version
print(np.__version__) # e.g. 1.24.3

Creating ndarrays

Basic Creation Methods

import numpy as np

# Create an array from a Python list
arr1 = np.array([1, 2, 3, 4, 5])
print(arr1) # [1 2 3 4 5]
print(type(arr1)) # <class 'numpy.ndarray'>

# 2-D array (each inner list becomes a row)
arr2 = np.array([[1, 2, 3], [4, 5, 6]])
print(arr2)
# [[1 2 3]
#  [4 5 6]]

# 3-D array
arr3 = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])
print(arr3.shape) # (2, 2, 2)

Creating Special Arrays

# Array filled with zeros
zeros = np.zeros((3, 4))
print(zeros)
# [[0. 0. 0. 0.]
#  [0. 0. 0. 0.]
#  [0. 0. 0. 0.]]

# Array filled with ones
ones = np.ones((2, 3))
print(ones)
# [[1. 1. 1.]
#  [1. 1. 1.]]

# Array filled with a specific value
full = np.full((2, 2), 7)
print(full)
# [[7 7]
#  [7 7]]

# Identity matrix
identity = np.eye(3)
print(identity)
# [[1. 0. 0.]
#  [0. 1. 0.]
#  [0. 0. 1.]]

# Range array: start, stop, step (the stop value 10 is not included)
range_arr = np.arange(0, 10, 2)
print(range_arr) # [0 2 4 6 8]

# Evenly spaced array: 5 points from 0 to 1, both endpoints included
linspace = np.linspace(0, 1, 5)
print(linspace) # [0.   0.25 0.5  0.75 1.  ]

# Random arrays (output differs on every run)
random_arr = np.random.rand(3, 3) # uniform distribution between 0 and 1
print(random_arr)

random_int = np.random.randint(1, 100, size=(3, 3)) # random integers
print(random_int)

Array Properties

arr = np.array([[1, 2, 3, 4], [5, 6, 7, 8]])

print(arr.shape) # (2, 4) - shape of the array
print(arr.ndim) # 2 - number of dimensions
print(arr.size) # 8 - total number of elements
print(arr.dtype) # int64 - data type (the default integer type may differ by platform)
print(arr.itemsize) # 8 - size of each element in bytes (8 for int64)

Indexing and Slicing

1D Arrays

arr = np.array([10, 20, 30, 40, 50])

print(arr[0]) # 10 - first element
print(arr[-1]) # 50 - last element
print(arr[1:4]) # [20 30 40] - slicing (end index is excluded)
print(arr[::2]) # [10 30 50] - every second element
print(arr[::-1]) # [50 40 30 20 10] - reversed order

2D Arrays

arr = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])

print(arr[0, 0]) # 1 - first row, first column
print(arr[1, 2]) # 6 - second row, third column
print(arr[0]) # [1 2 3] - the entire first row
print(arr[:, 1]) # [2 5 8] - the entire second column
print(arr[0:2, 1:3]) # [[2 3] [5 6]] - sub-array (rows 0-1, columns 1-2)

Boolean Indexing

arr = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])

# Select only the elements that satisfy a condition
print(arr[arr > 5]) # [ 6  7  8  9 10]
print(arr[arr % 2 == 0]) # [ 2  4  6  8 10]

# Multiple conditions: combine masks with & and wrap each condition in parentheses
print(arr[(arr > 3) & (arr < 8)]) # [4 5 6 7]

Array Operations

Basic Arithmetic Operations

arr1 = np.array([1, 2, 3, 4])
arr2 = np.array([10, 20, 30, 40])

# Arithmetic operators work element by element
print(arr1 + arr2) # [11 22 33 44]
print(arr1 - arr2) # [ -9 -18 -27 -36]
print(arr1 * arr2) # [ 10  40  90 160]
print(arr1 / arr2) # [0.1 0.1 0.1 0.1]
print(arr1 ** 2) # [ 1  4  9 16]

# Scalar operations (the scalar is applied to every element)
print(arr1 + 10) # [11 12 13 14]
print(arr1 * 2) # [2 4 6 8]

Mathematical Functions

arr = np.array([1, 4, 9, 16])

print(np.sqrt(arr)) # [1. 2. 3. 4.]
print(np.exp(arr)) # exponential function
print(np.log(arr)) # natural logarithm
print(np.sin(arr)) # trigonometric function (sine)

# Maximum / minimum
arr = np.array([3, 1, 4, 1, 5, 9, 2, 6])
print(np.max(arr)) # 9
print(np.min(arr)) # 1
print(np.argmax(arr)) # 5 - index of the maximum value
print(np.argmin(arr)) # 1 - index of the minimum value (first occurrence)

Broadcasting

Broadcasting automatically handles operations between arrays of different sizes.

# 1-D array + scalar
arr = np.array([1, 2, 3])
print(arr + 5) # [6 7 8]

# 2-D array + 1-D array (the 1-D array is added to every row)
arr2d = np.array([[1, 2, 3], [4, 5, 6]])
arr1d = np.array([10, 20, 30])
print(arr2d + arr1d)
# [[11 22 33]
#  [14 25 36]]

# 2-D array + column vector (each row gets its own offset)
arr2d = np.array([[1, 2, 3], [4, 5, 6]])
col_vec = np.array([[10], [20]])
print(arr2d + col_vec)
# [[11 12 13]
#  [24 25 26]]

Aggregation Functions

arr = np.array([[1, 2, 3], [4, 5, 6]])

print(np.sum(arr)) # 21 - sum of all elements
print(np.mean(arr)) # 3.5 - mean
print(np.std(arr)) # 1.707... - standard deviation
print(np.var(arr)) # 2.916... - variance
print(np.median(arr)) # 3.5 - median

# Specifying an axis
print(np.sum(arr, axis=0)) # [5 7 9] - one sum per column
print(np.sum(arr, axis=1)) # [ 6 15] - one sum per row

print(np.mean(arr, axis=0)) # [2.5 3.5 4.5] - column means
print(np.mean(arr, axis=1)) # [2. 5.] - row means

Reshaping Arrays

arr = np.array([1, 2, 3, 4, 5, 6])

# reshape - change the shape
reshaped = arr.reshape(2, 3)
print(reshaped)
# [[1 2 3]
#  [4 5 6]]

# flatten - collapse into one dimension
flattened = reshaped.flatten()
print(flattened) # [1 2 3 4 5 6]

# transpose - swap rows and columns
arr2d = np.array([[1, 2, 3], [4, 5, 6]])
print(arr2d.T)
# [[1 4]
#  [2 5]
#  [3 6]]

# Add a dimension
arr = np.array([1, 2, 3])
expanded = np.expand_dims(arr, axis=0)
print(expanded.shape) # (1, 3)

Combining Arrays

arr1 = np.array([[1, 2], [3, 4]])
arr2 = np.array([[5, 6], [7, 8]])

# Vertical stacking
vstack = np.vstack([arr1, arr2])
print(vstack)
# [[1 2]
#  [3 4]
#  [5 6]
#  [7 8]]

# Horizontal stacking
hstack = np.hstack([arr1, arr2])
print(hstack)
# [[1 2 5 6]
#  [3 4 7 8]]

# concatenate
concat = np.concatenate([arr1, arr2], axis=0) # along the row axis
print(concat)

Practical Examples

Example 1: Student Grade Statistics

import numpy as np

# ํ•™์ƒ 5๋ช…์˜ 3๊ณผ๋ชฉ ์„ฑ์  (ํ–‰: ํ•™์ƒ, ์—ด: ๊ณผ๋ชฉ)
scores = np.array([
[85, 90, 78], # ํ•™์ƒ 1
[92, 88, 95], # ํ•™์ƒ 2
[78, 85, 80], # ํ•™์ƒ 3
[95, 92, 88], # ํ•™์ƒ 4
[88, 86, 92] # ํ•™์ƒ 5
])

# ๊ฐ ํ•™์ƒ์˜ ํ‰๊ท  ์ ์ˆ˜
student_avg = np.mean(scores, axis=1)
print("ํ•™์ƒ๋ณ„ ํ‰๊ท :", student_avg)
# [84.33 91.67 81. 91.67 88.67]

# ๊ฐ ๊ณผ๋ชฉ์˜ ํ‰๊ท  ์ ์ˆ˜
subject_avg = np.mean(scores, axis=0)
print("๊ณผ๋ชฉ๋ณ„ ํ‰๊ท :", subject_avg)
# [87.6 88.2 86.6]

# ์ „์ฒด ํ‰๊ท 
total_avg = np.mean(scores)
print(f"์ „์ฒด ํ‰๊ท : {total_avg:.2f}") # 87.47

# ์ตœ๊ณ  ์ ์ˆ˜์™€ ํ•™์ƒ
max_score = np.max(scores)
max_position = np.unravel_index(np.argmax(scores), scores.shape)
print(f"์ตœ๊ณ  ์ ์ˆ˜: {max_score} (ํ•™์ƒ {max_position[0]+1}, ๊ณผ๋ชฉ {max_position[1]+1})")

# 90์  ์ด์ƒ ๋ฐ›์€ ํšŸ์ˆ˜
high_scores = np.sum(scores >= 90)
print(f"90์  ์ด์ƒ: {high_scores}ํšŒ") # 6ํšŒ

Example 2: Matrix Operations and Linear Algebra

import numpy as np

# Matrix multiplication
A = np.array([[1, 2], [3, 4]])
B = np.array([[5, 6], [7, 8]])

# Matrix product (dot product)
C = np.dot(A, B)
# or: C = A @ B
print("행렬 곱:")
print(C)
# [[19 22]
#  [43 50]]

# Inverse matrix
inv_A = np.linalg.inv(A)
print("역행렬:")
print(inv_A)
# [[-2.   1. ]
#  [ 1.5 -0.5]]

# Verify the inverse: A times its inverse should be the identity matrix
identity = A @ inv_A
print("A × A^(-1):")
print(np.round(identity, 2)) # identity matrix (rounded to hide floating-point noise)

# Eigenvalues and eigenvectors
eigenvalues, eigenvectors = np.linalg.eig(A)
print("고유값:", eigenvalues)
print("고유벡터:")
print(eigenvectors)

# Determinant. The raw float carries rounding error
# (-2.0000000000000004), so format it to one decimal place.
det = np.linalg.det(A)
print(f"행렬식: {det:.1f}") # -2.0

Example 3: Calculating Moving Averages

import numpy as np

# Stock price data
prices = np.array([100, 102, 98, 105, 107, 103, 110, 108, 112, 115])


def moving_average(data, window_size):
    """Return the simple moving average of ``data``.

    Each output element is the mean of ``window_size`` consecutive input
    values, so the result has ``len(data) - window_size + 1`` elements.

    Args:
        data: 1-D sequence or array of numbers.
        window_size: Number of consecutive values per average; must be >= 1.

    Returns:
        A 1-D NumPy array of averages. It is empty when ``data`` holds
        fewer than ``window_size`` values.

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError("window_size must be a positive integer")
    values = np.asarray(data)
    # np.convolve(mode='valid') swaps its arguments when the kernel is longer
    # than the data and would return values that are not moving averages.
    if window_size > values.size:
        return np.array([], dtype=float)
    # Convolving with equal weights that sum to 1 averages each window.
    weights = np.ones(window_size) / window_size
    return np.convolve(values, weights, mode='valid')


# 3-day moving average
ma_3 = moving_average(prices, 3)
print("3일 이동 평균:", np.round(ma_3, 2))

# 5-day moving average
ma_5 = moving_average(prices, 5)
print("5일 이동 평균:", np.round(ma_5, 2))

# Daily returns in percent: change from the previous day relative to the previous day
returns = (prices[1:] - prices[:-1]) / prices[:-1] * 100
print("일일 수익률(%):", np.round(returns, 2))

Example 4: Basic Image Processing

import numpy as np

# A simple image (8x8 grayscale, pixel values 0-255)
image = np.random.randint(0, 256, size=(8, 8))

print("원본 이미지:")
print(image)

# Invert the image
inverted = 255 - image
print("\n반전 이미지:")
print(inverted)

# Adjust brightness (+50); clip keeps the values inside the 0-255 range
brightened = np.clip(image + 50, 0, 255)
print("\n밝게:")
print(brightened)

# Increase contrast (x1.5); the product is float, so clip and cast back to uint8
contrasted = np.clip(image * 1.5, 0, 255).astype(np.uint8)
print("\n대비 증가:")
print(contrasted)

# Binarize (threshold 128): pixels above the threshold become 255, the rest 0
binary = np.where(image > 128, 255, 0)
print("\n이진화:")
print(binary)

Performance Comparison

import numpy as np
import time

# ์ˆœ์ˆ˜ Python vs NumPy
size = 1000000

# Python ๋ฆฌ์ŠคํŠธ
python_list = list(range(size))
start = time.time()
result = [x * 2 for x in python_list]
python_time = time.time() - start

# NumPy ๋ฐฐ์—ด
numpy_array = np.arange(size)
start = time.time()
result = numpy_array * 2
numpy_time = time.time() - start

print(f"Python ๋ฆฌ์ŠคํŠธ: {python_time:.4f}์ดˆ")
print(f"NumPy ๋ฐฐ์—ด: {numpy_time:.4f}์ดˆ")
print(f"NumPy๊ฐ€ {python_time/numpy_time:.1f}๋ฐฐ ๋น ๋ฆ„")

Frequently Asked Questions

What's the difference between NumPy arrays and Python lists?

NumPy Arrays:

  • Store only data of the same type
  • Memory efficient
  • Fast operations (implemented in C)
  • Support vectorized operations

Python Lists:

  • Can mix different types
  • Easy dynamic resizing
  • Slow operations (pure Python)

What's the difference between axis=0 and axis=1?

arr = np.array([[1, 2, 3], [4, 5, 6]])

# axis=0: move down the rows (↓), giving one result per column
print(np.sum(arr, axis=0)) # [5 7 9]

# axis=1: move across the columns (→), giving one result per row
print(np.sum(arr, axis=1)) # [ 6 15]

What does -1 mean in reshape?

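-1 tells reshape to infer the size of that dimension automatically from the total number of elements and the other dimensions. Only one dimension can be -1.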

arr = np.arange(12)

# The second dimension (4) is computed automatically
reshaped = arr.reshape(3, -1) # (3, 4)

# The first dimension (3) is computed automatically
reshaped = arr.reshape(-1, 4) # (3, 4)

# Flatten to one dimension
flattened = arr.reshape(-1) # (12,)

Copy vs. View?

arr = np.array([1, 2, 3, 4, 5])

# View (changes affect the original): a slice shares memory with arr
view = arr[1:4]
view[0] = 999
print(arr) # [  1 999   3   4   5]

# Copy (the original is safe): .copy() allocates independent data
copy = arr[1:4].copy()
copy[0] = 777
print(arr) # [  1 999   3   4   5] (unchanged)

Next Steps

Once you've mastered NumPy, learn:

  1. Pandas: NumPy-based data analysis library
  2. SciPy: Advanced scientific computing features
  3. Matplotlib: NumPy array visualization
  4. Machine Learning: Start ML with scikit-learn

References