class Item:
"""An item represents an object that can be placed within a knapsack
"""
def __init__(self, index, profit):
"""constructor
Args:
index (int): index of the item, starting from 0
profit (float): profit of choosing the item
"""
self._index = index
self._profit = profit
self._class = 0
self._properties = {}
@property
def index(self): return self._index
@property
def profit(self): return self._profit
def get_class(self): return self._class
def set_class(self, cls): self._class = cls
def get_property(self, name):
return self._properties[name]
def set_property(self, name, value):
self._properties[name] = value
def __str__(self):
p_str = ""
for attr in self._properties:
p_str += f'{attr}: {self._properties[attr]}'
return f"index: {self._index}, profit: {self._profit}, " + \
p_str
8 The Knapsack Problem
The knapsack problem is a classic optimization problem in the field of operations research. It involves selecting a subset of items from a given set of items to maximize the total value/profit while satisfying certain constraints. This problem is NP-hard, meaning that it is computationally difficult to find an optimal solution for large instances of the problem. Various algorithms have been developed to solve this problem, including dynamic programming, branch and bound, and heuristic methods. The knapsack problem has applications in a variety of fields, including computer science, finance, and logistics, among others. In this chapter, we’ll examine two variants of the knapsack problem, namely, the single-dimensional knapsack problem and the multi-dimensional knapsack problem.
8.1 Single-dimensional Knapsack Problem
In the single-dimensional knapsack problem (SDKP), we are given a set of \(n\) items, each with a weight \(w_j\) and a value \(v_j\), and a knapsack with a maximum weight capacity \(W\), the goal is to select a subset of items such that the sum of their weights is less than or equal to \(W\), and the sum of their values is maximized. The SDKP can be formally formulated as follows (Dudziński and Walukiewicz (1987)).
\[\begin{align} \text{max.} &\quad \sum_{j = 1}^nv_i x_j \label{knapsack1-obj} \\ \text{s.t.} &\quad \sum_{j=1}^n w_j x_j \leq W \label{knapsack1-cons1} \\ &\quad x_j \in \{0, 1\}, \ j = 1, \cdots, n \label{knapsack1-cons2} \end{align}\]
In the model, the \(n\) represents the total amount of items that are being considered. The binary decision variable \(x_i\) has a value of 1 when item \(i\) is chosen, and 0 when it’s not. The value or profit of selecting item \(i\) is indicated by \(v_i\), and the weight of item \(i\) is represented by \(w_i\). Finally, \(W\) specifies the maximum weight capacity of the knapsack.
To represent the list of items in the problem, we first define a class Item
that has three attributes:
_index
: this is the index of an item and starts from 0_profit
: this is the profit or value of selecting the item_properties
: this dictionary saves an item’s properties, including weight
Next, we define a class KnapsackDataCenter
to hold all the information we need to solve a knapsack problem. The class has two attributes:
_items
: this is the full list of items being considered for putting into the knapsack_capacities
: this saves the maximal capacity of the knapsack corresponding to different properties, including weight.
The class also defines a method read_data_set_f()
that reads and parses some benchmarking instancese available online.
class SDKPDataCenter(KnapsackDataCenter):
def __init__(self):
super().__init__()
def read_sdkp_dataset_f(self, data_file: str):
"""this function reads and parses data presented in
http://artemisa.unicauca.edu.co/~johnyortega/instances_01_KP/
Args:
data_file (str): path to the data file
num_items (int): number of items in the file
capacity (int): knapsack capacity
"""
with open(data_file) as f:
first_line = f.readline()
num_items, capacity = first_line.split()
self._capacities['weight'] = float(capacity)
item_idx = 0
rest_lines = f.readlines()
for line in rest_lines:
profit, weight = line.split()
item = Item(item_idx, float(profit))
item.set_property('weight', float(weight))
self._items.append(item)
item_idx += 1
if item_idx == int(num_items): break
The code snippet below reads the instance f1_l-d_kp_10_269
and shows its key information.
data_file = "./data/knapsack/SDKP/instances_01_KP/low-dimensional/f1_l-d_kp_10_269"
data_center = SDKPDataCenter()
data_center.read_sdkp_dataset_f(data_file)
capacities = data_center.capacities
print(f"Knapsack info: {capacities}")
items = data_center.items
print("Item info: ")
for item in items:
print(item)
Knapsack info: {'weight': 269.0}
Item info:
index: 0, profit: 55.0, weight: 95.0
index: 1, profit: 10.0, weight: 4.0
index: 2, profit: 47.0, weight: 60.0
index: 3, profit: 5.0, weight: 32.0
index: 4, profit: 4.0, weight: 23.0
index: 5, profit: 50.0, weight: 72.0
index: 6, profit: 8.0, weight: 80.0
index: 7, profit: 61.0, weight: 62.0
index: 8, profit: 85.0, weight: 65.0
index: 9, profit: 87.0, weight: 46.0
Now we are ready to solve the SDKP using OR-Tools. The code below shows the completion definition of the class SDKnapsackSolver
which has a number of attributes:
_data_center
: this should be an instantiatedKnapsackDataCenter
object_solver
: this is the solver object used for modeling and problem solving_var_x
: this is the object that holds all the decision variables used in the model_opt_obj
: this is the optimal solution value found by the solver_opt_x
: this is the optimal solution identified when the solving process completes
The class has two major methods:
build_model()
: this is responsible for instantiating the solver object, creating variables, generating constraints and defining the objective functionoptimize()
: this is the place where the OR-Tools searches for the optimal solution
from ortools.linear_solver import pywraplp
class SDKnapsackSolver:
def __init__(self, data_center):
self._data_center = data_center
self._solver = None
self._var_x = None
self._opt_obj = None
self._opt_x = None
def build_model(self):
self._solver = pywraplp.Solver.CreateSolver('SCIP')
self._create_variables()
self._create_objective()
self._create_constraints()
def optimize(self):
status = self._solver.Solve()
if status == pywraplp.Solver.OPTIMAL:
self._opt_obj = self._solver.Objective().Value()
items = self._data_center.items
self._opt_x = [
self._var_x[item.index].solution_value()
for item in items
]
def _create_variables(self):
items = self._data_center.items
self._var_x = [self._solver.BoolVar(name=f'x_{i}')
for i, item in enumerate(items)]
def _create_objective(self):
items = self._data_center.items
obj_expr = [
self._var_x[item.index] * item.profit
for item in items
]
self._solver.Maximize(self._solver.Sum(obj_expr))
def _create_constraints(self):
items = self._data_center.items
capacities = self._data_center.capacities
expr = [
self._var_x[item.index] *
item.get_property('weight')
for item in items
]
self._solver.Add(
self._solver.Sum(expr) <= capacities['weight']
)
@property
def opt_obj(self): return self._opt_obj
def get_num_chosen_items(self):
return sum(self._opt_x)
We employ the model to tackle 10 benchmark instances that were downloaded from an online source (http://artemisa.unicauca.edu.co/~johnyortega/instances_01_KP/). Table 8.1 displays the computational results.
More specifically, the first column of the table lists the names of the instances, while the second and third columns indicate the number of items and the capacity of the knapsack for each instance, respectively. The last two columns report the optimal solution’s objective value and the number of selected items, respectively.
Instance | No. of Items | Capacity | Optimal Value | No. of Chosen Items |
---|---|---|---|---|
f1_l-d_kp_10_269 | 10 | 269 | 295 | 6 |
f2_l-d_kp_20_878 | 20 | 878 | 1024 | 17 |
f3_l-d_kp_4_20 | 4 | 20 | 35 | 3 |
f4_l-d_kp_4_11 | 4 | 11 | 23 | 2 |
f5_l-d_kp_15_375 | 15 | 375 | 481.069 | 9 |
f6_l-d_kp_10_60 | 10 | 60 | 52 | 7 |
f7_l-d_kp_7_50 | 7 | 50 | 107 | 2 |
f8_l-d_kp_23_10000 | 23 | 10000 | 9767 | 11 |
f9_l-d_kp_5_80 | 5 | 80 | 130 | 4 |
f10_l-d_kp_20_879 | 20 | 879 | 1025 | 17 |
8.2 Multi-Dimensional Knapsack Problem
The multi-dimensional knapsack problem is a variant of the classical knapsack problem where there are multiple candidate items and each item has multiple attributes or dimensions (Petersen (1967)). The goal is to select a subset of items that maximizes the total value or profit subject to the constraint that the sum of the attribute values of the selected items does not exceed certain limits.
Formally, let there be \(n\) items, and for each item \(j \ (1 ≤ j ≤ n)\), let:
- \(v_j\) be the profit of selecting it
- \(w_{ij}\) be the \(i\)th attribute value of item \(j\)
- \(m\) is the number of dimensions or attributes
- \(W_i\) is the capacity of the knapsack for dimension \(i\)
The multi-dimensional knapsack problem can then be formulated as the following optimization problem:
\[\begin{align} \text{max.} &\quad \sum_{j = 1}^n v_j x_j \label{knapsack2-obj} \\ \text{s.t.} &\quad \sum_{j=1}^n w_{ij} x_j \leq W_i, \ \forall i = 1, \cdots, m \label{knapsack2-cons1} \\ &\quad x_j \in \{0, 1\}, \ j = 1, \cdots, n \label{knapsack2-cons2} \end{align}\]
To test the model, we take data from online source (http://people.brunel.ac.uk/~mastjjb/jeb/orlib/mknapinfo.html) and define a reading function read_mdkp_dataset_1()
in class MDKPDataCenter
. Note that the class inherits the KnapsackDataCenter
class defined in the previous section, and all the parsed contents of an instance will be saved in the _items
and _capacities
attributes.
class MDKPDataCenter(KnapsackDataCenter):
def read_mdkp_dataset_1(self, data_file: str):
"""read data from testing instance
Args:
data_file (str): data file
"""
with open(data_file) as f:
first_line = f.readline()
num_items, \
num_constraints, \
opt_val = first_line.split()
second_line = f.readline()
profits = second_line.split()
item_idx = 0
for p in profits:
item = Item(item_idx, float(p))
self._items.append(item)
item_idx += 1
for i in range(int(num_constraints)):
line = f.readline()
weights = line.split()
prop = f'prop_{i}'
for idx, val in enumerate(weights):
self._items[idx].set_property(
prop,
float(val)
)
last_line = f.readline()
for idx, val in enumerate(last_line.split()):
prop = f'prop_{idx}'
self._capacities[prop] = float(val)
Now we define in the code below the class MDKnapsackSolver
that performs a couple of things:
- create variables in lines 31 - 34
- define objective in lines 36 - 42
- create constraints in line 44 - 58
from ortools.linear_solver import pywraplp
class MDKnapsackSolver:
def __init__(self, data_center):
self._data_center = data_center
self._solver = None
self._var_x = None
self._opt_obj = None
self._opt_x = None
def build_model(self):
self._solver = pywraplp.Solver.CreateSolver('SCIP')
self._create_variables()
self._create_objective()
self._create_constraints()
def optimize(self):
status = self._solver.Solve()
if status == pywraplp.Solver.OPTIMAL:
self._opt_obj = self._solver.Objective().Value()
items = self._data_center.items
self._opt_x = [
self._var_x[item.index].solution_value()
for item in items
]
def _create_variables(self):
items = self._data_center.items
self._var_x = [self._solver.BoolVar(name=f'x_{i}')
for i, item in enumerate(items)]
def _create_objective(self):
items = self._data_center.items
obj_expr = [
self._var_x[item.index] * item.profit
for item in items
]
self._solver.Maximize(self._solver.Sum(obj_expr))
def _create_constraints(self):
items = self._data_center.items
capacities = self._data_center.capacities
num_properties = len(capacities)
for p_idx in range(num_properties):
prop = f'prop_{p_idx}'
expr = [
self._var_x[item.index] *
item.get_property(prop)
for item in items
]
self._solver.Add(
self._solver.Sum(expr) <=
capacities[prop], name=f'cons_{p_idx}'
)
@property
def opt_obj(self): return self._opt_obj
def get_num_chosen_items(self):
return sum(self._opt_x)
Table 8.2 shows the computational results of some testing instances.
Instance | No. of Items | Optimal Value | No. of Chosen Items |
---|---|---|---|
inst1.txt | 6 | 3800 | 3 |
inst2.txt | 10 | 8706.1 | 5 |
inst3.txt | 15 | 4015 | 9 |
inst4.txt | 20 | 6120 | 9 |
inst5.txt | 28 | 12400 | 18 |
inst6.txt | 39 | 10618 | 27 |
inst7.txt | 50 | 16537 | 35 |
8.3 Multi-Choice Knapsack Problem
In the multi-choice knapsack problem, there are \(n\) candidate items to be placed in a knapsack and each item belongs to a specific class \(k = 1, \cdots m\). It is required that only one item can be selected from each class. The problem can be formulated as below (Sinha and Zoltners (1979)).
\[\begin{align} \text{max.} &\quad \sum_{j=1}^n v_j x_j \label{knapsack3-obj} \\ \text{s.t.} &\quad \sum_{j=1}^n w_j x_j \leq W \label{knapsack3-cons1}\\ &\quad \sum_{j=1}^n c_{jk} x_j = 1, \ \forall k = 1, \cdots m \label{knapsack3-cons2}\\ &\quad x_j = \{0, 1\}, \ \forall j = 1, \cdots n \label{knapsack3-cons3} \end{align}\]
The class MCKPDataCenter
defines a reader function to parse the instances available online (https://or-dii.unibs.it/index.php?page=tiks).
class MCKPDataCenter(KnapsackDataCenter):
def __init__(self):
super().__init__()
self._num_classes = 0
self._num_choices = 0
def read_mcmdkp_dataset_1(self, data_file: str):
with open(data_file) as f:
f.readline()
num_classes,\
num_choices,\
num_constraints = f.readline().split()
self._num_classes = int(num_classes)
self._num_choices = int(num_choices)
capacities = f.readline().split()
for p_idx in range(int(num_constraints)):
prop = f'prop_{p_idx}'
self._capacities[prop] = float(capacities[p_idx])
item_idx = 0
for i in range(int(num_classes)):
f.readline()
for c in range(int(num_choices)):
line = f.readline().split()
item = Item(item_idx, profit=float(line[0]))
for p_idx in range(int(num_constraints)):
prop = f'prop_{p_idx}'
item.set_property(prop, float(line[p_idx + 1]))
item.set_class(i)
item_idx += 1
self._items.append(item)
@property
def num_classes(self): return self._num_classes
@property
def num_choices(self): return self._num_choices
The complete code to solve the problem is given below.
from ortools.linear_solver import pywraplp
class MCKnapsackSolver:
def __init__(self, data_center):
self._data_center = data_center
self._solver = None
self._var_x = None
self._opt_obj = None
self._opt_x = None
def build_model(self):
self._solver = pywraplp.Solver.CreateSolver('SCIP')
self._create_variables()
self._create_objective()
self._create_constraints()
def optimize(self):
status = self._solver.Solve()
if status == pywraplp.Solver.OPTIMAL:
self._opt_obj = self._solver.Objective().Value()
items = self._data_center.items
self._opt_x = [
self._var_x[item.index].solution_value()
for item in items
]
def _create_variables(self):
items = self._data_center.items
self._var_x = [self._solver.BoolVar(name=f'x_{i}')
for i, item in enumerate(items)]
def _create_objective(self):
items = self._data_center.items
obj_expr = [
self._var_x[item.index] * item.profit
for item in items
]
self._solver.Maximize(self._solver.Sum(obj_expr))
def _create_constraints(self):
items = self._data_center.items
capacities = self._data_center.capacities
p_idx = 0
prop = f'prop_{p_idx}'
expr = [
self._var_x[item.index] *
item.get_property(prop)
for item in items
]
self._solver.Add(
self._solver.Sum(expr) <=
capacities[prop], name=f'cons_{p_idx}'
)
num_classes = self._data_center.num_classes
for k in range(num_classes):
expr = [self._var_x[item.index]
for item in items
if item.get_class() == k]
self._solver.Add(
self._solver.Sum(expr) == 1
)
@property
def opt_obj(self): return self._opt_obj
def get_num_chosen_items(self):
return sum(self._opt_x)
Table 8.3 shows some computational experiments.
Instance | No. of Items | Optimal Value | No. of Chosen Items |
---|---|---|---|
INST01.txt | 500 | 13411 | 50 |
INST02.txt | 500 | 13953 | 50 |
INST03.txt | 600 | 15727 | 60 |
INST04.txt | 700 | 18928 | 70 |
INST05.txt | 750 | 20314 | 75 |
INST06.txt | 750 | 20277 | 75 |
INST07.txt | 800 | 21372 | 80 |
INST08.txt | 800 | 21556 | 80 |
INST09.txt | 800 | 21581 | 80 |
INST10.txt | 900 | 24232 | 90 |
INST11.txt | 900 | 24267 | 90 |
INST12.txt | 1000 | 26206 | 100 |
INST13.txt | 3000 | 24382 | 100 |
INST14.txt | 4500 | 36971 | 150 |
INST15.txt | 5400 | 44001 | 180 |
INST16.txt | 6000 | 48833 | 200 |
INST17.txt | 7500 | 61056 | 250 |
INST18.txt | 5600 | 68021 | 280 |
INST19.txt | 6000 | 73054 | 300 |
INST20.txt | 7000 | 84958 | 350 |
8.4 Multi-Choice Multi-Dimensional Knapsack Problem
In the multi-choice multi-dimensional knapsack problem (MCMDKP), there are \(n\) candidate items and each item belongs to a specific class \(k = 1, \cdots, m\). Each item also has \(d\) attributes. The requirement is to select exactly one item from each class such that the total profit is maximized. Note that the knapsack capacity cannot be violated for any item attribute. The MCMDKP can be formulated as follows (Chen and Hao (2014)).
\[\begin{align} \text{max.} &\quad \sum_{j=1}^n v_j x_j \label{knapsack4-obj} \\ \text{s.t.} &\quad \sum_{j=1}^n w_{ij} x_j \leq W_i, \ \forall i = 1, \cdots d \\ &\quad \sum_{j=1}^n c_{jk} x_j = 1, \ \forall k = 1, \cdots m\\ &\quad x_j = \{0, 1\}, \ \forall j = 1, \cdots n \end{align}\]
The code snippet below gives the complete program to solve this problem.
from ortools.linear_solver import pywraplp
class MCMDKnapsackSolver:
def __init__(self, data_center):
self._data_center = data_center
self._solver = None
self._var_x = None
self._opt_obj = None
self._opt_x = None
def build_model(self):
self._solver = pywraplp.Solver.CreateSolver('SCIP')
self._create_variables()
self._create_objective()
self._create_constraints()
def optimize(self):
status = self._solver.Solve()
if status == pywraplp.Solver.OPTIMAL:
self._opt_obj = self._solver.Objective().Value()
items = self._data_center.items
self._opt_x = [
self._var_x[item.index].solution_value()
for item in items
]
def _create_variables(self):
items = self._data_center.items
self._var_x = [self._solver.BoolVar(name=f'x_{i}')
for i, item in enumerate(items)]
def _create_objective(self):
items = self._data_center.items
obj_expr = [
self._var_x[item.index] * item.profit
for item in items
]
self._solver.Maximize(self._solver.Sum(obj_expr))
def _create_constraints(self):
items = self._data_center.items
capacities = self._data_center.capacities
num_properties = len(capacities)
for p_idx in range(num_properties):
prop = f'prop_{p_idx}'
expr = [
self._var_x[item.index] *
item.get_property(prop)
for item in items
]
self._solver.Add(
self._solver.Sum(expr) <=
capacities[prop], name=f'cons_{p_idx}'
)
num_classes = self._data_center.num_classes
for k in range(num_classes):
expr = [self._var_x[item.index]
for item in items
if item.get_class() == k]
self._solver.Add(
self._solver.Sum(expr) == 1
)
@property
def opt_obj(self): return self._opt_obj
def get_num_chosen_items(self):
return sum(self._opt_x)
Table 8.4 shows some empirical computational results.
Instance | No. of Items | Optimal Value | No. of Chosen Items |
---|---|---|---|
INST01.txt | 250 | 7059 | 25 |
INST02.txt | 250 | 6998 | 25 |
INST03.txt | 300 | 8418 | 30 |
INST04.txt | 300 | 8518 | 30 |
INST05.txt | 300 | 8418 | 30 |
INST06.txt | 300 | 8418 | 30 |
INST07.txt | 300 | 8418 | 30 |
INST08.txt | 300 | 8418 | 30 |
INST09.txt | 300 | 8418 | 30 |
INST10.txt | 300 | 8418 | 30 |
INST11.txt | 300 | 8418 | 30 |
INST12.txt | 300 | 8418 | 30 |
INST13.txt | 900 | 8833 | 30 |
INST14.txt | 900 | 8841 | 30 |
INST15.txt | 900 | 8833 | 30 |
INST16.txt | 900 | 8788 | 30 |
INST17.txt | 900 | 8820 | 30 |
INST18.txt | 600 | 8664 | 30 |
INST19.txt | 600 | 8667 | 30 |
INST20.txt | 600 | 8714 | 30 |