by: Kosal Chhin, 2019 class of MATH 6902 Modern Optimization
The problem below is part of an exercise problem taken from the book: "Optimization - Theory and Practice" by Wilhelm Forst and Deiter Hoffmann, Springer New York, 2010. Chapter 2, Exercise 5.
In a "colloquial speech" of mathematicians one can sometimes hear the following statement: "Strictly convex functions always have exactly one minimizer.".
However, is it really right to use this term so carelessly? Consider two typical representatives
$$ f_i: \mathbb{R}^2 \longrightarrow \mathbb{R}, i \in \{1,2\}:$$
$$ f_1(x,y) = x^2 + y^2 $$
$$ f_2(x,y) = x^2 - y^2 $$
1) Let's begin by visualizing the 3d plots and contour plots of f1 and f2 defined above.
2) Then, define constraints, cons1, cons2, cons3, cons4 and cons5 below:
Let $ cons_i \subset \mathbb{R}^2$ for $i \in \{1,2,3,4,5\}$ be a region in $\mathbb{R}^2$ with:
$$ cons1 := \{(x,y) \in \mathbb{R}^2: x_1^2 + x_2^2 \leq 0.04\} $$
$$ cons2 := \{(x,y) \in \mathbb{R}^2: (x_1 - 0.55)^2 + (x_2 - 0.7)^2 \leq 0.04\} $$
$$ cons3 := \{(x,y) \in \mathbb{R}^2: (x_1 - 0.55)^2 + x_2 \leq 0.04\} $$
The outer boundary of the regions $cons4$ and $cons5$ is defined by:
$$ x = 0.5(0.5 + 0.2\cos(6t))\cos(t) + x_c $$
$$ y = 0.5(0.5 + 0.2\cos(6t))\sin(t) + y_c $$
$$ t \in [\, 0, 2\pi]\, $$
where $(x_c, y_c) = (0, 0)$ for $cons4$ and $(x_c, y_c) = (0, -0.7)$ for $cons5$
3) solve the minimization problem using optimize minimize package.
4) Visualize the solutions along with all the constraints.
Conclusion¶
We see that the function f1 is strictly convex, and if there's no constraint, then we can say that strictly convex function always have exactly one minimizer.
However, the statement described above about strictly convex function should not be used carelessly because if I restrict this optimization problem with $cons5$ (see figure below), there are actually two minimizers, not one.
Below are the visualizations:¶
Below is the python codes used to generate the above plots:¶
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.gridspec as gridspec
import warnings
warnings.filterwarnings('ignore')
#Define 2 functions, f1 and f2:
def f1(x,y):
return x**2 + y**2
def f2(x,y):
return x**2 - y**2
#set up the grid
delta = 0.01
X, Y = np.meshgrid(np.arange(-1,1, delta),np.arange(-1,1, delta))
#evaluate the function to get a set of values for plotting purposes
F1 = f1(X,Y)
F2 = f2(X,Y)
#Visualiaztion
#plot 3d
fig = plt.figure(figsize=(18, 16))
ax1 = fig.add_subplot(221, projection='3d')
ax1.plot_surface(X,Y, F1, cmap=plt.cm.hot)
ax1.contourf(X,Y,F1, offset = 0)
ax2 = fig.add_subplot(222, projection='3d')
ax2.plot_surface(X,Y, F2, cmap=plt.cm.hot)
ax2.contourf(X,Y,F2, offset = -1)
#plot contour 2d
Range = np.arange(-1,1,0.05)
font = 10
cont1 = fig.add_subplot(223)
CP = cont1.contour(X,Y,F1,[Range])
plt.clabel(CP, inline=0, fontsize=font)
cont2 = fig.add_subplot(224)
CP = cont2.contour(X,Y,F2,[Range])
plt.clabel(CP, inline=1, fontsize=font)
fig.show()
from scipy import optimize
#define the functions
f1 = lambda x: x[0]**2 + x[1]**2
f2 = lambda x: x[0]**2 - x[1]**2
#define the constraints in format of optimize.minimize package
cons1 = [{"type": "ineq", "fun": lambda x: -(x[0]**2 + x[1]**2 - 0.04)}]
cons2 = [{"type": "ineq", "fun": lambda x: -((x[0]-0.55)**2 + (x[1]-0.7)**2 - 0.04)}]
cons3 = [{"type": "ineq", "fun": lambda x: -((x[0]-0.55)**2 + x[1]**2 - 0.04)}]
cons4 = [{"type": "ineq", "fun": lambda x: x[2]},
{"type": "ineq", "fun": lambda x: 2*np.pi - x[2]},
{"type": "ineq", "fun": lambda x: ((0.5*(0.5 + 0.2*np.cos(6*x[2]))*np.cos(x[2])) - x[0]) - x[0]},
{"type": "ineq", "fun": lambda x: ((0.5*(0.5 + 0.2*np.cos(6*x[2]))*np.sin(x[2])) - x[1]) - x[1]}]
cons5 = [{"type": "ineq", "fun": lambda x: x[2]},
{"type": "ineq", "fun": lambda x: 2*np.pi - x[2]},
{"type": "eq" , "fun": lambda x: (0.5*(0.5 + 0.2*np.cos(6*x[2]))*np.cos(x[2])) - x[0]},
{"type": "eq" , "fun": lambda x: (0.5*(0.5 + 0.2*np.cos(6*x[2]))*np.sin(x[2]) - 0.7) - x[1]},
{"type": "ineq", "fun": lambda x: x[0] - x[1]}]
#use optimize minimize to solve for f1
f1sol1 = optimize.minimize(f1, (1,1), constraints=cons1, method='SLSQP').x
f1sol2 = optimize.minimize(f1, (1,1), constraints=cons2, method='SLSQP').x
f1sol3 = optimize.minimize(f1, (1,1), constraints=cons3, method='SLSQP').x
f1sol4 = optimize.minimize(f1, (1,0,1), constraints=cons4, method='SLSQP').x
f1sol5 = optimize.minimize(f1, (1,1,1), constraints=cons5, method='SLSQP').x
#use optimize minimize to solve for f2
f2sol1 = optimize.minimize(f2, (1,1), constraints=cons1, method='SLSQP').x
f2sol2 = optimize.minimize(f2, (0,0), constraints=cons2, method='SLSQP').x
f2sol3 = optimize.minimize(f2, (0,0), constraints=cons3, method='SLSQP').x
f2sol4 = optimize.minimize(f2, (1,1,1), constraints=cons4, method='SLSQP').x
f2sol5 = optimize.minimize(f2, (1,1,1), constraints=cons5, method='SLSQP').x
#creat a figure, and add two subplots
fig2 = plt.figure(figsize=(16, 8))
cont1 = fig2.add_subplot(121)
cont2 = fig2.add_subplot(122)
#plot the contour
Range = np.arange(-2,2,0.08)
CP1 = cont1.contour(X,Y,F1,[Range])
plt.clabel(CP1, inline=0, fontsize=10)
CP2 = cont2.contour(X,Y,F2,[Range])
plt.clabel(CP2, inline=1, fontsize=10)
#define the 5 domains which are the constraints
def d1(x):
return np.sqrt(0.04 - x**2)
def d2(x):
pos = np.sqrt(0.04 - (x-0.55)**2) + 0.7
neg = - np.sqrt(0.04 - (x-0.55)**2) + 0.7
return pos, neg
def d3(x):
return np.sqrt(0.04 - (x-0.55)**2)
def d4(t):
x = 0.5*(0.5 + 0.2*np.cos(6*t))*np.cos(t) + 0
y = 0.5*(0.5 + 0.2*np.cos(6*t))*np.sin(t) + 0
return x, y
def d5(t):
x = 0.5*(0.5 + 0.2*np.cos(6*t))*np.cos(t) + 0
y = 0.5*(0.5 + 0.2*np.cos(6*t))*np.sin(t) - 0.7
return x, y
x = np.arange(-1,1,0.001)
t = np.arange(0,2*np.pi,0.001)
#plot all the constraints
cont1.plot(x,d1(x),color='b')
cont1.plot(x,-d1(x),color='b')
cont1.plot(f1sol1[0], f1sol1[1], marker='o', markersize=7, color="red")
cont1.plot(x,d2(x)[0],color='b')
cont1.plot(x,d2(x)[1],color='b')
cont1.fill_between(x, d2(x)[0], d2(x)[1], where=d2(x)[0]>=d2(x)[1], facecolor='gold')
cont1.plot(f1sol2[0], f1sol2[1], marker='o', markersize=7, color="red")
cont1.plot(x,d3(x),color='b')
cont1.plot(x,-d3(x),color='b')
cont1.fill_between(x, d3(x), -d3(x), where=d3(x)>=d3(x), facecolor='gold')
cont1.plot(f1sol3[0], f1sol3[1], marker='o', markersize=7, color="red")
cont1.plot(d4(t)[0],d4(t)[1])
cont1.plot(f1sol4[0], f1sol4[1], marker='o', markersize=7, color="red")
cont1.plot(d5(t)[0],d5(t)[1])
cont1.fill_between(d5(t)[0], d5(t)[1], where=d5(t)[0]>=d5(t)[1], facecolor='gold')
cont1.plot(f1sol5[0], f1sol5[1], marker='o', markersize=7, color="red")
cont2.plot(x,d1(x),color='b')
cont2.plot(x,-d1(x),color='b')
cont2.plot(f2sol1[0], f2sol1[1], marker='o', markersize=7, color="red")
cont2.plot(x,d2(x)[0],color='b')
cont2.plot(x,d2(x)[1],color='b')
cont2.fill_between(x, d2(x)[0], d2(x)[1], where=d2(x)[0]>=d2(x)[1], facecolor='gold')
cont2.plot(f2sol2[0], f2sol2[1], marker='o', markersize=7, color="red")
cont2.plot(x,d3(x),color='b')
cont2.plot(x,-d3(x),color='b')
cont2.fill_between(x, d3(x), -d3(x), where=d3(x)>=d3(x), facecolor='gold')
cont2.plot(f2sol3[0], f2sol3[1], marker='o', markersize=7, color="red")
cont2.plot(d4(t)[0],d4(t)[1])
cont2.plot(f2sol4[0], f2sol4[1], marker='o', markersize=7, color="red")
cont2.plot(d5(t)[0],d5(t)[1])
cont2.fill_between(d5(t)[0], d5(t)[1], where=d5(t)[0]>=d5(t)[1], facecolor='gold')
cont2.plot(f2sol5[0], f2sol5[1], marker='o', markersize=7, color="red")
fig2.show()