% Listing as posted in the question: it does NOT compile as-is.
% The comments below only mark the problem spots; the code itself is left untouched.
\documentclass[a4paper,11pt]{article}
\usepackage{siunitx}
\usepackage{algorithm}
\usepackage{algpseudocode}
\usepackage{textcomp}
\usepackage{mathpazo}
\usepackage{eulervm}
\usepackage[utf8]{inputenc}
% NOTE(review): algorithmic is loaded in addition to algpseudocode (above);
% the two packages are incompatible with each other.
\usepackage{algorithmic}
\usepackage{ifthen}
\usepackage{amsmath}
\DeclareUnicodeCharacter{2212}{\textminus}% requires a Unicode capable editor
\sisetup{
detect-mode,
detect-family,
detect-inline-family=math,}
\begin{document}
\begin{algorithm}
\caption{PPO}
\begin{algorithmic}[1]
\For {$i=1,2,\ldots ,m$}
\EndFor
% NOTE(review): this outer loop has no matching \EndFor before \end{algorithmic}
% (three \For but only two \EndFor in the listing).
\For {$i=1,2,\ldots,m$}
\State $forbidden\_set\leftarrow\theta$
\State ${u}\leftarrow\ {p}[i]$
\State ${v\leftarrow\ {get}\_original\_node(u)}$
\For {each node $s$ that adjacent to $u$}
% NOTE(review): \IF/\ENDIF are the upper-case spellings of the algorithmic package,
% while every other command here (\For, \EndFor, \State) is algpseudocode syntax.
% Also, "$c[$s$]" leaves math mode around s, and "!\=" is not a valid
% "not equal" relation in math mode (\= is a text accent).
\IF{$c[$s$]!\= -1$}
\State ${v\leftarrow\ {get}\_original\_node(s)}$
% NOTE(review): \rbra is not defined anywhere in this file, and the braces on
% the next line are unbalanced (one closing brace too many).
\State ${a\leftarrow\ Max}\rbra\_original\_node(u)}$
\ENDIF
\State Run policy $\pi_{\theta_{old}}$ in environment for $T$ time steps
\State Compute advantage estimates $\hat{A}_{1},\ldots,\hat{A}_{T}$
\EndFor
\State Optimize surrogate $L$ wrt. $\theta$, with $K$ epochs and minibatch size $M\leq NT$
\State $\theta_{old}\leftarrow\theta$
\end{algorithmic}
\end{algorithm}
\end{document}
- 70,814
1 Answers
You should not load algorithmic alongside algpseudocode: they are incompatible with each other.
You're using the latter package's syntax (\For...\EndFor, \State), so stick with it and use \If{...}...\EndIf.
I also made several changes to the code, please check them carefully. I removed \rbra, which is not defined anywhere; put back whatever you intended there.
% Corrected listing: pseudocode typeset with algorithm + algpseudocode only.
\documentclass[a4paper,11pt]{article}
\usepackage{siunitx}
\usepackage{algorithm}
\usepackage{algpseudocode}
\usepackage{textcomp}
\usepackage{mathpazo}
\usepackage{eulervm}
\usepackage[utf8]{inputenc}
%\usepackage{algorithmic} %%%% NOT along algpseudocode
\usepackage{ifthen}
\usepackage{amsmath}
\DeclareUnicodeCharacter{2212}{\textminus}% requires a Unicode capable editor
\sisetup{
detect-mode,
detect-family,
detect-inline-family=math,}
\begin{document}
\begin{algorithm}
\caption{PPO}
\begin{algorithmic}[1]
  \For {$i=1,2,\dots ,m$}
  \EndFor
  \For {$i=1,2,\dots,m$}
    % Multi-letter identifiers go in \mathit; the underscore must be written
    % as \_ because a bare _ in math mode starts a subscript (and two in a
    % row raise "Double subscript").
    \State $\mathit{forbidden\_set}\leftarrow\theta$
    \State $u\leftarrow {p}[i]$
    \State $v\leftarrow \mathit{get\_original\_node}(u)$
    \For {each node $s$ that adjacent to $u$}
      \If{$c[s]\neq -1$}
        \State $v\leftarrow \mathit{get\_original\_node}(s)$
        \State $a\leftarrow \mathit{Max\_original\_node}(u)$
      \EndIf
      \State Run policy $\pi_{\theta_{\mathrm{old}}}$ in environment for $T$ time steps
      % Time indices are subscripts of \hat{A}.
      \State Compute advantage estimates $\hat{A}_{1},\dots,\hat{A}_{T}$
    \EndFor
    % \nobreak keeps "M <= NT" from being split across lines after the relation.
    \State Optimize surrogate $L$ wrt. $\theta$, with $K$
    epochs and minibatch size $M\leq\nobreak NT$
    \State $\theta_{\mathrm{old}}\leftarrow\theta$
  \EndFor
\end{algorithmic}
\end{algorithm}
\end{document}
To fix the overlong line 15, you could do the following (but wait until you're sure that the text is in its final form):
% Manual break for the overlong step: \hspace*{\fill} pads the first line so the
% forced \linebreak does not stretch it, and \hspace*{3em} indents the continuation.
\State Optimize surrogate $L$ wrt. $\theta$, with $K$\hspace*{\fill}\linebreak
\hspace*{3em}epochs and minibatch size $M\leq NT$
This would yield
- 1,121,712


