I'm trying to get encoding in PDF outlines right. The problem is that the letter in the outline must be Ъ - the same as on the page - but it is Ú.
dvitype shows this:
xxx 'pdf: outline 1 << /Title (?) /Dest [ @thispage /FitH @ypos ] >>' non-ASCII character in xxx command!
Below is the example file (I know, it's somewhat lengthly, but I did my best to remove as much irrelevant parts from the original file as I could). Run with:
$ tex -ini -enc '\input plain \dump'
$ tex -fmt plain example.tex
$ dvipdfmx example.dvi
example.tex:
\font\tenbf=labx1000
\newtoks\gtitle % title of current major group
\newtoks\toksE \newtoks\toksF \newtoks\usersanitizer
\newif\iftokprocessed \newif\ifTnum \newif\ifinstr
\def\firstsecno#1.{\setbox0=\hbox{\toksA={#1.}\toksB={}%
\maketoks}}
\def\addtokens#1#2{\edef\addtoks{\noexpand#1={\the#1#2}}\addtoks}
\def\poptoks#1#2|ENDTOKS|{\let\first=#1\toksD={#1}%
\ifcat\noexpand\first0\countB=`#1\else\countB=0\fi\toksA={#2}}
\def\maketoksdone{\edef\st{\global\noexpand\toksA={\the\toksB}}\st}
\def\sanitizecommand#1#2{\addtokens\usersanitizer
{\noexpand\dosanitizecommand\noexpand#1{#2}}}
\def\dosanitizecommand#1#2{\ifx\nxt#1\addF{#2}\fi}
\def\makeoutlinetoks{\Tnumfalse\afterassignment\makeolproctok\let\nxt= }
\def\makeolnexttok{\afterassignment\makeolproctok\let\nxt= }
\def\makeolgobbletok{\afterassignment\makeolnexttok\let\nxt= }
\def\addF#1{\addtokens\toksF{#1}\tokprocessedtrue}
% now comes a routine to "sanitize" section names, for pdf outlines
\def\makeolproctok{\tokprocessedfalse
\let\next\makeolnexttok % default
\ifx\nxt\outlinedone\let\next\outlinedone
\else\ifx{\nxt \else\ifx}\nxt \Tnumfalse \instrfalse % skip braces
\else\ifx$\nxt % or a $ sign
\else\ifx^\nxt \addF^\else\ifx_\nxt \addF_% sanitize ^ and _
\else\ifx\nxt\spacechar \addF\space
\else\if\noexpand\nxt\relax % we have a control sequence; is it one we know?
\ifx\nxt~\addF\space
\else\ifx\nxt\onespace\addF\space
\else\the\usersanitizer
\iftokprocessed\else\makeolproctokctli
\iftokprocessed\else\makeolproctokctlii
\iftokprocessed\else\makeolproctokctliii % if not recognised, skip it
\fi\fi\fi\fi\fi
\else % we don't have a control sequence, it's an ordinary char
\ifx/\nxt \addF{\string\/}% quote chars special to PDF with backslash
\else\ifx(\nxt \addF{\string\(}\else\ifx)\nxt \addF{\string\)}%
\else\ifx[\nxt \addF{\string\[}\else\ifx]\nxt \addF{\string\]}%
\else\expandafter\makeolproctokchar\meaning\nxt
\fi\fi\fi\fi\fi\fi\fi\fi\fi\fi\fi\fi\fi
\next
}
\def\makeolproctokchar#1 #2 #3{\addF{#3}}
\def\makeolproctokctli{%
\ifx\nxt\CEE\addF{C}\let\next\makeolgobbletok % \CEE/
\else\ifx\nxt\UNIX\addF{UNIX}\let\next\makeolgobbletok % \UNIX/
\else\ifx\nxt\TEX\addF{TeX}\let\next\makeolgobbletok % \TEX/
\else\ifx\nxt\TeX\addF{TeX}\else\ifx\nxt\LaTeX\addF{LaTeX}%
\else\ifx\nxt\CPLUSPLUS\addF{C++}\let\next\makeolgobbletok % \CPLUSPLUS/
\else\ifx\nxt\Cee\addF{C}%
\else\ifx\nxt\PB \let\next\makeolgobbletok \tokprocessedtrue % \PB{...}
\else\ifx\nxt\.\tokprocessedtrue\instrtrue % \.{...}
% skip \|
\else\ifx\nxt\\\ifinstr\addF{\bschar\bschar}\else\tokprocessedtrue\fi
\else\ifx\nxt\&\ifinstr\addF&\else\tokprocessedtrue\fi
\else\ifx\nxt\~\ifTnum\addF{0}\else\addF\tildechar\fi % 077->\T{\~77}
\else\ifx\nxt\_\ifTnum\addF{E}\else\addF_\fi % 0.1E5->\T{0.1\_5}
\else\ifx\nxt\^\ifTnum\addF{0x}\else\addF^\fi % 0x77 -> \T{\^77}
\else\ifx\nxt\$\ifTnum\tokprocessedtrue\else\addF$\fi % \T{77\$L}
\else\ifx\nxt\{\addF\lbchar \else\ifx\nxt\}\addF\rbchar
\else\ifx\nxt\ \addF\space \else\ifx\nxt\#\addF{\string\#}%
\else\ifx\nxt\PP\addF{++}\else\ifx\nxt\MM\addF{--}%
\fi\fi\fi\fi\fi\fi\fi\fi\fi\fi\fi\fi\fi\fi\fi\fi\fi\fi\fi\fi\fi
}
\def\outlinedone{\edef\outlinest{\global\noexpand\toksE={\the\toksF}}%
\outlinest\let\outlinedone=\relax}
\def\lapstar{\rlap{*}}
\def\stsec{\rightskip=0pt % get out of C mode (cf. \B)
\sfcode`;=1500 \pretolerance 200 \hyphenpenalty 50 \exhyphenpenalty 50
\noindent{\let\*=\lapstar\bf\secstar.\quad}%
\smash{\raise\baselineskip\hbox to0pt{%
\let\*=\empty\special{%
pdf: dest (\romannumeral\secstar) [ @thispage /FitH @ypos ]}}}}
\let\startsection=\stsec
\def\MN#1{\par % common code for \M, \N
{\xdef\secstar{#1}\let\*=\empty\xdef\secno{#1}}% remove \* from section name
\ifx\secno\secstar\fi
\mark{{{\tensy x}\secno}{1}{\the\gtitle}}}
\let\ZZ=\let % now you can \write the control sequence \ZZ
\let\page=\pagebody \raggedbottom
\def\startpdf{
{\special{pdf: docview << /PageMode /UseOutlines >>}}}
\newwrite\cont
\output{\setbox0=\page % the first page is garbage
\openout\cont=\jobname.toc
\global\output{\shipout\vbox{
\vbox to 9in{
\hbox to 6.5in{\vbox to10pt{}}
\vfill\page}}}}
\vbox to \vsize{} % the first \topmark won't be null
\def\makebookmarks{\let\ZZ=\writebookmarkline \readcontents\relax}
\def\expnumber#1{\expandafter\ifx\csname#1\endcsname\relax 0%
\else \csname#1\endcsname \fi} % Petr Olsak's macros from texinfo.tex
\def\writebookmarkline#1#2#3#4#5{{%
\let\(=\let \let\)=\let \let\[=\let \let\]=\let \let\/=\let
\pdfoutline goto num #3 count -\expnumber{chunk#2.#3} {#5}}}
\def\main#1#2#3.{% beginning of starred section
\toksF={}\makeoutlinetoks#3\outlinedone\outlinedone
\gtitle={#3}\MN{#2}%
\vfil\eject
\def\stripprefix##1>{}\def\gtitletoks{#3}%
\edef\gtitletoks{\expandafter\stripprefix\meaning\gtitletoks}%
\edef\next{\write\cont{\ZZ{\gtitletoks}{#1}{\secno}% write to contents file
{\noexpand\the\pageno}{\the\toksE}}}\next % \ZZ{title}{depth}{sec}{page}{ss}
\special{pdf: outline #1 << /Title (\the\toksE) /Dest
[ @thispage /FitH @ypos ] >>}
\startsection{\bf#3.\quad}\ignorespaces}
\mubytein=1 \mubyteout=2
\mubyte ^^da ^^d0^^aa\endmubyte
\main{1}{1}Ъ.
\bye
EDIT:
To sum up the answers, these are my current settings (without intermediate packages):
\newbox\mybox
\let\oldshipout\shipout
\def\shipout{\afterassignment\myboat\setbox\mybox=}
\def\myboat{\aftergroup\myship}
\def\myship{\setbox\mybox=\vbox{\special{pdf:tounicode UTF8-UCS2}\unvbox\mybox}\oldshipout\box\mybox\global\let\shipout\oldshipout}
(idea from quire.tex)
– Akira Kakuto Apr 05 '15 at 08:15\font\tenbf=labx1000\special{pdf: tounicode UTF8-UCS2}\special{pdf: outline 1 << /Title (Ъ) /Dest[ @thispage /FitH @ypos ] >>}Ъ.\bye