The following combines the great answer by @egreg with my older answer to implement this with your pgfplotstable code. All function variants are generated explicitly, even those that are already part of the current kernel, so this should work in your CI (and it does no harm on current systems). I changed the variable that stores the known hashes from a seq to a clist; since we know the hashes won't contain any commas, this should be a bit faster when searching for duplicates.
\documentclass[10pt,a4paper]{article}
\usepackage{pgfplotstable}
\pgfplotsset{compat=newest}
\ExplSyntaxOn
% Generate every variant used below explicitly, so that the code does not
% depend on which variants the installed kernel already provides.
\cs_generate_variant:Nn \str_set:Nn { Ne }
\cs_generate_variant:Nn \str_mdfive_hash:n { e }
\cs_generate_variant:Nn \str_range:nnn { e }
\cs_generate_variant:Nn \int_from_hex:n { e }
\cs_generate_variant:Nn \msg_error:nnn { nnV }
\cs_generate_variant:Nn \clist_gput_right:Nn { NV }
\prg_generate_conditional_variant:Nnn \clist_if_in:Nn { NV } { TF }
% Hash computed for the row currently being processed.
\str_new:N \l__pascals_hash_str
% All hashes handed out since the list was last cleared.
\clist_new:N \g__pascals_hashes_clist
\msg_new:nnn { pascals } { duplicate-hash }
  { Hash~ #1~ already~ used! }
% \__pascals_calc_hash_aux:n {<text>}
% Expandable.  Takes the first five hexadecimal digits of the MD5 sum of
% <text>, reads them as an integer and expands to that integer written in
% base 36.
\cs_new:Npn \__pascals_calc_hash_aux:n #1
  {
    \int_to_Base:nn
      {
        \int_from_hex:e
          { \str_range:enn { \str_mdfive_hash:e {#1} } \c_one_int { 5 } }
      }
      { 36 }
  }
% \__pascals_calc_hash:n {<text>}
% Stores the hash of <text> in \l__pascals_hash_str.  If that hash is already
% in \g__pascals_hashes_clist the `duplicate-hash' error is raised, otherwise
% the hash is appended to the list.  In both cases the hash is then assigned
% to the pgfplotstable key `create col/next content'.
\cs_new_protected:Npn \__pascals_calc_hash:n #1
  {
    \str_set:Ne \l__pascals_hash_str { \__pascals_calc_hash_aux:n {#1} }
    \clist_if_in:NVTF \g__pascals_hashes_clist \l__pascals_hash_str
      { \msg_error:nnV { pascals } { duplicate-hash } \l__pascals_hash_str }
      { \clist_gput_right:NV \g__pascals_hashes_clist \l__pascals_hash_str }
    \pgfkeyslet
      { /pgfplots/table/create~ col/next~ content } \l__pascals_hash_str
  }
% Document-level interface: \clearHashes empties the list of known hashes,
% \calcHash{<text>} computes, checks and stores the hash of <text>.
\NewDocumentCommand \clearHashes {} { \clist_gclear:N \g__pascals_hashes_clist }
\NewDocumentCommand \calcHash { m } { \__pascals_calc_hash:n {#1} }
\ExplSyntaxOff
% Sample table with two columns, X and Y.
\pgfplotstableread[]{
X Y
1 a
2 b
5 c
}\mydata
\begin{document}
% Start with an empty list of known hashes.
\clearHashes
% Add a column ID to \mydata; for every row \calcHash is called with the
% row's X and Y values concatenated.
\pgfplotstablecreatecol[
create col/assign/.code={%
\calcHash{\thisrow{X}\thisrow{Y}}%
}]{ID}{\mydata}
% Store (number of rows - 1) in \myDataRows; the macro is not used again in
% this example.
\pgfplotstablegetrowsof{\mydata}
\pgfmathtruncatemacro\myDataRows{\pgfplotsretval-1}
% Typeset the table with the `string type' option.
\pgfplotstabletypeset[string type]{\mydata}
\end{document}
A variant that directly uses \pdfmdfivesum instead of \str_mdfive_hash:e:
\documentclass[10pt,a4paper]{article}
\usepackage{pgfplotstable}
\pgfplotsset{compat=newest}
\ExplSyntaxOn
% Generate every variant used below explicitly, so that the code does not
% depend on which variants the installed kernel already provides.
\cs_generate_variant:Nn \str_set:Nn { Ne }
\cs_generate_variant:Nn \str_range:nnn { e }
\cs_generate_variant:Nn \int_from_hex:n { e }
\cs_generate_variant:Nn \msg_error:nnn { nnV }
\cs_generate_variant:Nn \clist_gput_right:Nn { NV }
\prg_generate_conditional_variant:Nnn \clist_if_in:Nn { NV } { TF }
% Hash computed for the row currently being processed.
\str_new:N \l__pascals_hash_str
% All hashes handed out since the list was last cleared.
\clist_new:N \g__pascals_hashes_clist
\msg_new:nnn { pascals } { duplicate-hash }
  { Hash~ #1~ already~ used! }
% \__pascals_calc_hash_aux:n {<text>}
% Expandable.  Takes the first five hexadecimal digits of the MD5 sum of
% <text> (obtained directly from \pdfmdfivesum, so the engine must provide
% that primitive), reads them as an integer and expands to that integer
% written in base 36.
\cs_new:Npn \__pascals_calc_hash_aux:n #1
  {
    \int_to_Base:nn
      {
        \int_from_hex:e
          { \str_range:enn { \pdfmdfivesum {#1} } \c_one_int { 5 } }
      }
      { 36 }
  }
% \__pascals_calc_hash:n {<text>}
% Stores the hash of <text> in \l__pascals_hash_str.  If that hash is already
% in \g__pascals_hashes_clist the `duplicate-hash' error is raised, otherwise
% the hash is appended to the list.  In both cases the hash is then assigned
% to the pgfplotstable key `create col/next content'.
\cs_new_protected:Npn \__pascals_calc_hash:n #1
  {
    \str_set:Ne \l__pascals_hash_str { \__pascals_calc_hash_aux:n {#1} }
    \clist_if_in:NVTF \g__pascals_hashes_clist \l__pascals_hash_str
      { \msg_error:nnV { pascals } { duplicate-hash } \l__pascals_hash_str }
      { \clist_gput_right:NV \g__pascals_hashes_clist \l__pascals_hash_str }
    \pgfkeyslet
      { /pgfplots/table/create~ col/next~ content } \l__pascals_hash_str
  }
% Document-level interface: \clearHashes empties the list of known hashes,
% \calcHash{<text>} computes, checks and stores the hash of <text>.
\NewDocumentCommand \clearHashes {} { \clist_gclear:N \g__pascals_hashes_clist }
\NewDocumentCommand \calcHash { m } { \__pascals_calc_hash:n {#1} }
\ExplSyntaxOff
% Sample table with two columns, X and Y.
\pgfplotstableread[]{
X Y
1 a
2 b
5 c
}\mydata
\begin{document}
% Start with an empty list of known hashes.
\clearHashes
% Add a column ID to \mydata; for every row \calcHash is called with the
% row's X and Y values concatenated.
\pgfplotstablecreatecol[
create col/assign/.code={%
\calcHash{\thisrow{X}\thisrow{Y}}%
}]{ID}{\mydata}
% Store (number of rows - 1) in \myDataRows; the macro is not used again in
% this example.
\pgfplotstablegetrowsof{\mydata}
\pgfmathtruncatemacro\myDataRows{\pgfplotsretval-1}
% Typeset the table with the `string type' option.
\pgfplotstabletypeset[string type]{\mydata}
\end{document}
Yet another variant that pads the hash with leading zeroes, so that it is always four characters long.
\documentclass[10pt,a4paper]{article}
\usepackage{pgfplotstable}
\pgfplotsset{compat=newest}
\ExplSyntaxOn
% Generate every variant used below explicitly, so that the code does not
% depend on which variants the installed kernel already provides.
\cs_generate_variant:Nn \str_set:Nn { Ne }
\cs_generate_variant:Nn \str_mdfive_hash:n { e }
\cs_generate_variant:Nn \str_range:nnn { e }
\cs_generate_variant:Nn \int_from_hex:n { e }
\cs_generate_variant:Nn \msg_error:nnn { nnV }
\cs_generate_variant:Nn \clist_gput_right:Nn { NV }
\prg_generate_conditional_variant:Nnn \clist_if_in:Nn { NV } { TF }
% Hash computed for the row currently being processed.
\str_new:N \l__pascals_hash_str
% All hashes handed out since the list was last cleared.
\clist_new:N \g__pascals_hashes_clist
\msg_new:nnn { pascals } { duplicate-hash }
  { Hash~ #1~ already~ used! }
% \__pascals_calc_hash_aux:n {<text>}
% Expandable.  Takes the first five hexadecimal digits of the MD5 sum of
% <text>, reads them as an integer and expands to that integer written in
% base 36.
\cs_new:Npn \__pascals_calc_hash_aux:n #1
  {
    \int_to_Base:nn
      {
        \int_from_hex:e
          { \str_range:enn { \str_mdfive_hash:e {#1} } \c_one_int { 5 } }
      }
      { 36 }
  }
% \__pascals_calc_hash:n {<text>}
% Stores the hash of <text>, left-padded with zeroes to four characters, in
% \l__pascals_hash_str.  If that hash is already in \g__pascals_hashes_clist
% the `duplicate-hash' error is raised, otherwise the hash is appended to the
% list.  In both cases the hash is then assigned to the pgfplotstable key
% `create col/next content'.
\cs_new_protected:Npn \__pascals_calc_hash:n #1
  {
    \str_set:Ne \l__pascals_hash_str { \__pascals_calc_hash_aux:n {#1} }
    % Five hex digits are at most 0xFFFFF = 1048575 < 36^4, so the base-36
    % form never exceeds four digits and the replication count is never
    % negative.
    \str_set:Ne \l__pascals_hash_str
      {
        \prg_replicate:nn { 4 - \str_count:N \l__pascals_hash_str } { 0 }
        \l__pascals_hash_str
      }
    \clist_if_in:NVTF \g__pascals_hashes_clist \l__pascals_hash_str
      { \msg_error:nnV { pascals } { duplicate-hash } \l__pascals_hash_str }
      { \clist_gput_right:NV \g__pascals_hashes_clist \l__pascals_hash_str }
    \pgfkeyslet
      { /pgfplots/table/create~ col/next~ content } \l__pascals_hash_str
  }
% Document-level interface: \clearHashes empties the list of known hashes,
% \calcHash{<text>} computes, checks and stores the hash of <text>.
\NewDocumentCommand \clearHashes {} { \clist_gclear:N \g__pascals_hashes_clist }
\NewDocumentCommand \calcHash { m } { \__pascals_calc_hash:n {#1} }
\ExplSyntaxOff
% Sample table with two columns, X and Y.
\pgfplotstableread[]{
X Y
1 a
2 b
5 c
36020001400 BasementFloor
}\mydata
\begin{document}
% Start with an empty list of known hashes.
\clearHashes
% Add a column ID to \mydata; for every row \calcHash is called with the
% row's X and Y values concatenated.
\pgfplotstablecreatecol[
create col/assign/.code={%
\calcHash{\thisrow{X}\thisrow{Y}}%
}]{ID}{\mydata}
% Store (number of rows - 1) in \myDataRows; the macro is not used again in
% this example.
\pgfplotstablegetrowsof{\mydata}
\pgfmathtruncatemacro\myDataRows{\pgfplotsretval-1}
% Typeset the table with the `string type' option.
\pgfplotstabletypeset[string type]{\mydata}
\end{document}
… \str_mdfive_hash:n? – egreg Mar 12 '24 at 17:29

… \cs_generate_variant:Nn \str_mdfive_hash:n { e } for it to work in your CI, I'd guess. – Skillmon Mar 12 '24 at 17:36

… \NewExpandableDocumentCommand? Sorry for this stupid question, I'm absolutely not familiar with this syntax… – PascalS Mar 12 '24 at 17:52