-
Notifications
You must be signed in to change notification settings - Fork 1
/
small_vole.c
196 lines (167 loc) · 6.39 KB
/
small_vole.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
#include "small_vole.h"
#include <string.h>
#include <stdbool.h>
#include "util.h"
#define COL_LEN VOLE_COL_BLOCKS
// TODO: probably can ditch most of the "restrict"s in inlined functions.
// There are two different methods for efficiently reducing the PRG outputs (equivalent to computing
// a Hamming code syndrome): Divide and conquer, and a straight-line method based on Gray's code.
// These are both transposes of Hadamard encoding circuits given in "the Exact Lower Bounds of
// Encoding Circuit Sizes of Hamming Codes and Hadamard Codes". This implementation combines these
// two methods. Divide and conquer is used at the lowest level, as it is inherently parallel and has
// a fixed access pattern when unrolled. Above it, the Gray's code method is used, as it needs very
// little temporary storage.
// Output: v (or q) in in_out[1, ..., depth], and u in in_out[0].
static ALWAYS_INLINE void xor_reduce(vole_block* in_out)
{
#ifdef __GNUC__
#pragma GCC unroll (5)
#endif
for (size_t i = 0; i < VOLE_WIDTH_SHIFT; i++)
{
size_t stride = 1 << i;
#ifdef __GNUC__
#pragma GCC unroll (32)
#endif
for (size_t j = 0; j < VOLE_WIDTH; j += 2 * stride)
{
#ifdef __GNUC__
#pragma GCC unroll (5)
#endif
for (size_t d = 0; d <= i; ++d)
in_out[j + d] = vole_block_xor(in_out[j + d], in_out[j + d + stride]);
in_out[j + i + 1] = in_out[j + stride];
}
}
}
static ALWAYS_INLINE void convert_prg_output(
vole_block* prg_output, const prg_vole_block* raw_prg_output, size_t blocks)
{
#if PRG_VOLE_BLOCKS == 2
#ifdef __GNUC__
#pragma GCC unroll (32)
#endif
for (size_t i = 0; i < blocks; ++i)
prg_output[i] = block256_from_2_block128(raw_prg_output[PRG_VOLE_BLOCKS * i], raw_prg_output[PRG_VOLE_BLOCKS * i + 1]);
#else
memcpy(prg_output, raw_prg_output, blocks * sizeof(vole_block));
#endif
}
static ALWAYS_INLINE void process_prg_output(
bool ignore_0, size_t j, unsigned int output_col, vole_block* accum, vole_block* vq,
const prg_vole_block* raw_prg_output)
{
vole_block prg_output[VOLE_WIDTH];
if (!ignore_0)
convert_prg_output(prg_output, raw_prg_output, VOLE_WIDTH);
else
{
prg_output[0] = vole_block_set_zero();
convert_prg_output(&prg_output[1], raw_prg_output, VOLE_WIDTH - 1);
}
xor_reduce(prg_output);
if (!ignore_0)
accum[j] = vole_block_xor(accum[j], prg_output[0]);
for (size_t col = 0; col < VOLE_WIDTH_SHIFT; ++col)
vq[COL_LEN * col + j] = vole_block_xor(vq[COL_LEN * col + j], prg_output[col + 1]);
if (!ignore_0)
// Grey's codes method. output_col is the index of the bit that will change when
// incrementing the Gray's code.
vq[COL_LEN * output_col + j] = vole_block_xor(vq[COL_LEN * output_col + j], accum[j]);
}
// Sender and receiver merged together, since they share most of the same code.
static ALWAYS_INLINE void vole(
bool receiver, unsigned int k,
const block_secpar* restrict keys, block128 iv, const prg_vole_fixed_key* restrict fixed_key,
const vole_block* restrict u_or_c_in, vole_block* restrict vq, vole_block* restrict c_out,
const uint8_t* restrict delta)
{
vole_block accum[COL_LEN];
memset(&accum[0], 0, COL_LEN * sizeof(vole_block));
if (receiver && u_or_c_in)
{
vole_block* q_ptr = vq;
for (unsigned int col = 0; col < k; ++col)
for (size_t j = 0; j < COL_LEN; ++j)
*(q_ptr++) = vole_block_and(u_or_c_in[j], vole_block_set_all_8(delta[col]));
}
else
memset(&vq[0], 0, COL_LEN * k * sizeof(vole_block));
size_t i = 0;
if (receiver)
{
// Handle first iteration separately, since the 0th PRG key is a dummy.
prg_vole_key prgs[VOLE_WIDTH - 1];
prg_vole_iv ivs[VOLE_WIDTH - 1];
prg_vole_block raw_prg_output[(VOLE_WIDTH - 1) * PRG_VOLE_BLOCKS];
#ifdef __GNUC__
_Pragma(STRINGIZE(GCC unroll (VOLE_WIDTH)))
#endif
for (size_t i = 0; i < VOLE_WIDTH - 1; ++i)
memcpy(&ivs[i], &iv, sizeof(ivs[i]));
prg_vole_init(prgs, fixed_key, &keys[1], ivs, VOLE_WIDTH - 1, PRG_VOLE_BLOCKS, 0, raw_prg_output);
process_prg_output(true, 0, 0, accum, vq, raw_prg_output);
for (size_t j = 1; j < COL_LEN; ++j)
{
prg_vole_gen(prgs, fixed_key, VOLE_WIDTH - 1, PRG_VOLE_BLOCKS, j * PRG_VOLE_BLOCKS, raw_prg_output);
process_prg_output(true, j, 0, accum, vq, raw_prg_output);
}
i = VOLE_WIDTH;
}
for (; i < (size_t) 1 << k; i += VOLE_WIDTH)
{
prg_vole_key prgs[VOLE_WIDTH];
prg_vole_iv ivs[VOLE_WIDTH];
prg_vole_block raw_prg_output[VOLE_WIDTH * PRG_VOLE_BLOCKS];
#ifdef __GNUC__
_Pragma(STRINGIZE(GCC unroll (VOLE_WIDTH)))
#endif
for (size_t i = 0; i < VOLE_WIDTH; ++i)
memcpy(&ivs[i], &iv, sizeof(ivs[i]));
// Bitwise or is to make output_col be k - 1 when i + VOLE_WIDTH = 2**k, rather than k.
unsigned int output_col = count_trailing_zeros((i + VOLE_WIDTH) | (1 << (k - 1)));
prg_vole_init(prgs, fixed_key, &keys[i], ivs, VOLE_WIDTH, PRG_VOLE_BLOCKS, 0, raw_prg_output);
process_prg_output(false, 0, output_col, accum, vq, raw_prg_output);
for (size_t j = 1; j < COL_LEN; ++j)
{
prg_vole_gen(prgs, fixed_key, VOLE_WIDTH, PRG_VOLE_BLOCKS, j * PRG_VOLE_BLOCKS, raw_prg_output);
process_prg_output(false, j, output_col, accum, vq, raw_prg_output);
}
}
if (!receiver)
{
if (u_or_c_in)
for (size_t j = 0; j < COL_LEN; ++j)
c_out[j] = vole_block_xor(u_or_c_in[j], accum[j]);
else
memcpy(c_out, accum, sizeof(accum));
}
}
void vole_sender(
unsigned int k, const block_secpar* restrict keys,
block128 iv, const prg_vole_fixed_key* restrict fixed_key,
const vole_block* restrict u, vole_block* restrict v, vole_block* restrict c)
{
vole(false, k, keys, iv, fixed_key, u, v, c, NULL);
}
void vole_receiver(
unsigned int k, const block_secpar* restrict keys,
block128 iv, const prg_vole_fixed_key* restrict fixed_key,
const vole_block* restrict c, vole_block* restrict q,
const uint8_t* restrict delta)
{
vole(true, k, keys, iv, fixed_key, c, q, NULL, delta);
}
void vole_receiver_apply_correction(
size_t row_blocks, size_t cols,
const vole_block* restrict c, vole_block* restrict q, const uint8_t* restrict delta)
{
for (unsigned int col = 0; col < cols; ++col)
for (size_t j = 0; j < row_blocks; ++j)
q[col * COL_LEN + j] = vole_block_xor(q[col * COL_LEN + j],
vole_block_and(c[j], vole_block_set_all_8(delta[col])));
}
extern inline size_t vole_permute_key_index(size_t i);
extern inline size_t vole_permute_key_index_inv(size_t i);
extern inline size_t vole_permute_inv_increment(size_t i, size_t offset);
extern inline void vole_fixed_key_init(prg_vole_fixed_key* fixed_key, block_secpar iv);