Some problems in adapting a new algorithm

I am using the ChipWhisperer board with Kyber, a post-quantum cryptography (PQC) scheme. I built Kyber in VS2019 and it worked well, but when I tried to build and program the firmware, I got some errors that did not appear in VS2019.


So I guess the reason is that I didn't define the platform used in this project, but when I built it in VS2019 I didn't define the platform either. I don't know how to deal with that. Could you tell me which platform I should choose?

BTW, I am using ChipWhisperer 5.6 on Windows, and here is the source code of randombytes.c:

#if defined(linux)
#define _GNU_SOURCE
#endif /* defined(linux) */

#include “randombytes.h”

#if defined(_WIN32)
/* Windows /
// NOLINTNEXTLINE(llvm-include-order): Include order required by Windows
#include <windows.h>
#include <wincrypt.h> /
CryptAcquireContext, CryptGenRandom /
#endif /
defined(_WIN32) */

#if defined(linux)
/* Linux */
// We would need to include <linux/random.h>, but not every target has access
// to the linux headers. We only need RNDGETENTCNT, so we instead inline it.
// RNDGETENTCNT is originally defined in include/uapi/linux/random.h in the
// linux repo.
#define RNDGETENTCNT 0x80045200

#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <poll.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>

// We need SSIZE_MAX as the maximum read len from /dev/urandom
#if !defined(SSIZE_MAX)
#define SSIZE_MAX (SIZE_MAX / 2 - 1)
#endif /* defined(SSIZE_MAX) /
#endif /
defined(linux) */

#if defined(_WIN32)
static int randombytes_win32_randombytes(void *buf, const size_t n) {
HCRYPTPROV ctx;
BOOL tmp;

tmp = CryptAcquireContext(&ctx, NULL, NULL, PROV_RSA_FULL,
                          CRYPT_VERIFYCONTEXT);
if (tmp == FALSE) {
    return -1;
}

tmp = CryptGenRandom(ctx, (DWORD)n, (BYTE *)buf);
if (tmp == FALSE) {
    return -1;
}

tmp = CryptReleaseContext(ctx, 0);
if (tmp == FALSE) {
    return -1;
}

return 0;

}
#endif /* defined(_WIN32) */

#if defined(linux) && defined(SYS_getrandom)
/*
 * Fill `buf` with `n` random bytes using the getrandom system call.
 *
 * The request is issued in chunks of at most 33554431 bytes, and a call that
 * is interrupted (EINTR) is retried.
 *
 * Returns 0 on success, or the negative syscall result on failure.
 */
static int randombytes_linux_randombytes_getrandom(void *buf, size_t n) {
    /* I have thought about using a separate PRF, seeded by getrandom, but
     * it turns out that the performance of getrandom is good enough
     * (250 MB/s on my laptop). */
    size_t offset = 0, chunk;
    long int ret;
    while (n > 0) {
        /* getrandom does not allow chunks larger than 33554431 */
        chunk = n <= 33554431 ? n : 33554431;
        do {
            ret = syscall(SYS_getrandom, (char *)buf + offset, chunk, 0);
        } while (ret == -1 && errno == EINTR);
        if (ret < 0) {
            return (int) ret;
        }
        /* getrandom may return fewer bytes than requested; advance by the
         * amount actually written and ask for the rest. */
        offset += (size_t) ret;
        n -= (size_t) ret;
    }
    assert(n == 0);
    return 0;
}
#endif /* defined(linux) && defined(SYS_getrandom) */

#if defined(linux) && !defined(SYS_getrandom)
/*
 * Issue the RNDGETENTCNT ioctl on `device`, passing `entropy` as the
 * argument that receives the result.
 *
 * Returns whatever ioctl() returns.
 */
static int randombytes_linux_read_entropy_ioctl(int device, int *entropy) {
    const int status = ioctl(device, RNDGETENTCNT, entropy);
    return status;
}

/*
 * Read a decimal integer from the start of `stream` into `*entropy`.
 *
 * The stream is rewound before every attempt, and the read is retried when
 * fscanf fails with errno set to EINTR.
 *
 * Returns 0 when exactly one integer was parsed, -1 otherwise.
 */
static int randombytes_linux_read_entropy_proc(FILE *stream, int *entropy) {
    int retcode;
    do {
        rewind(stream);
        /* Clear errno so that a stale EINTR left behind by an earlier call
         * cannot turn a plain parse failure into an endless retry loop. */
        errno = 0;
        retcode = fscanf(stream, "%d", entropy);
    } while (retcode != 1 && errno == EINTR);
    if (retcode != 1) {
        return -1;
    }
    return 0;
}

/*
 * Wait until the entropy count read for `device` reaches 128 bits.
 *
 * The count is first queried through the ioctl helper. If that fails with
 * ENOTTY, the count is read from /proc/sys/kernel/random/entropy_avail
 * instead. While the count is too low, the function polls /dev/random for
 * readability and re-reads the count after every wake-up.
 *
 * Returns 0 on success and a non-zero value on failure. Every file and
 * descriptor opened here is closed again before returning.
 */
static int randombytes_linux_wait_for_entropy(int device) {
    /* We will block on /dev/random, because any increase in the OS' entropy
     * level will unblock the request. I use poll here (as does libsodium),
     * because we don't actually want to read from the device. */
    enum { IOCTL, PROC } strategy = IOCTL;
    const int bits = 128;
    struct pollfd pfd;
    int fd;
    FILE *proc_file = NULL;
    int retcode;
    int retcode_error = 0; // Used as return codes throughout this function
    int close_status;
    int entropy = 0;

    /* If the device has enough entropy already, we will want to return early */
    retcode = randombytes_linux_read_entropy_ioctl(device, &entropy);
    if (retcode != 0 && errno == ENOTTY) {
        /* The ioctl call on /dev/urandom has failed due to a ENOTTY (i.e.
         * unsupported action). We will fall back to reading from
         * `/proc/sys/kernel/random/entropy_avail`. This is obviously less
         * ideal, but at this point it seems we have no better option. */
        strategy = PROC;
        // Open the entropy count file
        proc_file = fopen("/proc/sys/kernel/random/entropy_avail", "r");
        if (proc_file == NULL) {
            // Without the file there is nothing left to read the count from
            return -1;
        }
    } else if (retcode != 0) {
        // Unrecoverable ioctl error
        return -1;
    }
    if (entropy >= bits) {
        if (proc_file != NULL) {
            (void)fclose(proc_file);
        }
        return 0;
    }

    do {
        fd = open("/dev/random", O_RDONLY);
    } while (fd == -1 && errno == EINTR); /* EAGAIN will not occur */
    if (fd == -1) {
        /* Unrecoverable IO error */
        if (proc_file != NULL) {
            (void)fclose(proc_file);
        }
        return -1;
    }

    pfd.fd = fd;
    pfd.events = POLLIN;
    for (;;) {
        retcode = poll(&pfd, 1, -1);
        if (retcode == -1 && (errno == EINTR || errno == EAGAIN)) {
            continue;
        } else if (retcode == 1) {
            if (strategy == IOCTL) {
                retcode =
                    randombytes_linux_read_entropy_ioctl(device, &entropy);
            } else if (strategy == PROC) {
                retcode =
                    randombytes_linux_read_entropy_proc(proc_file, &entropy);
            } else {
                // Unreachable; still leave through the cleanup code below
                retcode_error = -1;
                break;
            }

            if (retcode != 0) {
                // Unrecoverable I/O error
                retcode_error = retcode;
                break;
            }
            if (entropy >= bits) {
                break;
            }
        } else {
            // Unreachable: poll() should only return -1 or 1
            retcode_error = -1;
            break;
        }
    }

    /* close() and fclose() are each called exactly once: the stream must not
     * be used again after fclose(), even when it fails, and on Linux the
     * descriptor is released even when close() reports EINTR. */
    close_status = close(fd);
    if (proc_file != NULL && fclose(proc_file) != 0) {
        close_status = -1;
    }
    if (retcode_error != 0) {
        return retcode_error;
    }
    return close_status;
}

/*
 * Fill `buf` with `n` random bytes read from /dev/urandom.
 *
 * The device is opened, randombytes_linux_wait_for_entropy() is called on
 * it, and the buffer is then filled with read() calls of at most SSIZE_MAX
 * bytes each. Reads that fail with EAGAIN or EINTR are retried.
 *
 * Returns 0 on success and -1 on failure. The descriptor is closed on every
 * path.
 */
static int randombytes_linux_randombytes_urandom(void *buf, size_t n) {
    int fd;
    size_t offset = 0, count;
    ssize_t tmp;

    do {
        fd = open("/dev/urandom", O_RDONLY);
    } while (fd == -1 && errno == EINTR);
    if (fd == -1) {
        return -1;
    }
    if (randombytes_linux_wait_for_entropy(fd) != 0) {
        (void)close(fd);
        return -1;
    }

    while (n > 0) {
        count = n <= SSIZE_MAX ? n : SSIZE_MAX;
        tmp = read(fd, (char *)buf + offset, count);
        if (tmp == -1 && (errno == EAGAIN || errno == EINTR)) {
            continue;
        }
        if (tmp <= 0) {
            /* Unrecoverable IO error. A return of 0 (end of file) is also
             * treated as an error, because retrying it would never make
             * progress. */
            (void)close(fd);
            return -1;
        }
        offset += (size_t)tmp;
        n -= (size_t)tmp;
    }

    (void)close(fd);
    assert(n == 0);
    return 0;
}
#endif /* defined(linux) && !defined(SYS_getrandom) */

int randombytes(uint8_t buf, size_t n) {
#if defined(linux)
#if defined(SYS_getrandom)
/
Use getrandom system call /
return randombytes_linux_randombytes_getrandom(buf, n);
#else
/
When we have enough entropy, we can read from /dev/urandom /
return randombytes_linux_randombytes_urandom(buf, n);
#endif
#elif defined(_WIN32)
/
Use windows API */
return randombytes_win32_randombytes(buf, n);
#else
#error “randombytes(…) is not supported on this platform”
#endif
}

Hi,

That code is reliant on random number generation provided by an operating system. In general, CW targets don’t run an OS at all, meaning you’ll have to implement a PRNG and replace the random calls at the very least.

I think you would probably have more luck trying to adapt a Kyber implementation that was designed to run on small embedded devices like https://github.com/mupq/pqm4.

Alex

Hi, Alex.
Thank you for your advice. I tried another PRNG implementation and found that it works.

But I still have some problems communicating with the board. Take Lab 3_1 as an example. When capturing the traces, there is a line of code: target.simpleserial_write('p', text). I think it writes the text as plaintext to the cw308-stm32f3 board. In the main part of simpleserial_aes.c, there is a line of code: simpleserial_addcmd('p', 16, get_pt). So I am guessing that this line is used to receive the text sent from the trace-capturing part? I don't know whether I understand it correctly.

image

code in simpleserial_aes.c:
image

Best wishes,
Magnolia

target.simpleserial_write() sends data to the target board. simpleserial_addcmd() adds a callback that gets called when the target receives a packet of the correct length from the capture board. For AES, we send a 'p' command that has 16 bytes of data, which corresponds to the get_pt callback function. SimpleSerial is documented at https://chipwhisperer.readthedocs.io/en/latest/simpleserial.html.

Alex

Hi.

I read the documentation and understood simpleserial_put() to be the function for receiving bytes from the target board, which means that by then the target may already have done some work, such as encryption. Is that right?

Following is my understanding, and I hope I understand right:
When capturing traces for AES (Lab 3_3), target.simpleserial_write() is used to send data, and in the firmware, simpleserial_addcmd('p', 16, get_pt) is used to receive the sent data. But in uint8_t get_pt(uint8_t* pt, uint8_t len), how can I make sure that uint8_t* pt points to the data that was sent to the board? Or does uint8_t* pt itself refer to the sent data?

What’s more, are the commands used in simpleserial_addcmd() fixed? If I want to add some command, should I change the code of simpleserial_addcmd() ?

Best regard,
Magnolia

simpleserial_addcmd(‘p’, 16, get_pt) doesn’t receive or send any data - it just adds get_pt as the callback when it receives a 16 byte 'p' packet. simpleserial_get() receives the serial message and calls the appropriate callback.

simpleserial_put() sends data from the target to the capture board, which goes from there to the PC.

For SimpleSerial V1, 'w', 'y', and 'v' are reserved. You can use any other ASCII characters for commands besides those and ones that are not already added via a simpleserial_addcmd() call.

You may find it helpful to check the source code for SimpleSerial V1: chipwhisperer/hardware/victims/firmware/simpleserial/simpleserial.c at develop · newaetech/chipwhisperer · GitHub.

Alex

Hi.

Thank you so much for your patience and I think I understand simpleserial_aes.c now.

BTW, I am using SS_VER = 2.0 in my own project. The SimpleSerial documentation says that SimpleSerial V2.0 has been deprecated, yet it still works in simpleserial.c. In simpleserial_get() of simpleserial.c, the packet sent from the PC has four bytes before the data part, while the documentation for SS_VER = 2.1 lists only three: cmd, scmd, and dlen. I am confused about which version I should use in my project.

codes from simpleserial.c:

for (int i = 0; i < 4; i++)
data_buf[i] = getch(); //PTR, cmd, scmd, len

Best regards,
Magnolia

Moreover, I want to send more than 16 bytes with target.simpleserial_write(). I looked into the Python code of SimpleSerial2.py but could not find where to change this.

The first one there is a bug. Thanks for letting me know, I’ll get that fixed up. The other thing is a documentation error; there should be a byte at the beginning that marks where the next frame byte in the sequence is. I’ll get that fixed up as well. The only difference between SSV2 and SSV2.1 is the CRC value used. I recommend using either SSV1.1 or SSV2.1.

The length sent is determined by the length of the data you feed into the function. target.simpleserial_write(bytearray(range(32))) will write 32 bytes, while target.simpleserial_write(bytearray(range(64))) will write 64 bytes. The max length of the data you can send is 249 bytes.

Alex

Hi.

Thank you so much. I can send more than 16 bytes now, but I still get a warning that says "WARNING:ChipWhisperer Target:Read timed out". I am wondering whether I should change some settings here.
image

And in firmware:
image

Magnolia

Yeah, you’ll probably need to add a time.sleep() call after scope.capture(). IIRC RSA-1024 takes 4-5 seconds, though I’m not sure how Kyber’s speed compares to RSA. You should be able to use scope.adc.trig_count to see how long your operation takes.

Alex

Thank you so much! I added time.sleep() and it works now.
Thanks again for your patience.

Magnolia