Text to speech Arduino code
And now we come to the code listing:
#include <SPI.h>
#include <string.h>
#include "S1V30120_defines.h"
#include "text_to_speech_img.h"

// Pins wired to the S1V30120 (reset, ready, chip select, mute).
#define S1V30120_RST 33
#define S1V30120_RDY 26
#define S1V30120_CS 77
#define S1V30120_MUTE 54

// Largest payload sent in a single boot-load message: each chunk,
// including the 4-byte header, must be at most 2048 bytes.
#define S1V30120_CHUNK_PAYLOAD 2044

// Bytes that surround the text in a speak request:
// 2 (length) + 2 (message id) + 1 (flush control) + 1 (terminating null).
#define S1V30120_SPEAK_OVERHEAD 6

// SPI settings shared by every transfer to the chip.
static const SPISettings S1V30120_SPI_SETTINGS(750000, MSBFIRST, SPI_MODE3);

String mytext = "Success! Look at me, I can speak. I'm the best!";

// Variables

// Receive buffer. Most received messages are 6 bytes; the version response
// is read as 20 bytes. Stored as unsigned bytes so that values >= 0x80 are
// not sign-extended when they are combined into 16/32-bit words.
unsigned char rcvd_msg[20] = {0};

// Transmit buffer. The size is an educated guess:
// "Over the whole document, make the average sentence length
// 15-20 words, 25-33 syllables and 75-100 characters."
// https://strainindex.wordpress.com/2008/07/28/the-average-sentence-length/
static volatile char send_msg[200] = {0};
static volatile unsigned short msg_len;
static volatile unsigned short txt_len;

unsigned short tmp;
long idx;
bool success;

// Read position inside TTS_INIT_DATA while the boot image is downloaded.
// It is advanced by S1V30120_load_chunk() and reset by S1V30120_download().
// Kept as unsigned long so that images larger than 65535 bytes still work.
static volatile unsigned long TTS_DATA_IDX;

// Forward declarations (the Arduino builder generates these for .ino files;
// they are spelled out so the file also builds as plain C++).
void S1V30120_reset(void);
unsigned short S1V30120_get_version(void);
bool S1V30120_download(void);
bool S1V30120_load_chunk(unsigned short chunk_len);
bool S1V30120_boot_run(void);
void show_response(bool response);
bool S1V30120_registration(void);
bool S1V30120_parse_response(unsigned short expected_message,
                             unsigned short expected_result,
                             unsigned short padding_bytes);
void S1V30120_send_padding(unsigned short num_padding_bytes);
void S1V30120_send_message(volatile char message[], unsigned char message_length);
bool S1V30120_configure_audio(void);
bool S1V30120_set_volume(void);
bool S1V30120_configure_tts(void);
bool S1V30120_speech(const String &text_to_speech, unsigned char flush_enable);

// Brings the chip up (reset, boot image download, registration, audio and
// TTS configuration) and then speaks three test phrases. Every step is
// reported on the serial port; show_response() halts on the first failure.
void setup()
{
  // Pin settings
  pinMode(S1V30120_RST, OUTPUT);
  pinMode(S1V30120_RDY, INPUT);
  pinMode(S1V30120_CS, OUTPUT);
  pinMode(S1V30120_MUTE, OUTPUT);

  // Unmute
  digitalWrite(S1V30120_MUTE, LOW);

  // For debugging
  Serial.begin(9600);

  SPI.begin();
  S1V30120_reset();

  tmp = S1V30120_get_version();
  if (tmp == 0x0402)
  {
    Serial.println("S1V30120 found. Downloading boot image!");
  }

  success = S1V30120_download();
  Serial.print("Boot image download: ");
  show_response(success);

  success = S1V30120_boot_run();
  Serial.print("Boot image run: ");
  show_response(success);

  delay(150); // Wait for the boot image to execute

  Serial.print("Registering: ");
  success = S1V30120_registration();
  show_response(success);

  // Once again print version information
  S1V30120_get_version();

  success = S1V30120_configure_audio();
  Serial.print("Configuring audio: ");
  show_response(success);

  success = S1V30120_set_volume();
  Serial.print("Setting volume: ");
  show_response(success);

  success = S1V30120_configure_tts();
  Serial.print("Configure TTS: ");
  show_response(success);

  success = S1V30120_speech(mytext, 0);
  Serial.print("Speaking1: ");
  show_response(success);

  delay(250);

  Serial.print("Speaking2: ");
  success = S1V30120_speech("test", 0);
  delay(250);
  show_response(success);

  success = S1V30120_speech("2", 0);
  Serial.print("Speaking3: ");
  show_response(success);
}

void loop()
{
  // put your main code here, to run repeatedly:
}

// Resets the S1V30120: deselects the chip, pulls RST low, clocks out one
// dummy byte, then releases RST and waits for the chip to start.
void S1V30120_reset(void)
{
  digitalWrite(S1V30120_CS, HIGH); // S1V30120 not selected
  digitalWrite(S1V30120_RST, LOW);

  // Send one dummy byte, this will leave the clock line high
  SPI.beginTransaction(S1V30120_SPI_SETTINGS);
  SPI.transfer(0x00);
  SPI.endTransaction();

  delay(5);
  digitalWrite(S1V30120_RST, HIGH);
  delay(150);
}

// Sends a version request, reads the 20-byte reply into rcvd_msg, prints the
// hardware/firmware information on the serial port and returns the hardware
// version as (rcvd_msg[4] << 8) | rcvd_msg[5].
unsigned short S1V30120_get_version(void)
{
  // Sending ISC_VERSION_REQ = [0x00, 0x04, 0x00, 0x05] (each word LSB first)
  char msg_ver[] = {0x04, 0x00, 0x05, 0x00};
  S1V30120_send_message(msg_ver, 0x04);

  // Wait for ready signal
  while (digitalRead(S1V30120_RDY) == 0);

  // Receive 20 bytes
  digitalWrite(S1V30120_CS, LOW);
  SPI.beginTransaction(S1V30120_SPI_SETTINGS);

  // Wait for message start
  while (SPI.transfer(0x00) != 0xAA);

  for (int i = 0; i < 20; i++)
  {
    rcvd_msg[i] = SPI.transfer(0x00);
  }

  // Send 16 bytes padding
  S1V30120_send_padding(16);

  SPI.endTransaction();
  digitalWrite(S1V30120_CS, HIGH);

  const unsigned short version =
      static_cast<unsigned short>((rcvd_msg[4] << 8) | rcvd_msg[5]);

  Serial.print("HW version ");
  Serial.print(rcvd_msg[4], HEX);
  Serial.print(".");
  Serial.println(rcvd_msg[5], HEX);

  Serial.print("Firmware version ");
  Serial.print(rcvd_msg[6], HEX);
  Serial.print(".");
  Serial.print(rcvd_msg[7], HEX);
  Serial.print(".");
  Serial.println(rcvd_msg[16], HEX);

  // The 32-bit fields are assembled in unsigned long so that the shifts are
  // well defined even where int is only 16 bits wide.
  const unsigned long features =
      (static_cast<unsigned long>(rcvd_msg[11]) << 24) |
      (static_cast<unsigned long>(rcvd_msg[10]) << 16) |
      (static_cast<unsigned long>(rcvd_msg[9]) << 8) |
      static_cast<unsigned long>(rcvd_msg[8]);
  Serial.print("Firmware features ");
  Serial.println(features, HEX);

  const unsigned long extended_features =
      (static_cast<unsigned long>(rcvd_msg[15]) << 24) |
      (static_cast<unsigned long>(rcvd_msg[14]) << 16) |
      (static_cast<unsigned long>(rcvd_msg[13]) << 8) |
      static_cast<unsigned long>(rcvd_msg[12]);
  Serial.print("Firmware extended features ");
  Serial.println(extended_features, HEX);

  return version;
}

// Downloads the boot image TTS_INIT_DATA to the chip in chunks of at most
// S1V30120_CHUNK_PAYLOAD bytes. Returns true when every chunk was
// acknowledged, false as soon as one chunk fails.
bool S1V30120_download(void)
{
  // TTS_INIT_DATA is of unsigned char type (one byte)
  const unsigned long len = sizeof(TTS_INIT_DATA);

  // We are loading chunks of data.
  // Each chunk, including header, must be of maximum 2048 bytes;
  // as the header is 4 bytes, this leaves 2044 bytes to load each time.
  const unsigned long fullchunks = len / S1V30120_CHUNK_PAYLOAD;
  const unsigned short remaining =
      static_cast<unsigned short>(len % S1V30120_CHUNK_PAYLOAD);

  // Always start from the beginning of the image, so that calling this
  // function a second time does not read past the end of TTS_INIT_DATA.
  TTS_DATA_IDX = 0;

  Serial.print("TTS_INIT_DATA length is ");
  Serial.println(len);
  Serial.print("Full chunks to load: ");
  Serial.println(fullchunks);
  Serial.print("Remaining bytes: ");
  Serial.println(remaining);

  // Load the full chunks
  for (unsigned long num_chunks = 0; num_chunks < fullchunks; num_chunks++)
  {
    if (S1V30120_load_chunk(S1V30120_CHUNK_PAYLOAD))
    {
      Serial.println("Success");
    }
    else
    {
      Serial.print("Failed at chunk ");
      Serial.println(num_chunks);
      return false;
    }
  }

  // Now load the last (partial) chunk of data, if there is one
  if (remaining > 0)
  {
    if (S1V30120_load_chunk(remaining))
    {
      Serial.println("Success");
    }
    else
    {
      Serial.print("Failed at last chunk ");
      return false;
    }
  }

  // All was OK
  return true;
}

// Sends the next chunk_len bytes of TTS_INIT_DATA (starting at TTS_DATA_IDX)
// as one boot-load request and returns true when the chip answers with
// ISC_BOOT_LOAD_RESP and result 0x0001.
bool S1V30120_load_chunk(unsigned short chunk_len)
{
  // Message length = payload + 4 header bytes
  const char len_msb = ((chunk_len + 4) & 0xFF00) >> 8;
  const char len_lsb = (chunk_len + 4) & 0xFF;

  digitalWrite(S1V30120_CS, LOW);
  SPI.beginTransaction(S1V30120_SPI_SETTINGS);

  SPI.transfer(0xAA);    // Start Message Command
  SPI.transfer(len_lsb); // Message length, LSB first
  SPI.transfer(len_msb); // (a full chunk is 2048 bytes = 0x0800)
  SPI.transfer(0x00);    // Send ISC_BOOT_LOAD_REQ (0x1000), LSB first
  SPI.transfer(0x10);

  for (unsigned short chunk_idx = 0; chunk_idx < chunk_len; chunk_idx++)
  {
    SPI.transfer(TTS_INIT_DATA[TTS_DATA_IDX]);
    TTS_DATA_IDX++;
  }

  SPI.endTransaction();
  digitalWrite(S1V30120_CS, HIGH);

  return S1V30120_parse_response(ISC_BOOT_LOAD_RESP, 0x0001, 16);
}

// Sends the boot-run request (message id 0x1002) and returns true when the
// chip answers with ISC_BOOT_RUN_RESP and result 0x0001.
bool S1V30120_boot_run(void)
{
  char boot_run_msg[] = {0x04, 0x00, 0x02, 0x10};
  S1V30120_send_message(boot_run_msg, 0x04);
  return S1V30120_parse_response(ISC_BOOT_RUN_RESP, 0x0001, 8);
}

// Prints "OK!" for a successful step. On failure prints an error and never
// returns (the sketch stops in an endless loop).
void show_response(bool response)
{
  if (response)
  {
    Serial.println("OK!");
  }
  else
  {
    Serial.println("Failed. System halted!");
    while (1);
  }
}

// Sends the 12-byte registration message (message id 0x0003) and returns
// true when the chip answers with ISC_TEST_RESP and result 0x0000.
bool S1V30120_registration(void)
{
  // No SPI.beginTransaction() here: S1V30120_send_message() opens and closes
  // its own transaction, and an extra unmatched begin would never be ended.
  char reg_code[] = {0x0C, 0x00, 0x03, 0x00, 0x01, 0x00,
                     0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
  S1V30120_send_message(reg_code, 0x0C);
  return S1V30120_parse_response(ISC_TEST_RESP, 0x0000, 16);
}

// Message parser.
// Waits for the ready signal and the 0xAA start byte, reads 6 bytes into
// rcvd_msg, then clocks out padding_bytes zero bytes.
// Returns true if both the received message code (bytes 2-3, LSB first) and
// the received result (bytes 4-5, LSB first) match the expected values,
// false otherwise.
// As an observation, most messages are 6 bytes in length
// (2 bytes length + 2 bytes response code + 2 bytes response).
// Note: both waits block forever if the chip never answers.
bool S1V30120_parse_response(unsigned short expected_message,
                             unsigned short expected_result,
                             unsigned short padding_bytes)
{
  // Wait for ready signal
  while (digitalRead(S1V30120_RDY) == 0);

  // Receive 6 bytes
  digitalWrite(S1V30120_CS, LOW);
  SPI.beginTransaction(S1V30120_SPI_SETTINGS);

  // Wait for message start
  while (SPI.transfer(0x00) != 0xAA);

  for (int i = 0; i < 6; i++)
  {
    rcvd_msg[i] = SPI.transfer(0x00);
  }

  // Padding bytes
  S1V30120_send_padding(padding_bytes);

  SPI.endTransaction();
  digitalWrite(S1V30120_CS, HIGH);

  // Did we receive the expected message?
  const unsigned short rcvd_code =
      static_cast<unsigned short>((rcvd_msg[3] << 8) | rcvd_msg[2]);
  if (rcvd_code != expected_message)
  {
    return false; // We received something else
  }

  // Right message; now check the result it carries
  const unsigned short rcvd_result =
      static_cast<unsigned short>((rcvd_msg[5] << 8) | rcvd_msg[4]);
  return rcvd_result == expected_result;
}

// Padding function.
// Sends num_padding_bytes zero bytes over the SPI bus. The caller must
// already have selected the chip and opened the SPI transaction.
void S1V30120_send_padding(unsigned short num_padding_bytes)
{
  for (unsigned short i = 0; i < num_padding_bytes; i++)
  {
    SPI.transfer(0x00);
  }
}

// Functions that run in normal mode

// Sends the 0xAA start byte followed by message_length bytes of message.
// Blocks while the ready line is high (a response or indication is pending).
// NOTE(review): chip select is left LOW on return, exactly as in the
// original code; it is released by the receive routine that follows.
// Confirm against the S1V30120 message protocol before changing this.
void S1V30120_send_message(volatile char message[], unsigned char message_length)
{
  // Check to see if there's an incoming response or indication
  while (digitalRead(S1V30120_RDY) == 1); // blocking

  // OK, we can proceed
  digitalWrite(S1V30120_CS, LOW);
  SPI.beginTransaction(S1V30120_SPI_SETTINGS);

  SPI.transfer(0xAA); // Start Message Command
  for (unsigned char i = 0; i < message_length; i++)
  {
    SPI.transfer(message[i]);
  }

  SPI.endTransaction();
}

// Builds and sends the 12-byte audio configuration request from the
// TTS_AUDIO_CONF_* constants. Returns true when the chip answers with
// ISC_AUDIO_CONFIG_RESP and result 0x0000.
bool S1V30120_configure_audio(void)
{
  msg_len = 0x0C;
  send_msg[0] = msg_len & 0xFF;          // LSB of msg len
  send_msg[1] = (msg_len & 0xFF00) >> 8; // MSB of msg len
  send_msg[2] = ISC_AUDIO_CONFIG_REQ & 0xFF;
  send_msg[3] = (ISC_AUDIO_CONFIG_REQ & 0xFF00) >> 8;
  send_msg[4] = TTS_AUDIO_CONF_AS;
  send_msg[5] = TTS_AUDIO_CONF_AG;
  send_msg[6] = TTS_AUDIO_CONF_AMP;
  send_msg[7] = TTS_AUDIO_CONF_ASR;
  send_msg[8] = TTS_AUDIO_CONF_AR;
  send_msg[9] = TTS_AUDIO_CONF_ATC;
  send_msg[10] = TTS_AUDIO_CONF_ACS;
  send_msg[11] = TTS_AUDIO_CONF_DC;
  S1V30120_send_message(send_msg, msg_len);
  return S1V30120_parse_response(ISC_AUDIO_CONFIG_RESP, 0x0000, 16);
}

// Set gain to 0 dB.
// Sends the volume request (message id 0x000A) with a zero gain value and
// returns true when the chip answers with ISC_AUDIO_VOLUME_RESP and 0x0000.
bool S1V30120_set_volume(void)
{
  char setvol_code[] = {0x06, 0x00, 0x0A, 0x00, 0x00, 0x00};
  S1V30120_send_message(setvol_code, 0x06);
  return S1V30120_parse_response(ISC_AUDIO_VOLUME_RESP, 0x0000, 16);
}

// Builds and sends the 12-byte TTS configuration request from the ISC_TTS_*
// constants. Returns true when the chip answers with ISC_TTS_CONFIG_RESP
// and result 0x0000.
bool S1V30120_configure_tts(void)
{
  msg_len = 0x0C;
  send_msg[0] = msg_len & 0xFF;          // LSB of msg len
  send_msg[1] = (msg_len & 0xFF00) >> 8; // MSB of msg len
  send_msg[2] = ISC_TTS_CONFIG_REQ & 0xFF;
  send_msg[3] = (ISC_TTS_CONFIG_REQ & 0xFF00) >> 8;
  send_msg[4] = ISC_TTS_SAMPLE_RATE;
  send_msg[5] = ISC_TTS_VOICE;
  send_msg[6] = ISC_TTS_EPSON_PARSE;
  send_msg[7] = ISC_TTS_LANGUAGE;
  send_msg[8] = ISC_TTS_SPEAK_RATE_LSB;
  send_msg[9] = ISC_TTS_SPEAK_RATE_MSB;
  send_msg[10] = ISC_TTS_DATASOURCE;
  send_msg[11] = 0x00;
  S1V30120_send_message(send_msg, msg_len);
  return S1V30120_parse_response(ISC_TTS_CONFIG_RESP, 0x0000, 16);
}

// Speaks text_to_speech and blocks until the chip reports that it finished.
//   text_to_speech: the text to speak; it must fit in send_msg together with
//                   the S1V30120_SPEAK_OVERHEAD framing bytes.
//   flush_enable:   value placed in the flush control byte of the request.
// Returns false without sending anything when the text is too long for the
// buffer; otherwise returns whether the chip acknowledged the speak request
// (ISC_TTS_SPEAK_RESP with result 0x0000).
bool S1V30120_speech(const String &text_to_speech, unsigned char flush_enable)
{
  const unsigned int text_length = text_to_speech.length();

  // Refuse text that would overflow the transmit buffer
  if (text_length > sizeof(send_msg) - S1V30120_SPEAK_OVERHEAD)
  {
    Serial.println("Text too long for the TTS message buffer");
    return false;
  }

  txt_len = text_length;
  msg_len = txt_len + S1V30120_SPEAK_OVERHEAD;

  send_msg[0] = msg_len & 0xFF;          // LSB of msg len
  send_msg[1] = (msg_len & 0xFF00) >> 8; // MSB of msg len
  send_msg[2] = ISC_TTS_SPEAK_REQ & 0xFF;
  send_msg[3] = (ISC_TTS_SPEAK_REQ & 0xFF00) >> 8;
  send_msg[4] = flush_enable;            // flush control
  for (unsigned int i = 0; i < text_length; i++)
  {
    send_msg[i + 5] = text_to_speech[i];
  }
  send_msg[msg_len - 1] = '\0'; // null character

  S1V30120_send_message(send_msg, msg_len);
  const bool response = S1V30120_parse_response(ISC_TTS_SPEAK_RESP, 0x0000, 16);

  // Wait until the chip signals that it finished speaking
  while (!S1V30120_parse_response(ISC_TTS_FINISHED_IND, 0x0000, 16)); // blocking

  return response;
}
To increase the readability of the code I moved some #defines into a separate header file:
// Defines parameters for S1V30120
//
// Message identifiers and configuration values used by the text-to-speech
// sketch. The include guard makes the header safe to include more than once.
#ifndef S1V30120_DEFINES_H
#define S1V30120_DEFINES_H

// Commands

// Boot mode
#define ISC_VERSION_REQ 0x0005
#define ISC_BOOT_LOAD_REQ 0x1000
#define ISC_BOOT_RUN_REQ 0x1002
#define ISC_TEST_REQ 0x0003

// Normal (run) mode
#define ISC_AUDIO_CONFIG_REQ 0x0008
#define ISC_AUDIO_VOLUME_REQ 0x000A
#define ISC_AUDIO_MUTE_REQ 0x000C
#define ISC_TTS_CONFIG_REQ 0x0012

// TTS config
// 11 kHz
#define ISC_TTS_SAMPLE_RATE 0x01
#define ISC_TTS_VOICE 0x00
#define ISC_TTS_EPSON_PARSE 0x01
#define ISC_TTS_LANGUAGE 0x00
// 200 words/min
#define ISC_TTS_SPEAK_RATE_LSB 0xC8
#define ISC_TTS_SPEAK_RATE_MSB 0x00
#define ISC_TTS_DATASOURCE 0x00

#define ISC_TTS_SPEAK_REQ 0x0014

// Response messages

// Boot mode
#define ISC_VERSION_RESP 0x0006
#define ISC_BOOT_LOAD_RESP 0x1001
#define ISC_BOOT_RUN_RESP 0x1003
#define ISC_TEST_RESP 0x0004

// Normal (run) mode
#define ISC_AUDIO_CONFIG_RESP 0x0009
#define ISC_AUDIO_VOLUME_RESP 0x000B
#define ISC_AUDIO_MUTE_RESP 0x000D
#define ISC_TTS_CONFIG_RESP 0x0013
#define ISC_TTS_SPEAK_RESP 0x0015

// Fatal error indication
#define ISC_ERROR_IND 0x0000

// Request blocked
#define ISC_MSG_BLOCKED_RESP 0x0007

#define ISC_TTS_FINISHED_IND 0x0021

// Parameters

// Audio config
// See page 42 in S1V30120 Message Protocol Specification

// MONO = 0x00, all other values = reserved
#define TTS_AUDIO_CONF_AS 0x00

// Audio gain = +18 db
#define TTS_AUDIO_CONF_AG 0x43

// Audio amp not selected
#define TTS_AUDIO_CONF_AMP 0x00

// Sample rate 11kHz
#define TTS_AUDIO_CONF_ASR 0x01

// Audio routing: application to DAC
#define TTS_AUDIO_CONF_AR 0x00

// Audio tone control: deprecated, set to 0
#define TTS_AUDIO_CONF_ATC 0x00

// Audio click source: internal, set to 0
#define TTS_AUDIO_CONF_ACS 0x00

// DAC is on only while speech decoder
// or TTS synthesis is outputting audio
#define TTS_AUDIO_CONF_DC 0x00

#endif // S1V30120_DEFINES_H
Finally, the firmware image file is the one provided in the MikroElektronika example. The complete code can be downloaded here.
A few final thoughts
This is a work in progress! I would be very happy to hear which problems you encounter with this code so I can fix them.
Making this code into a library? Perhaps… Once all the code issues are solved I will do this. Until then it’s just this, plain code (but at least it should work).
Using Arduino Uno? The only issue is the big firmware file. I’m thinking of using an SD shield and putting that file on an SD card…
Wishlist? Unimplemented features? Your opinion counts! Don’t be afraid to use the comments section…
7 Comments
hello first of thanks for making and sharing this Code for the TTS-click board for making the S1V30120-chip
talk. The code works fine and i use a teensy 3.6 with the chip-configuration and changed some pins to match the teensy, say Mute, Ready, Reset. So……works fine and i modified the code to recieve messages over serial, and that works good, i can send a sentence to the teensy serial and the chip talks it.
But I would also like this code to work on an Arduino Mega. I put the img code in PROGMEM and compiled it for the Mega, and it compiled; otherwise the code as it is is too big for the Mega. I am a simple programmer, and maybe you are able to help me make it possible to run the img code from PROGMEM: I need help to fetch the code from PROGMEM, make good-sized chunks and download it to the S1V30120 chip. Thanks in advance, Dian Pancras from the Netherlands
Hi,
Check this blog post on using text to speech with Arduino Mega https://electronza.com/arduino-talking-clock-s1v30120/
Thanks for the reply Teodor, i checked this page and this points me to this page for the code so i am back to where i got yous code.
What i am trying to do is put the
const char TTS_INIT_DATA[] = { bytes} in PROGMEM
So get this
const char PROGMEM TTS_INIT_DATA[] =
{ Bytes }
And then compile for the MEGA
Result is GOOD
Sketch uses 38090 bytes (14%) of program storage space. Maximum is 253952 bytes.
Global variables use 907 bytes (11%) of dynamic memory, leaving 7285 bytes for local variables. Maximum is 8192 bytes.
Especialy the Global variables is the bottleneck, but putting it in PROGMEM is the way to go.
I think you know what i mean, and the question to please help me retreving this bytes from PROGMEM and using it inside the
bool S1V30120_download(void)
{
// TTS_INIT_DATA is of unsigned char type (one byte)
etcetera
Piece of the sketch.
On the https://electronza.com/arduino-talking-clock-s1v30120/ LINK i did not see the actual code
so that is why i ask again to explain to me how to do that please, or provide the code you used for the Mega
would be great too.
Thanks again, Dian
Yes i got it, i missed a part of to sketch, but now i got it, and it compiles completely.
Will try to run it on an Arduino Mega, looks good.
I will let you know the results.
Thanks, Dian
And, i use this code a lot with the teensy 3.6 and the TTs-click-board, works great.
I send text or mostly text to sing to the teensy over a serial-port and then it is send through to the tts-board.
Works realy great, My question is: The access-time from sending to singing or speaking is
207 ms, and this is quiet constant, is there a way to shorten this access time.
It would be easier for the teensy tts-board combi to follow the chords that are played in real-time.
Now i have to send the chords-info 207 ms ahead, and that works too, but the earlyer the better it is.
Thanks in advance for your interest, Dian from the Netherlands
Can you please share the code for the teensy 3.6?
I don’t have code for teensy. Sorry.