本帖最后由 PY学习笔记 于 2025-8-25 17:07 编辑
近期,DFRobot 推出了全新开发板 FireBeetle 2 ESP32-P4。这块开发板搭载了 ESP32-P4 主控,虽未集成 Wi-Fi 与蓝牙,但凭借强劲性能,依然令人眼前一亮。很荣幸能抢先体验这块开发板! 1.开发板介绍FireBeetle 2 ESP32-P4有很多种外设: Type-C USB CDC:Type-C USB烧录、调试接口 IO3/LED:板载LED引脚 Power LED:主板电源指示灯 RST:复位按键 IO35/BOOT:IO引脚/BOOT按键 MIC: MEMS PDM麦克风 HIGH-SPEED USB OTG 2.0: Type-C高速USB OTG 2.0 ESP32-P4:ESP32-P4芯片 MIPI-DSI: 两通道MIPI-DSI屏幕(兼容树莓派4B DSI屏幕线序) MIPI-CSI: 两通道MIPI-CSI摄像头(兼容树莓派4B CSI摄像头线序) TF Card: TF卡插槽 16MB FLASH: 16MB Flash存储 ESP32-C6:ESP32-C6-MINI-1模组,通过SDIO与ESP32-P4连接,用于扩展WiFi、蓝牙
2.实现原理由于MicroPython不支持离线语音识别与合成,所以采用在线语音识别和在线语音合成,AI也是采用在线的方式,在线语音识别和AI大模型均使用siliconflow的免费模型,而在线语音合成使用了百度的语音合成(实名认证后免费5万次),再结合ST7789屏幕(使用MIPI屏幕会出问题),MAX9835功放模块,以及板载的PDM麦克风即可实现。 3.代码实现参考代码如下:
- # 录音+语音识别+AI对话+语音合成的完整闭环
- import network, urequests, ujson, gc, time, st7789_spi, baidu_tts
- from easydisplay import EasyDisplay
- from machine import Pin, I2S, SPI
- # ---------- 全局配置 ----------
- WIFI_SSID = "SSID"
- WIFI_PASS = "PWD"
- API_KEY = "API-KEY"
- TTS_API_KEY = "API-KEY"
- TTS_SEC_KEY = "SEC-KEY"
-
- ASR_URL = "https://api.siliconflow.cn/v1/audio/transcriptions"
- CHAT_URL = "https://api.siliconflow.cn/v1/chat/completions"
-
- ASR_MODEL = "FunAudioLLM/SenseVoiceSmall"
- LLM_MODEL = "Qwen/Qwen3-8B"
-
- # I2S 引脚
- SCK_PIN = 12
- SD_PIN = 9
- boot = Pin(35, Pin.IN, Pin.PULL_UP) # BOOT 键,按下为 0
- led = Pin(3,Pin.OUT)
- spi = SPI(2, baudrate=20000000, polarity=0, phase=0, sck=Pin(28), mosi=Pin(29))
- dp = st7789_spi.ST7789(width=240, height=280, spi=spi, cs=20, dc=4, res=30, rotate=1,invert=False, rgb=False)
- ed = EasyDisplay(dp, "RGB565", font="/text_lite_16px_2312.v3.bmf", show=True, color=0xFFFF, clear=True,auto_wrap=True)
- # ---------- 联网 ----------
- def connect_wifi():
- sta = network.WLAN(network.STA_IF)
- sta.active(True)
- sta.connect(WIFI_SSID, WIFI_PASS)
- while not sta.isconnected():
- time.sleep(0.5)
- print("Wi-Fi OK:", sta.ifconfig()[0])
- return sta
-
- def record_audio(sr=8000):
- # 等待按下
- print("长按 BOOT 开始录音...")
- while boot.value() == 1:
- time.sleep_ms(10)
-
- # 开始录音
- pcm = bytearray()
- audio = I2S(0,
- sck=Pin(SCK_PIN),
- sd=Pin(SD_PIN),
- mode=I2S.PDM_RX,
- bits=16,
- format=I2S.MONO,
- rate=sr * 4,
- ibuf=10240)
-
- # 边录边检查按键
- chunk = bytearray(1024)
- print("录音中,松开 BOOT 结束...")
- while boot.value() == 0: # 0 表示仍按着
- n = audio.readinto(chunk)
- pcm.extend(chunk[:n])
-
- audio.deinit()
- return pcm, sr
-
- # ---------- 构造 WAV ----------
- def wav_header(data_len, sample_rate):
- hdr = bytearray(44)
- hdr[0:4] = b'RIFF'
- hdr[4:8] = (data_len + 36).to_bytes(4, 'little')
- hdr[8:12] = b'WAVE'
- hdr[12:16] = b'fmt '
- hdr[16:20] = (16).to_bytes(4, 'little')
- hdr[20:22] = (1).to_bytes(2, 'little') # PCM
- hdr[22:24] = (1).to_bytes(2, 'little') # mono
- hdr[24:28] = sample_rate.to_bytes(4, 'little')
- hdr[28:32] = (sample_rate * 2).to_bytes(4, 'little')
- hdr[32:34] = (2).to_bytes(2, 'little') # block align
- hdr[34:36] = (16).to_bytes(2, 'little') # bits per sample
- hdr[36:40] = b'data'
- hdr[40:44] = data_len.to_bytes(4, 'little')
- return hdr
-
- # ---------- 语音识别 ----------
- def speech_to_text(pcm, sr):
- wav = wav_header(len(pcm), sr) + pcm
- boundary = "----VoiceBoundary"
- body = b"--" + boundary.encode() + b"\r\n"
- body += b'Content-Disposition: form-data; name="file"; filename="mic.wav"\r\n'
- body += b"Content-Type: audio/wav\r\n\r\n"
- body += wav
- body += b"\r\n--" + boundary.encode() + b"\r\n"
- body += b'Content-Disposition: form-data; name="model"\r\n\r\n'
- body += ASR_MODEL.encode()
- body += b"\r\n--" + boundary.encode() + b"--\r\n"
-
- headers = {
- "Authorization": "Bearer " + API_KEY,
- "Content-Type": "multipart/form-data; boundary=" + boundary
- }
- print("识别中…")
- res = urequests.post(ASR_URL, data=body, headers=headers)
- text = res.json().get("text", "").strip()
- res.close()
- gc.collect()
- return text
-
- # ---------- 对话 ----------
- def chat_with_ai(text):
- headers = {
- "Authorization": "Bearer " + API_KEY,
- "Content-Type": "application/json"
- }
- payload = {
- "model": LLM_MODEL,
- "messages": [
- {"role": "system", "content": "你是我的AI助手小智,你必须用中文回答且不超过100字还不允许使用MD进行回答"},
- {"role": "user", "content": text}
- ],
- "enable_thinking":False,
- }
- print("AI思考中…")
- start = time.time()
- res = urequests.post(CHAT_URL, data=ujson.dumps(payload).encode(), headers=headers)
- delta = time.time() - start
- if res.status_code == 200:
- reply = res.json()['choices'][0]['message']['content'].replace("\n", "")
- print(f"({delta:.1f}s) AI:", reply)
- ed.text(f"({delta:.1f}s) AI:"+reply, 0, 50)
- baidu_tts.run(
- access=TTS_API_KEY,
- secret=TTS_SEC_KEY,
- text=reply,
- )
- else:
- print("Error:", res.status_code, res.text)
- reply = ""
- res.close()
- gc.collect()
- return reply
-
- # ---------- 主循环 ----------
- def main():
- connect_wifi()
- while True:
- pcm, sr = record_audio()
- text = speech_to_text(pcm, sr)
- if not text:
- print("没听清,请再说一遍")
- baidu_tts.run(
- access=TTS_API_KEY,
- secret=TTS_SEC_KEY,
- text="没听清,请再说一遍",
- out_path='welcome.wav'
- )
- continue
- elif "开灯" in text:
- print("你:", text)
- led.on()
- print("AI:LED灯已开启")
- ed.text("AI:LED灯已开启", 0, 50)
- baidu_tts.run(
- access=TTS_API_KEY,
- secret=TTS_SEC_KEY,
- text="LED灯已开启",
- )
- elif "关灯" in text:
- print("你:", text)
- led.off()
- print("AI:LED灯已关闭")
- ed.text("AI:LED灯已关闭", 0, 50)
- baidu_tts.run(
- access=TTS_API_KEY,
- secret=TTS_SEC_KEY,
- text="LED灯已关闭",
- )
- else:
- print("你:", text)
- chat_with_ai(text)
-
- if __name__ == "__main__":
- main()
复制代码
4.效果
|