人工智障
——人工智障只是玩笑话,编程改变世界。
编写此程序的目的:
- 熟悉c#语言(其实是在学习,在学校欠的债现在都要还回来)
- 学习如何利用Stack Overflow
- 打算用树莓派做个语音问答机器人,先写个C#的demo来证明方案可行
- 我会说顺便完成毕业前立下的flag嘛,这篇文章是用Markdown写的,当时立志学习Markdown写README来着→_→
整理下人工智障的流程
需要引入的第三方动态库
1.NAudio(.Net下处理音频的库,本例中用来录制以及播放)
2.Aipsdk(百度语音识别)
3.Newtonsoft.Json(用于json的序列化以及反序列化)
开工
Step1
注册百度和图灵机器人的开发者账号,得到各自的apiKey以及secretKey,这些是在调用的时候验证权限用的,个人开发者每天有次数限制(免费的当然受限制啦)。不上链接了
Step2
简单的UI,拖两个Button。分别是开始录音和结束录音,然后来2个TextBlock来显示识别内容和对话结果。
Step3
先看下百度语音识别对音频文件的要求,开发文档。
原始 PCM 的录音参数必须符合 8k/16k 采样率、16bit 位深、单声道,支持的格式有:pcm(不压缩)、wav(不压缩,pcm编码)、amr(压缩格式)。
++百度服务端会将非pcm格式,转为pcm格式,因此使用wav、amr会有额外的转换耗时++
在这里我选择的是用wav格式(因为我不会压缩TAT),有关wav格式和pcm的关系,这篇文章里,博主写的蛮详细的
录制wav文件
查阅Naudio的英文文档
这里发现了一个好玩的软件,可以将Windows上的音频推流到树莓派上的Shairport,有兴趣的可以研究下。
英文太渣了,还是去找demo研究下吧。
在Stack Overflow找到了一个demo
// Capture device and the WAV writer fed by it; both stay null while no recording is running.
public WaveIn waveSource;
public WaveFileWriter waveFile;

/// <summary>
/// Starts a recording: toggles the two buttons, opens the capture device,
/// wires its events and creates the output WAV file.
/// </summary>
/// <param name="sender">Button that raised the click.</param>
/// <param name="e">Unused event data.</param>
private void StartBtn_Click(object sender, EventArgs e)
{
    StartBtn.Enabled = false;
    StopBtn.Enabled = true;

    // Sample rate 44100, 1 channel.
    var format = new WaveFormat(44100, 1);
    waveSource = new WaveIn { WaveFormat = format };
    waveSource.DataAvailable += waveSource_DataAvailable;
    waveSource.RecordingStopped += waveSource_RecordingStopped;

    // The writer must use the same format as the device that feeds it.
    waveFile = new WaveFileWriter(@"C:\Temp\Test0001.wav", format);
    waveSource.StartRecording();
}
/// <summary>
/// Requests the end of the current recording; cleanup happens in the
/// RecordingStopped handler.
/// </summary>
/// <param name="sender">Button that raised the click.</param>
/// <param name="e">Unused event data.</param>
private void StopBtn_Click(object sender, EventArgs e)
{
    StopBtn.Enabled = false;

    // waveSource is null until Start has been pressed (and again after a
    // recording has been cleaned up), so there may be nothing to stop.
    if (waveSource != null)
    {
        waveSource.StopRecording();
    }
}
/// <summary>
/// Appends each captured buffer to the WAV file and flushes it immediately.
/// </summary>
/// <param name="sender">The capture device.</param>
/// <param name="e">Carries the buffer and the number of valid bytes in it.</param>
void waveSource_DataAvailable(object sender, WaveInEventArgs e)
{
    // No writer means the recording has already been torn down.
    if (waveFile == null)
    {
        return;
    }

    // Only the first BytesRecorded bytes of the buffer are written.
    waveFile.Write(e.Buffer, 0, e.BytesRecorded);
    waveFile.Flush();
}
/// <summary>
/// Releases the capture device and the WAV writer once recording has
/// stopped, then makes the Start button available again.
/// </summary>
/// <param name="sender">The capture device.</param>
/// <param name="e">Stop information (not inspected).</param>
void waveSource_RecordingStopped(object sender, StoppedEventArgs e)
{
    WaveIn device = waveSource;
    if (device != null)
    {
        device.Dispose();
        waveSource = null;
    }

    WaveFileWriter writer = waveFile;
    if (writer != null)
    {
        writer.Dispose();
        waveFile = null;
    }

    StartBtn.Enabled = true;
}
答主的代码可以直接用的,注意以下几点
1.自己添加using
2.按钮的Enabled属性在WPF中需要改成IsEnabled
3.保存的wav文件目录,自己要更改下,否则会报不存在
4.new WaveFormat(44100, 1);//这一句是指定采样率和声道数的,按照百度要求改为new WaveFormat(16000, 1);
调用百度语音识别
调用百度的REST API SDK,如果想要调用web api(需要自己转换文件为Base64字符串提交)请移步语音识别 REST API
封装百度语音API的功能
/// <summary>
/// Small wrapper around the Baidu speech-recognition (ASR) SDK client.
/// </summary>
class API
{
    // APPID / API key / secret key used to authenticate against Baidu.
    // NOTE(review): credentials should not be committed with the source;
    // load them from configuration or the environment instead.
    string APP_ID = "102***75";
    string API_KEY = "fgI3pD**********XOhkwMs";
    string SECRET_KEY = "4f86baa58***********922cea947b";
    Baidu.Aip.Speech.Asr client;

    /// <summary>Creates a new, not yet initialized wrapper.</summary>
    public static API getAPI()
    {
        return new API();
    }

    /// <summary>Creates the SDK client from the API key and secret key.</summary>
    public void init()
    {
        client = new Baidu.Aip.Speech.Asr(API_KEY, SECRET_KEY);
    }

    /// <summary>
    /// Sends a local WAV file to the recognizer and returns the raw result as text.
    /// </summary>
    /// <param name="path">File to recognize; defaults to the recording written by the demo.</param>
    /// <returns>The SDK result converted with ToString().</returns>
    public string AsrData(string path = @".\16k.wav")
    {
        // Initialize on demand so a forgotten init() call is not a NullReferenceException.
        if (client == null)
        {
            init();
        }

        var data = File.ReadAllBytes(path);
        var result = client.Recognize(data, "wav", 16000);
        return result.ToString();
    }
}
封装百度语音和图灵机器人的出入参
#region 入参
#region 图灵机入参
/// <summary>
/// Input payload of a Tuling robot request: text, image and client information.
/// </summary>
class Perception
{
// The sub-objects are created up front, so callers can assign their members directly.
public InputText inputText = new InputText();
public InputImage inputImage = new InputImage();
public SelfInfo selfInfo = new SelfInfo();
/// <summary>
/// Text input.
/// </summary>
public class InputText
{
public string text { get; set; }// text entered directly
}
/// <summary>
/// Image input.
/// </summary>
public class InputImage
{
public string url { get; set; }// image address
}
/// <summary>
/// Client attributes.
/// </summary>
public class SelfInfo
{
public Location location = new Location();
/// <summary>
/// Geographic location information.
/// </summary>
public class Location
{
public string city { get; set; }// city
public string latitude { get; set; }// latitude; greater than 0 is north, less than 0 is south
public string longitude { get; set; }// longitude; greater than 0 is east, less than 0 is west
public string nearest_poi_name { get; set; }// name of the nearest street
public string province { get; set; }// province
public string street { get; set; }// street
}
}
}
/// <summary>
/// User parameters of a Tuling robot request.
/// </summary>
public class UserInfo
{
public string apiKey { get; set; }// robot identifier
public string userId { get; set; }// unique user identifier
public string groupId { get; set; }// unique group-chat identifier
public string userIdName { get; set; }// user's nickname inside the group
}
// Root object of a Tuling robot request; it is serialized to JSON as the request body.
class InParam
{
/// <summary>
/// Input type: 0 = text (default), 1 = image, 2 = audio.
/// </summary>
public int reqType;
// Both parts are created up front, so callers can fill them in directly.
public Perception perception = new Perception();
public UserInfo userInfo = new UserInfo();
}
#endregion
#endregion
#region 出参
#region 图灵机出参
// "intent" section of the Tuling robot response.
// NOTE(review): field meanings are not shown in this file; confirm against the Tuling API documentation.
public class Intent
{
public int code { get; set; }// presumably the response/intent code; confirm
public string intentName { get; set; }
public string actionName { get; set; }
public Parameters parameters = new Parameters();
// Parameters attached to the intent.
public class Parameters
{
public string nearby_place { get; set; }
}
}
// One entry of the "results" list in the Tuling robot response.
public class Results
{
public int groupType { get; set; }
public string resultType { get; set; }
public Values values = new Values();
// Payload of the entry; the demo displays and speaks values.text.
public class Values
{
public string text { get; set; }
}
}
// Root object the Tuling robot response JSON is deserialized into.
class OutParam
{
public Intent intent = new Intent();
// The demo reads the first entry's values.text as the reply.
public List<Results> results = new List<Results>();
}
#endregion
#region 百度出参
// Object the Baidu speech-recognition result JSON is deserialized into.
class BaiduOutParam
{
public int err_no { get; set; }// the demo treats 0 as a successful recognition
public string err_msg { get; set; }
public string corpus_no { get; set; }
public string sn { get; set; }
public List<string> result { get; set; }// recognized text; the demo uses the first entry
}
#endregion
#endregion
在录音的结束事件里调用(此时文件占用已经解除)
// Run speech recognition on the recorded file and decide what to do next.
API demo = API.getAPI();
demo.init();
BaiduOutParam bdop = Newtonsoft.Json.JsonConvert.DeserializeObject<BaiduOutParam>(demo.AsrData());// deserialize the JSON result

InParam inParam = new InParam();// Tuling robot request
inParam.reqType = 0;
inParam.userInfo.apiKey = "7983cba12ff6******11b413474";
inParam.userInfo.userId = "123456";

// A usable result needs a parsed object, err_no == 0 and at least one candidate;
// anything else is handled like "not understood" instead of throwing on result[0].
bool recognized = bdop != null
    && bdop.err_no == 0
    && bdop.result != null
    && bdop.result.Count > 0;

if (recognized)
{
    inParam.perception.inputText.text = bdop.result[0];// recognized: pass the text on to the Tuling robot API
    this.Dispatcher.Invoke(new Action(() => { TextBox.Text = "语音识别内容为:" + inParam.perception.inputText.text; }));// update the UI
    bg.RunWorkerAsync(inParam);
}
else
{
    DownloadVoice("对不起,我没理解您在说什么。");
    this.Dispatcher.Invoke(new Action(() => { TextBlock.Text = "对不起,我没理解您在说什么。"; }));// update the UI
}
这一步的时候根据百度语音的识别结果来决定下一步的操作流程
封装语音下载功能
#region 下载临时语音文件
/// <summary>
/// Requests synthesized speech for <paramref name="responseText"/> from the
/// Baidu text-to-audio endpoint, saves it as a temporary MP3 and plays it.
/// </summary>
/// <param name="responseText">Text to speak; null is treated as empty.</param>
public void DownloadVoice(string responseText)
{
    string voicePath = @"./test.mp3";

    // The text becomes a query-string value, so it has to be escaped;
    // otherwise characters such as '&', '#' or '+' corrupt the request.
    string url = string.Format("http://tts.baidu.com/text2audio?idx=1&tex={0}&cuid=baidu_speech_demo&cod=2&lan=zh&ctp=1&pdt=1&spd=5&per=4&vol=5&pit=5", Uri.EscapeDataString(responseText ?? string.Empty));
    var httpWebRequest = (HttpWebRequest)WebRequest.Create(url);
    httpWebRequest.ContentType = "text/html;charset=UTF-8";
    httpWebRequest.Method = "GET";

    // Buffer the whole response in memory first, so the file is only
    // (re)written after the download has completed.
    byte[] audio;
    using (var httpResponse = (HttpWebResponse)httpWebRequest.GetResponse())
    using (Stream stream = httpResponse.GetResponseStream())
    using (var memoryStream = new MemoryStream())
    {
        stream.CopyTo(memoryStream);
        audio = memoryStream.ToArray();
    }

    // WriteAllBytes closes the file before returning, so the player can open it.
    File.WriteAllBytes(voicePath, audio);
    PlayAudio(voicePath);// play it
}
#endregion
下载文件
用HTTPGet的方式去请求百度的语音合成WebAPI
// The text is a query-string value and must be escaped before it is inserted.
string url = string.Format("http://tts.baidu.com/text2audio?idx=1&tex={0}&cuid=baidu_speech_demo&cod=2&lan=zh&ctp=1&pdt=1&spd=5&per=4&vol=5&pit=5", Uri.EscapeDataString(responseText ?? string.Empty));
此为对请求地址的处理(入参修改)
在这里我花了不少时间,一是因为基础薄弱不知道该怎么处理response的数据,二是因为不知道怎么把数据写入到文件里。
判断response数据类型的时候,用Chrome浏览器,按F12进入调试模式,然后刷新下页面。在Network下查看返回的信息,就能获取到文件的类型,如图
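但是尝试将网络流写入文件的时候出现了问题,百度了下发现不能直接写入,得先写到内存流里面之后才能写入文件,写入完后就可以播放了。期间遇到了文件占用的问题,调用Close后再调用Dispose也没能解决。后来使用的using才解决这个问题,对C#理解还不够深入,不知道背后的原理是什么。(难道是using执行完后释放的比我单独调用dispose释放的更为彻底?)(注:using 语句只是 try/finally 加 Dispose 的语法糖,释放得并不会更"彻底";区别在于它保证即使中途抛出异常,Dispose 也一定会在离开代码块时执行。文件仍被占用,通常是因为还有别的流或读取器对象持有该文件而没有被释放。)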
封装语音播放功能
#region 播放语音文件
// Output device and MP3 reader of the playback in progress.
WaveOut wo;
Mp3FileReader _mainOutputStream;

/// <summary>
/// Plays an MP3 file. Resources are released and the record button is
/// re-enabled by the PlaybackStopped handler once playback ends.
/// </summary>
/// <param name="FilePath">Path of the MP3 file to play.</param>
public void PlayAudio(string FilePath)
{
    if (!System.IO.File.Exists(FilePath))
    {
        // Nothing will play, so PlaybackStopped never fires; re-enable recording here.
        StartBtn.IsEnabled = true;
        return;
    }

    try
    {
        _mainOutputStream = new Mp3FileReader(FilePath);
        wo = new WaveOut(WaveCallbackInfo.FunctionCallback());
        wo.PlaybackStopped += PlaybackStopped;// playback finished: dispose to release the file and re-enable the record button
        wo.Init(_mainOutputStream);
        wo.Play();
    }
    catch (Exception ex)
    {
        // Playback stays best-effort, but the failure is reported instead of
        // being swallowed, and nothing is left locked or disabled.
        System.Diagnostics.Debug.WriteLine("PlayAudio failed: " + ex);

        if (wo != null)
        {
            wo.PlaybackStopped -= PlaybackStopped;
            wo.Dispose();
            wo = null;
        }
        if (_mainOutputStream != null)
        {
            _mainOutputStream.Dispose();
            _mainOutputStream = null;
        }
        StartBtn.IsEnabled = true;
    }
}
/// <summary>
/// Runs when playback ends: disposes the output device and the MP3 reader,
/// then re-enables the record button.
/// </summary>
/// <param name="sender">The output device.</param>
/// <param name="e">Stop information (not inspected).</param>
void PlaybackStopped(object sender, StoppedEventArgs e)
{
    // Already cleaned up: nothing to do.
    if (wo == null)
    {
        return;
    }

    wo.Dispose();
    _mainOutputStream.Dispose();
    wo = null;
    StartBtn.IsEnabled = true;
}
#endregion
Step4
在button的点击事件里整合上面的内容,就可以愉快的和智障对话了
附录——程序源码
界面
<Window x:Class="Demo.MainWindow"
        xmlns="http://schemas.microsoft.com/winfx/2006/xaml/presentation"
        xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml"
        Title="MainWindow" Height="350" Width="525" WindowStartupLocation="CenterScreen"
        >
    <Grid>
        <!-- Shows the recognized speech. It is a TextBlock even though it is named "TextBox". -->
        <TextBlock x:Name="TextBox" Height="51" Width="167" Margin="92,91,0,0" HorizontalAlignment="Left" VerticalAlignment="Top" TextWrapping="Wrap"></TextBlock>
        <!--<Button Margin="0,89,127.4,0" Click="Button_Click" HorizontalAlignment="Right" Width="60" Height="40" VerticalAlignment="Top">Click Me</Button>-->
        <!-- Shows the robot's reply. -->
        <TextBlock x:Name="TextBlock" Margin="91,147,0,0" TextWrapping="Wrap" Text="" HorizontalAlignment="Left" Width="300" Height="101" VerticalAlignment="Top"/>
        <Button x:Name="StartBtn" Content="开始录音" Margin="92,43,0,0" Height="19" VerticalAlignment="Top" HorizontalAlignment="Left" Width="75" Click="StartBtn_Click"/>
        <!-- Disabled until a recording is started, so Stop cannot be clicked while nothing is recording. -->
        <Button x:Name="StopBtn" Content="结束录音" IsEnabled="False" HorizontalAlignment="Right" Margin="0,43,112.4,0" Width="75" Height="19" VerticalAlignment="Top" Click="StopBtn_Click"/>
    </Grid>
</Window>
处理逻辑
using NAudio.Wave;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Windows;
using System.Windows.Controls;
using System.Windows.Data;
using System.Windows.Documents;
using System.Windows.Input;
using System.Windows.Media;
using System.Windows.Media.Imaging;
using System.Windows.Navigation;
using System.Windows.Shapes;
namespace Demo
{
/// <summary>
/// MainWindow.xaml 的交互逻辑
/// </summary>
public partial class MainWindow : Window
{
// Worker that performs the Tuling robot HTTP call off the UI thread.
System.ComponentModel.BackgroundWorker bg;

/// <summary>
/// Builds the window and prepares the background worker (no cancellation,
/// no progress reporting) together with its two handlers.
/// </summary>
public MainWindow()
{
    InitializeComponent();

    bg = new System.ComponentModel.BackgroundWorker
    {
        WorkerSupportsCancellation = false,
        WorkerReportsProgress = false
    };
    bg.DoWork += bg_DoWork;
    bg.RunWorkerCompleted += bg_RunWorkerCompleted;
}
/// <summary>
/// Sends the text currently shown in the TextBox element to the Tuling
/// robot API through the background worker.
/// </summary>
/// <param name="sender">Button that raised the click.</param>
/// <param name="e">Unused event data.</param>
private void Button_Click(object sender, RoutedEventArgs e)
{
    // RunWorkerAsync throws InvalidOperationException while a request is
    // still running, so repeated clicks are ignored until it finishes.
    if (bg.IsBusy)
    {
        return;
    }

    InParam inParam = new InParam();
    inParam.reqType = 0;
    inParam.perception.inputText.text = TextBox.Text;
    // NOTE(review): API key committed in source; move it to configuration.
    inParam.userInfo.apiKey = "7983cba12ff64dcfa47387b11b413474";
    inParam.userInfo.userId = "123456";
    bg.RunWorkerAsync(inParam);
}
/// <summary>
/// Handles the finished Tuling robot request: shows the first reply text
/// and speaks it. On failure, or when the reply carries no result, the
/// record button is re-enabled so the user can try again.
/// </summary>
/// <param name="sender">The background worker.</param>
/// <param name="e">Carries the response JSON in Result, or the failure in Error.</param>
void bg_RunWorkerCompleted(object sender, System.ComponentModel.RunWorkerCompletedEventArgs e)
{
    // Reading e.Result rethrows the DoWork exception, so check Error first.
    if (e.Error != null)
    {
        System.Diagnostics.Debug.WriteLine("Tuling request failed: " + e.Error);
        TextBlock.Text = e.Error.Message;
        StartBtn.IsEnabled = true;
        return;
    }

    string json = e.Result as string;
    OutParam outParam = json == null
        ? null
        : Newtonsoft.Json.JsonConvert.DeserializeObject<OutParam>(json);

    // Guard the results[0].values.text access against missing or empty replies.
    bool hasReply = outParam != null
        && outParam.results != null
        && outParam.results.Count > 0
        && outParam.results[0] != null
        && outParam.results[0].values != null;
    if (!hasReply)
    {
        StartBtn.IsEnabled = true;
        return;
    }

    TextBlock.Text = outParam.results[0].values.text;
    DownloadVoice(TextBlock.Text);
}
/// <summary>
/// Worker body: serializes the <c>InParam</c> argument to JSON, POSTs it to
/// the Tuling robot endpoint and stores the raw response text in
/// <c>e.Result</c>. Arguments of any other type are ignored.
/// </summary>
/// <param name="sender">The background worker.</param>
/// <param name="e">Argument in, result out.</param>
void bg_DoWork(object sender, System.ComponentModel.DoWorkEventArgs e)
{
    InParam inParam = e.Argument as InParam;
    if (inParam == null)
    {
        return;
    }

    string json = Newtonsoft.Json.JsonConvert.SerializeObject(inParam);

    var httpWebRequest = (HttpWebRequest)WebRequest.Create("http://openapi.tuling123.com/openapi/api/v2");
    httpWebRequest.ContentType = "application/json";
    httpWebRequest.Method = "POST";

    // Disposing the writer flushes and closes the request stream.
    using (var streamWriter = new StreamWriter(httpWebRequest.GetRequestStream()))
    {
        streamWriter.Write(json);
    }

    // The response is disposed as well, so the connection is released deterministically.
    using (var httpResponse = (HttpWebResponse)httpWebRequest.GetResponse())
    using (var streamReader = new StreamReader(httpResponse.GetResponseStream()))
    {
        e.Result = streamReader.ReadToEnd();
    }
}
/// <summary>
/// Small wrapper around the Baidu speech-recognition (ASR) SDK client.
/// </summary>
class API
{
    // APPID / API key / secret key used to authenticate against Baidu.
    // NOTE(review): real credentials are committed here; rotate them and
    // load them from configuration or the environment instead.
    string APP_ID = "10219675";
    string API_KEY = "fgI3pDx7Bh6fPfQ74XOhkwMs";
    string SECRET_KEY = "4f86baa581f86d7966eeb4922cea947b";
    Baidu.Aip.Speech.Asr client;

    /// <summary>Creates a new, not yet initialized wrapper.</summary>
    public static API getAPI()
    {
        return new API();
    }

    /// <summary>Creates the SDK client from the API key and secret key.</summary>
    public void init()
    {
        client = new Baidu.Aip.Speech.Asr(API_KEY, SECRET_KEY);
    }

    /// <summary>
    /// Sends a local WAV file to the recognizer and returns the raw result as text.
    /// </summary>
    /// <param name="path">File to recognize; defaults to the recording written by the demo.</param>
    /// <returns>The SDK result converted with ToString().</returns>
    public string AsrData(string path = @".\16k.wav")
    {
        // Initialize on demand so a forgotten init() call is not a NullReferenceException.
        if (client == null)
        {
            init();
        }

        var data = File.ReadAllBytes(path);
        var result = client.Recognize(data, "wav", 16000);
        return result.ToString();
    }
}
#region 录音
// Capture device and the WAV writer fed by it; both stay null while no recording is running.
public WaveIn waveSource;
public WaveFileWriter waveFile;

/// <summary>
/// Starts a recording: toggles the two buttons, opens the capture device,
/// wires its events and creates the output WAV file.
/// </summary>
/// <param name="sender">Button that raised the click.</param>
/// <param name="e">Unused event data.</param>
private void StartBtn_Click(object sender, EventArgs e)
{
    StartBtn.IsEnabled = false;
    StopBtn.IsEnabled = true;

    // Sample rate 16000, 1 channel; the recognizer is later called with the same 16000 rate.
    var format = new WaveFormat(16000, 1);
    waveSource = new WaveIn { WaveFormat = format };
    waveSource.DataAvailable += waveSource_DataAvailable;
    waveSource.RecordingStopped += waveSource_RecordingStopped;

    // The writer must use the same format as the device that feeds it.
    waveFile = new WaveFileWriter(@".\16k.wav", format);
    waveSource.StartRecording();
}
/// <summary>
/// Requests the end of the current recording; cleanup and recognition
/// happen in the RecordingStopped handler.
/// </summary>
/// <param name="sender">Button that raised the click.</param>
/// <param name="e">Unused event data.</param>
private void StopBtn_Click(object sender, EventArgs e)
{
    StopBtn.IsEnabled = false;

    // waveSource is null until Start has been pressed (and again after a
    // recording has been cleaned up), so there may be nothing to stop.
    if (waveSource != null)
    {
        waveSource.StopRecording();
    }
}
/// <summary>
/// Appends each captured buffer to the WAV file and flushes it immediately.
/// </summary>
/// <param name="sender">The capture device.</param>
/// <param name="e">Carries the buffer and the number of valid bytes in it.</param>
void waveSource_DataAvailable(object sender, WaveInEventArgs e)
{
    // No writer means the recording has already been torn down.
    if (waveFile == null)
    {
        return;
    }

    // Only the first BytesRecorded bytes of the buffer are written.
    waveFile.Write(e.Buffer, 0, e.BytesRecorded);
    waveFile.Flush();
}
/// <summary>
/// Runs when recording has stopped: releases the device and the WAV file,
/// sends the recording to Baidu speech recognition and either forwards the
/// recognized text to the Tuling robot or speaks an apology.
/// </summary>
/// <param name="sender">The capture device.</param>
/// <param name="e">Stop information (not inspected).</param>
void waveSource_RecordingStopped(object sender, StoppedEventArgs e)
{
    // Release the device and close the file first, so the WAV file is no
    // longer locked when it is read for recognition.
    if (waveSource != null)
    {
        waveSource.Dispose();
        waveSource = null;
    }
    if (waveFile != null)
    {
        waveFile.Dispose();
        waveFile = null;
    }

    API demo = API.getAPI();
    demo.init();
    BaiduOutParam bdop = Newtonsoft.Json.JsonConvert.DeserializeObject<BaiduOutParam>(demo.AsrData());

    InParam inParam = new InParam();
    inParam.reqType = 0;
    // NOTE(review): API key committed in source; move it to configuration.
    inParam.userInfo.apiKey = "7983cba12ff64dcfa47387b11b413474";
    inParam.userInfo.userId = "123456";

    // A usable result needs a parsed object, err_no == 0 and at least one candidate;
    // anything else is handled like "not understood" instead of throwing on result[0].
    bool recognized = bdop != null
        && bdop.err_no == 0
        && bdop.result != null
        && bdop.result.Count > 0;

    if (recognized)
    {
        inParam.perception.inputText.text = bdop.result[0];// recognized: pass the text on to the Tuling robot API
        this.Dispatcher.Invoke(new Action(() => { TextBox.Text = "语音识别内容为:" + inParam.perception.inputText.text; }));
        bg.RunWorkerAsync(inParam);
    }
    else
    {
        DownloadVoice("对不起,我没理解您在说什么。");
        this.Dispatcher.Invoke(new Action(() => { TextBlock.Text = "对不起,我没理解您在说什么。"; }));
    }
}
#endregion
#region 播放语音文件
// Output device and MP3 reader of the playback in progress.
WaveOut wo;
Mp3FileReader _mainOutputStream;

/// <summary>
/// Plays an MP3 file. Resources are released and the record button is
/// re-enabled by the PlaybackStopped handler once playback ends.
/// </summary>
/// <param name="FilePath">Path of the MP3 file to play.</param>
public void PlayAudio(string FilePath)
{
    if (!System.IO.File.Exists(FilePath))
    {
        // Nothing will play, so PlaybackStopped never fires; re-enable recording here.
        StartBtn.IsEnabled = true;
        return;
    }

    try
    {
        _mainOutputStream = new Mp3FileReader(FilePath);
        wo = new WaveOut(WaveCallbackInfo.FunctionCallback());
        wo.PlaybackStopped += PlaybackStopped;
        wo.Init(_mainOutputStream);
        wo.Play();
    }
    catch (Exception ex)
    {
        // Playback stays best-effort, but the failure is reported instead of
        // being swallowed, and nothing is left locked or disabled.
        System.Diagnostics.Debug.WriteLine("PlayAudio failed: " + ex);

        if (wo != null)
        {
            wo.PlaybackStopped -= PlaybackStopped;
            wo.Dispose();
            wo = null;
        }
        if (_mainOutputStream != null)
        {
            _mainOutputStream.Dispose();
            _mainOutputStream = null;
        }
        StartBtn.IsEnabled = true;
    }
}
/// <summary>
/// Runs when playback ends: disposes the output device and the MP3 reader,
/// then re-enables the record button.
/// </summary>
/// <param name="sender">The output device.</param>
/// <param name="e">Stop information (not inspected).</param>
void PlaybackStopped(object sender, StoppedEventArgs e)
{
    // Already cleaned up: nothing to do.
    if (wo == null)
    {
        return;
    }

    wo.Dispose();
    _mainOutputStream.Dispose();
    wo = null;
    StartBtn.IsEnabled = true;
}
#endregion
#region 下载临时语音文件
/// <summary>
/// Requests synthesized speech for <paramref name="responseText"/> from the
/// Baidu text-to-audio endpoint, saves it as a temporary MP3 and plays it.
/// </summary>
/// <param name="responseText">Text to speak; null is treated as empty.</param>
public void DownloadVoice(string responseText)
{
    string voicePath = @"./test.mp3";

    // The text becomes a query-string value, so it has to be escaped;
    // otherwise characters such as '&', '#' or '+' corrupt the request.
    string url = string.Format("http://tts.baidu.com/text2audio?idx=1&tex={0}&cuid=baidu_speech_demo&cod=2&lan=zh&ctp=1&pdt=1&spd=5&per=4&vol=5&pit=5", Uri.EscapeDataString(responseText ?? string.Empty));
    var httpWebRequest = (HttpWebRequest)WebRequest.Create(url);
    httpWebRequest.ContentType = "text/html;charset=UTF-8";
    httpWebRequest.Method = "GET";

    // Buffer the whole response in memory first, so the file is only
    // (re)written after the download has completed.
    byte[] audio;
    using (var httpResponse = (HttpWebResponse)httpWebRequest.GetResponse())
    using (Stream stream = httpResponse.GetResponseStream())
    using (var memoryStream = new MemoryStream())
    {
        stream.CopyTo(memoryStream);
        audio = memoryStream.ToArray();
    }

    // WriteAllBytes closes the file before returning, so the player can open it.
    File.WriteAllBytes(voicePath, audio);
    PlayAudio(voicePath);// play it
}
#endregion
}
#region 入参
#region 图灵机入参
/// <summary>
/// Input payload of a Tuling robot request: text, image and client information.
/// </summary>
class Perception
{
// The sub-objects are created up front, so callers can assign their members directly.
public InputText inputText = new InputText();
public InputImage inputImage = new InputImage();
public SelfInfo selfInfo = new SelfInfo();
/// <summary>
/// Text input.
/// </summary>
public class InputText
{
public string text { get; set; }// text entered directly
}
/// <summary>
/// Image input.
/// </summary>
public class InputImage
{
public string url { get; set; }// image address
}
/// <summary>
/// Client attributes.
/// </summary>
public class SelfInfo
{
public Location location = new Location();
/// <summary>
/// Geographic location information.
/// </summary>
public class Location
{
public string city { get; set; }// city
public string latitude { get; set; }// latitude; greater than 0 is north, less than 0 is south
public string longitude { get; set; }// longitude; greater than 0 is east, less than 0 is west
public string nearest_poi_name { get; set; }// name of the nearest street
public string province { get; set; }// province
public string street { get; set; }// street
}
}
}
/// <summary>
/// User parameters of a Tuling robot request.
/// </summary>
public class UserInfo
{
public string apiKey { get; set; }// robot identifier
public string userId { get; set; }// unique user identifier
public string groupId { get; set; }// unique group-chat identifier
public string userIdName { get; set; }// user's nickname inside the group
}
// Root object of a Tuling robot request; bg_DoWork serializes it to JSON as the POST body.
class InParam
{
/// <summary>
/// Input type: 0 = text (default), 1 = image, 2 = audio.
/// </summary>
public int reqType;
// Both parts are created up front, so callers can fill them in directly.
public Perception perception = new Perception();
public UserInfo userInfo = new UserInfo();
}
#endregion
#endregion
#region 出参
#region 图灵机出参
// "intent" section of the Tuling robot response.
// NOTE(review): field meanings are not shown in this file; confirm against the Tuling API documentation.
public class Intent
{
public int code { get; set; }// presumably the response/intent code; confirm
public string intentName { get; set; }
public string actionName { get; set; }
public Parameters parameters = new Parameters();
// Parameters attached to the intent.
public class Parameters
{
public string nearby_place { get; set; }
}
}
// One entry of the "results" list in the Tuling robot response.
public class Results
{
public int groupType { get; set; }
public string resultType { get; set; }
public Values values = new Values();
// Payload of the entry; bg_RunWorkerCompleted displays and speaks values.text.
public class Values
{
public string text { get; set; }
}
}
// Root object the Tuling robot response JSON is deserialized into.
class OutParam
{
public Intent intent = new Intent();
// bg_RunWorkerCompleted reads the first entry's values.text as the reply.
public List<Results> results = new List<Results>();
}
#endregion
#region 百度出参
// Object the Baidu speech-recognition result JSON is deserialized into.
class BaiduOutParam
{
public int err_no { get; set; }// the recording-stopped handler treats 0 as a successful recognition
public string err_msg { get; set; }
public string corpus_no { get; set; }
public string sn { get; set; }
public List<string> result { get; set; }// recognized text; the first entry is used
}
#endregion
#endregion
}