语音数据（Audio Data）-阿里云开发者社区

Kinect for windows有四个麦克风，组成一个麦克风阵列，可以采集外部传来的声音，不但可以收集，还可以除噪、识别。Kinect for windows可以识别正前方100度范围内的声音。Kinect for windows通过两组数据来定位声音的方向，一个是波束角度（BeamAngle），一个是声源角度（SoundSourceAngle）。波束角度是把100度分成10份，分别为50，40，30，20，10，0，-10，-20，-30，-40，-50这11个值，只要声源仍在相应的范围内，采样就不会重新进行，比如声源现在在Kinect for windows的0度方向，大体上在-5到5的范围内，波束角度都不会更新。声源角度就不一样了，只要声音达到一定的分贝，它就采信，同时它还附带一个可信度的值，为0~1之间，1是最可信的，即声音来源方向的确定性最高。我们可以通过这两个参数结合骨骼定位，对Kinect for windows周围的人进行较为准确的定位。

同时，Kinect对象有一个AudioSource的属性，它有一个Start方法，可以返回录音的数据流。我们对这些数据进行处理就可以。

创建一个WinForm

现在看后台代码：

/// <summary>
/// Demo form that records audio from a Kinect sensor's microphone array into a
/// 16 kHz / 16-bit / mono PCM WAV file and displays the beam angle, the sound
/// source angle and its confidence while recording.
/// </summary>
public partial class Form1 : Form
{
    // Destination of the recording.
    private const string OutputPath = "F:/a/a.wav";
    // PCM format written to the WAV header (the values the original header used).
    private const int SampleRate = 16000;
    private const int Channels = 1;
    private const int BlockAlign = 2;       // bytes per sample frame
    private const int BitsPerSample = 16;   // 8 * BlockAlign for mono
    private const int BytesPerSecond = SampleRate * BlockAlign;
    // Canonical PCM header: 12 (RIFF/WAVE) + 24 (fmt chunk) + 8 (data chunk header).
    private const int WavHeaderSize = 44;
    // The RIFF size fields are unsigned 32-bit, which bounds the recording length.
    private const int MaxRecordSeconds = (int)((uint.MaxValue - 36) / BytesPerSecond);

    KinectSensor kinectsensor = null;
    private Thread readingThread;
    // Recording length in seconds; parsed on the UI thread, read by the worker.
    private int recordSeconds;

    public Form1()
    {
        InitializeComponent();
        // Cross-thread checks are intentionally left enabled: every UI update that
        // may originate off the UI thread goes through RunOnUiThread instead.
    }

    /// <summary>
    /// Picks the first connected Kinect sensor and starts it.
    /// </summary>
    private void Form1_Shown(object sender, EventArgs e)
    {
        foreach (KinectSensor ks in KinectSensor.KinectSensors)
        {
            if (ks.Status != KinectStatus.Connected)
            {
                continue;
            }
            kinectsensor = ks;
            kinectsensor.Start();
            this.Text = "Kinect开始工作……";
            return;
        }
    }

    /// <summary>
    /// Stops audio capture and the sensor when the form closes.
    /// </summary>
    private void Form1_FormClosing(object sender, FormClosingEventArgs e)
    {
        // No sensor was found in Form1_Shown: nothing to stop.
        if (kinectsensor == null)
        {
            return;
        }
        // Detach the handlers so no callback targets the closing form.
        kinectsensor.AudioSource.BeamAngleChanged -= this.AudioSourceBeamChanged;
        kinectsensor.AudioSource.SoundSourceAngleChanged -= this.AudioSourceSoundSourceAngleChanged;
        if (kinectsensor.Status == KinectStatus.Connected)
        {
            this.kinectsensor.AudioSource.Stop();
            kinectsensor.Stop();
            MessageBox.Show("Kinect结束工作！");
        }
    }

    /// <summary>
    /// Worker-thread body: writes the WAV header, then copies up to
    /// <c>recordSeconds</c> worth of audio from the Kinect audio stream to the file.
    /// </summary>
    private void AudioReadingThread()
    {
        // Widen before multiplying so long recordings cannot overflow int.
        long len = (long)recordSeconds * BytesPerSecond;
        try
        {
            using (FileStream stream = new FileStream(OutputPath, FileMode.Create))
            {
                byte[] header = BuildWavHeader(len);
                stream.Write(header, 0, header.Length);

                long written = 0;
                using (Stream audiostream = kinectsensor.AudioSource.Start())
                {
                    byte[] buf = new byte[1024];
                    while (written < len)
                    {
                        // Never request more than what is still missing, so the data
                        // chunk cannot grow past the length declared in the header.
                        int want = (int)Math.Min(buf.Length, len - written);
                        int count = audiostream.Read(buf, 0, want);
                        if (count <= 0)
                        {
                            break;
                        }
                        stream.Write(buf, 0, count);
                        written += count;
                    }
                }

                // The audio stream ended early: patch the header with the real size.
                if (written != len)
                {
                    header = BuildWavHeader(written);
                    stream.Seek(0, SeekOrigin.Begin);
                    stream.Write(header, 0, header.Length);
                }
            }
        }
        catch (IOException ex)
        {
            ReportRecordingError(ex.Message);
        }
        catch (UnauthorizedAccessException ex)
        {
            ReportRecordingError(ex.Message);
        }
        finally
        {
            // Re-enable the record button whether or not recording succeeded.
            RunOnUiThread(delegate { StartRecord_But.Enabled = true; });
        }
    }

    /// <summary>
    /// Builds the 44-byte RIFF/WAVE header for a PCM stream whose data chunk is
    /// <paramref name="dataLength"/> bytes long.
    /// </summary>
    private static byte[] BuildWavHeader(long dataLength)
    {
        byte[] header = new byte[WavHeaderSize];
        Encoding.ASCII.GetBytes("RIFF", 0, 4, header, 0);
        // RIFF chunk size = everything after the first 8 bytes = 36 + data length.
        longToIntBinary(dataLength + WavHeaderSize - 8, header, 4);
        Encoding.ASCII.GetBytes("WAVEfmt ", 0, 8, header, 8);
        longToIntBinary(16, header, 16);             // size of the fmt chunk body
        toShortBinary(1, header, 20);                // format tag: 1 = PCM
        toShortBinary(Channels, header, 22);
        longToIntBinary(SampleRate, header, 24);
        longToIntBinary(BytesPerSecond, header, 28);
        toShortBinary(BlockAlign, header, 32);
        toShortBinary(BitsPerSample, header, 34);
        Encoding.ASCII.GetBytes("data", 0, 4, header, 36);
        longToIntBinary(dataLength, header, 40);
        return header;
    }

    /// <summary>
    /// Shows a recording failure to the user from any thread.
    /// </summary>
    private void ReportRecordingError(string message)
    {
        RunOnUiThread(delegate { MessageBox.Show(this, "录音失败：" + message); });
    }

    /// <summary>
    /// Runs <paramref name="action"/> on the UI thread; does nothing once the form
    /// is disposed or before its handle exists.
    /// </summary>
    private void RunOnUiThread(MethodInvoker action)
    {
        if (IsDisposed || !IsHandleCreated)
        {
            return;
        }
        if (!InvokeRequired)
        {
            action();
            return;
        }
        try
        {
            BeginInvoke(action);
        }
        catch (InvalidOperationException)
        {
            // The form was closed between the check above and the call; the
            // update has no target any more, so dropping it is correct.
        }
    }

    /// <summary>
    /// Displays the new beam angle.
    /// </summary>
    private void AudioSourceBeamChanged(object sender, BeamAngleChangedEventArgs e)
    {
        double angle = e.Angle;
        // NOTE(review): the event may be raised off the UI thread, so marshal.
        RunOnUiThread(delegate { Beam_Lab.Text = "波束角度：" + angle; });
    }

    /// <summary>
    /// Displays the new sound source angle and its confidence level.
    /// </summary>
    private void AudioSourceSoundSourceAngleChanged(object sender, SoundSourceAngleChangedEventArgs e)
    {
        double confidence = e.ConfidenceLevel;
        double angle = e.Angle;
        RunOnUiThread(delegate
        {
            Confidence_Lab.Text = "可信度：" + confidence;
            Sound_Lab.Text = "来源角度：" + angle;
        });
    }

    /// <summary>
    /// Writes the low 32 bits of <paramref name="val"/> into
    /// <paramref name="array"/> at <paramref name="offset"/>, least significant
    /// byte first.
    /// </summary>
    private static void longToIntBinary(long val, byte[] array, int offset)
    {
        array[offset] = (byte)(val & 0xff);
        array[offset + 1] = (byte)(val >> 8 & 0xff);
        array[offset + 2] = (byte)(val >> 16 & 0xff);
        array[offset + 3] = (byte)(val >> 24 & 0xff);
    }

    /// <summary>
    /// Writes a single byte as a two-byte value (low byte first, high byte zero).
    /// </summary>
    private static void byteToShortBinary(byte val, byte[] array, int offset)
    {
        array[offset] = (byte)(val & 0xff);
        array[offset + 1] = 0x0;
    }

    /// <summary>
    /// Writes the low 16 bits of <paramref name="val"/> into
    /// <paramref name="array"/> at <paramref name="offset"/>, least significant
    /// byte first. Taking an int allows values above 0x7FFF to be stored.
    /// </summary>
    private static void toShortBinary(int val, byte[] array, int offset)
    {
        array[offset] = (byte)(val & 0xff);
        array[offset + 1] = (byte)(val >> 8 & 0xff);
    }

    /// <summary>
    /// Validates the requested duration, hooks the angle events and starts the
    /// recording worker thread.
    /// </summary>
    private void StartRecord_But_Click(object sender, EventArgs e)
    {
        if (kinectsensor == null)
        {
            MessageBox.Show("未找到已连接的Kinect！");
            return;
        }

        // Parse on the UI thread: Second_TB holds the recording length in seconds.
        int seconds;
        if (!int.TryParse(Second_TB.Text, out seconds) || seconds < 1 || seconds > MaxRecordSeconds)
        {
            MessageBox.Show("请输入有效的录音时长（秒）！");
            return;
        }
        recordSeconds = seconds;

        // Remove before adding so repeated recordings never stack duplicate handlers.
        kinectsensor.AudioSource.BeamAngleChanged -= this.AudioSourceBeamChanged;
        kinectsensor.AudioSource.BeamAngleChanged += this.AudioSourceBeamChanged;
        kinectsensor.AudioSource.SoundSourceAngleChanged -= this.AudioSourceSoundSourceAngleChanged;
        kinectsensor.AudioSource.SoundSourceAngleChanged += this.AudioSourceSoundSourceAngleChanged;

        // Disable first so the worker's re-enable can never be overwritten.
        StartRecord_But.Enabled = false;

        this.readingThread = new Thread(AudioReadingThread);
        // Background thread: an in-progress recording must not keep the process
        // alive after the form has been closed.
        this.readingThread.IsBackground = true;
        this.readingThread.Start();
    }
}

本文转自桂素伟51CTO博客，原文链接： http://blog.51cto.com/axzxs/1186000，如需转载请自行联系原作者

语音数据（Audio Data）

热门文章

最新文章

相关电子书