XAudio2 Tutorial 5

Author: Jay Tennant

A Brief Look at XAudio2: Streaming a Wave from Disk

XAudio2 is a sound API available on the Windows Vista/7+ and Xbox 360 platforms. This tutorial briefly demonstrates how to stream audio from disk.

The target audience should have at least intermediate-level C++ programming experience. Moderate familiarity with Win32 programming is required. In addition, the prerequisite studies on Events and Asynchronous I/O should be understood.

In this series, we use the rule: code first, ask questions later. The demo wave file is available here and at the end. So here is the code:

//by Jay Tennant 3/8/12
//A Brief Look at XAudio2: Playing a Stream
//demonstrates streaming a wave from disk
//win32developer.com
//this code provided free, as in public domain; score!

#include <windows.h>
#include <xaudio2.h>
#include "streamingWave.h"

//tells all threads it's time to quit;
//WinMain creates it as a manual-reset event and sets it during shutdown
HANDLE g_hAbortEvent;

//XAudio2 objects, created and released in WinMain;
//the streaming thread also calls g_engine to create its source voice
IXAudio2* g_engine = NULL;
IXAudio2MasteringVoice* g_master = NULL;

//voice callback whose only job is to signal an event each time the voice finishes a submitted buffer
struct StreamingVoiceCallback : public IXAudio2VoiceCallback
{
public:
	//signaled from OnBufferEnd(); created as a manual-reset event, so the waiter must reset it
	HANDLE m_hBufferEndEvent;

	StreamingVoiceCallback()
		: m_hBufferEndEvent( CreateEvent( NULL, TRUE, FALSE, NULL ) )
	{
	}

	virtual ~StreamingVoiceCallback()
	{
		CloseHandle( m_hBufferEndEvent );
	}

	//the one notification this stream reacts to
	STDMETHOD_( void, OnBufferEnd )( void* ) { SetEvent( m_hBufferEndEvent ); }

	//the remaining notifications must be overridden, but are deliberately left empty
	STDMETHOD_( void, OnVoiceProcessingPassStart )( UINT32 ) {}
	STDMETHOD_( void, OnVoiceProcessingPassEnd )() {}
	STDMETHOD_( void, OnStreamEnd )() {}
	STDMETHOD_( void, OnBufferStart )( void* ) {}
	STDMETHOD_( void, OnLoopEnd )( void* ) {}
	STDMETHOD_( void, OnVoiceError )( void*, HRESULT ) {}
};

//the context to send to the StreamProc;
//WinMain fills in szFile and hVoiceLoadEvent, StreamProc fills in pVoice
struct StreamContext
{
	//the source voice that is created on the thread;
	//StreamProc points this at its own local voice pointer, so it is only meaningful while that thread is running
	IXAudio2SourceVoice** pVoice;
	LPCTSTR szFile; //name of the file to stream
	HANDLE hVoiceLoadEvent; //lets us know the thread is set up for streaming, or encountered an error
};

//the streaming thread
DWORD WINAPI StreamProc( LPVOID pContext );

//program entry point: creates the XAudio2 engine and mastering voice, starts the streaming
//thread, lets the user start the stream from a message box, then shuts everything down.
//returns 0 on success, or
//	-1 if COM or the XAudio2 engine could not be initialized,
//	-2 if the mastering voice could not be created,
//	-3 if the events or the streaming thread could not be created,
//	-4 if the streaming thread reported that it could not prepare the stream
int WINAPI WinMain( HINSTANCE hInstance, HINSTANCE hPrevInstance, LPSTR lpCmdLine, int nShowCmd )
{
	//required by XAudio2; CoUninitialize() must only be paired with a successful call
	if( FAILED( CoInitializeEx( NULL, COINIT_MULTITHREADED ) ) )
		return -1;

	//create the engine
	if( FAILED( XAudio2Create( &g_engine ) ) )
	{
		CoUninitialize();
		return -1;
	}

	//create the mastering voice
	if( FAILED( g_engine->CreateMasteringVoice( &g_master ) ) )
	{
		g_engine->Release();
		CoUninitialize();
		return -2;
	}

	//prepare the context to send to the new thread
	StreamContext streamContext = { NULL, TEXT("thisIsATest.wav"), CreateEvent(NULL, FALSE, FALSE, NULL) };

	//create the abort event to end all threads
	g_hAbortEvent = CreateEvent( NULL, TRUE, FALSE, NULL );

	int exitCode = 0;
	HANDLE hStreamingVoiceThread = NULL;

	//create the streaming voice thread, but only if both events it relies on exist
	if( streamContext.hVoiceLoadEvent == NULL || g_hAbortEvent == NULL )
	{
		exitCode = -3;
	}
	else
	{
		DWORD dwThreadId = 0;
		hStreamingVoiceThread = CreateThread( NULL, 0, StreamProc, &streamContext, 0, &dwThreadId );
		if( hStreamingVoiceThread == NULL )
			exitCode = -3;
	}

	if( exitCode == 0 )
	{
		//wait for the streaming voice thread to signal that it's either
		//prepared the streaming, or that it's encountered an error
		WaitForSingleObject( streamContext.hVoiceLoadEvent, INFINITE );
		if( streamContext.pVoice == NULL || (*streamContext.pVoice) == NULL )
			exitCode = -4;
	}

	if( exitCode == 0 )
	{
		//main loop
		while( MessageBox( NULL, TEXT("Run the stream?"), NULL, MB_YESNO ) == IDYES )
		{
			//start the streaming voice, which was created on the other thread
			if( streamContext.pVoice != NULL )
				(*streamContext.pVoice)->Start();
		}
	}

	//single shutdown path, shared by success and every failure after this point
	if( hStreamingVoiceThread != NULL )
	{
		//signal all threads to end, then wait for the streaming thread;
		//it must be gone before streamContext and the engine are torn down
		SetEvent( g_hAbortEvent );
		WaitForSingleObject( hStreamingVoiceThread, INFINITE );
		CloseHandle( hStreamingVoiceThread );
	}

	//close all handles we opened, including the load event owned by the context
	if( g_hAbortEvent != NULL )
		CloseHandle( g_hAbortEvent );
	if( streamContext.hVoiceLoadEvent != NULL )
		CloseHandle( streamContext.hVoiceLoadEvent );

	//release the engine, cleanup
	g_engine->Release();
	CoUninitialize();

	return exitCode;
}

//the streaming thread procedure
DWORD WINAPI StreamProc( LPVOID pContext )
{
	//required by XAudio2
	CoInitializeEx( NULL, COINIT_MULTITHREADED );

	if( pContext == NULL )
	{
		CoUninitialize();
		return -1;
	}

	StreamContext* sc = (StreamContext*)pContext;

	//instantiate the voice's callback class
	StreamingVoiceCallback callback;

	//load a file for streaming, non-buffered disk reads (no system cacheing)
	StreamingWave inFile;
	if( !inFile.load( sc->szFile ) )
	{
		SetEvent( sc->hVoiceLoadEvent );
		CoUninitialize();
		return -3;
	}

	//create the voice
	IXAudio2SourceVoice* source = NULL;
	if( FAILED( g_engine->CreateSourceVoice( &source, inFile.wf(), 0, 2.0f, &callback ) ) )
	{
		SetEvent( sc->hVoiceLoadEvent );
		CoUninitialize();
		return -5;
	}

	//fill and queue the maximum number of buffers (except the one needed for reading new wave data)
	bool somethingsWrong = false;
	XAUDIO2_VOICE_STATE voiceState = {0};
	source->GetState( &voiceState );
	while( voiceState.BuffersQueued < STREAMINGWAVE_BUFFER_COUNT - 1 && !somethingsWrong )
	{
		//read and fill the next buffer to present
		switch( inFile.prepare() )
		{
		case StreamingWave::PR_EOF:
			//if end-of-file (or end-of-data), loop the file read
			inFile.resetFile(); //intentionally fall-through to loop sound
		case StreamingWave::PR_SUCCESS:
			//present the next available buffer
			inFile.swap();
			//submit another buffer
			source->SubmitSourceBuffer( inFile.buffer() );
			source->GetState( &voiceState );
			break;
		case StreamingWave::PR_FAILURE:
			somethingsWrong = true;
			break;
		}
	}

	//return the created voice through the context pointer
	sc->pVoice = &source;

	//signal that the voice has prepared for streaming, and ready to start
	SetEvent( sc->hVoiceLoadEvent );

	//group the events for the Wait function
	HANDLE hEvents[2] = { callback.m_hBufferEndEvent, g_hAbortEvent };

	bool quitting = false;
	while( !quitting )
	{
		//wait until either the source voice is ready for another buffer, or the abort signal is set
		DWORD eventFired = WaitForMultipleObjects( 2, hEvents, FALSE, INFINITE );
		switch( eventFired )
		{
		case 0: //buffer ended event for source voice
			//reset the event manually; why Manually? well why not?!
			ResetEvent( hEvents[0] );

			//make sure there's a full number of buffers
			source->GetState( &voiceState );
			while( voiceState.BuffersQueued < STREAMINGWAVE_BUFFER_COUNT - 1 && !somethingsWrong )
			{
				//read and fill the next buffer to present
				switch( inFile.prepare() )
				{
				case StreamingWave::PR_EOF:
					//if end-of-file (or end-of-data), loop the file read
					inFile.resetFile(); //intentionally fall-through to loop sound
				case StreamingWave::PR_SUCCESS:
					//present the next available buffer
					inFile.swap();
					//submit another buffer
					source->SubmitSourceBuffer( inFile.buffer() );
					source->GetState( &voiceState );
					break;
				case StreamingWave::PR_FAILURE:
					somethingsWrong = true;
					break;
				}
			}

			break;
		case 1: //abort event
			quitting = true;
			break;
		default: //something's wrong...
			quitting = true;
		}
	}

	//stop and destroy the voice
	source->Stop();
	source->FlushSourceBuffers();
	source->DestroyVoice();

	//close the streaming wave file;
	//this is done automatically in the class destructor,
	//so this is redundant
	inFile.close();

	//cleanup
	CoUninitialize();
	return 0;
}

And here is the helper class header waveInfo.h:

//waveInfo.h
//by Jay Tennant 3/8/12
//loads the information for a wave file using non-buffered disk reads
//win32developer.com
//this code provided free, as in public domain; score!

#ifndef WAVEINFO_H
#define WAVEINFO_H

#include <windows.h>
#include <xaudio2.h>

class WaveInfo
{
private:
	WAVEFORMATEXTENSIBLE m_wf;
	DWORD m_dataOffset;
	DWORD m_dataLength;

protected:
	//looks for the FOURCC chunk, returning -1 on failure
	DWORD findChunk( HANDLE hFile, FOURCC cc, BYTE* memBuffer, DWORD sectorAlignment ) {
		DWORD dwChunkId = 0;
		DWORD dwChunkSize = 0;
		DWORD i = 0; //guaranteed to be always aligned with the sectors, except when done searching
		OVERLAPPED overlapped = {0};
		DWORD sectorOffset = 0;
		DWORD bytesRead = 0;

		bool searching = true;
		while( searching )
		{
			sectorOffset = 0;
			overlapped.Offset = i;
			if( FALSE == ReadFile( hFile, memBuffer, sectorAlignment, &bytesRead, &overlapped ) )
			{
				return -1;
			}

			bool needAnotherRead = false;
			while( searching && !needAnotherRead )
			{
				if( 8 + sectorOffset > sectorAlignment ) //reached the end of our memory buffer
				{
					needAnotherRead = true;
				}
				else if( 8 + sectorOffset > bytesRead ) //reached EOF, and not found a match
				{
					return -1;
				}
				else //looking through the read memory
				{
					dwChunkId = *reinterpret_cast<DWORD*>( memBuffer + sectorOffset );
					dwChunkSize = *reinterpret_cast<DWORD*>( memBuffer + sectorOffset + 4 );

					if( dwChunkId == cc ) //found a match
					{
						searching = false;
						i += sectorOffset;
					}
					else //no match found, add to offset
					{
						dwChunkSize += 8; //add offsets of the chunk id, and chunk size data entries
						dwChunkSize += 1;
						dwChunkSize &= 0xfffffffe; //guarantees WORD padding alignment

						if( i == 0 && sectorOffset == 0 ) //just in case we're at the 'RIFF' chunk; the dwChunkSize here means the entire file size
							sectorOffset += 12;
						else
							sectorOffset += dwChunkSize;
					}
				}
			}

			//if still searching, search the next sector
			if( searching )
			{
				i += sectorAlignment;
			}
		}

		return i;
	}

	//reads a certain amount of data in, returning the number of bytes copied
	DWORD readData( HANDLE hFile, DWORD bytesToRead, DWORD fileOffset, void* pDest, BYTE* memBuffer, DWORD sectorAlignment ) {
		if( bytesToRead == 0 )
			return 0;

		DWORD totalAmountCopied = 0;
		DWORD copyBeginOffset = fileOffset % sectorAlignment;
		OVERLAPPED overlapped = {0};
		bool fetchingData = true;
		DWORD pass = 0;
		DWORD dwNumberBytesRead = 0;

		//while fetching data
		while( fetchingData )
		{
			//calculate the sector to read
			overlapped.Offset = fileOffset - (fileOffset % sectorAlignment) + pass * sectorAlignment;

			//read the amount in; if the read failed, return 0
			if( FALSE == ReadFile( hFile, memBuffer, sectorAlignment, &dwNumberBytesRead, &overlapped ) )
				return 0;

			//if the full buffer was not filled (ie. EOF)
			if( dwNumberBytesRead < sectorAlignment )
			{
				//calculate how much can be copied
				DWORD amountToCopy = 0;
				if( dwNumberBytesRead > copyBeginOffset )
					amountToCopy = dwNumberBytesRead - copyBeginOffset;
				if( totalAmountCopied + amountToCopy > bytesToRead )
					amountToCopy = bytesToRead - totalAmountCopied;

				//copy that amount over
				memcpy( ((BYTE*)pDest) + totalAmountCopied, memBuffer + copyBeginOffset, amountToCopy );

				//add to the total amount copied
				totalAmountCopied += amountToCopy;

				//end the fetching data loop
				fetchingData = false;
			}
			//else
			else
			{
				//calculate how much can be copied
				DWORD amountToCopy = sectorAlignment - copyBeginOffset;
				if( totalAmountCopied + amountToCopy > bytesToRead )
					amountToCopy = bytesToRead - totalAmountCopied;

				//copy that amount over
				memcpy( ((BYTE*)pDest) + totalAmountCopied, memBuffer + copyBeginOffset, amountToCopy );

				//add to the total amount copied
				totalAmountCopied += amountToCopy;

				//set the copyBeginOffset to 0
				copyBeginOffset = 0;
			}

			//if the total amount equals the bytesToRead, end the fetching data loop
			if( totalAmountCopied == bytesToRead )
				fetchingData = false;

			//increment the pass
			pass++;
		}

		//return the total amount copied
		return totalAmountCopied;
	}

public:
	WaveInfo( LPCTSTR szFile = NULL ) : m_dataOffset(0), m_dataLength(0) {
		memset( &m_wf, 0, sizeof(m_wf) );
		load( szFile );
	}
	WaveInfo( const WaveInfo& c ) : m_wf(c.m_wf), m_dataOffset(c.m_dataOffset), m_dataLength(c.m_dataLength) {}

	//loads the wave format, offset to the wave data, and length of the wave data;
	//returns true on success, false on failure
	bool load( LPCTSTR szFile ) {
		memset( &m_wf, 0, sizeof(m_wf) );
		m_dataOffset = 0;
		m_dataLength = 0;

		if( szFile == NULL )
			return false;

		//load the file without system cacheing
		HANDLE hFile = CreateFile( szFile, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_FLAG_NO_BUFFERING, NULL );

		if( hFile == INVALID_HANDLE_VALUE )
			return false;

		//figure the sector size for reading
		DWORD dwSectorSize = 0;
		{
			DWORD dw1, dw2, dw3;
			GetDiskFreeSpace( NULL, &dw1, &dwSectorSize, &dw2, &dw3 );
		}

		//allocate the aligned memory buffer, used in finding and reading the chunks in the file
		BYTE *memBuffer = (BYTE*)_aligned_malloc( dwSectorSize, dwSectorSize );
		if( memBuffer == NULL )
		{
			CloseHandle( hFile );
			return false;
		}

		//look for 'RIFF' chunk
		DWORD dwChunkOffset = findChunk( hFile, MAKEFOURCC( 'R', 'I', 'F', 'F' ), memBuffer, dwSectorSize );
		if(dwChunkOffset == -1)
		{
			_aligned_free( memBuffer );
			CloseHandle( hFile );
			return false;
		}

		DWORD riffFormat = 0;
		//inFile.seekg( dwChunkOffset + 8, std::ios::beg );
		//inFile.read( reinterpret_cast<char*>(&riffFormat), sizeof(riffFormat) );
		if( sizeof(DWORD) != readData( hFile, sizeof(riffFormat), dwChunkOffset + 8, &riffFormat, memBuffer, dwSectorSize ) )
		{
			_aligned_free( memBuffer );
			CloseHandle( hFile );
			return false;
		}
		if(riffFormat != MAKEFOURCC('W', 'A', 'V', 'E'))
		{
			_aligned_free( memBuffer );
			CloseHandle( hFile );
			return false;
		}

		//look for 'fmt ' chunk
		dwChunkOffset = findChunk( hFile, MAKEFOURCC( 'f', 'm', 't', ' ' ), memBuffer, dwSectorSize );
		if( dwChunkOffset == -1 )
		{
			_aligned_free( memBuffer );
			CloseHandle( hFile );
			return false;
		}

		//read in first the WAVEFORMATEX structure
		//inFile.seekg( dwChunkOffset + 8, std::ios::beg );
		//inFile.read( reinterpret_cast<char*>(&m_wf.Format), sizeof(m_wf.Format) );
		if( sizeof(m_wf.Format) != readData( hFile, sizeof(m_wf.Format), dwChunkOffset + 8, &m_wf.Format, memBuffer, dwSectorSize ) )
		{
			_aligned_free( memBuffer );
			CloseHandle( hFile );
			return false;
		}
		if( m_wf.Format.cbSize == (sizeof(m_wf) - sizeof(m_wf.Format)) )
		{
			//read in whole WAVEFORMATEXTENSIBLE structure
			//inFile.seekg( dwChunkOffset + 8, std::ios::beg );
			//inFile.read( reinterpret_cast<char*>(&m_wf), sizeof(m_wf) );
			if( sizeof(m_wf) != readData( hFile, sizeof(m_wf), dwChunkOffset + 8, &m_wf, memBuffer, dwSectorSize ) )
			{
				_aligned_free( memBuffer );
				CloseHandle( hFile );
				return false;
			}
		}

		//look for 'data' chunk
		dwChunkOffset = findChunk( hFile, MAKEFOURCC( 'd', 'a', 't', 'a' ), memBuffer, dwSectorSize );
		if(dwChunkOffset == -1)
		{
			_aligned_free( memBuffer );
			CloseHandle( hFile );
			return false;
		}

		//set the offset to the wave data, read in length, then return
		m_dataOffset = dwChunkOffset + 8;
		//inFile.seekg( dwChunkOffset + 4, std::ios::beg );
		//inFile.read( reinterpret_cast<char*>(&m_dataLength), 4 );
		if( sizeof(m_dataLength) != readData( hFile, sizeof(m_dataLength), dwChunkOffset + 4, &m_dataLength, memBuffer, dwSectorSize ) )
		{
			_aligned_free( memBuffer );
			CloseHandle( hFile );
			return false;
		}

		_aligned_free( memBuffer );

		CloseHandle( hFile );

		return true;
	}

	//returns true if the format is WAVEFORMATEXTENSIBLE; false if WAVEFORMATEX
	bool isExtensible() const { return (m_wf.Format.cbSize > 0); }
	//retrieves the WAVEFORMATEX structure
	const WAVEFORMATEX* wf() const { return &m_wf.Format; }
	//retrieves the WAVEFORMATEXTENSIBLE structure; meaningless if the wave is not WAVEFORMATEXTENSIBLE
	const WAVEFORMATEXTENSIBLE* wfex() const { return &m_wf; }
	//gets the offset from the beginning of the file to the actual wave data
	DWORD getDataOffset() const { return m_dataOffset; }
	//gets the length of the wave data
	DWORD getDataLength() const { return m_dataLength; }
};

#endif

And here is the helper class header streamingWave.h:

//streamingWave.h
//by Jay Tennant 3/10/12
//loads and streams an unbuffered wave file
//win32developer.com
//this code provided free, as in public domain; score!

#ifndef STREAMINGWAVE_H
#define STREAMINGWAVE_H

#include <windows.h>
#include <xaudio2.h>
#include "waveInfo.h"

//size in bytes of each streaming buffer;
//should remain a power of 2; should also stay 4096 or larger, just to guarantee a multiple of the disk sector size (most are at or below 4096)
#define STREAMINGWAVE_BUFFER_SIZE 65536
//number of buffers in the circular queue; the streaming loop keeps all but one of them queued on the voice;
//should never be less than 3
#define STREAMINGWAVE_BUFFER_COUNT 3

class StreamingWave : public WaveInfo
{
private:
	HANDLE m_hFile; //the file being streamed
	DWORD m_currentReadPass; //the current pass for reading; this number multiplied by STREAMINGWAVE_BUFFER_SIZE, adding getDataOffset(), represents the file position
	DWORD m_currentReadBuffer; //the current buffer used for reading from file; the presentation buffer is the one right before this
	bool m_isPrepared; //whether the buffer is prepared for the swap
	BYTE *m_dataBuffer; //the wave buffers; the size is STREAMINGWAVE_BUFFER_COUNT * STREAMINGWAVE_BUFFER_SIZE + m_sectorAlignment
	XAUDIO2_BUFFER m_xaBuffer[STREAMINGWAVE_BUFFER_COUNT]; //the xaudio2 buffer information
	DWORD m_sectorAlignment; //the sector alignment for reading; this value is added to the entire buffer's size for sector-aligned reading and reference
	DWORD m_bufferBeginOffset; //the starting offset for each buffer (when the file reads are offset by an amount)
public:
	StreamingWave( LPCTSTR szFile = NULL ) : WaveInfo( NULL ), m_hFile(INVALID_HANDLE_VALUE), m_currentReadPass(0), m_currentReadBuffer(0), m_isPrepared(false), 
		m_dataBuffer(NULL), m_sectorAlignment(0), m_bufferBeginOffset(0) {
			memset( m_xaBuffer, 0, sizeof(m_xaBuffer) );

			//figure the sector alignment
			DWORD dw1, dw2, dw3;
			GetDiskFreeSpace( NULL, &dw1, &m_sectorAlignment, &dw2, &dw3 );

			//allocate the buffers
			m_dataBuffer = (BYTE*)_aligned_malloc( STREAMINGWAVE_BUFFER_COUNT * STREAMINGWAVE_BUFFER_SIZE + m_sectorAlignment, m_sectorAlignment );
			memset( m_dataBuffer, 0, STREAMINGWAVE_BUFFER_COUNT * STREAMINGWAVE_BUFFER_SIZE + m_sectorAlignment );

			load( szFile );
	}
	StreamingWave( const StreamingWave& c ) : WaveInfo(c), m_hFile(c.m_hFile), m_currentReadPass(c.m_currentReadPass), m_currentReadBuffer(c.m_currentReadBuffer),
		m_isPrepared(c.m_isPrepared), m_dataBuffer(NULL), m_sectorAlignment(c.m_sectorAlignment), m_bufferBeginOffset(c.m_bufferBeginOffset) {
			if( m_sectorAlignment == 0 )
			{
				//figure the sector alignment
				DWORD dw1, dw2, dw3;
				GetDiskFreeSpace( NULL, &dw1, &m_sectorAlignment, &dw2, &dw3 );
			}

			//allocate the buffers
			m_dataBuffer = (BYTE*)_aligned_malloc( STREAMINGWAVE_BUFFER_COUNT * STREAMINGWAVE_BUFFER_SIZE + m_sectorAlignment, m_sectorAlignment );
			memset( m_dataBuffer, 0, STREAMINGWAVE_BUFFER_COUNT * STREAMINGWAVE_BUFFER_SIZE + m_sectorAlignment );

			memcpy( m_dataBuffer, c.m_dataBuffer, STREAMINGWAVE_BUFFER_COUNT * STREAMINGWAVE_BUFFER_SIZE + m_sectorAlignment );
			memcpy( m_xaBuffer, c.m_xaBuffer, sizeof(m_xaBuffer) );
			for( int i = 0; i < STREAMINGWAVE_BUFFER_COUNT; i++ )
				m_xaBuffer[i].pAudioData = m_dataBuffer + m_bufferBeginOffset + i * STREAMINGWAVE_BUFFER_SIZE;
	}
	~StreamingWave() {
		close();

		if( m_dataBuffer != NULL )
			_aligned_free( m_dataBuffer );
		m_dataBuffer = NULL;
	}

	//loads the file for streaming wave data
	bool load( LPCTSTR szFile ) {
		close();

		//test if the data can be loaded
		if( !WaveInfo::load( szFile ) )
			return false;

		//figure the offset for the wave data in allocated memory
		m_bufferBeginOffset = getDataOffset() % m_sectorAlignment;

		//open the file
		m_hFile = CreateFile( szFile, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_FLAG_NO_BUFFERING, NULL );
		if( m_hFile == INVALID_HANDLE_VALUE )
			return false;

		//set the xaudio2 buffer struct to refer to appropriate buffer starting points (but leave size of the data as 0)
		for( int i = 0; i < STREAMINGWAVE_BUFFER_COUNT; i++ )
			m_xaBuffer[i].pAudioData = m_dataBuffer + m_bufferBeginOffset + i * STREAMINGWAVE_BUFFER_SIZE;

		return true;
	}

	//closes the file stream, resetting this object's state
	void close() {
		if( m_hFile != INVALID_HANDLE_VALUE )
			CloseHandle( m_hFile );
		m_hFile = INVALID_HANDLE_VALUE;

		m_bufferBeginOffset = 0;
		memset( m_xaBuffer, 0, sizeof(m_xaBuffer) );
		memset( m_dataBuffer, 0, STREAMINGWAVE_BUFFER_COUNT * STREAMINGWAVE_BUFFER_SIZE + m_sectorAlignment );
		m_isPrepared = false;
		m_currentReadBuffer = 0;
		m_currentReadPass = 0;

		WaveInfo::load( NULL );
	}

	//swaps the presentation buffer to the next one
	void swap() {m_currentReadBuffer = (m_currentReadBuffer + 1) % STREAMINGWAVE_BUFFER_COUNT; m_isPrepared = false;}

	//gets the current buffer
	const XAUDIO2_BUFFER* buffer() const {return &m_xaBuffer[ (m_currentReadBuffer + STREAMINGWAVE_BUFFER_COUNT - 1) % STREAMINGWAVE_BUFFER_COUNT ];}

	//resets the file pointer to the beginning of the wave data;
	//this will not wipe out buffers that have been prepared, so it is safe to call
	//after a call to prepare() has returned PR_EOF, and before a call to swap() has
	//been made to present the prepared buffer
	void resetFile() {m_currentReadPass = 0;}

	enum PREPARE_RESULT {
		PR_SUCCESS = 0,
		PR_FAILURE = 1,
		PR_EOF = 2,
	};

	//prepares the next buffer for presentation;
	//returns PR_SUCCESS on success,
	//PR_FAILURE on failure,
	//and PR_EOF when the end of the data has been reached
	DWORD prepare() {
		//validation check
		if( m_hFile == INVALID_HANDLE_VALUE )
		{
			m_xaBuffer[ m_currentReadBuffer ].AudioBytes = 0;
			m_xaBuffer[ m_currentReadBuffer ].Flags = XAUDIO2_END_OF_STREAM;
			return PR_FAILURE;
		}

		//are we already prepared?
		if( m_isPrepared )
			return PR_SUCCESS;

		//figure the offset of the file pointer
		OVERLAPPED overlapped = {0};
		overlapped.Offset = getDataOffset() - m_bufferBeginOffset + STREAMINGWAVE_BUFFER_SIZE * m_currentReadPass;

		//preliminary end-of-data check
		if( overlapped.Offset + m_bufferBeginOffset > getDataLength() + getDataOffset() )
		{
			m_xaBuffer[ m_currentReadBuffer ].AudioBytes = 0;
			m_xaBuffer[ m_currentReadBuffer ].Flags = XAUDIO2_END_OF_STREAM;
			m_isPrepared = true;
			return PR_EOF;
		}

		//read in data from file
		DWORD dwNumBytesRead = 0;
		if( FALSE == ReadFile( m_hFile, m_dataBuffer + STREAMINGWAVE_BUFFER_SIZE * m_currentReadBuffer, STREAMINGWAVE_BUFFER_SIZE + m_sectorAlignment, &dwNumBytesRead, &overlapped ) )
		{
			m_xaBuffer[ m_currentReadBuffer ].AudioBytes = 0;
			m_xaBuffer[ m_currentReadBuffer ].Flags = XAUDIO2_END_OF_STREAM;
			return PR_FAILURE;
		}

		//force dwNumBytesRead to be less than the actual amount read if reading past the end of the data chunk
		if( dwNumBytesRead + STREAMINGWAVE_BUFFER_SIZE * m_currentReadPass > getDataLength() )
		{
			if( STREAMINGWAVE_BUFFER_SIZE * m_currentReadPass <= getDataLength() )
				dwNumBytesRead = min( dwNumBytesRead, getDataLength() - STREAMINGWAVE_BUFFER_SIZE * m_currentReadPass ); //bytes read are from overlapping file chunks
			else
				dwNumBytesRead = 0; //none of the bytes are from the correct data chunk; this should never happen due to the preliminary end-of-data check, unless the file was wrong
		}

		//end-of-file/data check
		if( dwNumBytesRead < STREAMINGWAVE_BUFFER_SIZE + m_sectorAlignment )
		{
			//check for case where less than the sectorAlignment amount of data is still available in the file;
			//of course, only do something if there isn't that amount of data left
			if( dwNumBytesRead < m_bufferBeginOffset )
			{//no valid data at all; this shouldn't happen since the preliminary end-of-data check happened already, unless the file was wrong
				m_xaBuffer[ m_currentReadBuffer ].AudioBytes = 0;
				m_xaBuffer[ m_currentReadBuffer ].Flags = XAUDIO2_END_OF_STREAM;
				m_isPrepared = true;

				//increment the current read pass
				m_currentReadPass++;
				return PR_EOF;
			}
			else if( dwNumBytesRead - m_bufferBeginOffset <= STREAMINGWAVE_BUFFER_SIZE )
			{//some valid data; this should always happen for the end-of-file and end-of-data conditions
				m_xaBuffer[ m_currentReadBuffer ].AudioBytes = dwNumBytesRead - m_bufferBeginOffset; //do not include the data offset as valid data
				m_xaBuffer[ m_currentReadBuffer ].Flags = XAUDIO2_END_OF_STREAM;
				m_isPrepared = true;

				//increment the current read pass
				m_currentReadPass++;
				return PR_EOF;
			}
		}

		//set the amount of data available;
		//this should always be STREAMINGWAVE_BUFFER_SIZE, unless one of the previous conditions (end-of-file, end-of-data) were met
		m_xaBuffer[ m_currentReadBuffer ].AudioBytes = STREAMINGWAVE_BUFFER_SIZE;
		m_xaBuffer[ m_currentReadBuffer ].Flags = 0;
		m_isPrepared = true;

		//increment the current read pass
		m_currentReadPass++;

		//return success
		return PR_SUCCESS;
	}
};

#endif

Row, row, row your boat...

Gently loading a stream requires an incredible amount of code because, as discussed in previous tutorials, we are using non-buffered disk I/O--that is, Windows is not to cache the file we are reading. As a reminder, this requires that we read on disk sector boundaries, read data in multiples of the disk sector size, and read into sector-aligned memory. Are you still singing "Merrily, merrily, merrily" at this point?

Although that is a stiff requirement, the painstaking reads have been encapsulated away into the WaveInfo and StreamingWave classes. They are both heavily commented, so I will not discuss their intricacies (it's dry file parsing anyway).

So, to begin with the main source file:

//voice callback whose only job is to signal an event each time the voice finishes a submitted buffer
struct StreamingVoiceCallback : public IXAudio2VoiceCallback
{
public:
	//signaled from OnBufferEnd(); created as a manual-reset event, so the waiter must reset it
	HANDLE m_hBufferEndEvent;

	StreamingVoiceCallback()
		: m_hBufferEndEvent( CreateEvent( NULL, TRUE, FALSE, NULL ) )
	{
	}

	virtual ~StreamingVoiceCallback()
	{
		CloseHandle( m_hBufferEndEvent );
	}

	//the one notification this stream reacts to
	STDMETHOD_( void, OnBufferEnd )( void* ) { SetEvent( m_hBufferEndEvent ); }

	//the remaining notifications must be overridden, but are deliberately left empty
	STDMETHOD_( void, OnVoiceProcessingPassStart )( UINT32 ) {}
	STDMETHOD_( void, OnVoiceProcessingPassEnd )() {}
	STDMETHOD_( void, OnStreamEnd )() {}
	STDMETHOD_( void, OnBufferStart )( void* ) {}
	STDMETHOD_( void, OnLoopEnd )( void* ) {}
	STDMETHOD_( void, OnVoiceError )( void*, HRESULT ) {}
};

The sole purpose of this callback class is to signal an event when a buffer has been consumed, which allows our application to feed another buffer into the voice. The callback class can do other operations, but it's recommended to do extremely little so as not to block for an extended duration (this will upset the XAudio2 engine). In this case, we only cared about overriding the OnBufferEnd() call to signal the event.

//the context to send to the StreamProc
struct StreamContext
{
	IXAudio2SourceVoice** pVoice; //the source voice that is created on the thread
	LPCTSTR szFile; //name of the file to stream
	HANDLE hVoiceLoadEvent; //lets us know the thread is set up for streaming, or encountered an error
};

...

StreamContext streamContext = { NULL, TEXT("thisIsATest.wav"), CreateEvent(NULL, FALSE, FALSE, NULL) };

An instance of the StreamContext structure is sent to the thread that streams the sound file from disk. pVoice will contain the pointer to the source voice that is created for the stream. szFile is the name of the file passed to the procedure, and hVoiceLoadEvent is signaled when either the voice is ready, or an error was encountered (such as "file not found"). We create one that has an auto-reset event, though it doesn't matter whether it's auto-reset or not.

HANDLE hStreamingVoiceThread = CreateThread( NULL, 0, StreamProc, &streamContext, 0, &dwThreadId );

Creating the thread. Here, we send the stream context that we created earlier.

WaitForSingleObject( streamContext.hVoiceLoadEvent, INFINITE );
if( streamContext.pVoice == NULL || (*streamContext.pVoice) == NULL )
{
	SetEvent( g_hAbortEvent );
	WaitForSingleObject( hStreamingVoiceThread, INFINITE );
	...
}

After creating the thread, we must wait until the thread has either successfully loaded the file, or reports an error. If an error is reported, we signal that all threads should abort and wait for all of them to end, then clean up. Since we're only waiting for one thread, it seems somewhat redundant to signal the thread should abort, but this scales easily to multiple threads. Obviously, you should use WaitForMultipleObjects() instead if there are multiple threads.

while( MessageBox( NULL, TEXT("Run the stream?"), NULL, MB_YESNO ) == IDYES )
{
	//start the streaming voice, which was created on the other thread
	if( streamContext.pVoice != NULL )
		(*streamContext.pVoice)->Start();
}

This is the main message loop. It's okay to call the Start() method of the voice multiple times, since repeated calls are ignored if it's already running.

The rest of the main function is pretty straight-forward, so now to discuss the streaming thread procedure.

StreamingVoiceCallback callback;

A callback is instantiated in the function, and referenced only with the voice created in that function. This allows multiple simultaneous streams to be running and buffering on different threads. Again, the point of our callback class is to alert this thread that the source voice created here has completed another buffer and is ready for another to add to the queue.

StreamingWave inFile;
if( !inFile.load( sc->szFile ) )
	...

Using the StreamingWave helper class, we can load a wave file for streaming without having to deal with the sector-aligned reading, manage the buffers and offsets, or populate XAUDIO2_BUFFER structures. It takes a load of work away. Feel free to modify this helper to fit your needs!

IXAudio2SourceVoice* source = NULL;
if( FAILED( g_engine->CreateSourceVoice( &source, inFile.wf(), 0, 2.0f, &callback ) ) )

Creates a source voice, using the wave's file format and the callback class we instantiated. The other values are default values.

XAUDIO2_VOICE_STATE voiceState = {0};
source->GetState( &voiceState );

The voice's state is of special interest when it comes to streaming because we want to guarantee smooth playback of audio data. The XAUDIO2_VOICE_STATE structure is defined as:

typedef struct XAUDIO2_VOICE_STATE {
    void *pCurrentBufferContext;
    UINT32 BuffersQueued;
    UINT64 SamplesPlayed;
} XAUDIO2_VOICE_STATE;

So the BuffersQueued member will contain the number of buffers currently in queue. For smooth playback, this should be about 1 less than the number of buffers available. But why? Briefly, it has to do with the design of streaming audio.

The reason an application should stream audio is to save RAM and CPU processing, or because the source data is actually from a stream (i.e., microphone, filter graph, just-in-time rendering, etc., which we will not consider in our discussion right now). What if the user has 8GB of memory, and the audio files are 2MB ogg vorbis or mp3 files? Streaming might not be necessary. The downside to this is that the CPU would have to perform more calculations to decode the data into raw PCM data. Usually, this is not a big issue (for example, according to the Microsoft Documentation, WMA files can be rendered and decoded at a 0.35% CPU hit on a 3GHz CPU). To reduce that CPU hit, raw PCM wave data (such as wav files) could be loaded instead. But uncompressed wave data can be several times larger, so where there may be a 2MB mp3 (at 256kbps), there would be a wav file of comparable quality at 11MB (at 1411 kbps). And what if the user does not have a large amount of memory? So streaming is the answer for an application that needs to save RAM and CPU.

Since the entire file is not loaded at once in a stream, only some of the data is read and placed in temporary buffers. These buffers are arranged in a circular queue so that when a buffer is finished being used, it can be reused again--and on and on until the file is finished streaming.

In audio engine design, it is good to have at least 3 buffers working in a circular queue. When one of the buffers is being processed, the other buffers are filled and queued to be processed. As soon as the one that was being processed finishes, it can be refilled with more data from the file and added back to the queue to be processed. This will continue until the stream is finished or aborted.

XAudio2 can have a number of buffers queued, and it will report when each buffer is finished through the OnBufferEnd() method of the IXAudio2VoiceCallback interface. The GetState() method of the voice we created will report the number of buffers queued. After checking whether all of them (save the one being processed) are queued, we submit additional buffers if necessary, as in the following lines of code:

while( voiceState.BuffersQueued < STREAMINGWAVE_BUFFER_COUNT - 1 && !somethingsWrong )
{
	//read and fill the next buffer to present
	switch( inFile.prepare() )
	{
	case StreamingWave::PR_EOF:
		//if end-of-file (or end-of-data), loop the file read
		inFile.resetFile(); //intentionally fall-through to loop sound
	case StreamingWave::PR_SUCCESS:
		//present the next available buffer
		inFile.swap();
		//submit another buffer
		source->SubmitSourceBuffer( inFile.buffer() );
		source->GetState( &voiceState );
		break;
	case StreamingWave::PR_FAILURE:
		somethingsWrong = true;
		break;
	}
}

Briefly, inFile.prepare() will fill the next available buffer in the queue, and will return either PR_EOF, PR_SUCCESS, or PR_FAILURE. inFile.resetFile() will move the file pointer back to the beginning of the sound data. inFile.swap() rotates the circular queue of buffers, where the buffer returned by inFile.buffer() is the one that was just prepared and presented. See the comments in the StreamingWave header for more information.

//group the events for the Wait function
HANDLE hEvents[2] = { callback.m_hBufferEndEvent, g_hAbortEvent };

bool quitting = false;
while( !quitting )
{
	//wait until either the source voice is ready for another buffer, or the abort signal is set
	DWORD eventFired = WaitForMultipleObjects( 2, hEvents, FALSE, INFINITE );
	switch( eventFired )
	{

	...

	}
}

So we wait for either the callback's buffer-end event to be signaled, or the abort event. If it was the buffer-end event, we progress the circular queue, submit new buffers, and wait for the next event.

Clean-up is self-explanatory. It's important to note the order of the destruction: first stop, clear, and destroy the voice; then destroy the file buffers. If the file is closed before the source finishes processing the buffers, there can be access violations up the wazoo. And nobody wants that, especially up the wazoo.

And that wraps it up! Feel free to use/modify the code any way you see fit.

Things to Try

Load multiple files to stream by creating multiple threads, and run them simultaneously.
Load a wma file. Be careful, it will probably be very loud static if it works.
Don't let the stream reset to the file's beginning until the user tells it to by using events.

Additional Information

Demo wave file


Next tutorial

Tutorial 6 - XAudio2: Introducing Submix Voices, Controlling Volume