GPUCacheOptimizer.cc 14.9 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
/*===========================================================================*\
 *                                                                           *
 *                              OpenFlipper                                  *
 *      Copyright (C) 2001-2011 by Computer Graphics Group, RWTH Aachen      *
 *                           www.openflipper.org                             *
 *                                                                           *
 *---------------------------------------------------------------------------*
 *  This file is part of OpenFlipper.                                        *
 *                                                                           *
 *  OpenFlipper is free software: you can redistribute it and/or modify      *
 *  it under the terms of the GNU Lesser General Public License as           *
 *  published by the Free Software Foundation, either version 3 of           *
 *  the License, or (at your option) any later version with the              *
 *  following exceptions:                                                    *
 *                                                                           *
 *  If other files instantiate templates or use macros                       *
 *  or inline functions from this file, or you compile this file and         *
 *  link it with other files to produce an executable, this file does        *
 *  not by itself cause the resulting executable to be covered by the        *
 *  GNU Lesser General Public License. This exception does not however       *
 *  invalidate any other reasons why the executable file might be            *
 *  covered by the GNU Lesser General Public License.                        *
 *                                                                           *
 *  OpenFlipper is distributed in the hope that it will be useful,           *
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of           *
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the            *
 *  GNU Lesser General Public License for more details.                      *
 *                                                                           *
 *  You should have received a copy of the GNU Lesser General Public         *
 *  License along with OpenFlipper. If not,                                  *
 *  see <http://www.gnu.org/licenses/>.                                      *
 *                                                                           *
\*===========================================================================*/

/*===========================================================================*\
 *                                                                           *
Jan Möbius's avatar
Jan Möbius committed
37 38 39
 *   $Revision$															 *
 *   $Author$														 *
 *   $Date$													 *
40 41 42
 *                                                                           *
\*===========================================================================*/

Jan Möbius's avatar
Jan Möbius committed
43
//=============================================================================
44 45

#include "GPUCacheOptimizer.hh"
46 47
#include <cassert>
#include <cmath>
48
#include <vector>
49
#include <cstring>
50

Jan Möbius's avatar
Jan Möbius committed
51
//=============================================================================
52 53 54 55

namespace ACG
{

Jan Möbius's avatar
Jan Möbius committed
56
//=============================================================================
57

Jan Möbius's avatar
Jan Möbius committed
58 59 60 61 62 63
/** \brief Remember the source index buffer and allocate the triangle map.
 *
 * @param NumTris   number of triangles referenced by the index buffer
 * @param NumVerts  number of vertices
 * @param IndexSize size of one index in bytes
 * @param pIndices  source index buffer (only the pointer is stored)
 *
 * The triangle map gets one slot per triangle; its entries are not
 * initialized here.
 */
GPUCacheOptimizer::GPUCacheOptimizer(unsigned int NumTris,
                                     unsigned int NumVerts,
                                     unsigned int IndexSize,
                                     const void*  pIndices)
  : m_NumVerts(NumVerts),
    m_NumTris(NumTris),
    m_IndexSize(IndexSize),
    m_pIndices(pIndices),
    m_NumTransformations(0)
{
  // one remapping slot per triangle
  m_pTriMap = new unsigned int[m_NumTris];
}

/// Frees the triangle map array.
GPUCacheOptimizer::~GPUCacheOptimizer()
{
  delete[] m_pTriMap;
}

Jan Möbius's avatar
Jan Möbius committed
73
//=============================================================================
74

75
unsigned int GPUCacheOptimizer::GetIndex(unsigned int i) const
76 77 78 79 80 81
{
	assert(i < m_NumTris * 3);

	return GetIndex(i, m_IndexSize, m_pIndices);
}

82
/** \brief Read entry i from an index buffer with 1, 2 or 4 byte indices.
 *
 * @param i         position in the index buffer
 * @param IndexSize size of one index in bytes (1, 2 or 4)
 * @param pIB       index buffer to read from
 * @return the index at position i, or 0xFFFFFFFF if IndexSize is not
 *         supported (an assert fires in that case)
 */
unsigned int GPUCacheOptimizer::GetIndex(unsigned int i, unsigned int IndexSize, const void* pIB)
{
  switch (IndexSize)
  {
  case 4: return static_cast<const unsigned int*>(pIB)[i];
  case 2: return static_cast<const unsigned short*>(pIB)[i];
  case 1: return static_cast<const unsigned char*>(pIB)[i];
  default:
    // unsupported index width: the check has to look at IndexSize,
    // not at the position i
    assert(IndexSize == 1 || IndexSize == 2 || IndexSize == 4);
  }
  return 0xFFFFFFFF;
}

95
/** \brief Write val to entry i of an index buffer with 1, 2 or 4 byte indices.
 *
 * @param i         position in the index buffer
 * @param val       value to store; truncated to the index width
 * @param IndexSize size of one index in bytes (1, 2 or 4)
 * @param pIB       index buffer to write to
 *
 * Nothing is written if IndexSize is not supported (an assert fires then).
 */
void GPUCacheOptimizer::SetIndex(unsigned int i, unsigned int val, unsigned int IndexSize, void* pIB)
{
  switch (IndexSize)
  {
  case 4: static_cast<unsigned int*>(pIB)[i] = val; break;
  case 2: static_cast<unsigned short*>(pIB)[i] = static_cast<unsigned short>(val); break;
  case 1: static_cast<unsigned char*>(pIB)[i] = static_cast<unsigned char>(val); break;
  default:
    // unsupported index width: the check has to look at IndexSize,
    // not at the position i
    assert(IndexSize == 1 || IndexSize == 2 || IndexSize == 4);
  }
}

Jan Möbius's avatar
Jan Möbius committed
107
//=============================================================================
108

109
void GPUCacheOptimizer::WriteIndexBuffer(unsigned int DstIndexSize, void* pDst)
110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125
{
	assert(DstIndexSize == 1 ||DstIndexSize == 2 || DstIndexSize == 4);
	// TODO: warning log, if DstIndexSize < m_IndexSize

	// support for 'in-place' operation via tmpbuf copy
	char* pSrc = (char*)m_pIndices;

	int bTmpCopy = 0;
	if (pDst == pSrc)
	{
		pSrc = new char[m_IndexSize * m_NumTris * 3];
		memcpy(pSrc, m_pIndices, m_IndexSize * m_NumTris * 3);

		bTmpCopy = 1;
	}

126
	for (unsigned int i = 0; i < m_NumTris; ++i)
127 128 129
	{
		for (int k = 0; k < 3; ++k)	
		{
130
			unsigned int TriVertex = GetIndex(m_pTriMap[i] * 3 + k, m_IndexSize, pSrc);
131 132 133 134 135 136 137 138 139

			// copy remapped tri indices
			SetIndex(i * 3 + k, TriVertex, DstIndexSize, pDst);
		}
	}

	if (bTmpCopy) delete [] pSrc;
}

Jan Möbius's avatar
Jan Möbius committed
140
//=============================================================================
141

142 143 144 145
void GPUCacheOptimizer::RemapVertices(unsigned int NumTris, unsigned int NumVerts,
									  const unsigned int* pVertMap,
									  unsigned int IndexSize, void* pInOutIndices,
									  unsigned int VertexStride, void* pInOutVertices)
146 147 148 149 150 151 152 153 154 155 156
{
	if (pVertMap && pInOutIndices && pInOutVertices && VertexStride)
	{
		// make tmp vertex buffer copy
		char* pTmpBuf = new char[VertexStride * NumVerts];
		memcpy(pTmpBuf, pInOutVertices, VertexStride * NumVerts);

		char* pVertexOut = (char*)pInOutVertices;

		// apply on vertex buffer

157
		for (unsigned int i = 0; i < NumVerts; ++i)
158 159 160 161 162 163 164 165 166 167
		{
			// some mapping destinations might be invalid
			//  this vertex is unused,  ignore then
			if (pVertMap[i] < NumVerts)
				memcpy(pVertexOut + pVertMap[i] * VertexStride,
					pTmpBuf + i * VertexStride, VertexStride);
		}

		// apply on index buffer

168
		for (unsigned int i = 0; i < NumTris * 3; ++i)
169 170 171
		{
			// IndexBuffer[i] = VertMap[IndexBuffer[i]]

172
			unsigned int v = GetIndex(i, IndexSize, pInOutIndices);
173 174 175 176 177 178 179
			SetIndex(i, pVertMap[v], IndexSize, pInOutIndices);
		}

		delete [] pTmpBuf;
	}
}

Jan Möbius's avatar
Jan Möbius committed
180
//=============================================================================
181

182 183 184
/** \brief Build a vertex map that orders vertices by first use in the index buffer.
 *
 * @param NumTris   number of triangles in the index buffer
 * @param NumVerts  number of vertices, i.e. number of entries in pVertMap
 * @param IndexSize size of one index in bytes; 1 and 2 are read as such,
 *                  any other value is read as 4 byte indices
 * @param pIndices  index buffer
 * @param pVertMap  output: new position for every vertex; vertices that are
 *                  never referenced keep the value 0xFFFFFFFF
 */
void GPUCacheOptimizer::OptimizeVertices(unsigned int NumTris, unsigned int NumVerts,
                                         unsigned int IndexSize, const void* pIndices,
                                         unsigned int* pVertMap)
{
  // straight forward algorithm
  // simply iterate over indices and increment vertex location if unvisited vertex found
  const unsigned int Unvisited = 0xFFFFFFFF;

  unsigned int uCounter = 0; // vertex counter

  // memset works byte-wise: filling with 0xFF yields 0xFFFFFFFF per entry
  memset(pVertMap, 0xFF, NumVerts * sizeof(unsigned int));

  for (unsigned int i = 0; i < NumTris * 3; ++i)
  {
    unsigned int vertex;

    switch (IndexSize)
    {
    case 1:  vertex = static_cast<const unsigned char*>(pIndices)[i];  break;
    case 2:  vertex = static_cast<const unsigned short*>(pIndices)[i]; break;
    default: vertex = static_cast<const unsigned int*>(pIndices)[i];   break;
    }

    // an index outside the vertex range would write past pVertMap
    if (vertex >= NumVerts)
      continue;

    if (pVertMap[vertex] == Unvisited)
      pVertMap[vertex] = uCounter++;
  }
}

Jan Möbius's avatar
Jan Möbius committed
204
//=============================================================================
205

206
unsigned int GPUCacheOptimizer::ComputeNumberOfVertexTransformations(unsigned int VertexCacheSize)
207 208 209
{
	if (m_NumTransformations) return m_NumTransformations;

210
	unsigned int NumIndices = 3 * m_NumTris;
211 212
	if (!NumIndices) return 0;

213 214 215
	unsigned int* Cache = new unsigned int[VertexCacheSize];
	unsigned int NumSlotsInUse = 0;
	unsigned int LastSlot = 0;
216 217
	m_NumTransformations = 0;

218
	for (unsigned int i = 0; i < m_NumTris; ++i)
219
	{
220
		unsigned int t = m_pTriMap[i];
221 222 223 224

		// for each vertex of triangle t:
		for (int k = 0; k < 3; ++k)
		{
225
			unsigned int Idx = GetIndex(t * 3 + k); // vertex index
226 227

			int bInCache = 0;
228
			for (unsigned int k = 0; k < NumSlotsInUse && !bInCache; ++k)
229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255
			{
				if (Cache[k] == Idx) bInCache = 1;
			}

			if (!bInCache)
			{
				++m_NumTransformations;
				if (NumSlotsInUse < VertexCacheSize)
				{
					Cache[NumSlotsInUse++] = Idx; 
					++LastSlot;
				}
				else
				{
					if (LastSlot == VertexCacheSize) LastSlot = 0;
					Cache[LastSlot++] = Idx;
				}
			}
		}
		
	}

	delete [] Cache;

	return m_NumTransformations;
}

Jan Möbius's avatar
Jan Möbius committed
256
//=============================================================================
257

258
float GPUCacheOptimizer::ComputeACMR(unsigned int VertexCacheSize)
259
{
260
	unsigned int NumT = ComputeNumberOfVertexTransformations(VertexCacheSize);
261 262 263
	return float(NumT) / float(m_NumTris);
}

Jan Möbius's avatar
Jan Möbius committed
264
//=============================================================================
265

266
float GPUCacheOptimizer::ComputeATVR(unsigned int VertexCacheSize)
267
{
268
	unsigned int NumT = ComputeNumberOfVertexTransformations(VertexCacheSize);
269 270 271
	return float(NumT) / float(m_NumVerts);
}

Jan Möbius's avatar
Jan Möbius committed
272
//=============================================================================
273 274

// forsyth's score function
275
void GPUCacheOptimizer::Opt_Vertex::FindScore(unsigned int MaxSizeVertexCache)
276
{
Jan Möbius's avatar
Jan Möbius committed
277 278


279 280 281 282 283 284 285

	float fNewScore = -1.0f; // -1 : vertex unused
	if (iNumTrisLeft > 0)
	{
		if (iCachePos < 0) fNewScore = 0.0f; // not in FIFO
		else
		{
Jan Möbius's avatar
Jan Möbius committed
286

Jan Möbius's avatar
Jan Möbius committed
287
			if (iCachePos < 3){ // last tri => fixed score
Jan Möbius's avatar
Jan Möbius committed
288

Jan Möbius's avatar
Jan Möbius committed
289
			  const float LastTriScore = 0.75f;
290
				fNewScore = LastTriScore;
Jan Möbius's avatar
Jan Möbius committed
291 292

			} else
293
			{
Jan Möbius's avatar
Jan Möbius committed
294 295
			  const float CacheDecayPower = 1.5f;

296 297 298 299 300 301 302 303 304 305 306
				// check for cache_pos < MaxSizeCachePos here..
				// Points for being high in the cache.
				const float Scaler = 1.0f / (MaxSizeVertexCache - 3);
				fNewScore = 1.0f - ( iCachePos - 3 ) * Scaler;
				fNewScore = powf( fNewScore, CacheDecayPower);
			}
		}

		// Bonus points for having a low number of tris still to
		// use the vert, so we get rid of lone verts quickly.

Jan Möbius's avatar
Jan Möbius committed
307 308 309
	  const float ValenceBoostScale = 2.0f;
	  const float ValenceBoostPower = 0.5f;

310 311 312 313 314 315 316
		float ValenceBoost = powf( float(iNumTrisLeft), -float(ValenceBoostPower));
		fNewScore += ValenceBoostScale * ValenceBoost;
	}

	fScore = fNewScore;
}

317
void GPUCacheOptimizer::Opt_Vertex::RemoveTriFromList(unsigned int tri)
318 319 320 321 322 323 324 325 326 327 328 329 330
{
	for (int k = 0; k < iNumTrisLeft; ++k)
	{
		// replace tri with last tri in this list
		if (pTris[k] == tri)
		{
			pTris[k] = pTris[iNumTrisLeft-1];
			break;
		}
	}
	--iNumTrisLeft;
}

Jan Möbius's avatar
Jan Möbius committed
331
//=============================================================================
332 333
// tipsify

334
/** \brief Compute a triangle order with the tipsify algorithm and store it in the triangle map.
 *
 * @param CacheSize size of the vertex cache the order is computed for
 * @param NumTris   number of triangles
 * @param NumVerts  number of vertices
 * @param IndexSize size of one index in bytes
 * @param pIndices  index buffer
 *
 * Triangles are emitted by fanning around one vertex after the other. The
 * next fanning vertex is taken from the 1-ring of the current one, then
 * from a stack of recently used vertices, then from a linear cursor over
 * all vertices.
 *
 * If there are fewer than 3 vertices or no triangles, the triangle map is
 * set to the identity.
 */
GPUCacheOptimizerTipsify::GPUCacheOptimizerTipsify(unsigned int CacheSize, unsigned int NumTris, unsigned int NumVerts, unsigned int IndexSize, const void *pIndices)
: GPUCacheOptimizer(NumTris, NumVerts, IndexSize, pIndices)
{
  if (NumVerts < 3 || !NumTris)
  {
    // nothing to optimize: keep the input order so that the map is defined
    for (unsigned int t = 0; t < NumTris; ++t)
      m_pTriMap[t] = t;
    return;
  }

  Opt_Vertex* pVerts = new Opt_Vertex[NumVerts];
  Opt_Tris* pTris = new Opt_Tris[NumTris];

  // build adjacency, same start as in forsyth class

  for (unsigned int t = 0; t < NumTris; ++t)
  {
    // copy vertex indices of this tri
    Opt_Tris* pThisTri = pTris + t;

    for (int k = 0; k < 3; ++k)
    {
      pThisTri->v[k] = GetIndex(t * 3 + k);

      // count # tris per vertex
      ++pVerts[pThisTri->v[k]].iNumTrisTotal;
    }
  }

  // create list of tris per vertex
  for (unsigned int t = 0; t < NumTris; ++t)
  {
    // add this tri to per vertex tri list
    for (int k = 0; k < 3; ++k)
    {
      Opt_Vertex* pV = pVerts + pTris[t].v[k];
      if (!pV->pTris) pV->pTris = new unsigned int[pV->iNumTrisTotal];

      // abuse <numTrisLeft> as temporal up counter
      // (automatically sums to numTris, exactly what we want)
      pV->pTris[pV->iNumTrisLeft++] = t;

      pV->iCachePos = 0;
    }
  }

  // use the cache_pos of the OptFaces_Vertex as the time stamp

  // OPTIMIZATION:
  //  push and pop on a std::vector based DeadEndVertexStack greatly
  //  increases processing time
  // -> replaced with fixed size ring stack
  RingStack DeadEndVertexStack(128);

  int f = 0; // arbitrary starting index (vertex)
  int iTimeStamp = CacheSize + 1;
  unsigned int cursor = 1; // linear cursor over all vertices

  unsigned int numTrisAdded = 0;

  std::vector<unsigned int> N; // 1-ring of next candidates
  N.reserve(2048);

  while (f >= 0)
  {
    N.clear();

    // this vertex
    Opt_Vertex* pFan = pVerts + f;

    // for each adjacent tri of this vertex
    for (int m = 0; m < pFan->iNumTrisTotal; ++m)
    {
      // Read the tri id once: RemoveTriFromList() below may overwrite
      // slot m of this very list when it is called for the fanning vertex.
      const unsigned int tri = pFan->pTris[m];
      Opt_Tris* pT = pTris + tri;

      if (!pT->bAdded)
      {
        // append
        m_pTriMap[numTrisAdded++] = tri;

        for (int k = 0; k < 3; ++k)
        {
          const unsigned int v = pT->v[k];

          // push to cache
          DeadEndVertexStack.push(v);

          // insert
          N.push_back(v);

          pVerts[v].RemoveTriFromList(tri);

          if (iTimeStamp - pVerts[v].iCachePos > (int)CacheSize)
            pVerts[v].iCachePos = iTimeStamp++;
        }
        pT->bAdded = 1;
      }
    }


    // select next fanning vertex
    // Get-Next-Vertex
    {
      int n = -1, p = -1; // best candidate and priority
      for (unsigned int k = 0; k < N.size(); ++k)
      {
        // for each vertex in N
        const Opt_Vertex* pCand = pVerts + N[k];

        if (pCand->iNumTrisLeft > 0)
        {
          // error here in pseudo code:
          //  literal p should be named m here
          //  to find the best vertex
          int m = 0;
          if (iTimeStamp - pCand->iCachePos + 2 * pCand->iNumTrisLeft <= (int)CacheSize)
            m = iTimeStamp - pCand->iCachePos;

          if (m > p)
          {
            p = m;
            n = N[k];
          }
        }
      }

      if (n == -1)
      {
        // Skip-Dead-End
        while (DeadEndVertexStack.length() && (n == -1))
        {
          unsigned int d = DeadEndVertexStack.pop();

          if (pVerts[d].iNumTrisLeft > 0)
            n = d;
        }

        while (cursor + 1 < NumVerts && (n == -1))
        {
          ++cursor;
          if (pVerts[cursor].iNumTrisLeft > 0)
            n = cursor;
        }
      }

      f = n;
    }
  }

  // Triangles that were never reached by the loop above are appended in
  // input order, so that every entry of the map is set.
  for (unsigned int t = 0; t < NumTris && numTrisAdded < NumTris; ++t)
  {
    if (!pTris[t].bAdded)
      m_pTriMap[numTrisAdded++] = t;
  }

  delete [] pVerts;
  delete [] pTris;
}

Jan Möbius's avatar
Jan Möbius committed
490
//=============================================================================
491

492 493
/** \brief Set up the triangle map as identity, i.e. keep the input triangle order.
 *
 * @param NumTris   number of triangles
 * @param NumVerts  number of vertices
 * @param IndexSize size of one index in bytes
 * @param pIndices  index buffer
 */
GPUCacheEfficiencyTester::GPUCacheEfficiencyTester(unsigned int NumTris, unsigned int NumVerts,
                                                   unsigned int IndexSize, const void* pIndices)
  : GPUCacheOptimizer(NumTris, NumVerts, IndexSize, pIndices)
{
  // triangle t stays at position t
  unsigned int tri = 0;
  while (tri < NumTris)
  {
    m_pTriMap[tri] = tri;
    ++tri;
  }
}

Jan Möbius's avatar
Jan Möbius committed
499
//=============================================================================
500 501


Jan Möbius's avatar
Jan Möbius committed
502
}