52 #include "GPUCacheOptimizer.hh"
65 GPUCacheOptimizer::GPUCacheOptimizer(
unsigned int NumTris,
unsigned int NumVerts,
unsigned int IndexSize,
const void* pIndices) :
68 m_IndexSize(IndexSize),
70 m_NumTransformations(0)
75 GPUCacheOptimizer::~GPUCacheOptimizer(
void)
82 unsigned int GPUCacheOptimizer::GetIndex(
unsigned int i)
const
84 assert(i < m_NumTris * 3);
86 return GetIndex(i, m_IndexSize, m_pIndices);
89 unsigned int GPUCacheOptimizer::GetIndex(
unsigned int i,
unsigned int IndexSize,
const void* pIB)
93 case 4:
return ((
const unsigned int*)pIB)[i];
break;
94 case 2:
return ((
const unsigned short*)pIB)[i];
break;
95 case 1:
return ((
const unsigned char*)pIB)[i];
break;
97 assert(i == 1 || i == 2 || i == 4);
102 void GPUCacheOptimizer::SetIndex(
unsigned int i,
unsigned int val,
unsigned int IndexSize,
void* pIB)
106 case 4: ((
unsigned int*)pIB)[i] = val;
break;
107 case 2: ((
unsigned short*)pIB)[i] = val;
break;
108 case 1: ((
unsigned char*)pIB)[i] = val;
break;
110 assert(i == 1 || i == 2 || i == 4);
118 assert(DstIndexSize == 1 ||DstIndexSize == 2 || DstIndexSize == 4);
122 char* pSrc = (
char*)m_pIndices;
127 pSrc =
new char[m_IndexSize * m_NumTris * 3];
128 memcpy(pSrc, m_pIndices, m_IndexSize * m_NumTris * 3);
133 for (
unsigned int i = 0; i < m_NumTris; ++i)
135 for (
int k = 0; k < 3; ++k)
137 unsigned int TriVertex = GetIndex(
m_pTriMap[i] * 3 + k, m_IndexSize, pSrc);
140 SetIndex(i * 3 + k, TriVertex, DstIndexSize, pDst);
144 if (bTmpCopy)
delete [] pSrc;
150 const unsigned int* pVertMap,
151 unsigned int IndexSize,
void* pInOutIndices,
152 unsigned int VertexStride,
void* pInOutVertices)
154 if (pVertMap && pInOutIndices && pInOutVertices && VertexStride)
157 char* pTmpBuf =
new char[VertexStride * NumVerts];
158 memcpy(pTmpBuf, pInOutVertices, VertexStride * NumVerts);
160 char* pVertexOut = (
char*)pInOutVertices;
164 for (
unsigned int i = 0; i < NumVerts; ++i)
168 if (pVertMap[i] < NumVerts)
169 memcpy(pVertexOut + pVertMap[i] * VertexStride,
170 pTmpBuf + i * VertexStride, VertexStride);
175 for (
unsigned int i = 0; i < NumTris * 3; ++i)
179 unsigned int v = GetIndex(i, IndexSize, pInOutIndices);
180 SetIndex(i, pVertMap[v], IndexSize, pInOutIndices);
190 unsigned int IndexSize,
const void* pIndices,
191 unsigned int* pVertMap)
195 unsigned int uCounter = 0;
197 memset(pVertMap, 0xFF, NumVerts *
sizeof(
unsigned int));
199 for (
unsigned int i = 0; i < NumTris * 3; ++i)
203 if (IndexSize == 2) vertex = ((
const unsigned short*)pIndices)[i];
204 else vertex = ((
const unsigned int*)pIndices)[i];
206 if (pVertMap[vertex] == 0xFFFFFFFF)
207 pVertMap[vertex] = uCounter++;
215 if (m_NumTransformations)
return m_NumTransformations;
217 unsigned int NumIndices = 3 * m_NumTris;
218 if (!NumIndices)
return 0;
220 unsigned int* Cache =
new unsigned int[VertexCacheSize];
221 unsigned int NumSlotsInUse = 0;
222 unsigned int LastSlot = 0;
223 m_NumTransformations = 0;
225 for (
unsigned int i = 0; i < m_NumTris; ++i)
230 for (
int k = 0; k < 3; ++k)
232 unsigned int Idx = GetIndex(t * 3 + k);
235 for (
unsigned int k = 0; k < NumSlotsInUse && !bInCache; ++k)
237 if (Cache[k] == Idx) bInCache = 1;
242 ++m_NumTransformations;
243 if (NumSlotsInUse < VertexCacheSize)
245 Cache[NumSlotsInUse++] = Idx;
250 if (LastSlot == VertexCacheSize) LastSlot = 0;
251 Cache[LastSlot++] = Idx;
260 return m_NumTransformations;
268 return float(NumT) / float(m_NumTris);
276 return float(NumT) / float(m_NumVerts);
287 float fNewScore = -1.0f;
290 if (iCachePos < 0) fNewScore = 0.0f;
296 const float LastTriScore = 0.75f;
297 fNewScore = LastTriScore;
301 const float CacheDecayPower = 1.5f;
305 const float Scaler = 1.0f / (MaxSizeVertexCache - 3);
306 fNewScore = 1.0f - ( iCachePos - 3 ) * Scaler;
307 fNewScore = powf( fNewScore, CacheDecayPower);
314 const float ValenceBoostScale = 2.0f;
315 const float ValenceBoostPower = 0.5f;
317 float ValenceBoost = powf(
float(
iNumTrisLeft), -
float(ValenceBoostPower));
318 fNewScore += ValenceBoostScale * ValenceBoost;
324 void GPUCacheOptimizer::Opt_Vertex::RemoveTriFromList(
unsigned int tri)
326 for (
int k = 0; k < iNumTrisLeft; ++k)
331 pTris[k] = pTris[iNumTrisLeft-1];
347 if (NumVerts < 3 || !NumTris)
return;
353 memset(pVerts, 0,
sizeof(
Opt_Vertex) * NumVerts);
356 for (
unsigned int i = 0; i < NumTris; ++i)
362 for (
unsigned int k = 0; k < 3; ++k)
365 const unsigned int idx = GetIndex(i * 3 + k);
366 pThisTri->
v[k] = idx;
374 const unsigned int vertexTriAdjBufSize = NumTris * 3;
375 unsigned int vertexTriAdjBufOffset = 0;
376 unsigned int* vertexTriAdjBuf =
new unsigned int[vertexTriAdjBufSize];
379 for (
unsigned int i = 0; i < NumTris; ++i)
382 for (
int k = 0; k < 3; ++k)
386 pV->pTris = vertexTriAdjBuf + vertexTriAdjBufOffset;
410 int iTimeStamp = CacheSize + 1;
413 unsigned int numTrisAdded = 0;
415 std::vector<unsigned int> N;
428 Opt_Tris* pT = pTris + pV->pTris[m];
433 m_pTriMap[numTrisAdded++] = pV->pTris[m];
435 for (
int k = 0; k < 3; ++k)
438 const unsigned int v = pT->
v[k];
440 DeadEndVertexStack.push(v);
449 if (iTimeStamp - adjV->iCachePos > (
int)CacheSize)
450 adjV->iCachePos = iTimeStamp++;
461 for (
unsigned int k = 0; k < N.size(); ++k)
472 if (iTimeStamp - pV->iCachePos + 2 * pV->
iNumTrisLeft <= (
int)CacheSize)
473 m = iTimeStamp - pV->iCachePos;
486 while (DeadEndVertexStack.
length() && (n == -1))
490 const unsigned int d = DeadEndVertexStack.pop();
492 if (pVerts[d].iNumTrisLeft > 0)
496 while (i+1 < NumVerts && (n == -1))
499 if (pVerts[i].iNumTrisLeft > 0)
512 delete [] vertexTriAdjBuf;
520 GPUCacheEfficiencyTester::GPUCacheEfficiencyTester(
unsigned int NumTris,
unsigned int NumVerts,
521 unsigned int IndexSize,
const void* pIndices)
524 for (
unsigned int i = 0; i < NumTris; ++i)
m_pTriMap[i] = i;
Namespace providing different geometric functions concerning angles.
void FindScore(unsigned int MaxSizeVertexCache)
forsyth's score function
int iNumTrisLeft
tris left to add to final result
int iNumTrisTotal
tris using this vertex
void WriteIndexBuffer(unsigned int DstIndexSize, void *pDst)
Applies the remapping on the initial pIndices constructor's param and stores the result in the given ...
unsigned int v[3]
vertices of this triangle
static void OptimizeVertices(unsigned int NumTris, unsigned int NumVerts, unsigned int IndexSize, const void *pIndices, unsigned int *pVertMap)
Reorders vertex buffer to minimize memory address jumps.
GPUCacheOptimizerTipsify(unsigned int CacheSize, unsigned int NumTris, unsigned int NumVerts, unsigned int IndexSize, const void *pIndices)
The actual computation happens here in this constructor.
float ComputeATVR(unsigned int VertexCacheSize=16)
Measures the efficiency use of the vertex cache. ATVR: Average Transform to Vertex Ratio similar to A...
Simple and fast fixed size stack used in tipsify implementation.
static void RemapVertices(unsigned int NumTris, unsigned int NumVerts, const unsigned int *pVertMap, unsigned int IndexSize, void *pInOutIndices, unsigned int VertexStride, void *pInOutVertices)
Applies the remap table of OptimizeVertices to a vertex and index buffer.
float ComputeACMR(unsigned int VertexCacheSize=16)
Measures the efficiency use of the vertex cache. ACMR: Average Cache Miss Ratio.
unsigned int ComputeNumberOfVertexTransformations(unsigned int VertexCacheSize=16)
Returns the total number of vertex transforms performed with a certain VertexCache.
unsigned int length() const
current stack length