43 #include "GPUCacheOptimizer.hh" 56 GPUCacheOptimizer::GPUCacheOptimizer(
unsigned int NumTris,
unsigned int NumVerts,
unsigned int IndexSize,
const void* pIndices) :
59 m_IndexSize(IndexSize),
61 m_NumTransformations(0)
66 GPUCacheOptimizer::~GPUCacheOptimizer(
void)
73 unsigned int GPUCacheOptimizer::GetIndex(
unsigned int i)
const 75 assert(i < m_NumTris * 3);
77 return GetIndex(i, m_IndexSize, m_pIndices);
80 unsigned int GPUCacheOptimizer::GetIndex(
unsigned int i,
unsigned int IndexSize,
const void* pIB)
84 case 4:
return ((
const unsigned int*)pIB)[i];
break;
85 case 2:
return ((
const unsigned short*)pIB)[i];
break;
86 case 1:
return ((
const unsigned char*)pIB)[i];
break;
88 assert(i == 1 || i == 2 || i == 4);
93 void GPUCacheOptimizer::SetIndex(
unsigned int i,
unsigned int val,
unsigned int IndexSize,
void* pIB)
97 case 4: ((
unsigned int*)pIB)[i] = val;
break;
98 case 2: ((
unsigned short*)pIB)[i] = val;
break;
99 case 1: ((
unsigned char*)pIB)[i] = val;
break;
101 assert(i == 1 || i == 2 || i == 4);
109 assert(DstIndexSize == 1 ||DstIndexSize == 2 || DstIndexSize == 4);
113 char* pSrc = (
char*)m_pIndices;
118 pSrc =
new char[m_IndexSize * m_NumTris * 3];
119 memcpy(pSrc, m_pIndices, m_IndexSize * m_NumTris * 3);
124 for (
unsigned int i = 0; i < m_NumTris; ++i)
126 for (
int k = 0; k < 3; ++k)
128 unsigned int TriVertex = GetIndex(
m_pTriMap[i] * 3 + k, m_IndexSize, pSrc);
131 SetIndex(i * 3 + k, TriVertex, DstIndexSize, pDst);
135 if (bTmpCopy)
delete [] pSrc;
141 const unsigned int* pVertMap,
142 unsigned int IndexSize,
void* pInOutIndices,
143 unsigned int VertexStride,
void* pInOutVertices)
145 if (pVertMap && pInOutIndices && pInOutVertices && VertexStride)
148 char* pTmpBuf =
new char[VertexStride * NumVerts];
149 memcpy(pTmpBuf, pInOutVertices, VertexStride * NumVerts);
151 char* pVertexOut = (
char*)pInOutVertices;
155 for (
unsigned int i = 0; i < NumVerts; ++i)
159 if (pVertMap[i] < NumVerts)
160 memcpy(pVertexOut + pVertMap[i] * VertexStride,
161 pTmpBuf + i * VertexStride, VertexStride);
166 for (
unsigned int i = 0; i < NumTris * 3; ++i)
170 unsigned int v = GetIndex(i, IndexSize, pInOutIndices);
171 SetIndex(i, pVertMap[v], IndexSize, pInOutIndices);
181 unsigned int IndexSize,
const void* pIndices,
182 unsigned int* pVertMap)
186 unsigned int uCounter = 0;
188 memset(pVertMap, 0xFF, NumVerts *
sizeof(
unsigned int));
190 for (
unsigned int i = 0; i < NumTris * 3; ++i)
194 if (IndexSize == 2) vertex = ((
const unsigned short*)pIndices)[i];
195 else vertex = ((
const unsigned int*)pIndices)[i];
197 if (pVertMap[vertex] == 0xFFFFFFFF)
198 pVertMap[vertex] = uCounter++;
206 if (m_NumTransformations)
return m_NumTransformations;
208 unsigned int NumIndices = 3 * m_NumTris;
209 if (!NumIndices)
return 0;
211 unsigned int* Cache =
new unsigned int[VertexCacheSize];
212 unsigned int NumSlotsInUse = 0;
213 unsigned int LastSlot = 0;
214 m_NumTransformations = 0;
216 for (
unsigned int i = 0; i < m_NumTris; ++i)
221 for (
int k = 0; k < 3; ++k)
223 unsigned int Idx = GetIndex(t * 3 + k);
226 for (
unsigned int k = 0; k < NumSlotsInUse && !bInCache; ++k)
228 if (Cache[k] == Idx) bInCache = 1;
233 ++m_NumTransformations;
234 if (NumSlotsInUse < VertexCacheSize)
236 Cache[NumSlotsInUse++] = Idx;
241 if (LastSlot == VertexCacheSize) LastSlot = 0;
242 Cache[LastSlot++] = Idx;
251 return m_NumTransformations;
259 return float(NumT) / float(m_NumTris);
267 return float(NumT) / float(m_NumVerts);
278 float fNewScore = -1.0f;
279 if (iNumTrisLeft > 0)
281 if (iCachePos < 0) fNewScore = 0.0f;
287 const float LastTriScore = 0.75f;
288 fNewScore = LastTriScore;
292 const float CacheDecayPower = 1.5f;
296 const float Scaler = 1.0f / (MaxSizeVertexCache - 3);
297 fNewScore = 1.0f - ( iCachePos - 3 ) * Scaler;
298 fNewScore = powf( fNewScore, CacheDecayPower);
305 const float ValenceBoostScale = 2.0f;
306 const float ValenceBoostPower = 0.5f;
308 float ValenceBoost = powf(
float(iNumTrisLeft), -
float(ValenceBoostPower));
309 fNewScore += ValenceBoostScale * ValenceBoost;
315 void GPUCacheOptimizer::Opt_Vertex::RemoveTriFromList(
unsigned int tri)
317 for (
int k = 0; k < iNumTrisLeft; ++k)
322 pTris[k] = pTris[iNumTrisLeft-1];
338 if (NumVerts < 3 || !NumTris)
return;
344 memset(pVerts, 0,
sizeof(
Opt_Vertex) * NumVerts);
347 for (
unsigned int i = 0; i < NumTris; ++i)
353 for (
unsigned int k = 0; k < 3; ++k)
356 const unsigned int idx = GetIndex(i * 3 + k);
357 pThisTri->
v[k] = idx;
365 const unsigned int vertexTriAdjBufSize = NumTris * 3;
366 unsigned int vertexTriAdjBufOffset = 0;
367 unsigned int* vertexTriAdjBuf =
new unsigned int[vertexTriAdjBufSize];
370 for (
unsigned int i = 0; i < NumTris; ++i)
373 for (
int k = 0; k < 3; ++k)
377 pV->pTris = vertexTriAdjBuf + vertexTriAdjBufOffset;
401 int iTimeStamp = CacheSize + 1;
404 unsigned int numTrisAdded = 0;
406 std::vector<unsigned int> N;
419 Opt_Tris* pT = pTris + pV->pTris[m];
424 m_pTriMap[numTrisAdded++] = pV->pTris[m];
426 for (
int k = 0; k < 3; ++k)
429 const unsigned int v = pT->
v[k];
431 DeadEndVertexStack.push(v);
440 if (iTimeStamp - adjV->iCachePos > (
int)CacheSize)
441 adjV->iCachePos = iTimeStamp++;
452 for (
unsigned int k = 0; k < N.size(); ++k)
463 if (iTimeStamp - pV->iCachePos + 2 * pV->
iNumTrisLeft <= (
int)CacheSize)
464 m = iTimeStamp - pV->iCachePos;
477 while (DeadEndVertexStack.
length() && (n == -1))
481 const unsigned int d = DeadEndVertexStack.pop();
483 if (pVerts[d].iNumTrisLeft > 0)
487 while (i+1 < NumVerts && (n == -1))
490 if (pVerts[i].iNumTrisLeft > 0)
503 delete [] vertexTriAdjBuf;
511 GPUCacheEfficiencyTester::GPUCacheEfficiencyTester(
unsigned int NumTris,
unsigned int NumVerts,
512 unsigned int IndexSize,
const void* pIndices)
515 for (
unsigned int i = 0; i < NumTris; ++i)
m_pTriMap[i] = i;
void FindScore(unsigned int MaxSizeVertexCache)
forsyth's score function
int iNumTrisTotal
tris using this vertex
Namespace providing different geometric functions concerning angles.
static void OptimizeVertices(unsigned int NumTris, unsigned int NumVerts, unsigned int IndexSize, const void *pIndices, unsigned int *pVertMap)
Reorders vertex buffer to minimize memory address jumps.
static void RemapVertices(unsigned int NumTris, unsigned int NumVerts, const unsigned int *pVertMap, unsigned int IndexSize, void *pInOutIndices, unsigned int VertexStride, void *pInOutVertices)
Applies the remap table of OptimizeVertices to a vertex and index buffer.
GPUCacheOptimizerTipsify(unsigned int CacheSize, unsigned int NumTris, unsigned int NumVerts, unsigned int IndexSize, const void *pIndices)
The actual computation happens here in this constructor.
float ComputeATVR(unsigned int VertexCacheSize=16)
Measures the efficiency use of the vertex cache. ATVR: Average Transform to Vertex Ratio similar to A...
void WriteIndexBuffer(unsigned int DstIndexSize, void *pDst)
Applies the remapping on the initial pIndices constructor's param and stores the result in the given ...
unsigned int v[3]
vertices of this triangle
int iNumTrisLeft
tris left to add to final result
unsigned int length() const
current stack length
unsigned int ComputeNumberOfVertexTransformations(unsigned int VertexCacheSize=16)
Returns the total number of vertex transforms performed with a certain VertexCache.
float ComputeACMR(unsigned int VertexCacheSize=16)
Measures the efficiency use of the vertex cache. ACMR: Average Cache Miss Ratio.
Simple and fast fixed size stack used in tipsify implementation.