Fixes and optimizations.
This commit is contained in:
parent
149a50a26f
commit
78fa1e785d
@ -93,6 +93,7 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvthread", "nvthread\nvthre
|
|||||||
EndProject
|
EndProject
|
||||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cubemaptest", "cubemaptest\cubemaptest.vcproj", "{CFB3FEAC-5720-4B16-9D7E-039DB180B641}"
|
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cubemaptest", "cubemaptest\cubemaptest.vcproj", "{CFB3FEAC-5720-4B16-9D7E-039DB180B641}"
|
||||||
ProjectSection(ProjectDependencies) = postProject
|
ProjectSection(ProjectDependencies) = postProject
|
||||||
|
{F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}
|
||||||
{1AEB7681-57D8-48EE-813D-5C41CC38B647} = {1AEB7681-57D8-48EE-813D-5C41CC38B647}
|
{1AEB7681-57D8-48EE-813D-5C41CC38B647} = {1AEB7681-57D8-48EE-813D-5C41CC38B647}
|
||||||
EndProjectSection
|
EndProjectSection
|
||||||
EndProject
|
EndProject
|
||||||
|
@ -7,8 +7,6 @@
|
|||||||
#include "nvcore.h"
|
#include "nvcore.h"
|
||||||
#include "Debug.h" // nvDebugCheck
|
#include "Debug.h" // nvDebugCheck
|
||||||
|
|
||||||
#include <stddef.h>
|
|
||||||
|
|
||||||
// Just in case. Grrr.
|
// Just in case. Grrr.
|
||||||
#undef min
|
#undef min
|
||||||
#undef max
|
#undef max
|
||||||
|
@ -221,7 +221,7 @@ namespace nv
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Get scanline pointer.
|
/// Get scanline pointer.
|
||||||
inline float * FloatImage::scanline(uint z, uint y, uint c)
|
inline float * FloatImage::scanline(uint c, uint y, uint z)
|
||||||
{
|
{
|
||||||
nvDebugCheck(y < m_height);
|
nvDebugCheck(y < m_height);
|
||||||
return plane(c, z) + y * m_width;
|
return plane(c, z) + y * m_width;
|
||||||
|
@ -333,7 +333,7 @@ const Vector3 & VectorTable::lookup(uint f, uint x, uint y) const {
|
|||||||
// - parallelize.
|
// - parallelize.
|
||||||
// - use ISPC?
|
// - use ISPC?
|
||||||
|
|
||||||
static Vector3 faceNormals[6] = {
|
static const Vector3 faceNormals[6] = {
|
||||||
Vector3(1, 0, 0),
|
Vector3(1, 0, 0),
|
||||||
Vector3(-1, 0, 0),
|
Vector3(-1, 0, 0),
|
||||||
Vector3(0, 1, 0),
|
Vector3(0, 1, 0),
|
||||||
@ -342,11 +342,31 @@ static Vector3 faceNormals[6] = {
|
|||||||
Vector3(0, 0, -1),
|
Vector3(0, 0, -1),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static const Vector3 faceU[6] = {
|
||||||
|
Vector3(0, 0, -1),
|
||||||
|
Vector3(0, 0, 1),
|
||||||
|
Vector3(1, 0, 0),
|
||||||
|
Vector3(1, 0, 0),
|
||||||
|
Vector3(1, 0, 0),
|
||||||
|
Vector3(-1, 0, 0),
|
||||||
|
};
|
||||||
|
|
||||||
|
static const Vector3 faceV[6] = {
|
||||||
|
Vector3(0, -1, 0),
|
||||||
|
Vector3(0, -1, 0),
|
||||||
|
Vector3(0, 0, 1),
|
||||||
|
Vector3(0, 0, -1),
|
||||||
|
Vector3(0, -1, 0),
|
||||||
|
Vector3(0, -1, 0),
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// Convolve filter against this cube.
|
// Convolve filter against this cube.
|
||||||
Vector3 CubeSurface::Private::applyCosinePowerFilter(const Vector3 & filterDir, float cosineConeAngle, float cosinePower)
|
Vector3 CubeSurface::Private::applyCosinePowerFilter(const Vector3 & filterDir, float coneAngle, float cosinePower)
|
||||||
{
|
{
|
||||||
const float coneAngle = acos(cosineConeAngle);
|
const float cosineConeAngle = cos(coneAngle);
|
||||||
|
nvDebugCheck(cosineConeAngle >= 0);
|
||||||
|
|
||||||
Vector3 color(0);
|
Vector3 color(0);
|
||||||
float sum = 0;
|
float sum = 0;
|
||||||
@ -356,25 +376,74 @@ Vector3 CubeSurface::Private::applyCosinePowerFilter(const Vector3 & filterDir,
|
|||||||
|
|
||||||
// Test face cone agains filter cone.
|
// Test face cone agains filter cone.
|
||||||
float cosineFaceAngle = dot(filterDir, faceNormals[f]);
|
float cosineFaceAngle = dot(filterDir, faceNormals[f]);
|
||||||
|
float faceAngle = acosf(cosineFaceAngle);
|
||||||
|
|
||||||
if (cosineFaceAngle > cos(coneAngle + atan(sqrt(2)))) { // @@ Simplify this with cos(a+b) = cos(a)cos(b) - sin(a)sin(b) formula?
|
/*if (faceAngle > coneAngle + atanf(sqrtf(2))) {
|
||||||
// Skip face.
|
// Skip face.
|
||||||
continue;
|
continue;
|
||||||
}
|
}*/
|
||||||
|
|
||||||
// @@ We could do a less conservative test and test the face frustum against the cone...
|
// @@ We could do a less conservative test and test the face frustum against the cone...
|
||||||
|
|
||||||
// @@ Compute bounding box of cone intersection against face.
|
// Compute bounding box of cone intersection against face.
|
||||||
// The intersection of the cone with the face is an elipse, we want the extents of that elipse.
|
// The intersection of the cone with the face is an elipse, we want the extents of that elipse.
|
||||||
// Hmm... we could even rasterize an elipse! Sounds like FUN!
|
// @@ Hmm... we could even rasterize an elipse! Sounds like FUN!
|
||||||
uint x0 = 0, x1 = edgeLength-1;
|
|
||||||
uint y0 = 0, y1 = edgeLength-1;
|
const int L = toI32(edgeLength-1);
|
||||||
|
int x0 = 0, x1 = L;
|
||||||
|
int y0 = 0, y1 = L;
|
||||||
|
|
||||||
|
// @@ Ugh. This is wrong, or only right when filterDir is aligned to one axis.
|
||||||
|
if (false) {
|
||||||
|
// uv coordinates corresponding to filterDir.
|
||||||
|
//float u = dot(filterDir, faceU[f]) / cosineFaceAngle;
|
||||||
|
//float v = dot(filterDir, faceV[f]) / cosineFaceAngle;
|
||||||
|
|
||||||
|
// Angular coordinates corresponding to filterDir with respect to faceNormal.
|
||||||
|
float atu = atan2(dot(filterDir, faceU[f]), cosineFaceAngle);
|
||||||
|
float atv = atan2(dot(filterDir, faceV[f]), cosineFaceAngle);
|
||||||
|
|
||||||
|
// Expand angles and project back to the face plane.
|
||||||
|
float u0 = tan(clamp(atu - coneAngle, -PI/4, PI/4));
|
||||||
|
float v0 = tan(clamp(atv - coneAngle, -PI/4, PI/4));
|
||||||
|
float u1 = tan(clamp(atu + coneAngle, -PI/4, PI/4));
|
||||||
|
float v1 = tan(clamp(atv + coneAngle, -PI/4, PI/4));
|
||||||
|
nvDebugCheck(u0 >= -1 && u0 <= 1);
|
||||||
|
nvDebugCheck(v0 >= -1 && v0 <= 1);
|
||||||
|
nvDebugCheck(u1 >= -1 && u1 <= 1);
|
||||||
|
nvDebugCheck(v1 >= -1 && v1 <= 1);
|
||||||
|
|
||||||
|
// Expand uv coordinates from [-1,1] to [0, edgeLength)
|
||||||
|
u0 = (u0 + 1) * edgeLength * 0.5f - 0.5f;
|
||||||
|
v0 = (v0 + 1) * edgeLength * 0.5f - 0.5f;
|
||||||
|
u1 = (u1 + 1) * edgeLength * 0.5f - 0.5f;
|
||||||
|
v1 = (v1 + 1) * edgeLength * 0.5f - 0.5f;
|
||||||
|
nvDebugCheck(u0 >= -0.5f && u0 <= edgeLength - 0.5f);
|
||||||
|
nvDebugCheck(v0 >= -0.5f && v0 <= edgeLength - 0.5f);
|
||||||
|
nvDebugCheck(u1 >= -0.5f && u1 <= edgeLength - 0.5f);
|
||||||
|
nvDebugCheck(v1 >= -0.5f && v1 <= edgeLength - 0.5f);
|
||||||
|
|
||||||
|
x0 = clamp(ifloor(u0), 0, L);
|
||||||
|
y0 = clamp(ifloor(v0), 0, L);
|
||||||
|
x1 = clamp(iceil(u1), 0, L);
|
||||||
|
y1 = clamp(iceil(v1), 0, L);
|
||||||
|
|
||||||
|
nvDebugCheck(x1 >= x0);
|
||||||
|
nvDebugCheck(y1 >= y0);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (x1 == x0 || y1 == y0) {
|
||||||
|
// Skip this face.
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
const Surface & inputFace = face[f];
|
const Surface & inputFace = face[f];
|
||||||
const FloatImage * inputImage = inputFace.m->image;
|
const FloatImage * inputImage = inputFace.m->image;
|
||||||
|
|
||||||
for (uint y = y0; y <= y1; y++) {
|
for (int y = y0; y <= y1; y++) {
|
||||||
for (uint x = x0; x <= x1; x++) {
|
bool inside = false;
|
||||||
|
for (int x = x0; x <= x1; x++) {
|
||||||
|
|
||||||
Vector3 dir = vectorTable->lookup(f, x, y);
|
Vector3 dir = vectorTable->lookup(f, x, y);
|
||||||
float cosineAngle = dot(dir, filterDir);
|
float cosineAngle = dot(dir, filterDir);
|
||||||
@ -388,6 +457,13 @@ Vector3 CubeSurface::Private::applyCosinePowerFilter(const Vector3 & filterDir,
|
|||||||
color.x += contribution * inputImage->pixel(0, x, y, 0);
|
color.x += contribution * inputImage->pixel(0, x, y, 0);
|
||||||
color.y += contribution * inputImage->pixel(1, x, y, 0);
|
color.y += contribution * inputImage->pixel(1, x, y, 0);
|
||||||
color.z += contribution * inputImage->pixel(2, x, y, 0);
|
color.z += contribution * inputImage->pixel(2, x, y, 0);
|
||||||
|
|
||||||
|
inside = true;
|
||||||
|
}
|
||||||
|
else if (inside) {
|
||||||
|
// Filter scale is monotonic, if we have been inside once and we just exit, then we can skip the rest of the row.
|
||||||
|
// We could do the same thing for the columns and skip entire rows.
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -398,6 +474,39 @@ Vector3 CubeSurface::Private::applyCosinePowerFilter(const Vector3 & filterDir,
|
|||||||
return color;
|
return color;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#include "nvthread/ParallelFor.h"
|
||||||
|
|
||||||
|
struct ApplyCosinePowerFilterContext {
|
||||||
|
CubeSurface::Private * inputCube;
|
||||||
|
CubeSurface::Private * filteredCube;
|
||||||
|
float coneAngle;
|
||||||
|
float cosinePower;
|
||||||
|
};
|
||||||
|
|
||||||
|
void ApplyCosinePowerFilterTask(void * context, int id)
|
||||||
|
{
|
||||||
|
ApplyCosinePowerFilterContext * ctx = (ApplyCosinePowerFilterContext *)context;
|
||||||
|
|
||||||
|
int size = ctx->filteredCube->edgeLength;
|
||||||
|
|
||||||
|
int f = id / (size * size);
|
||||||
|
int idx = id % (size * size);
|
||||||
|
int y = idx / size;
|
||||||
|
int x = idx % size;
|
||||||
|
|
||||||
|
nvtt::Surface & filteredFace = ctx->filteredCube->face[f];
|
||||||
|
FloatImage * filteredImage = filteredFace.m->image;
|
||||||
|
|
||||||
|
const Vector3 filterDir = texelDirection(f, x, y, 1.0f / size);
|
||||||
|
|
||||||
|
// Convolve filter against cube.
|
||||||
|
Vector3 color = ctx->inputCube->applyCosinePowerFilter(filterDir, ctx->coneAngle, ctx->cosinePower);
|
||||||
|
|
||||||
|
filteredImage->pixel(0, idx) = color.x;
|
||||||
|
filteredImage->pixel(1, idx) = color.y;
|
||||||
|
filteredImage->pixel(2, idx) = color.z;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
CubeSurface CubeSurface::cosinePowerFilter(int size, float cosinePower) const
|
CubeSurface CubeSurface::cosinePowerFilter(int size, float cosinePower) const
|
||||||
{
|
{
|
||||||
@ -415,12 +524,47 @@ CubeSurface CubeSurface::cosinePowerFilter(int size, float cosinePower) const
|
|||||||
m->vectorTable = new VectorTable(edgeLength);
|
m->vectorTable = new VectorTable(edgeLength);
|
||||||
}
|
}
|
||||||
|
|
||||||
const float threshold = 0.0001f;
|
const float threshold = 0.001f;
|
||||||
const float cosineConeAngle = pow(threshold, 1/cosinePower);
|
const float coneAngle = acosf(powf(threshold, 1.0f/cosinePower));
|
||||||
//const float coneAngle = acos(cosineConeAngle);
|
|
||||||
|
|
||||||
|
|
||||||
#if 0
|
#if 1
|
||||||
|
// Gather approach. This should be easier to parallelize, because there's no contention in the filtered output.
|
||||||
|
|
||||||
|
// For each texel of the output cube.
|
||||||
|
// - Determine what texels of the input cube contribute to it.
|
||||||
|
// - Add weighted contributions. Normalize.
|
||||||
|
|
||||||
|
// For each texel of the output cube.
|
||||||
|
/*for (uint f = 0; f < 6; f++) {
|
||||||
|
nvtt::Surface filteredFace = filteredCube.m->face[f];
|
||||||
|
FloatImage * filteredImage = filteredFace.m->image;
|
||||||
|
|
||||||
|
for (uint y = 0; y < uint(size); y++) {
|
||||||
|
for (uint x = 0; x < uint(size); x++) {
|
||||||
|
|
||||||
|
const Vector3 filterDir = texelDirection(f, x, y, 1.0f / size);
|
||||||
|
|
||||||
|
// Convolve filter against cube.
|
||||||
|
Vector3 color = m->applyCosinePowerFilter(filterDir, coneAngle, cosinePower);
|
||||||
|
|
||||||
|
filteredImage->pixel(0, x, y, 0) = color.x;
|
||||||
|
filteredImage->pixel(1, x, y, 0) = color.y;
|
||||||
|
filteredImage->pixel(2, x, y, 0) = color.z;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}*/
|
||||||
|
|
||||||
|
ApplyCosinePowerFilterContext context;
|
||||||
|
context.inputCube = m;
|
||||||
|
context.filteredCube = filteredCube.m;
|
||||||
|
context.coneAngle = coneAngle;
|
||||||
|
context.cosinePower = cosinePower;
|
||||||
|
|
||||||
|
nv::ParallelFor parallelFor(ApplyCosinePowerFilterTask, &context);
|
||||||
|
parallelFor.run(6 * size * size);
|
||||||
|
|
||||||
|
#else
|
||||||
// Scatter approach.
|
// Scatter approach.
|
||||||
|
|
||||||
// For each texel of the input cube.
|
// For each texel of the input cube.
|
||||||
@ -480,54 +624,6 @@ CubeSurface CubeSurface::cosinePowerFilter(int size, float cosinePower) const
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#else
|
|
||||||
|
|
||||||
// Gather approach. This should be easier to parallelize, because there's no contention in the filtered output.
|
|
||||||
|
|
||||||
// For each texel of the output cube.
|
|
||||||
// - Determine what texels of the input cube contribute to it.
|
|
||||||
// - Add weighted contributions. Normalize.
|
|
||||||
|
|
||||||
// For each texel of the output cube. @@ Parallelize this loop.
|
|
||||||
for (uint f = 0; f < 6; f++) {
|
|
||||||
nvtt::Surface filteredFace = filteredCube.m->face[f];
|
|
||||||
FloatImage * filteredImage = filteredFace.m->image;
|
|
||||||
|
|
||||||
for (uint y = 0; y < uint(size); y++) {
|
|
||||||
for (uint x = 0; x < uint(size); x++) {
|
|
||||||
|
|
||||||
const Vector3 filterDir = texelDirection(f, x, y, 1.0f / size);
|
|
||||||
|
|
||||||
// Convolve filter against cube.
|
|
||||||
Vector3 color = m->applyCosinePowerFilter(filterDir, cosineConeAngle, cosinePower);
|
|
||||||
|
|
||||||
filteredImage->pixel(0, x, y, 0) = color.x;
|
|
||||||
filteredImage->pixel(1, x, y, 0) = color.y;
|
|
||||||
filteredImage->pixel(2, x, y, 0) = color.z;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/*int jobCount = 6 * size * size;
|
|
||||||
for (int i = 0; i < jobCount; i++) {
|
|
||||||
int f = i / (size * size);
|
|
||||||
int idx = i % (size * size);
|
|
||||||
int y = idx / size;
|
|
||||||
int x = idx % size;
|
|
||||||
|
|
||||||
nvtt::Surface filteredFace = filteredCube.m->face[f];
|
|
||||||
FloatImage * filteredImage = filteredFace.m->image;
|
|
||||||
|
|
||||||
const Vector3 filterDir = texelDirection(f, x, y, 1.0f / size);
|
|
||||||
|
|
||||||
// Convolve filter against cube.
|
|
||||||
Vector3 color = m->applyCosinePowerFilter(filterDir, coneAngle, cosinePower);
|
|
||||||
|
|
||||||
filteredImage->pixel(0, idx) = color.x;
|
|
||||||
filteredImage->pixel(1, idx) = color.y;
|
|
||||||
filteredImage->pixel(2, idx) = color.z;
|
|
||||||
}*/
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
return filteredCube;
|
return filteredCube;
|
||||||
|
@ -21,8 +21,12 @@
|
|||||||
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
// OTHER DEALINGS IN THE SOFTWARE.
|
// OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
|
||||||
#include <nvcore/StrLib.h>
|
//#include <nvcore/StrLib.h>
|
||||||
|
#include <nvcore/Timer.h>
|
||||||
#include <nvtt/nvtt.h>
|
#include <nvtt/nvtt.h>
|
||||||
|
#include <nvmath/nvmath.h>
|
||||||
|
|
||||||
|
#include "../tools/cmdline.h"
|
||||||
|
|
||||||
#include <stdlib.h> // EXIT_SUCCESS, EXIT_FAILURE
|
#include <stdlib.h> // EXIT_SUCCESS, EXIT_FAILURE
|
||||||
#include <stdio.h> // printf
|
#include <stdio.h> // printf
|
||||||
@ -30,6 +34,9 @@
|
|||||||
|
|
||||||
int main(int argc, char *argv[])
|
int main(int argc, char *argv[])
|
||||||
{
|
{
|
||||||
|
MyAssertHandler assertHandler;
|
||||||
|
MyMessageHandler messageHandler;
|
||||||
|
|
||||||
// Init context.
|
// Init context.
|
||||||
nvtt::Context context;
|
nvtt::Context context;
|
||||||
|
|
||||||
@ -52,26 +59,43 @@ int main(int argc, char *argv[])
|
|||||||
// Setup output options.
|
// Setup output options.
|
||||||
nvtt::OutputOptions outputOptions;
|
nvtt::OutputOptions outputOptions;
|
||||||
outputOptions.setFileName("filtered_envmap.dds");
|
outputOptions.setFileName("filtered_envmap.dds");
|
||||||
|
outputOptions.setSrgbFlag(true);
|
||||||
|
|
||||||
|
|
||||||
|
const int MAX_MIPMAP_COUNT = 7; // nv::log2(64) + 1;
|
||||||
|
//const int mipmapCount = MAX_MIPMAP_COUNT;
|
||||||
|
const int mipmapCount = 4;
|
||||||
|
//const int mipmapCount = 1;
|
||||||
|
|
||||||
// Output header.
|
// Output header.
|
||||||
context.outputHeader(nvtt::TextureType_Cube, 64, 64, 1, 4, false, compressionOptions, outputOptions);
|
context.outputHeader(nvtt::TextureType_Cube, 64, 64, 1, mipmapCount, false, compressionOptions, outputOptions);
|
||||||
|
|
||||||
|
nv::Timer timer;
|
||||||
|
timer.start();
|
||||||
|
|
||||||
|
nvtt::CubeSurface filteredEnvmap[mipmapCount];
|
||||||
|
|
||||||
// Output filtered mipmaps.
|
// Output filtered mipmaps.
|
||||||
for (int m = 0; m < 4; m++) {
|
for (int m = 0; m < mipmapCount; m++) {
|
||||||
int size = 64 / (1 << m); // 64, 32, 16, 8
|
int size = 64 / (1 << m); // 64, 32, 16, 8
|
||||||
float cosine_power = float(64) / (1 << (2 * m)); // 64, 16, 4, 1
|
float cosine_power = float(64) / (1 << (2 * m)); // 64, 16, 4, 1
|
||||||
|
cosine_power = nv::max(1.0f, cosine_power);
|
||||||
|
|
||||||
printf("filtering step: %d/4.\n", m+1);
|
printf("filtering step: %d/%d\n", m+1, mipmapCount);
|
||||||
|
|
||||||
nvtt::CubeSurface filteredEnvmap = envmap.cosinePowerFilter(size, cosine_power);
|
filteredEnvmap[m] = envmap.cosinePowerFilter(size, cosine_power);
|
||||||
|
filteredEnvmap[m].toGamma(2.2f);
|
||||||
filteredEnvmap.toGamma(2.2f);
|
|
||||||
|
|
||||||
context.compress(filteredEnvmap, m, compressionOptions, outputOptions);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
printf("done.\n");
|
for (int f = 0; f < 6; f++) {
|
||||||
|
for (int m = 0; m < mipmapCount; m++) {
|
||||||
|
context.compress(filteredEnvmap[m].face(f), f, m, compressionOptions, outputOptions);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
timer.stop();
|
||||||
|
|
||||||
|
printf("done in %f seconds\n", timer.elapsed());
|
||||||
|
|
||||||
return EXIT_SUCCESS;
|
return EXIT_SUCCESS;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user