diff --git a/project/vc8/nvassemble/nvassemble.rc b/project/vc8/nvassemble/nvassemble.rc new file mode 100644 index 0000000..842ded1 Binary files /dev/null and b/project/vc8/nvassemble/nvassemble.rc differ diff --git a/project/vc8/nvassemble/nvassemble.vcproj b/project/vc8/nvassemble/nvassemble.vcproj new file mode 100644 index 0000000..b953801 --- /dev/null +++ b/project/vc8/nvassemble/nvassemble.vcproj @@ -0,0 +1,353 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/project/vc8/nvassemble/nvidia.ico b/project/vc8/nvassemble/nvidia.ico new file mode 100644 index 0000000..6aa721e Binary files /dev/null and b/project/vc8/nvassemble/nvidia.ico differ diff --git a/project/vc8/nvassemble/resource.h b/project/vc8/nvassemble/resource.h new file mode 100644 index 0000000..e765787 --- /dev/null +++ b/project/vc8/nvassemble/resource.h @@ -0,0 +1,16 @@ +//{{NO_DEPENDENCIES}} +// Microsoft Visual C++ generated include file. +// Used by nvcompress.rc +// +#define IDI_ICON1 101 + +// Next default values for new objects +// +#ifdef APSTUDIO_INVOKED +#ifndef APSTUDIO_READONLY_SYMBOLS +#define _APS_NEXT_RESOURCE_VALUE 102 +#define _APS_NEXT_COMMAND_VALUE 40001 +#define _APS_NEXT_CONTROL_VALUE 1000 +#define _APS_NEXT_SYMED_VALUE 101 +#endif +#endif diff --git a/project/vc8/nvddsinfo/nvddsinfo.vcproj b/project/vc8/nvddsinfo/nvddsinfo.vcproj index 7528865..df62cbb 100644 --- a/project/vc8/nvddsinfo/nvddsinfo.vcproj +++ b/project/vc8/nvddsinfo/nvddsinfo.vcproj @@ -43,7 +43,11 @@ Optimization="0" AdditionalIncludeDirectories="..;..\..\..\src;..\..\..\gnuwin32\include" PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE;NVTT_SHARED;__SSE2__;__SSE__;__MMX__" + BasicRuntimeChecks="3" RuntimeLibrary="3" + WarningLevel="3" + Detect64BitPortabilityProblems="true" + DebugInformationFormat="4" /> + + + + + + + + + + + + + + + + + + + + @@ -158,78 +235,6 @@ Name="VCPostBuildEventTool" /> - - - - - - - - - - - - - - - - - - - - +#include namespace nv { @@ -102,10 +102,11 @@ namespace nv bool hasDX10Header() const; }; + NVIMAGE_API Stream & operator<< (Stream & s, DDSHeader & header); /// DirectDraw Surface. (DDS) - class DirectDrawSurface + class NVIMAGE_CLASS DirectDrawSurface { public: DirectDrawSurface(const char * file); diff --git a/src/nvtt/cuda/CompressKernel.cu b/src/nvtt/cuda/CompressKernel.cu index 784a7bf..4db1141 100644 --- a/src/nvtt/cuda/CompressKernel.cu +++ b/src/nvtt/cuda/CompressKernel.cu @@ -294,6 +294,7 @@ __device__ float evalPermutation4(const float3 * colors, float3 color_sum, uint uint akku = 0; // Compute alpha & beta for this permutation. + #pragma unroll for (int i = 0; i < 16; i++) { const uint bits = permutation >> (2*i); @@ -329,6 +330,7 @@ __device__ float evalPermutation3(const float3 * colors, float3 color_sum, uint uint akku = 0; // Compute alpha & beta for this permutation. + #pragma unroll for (int i = 0; i < 16; i++) { const uint bits = permutation >> (2*i); diff --git a/src/nvtt/cuda/CudaCompressDXT.cpp b/src/nvtt/cuda/CudaCompressDXT.cpp index d49633e..45bc395 100644 --- a/src/nvtt/cuda/CudaCompressDXT.cpp +++ b/src/nvtt/cuda/CudaCompressDXT.cpp @@ -109,12 +109,14 @@ void nv::cudaCompressDXT1(const Image * image, const OutputOptions & outputOptio uint imageSize = w * h * 16 * sizeof(Color32); uint * blockLinearImage = (uint *) malloc(imageSize); - convertToBlockLinear(image, blockLinearImage); + convertToBlockLinear(image, blockLinearImage); // @@ Do this on the GPU! const uint blockNum = w * h; const uint compressedSize = blockNum * 8; const uint blockMax = 32768; // 49152, 65535 + clock_t start = clock(); + // Allocate image in device memory. uint * d_data = NULL; cudaMalloc((void**) &d_data, min(imageSize, blockMax * 64U)); @@ -125,8 +127,6 @@ void nv::cudaCompressDXT1(const Image * image, const OutputOptions & outputOptio setupCompressKernel(compressionOptions.colorWeight.ptr()); - clock_t start = clock(); - // TODO: Add support for multiple GPUs. uint bn = 0; while(bn != blockNum) diff --git a/src/nvtt/tools/assemble.cpp b/src/nvtt/tools/assemble.cpp index b8dea45..0e91311 100644 --- a/src/nvtt/tools/assemble.cpp +++ b/src/nvtt/tools/assemble.cpp @@ -24,17 +24,17 @@ #include #include -#include -#include +#include +#include #include +#include #include "cmdline.h" // @@ Add decent error messages. // @@ Add option to resize images. -// @@ Output DDS header according to surface type. -// @@ Output images. +// @@ Add support for reading DDS files with 2D images and possibly mipmaps. int main(int argc, char *argv[]) { @@ -46,6 +46,7 @@ int main(int argc, char *argv[]) bool assembleTextureArray = false; nv::Array files; + nv::Path output = "output.dds"; // Parse arguments. for (int i = 1; i < argc; i++) @@ -69,7 +70,14 @@ int main(int argc, char *argv[]) assembleVolume = false; assembleTextureArray = true; }*/ - + else if (strcmp("-o", argv[i]) == 0) + { + i++; + if (i < argc && argv[i][0] != '-') + { + output = argv[i]; + } + } else if (argv[i][0] != '-') { files.append(argv[i]); @@ -83,7 +91,13 @@ int main(int argc, char *argv[]) return 1; } - if (assembleCubeMap && files.count() != 0) + if (nv::strCaseCmp(output.extension(), ".dds") != 0) + { + //output.stripExtension(); + output.append(".dds"); + } + + if (assembleCubeMap && files.count() != 6) { printf("*** error, 6 files expected, but got %d\n", files.count()); return 1; @@ -96,6 +110,8 @@ int main(int argc, char *argv[]) bool hasAlpha = false; const uint imageCount = files.count(); + images.resize(imageCount); + for (uint i = 0; i < imageCount; i++) { if (!images[i].load(files[i])) @@ -130,9 +146,40 @@ int main(int argc, char *argv[]) return 1; } - // @@ Output DDS header. - - // @@ Output images. + // Output DDS header. + nv::DDSHeader header; + header.setWidth(w); + header.setHeight(h); + + if (assembleCubeMap) + { + header.setTextureCube(); + } + else if (assembleVolume) + { + header.setTexture3D(); + header.setDepth(imageCount); + } + else if (assembleTextureArray) + { + //header.setTextureArray(imageCount); + } + + // @@ It always outputs 32 bpp. + header.setPitch(4 * w); + header.setPixelFormat(32, 0xFF0000, 0xFF00, 0xFF, hasAlpha ? 0xFF000000 : 0); + + stream << header; + + // Output images. + for (uint i = 0; i < imageCount; i++) + { + const uint pixelCount = w * h; + for (uint p = 0; p < pixelCount; p++) + { + stream << images[i].pixel(p).b << images[i].pixel(p).g << images[i].pixel(p).r << images[i].pixel(p).a; + } + } return 0; }