diff --git a/project/vc8/nvassemble/nvassemble.rc b/project/vc8/nvassemble/nvassemble.rc
new file mode 100644
index 0000000..842ded1
Binary files /dev/null and b/project/vc8/nvassemble/nvassemble.rc differ
diff --git a/project/vc8/nvassemble/nvassemble.vcproj b/project/vc8/nvassemble/nvassemble.vcproj
new file mode 100644
index 0000000..b953801
--- /dev/null
+++ b/project/vc8/nvassemble/nvassemble.vcproj
@@ -0,0 +1,353 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/project/vc8/nvassemble/nvidia.ico b/project/vc8/nvassemble/nvidia.ico
new file mode 100644
index 0000000..6aa721e
Binary files /dev/null and b/project/vc8/nvassemble/nvidia.ico differ
diff --git a/project/vc8/nvassemble/resource.h b/project/vc8/nvassemble/resource.h
new file mode 100644
index 0000000..e765787
--- /dev/null
+++ b/project/vc8/nvassemble/resource.h
@@ -0,0 +1,16 @@
+//{{NO_DEPENDENCIES}}
+// Microsoft Visual C++ generated include file.
+// Used by nvassemble.rc
+//
+#define IDI_ICON1 101
+
+// Next default values for new objects
+//
+#ifdef APSTUDIO_INVOKED
+#ifndef APSTUDIO_READONLY_SYMBOLS
+#define _APS_NEXT_RESOURCE_VALUE 102
+#define _APS_NEXT_COMMAND_VALUE 40001
+#define _APS_NEXT_CONTROL_VALUE 1000
+#define _APS_NEXT_SYMED_VALUE 101
+#endif
+#endif
diff --git a/project/vc8/nvddsinfo/nvddsinfo.vcproj b/project/vc8/nvddsinfo/nvddsinfo.vcproj
index 7528865..df62cbb 100644
--- a/project/vc8/nvddsinfo/nvddsinfo.vcproj
+++ b/project/vc8/nvddsinfo/nvddsinfo.vcproj
@@ -43,7 +43,11 @@
Optimization="0"
AdditionalIncludeDirectories="..;..\..\..\src;..\..\..\gnuwin32\include"
PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE;NVTT_SHARED;__SSE2__;__SSE__;__MMX__"
+ BasicRuntimeChecks="3"
RuntimeLibrary="3"
+ WarningLevel="3"
+ Detect64BitPortabilityProblems="true"
+ DebugInformationFormat="4"
/>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
@@ -158,78 +235,6 @@
Name="VCPostBuildEventTool"
/>
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+#include
namespace nv
{
@@ -102,10 +102,11 @@ namespace nv
bool hasDX10Header() const;
};
+ NVIMAGE_API Stream & operator<< (Stream & s, DDSHeader & header);
/// DirectDraw Surface. (DDS)
- class DirectDrawSurface
+ class NVIMAGE_CLASS DirectDrawSurface
{
public:
DirectDrawSurface(const char * file);
diff --git a/src/nvtt/cuda/CompressKernel.cu b/src/nvtt/cuda/CompressKernel.cu
index 784a7bf..4db1141 100644
--- a/src/nvtt/cuda/CompressKernel.cu
+++ b/src/nvtt/cuda/CompressKernel.cu
@@ -294,6 +294,7 @@ __device__ float evalPermutation4(const float3 * colors, float3 color_sum, uint
uint akku = 0;
// Compute alpha & beta for this permutation.
+ #pragma unroll
for (int i = 0; i < 16; i++)
{
const uint bits = permutation >> (2*i);
@@ -329,6 +330,7 @@ __device__ float evalPermutation3(const float3 * colors, float3 color_sum, uint
uint akku = 0;
// Compute alpha & beta for this permutation.
+ #pragma unroll
for (int i = 0; i < 16; i++)
{
const uint bits = permutation >> (2*i);
diff --git a/src/nvtt/cuda/CudaCompressDXT.cpp b/src/nvtt/cuda/CudaCompressDXT.cpp
index d49633e..45bc395 100644
--- a/src/nvtt/cuda/CudaCompressDXT.cpp
+++ b/src/nvtt/cuda/CudaCompressDXT.cpp
@@ -109,12 +109,14 @@ void nv::cudaCompressDXT1(const Image * image, const OutputOptions & outputOptio
uint imageSize = w * h * 16 * sizeof(Color32);
uint * blockLinearImage = (uint *) malloc(imageSize);
- convertToBlockLinear(image, blockLinearImage);
+ convertToBlockLinear(image, blockLinearImage); // @@ Do this on the GPU!
const uint blockNum = w * h;
const uint compressedSize = blockNum * 8;
const uint blockMax = 32768; // 49152, 65535
+ clock_t start = clock();
+
// Allocate image in device memory.
uint * d_data = NULL;
cudaMalloc((void**) &d_data, min(imageSize, blockMax * 64U));
@@ -125,8 +127,6 @@ void nv::cudaCompressDXT1(const Image * image, const OutputOptions & outputOptio
setupCompressKernel(compressionOptions.colorWeight.ptr());
- clock_t start = clock();
-
// TODO: Add support for multiple GPUs.
uint bn = 0;
while(bn != blockNum)
diff --git a/src/nvtt/tools/assemble.cpp b/src/nvtt/tools/assemble.cpp
index b8dea45..0e91311 100644
--- a/src/nvtt/tools/assemble.cpp
+++ b/src/nvtt/tools/assemble.cpp
@@ -24,17 +24,17 @@
#include
#include
-#include
-#include
+#include
+#include
#include
+#include
#include "cmdline.h"
// @@ Add decent error messages.
// @@ Add option to resize images.
-// @@ Output DDS header according to surface type.
-// @@ Output images.
+// @@ Add support for reading DDS files with 2D images and possibly mipmaps.
int main(int argc, char *argv[])
{
@@ -46,6 +46,7 @@ int main(int argc, char *argv[])
bool assembleTextureArray = false;
nv::Array files;
+ nv::Path output = "output.dds";
// Parse arguments.
for (int i = 1; i < argc; i++)
@@ -69,7 +70,14 @@ int main(int argc, char *argv[])
assembleVolume = false;
assembleTextureArray = true;
}*/
-
+ else if (strcmp("-o", argv[i]) == 0)
+ {
+ i++;
+ if (i < argc && argv[i][0] != '-')
+ {
+ output = argv[i];
+ }
+ }
else if (argv[i][0] != '-')
{
files.append(argv[i]);
@@ -83,7 +91,13 @@ int main(int argc, char *argv[])
return 1;
}
- if (assembleCubeMap && files.count() != 0)
+ if (nv::strCaseCmp(output.extension(), ".dds") != 0)
+ {
+ //output.stripExtension();
+ output.append(".dds");
+ }
+
+ if (assembleCubeMap && files.count() != 6)
{
printf("*** error, 6 files expected, but got %d\n", files.count());
return 1;
@@ -96,6 +110,8 @@ int main(int argc, char *argv[])
bool hasAlpha = false;
const uint imageCount = files.count();
+ images.resize(imageCount);
+
for (uint i = 0; i < imageCount; i++)
{
if (!images[i].load(files[i]))
@@ -130,9 +146,40 @@ int main(int argc, char *argv[])
return 1;
}
- // @@ Output DDS header.
-
- // @@ Output images.
+ // Output DDS header.
+ nv::DDSHeader header;
+ header.setWidth(w);
+ header.setHeight(h);
+
+ if (assembleCubeMap)
+ {
+ header.setTextureCube();
+ }
+ else if (assembleVolume)
+ {
+ header.setTexture3D();
+ header.setDepth(imageCount);
+ }
+ else if (assembleTextureArray)
+ {
+ //header.setTextureArray(imageCount);
+ }
+
+ // @@ It always outputs 32 bpp.
+ header.setPitch(4 * w);
+ header.setPixelFormat(32, 0xFF0000, 0xFF00, 0xFF, hasAlpha ? 0xFF000000 : 0);
+
+ stream << header;
+
+ // Output images.
+ for (uint i = 0; i < imageCount; i++)
+ {
+ const uint pixelCount = w * h;
+ for (uint p = 0; p < pixelCount; p++)
+ {
+ stream << images[i].pixel(p).b << images[i].pixel(p).g << images[i].pixel(p).r << images[i].pixel(p).a;
+ }
+ }
return 0;
}