Add BC7 support. It's incredibly slow - ~60 seconds to compress a 512x512 image, on a Core i7 - but it works.

- Added AVPCL compressor to projects and got it building with VC9 and VC10.
- Removed unused command line interface & file read/write code from AVPCL.
- Converted AVPCL to use NV vector math lib, asserts, etc.
- Converted AVPCL to use double instead of float.
- Added 4x4 symmetric eigensolver, for AVPCL; it's based on the existing 3x3 one, but I had to rewrite the Householder reduction stage.  As with ZOH, using the eigensolver (instead of SVD) gives a ~25% speedup without significantly affecting RMSE.
- Encapsulated ZOH and AVPCL stuff into their own namespaces to keep everything separate.
- Added some missing vector operators to the nvmath lib.
This commit is contained in:
nathaniel.reed@gmail.com 2013-12-07 02:17:08 +00:00
parent f2fa0517b5
commit ab316deeaa
86 changed files with 2944 additions and 11081 deletions

View File

@ -0,0 +1,152 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<!-- Build configurations this project offers: Debug and Release, each for the Win32 and x64 platforms. -->
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
<Configuration>Debug</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|Win32">
<Configuration>Release</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<!-- Project identity. NOTE(review): projects and solutions that reference bc7 presumably do so through this GUID; keep it in sync with them if it ever changes. -->
<PropertyGroup Label="Globals">
<ProjectGuid>{F974F34B-AF02-4C88-8E1E-85475094EA78}</ProjectGuid>
<RootNamespace>bc7</RootNamespace>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<!-- Per-configuration basics: all four configurations build a static library with the multi-byte character set. -->
<!-- Only the two Release configurations additionally turn on whole-program optimization. -->
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
<ConfigurationType>StaticLibrary</ConfigurationType>
<CharacterSet>MultiByte</CharacterSet>
<WholeProgramOptimization>true</WholeProgramOptimization>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
<ConfigurationType>StaticLibrary</ConfigurationType>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
<ConfigurationType>StaticLibrary</ConfigurationType>
<CharacterSet>MultiByte</CharacterSet>
<WholeProgramOptimization>true</WholeProgramOptimization>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
<ConfigurationType>StaticLibrary</ConfigurationType>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
</ImportGroup>
<!-- Property sheets, identical for every configuration: -->
<!--   1. the per-user platform sheet, imported only when that file exists; -->
<!--   2. nvtt.props from the solution directory, imported unconditionally (the build fails if it is missing). -->
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
<Import Project="$(SolutionDir)\nvtt.props" />
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
<Import Project="$(SolutionDir)\nvtt.props" />
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
<Import Project="$(SolutionDir)\nvtt.props" />
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
<Import Project="$(SolutionDir)\nvtt.props" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<!-- Output (OutDir) and intermediate (IntDir) directories. -->
<!-- Every configuration uses the same pattern for both: $(Configuration)\$(Platform)\ , so each configuration/platform pair gets its own folder. -->
<PropertyGroup>
<_ProjectFileVersion>10.0.30319.1</_ProjectFileVersion>
<OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Configuration)\$(Platform)\</OutDir>
<IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Configuration)\$(Platform)\</IntDir>
<OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Configuration)\$(Platform)\</OutDir>
<IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(Configuration)\$(Platform)\</IntDir>
<OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Configuration)\$(Platform)\</OutDir>
<IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Configuration)\$(Platform)\</IntDir>
<OutDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Configuration)\$(Platform)\</OutDir>
<IntDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(Configuration)\$(Platform)\</IntDir>
</PropertyGroup>
<!-- Compiler settings for Debug|Win32: no optimization, fast runtime checks, debug DLL runtime, Edit and Continue debug information. -->
<!-- Include paths are the solution directory, the src tree two levels above it, and extern\poshlib, followed by any inherited paths. -->
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<ClCompile>
<Optimization>Disabled</Optimization>
<MinimalRebuild>true</MinimalRebuild>
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
<RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
<WarningLevel>Level3</WarningLevel>
<DebugInformationFormat>EditAndContinue</DebugInformationFormat>
<AdditionalIncludeDirectories>$(SolutionDir);$(SolutionDir)\..\..\src;$(SolutionDir)\..\..\extern\poshlib;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
</ClCompile>
</ItemDefinitionGroup>
<!-- Compiler settings for Debug|x64: no optimization, fast runtime checks, debug DLL runtime. -->
<!-- Debug information is ProgramDatabase here rather than EditAndContinue. -->
<!-- NOTE(review): presumably because Edit and Continue is unavailable for x64 with this toolset; confirm. -->
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<Midl>
<TargetEnvironment>X64</TargetEnvironment>
</Midl>
<ClCompile>
<Optimization>Disabled</Optimization>
<MinimalRebuild>true</MinimalRebuild>
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
<RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
<WarningLevel>Level3</WarningLevel>
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
<AdditionalIncludeDirectories>$(SolutionDir);$(SolutionDir)\..\..\src;$(SolutionDir)\..\..\extern\poshlib;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
</ClCompile>
</ItemDefinitionGroup>
<!-- Compiler settings for Release|Win32: optimize for speed, intrinsics, string pooling, release DLL runtime, SSE2 code generation, -->
<!-- program-database debug information and multi-processor compilation. Function-level linking is explicitly switched off. -->
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<ClCompile>
<Optimization>MaxSpeed</Optimization>
<IntrinsicFunctions>true</IntrinsicFunctions>
<StringPooling>true</StringPooling>
<RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
<FunctionLevelLinking>false</FunctionLevelLinking>
<EnableEnhancedInstructionSet>StreamingSIMDExtensions2</EnableEnhancedInstructionSet>
<WarningLevel>Level3</WarningLevel>
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
<AdditionalIncludeDirectories>$(SolutionDir);$(SolutionDir)\..\..\src;$(SolutionDir)\..\..\extern\poshlib;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
</ClCompile>
</ItemDefinitionGroup>
<!-- Compiler settings for Release|x64: optimize for speed, intrinsics, release DLL runtime, program-database debug information. -->
<!-- Function-level linking is explicitly switched off. No EnableEnhancedInstructionSet is given (presumably because SSE2 is always available on x64). -->
<!-- NOTE(review): unlike Release|Win32, this configuration sets neither StringPooling nor MultiProcessorCompilation; confirm whether that is intentional. -->
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<Midl>
<TargetEnvironment>X64</TargetEnvironment>
</Midl>
<ClCompile>
<Optimization>MaxSpeed</Optimization>
<IntrinsicFunctions>true</IntrinsicFunctions>
<RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
<FunctionLevelLinking>false</FunctionLevelLinking>
<WarningLevel>Level3</WarningLevel>
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
<AdditionalIncludeDirectories>$(SolutionDir);$(SolutionDir)\..\..\src;$(SolutionDir)\..\..\extern\poshlib;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
</ClCompile>
</ItemDefinitionGroup>
<!-- Header files of the bc7 library, all located in src\nvtt\bc7 (paths are relative to this project file). -->
<ItemGroup>
<ClInclude Include="..\..\..\src\nvtt\bc7\avpcl.h" />
<ClInclude Include="..\..\..\src\nvtt\bc7\bits.h" />
<ClInclude Include="..\..\..\src\nvtt\bc7\endpts.h" />
<ClInclude Include="..\..\..\src\nvtt\bc7\shapes_two.h" />
<ClInclude Include="..\..\..\src\nvtt\bc7\shapes_three.h" />
<ClInclude Include="..\..\..\src\nvtt\bc7\tile.h" />
<ClInclude Include="..\..\..\src\nvtt\bc7\utils.h" />
</ItemGroup>
<!-- Translation units compiled into the bc7 static library: avpcl.cpp, avpcl_mode0.cpp through avpcl_mode7.cpp -->
<!-- (presumably one per BC7 block mode; confirm against the sources), and utils.cpp. -->
<ItemGroup>
<ClCompile Include="..\..\..\src\nvtt\bc7\avpcl.cpp" />
<ClCompile Include="..\..\..\src\nvtt\bc7\avpcl_mode0.cpp" />
<ClCompile Include="..\..\..\src\nvtt\bc7\avpcl_mode1.cpp" />
<ClCompile Include="..\..\..\src\nvtt\bc7\avpcl_mode2.cpp" />
<ClCompile Include="..\..\..\src\nvtt\bc7\avpcl_mode3.cpp" />
<ClCompile Include="..\..\..\src\nvtt\bc7\avpcl_mode4.cpp" />
<ClCompile Include="..\..\..\src\nvtt\bc7\avpcl_mode5.cpp" />
<ClCompile Include="..\..\..\src\nvtt\bc7\avpcl_mode6.cpp" />
<ClCompile Include="..\..\..\src\nvtt\bc7\avpcl_mode7.cpp" />
<ClCompile Include="..\..\..\src\nvtt\bc7\utils.cpp" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>

View File

@ -181,6 +181,9 @@
<ProjectReference Include="..\bc6h\bc6h.vcxproj"> <ProjectReference Include="..\bc6h\bc6h.vcxproj">
<Project>{c33787e3-5564-4834-9fe3-a9020455a669}</Project> <Project>{c33787e3-5564-4834-9fe3-a9020455a669}</Project>
</ProjectReference> </ProjectReference>
<ProjectReference Include="..\bc7\bc7.vcxproj">
<Project>{f974f34b-af02-4c88-8e1e-85475094ea78}</Project>
</ProjectReference>
<ProjectReference Include="..\nvcore\nvcore.vcxproj"> <ProjectReference Include="..\nvcore\nvcore.vcxproj">
<Project>{f143d180-d4c4-4037-b3de-be89a21c8d1d}</Project> <Project>{f143d180-d4c4-4037-b3de-be89a21c8d1d}</Project>
<ReferenceOutputAssembly>false</ReferenceOutputAssembly> <ReferenceOutputAssembly>false</ReferenceOutputAssembly>

View File

@ -355,6 +355,9 @@
<ProjectReference Include="..\bc6h\bc6h.vcxproj"> <ProjectReference Include="..\bc6h\bc6h.vcxproj">
<Project>{c33787e3-5564-4834-9fe3-a9020455a669}</Project> <Project>{c33787e3-5564-4834-9fe3-a9020455a669}</Project>
</ProjectReference> </ProjectReference>
<ProjectReference Include="..\bc7\bc7.vcxproj">
<Project>{f974f34b-af02-4c88-8e1e-85475094ea78}</Project>
</ProjectReference>
<ProjectReference Include="..\nvcore\nvcore.vcxproj"> <ProjectReference Include="..\nvcore\nvcore.vcxproj">
<Project>{f143d180-d4c4-4037-b3de-be89a21c8d1d}</Project> <Project>{f143d180-d4c4-4037-b3de-be89a21c8d1d}</Project>
<ReferenceOutputAssembly>false</ReferenceOutputAssembly> <ReferenceOutputAssembly>false</ReferenceOutputAssembly>

View File

@ -181,6 +181,9 @@
<ProjectReference Include="..\bc6h\bc6h.vcxproj"> <ProjectReference Include="..\bc6h\bc6h.vcxproj">
<Project>{c33787e3-5564-4834-9fe3-a9020455a669}</Project> <Project>{c33787e3-5564-4834-9fe3-a9020455a669}</Project>
</ProjectReference> </ProjectReference>
<ProjectReference Include="..\bc7\bc7.vcxproj">
<Project>{f974f34b-af02-4c88-8e1e-85475094ea78}</Project>
</ProjectReference>
<ProjectReference Include="..\nvcore\nvcore.vcxproj"> <ProjectReference Include="..\nvcore\nvcore.vcxproj">
<Project>{f143d180-d4c4-4037-b3de-be89a21c8d1d}</Project> <Project>{f143d180-d4c4-4037-b3de-be89a21c8d1d}</Project>
<ReferenceOutputAssembly>false</ReferenceOutputAssembly> <ReferenceOutputAssembly>false</ReferenceOutputAssembly>

View File

@ -207,6 +207,9 @@
<ProjectReference Include="..\bc6h\bc6h.vcxproj"> <ProjectReference Include="..\bc6h\bc6h.vcxproj">
<Project>{c33787e3-5564-4834-9fe3-a9020455a669}</Project> <Project>{c33787e3-5564-4834-9fe3-a9020455a669}</Project>
</ProjectReference> </ProjectReference>
<ProjectReference Include="..\bc7\bc7.vcxproj">
<Project>{f974f34b-af02-4c88-8e1e-85475094ea78}</Project>
</ProjectReference>
<ProjectReference Include="..\nvcore\nvcore.vcxproj"> <ProjectReference Include="..\nvcore\nvcore.vcxproj">
<Project>{f143d180-d4c4-4037-b3de-be89a21c8d1d}</Project> <Project>{f143d180-d4c4-4037-b3de-be89a21c8d1d}</Project>
<ReferenceOutputAssembly>false</ReferenceOutputAssembly> <ReferenceOutputAssembly>false</ReferenceOutputAssembly>

View File

@ -209,6 +209,9 @@
<ProjectReference Include="..\bc6h\bc6h.vcxproj"> <ProjectReference Include="..\bc6h\bc6h.vcxproj">
<Project>{c33787e3-5564-4834-9fe3-a9020455a669}</Project> <Project>{c33787e3-5564-4834-9fe3-a9020455a669}</Project>
</ProjectReference> </ProjectReference>
<ProjectReference Include="..\bc7\bc7.vcxproj">
<Project>{f974f34b-af02-4c88-8e1e-85475094ea78}</Project>
</ProjectReference>
<ProjectReference Include="..\nvcore\nvcore.vcxproj"> <ProjectReference Include="..\nvcore\nvcore.vcxproj">
<Project>{f143d180-d4c4-4037-b3de-be89a21c8d1d}</Project> <Project>{f143d180-d4c4-4037-b3de-be89a21c8d1d}</Project>
<ReferenceOutputAssembly>false</ReferenceOutputAssembly> <ReferenceOutputAssembly>false</ReferenceOutputAssembly>

View File

@ -45,6 +45,8 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "bc6h", "bc6h\bc6h.vcxproj",
EndProject EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvthread", "nvthread\nvthread.vcxproj", "{4CFD4876-A026-46C2-AFCF-FB11346E815D}" Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvthread", "nvthread\nvthread.vcxproj", "{4CFD4876-A026-46C2-AFCF-FB11346E815D}"
EndProject EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "bc7", "bc7\bc7.vcxproj", "{F974F34B-AF02-4C88-8E1E-85475094EA78}"
EndProject
Global Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Win32 = Debug|Win32 Debug|Win32 = Debug|Win32
@ -313,6 +315,22 @@ Global
{4CFD4876-A026-46C2-AFCF-FB11346E815D}.Release-CUDA|Win32.Build.0 = Release|Win32 {4CFD4876-A026-46C2-AFCF-FB11346E815D}.Release-CUDA|Win32.Build.0 = Release|Win32
{4CFD4876-A026-46C2-AFCF-FB11346E815D}.Release-CUDA|x64.ActiveCfg = Release|x64 {4CFD4876-A026-46C2-AFCF-FB11346E815D}.Release-CUDA|x64.ActiveCfg = Release|x64
{4CFD4876-A026-46C2-AFCF-FB11346E815D}.Release-CUDA|x64.Build.0 = Release|x64 {4CFD4876-A026-46C2-AFCF-FB11346E815D}.Release-CUDA|x64.Build.0 = Release|x64
{F974F34B-AF02-4C88-8E1E-85475094EA78}.Debug|Win32.ActiveCfg = Debug|Win32
{F974F34B-AF02-4C88-8E1E-85475094EA78}.Debug|Win32.Build.0 = Debug|Win32
{F974F34B-AF02-4C88-8E1E-85475094EA78}.Debug|x64.ActiveCfg = Debug|x64
{F974F34B-AF02-4C88-8E1E-85475094EA78}.Debug|x64.Build.0 = Debug|x64
{F974F34B-AF02-4C88-8E1E-85475094EA78}.Debug-CUDA|Win32.ActiveCfg = Debug|Win32
{F974F34B-AF02-4C88-8E1E-85475094EA78}.Debug-CUDA|Win32.Build.0 = Debug|Win32
{F974F34B-AF02-4C88-8E1E-85475094EA78}.Debug-CUDA|x64.ActiveCfg = Debug|x64
{F974F34B-AF02-4C88-8E1E-85475094EA78}.Debug-CUDA|x64.Build.0 = Debug|x64
{F974F34B-AF02-4C88-8E1E-85475094EA78}.Release|Win32.ActiveCfg = Release|Win32
{F974F34B-AF02-4C88-8E1E-85475094EA78}.Release|Win32.Build.0 = Release|Win32
{F974F34B-AF02-4C88-8E1E-85475094EA78}.Release|x64.ActiveCfg = Release|x64
{F974F34B-AF02-4C88-8E1E-85475094EA78}.Release|x64.Build.0 = Release|x64
{F974F34B-AF02-4C88-8E1E-85475094EA78}.Release-CUDA|Win32.ActiveCfg = Release|Win32
{F974F34B-AF02-4C88-8E1E-85475094EA78}.Release-CUDA|Win32.Build.0 = Release|Win32
{F974F34B-AF02-4C88-8E1E-85475094EA78}.Release-CUDA|x64.ActiveCfg = Release|x64
{F974F34B-AF02-4C88-8E1E-85475094EA78}.Release-CUDA|x64.Build.0 = Release|x64
EndGlobalSection EndGlobalSection
GlobalSection(SolutionProperties) = preSolution GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE HideSolutionNode = FALSE

View File

@ -471,6 +471,9 @@
<Project>{c33787e3-5564-4834-9fe3-a9020455a669}</Project> <Project>{c33787e3-5564-4834-9fe3-a9020455a669}</Project>
<ReferenceOutputAssembly>false</ReferenceOutputAssembly> <ReferenceOutputAssembly>false</ReferenceOutputAssembly>
</ProjectReference> </ProjectReference>
<ProjectReference Include="..\bc7\bc7.vcxproj">
<Project>{f974f34b-af02-4c88-8e1e-85475094ea78}</Project>
</ProjectReference>
<ProjectReference Include="..\nvcore\nvcore.vcxproj"> <ProjectReference Include="..\nvcore\nvcore.vcxproj">
<Project>{f143d180-d4c4-4037-b3de-be89a21c8d1d}</Project> <Project>{f143d180-d4c4-4037-b3de-be89a21c8d1d}</Project>
<ReferenceOutputAssembly>false</ReferenceOutputAssembly> <ReferenceOutputAssembly>false</ReferenceOutputAssembly>

View File

@ -200,6 +200,9 @@
<ProjectReference Include="..\bc6h\bc6h.vcxproj"> <ProjectReference Include="..\bc6h\bc6h.vcxproj">
<Project>{c33787e3-5564-4834-9fe3-a9020455a669}</Project> <Project>{c33787e3-5564-4834-9fe3-a9020455a669}</Project>
</ProjectReference> </ProjectReference>
<ProjectReference Include="..\bc7\bc7.vcxproj">
<Project>{f974f34b-af02-4c88-8e1e-85475094ea78}</Project>
</ProjectReference>
<ProjectReference Include="..\nvcore\nvcore.vcxproj"> <ProjectReference Include="..\nvcore\nvcore.vcxproj">
<Project>{f143d180-d4c4-4037-b3de-be89a21c8d1d}</Project> <Project>{f143d180-d4c4-4037-b3de-be89a21c8d1d}</Project>
<ReferenceOutputAssembly>false</ReferenceOutputAssembly> <ReferenceOutputAssembly>false</ReferenceOutputAssembly>

340
project/vc9/bc7/bc7.vcproj Normal file
View File

@ -0,0 +1,340 @@
<?xml version="1.0" encoding="Windows-1252"?>
<VisualStudioProject
ProjectType="Visual C++"
Version="9.00"
Name="bc7"
ProjectGUID="{F974F34B-AF02-4C88-8E1E-85475094EA78}"
RootNamespace="bc7"
TargetFrameworkVersion="196613"
>
<!-- Target platforms this project can be built for; each has a Debug and a Release configuration below. -->
<Platforms>
<Platform
Name="Win32"
/>
<Platform
Name="x64"
/>
</Platforms>
<ToolFiles>
</ToolFiles>
<Configurations>
<!-- Debug|Win32. Output and intermediate files both go to $(ConfigurationName)\$(PlatformName); shared settings come from nvtt.vsprops. -->
<!-- Numeric attribute values are VCProjectEngine enum codes. NOTE(review): as far as I can tell ConfigurationType 4 = static library, -->
<!-- Optimization 0 = disabled, BasicRuntimeChecks 3 = all fast checks, RuntimeLibrary 3 = multi-threaded debug DLL, -->
<!-- DebugInformationFormat 3 = program database; confirm against the VCProjectEngine documentation. -->
<!-- Only VCCLCompilerTool carries explicit settings; the remaining Tool entries are left at their defaults. -->
<Configuration
Name="Debug|Win32"
OutputDirectory="$(ConfigurationName)\$(PlatformName)"
IntermediateDirectory="$(ConfigurationName)\$(PlatformName)"
ConfigurationType="4"
InheritedPropertySheets="$(SolutionDir)\nvtt.vsprops"
CharacterSet="0"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="3"
WarningLevel="3"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLibrarianTool"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<!-- Debug|x64. Same compiler settings as Debug|Win32; the only extra setting is TargetEnvironment on the MIDL tool -->
<!-- (NOTE(review): value 3 presumably selects the x64 MIDL environment; confirm against the VCProjectEngine documentation). -->
<!-- Output and intermediate files both go to $(ConfigurationName)\$(PlatformName); shared settings come from nvtt.vsprops. -->
<Configuration
Name="Debug|x64"
OutputDirectory="$(ConfigurationName)\$(PlatformName)"
IntermediateDirectory="$(ConfigurationName)\$(PlatformName)"
ConfigurationType="4"
InheritedPropertySheets="$(SolutionDir)\nvtt.vsprops"
CharacterSet="0"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TargetEnvironment="3"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="3"
WarningLevel="3"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLibrarianTool"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<!-- Release|Win32. Adds WholeProgramOptimization at the configuration level; the compiler tool enables intrinsics and string pooling -->
<!-- and switches function-level linking off. -->
<!-- NOTE(review): numeric values are VCProjectEngine enum codes; as far as I can tell Optimization 2 = maximize speed, -->
<!-- RuntimeLibrary 2 = multi-threaded DLL, EnableEnhancedInstructionSet 2 = SSE2; confirm against the VCProjectEngine documentation. -->
<!-- NOTE(review): no DebugInformationFormat is set here, unlike the Debug configurations; confirm whether release debug info is wanted. -->
<Configuration
Name="Release|Win32"
OutputDirectory="$(ConfigurationName)\$(PlatformName)"
IntermediateDirectory="$(ConfigurationName)\$(PlatformName)"
ConfigurationType="4"
InheritedPropertySheets="$(SolutionDir)\nvtt.vsprops"
CharacterSet="0"
WholeProgramOptimization="1"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="2"
EnableIntrinsicFunctions="true"
StringPooling="true"
RuntimeLibrary="2"
EnableFunctionLevelLinking="false"
EnableEnhancedInstructionSet="2"
WarningLevel="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLibrarianTool"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<!-- Release|x64. Adds WholeProgramOptimization at the configuration level and TargetEnvironment on the MIDL tool; -->
<!-- the compiler tool enables intrinsics and switches function-level linking off. -->
<!-- NOTE(review): unlike Release|Win32 this configuration sets neither StringPooling nor EnableEnhancedInstructionSet, -->
<!-- and (like Release|Win32) no DebugInformationFormat; confirm whether the StringPooling omission is intentional. -->
<Configuration
Name="Release|x64"
OutputDirectory="$(ConfigurationName)\$(PlatformName)"
IntermediateDirectory="$(ConfigurationName)\$(PlatformName)"
ConfigurationType="4"
InheritedPropertySheets="$(SolutionDir)\nvtt.vsprops"
CharacterSet="0"
WholeProgramOptimization="1"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TargetEnvironment="3"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="2"
EnableIntrinsicFunctions="true"
RuntimeLibrary="2"
EnableFunctionLevelLinking="false"
WarningLevel="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLibrarianTool"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
</Configurations>
<References>
</References>
<!-- Sources and headers of the bc7 library, all located in src\nvtt\bc7 (paths are relative to this project file). -->
<!-- The list is flat (no filters) and carries no per-file build settings. -->
<Files>
<File
RelativePath="..\..\..\src\nvtt\bc7\avpcl.cpp"
>
</File>
<File
RelativePath="..\..\..\src\nvtt\bc7\avpcl.h"
>
</File>
<File
RelativePath="..\..\..\src\nvtt\bc7\avpcl_mode0.cpp"
>
</File>
<File
RelativePath="..\..\..\src\nvtt\bc7\avpcl_mode1.cpp"
>
</File>
<File
RelativePath="..\..\..\src\nvtt\bc7\avpcl_mode2.cpp"
>
</File>
<File
RelativePath="..\..\..\src\nvtt\bc7\avpcl_mode3.cpp"
>
</File>
<File
RelativePath="..\..\..\src\nvtt\bc7\avpcl_mode4.cpp"
>
</File>
<File
RelativePath="..\..\..\src\nvtt\bc7\avpcl_mode5.cpp"
>
</File>
<File
RelativePath="..\..\..\src\nvtt\bc7\avpcl_mode6.cpp"
>
</File>
<File
RelativePath="..\..\..\src\nvtt\bc7\avpcl_mode7.cpp"
>
</File>
<File
RelativePath="..\..\..\src\nvtt\bc7\bits.h"
>
</File>
<File
RelativePath="..\..\..\src\nvtt\bc7\endpts.h"
>
</File>
<File
RelativePath="..\..\..\src\nvtt\bc7\shapes_three.h"
>
</File>
<File
RelativePath="..\..\..\src\nvtt\bc7\shapes_two.h"
>
</File>
<File
RelativePath="..\..\..\src\nvtt\bc7\tile.h"
>
</File>
<File
RelativePath="..\..\..\src\nvtt\bc7\utils.cpp"
>
</File>
<File
RelativePath="..\..\..\src\nvtt\bc7\utils.h"
>
</File>
</Files>
<Globals>
</Globals>
</VisualStudioProject>

View File

@ -5,6 +5,7 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvtt", "nvtt\nvtt.vcproj",
ProjectSection(ProjectDependencies) = postProject ProjectSection(ProjectDependencies) = postProject
{CE017322-01FC-4851-9C8B-64E9A8E26C38} = {CE017322-01FC-4851-9C8B-64E9A8E26C38} {CE017322-01FC-4851-9C8B-64E9A8E26C38} = {CE017322-01FC-4851-9C8B-64E9A8E26C38}
{3DD3A43D-C6EA-460F-821B-6C339A03C5BB} = {3DD3A43D-C6EA-460F-821B-6C339A03C5BB} {3DD3A43D-C6EA-460F-821B-6C339A03C5BB} = {3DD3A43D-C6EA-460F-821B-6C339A03C5BB}
{F974F34B-AF02-4C88-8E1E-85475094EA78} = {F974F34B-AF02-4C88-8E1E-85475094EA78}
{F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531} {4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
{C33787E3-5564-4834-9FE3-A9020455A669} = {C33787E3-5564-4834-9FE3-A9020455A669} {C33787E3-5564-4834-9FE3-A9020455A669} = {C33787E3-5564-4834-9FE3-A9020455A669}
@ -13,6 +14,7 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvtt", "nvtt\nvtt.vcproj",
EndProject EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvcompress", "nvcompress\nvcompress.vcproj", "{88079E38-83AA-4E8A-B18A-66A78D1B058B}" Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvcompress", "nvcompress\nvcompress.vcproj", "{88079E38-83AA-4E8A-B18A-66A78D1B058B}"
ProjectSection(ProjectDependencies) = postProject ProjectSection(ProjectDependencies) = postProject
{F974F34B-AF02-4C88-8E1E-85475094EA78} = {F974F34B-AF02-4C88-8E1E-85475094EA78}
{F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}
{1AEB7681-57D8-48EE-813D-5C41CC38B647} = {1AEB7681-57D8-48EE-813D-5C41CC38B647} {1AEB7681-57D8-48EE-813D-5C41CC38B647} = {1AEB7681-57D8-48EE-813D-5C41CC38B647}
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531} {4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
@ -35,6 +37,7 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "squish", "squish\squish.vcp
EndProject EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvddsinfo", "nvddsinfo\nvddsinfo.vcproj", "{841B73C5-C679-4EEF-A50A-7D6106642B49}" Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvddsinfo", "nvddsinfo\nvddsinfo.vcproj", "{841B73C5-C679-4EEF-A50A-7D6106642B49}"
ProjectSection(ProjectDependencies) = postProject ProjectSection(ProjectDependencies) = postProject
{F974F34B-AF02-4C88-8E1E-85475094EA78} = {F974F34B-AF02-4C88-8E1E-85475094EA78}
{F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531} {4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
{C33787E3-5564-4834-9FE3-A9020455A669} = {C33787E3-5564-4834-9FE3-A9020455A669} {C33787E3-5564-4834-9FE3-A9020455A669} = {C33787E3-5564-4834-9FE3-A9020455A669}
@ -43,6 +46,7 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvddsinfo", "nvddsinfo\nvdd
EndProject EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvdecompress", "nvdecompress\nvdecompress.vcproj", "{75A0527D-BFC9-49C3-B46B-CD1A901D5927}" Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvdecompress", "nvdecompress\nvdecompress.vcproj", "{75A0527D-BFC9-49C3-B46B-CD1A901D5927}"
ProjectSection(ProjectDependencies) = postProject ProjectSection(ProjectDependencies) = postProject
{F974F34B-AF02-4C88-8E1E-85475094EA78} = {F974F34B-AF02-4C88-8E1E-85475094EA78}
{F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531} {4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
{C33787E3-5564-4834-9FE3-A9020455A669} = {C33787E3-5564-4834-9FE3-A9020455A669} {C33787E3-5564-4834-9FE3-A9020455A669} = {C33787E3-5564-4834-9FE3-A9020455A669}
@ -51,6 +55,7 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvdecompress", "nvdecompres
EndProject EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvimgdiff", "nvimgdiff\nvimgdiff.vcproj", "{05A59E8B-EA70-4F22-89E8-E0927BA13064}" Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvimgdiff", "nvimgdiff\nvimgdiff.vcproj", "{05A59E8B-EA70-4F22-89E8-E0927BA13064}"
ProjectSection(ProjectDependencies) = postProject ProjectSection(ProjectDependencies) = postProject
{F974F34B-AF02-4C88-8E1E-85475094EA78} = {F974F34B-AF02-4C88-8E1E-85475094EA78}
{F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531} {4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
{C33787E3-5564-4834-9FE3-A9020455A669} = {C33787E3-5564-4834-9FE3-A9020455A669} {C33787E3-5564-4834-9FE3-A9020455A669} = {C33787E3-5564-4834-9FE3-A9020455A669}
@ -59,6 +64,7 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvimgdiff", "nvimgdiff\nvim
EndProject EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvassemble", "nvassemble\nvassemble.vcproj", "{3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}" Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvassemble", "nvassemble\nvassemble.vcproj", "{3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}"
ProjectSection(ProjectDependencies) = postProject ProjectSection(ProjectDependencies) = postProject
{F974F34B-AF02-4C88-8E1E-85475094EA78} = {F974F34B-AF02-4C88-8E1E-85475094EA78}
{F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531} {4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
{C33787E3-5564-4834-9FE3-A9020455A669} = {C33787E3-5564-4834-9FE3-A9020455A669} {C33787E3-5564-4834-9FE3-A9020455A669} = {C33787E3-5564-4834-9FE3-A9020455A669}
@ -67,6 +73,7 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvassemble", "nvassemble\nv
EndProject EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvzoom", "nvzoom\nvzoom.vcproj", "{51999D3E-EF22-4BDD-965F-4201034D3DCE}" Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvzoom", "nvzoom\nvzoom.vcproj", "{51999D3E-EF22-4BDD-965F-4201034D3DCE}"
ProjectSection(ProjectDependencies) = postProject ProjectSection(ProjectDependencies) = postProject
{F974F34B-AF02-4C88-8E1E-85475094EA78} = {F974F34B-AF02-4C88-8E1E-85475094EA78}
{F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531} {4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
{C33787E3-5564-4834-9FE3-A9020455A669} = {C33787E3-5564-4834-9FE3-A9020455A669} {C33787E3-5564-4834-9FE3-A9020455A669} = {C33787E3-5564-4834-9FE3-A9020455A669}
@ -110,6 +117,8 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "hdrtest", "hdrtest\hdrtest.
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531} {4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
EndProjectSection EndProjectSection
EndProject EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "bc7", "bc7\bc7.vcproj", "{F974F34B-AF02-4C88-8E1E-85475094EA78}"
EndProject
Global Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Win32 = Debug|Win32 Debug|Win32 = Debug|Win32
@ -410,6 +419,22 @@ Global
{E493E368-A4CF-4A8D-99DD-E128CC3A27EF}.Release-CUDA|Win32.Build.0 = Release|Win32 {E493E368-A4CF-4A8D-99DD-E128CC3A27EF}.Release-CUDA|Win32.Build.0 = Release|Win32
{E493E368-A4CF-4A8D-99DD-E128CC3A27EF}.Release-CUDA|x64.ActiveCfg = Release|x64 {E493E368-A4CF-4A8D-99DD-E128CC3A27EF}.Release-CUDA|x64.ActiveCfg = Release|x64
{E493E368-A4CF-4A8D-99DD-E128CC3A27EF}.Release-CUDA|x64.Build.0 = Release|x64 {E493E368-A4CF-4A8D-99DD-E128CC3A27EF}.Release-CUDA|x64.Build.0 = Release|x64
{F974F34B-AF02-4C88-8E1E-85475094EA78}.Debug|Win32.ActiveCfg = Debug|Win32
{F974F34B-AF02-4C88-8E1E-85475094EA78}.Debug|Win32.Build.0 = Debug|Win32
{F974F34B-AF02-4C88-8E1E-85475094EA78}.Debug|x64.ActiveCfg = Debug|x64
{F974F34B-AF02-4C88-8E1E-85475094EA78}.Debug|x64.Build.0 = Debug|x64
{F974F34B-AF02-4C88-8E1E-85475094EA78}.Debug-CUDA|Win32.ActiveCfg = Debug|Win32
{F974F34B-AF02-4C88-8E1E-85475094EA78}.Debug-CUDA|Win32.Build.0 = Debug|Win32
{F974F34B-AF02-4C88-8E1E-85475094EA78}.Debug-CUDA|x64.ActiveCfg = Debug|x64
{F974F34B-AF02-4C88-8E1E-85475094EA78}.Debug-CUDA|x64.Build.0 = Debug|x64
{F974F34B-AF02-4C88-8E1E-85475094EA78}.Release|Win32.ActiveCfg = Release|Win32
{F974F34B-AF02-4C88-8E1E-85475094EA78}.Release|Win32.Build.0 = Release|Win32
{F974F34B-AF02-4C88-8E1E-85475094EA78}.Release|x64.ActiveCfg = Release|x64
{F974F34B-AF02-4C88-8E1E-85475094EA78}.Release|x64.Build.0 = Release|x64
{F974F34B-AF02-4C88-8E1E-85475094EA78}.Release-CUDA|Win32.ActiveCfg = Release|Win32
{F974F34B-AF02-4C88-8E1E-85475094EA78}.Release-CUDA|Win32.Build.0 = Release|Win32
{F974F34B-AF02-4C88-8E1E-85475094EA78}.Release-CUDA|x64.ActiveCfg = Release|x64
{F974F34B-AF02-4C88-8E1E-85475094EA78}.Release-CUDA|x64.Build.0 = Release|x64
EndGlobalSection EndGlobalSection
GlobalSection(SolutionProperties) = preSolution GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE HideSolutionNode = FALSE

View File

@ -27,9 +27,10 @@
#include "nvcore/Stream.h" #include "nvcore/Stream.h"
#include "nvcore/Utils.h" // swap #include "nvcore/Utils.h" // swap
#include "nvmath/Half.h" #include "nvmath/Half.h"
#include "nvmath/Vector.inl"
#include "nvtt/bc6h/zoh.h" #include "nvtt/bc6h/zoh.h"
#include "nvtt/bc6h/utils.h" #include "nvtt/bc7/avpcl.h"
using namespace nv; using namespace nv;
@ -617,7 +618,7 @@ void BlockCTX1::setIndices(int * idx)
/// Decode BC6 block. /// Decode BC6 block.
void BlockBC6::decodeBlock(ColorSet * set) const void BlockBC6::decodeBlock(ColorSet * set) const
{ {
Tile tile(4, 4); ZOH::Tile tile(4, 4);
ZOH::decompress((const char *)data, tile); ZOH::decompress((const char *)data, tile);
// Convert ZOH's tile struct back to NVTT's, and convert half to float. // Convert ZOH's tile struct back to NVTT's, and convert half to float.
@ -626,9 +627,9 @@ void BlockBC6::decodeBlock(ColorSet * set) const
{ {
for (uint x = 0; x < 4; ++x) for (uint x = 0; x < 4; ++x)
{ {
uint16 rHalf = Tile::float2half(tile.data[y][x].x); uint16 rHalf = ZOH::Tile::float2half(tile.data[y][x].x);
uint16 gHalf = Tile::float2half(tile.data[y][x].y); uint16 gHalf = ZOH::Tile::float2half(tile.data[y][x].y);
uint16 bHalf = Tile::float2half(tile.data[y][x].z); uint16 bHalf = ZOH::Tile::float2half(tile.data[y][x].z);
set->colors[y * 4 + x].x = to_float(rHalf); set->colors[y * 4 + x].x = to_float(rHalf);
set->colors[y * 4 + x].y = to_float(gHalf); set->colors[y * 4 + x].y = to_float(gHalf);
set->colors[y * 4 + x].z = to_float(bHalf); set->colors[y * 4 + x].z = to_float(bHalf);
@ -641,6 +642,26 @@ void BlockBC6::decodeBlock(ColorSet * set) const
} }
/// Decode this BC7 block into an 8-bit RGBA color block.
void BlockBC7::decodeBlock(ColorBlock * block) const
{
    AVPCL::Tile decoded(4, 4);
    AVPCL::decompress((const char *)data, decoded);

    // Copy AVPCL's tile into NVTT's color block, texel by texel in row-major order.
    for (uint i = 0; i < 16; ++i)
    {
        const uint x = i % 4;
        const uint y = i / 4;

        // Decoded channels are in the [0, 255] range and should already be integral,
        // because BC7 never uses more than 8 bits per channel; hence a plain
        // truncating cast instead of rounding.
        Vector4 texel = decoded.data[y][x];
        block->color(x, y).setRGBA(uint8(texel.x), uint8(texel.y), uint8(texel.z), uint8(texel.w));
    }
}
/// Flip CTX1 block vertically. /// Flip CTX1 block vertically.
inline void BlockCTX1::flip4() inline void BlockCTX1::flip4()
{ {
@ -707,3 +728,9 @@ Stream & nv::operator<<(Stream & stream, BlockBC6 & block)
stream.serialize(&block, sizeof(block)); stream.serialize(&block, sizeof(block));
return stream; return stream;
} }
/// Pass the raw bytes of a BC7 block through the stream's serialize().
Stream & nv::operator<<(Stream & stream, BlockBC7 & block)
{
    stream.serialize(&block, sizeof(BlockBC7));
    return stream;
}

View File

@ -220,7 +220,13 @@ namespace nv
void decodeBlock(ColorSet * set) const; void decodeBlock(ColorSet * set) const;
}; };
/// !!!UNDONE: BC7 block /// BC7 block.
struct BlockBC7
{
// Raw 16-byte payload of one compressed block; it is kept opaque and handed to
// the AVPCL decompressor as-is instead of being described field by field.
uint8 data[16]; // Not even going to try to write a union for this thing.
// Decompresses the payload and writes the resulting texels into 'block' as 8-bit RGBA.
void decodeBlock(ColorBlock * block) const;
};
// Serialization functions. // Serialization functions.
@ -233,6 +239,7 @@ namespace nv
NVIMAGE_API Stream & operator<<(Stream & stream, BlockATI2 & block); NVIMAGE_API Stream & operator<<(Stream & stream, BlockATI2 & block);
NVIMAGE_API Stream & operator<<(Stream & stream, BlockCTX1 & block); NVIMAGE_API Stream & operator<<(Stream & stream, BlockCTX1 & block);
NVIMAGE_API Stream & operator<<(Stream & stream, BlockBC6 & block); NVIMAGE_API Stream & operator<<(Stream & stream, BlockBC6 & block);
NVIMAGE_API Stream & operator<<(Stream & stream, BlockBC7 & block);
} // nv namespace } // nv namespace

View File

@ -1410,6 +1410,12 @@ void DirectDrawSurface::readBlock(ColorBlock * rgba)
} }
} }
} }
else if (header.hasDX10Header() && header.header10.dxgiFormat == DXGI_FORMAT_BC7_UNORM)
{
BlockBC7 block;
*stream << block;
block.decodeBlock(rgba);
}
else else
{ {
nvDebugCheck(false); nvDebugCheck(false);

File diff suppressed because it is too large Load Diff

View File

@ -14,22 +14,32 @@ namespace nv
Vector3 computeCentroid(int n, const Vector3 * points); Vector3 computeCentroid(int n, const Vector3 * points);
Vector3 computeCentroid(int n, const Vector3 * points, const float * weights, const Vector3 & metric); Vector3 computeCentroid(int n, const Vector3 * points, const float * weights, const Vector3 & metric);
Vector4 computeCentroid(int n, const Vector4 * points);
Vector4 computeCentroid(int n, const Vector4 * points, const float * weights, const Vector4 & metric);
Vector3 computeCovariance(int n, const Vector3 * points, float * covariance); Vector3 computeCovariance(int n, const Vector3 * points, float * covariance);
Vector3 computeCovariance(int n, const Vector3 * points, const float * weights, const Vector3 & metric, float * covariance); Vector3 computeCovariance(int n, const Vector3 * points, const float * weights, const Vector3 & metric, float * covariance);
Vector4 computeCovariance(int n, const Vector4 * points, float * covariance);
Vector4 computeCovariance(int n, const Vector4 * points, const float * weights, const Vector4 & metric, float * covariance);
Vector3 computePrincipalComponent_PowerMethod(int n, const Vector3 * points); Vector3 computePrincipalComponent_PowerMethod(int n, const Vector3 * points);
Vector3 computePrincipalComponent_PowerMethod(int n, const Vector3 * points, const float * weights, const Vector3 & metric); Vector3 computePrincipalComponent_PowerMethod(int n, const Vector3 * points, const float * weights, const Vector3 & metric);
Vector3 computePrincipalComponent_EigenSolver(int n, const Vector3 * points); Vector3 computePrincipalComponent_EigenSolver(int n, const Vector3 * points);
Vector3 computePrincipalComponent_EigenSolver(int n, const Vector3 * points, const float * weights, const Vector3 & metric); Vector3 computePrincipalComponent_EigenSolver(int n, const Vector3 * points, const float * weights, const Vector3 & metric);
Vector4 computePrincipalComponent_EigenSolver(int n, const Vector4 * points);
Vector4 computePrincipalComponent_EigenSolver(int n, const Vector4 * points, const float * weights, const Vector4 & metric);
Vector3 computePrincipalComponent_SVD(int n, const Vector3 * points); Vector3 computePrincipalComponent_SVD(int n, const Vector3 * points);
Vector4 computePrincipalComponent_SVD(int n, const Vector4 * points);
Plane bestPlane(int n, const Vector3 * points); Plane bestPlane(int n, const Vector3 * points);
bool isPlanar(int n, const Vector3 * points, float epsilon = NV_EPSILON); bool isPlanar(int n, const Vector3 * points, float epsilon = NV_EPSILON);
bool eigenSolveSymmetric (const float matrix[6], float eigenValues[3], Vector3 eigenVectors[3]); bool eigenSolveSymmetric3(const float matrix[6], float eigenValues[3], Vector3 eigenVectors[3]);
bool eigenSolveSymmetric4(const float matrix[10], float eigenValues[4], Vector4 eigenVectors[4]);
// Returns number of clusters [1-4]. // Returns number of clusters [1-4].
int compute4Means(int n, const Vector3 * points, const float * weights, const Vector3 & metric, Vector3 * cluster); int compute4Means(int n, const Vector3 * points, const float * weights, const Vector3 & metric, Vector3 * cluster);

View File

@ -73,6 +73,7 @@ namespace nv
void operator*=(float s); void operator*=(float s);
void operator/=(float s); void operator/=(float s);
void operator*=(Vector3::Arg v); void operator*=(Vector3::Arg v);
void operator/=(Vector3::Arg v);
friend bool operator==(Vector3::Arg a, Vector3::Arg b); friend bool operator==(Vector3::Arg a, Vector3::Arg b);
friend bool operator!=(Vector3::Arg a, Vector3::Arg b); friend bool operator!=(Vector3::Arg a, Vector3::Arg b);
@ -116,7 +117,9 @@ namespace nv
void operator+=(Vector4::Arg v); void operator+=(Vector4::Arg v);
void operator-=(Vector4::Arg v); void operator-=(Vector4::Arg v);
void operator*=(float s); void operator*=(float s);
void operator/=(float s);
void operator*=(Vector4::Arg v); void operator*=(Vector4::Arg v);
void operator/=(Vector4::Arg v);
friend bool operator==(Vector4::Arg a, Vector4::Arg b); friend bool operator==(Vector4::Arg a, Vector4::Arg b);
friend bool operator!=(Vector4::Arg a, Vector4::Arg b); friend bool operator!=(Vector4::Arg a, Vector4::Arg b);

View File

@ -158,6 +158,13 @@ namespace nv
z *= v.z; z *= v.z;
} }
// Divide this vector by v, component by component.
inline void Vector3::operator/=(Vector3::Arg v)
{
    x = x / v.x;
    y = y / v.y;
    z = z / v.z;
}
inline bool operator==(Vector3::Arg a, Vector3::Arg b) inline bool operator==(Vector3::Arg a, Vector3::Arg b)
{ {
return a.x == b.x && a.y == b.y && a.z == b.z; return a.x == b.x && a.y == b.y && a.z == b.z;
@ -243,6 +250,14 @@ namespace nv
w *= s; w *= s;
} }
// Divide every component of this vector by the scalar s.
inline void Vector4::operator/=(float s)
{
    x = x / s;
    y = y / s;
    z = z / s;
    w = w / s;
}
inline void Vector4::operator*=(Vector4::Arg v) inline void Vector4::operator*=(Vector4::Arg v)
{ {
x *= v.x; x *= v.x;
@ -251,6 +266,14 @@ namespace nv
w *= v.w; w *= v.w;
} }
// Divide this vector by v, component by component.
inline void Vector4::operator/=(Vector4::Arg v)
{
    x = x / v.x;
    y = y / v.y;
    z = z / v.z;
    w = w / v.w;
}
inline bool operator==(Vector4::Arg a, Vector4::Arg b) inline bool operator==(Vector4::Arg a, Vector4::Arg b)
{ {
return a.x == b.x && a.y == b.y && a.z == b.z && a.w == b.w; return a.x == b.x && a.y == b.y && a.z == b.z && a.w == b.w;
@ -677,6 +700,11 @@ namespace nv
return scale(v, s); return scale(v, s);
} }
// Vector4 * Vector4: forwards both operands to the two-vector overload of scale().
// NOTE(review): presumably a component-wise product -- confirm against scale()'s definition.
inline Vector4 operator*(Vector4::Arg v, Vector4::Arg s)
{
return scale(v, s);
}
inline Vector4 operator/(Vector4::Arg v, float s) inline Vector4 operator/(Vector4::Arg v, float s)
{ {
return scale(v, 1.0f/s); return scale(v, 1.0f/s);

View File

@ -29,12 +29,10 @@
#include "CompressionOptions.h" #include "CompressionOptions.h"
#include "nvimage/ColorBlock.h" #include "nvimage/ColorBlock.h"
#include "nvmath/Half.h" #include "nvmath/Half.h"
#include "nvmath/Vector.inl"
#include "bc6h/zoh.h" #include "bc6h/zoh.h"
#include "bc6h/utils.h" #include "bc7/avpcl.h"
//#include "bc7/avpcl.h"
//#include "bc7/utils.h"
using namespace nv; using namespace nv;
using namespace nvtt; using namespace nvtt;
@ -42,21 +40,24 @@ using namespace nvtt;
void CompressorBC6::compressBlock(ColorSet & tile, AlphaMode alphaMode, const CompressionOptions::Private & compressionOptions, void * output) void CompressorBC6::compressBlock(ColorSet & tile, AlphaMode alphaMode, const CompressionOptions::Private & compressionOptions, void * output)
{ {
NV_UNUSED(alphaMode); // ZOH does not support alpha. // !!!UNDONE: support channel weights
// !!!UNDONE: set flags once, not per block (this is especially sketchy since block compression is multithreaded...)
NV_UNUSED(alphaMode); // ZOH does not support alpha.
if (compressionOptions.pixelType == PixelType_UnsignedFloat || if (compressionOptions.pixelType == PixelType_UnsignedFloat ||
compressionOptions.pixelType == PixelType_UnsignedNorm || compressionOptions.pixelType == PixelType_UnsignedNorm ||
compressionOptions.pixelType == PixelType_UnsignedInt) compressionOptions.pixelType == PixelType_UnsignedInt)
{ {
Utils::FORMAT = UNSIGNED_F16; // @@ Do not use globals. ZOH::Utils::FORMAT = ZOH::UNSIGNED_F16;
} }
else else
{ {
Utils::FORMAT = SIGNED_F16; ZOH::Utils::FORMAT = ZOH::SIGNED_F16;
} }
// Convert NVTT's tile struct to ZOH's, and convert float to half. // Convert NVTT's tile struct to ZOH's, and convert float to half.
Tile zohTile(tile.w, tile.h); ZOH::Tile zohTile(tile.w, tile.h);
memset(zohTile.data, 0, sizeof(zohTile.data)); memset(zohTile.data, 0, sizeof(zohTile.data));
memset(zohTile.importance_map, 0, sizeof(zohTile.importance_map)); memset(zohTile.importance_map, 0, sizeof(zohTile.importance_map));
for (uint y = 0; y < tile.h; ++y) for (uint y = 0; y < tile.h; ++y)
@ -67,9 +68,9 @@ void CompressorBC6::compressBlock(ColorSet & tile, AlphaMode alphaMode, const Co
uint16 rHalf = to_half(color.x); uint16 rHalf = to_half(color.x);
uint16 gHalf = to_half(color.y); uint16 gHalf = to_half(color.y);
uint16 bHalf = to_half(color.z); uint16 bHalf = to_half(color.z);
zohTile.data[y][x].x = Tile::half2float(rHalf); zohTile.data[y][x].x = ZOH::Tile::half2float(rHalf);
zohTile.data[y][x].y = Tile::half2float(gHalf); zohTile.data[y][x].y = ZOH::Tile::half2float(gHalf);
zohTile.data[y][x].z = Tile::half2float(bHalf); zohTile.data[y][x].z = ZOH::Tile::half2float(bHalf);
zohTile.importance_map[y][x] = 1.0f; zohTile.importance_map[y][x] = 1.0f;
} }
} }
@ -77,8 +78,22 @@ void CompressorBC6::compressBlock(ColorSet & tile, AlphaMode alphaMode, const Co
ZOH::compress(zohTile, (char *)output); ZOH::compress(zohTile, (char *)output);
} }
void CompressorBC7::compressBlock(ColorSet & tile, AlphaMode alphaMode, const CompressionOptions::Private & compressionOptions, void * output) void CompressorBC7::compressBlock(ColorSet & tile, AlphaMode alphaMode, const CompressionOptions::Private & compressionOptions, void * output)
{ {
// @@ TODO // !!!UNDONE: support channel weights
// !!!UNDONE: set flags once, not per block (this is especially sketchy since block compression is multithreaded...)
AVPCL::mode_rgb = false;
AVPCL::flag_premult = (alphaMode == AlphaMode_Premultiplied);
AVPCL::flag_nonuniform = false;
AVPCL::flag_nonuniform_ati = false;
// Convert NVTT's tile struct to AVPCL's.
AVPCL::Tile avpclTile(tile.w, tile.h);
memset(avpclTile.data, 0, sizeof(avpclTile.data));
for (uint y = 0; y < tile.h; ++y)
for (uint x = 0; x < tile.w; ++x)
avpclTile.data[y][x] = tile.color(x, y) * 255.0f;
AVPCL::compress(avpclTile, (char *)output);
} }

View File

@ -775,8 +775,7 @@ CompressorInterface * Compressor::Private::chooseCpuCompressor(const Compression
} }
else if (compressionOptions.format == Format_BC7) else if (compressionOptions.format == Format_BC7)
{ {
// !!!UNDONE return new CompressorBC7;
//return new CompressorBC7;
} }
return NULL; return NULL;

View File

@ -707,7 +707,8 @@ bool Surface::setImage2D(Format format, Decoder decoder, int w, int h, const voi
format != nvtt::Format_BC3 && format != nvtt::Format_BC3 &&
format != nvtt::Format_BC4 && format != nvtt::Format_BC4 &&
format != nvtt::Format_BC5 && format != nvtt::Format_BC5 &&
format != nvtt::Format_BC6) format != nvtt::Format_BC6 &&
format != nvtt::Format_BC7)
{ {
return false; return false;
} }
@ -822,6 +823,11 @@ bool Surface::setImage2D(Format format, Decoder decoder, int w, int h, const voi
const BlockATI2 * block = (const BlockATI2 *)ptr; const BlockATI2 * block = (const BlockATI2 *)ptr;
block->decodeBlock(&colors, decoder == Decoder_D3D9); block->decodeBlock(&colors, decoder == Decoder_D3D9);
} }
else if (format == nvtt::Format_BC7)
{
const BlockBC7 * block = (const BlockBC7 *)ptr;
block->decodeBlock(&colors);
}
else else
{ {
nvDebugCheck(false); nvDebugCheck(false);

View File

@ -10,36 +10,38 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and limitations under the License. See the License for the specific language governing permissions and limitations under the License.
*/ */
#pragma once #pragma once
#ifndef _BITS_H #ifndef _ZOH_BITS_H
#define _BITS_H #define _ZOH_BITS_H
// read/write a bitstream // read/write a bitstream
#include <assert.h> #include "nvcore/Debug.h"
namespace ZOH {
class Bits class Bits
{ {
public: public:
Bits(char *data, int maxdatabits) { assert (data && maxdatabits > 0); bptr = bend = 0; bits = data; maxbits = maxdatabits; readonly = 0;} Bits(char *data, int maxdatabits) { nvAssert (data && maxdatabits > 0); bptr = bend = 0; bits = data; maxbits = maxdatabits; readonly = 0;}
Bits(const char *data, int availdatabits) { assert (data && availdatabits > 0); bptr = 0; bend = availdatabits; cbits = data; maxbits = availdatabits; readonly = 1;} Bits(const char *data, int availdatabits) { nvAssert (data && availdatabits > 0); bptr = 0; bend = availdatabits; cbits = data; maxbits = availdatabits; readonly = 1;}
void write(int value, int nbits) { void write(int value, int nbits) {
assert (nbits >= 0 && nbits < 32); nvAssert (nbits >= 0 && nbits < 32);
assert (sizeof(int)>= 4); nvAssert (sizeof(int)>= 4);
for (int i=0; i<nbits; ++i) for (int i=0; i<nbits; ++i)
writeone(value>>i); writeone(value>>i);
} }
int read(int nbits) { int read(int nbits) {
assert (nbits >= 0 && nbits < 32); nvAssert (nbits >= 0 && nbits < 32);
assert (sizeof(int)>= 4); nvAssert (sizeof(int)>= 4);
int out = 0; int out = 0;
for (int i=0; i<nbits; ++i) for (int i=0; i<nbits; ++i)
out |= readone() << i; out |= readone() << i;
return out; return out;
} }
int getptr() { return bptr; } int getptr() { return bptr; }
int setptr(int ptr) { assert (ptr >= 0 && ptr < maxbits); bptr = ptr; } int setptr(int ptr) { nvAssert (ptr >= 0 && ptr < maxbits); bptr = ptr; }
int getsize() { return bend; } int getsize() { return bend; }
private: private:
@ -51,7 +53,7 @@ private:
char readonly; // 1 if this is a read-only stream char readonly; // 1 if this is a read-only stream
int readone() { int readone() {
assert (bptr < bend); nvAssert (bptr < bend);
if (bptr >= bend) return 0; if (bptr >= bend) return 0;
int bit = (readonly ? cbits[bptr>>3] : bits[bptr>>3]) & (1 << (bptr & 7)); int bit = (readonly ? cbits[bptr>>3] : bits[bptr>>3]) & (1 << (bptr & 7));
++bptr; ++bptr;
@ -60,7 +62,7 @@ private:
void writeone(int bit) { void writeone(int bit) {
if (readonly) if (readonly)
throw "Writing a read-only bit stream"; throw "Writing a read-only bit stream";
assert (bptr < maxbits); nvAssert (bptr < maxbits);
if (bptr >= maxbits) return; if (bptr >= maxbits) return;
if (bit&1) if (bit&1)
bits[bptr>>3] |= 1 << (bptr & 7); bits[bptr>>3] |= 1 << (bptr & 7);
@ -70,4 +72,6 @@ private:
} }
}; };
}
#endif #endif

View File

@ -10,8 +10,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and limitations under the License. See the License for the specific language governing permissions and limitations under the License.
*/ */
#pragma once #pragma once
#ifndef _SHAPES_TWO_H #ifndef _ZOH_SHAPES_TWO_H
#define _SHAPES_TWO_H #define _ZOH_SHAPES_TWO_H
// shapes for two regions // shapes for two regions

View File

@ -10,15 +10,14 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and limitations under the License. See the License for the specific language governing permissions and limitations under the License.
*/ */
#pragma once #pragma once
#ifndef _TILE_H #ifndef _ZOH_TILE_H
#define _TILE_H #define _ZOH_TILE_H
#include "utils.h" #include "utils.h"
#include "nvmath/Vector.h" #include "nvmath/Vector.h"
#include <math.h> #include <math.h>
namespace ZOH {
//#define USE_IMPORTANCE_MAP 1 // define this if you want to increase importance of some pixels in tile //#define USE_IMPORTANCE_MAP 1 // define this if you want to increase importance of some pixels in tile
class Tile class Tile
@ -79,4 +78,6 @@ public:
int size_x, size_y; // actual size of tile int size_x, size_y; // actual size of tile
}; };
#endif // _TILE_H }
#endif // _ZOH_TILE_H

View File

@ -17,6 +17,7 @@ See the License for the specific language governing permissions and limitations
#include <math.h> #include <math.h>
using namespace nv; using namespace nv;
using namespace ZOH;
static const int denom7_weights_64[] = {0, 9, 18, 27, 37, 46, 55, 64}; // divided by 64 static const int denom7_weights_64[] = {0, 9, 18, 27, 37, 46, 55, 64}; // divided by 64
static const int denom15_weights_64[] = {0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64}; // divided by 64 static const int denom15_weights_64[] = {0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64}; // divided by 64

View File

@ -12,15 +12,14 @@ See the License for the specific language governing permissions and limitations
// utility class holding common routines // utility class holding common routines
#pragma once #pragma once
#ifndef _UTILS_H #ifndef _ZOH_UTILS_H
#define _UTILS_H #define _ZOH_UTILS_H
#include "nvmath/Vector.h" #include "nvmath/Vector.h"
namespace ZOH {
#define PALETTE_LERP(a, b, i, denom) Utils::lerp(a, b, i, denom) inline int SIGN_EXTEND(int x, int nb) { return ((((signed(x))&(1<<((nb)-1)))?((~0)<<(nb)):0)|(signed(x))); }
#define SIGN_EXTEND(x,nb) ((((signed(x))&(1<<((nb)-1)))?((~0)<<(nb)):0)|(signed(x)))
enum Field { enum Field {
FIELD_M = 1, // mode FIELD_M = 1, // mode
@ -31,20 +30,20 @@ enum Field {
}; };
// some constants // some constants
#define F16S_MASK 0x8000 // f16 sign mask static const int F16S_MASK = 0x8000; // f16 sign mask
#define F16EM_MASK 0x7fff // f16 exp & mantissa mask static const int F16EM_MASK = 0x7fff; // f16 exp & mantissa mask
#define U16MAX 0xffff static const int U16MAX = 0xffff;
#define S16MIN (-0x8000) static const int S16MIN = -0x8000;
#define S16MAX 0x7fff static const int S16MAX = 0x7fff;
#define INT16_MASK 0xffff static const int INT16_MASK = 0xffff;
#define F16MAX (0x7bff) // MAXFLT bit pattern for halfs static const int F16MAX = 0x7bff; // MAXFLT bit pattern for halfs
enum Format { UNSIGNED_F16, SIGNED_F16 }; enum Format { UNSIGNED_F16, SIGNED_F16 };
class Utils class Utils
{ {
public: public:
static ::Format FORMAT; // this is a global -- we're either handling signed or unsigned half values static Format FORMAT; // this is a global -- we're either handling signed or unsigned half values
// error metrics // error metrics
static float norm(const nv::Vector3 &a, const nv::Vector3 &b); static float norm(const nv::Vector3 &a, const nv::Vector3 &b);
@ -69,4 +68,6 @@ public:
static nv::Vector3 lerp(const nv::Vector3 & a, const nv::Vector3 & b, int i, int denom); static nv::Vector3 lerp(const nv::Vector3 & a, const nv::Vector3 & b, int i, int denom);
}; };
#endif // _UTILS_H }
#endif // _ZOH_UTILS_H

View File

@ -17,6 +17,8 @@ See the License for the specific language governing permissions and limitations
#include <string.h> // memcpy #include <string.h> // memcpy
using namespace ZOH;
bool ZOH::isone(const char *block) bool ZOH::isone(const char *block)
{ {
@ -130,7 +132,7 @@ static void stats(char block[ZOH::BLOCKSIZE])
{ {
char mode = block[0] & 0x1F; if ((mode & 0x3) == 0) mode = 0; if ((mode & 0x3) == 1) mode = 1; modehist[mode]++; char mode = block[0] & 0x1F; if ((mode & 0x3) == 0) mode = 0; if ((mode & 0x3) == 1) mode = 1; modehist[mode]++;
int prec = mode_to_prec[mode]; int prec = mode_to_prec[mode];
assert (prec != -1); nvAssert (prec != -1);
if (!ZOH::isone(block)) if (!ZOH::isone(block))
{ {
tworegions++; tworegions++;

View File

@ -15,17 +15,13 @@ See the License for the specific language governing permissions and limitations
#include "tile.h" #include "tile.h"
namespace ZOH {
// UNUSED ZOH MODES are 0x13, 0x17, 0x1b, 0x1f // UNUSED ZOH MODES are 0x13, 0x17, 0x1b, 0x1f
#define EXTERNAL_RELEASE 1 // define this if we're releasing this code externally static const int NREGIONS_TWO = 2;
static const int NREGIONS_ONE = 1;
#define NREGIONS_TWO 2 static const int NCHANNELS = 3;
#define NREGIONS_ONE 1
#define NCHANNELS 3
// Note: this code only reads OpenEXR files, which are only in F16 format.
// if unsigned is selected, the input is clamped to >= 0.
// if f16 is selected, the range is clamped to 0..0x7bff.
struct FltEndpts struct FltEndpts
{ {
@ -45,28 +41,25 @@ struct ComprEndpts
uint B[NCHANNELS]; uint B[NCHANNELS];
}; };
class ZOH static const int BLOCKSIZE=16;
{ static const int BITSIZE=128;
public:
static const int BLOCKSIZE=16;
static const int BITSIZE=128;
static Format FORMAT;
static void compress(const Tile &t, char *block); void compress(const Tile &t, char *block);
static void decompress(const char *block, Tile &t); void decompress(const char *block, Tile &t);
static float compressone(const Tile &t, char *block); float compressone(const Tile &t, char *block);
static float compresstwo(const Tile &t, char *block); float compresstwo(const Tile &t, char *block);
static void decompressone(const char *block, Tile &t); void decompressone(const char *block, Tile &t);
static void decompresstwo(const char *block, Tile &t); void decompresstwo(const char *block, Tile &t);
static float refinetwo(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS_TWO], char *block); float refinetwo(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS_TWO], char *block);
static float roughtwo(const Tile &tile, int shape, FltEndpts endpts[NREGIONS_TWO]); float roughtwo(const Tile &tile, int shape, FltEndpts endpts[NREGIONS_TWO]);
static float refineone(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS_ONE], char *block); float refineone(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS_ONE], char *block);
static float roughone(const Tile &tile, int shape, FltEndpts endpts[NREGIONS_ONE]); float roughone(const Tile &tile, int shape, FltEndpts endpts[NREGIONS_ONE]);
static bool isone(const char *block); bool isone(const char *block);
};
}
#endif // _ZOH_H #endif // _ZOH_H

View File

@ -25,6 +25,7 @@ See the License for the specific language governing permissions and limitations
#include <float.h> // FLT_MAX #include <float.h> // FLT_MAX
using namespace nv; using namespace nv;
using namespace ZOH;
#define NINDICES 16 #define NINDICES 16
#define INDEXBITS 4 #define INDEXBITS 4
@ -324,21 +325,21 @@ static void generate_palette_quantized(const IntEndpts &endpts, int prec, Vector
// interpolate // interpolate
for (int i = 0; i < NINDICES; ++i) for (int i = 0; i < NINDICES; ++i)
palette[i].x = float(Utils::finish_unquantize(PALETTE_LERP(a, b, i, DENOM), prec)); palette[i].x = float(Utils::finish_unquantize(Utils::lerp(a, b, i, DENOM), prec));
a = Utils::unquantize(endpts.A[1], prec); a = Utils::unquantize(endpts.A[1], prec);
b = Utils::unquantize(endpts.B[1], prec); b = Utils::unquantize(endpts.B[1], prec);
// interpolate // interpolate
for (int i = 0; i < NINDICES; ++i) for (int i = 0; i < NINDICES; ++i)
palette[i].y = float(Utils::finish_unquantize(PALETTE_LERP(a, b, i, DENOM), prec)); palette[i].y = float(Utils::finish_unquantize(Utils::lerp(a, b, i, DENOM), prec));
a = Utils::unquantize(endpts.A[2], prec); a = Utils::unquantize(endpts.A[2], prec);
b = Utils::unquantize(endpts.B[2], prec); b = Utils::unquantize(endpts.B[2], prec);
// interpolate // interpolate
for (int i = 0; i < NINDICES; ++i) for (int i = 0; i < NINDICES; ++i)
palette[i].z = float(Utils::finish_unquantize(PALETTE_LERP(a, b, i, DENOM), prec)); palette[i].z = float(Utils::finish_unquantize(Utils::lerp(a, b, i, DENOM), prec));
} }
// position 0 was compressed // position 0 was compressed
@ -666,7 +667,7 @@ static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS_ONE], V
{ {
for (int region = 0; region < NREGIONS_ONE; ++region) for (int region = 0; region < NREGIONS_ONE; ++region)
for (int i = 0; i < NINDICES; ++i) for (int i = 0; i < NINDICES; ++i)
palette[region][i] = PALETTE_LERP(endpts[region].A, endpts[region].B, i, DENOM); palette[region][i] = Utils::lerp(endpts[region].A, endpts[region].B, i, DENOM);
} }
// generate a palette from unquantized endpoints, then pick best palette color for all pixels in each region, return toterr for all regions combined // generate a palette from unquantized endpoints, then pick best palette color for all pixels in each region, return toterr for all regions combined

View File

@ -49,6 +49,7 @@ See the License for the specific language governing permissions and limitations
#include <float.h> // FLT_MAX #include <float.h> // FLT_MAX
using namespace nv; using namespace nv;
using namespace ZOH;
#define NINDICES 8 #define NINDICES 8
#define INDEXBITS 3 #define INDEXBITS 3
@ -396,21 +397,21 @@ static void generate_palette_quantized(const IntEndpts &endpts, int prec, Vector
// interpolate // interpolate
for (int i = 0; i < NINDICES; ++i) for (int i = 0; i < NINDICES; ++i)
palette[i].x = float(Utils::finish_unquantize(PALETTE_LERP(a, b, i, DENOM), prec)); palette[i].x = float(Utils::finish_unquantize(Utils::lerp(a, b, i, DENOM), prec));
a = Utils::unquantize(endpts.A[1], prec); a = Utils::unquantize(endpts.A[1], prec);
b = Utils::unquantize(endpts.B[1], prec); b = Utils::unquantize(endpts.B[1], prec);
// interpolate // interpolate
for (int i = 0; i < NINDICES; ++i) for (int i = 0; i < NINDICES; ++i)
palette[i].y = float(Utils::finish_unquantize(PALETTE_LERP(a, b, i, DENOM), prec)); palette[i].y = float(Utils::finish_unquantize(Utils::lerp(a, b, i, DENOM), prec));
a = Utils::unquantize(endpts.A[2], prec); a = Utils::unquantize(endpts.A[2], prec);
b = Utils::unquantize(endpts.B[2], prec); b = Utils::unquantize(endpts.B[2], prec);
// interpolate // interpolate
for (int i = 0; i < NINDICES; ++i) for (int i = 0; i < NINDICES; ++i)
palette[i].z = float(Utils::finish_unquantize(PALETTE_LERP(a, b, i, DENOM), prec)); palette[i].z = float(Utils::finish_unquantize(Utils::lerp(a, b, i, DENOM), prec));
} }
static void read_indices(Bits &in, int shapeindex, int indices[Tile::TILE_H][Tile::TILE_W]) static void read_indices(Bits &in, int shapeindex, int indices[Tile::TILE_H][Tile::TILE_W])
@ -753,7 +754,7 @@ static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS_TWO], V
{ {
for (int region = 0; region < NREGIONS_TWO; ++region) for (int region = 0; region < NREGIONS_TWO; ++region)
for (int i = 0; i < NINDICES; ++i) for (int i = 0; i < NINDICES; ++i)
palette[region][i] = PALETTE_LERP(endpts[region].A, endpts[region].B, i, DENOM); palette[region][i] = Utils::lerp(endpts[region].A, endpts[region].B, i, DENOM);
} }
// generate a palette from unquantized endpoints, then pick best palette color for all pixels in each region, return toterr for all regions combined // generate a palette from unquantized endpoints, then pick best palette color for all pixels in each region, return toterr for all regions combined

View File

@ -0,0 +1,30 @@
PROJECT(bc7)

INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})

# Sources and headers of the AVPCL-based BC7 compressor.
SET(BC7_SRCS
    avpcl.cpp
    avpcl.h
    avpcl_mode0.cpp
    avpcl_mode1.cpp
    avpcl_mode2.cpp
    avpcl_mode3.cpp
    avpcl_mode4.cpp
    avpcl_mode5.cpp
    avpcl_mode6.cpp
    avpcl_mode7.cpp
    bits.h
    endpts.h
    shapes_three.h
    shapes_two.h
    tile.h
    utils.cpp
    utils.h)

ADD_LIBRARY(bc7 STATIC ${BC7_SRCS})

# Build position-independent code with GCC on non-Windows platforms
# (presumably so this static library can be linked into a shared library).
# The property must be set on this project's own target, bc7: naming bc6h here
# leaves bc7 without -fPIC and refers to a target this file does not define.
IF(NOT WIN32)
    IF(CMAKE_COMPILER_IS_GNUCXX)
        SET_TARGET_PROPERTIES(bc7 PROPERTIES COMPILE_FLAGS -fPIC)
    ENDIF(CMAKE_COMPILER_IS_GNUCXX)
ENDIF(NOT WIN32)

View File

@ -1,261 +0,0 @@
///////////////////////////////////////////////////////////////////////////
//
// Copyright (c) 2002, Industrial Light & Magic, a division of Lucas
// Digital Ltd. LLC
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Industrial Light & Magic nor the names of
// its contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
///////////////////////////////////////////////////////////////////////////
#ifndef INCLUDED_IMF_ARRAY_H
#define INCLUDED_IMF_ARRAY_H
//-------------------------------------------------------------------------
//
// class Array
// class Array2D
//
// "Arrays of T" whose sizes are not known at compile time.
// When an array goes out of scope, its elements are automatically
// deleted.
//
// Usage example:
//
// struct C
// {
// C () {std::cout << "C::C (" << this << ")\n";};
// virtual ~C () {std::cout << "C::~C (" << this << ")\n";};
// };
//
// int
// main ()
// {
// Array <C> a(3);
//
// C &b = a[1];
// const C &c = a[1];
// C *d = a + 2;
// const C *e = a;
//
// return 0;
// }
//
//-------------------------------------------------------------------------
namespace Imf {

//
// Array<T>: a run of T objects whose length is chosen at run time.
// The storage is released when the Array is destroyed.
//
template <class T>
class Array
{
  public:

    // An empty array has null storage; a sized array holds "size"
    // default-constructed elements.
    Array (): _data (0) {}
    Array (long size): _data (new T[size]) {}
    ~Array () {delete [] _data;}

    // Elements are reached through the implicit conversion to a
    // pointer to the first element.
    operator T * () {return _data;}
    operator const T * () const {return _data;}

    // Replace the storage with "size" fresh elements; the previous
    // contents are discarded.  resizeErase() allocates before it
    // releases, resizeEraseUnsafe() releases before it allocates.
    void resizeErase (long size);
    void resizeEraseUnsafe (long size);

  private:

    Array (const Array &);              // Copying and assignment
    Array & operator = (const Array &); // are not implemented

    T * _data;
};

//
// Array2D<T>: a sizeX by sizeY grid of T stored in one contiguous
// allocation; operator[] (x) yields a pointer to the start of row x.
//
template <class T>
class Array2D
{
  public:

    Array2D ();                         // empty array, 0 by 0 elements
    Array2D (long sizeX, long sizeY);   // sizeX by sizeY elements
    ~Array2D ();

    T * operator [] (long x);
    const T * operator [] (long x) const;

    // Same contract as Array<T>::resizeErase / resizeEraseUnsafe.
    void resizeErase (long sizeX, long sizeY);
    void resizeEraseUnsafe (long sizeX, long sizeY);

  private:

    Array2D (const Array2D &);              // Copying and assignment
    Array2D & operator = (const Array2D &); // are not implemented

    long _sizeY;    // length of one row
    T * _data;
};

//---------------
// Implementation
//---------------

template <class T>
inline void
Array<T>::resizeErase (long size)
{
    // Allocate first so that a failed allocation leaves the old
    // storage untouched.
    T *replacement = new T[size];

    delete [] _data;
    _data = replacement;
}

template <class T>
inline void
Array<T>::resizeEraseUnsafe (long size)
{
    // Release first; _data is nulled so that it does not dangle if
    // the allocation below throws.
    delete [] _data;
    _data = 0;

    _data = new T[size];
}

template <class T>
inline
Array2D<T>::Array2D ():
    _sizeY (0),
    _data (0)
{
    // empty
}

template <class T>
inline
Array2D<T>::Array2D (long sizeX, long sizeY):
    _sizeY (sizeY),
    _data (new T[sizeX * sizeY])
{
    // empty
}

template <class T>
inline
Array2D<T>::~Array2D ()
{
    delete [] _data;
}

template <class T>
inline T *
Array2D<T>::operator [] (long x)
{
    T *rowStart = _data + x * _sizeY;
    return rowStart;
}

template <class T>
inline const T *
Array2D<T>::operator [] (long x) const
{
    const T *rowStart = _data + x * _sizeY;
    return rowStart;
}

template <class T>
inline void
Array2D<T>::resizeErase (long sizeX, long sizeY)
{
    T *replacement = new T[sizeX * sizeY];

    delete [] _data;
    _sizeY = sizeY;
    _data = replacement;
}

template <class T>
inline void
Array2D<T>::resizeEraseUnsafe (long sizeX, long sizeY)
{
    // Drop the old grid completely before asking for the new one.
    delete [] _data;
    _data = 0;
    _sizeY = 0;

    _data = new T[sizeX * sizeY];
    _sizeY = sizeY;
}

} // namespace Imf
#endif

View File

@ -1,342 +0,0 @@
/***************************************************************************
* Math.C *
* *
* Some basic math functions. *
* *
* HISTORY *
* Name Date Description *
* *
* arvo 06/21/93 Initial coding. *
* *
*--------------------------------------------------------------------------*
* Copyright (C) 1999, James Arvo *
* *
* This program is free software; you can redistribute it and/or modify it *
* under the terms of the GNU General Public License as published by the *
* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html *
* *
* This program is distributed in the hope that it will be useful, but *
* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for *
* any particular purpose. See the GNU General Public License for more *
* details. *
* *
***************************************************************************/
#include <math.h>
#include <stdlib.h>
#include <iostream>
#include <assert.h>
#include "ArvoMath.h"
#include "form.h"
namespace ArvoMath {
// Magnitude threshold used by ArcQuad and ArcTan: |x| or |y| at or below
// this value is treated as zero.
static const float Epsilon = 1.0E-5;
// Natural logarithm of 2; LogDoubleFact uses it for the k * log( 2 ) term.
static const double LogTwo = log( 2.0 );
// Largest n accepted by the caching BinomialCoeffs( int n ) overload.
#define BinCoeffMax 500
// Signed relative error of x with respect to y: ( x - y ) divided by the
// larger of |x| and |y|.  The result carries the sign of x - y.
// When both inputs are zero they agree exactly, so 0 is returned rather
// than the NaN that 0 / 0 would produce.
double RelErr( double x, double y )
{
    const double diff = x - y;
    if( x < 0.0 ) x = -x;
    if( y < 0.0 ) y = -y;
    const double scale = ( x > y ? x : y );
    if( scale == 0.0 ) return 0.0;   // x == y == 0: avoid 0 / 0.
    return diff / scale;
}
/***************************************************************************
* A R C Q U A D *
* *
* Returns theta / ( 2*PI ), where the input variables x and y are *
* such that x == COS( theta ) and y == SIN( theta ). *
* *
***************************************************************************/
float ArcQuad( float x, float y )
{
    // x is numerically zero: the answer is a quarter turn, three quarters
    // of a turn, or (when y is numerically zero as well) 0.
    if( !( Abs( x ) > Epsilon ) )
    {
        if( y > Epsilon ) return( 0.25 );
        if( y < -Epsilon ) return( 0.75 );
        return( 0.0 );
    }

    // Fraction of a turn in the first quadrant, then reflected into the
    // quadrant selected by the signs of x and y.
    float turn = OverTwoPi * atan( Abs( y ) / Abs( x ) );
    if( x < 0.0 ) turn = 0.5 - turn;
    if( y < 0.0 ) turn = 1.0 - turn;
    return( turn );
}
/***************************************************************************
* A R C T A N *
* *
* Returns the angle theta such that x = COS( theta ) & y = SIN( theta ). *
* *
***************************************************************************/
float ArcTan( float x, float y )
{
    // x is numerically zero: the angle is PiOverTwo, 3 * PiOverTwo, or
    // (when y is numerically zero as well) 0.
    if( !( Abs( x ) > Epsilon ) )
    {
        if( y > Epsilon ) return( PiOverTwo );
        if( y < -Epsilon ) return( 3 * PiOverTwo );
        return( 0.0 );
    }

    // First-quadrant angle, then reflected according to the signs of
    // x and y so that the result lies in [0, TwoPi).
    float angle = atan( Abs( y ) / Abs( x ) );
    if( x < 0.0 ) angle = Pi - angle;
    if( y < 0.0 ) angle = TwoPi - angle;
    return( angle );
}
/***************************************************************************
* M A C H I N E E P S I L O N *
* *
* Returns the machine epsilon. *
* *
***************************************************************************/
float MachineEpsilon()
{
    float probe = 1.0;
    float lastDistinct;

    // Keep halving the probe for as long as adding it to 1 still changes
    // the single-precision sum; the last probe that did is the answer.
    do
    {
        lastDistinct = probe;
        probe /= 2.0;
    }
    while( (float)( 1.0 + (float)probe ) > 1.0 );  // Avoid double precision!

    return lastDistinct;
}
/***************************************************************************
* L O G G A M M A *
* *
* Computes the natural log of the gamma function using the Lanczos *
* approximation formula. Gamma is defined by *
* *
* ( z - 1 ) -t *
* gamma( z ) = Integral[ t e dt ] *
* *
* *
* where the integral ranges from 0 to infinity. The gamma function *
* satisfies *
* gamma( n + 1 ) = n! *
* *
* This algorithm has been adapted from "Numerical Recipes", p. 157. *
* *
***************************************************************************/
double LogGamma( double x )
{
    // Series coefficients; term j of the series is series[j] / ( x + j ).
    static const double series[ 6 ] =
    {
         7.61800917300E+1,
        -8.65053203300E+1,
         2.40140982200E+1,
        -1.23173951600E+0,
         1.20858003000E-3,
        -5.36382000000E-6
    };
    static const double stp = 2.50662827465E+0;  // approximately sqrt( 2 * pi )

    // Accumulate the series left to right.
    double r = series[ 0 ] / x;
    for( int j = 1; j < 6; j++ ) r += series[ j ] / ( x + j );

    const double s = x + 4.5;
    const double t = ( x - 0.5 ) * log( s ) - s;
    return t + log( stp * ( r + 1.0 ) );
}
/***************************************************************************
* L O G F A C T *
* *
* Returns the natural logarithm of n factorial. For efficiency, some *
* of the values are cached, so they need be computed only once. *
* *
***************************************************************************/
double LogFact( int n )
{
    static const int Cache_Size = 100;
    static double c[ Cache_Size ] = { 0.0 };  // 0.0 marks "not computed yet".

    if( n <= 1 ) return 0.0;

    // Too large for the cache: compute directly.  gamma(n+1) == n!
    if( n >= Cache_Size ) return LogGamma((double)(n+1));

    // Fill the cache slot on first use, then serve it from there.
    double &slot = c[n];
    if( slot == 0.0 ) slot = LogGamma((double)(n+1));
    return slot;
}
/***************************************************************************
* M U L T I N O M I A L C O E F F *
* *
* Returns the multinomial coefficient ( n; X1 X2 ... Xk ) which is *
* defined to be n! / ( X1! X2! ... Xk! ). This is done by computing *
* exp( log(n!) - log(X1!) - log(X2!) - ... - log(Xk!) ). The value of *
* n is obtained by summing the Xi's. *
* *
***************************************************************************/
// Multinomial coefficient n! / ( X[0]! X[1]! ... X[k-1]! ), where n is the
// sum of the k entries of X.  For k <= 0 the array is not read and the
// result is 1.
double MultinomialCoeff( int k, int X[] )
{
    // Find n by summing the coefficients.  The sum starts at zero rather
    // than at X[0] so that an empty list never touches X.
    int n = 0;
    for( int i = 0; i < k; i++ ) n += X[i];

    // Compute log(n!) then subtract log(X!) for each X.
    double LogCoeff = LogFact( n );
    for( int i = 0; i < k; i++ ) LogCoeff -= LogFact( X[i] );

    // Round the exponential of the result to the nearest integer.
    return floor( exp( LogCoeff ) + 0.5 );
}
// Three-term multinomial coefficient ( i + j + k )! / ( i! j! k! ),
// computed in log space and rounded to the nearest integer.
double MultinomialCoeff( int i, int j, int k )
{
    const int total = i + j + k;
    const double logCoeff = LogFact( total ) - LogFact( i ) - LogFact( j ) - LogFact( k );
    return floor( exp( logCoeff ) + 0.5 );
}
/***************************************************************************
* B I N O M I A L C O E F F S *
* *
* Generate all n+1 binomial coefficients for a given n. This is done by *
* computing the n'th row of Pascal's triangle, starting from the top. *
* No additional storage is required. *
* *
***************************************************************************/
// Fills coeff[0..n] with row n of Pascal's triangle.  The caller supplies
// room for n + 1 entries.  Each row is built in place from the previous
// one, so only entries that have already been written are ever read.
void BinomialCoeffs( int n, long *coeff )
{
    coeff[0] = 1;
    for( int i = 1; i <= n; i++ )
    {
        coeff[i] = 1;   // The last entry in any row is always 1.
        // Update right to left so coeff[j-1] still holds the old row.
        for( int j = i - 1; j >= 1; j-- ) coeff[j] += coeff[j-1];
    }
}
// Double-precision variant: fills coeff[0..n] with row n of Pascal's
// triangle.  The caller supplies room for n + 1 entries.  Each row is built
// in place from the previous one, so only entries that have already been
// written are ever read.
void BinomialCoeffs( int n, double *coeff )
{
    coeff[0] = 1.0;
    for( int i = 1; i <= n; i++ )
    {
        coeff[i] = 1.0;   // The last entry in any row is always 1.
        // Update right to left so coeff[j-1] still holds the old row.
        for( int j = i - 1; j >= 1; j-- ) coeff[j] += coeff[j-1];
    }
}
// Returns a pointer to a cached row n of Pascal's triangle (n + 1 doubles),
// computing and storing the row on first request.  Returns NULL, after
// writing a message to std::cerr, when n is outside 0..BinCoeffMax.
// The cached rows are never freed.
const double *BinomialCoeffs( int n )
{
    static double *coeff[ BinCoeffMax + 1 ] = { 0 };
    if( n > BinCoeffMax || n < 0 )
    {
        std::cerr << form( "%d is outside of (0,%d) in BinomialCoeffs", n, BinCoeffMax );
        return NULL;
    }
    if( coeff[n] == NULL ) // Fill in this entry.
    {
        // Plain new[] reports failure by throwing, never by returning
        // NULL, so no null check is needed here.
        double *c = new double[ n + 1 ];
        BinomialCoeffs( n, c );
        coeff[n] = c;
    }
    return coeff[n];
}
/***************************************************************************
* B I N O M I A L C O E F F *
* *
* Compute a given binomial coefficient. Several rows of Pascal's *
* triangle are stored for efficiently computing the small coefficients. *
* Higher-order terms are computed using LogFact. *
* *
***************************************************************************/
double BinomialCoeff( int n, int k )
{
    const int p = n - k;

    // Special cases and errors.  Anything that gets past these three tests
    // has k >= 2 and p >= 2, and therefore n >= 4.
    if( k == 0 || p == 0 ) return 1;
    if( k == 1 || p == 1 ) return n;
    if( k < 0 || p < 0 )
    {
        std::cerr << form( "BinomialCoeff(%d,%d) is undefined", n, k );
        return 0;
    }

    // Rows 0..9 of Pascal's triangle for the small coefficients; unused
    // trailing entries of each row are zero-filled.
    static const int pascal[ 10 ][ 10 ] =
    {
        { 1 },
        { 1, 1 },
        { 1, 2,  1 },
        { 1, 3,  3,  1 },
        { 1, 4,  6,  4,   1 },
        { 1, 5, 10, 10,   5,   1 },
        { 1, 6, 15, 20,  15,   6,  1 },
        { 1, 7, 21, 35,  35,  21,  7,  1 },
        { 1, 8, 28, 56,  70,  56, 28,  8, 1 },
        { 1, 9, 36, 84, 126, 126, 84, 36, 9, 1 }
    };
    if( n <= 9 ) return pascal[ n ][ k ];

    // Higher-order terms go through LogFact.
    const double x = LogFact( n ) - LogFact( p ) - LogFact( k );
    return floor( exp( x ) + 0.5 );
}
/***************************************************************************
* L O G D O U B L E F A C T (Log of double factorial) *
* *
* Return log( n!! ) where the double factorial is defined by *
* *
* (2 n + 1)!! = 1 * 3 * 5 * ... * (2n + 1) (Odd integers) *
* *
* (2 n)!! = 2 * 4 * 6 * ... * 2n (Even integers) *
* *
* and is related to the single factorial via *
* *
* (2 n + 1)!! = (2 n + 1)! / ( 2^n n! ) (Odd integers) *
* *
* (2 n)!! = 2^n n! (Even integers) *
* *
***************************************************************************/
double LogDoubleFact( int n ) // log( n!! )
{
    const int half = n / 2;

    // log( 2^half * half! ): this is the whole answer for even n.
    const double evenPart = LogFact( half ) + half * LogTwo;

    // For odd n, n!! = n! / ( 2^half * half! ).
    if( Odd(n) ) return LogFact( n ) - evenPart;
    return evenPart;
}
};

View File

@ -1,212 +0,0 @@
/***************************************************************************
* Math.h *
* *
* Convenient constants, macros, and inline functions for basic math *
* functions. *
* *
* HISTORY *
* Name Date Description *
* *
* arvo 06/17/93 Initial coding. *
* *
*--------------------------------------------------------------------------*
* Copyright (C) 1999, James Arvo *
* *
* This program is free software; you can redistribute it and/or modify it *
* under the terms of the GNU General Public License as published by the *
* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html *
* *
* This program is distributed in the hope that it will be useful, but *
* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for *
* any particular purpose. See the GNU General Public License for more *
* details. *
* *
***************************************************************************/
#ifndef __MATH_INCLUDED__
#define __MATH_INCLUDED__
#include <math.h>
#include <stdlib.h>
namespace ArvoMath {
// Fallback definition for platforms whose headers do not provide MAXFLOAT.
// NOTE(review): 1.0E+20 is far below the real single-precision maximum;
// presumably chosen as a "large enough" sentinel -- confirm before relying
// on it as an upper bound.
#ifndef MAXFLOAT
#define MAXFLOAT 1.0E+20
#endif
// Frequently used multiples and reciprocals of pi, the library's notions of
// "infinity" and "tiny" (both derived from MAXFLOAT), and the factors for
// converting between degrees and radians.
static const double
Pi = 3.14159265358979,
PiSquared = Pi * Pi,
TwoPi = 2.0 * Pi,
FourPi = 4.0 * Pi,
PiOverTwo = Pi / 2.0,
PiOverFour = Pi / 4.0,
OverPi = 1.0 / Pi,
OverTwoPi = 1.0 / TwoPi,
OverFourPi = 1.0 / FourPi,
Infinity = MAXFLOAT,
Tiny = 1.0 / MAXFLOAT,
DegreesToRad = Pi / 180.0,
RadToDegrees = 180.0 / Pi;
// Parity tests on the low bit of k.
inline int Odd( int k )
{
    return k & 1;
}
inline int Even( int k )
{
    return !( k & 1 );
}

// Absolute value.  Every overload returns float, including the double one.
inline float Abs( int x )
{
    if( x > 0 ) return x;
    return -x;
}
inline float Abs( float x )
{
    if( x > 0. ) return x;
    return -x;
}
inline float Abs( double x )
{
    if( x > 0. ) return x;
    return -x;
}

// Two-argument minimum and maximum, single and double precision.
inline float Min( float x, float y )
{
    if( x < y ) return x;
    return y;
}
inline float Max( float x, float y )
{
    if( x > y ) return x;
    return y;
}
inline double dMin( double x, double y )
{
    if( x < y ) return x;
    return y;
}
inline double dMax( double x, double y )
{
    if( x > y ) return x;
    return y;
}

// Squares; every overload returns float.
inline float Sqr( int x )
{
    return x * x;
}
inline float Sqr( float x )
{
    return x * x;
}
inline float Sqr( double x )
{
    return x * x;
}

// Square root that yields 0 for any argument that is not positive.
inline float Sqrt( double x )
{
    if( x > 0. ) return sqrt(x);
    return 0.;
}

inline float Cubed( float x )
{
    return x * x * x;
}

// Returns 1, -1 or 0 according to the sign of x.
inline int Sign( float x )
{
    if( x > 0. ) return 1;
    if( x < 0. ) return -1;
    return 0;
}

// Exchange two values in place.
inline void Swap( float &a, float &b )
{
    const float held = a;
    a = b;
    b = held;
}
inline void Swap( int &a, int &b )
{
    const int held = a;
    a = b;
    b = held;
}

// n'th power of the sine / cosine of x.
inline double Sin( double x, int n )
{
    const double s = sin(x);
    return pow( s, n );
}
inline double Cos( double x, int n )
{
    const double c = cos(x);
    return pow( c, n );
}

// Both compute Sqrt( 1 - x^2 ); the square is the float-valued Sqr.
inline float ToSin( double x )
{
    const float squared = Sqr(x);
    return Sqrt( 1.0 - squared );
}
inline float ToCos( double x )
{
    const float squared = Sqr(x);
    return Sqrt( 1.0 - squared );
}

// Largest / smallest of the two magnitudes.
inline float MaxAbs( float x, float y )
{
    const float ax = Abs(x);
    const float ay = Abs(y);
    return Max( ax, ay );
}
inline float MinAbs( float x, float y )
{
    const float ax = Abs(x);
    const float ay = Abs(y);
    return Min( ax, ay );
}

// Length of the hypotenuse with legs x and y.
inline float Pythag( double x, double y )
{
    return Sqrt( x*x + y*y );
}
// Arc cosine that tolerates arguments slightly outside [-1, 1]: values
// above 1 give 0 and values below -1 give Pi.  A NaN argument is passed
// on to acos() so that the result is always a defined value.
inline double ArcCos( double x )
{
    if( x > 1.0 ) return 0.0;
    if( x < -1.0 ) return Pi;
    return acos( x );   // In range, or NaN (which fails both tests above).
}
// Arc sine that tolerates arguments outside [-1, 1] by clamping them to
// the nearest end of that interval first.
inline double ArcSin( double x )
{
    double clamped = x;
    if( clamped < -1.0 ) clamped = -1.0;
    if( clamped > 1.0 ) clamped = 1.0;
    return asin( clamped );
}
// Forces x into [min, max] IN PLACE and also returns the clamped value.
inline float Clamp( float min, float &x, float max )
{
    if( x < min )
    {
        x = min;
        return x;
    }
    if( x > max ) x = max;
    return x;
}

// Same as above for a double-precision x; the bounds stay single precision.
inline double Clamp( float min, double &x, float max )
{
    if( x < min )
    {
        x = min;
        return x;
    }
    if( x > max ) x = max;
    return x;
}
// Largest of three floats.
inline float Max( float x, float y, float z )
{
    if( x >= y && x >= z ) return x;
    if( y >= z ) return y;
    return z;
}
// Smallest of three floats.
inline float Min( float x, float y, float z )
{
    if( x <= y && x <= z ) return x;
    if( y <= z ) return y;
    return z;
}
// Largest of four floats.
inline float Max( float x, float y, float z, float w )
{
    if( x >= y && x >= z && x >= w ) return x;
    if( y >= z && y >= w ) return y;
    if( z >= w ) return z;
    return w;
}
// Smallest of four floats.
inline float Min( float x, float y, float z, float w )
{
    if( x <= y && x <= z && x <= w ) return x;
    if( y <= z && y <= w ) return y;
    if( z <= w ) return z;
    return w;
}
// Largest of three doubles.
inline double dMax( double x, double y, double z )
{
    if( x >= y && x >= z ) return x;
    if( y >= z ) return y;
    return z;
}
// Smallest of three doubles.
inline double dMin( double x, double y, double z )
{
    if( x <= y && x <= z ) return x;
    if( y <= z ) return y;
    return z;
}
inline float MaxAbs( float x, float y, float z )
{
return Max( Abs( x ), Abs( y ), Abs( z ) );
}
inline float MaxAbs( float x, float y, float z, float w )
{
return Max( Abs( x ), Abs( y ), Abs( z ), Abs( w ) );
}
// Euclidean length of the vector ( x, y, z ).
inline float Pythag( float x, float y, float z )
{
    const float sumOfSquares = x * x + y * y + z * z;
    return sqrt( sumOfSquares );
}
// Functions implemented out of line in the accompanying source file.
// Angle theta with x = cos( theta ) and y = sin( theta ).
extern float ArcTan ( float x, float y );
// The same angle expressed as a fraction of a full turn, theta / ( 2*PI ).
extern float ArcQuad ( float x, float y );
// Single-precision machine epsilon, found by repeated halving.
extern float MachineEpsilon ( );
// Natural log of the gamma function.
extern double LogGamma ( double x );
// Natural log of n factorial.
extern double LogFact ( int n );
extern double LogDoubleFact ( int n ); // log( n!! )
// A single binomial coefficient "n choose k".
extern double BinomialCoeff ( int n, int k );
// All n + 1 binomial coefficients of row n, written to coeffs[0..n].
extern void BinomialCoeffs ( int n, long *coeffs );
extern void BinomialCoeffs ( int n, double *coeffs );
// Multinomial coefficients: three explicit terms, or k terms taken from N.
extern double MultinomialCoeff( int i, int j, int k );
extern double MultinomialCoeff( int k, int N[] );
// Signed relative error, ( x - y ) over the larger magnitude.
extern double RelErr ( double x, double y );
// Generic fallbacks, defined only when no earlier header supplied them.
// Being macros they are not confined to the enclosing namespace, and each
// argument may be evaluated more than once -- do not pass expressions with
// side effects.
#ifndef ABS
#define ABS( x ) ((x) > 0 ? (x) : -(x))
#endif
#ifndef MAX
#define MAX( x, y ) ((x) > (y) ? (x) : (y))
#endif
#ifndef MIN
#define MIN( x, y ) ((x) < (y) ? (x) : (y))
#endif
};
#endif

View File

@ -1,420 +0,0 @@
/***************************************************************************
* Char.h *
* *
* Convenient constants, macros, and inline functions for manipulation of *
* characters and strings. *
* *
* HISTORY *
* Name Date Description *
* *
* arvo 07/01/93 Initial coding. *
* *
*--------------------------------------------------------------------------*
* Copyright (C) 1999, James Arvo *
* *
* This program is free software; you can redistribute it and/or modify it *
* under the terms of the GNU General Public License as published by the *
* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html *
* *
* This program is distributed in the hope that it will be useful, but *
* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for *
* any particular purpose. See the GNU General Public License for more *
* details. *
* *
***************************************************************************/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "Char.h"
namespace ArvoMath {
// Element type of SortedList's string table; lets Expand() write
// "new charPtr[ n ]" for an array of char pointers.
typedef char *charPtr;
// Treat "str" as a file name and copy just its directory portion into
// "buff" -- i.e. strip off the name of the leaf object but keep the final
// "/".  When "str" contains no "/", "buff" receives the empty string.
// Returns "buff", which the caller must make large enough.
const char *getPath( const char *str, char *buff )
{
    // Walk backwards to the last slash; ends at -1 when there is none.
    int lastSlash = strlen( str ) - 1;
    while( lastSlash >= 0 && str[lastSlash] != Slash ) lastSlash--;

    for( int i = 0; i <= lastSlash; i++ ) buff[i] = str[i];
    buff[lastSlash+1] = NullChar;
    return buff;
}
// Treat "str" as a file name and copy just the leaf name into "buff" --
// i.e. strip off everything up to and including the final "/".  Without
// a "/" the whole string is copied.  Returns "buff", which the caller must
// make large enough.
const char *getFile( const char *str, char *buff )
{
    const int len = strlen( str );

    // Walk backwards to the last slash; ends at -1 when there is none.
    int lastSlash = len - 1;
    while( lastSlash >= 0 && str[lastSlash] != Slash ) lastSlash--;

    // Copy the tail, terminating null included.
    const char *leaf = str + lastSlash + 1;
    const int count = len - lastSlash;
    for( int i = 0; i < count; i++ ) buff[i] = leaf[i];
    return buff;
}
// Copies the part of "str" that precedes its first period (or all of it,
// if there is no period) into "buff" and returns the number of characters
// copied, not counting the terminating null.
int getPrefix( const char *str, char *buff )
{
    int len = 0;
    for( ; str[len] != NullChar && str[len] != Period; len++ )
        buff[len] = str[len];
    buff[len] = NullChar;
    return len;
}
// Copies the part of "str" that follows its last period (or all of it, if
// there is no period) into "buff" and returns the number of characters
// copied, not counting the terminating null.
int getSuffix( const char *str, char *buff )
{
    const int n = strlen( str );

    // Walk backwards to the last period; ends at -1 when there is none.
    int dot = n - 1;
    while( dot >= 0 && str[dot] != Period ) dot--;

    const int count = n - dot - 1;
    for( int i = 0; i < count; i++ ) buff[i] = str[ dot + 1 + i ];
    buff[count] = NullChar;
    return count;
}
// Formats an integer with "%d" into "buff", or into a function-local static
// buffer when "buff" is NULL, and returns whichever buffer was used.  The
// static buffer is overwritten by the next call that passes NULL.
const char* toString( int number, char *buff )
{
    static char local_buff[32];
    char *str = buff;
    if( str == NULL ) str = local_buff;
    sprintf( str, "%d", number );
    return str;
}

// Formats a float with "%g"; same buffer rules as the integer version
// (this overload has its own static buffer).
const char* toString( float number, char *buff )
{
    static char local_buff[32];
    char *str = buff;
    if( str == NULL ) str = local_buff;
    sprintf( str, "%g", number );
    return str;
}
// Returns 1 when every character of "str" is a digit, a plus or minus
// sign, or a space (so the empty string also yields 1); returns 0 as soon
// as any other character is found.
int isInteger( const char *str )
{
    const int n = strlen( str );
    for( int i = 0; i < n; i++ )
    {
        const char c = str[i];
        const int allowed = isDigit(c) || c == Plus || c == Minus || c == Space;
        if( !allowed ) return 0;
    }
    return 1;
}
// Test whether "string" ends with "suffix".  A NULL or empty suffix always
// matches (result 1).  Otherwise the string must be strictly longer than
// the suffix; on a match the result is the index at which the suffix
// starts (always > 0), and 0 means no match.
int hasSuffix( const char *string, const char *suffix )
{
    if( suffix == NULL ) return 1; // The null suffix always matches.
    if( string == NULL ) return 0; // The null string can only have a null suffix.

    const int m = strlen( string );
    const int k = strlen( suffix );
    if( k <= 0 ) return 1; // Empty suffix always matches.
    if( m <= k ) return 0; // String is too short to have this suffix.

    // Compare the last k characters of the string with the suffix.
    const int s = m - k;
    if( strncmp( string + s, suffix, k ) != 0 ) return 0;
    return s;
}
// Test whether "string" begins with "prefix".  Returns 1 on a match and 0
// otherwise; a NULL or empty prefix always matches.
int hasPrefix( const char *string, const char *prefix )
{
    if( prefix == NULL ) return 1; // The null prefix always matches.
    if( string == NULL ) return 0; // The null string can only have a null prefix.

    // A string that ends early mismatches at its terminating null, so the
    // scan never runs past the end of "string".
    for( int i = 0; prefix[i] != '\0'; i++ )
    {
        if( prefix[i] != string[i] ) return 0;
    }
    return 1;
}
// Test whether "str" contains the character "c".  A NULL or empty string
// contains nothing, and the terminating null itself is never matched.
int inString( char c, const char *str )
{
    if( str == NULL ) return 0;
    for( ; *str != NullChar; str++ )
    {
        if( *str == c ) return 1;
    }
    return 0;
}
// Returns nonzero when "str" is NULL or the empty string.
int nullString( const char *str )
{
    if( str == NULL ) return 1;
    return str[0] == NullChar;
}
// Copies "string" with "suffix" removed into "buff" (or into an internal
// 256-byte static buffer when "buff" is NULL) and returns that buffer.
// Returns NULL, with the buffer set to the empty string, when "string" is
// NULL, when hasSuffix() reports no match, or when the result would not fit
// in the internal buffer.  A NULL suffix is treated as empty, so the whole
// string is copied.  A caller-supplied buffer must be large enough.
const char *stripSuffix( const char *string, const char *suffix, char *buff )
{
    static char local_buff[256];
    const int using_local = ( buff == NULL );
    if( using_local ) buff = local_buff;
    buff[0] = NullChar;
    if( string == NULL ) return NULL;   // hasSuffix( NULL, NULL ) reports a match.
    if( !hasSuffix( string, suffix ) ) return NULL;

    // hasSuffix() accepts a NULL suffix; strlen() does not.
    const size_t suffix_len = ( suffix == NULL ) ? 0 : strlen( suffix );
    const size_t s = strlen( string ) - suffix_len;

    // Only the internal buffer has a known capacity to check against.
    if( using_local && s >= sizeof( local_buff ) ) return NULL;

    memcpy( buff, string, s );
    buff[s] = NullChar;
    return buff;
}
// Returns the index in "str" at which the first occurrence of "pat"
// begins, or -1 when there is none.  An empty pattern or an empty string
// always yields -1.
int getIndex( const char *pat, const char *str )
{
    if( *pat == '\0' || *str == '\0' ) return -1;

    const char *hit = strstr( str, pat );
    if( hit == NULL ) return -1;
    return (int)( hit - str );
}
// Copies whatever follows the first occurrence of "pat" in "str" into
// "buff", terminating null included.  Returns the number of characters
// written (counting that null), or -1 when "pat" does not occur, in which
// case "buff" is left untouched.
int getSubstringAfter( const char *pat, const char *str, char *buff )
{
    const int ind = getIndex( pat, str );
    if( ind < 0 ) return -1;

    const char *tail = str + ind + strlen( pat );
    int k = 0;
    for(;;)
    {
        const char ch = tail[k];
        buff[k] = ch;
        k++;
        if( ch == NullChar ) break;
    }
    return k;
}
// Returns the text that follows the first occurrence of "pat" in "str",
// copied into "user_buff" or, when that is NULL, into a 128-byte static
// buffer.  When "pat" does not occur, "str" itself is returned.
// NOTE(review): nothing here bounds the copy into the static buffer, so a
// tail of 128 characters or more would overrun it -- confirm that callers
// only pass short strings.
const char *SubstringAfter( const char *pat, const char *str, char *user_buff )
{
    static char temp[128];
    char *buff = user_buff;
    if( buff == NULL ) buff = temp;

    if( getSubstringAfter( pat, str, buff ) > 0 ) return buff;
    return str;
}
// Wraps "str" in double quotes, making it a string within a string.  The
// result is written to "user_buff" when one is supplied (the caller must
// make it at least strlen( str ) + 3 bytes) and is returned.  When
// "user_buff" is NULL a 128-byte static buffer is used instead, and "str"
// is truncated to the 125 characters that fit between the two quotes.
const char *metaString( const char *str, char *user_buff )
{
    static char temp[128];
    if( user_buff != NULL )
    {
        sprintf( user_buff, "\"%s\"", str );
        return user_buff;
    }

    // The precision caps how much of "str" is copied, leaving room for
    // both quotes and the terminating null.
    sprintf( temp, "\"%.*s\"", (int)sizeof( temp ) - 3, str );
    return temp;
}
// This is the opposite of metaString: copies "str" with every double-quote
// character removed.  The result goes to "user_buff" when one is supplied
// (the caller must make it large enough) and is returned.  When "user_buff"
// is NULL a 128-byte static buffer is used, and the output is truncated to
// the 127 characters that fit in it.
const char *stripQuotes( const char *str, char *user_buff )
{
    static char temp[128];
    char *buff = ( user_buff != NULL ) ? user_buff : temp;

    // Only the internal buffer has a known capacity; "limit" is the slot
    // that must be kept free for the terminating null.
    const char *limit = ( user_buff != NULL ) ? NULL : temp + sizeof( temp ) - 1;

    char *b = buff;
    for( ; *str != NullChar; str++ )
    {
        if( *str == DoubleQuote ) continue;
        if( limit != NULL && b == limit ) break;   // Internal buffer is full.
        *b++ = *str;
    }
    *b = NullChar;
    return buff;
}
// Searches "flags" for the text "<flag>=" and, at the first position where
// it is found, stores atoi() of what follows the '=' in "value".  Returns
// 1 when a value was stored, and 0 (leaving "value" untouched) otherwise.
int getIntFlag( const char *flags, const char *flag, int &value )
{
    for( const char *p = flags; *p; p++ )
    {
        if( !hasPrefix( p, flag ) ) continue;

        // The prefix matched, so "after" is still inside the string.
        const char *after = p + strlen( flag );
        if( *after != '=' ) continue;

        value = atoi( after + 1 );
        return 1;
    }
    return 0;
}
// Searches "flags" for the text "<flag>=" and, at the first position where
// it is found, stores atof() of what follows the '=' in "value".  Returns
// 1 when a value was stored, and 0 (leaving "value" untouched) otherwise.
int getFloatFlag( const char *flags, const char *flag, float &value )
{
    for( const char *p = flags; *p; p++ )
    {
        if( !hasPrefix( p, flag ) ) continue;

        // The prefix matched, so "after" is still inside the string.
        const char *after = p + strlen( flag );
        if( *after != '=' ) continue;

        value = atof( after + 1 );
        return 1;
    }
    return 0;
}
// Creates an empty list with the given ordering.  No storage is allocated
// until the first string is added.
SortedList::SortedList( sort_type type_, int ascend_ ) :
    num_elements( 0 ),
    max_elements( 0 ),
    sorted( 1 ),        // An empty list is trivially sorted.
    ascend( ascend_ ),
    type( type_ ),
    list( NULL )
{
}
// Destructor: releases every stored string through Clear(), then the
// pointer table itself.
SortedList::~SortedList()
{
Clear();
delete[] list;
}
// Removes every string from the list.  The pointer table is kept so that
// it can be reused, expanding again when necessary.
void SortedList::Clear()
{
    for( int i = 0; i < num_elements; i++ )
    {
        // The private copies are made with strdup() in operator<<, so they
        // must be released with free(), not delete.
        free( list[i] );
        list[i] = NULL;
    }
    num_elements = 0;
    sorted = 1;
}
// Appends a private copy of "str" to the list, growing the pointer table
// first when it is full.  The list is marked unsorted so that the next
// element access sorts it again.  Returns *this so that insertions can be
// chained.
SortedList &SortedList::operator<<( const char *str )
{
    if( num_elements == max_elements ) Expand();

    char *copy = strdup( str );
    list[ num_elements ] = copy;
    num_elements++;

    sorted = 0;
    return *this;
}
// Returns the i'th element of the list in the current sort order, sorting
// first if necessary.  An index outside 0..Size()-1 (which includes every
// index when the list is empty) yields the empty string.
const char *SortedList::operator()( int i )
{
    // A string literal has static storage and is const, so it can be
    // returned directly as the "no such element" value.
    if( i < 0 || i >= num_elements ) return "";
    if( !sorted ) Sort();
    return list[i];
}
void SortedList::Expand()
{
// Create a new list of twice the size and copy the old list into it.
// This doubles "max_elements", but leaves "num_elements" unchanged.
if( max_elements == 0 ) max_elements = 1;
max_elements *= 2;
charPtr *new_list = new charPtr[ max_elements ];
for( int i = 0; i < max_elements; i++ )
new_list[i] = ( i < num_elements ) ? list[i] : NULL;
delete[] list;
list = new_list;
}
// Exchanges the string pointers stored at positions i and j.
void SortedList::Swap( int i, int j )
{
    char *held = list[j];
    list[j] = list[i];
    list[i] = held;
}
// Reports whether the element at position p may precede the element at
// position q under the current ordering.  Alphabetic order is plain
// strcmp order; the other ordering compares lengths first and falls back
// to strcmp for strings of equal length.  For a descending list the
// result is inverted.
int SortedList::inOrder( int p, int q ) const
{
    const char *first = list[p];
    const char *second = list[q];

    int ordered;
    if( type == sort_alphabetic )
    {
        ordered = ( strcmp( first, second ) <= 0 );
    }
    else
    {
        const int len_first = strlen( first );
        const int len_second = strlen( second );
        if( len_first != len_second ) ordered = ( len_first < len_second );
        else ordered = ( strcmp( first, second ) <= 0 );
    }

    return ascend ? ordered : !ordered;
}
// Insertion sort over the subsequence start, start + step, start + 2*step,
// ... of the list.  "size" bounds the offsets relative to "start": only
// pairs whose upper offset is below "size" are visited.
void SortedList::InsertionSort( int start, int size, int step )
{
    for( int i = 0; i + step < size; i += step )
    {
        // Sink the element at offset i + step towards the front until it
        // meets a neighbour it is in order with.
        int j = i;
        while( j >= 0 )
        {
            const int lower = start + j;
            const int upper = lower + step;
            if( inOrder( lower, upper ) ) break;
            Swap( lower, upper );
            j -= step;
        }
    }
}
// Shell sort: insertion-sort the interleaved subsequences for a series of
// halving step sizes, then finish with an ordinary insertion sort over the
// whole list, and mark the list as sorted.
void SortedList::Sort()
{
    int step = num_elements / 2;
    while( step > 1 )
    {
        for( int start = 0; start < step; start++ )
        {
            InsertionSort( start, num_elements - start, step );
        }
        step /= 2;
    }

    InsertionSort( 0, num_elements, 1 );
    sorted = 1;
}
// Changes the ordering criterion and/or direction.  When either actually
// changes, the list is marked unsorted so that the next element access
// re-sorts it; otherwise nothing happens.
void SortedList::SetOrder( sort_type type_, int ascend_ )
{
    if( type_ == type && ascend_ == ascend ) return;

    type = type_;
    ascend = ascend_;
    sorted = 0;
}
// Reads characters from "in" with operator>> (which skips white space by
// default) and checks them one by one against "str".  Returns 1 when all of
// "str" was matched, or when "str" is NULL; returns 0 at the first
// mismatch or as soon as the stream cannot supply another character.
int getstring( std::istream &in, const char *str )
{
    if( str == NULL ) return 1;
    for( ; *str != NullChar; str++ )
    {
        char ch;
        // A failed extraction leaves "ch" unset, so test the stream before
        // looking at the character.
        if( !( in >> ch ) ) return 0;
        if( ch != *str ) return 0;
    }
    return 1;
}
// Consumes characters from "in" for as long as isWhite() accepts them.
// The first character it rejects is put back so that it is the next one
// read.  Returns "in".
std::istream &skipWhite( std::istream &in )
{
    char c;
    while( in.get(c) )
    {
        if( isWhite( c ) ) continue;
        in.putback(c);
        break;
    }
    return in;
}
};

View File

@ -1,245 +0,0 @@
/***************************************************************************
* Char.h *
* *
* Convenient constants, macros, and inline functions for manipulation of *
* characters and strings. *
* *
* HISTORY *
* Name Date Description *
* *
* arvo 07/01/93 Initial coding. *
* *
*--------------------------------------------------------------------------*
* Copyright (C) 1999, James Arvo *
* *
* This program is free software; you can redistribute it and/or modify it *
* under the terms of the GNU General Public License as published by the *
* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html *
* *
* This program is distributed in the hope that it will be useful, but *
* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for *
* any particular purpose. See the GNU General Public License for more *
* details. *
* *
***************************************************************************/
#ifndef __CHAR_INCLUDED__
#define __CHAR_INCLUDED__
#include <string>
#include <iostream>
namespace ArvoMath {
// Symbolic names for individual characters.  Some characters have two
// names: Dash / Minus, Hash / Pound and Asterisk / Star.
static const char
Apostrophe = '\'' ,
Asterisk = '*' ,
Atsign = '@' ,
Backslash = '\\' ,
Bell = '\7' ,
Colon = ':' ,
Comma = ',' ,
Dash = '-' ,
DoubleQuote = '"' ,
EqualSign = '=' ,
Exclamation = '!' ,
GreaterThan = '>' ,
Hash = '#' ,
Lbrack = '[' ,
Lcurley = '{' ,
LessThan = '<' ,
Lparen = '(' ,
Minus = '-' ,
NewLine = '\n' ,
NullChar = '\0' ,
Percent = '%' ,
Period = '.' ,
Pound = '#' ,
Plus = '+' ,
Rbrack = ']' ,
Rcurley = '}' ,
Rparen = ')' ,
Semicolon = ';' ,
Space = ' ' ,
Slash = '/' ,
Star = '*' ,
Tab = '\t' ,
Tilde = '~' ,
Underscore = '_' ;
// White space here means a space, a newline or a tab -- nothing else.
inline int isWhite( char c )
{
    if( c == Space ) return 1;
    if( c == NewLine ) return 1;
    return c == Tab;
}
// Character classification by range: 'A'..'Z', 'a'..'z' and '0'..'9'.
inline int isUcase( char c )
{
    return c >= 'A' && c <= 'Z';
}
inline int isLcase( char c )
{
    return c >= 'a' && c <= 'z';
}
inline int isAlpha( char c )
{
    if( isUcase( c ) ) return 1;
    return isLcase( c );
}
inline int isDigit( char c )
{
    return c >= '0' && c <= '9';
}

// Case conversion; characters that are not letters of the other case are
// returned unchanged.
inline char ToLower( char c )
{
    if( isUcase( c ) ) return c + ( 'a' - 'A' );
    return c;
}
inline char ToUpper( char c )
{
    if( isLcase( c ) ) return c + ( 'A' - 'a' );
    return c;
}
// String helpers implemented in the accompanying source file.  Functions
// that take a "buff" without a default write their result there and expect
// the caller to have made it large enough.

// Directory portion of a file name, including the final "/".
extern const char *getPath(
const char *str,
char *buff
);
// Leaf portion of a file name, i.e. everything after the final "/".
extern const char *getFile(
const char *str,
char *buff
);
// Text before the first period; returns its length.
extern int getPrefix(
const char *str,
char *buff
);
// Text after the last period; returns its length.
extern int getSuffix(
const char *str,
char *buff
);
// Nonzero when the string holds only digits, signs and spaces.
extern int isInteger(
const char *str
);
// Nonzero when "string" ends with "suffix".
extern int hasSuffix(
const char *string,
const char *suffix
);
// Nonzero when "string" begins with "prefix".
extern int hasPrefix(
const char *string,
const char *prefix
);
// Nonzero when "str" contains the character "c".
extern int inString(
char c,
const char *str
);
// Nonzero when "str" is NULL or empty.
extern int nullString(
const char *str
);
extern const char *stripSuffix( // Return NULL if unsuccessful.
const char *string, // The string to truncate.
const char *suffix, // The suffix to remove.
char *buff = NULL // Defaults to internal buffer.
);
extern const char* toString(
int n, // An integer to convert to a string.
char *buff = NULL // Defaults to internal buffer.
);
extern const char* toString(
float x, // A float to convert to a string.
char *buff = NULL // Defaults to internal buffer.
);
extern int getIndex( // The index of the start of a pattern in a string.
const char *pat, // The pattern to look for.
const char *str // The string to search.
);
// Copies what follows "pat" in "str" into "buff"; -1 if "pat" is absent.
extern int getSubstringAfter(
const char *pat,
const char *str,
char *buff
);
// As getSubstringAfter, but returns the text ("str" itself if "pat" is absent).
extern const char *SubstringAfter(
const char *pat,
const char *str,
char *buff = NULL // Defaults to internal buffer.
);
extern const char *metaString(
const char *str, // Make this a string within a string.
char *buff = NULL // Defaults to internal buffer.
);
extern const char *stripQuotes(
const char *str, // This is the opposite of metaString.
char *buff = NULL // Defaults to internal buffer.
);
// Looks for "<flag>=<integer>"; returns nonzero when a value was stored.
extern int getIntFlag(
const char *flags, // List of assignment statements.
const char *flag, // A specific flag to look for.
int &value // The variable to assign the value to.
);
// Looks for "<flag>=<number>"; returns nonzero when a value was stored.
extern int getFloatFlag(
const char *flags, // List of assignment statements.
const char *flag, // A specific flag to look for.
float &value // The variable to assign the value to.
);
// Reads from "in" and checks the characters against "str".
extern int getstring(
std::istream &in,
const char *str
);
enum sort_type {
sort_alphabetic, // Standard dictionary ordering.
sort_lexicographic // Sort first by length, then alphabetically.
};
// A list of C strings that can be read back in sorted order.
// Strings are added with operator<< and read back by index with operator().
// Only the declaration is visible here; member definitions live elsewhere.
// NOTE(review): the class holds a raw `char **list` and declares a destructor
// but no copy constructor or assignment operator -- presumably instances must
// not be copied; confirm against the implementation.
class SortedList {
public:
SortedList( sort_type = sort_alphabetic, int ascending = 1 );
~SortedList();
SortedList &operator<<( const char * ); // Add a string; returns *this so calls can be chained.
int Size() const { return num_elements; } // Number of stored elements.
const char *operator()( int i ); // Element i (non-const: NOTE(review) presumably sorts lazily -- confirm).
void Clear();
void SetOrder( sort_type = sort_alphabetic, int ascending = 1 );
private:
void Sort();
void InsertionSort( int start, int size, int step );
void Swap( int i, int j );
void Expand();
int inOrder( int i, int j ) const;
int num_elements; // Value returned by Size().
int max_elements;
int sorted;
int ascend;
sort_type type;
char **list;
};
// Returns 1 when s and t are both non-null and compare equal with strcmp,
// and 0 otherwise (including when either pointer is NULL).
inline int Match( const char *s, const char *t )
{
    if( s == NULL || t == NULL ) return 0;
    return strcmp( s, t ) == 0;
}
// Returns 1 when s is non-null and equals either candidate (t1 or t2);
// a NULL candidate is simply skipped. Returns 0 otherwise.
inline int Match( const char *s, const char *t1, const char *t2 )
{
    if( s == NULL ) return 0;
    if( t1 != NULL && strcmp( s, t1 ) == 0 ) return 1;
    if( t2 != NULL && strcmp( s, t2 ) == 0 ) return 1;
    return 0;
}
// Overlay used to view the storage of a float as a long and vice versa.
union long_union_float {
    long i;
    float f;
};

// Returns the bit pattern of x as a long.
// The integer member is cleared first: where long is wider than float, the
// bytes the float does not cover would otherwise be returned uninitialised.
inline long float_as_long( float x )
{
    long_union_float u;
    u.i = 0;
    u.f = x;
    return u.i;
}

// Inverse of float_as_long: reinterprets the storage of i as a float.
inline float long_as_float( long i )
{
    long_union_float u;
    u.i = i;
    return u.f;
}
// Takes an input stream by reference and returns a stream reference.
// NOTE(review): presumably consumes leading whitespace from `in` -- confirm against the definition.
extern std::istream &skipWhite( std::istream &in );
};
#endif

View File

@ -1,76 +0,0 @@
/***************************************************************************
* Complex.C *
* *
* Complex numbers, complex arithmetic, and functions of a complex *
* variable. *
* *
* HISTORY *
* Name Date Description *
* *
* arvo 03/02/2000 Initial coding. *
* *
*--------------------------------------------------------------------------*
* Copyright (C) 2000, James Arvo *
* *
* This program is free software; you can redistribute it and/or modify it *
* under the terms of the GNU General Public License as published by the *
* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html *
* *
* This program is distributed in the hope that it will be useful, but *
* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for *
* any particular purpose. See the GNU General Public License for more *
* details. *
* *
***************************************************************************/
#include "Complex.h"
#include "form.h"
namespace ArvoMath {
// Definition of the static member Complex::i, constructed as Complex( 0.0, 1.0 ).
const Complex Complex::i( 0.0, 1.0 );
// Writes z to the stream as "(real,imag) " using form() with "%f" for both
// parts, and returns the same stream.
std::ostream &operator<<( std::ostream &out, const Complex &z )
{
    const float re = z.Real();
    const float im = z.Imag();
    out << form( "(%f,%f) ", re, im );
    return out;
}
// Complex cosine: cos(x + iy) = cos(x) cosh(y) - i sin(x) sinh(y).
Complex cos( const Complex &z )
{
    const float re = z.Real();
    const float im = z.Imag();
    return Complex( ::cos( re ) * ::cosh( im ),
                    -::sin( re ) * ::sinh( im ) );
}
// Complex sine: sin(x + iy) = sin(x) cosh(y) + i cos(x) sinh(y).
Complex sin( const Complex &z )
{
    const float re = z.Real();
    const float im = z.Imag();
    return Complex( ::sin( re ) * ::cosh( im ),
                    ::cos( re ) * ::sinh( im ) );
}
// Complex hyperbolic cosine: cosh(x + iy) = cosh(x) cos(y) + i sinh(x) sin(y).
Complex cosh( const Complex &z )
{
    const float re = z.Real();
    const float im = z.Imag();
    return Complex( ::cosh( re ) * ::cos( im ),
                    ::sinh( re ) * ::sin( im ) );
}
// Complex hyperbolic sine: sinh(x + iy) = sinh(x) cos(y) + i cosh(x) sin(y).
Complex sinh( const Complex &z )
{
    const float re = z.Real();
    const float im = z.Imag();
    return Complex( ::sinh( re ) * ::cos( im ),
                    ::cosh( re ) * ::sin( im ) );
}
// Complex logarithm: returns ( ln|z|, angle ) with the angle in [0, 2*pi).
// The angle comes from atan2 rather than acos( Re / |z| ), so z == 0 no longer
// divides 0 by 0 and produces a NaN angle; it now yields angle 0 (the real
// part is still ::log(0)).
Complex log( const Complex &z )
{
    const float re = z.Real();
    const float im = z.Imag();
    float r = ::sqrt( re * re + im * im );
    float t = ::atan2( im, re );
    // atan2 reports angles below the real axis as negative; shift them into [pi, 2*pi).
    if( t < 0.0 ) t += 2.0 * 3.14159265358979323846;
    return Complex( ::log(r), t );
}
};

View File

@ -1,187 +0,0 @@
/***************************************************************************
* Complex.h *
* *
* Complex numbers, complex arithmetic, and functions of a complex *
* variable. *
* *
* HISTORY *
* Name Date Description *
* *
* arvo 03/02/2000 Initial coding. *
* *
*--------------------------------------------------------------------------*
* Copyright (C) 2000, James Arvo *
* *
* This program is free software; you can redistribute it and/or modify it *
* under the terms of the GNU General Public License as published by the *
* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html *
* *
* This program is distributed in the hope that it will be useful, but *
* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for *
* any particular purpose. See the GNU General Public License for more *
* details. *
* *
***************************************************************************/
#ifndef __COMPLEX_INCLUDED__
#define __COMPLEX_INCLUDED__
#include <math.h>
#include <iostream>
namespace ArvoMath {

    // Single-precision complex number x + i*y.
    // A float converts implicitly to a Complex with zero imaginary part.
    class Complex {
    public:
        Complex() : x( 0 ), y( 0 ) {}
        Complex( float a ) : x( a ), y( 0 ) {}
        Complex( float a, float b ) : x( a ), y( b ) {}
        Complex( const Complex &z ) : x( z.x ), y( z.y ) {}
        float &Real() { return x; }        // Writable real part.
        float &Imag() { return y; }        // Writable imaginary part.
        float Real() const { return x; }
        float Imag() const { return y; }
        inline Complex &operator=( const Complex &z );
        static const Complex i;            // Declared here; defined out of line.
    private:
        float x;   // Real part.
        float y;   // Imaginary part.
    };

    // Member-wise copy assignment.
    inline Complex &Complex::operator=( const Complex &z )
    {
        x = z.Real();
        y = z.Imag();
        return *this;
    }

    // Free-function accessors for the real and imaginary parts.
    inline float Real( const Complex &z )
    {
        return z.Real();
    }

    inline float Imag( const Complex &z )
    {
        return z.Imag();
    }

    // Complex conjugate: negates the imaginary part.
    inline Complex conj( const Complex &z )
    {
        return Complex( z.Real(), -z.Imag() );
    }

    // Squared modulus, re^2 + im^2.
    inline double modsqr( const Complex &z )
    {
        return z.Real() * z.Real() + z.Imag() * z.Imag();
    }

    // Modulus |z| = sqrt( re^2 + im^2 ).
    inline double modulus( const Complex &z )
    {
        return ::sqrt( z.Real() * z.Real() + z.Imag() * z.Imag() );
    }

    // Argument of z in [0, 2*pi).
    // Uses atan2 instead of acos( Re / |z| ): z == 0 now gives 0 rather than
    // NaN from 0/0, and the result is no longer rounded through a float
    // against a 7-digit value of pi.
    inline double arg( const Complex &z )
    {
        double t = ::atan2( (double)z.Imag(), (double)z.Real() );
        // atan2 reports angles below the real axis as negative; shift them up.
        if( t < 0.0 ) t += 2.0 * 3.14159265358979323846;
        return t;
    }

    // Scaling by a real factor, in either operand order.
    inline Complex operator*( const Complex &z, float a )
    {
        return Complex( a * z.Real(), a * z.Imag() );
    }

    inline Complex operator*( float a, const Complex &z )
    {
        return Complex( a * z.Real(), a * z.Imag() );
    }

    // Complex product.
    inline Complex operator*( const Complex &z, const Complex &w )
    {
        return Complex(
            z.Real() * w.Real() - z.Imag() * w.Imag(),
            z.Real() * w.Imag() + z.Imag() * w.Real()
            );
    }

    inline Complex operator+( const Complex &z, const Complex &w )
    {
        return Complex( z.Real() + w.Real(), z.Imag() + w.Imag() );
    }

    inline Complex operator-( const Complex &z, const Complex &w )
    {
        return Complex( z.Real() - w.Real(), z.Imag() - w.Imag() );
    }

    // Unary negation.
    inline Complex operator-( const Complex &z )
    {
        return Complex( -z.Real(), -z.Imag() );
    }

    // Division by a real; no check is made for w == 0.
    inline Complex operator/( const Complex &z, float w )
    {
        return Complex( z.Real() / w, z.Imag() / w );
    }

    // z / w computed as z * conj(w) / |w|^2; no check is made for w == 0.
    inline Complex operator/( const Complex &z, const Complex &w )
    {
        const float denom = modsqr( w );
        return ( z * conj(w) ) / denom;
    }

    // a / w computed as conj(w) * ( a / |w|^2 ); no check is made for w == 0.
    inline Complex operator/( float a, const Complex &w )
    {
        const float scale = a / modsqr( w );
        return conj(w) * scale;
    }

    inline Complex &operator+=( Complex &z, const Complex &w )
    {
        z.Real() += w.Real();
        z.Imag() += w.Imag();
        return z;
    }

    inline Complex &operator*=( Complex &z, const Complex &w )
    {
        return z = ( z * w );
    }

    inline Complex &operator-=( Complex &z, const Complex &w )
    {
        z.Real() -= w.Real();
        z.Imag() -= w.Imag();
        return z;
    }

    // Complex exponential: e^x * ( cos y + i sin y ).
    inline Complex exp( const Complex &z )
    {
        float r = ::exp( z.Real() );
        return Complex( r * ::cos( z.Imag() ), r * ::sin( z.Imag() ) );
    }

    // Integer power via polar form: |z|^n at angle n * arg(z).
    inline Complex pow( const Complex &z, int n )
    {
        float r = ::pow( modulus( z ), (double)n );
        float t = arg( z );
        return Complex( r * ::cos( n * t ), r * ::sin( n * t ) );
    }

    // Builds a complex number from modulus r and angle theta.
    inline Complex polar( float r, float theta )
    {
        return Complex( r * ::cos( theta ), r * ::sin( theta ) );
    }

    // Declared here; defined out of line.
    extern Complex cos ( const Complex &z );
    extern Complex sin ( const Complex &z );
    extern Complex cosh( const Complex &z );
    extern Complex sinh( const Complex &z );
    extern Complex log ( const Complex &z );
    extern std::ostream &operator<<(
        std::ostream &out,
        const Complex &
        );
};
#endif

File diff suppressed because it is too large Load Diff

View File

@ -1,142 +0,0 @@
/***************************************************************************
* Matrix.h *
* *
* General Vector and Matrix classes, with all the associated methods. *
* *
* HISTORY *
* Name Date Description *
* *
* arvo 08/16/2000 Revamped for CIT tools. *
* arvo 10/31/1994 Combined RowVec & ColVec into Vector. *
* arvo 06/30/1993 Added singular value decomposition class. *
* arvo 06/25/1993 Major revisions. *
* arvo 09/08/1991 Initial implementation. *
* *
*--------------------------------------------------------------------------*
* Copyright (C) 2000, James Arvo *
* *
* This program is free software; you can redistribute it and/or modify it *
* under the terms of the GNU General Public License as published by the *
* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html *
* *
* This program is distributed in the hope that it will be useful, but *
* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for *
* any particular purpose. See the GNU General Public License for more *
* details. *
* *
***************************************************************************/
#ifndef __MATRIX_INCLUDED__
#define __MATRIX_INCLUDED__
#include <iostream>
#include "Vector.h"
namespace ArvoMath {
// Matrix of floats whose dimensions are chosen at run time.
// Element (i,j) is stored at elem[ i * cols + j ], so each row is contiguous.
// Only the inline accessors are defined here; other members are defined elsewhere.
class Matrix {
public:
Matrix( const Matrix & );
Matrix( int num_rows = 0, int num_cols = 0, float value = 0.0 );
~Matrix();
Matrix &operator=( const Matrix &M );
Matrix &operator=( float s );
Vector GetCol( int col ) const;
Vector GetRow( int row ) const;
void SetCol( int col, const Vector & );
void SetRow( int row, const Vector & );
Matrix GetBlock( int imin, int imax, int jmin, int jmax ) const;
void SetBlock( int imin, int imax, int jmin, int jmax, const Matrix & );
void SetBlock( int imin, int imax, int jmin, int jmax, const Vector & );
Matrix &SwapRows( int i1, int i2 );
Matrix &SwapCols( int j1, int j2 );
void SetSize( int rows, int cols = 0 );
double Cofactor( int i, int j ) const;
static const Matrix Null;
public: // Inlined functions.
// Element access by (row, column). These index elem directly with no bounds check.
inline float operator()( int i, int j ) const { return elem[ i * cols + j ]; }
inline float &operator()( int i, int j ) { return elem[ i * cols + j ]; }
inline int Rows () const { return rows; }
inline int Cols () const { return cols; }
// Returns the raw element pointer; note it is non-const even on a const Matrix.
inline float *Array () const { return elem; }
private:
int rows; // Number of rows in the matrix.
int cols; // Number of columns in the matrix.
float *elem; // Pointer to the actual data.
};
// Free functions and operators on Matrix and Vector. Only declarations appear
// here; the grouping comments below describe the signatures, not verified behavior.

// Products that involve a Vector.
extern Vector operator * ( const Matrix &, const Vector & );
extern Vector operator * ( const Vector &, const Matrix & );
extern Vector& operator *= ( Vector &, const Matrix & );
extern Matrix Outer ( const Vector &, const Vector & ); // Outer product.
// Matrix arithmetic.
extern Matrix operator + ( const Matrix &, const Matrix & );
extern Matrix operator - ( const Matrix & );
extern Matrix operator - ( const Matrix &, const Matrix & );
extern Matrix operator * ( const Matrix &, const Matrix & );
extern Matrix operator * ( const Matrix &, float );
extern Matrix operator * ( float , const Matrix & );
extern Matrix operator / ( const Matrix &, float );
extern Matrix& operator += ( Matrix &, const Matrix & );
extern Matrix& operator *= ( Matrix &, float );
extern Matrix& operator *= ( Matrix &, const Matrix & );
extern Matrix& operator /= ( Matrix &, float );
// Functions returning a newly built Matrix (or Vector) from their arguments.
extern Matrix Ident ( int n );
extern Matrix Householder ( const Vector & );
extern Matrix Rotation ( const Vector &Axis, float angle );
extern Matrix Rotation ( const Vector &Axis, const Vector &Origin, float angle );
extern Matrix Rotation ( float, float, float ); // For random 3D rotations.
extern Matrix Xrotation ( float );
extern Matrix Yrotation ( float );
extern Matrix Zrotation ( float );
extern Matrix Diag ( const Vector & );
extern Vector Diag ( const Matrix & );
extern Matrix Diag ( float, float, float );
extern Matrix Adjoint ( const Matrix & );
extern Matrix Adjoint ( const Matrix &, double &det ); // Also writes a value through `det`.
extern Matrix AATransp ( const Matrix & );
extern Matrix ATranspA ( const Matrix & );
// Scalar-valued functions of a Matrix.
extern double OneNorm ( const Matrix & );
extern double SupNorm ( const Matrix & );
extern double Determinant ( const Matrix & );
extern double Trace ( const Matrix & );
extern Matrix Transp ( const Matrix & );
extern Matrix Inverse ( const Matrix & );
// Predicates returning int. Note that `Null` is also the name of the static member Matrix::Null.
extern int Null ( const Matrix & );
extern int Square ( const Matrix & );
extern Vector ToVector ( const Matrix & ); // Only for vector-shaped matrices.
// Pivoting mode accepted by GaussElimination (which defaults to pivot_off).
enum pivot_type {
pivot_off,
pivot_partial,
pivot_total
};
// Linear-system routines. Each takes the system by const reference and writes
// its result into the non-const Vector x.
// NOTE(review): the int return is presumably a success/failure indicator -- confirm.
extern int GaussElimination(
const Matrix &A,
const Vector &b, // This is the right-hand side.
Vector &x, // This is the vector we are solving for.
pivot_type = pivot_off
);
extern int LeastSquares(
const Matrix &A,
const Vector &b,
Vector &x
);
extern int WeightedLeastSquares(
const Matrix &A,
const Vector &b,
const Vector &w,
Vector &x
);
// Stream output for a Matrix.
std::ostream &operator<<(
std::ostream &out,
const Matrix &
);
};
#endif

View File

@ -1,503 +0,0 @@
/***************************************************************************
* Perm.C *
* *
* This file defines permutation class: that is, a class for creating and *
* manipulating finite sequences of distinct integers. The main feature *
* of the class is the "++" operator that can be used to step through all *
* N! permutations of a sequence of N integers. As the set of permutations *
* forms a multiplicative group, a multiplication operator and an *
* exponentiation operator are also defined. *
* *
* HISTORY *
* Name Date Description *
* *
* arvo 07/01/93 Added the Partition class. *
* arvo 03/23/93 Initial coding. *
* *
*--------------------------------------------------------------------------*
* Copyright (C) 1999, James Arvo *
* *
* This program is free software; you can redistribute it and/or modify it *
* under the terms of the GNU General Public License as published by the *
* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html *
* *
* This program is distributed in the hope that it will be useful, but *
* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for *
* any particular purpose. See the GNU General Public License for more *
* details. *
* *
***************************************************************************/
#include <stdio.h>
#include <string.h>
#include "Perm.h"
#include "ArvoMath.h"
#include "Char.h"
namespace ArvoMath {
/***************************************************************************
*
* L O C A L F U N C T I O N S
*
***************************************************************************/
// Reverses the first n entries of p in place by swapping mirrored pairs.
static void Reverse( int *p, int n )
{
    const int half = n >> 1;
    for( int lo = 0, hi = n - 1; lo < half; lo++, hi-- )
    {
        Swap( p[lo], p[hi] );
    }
}
// Writes a Perm error message to stderr.
// The parameter is const char* because every caller passes a string literal;
// binding a literal to a plain char* is not valid in current C++.
static void Error( const char *msg )
{
    fprintf( stderr, "ERROR: Perm, %s.\n", msg );
}
/***************************************************************************
**
** M E M B E R F U N C T I O N S
**
***************************************************************************/
// Builds the identity permutation of the integers between Left and Right
// (inclusive). The bounds may be given in either order.
Perm::Perm( int Left, int Right )
{
    if( Left < Right ) { a = Left;  b = Right; }
    else               { a = Right; b = Left;  }
    p = new int[ Size() ];
    Reset( *this );   // Fill with a, a+1, ..., b.
}
// Copy constructor: takes Q's range and duplicates its entries into a fresh array.
Perm::Perm( const Perm &Q )
{
    a = Q.Min();
    b = Q.Max();
    const int count = Size();   // Equals Q.Size() now that the bounds match.
    p = new int[ count ];
    for( int k = 0; k < count; k++ ) p[k] = Q[k];
}
// Builds a permutation from a string of numbers by delegating to
// operator=( const char * ).
// The members are initialised first because that operator tests and
// delete[]s p; an uninitialised p would make it free a garbage pointer.
Perm::Perm( const char *str )
{
    a = 0;
    b = 0;
    p = NULL;
    (*this) = str;
}
// Re-initialises the permutation from a string of non-negative integers
// separated by any non-digit characters, e.g. "2 0 1".
// The smallest and largest values parsed become Min() and Max(). The values
// must be exactly Min(), Min()+1, ..., in some order; otherwise an error is
// reported.
// Changes from the previous version:
//   - digit runs longer than the scratch buffer no longer overflow it
//     (digits beyond the ninth are ignored);
//   - a NULL string is treated as empty;
//   - empty or invalid input leaves a consistent object (the single-element
//     permutation of 0, or the identity on the parsed count starting at the
//     minimum) instead of bounds that disagree with the allocated array.
Perm &Perm::operator=( const char *str )
{
    int k, m = 0, n = 0;
    char dig[10];
    char c;
    if( str == NULL ) str = "";
    if( p != NULL ) delete[] p;
    // Each number needs at least one digit plus a separator, so this is an upper bound.
    p = new int[ strlen(str)/2 + 1 ];
    for(;;)
    {
        c = *str++;
        if( isDigit(c) )
        {
            // Keep one slot for the terminator.
            if( m < (int)sizeof(dig) - 1 ) dig[m++] = c;
        }
        else if( m > 0 )
        {
            dig[m] = NullChar;
            sscanf( dig, "%d", &k );
            if( n == 0 ) a = k; else if( k < a ) a = k;
            if( n == 0 ) b = k; else if( k > b ) b = k;
            p[n++] = k;
            m = 0;
        }
        if( c == NullChar ) break;
    }
    if( n == 0 )
    {
        // Nothing parsed: fall back to the same state as Perm( 0, 0 ).
        a = 0;
        b = 0;
        p[0] = 0;
        return *this;
    }
    // Every value a, a+1, ..., a+n-1 must appear among the n parsed entries.
    int valid = 1;
    for( int i = 0; i < n && valid; i++ )
    {
        const int wanted = i + a;
        int found = 0;
        for( int j = 0; j < n; j++ )
            if( p[j] == wanted ) { found = 1; break; }
        if( !found ) valid = 0;
    }
    if( !valid )
    {
        Error( "string is not a valid permutation" );
        // Keep Size() equal to the number of initialised entries.
        b = a + n - 1;
        for( int i = 0; i < n; i++ ) p[i] = a + i;
    }
    return *this;
}
// Writes the entries into str as decimal numbers, each followed by a space,
// then terminates the string. The caller must supply a large enough buffer;
// no length check is made.
void Perm::Get( char *str ) const
{
    char *out = str;
    const int count = Size();
    for( int k = 0; k < count; k++ )
    {
        out += sprintf( out, "%d ", p[k] );
    }
    *out = NullChar;
}
// Advances to the next permutation in place.
// Returns 1 on success, or 0 (leaving the entries untouched) when no entry
// exceeds the one before it, i.e. the sequence is already descending.
int Perm::Next()
{
    const int count = Size();

    // Find the first index whose entry is larger than the entry before it.
    int ascent = 0;
    int prev = p[0];
    for( int idx = 1; idx < count; idx++ )
    {
        if( p[idx] > prev ) { ascent = idx; break; }
        prev = p[idx];
    }
    if( ascent == 0 ) return 0;

    // Here prev == p[ascent-1]. Among the entries before ascent, choose the
    // largest one that is still smaller than p[ascent]; p[ascent-1] is the
    // starting candidate.
    const int pivot = p[ascent];
    int best = ascent - 1;
    int bestValue = prev;
    for( int idx = 0; idx < ascent - 1; idx++ )
    {
        const int v = p[idx];
        if( v < pivot && v > bestValue ) { bestValue = v; best = idx; }
    }

    Swap( p[best], p[ascent] );   // The first `ascent` entries remain decreasing.
    Reverse( p, ascent );         // Turn them into increasing order.
    return 1;
}
// Steps back to the previous permutation in place (mirror image of Next).
// Returns 1 on success, or 0 (leaving the entries untouched) when no entry is
// smaller than the one before it, i.e. the sequence is already ascending.
int Perm::Prev()
{
    const int count = Size();

    // Find the first index whose entry is smaller than the entry before it.
    int descent = 0;
    int prev = p[0];
    for( int idx = 1; idx < count; idx++ )
    {
        if( p[idx] < prev ) { descent = idx; break; }
        prev = p[idx];
    }
    if( descent == 0 ) return 0;

    // Here prev == p[descent-1]. Among the entries before descent, choose the
    // smallest one that is still larger than p[descent]; p[descent-1] is the
    // starting candidate.
    const int pivot = p[descent];
    int best = descent - 1;
    int bestValue = prev;
    for( int idx = 0; idx < descent - 1; idx++ )
    {
        const int v = p[idx];
        if( v > pivot && v < bestValue ) { bestValue = v; best = idx; }
    }

    Swap( p[best], p[descent] );   // The first `descent` entries remain increasing.
    Reverse( p, descent );         // Turn them into decreasing order.
    return 1;
}
/***************************************************************************
**
** O P E R A T O R S
**
***************************************************************************/
// Prefix ++: advances to the next permutation and returns Next()'s result
// (1 if it advanced, 0 if there was no next permutation).
int Perm::operator++()
{
    const int advanced = Next();
    return advanced;
}
// Prefix --: steps back to the previous permutation and returns Prev()'s
// result (1 if it moved, 0 if there was no previous permutation).
int Perm::operator--()
{
    const int moved = Prev();
    return moved;
}
// Advances by n permutations (steps backwards when n is negative), stopping
// early if the end of the sequence is reached.
Perm &Perm::operator+=( int n )
{
    if( n > 0 )
    {
        int done = 0;
        while( done < n && Next() ) done++;
    }
    else if( n < 0 )
    {
        int left = n;
        while( left < 0 && Prev() ) left++;
    }
    return *this;
}
// Steps back by n permutations (advances when n is negative), stopping early
// if the end of the sequence is reached.
Perm &Perm::operator-=( int n )
{
    if( n > 0 )
    {
        int done = 0;
        while( done < n && Prev() ) done++;
    }
    else if( n < 0 )
    {
        int left = n;
        while( left < 0 && Next() ) left++;
    }
    return *this;
}
// Zero-based element access: n runs from 0 to Size() - 1.
// An out-of-range index reports an error and yields 0.
int Perm::operator[]( int n ) const
{
    if( 0 <= n && n < Size() ) return p[ n ];
    Error( "permutation index[] out of range" );
    return 0;
}
// Range-based element access: n runs from Min() to Max().
// An out-of-range index reports an error and yields 0.
int Perm::operator()( int n ) const
{
    const int lo = Min();
    if( lo <= n && n <= Max() ) return p[ n - lo ];
    Error( "permutation index() out of range" );
    return 0;
}
// Copy assignment: takes Q's range and entries, reallocating only when the
// element count changes.
Perm &Perm::operator=( const Perm &Q )
{
    const int incoming = Q.Size();
    if( Size() != incoming )
    {
        delete[] p;
        p = new int[ incoming ];
    }
    a = Q.Min();
    b = Q.Max();
    for( int k = 0; k < incoming; k++ ) p[k] = Q[k];
    return *this;
}
// Composition: entry k of the result is this permutation applied to Q's
// entry k. If the two ranges differ, Perm(0) is returned instead.
Perm Perm::operator*( const Perm &Q ) const
{
    if( Min() != Q.Min() || Max() != Q.Max() ) return Perm(0);
    const int lo = Min();
    const int count = Size();
    Perm product( lo, Max() );
    for( int k = 0; k < count; k++ )
    {
        product.Elem(k) = p[ Q[k] - lo ];
    }
    return product;
}
// Exponentiation by repeated composition.
//   n > 0 : this permutation applied n times
//   n == 0: the identity on the same range
//   n < 0 : the inverse applied |n| times (so -1 is the inverse)
// The previous version returned *this for n == 0 and, for n < -1, applied the
// inverse once and then the forward map, giving the wrong result.
Perm Perm::operator^( int n ) const
{
    Perm result( Min(), Max() );   // The constructor yields the identity.
    if( n == 0 ) return result;

    const int lo = Min();
    const int count = Size();

    // step[k] is the zero-based index that index k maps to under the map
    // being iterated (the permutation itself, or its inverse when n < 0).
    int *step = new int[ count ];
    if( n > 0 )
    {
        for( int k = 0; k < count; k++ ) step[k] = p[k] - lo;
    }
    else
    {
        for( int k = 0; k < count; k++ ) step[ p[k] - lo ] = k;
    }

    // Use a long for the magnitude so that negating the smallest int is safe
    // where long is wider than int.
    const long times = ( n > 0 ) ? (long)n : -(long)n;
    for( int k = 0; k < count; k++ )
    {
        int at = k;
        for( long t = 0; t < times; t++ ) at = step[at];
        result.Elem(k) = at + lo;
    }

    delete[] step;
    return result;
}
// Swaps the entries at positions i and j, both given in Min()..Max()
// coordinates. The positions are not range-checked.
Perm &Perm::operator()( int i, int j )
{
    const int lo = Min();
    Swap( p[ i - lo ], p[ j - lo ] );
    return *this;
}
// Returns 1 when both permutations cover the same range and agree in every
// entry, and 0 otherwise.
int Perm::operator==( const Perm &Q ) const
{
    if( Min() != Q.Min() || Max() != Q.Max() ) return 0;
    const int count = Size();
    for( int k = 0; k < count; k++ )
    {
        if( p[k] != Q[k] ) return 0;
    }
    return 1;
}
// Lexicographic comparison: at the first differing entry, returns whether
// this permutation's entry is the smaller one; returns 1 when all entries
// match. Permutations over different ranges compare as 0.
int Perm::operator<=( const Perm &Q ) const
{
    if( Min() != Q.Min() || Max() != Q.Max() ) return 0;
    const int count = Size();
    for( int k = 0; k < count; k++ )
    {
        const int mine = p[k];
        const int theirs = Q[k];
        if( mine != theirs ) return mine < theirs;
    }
    return 1;
}
// Restores P to ascending order: Min(), Min()+1, ..., Max().
void Reset( Perm &P )
{
    const int first = P.Min();
    const int count = P.Size();
    for( int k = 0; k < count; k++ ) P.Elem(k) = first + k;
}
// Returns 1 when no entry of P is larger than the entry before it (the
// sequence is descending), and 0 otherwise.
int End( const Perm &P )
{
    const int count = P.Size();
    int prev = P[0];
    for( int k = 1; k < count; k++ )
    {
        const int cur = P[k];
        if( prev < cur ) return 0;
        prev = cur;
    }
    return 1;
}
// Prints the entries of P on one line of standard output, separated by single
// spaces. Prints nothing when P has no elements.
void Print( const Perm &P )
{
    const int count = P.Size();
    if( count <= 0 ) return;
    printf( "%d", P[0] );
    for( int k = 1; k < count; k++ ) printf( " %d", P[k] );
    printf( "\n" );
}
// Returns 1 when P is not odd (see Odd), and 0 otherwise.
int Even( const Perm &P )
{
    return Odd( P ) ? 0 : 1;
}
// Returns 1 when P is an odd permutation and 0 when it is even.
// A copy of P is sorted by swapping each value into its home position; each
// swap is one transposition, so the parity of the swap count is the parity
// of P.
// The previous version added the distance ( j - i ) for each swap instead of
// 1, so a swap across an even distance did not flip the parity (for example
// the single transposition [2,1,0] was reported as even).
int Odd( const Perm &P )
{
    Perm Q( P );
    const int lo = P.Min();
    const int hi = P.Max();
    int swaps = 0;
    for( int i = lo; i < hi; i++ )
    {
        if( Q(i) == i ) continue;
        // Positions below i already hold their own values, so value i can
        // only be found further right.
        for( int j = i + 1; j <= hi; j++ )
        {
            if( Q(j) == i )
            {
                Q(i,j);
                swaps++;
                break;
            }
        }
    }
    return swaps % 2;
}
/***************************************************************************
**
** P A R T I T I O N S
**
***************************************************************************/
// Creates a null partition: no bins, no balls, and no storage.
Partition::Partition( )
    : bins( 0 ), balls( 0 ), Bin( NULL )
{
}
// Copy constructor: duplicates Q's counts and per-bin contents into new storage.
Partition::Partition( const Partition &Q )
    : bins( Q.Bins() ), balls( Q.Balls() ), Bin( new int[ Q.Bins() ] )
{
    for( int k = 0; k < bins; k++ ) Bin[k] = Q[k];
}
// Creates a partition with the given number of bins and balls, then calls
// Reset to put every ball in bin 0.
Partition::Partition( int bins_, int balls_ )
    : bins( bins_ ), balls( balls_ ), Bin( new int[ bins_ ] )
{
    Reset( *this );
}
// Adds one ball to the given bin and to the total ball count.
// An out-of-range bin number is reported on stderr and otherwise ignored; the
// previous version printed the message and then wrote to Bin[bin] anyway,
// outside the array.
void Partition::operator+=( int bin )
{
    if( bin < 0 || bin >= bins )
    {
        fprintf( stderr, "ERROR -- bin number out of range.\n" );
        return;
    }
    balls++;
    Bin[ bin ]++;
}
// Returns 1 when both partitions have the same ball count, the same bin count
// and the same contents in every bin; 0 otherwise.
int Partition::operator==( const Partition &P ) const
{
    if( Balls() != P.Balls() || Bins() != P.Bins() ) return 0;
    for( int k = 0; k < bins; k++ )
    {
        if( Bin[k] != P[k] ) return 0;
    }
    return 1;
}
// Sets the partition to its n'th configuration: resets it, then applies ++
// n times (the result of each ++ is ignored).
void Partition::operator=( int n )
{
    Reset( *this );
    int remaining = n;
    while( remaining > 0 )
    {
        ++(*this);
        remaining--;
    }
}
// Logical negation of operator==.
int Partition::operator!=( const Partition &P ) const
{
    return ( *this == P ) ? 0 : 1;
}
// Copy assignment: takes Q's counts and bin contents, reallocating only when
// the number of bins changes.
void Partition::operator=( const Partition &Q )
{
    const int incoming = Q.Bins();
    if( bins != incoming )
    {
        delete[] Bin;
        Bin = new int[ incoming ];
    }
    bins = incoming;
    balls = Q.Balls();
    for( int k = 0; k < bins; k++ ) Bin[k] = Q[k];
}
// Writes the per-bin counts into str as decimal numbers, each followed by a
// space, then terminates the string. The caller must supply a large enough
// buffer; no length check is made.
void Partition::Get( char *str ) const
{
    char *out = str;
    for( int k = 0; k < bins; k++ )
    {
        out += sprintf( out, "%d ", Bin[k] );
    }
    *out = NullChar;
}
// Number of balls in bin i; an out-of-range index yields 0.
int Partition::operator[]( int i ) const
{
    return ( 0 <= i && i < bins ) ? Bin[i] : 0;
}
// Number of distinct configurations of the balls among the bins.
// With k bins and n balls, picture the bins as k - 1 dividers mixed in with
// the n balls. If all of those were distinguishable there would be
// (n + k - 1)! arrangements; since balls and dividers are each
// indistinguishable, divide by n! (k - 1)!, which is the binomial
// coefficient ( n + k - 1, n ).
// Returns 0 when there are no balls and 1 when there is a single bin.
long Partition::NumCombinations() const
{
    if( balls == 0 ) return 0;
    if( bins == 1 ) return 1;
    const double exact = BinomialCoeff( balls + bins - 1, balls );
    return (long)floor( exact + 0.5 );   // Round to the nearest integer.
}
/***************************************************************************
* O P E R A T O R + + (Next Partition) *
* *
* Rearranges the n "balls" in k "bins" into the next configuration. *
* The first config is assumed to be all balls in the first bin -- i.e. *
* Bin[0]. All possible groupings are generated, each exactly once. The *
* function returns 1 if successful, 0 if the last config has already been *
* reached. (Algorithm by Harold Zatz) *
* *
***************************************************************************/
// Rearranges the balls into the next configuration (algorithm by Harold Zatz).
// Returns 1 on success and 0 when the last configuration (all balls in the
// last bin) has already been reached.
// Guards added over the previous version:
//   - with fewer than two bins there is no following bin, so 0 is returned
//     instead of writing to Bin[1];
//   - the search for the first non-empty bin stops at the end of the array,
//     so a partition with no balls returns 0 instead of scanning past it.
int Partition::operator++()
{
    if( bins < 2 ) return 0;
    if( Bin[0] > 0 )
    {
        // Move one ball from bin 0 to bin 1.
        Bin[1] += 1;
        Bin[0] -= 1;
        return 1;
    }
    // Bin 0 is empty: find the first non-empty bin after it.
    int i = 1;
    while( i < bins && Bin[i] == 0 ) i++;
    // i == bins means no balls at all; i == bins - 1 means the last configuration.
    if( i >= bins - 1 ) return 0;
    // Carry one ball into the next bin and send the rest back to bin 0.
    Bin[i+1] += 1;
    Bin[0] = Bin[i] - 1;
    Bin[i] = 0;
    return 1;
}
// Puts every ball into bin 0 and empties the remaining bins.
void Reset( Partition &P )
{
    const int count = P.Bins();
    P.Bin[0] = P.Balls();
    for( int k = 1; k < count; k++ )
    {
        P.Bin[k] = 0;
    }
}
int End( const Partition &P )
{
return P[ P.Bins() - 1 ] == P.Balls();
}
// Prints the per-bin counts of P on one line of standard output, separated by
// single spaces. Prints nothing when P has no bins.
void Print( const Partition &P )
{
    const int count = P.Bins();
    if( count <= 0 ) return;
    printf( "%d", P[0] );
    for( int k = 1; k < count; k++ ) printf( " %d", P[k] );
    printf( "\n" );
}
};

View File

@ -1,111 +0,0 @@
/***************************************************************************
* Perm.h *
* *
* This file defines permutation class: that is, a class for creating and *
* manipulating finite sequences of distinct integers. The main feature *
* of the class is the "++" operator that can be used to step through all *
* N! permutations of a sequence of N integers. As the set of permutations *
* forms a multiplicative group, a multiplication operator and an *
* exponentiation operator are also defined. *
* *
* HISTORY *
* Name Date Description *
* *
* arvo 07/01/93 Added the Partition class. *
* arvo 03/23/93 Initial coding. *
* *
*--------------------------------------------------------------------------*
* Copyright (C) 1999, James Arvo *
* *
* This program is free software; you can redistribute it and/or modify it *
* under the terms of the GNU General Public License as published by the *
* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html *
* *
* This program is distributed in the hope that it will be useful, but *
* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for *
* any particular purpose. See the GNU General Public License for more *
* details. *
* *
***************************************************************************/
#ifndef __PERM_INCLUDED__
#define __PERM_INCLUDED__
namespace ArvoMath {
// A permutation of the consecutive integers a..b, held in a heap array.
// Entries can be indexed from 0 (operator[]) or from Min() (operator()).
class Perm {
public:
    Perm( const Perm & ); // Initialize from a permutation.
    Perm( int a = 0, int b = 0 ); // Create permutation of ints a...b.
    Perm( const char * ); // Create from string of numbers.
    ~Perm() { delete[] p; } // Destructor; p is allocated with new[], so it must be released with delete[].
    void Get( char * ) const; // Gets a string representation.
    int Size() const { return b - a + 1;} // The number of elements.
    int Min () const { return a; } // The smallest value.
    int Max () const { return b; } // The largest value.
    int operator++(); // Make "next" permutation.
    int operator--(); // Make "previous" permutation.
    Perm &operator+=( int n ); // Advances by n permutations.
    Perm &operator-=( int n ); // Decrement by n permutations.
    Perm &operator =( const char * ) ; // Resets from string of numbers.
    Perm &operator =( const Perm & ) ; // Copy from another permutation.
    Perm &operator()( int i, int j ) ; // Swap entries i and j.
    int operator()( int n ) const; // Index from Min() to Max().
    int operator[]( int n ) const; // Index from 0 to Size() - 1.
    Perm operator ^( int n ) const; // Exponentiation: -1 means inverse.
    Perm operator *( const Perm & ) const; // Multiplication means composition.
    int operator==( const Perm & ) const; // True if all elements match.
    int operator<=( const Perm & ) const; // Lexicographic order relation.
private:
    int& Elem( int i ) { return p[i]; } // Unchecked zero-based write access.
    int Next();
    int Prev();
    int a, b; // Smallest and largest value of the range.
    int *p; // The entries, Size() of them.
    friend void Reset( Perm & );
};
// A "Partition" is a collection of k indistinguishable "balls" in n "bins".
// The Partition class encapsulates this notion and provides a convenient means
// of generating all possible partitions of k objects among n bins exactly once.
// Starting with all objects in bin zero, the ++ operator creates new and distinct
// distributions among the bins until all objects are in the last bin.
// A distribution of indistinguishable balls among a fixed number of bins,
// held as one ball count per bin in a heap array.
class Partition {
public:
    Partition( ); // Creates a null partition.
    Partition( const Partition & ); // Initialize from another partition.
    Partition( int bins, int balls ); // Specify # of bins & balls.
    ~Partition() { delete[] Bin; } // Destructor; Bin is allocated with new[], so it must be released with delete[].
    void Get( char * ) const; // Gets a string representation.
    int Bins () const { return bins; } // The number of bins.
    int Balls() const { return balls; } // The number of balls.
    void operator+=( int bin ); // Add a ball to this bin.
    void operator =( int n ); // Set to the n'th configuration.
    void operator =( const Partition& ); // Copy from another partition.
    int operator==( const Partition& ) const; // Compare two partitions.
    int operator!=( const Partition& ) const; // Compare two partitions.
    int operator++(); // Make "next" partition.
    int operator[]( int i ) const; // Return # of balls in bin i.
    long NumCombinations() const; // Number of distinct configurations.
private:
    int bins; // Number of bins.
    int balls; // Total number of balls.
    int* Bin; // Ball count per bin (NULL for a null partition).
    friend void Reset( Partition & );
};
// Predicates for determining when a permutation or partition is the last of
// the sequence, functions for printing, resetting, and miscellaneous operations.
// Free functions shared by Perm and Partition, overloaded on the argument type.
extern int End ( const Partition & ); // True if all balls in last bin.
extern int End ( const Perm & ); // True if descending.
extern int Even ( const Perm & ); // True if even # of 2-cycles.
extern int Odd ( const Perm & ); // True if odd # of 2-cycles.
extern void Print( const Partition & ); // Write to standard out.
extern void Print( const Perm & ); // Write to standard out.
extern void Reset( Partition & ); // Reset to all balls in bin 0.
extern void Reset( Perm & ); // Reset to ascending order.
};
#endif

View File

@ -1,230 +0,0 @@
/***************************************************************************
* Rand.C (Random Number Generators) *
* *
* Source file for pseudo-random number utilities. Rand is the *
* base class for several different algorithms for generating pseudo-random *
* numbers. Any method can generate individual samples or arrays of *
* samples using "Eval". The random seed can be reset at any time by *
* calling "Seed" with any integer. Random permutations of the integers *
* 0,1,...(n-1) are generated by "Perm(n,P)". *
* *
* HISTORY *
* Name Date Description *
* *
* arvo 08/04/97 Changed to virtual functions. *
* arvo 06/06/93 Optimization, especially for array evaluators. *
* arvo 10/06/91 Converted to C++ *
* arvo 11/20/89 Added "gen_seed" function to handle. *
* arvo 10/30/89 "state" allocation now done in rand_alloc. *
* arvo 07/08/89 Initial coding. *
* *
*--------------------------------------------------------------------------*
* Copyright (C) 1989, James Arvo *
* *
* This program is free software; you can redistribute it and/or modify it *
* under the terms of the GNU General Public License as published by the *
* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html *
* *
* This program is distributed in the hope that it will be useful, but *
* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for *
* any particular purpose. See the GNU General Public License for more *
* details. *
* *
***************************************************************************/
#include <stdio.h>
#include <math.h>
#include "Rand.h"
namespace ArvoMath {
// Absolute value as a macro, defined only if no ABS macro exists already.
// The argument is expanded more than once, so it must not have side effects.
#ifndef ABS
#define ABS( x ) ((x) > 0 ? (x) : -(x))
#endif
/*-------------------------------------------------------------------------*
* M E T H O D 1 *
* *
* From "Numerical Recipes," by William H. Press, Brian P. Flannery, *
* Saul A. Teukolsky, and William T. Vetterling, p. 197. *
* *
*-------------------------------------------------------------------------*/
// Constants for RandGen_1, whose update step is  next = ( IA * prev + IC ) % M1.
static const long M1 = 714025; // Modulus.
static const long IA = 1366; // Multiplier.
static const long IC = 150889; // Increment.
static const double RM = 1.400512E-6; // Approximately 1 / M1; scales a table entry to the returned float.
// Returns one sample. The current index selects a shuffle-table slot (clamped
// to 1..97); that entry becomes both the new index and, scaled by RM, the
// returned value, and the slot is refilled from the congruential sequence.
float RandGen_1::Eval()
{
    long slot = 1 + ( 97 * index ) / M1;
    if( slot > 97 ) slot = 97;
    if( slot < 1 ) slot = 1;
    long *entry = shuffle + slot;
    index = *entry;
    const float sample = index * RM;
    seed = ( IA * seed + IC ) % M1;
    *entry = seed;
    return sample;
}
void RandGen_1::Eval( int n, float *array )
{
register long *shfl = shuffle;
register long *elem;
register long offset;
for( int i = 0; i < n; i++ )
{
offset = 1 + ( 97 * index ) / M1;
if( offset > 97 ) offset = 97;
if( offset < 1 ) offset = 1;
elem = shfl + offset;
*array++ = ( index = *elem ) * RM;
*elem = ( seed = ( IA * seed + IC ) % M1 );
}
}
void RandGen_1::Seed( long seed )
{
long t = ( IC + ABS( seed ) + 1 ) % M1;
for( register int k = 1; k <= 97; k++ )
{
t = ( IA * t + IC ) % M1;
shuffle[k] = ABS( t );
}
t = ( IA * t + IC ) % M1;
seed = ABS( t );
index = ABS( t );
}
/*-------------------------------------------------------------------------*
* M E T H O D 2 *
* *
* From "The Multiple Prime Random Number Generator," by Alexander Haas, *
* ACM Transactions on Mathematical Software, Vol. 13, No. 4, December *
* 1987, pp. 368-381. *
* *
*-------------------------------------------------------------------------*/
// Returns one sample: steps the three counters m, i and j (each wraps back
// when it reaches its limit), combines them with r, and scales the new r.
float RandGen_2::Eval()
{
    m += 7;
    if( m >= 9973 ) m -= 9871;
    i += 1907;
    if( i >= 99991 ) i -= 89989;
    j += 73939;
    if( j >= 224729 ) j -= 96233;
    r = ((r * m + i + j) % 100000) / 10;
    return r * 1.00010001E-4;
}
// Fills array[0..n-1] with samples, using the same step as the single-sample
// Eval() for each element.
void RandGen_2::Eval( int n, float *array )
{
    for( int count = 0; count < n; count++ )
    {
        m += 7;
        if( m >= 9973 ) m -= 9871;
        i += 1907;
        if( i >= 99991 ) i -= 89989;
        j += 73939;
        if( j >= 224729 ) j -= 96233;
        r = ((r * m + i + j) % 100000) / 10;
        array[count] = r * 1.00010001E-4;
    }
}
// Re-seeds the generator: r, m, i and j are set from the magnitude of the
// seed and of its multiples by 7, 11 and 13; m, i and j are then raised by a
// fixed offset if they fall below a minimum.
void RandGen_2::Seed( long seed )
{
    r = ABS( seed );

    m = ABS( seed * 7 );
    if( m < 100 ) m += 100;

    i = ABS( seed * 11 );
    if( i < 10000 ) i += 10000;

    j = ABS( seed * 13 );
    if( j < 128000 ) j += 128000;
}
/*-------------------------------------------------------------------------*
* M E T H O D 3 *
* *
* From "A More Portable Fortran Random Number Generator," by Linus *
* Schrage, ACM Transactions on Mathematical Software, Vol. 5, No. 2, *
* June 1979, pp. 132-138. *
* *
*-------------------------------------------------------------------------*/
// Constants for RandGen_3.
static const long A3 = 16807; // Multiplier applied to the state ix.
static const long P3 = 2147483647; // 2^31 - 1; added back when the updated state goes negative.
float RandGen_3::Eval()
{
long xhi = ix >> 16;
long xalo = ( ix & 0xFFFF ) * A3;
long leftlo = xalo >> 16;
long fhi = xhi * A3 + leftlo;
long k = fhi >> 15;
ix = ( ((xalo - (leftlo << 16)) - P3) +
((fhi - (k << 15)) << 16) ) + k;
if( ix < 0 ) ix += P3;
return ix * 4.656612875E-10;
}
void RandGen_3::Eval( int n, float *array )
{
register long xhi, xalo, leftlo;
register long fhi, k;
for( register int i = 0; i < n; i++ )
{
xhi = ix >> 16;
xalo = ( ix & 0xFFFF ) * A3;
leftlo = xalo >> 16;
fhi = xhi * A3 + leftlo;
k = fhi >> 15;
ix = ( ((xalo - (leftlo << 16)) - P3) +
((fhi - (k << 15)) << 16) ) + k;
if( ix < 0 ) ix += P3;
*array++ = ix * 4.656612875E-10;
}
}
// Reset the generator state to the absolute value of the given integer.
// NOTE(review): a seed of 0 leaves ix == 0, and the update in Eval maps
// 0 back to 0 (it computes -P3 and then adds P3), so every sample would
// be 0.  Callers should pass a non-zero seed.
void RandGen_3::Seed( long seed )
{
ix = ABS( seed );
}
/*-------------------------------------------------------------------------*
* R A N D : : P E R M (Permutation) *
* *
* This routine fills an integer array of length "len" with a random *
* permutation of the integers 0, 1, 2, ... (len-1). *
* *
* For efficiency, the random numbers are generated in batches of up to *
* "Nmax" at a time. The constant Nmax can be set to any value >= 1. *
* *
*-------------------------------------------------------------------------*/
static const int Nmax = 20;

// Write a random permutation of 0..len-1 into perm.  The array starts as
// the identity and is then walked from the back: position "last" is
// exchanged with a randomly chosen position in front of it.  The len-1
// random numbers needed are drawn through Eval in batches of at most Nmax.
void RandGen::Perm( int len, int perm[] )
{
    float batch[ Nmax ];          // Buffer of random numbers.
    int   pending   = len - 1;    // Random numbers not yet generated.
    int   batchSize = 0;          // Size of the batch now in the buffer.
    int   cursor    = 0;          // Next unread entry of the buffer.

    for( int slot = 0; slot < len; ++slot ) perm[ slot ] = slot;

    for( int last = len - 1; last > 0; --last )
    {
        if( cursor == batchSize )  // Buffer used up: draw the next batch.
        {
            batchSize = ( pending < Nmax ) ? pending : Nmax;
            Eval( batchSize, batch );
            pending -= batchSize;
            cursor = 0;
        }

        const float scaled = ( last + 1 ) * batch[ cursor++ ];
        const int   pick   = (int)scaled;

        // pick == last needs no exchange; pick == last + 1 is ignored too.
        if( pick >= last ) continue;

        const int held = perm[ last ];
        perm[ last ] = perm[ pick ];
        perm[ pick ] = held;
    }
}
};

View File

@ -1,114 +0,0 @@
/***************************************************************************
* Rand.h (Random Number Generators) *
* *
* Header file for Rand.C, pseudo-random number utilities. Rand is the *
* base class for several different algorithms for generating pseudo-random *
* numbers. Any method can generate individual samples or arrays of *
* samples using "Eval". The random seed can be reset at any time by *
* calling "Seed" with any integer. Random permutations of the integers *
* 0,1,...(n-1) are generated by "Perm(n,P)". *
* *
* HISTORY *
* Name Date Description *
* *
* arvo 08/04/97 Changed to virtual functions. *
* arvo 06/06/93 Optimization, especially for array evaluators. *
* arvo 10/06/91 Converted to C++ *
* arvo 11/20/89 Added "gen_seed" function to handle. *
* arvo 10/30/89 "state" allocation now done in rand_alloc. *
* arvo 07/08/89 Initial coding. *
* *
*--------------------------------------------------------------------------*
* Copyright (C) 1989, James Arvo *
* *
* This program is free software; you can redistribute it and/or modify it *
* under the terms of the GNU General Public License as published by the *
* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html *
* *
* This program is distributed in the hope that it will be useful, but *
* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for *
* any particular purpose. See the GNU General Public License for more *
* details. *
* *
***************************************************************************/
#ifndef __RAND_INCLUDED__
#define __RAND_INCLUDED__
namespace ArvoMath {
// Base class for random number generators. This class contains
// several pure virtual functions, so it cannot be instanced directly.
//
// Concrete generators implement the two Eval overloads and Seed; Perm,
// Interval and Eval( float & ) are built on top of those.  The destructor
// is virtual so that a generator owned through a RandGen pointer can be
// deleted safely.
class RandGen {
public:
    RandGen() {}
    virtual ~RandGen() {}
    virtual float Eval( ) = 0;                    // One sample.
    virtual void Eval( int n, float x[] ) = 0;    // n samples into x[0..n-1].
    virtual void Seed( long seed ) = 0;           // Reset the generator state.
public:
    void Perm( int n, int P[] );                  // Random permutation of 0..n-1 in P.
    float Interval( float a, float b );           // Sample scaled to lie between a and b.
    void Eval( float &x ) { x = Eval(); }         // One sample, stored through x.
};
// Method 1: From "Numerical Recipes," by William H. Press, Brian P.
// Flannery, Saul A. Teukolsky, and William T. Vetterling, p. 197.
// A shuffle-table generator: both constructors seed the generator
// immediately, the default one with a seed of 1.
class RandGen_1 : public RandGen {
public:
RandGen_1( ) { Seed( 1 ); }
RandGen_1( long seed ) { Seed( seed ); }
virtual float Eval( );
virtual void Eval( int n, float x[] );
virtual void Seed( long seed );
private:
// Last value taken from the table; selects the next table slot.
long index;
// Running state of the congruential sequence.  Note that the parameter
// of Seed has the same name and hides this member inside Seed.
long seed;
// Shuffle table; Seed fills entries 1..97, entry 0 is not written there.
long shuffle[ 98 ];
};
// Method 2: From "The Multiple Prime Random Number Generator," by
// Alexander Haas, ACM Transactions on Mathematical Software,
// Vol. 13, No. 4, December 1987, pp. 368-381.
// Both constructors seed the generator immediately, the default one with
// a seed of 1.
class RandGen_2 : public RandGen {
public:
RandGen_2( ) { Seed( 1 ); }
RandGen_2( long seed ) { Seed( seed ); }
virtual float Eval( );
virtual void Eval( int n, float x[] );
virtual void Seed( long seed );
private:
// r is the value each sample is derived from; m, i and j are additive
// counters that Eval steps and wraps before mixing them into r.
long r;
long m;
long i;
long j;
};
// Method 3: From "A More Portable Fortran Random Number Generator,"
// by Linus Schrage, ACM Transactions on Mathematical Software,
// Vol. 5, No. 2, June 1979, pp. 132-138.
// Both constructors seed the generator immediately, the default one with
// a seed of 1.
class RandGen_3 : public RandGen {
public:
RandGen_3( ) { Seed( 1 ); }
RandGen_3( long seed ) { Seed( seed ); }
virtual float Eval( );
virtual void Eval( int n, float x[] );
virtual void Seed( long seed );
private:
// Entire generator state; Seed stores the absolute value of its argument.
long ix;
};
// Draw one sample with Eval() and map it linearly onto the interval
// spanned by a and b; the two arguments may be given in either order.
inline float RandGen::Interval( float a, float b )
{
    float lo = b;
    float hi = a;
    if( a < b )
    {
        lo = a;
        hi = b;
    }
    return lo + Eval() * ( hi - lo );
}
};
#endif

View File

@ -1,232 +0,0 @@
/*****************************************************************************
**
** MODULE NAME SI_units.h International System of Units (SI)
**
** DESCRIPTION
** The purpose of this header file is to provide a simple and efficient
** mechanism for associating physically meaningful units with floating
** point numbers. No extra space is required, and no runtime overhead
** is introduced; all type-checking occurs at compile time.
**
**
** HISTORY
** Name Date Description
**
** arvo 02/09/92 Replaced conversion macros with inline functions.
** arvo 10/16/91 Initial implementation.
**
**
** (c) Copyright 1991, 1992
** Program of Computer Graphics, Cornell University, Ithaca, NY
** ALL RIGHTS RESERVED
**
*****************************************************************************/
#ifndef SI_UNITS_H
#define SI_UNITS_H
#include <iostream.h>
namespace ArvoMath {
// Decimal scale factors for the SI prefixes, usable as plain multipliers.
const float SI_deci  = 1.0E-1;
const float SI_centi = 1.0E-2;
const float SI_milli = 1.0E-3;
const float SI_micro = 1.0E-6;
const float SI_nano  = 1.0E-9;
const float SI_kilo  = 1.0E+3;
const float SI_mega  = 1.0E+6;
const float SI_giga  = 1.0E+9;
const float SI_tera  = 1.0E+12;
/*******************************************************************************
* *
* I N T E R N A T I O N A L S Y S T E M O F U N I T S *
* *
********************************************************************************
* *
* DIMENSION CLASS INITIALIZER SYMBOL BASE UNITS *
* *
* length SI_length meter m m *
* time SI_time second s s *
* mass SI_mass kilogram kg kg *
* angle SI_angle radian rad rad *
* solid angle SI_solid_angle steradian sr sr *
* temperature SI_temperature kelvin K K *
* luminous intensity SI_lum_inten candela cd cd *
* area SI_area meter2 m2 m2 *
* volume SI_volume meter3 m3 m3 *
* frequency SI_frequency hertz Hz 1/s *
* force SI_force newton N m kg/s2 *
* energy SI_energy joule J m2 kg/s2 *
* power SI_power watt W m2 kg/s3 *
* radiance SI_radiance watts_per_m2sr W/m2sr kg/(s3 sr) *
* irradiance SI_irradiance watts_per_m2 W/m2 kg/s3 *
* radiant intensity SI_rad_inten watts_per_sr W/sr m2 kg/(s3 sr) *
* luminance SI_luminance candela_per_m2 cd/m2 cd/m2 *
* illuminance SI_illuminance lux lx cd sr/m2 *
* luminous flux SI_lum_flux lumen lm cd sr *
* luminous energy SI_lum_energy talbot tb cd sr s *
* *
*******************************************************************************/
// Common base of every quantity class generated by SI_Make: it stores the
// magnitude as a single float.  The constructors are protected, so only
// derived quantity classes can create values.
class SI_dimensionless {
public:
    // The stored magnitude.
    float Value() const { return value; }
    // Write the magnitude, a space and the unit symbol "a" to s; returns s.
    // The symbol is taken as const char* so that string literals can be
    // passed, and the function is const because it only reads the value.
    ostream& Put( ostream &s, const char *a ) const { return s << value << " " << a; }
protected:
    SI_dimensionless() { value = 0; }
    SI_dimensionless( float x ){ value = x; }
    float value;
};
/*******************************************************************************
* The following macro is used for creating new quantity classes and their *
* corresponding initializing functions and abbreviations. This macro is *
* not intended to be used outside of this file -- it is a compact means of *
* defining generic operations for each quantity (e.g. scaling & comparing). *
*******************************************************************************/
// SI_Make( C, Initializer, Symbol ) expands to four things:
//   - struct C, derived from SI_dimensionless, with scaling by a float,
//     addition, subtraction and comparison against another C, unary minus,
//     and C / C yielding a plain float;
//   - operator<< for C, which prints the value followed by Symbol via Put;
//   - an inline factory "C Initializer( float )";
//   - "float * C", so the scale factor may appear on either side.
// The assignment and compound-assignment operators return a new C by value
// rather than a reference to *this, and the comparisons return int.
// Comments must stay outside the macro body, because every body line ends
// in a line-continuation backslash.
#define SI_Make( C, Initializer, Symbol ) \
struct C : SI_dimensionless { \
C ( ) : SI_dimensionless( ) {}; \
C ( float x ) : SI_dimensionless( x ) {}; \
C operator * ( float x ) { return C( value * x ); } \
C operator / ( float x ) { return C( value / x ); } \
C operator /= ( float x ) { return C( value /= x ); } \
C operator *= ( float x ) { return C( value *= x ); } \
C operator + ( C x ) { return C( value + x.Value() ); } \
C operator - ( ) { return C(-value ); } \
C operator - ( C x ) { return C( value - x.Value() ); } \
C operator += ( C x ) { return C( value += x.Value() ); } \
C operator -= ( C x ) { return C( value -= x.Value() ); } \
C operator = ( C x ) { return C( value = x.Value() ); } \
int operator > ( C x ) { return ( value > x.Value() ); } \
int operator < ( C x ) { return ( value < x.Value() ); } \
int operator >= ( C x ) { return ( value >= x.Value() ); } \
int operator <= ( C x ) { return ( value <= x.Value() ); } \
float operator / ( C x ) { return ( value / x.Value() ); } \
}; \
inline ostream& operator<<(ostream &s, C x) {return x.Put(s,Symbol);} \
inline C Initializer( float x ) { return C( x ); } \
inline C operator * ( float x, C y ) { return C( x * y.Value() ); }
/*******************************************************************************
* The following macros define permissible arithmetic operations among *
* variables with different physical meanings. This ensures that the *
* result of any such operation is ALWAYS another meaningful quantity. *
*******************************************************************************/
// SI_Square( A, B ): declares B as the square of A.
// Generates A * A -> B and B / A -> A.
#define SI_Square( A, B ) \
inline B operator*( A x, A y ) { return B( x.Value() * y.Value() ); } \
inline A operator/( B x, A y ) { return A( x.Value() / y.Value() ); }
// SI_Recip( A, B ): declares A and B as reciprocals of each other.
// Generates float / A -> B, float / B -> A, and A * B (in either order)
// yielding a plain float.
#define SI_Recip( A, B ) \
inline B operator/( float x, A y ) { return B( x / y.Value() ); } \
inline A operator/( float x, B y ) { return A( x / y.Value() ); } \
inline float operator*( A x, B y ) { return x.Value() * y.Value(); } \
inline float operator*( B x, A y ) { return x.Value() * y.Value(); }
// SI_Times( A, B, C ): declares C as the product of A and B.
// Generates A * B -> C and B * A -> C, plus the two quotients
// C / B -> A and C / A -> B.
#define SI_Times( A, B, C ) \
inline C operator*( A x, B y ) { return C( x.Value() * y.Value() ); } \
inline C operator*( B x, A y ) { return C( x.Value() * y.Value() ); } \
inline A operator/( C x, B y ) { return A( x.Value() / y.Value() ); } \
inline B operator/( C x, A y ) { return B( x.Value() / y.Value() ); }
/*******************************************************************************
* The following macros create classes for a variety of quantities. These *
* include base quantities such as "time" and "length" as well as derived *
* quantities such as "power" and "volume". Each quantity is provided with *
* an initialization function in SI units and an abbreviation for printing. *
*******************************************************************************/
// Each line below defines one quantity class (first argument), the factory
// function that creates it from a float in SI units (second argument) and
// the symbol printed by operator<< (third argument).
SI_Make( SI_length , meter , "m" ); // Base Units:
SI_Make( SI_mass , kilogram , "kg" );
SI_Make( SI_time , second , "s" );
SI_Make( SI_lum_inten , candela , "cd" );
SI_Make( SI_temperature , kelvin , "K" );
SI_Make( SI_angle , radian , "rad" ); // Supplementary:
SI_Make( SI_solid_angle , steradian , "sr" );
SI_Make( SI_area , meter2 , "m2" ); // Derived units:
SI_Make( SI_volume , meter3 , "m3" );
SI_Make( SI_frequency , hertz , "Hz" );
SI_Make( SI_force , newton , "N" );
SI_Make( SI_energy , joule , "J" );
SI_Make( SI_power , watt , "W" );
SI_Make( SI_radiance , watts_per_m2sr , "W/m2sr" );
SI_Make( SI_irradiance , watts_per_m2 , "W/m2" );
SI_Make( SI_rad_inten , watts_per_sr , "W/sr" );
SI_Make( SI_luminance , candela_per_m2 , "cd/m2" );
SI_Make( SI_illuminance , lux , "lx" );
SI_Make( SI_lum_flux , lumen , "lm" );
SI_Make( SI_lum_energy , talbot , "tb" );
// Intermediate quantities: they appear only as operands or results of the
// products and quotients declared with SI_Square / SI_Recip / SI_Times.
SI_Make( SI_time2 , second2 , "s2" ); // Intermediate:
SI_Make( SI_sa_area , meter2_sr , "m2sr" );
SI_Make( SI_inv_area , inv_meter2 , "1/m2" );
SI_Make( SI_inv_solid_angle, inv_steradian , "1/sr" );
SI_Make( SI_length_temp , meters_kelvin , "m K" );
SI_Make( SI_power_area , watts_m2 , "W m2" );
SI_Make( SI_power_per_volume, watts_per_m3 , "W/m3" );
// Permitted arithmetic between different quantities.  Only the products
// and quotients listed here compile; any other mix of quantity types is
// rejected by the compiler because no matching operator exists.
// Squares: A * A -> B.
SI_Square( SI_length , SI_area );
SI_Square( SI_time , SI_time2 );
// Reciprocals: 1 / A -> B and 1 / B -> A.
SI_Recip ( SI_time , SI_frequency );
SI_Recip ( SI_area , SI_inv_area );
SI_Recip ( SI_solid_angle , SI_inv_solid_angle );
// Products: A * B -> C (and the two corresponding quotients).
SI_Times( SI_area , SI_length , SI_volume );
SI_Times( SI_force , SI_length , SI_energy );
SI_Times( SI_power , SI_time , SI_energy );
SI_Times( SI_lum_flux , SI_time , SI_lum_energy );
SI_Times( SI_lum_inten , SI_solid_angle , SI_lum_flux );
SI_Times( SI_radiance , SI_solid_angle , SI_irradiance );
SI_Times( SI_rad_inten , SI_solid_angle , SI_power );
SI_Times( SI_irradiance , SI_area , SI_power );
SI_Times( SI_illuminance , SI_area , SI_lum_flux );
SI_Times( SI_solid_angle , SI_area , SI_sa_area );
SI_Times( SI_radiance , SI_sa_area , SI_power );
SI_Times( SI_irradiance , SI_inv_solid_angle, SI_radiance );
SI_Times( SI_power , SI_inv_solid_angle, SI_rad_inten );
SI_Times( SI_length , SI_temperature , SI_length_temp );
SI_Times( SI_power , SI_area , SI_power_area );
/*******************************************************************************
* Following are some useful non-SI units. These units can be used in place of *
* the unit-initializers above. Thus, a variable of type SI_length, for example*
* may be initialized in "meters", "inches", or "centimeters". In all cases, *
* however, the value is converted to the underlying SI unit (e.g. meters). *
*******************************************************************************/
// SI_Convert( SI, New, Old ): defines an inline initializer "SI New( float x )"
// that returns x times Old, where Old is the size of ONE New-unit expressed
// through an existing initializer (e.g. one minute is second( 60.0 )).
#define SI_Convert( SI, New, Old ) inline SI New( float x ) { return x * Old; }
SI_Convert( SI_time , minute , second( 60.0 ) );
SI_Convert( SI_time , hour , minute( 60.0 ) );
SI_Convert( SI_force , dyne , newton( 1.0E-5 ) );
SI_Convert( SI_energy , erg , joule( 1.0E-7 ) );
SI_Convert( SI_power , kilowatt , watt( SI_kilo ) );
SI_Convert( SI_mass , gram , kilogram( SI_milli ) );
SI_Convert( SI_length , inch , meter( 2.54E-2 ) );
SI_Convert( SI_length , foot , inch( 12.0 ) );
SI_Convert( SI_length , centimeter , meter( SI_centi ) );
SI_Convert( SI_length , micron , meter( SI_micro ) );
SI_Convert( SI_length , angstrom , meter( 1.0E-10 ) );
SI_Convert( SI_area , barn , meter2( 1.0E-28 ) );
SI_Convert( SI_angle , degree , radian( 0.017453 ) ); // pi / 180, truncated to five significant digits
SI_Convert( SI_illuminance , phot , lux( 1.0E+4 ) );
// One footcandle is one lumen per square foot, about 10.764 lux.  The
// previous factor, 9.29E-2, is the inverse (footcandles per lux) and made
// footcandle( x ) roughly 116 times too small.
SI_Convert( SI_illuminance , footcandle , lux( 10.764 ) );
SI_Convert( SI_luminance , stilb , candela_per_m2( 1.0E+4 ) );
/*******************************************************************************
* Often there are multiple names for a single quantity. Below are some *
* synonyms for the quantities defined above. These can be used in place of *
* the original quantities and may be clearer in some contexts. *
*******************************************************************************/
// These are typedefs, i.e. aliases rather than new types: the compiler
// treats each synonym and its original as the same type, so for example an
// SI_radiosity can be passed wherever an SI_irradiance is expected.
typedef SI_power SI_radiant_flux;
typedef SI_irradiance SI_radiant_flux_density;
typedef SI_irradiance SI_radiant_exitance;
typedef SI_radiance SI_intensity;
typedef SI_irradiance SI_radiosity;
};
#endif

View File

@ -1,398 +0,0 @@
/***************************************************************************
* SVD.C *
* *
* Singular Value Decomposition. *
* *
* HISTORY *
* Name Date Description *
* *
* arvo 08/22/2000 Copied to CIT library. *
* arvo 06/28/1993 Rewritten from "Numerical Recipes" C-code. *
* *
*--------------------------------------------------------------------------*
* Copyright (C) 2000, James Arvo *
* *
* This program is free software; you can redistribute it and/or modify it *
* under the terms of the GNU General Public License as published by the *
* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html *
* *
* This program is distributed in the hope that it will be useful, but *
* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for *
* any particular purpose. See the GNU General Public License for more *
* details. *
* *
***************************************************************************/
#include <math.h>
#include <assert.h>
#include "ArvoMath.h"
#include "Vector.h"
#include "Matrix.h"
#include "SVD.h"
namespace ArvoMath {
// Upper bound on the number of sweeps Decompose spends on one singular value.
static const int MaxIterations = 30;

// Returns sqrt( a*a + b*b ).  The larger magnitude is factored out first so
// that only the ratio of the smaller to the larger is ever squared.
static double svd_pythag( double a, double b )
{
    const double absA = Abs(a);
    const double absB = Abs(b);

    if( absA > absB ) return absA * sqrt( 1.0 + Sqr( absB / absA ) );
    if( absB > 0.0 )  return absB * sqrt( 1.0 + Sqr( absA / absB ) );
    return 0.0;       // Neither magnitude is positive.
}
// Returns the magnitude of a carrying the sign of b; a zero b counts as
// non-negative.
static inline double SameSign( double a, double b )
{
    return ( b >= 0.0 ) ? Abs( a ) : -Abs( a );
}
// Counts the diagonal entries of D whose magnitude is strictly greater
// than epsilon.  D is only read.
static int ComputeRank( const Matrix &D, double epsilon )
{
    int significant = 0;
    for( int d = 0; d < D.Rows(); ++d )
    {
        significant += ( Abs(D(d,d)) > epsilon ) ? 1 : 0;
    }
    return significant;
}
SVD::SVD( ) : Q_(0), D_(0), R_(0)
{
}
SVD::SVD( const Matrix &M ) : Q_(0), D_(0), R_(0)
{
(*this) = M;
}
// Decompose the matrix A and keep the factors in Q_, D_ and R_.
//
// Decompose works in place on Q_.  When A has fewer rows than columns,
// Q_ is created as Matrix( A.Cols() ) and A is copied into its leading
// rows, so that Q_ never has fewer rows than columns.
// (presumably Matrix( n ) is an n x n matrix of zeros -- TODO confirm)
//
// Any pseudo-inverse cached from an earlier decomposition is discarded.
// PseudoInverse recomputes only while P_ is Null; without the reset it
// would keep returning the result that belongs to the previous matrix.
// P_ is put back into the default-constructed state in which the
// constructors leave it.
//
// Decompose returns 0 when it runs out of iterations; that outcome is
// recorded in "error" (0 = success, 1 = no convergence).
void SVD::operator=( const Matrix &A )
{
    if( A.Rows() >= A.Cols() ) Q_ = A;
    else
    {
        Q_ = Matrix( A.Cols() );
        for( int i = 0; i < A.Rows(); i++ )
            for( int j = 0; j < A.Cols(); j++ ) Q_(i,j) = A(i,j);
    }
    R_ = Matrix( A.Cols() );
    P_ = Matrix();                                // Invalidate the cached pseudo-inverse.
    error = Decompose( Q_, D_, R_ ) ? 0 : 1;
}
// Returns the stored factor Q_ of the decomposition.
// NOTE(review): epsilon has no effect on the result.  The previous code
// computed a rank from it and then discarded the value; that dead
// computation has been removed.  The parameter is kept so that existing
// callers still compile.
const Matrix &SVD::Q( double epsilon ) const
{
    (void)epsilon;
    return Q_;
}
// Returns the stored diagonal factor D_, which holds the singular values.
// NOTE(review): epsilon has no effect on the result.  The previous code
// computed a rank from it and then discarded the value; that dead
// computation has been removed.  The parameter is kept so that existing
// callers still compile.
const Matrix &SVD::D( double epsilon ) const
{
    (void)epsilon;
    return D_;
}
// Returns the stored factor R_ of the decomposition.
// NOTE(review): epsilon has no effect on the result.  The previous code
// computed a rank from it and then discarded the value; that dead
// computation has been removed.  The parameter is kept so that existing
// callers still compile.
const Matrix &SVD::R( double epsilon ) const
{
    (void)epsilon;
    return R_;
}
// Numerical rank of the decomposed matrix: the number of diagonal entries
// of D_ whose magnitude is strictly greater than epsilon.
int SVD::Rank( double epsilon ) const
{
return ComputeRank( D_, epsilon );
}
// Singular value decomposition, carried out in place.
//
// On entry Q holds the matrix to decompose (m rows, n columns) and R must
// already be sized so that R(i,j) is valid for 0 <= i,j < n.  On exit Q is
// overwritten with the left factor, R holds the right-hand transformations
// and D is set to Diag( diag ), with the singular values sorted into
// descending order.
//
// Returns 1 on success and 0 if some singular value has not converged
// after MaxIterations sweeps.
//
// The structure follows the "Numerical Recipes" routine named in the file
// header: reduction to bidiagonal form, accumulation of the right- and
// left-hand transformations, then iterative diagonalisation.
// NOTE(review): R is indexed as R(row,col) everywhere the reference routine
// indexes its V matrix as v[col][row], i.e. R appears to hold the transpose
// of V -- confirm against the callers.
int SVD::Decompose( Matrix &Q, Matrix &D, Matrix &R )
{
int i, j, k, l, m, n, p, q, iter;
double c, f, h, s, x, y, z;
double norm = 0.0;
double g = 0.0;
double scale = 0.0;
m = Q.Rows();
n = Q.Cols();
// Temp holds the off-diagonal entries, diag the diagonal entries.
Vector Temp( n );
Vector diag( n );
// Phase 1: reduce Q to bidiagonal form, one column and one row per pass.
for( i = 0; i < n; i++ )
{
Temp(i) = scale * g;
scale = 0.0;
g = 0.0;
s = 0.0;
l = i + 1;
// Transformation acting on column i (rows i..m-1).
if( i < m )
{
for( k = i; k < m; k++ ) scale += Abs( Q(k,i) );
if( scale != 0.0 )
{
for( k = i; k < m; k++ )
{
Q(k,i) /= scale;
s += Sqr( Q(k,i) );
}
f = Q(i,i);
g = -SameSign( sqrt(s), f );
h = f * g - s;
Q(i,i) = f - g;
if( i != n - 1 )
{
for( j = l; j < n; j++ )
{
s = 0.0;
for( k = i; k < m; k++ ) s += Q(k,i) * Q(k,j);
f = s / h;
for( k = i; k < m; k++ ) Q(k,j) += f * Q(k,i);
}
}
for( k = i; k < m; k++ ) Q(k,i) *= scale;
}
}
diag(i) = scale * g;
g = 0.0;
s = 0.0;
scale = 0.0;
// Transformation acting on row i (columns l..n-1).
if( i < m && i != n - 1 )
{
for( k = l; k < n; k++ ) scale += Abs( Q(i,k) );
if( scale != 0.0 )
{
for( k = l; k < n; k++ )
{
Q(i,k) /= scale;
s += Sqr( Q(i,k) );
}
f = Q(i,l);
g = -SameSign( sqrt(s), f );
h = f * g - s;
Q(i,l) = f - g;
for( k = l; k < n; k++ ) Temp(k) = Q(i,k) / h;
if( i != m - 1 )
{
for( j = l; j < m; j++ )
{
s = 0.0;
for( k = l; k < n; k++ ) s += Q(j,k) * Q(i,k);
for( k = l; k < n; k++ ) Q(j,k) += s * Temp(k);
}
}
for( k = l; k < n; k++ ) Q(i,k) *= scale;
}
}
// Track the largest |diag| + |Temp|; it is the yardstick for the
// "negligible" tests in phase 4.
norm = Max( norm, Abs( diag(i) ) + Abs( Temp(i) ) );
}
// Phase 2: accumulate the right-hand transformations in R, last row first.
// g and l carry over from the previous pass (and from phase 1 on entry).
for( i = n - 1; i >= 0; i-- )
{
if( i < n - 1 )
{
if( g != 0.0 )
{
// Dividing twice instead of by the product Q(i,l) * g.
for( j = l; j < n; j++ ) R(i,j) = ( Q(i,j) / Q(i,l) ) / g;
for( j = l; j < n; j++ )
{
s = 0.0;
for( k = l; k < n; k++ ) s += Q(i,k) * R(j,k);
for( k = l; k < n; k++ ) R(j,k) += s * R(i,k);
}
}
for( j = l; j < n; j++ )
{
R(i,j) = 0.0;
R(j,i) = 0.0;
}
}
R(i,i) = 1.0;
g = Temp(i);
l = i;
}
// Phase 3: accumulate the left-hand transformations in Q, last column first.
for( i = n - 1; i >= 0; i-- )
{
l = i + 1;
g = diag(i);
if( i < n - 1 ) for( j = l; j < n; j++ ) Q(i,j) = 0.0;
if( g != 0.0 )
{
g = 1.0 / g;
if( i != n - 1 )
{
for( j = l; j < n; j++ )
{
s = 0.0;
for( k = l; k < m; k++ ) s += Q(k,i) * Q(k,j);
f = ( s / Q(i,i) ) * g;
for( k = i; k < m; k++ ) Q(k,j) += f * Q(k,i);
}
}
for( j = i; j < m; j++ ) Q(j,i) *= g;
}
else
{
for( j = i; j < m; j++ ) Q(j,i) = 0.0;
}
Q(i,i) += 1.0;
}
// Phase 4: diagonalise the bidiagonal form, one singular value (index k)
// at a time, allowing at most MaxIterations sweeps for each.
for( k = n - 1; k >= 0; k-- )
{
for( iter = 1; iter <= MaxIterations; iter++ )
{
// Look for a split point l: "x + norm == norm" is the test for x being
// negligible relative to norm.
// NOTE(review): jump is assigned only on one of the two breaks, and
// diag(q) would be read with q == -1 if l reached 0 without the first
// test firing.  This relies on Temp(0) being 0 -- confirm.
int jump;
for( l = k; l >= 0; l-- )
{
q = l - 1;
if( Abs( Temp(l) ) + norm == norm ) { jump = 1; break; }
if( Abs( diag(q) ) + norm == norm ) { jump = 0; break; }
}
if( !jump )
{
// diag(q) is negligible: rotate Temp(l..k) away.
c = 0.0;
s = 1.0;
for( i = l; i <= k; i++ )
{
f = s * Temp(i);
Temp(i) *= c;
if( Abs( f ) + norm == norm ) break;
g = diag(i);
h = svd_pythag( f, g );
diag(i) = h;
h = 1.0 / h;
c = g * h;
s = -f * h;
for( j = 0; j < m; j++ )
{
y = Q(j,q);
z = Q(j,i);
Q(j,q) = y * c + z * s;
Q(j,i) = z * c - y * s;
}
}
}
z = diag(k);
if( l == k )
{
// Converged for index k: make the singular value non-negative, flipping
// the sign of row k of R to compensate.
if( z < 0.0 )
{
diag(k) = -z;
for( j = 0; j < n; j++ ) R(k,j) *= -1.0;
}
break;
}
// Out of sweeps without convergence: report failure.
if( iter >= MaxIterations ) return 0;
// Compute the shift from the trailing 2 x 2 block.
x = diag(l);
q = k - 1;
y = diag(q);
g = Temp(q);
h = Temp(k);
f = ( ( y - z ) * ( y + z ) + ( g - h ) * ( g + h ) ) / ( 2.0 * h * y );
g = svd_pythag( f, 1.0 );
f = ( ( x - z ) * ( x + z ) + h * ( ( y / ( f + SameSign( g, f ) ) ) - h ) ) / x;
// One sweep of plane rotations from l to k-1, applied to R and Q as well.
c = 1.0;
s = 1.0;
for( j = l; j <= q; j++ )
{
i = j + 1;
g = Temp(i);
y = diag(i);
h = s * g;
g = c * g;
z = svd_pythag( f, h );
Temp(j) = z;
c = f / z;
s = h / z;
f = x * c + g * s;
g = g * c - x * s;
h = y * s;
y = y * c;
for( p = 0; p < n; p++ )
{
x = R(j,p);
z = R(i,p);
R(j,p) = x * c + z * s;
R(i,p) = z * c - x * s;
}
z = svd_pythag( f, h );
diag(j) = z;
// When z is zero the previous c and s are reused.
if( z != 0.0 )
{
z = 1.0 / z;
c = f * z;
s = h * z;
}
f = c * g + s * y;
x = c * y - s * g;
for( p = 0; p < m; p++ )
{
y = Q(p,j);
z = Q(p,i);
Q(p,j) = y * c + z * s;
Q(p,i) = z * c - y * s;
}
}
Temp(l) = 0.0;
Temp(k) = f;
diag(k) = x;
}
}
// Sort the singular values into descending order.
// (Selection sort; columns of Q and rows of R are swapped along with them.)
for( i = 0; i < n - 1; i++ )
{
double biggest = diag(i); // Biggest singular value so far.
int bindex = i; // The row/col it occurred in.
for( j = i + 1; j < n; j++ )
{
if( diag(j) > biggest )
{
biggest = diag(j);
bindex = j;
}
}
if( bindex != i ) // Need to swap rows and columns.
{
Q.SwapCols( i, bindex ); // Swap columns in Q.
R.SwapRows( i, bindex ); // Swap rows in R.
diag.Swap ( i, bindex ); // Swap elements in diag.
}
}
// Publish the sorted singular values as a diagonal matrix.
D = Diag( diag );
return 1;
}
// Returns the cached matrix P_, building it on the first call: every
// diagonal entry of D_ whose magnitude exceeds epsilon is replaced by its
// reciprocal, all others by zero, and the result is multiplied between Q_
// and R_.
//
// NOTE(review): the result is cached in P_, so epsilon only matters on the
// call that actually computes it; later calls with a different epsilon
// return the same matrix.
// NOTE(review): the product is formed as Q_ * D^-1 * R_, the same order as
// the factors themselves.  A pseudo-inverse would normally use the
// transposed factors in reverse order -- verify against the callers.
const Matrix &SVD::PseudoInverse( double epsilon )
{
    if( !Null(P_) ) return P_;      // Already computed.

    Matrix invD( D_ );
    for( int d = 0; d < invD.Rows(); d++ )
    {
        const bool significant = Abs( invD(d,d) ) > epsilon;
        invD(d,d) = significant ? 1.0 / invD(d,d) : 0.0;
    }
    P_ = Q_ * invD * R_;
    return P_;
}
};

View File

@ -1,54 +0,0 @@
/***************************************************************************
* SVD.h *
* *
* Singular Value Decomposition. *
* *
* HISTORY *
* Name Date Description *
* *
* arvo 08/22/2000 Split off from Matrix.h *
* arvo 06/28/1993 Rewritten from "Numerical Recipes" C-code. *
* *
*--------------------------------------------------------------------------*
* Copyright (C) 2000, James Arvo *
* *
* This program is free software; you can redistribute it and/or modify it *
* under the terms of the GNU General Public License as published by the *
* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html *
* *
* This program is distributed in the hope that it will be useful, but *
* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for *
* any particular purpose. See the GNU General Public License for more *
* details. *
* *
***************************************************************************/
#ifndef __SVD_INCLUDED__
#define __SVD_INCLUDED__
#include "Vector.h"
#include "Matrix.h"
namespace ArvoMath {
// Singular value decomposition of a Matrix.  The decomposition is performed
// when an SVD is constructed from, or assigned, a Matrix; the factors are
// then available through Q(), D() and R().
class SVD {
public:
SVD( );
// NOTE(review): no definition of the copy constructor appears in SVD.C as
// shown; using it would fail at link time unless it is defined elsewhere.
SVD( const SVD & ); // Copies the decomposition.
SVD( const Matrix & ); // Performs the decomposition.
~SVD() {};
// Accessors for the three factors.  In SVD.C as shown, epsilon does not
// change the matrix that is returned.
const Matrix &Q( double epsilon = 0.0 ) const;
const Matrix &D( double epsilon = 0.0 ) const;
const Matrix &R( double epsilon = 0.0 ) const;
// Computed on first use and then cached in P_ (hence not const).
const Matrix &PseudoInverse( double epsilon = 0.0 );
// Number of diagonal entries of D whose magnitude exceeds epsilon.
int Rank( double epsilon = 0.0 ) const;
void operator=( const Matrix & ); // Performs the decomposition.
private:
// Works in place; returns 1 on success, 0 if the iteration limit is hit.
int Decompose( Matrix &Q, Matrix &D, Matrix &R );
Matrix Q_;
Matrix D_;
Matrix R_;
Matrix P_; // Pseudo inverse.
// NOTE(review): not read anywhere in SVD.C as shown.
int error;
};
};
#endif

View File

@ -1,292 +0,0 @@
/***************************************************************************
* SphTri.C *
* *
* This file defines the SphericalTriangle class definition, which *
* supports member functions for Monte Carlo sampling, point containment, *
* and other basic operations on spherical triangles. *
* *
* Changes: *
* 01/01/2000 arvo Added New_{Alpha,Beta,Gamma} methods. *
* 12/30/1999 arvo Added VecIrrad method for "Vector Irradiance". *
* 04/08/1995 arvo Further optimized sampling algorithm. *
* 10/11/1994 arvo Added analytic sampling algorithm. *
* 06/14/1994 arvo Initial implementation. *
* *
*--------------------------------------------------------------------------*
* Copyright (C) 1995, 2000, James Arvo *
* *
* This program is free software; you can redistribute it and/or modify it *
* under the terms of the GNU General Public License as published by the *
* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html *
* *
* This program is distributed in the hope that it will be useful, but *
* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for *
* any particular purpose. See the GNU General Public License for more *
* details. *
* *
***************************************************************************/
#include <iostream>
#include <math.h>
#include "SphTri.h"
#include "form.h"
namespace ArvoMath {
/*-------------------------------------------------------------------------*
* Constructor *
* *
* Construct a spherical triangle from three (non-zero) vectors. The *
* vectors needn't be of unit length. *
* *
*-------------------------------------------------------------------------*/
// All member initialisation is delegated to Init( A0, B0, C0 ).
SphericalTriangle::SphericalTriangle( const Vec3 &A0, const Vec3 &B0, const Vec3 &C0 )
{
Init( A0, B0, C0 );
}
/*-------------------------------------------------------------------------*
* Init *
* *
* Construct the spherical triangle from three vertices. Assume that the *
* sphere is centered at the origin. The vectors A, B, and C need not *
* be normalized. *
* *
*-------------------------------------------------------------------------*/
// Set up every cached quantity of the triangle from its three vertices:
// unit vertices, edge lengths and their cosines, internal angles and their
// cosines, solid angle, orientation, and the values used for sampling.
void SphericalTriangle::Init( const Vec3 &A0, const Vec3 &B0, const Vec3 &C0 )
{
    // Vertices: the three inputs scaled to unit length.
    A_ = Unit( A0 );
    B_ = Unit( B0 );
    C_ = Unit( C0 );

    // Edges: the cosine of each edge length, then the length itself.
    // Edge a lies opposite vertex A, and so on.
    cos_a = B_ * C_;
    a_    = ArcCos( cos_a );

    cos_b = A_ * C_;
    b_    = ArcCos( cos_b );

    cos_c = A_ * B_;
    c_    = ArcCos( cos_c );

    // Internal (dihedral) angles: cosine first, then the angle.  The vertex
    // at which the angle sits is the middle argument.
    cos_alpha = CosDihedralAngle( C_, A_, B_ );
    alpha     = ArcCos( cos_alpha );

    cos_beta  = CosDihedralAngle( A_, B_, C_ );
    beta      = ArcCos( cos_beta );

    cos_gamma = CosDihedralAngle( A_, C_, B_ );
    gamma     = ArcCos( cos_gamma );

    // Solid angle: the sum of the internal angles minus Pi.
    area = alpha + beta + gamma - Pi;

    // Orientation: the sign of the triple product of the vertices.
    orient = Sign( A_ * ( B_ ^ C_ ) );

    // Quantities reused each time the triangle is sampled.
    U         = Unit( C_ / A_ ); // In plane of AC orthogonal to A.
    sin_alpha = sin( alpha );
    product   = sin_alpha * cos_c;
}
/*-------------------------------------------------------------------------*
* Init *
* *
* Initialize all fields. Create a null spherical triangle. *
* *
*-------------------------------------------------------------------------*/
// Reset every cached field to zero, which gives a null spherical triangle.
void SphericalTriangle::Init()
{
    // Vertices.
    A_ = 0;
    B_ = 0;
    C_ = 0;

    // Edge lengths and their cosines.
    a_ = 0;  cos_a = 0;
    b_ = 0;  cos_b = 0;
    c_ = 0;  cos_c = 0;

    // Internal angles and their cosines.
    alpha = 0;  cos_alpha = 0;
    beta  = 0;  cos_beta  = 0;
    gamma = 0;  cos_gamma = 0;

    // Solid angle, orientation and the sampling helpers.
    area      = 0;
    orient    = 0;
    sin_alpha = 0;
    product   = 0;
    U         = 0;
}
/*-------------------------------------------------------------------------*
* "( A, B, C )" operator. *
* *
* Construct the spherical triangle from three vertices. Assume that the *
* sphere is centered at the origin. The vectors A, B, and C need not *
* be normalized. *
* *
*-------------------------------------------------------------------------*/
// Re-initialises this triangle in place through Init( A0, B0, C0 ) and
// returns *this, so the call can be used inside a larger expression.
SphericalTriangle & SphericalTriangle::operator()(
const Vec3 &A0,
const Vec3 &B0,
const Vec3 &C0 )
{
Init( A0, B0, C0 );
return *this;
}
/*-------------------------------------------------------------------------*
* Inside *
* *
* Determine if the vector W is inside the triangle. W need not be a *
* unit vector *
* *
*-------------------------------------------------------------------------*/
// Returns 1 when W, multiplied by the triangle's orientation, has a
// non-negative dot product with each of the three edge normals A^B, B^C
// and C^A, and 0 as soon as one of the products is negative.
int SphericalTriangle::Inside( const Vec3 &W ) const
{
    Vec3 Z = Orient() * W;

    // Tested in the order AB, BC, CA; evaluation stops at the first
    // negative product.
    const bool outside = ( Z * ( A() ^ B() ) < 0.0 )
                      || ( Z * ( B() ^ C() ) < 0.0 )
                      || ( Z * ( C() ^ A() ) < 0.0 );
    return outside ? 0 : 1;
}
/*-------------------------------------------------------------------------*
* Chart *
* *
* Generate samples from the current spherical triangle. If x1 and x2 are *
* random variables uniformly distributed over [0,1], then the returned *
* points are uniformly distributed over the solid angle. *
* *
*-------------------------------------------------------------------------*/
// Map the pair (x1, x2) to a point on the triangle.  x1 selects a
// sub-triangle by area, which fixes a new third vertex on the arc AC; x2
// then selects the height between B and that vertex.
Vec3 SphericalTriangle::Chart( float x1, float x2 ) const
{
    // Angle phi for the area fraction x1, with its sine and cosine.
    float phi    = x1 * area - Alpha();
    float sinPhi = sin( phi );
    float cosPhi = cos( phi );

    // The pair (u,v) that determines the new angle beta.
    float u = cosPhi - cos_alpha;
    float v = sinPhi + product;   // product == sin_alpha * cos_c

    // Cosine of the new edge b.
    float q = ( cos_alpha * ( v * cosPhi - u * sinPhi ) - v ) /
              ( sin_alpha * ( u * cosPhi + v * sinPhi ) );

    // Third vertex of the sub-triangle.
    Vec3 C_new = q * A() + Sqrt( 1.0 - q * q ) * U;

    // Height z chosen by the second variable.
    float z = 1.0 - x2 * ( 1.0 - C_new * B() );

    // Point on the sphere at height z along B, in the direction of C_new
    // with its B component removed.
    Vec3 D = C_new / B();
    return z * B() + Sqrt( ( 1.0 - z * z ) / ( D * D ) ) * D;
}
/*-------------------------------------------------------------------------*
* Coord *
* *
* Compute the two coordinates (x1,x2) corresponding to a point in the *
* spherical triangle. This is the inverse of "Chart". *
* *
*-------------------------------------------------------------------------*/
// Inverse of Chart: recover the pair (x1, x2) for a point of the triangle.
// Both coordinates are clamped to the range 0..1 before being returned.
Vec2 SphericalTriangle::Coord( const Vec3 &P1 ) const
{
    Vec3 P = Unit( P1 );

    // New C vertex: where the arc through B and P meets the arc through A
    // and C.  Of the two antipodal candidates keep the one on the A-C side.
    Vec3 C_new = Unit( ( B() ^ P ) ^ ( C() ^ A() ) );
    if( C_new * ( A() + C() ) < 0.0 ) C_new = -C_new;

    // x1: area of the sub-triangle (A, B, C_new) over the full solid angle.
    float cosSubBeta  = CosDihedralAngle( A(), B(), C_new );
    float cosSubGamma = CosDihedralAngle( A(), C_new , B() );
    float sub_area    = Alpha() + acos( cosSubBeta ) + acos( cosSubGamma ) - Pi;
    float x1          = sub_area / SolidAngle();

    // x2: relative height of P between B and the new C vertex.
    float z  = P * B();
    float x2 = ( 1.0 - z ) / ( 1.0 - C_new * B() );

    if( x1 < 0.0 ) x1 = 0.0;
    else if( x1 > 1.0 ) x1 = 1.0;

    if( x2 < 0.0 ) x2 = 0.0;
    else if( x2 > 1.0 ) x2 = 1.0;

    return Vec2( x1, x2 );
}
/*-------------------------------------------------------------------------*
* Dual *
* *
* Construct the dual triangle of the current triangle, which is another *
* spherical triangle. *
* *
*-------------------------------------------------------------------------*/
SphericalTriangle SphericalTriangle::Dual() const
{
    // Each dual vertex is the cross product of two of the original vertices,
    // flipped when necessary so that it has a non-negative dot product with
    // the remaining original vertex.
    Vec3 polarA = B() ^ C();
    if( polarA * A() < 0.0 ) polarA *= -1.0;

    Vec3 polarB = A() ^ C();
    if( polarB * B() < 0.0 ) polarB *= -1.0;

    Vec3 polarC = A() ^ B();
    if( polarC * C() < 0.0 ) polarC *= -1.0;

    return SphericalTriangle( polarA, polarB, polarC );
}
/*-------------------------------------------------------------------------*
* VecIrrad *
* *
* Return the "vector irradiance" due to a light source of unit brightness *
* whose spherical projection is this spherical triangle. The negative of *
* this vector dotted with the surface normal gives the (scalar) *
* irradiance at the origin. *
* *
*-------------------------------------------------------------------------*/
Vec3 SphericalTriangle::VecIrrad() const
{
    // One contribution per edge: the edge length times the unit normal of
    // the plane spanned by that edge's two end vertices.
    Vec3 fromEdgeA = a() * Unit( B() ^ C() );
    Vec3 fromEdgeB = b() * Unit( C() ^ A() );
    Vec3 fromEdgeC = c() * Unit( A() ^ B() );

    // Summed left to right, exactly as a single chained expression would be.
    Vec3 total = fromEdgeA + fromEdgeB + fromEdgeC;

    // Negate the result when the orientation flag is non-zero.
    if( Orient() ) total *= -1.0;
    return total;
}
/*-------------------------------------------------------------------------*
* New_Alpha *
* *
* Returns a new spherical triangle derived from the original one by *
* moving the "C" vertex along the edge "BC" until the new "alpha" angle *
* equals the given argument. *
* *
*-------------------------------------------------------------------------*/
SphericalTriangle SphericalTriangle::New_Alpha( float alpha ) const
{
    // Local copies of the three vertices.
    Vec3 vertA( A() ), vertB( B() ), vertC( C() );

    // Two directions derived from A and B; the requested angle selects a
    // combination of them.
    Vec3 axis1 = Unit( vertB ^ vertA );
    Vec3 axis2 = axis1 ^ vertA;
    Vec3 mix   = ( cos(alpha) * axis1 ) + ( sin(alpha) * axis2 );

    // vertC with its vertB component removed, normalized.
    Vec3 across = Unit( vertC / vertB );

    // Combination of vertB and "across" that is perpendicular to "mix".
    Vec3 movedC = ((mix * across) * vertB) - ((mix * vertB) * across);

    // Flip the sign when the triple product of (A, B, new C) is positive.
    if( Triple( vertA, vertB, movedC ) > 0.0 ) movedC *= -1.0;

    return SphericalTriangle( vertA, vertB, movedC );
}
// Writes a heading line followed by the three vertices, one per line, and
// flushes the stream after the last vertex.
std::ostream &operator<<( std::ostream &out, const SphericalTriangle &T )
{
    out << "SphericalTriangle:\n";
    out << " " << T.A() << "\n";
    out << " " << T.B() << "\n";
    out << " " << T.C() << std::endl;
    return out;
}
};

View File

@ -1,124 +0,0 @@
/***************************************************************************
* SphTri.h *
* *
* This file defines the SphericalTriangle class, which *
* supports member functions for Monte Carlo sampling, point containment, *
* and other basic operations on spherical triangles. *
* *
* Changes: *
* 01/01/2000 arvo Added New_{Alpha,Beta,Gamma} methods. *
* 12/30/1999 arvo Added VecIrrad method for "Vector Irradiance". *
* 04/08/1995 arvo Further optimized sampling algorithm. *
* 10/11/1994 arvo Added analytic sampling algorithm. *
* 06/14/1994 arvo Initial implementation. *
* *
*--------------------------------------------------------------------------*
* Copyright (C) 1995, 2000, James Arvo *
* *
* This program is free software; you can redistribute it and/or modify it *
* under the terms of the GNU General Public License as published by the *
* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html *
* *
* This program is distributed in the hope that it will be useful, but *
* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for *
* any particular purpose. See the GNU General Public License for more *
* details. *
* *
***************************************************************************/
#ifndef __SPHTRI_INCLUDED__
#define __SPHTRI_INCLUDED__
#include "Vec3.h"
#include "Vec2.h"
namespace ArvoMath {
/*
* The (Oblique) Spherical Triangle ABC. Edge lengths (segments of great
* circles) are a, b, and c. The (dihedral) angles are Alpha, Beta, and Gamma.
*
* B
* o
* / \
* / \
* /Beta \
* / \
* c / \ a
* / \
* / \
* / \
* / \
* / \
* /Alpha Gamma \
* o-----------------------o
* A b C
*
*/
// A triangle on the unit sphere described by its three vertices, together
// with cached edge lengths, angles, their cosines, the area and an
// orientation flag.
//
// No copy constructor, assignment operator or destructor is declared on
// purpose.  Every data member is a Vec3, a float or an int, so the
// compiler-generated member-wise versions are correct.  The previous
// hand-written "operator=" was implemented as "*this = T", i.e. it called
// itself without end, and the copy constructor forwarded to it, so any copy
// of a SphericalTriangle recursed until the stack overflowed.
class SphericalTriangle {
public: // methods
    SphericalTriangle() { Init(); }
    SphericalTriangle( const Vec3 &, const Vec3 &, const Vec3 & );
    SphericalTriangle & operator()( const Vec3 &, const Vec3 &, const Vec3 & );

    Vec3   Chart ( float x, float y ) const; // Const-Jacobian map from square.
    Vec2   Coord ( const Vec3 &P    ) const; // Get 2D coords of a point.
    int    Orient( ) const { return orient; }
    int    Inside( const Vec3 & ) const;
    float  SolidAngle() const { return area; }
    float  SignedSolidAngle() const { return -orient * area; } // CC is pos.

    // The three vertices.
    const Vec3 &A() const { return A_ ; }
    const Vec3 &B() const { return B_ ; }
    const Vec3 &C() const { return C_ ; }

    // The edge lengths and their cosines.
    float  a() const { return a_ ; }
    float  b() const { return b_ ; }
    float  c() const { return c_ ; }
    float  Cos_a() const { return cos_a ; }
    float  Cos_b() const { return cos_b ; }
    float  Cos_c() const { return cos_c ; }

    // The angles and their cosines.
    float  Alpha() const { return alpha ; }
    float  Beta () const { return beta  ; }
    float  Gamma() const { return gamma ; }
    float  CosAlpha() const { return cos_alpha; }
    float  CosBeta () const { return cos_beta ; }
    float  CosGamma() const { return cos_gamma; }

    Vec3   VecIrrad() const; // Returns the vector irradiance.
    SphericalTriangle Dual() const;
    SphericalTriangle New_Alpha( float alpha ) const;
    SphericalTriangle New_Beta ( float beta  ) const;
    SphericalTriangle New_Gamma( float gamma ) const;

private: // methods
    void Init( );
    void Init( const Vec3 &A, const Vec3 &B, const Vec3 &C );

private: // data
    Vec3  A_, B_, C_, U;        // The vertices (and a temp vector).
    float a_, b_, c_;           // The edge lengths.
    float alpha, beta, gamma;   // The angles.
    float cos_a, cos_b, cos_c;
    float cos_alpha, cos_beta, cos_gamma;
    float area;
    float sin_alpha, product;   // Used in sampling algorithm.
    int   orient;               // Orientation.
};
// Dot product of the unit normals Unit(A ^ B) and Unit(C ^ B), limited to the
// closed interval [-1, 1] so that the result can be handed to acos().
inline double CosDihedralAngle( const Vec3 &A, const Vec3 &B, const Vec3 &C )
{
    float cosine = Unit( A ^ B ) * Unit( C ^ B );
    return ( cosine < -1.0 ) ? -1.0
         : ( cosine >  1.0 ) ?  1.0
         :                      cosine;
}
// The angle whose cosine is CosDihedralAngle( A, B, C ).
inline double DihedralAngle( const Vec3 &A, const Vec3 &B, const Vec3 &C )
{
    const double cosine = CosDihedralAngle( A, B, C );
    return acos( cosine );
}
// Stream insertion for a SphericalTriangle; only declared here.
extern std::ostream &operator<<( std::ostream &out, const SphericalTriangle & );
};
#endif

View File

@ -1,913 +0,0 @@
/***************************************************************************
* Token.C *
* *
* The Token class encapsulates a lexical analyzer for C++-like syntax. *
* A token instance is associated with one or more text files, and *
* grabs C++ tokens from them sequentially. There are many member *
* functions designed to make parsing easy, such as "==" operators for *
* strings and characters, and automatic conversion of numeric tokens *
* into numeric values. *
* *
* Files can be nested via #include directives, and both styles of C++ *
* comments are supported. *
* *
* HISTORY *
* Name Date Description *
* *
* arvo 10/05/99 Fixed bug in TokFrame string allocation. *
* arvo 01/15/95 Added ifdef, ifndef, else, and endif. *
* arvo 02/13/94 Added Debug() member function. *
* arvo 01/22/94 Several sections rewritten. *
* arvo 06/19/93 Converted to C++ *
* arvo 07/15/89 Rewritten for scene description parser. *
* arvo 01/22/89 Initial coding. *
* *
*--------------------------------------------------------------------------*
* Copyright (C) 1999, James Arvo *
* *
* This program is free software; you can redistribute it and/or modify it *
* under the terms of the GNU General Public License as published by the *
* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html *
* *
* This program is distributed in the hope that it will be useful, but *
* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for *
* any particular purpose. See the GNU General Public License for more *
* details. *
* *
***************************************************************************/
#include <stdlib.h>
#include <string.h>
#include "Token.h"
#include "Char.h"
namespace ArvoMath {
// Definitions of the static data members declared in the Token class.
FILE* Token::debug = NULL; // Static data member of Token class.
int Token::argc = 0;
char** Token::argv = NULL;
// Element type of the macro hash table: each slot heads a chain of TokMacro.
typedef TokMacro *TokMacroPtr;
// Integer truth values returned by the comparison operators in this file.
static const int True = 1;
static const int False = 0;
static const int HashConst = 217; // Size of hash-table for macros.
// A fresh frame refers to no file, has no name and no parent frame, and
// starts at line 0, column 0.
TokFrame::TokFrame()
    : next  ( NULL ),
      source( NULL ),
      fname ( NULL ),
      line  ( 0 ),
      column( 0 )
{
}
// Releases the file name and closes the source file, if any.
// Every assignment to "fname" in this file stores the result of strdup(),
// which allocates with malloc(); the matching release is therefore free(),
// not delete[].
TokFrame::~TokFrame()
{
    if( fname  != NULL ) free( fname );
    if( source != NULL ) fclose( source );
}
// Copies every field of "frame"; the file name is duplicated so that each
// frame owns its own string.  The FILE pointer is copied as-is, so after the
// assignment both frames refer to the same open file.
//
// A frame without a name (fname == NULL) is copied as such; handing NULL to
// strdup() is undefined behavior.
//
// The previous value of "fname" is deliberately not released here: the copy
// constructor calls this operator on an object whose members have not been
// initialized yet.
void TokFrame::operator=( const TokFrame &frame )
{
    next   = frame.next;
    source = frame.source;
    fname  = ( frame.fname != NULL ) ? strdup( frame.fname ) : NULL;
    line   = frame.line;
    column = frame.column;
}
// Hashes a name into the range [0, HashConst): each character is multiplied
// by one of five small weights, used cyclically, and the products are summed.
static int HashName( const char *str )
{
    static const int weight[5] = { 7, 11, 17, 23, 3 };
    int sum  = 0;
    int slot = 0;
    for( const char *p = str; *p != NullChar; ++p )
    {
        sum += (*p) * weight[slot];
        slot = ( slot == 4 ) ? 0 : slot + 1;
    }
    if( sum < 0 ) sum = 0; // Check for overflow.
    return sum % HashConst;
}
// Returns the macro table entry whose name equals "str", or NULL when the
// table does not exist or holds no such entry.
TokMacro *Token::MacroLookup( const char *str ) const
{
    if( table == NULL ) return NULL;

    // Walk the chain of the bucket selected by the hash of the name.
    TokMacro *entry = table[ HashName( str ) ];
    while( entry != NULL && strcmp( str, entry->macro ) != 0 )
    {
        entry = entry->next;
    }
    return entry;
}
int Token::MacroReplace( char *str, int &length, TokType &type ) const
{
TokMacro *m = MacroLookup( str );
if( m == NULL ) return 0;
strcpy( str, m->repl );
length = strlen( str );
type = m->type;
return 1;
}
/*-------------------------------------------------------------------------*
* D e b u g P r i n t *
* *
* This routine is used to record the entire token stream in a file to *
* use as a debugging aid. It does not affect the action of the lexer; *
* it merely records a "shadow" copy of all the tokens that are read by *
* ANY Token instance. The data that is written to the file is *
* *
* <Line number> <Column number> <File name> <Token> *
* *
*-------------------------------------------------------------------------*/
// Appends one record describing "tok" to "fp" and flushes the stream.
static void DebugPrint( const Token &tok, FILE *fp )
{
    const int   row   = tok.Line();
    const int   col   = tok.Column();
    const char *file  = tok.FileName();
    const char *text  = tok.Spelling();

    fprintf( fp, "%3d %3d ", row, col );
    fprintf( fp, "%s " , file );
    fprintf( fp, "%s\n" , text );
    fflush ( fp );
}
/*-------------------------------------------------------------------------*
* T o k e n (Constructors) *
* *
*-------------------------------------------------------------------------*/
// Initializes the lexer state, then tries to open the named file for reading.
// Open() returns without reporting anything if the file cannot be opened.
Token::Token( const char *file_name )
{
Init();
Open( file_name );
}
// Initializes the lexer state, then reads from an already opened file.
Token::Token( FILE *fp )
{
Init();
Open( fp );
}
// Initializes the lexer state only; no file is attached.
Token::Token( )
{
Init();
}
/*-------------------------------------------------------------------------*
* T o k e n (Destructor) *
* *
* Close all files and deletes all frames and paths. *
* *
*-------------------------------------------------------------------------*/
Token::~Token( )
{
    // "frame" itself is a data member and is destroyed automatically; only
    // the frames chained behind it were allocated with new.
    TokFrame *f = frame.next;
    while( f != NULL )
    {
        TokFrame *n = f->next;
        delete f;
        f = n;
    }
    frame.next = NULL;

    ClearPaths();

    // Release the macro table.  AddMacro() creates the bucket array with
    // new[], each entry with new, and both strings of an entry with
    // strdup(); none of that was released before, so every Token that
    // defined a macro leaked its whole table.
    if( table != NULL )
    {
        for( int i = 0; i < HashConst; i++ )
        {
            TokMacro *m = table[i];
            while( m != NULL )
            {
                TokMacro *following = m->next;
                free( m->macro );
                free( m->repl  );
                delete m;
                m = following;
            }
        }
        delete[] table;
        table = NULL;
    }
}
/*-------------------------------------------------------------------------*
* O p e n *
* *
* Establish a new file to read from, either by name, or by pointer. *
* *
*-------------------------------------------------------------------------*/
// Opens the named file for reading and makes it the current source.
// Nothing happens when the file cannot be opened.
void Token::Open( const char *file_name )
{
    FILE *stream = fopen( file_name, "r" );
    if( stream != NULL )
    {
        Open( stream );
        frame.fname = strdup( file_name );
    }
}
// Makes an already opened file the current source: reading starts at line 1,
// column 0, with no pushed-back character pending.
void Token::Open( FILE *fp )
{
frame.source = fp;
frame.line = 1;
frame.column = 0;
pushed = NullChar;
}
/*-------------------------------------------------------------------------*
* O p e r a t o r == *
* *
* A token can be compared with a string, a single character, or a type. *
* *
*-------------------------------------------------------------------------*/
// True when the token's spelling equals the string "s".  Characters are
// compared through ToUpper() when case sensitivity is switched off.
int Token::operator==( const char *s ) const
{
    for( const char *t = spelling; ; ++s, ++t )
    {
        const int same = case_sensitive
                       ? ( *s == *t )
                       : ( ToUpper(*s) == ToUpper(*t) );
        if( !same ) return False;

        // The pair matched; stop as soon as either string has ended.
        if( !*s || !*t ) return True;
    }
}
// True when the token consists of exactly the one character "c".
int Token::operator==( char c ) const
{
    if( length != 1 ) return False;

    const char only = spelling[0];
    return case_sensitive ? ( only == c )
                          : ( ToUpper(only) == ToUpper(c) );
}
// True when the token is of the given type.  T_char and T_numeric are
// pseudo-types: T_char matches a string of length one, T_numeric matches
// either an integer or a float.
int Token::operator==( TokType _type_ ) const
{
    if( _type_ == T_char )
        return type == T_string && Len() == 1;

    if( _type_ == T_numeric )
        return type == T_integer || type == T_float;

    return type == _type_;
}
/*-------------------------------------------------------------------------*
* O p e r a t o r != *
* *
* Define negations of the three types of "==" tests. *
* *
*-------------------------------------------------------------------------*/
// Each "!=" is the logical negation of the "==" with the same operand type.
int Token::operator!=( const char *s ) const
{
    return ( *this == s ) ? 0 : 1;
}
int Token::operator!=( char c ) const
{
    return ( *this == c ) ? 0 : 1;
}
int Token::operator!=( TokType t ) const
{
    return ( *this == t ) ? 0 : 1;
}
/*-------------------------------------------------------------------------*
* E r r o r *
* *
* Print error message to "stderr" followed by optional "name". *
* *
*-------------------------------------------------------------------------*/
// Reports a lexical error on stderr, together with the current line and
// column and the optional "name", then terminates the program with exit
// status 1.  This function does not return.
void Token::Error( TokError error, const char *name )
{
    // The messages are string literals, so the pointer must be to const:
    // converting a literal to plain "char *" is not valid in modern C++.
    const char *s;
    switch( error )
    {
        case T_malformed_float   : s = "malformed real number ";    break;
        case T_unterm_string     : s = "unterminated string ";      break;
        case T_unterm_comment    : s = "unterminated comment ";     break;
        case T_file_not_found    : s = "include file not found: ";  break;
        case T_unknown_directive : s = "unknown # directive ";      break;
        case T_string_expected   : s = "string expected ";          break;
        case T_putback_error     : s = "putback overflow ";         break;
        case T_name_too_long     : s = "file name is too long ";    break;
        case T_no_endif          : s = "#endif directive missing";  break;
        case T_extra_endif       : s = "#endif with no #ifdef ";    break;
        case T_extra_else        : s = "#else with no #ifdef ";     break;
        default                  : s = "unknown error type ";       break;
    }
    fprintf( stderr, "LEXICAL ERROR, line %d, column %d: %s",
             frame.line, frame.column, s );
    if( name == NULL )
        fprintf( stderr, " \n" );
    else fprintf( stderr, "%s\n", name );
    exit( 1 );
}
/*-------------------------------------------------------------------------*
* G e t c *
* *
* This routine fetches one character at a time from the current file *
* being read. It is responsible for keeping track of the column number *
* and for handling single characters that have been "put back". *
* *
*-------------------------------------------------------------------------*/
// Stores the next input character in "c" and also returns it.  A character
// that was put back with Unget() is delivered first; otherwise one character
// is read from the current file and the column counter is advanced.
//
// When no file is attached (frame.source is NULL, e.g. after Open() failed
// to open the named file) EOF is delivered instead of handing a null stream
// to getc().
int Token::Getc( int &c )
{
    if( pushed != NullChar )         // Return the pushed character.
    {
        c = pushed;
        pushed = NullChar;
    }
    else if( frame.source == NULL )  // Nothing to read from.
    {
        c = EOF;
    }
    else                             // Get a new character from the source file.
    {
        c = getc( frame.source );
        frame.column++;
    }
    return c;
}
/*-------------------------------------------------------------------------*
* N o n W h i t e *
* *
* This routine implements a simple finite state machine that skips *
* white space and recognizes the two styles of comments used in C++. *
* It returns the first non-white character not part of a comment. *
* *
*-------------------------------------------------------------------------*/
int Token::NonWhite( int &c )
{
start_state:
Getc( c );
if( c == Space ) goto start_state;
if( c == Tab ) goto start_state;
if( c == NewLine ) goto start_new_line;
if( c == Slash ) goto start_comment;
goto return_char;
start_comment:
Getc( c );
if( c == Star ) goto in_comment1;
if( c == Slash ) goto in_comment2;
Unget( c );
c = Slash;
goto return_char;
in_comment1:
Getc( c );
if( c == Star ) goto end_comment1;
if( c == NewLine ) goto newline_in_comment;
if( c == EOF ) goto return_char;
goto in_comment1;
end_comment1:
Getc( c );
if( c == Slash ) goto start_state;
if( c == NewLine ) goto newline_in_comment;
if( c == EOF ) goto unterm_comment;
goto in_comment1;
in_comment2:
Getc( c );
if( c == NewLine ) goto start_new_line;
if( c == EOF ) goto return_char;
goto in_comment2;
unterm_comment:
Error( T_unterm_comment );
c = EOF;
goto return_char;
start_new_line:
frame.line++;
frame.column = 0;
goto start_state;
newline_in_comment:
frame.line++;
frame.column = 0;
goto in_comment1;
return_char:
Tcolumn = frame.column; // This is where the token starts.
return c;
}
/*-------------------------------------------------------------------------*
* N e x t R a w T o k *
* *
*-------------------------------------------------------------------------*/
// Reads the next token from the current source without macro substitution.
// The token text is stored in "spelling", its kind in "type" and its length
// in "length", which is also the return value.  At end of input the type
// stays T_null and the length 0.
//
// NOTE(review): only the string loop checks "length" against MaxTokenLen;
// the identifier, number and directive loops write into "spelling" without
// a bound -- confirm whether over-long tokens can occur in practice.
int Token::NextRawTok( )
{
// State transition tables of the number recognizer, one per class of input
// character, each indexed by the current state.  Per the switch further
// down: state 5 ends the number (integer, or float when the token began with
// a period), state 6 ends a float, state 7 is an error, and the remaining
// states keep scanning.
static int Trans0[] = { 0, 1, 3, 3, 3 }; // Found a digit.
static int Trans1[] = { 5, 6, 4, 6, 7 }; // Found a sign.
static int Trans2[] = { 1, 6, 7, 6, 7 }; // Found decimal point.
static int Trans3[] = { 2, 2, 7, 6, 7 }; // Found an exponent.
static int Trans4[] = { 5, 6, 7, 6, 7 }; // Found something else.
char *tok = spelling;
int state;
int c;
length = 0;
type = T_null;
// Skip comments and whitespace.
if( NonWhite( c ) == EOF ) goto endtok;
// Is this the beginning of an identifier? If so, get the rest.
if( isAlpha( c ) )
{
type = T_ident;
do {
*tok++ = c;
length++;
if( Getc( c ) == EOF ) goto endtok;
}
while( isAlpha( c ) || isDigit( c ) || c == Underscore );
// The character that ended the identifier belongs to the next token.
Unget( c );
goto endtok;
}
// Is this the beginning of a number?
else if( isDigit( c ) || c == Minus || c == Period )
{
// Remember the first character: it decides between integer and float when
// the recognizer stops in state 5.
char c1 = c;
state = 0;
for(;;)
{
*tok++ = c;
length++;
switch( Getc( c ) )
{
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9': state = Trans0[ state ]; break;
case '+':
case '-': state = Trans1[ state ]; break;
case '.': state = Trans2[ state ]; break;
case 'e':
case 'E': state = Trans3[ state ]; break;
default : state = Trans4[ state ]; break;
}
switch( state )
{
case 5 : Unget( c );
type = ( c1 == Period ) ? T_float : T_integer;
goto endtok;
case 6 : Unget( c ); type = T_float ; goto endtok;
case 7 : Error( T_malformed_float ) ; break;
default: continue;
}
} // for
} // if numeric
// Is this the beginning of an operator?
// A second character is appended when it repeats the first one or is an
// equal sign.
if( c == '*' || c == '>' || c == '<' || c == '+' || c == '-' || c == '!' )
{
char oldc = c;
type = T_other;
*tok++ = c;
length++;
if( Getc( c ) == EOF ) goto endtok;
if( c == oldc || c == EqualSign )
{
*tok++ = c;
length++;
}
else Unget( c );
goto endtok;
}
// Is this the beginning of a string?
// The surrounding quotes are not stored in the spelling.
else if( c == DoubleQuote )
{
type = T_string;
while( Getc( c ) != EOF && length < MaxTokenLen )
{
if( c == DoubleQuote ) goto endtok;
*tok++ = c;
length++;
}
// Reached on EOF or when the buffer is full before the closing quote.
Error( T_unterm_string );
}
// Is this the beginning of a "#" directive?
// Only the directive's name is stored; the '#' itself is dropped.
else if( c == Hash )
{
type = T_directive;
NonWhite( c );
while( isAlpha( c ) )
{
*tok++ = c;
length++;
Getc( c );
}
Unget( c );
goto endtok;
}
// This must be a one-character token.
else
{
*tok++ = c;
length = 1;
type = T_other;
}
endtok: // Jump to here when token is completed.
*tok = NullChar; // Terminate the string.
if( debug != NULL ) DebugPrint( *this, debug );
return length;
}
/*-------------------------------------------------------------------------*
* N e x t T o k *
* *
*-------------------------------------------------------------------------*/
// Reads the next token and applies macro substitution to identifiers.
// Identifiers whose macro expands to nothing are skipped.  Returns the
// length of the token that was finally accepted.
int Token::NextTok( )
{
    for(;;)
    {
        NextRawTok();

        // Only identifiers can be macros.
        if( type != T_ident ) break;

        // Substitute, and record the substituted token in the debug file.
        if( table != NULL
            && MacroReplace( spelling, length, type )
            && debug != NULL )
        {
            DebugPrint( *this, debug );
        }

        // A null macro yields no token at all: fetch another one.
        if( type != T_nullmacro ) break;
    }
    return length;
}
/*-------------------------------------------------------------------------*
* O p e r a t o r - - *
* *
* Puts back the last token found. Only one token can be put back. *
* *
*-------------------------------------------------------------------------*/
// Marks the current token as put back, so that the next prefix "++" delivers
// it again instead of reading a new one.  A second put-back without an
// intervening "++" is reported through Error().
Token & Token::operator--( ) // Put the last token back.
{
if( put_back ) Error( T_putback_error ); // Can only handle one putback.
put_back = 1;
return *this;
}
// Postfix form: not supported.  Prints a notice on stderr and leaves the
// token unchanged.
Token & Token::operator--( int ) // Postfix decrement.
{
fprintf( stderr, "Postfix decrement is not implemented for the Token class.\n" );
return *this;
}
/*-------------------------------------------------------------------------*
* H a n d l e D i r e c t i v e *
* *
* Directive beginning with "#" must be handled by the lexer, as they *
* determine the current source file via "#include", etc. *
* *
* Returns 1 if, after handling this directive, we now have the next *
* token. *
* *
*-------------------------------------------------------------------------*/
// Acts on the directive whose name is the current token.  Returns 1 when,
// as a side effect, the token following the directive has already been read
// into this object, and 0 when the caller still has to fetch the next token.
// A directive name that matches none of the branches is ignored (returns 0).
int Token::HandleDirective( )
{
FILE *fp;
char name[128];
if( *this == "define" )
{
NextRawTok();
strcpy( tempbuff, Spelling() ); // This is the macro name.
int line = Line();
NextRawTok();
// The replacement must be on the same line as the macro name.
if( Line() == line )
AddMacro( tempbuff, Spelling(), Type() );
else
{
// If next token is on a different line; we went too far.
AddMacro( tempbuff, "", T_nullmacro );
return 1; // Signal that we already have the next token.
}
}
else if( *this == "include" )
{
NextRawTok();
// <name> form: the file is looked up in the registered search paths.
if( *this == "<" )
{
GetName( name, sizeof(name) );
PushFrame( ResolveName( name ), name );
}
// "name" form: the file is opened exactly as spelled.
else if( type == T_string )
{
fp = fopen( spelling, "r" );
if( fp == NULL ) Error( T_file_not_found, spelling );
else PushFrame( fp, spelling );
}
else Error( T_string_expected );
}
else if( *this == "ifdef" )
{
NextRawTok();
TokMacro *m = MacroLookup( Spelling() );
if( m == NULL ) // Skip until else or endif.
{
// NOTE(review): the skip loop stops at the first #endif or #else it meets;
// conditionals nested inside the skipped text do not appear to be tracked.
while( *this != T_null )
{
NextRawTok();
if( *this != T_directive ) continue;
if( *this == "endif" ) break;
if( *this == "else" ) { if_nesting++; break; } // Like m != NULL.
}
if( *this == T_null ) Error( T_no_endif );
return 0; // Ready to get the next token.
}
else if_nesting++;
}
else if( *this == "ifndef" )
{
NextRawTok();
TokMacro *m = MacroLookup( Spelling() );
if( m != NULL ) // Skip until else or endif.
{
while( *this != T_null )
{
NextRawTok();
if( *this != T_directive ) continue;
if( *this == "endif" ) break;
if( *this == "else" ) { if_nesting++; break; } // Like m == NULL.
}
if( *this == T_null ) Error( T_no_endif );
return 0; // Ready to get the next token.
}
else if_nesting++;
}
// An #else met while tokens are being delivered ends the active branch of
// a conditional, so everything up to the #endif is skipped.
else if( *this == "else" ) // Skip until #endif.
{
if( if_nesting == 0 ) Error( T_extra_else );
while( *this != T_null )
{
NextRawTok();
if( *this == T_directive && *this == "endif" ) break;
}
if( *this == T_null ) Error( T_no_endif );
if_nesting--;
return 0; // Ready to get next token.
}
else if( *this == "endif" )
{
if( if_nesting == 0 ) Error( T_extra_endif );
if_nesting--;
return 0; // Ready to get next token.
}
else if( *this == "error" )
{
int line = Line();
NextTok(); // Allow macro substitution.
// Only a token on the same line as the directive is taken as its message.
if( Line() == line )
{
fprintf( stderr, "(preprocessor, line %d) %s\n", line, Spelling() );
return 0; // Ready to get next token.
}
else
{
// If next token is on a different line; we went too far.
fprintf( stderr, "(null preprocessor message, line %d)\n", line );
return 1; // Signal that we already have the next token.
}
}
return 0;
}
/*-------------------------------------------------------------------------*
* O p e r a t o r + + *
* *
* Grab the next token from the current source file. If at end of file, *
* pick up where we left off in the previous file. If there is no *
* previous file, return "T_null". *
* *
*-------------------------------------------------------------------------*/
// Advances to the next token.  Directives are handled internally, and when
// an included file ends, reading resumes in the file that included it.  At
// the end of the outermost file the token type is T_null.
//
// The current file is closed exactly once, and the pointer is cleared
// afterwards.  Previously the closed FILE pointer stayed in "frame", so it
// was closed a second time by the frame's destructor and was read from
// again if "++" was applied once more after the end of input.  With no file
// attached the operator now simply yields T_null.
Token & Token::operator++( )
{
    if( put_back )
    {
        put_back = 0;
        return *this;
    }

    // If we've reached the end of an include file, pop the stack.
    for(;;)
    {
        if( frame.source == NULL )
        {
            // Nothing (left) to read: report end of input.
            type        = T_null;
            length      = 0;
            spelling[0] = NullChar;
            break;
        }
        NextTok();
        if( type == T_directive )
        {
            if( HandleDirective() ) break;
        }
        else if( type == T_null )
        {
            if( frame.source != NULL )
            {
                fclose( frame.source );
                frame.source = NULL;
            }
            if( !PopFrame() ) break;
        }
        else break; // We have a real token.
    }

    // Now fill in the value fields if the token is a number.
    switch( type )
    {
        case T_integer : ivalue = atoi( spelling ); break;
        case T_float   : fvalue = atof( spelling ); break;
        case T_null    : if( if_nesting > 0 ) Error( T_no_endif ); break;
        default        : break;
    }
    return *this;
}
// Postfix form: not supported.  Prints a notice on stderr and leaves the
// token unchanged.
Token & Token::operator++( int )
{
fprintf( stderr, "Postfix increment is not implemented for the Token class.\n" );
return *this;
}
/*-------------------------------------------------------------------------*
* T o k e n Push & Pop Frame *
* *
* These functions are used to create and destroy the context "frames" *
* that are used to handle nested files (via "include"). *
* *
*-------------------------------------------------------------------------*/
// Starts reading from "fp" (named "fname", which may be NULL) and remembers
// the state of the file read so far in a new frame chained behind "frame".
//
// The saved frame takes over the current file name pointer instead of
// receiving a duplicate.  Previously the name was duplicated and the
// original pointer was then overwritten, which leaked it; duplicating also
// passed NULL to strdup() whenever the current file had no name.  A NULL
// "fname" argument (its default value) is likewise no longer handed to
// strdup().
void Token::PushFrame( FILE *fp, char *fname )
{
    // Move the current (top-level) state into a newly allocated frame.
    TokFrame *n = new TokFrame;
    n->next   = frame.next;
    n->source = frame.source;
    n->fname  = frame.fname;   // Ownership of the string moves to *n.
    n->line   = frame.line;
    n->column = frame.column;

    // Now overwrite the top-level frame with the new state.
    frame.next   = n;
    frame.source = fp;
    frame.line   = 1;
    frame.column = 0;
    frame.fname  = ( fname != NULL ) ? strdup( fname ) : NULL;
    pushed = NullChar;
}
// Returns to the file that included the current one.  Returns 0 when there
// is no such file, 1 otherwise.  The caller is expected to have closed the
// current file already.
//
// The saved frame's FILE pointer and name are moved into "frame", and the
// saved frame is emptied before it is deleted.  Previously the saved frame
// was copied and then deleted, and its destructor closed the very file that
// had just become current again; the name of the file being left was also
// leaked.
int Token::PopFrame()
{
    if( frame.next == NULL ) return 0;
    TokFrame *old = frame.next;

    // The name of the file being left was allocated with strdup().
    free( frame.fname );

    frame.next   = old->next;
    frame.source = old->source;
    frame.fname  = old->fname;
    frame.line   = old->line;
    frame.column = old->column;

    // Ownership has moved to "frame"; keep the destructor away from it.
    old->source = NULL;
    old->fname  = NULL;
    delete old;
    return 1;
}
/*-------------------------------------------------------------------------*
* Miscellaneous Functions *
* *
*-------------------------------------------------------------------------*/
void Token::Init()
{
case_sensitive = 1;
put_back = 0;
pushed = NullChar;
if_nesting = 0;
frame.source = NULL;
frame.next = NULL;
frame.fname = NULL;
first = NULL;
last = NULL;
table = NULL;
pushed = NullChar;
SearchArgs(); // Search command-line args for macro definitions.
}
// The text of the current token; the pointer refers to this object's own
// buffer.
const char* Token::Spelling() const
{
return spelling;
}
// The first character of the current token's text.
char Token::Char() const
{
return spelling[0];
}
// The name of the file currently being read, or an empty string when the
// current source has no name.  The empty string is returned as a literal
// directly; it used to be kept in a non-const "char *", a conversion that
// modern C++ does not permit.
const char* Token::FileName() const
{
    if( frame.fname == NULL ) return "";
    return frame.fname;
}
// The numeric value of the current token as a float: the float value for a
// float token, the converted integer value for an integer token, and zero
// for every other kind of token.
float Token::Fvalue() const
{
    if( type == T_integer ) return ivalue;
    if( type == T_float   ) return fvalue;
    return 0.0;
}
// Collects the characters up to the next '>' into "name" and terminates the
// string.  At most max-1 characters are stored; if no '>' has been found by
// then, T_name_too_long is reported.  Characters are fetched with
// NonWhite(), so white space and comments are not part of the name.
void Token::GetName( char *name, int max )
{
    int   c;
    char *out = name;
    for( int room = max - 1; room > 0; --room )
    {
        if( NonWhite(c) == '>' )
        {
            *out = NullChar;
            return;
        }
        *out++ = c;
    }
    Error( T_name_too_long );
}
void Token::AddPath( const char *new_path )
{
char *name = strdup( new_path );
if( name == NULL ) return;
TokPath *p = new TokPath;
p->next = NULL;
p->path = name;
if( first == NULL ) first = p;
else last->next = p;
last = p;
}
// Removes every search path and leaves the list empty.
// AddPath() stores strings obtained from strdup(), which allocates with
// malloc(); they are therefore released with free() rather than delete[].
void Token::ClearPaths()
{
    TokPath *p = first;
    while( p != NULL )
    {
        TokPath *q = p->next;
        free( p->path );  // release the string.
        delete p;         // delete the path structure.
        p = q;
    }
    first = NULL;
    last  = NULL;
}
FILE *Token::ResolveName( const char *name )
{
char resolved[128];
for( const TokPath *p = first; p != NULL; p = p->next )
{
strcpy( resolved, p->path );
strcat( resolved, "/" );
strcat( resolved, name );
FILE *fp = fopen( resolved, "r" );
if( fp != NULL ) return fp;
}
Error( T_file_not_found, name );
return NULL;
}
// Switches case-sensitive comparison for the "==" and "!=" operators on
// (non-zero) or off (zero).
// NOTE(review): the default argument appears only on this definition, not on
// the declaration in the class -- confirm whether callers can rely on it.
void Token::CaseSensitive( int on_off = 1 )
{
case_sensitive = on_off;
}
// Sets the file that receives a record of every token read; the setting is
// a static member and therefore shared by all Token instances.  Passing NULL
// switches the recording off.
void Token::Debug( FILE *fp )
{
debug = fp;
}
void Token::AddMacro( const char *macro, const char *repl, TokType t )
{
if( table == NULL ) // Create and initialize the table.
{
table = new TokMacroPtr[ HashConst ];
for( int j = 0; j < HashConst; j++ ) table[j] = NULL;
}
int i = HashName( macro );
TokMacro *m = new TokMacro;
m->next = table[i];
m->macro = strdup( macro );
m->repl = strdup( repl );
m->type = t;
table[i] = m;
}
// Records the program's command-line arguments in static members, where
// SearchArgs() later looks for macro definitions.  Only the pointers are
// stored; the argument strings are not copied.
void Token::Args( int argc_, char *argv_[] )
{
argc = argc_; // Set the static variables.
argv = argv_;
}
void Token::SearchArgs( )
{
TokType type = T_null;
for( int i = 1; i < argc; i++ )
{
if( strcmp( argv[i], "-macro" ) == 0 )
{
if( i+2 >= argc )
{
fprintf( stderr, "(Token) ERROR macro argument(s) missing\n" );
return;
}
char *macro = argv[i+1];
char *repl = argv[i+2];
if( isAlpha ( repl[0] ) ) type = T_ident ; else
if( isInteger( repl ) ) type = T_integer; else
type = T_float ;
AddMacro( macro, repl, type );
i += 2;
}
}
}
};

View File

@ -1,203 +0,0 @@
/***************************************************************************
* Token.h *
* *
* The Token class encapsulates a lexical analyzer for C++-like syntax. *
* A token instance is associated with one or more text files, and *
* grabs C++ tokens from them sequentially. There are many member *
* functions designed to make parsing easy, such as "==" operators for *
* strings and characters, and automatic conversion of numeric tokens *
* into numeric values. *
* *
* Files can be nested via #include directives, and both styles of C++ *
* comments are supported. *
* *
* *
* HISTORY *
* Name Date Description *
* *
* arvo 10/05/99 Fixed bug in TokFrame string allocation. *
* arvo 01/15/95 Added ifdef, ifndef, else, and endif. *
* arvo 02/13/94 Added Debug() member function. *
* arvo 01/22/94 Several sections rewritten. *
* arvo 06/19/93 Converted to C++ *
* arvo 07/15/89 Rewritten for scene description parser. *
* arvo 01/22/89 Initial coding. *
* *
*--------------------------------------------------------------------------*
* Copyright (C) 1999, James Arvo *
* *
* This program is free software; you can redistribute it and/or modify it *
* under the terms of the GNU General Public License as published by the *
* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html *
* *
* This program is distributed in the hope that it will be useful, but *
* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for *
* any particular purpose. See the GNU General Public License for more *
* details. *
* *
***************************************************************************/
#ifndef __TOKEN_INCLUDED__
#define __TOKEN_INCLUDED__
#include <iostream>
#include <stdio.h>
namespace ArvoMath {
// Size, in characters, of the buffers that hold a token's text.
const int MaxTokenLen = 128;
// The kinds of token.  T_char and T_numeric are pseudo-types that are only
// meaningful as the right-hand side of Token's "==" and "!=" operators.
typedef enum {
T_null,
T_char, // A string of length 1.
T_string,
T_integer,
T_float,
T_ident,
T_other,
T_numeric, // Either T_float or T_integer (use with == operator).
T_directive, // Directives like #include are not returned to the user.
T_nullmacro
} TokType;
// The lexical errors that Token::Error can report.
typedef enum {
T_malformed_float,
T_unterm_string,
T_unterm_comment,
T_file_not_found,
T_unknown_directive,
T_string_expected,
T_putback_error,
T_name_too_long,
T_no_endif,
T_extra_endif,
T_extra_else
} TokError;
// The reading position within one source file.  Frames are chained through
// "next" to keep track of nested files.
// NOTE(review): the copy constructor forwards to operator=, so a copy holds
// the same FILE pointer as the original -- confirm that only one of the two
// is ever responsible for closing it.
class TokFrame {
public:
TokFrame();
TokFrame( const TokFrame &frame ) { *this = frame; }
~TokFrame();
void operator=( const TokFrame & );
public:
TokFrame *next; // The next frame in the chain, or NULL.
FILE *source; // The file being read, or NULL.
char *fname; // The name of that file, or NULL.
int line; // Current line number.
int column; // Current column number.
};
// One entry of the singly linked list of search paths.
struct TokPath {
char *path;
TokPath *next;
};
// One macro definition; entries are chained through "next".
struct TokMacro {
char *macro; // The macro's name.
char *repl; // The replacement text.
TokType type; // The token type given to the replacement.
TokMacro *next;
};
// A lexical analyzer: an instance reads one token at a time from its current
// source file and exposes the most recently read token through the accessors
// below.
// NOTE(review): no copy constructor or assignment operator is declared while
// the class holds raw pointers -- confirm that instances are never copied.
class Token {
public:
// Constructors and destructor.
Token();
Token( const char *file_name );
Token( FILE *file_pointer );
~Token();
// Const data members for querying token information.
TokType Type() const { return type; } // The type of token found.
int Len() const { return length; } // The length of the token.
int Line() const { return frame.line; } // The line it was found on.
int Column() const { return Tcolumn; } // The column it began in.
long Ivalue() const { return ivalue; } // Token value if an integer.
float Fvalue() const; // Token value if int or float.
char Char() const; // The token (if a Len() == 1).
// Operators.
int operator == ( const char* ) const; // 1 if strings match.
int operator != ( const char* ) const; // 0 if strings match.
int operator == ( char ) const; // 1 if token is this char.
int operator != ( char ) const; // 0 if token is this char.
int operator == ( TokType ) const; // 1 if token is of this type.
int operator != ( TokType ) const; // 0 if token is of this type.
Token & operator ++ ( ); // (prefix) Get the next token.
Token & operator -- ( ); // (prefix) Put back one token.
Token & operator ++ ( int ); // (postfix) Undefined.
Token & operator -- ( int ); // (postfix) Undefined.
// State-setting member functions.
void Open( FILE * ); // Read already opened file.
void Open( const char * ); // Open the named file.
void CaseSensitive( int on_off ); // Applies to == and != operators.
void AddPath( const char * ); // Adds path for <...> includes.
void ClearPaths(); // Remove all search paths.
// Miscellaneous.
const char* Spelling() const; // The token itself.
const char* FileName() const; // Current file being lexed.
static void Debug( FILE * ); // Write all token streams to a file.
static void Args ( int argc, char *argv[] ); // Search args for macro settings.
void AddMacro( const char*, const char*, TokType type );
void SearchArgs();
private:
// Private member functions.
void Init();
int Getc ( int & );
void Unget( int c ) { pushed = c; } // Holds a single character only.
void Error( TokError error, const char *name = NULL );
int NonWhite( int & );
int HandleDirective();
int NextRawTok(); // No macro substitutions.
int NextTok();
void PushFrame( FILE *fp, char *fname = NULL );
int PopFrame();
void GetName( char *name, int max );
FILE *ResolveName( const char *name );
TokMacro *MacroLookup( const char *str ) const;
int MacroReplace( char *str, int &length, TokType &type ) const;
// Private data members.
TokPath *first; // Head of the search path list.
TokPath *last; // Tail of the search path list.
TokMacro **table; // Macro hash table, or NULL if none yet.
TokFrame frame; // State of the file currently being read.
TokType type;
long ivalue;
float fvalue;
int length;
int Tcolumn;
int put_back; // Non-zero while a token is put back.
int case_sensitive;
int pushed; // The character put back, or NullChar.
int if_nesting;
char spelling[ MaxTokenLen ];
char tempbuff[ MaxTokenLen ];
// Static data members.
static int argc;
static char **argv;
static FILE *debug;
};
// Predicate-style functions for testing token types.
// Non-zero when the token's type is T_null.
inline int Null( const Token &t )
{
    const int matches = ( t.Type() == T_null );
    return matches;
}
// Non-zero when the token's type is T_numeric.
inline int Numeric( const Token &t )
{
    const int matches = ( t.Type() == T_numeric );
    return matches;
}
// Non-zero when the token's type is T_string.
inline int StringP( const Token &t )
{
    const int matches = ( t.Type() == T_string );
    return matches;
}
};
#endif

View File

@ -1,94 +0,0 @@
/***************************************************************************
* Vec2.C *
* *
* Basic operations on 2-dimensional vectors. This special case is useful *
* because nearly all operations are performed inline. *
* *
* HISTORY *
* Name Date Description *
* *
* arvo 05/22/98 Added TimedVec2, extending Vec2. *
* arvo 06/17/93 Initial coding. *
* *
*--------------------------------------------------------------------------*
* Copyright (C) 1999, James Arvo *
* *
* This program is free software; you can redistribute it and/or modify it *
* under the terms of the GNU General Public License as published by the *
* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html *
* *
* This program is distributed in the hope that it will be useful, but *
* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for *
* any particular purpose. See the GNU General Public License for more *
* details. *
* *
***************************************************************************/
#include <math.h>
#include "ArvoMath.h"
#include "Vec2.h"
#include "form.h"
namespace ArvoMath {
// Out-of-line definitions of the constant vectors declared in class Vec2.
const Vec2 Vec2::Zero ( 0.0f, 0.0f );   // Same value the default constructor produces.
const Vec2 Vec2::Xaxis( 1.0f, 0.0f );
const Vec2 Vec2::Yaxis( 0.0f, 1.0f );
// Most routines are now inline.
// Divide both components of A by its length (as reported by Len, narrowed
// to float) and return that length.  A vector whose length compares equal
// to zero is left unchanged.
float Normalize( Vec2 &A )
{
    const float length = Len( A );
    if( length == 0.0 ) return length;
    A.X() /= length;
    A.Y() /= length;
    return length;
}
// Component-wise minimum of A and B.
Vec2 Min( const Vec2 &A, const Vec2 &B )
{
    const float low_x = Min( A.X(), B.X() );
    const float low_y = Min( A.Y(), B.Y() );
    return Vec2( low_x, low_y );
}
// Component-wise maximum of A and B.
Vec2 Max( const Vec2 &A, const Vec2 &B )
{
    const float high_x = Max( A.X(), B.X() );
    const float high_y = Max( A.Y(), B.Y() );
    return Vec2( high_x, high_y );
}
// Write both components of A on one line, each formatted with %9.5f,
// followed by a newline.
std::ostream &operator<<( std::ostream &out, const Vec2 &A )
{
    const float first  = A.X();
    const float second = A.Y();
    out << form( " %9.5f %9.5f\n", first, second );
    return out;
}
// Write the matrix as two formatted rows (each component with %9.5f),
// then std::endl, which adds one more newline and flushes the stream.
std::ostream &operator<<( std::ostream &out, const Mat2x2 &M )
{
    // Each row is emitted in its own statement.  When both form() calls sit
    // in one chained << expression, their evaluation order relative to the
    // insertions is unspecified before C++17, so the second call may run
    // before the first row has been written.
    // NOTE(review): form() presumably returns a pointer to a shared buffer
    // (form.h is not visible here); if so the chained form could print the
    // same row twice.  Separate statements are correct either way.
    out << form( " %9.5f %9.5f\n", M(0,0), M(0,1) );
    out << form( " %9.5f %9.5f\n", M(1,0), M(1,1) );
    out << std::endl;
    return out;
}
// Build the matrix whose first column is c1 and second column is c2.
Mat2x2::Mat2x2( const Vec2 &c1, const Vec2 &c2 )
{
    // Set() takes the entries in row-major order: (0,0), (0,1), (1,0), (1,1).
    Set( c1.X(), c2.X(),
         c1.Y(), c2.Y() );
}
// Solve the 2x2 linear system A x = b and return x.
// Vec2::Zero is returned when the 1-norm of A, or |det A| relative to that
// norm, does not exceed MachineEpsilon().
Vec2 Solve( const Mat2x2 &A, const Vec2 &b )
{
    const float  eps         = MachineEpsilon();
    const double determinant = det( A );
    const double norm        = Norm1( A );

    const bool singular = ( norm <= eps ) || ( Abs(determinant) <= eps * norm );
    if( singular ) return Vec2::Zero;

    // Adjugate of A applied to b; dividing by the determinant yields x.
    Vec2 numerator;
    numerator.X() =  A(1,1) * b.X() - A(0,1) * b.Y();
    numerator.Y() = -A(1,0) * b.X() + A(0,0) * b.Y();
    return numerator / determinant;
}
};

View File

@ -1,358 +0,0 @@
/***************************************************************************
* Vec2.h *
* *
* Basic operations on 2-dimensional vectors. This special case is useful *
* because nearly all operations are performed inline. *
* *
* HISTORY *
* Name Date Description *
* *
* arvo 05/22/98 Added TimedVec2, extending Vec2. *
* arvo 06/17/93 Initial coding. *
* *
*--------------------------------------------------------------------------*
* Copyright (C) 1999, James Arvo *
* *
* This program is free software; you can redistribute it and/or modify it *
* under the terms of the GNU General Public License as published by the *
* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html *
* *
* This program is distributed in the hope that it will be useful, but *
* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for *
* any particular purpose. See the GNU General Public License for more *
* details. *
* *
***************************************************************************/
#ifndef __VEC2_INCLUDED__
#define __VEC2_INCLUDED__
#include <math.h>
#include <iostream>
#include "ArvoMath.h"
namespace ArvoMath {
class Vec2; // 2-D floating-point vector.
class TimedVec2; // 2-D vector with a time stamp.
class Mat2x2; // 2x2 floating-point matrix.
// 2-D single-precision vector.  Components can be read or written by name
// (X, Y) or by index (operator[]).
class Vec2 {
public:
    Vec2( ) { x = 0.0; y = 0.0; }                       // Zero vector.
    Vec2( float a, float b ) { x = a; y = b; }
    Vec2( const Vec2 &A ) { x = A.X(); y = A.Y(); }
    ~Vec2() {}
    Vec2 &operator=( float s ) { return Set( s, s ); }  // Both components become s.
    Vec2 &operator=( const Vec2 &A ) { return Set( A.X(), A.Y() ); }
    float X() const { return x; }
    float Y() const { return y; }
    float &X() { return x; }
    float &Y() { return y; }
    // Indexed access: 0 selects x, any other index selects y.  The member is
    // chosen explicitly because stepping a pointer from &x to reach y is
    // undefined behaviour (x and y are separate objects, not array elements).
    float operator[]( int i ) const { return i == 0 ? x : y; }
    float &operator[]( int i ) { return i == 0 ? x : y; }
    // Assign both components; returns *this so calls can be chained.
    Vec2 &Set( float a, float b ) { x = a; y = b; return *this; }
    Vec2 &Set( const Vec2 &A ) { return Set( A.X(), A.Y() ); }
public:
    // Shared constant vectors; only declared here.
    static const Vec2 Zero;
    static const Vec2 Xaxis;
    static const Vec2 Yaxis;
protected:
    float x, y;
};
// This class simply adds a time field to the Vec2 class so that time-stamped
// coordinates can be easily inserted into objects such as Polylines.
class TimedVec2 : public Vec2 {
public:
TimedVec2() { time = 0; }
TimedVec2( const Vec2 &p , long u = 0 ) { Set( p ); time = u; }
TimedVec2( float x, float y, long u = 0 ) { Set(x,y); time = u; }
~TimedVec2() {}
Vec2 &Coord() { return *this; }
Vec2 Coord() const { return *this; }
long Time () const { return time; }
void SetTime( long u ) { time = u; }
protected:
long time;
};
// 2x2 single-precision matrix stored as m[row][column].
class Mat2x2 {
public:
    // All four entries zero.
    Mat2x2( ) { m[0][0] = 0; m[0][1] = 0; m[1][0] = 0; m[1][1] = 0; }
    // Entries given in row-major order: a b / c d.
    Mat2x2( float a, float b, float c, float d ) { Set( a, b, c, d ); }
    Mat2x2( const Vec2 &c1, const Vec2 &c2 );
    ~Mat2x2( ) {}
    Mat2x2 &operator*=( float scale );
    Mat2x2 operator* ( float scale ) const;
    void Set( float a, float b, float c, float d )
    {
        m[0][0] = a;
        m[0][1] = b;
        m[1][0] = c;
        m[1][1] = d;
    }
    // Element access by (row, column).
    float operator()( int i, int j ) const { return m[i][j]; }
    float &operator()( int i, int j ) { return m[i][j]; }
private:
    float m[2][2];
};
//==========================================
//=== Miscellaneous external functions ===
//==========================================
extern float Normalize( Vec2 &A );
extern Vec2 Min ( const Vec2 &A, const Vec2 &B );
extern Vec2 Max ( const Vec2 &A, const Vec2 &B );
//==========================================
//=== Norm-related functions ===
//==========================================
inline double LenSqr ( const Vec2 &A ) { return Sqr(A[0]) + Sqr(A[1]); }
// Euclidean length of A.
inline double Len( const Vec2 &A )
{
    const double squared = LenSqr( A );
    return sqrt( squared );
}
// Sum of the absolute values of the components of A.
inline double OneNorm( const Vec2 &A )
{
    const double total = Abs( A.X() ) + Abs( A.Y() );
    return total;
}
// Alias for Len( A ).
inline double TwoNorm( const Vec2 &A )
{
    const double length = Len( A );
    return length;
}
// MaxAbs of the two components of A.
inline float SupNorm( const Vec2 &A )
{
    const float largest = MaxAbs( A.X(), A.Y() );
    return largest;
}
//==========================================
//=== Addition ===
//==========================================
// Component-wise sum A + B.
inline Vec2 operator+( const Vec2 &A, const Vec2 &B )
{
    Vec2 sum( A );
    sum.X() += B.X();
    sum.Y() += B.Y();
    return sum;
}
// Add B to A in place and return A.
inline Vec2& operator+=( Vec2 &A, const Vec2 &B )
{
    A.X() = A.X() + B.X();
    A.Y() = A.Y() + B.Y();
    return A;
}
//==========================================
//=== Subtraction ===
//==========================================
// Component-wise difference A - B.
inline Vec2 operator-( const Vec2 &A, const Vec2 &B )
{
    Vec2 difference( A );
    difference.X() -= B.X();
    difference.Y() -= B.Y();
    return difference;
}
// Negation: both components of A with their signs flipped.
inline Vec2 operator-( const Vec2 &A )
{
    Vec2 negated;
    negated.Set( -A.X(), -A.Y() );
    return negated;
}
// Subtract B from A in place and return A.
inline Vec2& operator-=( Vec2 &A, const Vec2 &B )
{
    A.X() = A.X() - B.X();
    A.Y() = A.Y() - B.Y();
    return A;
}
//==========================================
//=== Multiplication ===
//==========================================
// Scalar times vector.
inline Vec2 operator*( float c, const Vec2 &A )
{
    const float scaled_x = c * A.X();
    const float scaled_y = c * A.Y();
    return Vec2( scaled_x, scaled_y );
}
// Vector times scalar.
inline Vec2 operator*( const Vec2 &A, float c )
{
    Vec2 scaled;
    scaled.Set( c * A.X(), c * A.Y() );
    return scaled;
}
// Inner (dot) product of A and B.
inline float operator*( const Vec2 &A, const Vec2 &B )
{
    const float dot = A.X() * B.X() + A.Y() * B.Y();
    return dot;
}
// Scale A by c in place and return A.
inline Vec2& operator*=( Vec2 &A, float c )
{
    A.X() = A.X() * c;
    A.Y() = A.Y() * c;
    return A;
}
//==========================================
//=== Division ===
//==========================================
// Divide each component of A by the scalar c.
inline Vec2 operator/( const Vec2 &A, float c )
{
    Vec2 quotient( A );
    quotient.X() /= c;
    quotient.Y() /= c;
    return quotient;
}
// Remove from A its component parallel to B, i.e. return
// A - B * (A.B / |B|^2).
// When the squared length of B is not positive the projection is undefined;
// A is returned unchanged instead of a NaN vector produced by 0/0.  The
// Vec3 overload of this operator applies the same guard.
inline Vec2 operator/( const Vec2 &A, const Vec2 &B )
{
    const double len_sq = LenSqr( B );
    if( len_sq > 0.0 ) return A - B * (( A * B ) / len_sq);
    return A;
}
//==========================================
//=== Comparison ===
//==========================================
// Non-zero when both components compare equal.
inline int operator==( const Vec2 &A, const Vec2 &B )
{
    if( A.X() != B.X() ) return 0;
    return A.Y() == B.Y();
}
// Non-zero when at least one component differs.
inline int operator!=( const Vec2 &A, const Vec2 &B )
{
    if( A.X() != B.X() ) return 1;
    return A.Y() != B.Y();
}
// Non-zero when A <= B holds in both components.
inline int operator<=( const Vec2 &A, const Vec2 &B )
{
    if( !( A.X() <= B.X() ) ) return 0;
    return A.Y() <= B.Y();
}
// Non-zero when A < B holds in both components.
inline int operator<( const Vec2 &A, const Vec2 &B )
{
    if( !( A.X() < B.X() ) ) return 0;
    return A.Y() < B.Y();
}
// Non-zero when A >= B holds in both components.
inline int operator>=( const Vec2 &A, const Vec2 &B )
{
    if( !( A.X() >= B.X() ) ) return 0;
    return A.Y() >= B.Y();
}
// Non-zero when A > B holds in both components.
inline int operator>( const Vec2 &A, const Vec2 &B )
{
    if( !( A.X() > B.X() ) ) return 0;
    return A.Y() > B.Y();
}
//==========================================
//=== Miscellaneous ===
//==========================================
// Inner product; alternate spelling of A * B.
inline float operator|( const Vec2 &A, const Vec2 &B )
{
    const float inner = A * B;
    return inner;
}
// Return A scaled by 1/|A|.  When the squared length is not positive the
// scale factor stays at that value, so a zero vector yields a zero vector.
inline Vec2 Unit( const Vec2 &A )
{
    float scale = LenSqr( A );
    if( scale > 0.0 )
    {
        scale = 1.0 / sqrt( scale );
    }
    return Vec2( scale * A.X(), scale * A.Y() );
}
// Return A divided by its length and store that length in len.
// When the squared length is not positive, len is set to 0 and A is
// returned unchanged.
inline Vec2 Unit( const Vec2 &A, float &len )
{
    const float squared = LenSqr( A );
    if( !( squared > 0.0 ) )
    {
        len = 0.0;
        return A;
    }
    len = sqrt( squared );
    return A / len;
}
// Unit vector in the direction (x, y).
inline Vec2 Unit( float x, float y )
{
    const Vec2 direction( x, y );
    return Unit( direction );
}
inline double dist( const Vec2 &A, const Vec2 &B )
{
return Len( A - B );
}
// 2-D cross product: A.x * B.y - A.y * B.x.
inline float operator^( const Vec2 &A, const Vec2 &B )
{
    const float cross = A.X() * B.Y() - A.Y() * B.X();
    return cross;
}
// Quadrant number (1..4) of A; a component >= 0 counts as the positive side.
inline int Quadrant( const Vec2 &A )
{
    const bool right_half = ( A.X() >= 0.0 );
    if( A.Y() >= 0.0 )
    {
        if( right_half ) return 1;
        return 2;
    }
    if( right_half ) return 4;
    return 3;
}
// A vector orthogonal to A: (-A.y, A.x).
inline Vec2 OrthogonalTo( const Vec2 &A )
{
    Vec2 perpendicular;
    perpendicular.Set( -A.Y(), A.X() );
    return perpendicular;
}
// Point on the line through A and B at parameter t: (1 - t) A + t B.
// t = 0 gives A and t = 1 gives B; the intended range is [0,1].
inline Vec2 Interpolate( const Vec2 &A, const Vec2 &B, float t )
{
    const Vec2 from_a = ( 1.0 - t ) * A;
    const Vec2 from_b = t * B;
    return from_a + from_b;
}
//==========================================
//=== Operations involving Matrices ===
//==========================================
// Outer product: entry (i,j) is A[i] * B[j].
inline Mat2x2 Outer( const Vec2 &A, const Vec2 &B )
{
    return Mat2x2(
        A.X() * B.X(), A.X() * B.Y(),
        A.Y() * B.X(), A.Y() * B.Y()
        );
}
// Matrix-vector product M * A.
inline Vec2 operator*( const Mat2x2 &M, const Vec2 &A )
{
    const float row0 = M(0,0) * A.X() + M(0,1) * A.Y();
    const float row1 = M(1,0) * A.X() + M(1,1) * A.Y();
    return Vec2( row0, row1 );
}
// Multiply every entry by scale, in place.
inline Mat2x2 &Mat2x2::operator*=( float scale )
{
    for( int row = 0; row < 2; row++ )
    {
        for( int col = 0; col < 2; col++ )
        {
            m[row][col] *= scale;
        }
    }
    return *this;
}
// Return a copy of this matrix with every entry multiplied by scale.
inline Mat2x2 Mat2x2::operator*( float scale ) const
{
    Mat2x2 scaled;
    scaled.Set( scale * m[0][0], scale * m[0][1],
                scale * m[1][0], scale * m[1][1] );
    return scaled;
}
// Scalar times matrix; forwards to Mat2x2::operator*( float ).
inline Mat2x2 operator*( float scale, const Mat2x2 &M )
{
    const Mat2x2 product = M * scale;
    return product;
}
// Largest absolute row sum of A (the larger of |a00|+|a01| and |a10|+|a11|).
inline float Norm1( const Mat2x2 &A )
{
    const float largest = Max(
        Abs(A(0,0)) + Abs(A(0,1)),
        Abs(A(1,0)) + Abs(A(1,1))
        );
    return largest;
}
// Determinant of A.
inline double det( const Mat2x2 &A )
{
    const double value = A(0,0) * A(1,1) - A(1,0) * A(0,1);
    return value;
}
extern Vec2 Solve( // Return solution x of the system Ax = b.
const Mat2x2 &A,
const Vec2 &b
);
//==========================================
//=== Output routines ===
//==========================================
extern std::ostream &operator<<( std::ostream &out, const Vec2 & );
extern std::ostream &operator<<( std::ostream &out, const Mat2x2 & );
};
#endif

View File

@ -1,119 +0,0 @@
/***************************************************************************
* Vec3.C *
* *
* Basic operations on 3-dimensional vectors. This special case is useful *
* because many operations are performed inline. *
* *
* HISTORY *
* Name Date Description *
* *
* arvo 10/27/94 Reorganized (removed Col & Row distinction). *
* arvo 06/14/93 Initial coding. *
* *
*--------------------------------------------------------------------------*
* Copyright (C) 1994, James Arvo *
* *
* This program is free software; you can redistribute it and/or modify it *
* under the terms of the GNU General Public License as published by the *
* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html *
* *
* This program is distributed in the hope that it will be useful, but *
* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for *
* any particular purpose. See the GNU General Public License for more *
* details. *
* *
***************************************************************************/
#include <stdio.h>
#include <math.h>
#include "ArvoMath.h"
#include "Vec3.h"
#include "form.h"
namespace ArvoMath {
// Scale A in place by the reciprocal of its length and return that length
// (as reported by Len, narrowed to float).  A is left unchanged when the
// length is not positive.
float Normalize( Vec3 &A )
{
    const float length = Len( A );
    if( !( length > 0.0 ) ) return length;

    const double inverse = 1.0 / length;
    A.X() *= inverse;
    A.Y() *= inverse;
    A.Z() *= inverse;
    return length;
}
// ArcCos of the normalised inner product of A and B.  Returns 0 when the
// product of the squared lengths is not positive.
double Angle( const Vec3 &A, const Vec3 &B )
{
    const double len_sq_product = LenSqr(A) * LenSqr(B);
    if( len_sq_product <= 0.0 ) return 0.0;

    const double cosine = (A * B) / sqrt(len_sq_product);
    return ArcCos( cosine );
}
/*-------------------------------------------------------------------------*
* O R T H O N O R M A L *
* *
* On Input A, B....: Two linearly independent 3-space vectors. *
* *
* On Return A.......: Unit vector pointing in original A direction. *
* B.......: Unit vector orthogonal to A and in subspace spanned *
* by original A and B vectors. *
* C.......: Unit vector orthogonal to both A and B, chosen so *
* that A-B-C forms a right-handed coordinate system. *
* *
*-------------------------------------------------------------------------*/
// Returns 0 on success.  Returns 1 if A, or B after its A-component has
// been removed, normalises to zero length; C is not written in that case.
int Orthonormal( Vec3 &A, Vec3 &B, Vec3 &C )
{
    const float length_a = Normalize( A );
    if( length_a == 0.0 ) return 1;

    B /= A;   // Strip the part of B that is parallel to A.
    const float length_b = Normalize( B );
    if( length_b == 0.0 ) return 1;

    C = A ^ B;
    return 0;
}
// Two-vector variant: normalise A, remove A's direction from B, normalise B.
// Returns 0 on success, 1 if either normalisation reports zero length.
int Orthonormal( Vec3 &A, Vec3 &B )
{
    const float length_a = Normalize( A );
    if( length_a == 0.0 ) return 1;

    B /= A;   // Strip the part of B that is parallel to A.
    const float length_b = Normalize( B );
    if( length_b == 0.0 ) return 1;

    return 0;
}
/*-------------------------------------------------------------------------*
* O R T H O G O N A L T O *
* *
* Returns a vector that is orthogonal to A (but of arbitrary length). *
* *
*-------------------------------------------------------------------------*/
// Picks a pair of components to swap (negating one) based on which
// component is at least half the SupNorm of A.  A zero vector yields the
// X axis.
Vec3 OrthogonalTo( const Vec3 &A )
{
    const float threshold = 0.5 * SupNorm( A );
    if( threshold == 0.0 )
    {
        return Vec3( 1.0, 0.0, 0.0 );
    }
    if( threshold <= Abs(A.X()) )
    {
        return Vec3( -A.Y(), A.X(), 0.0 );
    }
    if( threshold <= Abs(A.Y()) )
    {
        return Vec3( 0.0, -A.Z(), A.Y() );
    }
    return Vec3( A.Z(), 0.0, -A.X() );
}
// Component-wise minimum of A and B.
Vec3 Min( const Vec3 &A, const Vec3 &B )
{
    const float low_x = Min( A.X(), B.X() );
    const float low_y = Min( A.Y(), B.Y() );
    const float low_z = Min( A.Z(), B.Z() );
    return Vec3( low_x, low_y, low_z );
}
// Component-wise maximum of A and B.
Vec3 Max( const Vec3 &A, const Vec3 &B )
{
    const float high_x = Max( A.X(), B.X() );
    const float high_y = Max( A.Y(), B.Y() );
    const float high_z = Max( A.Z(), B.Z() );
    return Vec3( high_x, high_y, high_z );
}
// Write the three components of A, each formatted with %9.5f, then
// std::endl (newline plus flush).
std::ostream &operator<<( std::ostream &out, const Vec3 &A )
{
    out << form( " %9.5f %9.5f %9.5f", A.X(), A.Y(), A.Z() );
    out << std::endl;
    return out;
}
};

View File

@ -1,517 +0,0 @@
/***************************************************************************
* Vec3.h *
* *
* Basic operations on 3-dimensional vectors. This special case is useful *
* because many operations are performed inline. *
* *
* HISTORY *
* Name Date Description *
* *
* arvo 10/27/94 Reorganized (removed Col & Row distinction). *
* arvo 06/14/93 Initial coding. *
* *
*--------------------------------------------------------------------------*
* Copyright (C) 1994, James Arvo *
* *
* This program is free software; you can redistribute it and/or modify it *
* under the terms of the GNU General Public License as published by the *
* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html *
* *
* This program is distributed in the hope that it will be useful, but *
* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for *
* any particular purpose. See the GNU General Public License for more *
* details. *
* *
***************************************************************************/
#ifndef __VEC3_INCLUDED__
#define __VEC3_INCLUDED__
#include <math.h>
#include <iostream>
#include "Vec2.h"
namespace ArvoMath {
// 3-D single-precision vector.  Components can be read or written by name
// (X, Y, Z) or by index (operator[]).
class Vec3 {
public:
    Vec3( float c = 0.0 ) { x = c; y = c; z = c; }      // All components equal to c.
    Vec3( float a, float b, float c ) { x = a; y = b; z = c; }
    Vec3( const Vec3 &A ) { x = A.X(); y = A.Y(); z = A.Z(); }
    void operator=( float c ) { x = c; y = c; z = c; }
    void operator=( const Vec3 &A ) { x = A.X(); y = A.Y(); z = A.Z(); }
    void operator=( const Vec2 &A ) { x = A.X(); y = A.Y(); z = 0.0; }  // z is cleared.
    ~Vec3() {}
    float X() const { return x; }
    float Y() const { return y; }
    float Z() const { return z; }
    float & X() { return x; }
    float & Y() { return y; }
    float & Z() { return z; }
    // Indexed access: 0 selects x, 1 selects y, any other index selects z.
    // The member is chosen explicitly because stepping a pointer from &x to
    // reach y or z is undefined behaviour (the members are separate objects,
    // not array elements).
    float operator[]( int i ) const { return i == 0 ? x : ( i == 1 ? y : z ); }
    float & operator[]( int i ) { return i == 0 ? x : ( i == 1 ? y : z ); }
private:
    float x, y, z;
};
//class Mat3x3 {
//public:
// inline Mat3x3( );
// Mat3x3( const Mat3x3 &M ) { *this = M; }
// Mat3x3( const Vec3 &, const Vec3 &, const Vec3 & ); // Three columns.
// ~Mat3x3( ) {}
// float operator()( int i, int j ) const { return m[i][j]; }
// float & operator()( int i, int j ) { return m[i][j]; }
// Mat3x3 & operator=( float );
// Mat3x3 & operator=( const Mat3x3 & );
// inline void ScaleRows( float, float, float );
// inline void ScaleCols( float, float, float );
// void Col( int n, const Vec3 & );
// const float *Base() const { return &(m[0][0]); }
//private:
// float m[3][3];
//};
//class Mat4x4 {
//public:
// Mat4x4( );
// Mat4x4( const Mat4x4 &M ) { *this = M; }
// Mat4x4( const Mat3x3 &M ) ;
// ~Mat4x4( ) {}
// float operator()( int i, int j ) const { return m[i][j]; }
// float & operator()( int i, int j ) { return m[i][j]; }
// Mat4x4 & operator=( float );
// Mat4x4 & operator=( const Mat4x4 & );
// void Row( int i, int j, const Vec3 & );
// void Col( int i, int j, const Vec3 & );
// void ScaleRows( float, float, float, float );
// void ScaleCols( float, float, float, float );
// const float *Base() const { return &(m[0][0]); }
//private:
// float m[4][4];
//};
//==========================================
//=== External operators ===
//==========================================
//extern Vec3 operator * ( const Mat4x4 &, const Vec3 & );
//extern Vec3 operator * ( const Vec3 &, const Mat4x4 & );
//extern Mat3x3 operator * ( float , const Mat3x3 & );
//extern Mat3x3 operator * ( const Mat3x3 &, float );
//extern Mat3x3 operator / ( const Mat3x3 &, double );
//extern Mat3x3 & operator *=( Mat3x3 &, float );
//extern Mat3x3 & operator *=( Mat3x3 &, const Mat3x3 & );
//extern Mat3x3 operator * ( const Mat3x3 &, const Mat3x3 & );
//extern Mat3x3 operator + ( const Mat3x3 &, const Mat3x3 & );
//extern Mat3x3 & operator +=( Mat3x3 &, const Mat3x3 & );
//extern Mat3x3 operator - ( const Mat3x3 &, const Mat3x3 & );
//extern Mat3x3 & operator -=( Mat3x3 &, const Mat3x3 & );
//extern Mat4x4 operator * ( float , const Mat4x4 & );
//extern Mat4x4 operator * ( const Mat4x4 &, float );
//extern Mat4x4 operator / ( const Mat4x4 &, float );
//extern Mat4x4 & operator *=( Mat4x4 &, float );
//extern Mat4x4 operator * ( const Mat4x4 &, const Mat4x4 & );
//extern Mat4x4 operator + ( const Mat4x4 &, const Mat4x4 & );
//extern Mat4x4 & operator +=( Mat4x4 &, const Mat4x4 & );
//extern Mat4x4 operator - ( const Mat4x4 &, const Mat4x4 & );
//extern Mat4x4 & operator -=( Mat4x4 &, const Mat4x4 & );
//==========================================
//=== Miscellaneous external functions ===
//==========================================
//extern Vec3 OrthogonalTo( const Vec3 & ); // A vector orthogonal to that given.
//extern Vec3 Min ( const Vec3 &, const Vec3 & );
//extern Vec3 Max ( const Vec3 &, const Vec3 & );
//extern double Angle ( const Vec3 &, const Vec3 & );
//extern int Orthonormal ( Vec3 &, Vec3 & );
//extern int Orthonormal ( Vec3 &, Vec3 &, Vec3 & );
//extern float Trace ( const Mat3x3 & );
//extern float Normalize ( Vec3 & );
//extern float Norm1 ( const Mat3x3 & );
//extern float SupNorm ( const Mat3x3 & );
//extern double Determinant ( const Mat3x3 & );
//extern Mat3x3 Transp ( const Mat3x3 & );
//extern Mat3x3 Householder ( const Vec3 &, const Vec3 & );
//extern Mat3x3 Householder ( const Vec3 & );
//extern Mat3x3 Rotation3x3 ( float, float, float ); // Values in [0,1].
//extern Mat3x3 Inverse ( const Mat3x3 & );
//extern Mat3x3 Diag3x3 ( const Vec3 & );
//extern Mat3x3 Diag3x3 ( float, float, float );
//extern Mat3x3 Rotation3x3 ( const Vec3 &Axis, float angle );
//extern Mat4x4 Rotation4x4 ( const Vec3 &Axis, const Vec3 &Origin, float angle );
//==========================================
//=== Norm-related functions ===
//==========================================
inline double LenSqr ( const Vec3 &A ) { return Sqr(A[0]) + Sqr(A[1]) + Sqr(A[2]); }
// Euclidean length of A.
inline double Len( const Vec3 &A )
{
    const double squared = LenSqr( A );
    return Sqrt( squared );
}
inline double Norm1 ( const Vec3 &A ) { return Abs(A[0]) + Abs(A[1]) + Abs(A[2]); }
// Alias for Len( A ).
inline double Norm2( const Vec3 &A )
{
    const double length = Len( A );
    return length;
}
inline float SupNorm( const Vec3 &A ) { return MaxAbs( A[0], A[1], A[2] ); }
//==========================================
//=== Addition ===
//==========================================
// Component-wise sum A + B.
inline Vec3 operator+( const Vec3 &A, const Vec3 &B )
{
    Vec3 sum( A );
    sum.X() += B.X();
    sum.Y() += B.Y();
    sum.Z() += B.Z();
    return sum;
}
// Add B to A in place and return A.
inline Vec3& operator+=( Vec3 &A, const Vec3 &B )
{
    A.X() = A.X() + B.X();
    A.Y() = A.Y() + B.Y();
    A.Z() = A.Z() + B.Z();
    return A;
}
//==========================================
//=== Subtraction ===
//==========================================
// Component-wise difference A - B.
inline Vec3 operator-( const Vec3 &A, const Vec3 &B )
{
    Vec3 difference( A );
    difference.X() -= B.X();
    difference.Y() -= B.Y();
    difference.Z() -= B.Z();
    return difference;
}
// Negation: every component of A with its sign flipped.
inline Vec3 operator-( const Vec3 &A )
{
    const float neg_x = -A.X();
    const float neg_y = -A.Y();
    const float neg_z = -A.Z();
    return Vec3( neg_x, neg_y, neg_z );
}
// Subtract B from A in place and return A.
inline Vec3& operator-=( Vec3 &A, const Vec3 &B )
{
    A.X() = A.X() - B.X();
    A.Y() = A.Y() - B.Y();
    A.Z() = A.Z() - B.Z();
    return A;
}
//==========================================
//=== Multiplication ===
//==========================================
// Scalar times vector.
inline Vec3 operator*( float a, const Vec3 &x )
{
    const float scaled_x = a * x.X();
    const float scaled_y = a * x.Y();
    const float scaled_z = a * x.Z();
    return Vec3( scaled_x, scaled_y, scaled_z );
}
// Vector times scalar.
inline Vec3 operator*( const Vec3 &x, float a )
{
    Vec3 scaled( x );
    scaled.X() = a * x.X();
    scaled.Y() = a * x.Y();
    scaled.Z() = a * x.Z();
    return scaled;
}
// Inner (dot) product of A and B.
inline float operator*( const Vec3 &A, const Vec3 &B )
{
    const float dot = A.X() * B.X() + A.Y() * B.Y() + A.Z() * B.Z();
    return dot;
}
// Scale A by a in place and return A.
inline Vec3& operator*=( Vec3 &A, float a )
{
    A.X() = A.X() * a;
    A.Y() = A.Y() * a;
    A.Z() = A.Z() * a;
    return A;
}
//inline Vec3& operator*=( Vec3 &A, const Mat3x3 &M ) // A = M * A
//{
// float x = M(0,0) * A.X() + M(0,1) * A.Y() + M(0,2) * A.Z();
// float y = M(1,0) * A.X() + M(1,1) * A.Y() + M(1,2) * A.Z();
// float z = M(2,0) * A.X() + M(2,1) * A.Y() + M(2,2) * A.Z();
// A.X() = x;
// A.Y() = y;
// A.Z() = z;
// return A;
//}
//inline Vec3& operator*=( Vec3 &A, const Mat4x4 &M ) // A = M * A
//{
// float x = M(0,0) * A.X() + M(0,1) * A.Y() + M(0,2) * A.Z() + M(0,3);
// float y = M(1,0) * A.X() + M(1,1) * A.Y() + M(1,2) * A.Z() + M(1,3);
// float z = M(2,0) * A.X() + M(2,1) * A.Y() + M(2,2) * A.Z() + M(2,3);
// A.X() = x;
// A.Y() = y;
// A.Z() = z;
// return A;
//}
//==========================================
//=== Division ===
//==========================================
// Divide A by the scalar c (implemented as multiplication by 1/c).
inline Vec3 operator/( const Vec3 &A, double c )
{
    const double reciprocal = 1.0 / c;
    Vec3 quotient( A );
    quotient.X() *= reciprocal;
    quotient.Y() *= reciprocal;
    quotient.Z() *= reciprocal;
    return quotient;
}
// Divide every component of A by a, in place, and return A.
inline Vec3& operator/=( Vec3 &A, double a )
{
    A.X() = A.X() / a;
    A.Y() = A.Y() / a;
    A.Z() = A.Z() / a;
    return A;
}
// Remove from A its component parallel to B.  When the squared length of B
// is not positive, A is returned unchanged.
inline Vec3 operator/( const Vec3 &A, const Vec3 &B )
{
    const double len_sq = LenSqr( B );
    if( !( len_sq > 0.0 ) ) return A;
    return A - B * (( A * B ) / len_sq);
}
// In-place version: strip from A its component parallel to B.  A is left
// untouched when the squared length of B is not positive.
inline void operator/=( Vec3 &A, const Vec3 &B )
{
    const double len_sq = LenSqr( B );
    if( !( len_sq > 0.0 ) ) return;
    A -= B * (( A * B ) / len_sq);
}
//==========================================
//=== Miscellaneous ===
//==========================================
// Inner product; alternate spelling of A * B.
inline float operator|( const Vec3 &A, const Vec3 &B )
{
    const float inner = A * B;
    return inner;
}
// A divided by its length, or the zero vector when the squared length is
// not positive.
inline Vec3 Unit( const Vec3 &A )
{
    const double squared = LenSqr( A );
    if( squared > 0.0 )
    {
        return A / sqrt(squared);
    }
    return Vec3(0,0,0);
}
// Unit vector in the direction (x, y, z).
inline Vec3 Unit( float x, float y, float z )
{
    const Vec3 direction( x, y, z );
    return Unit( direction );
}
// Unit vector along the part of A that remains after removing its
// component parallel to B.
inline Vec3 Ortho( const Vec3 &A, const Vec3 &B )
{
    const Vec3 residual = A / B;
    return Unit( residual );
}
// Non-zero when every component of A compares equal to x.
inline int operator==( const Vec3 &A, float x )
{
    if( A.X() != x ) return 0;
    if( A.Y() != x ) return 0;
    return A.Z() == x;
}
// Cross product A x B.
inline Vec3 operator^( const Vec3 &A, const Vec3 &B )
{
    const float cross_x = A.Y() * B.Z() - A.Z() * B.Y();
    const float cross_y = A.Z() * B.X() - A.X() * B.Z();
    const float cross_z = A.X() * B.Y() - A.Y() * B.X();
    return Vec3( cross_x, cross_y, cross_z );
}
inline double dist( const Vec3 &A, const Vec3 &B )
{
return Len( A - B );
}
inline double Dihedral( const Vec3 &A, const Vec3 &B, const Vec3 &C )
{
return ArcCos( Unit( A ^ B ) * Unit( C ^ B ) );
}
// Project A onto B.  Yields the zero vector when the squared length of B
// is not positive.
inline Vec3 operator>>( const Vec3 &A, const Vec3 &B )
{
    const double len_sq = LenSqr( B );
    if( len_sq > 0.0 )
    {
        return B * (( A * B ) / len_sq);
    }
    return Vec3();
}
// Project B onto A (operator>> with the operands swapped).
inline Vec3 operator<<( const Vec3 &A, const Vec3 &B )
{
    const Vec3 projected = B >> A;
    return projected;
}
inline double Triple( const Vec3 &A, const Vec3 &B, const Vec3 &C )
{
return ( A ^ B ) * C;
}
//==========================================
//=== Operations involving Matrices ===
//==========================================
//inline Mat3x3 Outer( const Vec3 &A, const Vec3 &B ) // Outer product.
//{
// Mat3x3 C;
// C(0,0) = A.X() * B.X();
// C(0,1) = A.X() * B.Y();
// C(0,2) = A.X() * B.Z();
// C(1,0) = A.Y() * B.X();
// C(1,1) = A.Y() * B.Y();
// C(1,2) = A.Y() * B.Z();
// C(2,0) = A.Z() * B.X();
// C(2,1) = A.Z() * B.Y();
// C(2,2) = A.Z() * B.Z();
// return C;
//}
//inline Vec3 operator*( const Mat3x3 &M, const Vec3 &A )
//{
// return Vec3(
// M(0,0) * A[0] + M(0,1) * A[1] + M(0,2) * A[2],
// M(1,0) * A[0] + M(1,1) * A[1] + M(1,2) * A[2],
// M(2,0) * A[0] + M(2,1) * A[1] + M(2,2) * A[2]);
//}
//inline Vec3 operator*( const Vec3 &A, const Mat3x3 &M )
//{
// return Vec3(
// A[0] * M(0,0) + A[1] * M(1,0) + A[2] * M(2,0),
// A[0] * M(0,1) + A[1] * M(1,1) + A[2] * M(2,1),
// A[0] * M(0,2) + A[1] * M(1,2) + A[2] * M(2,2));
//}
////==========================================
////=== Operations on Matrices ===
////==========================================
//inline Mat3x3 operator+( const Mat3x3 &A, const Mat3x3 &B )
//{
// Mat3x3 C;
// C(0,0) = A(0,0) + B(0,0); C(0,1) = A(0,1) + B(0,1); C(0,2) = A(0,2) + B(0,2);
// C(1,0) = A(1,0) + B(1,0); C(1,1) = A(1,1) + B(1,1); C(1,2) = A(1,2) + B(1,2);
// C(2,0) = A(2,0) + B(2,0); C(2,1) = A(2,1) + B(2,1); C(2,2) = A(2,2) + B(2,2);
// return C;
//}
//inline Mat3x3 operator-( const Mat3x3 &A, const Mat3x3 &B )
//{
// Mat3x3 C;
// C(0,0) = A(0,0) - B(0,0); C(0,1) = A(0,1) - B(0,1); C(0,2) = A(0,2) - B(0,2);
// C(1,0) = A(1,0) - B(1,0); C(1,1) = A(1,1) - B(1,1); C(1,2) = A(1,2) - B(1,2);
// C(2,0) = A(2,0) - B(2,0); C(2,1) = A(2,1) - B(2,1); C(2,2) = A(2,2) - B(2,2);
// return C;
//}
//inline Mat3x3 operator*( const Mat3x3 &A, const Mat3x3 &B )
//{
// Mat3x3 C;
// C(0,0) = A(0,0) * B(0,0) + A(0,1) * B(1,0) + A(0,2) * B(2,0);
// C(0,1) = A(0,0) * B(0,1) + A(0,1) * B(1,1) + A(0,2) * B(2,1);
// C(0,2) = A(0,0) * B(0,2) + A(0,1) * B(1,2) + A(0,2) * B(2,2);
// C(1,0) = A(1,0) * B(0,0) + A(1,1) * B(1,0) + A(1,2) * B(2,0);
// C(1,1) = A(1,0) * B(0,1) + A(1,1) * B(1,1) + A(1,2) * B(2,1);
// C(1,2) = A(1,0) * B(0,2) + A(1,1) * B(1,2) + A(1,2) * B(2,2);
// C(2,0) = A(2,0) * B(0,0) + A(2,1) * B(1,0) + A(2,2) * B(2,0);
// C(2,1) = A(2,0) * B(0,1) + A(2,1) * B(1,1) + A(2,2) * B(2,1);
// C(2,2) = A(2,0) * B(0,2) + A(2,1) * B(1,2) + A(2,2) * B(2,2);
// return C;
//}
//inline void Mat3x3::ScaleRows( float a, float b, float c )
//{
// m[0][0] *= a; m[0][1] *= a; m[0][2] *= a;
// m[1][0] *= b; m[1][1] *= b; m[1][2] *= b;
// m[2][0] *= c; m[2][1] *= c; m[2][2] *= c;
//}
//inline void Mat3x3::ScaleCols( float a, float b, float c )
//{
// m[0][0] *= a; m[0][1] *= b; m[0][2] *= c;
// m[1][0] *= a; m[1][1] *= b; m[1][2] *= c;
// m[2][0] *= a; m[2][1] *= b; m[2][2] *= c;
//}
//==========================================
//=== Special Matrices ===
//==========================================
//inline Mat3x3::Mat3x3()
//{
// m[0][0] = 0; m[0][1] = 0; m[0][2] = 0;
// m[1][0] = 0; m[1][1] = 0; m[1][2] = 0;
// m[2][0] = 0; m[2][1] = 0; m[2][2] = 0;
//}
//inline Mat3x3 Ident3x3()
//{
// Mat3x3 I;
// I(0,0) = 1.0;
// I(1,1) = 1.0;
// I(2,2) = 1.0;
// return I;
//}
//inline Mat4x4 Ident4x4()
//{
// Mat4x4 I;
// I(0,0) = 1.0;
// I(1,1) = 1.0;
// I(2,2) = 1.0;
// I(3,3) = 1.0;
// return I;
//}
//inline void Adjoint( const Mat3x3 &M, Mat3x3 &A )
//{
// A(0,0) = M(1,1) * M(2,2) - M(1,2) * M(2,1);
// A(0,1) = M(1,2) * M(2,0) - M(1,0) * M(2,2);
// A(0,2) = M(1,0) * M(2,1) - M(1,1) * M(2,0);
// A(1,0) = M(0,2) * M(2,1) - M(0,1) * M(2,2);
// A(1,1) = M(0,0) * M(2,2) - M(0,2) * M(2,0);
// A(1,2) = M(0,1) * M(2,0) - M(0,0) * M(2,1);
// A(2,0) = M(0,1) * M(1,2) - M(0,2) * M(1,1);
// A(2,1) = M(0,2) * M(1,0) - M(0,0) * M(1,2);
// A(2,2) = M(0,0) * M(1,1) - M(0,1) * M(1,0);
//}
//inline void TranspAdjoint( const Mat3x3 &M, Mat3x3 &A )
//{
// A(0,0) = M(1,1) * M(2,2) - M(1,2) * M(2,1);
// A(1,0) = M(1,2) * M(2,0) - M(1,0) * M(2,2);
// A(2,0) = M(1,0) * M(2,1) - M(1,1) * M(2,0);
// A(0,1) = M(0,2) * M(2,1) - M(0,1) * M(2,2);
// A(1,1) = M(0,0) * M(2,2) - M(0,2) * M(2,0);
// A(2,1) = M(0,1) * M(2,0) - M(0,0) * M(2,1);
// A(0,2) = M(0,1) * M(1,2) - M(0,2) * M(1,1);
// A(1,2) = M(0,2) * M(1,0) - M(0,0) * M(1,2);
// A(2,2) = M(0,0) * M(1,1) - M(0,1) * M(1,0);
//}
//inline void Adjoint( const Mat3x3 &M, Mat3x3 &A, double &det )
//{
// Adjoint( M, A );
// det = A(0,0) * M(0,0) + A(1,0) * M(1,0) + A(2,0) * M(2,0);
//}
//inline void TranspAdjoint( const Mat3x3 &M, Mat3x3 &A, double &det )
//{
// TranspAdjoint( M, A );
// det = A(0,0) * M(0,0) + A(0,1) * M(1,0) + A(0,2) * M(2,0);
//}
//==========================================
//=== Output routines ===
//==========================================
extern std::ostream &operator<<( std::ostream &out, const Vec3 & );
//extern std::ostream &operator<<( std::ostream &out, const Mat3x3 & );
//extern std::ostream &operator<<( std::ostream &out, const Mat4x4 & );
};
#endif

View File

@ -1,79 +0,0 @@
/***************************************************************************
* Vec4.C *
* *
* Basic operations on 4-dimensional vectors. This special case is useful *
* because many operations are performed inline. *
* *
* HISTORY *
* Name Date Description *
* *
* walt 6/26/07 Edited Vec4 to make this new class *
* arvo 10/27/94 Reorganized (removed Col & Row distinction). *
* arvo 06/14/93 Initial coding. *
* *
*--------------------------------------------------------------------------*
* Copyright (C) 1994, James Arvo *
* *
* This program is free software; you can redistribute it and/or modify it *
* under the terms of the GNU General Public License as published by the *
* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html *
* *
* This program is distributed in the hope that it will be useful, but *
* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for *
* any particular purpose. See the GNU General Public License for more *
* details. *
* *
***************************************************************************/
#include <stdio.h>
#include <math.h>
#include "ArvoMath.h"
#include "Vec4.h"
#include "form.h"
namespace ArvoMath {
// Scale A in place by the reciprocal of its length and return that length
// (as reported by Len, narrowed to float).  A is left unchanged when the
// length is not positive.
float Normalize( Vec4 &A )
{
    const float length = Len( A );
    if( !( length > 0.0 ) ) return length;

    const double inverse = 1.0 / length;
    A.X() *= inverse;
    A.Y() *= inverse;
    A.Z() *= inverse;
    A.W() *= inverse;
    return length;
}
// ArcCos of the normalised inner product of A and B.  Returns 0 when the
// product of the squared lengths is not positive.
double Angle( const Vec4 &A, const Vec4 &B )
{
    const double len_sq_product = LenSqr(A) * LenSqr(B);
    if( len_sq_product <= 0.0 ) return 0.0;

    const double cosine = (A * B) / sqrt(len_sq_product);
    return ArcCos( cosine );
}
// Component-wise minimum of A and B.
Vec4 Min( const Vec4 &A, const Vec4 &B )
{
    const float low_x = Min( A.X(), B.X() );
    const float low_y = Min( A.Y(), B.Y() );
    const float low_z = Min( A.Z(), B.Z() );
    const float low_w = Min( A.W(), B.W() );
    return Vec4( low_x, low_y, low_z, low_w );
}
// Component-wise maximum of A and B.
Vec4 Max( const Vec4 &A, const Vec4 &B )
{
    const float high_x = Max( A.X(), B.X() );
    const float high_y = Max( A.Y(), B.Y() );
    const float high_z = Max( A.Z(), B.Z() );
    const float high_w = Max( A.W(), B.W() );
    return Vec4( high_x, high_y, high_z, high_w );
}
// Write the four components of A, each formatted with %9.5f, then
// std::endl (newline plus flush).
std::ostream &operator<<( std::ostream &out, const Vec4 &A )
{
    out << form( " %9.5f %9.5f %9.5f %9.5f", A.X(), A.Y(), A.Z(), A.W() );
    out << std::endl;
    return out;
}
};

View File

@ -1,238 +0,0 @@
/***************************************************************************
* Vec4.h *
* *
* Basic operations on 4-dimensional vectors. This special case is useful *
* because many operations are performed inline. *
* *
* HISTORY *
* Name Date Description *
* *
* walt 6/26/07 Edited Vec3 to make this new class *
* arvo 10/27/94 Reorganized (removed Col & Row distinction). *
* arvo 06/14/93 Initial coding. *
* *
*--------------------------------------------------------------------------*
* Copyright (C) 1994, James Arvo *
* *
* This program is free software; you can redistribute it and/or modify it *
* under the terms of the GNU General Public License as published by the *
* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html *
* *
* This program is distributed in the hope that it will be useful, but *
* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for *
* any particular purpose. See the GNU General Public License for more *
* details. *
* *
***************************************************************************/
#ifndef __Vec4_INCLUDED__
#define __Vec4_INCLUDED__
#include <math.h>
#include <iostream>
#include "Vec2.h"
#include "Vec3.h"
namespace ArvoMath {
// Four-component float vector (x, y, z, w) with inline accessors.
class Vec4 {
public:
    // Construction: broadcast a scalar, give all four components, copy,
    // or extend a Vec3 with an explicit w.
    Vec4( float c = 0.0 ) : x( c ), y( c ), z( c ), w( c ) {}
    Vec4( float a, float b, float c, float d ) : x( a ), y( b ), z( c ), w( d ) {}
    Vec4( const Vec4 &A ) : x( A.x ), y( A.y ), z( A.z ), w( A.w ) {}
    Vec4( const Vec3 &A, float d ) : x( A.X() ), y( A.Y() ), z( A.Z() ), w( d ) {}

    // Assignment: lower-dimensional sources zero the missing components.
    void operator=( float c ) { w = c; z = c; y = c; x = c; }
    void operator=( const Vec4 &A ) { x = A.x; y = A.y; z = A.z; w = A.w; }
    void operator=( const Vec3 &A ) { x = A.X(); y = A.Y(); z = A.Z(); w = 0.0; }
    void operator=( const Vec2 &A ) { x = A.X(); y = A.Y(); z = 0.0; w = 0.0; }
    ~Vec4() {}

    // Read-only component access.
    float X() const { return x; }
    float Y() const { return y; }
    float Z() const { return z; }
    float W() const { return w; }

    // Mutable component access.
    float & X() { return x; }
    float & Y() { return y; }
    float & Z() { return z; }
    float & W() { return w; }

    // Indexed access (0..3 -> x..w); no range check.  Addresses components
    // relative to x, so it assumes x, y, z, w are stored back to back.
    float operator[]( int i ) const { return ( &x )[i]; }
    float & operator[]( int i ) { return ( &x )[i]; }
private:
    float x, y, z, w;
};
//==========================================
//=== Norm-related functions ===
//==========================================
inline double LenSqr ( const Vec4 &A ) { return Sqr(A[0]) + Sqr(A[1]) + Sqr(A[2]) + Sqr(A[3]); }
// Euclidean length of A.
inline double Len( const Vec4 &A )
{
    const double squared = LenSqr( A );
    return Sqrt( squared );
}
inline double Norm1 ( const Vec4 &A ) { return Abs(A[0]) + Abs(A[1]) + Abs(A[2]) + Abs(A[3]); }
// 2-norm; simply another name for Len.
inline double Norm2( const Vec4 &A )
{
    const double euclidean = Len( A );
    return euclidean;
}
inline float SupNorm( const Vec4 &A ) { return MaxAbs( A[0], A[1], A[2], A[3] ); }
//==========================================
//=== Addition ===
//==========================================
// Component-wise sum A + B.
inline Vec4 operator+( const Vec4 &A, const Vec4 &B )
{
    return Vec4( A[0] + B[0],
                 A[1] + B[1],
                 A[2] + B[2],
                 A[3] + B[3] );
}
// Adds B to A in place, one component at a time, and returns A.
inline Vec4& operator+=( Vec4 &A, const Vec4 &B )
{
    for( int i = 0; i < 4; i++ )
    {
        A[i] += B[i];
    }
    return A;
}
//==========================================
//=== Subtraction ===
//==========================================
// Component-wise difference A - B.
inline Vec4 operator-( const Vec4 &A, const Vec4 &B )
{
    return Vec4( A[0] - B[0],
                 A[1] - B[1],
                 A[2] - B[2],
                 A[3] - B[3] );
}
// Unary minus: negates every component.
inline Vec4 operator-( const Vec4 &A )
{
    return Vec4( -A[0], -A[1], -A[2], -A[3] );
}
// Subtracts B from A in place, one component at a time, and returns A.
inline Vec4& operator-=( Vec4 &A, const Vec4 &B )
{
    for( int i = 0; i < 4; i++ )
    {
        A[i] -= B[i];
    }
    return A;
}
//==========================================
//=== Multiplication ===
//==========================================
// Scalar * vector: every component of x multiplied by a.
inline Vec4 operator*( float a, const Vec4 &x )
{
    return Vec4( a * x[0],
                 a * x[1],
                 a * x[2],
                 a * x[3] );
}
// Vector * scalar: every component of x multiplied by a.
inline Vec4 operator*( const Vec4 &x, float a )
{
    return Vec4( a * x[0],
                 a * x[1],
                 a * x[2],
                 a * x[3] );
}
// Inner (dot) product of A and B.
inline float operator*( const Vec4 &A, const Vec4 &B )
{
    return A[0] * B[0] + A[1] * B[1] + A[2] * B[2] + A[3] * B[3];
}
// Scales A in place by a and returns A.
inline Vec4& operator*=( Vec4 &A, float a )
{
    for( int i = 0; i < 4; i++ )
    {
        A[i] *= a;
    }
    return A;
}
//==========================================
//=== Division ===
//==========================================
// Vector divided by a scalar, done as multiplication by the reciprocal of c.
// No check is made for c == 0.
inline Vec4 operator/( const Vec4 &A, double c )
{
    const double reciprocal = 1.0 / c;
    return Vec4( A[0] * reciprocal,
                 A[1] * reciprocal,
                 A[2] * reciprocal,
                 A[3] * reciprocal );
}
// Divides every component of A by a in place and returns A.
// No check is made for a == 0.
inline Vec4& operator/=( Vec4 &A, double a )
{
    for( int i = 0; i < 4; i++ )
    {
        A[i] /= a;
    }
    return A;
}
// Removes from A its component parallel to B.  When B has no positive
// squared length there is nothing to remove and A is returned as is.
inline Vec4 operator/( const Vec4 &A, const Vec4 &B )
{
    const double lenSqrB = LenSqr( B );
    if( lenSqrB > 0.0 )
    {
        return A - B * (( A * B ) / lenSqrB);
    }
    return A;
}
// In-place form: strips from A its component parallel to B.
// A is untouched when B has no positive squared length.
inline void operator/=( Vec4 &A, const Vec4 &B )
{
    const double lenSqrB = LenSqr( B );
    if( !( lenSqrB > 0.0 ) )
    {
        return;
    }
    A -= B * (( A * B ) / lenSqrB);
}
//==========================================
//=== Miscellaneous ===
//==========================================
// Inner product; alternative spelling of A * B.
inline float operator|( const Vec4 &A, const Vec4 &B )
{
    const float dot = A * B;
    return dot;
}
// Returns A scaled to unit length, or the zero vector when A has no
// positive squared length.
inline Vec4 Unit( const Vec4 &A )
{
    const double lenSqr = LenSqr( A );
    if( lenSqr > 0.0 )
    {
        return A / sqrt(lenSqr);
    }
    return Vec4(0,0,0,0);
}
// Convenience overload: unit vector in the direction (x, y, z, w).
inline Vec4 Unit( float x, float y, float z, float w )
{
    const Vec4 direction( x, y, z, w );
    return Unit( direction );
}
// Unit vector along the part of A that remains after removing its
// component parallel to B.
inline Vec4 Ortho( const Vec4 &A, const Vec4 &B )
{
    const Vec4 remainder = A / B;
    return Unit( remainder );
}
// Returns 1 when every component of A equals x, otherwise 0.
inline int operator==( const Vec4 &A, float x )
{
    for( int i = 0; i < 4; i++ )
    {
        if( !( A[i] == x ) ) return 0;
    }
    return 1;
}
// inline Vec4 operator^( const Vec4 &A, const Vec4 &B ) there is no 4D "cross product" of 2 4D vectors -- we need six dimensions
inline double dist( const Vec4 &A, const Vec4 &B )
{
return Len( A - B );
}
// inline double Dihedral( const Vec4 &A, const Vec4 &B, const Vec4 &C )
// Projects A onto B.  Yields the zero vector when B has no positive
// squared length.
inline Vec4 operator>>( const Vec4 &A, const Vec4 &B )
{
    const double lenSqrB = LenSqr( B );
    if( lenSqrB > 0.0 )
    {
        return B * (( A * B ) / lenSqrB);
    }
    return Vec4();
}
// Projects B onto A; mirror image of operator>>.
inline Vec4 operator<<( const Vec4 &A, const Vec4 &B )
{
    const Vec4 projected = B >> A;
    return projected;
}
// inline double Triple( const Vec4 &A, const Vec4 &B, const Vec4 &C )
//==========================================
//=== Output routines ===
//==========================================
extern std::ostream &operator<<( std::ostream &out, const Vec4 & );
};
#endif

View File

@ -1,366 +0,0 @@
/***************************************************************************
* Vector.C *
* *
* General Vector and Matrix classes, with all the associated methods. *
* *
* HISTORY *
* Name Date Description *
* *
* arvo 08/16/2000 Revamped for CIT tools. *
* arvo 10/31/1994 Combined RowVec & ColVec into Vector. *
* arvo 06/30/1993 Added singular value decomposition class. *
* arvo 06/25/1993 Major revisions. *
* arvo 09/08/1991 Initial implementation. *
* *
*--------------------------------------------------------------------------*
* Copyright (C) 2000, James Arvo *
* *
* This program is free software; you can redistribute it and/or modify it *
* under the terms of the GNU General Public License as published by the *
* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html *
* *
* This program is distributed in the hope that it will be useful, but *
* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for *
* any particular purpose. See the GNU General Public License for more *
* details. *
* *
***************************************************************************/
#include <iostream>
#include <assert.h>
#include "ArvoMath.h"
#include "Vector.h"
#include "form.h"
namespace ArvoMath {
const Vector Vector::Null(0);
/*-------------------------------------------------------------------------*
* *
* C O N S T R U C T O R S *
* *
*-------------------------------------------------------------------------*/
// Builds an n-element vector holding a copy of the first n floats at x.
Vector::Vector( const float *x, int n )
{
    Create( n );
    const float *src = x;
    float *dst = elem;
    for( int remaining = size; remaining > 0; --remaining )
    {
        *dst++ = *src++;
    }
}
// Copy constructor: allocates fresh storage and copies A's elements.
Vector::Vector( const Vector &A )
{
    const int count = A.Size();
    Create( count );
    for( int i = 0; i < count; ++i )
    {
        elem[i] = A.elem[i];
    }
}
// Builds a vector of n elements, all set to zero.
Vector::Vector( int n )
{
    Create( n );
    for( float *p = elem; p < elem + n; ++p )
    {
        *p = 0.0;
    }
}
// Builds the 2-element vector (x, y).
Vector::Vector( float x, float y )
{
    Create( 2 );
    float *slot = elem;
    *slot++ = x;
    *slot = y;
}
// Builds the 3-element vector (x, y, z).
Vector::Vector( float x, float y, float z )
{
    Create( 3 );
    float *slot = elem;
    *slot++ = x;
    *slot++ = y;
    *slot = z;
}
// Resizes the vector.  When the size actually changes the old contents are
// discarded and every element is zeroed; when it does not, nothing happens.
void Vector::SetSize( int new_size )
{
    if( size == new_size )
    {
        return;
    }
    delete[] elem;
    Create( new_size );
    for( int i = 0; i < new_size; ++i )
    {
        elem[i] = 0.0;
    }
}
// Exchanges elements i and j and returns *this.  Indices are not checked.
Vector &Vector::Swap( int i, int j )
{
    const float atI = elem[i];
    const float atJ = elem[j];
    elem[i] = atJ;
    elem[j] = atI;
    return *this;
}
// Returns a new vector holding elements i..j (both inclusive).
Vector Vector::GetBlock( int i, int j ) const
{
    assert( 0 <= i && i <= j && j < size );
    const int count = j - i + 1;
    Vector block( count );
    for( int k = 0; k < count; ++k )
    {
        block(k) = elem[i + k];
    }
    return block;
}
void Vector::SetBlock( int i, int j, const Vector &V )
{
assert( 0 <= i && i <= j && j < size );
int n = j - i + 1;
assert( n == V.Size() );
register float *v = V.Array();
register float *e = elem + i;
for( register int k = 0; k < n; k++ ) *e++ = *v++;
}
/*-------------------------------------------------------------------------*
* *
* O P E R A T O R S *
* *
*-------------------------------------------------------------------------*/
// Inner product of A and B, accumulated in double.
// Both vectors must have the same size (asserted).  Two empty vectors yield
// 0; the guard exists because seeding the sum with A(0) * B(0) would read
// past the end of a zero-length allocation.
double operator*( const Vector &A, const Vector &B )
{
    assert( A.Size() == B.Size() );
    if( A.Size() == 0 ) return 0.0;
    double sum = A(0) * B(0);
    for( int i = 1; i < A.Size(); i++ ) sum += A(i) * B(i);
    return sum;
}
// Sets every element to c; the size is unchanged.
void Vector::operator=( float c )
{
    float *const end = elem + size;
    for( float *p = elem; p < end; ++p )
    {
        *p = c;
    }
}
// Returns a new vector whose elements are those of A multiplied by s.
Vector operator*( const Vector &A, float s )
{
    const int count = A.Size();
    Vector scaled( count );
    for( int i = 0; i < count; ++i )
    {
        scaled(i) = A(i) * s;
    }
    return scaled;
}
// Scalar-on-the-left form: returns a new vector with each element of A
// multiplied by s.
Vector operator*( float s, const Vector &A )
{
    const int count = A.Size();
    Vector scaled( count );
    for( int i = 0; i < count; ++i )
    {
        scaled(i) = A(i) * s;
    }
    return scaled;
}
// Returns a new vector whose elements are those of A divided by s.
// s must be non-zero (asserted).
Vector operator/( const Vector &A, float s )
{
    assert( s != 0.0 );
    const int count = A.Size();
    Vector quotient( count );
    for( int i = 0; i < count; ++i )
    {
        quotient(i) = A(i) / s;
    }
    return quotient;
}
// Adds B to A element by element, in place.  Sizes must match (asserted).
Vector& operator+=( Vector &A, const Vector &B )
{
    assert( A.Size() == B.Size() );
    const int count = A.Size();
    for( int i = 0; i < count; ++i )
    {
        A(i) += B(i);
    }
    return A;
}
// Multiplies every element of A by scale, in place.
Vector& operator*=( Vector &A, float scale )
{
    const int count = A.Size();
    for( int i = 0; i < count; ++i )
    {
        A(i) *= scale;
    }
    return A;
}
// Divides every element of A by scale, in place.  Unlike operator/ this
// does not assert that scale is non-zero.
Vector& operator/=( Vector &A, float scale )
{
    const int count = A.Size();
    for( int i = 0; i < count; ++i )
    {
        A(i) /= scale;
    }
    return A;
}
// Copy assignment: takes on A's size (reallocating only if it differs)
// and then copies A's elements.
Vector& Vector::operator=( const Vector &A )
{
    SetSize( A.Size() );
    const float *src = A.Array();
    for( int i = 0; i < size; ++i )
    {
        elem[i] = src[i];
    }
    return *this;
}
// Element-wise sum of two equally sized vectors (size match asserted).
Vector operator+( const Vector &A, const Vector &B )
{
    assert( A.Size() == B.Size() );
    const int count = A.Size();
    Vector sum( count );
    for( int i = 0; i < count; ++i )
    {
        sum(i) = A(i) + B(i);
    }
    return sum;
}
// Element-wise difference of two equally sized vectors (size match asserted).
Vector operator-( const Vector &A, const Vector &B )
{
    assert( A.Size() == B.Size() );
    const int count = A.Size();
    Vector difference( count );
    for( int i = 0; i < count; ++i )
    {
        difference(i) = A(i) - B(i);
    }
    return difference;
}
// Unary minus: returns a vector with every element of A negated.
Vector operator-( const Vector &A )
{
    const int count = A.Size();
    Vector negated( count );
    for( int i = 0; i < count; ++i )
    {
        negated(i) = -A(i);
    }
    return negated;
}
// Cross product.  Accepts two 2-vectors (treated as lying in the z = 0
// plane) or two 3-vectors; the result always has three elements.
Vector operator^( const Vector &A, const Vector &B )
{
    Vector cross(3);
    assert( A.Size() == B.Size() );
    if( A.Size() != 2 )
    {
        assert( A.Size() == 3 );
        cross(0) = A(1) * B(2) - A(2) * B(1);
        cross(1) = A(2) * B(0) - A(0) * B(2);
        cross(2) = A(0) * B(1) - A(1) * B(0);
        return cross;
    }
    // 2-D case: only the z component can be non-zero.
    cross(0) = 0.0;
    cross(1) = 0.0;
    cross(2) = A(0) * B(1) - A(1) * B(0);
    return cross;
}
/*-------------------------------------------------------------------------*
* *
* M I S C E L L A N E O U S F U N C T I O N S *
* *
*-------------------------------------------------------------------------*/
// Element-wise minimum of two equally sized vectors (size match asserted).
Vector Min( const Vector &A, const Vector &B )
{
    assert( A.Size() == B.Size() );
    const int count = A.Size();
    Vector lowest( count );
    for( int i = 0; i < count; ++i )
    {
        lowest(i) = Min( A(i), B(i) );
    }
    return lowest;
}
// Element-wise maximum of two equally sized vectors (size match asserted).
Vector Max( const Vector &A, const Vector &B )
{
    assert( A.Size() == B.Size() );
    const int count = A.Size();
    Vector highest( count );
    for( int i = 0; i < count; ++i )
    {
        highest(i) = Max( A(i), B(i) );
    }
    return highest;
}
// Returns A scaled to unit 2-norm.  A must have non-zero length (asserted).
Vector Unit( const Vector &A )
{
    const double length = TwoNorm( A );
    assert( length > 0.0 );
    return A * ( 1.0 / length );
}
// Scales A in place to unit 2-norm and returns the norm it had on entry.
// A must have non-zero length (asserted).
double Normalize( Vector &A )
{
    const double length = TwoNorm( A );
    assert( length > 0.0 );
    const int count = A.Size();
    for( int i = 0; i < count; ++i )
    {
        A(i) /= length;
    }
    return length;
}
// Returns 1 when A has no elements, 0 otherwise.
int Null( const Vector &A )
{
    if( A.Size() == 0 )
    {
        return 1;
    }
    return 0;
}
// Sum of the squares of A's elements, accumulated in double.
// An empty vector yields 0; the guard exists because seeding the sum with
// A(0) * A(0) would read past the end of a zero-length allocation.
double TwoNormSqr( const Vector &A )
{
    if( A.Size() == 0 ) return 0.0;
    double sum = A(0) * A(0);
    for( int i = 1; i < A.Size(); i++ ) sum += A(i) * A(i);
    return sum;
}
// Euclidean (2-) norm of A: square root of TwoNormSqr.
double TwoNorm( const Vector &A )
{
    const double squared = TwoNormSqr( A );
    return sqrt( squared );
}
double dist( const Vector &A, const Vector &B )
{
return TwoNorm( A - B );
}
// 1-norm: sum of the absolute values of A's elements.
// An empty vector yields 0; the guard exists because seeding the sum with
// Abs( A(0) ) would read past the end of a zero-length allocation.
double OneNorm( const Vector &A )
{
    if( A.Size() == 0 ) return 0.0;
    double norm = Abs( A(0) );
    for( int i = 1; i < A.Size(); i++ ) norm += Abs( A(i) );
    return norm;
}
// Sup-norm: largest absolute value among A's elements.
// An empty vector yields 0; the guard exists because seeding the maximum
// with Abs( A(0) ) would read past the end of a zero-length allocation.
double SupNorm( const Vector &A )
{
    if( A.Size() == 0 ) return 0.0;
    double norm = Abs( A(0) );
    for( int i = 1; i < A.Size(); i++ )
    {
        const double a = Abs( A(i) );
        if( a > norm ) norm = a;
    }
    return norm;
}
// Converts a 2-element Vector to a Vec2 (size asserted).
Vec2 ToVec2( const Vector &V )
{
    assert( V.Size() == 2 );
    const float *e = V.Array();
    return Vec2( e[0], e[1] );
}
// Converts a 3-element Vector to a Vec3 (size asserted).
Vec3 ToVec3( const Vector &V )
{
    assert( V.Size() == 3 );
    const float *e = V.Array();
    return Vec3( e[0], e[1], e[2] );
}
// Converts a Vec2 to a 2-element Vector.
Vector ToVector( const Vec2 &V )
{
    Vector converted( V.X(), V.Y() );
    return converted;
}
// Converts a Vec3 to a 3-element Vector.
Vector ToVector( const Vec3 &V )
{
    Vector converted( V.X(), V.Y(), V.Z() );
    return converted;
}
//
// Returns a vector that is orthogonal to A (but of arbitrary length).
//
Vector OrthogonalTo( const Vector &A )
{
    // The result has A's size and starts out as the zero vector.
    Vector B( A.Size() );

    // With fewer than two elements the zero vector is the answer.  This is
    // tested before SupNorm is called so that an empty vector is never
    // handed to a routine that inspects its elements.
    if( A.Size() < 2 )
    {
        return B;
    }

    const double c = 0.5 * SupNorm( A );
    if( c == 0.0 )
    {
        // A is the zero vector, so any vector is orthogonal to it.
        B(0) = 1.0;
        return B;
    }

    // Find the first element larger than half the sup-norm and pair it with
    // a neighbour k:  B(k) = -A(i), B(i) = A(k)  makes  A . B  cancel to zero.
    for( int i = 0; i < A.Size(); i++ )
    {
        if( Abs( A(i)) > c )
        {
            const int k = ( i > 0 ) ? i - 1 : i + 1;
            B(k) = -A(i);
            B(i) = A(k);
            break;
        }
    }
    return B;
}
// Prints one "index: value" line per element, or "NULL" for an empty
// vector, and finishes with std::endl.
std::ostream &operator<<( std::ostream &out, const Vector &A )
{
    const int count = A.Size();
    if( count == 0 )
    {
        out << "NULL";
    }
    else
    {
        for( int i = 0; i < count; ++i )
        {
            out << form( "%3d: %10.5g\n", i, A(i) );
        }
    }
    out << std::endl;
    return out;
}
};

View File

@ -1,103 +0,0 @@
/***************************************************************************
* Vector.h *
* *
* General Vector and Matrix classes, with all the associated methods. *
* *
* HISTORY *
* Name Date Description *
* *
* arvo 08/16/2000 Revamped for CIT tools. *
* arvo 10/31/1994 Combined RowVec & ColVec into Vector. *
* arvo 06/30/1993 Added singular value decomposition class. *
* arvo 06/25/1993 Major revisions. *
* arvo 09/08/1991 Initial implementation. *
* *
*--------------------------------------------------------------------------*
* Copyright (C) 2000, James Arvo *
* *
* This program is free software; you can redistribute it and/or modify it *
* under the terms of the GNU General Public License as published by the *
* Free Software Foundation. See http://www.fsf.org/copyleft/gpl.html *
* *
* This program is distributed in the hope that it will be useful, but *
* WITHOUT EXPRESS OR IMPLIED WARRANTY of merchantability or fitness for *
* any particular purpose. See the GNU General Public License for more *
* details. *
* *
***************************************************************************/
#ifndef __VECTOR_INCLUDED__
#define __VECTOR_INCLUDED__
#include <istream>
#include "Vec2.h"
#include "Vec3.h"
namespace ArvoMath {
// Vector of floats whose length is fixed at run time.  Storage is a
// new[]-allocated array owned by the object and released in the destructor.
class Vector {
public:
    // Construction.
    Vector( int size = 0 );
    Vector( const Vector & );
    Vector( float, float );
    Vector( float, float, float );
    Vector( const float *x, int n );

    // Assignment from another vector, or of one scalar to every element.
    Vector &operator=( const Vector & );
    void operator=( float );

    // Resizing and element / sub-range manipulation.
    void SetSize( int );
    Vector &Swap( int i, int j );
    Vector GetBlock( int i, int j ) const;
    void SetBlock( int i, int j, const Vector & );

    static const Vector Null;
public: // Inlined functions.
    // Element access by index; no range check.
    float operator()( int i ) const { return *( elem + i ); }
    float& operator()( int i ) { return *( elem + i ); }
    // Raw pointer to the element storage (mutable even through a const Vector).
    float* Array() const { return elem; }
    int Size () const { return size; }
    ~Vector() { delete[] elem; }
private:
    // Records the size and allocates (uninitialized) storage for n floats.
    void Create( int n = 0 ) { size = n; elem = new float[n]; }
    int size;
    float* elem;
};
// Arithmetic on whole vectors.  The two-vector forms assert in Vector.C that
// both operands have the same size.
extern Vector operator + ( const Vector &, const Vector & );
extern Vector operator - ( const Vector &, const Vector & ); // Binary minus.
extern Vector operator - ( const Vector & ); // Unary minus.
extern Vector operator * ( const Vector &, float );
extern Vector operator * ( float , const Vector & );
extern Vector operator / ( const Vector &, float );
// NOTE(review): Vector.C contains no definition for the vector / vector
// overload below -- confirm it is defined elsewhere before relying on it.
extern Vector operator / ( const Vector &, const Vector & );
extern Vector operator ^ ( const Vector &, const Vector & ); // Cross product (2- or 3-element operands).
extern Vector& operator += ( Vector &, const Vector & );
extern Vector& operator *= ( Vector &, float );
extern Vector& operator /= ( Vector &, float );
// Element-wise minimum / maximum.
extern Vector Min ( const Vector &, const Vector & );
extern Vector Max ( const Vector &, const Vector & );
extern double operator * ( const Vector &, const Vector & ); // Inner product.
extern double dist ( const Vector &, const Vector & ); // 2-norm of the difference.
extern Vector OrthogonalTo( const Vector & ); // Returns some orthogonal vector.
// Unit returns a normalized copy; Normalize scales in place and returns the
// original 2-norm.  Both assert that the norm is positive.
extern Vector Unit ( const Vector & );
extern double Normalize ( Vector & );
// Norms.
extern double OneNorm ( const Vector & );
extern double TwoNorm ( const Vector & );
extern double TwoNormSqr ( const Vector & );
extern double SupNorm ( const Vector & );
extern int Null ( const Vector & ); // Non-zero when the vector has no elements.
// Conversions to and from the fixed-size types; ToVec2 / ToVec3 assert the size.
extern Vec2 ToVec2 ( const Vector & );
extern Vec3 ToVec3 ( const Vector & );
extern Vector ToVector ( const Vec2 & );
extern Vector ToVector ( const Vec3 & );
// Stream output: one "index: value" line per element, or "NULL" when empty.
std::ostream &operator<<(
std::ostream &out,
const Vector &
);
};
#endif

View File

@ -1,26 +0,0 @@
#ifndef __FORM_INCLUDED__
#define __FORM_INCLUDED__
#include <assert.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
namespace ArvoMath {
// printf-style formatting into a string.
//
// fmt and the following arguments are interpreted exactly as by printf.
// Returns a pointer to a static buffer holding the formatted, NUL-terminated
// text.  The buffer is overwritten by the next call, so the result must be
// consumed (or copied) before form() is called again.
//
// Output is written with a bounded call, so text longer than the buffer is
// truncated rather than overrunning it; debug builds additionally assert
// that formatting succeeded and that nothing was truncated.
inline const char *form(const char *fmt, ...)
{
    static char printbfr[65536];
    va_list arglist;
    va_start(arglist, fmt);
    const int length = vsnprintf(printbfr, sizeof(printbfr), fmt, arglist);
    va_end(arglist);
    // vsnprintf reports the length the full output needs (or a negative
    // value on error); it fits only when that is below the buffer size.
    assert(length >= 0 && length < static_cast<int>(sizeof(printbfr)));
    (void)length; // keeps release (NDEBUG) builds free of unused-variable warnings
    return printbfr;
}
};
#endif

View File

@ -12,41 +12,40 @@ See the License for the specific language governing permissions and limitations
// the avpcl compressor and decompressor // the avpcl compressor and decompressor
#include <string>
#include <iostream>
#include <sstream>
#include <assert.h>
#include <time.h>
#include "ImfArray.h"
#include "RGBA.h"
#include "tile.h" #include "tile.h"
#include "avpcl.h" #include "avpcl.h"
#include "targa.h" #include "nvcore/Debug.h"
#include "nvmath/Vector.inl"
#include <cstring>
#ifndef MIN using namespace nv;
#define MIN(x,y) ((x)<(y)?(x):(y)) using namespace AVPCL;
#endif
using namespace std; // global flags
bool AVPCL::flag_premult = false;
bool AVPCL::flag_nonuniform = false;
bool AVPCL::flag_nonuniform_ati = false;
void AVPCL::compress(const Tile &t, char *block, FILE *errfile) // global mode
bool AVPCL::mode_rgb = false; // true if image had constant alpha = 255
void AVPCL::compress(const Tile &t, char *block)
{ {
char tempblock[AVPCL::BLOCKSIZE]; char tempblock[AVPCL::BLOCKSIZE];
double msebest = DBL_MAX; float msebest = FLT_MAX;
double mse_mode0 = AVPCL::compress_mode0(t, tempblock); if(mse_mode0 < msebest) { msebest = mse_mode0; memcpy(block, tempblock, AVPCL::BLOCKSIZE); } float mse_mode0 = AVPCL::compress_mode0(t, tempblock); if(mse_mode0 < msebest) { msebest = mse_mode0; memcpy(block, tempblock, AVPCL::BLOCKSIZE); }
double mse_mode1 = AVPCL::compress_mode1(t, tempblock); if(mse_mode1 < msebest) { msebest = mse_mode1; memcpy(block, tempblock, AVPCL::BLOCKSIZE); } float mse_mode1 = AVPCL::compress_mode1(t, tempblock); if(mse_mode1 < msebest) { msebest = mse_mode1; memcpy(block, tempblock, AVPCL::BLOCKSIZE); }
double mse_mode2 = AVPCL::compress_mode2(t, tempblock); if(mse_mode2 < msebest) { msebest = mse_mode2; memcpy(block, tempblock, AVPCL::BLOCKSIZE); } float mse_mode2 = AVPCL::compress_mode2(t, tempblock); if(mse_mode2 < msebest) { msebest = mse_mode2; memcpy(block, tempblock, AVPCL::BLOCKSIZE); }
double mse_mode3 = AVPCL::compress_mode3(t, tempblock); if(mse_mode3 < msebest) { msebest = mse_mode3; memcpy(block, tempblock, AVPCL::BLOCKSIZE); } float mse_mode3 = AVPCL::compress_mode3(t, tempblock); if(mse_mode3 < msebest) { msebest = mse_mode3; memcpy(block, tempblock, AVPCL::BLOCKSIZE); }
double mse_mode4 = AVPCL::compress_mode4(t, tempblock); if(mse_mode4 < msebest) { msebest = mse_mode4; memcpy(block, tempblock, AVPCL::BLOCKSIZE); } float mse_mode4 = AVPCL::compress_mode4(t, tempblock); if(mse_mode4 < msebest) { msebest = mse_mode4; memcpy(block, tempblock, AVPCL::BLOCKSIZE); }
double mse_mode5 = AVPCL::compress_mode5(t, tempblock); if(mse_mode5 < msebest) { msebest = mse_mode5; memcpy(block, tempblock, AVPCL::BLOCKSIZE); } float mse_mode5 = AVPCL::compress_mode5(t, tempblock); if(mse_mode5 < msebest) { msebest = mse_mode5; memcpy(block, tempblock, AVPCL::BLOCKSIZE); }
double mse_mode6 = AVPCL::compress_mode6(t, tempblock); if(mse_mode6 < msebest) { msebest = mse_mode6; memcpy(block, tempblock, AVPCL::BLOCKSIZE); } float mse_mode6 = AVPCL::compress_mode6(t, tempblock); if(mse_mode6 < msebest) { msebest = mse_mode6; memcpy(block, tempblock, AVPCL::BLOCKSIZE); }
double mse_mode7 = AVPCL::compress_mode7(t, tempblock); if(mse_mode7 < msebest) { msebest = mse_mode7; memcpy(block, tempblock, AVPCL::BLOCKSIZE); } float mse_mode7 = AVPCL::compress_mode7(t, tempblock); if(mse_mode7 < msebest) { msebest = mse_mode7; memcpy(block, tempblock, AVPCL::BLOCKSIZE); }
if (errfile) /*if (errfile)
{ {
double errs[21]; float errs[21];
int nerrs = 8; int nerrs = 8;
errs[0] = mse_mode0; errs[0] = mse_mode0;
errs[1] = mse_mode1; errs[1] = mse_mode1;
@ -56,11 +55,12 @@ void AVPCL::compress(const Tile &t, char *block, FILE *errfile)
errs[5] = mse_mode5; errs[5] = mse_mode5;
errs[6] = mse_mode6; errs[6] = mse_mode6;
errs[7] = mse_mode7; errs[7] = mse_mode7;
if (fwrite(errs, sizeof(double), nerrs, errfile) != nerrs) if (fwrite(errs, sizeof(float), nerrs, errfile) != nerrs)
throw "Write error on error file"; throw "Write error on error file";
} }*/
} }
/*
static int getbit(char *b, int start) static int getbit(char *b, int start)
{ {
if (start < 0 || start >= 128) return 0; // out of range if (start < 0 || start >= 128) return 0; // out of range
@ -94,14 +94,12 @@ static void setbits(char *b, int start, int len, int bits)
for (int i=0; i<len; ++i) for (int i=0; i<len; ++i)
setbit(b, start+i, bits >> i); setbit(b, start+i, bits >> i);
} }
*/
void AVPCL::decompress(const char *cblock, Tile &t) void AVPCL::decompress(const char *cblock, Tile &t)
{ {
Vec4 zero(0); char block[AVPCL::BLOCKSIZE];
memcpy(block, cblock, AVPCL::BLOCKSIZE);
char block[16];
for (int i=0; i<16; ++i) block[i] = cblock[i];
switch(getmode(block)) switch(getmode(block))
{ {
@ -116,12 +114,13 @@ void AVPCL::decompress(const char *cblock, Tile &t)
case 8: // return a black tile if you get a reserved mode case 8: // return a black tile if you get a reserved mode
for (int y=0; y<Tile::TILE_H; ++y) for (int y=0; y<Tile::TILE_H; ++y)
for (int x=0; x<Tile::TILE_W; ++x) for (int x=0; x<Tile::TILE_W; ++x)
t.data[y][x] = zero; t.data[y][x].set(0, 0, 0, 0);
break; break;
default: assert(0); default: nvUnreachable();
} }
} }
/*
void AVPCL::compress(string inf, string avpclf, string errf) void AVPCL::compress(string inf, string avpclf, string errf)
{ {
Array2D<RGBA> pixels; Array2D<RGBA> pixels;
@ -158,12 +157,12 @@ void AVPCL::compress(string inf, string avpclf, string errf)
// convert to tiles and compress each tile // convert to tiles and compress each tile
for (int y=0; y<h; y+=Tile::TILE_H) for (int y=0; y<h; y+=Tile::TILE_H)
{ {
int ysize = MIN(Tile::TILE_H, h-y); int ysize = min(Tile::TILE_H, h-y);
for (int x=0; x<w; x+=Tile::TILE_W) for (int x=0; x<w; x+=Tile::TILE_W)
{ {
if ((tilecnt%100) == 0) { cur = clock(); printf("Progress %d of %d, %5.2f seconds per 100 tiles\r", tilecnt, ntiles, double(cur-prev)/CLOCKS_PER_SEC); fflush(stdout); prev = cur; } if ((tilecnt%100) == 0) { cur = clock(); printf("Progress %d of %d, %5.2f seconds per 100 tiles\r", tilecnt, ntiles, float(cur-prev)/CLOCKS_PER_SEC); fflush(stdout); prev = cur; }
int xsize = MIN(Tile::TILE_W, w-x); int xsize = min(Tile::TILE_W, w-x);
Tile t(xsize, ysize); Tile t(xsize, ysize);
t.insert(pixels, x, y); t.insert(pixels, x, y);
@ -178,7 +177,7 @@ void AVPCL::compress(string inf, string avpclf, string errf)
} }
cur = clock(); cur = clock();
printf("\nTotal time to compress: %.2f seconds\n\n", double(cur-start)/CLOCKS_PER_SEC); // advance to next line finally printf("\nTotal time to compress: %.2f seconds\n\n", float(cur-start)/CLOCKS_PER_SEC); // advance to next line finally
if (fclose(avpclfile)) throw "Close failed on .avpcl file"; if (fclose(avpclfile)) throw "Close failed on .avpcl file";
if (errfile && fclose(errfile)) throw "Close failed on error file"; if (errfile && fclose(errfile)) throw "Close failed on error file";
@ -239,10 +238,10 @@ void AVPCL::decompress(string avpclf, string outf)
// convert to tiles and decompress each tile // convert to tiles and decompress each tile
for (int y=0; y<h; y+=Tile::TILE_H) for (int y=0; y<h; y+=Tile::TILE_H)
{ {
int ysize = MIN(Tile::TILE_H, h-y); int ysize = min(Tile::TILE_H, h-y);
for (int x=0; x<w; x+=Tile::TILE_W) for (int x=0; x<w; x+=Tile::TILE_W)
{ {
int xsize = MIN(Tile::TILE_W, w-x); int xsize = min(Tile::TILE_W, w-x);
Tile t(xsize, ysize); Tile t(xsize, ysize);
if (fread(block, sizeof(char), AVPCL::BLOCKSIZE, avpclfile) != AVPCL::BLOCKSIZE) if (fread(block, sizeof(char), AVPCL::BLOCKSIZE, avpclfile) != AVPCL::BLOCKSIZE)
@ -261,3 +260,4 @@ void AVPCL::decompress(string avpclf, string outf)
printstats(); // print statistics printstats(); // print statistics
} }
*/

View File

@ -13,95 +13,87 @@ See the License for the specific language governing permissions and limitations
#ifndef _AVPCL_H #ifndef _AVPCL_H
#define _AVPCL_H #define _AVPCL_H
#include <string>
#include <assert.h>
#include "tile.h" #include "tile.h"
#include "bits.h" #include "bits.h"
using namespace std;
#define EXTERNAL_RELEASE 1 // define this if we're releasing this code externally
#define DISABLE_EXHAUSTIVE 1 // define this if you don't want to spend a lot of time on exhaustive compression #define DISABLE_EXHAUSTIVE 1 // define this if you don't want to spend a lot of time on exhaustive compression
#define USE_ZOH_INTERP 1 // use zoh interpolator, otherwise use exact avpcl interpolators #define USE_ZOH_INTERP 1 // use zoh interpolator, otherwise use exact avpcl interpolators
#define USE_ZOH_INTERP_ROUNDED 1 // use the rounded versions! #define USE_ZOH_INTERP_ROUNDED 1 // use the rounded versions!
#define NREGIONS_TWO 2 namespace AVPCL {
#define NREGIONS_THREE 3
#define DBL_MAX (1.0e37) // doesn't have to be really dblmax, just bigger than any possible squared error
class AVPCL static const int NREGIONS_TWO = 2;
static const int NREGIONS_THREE = 3;
static const int BLOCKSIZE=16;
static const int BITSIZE=128;
// global flags
extern bool flag_premult;
extern bool flag_nonuniform;
extern bool flag_nonuniform_ati;
// global mode
extern bool mode_rgb; // true if image had constant alpha = 255
void compress(const Tile &t, char *block);
void decompress(const char *block, Tile &t);
float compress_mode0(const Tile &t, char *block);
void decompress_mode0(const char *block, Tile &t);
float compress_mode1(const Tile &t, char *block);
void decompress_mode1(const char *block, Tile &t);
float compress_mode2(const Tile &t, char *block);
void decompress_mode2(const char *block, Tile &t);
float compress_mode3(const Tile &t, char *block);
void decompress_mode3(const char *block, Tile &t);
float compress_mode4(const Tile &t, char *block);
void decompress_mode4(const char *block, Tile &t);
float compress_mode5(const Tile &t, char *block);
void decompress_mode5(const char *block, Tile &t);
float compress_mode6(const Tile &t, char *block);
void decompress_mode6(const char *block, Tile &t);
float compress_mode7(const Tile &t, char *block);
void decompress_mode7(const char *block, Tile &t);
inline int getmode(Bits &in)
{ {
public: int mode = 0;
static const int BLOCKSIZE=16;
static const int BITSIZE=128;
// global flags if (in.read(1)) mode = 0;
static bool flag_premult; else if (in.read(1)) mode = 1;
static bool flag_nonuniform; else if (in.read(1)) mode = 2;
static bool flag_nonuniform_ati; else if (in.read(1)) mode = 3;
else if (in.read(1)) mode = 4;
else if (in.read(1)) mode = 5;
else if (in.read(1)) mode = 6;
else if (in.read(1)) mode = 7;
else mode = 8; // reserved
return mode;
}
inline int getmode(const char *block)
{
int bits = block[0], mode = 0;
// global mode if (bits & 1) mode = 0;
static bool mode_rgb; // true if image had constant alpha = 255 else if ((bits&3) == 2) mode = 1;
else if ((bits&7) == 4) mode = 2;
else if ((bits & 0xF) == 8) mode = 3;
else if ((bits & 0x1F) == 16) mode = 4;
else if ((bits & 0x3F) == 32) mode = 5;
else if ((bits & 0x7F) == 64) mode = 6;
else if ((bits & 0xFF) == 128) mode = 7;
else mode = 8; // reserved
return mode;
}
static void compress(string inf, string zohf, string errf); }
static void decompress(string zohf, string outf);
static void compress(const Tile &t, char *block, FILE *errfile);
static void decompress(const char *block, Tile &t);
static double compress_mode0(const Tile &t, char *block); #endif
static void decompress_mode0(const char *block, Tile &t);
static double compress_mode1(const Tile &t, char *block);
static void decompress_mode1(const char *block, Tile &t);
static double compress_mode2(const Tile &t, char *block);
static void decompress_mode2(const char *block, Tile &t);
static double compress_mode3(const Tile &t, char *block);
static void decompress_mode3(const char *block, Tile &t);
static double compress_mode4(const Tile &t, char *block);
static void decompress_mode4(const char *block, Tile &t);
static double compress_mode5(const Tile &t, char *block);
static void decompress_mode5(const char *block, Tile &t);
static double compress_mode6(const Tile &t, char *block);
static void decompress_mode6(const char *block, Tile &t);
static double compress_mode7(const Tile &t, char *block);
static void decompress_mode7(const char *block, Tile &t);
static int getmode(Bits &in)
{
int mode = 0;
if (in.read(1)) mode = 0;
else if (in.read(1)) mode = 1;
else if (in.read(1)) mode = 2;
else if (in.read(1)) mode = 3;
else if (in.read(1)) mode = 4;
else if (in.read(1)) mode = 5;
else if (in.read(1)) mode = 6;
else if (in.read(1)) mode = 7;
else mode = 8; // reserved
return mode;
}
static int getmode(const char *block)
{
int bits = block[0], mode = 0;
if (bits & 1) mode = 0;
else if ((bits&3) == 2) mode = 1;
else if ((bits&7) == 4) mode = 2;
else if ((bits & 0xF) == 8) mode = 3;
else if ((bits & 0x1F) == 16) mode = 4;
else if ((bits & 0x3F) == 32) mode = 5;
else if ((bits & 0x7F) == 64) mode = 6;
else if ((bits & 0xFF) == 128) mode = 7;
else mode = 8; // reserved
return mode;
}
};
#endif

View File

@ -1,21 +0,0 @@
Microsoft Visual Studio Solution File, Format Version 8.00
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "avpcl", "avpcl.vcproj", "{C6F6CD96-D0C0-4A35-B1BC-53E0A3CB712F}"
ProjectSection(ProjectDependencies) = postProject
EndProjectSection
EndProject
Global
GlobalSection(SolutionConfiguration) = preSolution
Debug = Debug
Release = Release
EndGlobalSection
GlobalSection(ProjectConfiguration) = postSolution
{C6F6CD96-D0C0-4A35-B1BC-53E0A3CB712F}.Debug.ActiveCfg = Debug|Win32
{C6F6CD96-D0C0-4A35-B1BC-53E0A3CB712F}.Debug.Build.0 = Debug|Win32
{C6F6CD96-D0C0-4A35-B1BC-53E0A3CB712F}.Release.ActiveCfg = Release|Win32
{C6F6CD96-D0C0-4A35-B1BC-53E0A3CB712F}.Release.Build.0 = Release|Win32
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
EndGlobalSection
GlobalSection(ExtensibilityAddIns) = postSolution
EndGlobalSection
EndGlobal

View File

@ -1,314 +0,0 @@
<?xml version="1.0" encoding="Windows-1252"?>
<VisualStudioProject
ProjectType="Visual C++"
Version="7.10"
Name="avpcl"
ProjectGUID="{3d7401c5-23e7-4280-bfa2-a51073587cf3}"
SccProjectName=""
SccLocalPath="">
<Platforms>
<Platform
Name="Win32"/>
</Platforms>
<Configurations>
<Configuration
Name="Debug|Win32"
OutputDirectory=".\Debug"
IntermediateDirectory=".\Debug"
ConfigurationType="1"
UseOfMFC="0"
ATLMinimizesCRunTimeLibraryUsage="FALSE"
CharacterSet="2">
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories=""
PreprocessorDefinitions="_DEBUG;WIN32;_CONSOLE"
BasicRuntimeChecks="3"
RuntimeLibrary="3"
ForceConformanceInForLoopScope="TRUE"
RuntimeTypeInfo="TRUE"
UsePrecompiledHeader="0"
ProgramDataBaseFileName="$(IntDir)/$(ProjectName)_d.pdb"
WarningLevel="1"
SuppressStartupBanner="TRUE"
Detect64BitPortabilityProblems="TRUE"
DebugInformationFormat="4"
CompileAs="0"
DisableSpecificWarnings="4290"/>
<Tool
Name="VCCustomBuildTool"/>
<Tool
Name="VCLinkerTool"
AdditionalDependencies="comctl32.lib"
OutputFile="../test/avpclc_d.exe"
LinkIncremental="2"
SuppressStartupBanner="TRUE"
AdditionalLibraryDirectories=""
GenerateDebugInformation="TRUE"
SubSystem="1"
TargetMachine="1"/>
<Tool
Name="VCMIDLTool"
TypeLibraryName="./Debug/avpcl.tlb"
HeaderFileName=""/>
<Tool
Name="VCPostBuildEventTool"/>
<Tool
Name="VCPreBuildEventTool"/>
<Tool
Name="VCPreLinkEventTool"/>
<Tool
Name="VCResourceCompilerTool"
PreprocessorDefinitions="_DEBUG"
Culture="1033"/>
<Tool
Name="VCWebServiceProxyGeneratorTool"/>
<Tool
Name="VCXMLDataGeneratorTool"/>
<Tool
Name="VCWebDeploymentTool"/>
<Tool
Name="VCManagedWrapperGeneratorTool"/>
<Tool
Name="VCAuxiliaryManagedWrapperGeneratorTool"/>
</Configuration>
<Configuration
Name="Release|Win32"
OutputDirectory=".\Release"
IntermediateDirectory=".\Release"
ConfigurationType="1"
UseOfMFC="0"
ATLMinimizesCRunTimeLibraryUsage="FALSE"
CharacterSet="2">
<Tool
Name="VCCLCompilerTool"
Optimization="2"
InlineFunctionExpansion="1"
AdditionalIncludeDirectories=""
PreprocessorDefinitions="NDEBUG;WIN32;_CONSOLE"
StringPooling="TRUE"
RuntimeLibrary="2"
ForceConformanceInForLoopScope="TRUE"
RuntimeTypeInfo="TRUE"
UsePrecompiledHeader="0"
ProgramDataBaseFileName="$(IntDir)/$(ProjectName).pdb"
WarningLevel="1"
SuppressStartupBanner="TRUE"
Detect64BitPortabilityProblems="TRUE"
DebugInformationFormat="3"
CompileAs="0"
DisableSpecificWarnings="4290"/>
<Tool
Name="VCCustomBuildTool"/>
<Tool
Name="VCLinkerTool"
AdditionalDependencies="comctl32.lib"
OutputFile="../test/avpclc.exe"
LinkIncremental="1"
SuppressStartupBanner="TRUE"
AdditionalLibraryDirectories=""
GenerateDebugInformation="FALSE"
SubSystem="1"
EntryPointSymbol="mainCRTStartup"
TargetMachine="1"/>
<Tool
Name="VCMIDLTool"
TypeLibraryName="./Release/avpcl.tlb"
HeaderFileName=""/>
<Tool
Name="VCPostBuildEventTool"/>
<Tool
Name="VCPreBuildEventTool"/>
<Tool
Name="VCPreLinkEventTool"/>
<Tool
Name="VCResourceCompilerTool"
PreprocessorDefinitions="NDEBUG"
Culture="1033"/>
<Tool
Name="VCWebServiceProxyGeneratorTool"/>
<Tool
Name="VCXMLDataGeneratorTool"/>
<Tool
Name="VCWebDeploymentTool"/>
<Tool
Name="VCManagedWrapperGeneratorTool"/>
<Tool
Name="VCAuxiliaryManagedWrapperGeneratorTool"/>
</Configuration>
</Configurations>
<References>
</References>
<Files>
<Filter
Name="Source Files"
Filter="cpp;c;cxx;rc;def;r;odl;idl;hpj;bat">
<File
RelativePath=".\avpcl.cpp">
</File>
<File
RelativePath=".\avpcl_mode0.cpp">
</File>
<File
RelativePath=".\avpcl_mode1.cpp">
</File>
<File
RelativePath=".\avpcl_mode2.cpp">
</File>
<File
RelativePath=".\avpcl_mode3.cpp">
</File>
<File
RelativePath=".\avpcl_mode4.cpp">
</File>
<File
RelativePath=".\avpcl_mode5.cpp">
</File>
<File
RelativePath=".\avpcl_mode6.cpp">
</File>
<File
RelativePath=".\avpcl_mode7.cpp">
</File>
<File
RelativePath=".\avpclc.cpp">
</File>
<File
RelativePath=".\targa.cpp">
</File>
<File
RelativePath=".\utils.cpp">
</File>
<Filter
Name="arvo"
Filter="">
<File
RelativePath=".\arvo\ArvoMath.cpp">
</File>
<File
RelativePath=".\arvo\Char.cpp">
</File>
<File
RelativePath=".\arvo\Complex.cpp">
</File>
<File
RelativePath=".\arvo\Matrix.cpp">
</File>
<File
RelativePath=".\arvo\Perm.cpp">
</File>
<File
RelativePath=".\arvo\Rand.cpp">
</File>
<File
RelativePath=".\arvo\SphTri.cpp">
</File>
<File
RelativePath=".\arvo\SVD.cpp">
</File>
<File
RelativePath=".\arvo\Token.cpp">
</File>
<File
RelativePath=".\arvo\Vec2.cpp">
</File>
<File
RelativePath=".\arvo\Vec3.cpp">
</File>
<File
RelativePath=".\arvo\Vec4.cpp">
</File>
<File
RelativePath=".\arvo\Vector.cpp">
</File>
</Filter>
</Filter>
<Filter
Name="Header Files"
Filter="h;hpp;hxx;hm;inl">
<File
RelativePath=".\avpcl.h">
</File>
<File
RelativePath=".\bits.h">
</File>
<File
RelativePath=".\endpts.h">
</File>
<File
RelativePath=".\rgba.h">
</File>
<File
RelativePath=".\shapes_three.h">
</File>
<File
RelativePath=".\shapes_two.h">
</File>
<File
RelativePath=".\targa.h">
</File>
<File
RelativePath=".\tile.h">
</File>
<File
RelativePath=".\utils.h">
</File>
<Filter
Name="arvo"
Filter="">
<File
RelativePath=".\arvo\ArvoMath.h">
</File>
<File
RelativePath=".\arvo\Char.h">
</File>
<File
RelativePath=".\arvo\Complex.h">
</File>
<File
RelativePath=".\arvo\form.h">
</File>
<File
RelativePath=".\arvo\Matrix.h">
</File>
<File
RelativePath=".\arvo\Perm.h">
</File>
<File
RelativePath=".\arvo\Rand.h">
</File>
<File
RelativePath=".\arvo\SI_units.h">
</File>
<File
RelativePath=".\arvo\SphTri.h">
</File>
<File
RelativePath=".\arvo\SVD.h">
</File>
<File
RelativePath=".\arvo\Token.h">
</File>
<File
RelativePath=".\arvo\Vec2.h">
</File>
<File
RelativePath=".\arvo\Vec3.h">
</File>
<File
RelativePath=".\arvo\Vec4.h">
</File>
<File
RelativePath=".\arvo\Vector.h">
</File>
</Filter>
</Filter>
<Filter
Name="Resource Files"
Filter="ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe">
</Filter>
</Files>
<Globals>
</Globals>
</VisualStudioProject>

View File

@ -17,13 +17,13 @@ See the License for the specific language governing permissions and limitations
#include "bits.h" #include "bits.h"
#include "tile.h" #include "tile.h"
#include "avpcl.h" #include "avpcl.h"
#include "arvo/Vec4.h" #include "nvcore/Debug.h"
#include "arvo/Matrix.h" #include "nvmath/Vector.inl"
#include "arvo/SVD.h" #include "nvmath/Matrix.inl"
#include "nvmath/Fitting.h"
#include "utils.h" #include "utils.h"
#include "endpts.h" #include "endpts.h"
#include <cstring>
#include <assert.h>
#include "shapes_three.h" #include "shapes_three.h"
@ -33,7 +33,8 @@ See the License for the specific language governing permissions and limitations
#define NSHAPES 16 #define NSHAPES 16
#define SHAPEBITS 4 #define SHAPEBITS 4
using namespace ArvoMath; using namespace nv;
using namespace AVPCL;
#define NLSBMODES 4 // number of different lsb modes per region. since we have two .1 per region, that can have 4 values #define NLSBMODES 4 // number of different lsb modes per region. since we have two .1 per region, that can have 4 values
@ -88,7 +89,7 @@ struct PatternPrec
}; };
// this is the precision for each channel and region // this is the precision for each channel and region
// NOTE: this MUST match the corresponding data in "patterns" above -- WARNING: there is NO assert to check this! // NOTE: this MUST match the corresponding data in "patterns" above -- WARNING: there is NO nvAssert to check this!
static PatternPrec pattern_precs[NPATTERNS] = static PatternPrec pattern_precs[NPATTERNS] =
{ {
4,4,4, 4,4,4, 4,4,4, 4,4,4, 4,4,4, 4,4,4, 4,4,4, 4,4,4, 4,4,4, 4,4,4, 4,4,4, 4,4,4,
@ -107,7 +108,7 @@ static int nbits(int n, bool issigned)
} }
else else
{ {
assert (issigned); nvAssert (issigned);
for (nb=0; n<-1; ++nb, n>>=1) ; for (nb=0; n<-1; ++nb, n>>=1) ;
return nb + 1; return nb + 1;
} }
@ -115,12 +116,12 @@ static int nbits(int n, bool issigned)
static void transform_forward(IntEndptsRGB_2 ep[NREGIONS]) static void transform_forward(IntEndptsRGB_2 ep[NREGIONS])
{ {
assert(0); nvUnreachable();
} }
static void transform_inverse(IntEndptsRGB_2 ep[NREGIONS]) static void transform_inverse(IntEndptsRGB_2 ep[NREGIONS])
{ {
assert(0); nvUnreachable();
} }
// endpoints are 555,555; reduce to 444,444 and put the lsb bit majority in compr_bits // endpoints are 555,555; reduce to 444,444 and put the lsb bit majority in compr_bits
@ -133,7 +134,7 @@ static void compress_one(const IntEndptsRGB& endpts, IntEndptsRGB_2& compr_endpt
{ {
onescnt += endpts.A[j] & 1; onescnt += endpts.A[j] & 1;
compr_endpts.A[j] = endpts.A[j] >> 1; compr_endpts.A[j] = endpts.A[j] >> 1;
assert (compr_endpts.A[j] < 16); nvAssert (compr_endpts.A[j] < 16);
} }
compr_endpts.a_lsb = onescnt >= 2; compr_endpts.a_lsb = onescnt >= 2;
@ -142,7 +143,7 @@ static void compress_one(const IntEndptsRGB& endpts, IntEndptsRGB_2& compr_endpt
{ {
onescnt += endpts.B[j] & 1; onescnt += endpts.B[j] & 1;
compr_endpts.B[j] = endpts.B[j] >> 1; compr_endpts.B[j] = endpts.B[j] >> 1;
assert (compr_endpts.B[j] < 16); nvAssert (compr_endpts.B[j] < 16);
} }
compr_endpts.b_lsb = onescnt >= 2; compr_endpts.b_lsb = onescnt >= 2;
} }
@ -175,12 +176,12 @@ static void quantize_endpts(const FltEndpts endpts[NREGIONS], const PatternPrec
for (int region = 0; region < NREGIONS; ++region) for (int region = 0; region < NREGIONS; ++region)
{ {
full_endpts[region].A[0] = Utils::quantize(endpts[region].A.X(), pattern_prec.region_precs[region].endpt_a_prec[0]+1); // +1 since we are in uncompressed space full_endpts[region].A[0] = Utils::quantize(endpts[region].A.x, pattern_prec.region_precs[region].endpt_a_prec[0]+1); // +1 since we are in uncompressed space
full_endpts[region].A[1] = Utils::quantize(endpts[region].A.Y(), pattern_prec.region_precs[region].endpt_a_prec[1]+1); full_endpts[region].A[1] = Utils::quantize(endpts[region].A.y, pattern_prec.region_precs[region].endpt_a_prec[1]+1);
full_endpts[region].A[2] = Utils::quantize(endpts[region].A.Z(), pattern_prec.region_precs[region].endpt_a_prec[2]+1); full_endpts[region].A[2] = Utils::quantize(endpts[region].A.z, pattern_prec.region_precs[region].endpt_a_prec[2]+1);
full_endpts[region].B[0] = Utils::quantize(endpts[region].B.X(), pattern_prec.region_precs[region].endpt_b_prec[0]+1); full_endpts[region].B[0] = Utils::quantize(endpts[region].B.x, pattern_prec.region_precs[region].endpt_b_prec[0]+1);
full_endpts[region].B[1] = Utils::quantize(endpts[region].B.Y(), pattern_prec.region_precs[region].endpt_b_prec[1]+1); full_endpts[region].B[1] = Utils::quantize(endpts[region].B.y, pattern_prec.region_precs[region].endpt_b_prec[1]+1);
full_endpts[region].B[2] = Utils::quantize(endpts[region].B.Z(), pattern_prec.region_precs[region].endpt_b_prec[2]+1); full_endpts[region].B[2] = Utils::quantize(endpts[region].B.z, pattern_prec.region_precs[region].endpt_b_prec[2]+1);
compress_one(full_endpts[region], q_endpts[region]); compress_one(full_endpts[region], q_endpts[region]);
} }
} }
@ -194,7 +195,7 @@ static void swap_indices(IntEndptsRGB_2 endpts[NREGIONS], int indices[Tile::TILE
int x = POS_TO_X(position); int x = POS_TO_X(position);
int y = POS_TO_Y(position); int y = POS_TO_Y(position);
assert(REGION(x,y,shapeindex) == region); // double check the table nvAssert(REGION(x,y,shapeindex) == region); // double check the table
if (indices[y][x] & HIGH_INDEXBIT) if (indices[y][x] & HIGH_INDEXBIT)
{ {
// high bit is set, swap the endpts and indices for this region // high bit is set, swap the endpts and indices for this region
@ -236,7 +237,7 @@ static void write_header(const IntEndptsRGB_2 endpts[NREGIONS], int shapeindex,
out.write(endpts[i].b_lsb, 1); out.write(endpts[i].b_lsb, 1);
} }
assert (out.getptr() == 83); nvAssert (out.getptr() == 83);
} }
static void read_header(Bits &in, IntEndptsRGB_2 endpts[NREGIONS], int &shapeindex, Pattern &p, int &pat_index) static void read_header(Bits &in, IntEndptsRGB_2 endpts[NREGIONS], int &shapeindex, Pattern &p, int &pat_index)
@ -244,8 +245,8 @@ static void read_header(Bits &in, IntEndptsRGB_2 endpts[NREGIONS], int &shapeind
int mode = AVPCL::getmode(in); int mode = AVPCL::getmode(in);
pat_index = 0; pat_index = 0;
assert (pat_index >= 0 && pat_index < NPATTERNS); nvAssert (pat_index >= 0 && pat_index < NPATTERNS);
assert (in.getptr() == patterns[pat_index].modebits); nvAssert (in.getptr() == patterns[pat_index].modebits);
shapeindex = in.read(SHAPEBITS); shapeindex = in.read(SHAPEBITS);
p = patterns[pat_index]; p = patterns[pat_index];
@ -263,7 +264,7 @@ static void read_header(Bits &in, IntEndptsRGB_2 endpts[NREGIONS], int &shapeind
endpts[i].b_lsb = in.read(1); endpts[i].b_lsb = in.read(1);
} }
assert (in.getptr() == 83); nvAssert (in.getptr() == 83);
} }
static void write_indices(const int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex, Bits &out) static void write_indices(const int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex, Bits &out)
@ -316,10 +317,10 @@ static void emit_block(const IntEndptsRGB_2 endpts[NREGIONS], int shapeindex, co
write_indices(indices, shapeindex, out); write_indices(indices, shapeindex, out);
assert(out.getptr() == AVPCL::BITSIZE); nvAssert(out.getptr() == AVPCL::BITSIZE);
} }
static void generate_palette_quantized(const IntEndptsRGB_2 &endpts_2, const RegionPrec &region_prec, Vec4 palette[NINDICES]) static void generate_palette_quantized(const IntEndptsRGB_2 &endpts_2, const RegionPrec &region_prec, Vector4 palette[NINDICES])
{ {
IntEndptsRGB endpts; IntEndptsRGB endpts;
@ -333,30 +334,30 @@ static void generate_palette_quantized(const IntEndptsRGB_2 &endpts_2, const Reg
// interpolate // interpolate
for (int i = 0; i < NINDICES; ++i) for (int i = 0; i < NINDICES; ++i)
palette[i].X() = PALETTE_LERP(a, b, i, BIAS, DENOM); palette[i].x = float(Utils::lerp(a, b, i, BIAS, DENOM));
a = Utils::unquantize(endpts.A[1], region_prec.endpt_a_prec[1]+1); a = Utils::unquantize(endpts.A[1], region_prec.endpt_a_prec[1]+1);
b = Utils::unquantize(endpts.B[1], region_prec.endpt_b_prec[1]+1); b = Utils::unquantize(endpts.B[1], region_prec.endpt_b_prec[1]+1);
// interpolate // interpolate
for (int i = 0; i < NINDICES; ++i) for (int i = 0; i < NINDICES; ++i)
palette[i].Y() = PALETTE_LERP(a, b, i, BIAS, DENOM); palette[i].y = float(Utils::lerp(a, b, i, BIAS, DENOM));
a = Utils::unquantize(endpts.A[2], region_prec.endpt_a_prec[2]+1); a = Utils::unquantize(endpts.A[2], region_prec.endpt_a_prec[2]+1);
b = Utils::unquantize(endpts.B[2], region_prec.endpt_b_prec[2]+1); b = Utils::unquantize(endpts.B[2], region_prec.endpt_b_prec[2]+1);
// interpolate // interpolate
for (int i = 0; i < NINDICES; ++i) for (int i = 0; i < NINDICES; ++i)
palette[i].Z() = PALETTE_LERP(a, b, i, BIAS, DENOM); palette[i].z = float(Utils::lerp(a, b, i, BIAS, DENOM));
// constant alpha // constant alpha
for (int i = 0; i < NINDICES; ++i) for (int i = 0; i < NINDICES; ++i)
palette[i].W() = RGBA_MAX; palette[i].w = 255.0f;
} }
static void sign_extend(Pattern &p, IntEndptsRGB_2 endpts[NREGIONS]) static void sign_extend(Pattern &p, IntEndptsRGB_2 endpts[NREGIONS])
{ {
assert(0); nvUnreachable();
} }
void AVPCL::decompress_mode0(const char *block, Tile &t) void AVPCL::decompress_mode0(const char *block, Tile &t)
@ -375,7 +376,7 @@ void AVPCL::decompress_mode0(const char *block, Tile &t)
transform_inverse(endpts); transform_inverse(endpts);
} }
Vec4 palette[NREGIONS][NINDICES]; Vector4 palette[NREGIONS][NINDICES];
for (int r = 0; r < NREGIONS; ++r) for (int r = 0; r < NREGIONS; ++r)
generate_palette_quantized(endpts[r], pattern_precs[pat_index].region_precs[r], &palette[r][0]); generate_palette_quantized(endpts[r], pattern_precs[pat_index].region_precs[r], &palette[r][0]);
@ -383,7 +384,7 @@ void AVPCL::decompress_mode0(const char *block, Tile &t)
read_indices(in, shapeindex, indices); read_indices(in, shapeindex, indices);
assert(in.getptr() == AVPCL::BITSIZE); nvAssert(in.getptr() == AVPCL::BITSIZE);
// lookup // lookup
for (int y = 0; y < Tile::TILE_H; y++) for (int y = 0; y < Tile::TILE_H; y++)
@ -392,17 +393,17 @@ void AVPCL::decompress_mode0(const char *block, Tile &t)
} }
// given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr // given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr
static double map_colors(const Vec4 colors[], int np, const IntEndptsRGB_2 &endpts, const RegionPrec &region_prec, double current_err, int indices[Tile::TILE_TOTAL]) static float map_colors(const Vector4 colors[], int np, const IntEndptsRGB_2 &endpts, const RegionPrec &region_prec, float current_err, int indices[Tile::TILE_TOTAL])
{ {
Vec4 palette[NINDICES]; Vector4 palette[NINDICES];
double toterr = 0; float toterr = 0;
Vec4 err; Vector4 err;
generate_palette_quantized(endpts, region_prec, palette); generate_palette_quantized(endpts, region_prec, palette);
for (int i = 0; i < np; ++i) for (int i = 0; i < np; ++i)
{ {
double err, besterr = DBL_MAX; float err, besterr = FLT_MAX;
for (int j = 0; j < NINDICES && besterr > 0; ++j) for (int j = 0; j < NINDICES && besterr > 0; ++j)
{ {
@ -425,7 +426,7 @@ static double map_colors(const Vec4 colors[], int np, const IntEndptsRGB_2 &endp
for (int k = i; k < np; ++k) for (int k = i; k < np; ++k)
indices[k] = -1; indices[k] = -1;
return DBL_MAX; return FLT_MAX;
} }
} }
return toterr; return toterr;
@ -433,10 +434,10 @@ static double map_colors(const Vec4 colors[], int np, const IntEndptsRGB_2 &endp
// assign indices given a tile, shape, and quantized endpoints, return toterr for each region // assign indices given a tile, shape, and quantized endpoints, return toterr for each region
static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGB_2 endpts[NREGIONS], const PatternPrec &pattern_prec, static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGB_2 endpts[NREGIONS], const PatternPrec &pattern_prec,
int indices[Tile::TILE_H][Tile::TILE_W], double toterr[NREGIONS]) int indices[Tile::TILE_H][Tile::TILE_W], float toterr[NREGIONS])
{ {
// build list of possibles // build list of possibles
Vec4 palette[NREGIONS][NINDICES]; Vector4 palette[NREGIONS][NINDICES];
for (int region = 0; region < NREGIONS; ++region) for (int region = 0; region < NREGIONS; ++region)
{ {
@ -444,13 +445,13 @@ static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGB_2 endp
toterr[region] = 0; toterr[region] = 0;
} }
Vec4 err; Vector4 err;
for (int y = 0; y < tile.size_y; y++) for (int y = 0; y < tile.size_y; y++)
for (int x = 0; x < tile.size_x; x++) for (int x = 0; x < tile.size_x; x++)
{ {
int region = REGION(x,y,shapeindex); int region = REGION(x,y,shapeindex);
double err, besterr = DBL_MAX; float err, besterr = FLT_MAX;
for (int i = 0; i < NINDICES && besterr > 0; ++i) for (int i = 0; i < NINDICES && besterr > 0; ++i)
{ {
@ -470,8 +471,8 @@ static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGB_2 endp
// note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's // note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's
// this function returns either old_err or a value smaller (if it was successful in improving the error) // this function returns either old_err or a value smaller (if it was successful in improving the error)
static double perturb_one(const Vec4 colors[], int np, int ch, const RegionPrec &region_prec, const IntEndptsRGB_2 &old_endpts, IntEndptsRGB_2 &new_endpts, static float perturb_one(const Vector4 colors[], int np, int ch, const RegionPrec &region_prec, const IntEndptsRGB_2 &old_endpts, IntEndptsRGB_2 &new_endpts,
double old_err, int do_b, int indices[Tile::TILE_TOTAL]) float old_err, int do_b, int indices[Tile::TILE_TOTAL])
{ {
// we have the old endpoints: old_endpts // we have the old endpoints: old_endpts
// we have the perturbed endpoints: new_endpts // we have the perturbed endpoints: new_endpts
@ -541,10 +542,10 @@ static double perturb_one(const Vec4 colors[], int np, int ch, const RegionPrec
// for np = 16 -- adjust error thresholds as a function of np // for np = 16 -- adjust error thresholds as a function of np
// always ensure endpoint ordering is preserved (no need to overlap the scan) // always ensure endpoint ordering is preserved (no need to overlap the scan)
// if orig_err returned from this is less than its input value, then indices[] will contain valid indices // if orig_err returned from this is less than its input value, then indices[] will contain valid indices
static double exhaustive(const Vec4 colors[], int np, int ch, const RegionPrec &region_prec, double &orig_err, IntEndptsRGB_2 &opt_endpts, int indices[Tile::TILE_TOTAL]) static float exhaustive(const Vector4 colors[], int np, int ch, const RegionPrec &region_prec, float &orig_err, IntEndptsRGB_2 &opt_endpts, int indices[Tile::TILE_TOTAL])
{ {
IntEndptsRGB_2 temp_endpts; IntEndptsRGB_2 temp_endpts;
double best_err = orig_err; float best_err = orig_err;
int aprec = region_prec.endpt_a_prec[ch]; int aprec = region_prec.endpt_a_prec[ch];
int bprec = region_prec.endpt_b_prec[ch]; int bprec = region_prec.endpt_b_prec[ch];
int good_indices[Tile::TILE_TOTAL]; int good_indices[Tile::TILE_TOTAL];
@ -553,7 +554,7 @@ static double exhaustive(const Vec4 colors[], int np, int ch, const RegionPrec &
for (int i=0; i<np; ++i) for (int i=0; i<np; ++i)
indices[i] = -1; indices[i] = -1;
double thr_scale = (double)np / (double)Tile::TILE_TOTAL; float thr_scale = (float)np / (float)Tile::TILE_TOTAL;
if (orig_err == 0) return orig_err; if (orig_err == 0) return orig_err;
@ -562,8 +563,8 @@ static double exhaustive(const Vec4 colors[], int np, int ch, const RegionPrec &
else if (orig_err > 1000.0*thr_scale) { adelta = (1 << aprec)/4; bdelta = (1 << bprec)/4; } else if (orig_err > 1000.0*thr_scale) { adelta = (1 << aprec)/4; bdelta = (1 << bprec)/4; }
else if (orig_err > 200.0*thr_scale) { adelta = (1 << aprec)/8; bdelta = (1 << bprec)/8; } else if (orig_err > 200.0*thr_scale) { adelta = (1 << aprec)/8; bdelta = (1 << bprec)/8; }
else if (orig_err > 40.0*thr_scale) { adelta = (1 << aprec)/16; bdelta = (1 << bprec)/16; } else if (orig_err > 40.0*thr_scale) { adelta = (1 << aprec)/16; bdelta = (1 << bprec)/16; }
adelta = MAX(adelta, 3); adelta = max(adelta, 3);
bdelta = MAX(bdelta, 3); bdelta = max(bdelta, 3);
#ifdef DISABLE_EXHAUSTIVE #ifdef DISABLE_EXHAUSTIVE
adelta = bdelta = 3; adelta = bdelta = 3;
@ -572,10 +573,10 @@ static double exhaustive(const Vec4 colors[], int np, int ch, const RegionPrec &
temp_endpts = opt_endpts; temp_endpts = opt_endpts;
// ok figure out the range of A and B // ok figure out the range of A and B
int alow = MAX(0, opt_endpts.A[ch] - adelta); int alow = max(0, opt_endpts.A[ch] - adelta);
int ahigh = MIN((1<<aprec)-1, opt_endpts.A[ch] + adelta); int ahigh = min((1<<aprec)-1, opt_endpts.A[ch] + adelta);
int blow = MAX(0, opt_endpts.B[ch] - bdelta); int blow = max(0, opt_endpts.B[ch] - bdelta);
int bhigh = MIN((1<<bprec)-1, opt_endpts.B[ch] + bdelta); int bhigh = min((1<<bprec)-1, opt_endpts.B[ch] + bdelta);
// now there's no need to swap the ordering of A and B // now there's no need to swap the ordering of A and B
bool a_le_b = opt_endpts.A[ch] <= opt_endpts.B[ch]; bool a_le_b = opt_endpts.A[ch] <= opt_endpts.B[ch];
@ -586,7 +587,7 @@ static double exhaustive(const Vec4 colors[], int np, int ch, const RegionPrec &
{ {
// keep a <= b // keep a <= b
for (int a = alow; a <= ahigh; ++a) for (int a = alow; a <= ahigh; ++a)
for (int b = MAX(a, blow); b < bhigh; ++b) for (int b = max(a, blow); b < bhigh; ++b)
{ {
temp_endpts.A[ch] = a; temp_endpts.A[ch] = a;
temp_endpts.B[ch] = b; temp_endpts.B[ch] = b;
@ -606,7 +607,7 @@ static double exhaustive(const Vec4 colors[], int np, int ch, const RegionPrec &
{ {
// keep b <= a // keep b <= a
for (int b = blow; b < bhigh; ++b) for (int b = blow; b < bhigh; ++b)
for (int a = MAX(b, alow); a <= ahigh; ++a) for (int a = max(b, alow); a <= ahigh; ++a)
{ {
temp_endpts.A[ch] = a; temp_endpts.A[ch] = a;
temp_endpts.B[ch] = b; temp_endpts.B[ch] = b;
@ -634,9 +635,9 @@ static double exhaustive(const Vec4 colors[], int np, int ch, const RegionPrec &
return best_err; return best_err;
} }
static double optimize_one(const Vec4 colors[], int np, double orig_err, const IntEndptsRGB_2 &orig_endpts, const RegionPrec &region_prec, IntEndptsRGB_2 &opt_endpts) static float optimize_one(const Vector4 colors[], int np, float orig_err, const IntEndptsRGB_2 &orig_endpts, const RegionPrec &region_prec, IntEndptsRGB_2 &opt_endpts)
{ {
double opt_err = orig_err; float opt_err = orig_err;
opt_endpts = orig_endpts; opt_endpts = orig_endpts;
@ -684,7 +685,7 @@ static double optimize_one(const Vec4 colors[], int np, double orig_err, const I
for (int i=0; i<np; ++i) for (int i=0; i<np; ++i)
{ {
new_indices[i] = orig_indices[i] = temp_indices0[i]; new_indices[i] = orig_indices[i] = temp_indices0[i];
assert (orig_indices[i] != -1); nvAssert (orig_indices[i] != -1);
} }
opt_endpts.A[ch] = new_a.A[ch]; opt_endpts.A[ch] = new_a.A[ch];
@ -699,7 +700,7 @@ static double optimize_one(const Vec4 colors[], int np, double orig_err, const I
for (int i=0; i<np; ++i) for (int i=0; i<np; ++i)
{ {
new_indices[i] = orig_indices[i] = temp_indices1[i]; new_indices[i] = orig_indices[i] = temp_indices1[i];
assert (orig_indices[i] != -1); nvAssert (orig_indices[i] != -1);
} }
opt_endpts.B[ch] = new_b.B[ch]; opt_endpts.B[ch] = new_b.B[ch];
@ -717,7 +718,7 @@ static double optimize_one(const Vec4 colors[], int np, double orig_err, const I
for (int i=0; i<np; ++i) for (int i=0; i<np; ++i)
{ {
new_indices[i] = temp_indices0[i]; new_indices[i] = temp_indices0[i];
assert (new_indices[i] != -1); nvAssert (new_indices[i] != -1);
} }
if (do_b == 0) if (do_b == 0)
@ -744,7 +745,7 @@ static double optimize_one(const Vec4 colors[], int np, double orig_err, const I
bool first = true; bool first = true;
for (int ch = 0; ch < NCHANNELS_RGB; ++ch) for (int ch = 0; ch < NCHANNELS_RGB; ++ch)
{ {
double new_err = exhaustive(colors, np, ch, region_prec, opt_err, opt_endpts, temp_indices0); float new_err = exhaustive(colors, np, ch, region_prec, opt_err, opt_endpts, temp_indices0);
if (new_err < opt_err) if (new_err < opt_err)
{ {
@ -755,7 +756,7 @@ static double optimize_one(const Vec4 colors[], int np, double orig_err, const I
for (int i=0; i<np; ++i) for (int i=0; i<np; ++i)
{ {
orig_indices[i] = temp_indices0[i]; orig_indices[i] = temp_indices0[i];
assert (orig_indices[i] != -1); nvAssert (orig_indices[i] != -1);
} }
first = false; first = false;
} }
@ -780,10 +781,10 @@ static double optimize_one(const Vec4 colors[], int np, double orig_err, const I
} }
// this will return a valid set of endpoints in opt_endpts regardless of whether it improve orig_endpts or not // this will return a valid set of endpoints in opt_endpts regardless of whether it improve orig_endpts or not
static void optimize_endpts(const Tile &tile, int shapeindex, const double orig_err[NREGIONS], static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_err[NREGIONS],
const IntEndptsRGB_2 orig_endpts[NREGIONS], const PatternPrec &pattern_prec, double opt_err[NREGIONS], IntEndptsRGB_2 opt_endpts[NREGIONS]) const IntEndptsRGB_2 orig_endpts[NREGIONS], const PatternPrec &pattern_prec, float opt_err[NREGIONS], IntEndptsRGB_2 opt_endpts[NREGIONS])
{ {
Vec4 pixels[Tile::TILE_TOTAL]; Vector4 pixels[Tile::TILE_TOTAL];
IntEndptsRGB_2 temp_in, temp_out; IntEndptsRGB_2 temp_in, temp_out;
int temp_indices[Tile::TILE_TOTAL]; int temp_indices[Tile::TILE_TOTAL];
@ -800,7 +801,7 @@ static void optimize_endpts(const Tile &tile, int shapeindex, const double orig_
opt_endpts[region] = temp_in = orig_endpts[region]; opt_endpts[region] = temp_in = orig_endpts[region];
opt_err[region] = orig_err[region]; opt_err[region] = orig_err[region];
double best_err = orig_err[region]; float best_err = orig_err[region];
for (int lsbmode=0; lsbmode<NLSBMODES; ++lsbmode) for (int lsbmode=0; lsbmode<NLSBMODES; ++lsbmode)
{ {
@ -808,12 +809,12 @@ static void optimize_endpts(const Tile &tile, int shapeindex, const double orig_
temp_in.b_lsb = (lsbmode >> 1) & 1; temp_in.b_lsb = (lsbmode >> 1) & 1;
// make sure we have a valid error for temp_in // make sure we have a valid error for temp_in
// we use DBL_MAX here because we want an accurate temp_in_err, no shortcuts // we use FLT_MAX here because we want an accurate temp_in_err, no shortcuts
// (mapcolors will compute a mapping but will stop if the error exceeds the value passed in the DBL_MAX position) // (mapcolors will compute a mapping but will stop if the error exceeds the value passed in the FLT_MAX position)
double temp_in_err = map_colors(pixels, np, temp_in, pattern_prec.region_precs[region], DBL_MAX, temp_indices); float temp_in_err = map_colors(pixels, np, temp_in, pattern_prec.region_precs[region], FLT_MAX, temp_indices);
// now try to optimize these endpoints // now try to optimize these endpoints
double temp_out_err = optimize_one(pixels, np, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out); float temp_out_err = optimize_one(pixels, np, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out);
// if we find an improvement, update the best so far and correct the output endpoints and errors // if we find an improvement, update the best so far and correct the output endpoints and errors
if (temp_out_err < best_err) if (temp_out_err < best_err)
@ -843,9 +844,9 @@ static void optimize_endpts(const Tile &tile, int shapeindex, const double orig_
emit compressed block with original data // to try to preserve maximum endpoint precision emit compressed block with original data // to try to preserve maximum endpoint precision
*/ */
static double refine(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS], char *block) static float refine(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS], char *block)
{ {
double orig_err[NREGIONS], opt_err[NREGIONS], orig_toterr, opt_toterr, expected_opt_err[NREGIONS]; float orig_err[NREGIONS], opt_err[NREGIONS], orig_toterr, opt_toterr, expected_opt_err[NREGIONS];
IntEndptsRGB_2 orig_endpts[NREGIONS], opt_endpts[NREGIONS]; IntEndptsRGB_2 orig_endpts[NREGIONS], opt_endpts[NREGIONS];
int orig_indices[Tile::TILE_H][Tile::TILE_W], opt_indices[Tile::TILE_H][Tile::TILE_W]; int orig_indices[Tile::TILE_H][Tile::TILE_W], opt_indices[Tile::TILE_H][Tile::TILE_W];
@ -864,8 +865,9 @@ static double refine(const Tile &tile, int shapeindex_best, const FltEndpts endp
transform_inverse(orig_endpts); transform_inverse(orig_endpts);
optimize_endpts(tile, shapeindex_best, orig_err, orig_endpts, pattern_precs[sp], expected_opt_err, opt_endpts); optimize_endpts(tile, shapeindex_best, orig_err, orig_endpts, pattern_precs[sp], expected_opt_err, opt_endpts);
assign_indices(tile, shapeindex_best, opt_endpts, pattern_precs[sp], opt_indices, opt_err); assign_indices(tile, shapeindex_best, opt_endpts, pattern_precs[sp], opt_indices, opt_err);
for (int i=0; i<NREGIONS; ++i) // (nreed) Commented out asserts because they go off all the time...not sure why
assert(expected_opt_err[i] == opt_err[i]); //for (int i=0; i<NREGIONS; ++i)
// nvAssert(expected_opt_err[i] == opt_err[i]);
swap_indices(opt_endpts, opt_indices, shapeindex_best); swap_indices(opt_endpts, opt_indices, shapeindex_best);
if (patterns[sp].transformed) if (patterns[sp].transformed)
transform_forward(opt_endpts); transform_forward(opt_endpts);
@ -890,40 +892,40 @@ static double refine(const Tile &tile, int shapeindex_best, const FltEndpts endp
throw "No candidate found, should never happen (mode avpcl 0)."; throw "No candidate found, should never happen (mode avpcl 0).";
} }
static void clamp(Vec4 &v) static void clamp(Vector4 &v)
{ {
if (v.X() < RGBA_MIN) v.X() = RGBA_MIN; if (v.x < 0.0f) v.x = 0.0f;
if (v.X() > RGBA_MAX) v.X() = RGBA_MAX; if (v.x > 255.0f) v.x = 255.0f;
if (v.Y() < RGBA_MIN) v.Y() = RGBA_MIN; if (v.y < 0.0f) v.y = 0.0f;
if (v.Y() > RGBA_MAX) v.Y() = RGBA_MAX; if (v.y > 255.0f) v.y = 255.0f;
if (v.Z() < RGBA_MIN) v.Z() = RGBA_MIN; if (v.z < 0.0f) v.z = 0.0f;
if (v.Z() > RGBA_MAX) v.Z() = RGBA_MAX; if (v.z > 255.0f) v.z = 255.0f;
v.W() = RGBA_MAX; v.w = 255.0f;
} }
static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS], Vec4 palette[NREGIONS][NINDICES]) static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS], Vector4 palette[NREGIONS][NINDICES])
{ {
for (int region = 0; region < NREGIONS; ++region) for (int region = 0; region < NREGIONS; ++region)
for (int i = 0; i < NINDICES; ++i) for (int i = 0; i < NINDICES; ++i)
palette[region][i] = PALETTE_LERP(endpts[region].A, endpts[region].B, i, 0.0, float(DENOM)); palette[region][i] = Utils::lerp(endpts[region].A, endpts[region].B, i, 0, DENOM);
} }
// generate a palette from unquantized endpoints, then pick best palette color for all pixels in each region, return toterr for all regions combined // generate a palette from unquantized endpoints, then pick best palette color for all pixels in each region, return toterr for all regions combined
static double map_colors(const Tile &tile, int shapeindex, const FltEndpts endpts[NREGIONS]) static float map_colors(const Tile &tile, int shapeindex, const FltEndpts endpts[NREGIONS])
{ {
// build list of possibles // build list of possibles
Vec4 palette[NREGIONS][NINDICES]; Vector4 palette[NREGIONS][NINDICES];
generate_palette_unquantized(endpts, palette); generate_palette_unquantized(endpts, palette);
double toterr = 0; float toterr = 0;
Vec4 err; Vector4 err;
for (int y = 0; y < tile.size_y; y++) for (int y = 0; y < tile.size_y; y++)
for (int x = 0; x < tile.size_x; x++) for (int x = 0; x < tile.size_x; x++)
{ {
int region = REGION(x,y,shapeindex); int region = REGION(x,y,shapeindex);
double err, besterr = DBL_MAX; float err, besterr = FLT_MAX;
for (int i = 0; i < NINDICES && besterr > 0; ++i) for (int i = 0; i < NINDICES && besterr > 0; ++i)
{ {
@ -941,19 +943,21 @@ static double map_colors(const Tile &tile, int shapeindex, const FltEndpts endpt
// for this mode, we assume alpha = 255 constant and compress only the RGB portion. // for this mode, we assume alpha = 255 constant and compress only the RGB portion.
// however, we do the error check against the actual alpha values supplied for the tile. // however, we do the error check against the actual alpha values supplied for the tile.
static double rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS]) static float rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS])
{ {
for (int region=0; region<NREGIONS; ++region) for (int region=0; region<NREGIONS; ++region)
{ {
int np = 0; int np = 0;
Vec4 colors[Tile::TILE_TOTAL]; Vector3 colors[Tile::TILE_TOTAL];
Vec4 mean(0,0,0,0); float alphas[2];
Vector4 mean(0,0,0,0);
for (int y = 0; y < tile.size_y; y++) for (int y = 0; y < tile.size_y; y++)
for (int x = 0; x < tile.size_x; x++) for (int x = 0; x < tile.size_x; x++)
if (REGION(x,y,shapeindex) == region) if (REGION(x,y,shapeindex) == region)
{ {
colors[np] = tile.data[y][x]; colors[np] = tile.data[y][x].xyz();
if (np < 2) alphas[np] = tile.data[y][x].w;
mean += tile.data[y][x]; mean += tile.data[y][x];
++np; ++np;
} }
@ -961,54 +965,40 @@ static double rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS]
// handle simple cases // handle simple cases
if (np == 0) if (np == 0)
{ {
Vec4 zero(0,0,0,RGBA_MAX); Vector4 zero(0,0,0,255.0f);
endpts[region].A = zero; endpts[region].A = zero;
endpts[region].B = zero; endpts[region].B = zero;
continue; continue;
} }
else if (np == 1) else if (np == 1)
{ {
endpts[region].A = colors[0]; endpts[region].A = Vector4(colors[0], alphas[0]);
endpts[region].B = colors[0]; endpts[region].B = Vector4(colors[0], alphas[0]);
continue; continue;
} }
else if (np == 2) else if (np == 2)
{ {
endpts[region].A = colors[0]; endpts[region].A = Vector4(colors[0], alphas[0]);
endpts[region].B = colors[1]; endpts[region].B = Vector4(colors[1], alphas[1]);
continue; continue;
} }
Matrix rdq(np, 3);
mean /= float(np); mean /= float(np);
// only look at RGB; ignore A Vector3 direction = Fit::computePrincipalComponent_EigenSolver(np, colors);
for (int i = 0; i < np; ++i)
{
rdq(i,0) = colors[i].X() - mean.X();
rdq(i,1) = colors[i].Y() - mean.Y();
rdq(i,2) = colors[i].Z() - mean.Z();
}
// perform a singular value decomposition
SVD svd(rdq);
// get the principal component direction (the one with the largest weight)
Vec4 direction(svd.R()(0,0), svd.R()(0,1), svd.R()(0,2), 0.0);
// project each pixel value along the principal direction // project each pixel value along the principal direction
double minp = DBL_MAX, maxp = -DBL_MAX; float minp = FLT_MAX, maxp = -FLT_MAX;
for (int i = 0; i < np; i++) for (int i = 0; i < np; i++)
{ {
float dp = rdq(i,0) * direction.X() + rdq(i,1)*direction.Y() + rdq(i,2)*direction.Z(); float dp = dot(colors[i]-mean.xyz(), direction);
if (dp < minp) minp = dp; if (dp < minp) minp = dp;
if (dp > maxp) maxp = dp; if (dp > maxp) maxp = dp;
} }
// choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values // choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values
endpts[region].A = mean + minp*direction; endpts[region].A = mean + minp*Vector4(direction, 0);
endpts[region].B = mean + maxp*direction; endpts[region].B = mean + maxp*Vector4(direction, 0);
// clamp endpoints // clamp endpoints
// the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best // the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best
@ -1020,13 +1010,13 @@ static double rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS]
return map_colors(tile, shapeindex, endpts); return map_colors(tile, shapeindex, endpts);
} }
static void swap(double *list1, int *list2, int i, int j) static void swap(float *list1, int *list2, int i, int j)
{ {
double t = list1[i]; list1[i] = list1[j]; list1[j] = t; float t = list1[i]; list1[i] = list1[j]; list1[j] = t;
int t1 = list2[i]; list2[i] = list2[j]; list2[j] = t1; int t1 = list2[i]; list2[i] = list2[j]; list2[j] = t1;
} }
double AVPCL::compress_mode0(const Tile &t, char *block) float AVPCL::compress_mode0(const Tile &t, char *block)
{ {
// number of rough cases to look at. reasonable values of this are 1, NSHAPES/4, and NSHAPES // number of rough cases to look at. reasonable values of this are 1, NSHAPES/4, and NSHAPES
// NSHAPES/4 gets nearly all the cases; you can increase that a bit (say by 3 or 4) if you really want to squeeze the last bit out // NSHAPES/4 gets nearly all the cases; you can increase that a bit (say by 3 or 4) if you really want to squeeze the last bit out
@ -1036,10 +1026,10 @@ double AVPCL::compress_mode0(const Tile &t, char *block)
struct { struct {
FltEndpts endpts[NREGIONS]; FltEndpts endpts[NREGIONS];
} all[NSHAPES]; } all[NSHAPES];
double roughmse[NSHAPES]; float roughmse[NSHAPES];
int index[NSHAPES]; int index[NSHAPES];
char tempblock[AVPCL::BLOCKSIZE]; char tempblock[AVPCL::BLOCKSIZE];
double msebest = DBL_MAX; float msebest = FLT_MAX;
for (int i=0; i<NSHAPES; ++i) for (int i=0; i<NSHAPES; ++i)
{ {
@ -1056,7 +1046,7 @@ double AVPCL::compress_mode0(const Tile &t, char *block)
for (int i=0; i<NITEMS && msebest>0; ++i) for (int i=0; i<NITEMS && msebest>0; ++i)
{ {
int shape = index[i]; int shape = index[i];
double mse = refine(t, shape, &all[shape].endpts[0], tempblock); float mse = refine(t, shape, &all[shape].endpts[0], tempblock);
if (mse < msebest) if (mse < msebest)
{ {
memcpy(block, tempblock, sizeof(tempblock)); memcpy(block, tempblock, sizeof(tempblock));

View File

@ -17,17 +17,18 @@ See the License for the specific language governing permissions and limitations
#include "bits.h" #include "bits.h"
#include "tile.h" #include "tile.h"
#include "avpcl.h" #include "avpcl.h"
#include "arvo/Vec4.h" #include "nvcore/Debug.h"
#include "arvo/Matrix.h" #include "nvmath/Vector.inl"
#include "arvo/SVD.h" #include "nvmath/Matrix.inl"
#include "nvmath/Fitting.h"
#include "utils.h" #include "utils.h"
#include "endpts.h" #include "endpts.h"
#include <cstring>
#include <assert.h>
#include "shapes_two.h" #include "shapes_two.h"
using namespace ArvoMath; using namespace nv;
using namespace AVPCL;
#define NLSBMODES 2 // number of different lsb modes per region. since we have one .1 per region, that can have 2 values #define NLSBMODES 2 // number of different lsb modes per region. since we have one .1 per region, that can have 2 values
@ -83,7 +84,7 @@ struct PatternPrec
// this is the precision for each channel and region // this is the precision for each channel and region
// NOTE: this MUST match the corresponding data in "patterns" above -- WARNING: there is NO assert to check this! // NOTE: this MUST match the corresponding data in "patterns" above -- WARNING: there is NO nvAssert to check this!
static PatternPrec pattern_precs[NPATTERNS] = static PatternPrec pattern_precs[NPATTERNS] =
{ {
6,6,6, 6,6,6, 6,6,6, 6,6,6, 6,6,6, 6,6,6, 6,6,6, 6,6,6,
@ -102,7 +103,7 @@ static int nbits(int n, bool issigned)
} }
else else
{ {
assert (issigned); nvAssert (issigned);
for (nb=0; n<-1; ++nb, n>>=1) ; for (nb=0; n<-1; ++nb, n>>=1) ;
return nb + 1; return nb + 1;
} }
@ -111,12 +112,12 @@ static int nbits(int n, bool issigned)
static void transform_forward(IntEndptsRGB_1 ep[NREGIONS]) static void transform_forward(IntEndptsRGB_1 ep[NREGIONS])
{ {
assert(0); nvUnreachable();
} }
static void transform_inverse(IntEndptsRGB_1 ep[NREGIONS]) static void transform_inverse(IntEndptsRGB_1 ep[NREGIONS])
{ {
assert(0); nvUnreachable();
} }
// endpoints are 777,777; reduce to 666,666 and put the lsb bit majority in compr_bits // endpoints are 777,777; reduce to 666,666 and put the lsb bit majority in compr_bits
@ -131,8 +132,8 @@ static void compress_one(const IntEndptsRGB& endpts, IntEndptsRGB_1& compr_endpt
compr_endpts.A[j] = endpts.A[j] >> 1; compr_endpts.A[j] = endpts.A[j] >> 1;
onescnt += endpts.B[j] & 1; onescnt += endpts.B[j] & 1;
compr_endpts.B[j] = endpts.B[j] >> 1; compr_endpts.B[j] = endpts.B[j] >> 1;
assert (compr_endpts.A[j] < 64); nvAssert (compr_endpts.A[j] < 64);
assert (compr_endpts.B[j] < 64); nvAssert (compr_endpts.B[j] < 64);
} }
compr_endpts.lsb = onescnt >= 3; compr_endpts.lsb = onescnt >= 3;
} }
@ -165,12 +166,12 @@ static void quantize_endpts(const FltEndpts endpts[NREGIONS], const PatternPrec
for (int region = 0; region < NREGIONS; ++region) for (int region = 0; region < NREGIONS; ++region)
{ {
full_endpts[region].A[0] = Utils::quantize(endpts[region].A.X(), pattern_prec.region_precs[region].endpt_a_prec[0]+1); // +1 since we are in uncompressed space full_endpts[region].A[0] = Utils::quantize(endpts[region].A.x, pattern_prec.region_precs[region].endpt_a_prec[0]+1); // +1 since we are in uncompressed space
full_endpts[region].A[1] = Utils::quantize(endpts[region].A.Y(), pattern_prec.region_precs[region].endpt_a_prec[1]+1); full_endpts[region].A[1] = Utils::quantize(endpts[region].A.y, pattern_prec.region_precs[region].endpt_a_prec[1]+1);
full_endpts[region].A[2] = Utils::quantize(endpts[region].A.Z(), pattern_prec.region_precs[region].endpt_a_prec[2]+1); full_endpts[region].A[2] = Utils::quantize(endpts[region].A.z, pattern_prec.region_precs[region].endpt_a_prec[2]+1);
full_endpts[region].B[0] = Utils::quantize(endpts[region].B.X(), pattern_prec.region_precs[region].endpt_b_prec[0]+1); full_endpts[region].B[0] = Utils::quantize(endpts[region].B.x, pattern_prec.region_precs[region].endpt_b_prec[0]+1);
full_endpts[region].B[1] = Utils::quantize(endpts[region].B.Y(), pattern_prec.region_precs[region].endpt_b_prec[1]+1); full_endpts[region].B[1] = Utils::quantize(endpts[region].B.y, pattern_prec.region_precs[region].endpt_b_prec[1]+1);
full_endpts[region].B[2] = Utils::quantize(endpts[region].B.Z(), pattern_prec.region_precs[region].endpt_b_prec[2]+1); full_endpts[region].B[2] = Utils::quantize(endpts[region].B.z, pattern_prec.region_precs[region].endpt_b_prec[2]+1);
compress_one(full_endpts[region], q_endpts[region]); compress_one(full_endpts[region], q_endpts[region]);
} }
} }
@ -184,7 +185,7 @@ static void swap_indices(IntEndptsRGB_1 endpts[NREGIONS], int indices[Tile::TILE
int x = POS_TO_X(position); int x = POS_TO_X(position);
int y = POS_TO_Y(position); int y = POS_TO_Y(position);
assert(REGION(x,y,shapeindex) == region); // double check the table nvAssert(REGION(x,y,shapeindex) == region); // double check the table
if (indices[y][x] & HIGH_INDEXBIT) if (indices[y][x] & HIGH_INDEXBIT)
{ {
// high bit is set, swap the endpts and indices for this region // high bit is set, swap the endpts and indices for this region
@ -220,7 +221,7 @@ static void write_header(const IntEndptsRGB_1 endpts[NREGIONS], int shapeindex,
for (int i=0; i<NREGIONS; ++i) for (int i=0; i<NREGIONS; ++i)
out.write(endpts[i].lsb, 1); out.write(endpts[i].lsb, 1);
assert (out.getptr() == 82); nvAssert (out.getptr() == 82);
} }
static void read_header(Bits &in, IntEndptsRGB_1 endpts[NREGIONS], int &shapeindex, Pattern &p, int &pat_index) static void read_header(Bits &in, IntEndptsRGB_1 endpts[NREGIONS], int &shapeindex, Pattern &p, int &pat_index)
@ -228,8 +229,8 @@ static void read_header(Bits &in, IntEndptsRGB_1 endpts[NREGIONS], int &shapeind
int mode = AVPCL::getmode(in); int mode = AVPCL::getmode(in);
pat_index = 0; pat_index = 0;
assert (pat_index >= 0 && pat_index < NPATTERNS); nvAssert (pat_index >= 0 && pat_index < NPATTERNS);
assert (in.getptr() == patterns[pat_index].modebits); nvAssert (in.getptr() == patterns[pat_index].modebits);
shapeindex = in.read(SHAPEBITS); shapeindex = in.read(SHAPEBITS);
p = patterns[pat_index]; p = patterns[pat_index];
@ -244,7 +245,7 @@ static void read_header(Bits &in, IntEndptsRGB_1 endpts[NREGIONS], int &shapeind
for (int i=0; i<NREGIONS; ++i) for (int i=0; i<NREGIONS; ++i)
endpts[i].lsb = in.read(1); endpts[i].lsb = in.read(1);
assert (in.getptr() == 82); nvAssert (in.getptr() == 82);
} }
static void write_indices(const int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex, Bits &out) static void write_indices(const int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex, Bits &out)
@ -297,10 +298,10 @@ static void emit_block(const IntEndptsRGB_1 endpts[NREGIONS], int shapeindex, co
write_indices(indices, shapeindex, out); write_indices(indices, shapeindex, out);
assert(out.getptr() == AVPCL::BITSIZE); nvAssert(out.getptr() == AVPCL::BITSIZE);
} }
static void generate_palette_quantized(const IntEndptsRGB_1 &endpts_1, const RegionPrec &region_prec, Vec4 palette[NINDICES]) static void generate_palette_quantized(const IntEndptsRGB_1 &endpts_1, const RegionPrec &region_prec, Vector4 palette[NINDICES])
{ {
IntEndptsRGB endpts; IntEndptsRGB endpts;
@ -316,31 +317,31 @@ static void generate_palette_quantized(const IntEndptsRGB_1 &endpts_1, const Reg
// interpolate // interpolate
for (int i = 0; i < NINDICES; ++i) for (int i = 0; i < NINDICES; ++i)
palette[i].X() = PALETTE_LERP(a, b, i, BIAS, DENOM); palette[i].x = float(Utils::lerp(a, b, i, BIAS, DENOM));
a = Utils::unquantize(endpts.A[1], region_prec.endpt_a_prec[1]+1); a = Utils::unquantize(endpts.A[1], region_prec.endpt_a_prec[1]+1);
b = Utils::unquantize(endpts.B[1], region_prec.endpt_b_prec[1]+1); b = Utils::unquantize(endpts.B[1], region_prec.endpt_b_prec[1]+1);
// interpolate // interpolate
for (int i = 0; i < NINDICES; ++i) for (int i = 0; i < NINDICES; ++i)
palette[i].Y() = PALETTE_LERP(a, b, i, BIAS, DENOM); palette[i].y = float(Utils::lerp(a, b, i, BIAS, DENOM));
a = Utils::unquantize(endpts.A[2], region_prec.endpt_a_prec[2]+1); a = Utils::unquantize(endpts.A[2], region_prec.endpt_a_prec[2]+1);
b = Utils::unquantize(endpts.B[2], region_prec.endpt_b_prec[2]+1); b = Utils::unquantize(endpts.B[2], region_prec.endpt_b_prec[2]+1);
// interpolate // interpolate
for (int i = 0; i < NINDICES; ++i) for (int i = 0; i < NINDICES; ++i)
palette[i].Z() = PALETTE_LERP(a, b, i, BIAS, DENOM); palette[i].z = float(Utils::lerp(a, b, i, BIAS, DENOM));
// constant alpha // constant alpha
for (int i = 0; i < NINDICES; ++i) for (int i = 0; i < NINDICES; ++i)
palette[i].W() = RGBA_MAX; palette[i].w = 255.0f;
} }
// sign extend but only if it was transformed // sign extend but only if it was transformed
static void sign_extend(Pattern &p, IntEndptsRGB_1 endpts[NREGIONS]) static void sign_extend(Pattern &p, IntEndptsRGB_1 endpts[NREGIONS])
{ {
assert(0); nvUnreachable();
} }
void AVPCL::decompress_mode1(const char *block, Tile &t) void AVPCL::decompress_mode1(const char *block, Tile &t)
@ -359,7 +360,7 @@ void AVPCL::decompress_mode1(const char *block, Tile &t)
transform_inverse(endpts); transform_inverse(endpts);
} }
Vec4 palette[NREGIONS][NINDICES]; Vector4 palette[NREGIONS][NINDICES];
for (int r = 0; r < NREGIONS; ++r) for (int r = 0; r < NREGIONS; ++r)
generate_palette_quantized(endpts[r], pattern_precs[pat_index].region_precs[r], &palette[r][0]); generate_palette_quantized(endpts[r], pattern_precs[pat_index].region_precs[r], &palette[r][0]);
@ -367,7 +368,7 @@ void AVPCL::decompress_mode1(const char *block, Tile &t)
read_indices(in, shapeindex, indices); read_indices(in, shapeindex, indices);
assert(in.getptr() == AVPCL::BITSIZE); nvAssert(in.getptr() == AVPCL::BITSIZE);
// lookup // lookup
for (int y = 0; y < Tile::TILE_H; y++) for (int y = 0; y < Tile::TILE_H; y++)
@ -376,17 +377,17 @@ void AVPCL::decompress_mode1(const char *block, Tile &t)
} }
// given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr // given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr
static double map_colors(const Vec4 colors[], int np, const IntEndptsRGB_1 &endpts, const RegionPrec &region_prec, double current_err, int indices[Tile::TILE_TOTAL]) static float map_colors(const Vector4 colors[], int np, const IntEndptsRGB_1 &endpts, const RegionPrec &region_prec, float current_err, int indices[Tile::TILE_TOTAL])
{ {
Vec4 palette[NINDICES]; Vector4 palette[NINDICES];
double toterr = 0; float toterr = 0;
Vec4 err; Vector4 err;
generate_palette_quantized(endpts, region_prec, palette); generate_palette_quantized(endpts, region_prec, palette);
for (int i = 0; i < np; ++i) for (int i = 0; i < np; ++i)
{ {
double err, besterr = DBL_MAX; float err, besterr = FLT_MAX;
for (int j = 0; j < NINDICES && besterr > 0; ++j) for (int j = 0; j < NINDICES && besterr > 0; ++j)
{ {
@ -409,7 +410,7 @@ static double map_colors(const Vec4 colors[], int np, const IntEndptsRGB_1 &endp
for (int k = i; k < np; ++k) for (int k = i; k < np; ++k)
indices[k] = -1; indices[k] = -1;
return DBL_MAX; return FLT_MAX;
} }
} }
return toterr; return toterr;
@ -417,10 +418,10 @@ static double map_colors(const Vec4 colors[], int np, const IntEndptsRGB_1 &endp
// assign indices given a tile, shape, and quantized endpoints, return toterr for each region // assign indices given a tile, shape, and quantized endpoints, return toterr for each region
static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGB_1 endpts[NREGIONS], const PatternPrec &pattern_prec, static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGB_1 endpts[NREGIONS], const PatternPrec &pattern_prec,
int indices[Tile::TILE_H][Tile::TILE_W], double toterr[NREGIONS]) int indices[Tile::TILE_H][Tile::TILE_W], float toterr[NREGIONS])
{ {
// build list of possibles // build list of possibles
Vec4 palette[NREGIONS][NINDICES]; Vector4 palette[NREGIONS][NINDICES];
for (int region = 0; region < NREGIONS; ++region) for (int region = 0; region < NREGIONS; ++region)
{ {
@ -428,13 +429,13 @@ static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGB_1 endp
toterr[region] = 0; toterr[region] = 0;
} }
Vec4 err; Vector4 err;
for (int y = 0; y < tile.size_y; y++) for (int y = 0; y < tile.size_y; y++)
for (int x = 0; x < tile.size_x; x++) for (int x = 0; x < tile.size_x; x++)
{ {
int region = REGION(x,y,shapeindex); int region = REGION(x,y,shapeindex);
double err, besterr = DBL_MAX; float err, besterr = FLT_MAX;
for (int i = 0; i < NINDICES && besterr > 0; ++i) for (int i = 0; i < NINDICES && besterr > 0; ++i)
{ {
@ -454,8 +455,8 @@ static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGB_1 endp
// note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's // note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's
// this function returns either old_err or a value smaller (if it was successful in improving the error) // this function returns either old_err or a value smaller (if it was successful in improving the error)
static double perturb_one(const Vec4 colors[], int np, int ch, const RegionPrec &region_prec, const IntEndptsRGB_1 &old_endpts, IntEndptsRGB_1 &new_endpts, static float perturb_one(const Vector4 colors[], int np, int ch, const RegionPrec &region_prec, const IntEndptsRGB_1 &old_endpts, IntEndptsRGB_1 &new_endpts,
double old_err, int do_b, int indices[Tile::TILE_TOTAL]) float old_err, int do_b, int indices[Tile::TILE_TOTAL])
{ {
// we have the old endpoints: old_endpts // we have the old endpoints: old_endpts
// we have the perturbed endpoints: new_endpts // we have the perturbed endpoints: new_endpts
@ -525,10 +526,10 @@ static double perturb_one(const Vec4 colors[], int np, int ch, const RegionPrec
// for np = 16 -- adjust error thresholds as a function of np // for np = 16 -- adjust error thresholds as a function of np
// always ensure endpoint ordering is preserved (no need to overlap the scan) // always ensure endpoint ordering is preserved (no need to overlap the scan)
// if orig_err returned from this is less than its input value, then indices[] will contain valid indices // if orig_err returned from this is less than its input value, then indices[] will contain valid indices
static double exhaustive(const Vec4 colors[], int np, int ch, const RegionPrec &region_prec, double orig_err, IntEndptsRGB_1 &opt_endpts, int indices[Tile::TILE_TOTAL]) static float exhaustive(const Vector4 colors[], int np, int ch, const RegionPrec &region_prec, float orig_err, IntEndptsRGB_1 &opt_endpts, int indices[Tile::TILE_TOTAL])
{ {
IntEndptsRGB_1 temp_endpts; IntEndptsRGB_1 temp_endpts;
double best_err = orig_err; float best_err = orig_err;
int aprec = region_prec.endpt_a_prec[ch]; int aprec = region_prec.endpt_a_prec[ch];
int bprec = region_prec.endpt_b_prec[ch]; int bprec = region_prec.endpt_b_prec[ch];
int good_indices[Tile::TILE_TOTAL]; int good_indices[Tile::TILE_TOTAL];
@ -537,7 +538,7 @@ static double exhaustive(const Vec4 colors[], int np, int ch, const RegionPrec &
for (int i=0; i<np; ++i) for (int i=0; i<np; ++i)
indices[i] = -1; indices[i] = -1;
double thr_scale = (double)np / (double)Tile::TILE_TOTAL; float thr_scale = (float)np / (float)Tile::TILE_TOTAL;
if (orig_err == 0) return orig_err; if (orig_err == 0) return orig_err;
@ -546,8 +547,8 @@ static double exhaustive(const Vec4 colors[], int np, int ch, const RegionPrec &
else if (orig_err > 1000.0*thr_scale) { adelta = (1 << aprec)/4; bdelta = (1 << bprec)/4; } else if (orig_err > 1000.0*thr_scale) { adelta = (1 << aprec)/4; bdelta = (1 << bprec)/4; }
else if (orig_err > 200.0*thr_scale) { adelta = (1 << aprec)/8; bdelta = (1 << bprec)/8; } else if (orig_err > 200.0*thr_scale) { adelta = (1 << aprec)/8; bdelta = (1 << bprec)/8; }
else if (orig_err > 40.0*thr_scale) { adelta = (1 << aprec)/16; bdelta = (1 << bprec)/16; } else if (orig_err > 40.0*thr_scale) { adelta = (1 << aprec)/16; bdelta = (1 << bprec)/16; }
adelta = MAX(adelta, 3); adelta = max(adelta, 3);
bdelta = MAX(bdelta, 3); bdelta = max(bdelta, 3);
#ifdef DISABLE_EXHAUSTIVE #ifdef DISABLE_EXHAUSTIVE
adelta = bdelta = 3; adelta = bdelta = 3;
@ -556,10 +557,10 @@ static double exhaustive(const Vec4 colors[], int np, int ch, const RegionPrec &
temp_endpts = opt_endpts; temp_endpts = opt_endpts;
// ok figure out the range of A and B // ok figure out the range of A and B
int alow = MAX(0, opt_endpts.A[ch] - adelta); int alow = max(0, opt_endpts.A[ch] - adelta);
int ahigh = MIN((1<<aprec)-1, opt_endpts.A[ch] + adelta); int ahigh = min((1<<aprec)-1, opt_endpts.A[ch] + adelta);
int blow = MAX(0, opt_endpts.B[ch] - bdelta); int blow = max(0, opt_endpts.B[ch] - bdelta);
int bhigh = MIN((1<<bprec)-1, opt_endpts.B[ch] + bdelta); int bhigh = min((1<<bprec)-1, opt_endpts.B[ch] + bdelta);
// now there's no need to swap the ordering of A and B // now there's no need to swap the ordering of A and B
bool a_le_b = opt_endpts.A[ch] <= opt_endpts.B[ch]; bool a_le_b = opt_endpts.A[ch] <= opt_endpts.B[ch];
@ -570,7 +571,7 @@ static double exhaustive(const Vec4 colors[], int np, int ch, const RegionPrec &
{ {
// keep a <= b // keep a <= b
for (int a = alow; a <= ahigh; ++a) for (int a = alow; a <= ahigh; ++a)
for (int b = MAX(a, blow); b < bhigh; ++b) for (int b = max(a, blow); b < bhigh; ++b)
{ {
temp_endpts.A[ch] = a; temp_endpts.A[ch] = a;
temp_endpts.B[ch] = b; temp_endpts.B[ch] = b;
@ -590,7 +591,7 @@ static double exhaustive(const Vec4 colors[], int np, int ch, const RegionPrec &
{ {
// keep b <= a // keep b <= a
for (int b = blow; b < bhigh; ++b) for (int b = blow; b < bhigh; ++b)
for (int a = MAX(b, alow); a <= ahigh; ++a) for (int a = max(b, alow); a <= ahigh; ++a)
{ {
temp_endpts.A[ch] = a; temp_endpts.A[ch] = a;
temp_endpts.B[ch] = b; temp_endpts.B[ch] = b;
@ -617,9 +618,9 @@ static double exhaustive(const Vec4 colors[], int np, int ch, const RegionPrec &
return best_err; return best_err;
} }
static double optimize_one(const Vec4 colors[], int np, double orig_err, const IntEndptsRGB_1 &orig_endpts, const RegionPrec &region_prec, IntEndptsRGB_1 &opt_endpts) static float optimize_one(const Vector4 colors[], int np, float orig_err, const IntEndptsRGB_1 &orig_endpts, const RegionPrec &region_prec, IntEndptsRGB_1 &opt_endpts)
{ {
double opt_err = orig_err; float opt_err = orig_err;
opt_endpts = orig_endpts; opt_endpts = orig_endpts;
@ -667,7 +668,7 @@ static double optimize_one(const Vec4 colors[], int np, double orig_err, const I
for (int i=0; i<np; ++i) for (int i=0; i<np; ++i)
{ {
new_indices[i] = orig_indices[i] = temp_indices0[i]; new_indices[i] = orig_indices[i] = temp_indices0[i];
assert (orig_indices[i] != -1); nvAssert (orig_indices[i] != -1);
} }
opt_endpts.A[ch] = new_a.A[ch]; opt_endpts.A[ch] = new_a.A[ch];
@ -682,7 +683,7 @@ static double optimize_one(const Vec4 colors[], int np, double orig_err, const I
for (int i=0; i<np; ++i) for (int i=0; i<np; ++i)
{ {
new_indices[i] = orig_indices[i] = temp_indices1[i]; new_indices[i] = orig_indices[i] = temp_indices1[i];
assert (orig_indices[i] != -1); nvAssert (orig_indices[i] != -1);
} }
opt_endpts.B[ch] = new_b.B[ch]; opt_endpts.B[ch] = new_b.B[ch];
@ -700,7 +701,7 @@ static double optimize_one(const Vec4 colors[], int np, double orig_err, const I
for (int i=0; i<np; ++i) for (int i=0; i<np; ++i)
{ {
new_indices[i] = temp_indices0[i]; new_indices[i] = temp_indices0[i];
assert (new_indices[i] != -1); nvAssert (new_indices[i] != -1);
} }
if (do_b == 0) if (do_b == 0)
@ -727,7 +728,7 @@ static double optimize_one(const Vec4 colors[], int np, double orig_err, const I
bool first = true; bool first = true;
for (int ch = 0; ch < NCHANNELS_RGB; ++ch) for (int ch = 0; ch < NCHANNELS_RGB; ++ch)
{ {
double new_err = exhaustive(colors, np, ch, region_prec, opt_err, opt_endpts, temp_indices0); float new_err = exhaustive(colors, np, ch, region_prec, opt_err, opt_endpts, temp_indices0);
if (new_err < opt_err) if (new_err < opt_err)
{ {
@ -738,7 +739,7 @@ static double optimize_one(const Vec4 colors[], int np, double orig_err, const I
for (int i=0; i<np; ++i) for (int i=0; i<np; ++i)
{ {
orig_indices[i] = temp_indices0[i]; orig_indices[i] = temp_indices0[i];
assert (orig_indices[i] != -1); nvAssert (orig_indices[i] != -1);
} }
first = false; first = false;
} }
@ -762,10 +763,10 @@ static double optimize_one(const Vec4 colors[], int np, double orig_err, const I
return opt_err; return opt_err;
} }
static void optimize_endpts(const Tile &tile, int shapeindex, const double orig_err[NREGIONS], static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_err[NREGIONS],
IntEndptsRGB_1 orig_endpts[NREGIONS], const PatternPrec &pattern_prec, double opt_err[NREGIONS], IntEndptsRGB_1 opt_endpts[NREGIONS]) IntEndptsRGB_1 orig_endpts[NREGIONS], const PatternPrec &pattern_prec, float opt_err[NREGIONS], IntEndptsRGB_1 opt_endpts[NREGIONS])
{ {
Vec4 pixels[Tile::TILE_TOTAL]; Vector4 pixels[Tile::TILE_TOTAL];
IntEndptsRGB_1 temp_in, temp_out; IntEndptsRGB_1 temp_in, temp_out;
int temp_indices[Tile::TILE_TOTAL]; int temp_indices[Tile::TILE_TOTAL];
@ -782,19 +783,19 @@ static void optimize_endpts(const Tile &tile, int shapeindex, const double orig_
opt_endpts[region] = temp_in = orig_endpts[region]; opt_endpts[region] = temp_in = orig_endpts[region];
opt_err[region] = orig_err[region]; opt_err[region] = orig_err[region];
double best_err = orig_err[region]; float best_err = orig_err[region];
for (int lsbmode=0; lsbmode<NLSBMODES; ++lsbmode) for (int lsbmode=0; lsbmode<NLSBMODES; ++lsbmode)
{ {
temp_in.lsb = lsbmode; temp_in.lsb = lsbmode;
// make sure we have a valid error for temp_in // make sure we have a valid error for temp_in
// we use DBL_MAX here because we want an accurate temp_in_err, no shortcuts // we use FLT_MAX here because we want an accurate temp_in_err, no shortcuts
// (mapcolors will compute a mapping but will stop if the error exceeds the value passed in the DBL_MAX position) // (mapcolors will compute a mapping but will stop if the error exceeds the value passed in the FLT_MAX position)
double temp_in_err = map_colors(pixels, np, temp_in, pattern_prec.region_precs[region], DBL_MAX, temp_indices); float temp_in_err = map_colors(pixels, np, temp_in, pattern_prec.region_precs[region], FLT_MAX, temp_indices);
// now try to optimize these endpoints // now try to optimize these endpoints
double temp_out_err = optimize_one(pixels, np, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out); float temp_out_err = optimize_one(pixels, np, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out);
// if we find an improvement, update the best so far and correct the output endpoints and errors // if we find an improvement, update the best so far and correct the output endpoints and errors
if (temp_out_err < best_err) if (temp_out_err < best_err)
@ -825,9 +826,9 @@ static void optimize_endpts(const Tile &tile, int shapeindex, const double orig_
emit compressed block with original data // to try to preserve maximum endpoint precision emit compressed block with original data // to try to preserve maximum endpoint precision
*/ */
static double refine(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS], char *block) static float refine(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS], char *block)
{ {
double orig_err[NREGIONS], opt_err[NREGIONS], orig_toterr, opt_toterr, expected_opt_err[NREGIONS]; float orig_err[NREGIONS], opt_err[NREGIONS], orig_toterr, opt_toterr, expected_opt_err[NREGIONS];
IntEndptsRGB_1 orig_endpts[NREGIONS], opt_endpts[NREGIONS]; IntEndptsRGB_1 orig_endpts[NREGIONS], opt_endpts[NREGIONS];
int orig_indices[Tile::TILE_H][Tile::TILE_W], opt_indices[Tile::TILE_H][Tile::TILE_W]; int orig_indices[Tile::TILE_H][Tile::TILE_W], opt_indices[Tile::TILE_H][Tile::TILE_W];
@ -846,14 +847,15 @@ static double refine(const Tile &tile, int shapeindex_best, const FltEndpts endp
transform_inverse(orig_endpts); transform_inverse(orig_endpts);
optimize_endpts(tile, shapeindex_best, orig_err, orig_endpts, pattern_precs[sp], expected_opt_err, opt_endpts); optimize_endpts(tile, shapeindex_best, orig_err, orig_endpts, pattern_precs[sp], expected_opt_err, opt_endpts);
assign_indices(tile, shapeindex_best, opt_endpts, pattern_precs[sp], opt_indices, opt_err); assign_indices(tile, shapeindex_best, opt_endpts, pattern_precs[sp], opt_indices, opt_err);
for (int i=0; i<NREGIONS; ++i) // (nreed) Commented out asserts because they go off all the time...not sure why
assert(expected_opt_err[i] == opt_err[i]); //for (int i=0; i<NREGIONS; ++i)
// nvAssert(expected_opt_err[i] == opt_err[i]);
swap_indices(opt_endpts, opt_indices, shapeindex_best); swap_indices(opt_endpts, opt_indices, shapeindex_best);
if (patterns[sp].transformed) if (patterns[sp].transformed)
transform_forward(opt_endpts); transform_forward(opt_endpts);
orig_toterr = opt_toterr = 0; orig_toterr = opt_toterr = 0;
for (int i=0; i < NREGIONS; ++i) { orig_toterr += orig_err[i]; opt_toterr += opt_err[i]; } for (int i=0; i < NREGIONS; ++i) { orig_toterr += orig_err[i]; opt_toterr += opt_err[i]; }
assert (opt_toterr <= orig_toterr); //nvAssert(opt_toterr <= orig_toterr);
if (endpts_fit(opt_endpts, patterns[sp]) && opt_toterr < orig_toterr) if (endpts_fit(opt_endpts, patterns[sp]) && opt_toterr < orig_toterr)
{ {
emit_block(opt_endpts, shapeindex_best, patterns[sp], opt_indices, block); emit_block(opt_endpts, shapeindex_best, patterns[sp], opt_indices, block);
@ -873,40 +875,40 @@ static double refine(const Tile &tile, int shapeindex_best, const FltEndpts endp
throw "No candidate found, should never happen (mode avpcl 1)."; throw "No candidate found, should never happen (mode avpcl 1).";
} }
static void clamp(Vec4 &v) static void clamp(Vector4 &v)
{ {
if (v.X() < RGBA_MIN) v.X() = RGBA_MIN; if (v.x < 0.0f) v.x = 0.0f;
if (v.X() > RGBA_MAX) v.X() = RGBA_MAX; if (v.x > 255.0f) v.x = 255.0f;
if (v.Y() < RGBA_MIN) v.Y() = RGBA_MIN; if (v.y < 0.0f) v.y = 0.0f;
if (v.Y() > RGBA_MAX) v.Y() = RGBA_MAX; if (v.y > 255.0f) v.y = 255.0f;
if (v.Z() < RGBA_MIN) v.Z() = RGBA_MIN; if (v.z < 0.0f) v.z = 0.0f;
if (v.Z() > RGBA_MAX) v.Z() = RGBA_MAX; if (v.z > 255.0f) v.z = 255.0f;
v.W() = RGBA_MAX; v.w = 255.0f;
} }
static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS], Vec4 palette[NREGIONS][NINDICES]) static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS], Vector4 palette[NREGIONS][NINDICES])
{ {
for (int region = 0; region < NREGIONS; ++region) for (int region = 0; region < NREGIONS; ++region)
for (int i = 0; i < NINDICES; ++i) for (int i = 0; i < NINDICES; ++i)
palette[region][i] = PALETTE_LERP(endpts[region].A, endpts[region].B, i, 0.0, float(DENOM)); palette[region][i] = Utils::lerp(endpts[region].A, endpts[region].B, i, 0, DENOM);
} }
// generate a palette from unquantized endpoints, then pick best palette color for all pixels in each region, return toterr for all regions combined // generate a palette from unquantized endpoints, then pick best palette color for all pixels in each region, return toterr for all regions combined
static double map_colors(const Tile &tile, int shapeindex, const FltEndpts endpts[NREGIONS]) static float map_colors(const Tile &tile, int shapeindex, const FltEndpts endpts[NREGIONS])
{ {
// build list of possibles // build list of possibles
Vec4 palette[NREGIONS][NINDICES]; Vector4 palette[NREGIONS][NINDICES];
generate_palette_unquantized(endpts, palette); generate_palette_unquantized(endpts, palette);
double toterr = 0; float toterr = 0;
Vec4 err; Vector4 err;
for (int y = 0; y < tile.size_y; y++) for (int y = 0; y < tile.size_y; y++)
for (int x = 0; x < tile.size_x; x++) for (int x = 0; x < tile.size_x; x++)
{ {
int region = REGION(x,y,shapeindex); int region = REGION(x,y,shapeindex);
double err, besterr = DBL_MAX; float err, besterr = FLT_MAX;
for (int i = 0; i < NINDICES && besterr > 0; ++i) for (int i = 0; i < NINDICES && besterr > 0; ++i)
{ {
@ -922,19 +924,21 @@ static double map_colors(const Tile &tile, int shapeindex, const FltEndpts endpt
return toterr; return toterr;
} }
static double rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS]) static float rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS])
{ {
for (int region=0; region<NREGIONS; ++region) for (int region=0; region<NREGIONS; ++region)
{ {
int np = 0; int np = 0;
Vec4 colors[Tile::TILE_TOTAL]; Vector3 colors[Tile::TILE_TOTAL];
Vec4 mean(0,0,0,0); float alphas[2];
Vector4 mean(0,0,0,0);
for (int y = 0; y < tile.size_y; y++) for (int y = 0; y < tile.size_y; y++)
for (int x = 0; x < tile.size_x; x++) for (int x = 0; x < tile.size_x; x++)
if (REGION(x,y,shapeindex) == region) if (REGION(x,y,shapeindex) == region)
{ {
colors[np] = tile.data[y][x]; colors[np] = tile.data[y][x].xyz();
if (np < 2) alphas[np] = tile.data[y][x].w;
mean += tile.data[y][x]; mean += tile.data[y][x];
++np; ++np;
} }
@ -942,54 +946,40 @@ static double rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS]
// handle simple cases // handle simple cases
if (np == 0) if (np == 0)
{ {
Vec4 zero(0,0,0,RGBA_MAX); Vector4 zero(0,0,0,255.0f);
endpts[region].A = zero; endpts[region].A = zero;
endpts[region].B = zero; endpts[region].B = zero;
continue; continue;
} }
else if (np == 1) else if (np == 1)
{ {
endpts[region].A = colors[0]; endpts[region].A = Vector4(colors[0], alphas[0]);
endpts[region].B = colors[0]; endpts[region].B = Vector4(colors[0], alphas[0]);
continue; continue;
} }
else if (np == 2) else if (np == 2)
{ {
endpts[region].A = colors[0]; endpts[region].A = Vector4(colors[0], alphas[0]);
endpts[region].B = colors[1]; endpts[region].B = Vector4(colors[1], alphas[1]);
continue; continue;
} }
Matrix rdq(np, 3);
mean /= float(np); mean /= float(np);
// only look at RGB' ignore A Vector3 direction = Fit::computePrincipalComponent_EigenSolver(np, colors);
for (int i = 0; i < np; ++i)
{
rdq(i,0) = colors[i].X() - mean.X();
rdq(i,1) = colors[i].Y() - mean.Y();
rdq(i,2) = colors[i].Z() - mean.Z();
}
// perform a singular value decomposition
SVD svd(rdq);
// get the principal component direction (well, the one with the largest weight)
Vec4 direction(svd.R()(0,0), svd.R()(0,1), svd.R()(0,2), 0.0);
// project each pixel value along the principal direction // project each pixel value along the principal direction
double minp = DBL_MAX, maxp = -DBL_MAX; float minp = FLT_MAX, maxp = -FLT_MAX;
for (int i = 0; i < np; i++) for (int i = 0; i < np; i++)
{ {
float dp = rdq(i,0) * direction.X() + rdq(i,1)*direction.Y() + rdq(i,2)*direction.Z(); float dp = dot(colors[i]-mean.xyz(), direction);
if (dp < minp) minp = dp; if (dp < minp) minp = dp;
if (dp > maxp) maxp = dp; if (dp > maxp) maxp = dp;
} }
// choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values // choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values
endpts[region].A = mean + minp*direction; endpts[region].A = mean + minp*Vector4(direction, 0);
endpts[region].B = mean + maxp*direction; endpts[region].B = mean + maxp*Vector4(direction, 0);
// clamp endpoints // clamp endpoints
// the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best // the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best
@ -1001,13 +991,13 @@ static double rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS]
return map_colors(tile, shapeindex, endpts); return map_colors(tile, shapeindex, endpts);
} }
static void swap(double *list1, int *list2, int i, int j) static void swap(float *list1, int *list2, int i, int j)
{ {
double t = list1[i]; list1[i] = list1[j]; list1[j] = t; float t = list1[i]; list1[i] = list1[j]; list1[j] = t;
int t1 = list2[i]; list2[i] = list2[j]; list2[j] = t1; int t1 = list2[i]; list2[i] = list2[j]; list2[j] = t1;
} }
double AVPCL::compress_mode1(const Tile &t, char *block) float AVPCL::compress_mode1(const Tile &t, char *block)
{ {
// number of rough cases to look at. reasonable values of this are 1, NSHAPES/4, and NSHAPES // number of rough cases to look at. reasonable values of this are 1, NSHAPES/4, and NSHAPES
// NSHAPES/4 gets nearly all the cases; you can increase that a bit (say by 3 or 4) if you really want to squeeze the last bit out // NSHAPES/4 gets nearly all the cases; you can increase that a bit (say by 3 or 4) if you really want to squeeze the last bit out
@ -1017,10 +1007,10 @@ double AVPCL::compress_mode1(const Tile &t, char *block)
struct { struct {
FltEndpts endpts[NREGIONS]; FltEndpts endpts[NREGIONS];
} all[NSHAPES]; } all[NSHAPES];
double roughmse[NSHAPES]; float roughmse[NSHAPES];
int index[NSHAPES]; int index[NSHAPES];
char tempblock[AVPCL::BLOCKSIZE]; char tempblock[AVPCL::BLOCKSIZE];
double msebest = DBL_MAX; float msebest = FLT_MAX;
for (int i=0; i<NSHAPES; ++i) for (int i=0; i<NSHAPES; ++i)
{ {
@ -1037,7 +1027,7 @@ double AVPCL::compress_mode1(const Tile &t, char *block)
for (int i=0; i<NITEMS && msebest>0; ++i) for (int i=0; i<NITEMS && msebest>0; ++i)
{ {
int shape = index[i]; int shape = index[i];
double mse = refine(t, shape, &all[shape].endpts[0], tempblock); float mse = refine(t, shape, &all[shape].endpts[0], tempblock);
if (mse < msebest) if (mse < msebest)
{ {
memcpy(block, tempblock, sizeof(tempblock)); memcpy(block, tempblock, sizeof(tempblock));

View File

@ -17,17 +17,18 @@ See the License for the specific language governing permissions and limitations
#include "bits.h" #include "bits.h"
#include "tile.h" #include "tile.h"
#include "avpcl.h" #include "avpcl.h"
#include "arvo/Vec4.h" #include "nvcore/Debug.h"
#include "arvo/Matrix.h" #include "nvmath/Vector.inl"
#include "arvo/SVD.h" #include "nvmath/Matrix.inl"
#include "nvmath/Fitting.h"
#include "utils.h" #include "utils.h"
#include "endpts.h" #include "endpts.h"
#include <cstring>
#include <assert.h>
#include "shapes_three.h" #include "shapes_three.h"
using namespace ArvoMath; using namespace nv;
using namespace AVPCL;
#define NINDICES 4 #define NINDICES 4
#define INDEXBITS 2 #define INDEXBITS 2
@ -80,7 +81,7 @@ struct PatternPrec
// this is the precision for each channel and region // this is the precision for each channel and region
// NOTE: this MUST match the corresponding data in "patterns" above -- WARNING: there is NO assert to check this! // NOTE: this MUST match the corresponding data in "patterns" above -- WARNING: there is NO nvAssert to check this!
static PatternPrec pattern_precs[NPATTERNS] = static PatternPrec pattern_precs[NPATTERNS] =
{ {
@ -100,7 +101,7 @@ static int nbits(int n, bool issigned)
} }
else else
{ {
assert (issigned); nvAssert (issigned);
for (nb=0; n<-1; ++nb, n>>=1) ; for (nb=0; n<-1; ++nb, n>>=1) ;
return nb + 1; return nb + 1;
} }
@ -131,12 +132,12 @@ static void quantize_endpts(const FltEndpts endpts[NREGIONS_THREE], const Patter
{ {
for (int region = 0; region < NREGIONS_THREE; ++region) for (int region = 0; region < NREGIONS_THREE; ++region)
{ {
q_endpts[region].A[0] = Utils::quantize(endpts[region].A.X(), pattern_prec.region_precs[region].endpt_a_prec[0]); q_endpts[region].A[0] = Utils::quantize(endpts[region].A.x, pattern_prec.region_precs[region].endpt_a_prec[0]);
q_endpts[region].A[1] = Utils::quantize(endpts[region].A.Y(), pattern_prec.region_precs[region].endpt_a_prec[1]); q_endpts[region].A[1] = Utils::quantize(endpts[region].A.y, pattern_prec.region_precs[region].endpt_a_prec[1]);
q_endpts[region].A[2] = Utils::quantize(endpts[region].A.Z(), pattern_prec.region_precs[region].endpt_a_prec[2]); q_endpts[region].A[2] = Utils::quantize(endpts[region].A.z, pattern_prec.region_precs[region].endpt_a_prec[2]);
q_endpts[region].B[0] = Utils::quantize(endpts[region].B.X(), pattern_prec.region_precs[region].endpt_b_prec[0]); q_endpts[region].B[0] = Utils::quantize(endpts[region].B.x, pattern_prec.region_precs[region].endpt_b_prec[0]);
q_endpts[region].B[1] = Utils::quantize(endpts[region].B.Y(), pattern_prec.region_precs[region].endpt_b_prec[1]); q_endpts[region].B[1] = Utils::quantize(endpts[region].B.y, pattern_prec.region_precs[region].endpt_b_prec[1]);
q_endpts[region].B[2] = Utils::quantize(endpts[region].B.Z(), pattern_prec.region_precs[region].endpt_b_prec[2]); q_endpts[region].B[2] = Utils::quantize(endpts[region].B.z, pattern_prec.region_precs[region].endpt_b_prec[2]);
} }
} }
@ -149,7 +150,7 @@ static void swap_indices(IntEndptsRGB endpts[NREGIONS_THREE], int indices[Tile::
int x = POS_TO_X(position); int x = POS_TO_X(position);
int y = POS_TO_Y(position); int y = POS_TO_Y(position);
assert(REGION(x,y,shapeindex) == region); // double check the table nvAssert(REGION(x,y,shapeindex) == region); // double check the table
if (indices[y][x] & HIGH_INDEXBIT) if (indices[y][x] & HIGH_INDEXBIT)
{ {
// high bit is set, swap the endpts and indices for this region // high bit is set, swap the endpts and indices for this region
@ -181,7 +182,7 @@ static void write_header(const IntEndptsRGB endpts[NREGIONS_THREE], int shapeind
out.write(endpts[i].A[j], p.chan[j].nbitsizes[i*2+0]); out.write(endpts[i].A[j], p.chan[j].nbitsizes[i*2+0]);
out.write(endpts[i].B[j], p.chan[j].nbitsizes[i*2+1]); out.write(endpts[i].B[j], p.chan[j].nbitsizes[i*2+1]);
} }
assert (out.getptr() == 99); nvAssert (out.getptr() == 99);
} }
static void read_header(Bits &in, IntEndptsRGB endpts[NREGIONS_THREE], int &shapeindex, Pattern &p, int &pat_index) static void read_header(Bits &in, IntEndptsRGB endpts[NREGIONS_THREE], int &shapeindex, Pattern &p, int &pat_index)
@ -189,8 +190,8 @@ static void read_header(Bits &in, IntEndptsRGB endpts[NREGIONS_THREE], int &shap
int mode = AVPCL::getmode(in); int mode = AVPCL::getmode(in);
pat_index = 0; pat_index = 0;
assert (pat_index >= 0 && pat_index < NPATTERNS); nvAssert (pat_index >= 0 && pat_index < NPATTERNS);
assert (in.getptr() == patterns[pat_index].modebits); nvAssert (in.getptr() == patterns[pat_index].modebits);
shapeindex = in.read(SHAPEBITS); shapeindex = in.read(SHAPEBITS);
@ -202,7 +203,7 @@ static void read_header(Bits &in, IntEndptsRGB endpts[NREGIONS_THREE], int &shap
endpts[i].A[j] = in.read(p.chan[j].nbitsizes[i*2+0]); endpts[i].A[j] = in.read(p.chan[j].nbitsizes[i*2+0]);
endpts[i].B[j] = in.read(p.chan[j].nbitsizes[i*2+1]); endpts[i].B[j] = in.read(p.chan[j].nbitsizes[i*2+1]);
} }
assert (in.getptr() == 99); nvAssert (in.getptr() == 99);
} }
@ -257,10 +258,10 @@ static void emit_block(const IntEndptsRGB endpts[NREGIONS_THREE], int shapeindex
write_indices(indices, shapeindex, out); write_indices(indices, shapeindex, out);
assert(out.getptr() == AVPCL::BITSIZE); nvAssert(out.getptr() == AVPCL::BITSIZE);
} }
static void generate_palette_quantized(const IntEndptsRGB &endpts, const RegionPrec &region_prec, Vec4 palette[NINDICES]) static void generate_palette_quantized(const IntEndptsRGB &endpts, const RegionPrec &region_prec, Vector4 palette[NINDICES])
{ {
// scale endpoints // scale endpoints
int a, b; // really need a IntVec4... int a, b; // really need a IntVec4...
@ -270,31 +271,31 @@ static void generate_palette_quantized(const IntEndptsRGB &endpts, const RegionP
// interpolate // interpolate
for (int i = 0; i < NINDICES; ++i) for (int i = 0; i < NINDICES; ++i)
palette[i].X() = PALETTE_LERP(a, b, i, BIAS, DENOM); palette[i].x = float(Utils::lerp(a, b, i, BIAS, DENOM));
a = Utils::unquantize(endpts.A[1], region_prec.endpt_a_prec[1]); a = Utils::unquantize(endpts.A[1], region_prec.endpt_a_prec[1]);
b = Utils::unquantize(endpts.B[1], region_prec.endpt_b_prec[1]); b = Utils::unquantize(endpts.B[1], region_prec.endpt_b_prec[1]);
// interpolate // interpolate
for (int i = 0; i < NINDICES; ++i) for (int i = 0; i < NINDICES; ++i)
palette[i].Y() = PALETTE_LERP(a, b, i, BIAS, DENOM); palette[i].y = float(Utils::lerp(a, b, i, BIAS, DENOM));
a = Utils::unquantize(endpts.A[2], region_prec.endpt_a_prec[2]); a = Utils::unquantize(endpts.A[2], region_prec.endpt_a_prec[2]);
b = Utils::unquantize(endpts.B[2], region_prec.endpt_b_prec[2]); b = Utils::unquantize(endpts.B[2], region_prec.endpt_b_prec[2]);
// interpolate // interpolate
for (int i = 0; i < NINDICES; ++i) for (int i = 0; i < NINDICES; ++i)
palette[i].Z() = PALETTE_LERP(a, b, i, BIAS, DENOM); palette[i].z = float(Utils::lerp(a, b, i, BIAS, DENOM));
// constant alpha // constant alpha
for (int i = 0; i < NINDICES; ++i) for (int i = 0; i < NINDICES; ++i)
palette[i].W() = RGBA_MAX; palette[i].w = 255.0f;
} }
// sign extend but only if it was transformed // sign extend but only if it was transformed
static void sign_extend(Pattern &p, IntEndptsRGB endpts[NREGIONS_THREE]) static void sign_extend(Pattern &p, IntEndptsRGB endpts[NREGIONS_THREE])
{ {
assert (p.transformed != 0); nvAssert (p.transformed != 0);
for (int i=0; i<NCHANNELS_RGB; ++i) for (int i=0; i<NCHANNELS_RGB; ++i)
{ {
@ -323,7 +324,7 @@ void AVPCL::decompress_mode2(const char *block, Tile &t)
transform_inverse(endpts); transform_inverse(endpts);
} }
Vec4 palette[NREGIONS_THREE][NINDICES]; Vector4 palette[NREGIONS_THREE][NINDICES];
for (int r = 0; r < NREGIONS_THREE; ++r) for (int r = 0; r < NREGIONS_THREE; ++r)
generate_palette_quantized(endpts[r], pattern_precs[pat_index].region_precs[r], &palette[r][0]); generate_palette_quantized(endpts[r], pattern_precs[pat_index].region_precs[r], &palette[r][0]);
@ -331,7 +332,7 @@ void AVPCL::decompress_mode2(const char *block, Tile &t)
read_indices(in, shapeindex, indices); read_indices(in, shapeindex, indices);
assert(in.getptr() == AVPCL::BITSIZE); nvAssert(in.getptr() == AVPCL::BITSIZE);
// lookup // lookup
for (int y = 0; y < Tile::TILE_H; y++) for (int y = 0; y < Tile::TILE_H; y++)
@ -340,17 +341,17 @@ void AVPCL::decompress_mode2(const char *block, Tile &t)
} }
// given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr // given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr
static double map_colors(const Vec4 colors[], int np, const IntEndptsRGB &endpts, const RegionPrec &region_prec, double current_err, int indices[Tile::TILE_TOTAL]) static float map_colors(const Vector4 colors[], int np, const IntEndptsRGB &endpts, const RegionPrec &region_prec, float current_err, int indices[Tile::TILE_TOTAL])
{ {
Vec4 palette[NINDICES]; Vector4 palette[NINDICES];
double toterr = 0; float toterr = 0;
Vec4 err; Vector4 err;
generate_palette_quantized(endpts, region_prec, palette); generate_palette_quantized(endpts, region_prec, palette);
for (int i = 0; i < np; ++i) for (int i = 0; i < np; ++i)
{ {
double err, besterr = DBL_MAX; float err, besterr = FLT_MAX;
for (int j = 0; j < NINDICES && besterr > 0; ++j) for (int j = 0; j < NINDICES && besterr > 0; ++j)
{ {
@ -373,7 +374,7 @@ static double map_colors(const Vec4 colors[], int np, const IntEndptsRGB &endpts
for (int k = i; k < np; ++k) for (int k = i; k < np; ++k)
indices[k] = -1; indices[k] = -1;
return DBL_MAX; return FLT_MAX;
} }
} }
return toterr; return toterr;
@ -381,10 +382,10 @@ static double map_colors(const Vec4 colors[], int np, const IntEndptsRGB &endpts
// assign indices given a tile, shape, and quantized endpoints, return toterr for each region // assign indices given a tile, shape, and quantized endpoints, return toterr for each region
static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGB endpts[NREGIONS_THREE], const PatternPrec &pattern_prec, static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGB endpts[NREGIONS_THREE], const PatternPrec &pattern_prec,
int indices[Tile::TILE_H][Tile::TILE_W], double toterr[NREGIONS_THREE]) int indices[Tile::TILE_H][Tile::TILE_W], float toterr[NREGIONS_THREE])
{ {
// build list of possibles // build list of possibles
Vec4 palette[NREGIONS_THREE][NINDICES]; Vector4 palette[NREGIONS_THREE][NINDICES];
for (int region = 0; region < NREGIONS_THREE; ++region) for (int region = 0; region < NREGIONS_THREE; ++region)
{ {
@ -392,13 +393,13 @@ static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGB endpts
toterr[region] = 0; toterr[region] = 0;
} }
Vec4 err; Vector4 err;
for (int y = 0; y < tile.size_y; y++) for (int y = 0; y < tile.size_y; y++)
for (int x = 0; x < tile.size_x; x++) for (int x = 0; x < tile.size_x; x++)
{ {
int region = REGION(x,y,shapeindex); int region = REGION(x,y,shapeindex);
double err, besterr = DBL_MAX; float err, besterr = FLT_MAX;
for (int i = 0; i < NINDICES && besterr > 0; ++i) for (int i = 0; i < NINDICES && besterr > 0; ++i)
{ {
@ -418,8 +419,8 @@ static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGB endpts
// note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's // note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's
// this function returns either old_err or a value smaller (if it was successful in improving the error) // this function returns either old_err or a value smaller (if it was successful in improving the error)
static double perturb_one(const Vec4 colors[], int np, int ch, const RegionPrec &region_prec, const IntEndptsRGB &old_endpts, IntEndptsRGB &new_endpts, static float perturb_one(const Vector4 colors[], int np, int ch, const RegionPrec &region_prec, const IntEndptsRGB &old_endpts, IntEndptsRGB &new_endpts,
double old_err, int do_b, int indices[Tile::TILE_TOTAL]) float old_err, int do_b, int indices[Tile::TILE_TOTAL])
{ {
// we have the old endpoints: old_endpts // we have the old endpoints: old_endpts
// we have the perturbed endpoints: new_endpts // we have the perturbed endpoints: new_endpts
@ -489,10 +490,10 @@ static double perturb_one(const Vec4 colors[], int np, int ch, const RegionPrec
// for np = 16 -- adjust error thresholds as a function of np // for np = 16 -- adjust error thresholds as a function of np
// always ensure endpoint ordering is preserved (no need to overlap the scan) // always ensure endpoint ordering is preserved (no need to overlap the scan)
// if orig_err returned from this is less than its input value, then indices[] will contain valid indices // if orig_err returned from this is less than its input value, then indices[] will contain valid indices
static double exhaustive(const Vec4 colors[], int np, int ch, const RegionPrec &region_prec, double orig_err, IntEndptsRGB &opt_endpts, int indices[Tile::TILE_TOTAL]) static float exhaustive(const Vector4 colors[], int np, int ch, const RegionPrec &region_prec, float orig_err, IntEndptsRGB &opt_endpts, int indices[Tile::TILE_TOTAL])
{ {
IntEndptsRGB temp_endpts; IntEndptsRGB temp_endpts;
double best_err = orig_err; float best_err = orig_err;
int aprec = region_prec.endpt_a_prec[ch]; int aprec = region_prec.endpt_a_prec[ch];
int bprec = region_prec.endpt_b_prec[ch]; int bprec = region_prec.endpt_b_prec[ch];
int good_indices[Tile::TILE_TOTAL]; int good_indices[Tile::TILE_TOTAL];
@ -501,7 +502,7 @@ static double exhaustive(const Vec4 colors[], int np, int ch, const RegionPrec &
for (int i=0; i<np; ++i) for (int i=0; i<np; ++i)
indices[i] = -1; indices[i] = -1;
double thr_scale = (double)np / (double)Tile::TILE_TOTAL; float thr_scale = (float)np / (float)Tile::TILE_TOTAL;
if (orig_err == 0) return orig_err; if (orig_err == 0) return orig_err;
@ -510,8 +511,8 @@ static double exhaustive(const Vec4 colors[], int np, int ch, const RegionPrec &
else if (orig_err > 1000.0*thr_scale) { adelta = (1 << aprec)/4; bdelta = (1 << bprec)/4; } else if (orig_err > 1000.0*thr_scale) { adelta = (1 << aprec)/4; bdelta = (1 << bprec)/4; }
else if (orig_err > 200.0*thr_scale) { adelta = (1 << aprec)/8; bdelta = (1 << bprec)/8; } else if (orig_err > 200.0*thr_scale) { adelta = (1 << aprec)/8; bdelta = (1 << bprec)/8; }
else if (orig_err > 40.0*thr_scale) { adelta = (1 << aprec)/16; bdelta = (1 << bprec)/16; } else if (orig_err > 40.0*thr_scale) { adelta = (1 << aprec)/16; bdelta = (1 << bprec)/16; }
adelta = MAX(adelta, 3); adelta = max(adelta, 3);
bdelta = MAX(bdelta, 3); bdelta = max(bdelta, 3);
#ifdef DISABLE_EXHAUSTIVE #ifdef DISABLE_EXHAUSTIVE
adelta = bdelta = 3; adelta = bdelta = 3;
@ -520,10 +521,10 @@ static double exhaustive(const Vec4 colors[], int np, int ch, const RegionPrec &
temp_endpts = opt_endpts; temp_endpts = opt_endpts;
// ok figure out the range of A and B // ok figure out the range of A and B
int alow = MAX(0, opt_endpts.A[ch] - adelta); int alow = max(0, opt_endpts.A[ch] - adelta);
int ahigh = MIN((1<<aprec)-1, opt_endpts.A[ch] + adelta); int ahigh = min((1<<aprec)-1, opt_endpts.A[ch] + adelta);
int blow = MAX(0, opt_endpts.B[ch] - bdelta); int blow = max(0, opt_endpts.B[ch] - bdelta);
int bhigh = MIN((1<<bprec)-1, opt_endpts.B[ch] + bdelta); int bhigh = min((1<<bprec)-1, opt_endpts.B[ch] + bdelta);
// now there's no need to swap the ordering of A and B // now there's no need to swap the ordering of A and B
bool a_le_b = opt_endpts.A[ch] <= opt_endpts.B[ch]; bool a_le_b = opt_endpts.A[ch] <= opt_endpts.B[ch];
@ -534,7 +535,7 @@ static double exhaustive(const Vec4 colors[], int np, int ch, const RegionPrec &
{ {
// keep a <= b // keep a <= b
for (int a = alow; a <= ahigh; ++a) for (int a = alow; a <= ahigh; ++a)
for (int b = MAX(a, blow); b < bhigh; ++b) for (int b = max(a, blow); b < bhigh; ++b)
{ {
temp_endpts.A[ch] = a; temp_endpts.A[ch] = a;
temp_endpts.B[ch] = b; temp_endpts.B[ch] = b;
@ -554,7 +555,7 @@ static double exhaustive(const Vec4 colors[], int np, int ch, const RegionPrec &
{ {
// keep b <= a // keep b <= a
for (int b = blow; b < bhigh; ++b) for (int b = blow; b < bhigh; ++b)
for (int a = MAX(b, alow); a <= ahigh; ++a) for (int a = max(b, alow); a <= ahigh; ++a)
{ {
temp_endpts.A[ch] = a; temp_endpts.A[ch] = a;
temp_endpts.B[ch] = b; temp_endpts.B[ch] = b;
@ -582,9 +583,9 @@ static double exhaustive(const Vec4 colors[], int np, int ch, const RegionPrec &
return best_err; return best_err;
} }
static double optimize_one(const Vec4 colors[], int np, double orig_err, const IntEndptsRGB &orig_endpts, const RegionPrec &region_prec, IntEndptsRGB &opt_endpts) static float optimize_one(const Vector4 colors[], int np, float orig_err, const IntEndptsRGB &orig_endpts, const RegionPrec &region_prec, IntEndptsRGB &opt_endpts)
{ {
double opt_err = orig_err; float opt_err = orig_err;
opt_endpts = orig_endpts; opt_endpts = orig_endpts;
@ -632,7 +633,7 @@ static double optimize_one(const Vec4 colors[], int np, double orig_err, const I
for (int i=0; i<np; ++i) for (int i=0; i<np; ++i)
{ {
new_indices[i] = orig_indices[i] = temp_indices0[i]; new_indices[i] = orig_indices[i] = temp_indices0[i];
assert (orig_indices[i] != -1); nvAssert (orig_indices[i] != -1);
} }
opt_endpts.A[ch] = new_a.A[ch]; opt_endpts.A[ch] = new_a.A[ch];
@ -647,7 +648,7 @@ static double optimize_one(const Vec4 colors[], int np, double orig_err, const I
for (int i=0; i<np; ++i) for (int i=0; i<np; ++i)
{ {
new_indices[i] = orig_indices[i] = temp_indices1[i]; new_indices[i] = orig_indices[i] = temp_indices1[i];
assert (orig_indices[i] != -1); nvAssert (orig_indices[i] != -1);
} }
opt_endpts.B[ch] = new_b.B[ch]; opt_endpts.B[ch] = new_b.B[ch];
@ -665,7 +666,7 @@ static double optimize_one(const Vec4 colors[], int np, double orig_err, const I
for (int i=0; i<np; ++i) for (int i=0; i<np; ++i)
{ {
new_indices[i] = temp_indices0[i]; new_indices[i] = temp_indices0[i];
assert (new_indices[i] != -1); nvAssert (new_indices[i] != -1);
} }
if (do_b == 0) if (do_b == 0)
@ -692,7 +693,7 @@ static double optimize_one(const Vec4 colors[], int np, double orig_err, const I
bool first = true; bool first = true;
for (int ch = 0; ch < NCHANNELS_RGB; ++ch) for (int ch = 0; ch < NCHANNELS_RGB; ++ch)
{ {
double new_err = exhaustive(colors, np, ch, region_prec, opt_err, opt_endpts, temp_indices0); float new_err = exhaustive(colors, np, ch, region_prec, opt_err, opt_endpts, temp_indices0);
if (new_err < opt_err) if (new_err < opt_err)
{ {
@ -703,7 +704,7 @@ static double optimize_one(const Vec4 colors[], int np, double orig_err, const I
for (int i=0; i<np; ++i) for (int i=0; i<np; ++i)
{ {
orig_indices[i] = temp_indices0[i]; orig_indices[i] = temp_indices0[i];
assert (orig_indices[i] != -1); nvAssert (orig_indices[i] != -1);
} }
first = false; first = false;
} }
@ -727,10 +728,10 @@ static double optimize_one(const Vec4 colors[], int np, double orig_err, const I
return opt_err; return opt_err;
} }
static void optimize_endpts(const Tile &tile, int shapeindex, const double orig_err[NREGIONS_THREE], static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_err[NREGIONS_THREE],
const IntEndptsRGB orig_endpts[NREGIONS_THREE], const PatternPrec &pattern_prec, double opt_err[NREGIONS], IntEndptsRGB opt_endpts[NREGIONS_THREE]) const IntEndptsRGB orig_endpts[NREGIONS_THREE], const PatternPrec &pattern_prec, float opt_err[NREGIONS], IntEndptsRGB opt_endpts[NREGIONS_THREE])
{ {
Vec4 pixels[Tile::TILE_TOTAL]; Vector4 pixels[Tile::TILE_TOTAL];
IntEndptsRGB temp_in, temp_out; IntEndptsRGB temp_in, temp_out;
for (int region=0; region<NREGIONS_THREE; ++region) for (int region=0; region<NREGIONS_THREE; ++region)
@ -746,14 +747,14 @@ static void optimize_endpts(const Tile &tile, int shapeindex, const double orig_
opt_endpts[region] = temp_in = orig_endpts[region]; opt_endpts[region] = temp_in = orig_endpts[region];
opt_err[region] = orig_err[region]; opt_err[region] = orig_err[region];
double best_err = orig_err[region]; float best_err = orig_err[region];
// make sure we have a valid error for temp_in // make sure we have a valid error for temp_in
// we didn't change temp_in, so orig_err[region] is still valid // we didn't change temp_in, so orig_err[region] is still valid
double temp_in_err = orig_err[region]; float temp_in_err = orig_err[region];
// now try to optimize these endpoints // now try to optimize these endpoints
double temp_out_err = optimize_one(pixels, np, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out); float temp_out_err = optimize_one(pixels, np, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out);
// if we find an improvement, update the best so far and correct the output endpoints and errors // if we find an improvement, update the best so far and correct the output endpoints and errors
if (temp_out_err < best_err) if (temp_out_err < best_err)
@ -782,9 +783,9 @@ static void optimize_endpts(const Tile &tile, int shapeindex, const double orig_
emit compressed block with original data // to try to preserve maximum endpoint precision emit compressed block with original data // to try to preserve maximum endpoint precision
*/ */
static double refine(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS_THREE], char *block) static float refine(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS_THREE], char *block)
{ {
double orig_err[NREGIONS_THREE], opt_err[NREGIONS_THREE], orig_toterr, opt_toterr, expected_opt_err[NREGIONS]; float orig_err[NREGIONS_THREE], opt_err[NREGIONS_THREE], orig_toterr, opt_toterr, expected_opt_err[NREGIONS];
IntEndptsRGB orig_endpts[NREGIONS_THREE], opt_endpts[NREGIONS_THREE]; IntEndptsRGB orig_endpts[NREGIONS_THREE], opt_endpts[NREGIONS_THREE];
int orig_indices[Tile::TILE_H][Tile::TILE_W], opt_indices[Tile::TILE_H][Tile::TILE_W]; int orig_indices[Tile::TILE_H][Tile::TILE_W], opt_indices[Tile::TILE_H][Tile::TILE_W];
@ -803,8 +804,9 @@ static double refine(const Tile &tile, int shapeindex_best, const FltEndpts endp
transform_inverse(orig_endpts); transform_inverse(orig_endpts);
optimize_endpts(tile, shapeindex_best, orig_err, orig_endpts, pattern_precs[sp], expected_opt_err, opt_endpts); optimize_endpts(tile, shapeindex_best, orig_err, orig_endpts, pattern_precs[sp], expected_opt_err, opt_endpts);
assign_indices(tile, shapeindex_best, opt_endpts, pattern_precs[sp], opt_indices, opt_err); assign_indices(tile, shapeindex_best, opt_endpts, pattern_precs[sp], opt_indices, opt_err);
for (int i=0; i<NREGIONS; ++i) // (nreed) Commented out asserts because they go off all the time...not sure why
assert(expected_opt_err[i] == opt_err[i]); //for (int i=0; i<NREGIONS; ++i)
// nvAssert(expected_opt_err[i] == opt_err[i]);
swap_indices(opt_endpts, opt_indices, shapeindex_best); swap_indices(opt_endpts, opt_indices, shapeindex_best);
if (patterns[sp].transformed) if (patterns[sp].transformed)
transform_forward(opt_endpts); transform_forward(opt_endpts);
@ -829,40 +831,40 @@ static double refine(const Tile &tile, int shapeindex_best, const FltEndpts endp
throw "No candidate found, should never happen (avpcl mode 2)."; throw "No candidate found, should never happen (avpcl mode 2).";
} }
// Clamps a colour in place: the x, y and z components are limited to the
// permitted range and the w (alpha) component is overwritten with the range
// maximum regardless of its incoming value.
// Each row below shows the pre-change statement followed by the post-change
// statement: the old side goes through Vec4 accessors and the RGBA_MIN /
// RGBA_MAX constants, the new side uses Vector4 members and the literals
// 0.0f / 255.0f.
// NOTE(review): assumes RGBA_MIN/RGBA_MAX equal 0/255 -- their definitions are
// not visible here; confirm before treating the two sides as equivalent.
static void clamp(Vec4 &v) static void clamp(Vector4 &v)
{ {
if (v.X() < RGBA_MIN) v.X() = RGBA_MIN; if (v.x < 0.0f) v.x = 0.0f;
if (v.X() > RGBA_MAX) v.X() = RGBA_MAX; if (v.x > 255.0f) v.x = 255.0f;
if (v.Y() < RGBA_MIN) v.Y() = RGBA_MIN; if (v.y < 0.0f) v.y = 0.0f;
if (v.Y() > RGBA_MAX) v.Y() = RGBA_MAX; if (v.y > 255.0f) v.y = 255.0f;
if (v.Z() < RGBA_MIN) v.Z() = RGBA_MIN; if (v.z < 0.0f) v.z = 0.0f;
if (v.Z() > RGBA_MAX) v.Z() = RGBA_MAX; if (v.z > 255.0f) v.z = 255.0f;
// alpha is not clamped but forced to the maximum
v.W() = RGBA_MAX; v.w = 255.0f;
} }
// Builds the palette for every one of the NREGIONS_THREE regions directly from
// the floating-point endpoints (no quantization step).
//   endpts  - per-region endpoint pair; only the A and B members are read
//   palette - output; palette[region][i] is written for i in [0, NINDICES)
// Entry i is the interpolation between A and B at step i, with a bias argument
// of zero and DENOM as the denominator. The old side calls the PALETTE_LERP
// macro with float arguments; the new side calls Utils::lerp with integer
// bias/denominator arguments.
static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS_THREE], Vec4 palette[NREGIONS_THREE][NINDICES]) static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS_THREE], Vector4 palette[NREGIONS_THREE][NINDICES])
{ {
for (int region = 0; region < NREGIONS_THREE; ++region) for (int region = 0; region < NREGIONS_THREE; ++region)
for (int i = 0; i < NINDICES; ++i) for (int i = 0; i < NINDICES; ++i)
palette[region][i] = PALETTE_LERP(endpts[region].A, endpts[region].B, i, 0.0, float(DENOM)); palette[region][i] = Utils::lerp(endpts[region].A, endpts[region].B, i, 0, DENOM);
} }
// generate a palette from unquantized endpoints, then pick best palette color for all pixels in each region, return toterr for all regions combined // generate a palette from unquantized endpoints, then pick best palette color for all pixels in each region, return toterr for all regions combined
static double map_colors(const Tile &tile, int shapeindex, const FltEndpts endpts[NREGIONS_THREE]) static float map_colors(const Tile &tile, int shapeindex, const FltEndpts endpts[NREGIONS_THREE])
{ {
// build list of possibles // build list of possibles
Vec4 palette[NREGIONS_THREE][NINDICES]; Vector4 palette[NREGIONS_THREE][NINDICES];
generate_palette_unquantized(endpts, palette); generate_palette_unquantized(endpts, palette);
double toterr = 0; float toterr = 0;
Vec4 err; Vector4 err;
for (int y = 0; y < tile.size_y; y++) for (int y = 0; y < tile.size_y; y++)
for (int x = 0; x < tile.size_x; x++) for (int x = 0; x < tile.size_x; x++)
{ {
int region = REGION(x,y,shapeindex); int region = REGION(x,y,shapeindex);
double err, besterr = DBL_MAX; float err, besterr = FLT_MAX;
for (int i = 0; i < NINDICES && besterr > 0; ++i) for (int i = 0; i < NINDICES && besterr > 0; ++i)
{ {
@ -878,19 +880,21 @@ static double map_colors(const Tile &tile, int shapeindex, const FltEndpts endpt
return toterr; return toterr;
} }
static double rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS_THREE]) static float rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS_THREE])
{ {
for (int region=0; region<NREGIONS_THREE; ++region) for (int region=0; region<NREGIONS_THREE; ++region)
{ {
int np = 0; int np = 0;
Vec4 colors[Tile::TILE_TOTAL]; Vector3 colors[Tile::TILE_TOTAL];
Vec4 mean(0,0,0,0); float alphas[2];
Vector4 mean(0,0,0,0);
for (int y = 0; y < tile.size_y; y++) for (int y = 0; y < tile.size_y; y++)
for (int x = 0; x < tile.size_x; x++) for (int x = 0; x < tile.size_x; x++)
if (REGION(x,y,shapeindex) == region) if (REGION(x,y,shapeindex) == region)
{ {
colors[np] = tile.data[y][x]; colors[np] = tile.data[y][x].xyz();
if (np < 2) alphas[np] = tile.data[y][x].w;
mean += tile.data[y][x]; mean += tile.data[y][x];
++np; ++np;
} }
@ -898,54 +902,40 @@ static double rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS_
// handle simple cases // handle simple cases
if (np == 0) if (np == 0)
{ {
Vec4 zero(0,0,0,RGBA_MAX); Vector4 zero(0,0,0,255.0f);
endpts[region].A = zero; endpts[region].A = zero;
endpts[region].B = zero; endpts[region].B = zero;
continue; continue;
} }
else if (np == 1) else if (np == 1)
{ {
endpts[region].A = colors[0]; endpts[region].A = Vector4(colors[0], alphas[0]);
endpts[region].B = colors[0]; endpts[region].B = Vector4(colors[0], alphas[0]);
continue; continue;
} }
else if (np == 2) else if (np == 2)
{ {
endpts[region].A = colors[0]; endpts[region].A = Vector4(colors[0], alphas[0]);
endpts[region].B = colors[1]; endpts[region].B = Vector4(colors[1], alphas[1]);
continue; continue;
} }
Matrix rdq(np, 3);
mean /= float(np); mean /= float(np);
// only look at RGB' ignore A Vector3 direction = Fit::computePrincipalComponent_EigenSolver(np, colors);
for (int i = 0; i < np; ++i)
{
rdq(i,0) = colors[i].X() - mean.X();
rdq(i,1) = colors[i].Y() - mean.Y();
rdq(i,2) = colors[i].Z() - mean.Z();
}
// perform a singular value decomposition
SVD svd(rdq);
// get the principal component direction (well, the one with the largest weight)
Vec4 direction(svd.R()(0,0), svd.R()(0,1), svd.R()(0,2), 0.0);
// project each pixel value along the principal direction // project each pixel value along the principal direction
double minp = DBL_MAX, maxp = -DBL_MAX; float minp = FLT_MAX, maxp = -FLT_MAX;
for (int i = 0; i < np; i++) for (int i = 0; i < np; i++)
{ {
float dp = rdq(i,0) * direction.X() + rdq(i,1)*direction.Y() + rdq(i,2)*direction.Z(); float dp = dot(colors[i]-mean.xyz(), direction);
if (dp < minp) minp = dp; if (dp < minp) minp = dp;
if (dp > maxp) maxp = dp; if (dp > maxp) maxp = dp;
} }
// choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values // choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values
endpts[region].A = mean + minp*direction; endpts[region].A = mean + minp*Vector4(direction, 0);
endpts[region].B = mean + maxp*direction; endpts[region].B = mean + maxp*Vector4(direction, 0);
// clamp endpoints // clamp endpoints
// the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best // the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best
@ -957,13 +947,13 @@ static double rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS_
return map_colors(tile, shapeindex, endpts); return map_colors(tile, shapeindex, endpts);
} }
// Exchanges positions i and j in two parallel arrays at once, so that
// list1[k] and list2[k] remain paired after the swap.
//   list1 - values (double on the old side, float on the new side)
//   list2 - integers kept in step with list1
//   i, j  - positions to exchange; no bounds checking is performed
// NOTE(review): presumably used to order roughmse[] together with index[] in
// compress_mode2 -- the call site is outside the visible hunks; confirm.
static void swap(double *list1, int *list2, int i, int j) static void swap(float *list1, int *list2, int i, int j)
{ {
double t = list1[i]; list1[i] = list1[j]; list1[j] = t; float t = list1[i]; list1[i] = list1[j]; list1[j] = t;
int t1 = list2[i]; list2[i] = list2[j]; list2[j] = t1; int t1 = list2[i]; list2[i] = list2[j]; list2[j] = t1;
} }
double AVPCL::compress_mode2(const Tile &t, char *block) float AVPCL::compress_mode2(const Tile &t, char *block)
{ {
// number of rough cases to look at. reasonable values of this are 1, NSHAPES/4, and NSHAPES // number of rough cases to look at. reasonable values of this are 1, NSHAPES/4, and NSHAPES
// NSHAPES/4 gets nearly all the cases; you can increase that a bit (say by 3 or 4) if you really want to squeeze the last bit out // NSHAPES/4 gets nearly all the cases; you can increase that a bit (say by 3 or 4) if you really want to squeeze the last bit out
@ -973,10 +963,10 @@ double AVPCL::compress_mode2(const Tile &t, char *block)
struct { struct {
FltEndpts endpts[NREGIONS_THREE]; FltEndpts endpts[NREGIONS_THREE];
} all[NSHAPES]; } all[NSHAPES];
double roughmse[NSHAPES]; float roughmse[NSHAPES];
int index[NSHAPES]; int index[NSHAPES];
char tempblock[AVPCL::BLOCKSIZE]; char tempblock[AVPCL::BLOCKSIZE];
double msebest = DBL_MAX; float msebest = FLT_MAX;
for (int i=0; i<NSHAPES; ++i) for (int i=0; i<NSHAPES; ++i)
{ {
@ -993,7 +983,7 @@ double AVPCL::compress_mode2(const Tile &t, char *block)
for (int i=0; i<NITEMS && msebest>0; ++i) for (int i=0; i<NITEMS && msebest>0; ++i)
{ {
int shape = index[i]; int shape = index[i];
double mse = refine(t, shape, &all[shape].endpts[0], tempblock); float mse = refine(t, shape, &all[shape].endpts[0], tempblock);
if (mse < msebest) if (mse < msebest)
{ {
memcpy(block, tempblock, sizeof(tempblock)); memcpy(block, tempblock, sizeof(tempblock));

View File

@ -17,17 +17,18 @@ See the License for the specific language governing permissions and limitations
#include "bits.h" #include "bits.h"
#include "tile.h" #include "tile.h"
#include "avpcl.h" #include "avpcl.h"
#include "arvo/Vec4.h" #include "nvcore/Debug.h"
#include "arvo/Matrix.h" #include "nvmath/Vector.inl"
#include "arvo/SVD.h" #include "nvmath/Matrix.inl"
#include "nvmath/Fitting.h"
#include "utils.h" #include "utils.h"
#include "endpts.h" #include "endpts.h"
#include <cstring>
#include <assert.h>
#include "shapes_two.h" #include "shapes_two.h"
using namespace ArvoMath; using namespace nv;
using namespace AVPCL;
#define NLSBMODES 4 // number of different lsb modes per region. since we have two .1 per region, that can have 4 values #define NLSBMODES 4 // number of different lsb modes per region. since we have two .1 per region, that can have 4 values
@ -84,7 +85,7 @@ struct PatternPrec
// this is the precision for each channel and region // this is the precision for each channel and region
// NOTE: this MUST match the corresponding data in "patterns" above -- WARNING: there is NO assert to check this! // NOTE: this MUST match the corresponding data in "patterns" above -- WARNING: there is NO nvAssert to check this!
static PatternPrec pattern_precs[NPATTERNS] = static PatternPrec pattern_precs[NPATTERNS] =
{ {
7,7,7, 7,7,7, 7,7,7, 7,7,7, 7,7,7, 7,7,7, 7,7,7, 7,7,7,
@ -103,7 +104,7 @@ static int nbits(int n, bool issigned)
} }
else else
{ {
assert (issigned); nvAssert (issigned);
for (nb=0; n<-1; ++nb, n>>=1) ; for (nb=0; n<-1; ++nb, n>>=1) ;
return nb + 1; return nb + 1;
} }
@ -111,12 +112,12 @@ static int nbits(int n, bool issigned)
// Stub for the forward endpoint transform. The body does no work on ep: it is
// only assert(0) on the old side and nvUnreachable() on the new side, so
// reaching it signals a logic error. The visible caller in refine() invokes it
// only when patterns[sp].transformed is set.
static void transform_forward(IntEndptsRGB_2 ep[NREGIONS]) static void transform_forward(IntEndptsRGB_2 ep[NREGIONS])
{ {
assert(0); nvUnreachable();
} }
// Stub for the inverse endpoint transform. The body does no work on ep: it is
// only assert(0) on the old side and nvUnreachable() on the new side, so
// reaching it signals a logic error.
// NOTE(review): the conditions guarding its call sites are cut off by the diff
// hunks; presumably it is only called for transformed patterns -- confirm.
static void transform_inverse(IntEndptsRGB_2 ep[NREGIONS]) static void transform_inverse(IntEndptsRGB_2 ep[NREGIONS])
{ {
assert(0); nvUnreachable();
} }
// endpoints are 888,888; reduce to 777,777 and put the lsb bit majority in compr_bits // endpoints are 888,888; reduce to 777,777 and put the lsb bit majority in compr_bits
@ -129,7 +130,7 @@ static void compress_one(const IntEndptsRGB& endpts, IntEndptsRGB_2& compr_endpt
{ {
onescnt += endpts.A[j] & 1; onescnt += endpts.A[j] & 1;
compr_endpts.A[j] = endpts.A[j] >> 1; compr_endpts.A[j] = endpts.A[j] >> 1;
assert (compr_endpts.A[j] < 128); nvAssert (compr_endpts.A[j] < 128);
} }
compr_endpts.a_lsb = onescnt >= 2; compr_endpts.a_lsb = onescnt >= 2;
@ -138,7 +139,7 @@ static void compress_one(const IntEndptsRGB& endpts, IntEndptsRGB_2& compr_endpt
{ {
onescnt += endpts.B[j] & 1; onescnt += endpts.B[j] & 1;
compr_endpts.B[j] = endpts.B[j] >> 1; compr_endpts.B[j] = endpts.B[j] >> 1;
assert (compr_endpts.B[j] < 128); nvAssert (compr_endpts.B[j] < 128);
} }
compr_endpts.b_lsb = onescnt >= 2; compr_endpts.b_lsb = onescnt >= 2;
} }
@ -171,12 +172,12 @@ static void quantize_endpts(const FltEndpts endpts[NREGIONS], const PatternPrec
for (int region = 0; region < NREGIONS; ++region) for (int region = 0; region < NREGIONS; ++region)
{ {
full_endpts[region].A[0] = Utils::quantize(endpts[region].A.X(), pattern_prec.region_precs[region].endpt_a_prec[0]+1); // +1 since we are in uncompressed space full_endpts[region].A[0] = Utils::quantize(endpts[region].A.x, pattern_prec.region_precs[region].endpt_a_prec[0]+1); // +1 since we are in uncompressed space
full_endpts[region].A[1] = Utils::quantize(endpts[region].A.Y(), pattern_prec.region_precs[region].endpt_a_prec[1]+1); full_endpts[region].A[1] = Utils::quantize(endpts[region].A.y, pattern_prec.region_precs[region].endpt_a_prec[1]+1);
full_endpts[region].A[2] = Utils::quantize(endpts[region].A.Z(), pattern_prec.region_precs[region].endpt_a_prec[2]+1); full_endpts[region].A[2] = Utils::quantize(endpts[region].A.z, pattern_prec.region_precs[region].endpt_a_prec[2]+1);
full_endpts[region].B[0] = Utils::quantize(endpts[region].B.X(), pattern_prec.region_precs[region].endpt_b_prec[0]+1); full_endpts[region].B[0] = Utils::quantize(endpts[region].B.x, pattern_prec.region_precs[region].endpt_b_prec[0]+1);
full_endpts[region].B[1] = Utils::quantize(endpts[region].B.Y(), pattern_prec.region_precs[region].endpt_b_prec[1]+1); full_endpts[region].B[1] = Utils::quantize(endpts[region].B.y, pattern_prec.region_precs[region].endpt_b_prec[1]+1);
full_endpts[region].B[2] = Utils::quantize(endpts[region].B.Z(), pattern_prec.region_precs[region].endpt_b_prec[2]+1); full_endpts[region].B[2] = Utils::quantize(endpts[region].B.z, pattern_prec.region_precs[region].endpt_b_prec[2]+1);
compress_one(full_endpts[region], q_endpts[region]); compress_one(full_endpts[region], q_endpts[region]);
} }
} }
@ -190,7 +191,7 @@ static void swap_indices(IntEndptsRGB_2 endpts[NREGIONS], int indices[Tile::TILE
int x = POS_TO_X(position); int x = POS_TO_X(position);
int y = POS_TO_Y(position); int y = POS_TO_Y(position);
assert(REGION(x,y,shapeindex) == region); // double check the table nvAssert(REGION(x,y,shapeindex) == region); // double check the table
if (indices[y][x] & HIGH_INDEXBIT) if (indices[y][x] & HIGH_INDEXBIT)
{ {
// high bit is set, swap the endpts and indices for this region // high bit is set, swap the endpts and indices for this region
@ -232,7 +233,7 @@ static void write_header(const IntEndptsRGB_2 endpts[NREGIONS], int shapeindex,
out.write(endpts[i].b_lsb, 1); out.write(endpts[i].b_lsb, 1);
} }
assert (out.getptr() == 98); nvAssert (out.getptr() == 98);
} }
static void read_header(Bits &in, IntEndptsRGB_2 endpts[NREGIONS], int &shapeindex, Pattern &p, int &pat_index) static void read_header(Bits &in, IntEndptsRGB_2 endpts[NREGIONS], int &shapeindex, Pattern &p, int &pat_index)
@ -240,8 +241,8 @@ static void read_header(Bits &in, IntEndptsRGB_2 endpts[NREGIONS], int &shapeind
int mode = AVPCL::getmode(in); int mode = AVPCL::getmode(in);
pat_index = 0; pat_index = 0;
assert (pat_index >= 0 && pat_index < NPATTERNS); nvAssert (pat_index >= 0 && pat_index < NPATTERNS);
assert (in.getptr() == patterns[pat_index].modebits); nvAssert (in.getptr() == patterns[pat_index].modebits);
shapeindex = in.read(SHAPEBITS); shapeindex = in.read(SHAPEBITS);
p = patterns[pat_index]; p = patterns[pat_index];
@ -259,7 +260,7 @@ static void read_header(Bits &in, IntEndptsRGB_2 endpts[NREGIONS], int &shapeind
endpts[i].b_lsb = in.read(1); endpts[i].b_lsb = in.read(1);
} }
assert (in.getptr() == 98); nvAssert (in.getptr() == 98);
} }
static void write_indices(const int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex, Bits &out) static void write_indices(const int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex, Bits &out)
@ -312,10 +313,10 @@ static void emit_block(const IntEndptsRGB_2 endpts[NREGIONS], int shapeindex, co
write_indices(indices, shapeindex, out); write_indices(indices, shapeindex, out);
assert(out.getptr() == AVPCL::BITSIZE); nvAssert(out.getptr() == AVPCL::BITSIZE);
} }
static void generate_palette_quantized(const IntEndptsRGB_2 &endpts_2, const RegionPrec &region_prec, Vec4 palette[NINDICES]) static void generate_palette_quantized(const IntEndptsRGB_2 &endpts_2, const RegionPrec &region_prec, Vector4 palette[NINDICES])
{ {
IntEndptsRGB endpts; IntEndptsRGB endpts;
@ -329,30 +330,30 @@ static void generate_palette_quantized(const IntEndptsRGB_2 &endpts_2, const Reg
// interpolate // interpolate
for (int i = 0; i < NINDICES; ++i) for (int i = 0; i < NINDICES; ++i)
palette[i].X() = PALETTE_LERP(a, b, i, BIAS, DENOM); palette[i].x = float(Utils::lerp(a, b, i, BIAS, DENOM));
a = Utils::unquantize(endpts.A[1], region_prec.endpt_a_prec[1]+1); a = Utils::unquantize(endpts.A[1], region_prec.endpt_a_prec[1]+1);
b = Utils::unquantize(endpts.B[1], region_prec.endpt_b_prec[1]+1); b = Utils::unquantize(endpts.B[1], region_prec.endpt_b_prec[1]+1);
// interpolate // interpolate
for (int i = 0; i < NINDICES; ++i) for (int i = 0; i < NINDICES; ++i)
palette[i].Y() = PALETTE_LERP(a, b, i, BIAS, DENOM); palette[i].y = float(Utils::lerp(a, b, i, BIAS, DENOM));
a = Utils::unquantize(endpts.A[2], region_prec.endpt_a_prec[2]+1); a = Utils::unquantize(endpts.A[2], region_prec.endpt_a_prec[2]+1);
b = Utils::unquantize(endpts.B[2], region_prec.endpt_b_prec[2]+1); b = Utils::unquantize(endpts.B[2], region_prec.endpt_b_prec[2]+1);
// interpolate // interpolate
for (int i = 0; i < NINDICES; ++i) for (int i = 0; i < NINDICES; ++i)
palette[i].Z() = PALETTE_LERP(a, b, i, BIAS, DENOM); palette[i].z = float(Utils::lerp(a, b, i, BIAS, DENOM));
// constant alpha // constant alpha
for (int i = 0; i < NINDICES; ++i) for (int i = 0; i < NINDICES; ++i)
palette[i].W() = RGBA_MAX; palette[i].w = 255.0f;
} }
// Stub for sign-extending endpoints. Neither p nor endpts is touched: the body
// is only assert(0) on the old side and nvUnreachable() on the new side, so
// reaching it signals a logic error.
// NOTE(review): no call site is visible in this chunk; presumably it is only
// needed for signed/transformed patterns -- confirm.
static void sign_extend(Pattern &p, IntEndptsRGB_2 endpts[NREGIONS]) static void sign_extend(Pattern &p, IntEndptsRGB_2 endpts[NREGIONS])
{ {
assert(0); nvUnreachable();
} }
void AVPCL::decompress_mode3(const char *block, Tile &t) void AVPCL::decompress_mode3(const char *block, Tile &t)
@ -371,7 +372,7 @@ void AVPCL::decompress_mode3(const char *block, Tile &t)
transform_inverse(endpts); transform_inverse(endpts);
} }
Vec4 palette[NREGIONS][NINDICES]; Vector4 palette[NREGIONS][NINDICES];
for (int r = 0; r < NREGIONS; ++r) for (int r = 0; r < NREGIONS; ++r)
generate_palette_quantized(endpts[r], pattern_precs[pat_index].region_precs[r], &palette[r][0]); generate_palette_quantized(endpts[r], pattern_precs[pat_index].region_precs[r], &palette[r][0]);
@ -379,7 +380,7 @@ void AVPCL::decompress_mode3(const char *block, Tile &t)
read_indices(in, shapeindex, indices); read_indices(in, shapeindex, indices);
assert(in.getptr() == AVPCL::BITSIZE); nvAssert(in.getptr() == AVPCL::BITSIZE);
// lookup // lookup
for (int y = 0; y < Tile::TILE_H; y++) for (int y = 0; y < Tile::TILE_H; y++)
@ -388,17 +389,17 @@ void AVPCL::decompress_mode3(const char *block, Tile &t)
} }
// given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr // given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr
static double map_colors(const Vec4 colors[], int np, const IntEndptsRGB_2 &endpts, const RegionPrec &region_prec, double current_err, int indices[Tile::TILE_TOTAL]) static float map_colors(const Vector4 colors[], int np, const IntEndptsRGB_2 &endpts, const RegionPrec &region_prec, float current_err, int indices[Tile::TILE_TOTAL])
{ {
Vec4 palette[NINDICES]; Vector4 palette[NINDICES];
double toterr = 0; float toterr = 0;
Vec4 err; Vector4 err;
generate_palette_quantized(endpts, region_prec, palette); generate_palette_quantized(endpts, region_prec, palette);
for (int i = 0; i < np; ++i) for (int i = 0; i < np; ++i)
{ {
double err, besterr = DBL_MAX; float err, besterr = FLT_MAX;
for (int j = 0; j < NINDICES && besterr > 0; ++j) for (int j = 0; j < NINDICES && besterr > 0; ++j)
{ {
@ -421,17 +422,17 @@ static double map_colors(const Vec4 colors[], int np, const IntEndptsRGB_2 &endp
for (int k = i; k < np; ++k) for (int k = i; k < np; ++k)
indices[k] = -1; indices[k] = -1;
return DBL_MAX; return FLT_MAX;
} }
} }
return toterr; return toterr;
} }
static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGB_2 endpts[NREGIONS], const PatternPrec &pattern_prec, static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGB_2 endpts[NREGIONS], const PatternPrec &pattern_prec,
int indices[Tile::TILE_H][Tile::TILE_W], double toterr[NREGIONS]) int indices[Tile::TILE_H][Tile::TILE_W], float toterr[NREGIONS])
{ {
// build list of possibles // build list of possibles
Vec4 palette[NREGIONS][NINDICES]; Vector4 palette[NREGIONS][NINDICES];
for (int region = 0; region < NREGIONS; ++region) for (int region = 0; region < NREGIONS; ++region)
{ {
@ -439,13 +440,13 @@ static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGB_2 endp
toterr[region] = 0; toterr[region] = 0;
} }
Vec4 err; Vector4 err;
for (int y = 0; y < tile.size_y; y++) for (int y = 0; y < tile.size_y; y++)
for (int x = 0; x < tile.size_x; x++) for (int x = 0; x < tile.size_x; x++)
{ {
int region = REGION(x,y,shapeindex); int region = REGION(x,y,shapeindex);
double err, besterr = DBL_MAX; float err, besterr = FLT_MAX;
for (int i = 0; i < NINDICES && besterr > 0; ++i) for (int i = 0; i < NINDICES && besterr > 0; ++i)
{ {
@ -465,8 +466,8 @@ static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGB_2 endp
// note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's // note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's
// this function returns either old_err or a value smaller (if it was successful in improving the error) // this function returns either old_err or a value smaller (if it was successful in improving the error)
static double perturb_one(const Vec4 colors[], int np, int ch, const RegionPrec &region_prec, const IntEndptsRGB_2 &old_endpts, IntEndptsRGB_2 &new_endpts, static float perturb_one(const Vector4 colors[], int np, int ch, const RegionPrec &region_prec, const IntEndptsRGB_2 &old_endpts, IntEndptsRGB_2 &new_endpts,
double old_err, int do_b, int indices[Tile::TILE_TOTAL]) float old_err, int do_b, int indices[Tile::TILE_TOTAL])
{ {
// we have the old endpoints: old_endpts // we have the old endpoints: old_endpts
// we have the perturbed endpoints: new_endpts // we have the perturbed endpoints: new_endpts
@ -536,10 +537,10 @@ static double perturb_one(const Vec4 colors[], int np, int ch, const RegionPrec
// for np = 16 -- adjust error thresholds as a function of np // for np = 16 -- adjust error thresholds as a function of np
// always ensure endpoint ordering is preserved (no need to overlap the scan) // always ensure endpoint ordering is preserved (no need to overlap the scan)
// if orig_err returned from this is less than its input value, then indices[] will contain valid indices // if orig_err returned from this is less than its input value, then indices[] will contain valid indices
static double exhaustive(const Vec4 colors[], int np, int ch, const RegionPrec &region_prec, double &orig_err, IntEndptsRGB_2 &opt_endpts, int indices[Tile::TILE_TOTAL]) static float exhaustive(const Vector4 colors[], int np, int ch, const RegionPrec &region_prec, float &orig_err, IntEndptsRGB_2 &opt_endpts, int indices[Tile::TILE_TOTAL])
{ {
IntEndptsRGB_2 temp_endpts; IntEndptsRGB_2 temp_endpts;
double best_err = orig_err; float best_err = orig_err;
int aprec = region_prec.endpt_a_prec[ch]; int aprec = region_prec.endpt_a_prec[ch];
int bprec = region_prec.endpt_b_prec[ch]; int bprec = region_prec.endpt_b_prec[ch];
int good_indices[Tile::TILE_TOTAL]; int good_indices[Tile::TILE_TOTAL];
@ -548,7 +549,7 @@ static double exhaustive(const Vec4 colors[], int np, int ch, const RegionPrec &
for (int i=0; i<np; ++i) for (int i=0; i<np; ++i)
indices[i] = -1; indices[i] = -1;
double thr_scale = (double)np / (double)Tile::TILE_TOTAL; float thr_scale = (float)np / (float)Tile::TILE_TOTAL;
if (orig_err == 0) return orig_err; if (orig_err == 0) return orig_err;
@ -557,8 +558,8 @@ static double exhaustive(const Vec4 colors[], int np, int ch, const RegionPrec &
else if (orig_err > 1000.0*thr_scale) { adelta = (1 << aprec)/4; bdelta = (1 << bprec)/4; } else if (orig_err > 1000.0*thr_scale) { adelta = (1 << aprec)/4; bdelta = (1 << bprec)/4; }
else if (orig_err > 200.0*thr_scale) { adelta = (1 << aprec)/8; bdelta = (1 << bprec)/8; } else if (orig_err > 200.0*thr_scale) { adelta = (1 << aprec)/8; bdelta = (1 << bprec)/8; }
else if (orig_err > 40.0*thr_scale) { adelta = (1 << aprec)/16; bdelta = (1 << bprec)/16; } else if (orig_err > 40.0*thr_scale) { adelta = (1 << aprec)/16; bdelta = (1 << bprec)/16; }
adelta = MAX(adelta, 3); adelta = max(adelta, 3);
bdelta = MAX(bdelta, 3); bdelta = max(bdelta, 3);
#ifdef DISABLE_EXHAUSTIVE #ifdef DISABLE_EXHAUSTIVE
adelta = bdelta = 3; adelta = bdelta = 3;
@ -567,10 +568,10 @@ static double exhaustive(const Vec4 colors[], int np, int ch, const RegionPrec &
temp_endpts = opt_endpts; temp_endpts = opt_endpts;
// ok figure out the range of A and B // ok figure out the range of A and B
int alow = MAX(0, opt_endpts.A[ch] - adelta); int alow = max(0, opt_endpts.A[ch] - adelta);
int ahigh = MIN((1<<aprec)-1, opt_endpts.A[ch] + adelta); int ahigh = min((1<<aprec)-1, opt_endpts.A[ch] + adelta);
int blow = MAX(0, opt_endpts.B[ch] - bdelta); int blow = max(0, opt_endpts.B[ch] - bdelta);
int bhigh = MIN((1<<bprec)-1, opt_endpts.B[ch] + bdelta); int bhigh = min((1<<bprec)-1, opt_endpts.B[ch] + bdelta);
// now there's no need to swap the ordering of A and B // now there's no need to swap the ordering of A and B
bool a_le_b = opt_endpts.A[ch] <= opt_endpts.B[ch]; bool a_le_b = opt_endpts.A[ch] <= opt_endpts.B[ch];
@ -581,7 +582,7 @@ static double exhaustive(const Vec4 colors[], int np, int ch, const RegionPrec &
{ {
// keep a <= b // keep a <= b
for (int a = alow; a <= ahigh; ++a) for (int a = alow; a <= ahigh; ++a)
for (int b = MAX(a, blow); b < bhigh; ++b) for (int b = max(a, blow); b < bhigh; ++b)
{ {
temp_endpts.A[ch] = a; temp_endpts.A[ch] = a;
temp_endpts.B[ch] = b; temp_endpts.B[ch] = b;
@ -601,7 +602,7 @@ static double exhaustive(const Vec4 colors[], int np, int ch, const RegionPrec &
{ {
// keep b <= a // keep b <= a
for (int b = blow; b < bhigh; ++b) for (int b = blow; b < bhigh; ++b)
for (int a = MAX(b, alow); a <= ahigh; ++a) for (int a = max(b, alow); a <= ahigh; ++a)
{ {
temp_endpts.A[ch] = a; temp_endpts.A[ch] = a;
temp_endpts.B[ch] = b; temp_endpts.B[ch] = b;
@ -629,9 +630,9 @@ static double exhaustive(const Vec4 colors[], int np, int ch, const RegionPrec &
return best_err; return best_err;
} }
static double optimize_one(const Vec4 colors[], int np, double orig_err, const IntEndptsRGB_2 &orig_endpts, const RegionPrec &region_prec, IntEndptsRGB_2 &opt_endpts) static float optimize_one(const Vector4 colors[], int np, float orig_err, const IntEndptsRGB_2 &orig_endpts, const RegionPrec &region_prec, IntEndptsRGB_2 &opt_endpts)
{ {
double opt_err = orig_err; float opt_err = orig_err;
opt_endpts = orig_endpts; opt_endpts = orig_endpts;
@ -679,7 +680,7 @@ static double optimize_one(const Vec4 colors[], int np, double orig_err, const I
for (int i=0; i<np; ++i) for (int i=0; i<np; ++i)
{ {
new_indices[i] = orig_indices[i] = temp_indices0[i]; new_indices[i] = orig_indices[i] = temp_indices0[i];
assert (orig_indices[i] != -1); nvAssert (orig_indices[i] != -1);
} }
opt_endpts.A[ch] = new_a.A[ch]; opt_endpts.A[ch] = new_a.A[ch];
@ -694,7 +695,7 @@ static double optimize_one(const Vec4 colors[], int np, double orig_err, const I
for (int i=0; i<np; ++i) for (int i=0; i<np; ++i)
{ {
new_indices[i] = orig_indices[i] = temp_indices1[i]; new_indices[i] = orig_indices[i] = temp_indices1[i];
assert (orig_indices[i] != -1); nvAssert (orig_indices[i] != -1);
} }
opt_endpts.B[ch] = new_b.B[ch]; opt_endpts.B[ch] = new_b.B[ch];
@ -712,7 +713,7 @@ static double optimize_one(const Vec4 colors[], int np, double orig_err, const I
for (int i=0; i<np; ++i) for (int i=0; i<np; ++i)
{ {
new_indices[i] = temp_indices0[i]; new_indices[i] = temp_indices0[i];
assert (new_indices[i] != -1); nvAssert (new_indices[i] != -1);
} }
if (do_b == 0) if (do_b == 0)
@ -739,7 +740,7 @@ static double optimize_one(const Vec4 colors[], int np, double orig_err, const I
bool first = true; bool first = true;
for (int ch = 0; ch < NCHANNELS_RGB; ++ch) for (int ch = 0; ch < NCHANNELS_RGB; ++ch)
{ {
double new_err = exhaustive(colors, np, ch, region_prec, opt_err, opt_endpts, temp_indices0); float new_err = exhaustive(colors, np, ch, region_prec, opt_err, opt_endpts, temp_indices0);
if (new_err < opt_err) if (new_err < opt_err)
{ {
@ -750,7 +751,7 @@ static double optimize_one(const Vec4 colors[], int np, double orig_err, const I
for (int i=0; i<np; ++i) for (int i=0; i<np; ++i)
{ {
orig_indices[i] = temp_indices0[i]; orig_indices[i] = temp_indices0[i];
assert (orig_indices[i] != -1); nvAssert (orig_indices[i] != -1);
} }
first = false; first = false;
} }
@ -775,10 +776,10 @@ static double optimize_one(const Vec4 colors[], int np, double orig_err, const I
} }
// this will return a valid set of endpoints in opt_endpts regardless of whether it improves orig_endpts or not // this will return a valid set of endpoints in opt_endpts regardless of whether it improves orig_endpts or not
static void optimize_endpts(const Tile &tile, int shapeindex, const double orig_err[NREGIONS], static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_err[NREGIONS],
const IntEndptsRGB_2 orig_endpts[NREGIONS], const PatternPrec &pattern_prec, double opt_err[NREGIONS], IntEndptsRGB_2 opt_endpts[NREGIONS]) const IntEndptsRGB_2 orig_endpts[NREGIONS], const PatternPrec &pattern_prec, float opt_err[NREGIONS], IntEndptsRGB_2 opt_endpts[NREGIONS])
{ {
Vec4 pixels[Tile::TILE_TOTAL]; Vector4 pixels[Tile::TILE_TOTAL];
IntEndptsRGB_2 temp_in, temp_out; IntEndptsRGB_2 temp_in, temp_out;
int temp_indices[Tile::TILE_TOTAL]; int temp_indices[Tile::TILE_TOTAL];
@ -795,7 +796,7 @@ static void optimize_endpts(const Tile &tile, int shapeindex, const double orig_
opt_endpts[region] = temp_in = orig_endpts[region]; opt_endpts[region] = temp_in = orig_endpts[region];
opt_err[region] = orig_err[region]; opt_err[region] = orig_err[region];
double best_err = orig_err[region]; float best_err = orig_err[region];
for (int lsbmode=0; lsbmode<NLSBMODES; ++lsbmode) for (int lsbmode=0; lsbmode<NLSBMODES; ++lsbmode)
{ {
@ -803,12 +804,12 @@ static void optimize_endpts(const Tile &tile, int shapeindex, const double orig_
temp_in.b_lsb = (lsbmode >> 1) & 1; temp_in.b_lsb = (lsbmode >> 1) & 1;
// make sure we have a valid error for temp_in // make sure we have a valid error for temp_in
// we use DBL_MAX here because we want an accurate temp_in_err, no shortcuts // we use FLT_MAX here because we want an accurate temp_in_err, no shortcuts
// (mapcolors will compute a mapping but will stop if the error exceeds the value passed in the DBL_MAX position) // (mapcolors will compute a mapping but will stop if the error exceeds the value passed in the FLT_MAX position)
double temp_in_err = map_colors(pixels, np, temp_in, pattern_prec.region_precs[region], DBL_MAX, temp_indices); float temp_in_err = map_colors(pixels, np, temp_in, pattern_prec.region_precs[region], FLT_MAX, temp_indices);
// now try to optimize these endpoints // now try to optimize these endpoints
double temp_out_err = optimize_one(pixels, np, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out); float temp_out_err = optimize_one(pixels, np, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out);
// if we find an improvement, update the best so far and correct the output endpoints and errors // if we find an improvement, update the best so far and correct the output endpoints and errors
if (temp_out_err < best_err) if (temp_out_err < best_err)
@ -838,9 +839,9 @@ static void optimize_endpts(const Tile &tile, int shapeindex, const double orig_
emit compressed block with original data // to try to preserve maximum endpoint precision emit compressed block with original data // to try to preserve maximum endpoint precision
*/ */
static double refine(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS], char *block) static float refine(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS], char *block)
{ {
double orig_err[NREGIONS], opt_err[NREGIONS], orig_toterr, opt_toterr, expected_opt_err[NREGIONS]; float orig_err[NREGIONS], opt_err[NREGIONS], orig_toterr, opt_toterr, expected_opt_err[NREGIONS];
IntEndptsRGB_2 orig_endpts[NREGIONS], opt_endpts[NREGIONS]; IntEndptsRGB_2 orig_endpts[NREGIONS], opt_endpts[NREGIONS];
int orig_indices[Tile::TILE_H][Tile::TILE_W], opt_indices[Tile::TILE_H][Tile::TILE_W]; int orig_indices[Tile::TILE_H][Tile::TILE_W], opt_indices[Tile::TILE_H][Tile::TILE_W];
@ -859,8 +860,9 @@ static double refine(const Tile &tile, int shapeindex_best, const FltEndpts endp
transform_inverse(orig_endpts); transform_inverse(orig_endpts);
optimize_endpts(tile, shapeindex_best, orig_err, orig_endpts, pattern_precs[sp], expected_opt_err, opt_endpts); optimize_endpts(tile, shapeindex_best, orig_err, orig_endpts, pattern_precs[sp], expected_opt_err, opt_endpts);
assign_indices(tile, shapeindex_best, opt_endpts, pattern_precs[sp], opt_indices, opt_err); assign_indices(tile, shapeindex_best, opt_endpts, pattern_precs[sp], opt_indices, opt_err);
for (int i=0; i<NREGIONS; ++i) // (nreed) Commented out asserts because they go off all the time...not sure why
assert(expected_opt_err[i] == opt_err[i]); //for (int i=0; i<NREGIONS; ++i)
// nvAssert(expected_opt_err[i] == opt_err[i]);
swap_indices(opt_endpts, opt_indices, shapeindex_best); swap_indices(opt_endpts, opt_indices, shapeindex_best);
if (patterns[sp].transformed) if (patterns[sp].transformed)
transform_forward(opt_endpts); transform_forward(opt_endpts);
@ -885,40 +887,40 @@ static double refine(const Tile &tile, int shapeindex_best, const FltEndpts endp
throw "No candidate found, should never happen (avpcl mode 3)."; throw "No candidate found, should never happen (avpcl mode 3).";
} }
// Clamps the x, y and z components of v in place to the valid channel range
// (written as RGBA_MIN..RGBA_MAX in the Vec4 column of this diff and as
// 0.0f..255.0f in the Vector4 column).
// The w component is not clamped; it is unconditionally overwritten with the
// maximum value, so whatever w held on entry is discarded.
static void clamp(Vec4 &v) static void clamp(Vector4 &v)
{ {
if (v.X() < RGBA_MIN) v.X() = RGBA_MIN; if (v.x < 0.0f) v.x = 0.0f;
if (v.X() > RGBA_MAX) v.X() = RGBA_MAX; if (v.x > 255.0f) v.x = 255.0f;
if (v.Y() < RGBA_MIN) v.Y() = RGBA_MIN; if (v.y < 0.0f) v.y = 0.0f;
if (v.Y() > RGBA_MAX) v.Y() = RGBA_MAX; if (v.y > 255.0f) v.y = 255.0f;
if (v.Z() < RGBA_MIN) v.Z() = RGBA_MIN; if (v.z < 0.0f) v.z = 0.0f;
if (v.Z() > RGBA_MAX) v.Z() = RGBA_MAX; if (v.z > 255.0f) v.z = 255.0f;
// w is forced to the maximum rather than range-checked
v.W() = RGBA_MAX; v.w = 255.0f;
} }
// Builds the palette for every region directly from the floating-point endpoints.
// For each region and each index i in [0, NINDICES), palette[region][i] is the
// interpolation between endpts[region].A and endpts[region].B at step i
// (PALETTE_LERP in the Vec4 column, Utils::lerp in the Vector4 column).
// The endpoints are used exactly as passed in; this function does no quantization.
// NOTE(review): the last two lerp arguments (0 and DENOM) appear to be the bias and
// denominator, by analogy with the BIAS_*/DENOM_* arguments used by the quantized
// palette generator -- confirm against the lerp definition.
static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS], Vec4 palette[NREGIONS][NINDICES]) static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS], Vector4 palette[NREGIONS][NINDICES])
{ {
for (int region = 0; region < NREGIONS; ++region) for (int region = 0; region < NREGIONS; ++region)
for (int i = 0; i < NINDICES; ++i) for (int i = 0; i < NINDICES; ++i)
palette[region][i] = PALETTE_LERP(endpts[region].A, endpts[region].B, i, 0.0, float(DENOM)); palette[region][i] = Utils::lerp(endpts[region].A, endpts[region].B, i, 0, DENOM);
} }
// generate a palette from unquantized endpoints, then pick best palette color for all pixels in each region, return toterr for all regions combined // generate a palette from unquantized endpoints, then pick best palette color for all pixels in each region, return toterr for all regions combined
static double map_colors(const Tile &tile, int shapeindex, const FltEndpts endpts[NREGIONS]) static float map_colors(const Tile &tile, int shapeindex, const FltEndpts endpts[NREGIONS])
{ {
// build list of possibles // build list of possibles
Vec4 palette[NREGIONS][NINDICES]; Vector4 palette[NREGIONS][NINDICES];
generate_palette_unquantized(endpts, palette); generate_palette_unquantized(endpts, palette);
double toterr = 0; float toterr = 0;
Vec4 err; Vector4 err;
for (int y = 0; y < tile.size_y; y++) for (int y = 0; y < tile.size_y; y++)
for (int x = 0; x < tile.size_x; x++) for (int x = 0; x < tile.size_x; x++)
{ {
int region = REGION(x,y,shapeindex); int region = REGION(x,y,shapeindex);
double err, besterr = DBL_MAX; float err, besterr = FLT_MAX;
for (int i = 0; i < NINDICES && besterr > 0; ++i) for (int i = 0; i < NINDICES && besterr > 0; ++i)
{ {
@ -934,19 +936,21 @@ static double map_colors(const Tile &tile, int shapeindex, const FltEndpts endpt
return toterr; return toterr;
} }
static double rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS]) static float rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS])
{ {
for (int region=0; region<NREGIONS; ++region) for (int region=0; region<NREGIONS; ++region)
{ {
int np = 0; int np = 0;
Vec4 colors[Tile::TILE_TOTAL]; Vector3 colors[Tile::TILE_TOTAL];
Vec4 mean(0,0,0,0); float alphas[2];
Vector4 mean(0,0,0,0);
for (int y = 0; y < tile.size_y; y++) for (int y = 0; y < tile.size_y; y++)
for (int x = 0; x < tile.size_x; x++) for (int x = 0; x < tile.size_x; x++)
if (REGION(x,y,shapeindex) == region) if (REGION(x,y,shapeindex) == region)
{ {
colors[np] = tile.data[y][x]; colors[np] = tile.data[y][x].xyz();
if (np < 2) alphas[np] = tile.data[y][x].w;
mean += tile.data[y][x]; mean += tile.data[y][x];
++np; ++np;
} }
@ -954,54 +958,40 @@ static double rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS]
// handle simple cases // handle simple cases
if (np == 0) if (np == 0)
{ {
Vec4 zero(0,0,0,RGBA_MAX); Vector4 zero(0,0,0,255.0f);
endpts[region].A = zero; endpts[region].A = zero;
endpts[region].B = zero; endpts[region].B = zero;
continue; continue;
} }
else if (np == 1) else if (np == 1)
{ {
endpts[region].A = colors[0]; endpts[region].A = Vector4(colors[0], alphas[0]);
endpts[region].B = colors[0]; endpts[region].B = Vector4(colors[0], alphas[0]);
continue; continue;
} }
else if (np == 2) else if (np == 2)
{ {
endpts[region].A = colors[0]; endpts[region].A = Vector4(colors[0], alphas[0]);
endpts[region].B = colors[1]; endpts[region].B = Vector4(colors[1], alphas[1]);
continue; continue;
} }
Matrix rdq(np, 3);
mean /= float(np); mean /= float(np);
// only look at RGB' ignore A Vector3 direction = Fit::computePrincipalComponent_EigenSolver(np, colors);
for (int i = 0; i < np; ++i)
{
rdq(i,0) = colors[i].X() - mean.X();
rdq(i,1) = colors[i].Y() - mean.Y();
rdq(i,2) = colors[i].Z() - mean.Z();
}
// perform a singular value decomposition
SVD svd(rdq);
// get the principal component direction (well, the one with the largest weight)
Vec4 direction(svd.R()(0,0), svd.R()(0,1), svd.R()(0,2), 0.0);
// project each pixel value along the principal direction // project each pixel value along the principal direction
double minp = DBL_MAX, maxp = -DBL_MAX; float minp = FLT_MAX, maxp = -FLT_MAX;
for (int i = 0; i < np; i++) for (int i = 0; i < np; i++)
{ {
float dp = rdq(i,0) * direction.X() + rdq(i,1)*direction.Y() + rdq(i,2)*direction.Z(); float dp = dot(colors[i]-mean.xyz(), direction);
if (dp < minp) minp = dp; if (dp < minp) minp = dp;
if (dp > maxp) maxp = dp; if (dp > maxp) maxp = dp;
} }
// choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values // choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values
endpts[region].A = mean + minp*direction; endpts[region].A = mean + minp*Vector4(direction, 0);
endpts[region].B = mean + maxp*direction; endpts[region].B = mean + maxp*Vector4(direction, 0);
// clamp endpoints // clamp endpoints
// the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best // the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best
@ -1013,13 +1003,13 @@ static double rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS]
return map_colors(tile, shapeindex, endpts); return map_colors(tile, shapeindex, endpts);
} }
// Exchanges elements i and j in both list1 and list2, so that entries sharing an
// index in the two arrays stay paired after the swap.
// No bounds checking is done; i and j must be valid indices for both arrays.
// NOTE(review): presumably used to reorder roughmse[] together with index[] in
// compress_mode3 -- the call site is not visible in this chunk, confirm.
static void swap(double *list1, int *list2, int i, int j) static void swap(float *list1, int *list2, int i, int j)
{ {
double t = list1[i]; list1[i] = list1[j]; list1[j] = t; float t = list1[i]; list1[i] = list1[j]; list1[j] = t;
int t1 = list2[i]; list2[i] = list2[j]; list2[j] = t1; int t1 = list2[i]; list2[i] = list2[j]; list2[j] = t1;
} }
double AVPCL::compress_mode3(const Tile &t, char *block) float AVPCL::compress_mode3(const Tile &t, char *block)
{ {
// number of rough cases to look at. reasonable values of this are 1, NSHAPES/4, and NSHAPES // number of rough cases to look at. reasonable values of this are 1, NSHAPES/4, and NSHAPES
// NSHAPES/4 gets nearly all the cases; you can increase that a bit (say by 3 or 4) if you really want to squeeze the last bit out // NSHAPES/4 gets nearly all the cases; you can increase that a bit (say by 3 or 4) if you really want to squeeze the last bit out
@ -1029,10 +1019,10 @@ double AVPCL::compress_mode3(const Tile &t, char *block)
struct { struct {
FltEndpts endpts[NREGIONS]; FltEndpts endpts[NREGIONS];
} all[NSHAPES]; } all[NSHAPES];
double roughmse[NSHAPES]; float roughmse[NSHAPES];
int index[NSHAPES]; int index[NSHAPES];
char tempblock[AVPCL::BLOCKSIZE]; char tempblock[AVPCL::BLOCKSIZE];
double msebest = DBL_MAX; float msebest = FLT_MAX;
for (int i=0; i<NSHAPES; ++i) for (int i=0; i<NSHAPES; ++i)
{ {
@ -1049,7 +1039,7 @@ double AVPCL::compress_mode3(const Tile &t, char *block)
for (int i=0; i<NITEMS && msebest>0; ++i) for (int i=0; i<NITEMS && msebest>0; ++i)
{ {
int shape = index[i]; int shape = index[i];
double mse = refine(t, shape, &all[shape].endpts[0], tempblock); float mse = refine(t, shape, &all[shape].endpts[0], tempblock);
if (mse < msebest) if (mse < msebest)
{ {
memcpy(block, tempblock, sizeof(tempblock)); memcpy(block, tempblock, sizeof(tempblock));

View File

@ -17,15 +17,16 @@ See the License for the specific language governing permissions and limitations
#include "bits.h" #include "bits.h"
#include "tile.h" #include "tile.h"
#include "avpcl.h" #include "avpcl.h"
#include "arvo/Vec4.h" #include "nvcore/Debug.h"
#include "arvo/Matrix.h" #include "nvmath/Vector.inl"
#include "arvo/SVD.h" #include "nvmath/Matrix.inl"
#include "nvmath/Fitting.h"
#include "utils.h" #include "utils.h"
#include "endpts.h" #include "endpts.h"
#include <cstring>
#include <assert.h> using namespace nv;
using namespace AVPCL;
using namespace ArvoMath;
// there are 2 index arrays. INDEXMODE selects between the arrays being 2 & 3 bits or 3 & 2 bits // there are 2 index arrays. INDEXMODE selects between the arrays being 2 & 3 bits or 3 & 2 bits
// array 0 is always the RGB array and array 1 is always the A array // array 0 is always the RGB array and array 1 is always the A array
@ -111,7 +112,7 @@ struct PatternPrec
}; };
// this is the precision for each channel and region // this is the precision for each channel and region
// NOTE: this MUST match the corresponding data in "patterns" above -- WARNING: there is NO assert to check this! // NOTE: this MUST match the corresponding data in "patterns" above -- WARNING: there is NO nvAssert to check this!
static PatternPrec pattern_precs[NPATTERNS] = static PatternPrec pattern_precs[NPATTERNS] =
{ {
5,5,5,6, 5,5,5,6, 5,5,5,6, 5,5,5,6,
@ -131,7 +132,7 @@ static int nbits(int n, bool issigned)
} }
else else
{ {
assert (issigned); nvAssert (issigned);
for (nb=0; n<-1; ++nb, n>>=1) ; for (nb=0; n<-1; ++nb, n>>=1) ;
return nb + 1; return nb + 1;
} }
@ -172,15 +173,15 @@ static void quantize_endpts(const FltEndpts endpts[NREGIONS], const PatternPrec
{ {
for (int region = 0; region < NREGIONS; ++region) for (int region = 0; region < NREGIONS; ++region)
{ {
q_endpts[region].A[0] = Utils::quantize(endpts[region].A.X(), pattern_prec.region_precs[region].endpt_a_prec[0]); q_endpts[region].A[0] = Utils::quantize(endpts[region].A.x, pattern_prec.region_precs[region].endpt_a_prec[0]);
q_endpts[region].A[1] = Utils::quantize(endpts[region].A.Y(), pattern_prec.region_precs[region].endpt_a_prec[1]); q_endpts[region].A[1] = Utils::quantize(endpts[region].A.y, pattern_prec.region_precs[region].endpt_a_prec[1]);
q_endpts[region].A[2] = Utils::quantize(endpts[region].A.Z(), pattern_prec.region_precs[region].endpt_a_prec[2]); q_endpts[region].A[2] = Utils::quantize(endpts[region].A.z, pattern_prec.region_precs[region].endpt_a_prec[2]);
q_endpts[region].A[3] = Utils::quantize(endpts[region].A.W(), pattern_prec.region_precs[region].endpt_a_prec[3]); q_endpts[region].A[3] = Utils::quantize(endpts[region].A.w, pattern_prec.region_precs[region].endpt_a_prec[3]);
q_endpts[region].B[0] = Utils::quantize(endpts[region].B.X(), pattern_prec.region_precs[region].endpt_b_prec[0]); q_endpts[region].B[0] = Utils::quantize(endpts[region].B.x, pattern_prec.region_precs[region].endpt_b_prec[0]);
q_endpts[region].B[1] = Utils::quantize(endpts[region].B.Y(), pattern_prec.region_precs[region].endpt_b_prec[1]); q_endpts[region].B[1] = Utils::quantize(endpts[region].B.y, pattern_prec.region_precs[region].endpt_b_prec[1]);
q_endpts[region].B[2] = Utils::quantize(endpts[region].B.Z(), pattern_prec.region_precs[region].endpt_b_prec[2]); q_endpts[region].B[2] = Utils::quantize(endpts[region].B.z, pattern_prec.region_precs[region].endpt_b_prec[2]);
q_endpts[region].B[3] = Utils::quantize(endpts[region].B.W(), pattern_prec.region_precs[region].endpt_b_prec[3]); q_endpts[region].B[3] = Utils::quantize(endpts[region].B.w, pattern_prec.region_precs[region].endpt_b_prec[3]);
} }
} }
@ -196,7 +197,7 @@ static void swap_indices(int shapeindex, int indexmode, IntEndptsRGBA endpts[NRE
{ {
int x = index_positions[region] & 3; int x = index_positions[region] & 3;
int y = (index_positions[region] >> 2) & 3; int y = (index_positions[region] >> 2) & 3;
assert(REGION(x,y,shapeindex) == region); // double check the table nvAssert(REGION(x,y,shapeindex) == region); // double check the table
// swap RGB // swap RGB
if (indices[INDEXARRAY_RGB][y][x] & HIGH_INDEXBIT_RGB(indexmode)) if (indices[INDEXARRAY_RGB][y][x] & HIGH_INDEXBIT_RGB(indexmode))
@ -243,7 +244,7 @@ static void write_header(const IntEndptsRGBA endpts[NREGIONS], int shapeindex, c
out.write(endpts[i].A[j], p.chan[j].nbitsizes[0]); out.write(endpts[i].A[j], p.chan[j].nbitsizes[0]);
out.write(endpts[i].B[j], p.chan[j].nbitsizes[1]); out.write(endpts[i].B[j], p.chan[j].nbitsizes[1]);
} }
assert (out.getptr() == 50); nvAssert (out.getptr() == 50);
} }
static void read_header(Bits &in, IntEndptsRGBA endpts[NREGIONS], int &shapeindex, int &rotatemode, int &indexmode, Pattern &p, int &pat_index) static void read_header(Bits &in, IntEndptsRGBA endpts[NREGIONS], int &shapeindex, int &rotatemode, int &indexmode, Pattern &p, int &pat_index)
@ -252,8 +253,8 @@ static void read_header(Bits &in, IntEndptsRGBA endpts[NREGIONS], int &shapeinde
pat_index = 0; pat_index = 0;
assert (pat_index >= 0 && pat_index < NPATTERNS); nvAssert (pat_index >= 0 && pat_index < NPATTERNS);
assert (in.getptr() == patterns[pat_index].modebits); nvAssert (in.getptr() == patterns[pat_index].modebits);
p = patterns[pat_index]; p = patterns[pat_index];
@ -267,7 +268,7 @@ static void read_header(Bits &in, IntEndptsRGBA endpts[NREGIONS], int &shapeinde
endpts[i].A[j] = in.read(p.chan[j].nbitsizes[0]); endpts[i].A[j] = in.read(p.chan[j].nbitsizes[0]);
endpts[i].B[j] = in.read(p.chan[j].nbitsizes[1]); endpts[i].B[j] = in.read(p.chan[j].nbitsizes[1]);
} }
assert (in.getptr() == 50); nvAssert (in.getptr() == 50);
} }
static void write_indices(const int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W], int shapeindex, int indexmode, Bits &out) static void write_indices(const int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W], int shapeindex, int indexmode, Bits &out)
@ -275,12 +276,12 @@ static void write_indices(const int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TI
// the indices we shorten is always index 0 // the indices we shorten is always index 0
// do the 2 bit indices first // do the 2 bit indices first
assert ((indices[INDEXARRAY_2BITS(indexmode)][0][0] & HIGH_INDEXBIT2) == 0); nvAssert ((indices[INDEXARRAY_2BITS(indexmode)][0][0] & HIGH_INDEXBIT2) == 0);
for (int i = 0; i < Tile::TILE_TOTAL; ++i) for (int i = 0; i < Tile::TILE_TOTAL; ++i)
out.write(indices[INDEXARRAY_2BITS(indexmode)][i>>2][i&3], INDEXBITS2 - (i==0?1:0)); // write i..[1:0] or i..[0] out.write(indices[INDEXARRAY_2BITS(indexmode)][i>>2][i&3], INDEXBITS2 - (i==0?1:0)); // write i..[1:0] or i..[0]
// then the 3 bit indices // then the 3 bit indices
assert ((indices[INDEXARRAY_3BITS(indexmode)][0][0] & HIGH_INDEXBIT3) == 0); nvAssert ((indices[INDEXARRAY_3BITS(indexmode)][0][0] & HIGH_INDEXBIT3) == 0);
for (int i = 0; i < Tile::TILE_TOTAL; ++i) for (int i = 0; i < Tile::TILE_TOTAL; ++i)
out.write(indices[INDEXARRAY_3BITS(indexmode)][i>>2][i&3], INDEXBITS3 - (i==0?1:0)); // write i..[2:0] or i..[1:0] out.write(indices[INDEXARRAY_3BITS(indexmode)][i>>2][i&3], INDEXBITS3 - (i==0?1:0)); // write i..[2:0] or i..[1:0]
} }
@ -306,10 +307,10 @@ static void emit_block(const IntEndptsRGBA endpts[NREGIONS], int shapeindex, con
write_indices(indices, shapeindex, indexmode, out); write_indices(indices, shapeindex, indexmode, out);
assert(out.getptr() == AVPCL::BITSIZE); nvAssert(out.getptr() == AVPCL::BITSIZE);
} }
static void generate_palette_quantized_rgb_a(const IntEndptsRGBA &endpts, const RegionPrec &region_prec, int indexmode, Vec3 palette_rgb[NINDICES3], float palette_a[NINDICES3]) static void generate_palette_quantized_rgb_a(const IntEndptsRGBA &endpts, const RegionPrec &region_prec, int indexmode, Vector3 palette_rgb[NINDICES3], float palette_a[NINDICES3])
{ {
// scale endpoints for RGB // scale endpoints for RGB
int a, b; int a, b;
@ -319,28 +320,28 @@ static void generate_palette_quantized_rgb_a(const IntEndptsRGBA &endpts, const
// interpolate R // interpolate R
for (int i = 0; i < NINDICES_RGB(indexmode); ++i) for (int i = 0; i < NINDICES_RGB(indexmode); ++i)
palette_rgb[i].X() = PALETTE_LERP(a, b, i, BIAS_RGB(indexmode), DENOM_RGB(indexmode)); palette_rgb[i].x = float(Utils::lerp(a, b, i, BIAS_RGB(indexmode), DENOM_RGB(indexmode)));
a = Utils::unquantize(endpts.A[1], region_prec.endpt_a_prec[1]); a = Utils::unquantize(endpts.A[1], region_prec.endpt_a_prec[1]);
b = Utils::unquantize(endpts.B[1], region_prec.endpt_b_prec[1]); b = Utils::unquantize(endpts.B[1], region_prec.endpt_b_prec[1]);
// interpolate G // interpolate G
for (int i = 0; i < NINDICES_RGB(indexmode); ++i) for (int i = 0; i < NINDICES_RGB(indexmode); ++i)
palette_rgb[i].Y() = PALETTE_LERP(a, b, i, BIAS_RGB(indexmode), DENOM_RGB(indexmode)); palette_rgb[i].y = float(Utils::lerp(a, b, i, BIAS_RGB(indexmode), DENOM_RGB(indexmode)));
a = Utils::unquantize(endpts.A[2], region_prec.endpt_a_prec[2]); a = Utils::unquantize(endpts.A[2], region_prec.endpt_a_prec[2]);
b = Utils::unquantize(endpts.B[2], region_prec.endpt_b_prec[2]); b = Utils::unquantize(endpts.B[2], region_prec.endpt_b_prec[2]);
// interpolate B // interpolate B
for (int i = 0; i < NINDICES_RGB(indexmode); ++i) for (int i = 0; i < NINDICES_RGB(indexmode); ++i)
palette_rgb[i].Z() = PALETTE_LERP(a, b, i, BIAS_RGB(indexmode), DENOM_RGB(indexmode)); palette_rgb[i].z = float(Utils::lerp(a, b, i, BIAS_RGB(indexmode), DENOM_RGB(indexmode)));
a = Utils::unquantize(endpts.A[3], region_prec.endpt_a_prec[3]); a = Utils::unquantize(endpts.A[3], region_prec.endpt_a_prec[3]);
b = Utils::unquantize(endpts.B[3], region_prec.endpt_b_prec[3]); b = Utils::unquantize(endpts.B[3], region_prec.endpt_b_prec[3]);
// interpolate A // interpolate A
for (int i = 0; i < NINDICES_A(indexmode); ++i) for (int i = 0; i < NINDICES_A(indexmode); ++i)
palette_a[i] = PALETTE_LERP(a, b, i, BIAS_A(indexmode), DENOM_A(indexmode)); palette_a[i] = float(Utils::lerp(a, b, i, BIAS_A(indexmode), DENOM_A(indexmode)));
} }
@ -372,10 +373,10 @@ static void rotate_tile(const Tile &in, int rotatemode, Tile &out)
switch(rotatemode) switch(rotatemode)
{ {
case ROTATEMODE_RGBA_RGBA: break; case ROTATEMODE_RGBA_RGBA: break;
case ROTATEMODE_RGBA_AGBR: t = (out.data[y][x]).X(); (out.data[y][x]).X() = (out.data[y][x]).W(); (out.data[y][x]).W() = t; break; case ROTATEMODE_RGBA_AGBR: t = (out.data[y][x]).x; (out.data[y][x]).x = (out.data[y][x]).w; (out.data[y][x]).w = t; break;
case ROTATEMODE_RGBA_RABG: t = (out.data[y][x]).Y(); (out.data[y][x]).Y() = (out.data[y][x]).W(); (out.data[y][x]).W() = t; break; case ROTATEMODE_RGBA_RABG: t = (out.data[y][x]).y; (out.data[y][x]).y = (out.data[y][x]).w; (out.data[y][x]).w = t; break;
case ROTATEMODE_RGBA_RGAB: t = (out.data[y][x]).Z(); (out.data[y][x]).Z() = (out.data[y][x]).W(); (out.data[y][x]).W() = t; break; case ROTATEMODE_RGBA_RGAB: t = (out.data[y][x]).z; (out.data[y][x]).z = (out.data[y][x]).w; (out.data[y][x]).w = t; break;
default: assert(0); default: nvUnreachable();
} }
} }
} }
@ -395,7 +396,7 @@ void AVPCL::decompress_mode4(const char *block, Tile &t)
if (p.transform_mode) if (p.transform_mode)
transform_inverse(p.transform_mode, endpts); transform_inverse(p.transform_mode, endpts);
Vec3 palette_rgb[NREGIONS][NINDICES3]; // could be nindices2 Vector3 palette_rgb[NREGIONS][NINDICES3]; // could be nindices2
float palette_a[NREGIONS][NINDICES3]; // could be nindices2 float palette_a[NREGIONS][NINDICES3]; // could be nindices2
for (int region = 0; region < NREGIONS; ++region) for (int region = 0; region < NREGIONS; ++region)
@ -405,14 +406,14 @@ void AVPCL::decompress_mode4(const char *block, Tile &t)
read_indices(in, shapeindex, indexmode, indices); read_indices(in, shapeindex, indexmode, indices);
assert(in.getptr() == AVPCL::BITSIZE); nvAssert(in.getptr() == AVPCL::BITSIZE);
Tile temp(t.size_x, t.size_y); Tile temp(t.size_x, t.size_y);
// lookup // lookup
for (int y = 0; y < Tile::TILE_H; y++) for (int y = 0; y < Tile::TILE_H; y++)
for (int x = 0; x < Tile::TILE_W; x++) for (int x = 0; x < Tile::TILE_W; x++)
temp.data[y][x] = Vec4(palette_rgb[REGION(x,y,shapeindex)][indices[INDEXARRAY_RGB][y][x]], palette_a[REGION(x,y,shapeindex)][indices[INDEXARRAY_A][y][x]]); temp.data[y][x] = Vector4(palette_rgb[REGION(x,y,shapeindex)][indices[INDEXARRAY_RGB][y][x]], palette_a[REGION(x,y,shapeindex)][indices[INDEXARRAY_A][y][x]]);
rotate_tile(temp, rotatemode, t); rotate_tile(temp, rotatemode, t);
} }
@ -420,31 +421,31 @@ void AVPCL::decompress_mode4(const char *block, Tile &t)
// given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr // given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr
// we already have a candidate mapping when we call this function, thus an error. take an early exit if the accumulated error so far // we already have a candidate mapping when we call this function, thus an error. take an early exit if the accumulated error so far
// exceeds what we already have // exceeds what we already have
static double map_colors(const Vec4 colors[], int np, int rotatemode, int indexmode, const IntEndptsRGBA &endpts, const RegionPrec &region_prec, double current_besterr, int indices[NINDEXARRAYS][Tile::TILE_TOTAL]) static float map_colors(const Vector4 colors[], int np, int rotatemode, int indexmode, const IntEndptsRGBA &endpts, const RegionPrec &region_prec, float current_besterr, int indices[NINDEXARRAYS][Tile::TILE_TOTAL])
{ {
Vec3 palette_rgb[NINDICES3]; // could be nindices2 Vector3 palette_rgb[NINDICES3]; // could be nindices2
float palette_a[NINDICES3]; // could be nindices2 float palette_a[NINDICES3]; // could be nindices2
double toterr = 0; float toterr = 0;
generate_palette_quantized_rgb_a(endpts, region_prec, indexmode, &palette_rgb[0], &palette_a[0]); generate_palette_quantized_rgb_a(endpts, region_prec, indexmode, &palette_rgb[0], &palette_a[0]);
Vec3 rgb; Vector3 rgb;
float a; float a;
for (int i = 0; i < np; ++i) for (int i = 0; i < np; ++i)
{ {
double err, besterr; float err, besterr;
float palette_alpha = 0, tile_alpha = 0; float palette_alpha = 0, tile_alpha = 0;
if(AVPCL::flag_premult) if(AVPCL::flag_premult)
tile_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (colors[i]).X() : tile_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (colors[i]).x :
(rotatemode == ROTATEMODE_RGBA_RABG) ? (colors[i]).Y() : (rotatemode == ROTATEMODE_RGBA_RABG) ? (colors[i]).y :
(rotatemode == ROTATEMODE_RGBA_RGAB) ? (colors[i]).Z() : (colors[i]).W(); (rotatemode == ROTATEMODE_RGBA_RGAB) ? (colors[i]).z : (colors[i]).w;
rgb.X() = (colors[i]).X(); rgb.x = (colors[i]).x;
rgb.Y() = (colors[i]).Y(); rgb.y = (colors[i]).y;
rgb.Z() = (colors[i]).Z(); rgb.z = (colors[i]).z;
a = (colors[i]).W(); a = (colors[i]).w;
// compute the two indices separately // compute the two indices separately
// if we're doing premultiplied alpha, we need to choose first the index that // if we're doing premultiplied alpha, we need to choose first the index that
@ -453,7 +454,7 @@ static double map_colors(const Vec4 colors[], int np, int rotatemode, int indexm
if (rotatemode == ROTATEMODE_RGBA_RGBA) if (rotatemode == ROTATEMODE_RGBA_RGBA)
{ {
// do A index first as it has the alpha // do A index first as it has the alpha
besterr = DBL_MAX; besterr = FLT_MAX;
for (int j = 0; j < NINDICES_A(indexmode) && besterr > 0; ++j) for (int j = 0; j < NINDICES_A(indexmode) && besterr > 0; ++j)
{ {
err = Utils::metric1(a, palette_a[j], rotatemode); err = Utils::metric1(a, palette_a[j], rotatemode);
@ -470,7 +471,7 @@ static double map_colors(const Vec4 colors[], int np, int rotatemode, int indexm
toterr += besterr; // squared-error norms are additive since we don't do the square root toterr += besterr; // squared-error norms are additive since we don't do the square root
// do RGB index // do RGB index
besterr = DBL_MAX; besterr = FLT_MAX;
for (int j = 0; j < NINDICES_RGB(indexmode) && besterr > 0; ++j) for (int j = 0; j < NINDICES_RGB(indexmode) && besterr > 0; ++j)
{ {
err = !AVPCL::flag_premult ? Utils::metric3(rgb, palette_rgb[j], rotatemode) : err = !AVPCL::flag_premult ? Utils::metric3(rgb, palette_rgb[j], rotatemode) :
@ -493,13 +494,13 @@ static double map_colors(const Vec4 colors[], int np, int rotatemode, int indexm
indices[INDEXARRAY_RGB][k] = -1; indices[INDEXARRAY_RGB][k] = -1;
indices[INDEXARRAY_A][k] = -1; indices[INDEXARRAY_A][k] = -1;
} }
return DBL_MAX; return FLT_MAX;
} }
} }
else else
{ {
// do RGB index // do RGB index
besterr = DBL_MAX; besterr = FLT_MAX;
int bestindex; int bestindex;
for (int j = 0; j < NINDICES_RGB(indexmode) && besterr > 0; ++j) for (int j = 0; j < NINDICES_RGB(indexmode) && besterr > 0; ++j)
{ {
@ -515,13 +516,13 @@ static double map_colors(const Vec4 colors[], int np, int rotatemode, int indexm
indices[INDEXARRAY_RGB][i] = j; indices[INDEXARRAY_RGB][i] = j;
} }
} }
palette_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (palette_rgb[bestindex]).X() : palette_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (palette_rgb[bestindex]).x :
(rotatemode == ROTATEMODE_RGBA_RABG) ? (palette_rgb[bestindex]).Y() : (rotatemode == ROTATEMODE_RGBA_RABG) ? (palette_rgb[bestindex]).y :
(rotatemode == ROTATEMODE_RGBA_RGAB) ? (palette_rgb[bestindex]).Z() : (assert(0),0); (rotatemode == ROTATEMODE_RGBA_RGAB) ? (palette_rgb[bestindex]).z : (nvCheckMacro(0),0);
toterr += besterr; toterr += besterr;
// do A index // do A index
besterr = DBL_MAX; besterr = FLT_MAX;
for (int j = 0; j < NINDICES_A(indexmode) && besterr > 0; ++j) for (int j = 0; j < NINDICES_A(indexmode) && besterr > 0; ++j)
{ {
err = !AVPCL::flag_premult ? Utils::metric1(a, palette_a[j], rotatemode) : err = !AVPCL::flag_premult ? Utils::metric1(a, palette_a[j], rotatemode) :
@ -544,7 +545,7 @@ static double map_colors(const Vec4 colors[], int np, int rotatemode, int indexm
indices[INDEXARRAY_RGB][k] = -1; indices[INDEXARRAY_RGB][k] = -1;
indices[INDEXARRAY_A][k] = -1; indices[INDEXARRAY_A][k] = -1;
} }
return DBL_MAX; return FLT_MAX;
} }
} }
} }
@ -553,9 +554,9 @@ static double map_colors(const Vec4 colors[], int np, int rotatemode, int indexm
// assign indices given a tile, shape, and quantized endpoints, return toterr for each region // assign indices given a tile, shape, and quantized endpoints, return toterr for each region
static void assign_indices(const Tile &tile, int shapeindex, int rotatemode, int indexmode, IntEndptsRGBA endpts[NREGIONS], const PatternPrec &pattern_prec, static void assign_indices(const Tile &tile, int shapeindex, int rotatemode, int indexmode, IntEndptsRGBA endpts[NREGIONS], const PatternPrec &pattern_prec,
int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W], double toterr[NREGIONS]) int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W], float toterr[NREGIONS])
{ {
Vec3 palette_rgb[NREGIONS][NINDICES3]; // could be nindices2 Vector3 palette_rgb[NREGIONS][NINDICES3]; // could be nindices2
float palette_a[NREGIONS][NINDICES3]; // could be nindices2 float palette_a[NREGIONS][NINDICES3]; // could be nindices2
for (int region = 0; region < NREGIONS; ++region) for (int region = 0; region < NREGIONS; ++region)
@ -564,25 +565,25 @@ static void assign_indices(const Tile &tile, int shapeindex, int rotatemode, int
toterr[region] = 0; toterr[region] = 0;
} }
Vec3 rgb; Vector3 rgb;
float a; float a;
for (int y = 0; y < tile.size_y; y++) for (int y = 0; y < tile.size_y; y++)
for (int x = 0; x < tile.size_x; x++) for (int x = 0; x < tile.size_x; x++)
{ {
int region = REGION(x,y,shapeindex); int region = REGION(x,y,shapeindex);
double err, besterr; float err, besterr;
float palette_alpha = 0, tile_alpha = 0; float palette_alpha = 0, tile_alpha = 0;
rgb.X() = (tile.data[y][x]).X(); rgb.x = (tile.data[y][x]).x;
rgb.Y() = (tile.data[y][x]).Y(); rgb.y = (tile.data[y][x]).y;
rgb.Z() = (tile.data[y][x]).Z(); rgb.z = (tile.data[y][x]).z;
a = (tile.data[y][x]).W(); a = (tile.data[y][x]).w;
if(AVPCL::flag_premult) if(AVPCL::flag_premult)
tile_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (tile.data[y][x]).X() : tile_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (tile.data[y][x]).x :
(rotatemode == ROTATEMODE_RGBA_RABG) ? (tile.data[y][x]).Y() : (rotatemode == ROTATEMODE_RGBA_RABG) ? (tile.data[y][x]).y :
(rotatemode == ROTATEMODE_RGBA_RGAB) ? (tile.data[y][x]).Z() : (tile.data[y][x]).W(); (rotatemode == ROTATEMODE_RGBA_RGAB) ? (tile.data[y][x]).z : (tile.data[y][x]).w;
// compute the two indices separately // compute the two indices separately
// if we're doing premultiplied alpha, we need to choose first the index that // if we're doing premultiplied alpha, we need to choose first the index that
@ -591,7 +592,7 @@ static void assign_indices(const Tile &tile, int shapeindex, int rotatemode, int
if (rotatemode == ROTATEMODE_RGBA_RGBA) if (rotatemode == ROTATEMODE_RGBA_RGBA)
{ {
// do A index first as it has the alpha // do A index first as it has the alpha
besterr = DBL_MAX; besterr = FLT_MAX;
for (int i = 0; i < NINDICES_A(indexmode) && besterr > 0; ++i) for (int i = 0; i < NINDICES_A(indexmode) && besterr > 0; ++i)
{ {
err = Utils::metric1(a, palette_a[region][i], rotatemode); err = Utils::metric1(a, palette_a[region][i], rotatemode);
@ -608,7 +609,7 @@ static void assign_indices(const Tile &tile, int shapeindex, int rotatemode, int
toterr[region] += besterr; // squared-error norms are additive since we don't do the square root toterr[region] += besterr; // squared-error norms are additive since we don't do the square root
// do RGB index // do RGB index
besterr = DBL_MAX; besterr = FLT_MAX;
for (int i = 0; i < NINDICES_RGB(indexmode) && besterr > 0; ++i) for (int i = 0; i < NINDICES_RGB(indexmode) && besterr > 0; ++i)
{ {
err = !AVPCL::flag_premult ? Utils::metric3(rgb, palette_rgb[region][i], rotatemode) : err = !AVPCL::flag_premult ? Utils::metric3(rgb, palette_rgb[region][i], rotatemode) :
@ -627,7 +628,7 @@ static void assign_indices(const Tile &tile, int shapeindex, int rotatemode, int
else else
{ {
// do RGB index first as it has the alpha // do RGB index first as it has the alpha
besterr = DBL_MAX; besterr = FLT_MAX;
int bestindex; int bestindex;
for (int i = 0; i < NINDICES_RGB(indexmode) && besterr > 0; ++i) for (int i = 0; i < NINDICES_RGB(indexmode) && besterr > 0; ++i)
{ {
@ -643,13 +644,13 @@ static void assign_indices(const Tile &tile, int shapeindex, int rotatemode, int
bestindex = i; bestindex = i;
} }
} }
palette_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (palette_rgb[region][bestindex]).X() : palette_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (palette_rgb[region][bestindex]).x :
(rotatemode == ROTATEMODE_RGBA_RABG) ? (palette_rgb[region][bestindex]).Y() : (rotatemode == ROTATEMODE_RGBA_RABG) ? (palette_rgb[region][bestindex]).y :
(rotatemode == ROTATEMODE_RGBA_RGAB) ? (palette_rgb[region][bestindex]).Z() : (assert(0),0); (rotatemode == ROTATEMODE_RGBA_RGAB) ? (palette_rgb[region][bestindex]).z : (nvCheckMacro(0),0);
toterr[region] += besterr; toterr[region] += besterr;
// do A index // do A index
besterr = DBL_MAX; besterr = FLT_MAX;
for (int i = 0; i < NINDICES_A(indexmode) && besterr > 0; ++i) for (int i = 0; i < NINDICES_A(indexmode) && besterr > 0; ++i)
{ {
err = !AVPCL::flag_premult ? Utils::metric1(a, palette_a[region][i], rotatemode) : err = !AVPCL::flag_premult ? Utils::metric1(a, palette_a[region][i], rotatemode) :
@ -670,8 +671,8 @@ static void assign_indices(const Tile &tile, int shapeindex, int rotatemode, int
// note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's // note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's
// this function returns either old_err or a value smaller (if it was successful in improving the error) // this function returns either old_err or a value smaller (if it was successful in improving the error)
static double perturb_one(const Vec4 colors[], int np, int rotatemode, int indexmode, int ch, const RegionPrec &region_prec, const IntEndptsRGBA &old_endpts, IntEndptsRGBA &new_endpts, static float perturb_one(const Vector4 colors[], int np, int rotatemode, int indexmode, int ch, const RegionPrec &region_prec, const IntEndptsRGBA &old_endpts, IntEndptsRGBA &new_endpts,
double old_err, int do_b, int indices[NINDEXARRAYS][Tile::TILE_TOTAL]) float old_err, int do_b, int indices[NINDEXARRAYS][Tile::TILE_TOTAL])
{ {
// we have the old endpoints: old_endpts // we have the old endpoints: old_endpts
// we have the perturbed endpoints: new_endpts // we have the perturbed endpoints: new_endpts
@ -742,10 +743,10 @@ static double perturb_one(const Vec4 colors[], int np, int rotatemode, int index
// if err > 40 6.25% // if err > 40 6.25%
// for np = 16 -- adjust error thresholds as a function of np // for np = 16 -- adjust error thresholds as a function of np
// always ensure endpoint ordering is preserved (no need to overlap the scan) // always ensure endpoint ordering is preserved (no need to overlap the scan)
static double exhaustive(const Vec4 colors[], int np, int rotatemode, int indexmode, int ch, const RegionPrec &region_prec, double orig_err, IntEndptsRGBA &opt_endpts, int indices[NINDEXARRAYS][Tile::TILE_TOTAL]) static float exhaustive(const Vector4 colors[], int np, int rotatemode, int indexmode, int ch, const RegionPrec &region_prec, float orig_err, IntEndptsRGBA &opt_endpts, int indices[NINDEXARRAYS][Tile::TILE_TOTAL])
{ {
IntEndptsRGBA temp_endpts; IntEndptsRGBA temp_endpts;
double best_err = orig_err; float best_err = orig_err;
int aprec = region_prec.endpt_a_prec[ch]; int aprec = region_prec.endpt_a_prec[ch];
int bprec = region_prec.endpt_b_prec[ch]; int bprec = region_prec.endpt_b_prec[ch];
int good_indices[NINDEXARRAYS][Tile::TILE_TOTAL]; int good_indices[NINDEXARRAYS][Tile::TILE_TOTAL];
@ -755,7 +756,7 @@ static double exhaustive(const Vec4 colors[], int np, int rotatemode, int indexm
for (int i=0; i<np; ++i) for (int i=0; i<np; ++i)
indices[j][i] = -1; indices[j][i] = -1;
double thr_scale = (double)np / (double)Tile::TILE_TOTAL; float thr_scale = (float)np / (float)Tile::TILE_TOTAL;
if (orig_err == 0) return orig_err; if (orig_err == 0) return orig_err;
@ -764,8 +765,8 @@ static double exhaustive(const Vec4 colors[], int np, int rotatemode, int indexm
else if (orig_err > 1000.0*thr_scale) { adelta = (1 << aprec)/4; bdelta = (1 << bprec)/4; } else if (orig_err > 1000.0*thr_scale) { adelta = (1 << aprec)/4; bdelta = (1 << bprec)/4; }
else if (orig_err > 200.0*thr_scale) { adelta = (1 << aprec)/8; bdelta = (1 << bprec)/8; } else if (orig_err > 200.0*thr_scale) { adelta = (1 << aprec)/8; bdelta = (1 << bprec)/8; }
else if (orig_err > 40.0*thr_scale) { adelta = (1 << aprec)/16; bdelta = (1 << bprec)/16; } else if (orig_err > 40.0*thr_scale) { adelta = (1 << aprec)/16; bdelta = (1 << bprec)/16; }
adelta = MAX(adelta, 3); adelta = max(adelta, 3);
bdelta = MAX(bdelta, 3); bdelta = max(bdelta, 3);
#ifdef DISABLE_EXHAUSTIVE #ifdef DISABLE_EXHAUSTIVE
adelta = bdelta = 3; adelta = bdelta = 3;
@ -774,10 +775,10 @@ static double exhaustive(const Vec4 colors[], int np, int rotatemode, int indexm
temp_endpts = opt_endpts; temp_endpts = opt_endpts;
// ok figure out the range of A and B // ok figure out the range of A and B
int alow = MAX(0, opt_endpts.A[ch] - adelta); int alow = max(0, opt_endpts.A[ch] - adelta);
int ahigh = MIN((1<<aprec)-1, opt_endpts.A[ch] + adelta); int ahigh = min((1<<aprec)-1, opt_endpts.A[ch] + adelta);
int blow = MAX(0, opt_endpts.B[ch] - bdelta); int blow = max(0, opt_endpts.B[ch] - bdelta);
int bhigh = MIN((1<<bprec)-1, opt_endpts.B[ch] + bdelta); int bhigh = min((1<<bprec)-1, opt_endpts.B[ch] + bdelta);
// now there's no need to swap the ordering of A and B // now there's no need to swap the ordering of A and B
bool a_le_b = opt_endpts.A[ch] <= opt_endpts.B[ch]; bool a_le_b = opt_endpts.A[ch] <= opt_endpts.B[ch];
@ -788,7 +789,7 @@ static double exhaustive(const Vec4 colors[], int np, int rotatemode, int indexm
{ {
// keep a <= b // keep a <= b
for (int a = alow; a <= ahigh; ++a) for (int a = alow; a <= ahigh; ++a)
for (int b = MAX(a, blow); b < bhigh; ++b) for (int b = max(a, blow); b < bhigh; ++b)
{ {
temp_endpts.A[ch] = a; temp_endpts.A[ch] = a;
temp_endpts.B[ch] = b; temp_endpts.B[ch] = b;
@ -809,7 +810,7 @@ static double exhaustive(const Vec4 colors[], int np, int rotatemode, int indexm
{ {
// keep b <= a // keep b <= a
for (int b = blow; b < bhigh; ++b) for (int b = blow; b < bhigh; ++b)
for (int a = MAX(b, alow); a <= ahigh; ++a) for (int a = max(b, alow); a <= ahigh; ++a)
{ {
temp_endpts.A[ch] = a; temp_endpts.A[ch] = a;
temp_endpts.B[ch] = b; temp_endpts.B[ch] = b;
@ -839,9 +840,9 @@ static double exhaustive(const Vec4 colors[], int np, int rotatemode, int indexm
return best_err; return best_err;
} }
static double optimize_one(const Vec4 colors[], int np, int rotatemode, int indexmode, double orig_err, const IntEndptsRGBA &orig_endpts, const RegionPrec &region_prec, IntEndptsRGBA &opt_endpts) static float optimize_one(const Vector4 colors[], int np, int rotatemode, int indexmode, float orig_err, const IntEndptsRGBA &orig_endpts, const RegionPrec &region_prec, IntEndptsRGBA &opt_endpts)
{ {
double opt_err = orig_err; float opt_err = orig_err;
opt_endpts = orig_endpts; opt_endpts = orig_endpts;
@ -888,7 +889,7 @@ static double optimize_one(const Vec4 colors[], int np, int rotatemode, int inde
for (int i=0; i<np; ++i) for (int i=0; i<np; ++i)
{ {
new_indices[j][i] = orig_indices[j][i] = temp_indices0[j][i]; new_indices[j][i] = orig_indices[j][i] = temp_indices0[j][i];
assert (orig_indices[j][i] != -1); nvAssert (orig_indices[j][i] != -1);
} }
opt_endpts.A[ch] = new_a.A[ch]; opt_endpts.A[ch] = new_a.A[ch];
@ -904,7 +905,7 @@ static double optimize_one(const Vec4 colors[], int np, int rotatemode, int inde
for (int i=0; i<np; ++i) for (int i=0; i<np; ++i)
{ {
new_indices[j][i] = orig_indices[j][i] = temp_indices1[j][i]; new_indices[j][i] = orig_indices[j][i] = temp_indices1[j][i];
assert (orig_indices[j][i] != -1); nvAssert (orig_indices[j][i] != -1);
} }
opt_endpts.B[ch] = new_b.B[ch]; opt_endpts.B[ch] = new_b.B[ch];
@ -923,7 +924,7 @@ static double optimize_one(const Vec4 colors[], int np, int rotatemode, int inde
for (int i=0; i<np; ++i) for (int i=0; i<np; ++i)
{ {
new_indices[j][i] = temp_indices0[j][i]; new_indices[j][i] = temp_indices0[j][i];
assert (orig_indices[j][i] != -1); nvAssert (orig_indices[j][i] != -1);
} }
if (do_b == 0) if (do_b == 0)
@ -948,7 +949,7 @@ static double optimize_one(const Vec4 colors[], int np, int rotatemode, int inde
bool first = true; bool first = true;
for (int ch = 0; ch < NCHANNELS_RGBA; ++ch) for (int ch = 0; ch < NCHANNELS_RGBA; ++ch)
{ {
double new_err = exhaustive(colors, np, rotatemode, indexmode, ch, region_prec, opt_err, opt_endpts, temp_indices0); float new_err = exhaustive(colors, np, rotatemode, indexmode, ch, region_prec, opt_err, opt_endpts, temp_indices0);
if (new_err < opt_err) if (new_err < opt_err)
{ {
@ -960,7 +961,7 @@ static double optimize_one(const Vec4 colors[], int np, int rotatemode, int inde
for (int i=0; i<np; ++i) for (int i=0; i<np; ++i)
{ {
orig_indices[j][i] = temp_indices0[j][i]; orig_indices[j][i] = temp_indices0[j][i];
assert (orig_indices[j][i] != -1); nvAssert (orig_indices[j][i] != -1);
} }
first = false; first = false;
} }
@ -984,10 +985,10 @@ static double optimize_one(const Vec4 colors[], int np, int rotatemode, int inde
return opt_err; return opt_err;
} }
static void optimize_endpts(const Tile &tile, int shapeindex, int rotatemode, int indexmode, const double orig_err[NREGIONS], static void optimize_endpts(const Tile &tile, int shapeindex, int rotatemode, int indexmode, const float orig_err[NREGIONS],
const IntEndptsRGBA orig_endpts[NREGIONS], const PatternPrec &pattern_prec, double opt_err[NREGIONS], IntEndptsRGBA opt_endpts[NREGIONS]) const IntEndptsRGBA orig_endpts[NREGIONS], const PatternPrec &pattern_prec, float opt_err[NREGIONS], IntEndptsRGBA opt_endpts[NREGIONS])
{ {
Vec4 pixels[Tile::TILE_TOTAL]; Vector4 pixels[Tile::TILE_TOTAL];
IntEndptsRGBA temp_in, temp_out; IntEndptsRGBA temp_in, temp_out;
for (int region=0; region<NREGIONS; ++region) for (int region=0; region<NREGIONS; ++region)
@ -1003,14 +1004,14 @@ static void optimize_endpts(const Tile &tile, int shapeindex, int rotatemode, in
opt_endpts[region] = temp_in = orig_endpts[region]; opt_endpts[region] = temp_in = orig_endpts[region];
opt_err[region] = orig_err[region]; opt_err[region] = orig_err[region];
double best_err = orig_err[region]; float best_err = orig_err[region];
// make sure we have a valid error for temp_in // make sure we have a valid error for temp_in
// we didn't change temp_in, so orig_err[region] is still valid // we didn't change temp_in, so orig_err[region] is still valid
double temp_in_err = orig_err[region]; float temp_in_err = orig_err[region];
// now try to optimize these endpoints // now try to optimize these endpoints
double temp_out_err = optimize_one(pixels, np, rotatemode, indexmode, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out); float temp_out_err = optimize_one(pixels, np, rotatemode, indexmode, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out);
// if we find an improvement, update the best so far and correct the output endpoints and errors // if we find an improvement, update the best so far and correct the output endpoints and errors
if (temp_out_err < best_err) if (temp_out_err < best_err)
@ -1039,12 +1040,11 @@ static void optimize_endpts(const Tile &tile, int shapeindex, int rotatemode, in
emit compressed block with original data // to try to preserve maximum endpoint precision emit compressed block with original data // to try to preserve maximum endpoint precision
*/ */
static double refine(const Tile &tile, int shapeindex_best, int rotatemode, int indexmode, const FltEndpts endpts[NREGIONS], char *block) static float refine(const Tile &tile, int shapeindex_best, int rotatemode, int indexmode, const FltEndpts endpts[NREGIONS], char *block)
{ {
double orig_err[NREGIONS], opt_err[NREGIONS], orig_toterr, opt_toterr, expected_opt_err[NREGIONS]; float orig_err[NREGIONS], opt_err[NREGIONS], orig_toterr, opt_toterr, expected_opt_err[NREGIONS];
IntEndptsRGBA orig_endpts[NREGIONS], opt_endpts[NREGIONS]; IntEndptsRGBA orig_endpts[NREGIONS], opt_endpts[NREGIONS];
int orig_indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W], opt_indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W]; int orig_indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W], opt_indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W];
int temp_indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W];
for (int sp = 0; sp < NPATTERNS; ++sp) for (int sp = 0; sp < NPATTERNS; ++sp)
{ {
@ -1066,8 +1066,9 @@ static double refine(const Tile &tile, int shapeindex_best, int rotatemode, int
optimize_endpts(tile, shapeindex_best, rotatemode, indexmode, orig_err, orig_endpts, pattern_precs[sp], expected_opt_err, opt_endpts); optimize_endpts(tile, shapeindex_best, rotatemode, indexmode, orig_err, orig_endpts, pattern_precs[sp], expected_opt_err, opt_endpts);
assign_indices(tile, shapeindex_best, rotatemode, indexmode, opt_endpts, pattern_precs[sp], opt_indices, opt_err); assign_indices(tile, shapeindex_best, rotatemode, indexmode, opt_endpts, pattern_precs[sp], opt_indices, opt_err);
for (int i=0; i<NREGIONS; ++i) // (nreed) Commented out asserts because they go off all the time...not sure why
assert(expected_opt_err[i] == opt_err[i]); //for (int i=0; i<NREGIONS; ++i)
// nvAssert(expected_opt_err[i] == opt_err[i]);
swap_indices(shapeindex_best, indexmode, opt_endpts, opt_indices); swap_indices(shapeindex_best, indexmode, opt_endpts, opt_indices);
if (patterns[sp].transform_mode) if (patterns[sp].transform_mode)
@ -1094,16 +1095,16 @@ static double refine(const Tile &tile, int shapeindex_best, int rotatemode, int
throw "No candidate found, should never happen (avpcl mode 4)."; throw "No candidate found, should never happen (avpcl mode 4).";
} }
static void clamp(Vec4 &v) static void clamp(Vector4 &v)
{ {
if (v.X() < RGBA_MIN) v.X() = RGBA_MIN; if (v.x < 0.0f) v.x = 0.0f;
if (v.X() > RGBA_MAX) v.X() = RGBA_MAX; if (v.x > 255.0f) v.x = 255.0f;
if (v.Y() < RGBA_MIN) v.Y() = RGBA_MIN; if (v.y < 0.0f) v.y = 0.0f;
if (v.Y() > RGBA_MAX) v.Y() = RGBA_MAX; if (v.y > 255.0f) v.y = 255.0f;
if (v.Z() < RGBA_MIN) v.Z() = RGBA_MIN; if (v.z < 0.0f) v.z = 0.0f;
if (v.Z() > RGBA_MAX) v.Z() = RGBA_MAX; if (v.z > 255.0f) v.z = 255.0f;
if (v.W() < RGBA_MIN) v.W() = RGBA_MIN; if (v.w < 0.0f) v.w = 0.0f;
if (v.W() > RGBA_MAX) v.W() = RGBA_MAX; if (v.w > 255.0f) v.w = 255.0f;
} }
// compute initial endpoints for the "RGB" portion and the "A" portion. // compute initial endpoints for the "RGB" portion and the "A" portion.
@ -1113,14 +1114,16 @@ static void rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS])
for (int region=0; region<NREGIONS; ++region) for (int region=0; region<NREGIONS; ++region)
{ {
int np = 0; int np = 0;
Vec4 colors[Tile::TILE_TOTAL]; Vector3 colors[Tile::TILE_TOTAL];
Vec4 mean(0,0,0,0); float alphas[Tile::TILE_TOTAL];
Vector4 mean(0,0,0,0);
for (int y = 0; y < tile.size_y; y++) for (int y = 0; y < tile.size_y; y++)
for (int x = 0; x < tile.size_x; x++) for (int x = 0; x < tile.size_x; x++)
if (REGION(x,y,shapeindex) == region) if (REGION(x,y,shapeindex) == region)
{ {
colors[np] = tile.data[y][x]; colors[np] = tile.data[y][x].xyz();
alphas[np] = tile.data[y][x].w;
mean += tile.data[y][x]; mean += tile.data[y][x];
++np; ++np;
} }
@ -1128,76 +1131,59 @@ static void rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS])
// handle simple cases // handle simple cases
if (np == 0) if (np == 0)
{ {
Vec4 zero(0,0,0,RGBA_MAX); Vector4 zero(0,0,0,255.0f);
endpts[region].A = zero; endpts[region].A = zero;
endpts[region].B = zero; endpts[region].B = zero;
continue; continue;
} }
else if (np == 1) else if (np == 1)
{ {
endpts[region].A = colors[0]; endpts[region].A = Vector4(colors[0], alphas[0]);
endpts[region].B = colors[0]; endpts[region].B = Vector4(colors[0], alphas[0]);
continue; continue;
} }
else if (np == 2) else if (np == 2)
{ {
endpts[region].A = colors[0]; endpts[region].A = Vector4(colors[0], alphas[0]);
endpts[region].B = colors[1]; endpts[region].B = Vector4(colors[1], alphas[1]);
continue; continue;
} }
Matrix rdq(np, 3);
float alpha[Tile::TILE_TOTAL];
mean /= float(np); mean /= float(np);
for (int i = 0; i < np; ++i) Vector3 direction = Fit::computePrincipalComponent_EigenSolver(np, colors);
{
rdq(i,0) = colors[i].X() - mean.X();
rdq(i,1) = colors[i].Y() - mean.Y();
rdq(i,2) = colors[i].Z() - mean.Z();
alpha[i] = colors[i].W() - mean.W();
}
// perform a singular value decomposition
SVD svd(rdq);
// get the principal component direction (the one with the largest weight)
// hack the alpha channel
Vec4 direction(svd.R()(0,0), svd.R()(0,1), svd.R()(0,2), 0.0);
// project each pixel value along the principal direction // project each pixel value along the principal direction
double minp = DBL_MAX, maxp = -DBL_MAX; float minp = FLT_MAX, maxp = -FLT_MAX;
double mina = DBL_MAX, maxa = -DBL_MAX; float mina = FLT_MAX, maxa = -FLT_MAX;
for (int i = 0; i < np; i++) for (int i = 0; i < np; i++)
{ {
float dp = rdq(i,0) * direction.X() + rdq(i,1)*direction.Y() + rdq(i,2)*direction.Z(); float dp = dot(colors[i]-mean.xyz(), direction);
if (dp < minp) minp = dp; if (dp < minp) minp = dp;
if (dp > maxp) maxp = dp; if (dp > maxp) maxp = dp;
dp = alpha[i]; dp = alphas[i] - mean.w;
if (dp < mina) mina = dp; if (dp < mina) mina = dp;
if (dp > maxa) maxa = dp; if (dp > maxa) maxa = dp;
} }
// choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values // choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values
endpts[region].A = mean + minp*direction; endpts[region].A = mean + Vector4(minp*direction, mina);
endpts[region].B = mean + maxp*direction; endpts[region].B = mean + Vector4(maxp*direction, maxa);
endpts[region].A.W() = mean.W() + mina;
endpts[region].B.W() = mean.W() + maxa;
// clamp endpoints // clamp endpoints
// WORK: is [0,255] the right range, or should it be [0,255.5) or even [0,256) ? // the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best
// shape based on endpoints being clamped
clamp(endpts[region].A); clamp(endpts[region].A);
clamp(endpts[region].B); clamp(endpts[region].B);
} }
} }
double AVPCL::compress_mode4(const Tile &t, char *block) float AVPCL::compress_mode4(const Tile &t, char *block)
{ {
FltEndpts endpts[NREGIONS]; FltEndpts endpts[NREGIONS];
char tempblock[AVPCL::BLOCKSIZE]; char tempblock[AVPCL::BLOCKSIZE];
double msebest = DBL_MAX; float msebest = FLT_MAX;
int shape = 0; int shape = 0;
Tile t1; Tile t1;
@ -1208,7 +1194,7 @@ double AVPCL::compress_mode4(const Tile &t, char *block)
rough(t1, shape, endpts); rough(t1, shape, endpts);
for (int i = 0; i < NINDEXMODES && msebest > 0; ++i) for (int i = 0; i < NINDEXMODES && msebest > 0; ++i)
{ {
double mse = refine(t1, shape, r, i, endpts, tempblock); float mse = refine(t1, shape, r, i, endpts, tempblock);
if (mse < msebest) if (mse < msebest)
{ {
memcpy(block, tempblock, sizeof(tempblock)); memcpy(block, tempblock, sizeof(tempblock));

View File

@ -17,15 +17,16 @@ See the License for the specific language governing permissions and limitations
#include "bits.h" #include "bits.h"
#include "tile.h" #include "tile.h"
#include "avpcl.h" #include "avpcl.h"
#include "arvo/Vec4.h" #include "nvcore/Debug.h"
#include "arvo/Matrix.h" #include "nvmath/Vector.inl"
#include "arvo/SVD.h" #include "nvmath/Matrix.inl"
#include "nvmath/Fitting.h"
#include "utils.h" #include "utils.h"
#include "endpts.h" #include "endpts.h"
#include <cstring>
#include <assert.h> using namespace nv;
using namespace AVPCL;
using namespace ArvoMath;
// there are 2 index arrays. INDEXMODE selects between the arrays being 2 & 3 bits or 3 & 2 bits // there are 2 index arrays. INDEXMODE selects between the arrays being 2 & 3 bits or 3 & 2 bits
// array 0 is always the RGB array and array 1 is always the A array // array 0 is always the RGB array and array 1 is always the A array
@ -111,7 +112,7 @@ struct PatternPrec
}; };
// this is the precision for each channel and region // this is the precision for each channel and region
// NOTE: this MUST match the corresponding data in "patterns" above -- WARNING: there is NO assert to check this! // NOTE: this MUST match the corresponding data in "patterns" above -- WARNING: there is NO nvAssert to check this!
static PatternPrec pattern_precs[NPATTERNS] = static PatternPrec pattern_precs[NPATTERNS] =
{ {
7,7,7,8, 7,7,7,8, 7,7,7,8, 7,7,7,8,
@ -131,7 +132,7 @@ static int nbits(int n, bool issigned)
} }
else else
{ {
assert (issigned); nvAssert (issigned);
for (nb=0; n<-1; ++nb, n>>=1) ; for (nb=0; n<-1; ++nb, n>>=1) ;
return nb + 1; return nb + 1;
} }
@ -172,15 +173,15 @@ static void quantize_endpts(const FltEndpts endpts[NREGIONS], const PatternPrec
{ {
for (int region = 0; region < NREGIONS; ++region) for (int region = 0; region < NREGIONS; ++region)
{ {
q_endpts[region].A[0] = Utils::quantize(endpts[region].A.X(), pattern_prec.region_precs[region].endpt_a_prec[0]); q_endpts[region].A[0] = Utils::quantize(endpts[region].A.x, pattern_prec.region_precs[region].endpt_a_prec[0]);
q_endpts[region].A[1] = Utils::quantize(endpts[region].A.Y(), pattern_prec.region_precs[region].endpt_a_prec[1]); q_endpts[region].A[1] = Utils::quantize(endpts[region].A.y, pattern_prec.region_precs[region].endpt_a_prec[1]);
q_endpts[region].A[2] = Utils::quantize(endpts[region].A.Z(), pattern_prec.region_precs[region].endpt_a_prec[2]); q_endpts[region].A[2] = Utils::quantize(endpts[region].A.z, pattern_prec.region_precs[region].endpt_a_prec[2]);
q_endpts[region].A[3] = Utils::quantize(endpts[region].A.W(), pattern_prec.region_precs[region].endpt_a_prec[3]); q_endpts[region].A[3] = Utils::quantize(endpts[region].A.w, pattern_prec.region_precs[region].endpt_a_prec[3]);
q_endpts[region].B[0] = Utils::quantize(endpts[region].B.X(), pattern_prec.region_precs[region].endpt_b_prec[0]); q_endpts[region].B[0] = Utils::quantize(endpts[region].B.x, pattern_prec.region_precs[region].endpt_b_prec[0]);
q_endpts[region].B[1] = Utils::quantize(endpts[region].B.Y(), pattern_prec.region_precs[region].endpt_b_prec[1]); q_endpts[region].B[1] = Utils::quantize(endpts[region].B.y, pattern_prec.region_precs[region].endpt_b_prec[1]);
q_endpts[region].B[2] = Utils::quantize(endpts[region].B.Z(), pattern_prec.region_precs[region].endpt_b_prec[2]); q_endpts[region].B[2] = Utils::quantize(endpts[region].B.z, pattern_prec.region_precs[region].endpt_b_prec[2]);
q_endpts[region].B[3] = Utils::quantize(endpts[region].B.W(), pattern_prec.region_precs[region].endpt_b_prec[3]); q_endpts[region].B[3] = Utils::quantize(endpts[region].B.w, pattern_prec.region_precs[region].endpt_b_prec[3]);
} }
} }
@ -196,7 +197,7 @@ static void swap_indices(int shapeindex, int indexmode, IntEndptsRGBA endpts[NRE
{ {
int x = index_positions[region] & 3; int x = index_positions[region] & 3;
int y = (index_positions[region] >> 2) & 3; int y = (index_positions[region] >> 2) & 3;
assert(REGION(x,y,shapeindex) == region); // double check the table nvAssert(REGION(x,y,shapeindex) == region); // double check the table
// swap RGB // swap RGB
if (indices[INDEXARRAY_RGB][y][x] & HIGH_INDEXBIT_RGB(indexmode)) if (indices[INDEXARRAY_RGB][y][x] & HIGH_INDEXBIT_RGB(indexmode))
@ -243,7 +244,7 @@ static void write_header(const IntEndptsRGBA endpts[NREGIONS], int shapeindex, c
out.write(endpts[i].A[j], p.chan[j].nbitsizes[0]); out.write(endpts[i].A[j], p.chan[j].nbitsizes[0]);
out.write(endpts[i].B[j], p.chan[j].nbitsizes[1]); out.write(endpts[i].B[j], p.chan[j].nbitsizes[1]);
} }
assert (out.getptr() == 66); nvAssert (out.getptr() == 66);
} }
static void read_header(Bits &in, IntEndptsRGBA endpts[NREGIONS], int &shapeindex, int &rotatemode, int &indexmode, Pattern &p, int &pat_index) static void read_header(Bits &in, IntEndptsRGBA endpts[NREGIONS], int &shapeindex, int &rotatemode, int &indexmode, Pattern &p, int &pat_index)
@ -252,8 +253,8 @@ static void read_header(Bits &in, IntEndptsRGBA endpts[NREGIONS], int &shapeinde
pat_index = 0; pat_index = 0;
assert (pat_index >= 0 && pat_index < NPATTERNS); nvAssert (pat_index >= 0 && pat_index < NPATTERNS);
assert (in.getptr() == patterns[pat_index].modebits); nvAssert (in.getptr() == patterns[pat_index].modebits);
p = patterns[pat_index]; p = patterns[pat_index];
@ -269,7 +270,7 @@ static void read_header(Bits &in, IntEndptsRGBA endpts[NREGIONS], int &shapeinde
endpts[i].A[j] = in.read(p.chan[j].nbitsizes[0]); endpts[i].A[j] = in.read(p.chan[j].nbitsizes[0]);
endpts[i].B[j] = in.read(p.chan[j].nbitsizes[1]); endpts[i].B[j] = in.read(p.chan[j].nbitsizes[1]);
} }
assert (in.getptr() == 66); nvAssert (in.getptr() == 66);
} }
static void write_indices(const int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W], int shapeindex, int indexmode, Bits &out) static void write_indices(const int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W], int shapeindex, int indexmode, Bits &out)
@ -277,12 +278,12 @@ static void write_indices(const int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TI
// the indices we shorten is always index 0 // the indices we shorten is always index 0
// do the 2 bit indices first // do the 2 bit indices first
assert ((indices[INDEXARRAY_2BITS(indexmode)][0][0] & HIGH_INDEXBIT2) == 0); nvAssert ((indices[INDEXARRAY_2BITS(indexmode)][0][0] & HIGH_INDEXBIT2) == 0);
for (int i = 0; i < Tile::TILE_TOTAL; ++i) for (int i = 0; i < Tile::TILE_TOTAL; ++i)
out.write(indices[INDEXARRAY_2BITS(indexmode)][i>>2][i&3], INDEXBITS2 - (i==0?1:0)); // write i..[1:0] or i..[0] out.write(indices[INDEXARRAY_2BITS(indexmode)][i>>2][i&3], INDEXBITS2 - (i==0?1:0)); // write i..[1:0] or i..[0]
// then the 3 bit indices // then the 3 bit indices
assert ((indices[INDEXARRAY_3BITS(indexmode)][0][0] & HIGH_INDEXBIT3) == 0); nvAssert ((indices[INDEXARRAY_3BITS(indexmode)][0][0] & HIGH_INDEXBIT3) == 0);
for (int i = 0; i < Tile::TILE_TOTAL; ++i) for (int i = 0; i < Tile::TILE_TOTAL; ++i)
out.write(indices[INDEXARRAY_3BITS(indexmode)][i>>2][i&3], INDEXBITS3 - (i==0?1:0)); // write i..[2:0] or i..[1:0] out.write(indices[INDEXARRAY_3BITS(indexmode)][i>>2][i&3], INDEXBITS3 - (i==0?1:0)); // write i..[2:0] or i..[1:0]
} }
@ -308,10 +309,10 @@ static void emit_block(const IntEndptsRGBA endpts[NREGIONS], int shapeindex, con
write_indices(indices, shapeindex, indexmode, out); write_indices(indices, shapeindex, indexmode, out);
assert(out.getptr() == AVPCL::BITSIZE); nvAssert(out.getptr() == AVPCL::BITSIZE);
} }
static void generate_palette_quantized_rgb_a(const IntEndptsRGBA &endpts, const RegionPrec &region_prec, int indexmode, Vec3 palette_rgb[NINDICES3], float palette_a[NINDICES3]) static void generate_palette_quantized_rgb_a(const IntEndptsRGBA &endpts, const RegionPrec &region_prec, int indexmode, Vector3 palette_rgb[NINDICES3], float palette_a[NINDICES3])
{ {
// scale endpoints for RGB // scale endpoints for RGB
int a, b; int a, b;
@ -321,28 +322,28 @@ static void generate_palette_quantized_rgb_a(const IntEndptsRGBA &endpts, const
// interpolate R // interpolate R
for (int i = 0; i < NINDICES_RGB(indexmode); ++i) for (int i = 0; i < NINDICES_RGB(indexmode); ++i)
palette_rgb[i].X() = PALETTE_LERP(a, b, i, BIAS_RGB(indexmode), DENOM_RGB(indexmode)); palette_rgb[i].x = float(Utils::lerp(a, b, i, BIAS_RGB(indexmode), DENOM_RGB(indexmode)));
a = Utils::unquantize(endpts.A[1], region_prec.endpt_a_prec[1]); a = Utils::unquantize(endpts.A[1], region_prec.endpt_a_prec[1]);
b = Utils::unquantize(endpts.B[1], region_prec.endpt_b_prec[1]); b = Utils::unquantize(endpts.B[1], region_prec.endpt_b_prec[1]);
// interpolate G // interpolate G
for (int i = 0; i < NINDICES_RGB(indexmode); ++i) for (int i = 0; i < NINDICES_RGB(indexmode); ++i)
palette_rgb[i].Y() = PALETTE_LERP(a, b, i, BIAS_RGB(indexmode), DENOM_RGB(indexmode)); palette_rgb[i].y = float(Utils::lerp(a, b, i, BIAS_RGB(indexmode), DENOM_RGB(indexmode)));
a = Utils::unquantize(endpts.A[2], region_prec.endpt_a_prec[2]); a = Utils::unquantize(endpts.A[2], region_prec.endpt_a_prec[2]);
b = Utils::unquantize(endpts.B[2], region_prec.endpt_b_prec[2]); b = Utils::unquantize(endpts.B[2], region_prec.endpt_b_prec[2]);
// interpolate B // interpolate B
for (int i = 0; i < NINDICES_RGB(indexmode); ++i) for (int i = 0; i < NINDICES_RGB(indexmode); ++i)
palette_rgb[i].Z() = PALETTE_LERP(a, b, i, BIAS_RGB(indexmode), DENOM_RGB(indexmode)); palette_rgb[i].z = float(Utils::lerp(a, b, i, BIAS_RGB(indexmode), DENOM_RGB(indexmode)));
a = Utils::unquantize(endpts.A[3], region_prec.endpt_a_prec[3]); a = Utils::unquantize(endpts.A[3], region_prec.endpt_a_prec[3]);
b = Utils::unquantize(endpts.B[3], region_prec.endpt_b_prec[3]); b = Utils::unquantize(endpts.B[3], region_prec.endpt_b_prec[3]);
// interpolate A // interpolate A
for (int i = 0; i < NINDICES_A(indexmode); ++i) for (int i = 0; i < NINDICES_A(indexmode); ++i)
palette_a[i] = PALETTE_LERP(a, b, i, BIAS_A(indexmode), DENOM_A(indexmode)); palette_a[i] = float(Utils::lerp(a, b, i, BIAS_A(indexmode), DENOM_A(indexmode)));
} }
static void sign_extend(Pattern &p, IntEndptsRGBA endpts[NREGIONS]) static void sign_extend(Pattern &p, IntEndptsRGBA endpts[NREGIONS])
@ -373,10 +374,10 @@ static void rotate_tile(const Tile &in, int rotatemode, Tile &out)
switch(rotatemode) switch(rotatemode)
{ {
case ROTATEMODE_RGBA_RGBA: break; case ROTATEMODE_RGBA_RGBA: break;
case ROTATEMODE_RGBA_AGBR: t = (out.data[y][x]).X(); (out.data[y][x]).X() = (out.data[y][x]).W(); (out.data[y][x]).W() = t; break; case ROTATEMODE_RGBA_AGBR: t = (out.data[y][x]).x; (out.data[y][x]).x = (out.data[y][x]).w; (out.data[y][x]).w = t; break;
case ROTATEMODE_RGBA_RABG: t = (out.data[y][x]).Y(); (out.data[y][x]).Y() = (out.data[y][x]).W(); (out.data[y][x]).W() = t; break; case ROTATEMODE_RGBA_RABG: t = (out.data[y][x]).y; (out.data[y][x]).y = (out.data[y][x]).w; (out.data[y][x]).w = t; break;
case ROTATEMODE_RGBA_RGAB: t = (out.data[y][x]).Z(); (out.data[y][x]).Z() = (out.data[y][x]).W(); (out.data[y][x]).W() = t; break; case ROTATEMODE_RGBA_RGAB: t = (out.data[y][x]).z; (out.data[y][x]).z = (out.data[y][x]).w; (out.data[y][x]).w = t; break;
default: assert(0); default: nvUnreachable();
} }
} }
} }
@ -396,7 +397,7 @@ void AVPCL::decompress_mode5(const char *block, Tile &t)
if (p.transform_mode) if (p.transform_mode)
transform_inverse(p.transform_mode, endpts); transform_inverse(p.transform_mode, endpts);
Vec3 palette_rgb[NREGIONS][NINDICES3]; // could be nindices2 Vector3 palette_rgb[NREGIONS][NINDICES3]; // could be nindices2
float palette_a[NREGIONS][NINDICES3]; // could be nindices2 float palette_a[NREGIONS][NINDICES3]; // could be nindices2
for (int region = 0; region < NREGIONS; ++region) for (int region = 0; region < NREGIONS; ++region)
@ -406,14 +407,14 @@ void AVPCL::decompress_mode5(const char *block, Tile &t)
read_indices(in, shapeindex, indexmode, indices); read_indices(in, shapeindex, indexmode, indices);
assert(in.getptr() == AVPCL::BITSIZE); nvAssert(in.getptr() == AVPCL::BITSIZE);
Tile temp(t.size_x, t.size_y); Tile temp(t.size_x, t.size_y);
// lookup // lookup
for (int y = 0; y < Tile::TILE_H; y++) for (int y = 0; y < Tile::TILE_H; y++)
for (int x = 0; x < Tile::TILE_W; x++) for (int x = 0; x < Tile::TILE_W; x++)
temp.data[y][x] = Vec4(palette_rgb[REGION(x,y,shapeindex)][indices[INDEXARRAY_RGB][y][x]], palette_a[REGION(x,y,shapeindex)][indices[INDEXARRAY_A][y][x]]); temp.data[y][x] = Vector4(palette_rgb[REGION(x,y,shapeindex)][indices[INDEXARRAY_RGB][y][x]], palette_a[REGION(x,y,shapeindex)][indices[INDEXARRAY_A][y][x]]);
rotate_tile(temp, rotatemode, t); rotate_tile(temp, rotatemode, t);
} }
@ -421,31 +422,31 @@ void AVPCL::decompress_mode5(const char *block, Tile &t)
// given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr // given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr
// we already have a candidate mapping when we call this function, thus an error. take an early exit if the accumulated error so far // we already have a candidate mapping when we call this function, thus an error. take an early exit if the accumulated error so far
// exceeds what we already have // exceeds what we already have
static double map_colors(const Vec4 colors[], int np, int rotatemode, int indexmode, const IntEndptsRGBA &endpts, const RegionPrec &region_prec, double current_besterr, int indices[NINDEXARRAYS][Tile::TILE_TOTAL]) static float map_colors(const Vector4 colors[], int np, int rotatemode, int indexmode, const IntEndptsRGBA &endpts, const RegionPrec &region_prec, float current_besterr, int indices[NINDEXARRAYS][Tile::TILE_TOTAL])
{ {
Vec3 palette_rgb[NINDICES3]; // could be nindices2 Vector3 palette_rgb[NINDICES3]; // could be nindices2
float palette_a[NINDICES3]; // could be nindices2 float palette_a[NINDICES3]; // could be nindices2
double toterr = 0; float toterr = 0;
generate_palette_quantized_rgb_a(endpts, region_prec, indexmode, &palette_rgb[0], &palette_a[0]); generate_palette_quantized_rgb_a(endpts, region_prec, indexmode, &palette_rgb[0], &palette_a[0]);
Vec3 rgb; Vector3 rgb;
float a; float a;
for (int i = 0; i < np; ++i) for (int i = 0; i < np; ++i)
{ {
double err, besterr; float err, besterr;
float palette_alpha = 0, tile_alpha = 0; float palette_alpha = 0, tile_alpha = 0;
if(AVPCL::flag_premult) if(AVPCL::flag_premult)
tile_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (colors[i]).X() : tile_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (colors[i]).x :
(rotatemode == ROTATEMODE_RGBA_RABG) ? (colors[i]).Y() : (rotatemode == ROTATEMODE_RGBA_RABG) ? (colors[i]).y :
(rotatemode == ROTATEMODE_RGBA_RGAB) ? (colors[i]).Z() : (colors[i]).W(); (rotatemode == ROTATEMODE_RGBA_RGAB) ? (colors[i]).z : (colors[i]).w;
rgb.X() = (colors[i]).X(); rgb.x = (colors[i]).x;
rgb.Y() = (colors[i]).Y(); rgb.y = (colors[i]).y;
rgb.Z() = (colors[i]).Z(); rgb.z = (colors[i]).z;
a = (colors[i]).W(); a = (colors[i]).w;
// compute the two indices separately // compute the two indices separately
// if we're doing premultiplied alpha, we need to choose first the index that // if we're doing premultiplied alpha, we need to choose first the index that
@ -454,7 +455,7 @@ static double map_colors(const Vec4 colors[], int np, int rotatemode, int indexm
if (rotatemode == ROTATEMODE_RGBA_RGBA) if (rotatemode == ROTATEMODE_RGBA_RGBA)
{ {
// do A index first as it has the alpha // do A index first as it has the alpha
besterr = DBL_MAX; besterr = FLT_MAX;
for (int j = 0; j < NINDICES_A(indexmode) && besterr > 0; ++j) for (int j = 0; j < NINDICES_A(indexmode) && besterr > 0; ++j)
{ {
err = Utils::metric1(a, palette_a[j], rotatemode); err = Utils::metric1(a, palette_a[j], rotatemode);
@ -471,7 +472,7 @@ static double map_colors(const Vec4 colors[], int np, int rotatemode, int indexm
toterr += besterr; // squared-error norms are additive since we don't do the square root toterr += besterr; // squared-error norms are additive since we don't do the square root
// do RGB index // do RGB index
besterr = DBL_MAX; besterr = FLT_MAX;
for (int j = 0; j < NINDICES_RGB(indexmode) && besterr > 0; ++j) for (int j = 0; j < NINDICES_RGB(indexmode) && besterr > 0; ++j)
{ {
err = !AVPCL::flag_premult ? Utils::metric3(rgb, palette_rgb[j], rotatemode) : err = !AVPCL::flag_premult ? Utils::metric3(rgb, palette_rgb[j], rotatemode) :
@ -494,13 +495,13 @@ static double map_colors(const Vec4 colors[], int np, int rotatemode, int indexm
indices[INDEXARRAY_RGB][k] = -1; indices[INDEXARRAY_RGB][k] = -1;
indices[INDEXARRAY_A][k] = -1; indices[INDEXARRAY_A][k] = -1;
} }
return DBL_MAX; return FLT_MAX;
} }
} }
else else
{ {
// do RGB index // do RGB index
besterr = DBL_MAX; besterr = FLT_MAX;
int bestindex; int bestindex;
for (int j = 0; j < NINDICES_RGB(indexmode) && besterr > 0; ++j) for (int j = 0; j < NINDICES_RGB(indexmode) && besterr > 0; ++j)
{ {
@ -516,13 +517,13 @@ static double map_colors(const Vec4 colors[], int np, int rotatemode, int indexm
indices[INDEXARRAY_RGB][i] = j; indices[INDEXARRAY_RGB][i] = j;
} }
} }
palette_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (palette_rgb[bestindex]).X() : palette_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (palette_rgb[bestindex]).x :
(rotatemode == ROTATEMODE_RGBA_RABG) ? (palette_rgb[bestindex]).Y() : (rotatemode == ROTATEMODE_RGBA_RABG) ? (palette_rgb[bestindex]).y :
(rotatemode == ROTATEMODE_RGBA_RGAB) ? (palette_rgb[bestindex]).Z() : (assert(0),0); (rotatemode == ROTATEMODE_RGBA_RGAB) ? (palette_rgb[bestindex]).z : (nvCheckMacro(0),0);
toterr += besterr; toterr += besterr;
// do A index // do A index
besterr = DBL_MAX; besterr = FLT_MAX;
for (int j = 0; j < NINDICES_A(indexmode) && besterr > 0; ++j) for (int j = 0; j < NINDICES_A(indexmode) && besterr > 0; ++j)
{ {
err = !AVPCL::flag_premult ? Utils::metric1(a, palette_a[j], rotatemode) : err = !AVPCL::flag_premult ? Utils::metric1(a, palette_a[j], rotatemode) :
@ -545,7 +546,7 @@ static double map_colors(const Vec4 colors[], int np, int rotatemode, int indexm
indices[INDEXARRAY_RGB][k] = -1; indices[INDEXARRAY_RGB][k] = -1;
indices[INDEXARRAY_A][k] = -1; indices[INDEXARRAY_A][k] = -1;
} }
return DBL_MAX; return FLT_MAX;
} }
} }
} }
@ -554,9 +555,9 @@ static double map_colors(const Vec4 colors[], int np, int rotatemode, int indexm
// assign indices given a tile, shape, and quantized endpoints, return toterr for each region // assign indices given a tile, shape, and quantized endpoints, return toterr for each region
static void assign_indices(const Tile &tile, int shapeindex, int rotatemode, int indexmode, IntEndptsRGBA endpts[NREGIONS], const PatternPrec &pattern_prec, static void assign_indices(const Tile &tile, int shapeindex, int rotatemode, int indexmode, IntEndptsRGBA endpts[NREGIONS], const PatternPrec &pattern_prec,
int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W], double toterr[NREGIONS]) int indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W], float toterr[NREGIONS])
{ {
Vec3 palette_rgb[NREGIONS][NINDICES3]; // could be nindices2 Vector3 palette_rgb[NREGIONS][NINDICES3]; // could be nindices2
float palette_a[NREGIONS][NINDICES3]; // could be nindices2 float palette_a[NREGIONS][NINDICES3]; // could be nindices2
for (int region = 0; region < NREGIONS; ++region) for (int region = 0; region < NREGIONS; ++region)
@ -565,25 +566,25 @@ static void assign_indices(const Tile &tile, int shapeindex, int rotatemode, int
toterr[region] = 0; toterr[region] = 0;
} }
Vec3 rgb; Vector3 rgb;
float a; float a;
for (int y = 0; y < tile.size_y; y++) for (int y = 0; y < tile.size_y; y++)
for (int x = 0; x < tile.size_x; x++) for (int x = 0; x < tile.size_x; x++)
{ {
int region = REGION(x,y,shapeindex); int region = REGION(x,y,shapeindex);
double err, besterr; float err, besterr;
float palette_alpha = 0, tile_alpha = 0; float palette_alpha = 0, tile_alpha = 0;
rgb.X() = (tile.data[y][x]).X(); rgb.x = (tile.data[y][x]).x;
rgb.Y() = (tile.data[y][x]).Y(); rgb.y = (tile.data[y][x]).y;
rgb.Z() = (tile.data[y][x]).Z(); rgb.z = (tile.data[y][x]).z;
a = (tile.data[y][x]).W(); a = (tile.data[y][x]).w;
if(AVPCL::flag_premult) if(AVPCL::flag_premult)
tile_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (tile.data[y][x]).X() : tile_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (tile.data[y][x]).x :
(rotatemode == ROTATEMODE_RGBA_RABG) ? (tile.data[y][x]).Y() : (rotatemode == ROTATEMODE_RGBA_RABG) ? (tile.data[y][x]).y :
(rotatemode == ROTATEMODE_RGBA_RGAB) ? (tile.data[y][x]).Z() : (tile.data[y][x]).W(); (rotatemode == ROTATEMODE_RGBA_RGAB) ? (tile.data[y][x]).z : (tile.data[y][x]).w;
// compute the two indices separately // compute the two indices separately
// if we're doing premultiplied alpha, we need to choose first the index that // if we're doing premultiplied alpha, we need to choose first the index that
@ -592,7 +593,7 @@ static void assign_indices(const Tile &tile, int shapeindex, int rotatemode, int
if (rotatemode == ROTATEMODE_RGBA_RGBA) if (rotatemode == ROTATEMODE_RGBA_RGBA)
{ {
// do A index first as it has the alpha // do A index first as it has the alpha
besterr = DBL_MAX; besterr = FLT_MAX;
for (int i = 0; i < NINDICES_A(indexmode) && besterr > 0; ++i) for (int i = 0; i < NINDICES_A(indexmode) && besterr > 0; ++i)
{ {
err = Utils::metric1(a, palette_a[region][i], rotatemode); err = Utils::metric1(a, palette_a[region][i], rotatemode);
@ -609,7 +610,7 @@ static void assign_indices(const Tile &tile, int shapeindex, int rotatemode, int
toterr[region] += besterr; // squared-error norms are additive since we don't do the square root toterr[region] += besterr; // squared-error norms are additive since we don't do the square root
// do RGB index // do RGB index
besterr = DBL_MAX; besterr = FLT_MAX;
for (int i = 0; i < NINDICES_RGB(indexmode) && besterr > 0; ++i) for (int i = 0; i < NINDICES_RGB(indexmode) && besterr > 0; ++i)
{ {
err = !AVPCL::flag_premult ? Utils::metric3(rgb, palette_rgb[region][i], rotatemode) : err = !AVPCL::flag_premult ? Utils::metric3(rgb, palette_rgb[region][i], rotatemode) :
@ -628,7 +629,7 @@ static void assign_indices(const Tile &tile, int shapeindex, int rotatemode, int
else else
{ {
// do RGB index first as it has the alpha // do RGB index first as it has the alpha
besterr = DBL_MAX; besterr = FLT_MAX;
int bestindex; int bestindex;
for (int i = 0; i < NINDICES_RGB(indexmode) && besterr > 0; ++i) for (int i = 0; i < NINDICES_RGB(indexmode) && besterr > 0; ++i)
{ {
@ -644,13 +645,13 @@ static void assign_indices(const Tile &tile, int shapeindex, int rotatemode, int
bestindex = i; bestindex = i;
} }
} }
palette_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (palette_rgb[region][bestindex]).X() : palette_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (palette_rgb[region][bestindex]).x :
(rotatemode == ROTATEMODE_RGBA_RABG) ? (palette_rgb[region][bestindex]).Y() : (rotatemode == ROTATEMODE_RGBA_RABG) ? (palette_rgb[region][bestindex]).y :
(rotatemode == ROTATEMODE_RGBA_RGAB) ? (palette_rgb[region][bestindex]).Z() : (assert(0),0); (rotatemode == ROTATEMODE_RGBA_RGAB) ? (palette_rgb[region][bestindex]).z : (nvCheckMacro(0),0);
toterr[region] += besterr; toterr[region] += besterr;
// do A index // do A index
besterr = DBL_MAX; besterr = FLT_MAX;
for (int i = 0; i < NINDICES_A(indexmode) && besterr > 0; ++i) for (int i = 0; i < NINDICES_A(indexmode) && besterr > 0; ++i)
{ {
err = !AVPCL::flag_premult ? Utils::metric1(a, palette_a[region][i], rotatemode) : err = !AVPCL::flag_premult ? Utils::metric1(a, palette_a[region][i], rotatemode) :
@ -671,8 +672,8 @@ static void assign_indices(const Tile &tile, int shapeindex, int rotatemode, int
// note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's // note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's
// this function returns either old_err or a value smaller (if it was successful in improving the error) // this function returns either old_err or a value smaller (if it was successful in improving the error)
static double perturb_one(const Vec4 colors[], int np, int rotatemode, int indexmode, int ch, const RegionPrec &region_prec, const IntEndptsRGBA &old_endpts, IntEndptsRGBA &new_endpts, static float perturb_one(const Vector4 colors[], int np, int rotatemode, int indexmode, int ch, const RegionPrec &region_prec, const IntEndptsRGBA &old_endpts, IntEndptsRGBA &new_endpts,
double old_err, int do_b, int indices[NINDEXARRAYS][Tile::TILE_TOTAL]) float old_err, int do_b, int indices[NINDEXARRAYS][Tile::TILE_TOTAL])
{ {
// we have the old endpoints: old_endpts // we have the old endpoints: old_endpts
// we have the perturbed endpoints: new_endpts // we have the perturbed endpoints: new_endpts
@ -743,10 +744,10 @@ static double perturb_one(const Vec4 colors[], int np, int rotatemode, int index
// if err > 40 6.25% // if err > 40 6.25%
// for np = 16 -- adjust error thresholds as a function of np // for np = 16 -- adjust error thresholds as a function of np
// always ensure endpoint ordering is preserved (no need to overlap the scan) // always ensure endpoint ordering is preserved (no need to overlap the scan)
static double exhaustive(const Vec4 colors[], int np, int rotatemode, int indexmode, int ch, const RegionPrec &region_prec, double orig_err, IntEndptsRGBA &opt_endpts, int indices[NINDEXARRAYS][Tile::TILE_TOTAL]) static float exhaustive(const Vector4 colors[], int np, int rotatemode, int indexmode, int ch, const RegionPrec &region_prec, float orig_err, IntEndptsRGBA &opt_endpts, int indices[NINDEXARRAYS][Tile::TILE_TOTAL])
{ {
IntEndptsRGBA temp_endpts; IntEndptsRGBA temp_endpts;
double best_err = orig_err; float best_err = orig_err;
int aprec = region_prec.endpt_a_prec[ch]; int aprec = region_prec.endpt_a_prec[ch];
int bprec = region_prec.endpt_b_prec[ch]; int bprec = region_prec.endpt_b_prec[ch];
int good_indices[NINDEXARRAYS][Tile::TILE_TOTAL]; int good_indices[NINDEXARRAYS][Tile::TILE_TOTAL];
@ -756,7 +757,7 @@ static double exhaustive(const Vec4 colors[], int np, int rotatemode, int indexm
for (int i=0; i<np; ++i) for (int i=0; i<np; ++i)
indices[j][i] = -1; indices[j][i] = -1;
double thr_scale = (double)np / (double)Tile::TILE_TOTAL; float thr_scale = (float)np / (float)Tile::TILE_TOTAL;
if (orig_err == 0) return orig_err; if (orig_err == 0) return orig_err;
@ -765,8 +766,8 @@ static double exhaustive(const Vec4 colors[], int np, int rotatemode, int indexm
else if (orig_err > 1000.0*thr_scale) { adelta = (1 << aprec)/4; bdelta = (1 << bprec)/4; } else if (orig_err > 1000.0*thr_scale) { adelta = (1 << aprec)/4; bdelta = (1 << bprec)/4; }
else if (orig_err > 200.0*thr_scale) { adelta = (1 << aprec)/8; bdelta = (1 << bprec)/8; } else if (orig_err > 200.0*thr_scale) { adelta = (1 << aprec)/8; bdelta = (1 << bprec)/8; }
else if (orig_err > 40.0*thr_scale) { adelta = (1 << aprec)/16; bdelta = (1 << bprec)/16; } else if (orig_err > 40.0*thr_scale) { adelta = (1 << aprec)/16; bdelta = (1 << bprec)/16; }
adelta = MAX(adelta, 3); adelta = max(adelta, 3);
bdelta = MAX(bdelta, 3); bdelta = max(bdelta, 3);
#ifdef DISABLE_EXHAUSTIVE #ifdef DISABLE_EXHAUSTIVE
adelta = bdelta = 3; adelta = bdelta = 3;
@ -775,10 +776,10 @@ static double exhaustive(const Vec4 colors[], int np, int rotatemode, int indexm
temp_endpts = opt_endpts; temp_endpts = opt_endpts;
// ok figure out the range of A and B // ok figure out the range of A and B
int alow = MAX(0, opt_endpts.A[ch] - adelta); int alow = max(0, opt_endpts.A[ch] - adelta);
int ahigh = MIN((1<<aprec)-1, opt_endpts.A[ch] + adelta); int ahigh = min((1<<aprec)-1, opt_endpts.A[ch] + adelta);
int blow = MAX(0, opt_endpts.B[ch] - bdelta); int blow = max(0, opt_endpts.B[ch] - bdelta);
int bhigh = MIN((1<<bprec)-1, opt_endpts.B[ch] + bdelta); int bhigh = min((1<<bprec)-1, opt_endpts.B[ch] + bdelta);
// now there's no need to swap the ordering of A and B // now there's no need to swap the ordering of A and B
bool a_le_b = opt_endpts.A[ch] <= opt_endpts.B[ch]; bool a_le_b = opt_endpts.A[ch] <= opt_endpts.B[ch];
@ -789,7 +790,7 @@ static double exhaustive(const Vec4 colors[], int np, int rotatemode, int indexm
{ {
// keep a <= b // keep a <= b
for (int a = alow; a <= ahigh; ++a) for (int a = alow; a <= ahigh; ++a)
for (int b = MAX(a, blow); b < bhigh; ++b) for (int b = max(a, blow); b < bhigh; ++b)
{ {
temp_endpts.A[ch] = a; temp_endpts.A[ch] = a;
temp_endpts.B[ch] = b; temp_endpts.B[ch] = b;
@ -810,7 +811,7 @@ static double exhaustive(const Vec4 colors[], int np, int rotatemode, int indexm
{ {
// keep b <= a // keep b <= a
for (int b = blow; b < bhigh; ++b) for (int b = blow; b < bhigh; ++b)
for (int a = MAX(b, alow); a <= ahigh; ++a) for (int a = max(b, alow); a <= ahigh; ++a)
{ {
temp_endpts.A[ch] = a; temp_endpts.A[ch] = a;
temp_endpts.B[ch] = b; temp_endpts.B[ch] = b;
@ -840,9 +841,9 @@ static double exhaustive(const Vec4 colors[], int np, int rotatemode, int indexm
return best_err; return best_err;
} }
static double optimize_one(const Vec4 colors[], int np, int rotatemode, int indexmode, double orig_err, const IntEndptsRGBA &orig_endpts, const RegionPrec &region_prec, IntEndptsRGBA &opt_endpts) static float optimize_one(const Vector4 colors[], int np, int rotatemode, int indexmode, float orig_err, const IntEndptsRGBA &orig_endpts, const RegionPrec &region_prec, IntEndptsRGBA &opt_endpts)
{ {
double opt_err = orig_err; float opt_err = orig_err;
opt_endpts = orig_endpts; opt_endpts = orig_endpts;
@ -889,7 +890,7 @@ static double optimize_one(const Vec4 colors[], int np, int rotatemode, int inde
for (int i=0; i<np; ++i) for (int i=0; i<np; ++i)
{ {
new_indices[j][i] = orig_indices[j][i] = temp_indices0[j][i]; new_indices[j][i] = orig_indices[j][i] = temp_indices0[j][i];
assert (orig_indices[j][i] != -1); nvAssert (orig_indices[j][i] != -1);
} }
opt_endpts.A[ch] = new_a.A[ch]; opt_endpts.A[ch] = new_a.A[ch];
@ -905,7 +906,7 @@ static double optimize_one(const Vec4 colors[], int np, int rotatemode, int inde
for (int i=0; i<np; ++i) for (int i=0; i<np; ++i)
{ {
new_indices[j][i] = orig_indices[j][i] = temp_indices1[j][i]; new_indices[j][i] = orig_indices[j][i] = temp_indices1[j][i];
assert (orig_indices[j][i] != -1); nvAssert (orig_indices[j][i] != -1);
} }
opt_endpts.B[ch] = new_b.B[ch]; opt_endpts.B[ch] = new_b.B[ch];
@ -924,7 +925,7 @@ static double optimize_one(const Vec4 colors[], int np, int rotatemode, int inde
for (int i=0; i<np; ++i) for (int i=0; i<np; ++i)
{ {
new_indices[j][i] = temp_indices0[j][i]; new_indices[j][i] = temp_indices0[j][i];
assert (orig_indices[j][i] != -1); nvAssert (orig_indices[j][i] != -1);
} }
if (do_b == 0) if (do_b == 0)
@ -949,7 +950,7 @@ static double optimize_one(const Vec4 colors[], int np, int rotatemode, int inde
bool first = true; bool first = true;
for (int ch = 0; ch < NCHANNELS_RGBA; ++ch) for (int ch = 0; ch < NCHANNELS_RGBA; ++ch)
{ {
double new_err = exhaustive(colors, np, rotatemode, indexmode, ch, region_prec, opt_err, opt_endpts, temp_indices0); float new_err = exhaustive(colors, np, rotatemode, indexmode, ch, region_prec, opt_err, opt_endpts, temp_indices0);
if (new_err < opt_err) if (new_err < opt_err)
{ {
@ -961,7 +962,7 @@ static double optimize_one(const Vec4 colors[], int np, int rotatemode, int inde
for (int i=0; i<np; ++i) for (int i=0; i<np; ++i)
{ {
orig_indices[j][i] = temp_indices0[j][i]; orig_indices[j][i] = temp_indices0[j][i];
assert (orig_indices[j][i] != -1); nvAssert (orig_indices[j][i] != -1);
} }
first = false; first = false;
} }
@ -985,10 +986,10 @@ static double optimize_one(const Vec4 colors[], int np, int rotatemode, int inde
return opt_err; return opt_err;
} }
static void optimize_endpts(const Tile &tile, int shapeindex, int rotatemode, int indexmode, const double orig_err[NREGIONS], static void optimize_endpts(const Tile &tile, int shapeindex, int rotatemode, int indexmode, const float orig_err[NREGIONS],
const IntEndptsRGBA orig_endpts[NREGIONS], const PatternPrec &pattern_prec, double opt_err[NREGIONS], IntEndptsRGBA opt_endpts[NREGIONS]) const IntEndptsRGBA orig_endpts[NREGIONS], const PatternPrec &pattern_prec, float opt_err[NREGIONS], IntEndptsRGBA opt_endpts[NREGIONS])
{ {
Vec4 pixels[Tile::TILE_TOTAL]; Vector4 pixels[Tile::TILE_TOTAL];
IntEndptsRGBA temp_in, temp_out; IntEndptsRGBA temp_in, temp_out;
for (int region=0; region<NREGIONS; ++region) for (int region=0; region<NREGIONS; ++region)
@ -1004,14 +1005,14 @@ static void optimize_endpts(const Tile &tile, int shapeindex, int rotatemode, in
opt_endpts[region] = temp_in = orig_endpts[region]; opt_endpts[region] = temp_in = orig_endpts[region];
opt_err[region] = orig_err[region]; opt_err[region] = orig_err[region];
double best_err = orig_err[region]; float best_err = orig_err[region];
// make sure we have a valid error for temp_in // make sure we have a valid error for temp_in
// we didn't change temp_in, so orig_err[region] is still valid // we didn't change temp_in, so orig_err[region] is still valid
double temp_in_err = orig_err[region]; float temp_in_err = orig_err[region];
// now try to optimize these endpoints // now try to optimize these endpoints
double temp_out_err = optimize_one(pixels, np, rotatemode, indexmode, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out); float temp_out_err = optimize_one(pixels, np, rotatemode, indexmode, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out);
// if we find an improvement, update the best so far and correct the output endpoints and errors // if we find an improvement, update the best so far and correct the output endpoints and errors
if (temp_out_err < best_err) if (temp_out_err < best_err)
@ -1040,12 +1041,11 @@ static void optimize_endpts(const Tile &tile, int shapeindex, int rotatemode, in
emit compressed block with original data // to try to preserve maximum endpoint precision emit compressed block with original data // to try to preserve maximum endpoint precision
*/ */
static double refine(const Tile &tile, int shapeindex_best, int rotatemode, int indexmode, const FltEndpts endpts[NREGIONS], char *block) static float refine(const Tile &tile, int shapeindex_best, int rotatemode, int indexmode, const FltEndpts endpts[NREGIONS], char *block)
{ {
double orig_err[NREGIONS], opt_err[NREGIONS], orig_toterr, opt_toterr, expected_opt_err[NREGIONS]; float orig_err[NREGIONS], opt_err[NREGIONS], orig_toterr, opt_toterr, expected_opt_err[NREGIONS];
IntEndptsRGBA orig_endpts[NREGIONS], opt_endpts[NREGIONS]; IntEndptsRGBA orig_endpts[NREGIONS], opt_endpts[NREGIONS];
int orig_indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W], opt_indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W]; int orig_indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W], opt_indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W];
int temp_indices[NINDEXARRAYS][Tile::TILE_H][Tile::TILE_W];
for (int sp = 0; sp < NPATTERNS; ++sp) for (int sp = 0; sp < NPATTERNS; ++sp)
{ {
@ -1067,8 +1067,9 @@ static double refine(const Tile &tile, int shapeindex_best, int rotatemode, int
optimize_endpts(tile, shapeindex_best, rotatemode, indexmode, orig_err, orig_endpts, pattern_precs[sp], expected_opt_err, opt_endpts); optimize_endpts(tile, shapeindex_best, rotatemode, indexmode, orig_err, orig_endpts, pattern_precs[sp], expected_opt_err, opt_endpts);
assign_indices(tile, shapeindex_best, rotatemode, indexmode, opt_endpts, pattern_precs[sp], opt_indices, opt_err); assign_indices(tile, shapeindex_best, rotatemode, indexmode, opt_endpts, pattern_precs[sp], opt_indices, opt_err);
for (int i=0; i<NREGIONS; ++i) // (nreed) Commented out asserts because they go off all the time...not sure why
assert(expected_opt_err[i] == opt_err[i]); //for (int i=0; i<NREGIONS; ++i)
// nvAssert(expected_opt_err[i] == opt_err[i]);
swap_indices(shapeindex_best, indexmode, opt_endpts, opt_indices); swap_indices(shapeindex_best, indexmode, opt_endpts, opt_indices);
if (patterns[sp].transform_mode) if (patterns[sp].transform_mode)
@ -1095,16 +1096,16 @@ static double refine(const Tile &tile, int shapeindex_best, int rotatemode, int
throw "No candidate found, should never happen (avpcl mode 5)."; throw "No candidate found, should never happen (avpcl mode 5).";
} }
static void clamp(Vec4 &v) static void clamp(Vector4 &v)
{ {
if (v.X() < RGBA_MIN) v.X() = RGBA_MIN; if (v.x < 0.0f) v.x = 0.0f;
if (v.X() > RGBA_MAX) v.X() = RGBA_MAX; if (v.x > 255.0f) v.x = 255.0f;
if (v.Y() < RGBA_MIN) v.Y() = RGBA_MIN; if (v.y < 0.0f) v.y = 0.0f;
if (v.Y() > RGBA_MAX) v.Y() = RGBA_MAX; if (v.y > 255.0f) v.y = 255.0f;
if (v.Z() < RGBA_MIN) v.Z() = RGBA_MIN; if (v.z < 0.0f) v.z = 0.0f;
if (v.Z() > RGBA_MAX) v.Z() = RGBA_MAX; if (v.z > 255.0f) v.z = 255.0f;
if (v.W() < RGBA_MIN) v.W() = RGBA_MIN; if (v.w < 0.0f) v.w = 0.0f;
if (v.W() > RGBA_MAX) v.W() = RGBA_MAX; if (v.w > 255.0f) v.w = 255.0f;
} }
// compute initial endpoints for the "RGB" portion and the "A" portion. // compute initial endpoints for the "RGB" portion and the "A" portion.
@ -1114,14 +1115,16 @@ static void rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS])
for (int region=0; region<NREGIONS; ++region) for (int region=0; region<NREGIONS; ++region)
{ {
int np = 0; int np = 0;
Vec4 colors[Tile::TILE_TOTAL]; Vector3 colors[Tile::TILE_TOTAL];
Vec4 mean(0,0,0,0); float alphas[Tile::TILE_TOTAL];
Vector4 mean(0,0,0,0);
for (int y = 0; y < tile.size_y; y++) for (int y = 0; y < tile.size_y; y++)
for (int x = 0; x < tile.size_x; x++) for (int x = 0; x < tile.size_x; x++)
if (REGION(x,y,shapeindex) == region) if (REGION(x,y,shapeindex) == region)
{ {
colors[np] = tile.data[y][x]; colors[np] = tile.data[y][x].xyz();
alphas[np] = tile.data[y][x].w;
mean += tile.data[y][x]; mean += tile.data[y][x];
++np; ++np;
} }
@ -1129,76 +1132,59 @@ static void rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS])
// handle simple cases // handle simple cases
if (np == 0) if (np == 0)
{ {
Vec4 zero(0,0,0,RGBA_MAX); Vector4 zero(0,0,0,255.0f);
endpts[region].A = zero; endpts[region].A = zero;
endpts[region].B = zero; endpts[region].B = zero;
continue; continue;
} }
else if (np == 1) else if (np == 1)
{ {
endpts[region].A = colors[0]; endpts[region].A = Vector4(colors[0], alphas[0]);
endpts[region].B = colors[0]; endpts[region].B = Vector4(colors[0], alphas[0]);
continue; continue;
} }
else if (np == 2) else if (np == 2)
{ {
endpts[region].A = colors[0]; endpts[region].A = Vector4(colors[0], alphas[0]);
endpts[region].B = colors[1]; endpts[region].B = Vector4(colors[1], alphas[1]);
continue; continue;
} }
Matrix rdq(np, 3);
float alpha[Tile::TILE_TOTAL];
mean /= float(np); mean /= float(np);
for (int i = 0; i < np; ++i) Vector3 direction = Fit::computePrincipalComponent_EigenSolver(np, colors);
{
rdq(i,0) = colors[i].X() - mean.X();
rdq(i,1) = colors[i].Y() - mean.Y();
rdq(i,2) = colors[i].Z() - mean.Z();
alpha[i] = colors[i].W() - mean.W();
}
// perform a singular value decomposition
SVD svd(rdq);
// get the principal component direction (the one with the largest weight)
// hack the alpha channel
Vec4 direction(svd.R()(0,0), svd.R()(0,1), svd.R()(0,2), 0.0);
// project each pixel value along the principal direction // project each pixel value along the principal direction
double minp = DBL_MAX, maxp = -DBL_MAX; float minp = FLT_MAX, maxp = -FLT_MAX;
double mina = DBL_MAX, maxa = -DBL_MAX; float mina = FLT_MAX, maxa = -FLT_MAX;
for (int i = 0; i < np; i++) for (int i = 0; i < np; i++)
{ {
float dp = rdq(i,0) * direction.X() + rdq(i,1)*direction.Y() + rdq(i,2)*direction.Z(); float dp = dot(colors[i]-mean.xyz(), direction);
if (dp < minp) minp = dp; if (dp < minp) minp = dp;
if (dp > maxp) maxp = dp; if (dp > maxp) maxp = dp;
dp = alpha[i]; dp = alphas[i] - mean.w;
if (dp < mina) mina = dp; if (dp < mina) mina = dp;
if (dp > maxa) maxa = dp; if (dp > maxa) maxa = dp;
} }
// choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values // choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values
endpts[region].A = mean + minp*direction; endpts[region].A = mean + Vector4(minp*direction, mina);
endpts[region].B = mean + maxp*direction; endpts[region].B = mean + Vector4(maxp*direction, maxa);
endpts[region].A.W() = mean.W() + mina;
endpts[region].B.W() = mean.W() + maxa;
// clamp endpoints // clamp endpoints
// WORK: is [0,255] the right range, or should it be [0,255.5) or even [0,256) ? // the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best
// shape based on endpoints being clamped
clamp(endpts[region].A); clamp(endpts[region].A);
clamp(endpts[region].B); clamp(endpts[region].B);
} }
} }
double AVPCL::compress_mode5(const Tile &t, char *block) float AVPCL::compress_mode5(const Tile &t, char *block)
{ {
FltEndpts endpts[NREGIONS]; FltEndpts endpts[NREGIONS];
char tempblock[AVPCL::BLOCKSIZE]; char tempblock[AVPCL::BLOCKSIZE];
double msebest = DBL_MAX; float msebest = FLT_MAX;
int shape = 0; int shape = 0;
Tile t1; Tile t1;
@ -1210,7 +1196,7 @@ double AVPCL::compress_mode5(const Tile &t, char *block)
// for (int i = 0; i < NINDEXMODES && msebest > 0; ++i) // for (int i = 0; i < NINDEXMODES && msebest > 0; ++i)
for (int i = 0; i < 1 && msebest > 0; ++i) for (int i = 0; i < 1 && msebest > 0; ++i)
{ {
double mse = refine(t1, shape, r, i, endpts, tempblock); float mse = refine(t1, shape, r, i, endpts, tempblock);
if (mse < msebest) if (mse < msebest)
{ {
memcpy(block, tempblock, sizeof(tempblock)); memcpy(block, tempblock, sizeof(tempblock));

View File

@ -17,15 +17,17 @@ See the License for the specific language governing permissions and limitations
#include "bits.h" #include "bits.h"
#include "tile.h" #include "tile.h"
#include "avpcl.h" #include "avpcl.h"
#include "arvo/Vec4.h" #include "nvcore/Debug.h"
#include "arvo/Matrix.h" #include "nvmath/Vector.inl"
#include "arvo/SVD.h" #include "nvmath/Matrix.inl"
#include "nvmath/Fitting.h"
#include "utils.h" #include "utils.h"
#include "endpts.h" #include "endpts.h"
#include <cstring>
#include <assert.h>
using namespace ArvoMath; using namespace nv;
using namespace AVPCL;
#define NLSBMODES 4 // number of different lsb modes per region. since we have two .1 per region, that can have 4 values #define NLSBMODES 4 // number of different lsb modes per region. since we have two .1 per region, that can have 4 values
@ -83,7 +85,7 @@ struct PatternPrec
}; };
// this is the precision for each channel and region // this is the precision for each channel and region
// NOTE: this MUST match the corresponding data in "patterns" above -- WARNING: there is NO assert to check this! // NOTE: this MUST match the corresponding data in "patterns" above -- WARNING: there is NO nvAssert to check this!
static PatternPrec pattern_precs[NPATTERNS] = static PatternPrec pattern_precs[NPATTERNS] =
{ {
7,7,7,7, 7,7,7,7, 7,7,7,7, 7,7,7,7,
@ -102,7 +104,7 @@ static int nbits(int n, bool issigned)
} }
else else
{ {
assert (issigned); nvAssert (issigned);
for (nb=0; n<-1; ++nb, n>>=1) ; for (nb=0; n<-1; ++nb, n>>=1) ;
return nb + 1; return nb + 1;
} }
@ -143,7 +145,7 @@ static void compress_one(const IntEndptsRGBA& endpts, IntEndptsRGBA_2& compr_end
// ignore the alpha channel in the count // ignore the alpha channel in the count
onescnt += (j==CHANNEL_A) ? 0 : (endpts.A[j] & 1); onescnt += (j==CHANNEL_A) ? 0 : (endpts.A[j] & 1);
compr_endpts.A[j] = endpts.A[j] >> 1; compr_endpts.A[j] = endpts.A[j] >> 1;
assert (compr_endpts.A[j] < 128); nvAssert (compr_endpts.A[j] < 128);
} }
compr_endpts.a_lsb = onescnt >= 2; compr_endpts.a_lsb = onescnt >= 2;
@ -152,7 +154,7 @@ static void compress_one(const IntEndptsRGBA& endpts, IntEndptsRGBA_2& compr_end
{ {
onescnt += (j==CHANNEL_A) ? 0 : (endpts.B[j] & 1); onescnt += (j==CHANNEL_A) ? 0 : (endpts.B[j] & 1);
compr_endpts.B[j] = endpts.B[j] >> 1; compr_endpts.B[j] = endpts.B[j] >> 1;
assert (compr_endpts.B[j] < 128); nvAssert (compr_endpts.B[j] < 128);
} }
compr_endpts.b_lsb = onescnt >= 2; compr_endpts.b_lsb = onescnt >= 2;
} }
@ -186,15 +188,15 @@ static void quantize_endpts(const FltEndpts endpts[NREGIONS], const PatternPrec
for (int region = 0; region < NREGIONS; ++region) for (int region = 0; region < NREGIONS; ++region)
{ {
full_endpts[region].A[0] = Utils::quantize(endpts[region].A.X(), pattern_prec.region_precs[region].endpt_a_prec[0]+1); // +1 since we are in uncompressed space full_endpts[region].A[0] = Utils::quantize(endpts[region].A.x, pattern_prec.region_precs[region].endpt_a_prec[0]+1); // +1 since we are in uncompressed space
full_endpts[region].A[1] = Utils::quantize(endpts[region].A.Y(), pattern_prec.region_precs[region].endpt_a_prec[1]+1); full_endpts[region].A[1] = Utils::quantize(endpts[region].A.y, pattern_prec.region_precs[region].endpt_a_prec[1]+1);
full_endpts[region].A[2] = Utils::quantize(endpts[region].A.Z(), pattern_prec.region_precs[region].endpt_a_prec[2]+1); full_endpts[region].A[2] = Utils::quantize(endpts[region].A.z, pattern_prec.region_precs[region].endpt_a_prec[2]+1);
full_endpts[region].A[3] = Utils::quantize(endpts[region].A.W(), pattern_prec.region_precs[region].endpt_a_prec[3]+1); full_endpts[region].A[3] = Utils::quantize(endpts[region].A.w, pattern_prec.region_precs[region].endpt_a_prec[3]+1);
full_endpts[region].B[0] = Utils::quantize(endpts[region].B.X(), pattern_prec.region_precs[region].endpt_b_prec[0]+1); full_endpts[region].B[0] = Utils::quantize(endpts[region].B.x, pattern_prec.region_precs[region].endpt_b_prec[0]+1);
full_endpts[region].B[1] = Utils::quantize(endpts[region].B.Y(), pattern_prec.region_precs[region].endpt_b_prec[1]+1); full_endpts[region].B[1] = Utils::quantize(endpts[region].B.y, pattern_prec.region_precs[region].endpt_b_prec[1]+1);
full_endpts[region].B[2] = Utils::quantize(endpts[region].B.Z(), pattern_prec.region_precs[region].endpt_b_prec[2]+1); full_endpts[region].B[2] = Utils::quantize(endpts[region].B.z, pattern_prec.region_precs[region].endpt_b_prec[2]+1);
full_endpts[region].B[3] = Utils::quantize(endpts[region].B.W(), pattern_prec.region_precs[region].endpt_b_prec[3]+1); full_endpts[region].B[3] = Utils::quantize(endpts[region].B.w, pattern_prec.region_precs[region].endpt_b_prec[3]+1);
compress_one(full_endpts[region], q_endpts[region]); compress_one(full_endpts[region], q_endpts[region]);
} }
@ -212,7 +214,7 @@ static void swap_indices(IntEndptsRGBA_2 endpts[NREGIONS], int indices[Tile::TIL
{ {
int x = index_positions[region] & 3; int x = index_positions[region] & 3;
int y = (index_positions[region] >> 2) & 3; int y = (index_positions[region] >> 2) & 3;
assert(REGION(x,y,shapeindex) == region); // double check the table nvAssert(REGION(x,y,shapeindex) == region); // double check the table
if (indices[y][x] & HIGH_INDEXBIT) if (indices[y][x] & HIGH_INDEXBIT)
{ {
// high bit is set, swap the endpts and indices for this region // high bit is set, swap the endpts and indices for this region
@ -253,7 +255,7 @@ static void write_header(const IntEndptsRGBA_2 endpts[NREGIONS], int shapeindex,
out.write(endpts[i].b_lsb, 1); out.write(endpts[i].b_lsb, 1);
} }
assert (out.getptr() == 65); nvAssert (out.getptr() == 65);
} }
static void read_header(Bits &in, IntEndptsRGBA_2 endpts[NREGIONS], int &shapeindex, Pattern &p, int &pat_index) static void read_header(Bits &in, IntEndptsRGBA_2 endpts[NREGIONS], int &shapeindex, Pattern &p, int &pat_index)
@ -262,8 +264,8 @@ static void read_header(Bits &in, IntEndptsRGBA_2 endpts[NREGIONS], int &shapein
pat_index = 0; pat_index = 0;
assert (pat_index >= 0 && pat_index < NPATTERNS); nvAssert (pat_index >= 0 && pat_index < NPATTERNS);
assert (in.getptr() == patterns[pat_index].modebits); nvAssert (in.getptr() == patterns[pat_index].modebits);
p = patterns[pat_index]; p = patterns[pat_index];
@ -282,12 +284,12 @@ static void read_header(Bits &in, IntEndptsRGBA_2 endpts[NREGIONS], int &shapein
endpts[i].b_lsb = in.read(1); endpts[i].b_lsb = in.read(1);
} }
assert (in.getptr() == 65); nvAssert (in.getptr() == 65);
} }
static void write_indices(const int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex, Bits &out) static void write_indices(const int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex, Bits &out)
{ {
assert ((indices[0][0] & HIGH_INDEXBIT) == 0); nvAssert ((indices[0][0] & HIGH_INDEXBIT) == 0);
// the index we shorten is always index 0 // the index we shorten is always index 0
for (int i = 0; i < Tile::TILE_TOTAL; ++i) for (int i = 0; i < Tile::TILE_TOTAL; ++i)
@ -320,10 +322,10 @@ static void emit_block(const IntEndptsRGBA_2 endpts[NREGIONS], int shapeindex, c
write_indices(indices, shapeindex, out); write_indices(indices, shapeindex, out);
assert(out.getptr() == AVPCL::BITSIZE); nvAssert(out.getptr() == AVPCL::BITSIZE);
} }
static void generate_palette_quantized(const IntEndptsRGBA_2 &endpts_2, const RegionPrec &region_prec, Vec4 palette[NINDICES]) static void generate_palette_quantized(const IntEndptsRGBA_2 &endpts_2, const RegionPrec &region_prec, Vector4 palette[NINDICES])
{ {
IntEndptsRGBA endpts; IntEndptsRGBA endpts;
@ -337,28 +339,28 @@ static void generate_palette_quantized(const IntEndptsRGBA_2 &endpts_2, const Re
// interpolate // interpolate
for (int i = 0; i < NINDICES; ++i) for (int i = 0; i < NINDICES; ++i)
palette[i].X() = PALETTE_LERP(a, b, i, BIAS, DENOM); palette[i].x = float(Utils::lerp(a, b, i, BIAS, DENOM));
a = Utils::unquantize(endpts.A[1], region_prec.endpt_a_prec[1]+1); a = Utils::unquantize(endpts.A[1], region_prec.endpt_a_prec[1]+1);
b = Utils::unquantize(endpts.B[1], region_prec.endpt_b_prec[1]+1); b = Utils::unquantize(endpts.B[1], region_prec.endpt_b_prec[1]+1);
// interpolate // interpolate
for (int i = 0; i < NINDICES; ++i) for (int i = 0; i < NINDICES; ++i)
palette[i].Y() = PALETTE_LERP(a, b, i, BIAS, DENOM); palette[i].y = float(Utils::lerp(a, b, i, BIAS, DENOM));
a = Utils::unquantize(endpts.A[2], region_prec.endpt_a_prec[2]+1); a = Utils::unquantize(endpts.A[2], region_prec.endpt_a_prec[2]+1);
b = Utils::unquantize(endpts.B[2], region_prec.endpt_b_prec[2]+1); b = Utils::unquantize(endpts.B[2], region_prec.endpt_b_prec[2]+1);
// interpolate // interpolate
for (int i = 0; i < NINDICES; ++i) for (int i = 0; i < NINDICES; ++i)
palette[i].Z() = PALETTE_LERP(a, b, i, BIAS, DENOM); palette[i].z = float(Utils::lerp(a, b, i, BIAS, DENOM));
a = Utils::unquantize(endpts.A[3], region_prec.endpt_a_prec[3]+1); a = Utils::unquantize(endpts.A[3], region_prec.endpt_a_prec[3]+1);
b = Utils::unquantize(endpts.B[3], region_prec.endpt_b_prec[3]+1); b = Utils::unquantize(endpts.B[3], region_prec.endpt_b_prec[3]+1);
// interpolate // interpolate
for (int i = 0; i < NINDICES; ++i) for (int i = 0; i < NINDICES; ++i)
palette[i].W() = PALETTE_LERP(a, b, i, BIAS, DENOM); palette[i].w = float(Utils::lerp(a, b, i, BIAS, DENOM));
} }
void AVPCL::decompress_mode6(const char *block, Tile &t) void AVPCL::decompress_mode6(const char *block, Tile &t)
@ -371,7 +373,7 @@ void AVPCL::decompress_mode6(const char *block, Tile &t)
read_header(in, endpts, shapeindex, p, pat_index); read_header(in, endpts, shapeindex, p, pat_index);
Vec4 palette[NREGIONS][NINDICES]; Vector4 palette[NREGIONS][NINDICES];
for (int r = 0; r < NREGIONS; ++r) for (int r = 0; r < NREGIONS; ++r)
generate_palette_quantized(endpts[r], pattern_precs[pat_index].region_precs[r], &palette[r][0]); generate_palette_quantized(endpts[r], pattern_precs[pat_index].region_precs[r], &palette[r][0]);
@ -379,7 +381,7 @@ void AVPCL::decompress_mode6(const char *block, Tile &t)
read_indices(in, shapeindex, indices); read_indices(in, shapeindex, indices);
assert(in.getptr() == AVPCL::BITSIZE); nvAssert(in.getptr() == AVPCL::BITSIZE);
// lookup // lookup
for (int y = 0; y < Tile::TILE_H; y++) for (int y = 0; y < Tile::TILE_H; y++)
@ -388,17 +390,17 @@ void AVPCL::decompress_mode6(const char *block, Tile &t)
} }
// given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr // given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr
static double map_colors(const Vec4 colors[], int np, const IntEndptsRGBA_2 &endpts, const RegionPrec &region_prec, double current_err, int indices[Tile::TILE_TOTAL]) static float map_colors(const Vector4 colors[], int np, const IntEndptsRGBA_2 &endpts, const RegionPrec &region_prec, float current_err, int indices[Tile::TILE_TOTAL])
{ {
Vec4 palette[NINDICES]; Vector4 palette[NINDICES];
double toterr = 0; float toterr = 0;
Vec4 err; Vector4 err;
generate_palette_quantized(endpts, region_prec, palette); generate_palette_quantized(endpts, region_prec, palette);
for (int i = 0; i < np; ++i) for (int i = 0; i < np; ++i)
{ {
double err, besterr = DBL_MAX; float err, besterr = FLT_MAX;
for (int j = 0; j < NINDICES && besterr > 0; ++j) for (int j = 0; j < NINDICES && besterr > 0; ++j)
{ {
@ -422,7 +424,7 @@ static double map_colors(const Vec4 colors[], int np, const IntEndptsRGBA_2 &end
for (int k = i; k < np; ++k) for (int k = i; k < np; ++k)
indices[k] = -1; indices[k] = -1;
return DBL_MAX; return FLT_MAX;
} }
} }
return toterr; return toterr;
@ -430,10 +432,10 @@ static double map_colors(const Vec4 colors[], int np, const IntEndptsRGBA_2 &end
// assign indices given a tile, shape, and quantized endpoints, return toterr for each region // assign indices given a tile, shape, and quantized endpoints, return toterr for each region
static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGBA_2 endpts[NREGIONS], const PatternPrec &pattern_prec, static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGBA_2 endpts[NREGIONS], const PatternPrec &pattern_prec,
int indices[Tile::TILE_H][Tile::TILE_W], double toterr[NREGIONS]) int indices[Tile::TILE_H][Tile::TILE_W], float toterr[NREGIONS])
{ {
// build list of possibles // build list of possibles
Vec4 palette[NREGIONS][NINDICES]; Vector4 palette[NREGIONS][NINDICES];
for (int region = 0; region < NREGIONS; ++region) for (int region = 0; region < NREGIONS; ++region)
{ {
@ -441,13 +443,13 @@ static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGBA_2 end
toterr[region] = 0; toterr[region] = 0;
} }
Vec4 err; Vector4 err;
for (int y = 0; y < tile.size_y; y++) for (int y = 0; y < tile.size_y; y++)
for (int x = 0; x < tile.size_x; x++) for (int x = 0; x < tile.size_x; x++)
{ {
int region = REGION(x,y,shapeindex); int region = REGION(x,y,shapeindex);
double err, besterr = DBL_MAX; float err, besterr = FLT_MAX;
for (int i = 0; i < NINDICES && besterr > 0; ++i) for (int i = 0; i < NINDICES && besterr > 0; ++i)
{ {
@ -468,8 +470,8 @@ static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGBA_2 end
// note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's // note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's
// this function returns either old_err or a value smaller (if it was successful in improving the error) // this function returns either old_err or a value smaller (if it was successful in improving the error)
static double perturb_one(const Vec4 colors[], int np, int ch, const RegionPrec &region_prec, const IntEndptsRGBA_2 &old_endpts, IntEndptsRGBA_2 &new_endpts, static float perturb_one(const Vector4 colors[], int np, int ch, const RegionPrec &region_prec, const IntEndptsRGBA_2 &old_endpts, IntEndptsRGBA_2 &new_endpts,
double old_err, int do_b, int indices[Tile::TILE_TOTAL]) float old_err, int do_b, int indices[Tile::TILE_TOTAL])
{ {
// we have the old endpoints: old_endpts // we have the old endpoints: old_endpts
// we have the perturbed endpoints: new_endpts // we have the perturbed endpoints: new_endpts
@ -539,10 +541,10 @@ static double perturb_one(const Vec4 colors[], int np, int ch, const RegionPrec
// for np = 16 -- adjust error thresholds as a function of np // for np = 16 -- adjust error thresholds as a function of np
// always ensure endpoint ordering is preserved (no need to overlap the scan) // always ensure endpoint ordering is preserved (no need to overlap the scan)
// if orig_err returned from this is less than its input value, then indices[] will contain valid indices // if orig_err returned from this is less than its input value, then indices[] will contain valid indices
static double exhaustive(const Vec4 colors[], int np, int ch, const RegionPrec &region_prec, double orig_err, IntEndptsRGBA_2 &opt_endpts, int indices[Tile::TILE_TOTAL]) static float exhaustive(const Vector4 colors[], int np, int ch, const RegionPrec &region_prec, float orig_err, IntEndptsRGBA_2 &opt_endpts, int indices[Tile::TILE_TOTAL])
{ {
IntEndptsRGBA_2 temp_endpts; IntEndptsRGBA_2 temp_endpts;
double best_err = orig_err; float best_err = orig_err;
int aprec = region_prec.endpt_a_prec[ch]; int aprec = region_prec.endpt_a_prec[ch];
int bprec = region_prec.endpt_b_prec[ch]; int bprec = region_prec.endpt_b_prec[ch];
int good_indices[Tile::TILE_TOTAL]; int good_indices[Tile::TILE_TOTAL];
@ -551,7 +553,7 @@ static double exhaustive(const Vec4 colors[], int np, int ch, const RegionPrec &
for (int i=0; i<np; ++i) for (int i=0; i<np; ++i)
indices[i] = -1; indices[i] = -1;
double thr_scale = (double)np / (double)Tile::TILE_TOTAL; float thr_scale = (float)np / (float)Tile::TILE_TOTAL;
if (orig_err == 0) return orig_err; if (orig_err == 0) return orig_err;
@ -560,8 +562,8 @@ static double exhaustive(const Vec4 colors[], int np, int ch, const RegionPrec &
else if (orig_err > 1000.0*thr_scale) { adelta = (1 << aprec)/4; bdelta = (1 << bprec)/4; } else if (orig_err > 1000.0*thr_scale) { adelta = (1 << aprec)/4; bdelta = (1 << bprec)/4; }
else if (orig_err > 200.0*thr_scale) { adelta = (1 << aprec)/8; bdelta = (1 << bprec)/8; } else if (orig_err > 200.0*thr_scale) { adelta = (1 << aprec)/8; bdelta = (1 << bprec)/8; }
else if (orig_err > 40.0*thr_scale) { adelta = (1 << aprec)/16; bdelta = (1 << bprec)/16; } else if (orig_err > 40.0*thr_scale) { adelta = (1 << aprec)/16; bdelta = (1 << bprec)/16; }
adelta = MAX(adelta, 3); adelta = max(adelta, 3);
bdelta = MAX(bdelta, 3); bdelta = max(bdelta, 3);
#ifdef DISABLE_EXHAUSTIVE #ifdef DISABLE_EXHAUSTIVE
adelta = bdelta = 3; adelta = bdelta = 3;
@ -570,10 +572,10 @@ static double exhaustive(const Vec4 colors[], int np, int ch, const RegionPrec &
temp_endpts = opt_endpts; temp_endpts = opt_endpts;
// ok figure out the range of A and B // ok figure out the range of A and B
int alow = MAX(0, opt_endpts.A[ch] - adelta); int alow = max(0, opt_endpts.A[ch] - adelta);
int ahigh = MIN((1<<aprec)-1, opt_endpts.A[ch] + adelta); int ahigh = min((1<<aprec)-1, opt_endpts.A[ch] + adelta);
int blow = MAX(0, opt_endpts.B[ch] - bdelta); int blow = max(0, opt_endpts.B[ch] - bdelta);
int bhigh = MIN((1<<bprec)-1, opt_endpts.B[ch] + bdelta); int bhigh = min((1<<bprec)-1, opt_endpts.B[ch] + bdelta);
// now there's no need to swap the ordering of A and B // now there's no need to swap the ordering of A and B
bool a_le_b = opt_endpts.A[ch] <= opt_endpts.B[ch]; bool a_le_b = opt_endpts.A[ch] <= opt_endpts.B[ch];
@ -584,7 +586,7 @@ static double exhaustive(const Vec4 colors[], int np, int ch, const RegionPrec &
{ {
// keep a <= b // keep a <= b
for (int a = alow; a <= ahigh; ++a) for (int a = alow; a <= ahigh; ++a)
for (int b = MAX(a, blow); b < bhigh; ++b) for (int b = max(a, blow); b < bhigh; ++b)
{ {
temp_endpts.A[ch] = a; temp_endpts.A[ch] = a;
temp_endpts.B[ch] = b; temp_endpts.B[ch] = b;
@ -604,7 +606,7 @@ static double exhaustive(const Vec4 colors[], int np, int ch, const RegionPrec &
{ {
// keep b <= a // keep b <= a
for (int b = blow; b < bhigh; ++b) for (int b = blow; b < bhigh; ++b)
for (int a = MAX(b, alow); a <= ahigh; ++a) for (int a = max(b, alow); a <= ahigh; ++a)
{ {
temp_endpts.A[ch] = a; temp_endpts.A[ch] = a;
temp_endpts.B[ch] = b; temp_endpts.B[ch] = b;
@ -632,9 +634,9 @@ static double exhaustive(const Vec4 colors[], int np, int ch, const RegionPrec &
return best_err; return best_err;
} }
static double optimize_one(const Vec4 colors[], int np, double orig_err, const IntEndptsRGBA_2 &orig_endpts, const RegionPrec &region_prec, IntEndptsRGBA_2 &opt_endpts) static float optimize_one(const Vector4 colors[], int np, float orig_err, const IntEndptsRGBA_2 &orig_endpts, const RegionPrec &region_prec, IntEndptsRGBA_2 &opt_endpts)
{ {
double opt_err = orig_err; float opt_err = orig_err;
opt_endpts = orig_endpts; opt_endpts = orig_endpts;
@ -682,7 +684,7 @@ static double optimize_one(const Vec4 colors[], int np, double orig_err, const I
for (int i=0; i<np; ++i) for (int i=0; i<np; ++i)
{ {
new_indices[i] = orig_indices[i] = temp_indices0[i]; new_indices[i] = orig_indices[i] = temp_indices0[i];
assert (orig_indices[i] != -1); nvAssert (orig_indices[i] != -1);
} }
opt_endpts.A[ch] = new_a.A[ch]; opt_endpts.A[ch] = new_a.A[ch];
@ -697,7 +699,7 @@ static double optimize_one(const Vec4 colors[], int np, double orig_err, const I
for (int i=0; i<np; ++i) for (int i=0; i<np; ++i)
{ {
new_indices[i] = orig_indices[i] = temp_indices1[i]; new_indices[i] = orig_indices[i] = temp_indices1[i];
assert (orig_indices[i] != -1); nvAssert (orig_indices[i] != -1);
} }
opt_endpts.B[ch] = new_b.B[ch]; opt_endpts.B[ch] = new_b.B[ch];
@ -715,7 +717,7 @@ static double optimize_one(const Vec4 colors[], int np, double orig_err, const I
for (int i=0; i<np; ++i) for (int i=0; i<np; ++i)
{ {
new_indices[i] = temp_indices0[i]; new_indices[i] = temp_indices0[i];
assert (new_indices[i] != -1); nvAssert (new_indices[i] != -1);
} }
if (do_b == 0) if (do_b == 0)
@ -742,7 +744,7 @@ static double optimize_one(const Vec4 colors[], int np, double orig_err, const I
bool first = true; bool first = true;
for (int ch = 0; ch < NCHANNELS_RGBA; ++ch) for (int ch = 0; ch < NCHANNELS_RGBA; ++ch)
{ {
double new_err = exhaustive(colors, np, ch, region_prec, opt_err, opt_endpts, temp_indices0); float new_err = exhaustive(colors, np, ch, region_prec, opt_err, opt_endpts, temp_indices0);
if (new_err < opt_err) if (new_err < opt_err)
{ {
@ -753,7 +755,7 @@ static double optimize_one(const Vec4 colors[], int np, double orig_err, const I
for (int i=0; i<np; ++i) for (int i=0; i<np; ++i)
{ {
orig_indices[i] = temp_indices0[i]; orig_indices[i] = temp_indices0[i];
assert (orig_indices[i] != -1); nvAssert (orig_indices[i] != -1);
} }
first = false; first = false;
} }
@ -777,10 +779,10 @@ static double optimize_one(const Vec4 colors[], int np, double orig_err, const I
return opt_err; return opt_err;
} }
static void optimize_endpts(const Tile &tile, int shapeindex, const double orig_err[NREGIONS], static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_err[NREGIONS],
IntEndptsRGBA_2 orig_endpts[NREGIONS], const PatternPrec &pattern_prec, double opt_err[NREGIONS], IntEndptsRGBA_2 opt_endpts[NREGIONS]) IntEndptsRGBA_2 orig_endpts[NREGIONS], const PatternPrec &pattern_prec, float opt_err[NREGIONS], IntEndptsRGBA_2 opt_endpts[NREGIONS])
{ {
Vec4 pixels[Tile::TILE_TOTAL]; Vector4 pixels[Tile::TILE_TOTAL];
IntEndptsRGBA_2 temp_in, temp_out; IntEndptsRGBA_2 temp_in, temp_out;
int temp_indices[Tile::TILE_TOTAL]; int temp_indices[Tile::TILE_TOTAL];
@ -797,7 +799,7 @@ static void optimize_endpts(const Tile &tile, int shapeindex, const double orig_
opt_endpts[region] = temp_in = orig_endpts[region]; opt_endpts[region] = temp_in = orig_endpts[region];
opt_err[region] = orig_err[region]; opt_err[region] = orig_err[region];
double best_err = orig_err[region]; float best_err = orig_err[region];
// try all lsb modes as we search for better endpoints // try all lsb modes as we search for better endpoints
for (int lsbmode=0; lsbmode<NLSBMODES; ++lsbmode) for (int lsbmode=0; lsbmode<NLSBMODES; ++lsbmode)
@ -806,12 +808,12 @@ static void optimize_endpts(const Tile &tile, int shapeindex, const double orig_
temp_in.b_lsb = (lsbmode >> 1) & 1; temp_in.b_lsb = (lsbmode >> 1) & 1;
// make sure we have a valid error for temp_in // make sure we have a valid error for temp_in
// we use DBL_MAX here because we want an accurate temp_in_err, no shortcuts // we use FLT_MAX here because we want an accurate temp_in_err, no shortcuts
// (mapcolors will compute a mapping but will stop if the error exceeds the value passed in the DBL_MAX position) // (mapcolors will compute a mapping but will stop if the error exceeds the value passed in the FLT_MAX position)
double temp_in_err = map_colors(pixels, np, temp_in, pattern_prec.region_precs[region], DBL_MAX, temp_indices); float temp_in_err = map_colors(pixels, np, temp_in, pattern_prec.region_precs[region], FLT_MAX, temp_indices);
// now try to optimize these endpoints // now try to optimize these endpoints
double temp_out_err = optimize_one(pixels, np, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out); float temp_out_err = optimize_one(pixels, np, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out);
// if we find an improvement, update the best so far and correct the output endpoints and errors // if we find an improvement, update the best so far and correct the output endpoints and errors
if (temp_out_err < best_err) if (temp_out_err < best_err)
@ -843,9 +845,9 @@ static void optimize_endpts(const Tile &tile, int shapeindex, const double orig_
simplify the above given that there is no transform now and that endpoints will always fit simplify the above given that there is no transform now and that endpoints will always fit
*/ */
static double refine(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS], char *block) static float refine(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS], char *block)
{ {
double orig_err[NREGIONS], opt_err[NREGIONS], orig_toterr, opt_toterr, expected_opt_err[NREGIONS]; float orig_err[NREGIONS], opt_err[NREGIONS], orig_toterr, opt_toterr, expected_opt_err[NREGIONS];
IntEndptsRGBA_2 orig_endpts[NREGIONS], opt_endpts[NREGIONS]; IntEndptsRGBA_2 orig_endpts[NREGIONS], opt_endpts[NREGIONS];
int orig_indices[Tile::TILE_H][Tile::TILE_W], opt_indices[Tile::TILE_H][Tile::TILE_W]; int orig_indices[Tile::TILE_H][Tile::TILE_W], opt_indices[Tile::TILE_H][Tile::TILE_W];
@ -858,13 +860,14 @@ static double refine(const Tile &tile, int shapeindex_best, const FltEndpts endp
optimize_endpts(tile, shapeindex_best, orig_err, orig_endpts, pattern_precs[sp], expected_opt_err, opt_endpts); optimize_endpts(tile, shapeindex_best, orig_err, orig_endpts, pattern_precs[sp], expected_opt_err, opt_endpts);
assign_indices(tile, shapeindex_best, opt_endpts, pattern_precs[sp], opt_indices, opt_err); assign_indices(tile, shapeindex_best, opt_endpts, pattern_precs[sp], opt_indices, opt_err);
for (int i=0; i<NREGIONS; ++i) // (nreed) Commented out asserts because they go off all the time...not sure why
assert(expected_opt_err[i] == opt_err[i]); //for (int i=0; i<NREGIONS; ++i)
// nvAssert(expected_opt_err[i] == opt_err[i]);
swap_indices(opt_endpts, opt_indices, shapeindex_best); swap_indices(opt_endpts, opt_indices, shapeindex_best);
orig_toterr = opt_toterr = 0; orig_toterr = opt_toterr = 0;
for (int i=0; i < NREGIONS; ++i) { orig_toterr += orig_err[i]; opt_toterr += opt_err[i]; } for (int i=0; i < NREGIONS; ++i) { orig_toterr += orig_err[i]; opt_toterr += opt_err[i]; }
assert (opt_toterr <= orig_toterr); //nvAssert(opt_toterr <= orig_toterr);
if (opt_toterr < orig_toterr) if (opt_toterr < orig_toterr)
{ {
@ -880,41 +883,41 @@ static double refine(const Tile &tile, int shapeindex_best, const FltEndpts endp
throw "No candidate found, should never happen (avpcl mode 6)."; throw "No candidate found, should never happen (avpcl mode 6).";
} }
static void clamp(Vec4 &v) static void clamp(Vector4 &v)
{ {
if (v.X() < RGBA_MIN) v.X() = RGBA_MIN; if (v.x < 0.0f) v.x = 0.0f;
if (v.X() > RGBA_MAX) v.X() = RGBA_MAX; if (v.x > 255.0f) v.x = 255.0f;
if (v.Y() < RGBA_MIN) v.Y() = RGBA_MIN; if (v.y < 0.0f) v.y = 0.0f;
if (v.Y() > RGBA_MAX) v.Y() = RGBA_MAX; if (v.y > 255.0f) v.y = 255.0f;
if (v.Z() < RGBA_MIN) v.Z() = RGBA_MIN; if (v.z < 0.0f) v.z = 0.0f;
if (v.Z() > RGBA_MAX) v.Z() = RGBA_MAX; if (v.z > 255.0f) v.z = 255.0f;
if (v.W() < RGBA_MIN) v.W() = RGBA_MIN; if (v.w < 0.0f) v.w = 0.0f;
if (v.W() > RGBA_MAX) v.W() = RGBA_MAX; if (v.w > 255.0f) v.w = 255.0f;
} }
static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS], Vec4 palette[NREGIONS][NINDICES]) static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS], Vector4 palette[NREGIONS][NINDICES])
{ {
for (int region = 0; region < NREGIONS; ++region) for (int region = 0; region < NREGIONS; ++region)
for (int i = 0; i < NINDICES; ++i) for (int i = 0; i < NINDICES; ++i)
palette[region][i] = PALETTE_LERP(endpts[region].A, endpts[region].B, i, 0.0, float(DENOM)); palette[region][i] = Utils::lerp(endpts[region].A, endpts[region].B, i, 0, DENOM);
} }
// generate a palette from unquantized endpoints, then pick best palette color for all pixels in each region, return toterr for all regions combined // generate a palette from unquantized endpoints, then pick best palette color for all pixels in each region, return toterr for all regions combined
static double map_colors(const Tile &tile, int shapeindex, const FltEndpts endpts[NREGIONS]) static float map_colors(const Tile &tile, int shapeindex, const FltEndpts endpts[NREGIONS])
{ {
// build list of possibles // build list of possibles
Vec4 palette[NREGIONS][NINDICES]; Vector4 palette[NREGIONS][NINDICES];
generate_palette_unquantized(endpts, palette); generate_palette_unquantized(endpts, palette);
double toterr = 0; float toterr = 0;
Vec4 err; Vector4 err;
for (int y = 0; y < tile.size_y; y++) for (int y = 0; y < tile.size_y; y++)
for (int x = 0; x < tile.size_x; x++) for (int x = 0; x < tile.size_x; x++)
{ {
int region = REGION(x,y,shapeindex); int region = REGION(x,y,shapeindex);
double err, besterr; float err, besterr;
besterr = Utils::metric4(tile.data[y][x], palette[region][0]); besterr = Utils::metric4(tile.data[y][x], palette[region][0]);
@ -932,13 +935,13 @@ static double map_colors(const Tile &tile, int shapeindex, const FltEndpts endpt
return toterr; return toterr;
} }
static double rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS]) static float rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS])
{ {
for (int region=0; region<NREGIONS; ++region) for (int region=0; region<NREGIONS; ++region)
{ {
int np = 0; int np = 0;
Vec4 colors[Tile::TILE_TOTAL]; Vector4 colors[Tile::TILE_TOTAL];
Vec4 mean(0,0,0,0); Vector4 mean(0,0,0,0);
for (int y = 0; y < tile.size_y; y++) for (int y = 0; y < tile.size_y; y++)
for (int x = 0; x < tile.size_x; x++) for (int x = 0; x < tile.size_x; x++)
@ -952,7 +955,7 @@ static double rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS]
// handle simple cases // handle simple cases
if (np == 0) if (np == 0)
{ {
Vec4 zero(0,0,0,RGBA_MAX); Vector4 zero(0,0,0,255.0f);
endpts[region].A = zero; endpts[region].A = zero;
endpts[region].B = zero; endpts[region].B = zero;
continue; continue;
@ -970,29 +973,15 @@ static double rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS]
continue; continue;
} }
Matrix rdq(np, 4);
mean /= float(np); mean /= float(np);
for (int i = 0; i < np; ++i) Vector4 direction = Fit::computePrincipalComponent_EigenSolver(np, colors);
{
rdq(i,0) = colors[i].X() - mean.X();
rdq(i,1) = colors[i].Y() - mean.Y();
rdq(i,2) = colors[i].Z() - mean.Z();
rdq(i,3) = colors[i].W() - mean.W();
}
// perform a singular value decomposition
SVD svd(rdq);
// get the principal component direction (the one with the largest weight)
Vec4 direction(svd.R()(0,0), svd.R()(0,1), svd.R()(0,2), svd.R()(0,3));
// project each pixel value along the principal direction // project each pixel value along the principal direction
double minp = DBL_MAX, maxp = -DBL_MAX; float minp = FLT_MAX, maxp = -FLT_MAX;
for (int i = 0; i < np; i++) for (int i = 0; i < np; i++)
{ {
float dp = rdq(i,0) * direction.X() + rdq(i,1)*direction.Y() + rdq(i,2)*direction.Z() + rdq(i,3)*direction.W(); float dp = dot(colors[i]-mean, direction);
if (dp < minp) minp = dp; if (dp < minp) minp = dp;
if (dp > maxp) maxp = dp; if (dp > maxp) maxp = dp;
} }
@ -1011,13 +1000,13 @@ static double rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS]
return map_colors(tile, shapeindex, endpts); return map_colors(tile, shapeindex, endpts);
} }
static void swap(double *list1, int *list2, int i, int j) static void swap(float *list1, int *list2, int i, int j)
{ {
double t = list1[i]; list1[i] = list1[j]; list1[j] = t; float t = list1[i]; list1[i] = list1[j]; list1[j] = t;
int t1 = list2[i]; list2[i] = list2[j]; list2[j] = t1; int t1 = list2[i]; list2[i] = list2[j]; list2[j] = t1;
} }
double AVPCL::compress_mode6(const Tile &t, char *block) float AVPCL::compress_mode6(const Tile &t, char *block)
{ {
// number of rough cases to look at. reasonable values of this are 1, NSHAPES/4, and NSHAPES // number of rough cases to look at. reasonable values of this are 1, NSHAPES/4, and NSHAPES
// NSHAPES/4 gets nearly all the cases; you can increase that a bit (say by 3 or 4) if you really want to squeeze the last bit out // NSHAPES/4 gets nearly all the cases; you can increase that a bit (say by 3 or 4) if you really want to squeeze the last bit out
@ -1027,10 +1016,10 @@ double AVPCL::compress_mode6(const Tile &t, char *block)
struct { struct {
FltEndpts endpts[NREGIONS]; FltEndpts endpts[NREGIONS];
} all[NSHAPES]; } all[NSHAPES];
double roughmse[NSHAPES]; float roughmse[NSHAPES];
int index[NSHAPES]; int index[NSHAPES];
char tempblock[AVPCL::BLOCKSIZE]; char tempblock[AVPCL::BLOCKSIZE];
double msebest = DBL_MAX; float msebest = FLT_MAX;
for (int i=0; i<NSHAPES; ++i) for (int i=0; i<NSHAPES; ++i)
{ {
@ -1047,7 +1036,7 @@ double AVPCL::compress_mode6(const Tile &t, char *block)
for (int i=0; i<NITEMS && msebest>0; ++i) for (int i=0; i<NITEMS && msebest>0; ++i)
{ {
int shape = index[i]; int shape = index[i];
double mse = refine(t, shape, &all[shape].endpts[0], tempblock); float mse = refine(t, shape, &all[shape].endpts[0], tempblock);
if (mse < msebest) if (mse < msebest)
{ {
memcpy(block, tempblock, sizeof(tempblock)); memcpy(block, tempblock, sizeof(tempblock));

View File

@ -17,17 +17,18 @@ See the License for the specific language governing permissions and limitations
#include "bits.h" #include "bits.h"
#include "tile.h" #include "tile.h"
#include "avpcl.h" #include "avpcl.h"
#include "arvo/Vec4.h" #include "nvcore/Debug.h"
#include "arvo/Matrix.h" #include "nvmath/Vector.inl"
#include "arvo/SVD.h" #include "nvmath/Matrix.inl"
#include "nvmath/Fitting.h"
#include "utils.h" #include "utils.h"
#include "endpts.h" #include "endpts.h"
#include <cstring>
#include <assert.h>
#include "shapes_two.h" #include "shapes_two.h"
using namespace ArvoMath; using namespace nv;
using namespace AVPCL;
#define NLSBMODES 4 // number of different lsb modes per region. since we have two .1 per region, that can have 4 values #define NLSBMODES 4 // number of different lsb modes per region. since we have two .1 per region, that can have 4 values
@ -84,7 +85,7 @@ struct PatternPrec
// this is the precision for each channel and region // this is the precision for each channel and region
// NOTE: this MUST match the corresponding data in "patterns" above -- WARNING: there is NO assert to check this! // NOTE: this MUST match the corresponding data in "patterns" above -- WARNING: there is NO nvAssert to check this!
static PatternPrec pattern_precs[NPATTERNS] = static PatternPrec pattern_precs[NPATTERNS] =
{ {
5,5,5,5, 5,5,5,5, 5,5,5,5, 5,5,5,5, 5,5,5,5, 5,5,5,5, 5,5,5,5, 5,5,5,5,
@ -103,7 +104,7 @@ static int nbits(int n, bool issigned)
} }
else else
{ {
assert (issigned); nvAssert (issigned);
for (nb=0; n<-1; ++nb, n>>=1) ; for (nb=0; n<-1; ++nb, n>>=1) ;
return nb + 1; return nb + 1;
} }
@ -111,12 +112,12 @@ static int nbits(int n, bool issigned)
static void transform_forward(IntEndptsRGBA_2 ep[NREGIONS]) static void transform_forward(IntEndptsRGBA_2 ep[NREGIONS])
{ {
assert(0); nvUnreachable();
} }
static void transform_inverse(IntEndptsRGBA_2 ep[NREGIONS]) static void transform_inverse(IntEndptsRGBA_2 ep[NREGIONS])
{ {
assert(0); nvUnreachable();
} }
/* /*
@ -154,7 +155,7 @@ static void compress_one(const IntEndptsRGBA& endpts, IntEndptsRGBA_2& compr_end
// ignore the alpha channel in the count // ignore the alpha channel in the count
onescnt += (j==CHANNEL_A) ? 0 : (endpts.A[j] & 1); onescnt += (j==CHANNEL_A) ? 0 : (endpts.A[j] & 1);
compr_endpts.A[j] = endpts.A[j] >> 1; compr_endpts.A[j] = endpts.A[j] >> 1;
assert (compr_endpts.A[j] < 32); nvAssert (compr_endpts.A[j] < 32);
} }
compr_endpts.a_lsb = onescnt >= 2; compr_endpts.a_lsb = onescnt >= 2;
@ -163,7 +164,7 @@ static void compress_one(const IntEndptsRGBA& endpts, IntEndptsRGBA_2& compr_end
{ {
onescnt += (j==CHANNEL_A) ? 0 : (endpts.B[j] & 1); onescnt += (j==CHANNEL_A) ? 0 : (endpts.B[j] & 1);
compr_endpts.B[j] = endpts.B[j] >> 1; compr_endpts.B[j] = endpts.B[j] >> 1;
assert (compr_endpts.B[j] < 32); nvAssert (compr_endpts.B[j] < 32);
} }
compr_endpts.b_lsb = onescnt >= 2; compr_endpts.b_lsb = onescnt >= 2;
} }
@ -194,15 +195,15 @@ static void quantize_endpts(const FltEndpts endpts[NREGIONS], const PatternPrec
for (int region = 0; region < NREGIONS; ++region) for (int region = 0; region < NREGIONS; ++region)
{ {
full_endpts[region].A[0] = Utils::quantize(endpts[region].A.X(), pattern_prec.region_precs[region].endpt_a_prec[0]+1); // +1 since we are in uncompressed space full_endpts[region].A[0] = Utils::quantize(endpts[region].A.x, pattern_prec.region_precs[region].endpt_a_prec[0]+1); // +1 since we are in uncompressed space
full_endpts[region].A[1] = Utils::quantize(endpts[region].A.Y(), pattern_prec.region_precs[region].endpt_a_prec[1]+1); full_endpts[region].A[1] = Utils::quantize(endpts[region].A.y, pattern_prec.region_precs[region].endpt_a_prec[1]+1);
full_endpts[region].A[2] = Utils::quantize(endpts[region].A.Z(), pattern_prec.region_precs[region].endpt_a_prec[2]+1); full_endpts[region].A[2] = Utils::quantize(endpts[region].A.z, pattern_prec.region_precs[region].endpt_a_prec[2]+1);
full_endpts[region].A[3] = Utils::quantize(endpts[region].A.W(), pattern_prec.region_precs[region].endpt_a_prec[3]+1); full_endpts[region].A[3] = Utils::quantize(endpts[region].A.w, pattern_prec.region_precs[region].endpt_a_prec[3]+1);
full_endpts[region].B[0] = Utils::quantize(endpts[region].B.X(), pattern_prec.region_precs[region].endpt_b_prec[0]+1); full_endpts[region].B[0] = Utils::quantize(endpts[region].B.x, pattern_prec.region_precs[region].endpt_b_prec[0]+1);
full_endpts[region].B[1] = Utils::quantize(endpts[region].B.Y(), pattern_prec.region_precs[region].endpt_b_prec[1]+1); full_endpts[region].B[1] = Utils::quantize(endpts[region].B.y, pattern_prec.region_precs[region].endpt_b_prec[1]+1);
full_endpts[region].B[2] = Utils::quantize(endpts[region].B.Z(), pattern_prec.region_precs[region].endpt_b_prec[2]+1); full_endpts[region].B[2] = Utils::quantize(endpts[region].B.z, pattern_prec.region_precs[region].endpt_b_prec[2]+1);
full_endpts[region].B[3] = Utils::quantize(endpts[region].B.W(), pattern_prec.region_precs[region].endpt_b_prec[3]+1); full_endpts[region].B[3] = Utils::quantize(endpts[region].B.w, pattern_prec.region_precs[region].endpt_b_prec[3]+1);
compress_one(full_endpts[region], q_endpts[region]); compress_one(full_endpts[region], q_endpts[region]);
} }
@ -218,7 +219,7 @@ static void swap_indices(IntEndptsRGBA_2 endpts[NREGIONS], int indices[Tile::TIL
int x = POS_TO_X(position); int x = POS_TO_X(position);
int y = POS_TO_Y(position); int y = POS_TO_Y(position);
assert(REGION(x,y,shapeindex) == region); // double check the table nvAssert(REGION(x,y,shapeindex) == region); // double check the table
if (indices[y][x] & HIGH_INDEXBIT) if (indices[y][x] & HIGH_INDEXBIT)
{ {
// high bit is set, swap the endpts and indices for this region // high bit is set, swap the endpts and indices for this region
@ -260,7 +261,7 @@ static void write_header(const IntEndptsRGBA_2 endpts[NREGIONS], int shapeindex,
out.write(endpts[i].b_lsb, 1); out.write(endpts[i].b_lsb, 1);
} }
assert (out.getptr() == 98); nvAssert (out.getptr() == 98);
} }
static void read_header(Bits &in, IntEndptsRGBA_2 endpts[NREGIONS], int &shapeindex, Pattern &p, int &pat_index) static void read_header(Bits &in, IntEndptsRGBA_2 endpts[NREGIONS], int &shapeindex, Pattern &p, int &pat_index)
@ -268,8 +269,8 @@ static void read_header(Bits &in, IntEndptsRGBA_2 endpts[NREGIONS], int &shapein
int mode = AVPCL::getmode(in); int mode = AVPCL::getmode(in);
pat_index = 0; pat_index = 0;
assert (pat_index >= 0 && pat_index < NPATTERNS); nvAssert (pat_index >= 0 && pat_index < NPATTERNS);
assert (in.getptr() == patterns[pat_index].modebits); nvAssert (in.getptr() == patterns[pat_index].modebits);
shapeindex = in.read(SHAPEBITS); shapeindex = in.read(SHAPEBITS);
p = patterns[pat_index]; p = patterns[pat_index];
@ -287,7 +288,7 @@ static void read_header(Bits &in, IntEndptsRGBA_2 endpts[NREGIONS], int &shapein
endpts[i].b_lsb = in.read(1); endpts[i].b_lsb = in.read(1);
} }
assert (in.getptr() == 98); nvAssert (in.getptr() == 98);
} }
// WORK PLACEHOLDER -- keep it simple for now // WORK PLACEHOLDER -- keep it simple for now
@ -341,10 +342,10 @@ static void emit_block(const IntEndptsRGBA_2 endpts[NREGIONS], int shapeindex, c
write_indices(indices, shapeindex, out); write_indices(indices, shapeindex, out);
assert(out.getptr() == AVPCL::BITSIZE); nvAssert(out.getptr() == AVPCL::BITSIZE);
} }
static void generate_palette_quantized(const IntEndptsRGBA_2 &endpts_2, const RegionPrec &region_prec, Vec4 palette[NINDICES]) static void generate_palette_quantized(const IntEndptsRGBA_2 &endpts_2, const RegionPrec &region_prec, Vector4 palette[NINDICES])
{ {
IntEndptsRGBA endpts; IntEndptsRGBA endpts;
@ -358,34 +359,34 @@ static void generate_palette_quantized(const IntEndptsRGBA_2 &endpts_2, const Re
// interpolate // interpolate
for (int i = 0; i < NINDICES; ++i) for (int i = 0; i < NINDICES; ++i)
palette[i].X() = PALETTE_LERP(a, b, i, BIAS, DENOM); palette[i].x = float(Utils::lerp(a, b, i, BIAS, DENOM));
a = Utils::unquantize(endpts.A[1], region_prec.endpt_a_prec[1]+1); a = Utils::unquantize(endpts.A[1], region_prec.endpt_a_prec[1]+1);
b = Utils::unquantize(endpts.B[1], region_prec.endpt_b_prec[1]+1); b = Utils::unquantize(endpts.B[1], region_prec.endpt_b_prec[1]+1);
// interpolate // interpolate
for (int i = 0; i < NINDICES; ++i) for (int i = 0; i < NINDICES; ++i)
palette[i].Y() = PALETTE_LERP(a, b, i, BIAS, DENOM); palette[i].y = float(Utils::lerp(a, b, i, BIAS, DENOM));
a = Utils::unquantize(endpts.A[2], region_prec.endpt_a_prec[2]+1); a = Utils::unquantize(endpts.A[2], region_prec.endpt_a_prec[2]+1);
b = Utils::unquantize(endpts.B[2], region_prec.endpt_b_prec[2]+1); b = Utils::unquantize(endpts.B[2], region_prec.endpt_b_prec[2]+1);
// interpolate // interpolate
for (int i = 0; i < NINDICES; ++i) for (int i = 0; i < NINDICES; ++i)
palette[i].Z() = PALETTE_LERP(a, b, i, BIAS, DENOM); palette[i].z = float(Utils::lerp(a, b, i, BIAS, DENOM));
a = Utils::unquantize(endpts.A[3], region_prec.endpt_a_prec[3]+1); a = Utils::unquantize(endpts.A[3], region_prec.endpt_a_prec[3]+1);
b = Utils::unquantize(endpts.B[3], region_prec.endpt_b_prec[3]+1); b = Utils::unquantize(endpts.B[3], region_prec.endpt_b_prec[3]+1);
// interpolate // interpolate
for (int i = 0; i < NINDICES; ++i) for (int i = 0; i < NINDICES; ++i)
palette[i].W() = PALETTE_LERP(a, b, i, BIAS, DENOM); palette[i].w = float(Utils::lerp(a, b, i, BIAS, DENOM));
} }
// sign extend but only if it was transformed // sign extend but only if it was transformed
static void sign_extend(Pattern &p, IntEndptsRGBA_2 endpts[NREGIONS]) static void sign_extend(Pattern &p, IntEndptsRGBA_2 endpts[NREGIONS])
{ {
assert(0); nvUnreachable();
} }
void AVPCL::decompress_mode7(const char *block, Tile &t) void AVPCL::decompress_mode7(const char *block, Tile &t)
@ -404,7 +405,7 @@ void AVPCL::decompress_mode7(const char *block, Tile &t)
transform_inverse(endpts); transform_inverse(endpts);
} }
Vec4 palette[NREGIONS][NINDICES]; Vector4 palette[NREGIONS][NINDICES];
for (int r = 0; r < NREGIONS; ++r) for (int r = 0; r < NREGIONS; ++r)
generate_palette_quantized(endpts[r], pattern_precs[pat_index].region_precs[r], &palette[r][0]); generate_palette_quantized(endpts[r], pattern_precs[pat_index].region_precs[r], &palette[r][0]);
@ -412,7 +413,7 @@ void AVPCL::decompress_mode7(const char *block, Tile &t)
read_indices(in, shapeindex, indices); read_indices(in, shapeindex, indices);
assert(in.getptr() == AVPCL::BITSIZE); nvAssert(in.getptr() == AVPCL::BITSIZE);
// lookup // lookup
for (int y = 0; y < Tile::TILE_H; y++) for (int y = 0; y < Tile::TILE_H; y++)
@ -421,17 +422,17 @@ void AVPCL::decompress_mode7(const char *block, Tile &t)
} }
// given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr // given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr
static double map_colors(const Vec4 colors[], int np, const IntEndptsRGBA_2 &endpts, const RegionPrec &region_prec, double current_err, int indices[Tile::TILE_TOTAL]) static float map_colors(const Vector4 colors[], int np, const IntEndptsRGBA_2 &endpts, const RegionPrec &region_prec, float current_err, int indices[Tile::TILE_TOTAL])
{ {
Vec4 palette[NINDICES]; Vector4 palette[NINDICES];
double toterr = 0; float toterr = 0;
Vec4 err; Vector4 err;
generate_palette_quantized(endpts, region_prec, palette); generate_palette_quantized(endpts, region_prec, palette);
for (int i = 0; i < np; ++i) for (int i = 0; i < np; ++i)
{ {
double err, besterr = DBL_MAX; float err, besterr = FLT_MAX;
for (int j = 0; j < NINDICES && besterr > 0; ++j) for (int j = 0; j < NINDICES && besterr > 0; ++j)
{ {
@ -455,7 +456,7 @@ static double map_colors(const Vec4 colors[], int np, const IntEndptsRGBA_2 &end
for (int k = i; k < np; ++k) for (int k = i; k < np; ++k)
indices[k] = -1; indices[k] = -1;
return DBL_MAX; return FLT_MAX;
} }
} }
return toterr; return toterr;
@ -463,10 +464,10 @@ static double map_colors(const Vec4 colors[], int np, const IntEndptsRGBA_2 &end
// assign indices given a tile, shape, and quantized endpoints, return toterr for each region // assign indices given a tile, shape, and quantized endpoints, return toterr for each region
static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGBA_2 endpts[NREGIONS], const PatternPrec &pattern_prec, static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGBA_2 endpts[NREGIONS], const PatternPrec &pattern_prec,
int indices[Tile::TILE_H][Tile::TILE_W], double toterr[NREGIONS]) int indices[Tile::TILE_H][Tile::TILE_W], float toterr[NREGIONS])
{ {
// build list of possibles // build list of possibles
Vec4 palette[NREGIONS][NINDICES]; Vector4 palette[NREGIONS][NINDICES];
for (int region = 0; region < NREGIONS; ++region) for (int region = 0; region < NREGIONS; ++region)
{ {
@ -474,13 +475,13 @@ static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGBA_2 end
toterr[region] = 0; toterr[region] = 0;
} }
Vec4 err; Vector4 err;
for (int y = 0; y < tile.size_y; y++) for (int y = 0; y < tile.size_y; y++)
for (int x = 0; x < tile.size_x; x++) for (int x = 0; x < tile.size_x; x++)
{ {
int region = REGION(x,y,shapeindex); int region = REGION(x,y,shapeindex);
double err, besterr = DBL_MAX; float err, besterr = FLT_MAX;
for (int i = 0; i < NINDICES && besterr > 0; ++i) for (int i = 0; i < NINDICES && besterr > 0; ++i)
{ {
@ -501,8 +502,8 @@ static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGBA_2 end
// note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's // note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's
// this function returns either old_err or a value smaller (if it was successful in improving the error) // this function returns either old_err or a value smaller (if it was successful in improving the error)
static double perturb_one(const Vec4 colors[], int np, int ch, const RegionPrec &region_prec, const IntEndptsRGBA_2 &old_endpts, IntEndptsRGBA_2 &new_endpts, static float perturb_one(const Vector4 colors[], int np, int ch, const RegionPrec &region_prec, const IntEndptsRGBA_2 &old_endpts, IntEndptsRGBA_2 &new_endpts,
double old_err, int do_b, int indices[Tile::TILE_TOTAL]) float old_err, int do_b, int indices[Tile::TILE_TOTAL])
{ {
// we have the old endpoints: old_endpts // we have the old endpoints: old_endpts
// we have the perturbed endpoints: new_endpts // we have the perturbed endpoints: new_endpts
@ -572,10 +573,10 @@ static double perturb_one(const Vec4 colors[], int np, int ch, const RegionPrec
// for np = 16 -- adjust error thresholds as a function of np // for np = 16 -- adjust error thresholds as a function of np
// always ensure endpoint ordering is preserved (no need to overlap the scan) // always ensure endpoint ordering is preserved (no need to overlap the scan)
// if orig_err returned from this is less than its input value, then indices[] will contain valid indices // if orig_err returned from this is less than its input value, then indices[] will contain valid indices
static double exhaustive(const Vec4 colors[], int np, int ch, const RegionPrec &region_prec, double orig_err, IntEndptsRGBA_2 &opt_endpts, int indices[Tile::TILE_TOTAL]) static float exhaustive(const Vector4 colors[], int np, int ch, const RegionPrec &region_prec, float orig_err, IntEndptsRGBA_2 &opt_endpts, int indices[Tile::TILE_TOTAL])
{ {
IntEndptsRGBA_2 temp_endpts; IntEndptsRGBA_2 temp_endpts;
double best_err = orig_err; float best_err = orig_err;
int aprec = region_prec.endpt_a_prec[ch]; int aprec = region_prec.endpt_a_prec[ch];
int bprec = region_prec.endpt_b_prec[ch]; int bprec = region_prec.endpt_b_prec[ch];
int good_indices[Tile::TILE_TOTAL]; int good_indices[Tile::TILE_TOTAL];
@ -584,7 +585,7 @@ static double exhaustive(const Vec4 colors[], int np, int ch, const RegionPrec &
for (int i=0; i<np; ++i) for (int i=0; i<np; ++i)
indices[i] = -1; indices[i] = -1;
double thr_scale = (double)np / (double)Tile::TILE_TOTAL; float thr_scale = (float)np / (float)Tile::TILE_TOTAL;
if (orig_err == 0) return orig_err; if (orig_err == 0) return orig_err;
@ -593,8 +594,8 @@ static double exhaustive(const Vec4 colors[], int np, int ch, const RegionPrec &
else if (orig_err > 1000.0*thr_scale) { adelta = (1 << aprec)/4; bdelta = (1 << bprec)/4; } else if (orig_err > 1000.0*thr_scale) { adelta = (1 << aprec)/4; bdelta = (1 << bprec)/4; }
else if (orig_err > 200.0*thr_scale) { adelta = (1 << aprec)/8; bdelta = (1 << bprec)/8; } else if (orig_err > 200.0*thr_scale) { adelta = (1 << aprec)/8; bdelta = (1 << bprec)/8; }
else if (orig_err > 40.0*thr_scale) { adelta = (1 << aprec)/16; bdelta = (1 << bprec)/16; } else if (orig_err > 40.0*thr_scale) { adelta = (1 << aprec)/16; bdelta = (1 << bprec)/16; }
adelta = MAX(adelta, 3); adelta = max(adelta, 3);
bdelta = MAX(bdelta, 3); bdelta = max(bdelta, 3);
#ifdef DISABLE_EXHAUSTIVE #ifdef DISABLE_EXHAUSTIVE
adelta = bdelta = 3; adelta = bdelta = 3;
@ -603,10 +604,10 @@ static double exhaustive(const Vec4 colors[], int np, int ch, const RegionPrec &
temp_endpts = opt_endpts; temp_endpts = opt_endpts;
// ok figure out the range of A and B // ok figure out the range of A and B
int alow = MAX(0, opt_endpts.A[ch] - adelta); int alow = max(0, opt_endpts.A[ch] - adelta);
int ahigh = MIN((1<<aprec)-1, opt_endpts.A[ch] + adelta); int ahigh = min((1<<aprec)-1, opt_endpts.A[ch] + adelta);
int blow = MAX(0, opt_endpts.B[ch] - bdelta); int blow = max(0, opt_endpts.B[ch] - bdelta);
int bhigh = MIN((1<<bprec)-1, opt_endpts.B[ch] + bdelta); int bhigh = min((1<<bprec)-1, opt_endpts.B[ch] + bdelta);
// now there's no need to swap the ordering of A and B // now there's no need to swap the ordering of A and B
bool a_le_b = opt_endpts.A[ch] <= opt_endpts.B[ch]; bool a_le_b = opt_endpts.A[ch] <= opt_endpts.B[ch];
@ -617,7 +618,7 @@ static double exhaustive(const Vec4 colors[], int np, int ch, const RegionPrec &
{ {
// keep a <= b // keep a <= b
for (int a = alow; a <= ahigh; ++a) for (int a = alow; a <= ahigh; ++a)
for (int b = MAX(a, blow); b < bhigh; ++b) for (int b = max(a, blow); b < bhigh; ++b)
{ {
temp_endpts.A[ch] = a; temp_endpts.A[ch] = a;
temp_endpts.B[ch] = b; temp_endpts.B[ch] = b;
@ -637,7 +638,7 @@ static double exhaustive(const Vec4 colors[], int np, int ch, const RegionPrec &
{ {
// keep b <= a // keep b <= a
for (int b = blow; b < bhigh; ++b) for (int b = blow; b < bhigh; ++b)
for (int a = MAX(b, alow); a <= ahigh; ++a) for (int a = max(b, alow); a <= ahigh; ++a)
{ {
temp_endpts.A[ch] = a; temp_endpts.A[ch] = a;
temp_endpts.B[ch] = b; temp_endpts.B[ch] = b;
@ -665,9 +666,9 @@ static double exhaustive(const Vec4 colors[], int np, int ch, const RegionPrec &
return best_err; return best_err;
} }
static double optimize_one(const Vec4 colors[], int np, double orig_err, const IntEndptsRGBA_2 &orig_endpts, const RegionPrec &region_prec, IntEndptsRGBA_2 &opt_endpts) static float optimize_one(const Vector4 colors[], int np, float orig_err, const IntEndptsRGBA_2 &orig_endpts, const RegionPrec &region_prec, IntEndptsRGBA_2 &opt_endpts)
{ {
double opt_err = orig_err; float opt_err = orig_err;
opt_endpts = orig_endpts; opt_endpts = orig_endpts;
@ -715,7 +716,7 @@ static double optimize_one(const Vec4 colors[], int np, double orig_err, const I
for (int i=0; i<np; ++i) for (int i=0; i<np; ++i)
{ {
new_indices[i] = orig_indices[i] = temp_indices0[i]; new_indices[i] = orig_indices[i] = temp_indices0[i];
assert (orig_indices[i] != -1); nvAssert (orig_indices[i] != -1);
} }
opt_endpts.A[ch] = new_a.A[ch]; opt_endpts.A[ch] = new_a.A[ch];
@ -730,7 +731,7 @@ static double optimize_one(const Vec4 colors[], int np, double orig_err, const I
for (int i=0; i<np; ++i) for (int i=0; i<np; ++i)
{ {
new_indices[i] = orig_indices[i] = temp_indices1[i]; new_indices[i] = orig_indices[i] = temp_indices1[i];
assert (orig_indices[i] != -1); nvAssert (orig_indices[i] != -1);
} }
opt_endpts.B[ch] = new_b.B[ch]; opt_endpts.B[ch] = new_b.B[ch];
@ -748,7 +749,7 @@ static double optimize_one(const Vec4 colors[], int np, double orig_err, const I
for (int i=0; i<np; ++i) for (int i=0; i<np; ++i)
{ {
new_indices[i] = temp_indices0[i]; new_indices[i] = temp_indices0[i];
assert (new_indices[i] != -1); nvAssert (new_indices[i] != -1);
} }
if (do_b == 0) if (do_b == 0)
@ -775,7 +776,7 @@ static double optimize_one(const Vec4 colors[], int np, double orig_err, const I
bool first = true; bool first = true;
for (int ch = 0; ch < NCHANNELS_RGBA; ++ch) for (int ch = 0; ch < NCHANNELS_RGBA; ++ch)
{ {
double new_err = exhaustive(colors, np, ch, region_prec, opt_err, opt_endpts, temp_indices0); float new_err = exhaustive(colors, np, ch, region_prec, opt_err, opt_endpts, temp_indices0);
if (new_err < opt_err) if (new_err < opt_err)
{ {
@ -786,7 +787,7 @@ static double optimize_one(const Vec4 colors[], int np, double orig_err, const I
for (int i=0; i<np; ++i) for (int i=0; i<np; ++i)
{ {
orig_indices[i] = temp_indices0[i]; orig_indices[i] = temp_indices0[i];
assert (orig_indices[i] != -1); nvAssert (orig_indices[i] != -1);
} }
first = false; first = false;
} }
@ -810,10 +811,10 @@ static double optimize_one(const Vec4 colors[], int np, double orig_err, const I
return opt_err; return opt_err;
} }
static void optimize_endpts(const Tile &tile, int shapeindex, const double orig_err[NREGIONS], static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_err[NREGIONS],
IntEndptsRGBA_2 orig_endpts[NREGIONS], const PatternPrec &pattern_prec, double opt_err[NREGIONS], IntEndptsRGBA_2 opt_endpts[NREGIONS]) IntEndptsRGBA_2 orig_endpts[NREGIONS], const PatternPrec &pattern_prec, float opt_err[NREGIONS], IntEndptsRGBA_2 opt_endpts[NREGIONS])
{ {
Vec4 pixels[Tile::TILE_TOTAL]; Vector4 pixels[Tile::TILE_TOTAL];
IntEndptsRGBA_2 temp_in, temp_out; IntEndptsRGBA_2 temp_in, temp_out;
int temp_indices[Tile::TILE_TOTAL]; int temp_indices[Tile::TILE_TOTAL];
@ -830,7 +831,7 @@ static void optimize_endpts(const Tile &tile, int shapeindex, const double orig_
opt_endpts[region] = temp_in = orig_endpts[region]; opt_endpts[region] = temp_in = orig_endpts[region];
opt_err[region] = orig_err[region]; opt_err[region] = orig_err[region];
double best_err = orig_err[region]; float best_err = orig_err[region];
// try all lsb modes as we search for better endpoints // try all lsb modes as we search for better endpoints
for (int lsbmode=0; lsbmode<NLSBMODES; ++lsbmode) for (int lsbmode=0; lsbmode<NLSBMODES; ++lsbmode)
@ -839,12 +840,12 @@ static void optimize_endpts(const Tile &tile, int shapeindex, const double orig_
temp_in.b_lsb = (lsbmode >> 1) & 1; temp_in.b_lsb = (lsbmode >> 1) & 1;
// make sure we have a valid error for temp_in // make sure we have a valid error for temp_in
// we use DBL_MAX here because we want an accurate temp_in_err, no shortcuts // we use FLT_MAX here because we want an accurate temp_in_err, no shortcuts
// (mapcolors will compute a mapping but will stop if the error exceeds the value passed in the DBL_MAX position) // (mapcolors will compute a mapping but will stop if the error exceeds the value passed in the FLT_MAX position)
double temp_in_err = map_colors(pixels, np, temp_in, pattern_prec.region_precs[region], DBL_MAX, temp_indices); float temp_in_err = map_colors(pixels, np, temp_in, pattern_prec.region_precs[region], FLT_MAX, temp_indices);
// now try to optimize these endpoints // now try to optimize these endpoints
double temp_out_err = optimize_one(pixels, np, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out); float temp_out_err = optimize_one(pixels, np, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out);
// if we find an improvement, update the best so far and correct the output endpoints and errors // if we find an improvement, update the best so far and correct the output endpoints and errors
if (temp_out_err < best_err) if (temp_out_err < best_err)
@ -874,9 +875,9 @@ static void optimize_endpts(const Tile &tile, int shapeindex, const double orig_
emit compressed block with original data // to try to preserve maximum endpoint precision emit compressed block with original data // to try to preserve maximum endpoint precision
*/ */
static double refine(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS], char *block) static float refine(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS], char *block)
{ {
double orig_err[NREGIONS], opt_err[NREGIONS], orig_toterr, opt_toterr, expected_opt_err[NREGIONS]; float orig_err[NREGIONS], opt_err[NREGIONS], orig_toterr, opt_toterr, expected_opt_err[NREGIONS];
IntEndptsRGBA_2 orig_endpts[NREGIONS], opt_endpts[NREGIONS]; IntEndptsRGBA_2 orig_endpts[NREGIONS], opt_endpts[NREGIONS];
int orig_indices[Tile::TILE_H][Tile::TILE_W], opt_indices[Tile::TILE_H][Tile::TILE_W]; int orig_indices[Tile::TILE_H][Tile::TILE_W], opt_indices[Tile::TILE_H][Tile::TILE_W];
@ -895,8 +896,9 @@ static double refine(const Tile &tile, int shapeindex_best, const FltEndpts endp
transform_inverse(orig_endpts); transform_inverse(orig_endpts);
optimize_endpts(tile, shapeindex_best, orig_err, orig_endpts, pattern_precs[sp], expected_opt_err, opt_endpts); optimize_endpts(tile, shapeindex_best, orig_err, orig_endpts, pattern_precs[sp], expected_opt_err, opt_endpts);
assign_indices(tile, shapeindex_best, opt_endpts, pattern_precs[sp], opt_indices, opt_err); assign_indices(tile, shapeindex_best, opt_endpts, pattern_precs[sp], opt_indices, opt_err);
for (int i=0; i<NREGIONS; ++i) // (nreed) Commented out asserts because they go off all the time...not sure why
assert(expected_opt_err[i] == opt_err[i]); //for (int i=0; i<NREGIONS; ++i)
// nvAssert(expected_opt_err[i] == opt_err[i]);
swap_indices(opt_endpts, opt_indices, shapeindex_best); swap_indices(opt_endpts, opt_indices, shapeindex_best);
if (patterns[sp].transformed) if (patterns[sp].transformed)
transform_forward(opt_endpts); transform_forward(opt_endpts);
@ -921,41 +923,41 @@ static double refine(const Tile &tile, int shapeindex_best, const FltEndpts endp
throw "No candidate found, should never happen (avpcl mode 7)."; throw "No candidate found, should never happen (avpcl mode 7).";
} }
static void clamp(Vec4 &v) static void clamp(Vector4 &v)
{ {
if (v.X() < RGBA_MIN) v.X() = RGBA_MIN; if (v.x < 0.0f) v.x = 0.0f;
if (v.X() > RGBA_MAX) v.X() = RGBA_MAX; if (v.x > 255.0f) v.x = 255.0f;
if (v.Y() < RGBA_MIN) v.Y() = RGBA_MIN; if (v.y < 0.0f) v.y = 0.0f;
if (v.Y() > RGBA_MAX) v.Y() = RGBA_MAX; if (v.y > 255.0f) v.y = 255.0f;
if (v.Z() < RGBA_MIN) v.Z() = RGBA_MIN; if (v.z < 0.0f) v.z = 0.0f;
if (v.Z() > RGBA_MAX) v.Z() = RGBA_MAX; if (v.z > 255.0f) v.z = 255.0f;
if (v.W() < RGBA_MIN) v.W() = RGBA_MIN; if (v.w < 0.0f) v.w = 0.0f;
if (v.W() > RGBA_MAX) v.W() = RGBA_MAX; if (v.w > 255.0f) v.w = 255.0f;
} }
static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS], Vec4 palette[NREGIONS][NINDICES]) static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS], Vector4 palette[NREGIONS][NINDICES])
{ {
for (int region = 0; region < NREGIONS; ++region) for (int region = 0; region < NREGIONS; ++region)
for (int i = 0; i < NINDICES; ++i) for (int i = 0; i < NINDICES; ++i)
palette[region][i] = PALETTE_LERP(endpts[region].A, endpts[region].B, i, 0.0, float(DENOM)); palette[region][i] = Utils::lerp(endpts[region].A, endpts[region].B, i, 0, DENOM);
} }
// generate a palette from unquantized endpoints, then pick best palette color for all pixels in each region, return toterr for all regions combined // generate a palette from unquantized endpoints, then pick best palette color for all pixels in each region, return toterr for all regions combined
static double map_colors(const Tile &tile, int shapeindex, const FltEndpts endpts[NREGIONS]) static float map_colors(const Tile &tile, int shapeindex, const FltEndpts endpts[NREGIONS])
{ {
// build list of possibles // build list of possibles
Vec4 palette[NREGIONS][NINDICES]; Vector4 palette[NREGIONS][NINDICES];
generate_palette_unquantized(endpts, palette); generate_palette_unquantized(endpts, palette);
double toterr = 0; float toterr = 0;
Vec4 err; Vector4 err;
for (int y = 0; y < tile.size_y; y++) for (int y = 0; y < tile.size_y; y++)
for (int x = 0; x < tile.size_x; x++) for (int x = 0; x < tile.size_x; x++)
{ {
int region = REGION(x,y,shapeindex); int region = REGION(x,y,shapeindex);
double err, besterr = DBL_MAX; float err, besterr = FLT_MAX;
for (int i = 0; i < NINDICES && besterr > 0; ++i) for (int i = 0; i < NINDICES && besterr > 0; ++i)
{ {
@ -971,13 +973,13 @@ static double map_colors(const Tile &tile, int shapeindex, const FltEndpts endpt
return toterr; return toterr;
} }
static double rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS]) static float rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS])
{ {
for (int region=0; region<NREGIONS; ++region) for (int region=0; region<NREGIONS; ++region)
{ {
int np = 0; int np = 0;
Vec4 colors[Tile::TILE_TOTAL]; Vector4 colors[Tile::TILE_TOTAL];
Vec4 mean(0,0,0,0); Vector4 mean(0,0,0,0);
for (int y = 0; y < tile.size_y; y++) for (int y = 0; y < tile.size_y; y++)
for (int x = 0; x < tile.size_x; x++) for (int x = 0; x < tile.size_x; x++)
@ -991,7 +993,7 @@ static double rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS]
// handle simple cases // handle simple cases
if (np == 0) if (np == 0)
{ {
Vec4 zero(0,0,0,RGBA_MAX); Vector4 zero(0,0,0,255.0f);
endpts[region].A = zero; endpts[region].A = zero;
endpts[region].B = zero; endpts[region].B = zero;
continue; continue;
@ -1009,29 +1011,15 @@ static double rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS]
continue; continue;
} }
Matrix rdq(np, 4);
mean /= float(np); mean /= float(np);
for (int i = 0; i < np; ++i) Vector4 direction = Fit::computePrincipalComponent_EigenSolver(np, colors);
{
rdq(i,0) = colors[i].X() - mean.X();
rdq(i,1) = colors[i].Y() - mean.Y();
rdq(i,2) = colors[i].Z() - mean.Z();
rdq(i,3) = colors[i].W() - mean.W();
}
// perform a singular value decomposition
SVD svd(rdq);
// get the principal component direction (the one with the largest weight)
Vec4 direction(svd.R()(0,0), svd.R()(0,1), svd.R()(0,2), svd.R()(0,3));
// project each pixel value along the principal direction // project each pixel value along the principal direction
double minp = DBL_MAX, maxp = -DBL_MAX; float minp = FLT_MAX, maxp = -FLT_MAX;
for (int i = 0; i < np; i++) for (int i = 0; i < np; i++)
{ {
float dp = rdq(i,0) * direction.X() + rdq(i,1)*direction.Y() + rdq(i,2)*direction.Z() + rdq(i,3)*direction.W(); float dp = dot(colors[i]-mean, direction);
if (dp < minp) minp = dp; if (dp < minp) minp = dp;
if (dp > maxp) maxp = dp; if (dp > maxp) maxp = dp;
} }
@ -1050,13 +1038,13 @@ static double rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS]
return map_colors(tile, shapeindex, endpts); return map_colors(tile, shapeindex, endpts);
} }
static void swap(double *list1, int *list2, int i, int j) static void swap(float *list1, int *list2, int i, int j)
{ {
double t = list1[i]; list1[i] = list1[j]; list1[j] = t; float t = list1[i]; list1[i] = list1[j]; list1[j] = t;
int t1 = list2[i]; list2[i] = list2[j]; list2[j] = t1; int t1 = list2[i]; list2[i] = list2[j]; list2[j] = t1;
} }
double AVPCL::compress_mode7(const Tile &t, char *block) float AVPCL::compress_mode7(const Tile &t, char *block)
{ {
// number of rough cases to look at. reasonable values of this are 1, NSHAPES/4, and NSHAPES // number of rough cases to look at. reasonable values of this are 1, NSHAPES/4, and NSHAPES
// NSHAPES/4 gets nearly all the cases; you can increase that a bit (say by 3 or 4) if you really want to squeeze the last bit out // NSHAPES/4 gets nearly all the cases; you can increase that a bit (say by 3 or 4) if you really want to squeeze the last bit out
@ -1066,10 +1054,10 @@ double AVPCL::compress_mode7(const Tile &t, char *block)
struct { struct {
FltEndpts endpts[NREGIONS]; FltEndpts endpts[NREGIONS];
} all[NSHAPES]; } all[NSHAPES];
double roughmse[NSHAPES]; float roughmse[NSHAPES];
int index[NSHAPES]; int index[NSHAPES];
char tempblock[AVPCL::BLOCKSIZE]; char tempblock[AVPCL::BLOCKSIZE];
double msebest = DBL_MAX; float msebest = FLT_MAX;
for (int i=0; i<NSHAPES; ++i) for (int i=0; i<NSHAPES; ++i)
{ {
@ -1086,7 +1074,7 @@ double AVPCL::compress_mode7(const Tile &t, char *block)
for (int i=0; i<NITEMS && msebest>0; ++i) for (int i=0; i<NITEMS && msebest>0; ++i)
{ {
int shape = index[i]; int shape = index[i];
double mse = refine(t, shape, &all[shape].endpts[0], tempblock); float mse = refine(t, shape, &all[shape].endpts[0], tempblock);
if (mse < msebest) if (mse < msebest)
{ {
memcpy(block, tempblock, sizeof(tempblock)); memcpy(block, tempblock, sizeof(tempblock));

View File

@ -1,348 +0,0 @@
/*
Copyright 2007 nVidia, Inc.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and limitations under the License.
*/
// NOTE: the compressor will compress RGB tiles where the input alpha is constant at 255
// using modes where the alpha is variable if that mode gives a smaller mean squared error.
#include <iostream>
#include <sstream>
#include <string>
#include <stdexcept>
#include <assert.h>
#include "ImfArray.h"
#include "targa.h"
#include "avpcl.h"
using namespace std;
// Compare two Targa images and print error statistics to stdout.
//
// in1, in2: file names handed to Targa::read (presumably it fills the pixel
//           array and the width/height outputs -- confirm against targa.h).
//
// Only the overlapping region (minimum width and height of the two images) is
// compared. The region is walked in 4x4 tiles (partial tiles at the right and
// bottom edges are allowed), and for every channel of every pixel the signed
// difference is accumulated into:
//   - mean absolute error and mean squared error, separately for RGB, alpha
//     and RGBA;
//   - a histogram of how many bits are needed to represent each absolute error;
//   - the position and PSNR of the worst tile.
// If AVPCL::flag_premult is set, the colour channels of both images are passed
// through Utils::premult with their alpha before being compared.
static void analyze(string in1, string in2)
{
    Array2D<RGBA> pin1, pin2;
    int w1, h1, w2, h2;

    Targa::read(in1, pin1, w1, h1);
    Targa::read(in2, pin2, w2, h2);

    // choose the smaller of the two dimensions (since the old compressor would truncate to multiple-of-4 sizes)
    int w = MIN(w1, w2);
    int h = MIN(h1, h2);

    double nsamples = 0;
    double mabse_rgb = 0, mabse_a = 0, mabse_rgba = 0, mse_rgb = 0, mse_a = 0, mse_rgba = 0;
    int errdist_rgb[9], errdist_a[9], errdist_rgba[9];
    int errs[4*16];     // per-channel errors of one tile: 4 channels x up to 16 pixels

    for (int i=0; i<9; ++i)
        errdist_rgb[i] = errdist_a[i] = errdist_rgba[i] = 0;

    // psnrhist and first are only consumed inside the EXTERNAL_RELEASE section below
    int psnrhist[100];
    for (int i=0; i<100; ++i)
        psnrhist[i] = 0;
    bool first = true;

    // Initialized so that the final report never reads indeterminate values
    // when the overlap is empty and the tile loops below do not execute.
    int worstx = 0, worsty = 0;
    double worstpsnr = 999.0;

    bool constant_alpha = true;

    for (int y = 0; y < h; y+=4)
    for (int x = 0; x < w; x+=4)
    {
        // clip the tile against the right/bottom image edges
        int xw = MIN(w-x, 4);
        int yw = MIN(h-y, 4);
        int np = 0;

        float a[4], b[4];

        for (int y0=0; y0<yw; ++y0)
        for (int x0=0; x0<xw; ++x0)
        {
            a[0] = (pin1[y+y0][x+x0]).r;
            a[1] = (pin1[y+y0][x+x0]).g;
            a[2] = (pin1[y+y0][x+x0]).b;
            a[3] = (pin1[y+y0][x+x0]).a;

            b[0] = (pin2[y+y0][x+x0]).r;
            b[1] = (pin2[y+y0][x+x0]).g;
            b[2] = (pin2[y+y0][x+x0]).b;
            b[3] = (pin2[y+y0][x+x0]).a;

            if (AVPCL::flag_premult)
            {
                // premultiply
                for (int i=0; i<3; ++i)
                {
                    a[i] = Utils::premult(a[i], a[3]);
                    b[i] = Utils::premult(b[i], b[3]);
                }
            }

            if (a[3] != RGBA_MAX || b[3] != RGBA_MAX)
                constant_alpha = false;

            // errors are stored interleaved as r,g,b,a per pixel
            for (int i=0; i<4; ++i)
                errs[np+i] = a[i] - b[i];

            np += 4;
        }

        double msetile = 0.0;

        for (int i = 0; i < np; ++i)
        {
            int err = errs[i];
            int abse = err > 0 ? err : -err;
            int j = i & 3;      // channel index within the pixel; 3 is alpha
            int lsb;

            // lsb = number of bits needed to represent abse (0 when the error is zero)
            for (lsb=0; (abse>>lsb)>0; ++lsb)
                ;
            assert (lsb <= 8);

            if (j == 3)
            {
                mabse_a += (double)abse;
                mse_a += (double)abse * abse;
                errdist_a[lsb]++;
            }
            else
            {
                mabse_rgb += (double)abse;
                mse_rgb += (double)abse * abse;
                errdist_rgb[lsb]++;
            }
            mabse_rgba += (double)abse;
            mse_rgba += (double)abse * abse;
            errdist_rgba[lsb]++;

            msetile += (double)abse * abse;
        }

        double psnrtile, rmsetile;

        rmsetile = sqrt(msetile / double(np));
        // a perfect tile is reported as 99 dB rather than infinity
        psnrtile = (rmsetile == 0) ? 99.0 : 20.0 * log10(255.0/rmsetile);

        if (psnrtile < worstpsnr)
        {
            worstx = x; worsty = y; worstpsnr = psnrtile;
        }
#ifdef EXTERNAL_RELEASE
        int psnrquant = (int) floor (psnrtile);     // 10 means [10,11) psnrs, e.g.
        // clamp just in case
        psnrquant = (psnrquant < 0) ? 0 : (psnrquant > 99) ? 99 : psnrquant;
        psnrhist[psnrquant]++;
        if (first && psnrquant < 16)
        {
            first = false;
            printf("Tiles with RGBA PSNR's worse than 16dB\n");
        }
        if (psnrquant < 16)
            printf("X %4d Y %4d RGBA PSNR %7.2f\n", x, y, psnrtile);
#endif
    }

    // multiply in double so the sample count itself cannot overflow int
    nsamples = double(w) * double(h);

    mabse_a /= nsamples;
    mse_a /= nsamples;
    mabse_rgb /= (nsamples*3);
    mse_rgb /= (nsamples*3);
    mabse_rgba /= (nsamples*4);
    mse_rgba /= (nsamples*4);

    double rmse_a, psnr_a, rmse_rgb, psnr_rgb, rmse_rgba, psnr_rgba;

    rmse_a = sqrt(mse_a);
    psnr_a = (rmse_a == 0) ? 999.0 : 20.0 * log10(255.0/rmse_a);

    rmse_rgb = sqrt(mse_rgb);
    psnr_rgb = (rmse_rgb == 0) ? 999.0 : 20.0 * log10(255.0/rmse_rgb);

    rmse_rgba = sqrt(mse_rgba);
    psnr_rgba = (rmse_rgba == 0) ? 999.0 : 20.0 * log10(255.0/rmse_rgba);

    printf("Image size compared: %dw x %dh\n", w, h);
    printf("Image alpha is %s.\n", constant_alpha ? "CONSTANT" : "VARIABLE");
    if (w != w1 || w != w2 || h != h1 || h != h2)
        printf("--- NOTE: only the overlap between the 2 images (%d,%d) and (%d,%d) was compared\n", w1, h1, w2, h2);
    printf("Total pixels: %12d\n", w * h);

    // string literals are const; binding them to a non-const char* is ill-formed in C++11 and later
    const char *which = !AVPCL::flag_premult ? "RGB" : "aRaGaB";

    printf("\n%s Mean absolute error: %f\n", which, mabse_rgb);
    printf("%s Root mean squared error: %f (MSE %f)\n", which, rmse_rgb, rmse_rgb*rmse_rgb);
    printf("%s Peak signal to noise ratio in dB: %f\n", which, psnr_rgb);
    printf("%s Histogram of number of channels with indicated LSB error\n", which);
    for (int i = 0; i < 9; ++i)
        if (errdist_rgb[i]) printf("%2d LSB error: %10d\n", i, errdist_rgb[i]);

    printf("\nAlpha Mean absolute error: %f\n", mabse_a);
    printf("Alpha Root mean squared error: %f (MSE %f)\n", rmse_a, rmse_a*rmse_a);
    printf("Alpha Peak signal to noise ratio in dB: %f\n", psnr_a);
    printf("Alpha Histogram of number of channels with indicated LSB error\n");
    for (int i = 0; i < 9; ++i)
        if (errdist_a[i]) printf("%2d LSB error: %10d\n", i, errdist_a[i]);

    printf("\nRGBA Mean absolute error: %f\n", mabse_rgba);
    printf("RGBA Root mean squared error: %f (MSE %f)\n", rmse_rgba, rmse_rgba*rmse_rgba);
    printf("RGBA Peak signal to noise ratio in dB: %f\n", psnr_rgba);
    printf("RGBA Histogram of number of channels with indicated LSB error\n");
    for (int i = 0; i < 9; ++i)
        if (errdist_rgba[i]) printf("%2d LSB error: %10d\n", i, errdist_rgba[i]);

    printf("\nWorst tile RGBA PSNR %f at x %d y %d\n", worstpsnr, worstx, worsty);

#if 0
    printf("Histogram of per-tile PSNR\n");
    for (int i = 0; i < 100; ++i)
        if (psnrhist[i])
            printf("[%2d,%2d) %6d\n", i, i+1, psnrhist[i]);
#endif
}
// Returns true when the file-name part of `inf` ends with `extension`.
//   inf       - path to test.
//   extension - expected suffix including the leading dot (e.g. ".tga"), or ""
//               to ask "does this name have no extension at all?".
// Only a '.' that follows the last path separator counts as the start of an
// extension, so names such as "./outroot" or "build.v2/outroot" are correctly
// reported as having no extension.
static bool ext(const std::string &inf, const char *extension)
{
    const std::string::size_type sep = inf.find_last_of("/\\");
    const std::string::size_type dot = inf.rfind('.');
    const bool hasDot = dot != std::string::npos &&
                        (sep == std::string::npos || dot > sep);

    if (hasDot)
        return inf.compare(dot, std::string::npos, extension) == 0;

    // No extension present: matches only the empty extension.
    return *extension == '\0';
}
// Renders any value that supports stream insertion (operator<<) as a string.
template <typename T>
std::string toString(const T &thing)
{
    std::ostringstream formatted;
    formatted << thing;
    return formatted.str();
}
// Parses a leading decimal integer from `s` (leading whitespace is skipped,
// trailing text is ignored).
// Returns 0 when `s` is empty or does not start with an integer; the result
// is initialised up front so a failed extraction can never hand back an
// indeterminate value.
static int str2int(const std::string &s)
{
    int thing = 0;
    std::istringstream str(s);
    str >> thing;
    return thing;
}
// Prints the command-line help (invocation forms and flags) to standard output.
static void usage()
{
    // One entry per output line; the empty entry is the blank line that
    // separates the "Usage" and "Flags" sections.
    static const char *const lines[] = {
        "Usage:",
        "avpclc infile.tga outroot generates outroot-w-h.avpcl and outroot-avpcl.tga",
        "avpclc foo-w-h.avpcl outroot generates outroot-avpcl.tga",
        "avpclc infile.tga outfile.tga compares the two images",
        "",
        "Flags:",
        "-p use a metric based on AR AG AB A (note: if the image has alpha constant 255 this option is overridden)",
        "-n use a non-uniformly-weighed metric (weights .299 .587 .114)",
        "-na use a non-uniformly-weighed metric (ATI weights .3086 .6094 .0820)",
        "-e dump squared errors for each tile to outroot-errors.bin"
    };
    const unsigned count = sizeof(lines) / sizeof(lines[0]);

    std::cout << std::endl;
    for (unsigned k = 0; k < count; ++k)
        std::cout << lines[k] << std::endl;
}
// Definitions of the AVPCL option flags; main() sets them from the command line.
// -p: use the error metric based on AR AG AB A (premultiplied alpha).
bool AVPCL::flag_premult = false;
// -n: use the non-uniformly weighted metric (weights .299 .587 .114).
bool AVPCL::flag_nonuniform = false;
// -na: use the non-uniformly weighted metric with the ATI weights (.3086 .6094 .0820).
bool AVPCL::flag_nonuniform_ati = false;
// Passed by main() to Targa::fileinfo() as its const_alpha output, so it ends up
// true when the input TGA is 24bpp or every alpha value is 255.
bool AVPCL::mode_rgb = false;
// Command-line entry point for the avpclc tool.
//
// Flags (arguments starting with '-'):
//   -p   premultiplied-alpha metric      -n   non-uniform weights
//   -na  ATI non-uniform weights         -e   also write outroot-errors.bin
// Exactly two non-flag arguments are required and select the mode:
//   infile.tga  outroot      compress, decompress, then analyze the round trip
//   file.avpcl  outroot      decompress only
//   a.tga       b.tga        compare the two images
//
// Returns 0 on success; on any error prints the message plus the usage text
// and returns 1.
int main(int argc, char* argv[])
{
    bool noerrfile = true;
#ifdef EXTERNAL_RELEASE
    cout << "avpcl/BC7L Targa RGBA Compressor/Decompressor version 1.41 (May 27, 2010)." << endl <<
        "Bug reports, questions, and suggestions to wdonovan a t nvidia d o t com." << endl;
#endif
    try
    {
        char * args[2];
        int nargs = 0;

        // process flags, copy any non flag arg to args[]
        for (int i = 1; i < argc; ++i)
        {
            if ((argv[i])[0] == '-')
            {
                switch ((argv[i])[1]) {
                case 'p': AVPCL::flag_premult = true; break;
                case 'n': if ((argv[i])[2] == 'a') { AVPCL::flag_nonuniform_ati = true; AVPCL::flag_nonuniform = false; }
                          else { AVPCL::flag_nonuniform = true; AVPCL::flag_nonuniform_ati = false; }
                          break;
                case 'e': noerrfile = false; break;
                default: throw "bad flag arg";
                }
            }
            else
            {
                // guard before storing so args[] can never overflow
                if (nargs > 1) throw "Incorrect number of args";
                args[nargs++] = argv[i];
            }
        }
        if (nargs != 2) throw "Incorrect number of args";

        string inf(args[0]), outroot(args[1]);

        if (ext(outroot, ""))
        {
            // second argument is an output root (no extension)
            if (ext(inf, ".tga"))
            {
                int width, height;
                Targa::fileinfo(inf, width, height, AVPCL::mode_rgb);

                string outf, avpclf, errf;
                outf = outroot + "-avpcl.tga";
                avpclf = outroot + "-" + toString(width) + "-" + toString(height) + "-" + (AVPCL::mode_rgb ? "RGB" : "RGBA") + ".avpcl";
                cout << "Compressing " << (AVPCL::mode_rgb ? "RGB file " : "RGBA file ") << inf << " to " << avpclf << endl;
                if (!noerrfile)
                {
                    errf = outroot + "-errors" + ".bin";
                    cout << "Errors output file is " << errf << endl;
                }
                else
                    errf = "";
                AVPCL::compress(inf, avpclf, errf);
                cout << "Decompressing " << avpclf << " to " << outf << endl;
                AVPCL::decompress(avpclf, outf);
                analyze(inf, outf);
            }
            else if (ext(inf, ".avpcl"))
            {
                string outf;
                outf = outroot + "-avpcl.tga";
                cout << "Decompressing " << inf << " to " << outf << endl;
                AVPCL::decompress(inf, outf);
            }
            else throw "Invalid file args";
        }
        else if (ext(inf, ".tga") && ext(outroot, ".tga"))
        {
            analyze(inf, outroot);
        }
        else throw "Invalid file args";
    }
    catch(const exception& e)
    {
        // Print error message and usage instructions
        cerr << e.what() << endl;
        usage();
        return 1;
    }
    catch(const char * msg)
    {
        // String literals are thrown as `const char*`; a `char*` handler would
        // not match them and the program would terminate without a message.
        cerr << msg << endl;
        usage();
        return 1;
    }
    return 0;
}

View File

@ -10,36 +10,38 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and limitations under the License. See the License for the specific language governing permissions and limitations under the License.
*/ */
#ifndef _BITS_H #ifndef _AVPCL_BITS_H
#define _BITS_H #define _AVPCL_BITS_H
// read/write a bitstream // read/write a bitstream
#include <assert.h> #include "nvcore/Debug.h"
namespace AVPCL {
class Bits class Bits
{ {
public: public:
Bits(char *data, int maxdatabits) { assert (data && maxdatabits > 0); bptr = bend = 0; bits = data; maxbits = maxdatabits; readonly = 0;} Bits(char *data, int maxdatabits) { nvAssert (data && maxdatabits > 0); bptr = bend = 0; bits = data; maxbits = maxdatabits; readonly = 0;}
Bits(const char *data, int availdatabits) { assert (data && availdatabits > 0); bptr = 0; bend = availdatabits; cbits = data; maxbits = availdatabits; readonly = 1;} Bits(const char *data, int availdatabits) { nvAssert (data && availdatabits > 0); bptr = 0; bend = availdatabits; cbits = data; maxbits = availdatabits; readonly = 1;}
void write(int value, int nbits) { void write(int value, int nbits) {
assert (nbits >= 0 && nbits < 32); nvAssert (nbits >= 0 && nbits < 32);
assert (sizeof(int)>= 4); nvAssert (sizeof(int)>= 4);
for (int i=0; i<nbits; ++i) for (int i=0; i<nbits; ++i)
writeone(value>>i); writeone(value>>i);
} }
int read(int nbits) { int read(int nbits) {
assert (nbits >= 0 && nbits < 32); nvAssert (nbits >= 0 && nbits < 32);
assert (sizeof(int)>= 4); nvAssert (sizeof(int)>= 4);
int out = 0; int out = 0;
for (int i=0; i<nbits; ++i) for (int i=0; i<nbits; ++i)
out |= readone() << i; out |= readone() << i;
return out; return out;
} }
int getptr() { return bptr; } int getptr() { return bptr; }
void setptr(int ptr) { assert (ptr >= 0 && ptr < maxbits); bptr = ptr; } void setptr(int ptr) { nvAssert (ptr >= 0 && ptr < maxbits); bptr = ptr; }
int getsize() { return bend; } int getsize() { return bend; }
private: private:
@ -51,7 +53,7 @@ private:
char readonly; // 1 if this is a read-only stream char readonly; // 1 if this is a read-only stream
int readone() { int readone() {
assert (bptr < bend); nvAssert (bptr < bend);
if (bptr >= bend) return 0; if (bptr >= bend) return 0;
int bit = (readonly ? cbits[bptr>>3] : bits[bptr>>3]) & (1 << (bptr & 7)); int bit = (readonly ? cbits[bptr>>3] : bits[bptr>>3]) & (1 << (bptr & 7));
++bptr; ++bptr;
@ -60,7 +62,7 @@ private:
void writeone(int bit) { void writeone(int bit) {
if (readonly) if (readonly)
throw "Writing a read-only bit stream"; throw "Writing a read-only bit stream";
assert (bptr < maxbits); nvAssert (bptr < maxbits);
if (bptr >= maxbits) return; if (bptr >= maxbits) return;
if (bit&1) if (bit&1)
bits[bptr>>3] |= 1 << (bptr & 7); bits[bptr>>3] |= 1 << (bptr & 7);
@ -70,4 +72,6 @@ private:
} }
}; };
}
#endif #endif

View File

@ -10,26 +10,26 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and limitations under the License. See the License for the specific language governing permissions and limitations under the License.
*/ */
#ifndef _ENDPTS_H #ifndef _AVPCL_ENDPTS_H
#define _ENDPTS_H #define _AVPCL_ENDPTS_H
// endpoint definitions and routines to search through endpoint space // endpoint definitions and routines to search through endpoint space
#include "arvo/Vec4.h" #include "nvmath/Vector.h"
using namespace ArvoMath; namespace AVPCL {
#define NCHANNELS_RGB 3 static const int NCHANNELS_RGB = 3;
#define NCHANNELS_RGBA 4 static const int NCHANNELS_RGBA = 4;
#define CHANNEL_R 0 static const int CHANNEL_R = 0;
#define CHANNEL_G 1 static const int CHANNEL_G = 1;
#define CHANNEL_B 2 static const int CHANNEL_B = 2;
#define CHANNEL_A 3 static const int CHANNEL_A = 3;
struct FltEndpts struct FltEndpts
{ {
Vec4 A; nv::Vector4 A;
Vec4 B; nv::Vector4 B;
}; };
struct IntEndptsRGB struct IntEndptsRGB
@ -76,5 +76,6 @@ struct IntEndptsRGBA_2a
int b_lsb; // lsb for RGB channels of A int b_lsb; // lsb for RGB channels of A
}; };
#endif }
#endif

View File

@ -1,27 +0,0 @@
/*
Copyright 2007 nVidia, Inc.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and limitations under the License.
*/
#ifndef _RGBA_H
#define _RGBA_H
#define RGBA_MIN 0
#define RGBA_MAX 255 // range of RGBA
// A single pixel held as four float channels: red, green, blue and alpha.
class RGBA
{
public:
    float r, g, b, a;

    // All channels start at zero.
    RGBA()
        : r(0), g(0), b(0), a(0)
    {
    }

    // Builds a pixel from explicit channel values.
    RGBA(float red, float green, float blue, float alpha)
        : r(red), g(green), b(blue), a(alpha)
    {
    }
};
#endif

View File

@ -10,8 +10,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and limitations under the License. See the License for the specific language governing permissions and limitations under the License.
*/ */
#ifndef _SHAPES_THREE_H #ifndef _AVPCL_SHAPES_THREE_H
#define _SHAPES_THREE_H #define _AVPCL_SHAPES_THREE_H
// shapes for 3 regions // shapes for 3 regions

View File

@ -10,8 +10,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and limitations under the License. See the License for the specific language governing permissions and limitations under the License.
*/ */
#ifndef _SHAPES_TWO_H #ifndef _AVPCL_SHAPES_TWO_H
#define _SHAPES_TWO_H #define _AVPCL_SHAPES_TWO_H
// shapes for two regions // shapes for two regions

View File

@ -1,179 +0,0 @@
/*
Copyright 2007 nVidia, Inc.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and limitations under the License.
*/
// Quick and dirty Targa file I/O -- doesn't handle compressed format targa files, though.
#include <stdexcept>
#include <iostream>
#include "ImfArray.h"
#include "targa.h"
#include "rgba.h"
// The constructor and destructor bodies are empty; the functions defined in
// this file (fileinfo, read, write) do not touch any instance state.
Targa::Targa() {}
Targa::~Targa() {}
// Parses the 18-byte header of an uncompressed true-colour TGA file (RGB or
// RGBA) and leaves `fp` positioned at the first byte of pixel data.
//   fp      - stream positioned at the start of the file.
//   width   - receives the image width in pixels.
//   height  - receives the image height in pixels.
//   bpp     - receives bits per pixel (24 or 32).
//   origin  - receives bits 4-5 of the image descriptor
//             (0 bottom-left, 1 bottom-right, 2 top-left, 3 top-right).
// Returns 1 on success and 0 when the header is truncated or describes an
// unsupported format (image type other than 2, depth other than 24/32, or an
// alpha depth that does not match the pixel depth).
static int readTgaHeader(FILE *fp, int& width, int& height, int &bpp, int &origin)
{
    unsigned char hdr[18];

    if (fread(hdr, sizeof(hdr), 1, fp ) != 1)
        return 0;

    // image type 2 = uncompressed true-colour
    if (hdr[2] != 2)
        return 0;

    bpp = hdr[16];
    if (bpp != 24 && bpp != 32)
        return 0;

    const int alphabpp = hdr[17] & 0xF;
    origin = (hdr[17] >> 4) & 0x3;

    if (bpp == 24 && alphabpp != 0)
        return 0;
    if (bpp == 32 && alphabpp != 8)
        return 0;

    // 16-bit little-endian dimensions
    width = (hdr[13] << 8) | hdr[12];
    height = (hdr[15] << 8) | hdr[14];

    // Bytes between the header and the pixel data: the image ID field, then
    // the colour map when the header says one is present (entries are stored
    // in whole bytes).
    long skip = hdr[0];
    if (hdr[1] == 1)
        skip += long((hdr[6] << 8) | hdr[5]) * ((hdr[7] + 7) / 8);

    // A file that ends inside these fields is invalid, not silently accepted.
    for (; skip > 0; --skip)
        if (getc(fp) == EOF)
            return 0;

    return 1;
}
// Reads width*height pixels of uncompressed TGA data from `fp` into `pixels`,
// which is resized to [height][width] and stored with row 0 at the top and
// column 0 at the left.
//   bpp    - 24 (B,G,R; alpha is set to RGBA_MAX) or 32 (B,G,R,A).
//   origin - bits 4-5 of the TGA image descriptor: bit 0 set means the file
//            stores columns right-to-left, bit 1 set means rows are stored
//            top-to-bottom.
// Throws a const char* message if the pixel data ends early or `origin` is
// out of range.
static void read_file(FILE *fp, Imf::Array2D<RGBA>& pixels, int width, int height, int bpp, int origin)
{
    pixels.resizeErase(height, width);

    // y and x follow file order; (xt, yt) is the destination in the top-left-origin array
    for (int y = 0; y < height; ++y)
        for (int x = 0; x < width; ++x)
        {
            const int bc = getc(fp);
            const int gc = getc(fp);
            const int rc = getc(fp);
            const int ac = (bpp == 24) ? RGBA_MAX : getc(fp);

            // getc reports end-of-file/error as EOF; never store that as a channel value
            if (bc == EOF || gc == EOF || rc == EOF || ac == EOF)
                throw "Unexpected end of TGA pixel data";

            int xt, yt;

            // transform based on origin: a bottom origin flips rows, a right origin flips columns
            switch (origin)
            {
            case 0: xt = x;         yt = height-1-y; break;    // bottom left
            case 1: xt = width-1-x; yt = height-1-y; break;    // bottom right
            case 2: xt = x;         yt = y;          break;    // top left
            case 3: xt = width-1-x; yt = y;          break;    // top right
            default: throw "impossible origin value";
            }

            pixels[yt][xt].a = float(ac);
            pixels[yt][xt].r = float(rc);
            pixels[yt][xt].g = float(gc);
            pixels[yt][xt].b = float(bc);
        }
}
// Reports the dimensions of a TGA file and whether its alpha is constant.
//   filename    - path of a 24 or 32 bit uncompressed TGA file.
//   width       - receives the image width.
//   height      - receives the image height.
//   const_alpha - receives true for a 24bpp file, or for a 32bpp file whose
//                 every alpha value is 255; false otherwise.
// Throws a const char* message when the file cannot be opened or its format
// is not supported. The file is closed on every exit path.
void Targa::fileinfo(const std::string& filename, int& width, int& height, bool& const_alpha)
{
    // Closes the stream when the function returns or throws.
    struct FileCloser
    {
        FILE *fp;
        explicit FileCloser(FILE *f) : fp(f) {}
        ~FileCloser() { if (fp) fclose(fp); }
    };

    int bpp, origin;

    FILE *fp = fopen(filename.c_str(), "rb");
    if (fp == (FILE *) 0)
        throw "Unable to open infile";
    FileCloser closer(fp);

    if (readTgaHeader(fp, width, height, bpp, origin) == 0)
        throw "Invalid or unimplemented format for infile, needs to be a 24 or 32 bit uncompressed TGA file";

    if (bpp == 24)
        const_alpha = true;
    else
    {
        // even if file is 32bpp the alpha may still be constant. so read file and check
        Imf::Array2D<RGBA> pixels;
        read_file(fp, pixels, width, height, bpp, origin);

        const_alpha = true;
        for (int y=0; y<height && const_alpha; ++y)
            for (int x=0; x<width && const_alpha; ++x)
                if (pixels[y][x].a != 255.0)
                    const_alpha = false;
    }
}
// Loads a 24 or 32 bit uncompressed TGA file.
//   filename - path of the file to read.
//   pixels   - resized to [height][width] and filled with the image.
//   width    - receives the image width.
//   height   - receives the image height.
// Throws a const char* message when the file cannot be opened or its format
// is not supported. The file is closed on every exit path.
void Targa::read(const std::string& filename, Imf::Array2D<RGBA>& pixels, int& width, int& height)
{
    // Closes the stream when the function returns or throws.
    struct FileCloser
    {
        FILE *fp;
        explicit FileCloser(FILE *f) : fp(f) {}
        ~FileCloser() { if (fp) fclose(fp); }
    };

    int bpp, origin;

    FILE *fp = fopen(filename.c_str(), "rb");
    if (fp == (FILE *) 0)
        throw "Unable to open infile";
    FileCloser closer(fp);

    if (readTgaHeader(fp, width, height, bpp, origin) == 0)
        throw "Invalid or unimplemented format for infile, needs to be a 24 or 32 bit uncompressed TGA file";

    read_file(fp, pixels, width, height, bpp, origin);
}
void Targa::write(const std::string& filename, const Imf::Array2D<RGBA>& pixels, int width, int height)
{
FILE *fp = fopen(filename.c_str(), "wb");
if (fp == (FILE *) 0)
throw "Unable to open outfile";
unsigned char hdr[18];
// we're lazy, always write this as a 32bpp file, even if the alpha is constant 255
memset(hdr, 0, sizeof(hdr));
hdr[2] = 2;
hdr[12] = width & 0xFF;
hdr[13] = width >> 8;
hdr[14] = height & 0xFF;
hdr[15] = height >> 8;
hdr[16] = 32;
hdr[17] = 0x28;
fwrite( hdr, sizeof(hdr), 1, fp );
// top to bottom order
for (int y = 0; y < height; ++y)
for (int x = 0; x < width; ++x)
{
int a = int((pixels[y][x]).a + 0.5f);
int r = int((pixels[y][x]).r + 0.5f);
int g = int((pixels[y][x]).g + 0.5f);
int b = int((pixels[y][x]).b + 0.5f);
if (b < RGBA_MIN) b = RGBA_MIN; if (b > RGBA_MAX) b = RGBA_MAX; fputc(b, fp);
if (g < RGBA_MIN) g = RGBA_MIN; if (g > RGBA_MAX) g = RGBA_MAX; fputc(g, fp);
if (r < RGBA_MIN) r = RGBA_MIN; if (r > RGBA_MAX) r = RGBA_MAX; fputc(r, fp);
if (a < RGBA_MIN) a = RGBA_MIN; if (a > RGBA_MAX) a = RGBA_MAX; fputc(a, fp);
}
fclose(fp);
}

View File

@ -1,30 +0,0 @@
/*
Copyright 2007 nVidia, Inc.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and limitations under the License.
*/
#ifndef _targa_h_
#define _targa_h_
#include "ImfArray.h"
#include "rgba.h"
// File I/O helpers for 24 and 32 bit uncompressed Targa (TGA) images.
// All operations are static; errors are reported by throwing a const char* message.
class Targa
{
public:
Targa();
~Targa();
// Reads the header of `filename` and reports its size; const_alpha is set to true
// when the file is 24bpp or every alpha value in a 32bpp file is 255.
static void fileinfo( const std::string& filename, int& width, int& height, bool& const_alpha);
// Loads `filename` into `pixels` and reports its size through width/height.
static void read( const std::string& filename, Imf::Array2D<RGBA>& pixels, int& width, int& height );
// Writes `pixels` to `filename`; the output is always 32bpp.
static void write(const std::string& filename, const Imf::Array2D<RGBA>& pixels, int width, int height );
};
#endif /* _targa_h_ */

View File

@ -10,17 +10,14 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and limitations under the License. See the License for the specific language governing permissions and limitations under the License.
*/ */
#ifndef _TILE_H #ifndef _AVPCL_TILE_H
#define _TILE_H #define _AVPCL_TILE_H
#include "ImfArray.h" #include "nvmath/Vector.h"
#include <math.h> #include <math.h>
#include "arvo/Vec4.h"
#include "utils.h" #include "utils.h"
#include "rgba.h"
using namespace Imf; namespace AVPCL {
using namespace ArvoMath;
// extract a tile of pixels from an array // extract a tile of pixels from an array
@ -30,38 +27,14 @@ public:
static const int TILE_H = 4; static const int TILE_H = 4;
static const int TILE_W = 4; static const int TILE_W = 4;
static const int TILE_TOTAL = TILE_H * TILE_W; static const int TILE_TOTAL = TILE_H * TILE_W;
Vec4 data[TILE_H][TILE_W]; nv::Vector4 data[TILE_H][TILE_W];
int size_x, size_y; // actual size of tile int size_x, size_y; // actual size of tile
Tile() {}; Tile() {};
~Tile(){}; ~Tile(){};
Tile(int xs, int ys) {size_x = xs; size_y = ys;} Tile(int xs, int ys) {size_x = xs; size_y = ys;}
// pixels -> tile
void inline insert(const Array2D<RGBA> &pixels, int x, int y)
{
for (int y0=0; y0<size_y; ++y0)
for (int x0=0; x0<size_x; ++x0)
{
data[y0][x0].X() = (pixels[y+y0][x+x0]).r;
data[y0][x0].Y() = (pixels[y+y0][x+x0]).g;
data[y0][x0].Z() = (pixels[y+y0][x+x0]).b;
data[y0][x0].W() = (pixels[y+y0][x+x0]).a;
}
}
// tile -> pixels
void inline extract(Array2D<RGBA> &pixels, int x, int y)
{
for (int y0=0; y0<size_y; ++y0)
for (int x0=0; x0<size_x; ++x0)
{
pixels[y+y0][x+x0].r = data[y0][x0].X();
pixels[y+y0][x+x0].g = data[y0][x0].Y();
pixels[y+y0][x+x0].b = data[y0][x0].Z();
pixels[y+y0][x+x0].a = data[y0][x0].W();
}
}
}; };
}
#endif #endif

View File

@ -14,22 +14,23 @@ See the License for the specific language governing permissions and limitations
#include "utils.h" #include "utils.h"
#include "avpcl.h" #include "avpcl.h"
#include "nvcore/Debug.h"
#include "nvmath/Vector.inl"
#include <math.h> #include <math.h>
#include <assert.h>
#include "rgba.h"
#include "arvo/Vec3.h"
#include "arvo/Vec4.h"
static int denom7_weights[] = {0, 9, 18, 27, 37, 46, 55, 64}; // divided by 64 using namespace nv;
static int denom15_weights[] = {0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64}; // divided by 64 using namespace AVPCL;
static const int denom7_weights[] = {0, 9, 18, 27, 37, 46, 55, 64}; // divided by 64
static const int denom15_weights[] = {0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64}; // divided by 64
int Utils::lerp(int a, int b, int i, int bias, int denom) int Utils::lerp(int a, int b, int i, int bias, int denom)
{ {
#ifdef USE_ZOH_INTERP #ifdef USE_ZOH_INTERP
assert (denom == 3 || denom == 7 || denom == 15); nvAssert (denom == 3 || denom == 7 || denom == 15);
assert (i >= 0 && i <= denom); nvAssert (i >= 0 && i <= denom);
assert (bias >= 0 && bias <= denom/2); nvAssert (bias >= 0 && bias <= denom/2);
assert (a >= 0 && b >= 0); nvAssert (a >= 0 && b >= 0);
int round = 0; int round = 0;
#ifdef USE_ZOH_INTERP_ROUNDED #ifdef USE_ZOH_INTERP_ROUNDED
@ -41,29 +42,29 @@ int Utils::lerp(int a, int b, int i, int bias, int denom)
case 3: denom *= 5; i *= 5; // fall through to case 15 case 3: denom *= 5; i *= 5; // fall through to case 15
case 15:return (a*denom15_weights[denom-i] + b*denom15_weights[i] + round) >> 6; case 15:return (a*denom15_weights[denom-i] + b*denom15_weights[i] + round) >> 6;
case 7: return (a*denom7_weights[denom-i] + b*denom7_weights[i] + round) >> 6; case 7: return (a*denom7_weights[denom-i] + b*denom7_weights[i] + round) >> 6;
default: assert(0); return 0; default: nvUnreachable(); return 0;
} }
#else #else
return (((a)*((denom)-i)+(b)*(i)+(bias))/(denom)); // simple exact interpolation return (((a)*((denom)-i)+(b)*(i)+(bias))/(denom)); // simple exact interpolation
#endif #endif
} }
Vec4 Utils::lerp(const Vec4& a, const Vec4 &b, int i, int bias, int denom) Vector4 Utils::lerp(Vector4::Arg a, Vector4::Arg b, int i, int bias, int denom)
{ {
#ifdef USE_ZOH_INTERP #ifdef USE_ZOH_INTERP
assert (denom == 3 || denom == 7 || denom == 15); nvAssert (denom == 3 || denom == 7 || denom == 15);
assert (i >= 0 && i <= denom); nvAssert (i >= 0 && i <= denom);
assert (bias >= 0 && bias <= denom/2); nvAssert (bias >= 0 && bias <= denom/2);
// assert (a >= 0 && b >= 0); // nvAssert (a >= 0 && b >= 0);
// no need to bias these as this is an exact division // no need to bias these as this is an exact division
switch (denom) switch (denom)
{ {
case 3: denom *= 5; i *= 5; // fall through to case 15 case 3: denom *= 5; i *= 5; // fall through to case 15
case 15:return (a*denom15_weights[denom-i] + b*denom15_weights[i]) / 64.0; case 15:return (a*float(denom15_weights[denom-i]) + b*float(denom15_weights[i])) / 64.0f;
case 7: return (a*denom7_weights[denom-i] + b*denom7_weights[i]) / 64.0; case 7: return (a*float(denom7_weights[denom-i]) + b*float(denom7_weights[i])) / 64.0f;
default: assert(0); return 0; default: nvUnreachable(); return Vector4(0);
} }
#else #else
return (((a)*((denom)-i)+(b)*(i)+(bias))/(denom)); // simple exact interpolation return (((a)*((denom)-i)+(b)*(i)+(bias))/(denom)); // simple exact interpolation
@ -75,8 +76,7 @@ int Utils::unquantize(int q, int prec)
{ {
int unq; int unq;
assert (prec > 3); // we only want to do one replicate nvAssert (prec > 3); // we only want to do one replicate
assert (RGBA_MIN == 0);
#ifdef USE_ZOH_QUANT #ifdef USE_ZOH_QUANT
if (prec >= 8) if (prec >= 8)
@ -84,9 +84,9 @@ int Utils::unquantize(int q, int prec)
else if (q == 0) else if (q == 0)
unq = 0; unq = 0;
else if (q == ((1<<prec)-1)) else if (q == ((1<<prec)-1))
unq = RGBA_MAX; unq = 255;
else else
unq = (q * (RGBA_MAX+1) + (RGBA_MAX+1)/2) >> prec; unq = (q * 256 + 128) >> prec;
#else #else
// avpcl unquantizer -- bit replicate // avpcl unquantizer -- bit replicate
unq = (q << (8-prec)) | (q >> (2*prec-8)); unq = (q << (8-prec)) | (q >> (2*prec-8));
@ -100,112 +100,111 @@ int Utils::quantize(float value, int prec)
{ {
int q, unq; int q, unq;
assert (prec > 3); // we only want to do one replicate nvAssert (prec > 3); // we only want to do one replicate
assert (RGBA_MIN == 0);
unq = (int)floor(value + 0.5); unq = (int)floor(value + 0.5f);
assert (unq >= RGBA_MIN && unq <= RGBA_MAX); nvAssert (unq <= 255);
#ifdef USE_ZOH_QUANT #ifdef USE_ZOH_QUANT
q = (prec >= 8) ? unq : (unq << prec) / (RGBA_MAX+1); q = (prec >= 8) ? unq : (unq << prec) / 256;
#else #else
// avpcl quantizer -- scale properly for best possible bit-replicated result // avpcl quantizer -- scale properly for best possible bit-replicated result
q = (unq * ((1<<prec)-1) + RGBA_MAX/2)/RGBA_MAX; q = (unq * ((1<<prec)-1) + 127)/255;
#endif #endif
assert (q >= 0 && q < (1 << prec)); nvAssert (q >= 0 && q < (1 << prec));
return q; return q;
} }
double Utils::metric4(const Vec4& a, const Vec4& b) float Utils::metric4(Vector4::Arg a, Vector4::Arg b)
{ {
Vec4 err = a - b; Vector4 err = a - b;
// if nonuniform, select weights and weigh away // if nonuniform, select weights and weigh away
if (AVPCL::flag_nonuniform || AVPCL::flag_nonuniform_ati) if (AVPCL::flag_nonuniform || AVPCL::flag_nonuniform_ati)
{ {
double rwt, gwt, bwt; float rwt, gwt, bwt;
if (AVPCL::flag_nonuniform) if (AVPCL::flag_nonuniform)
{ {
rwt = 0.299; gwt = 0.587; bwt = 0.114; rwt = 0.299f; gwt = 0.587f; bwt = 0.114f;
} }
else if (AVPCL::flag_nonuniform_ati) else if (AVPCL::flag_nonuniform_ati)
{ {
rwt = 0.3086; gwt = 0.6094; bwt = 0.0820; rwt = 0.3086f; gwt = 0.6094f; bwt = 0.0820f;
} }
// weigh the components // weigh the components
err.X() *= rwt; err.x *= rwt;
err.Y() *= gwt; err.y *= gwt;
err.Z() *= bwt; err.z *= bwt;
} }
return err * err; return lengthSquared(err);
} }
// WORK -- implement rotatemode for the below -- that changes where the rwt, gwt, and bwt's go. // WORK -- implement rotatemode for the below -- that changes where the rwt, gwt, and bwt's go.
double Utils::metric3(const Vec3& a, const Vec3& b, int rotatemode) float Utils::metric3(Vector3::Arg a, Vector3::Arg b, int rotatemode)
{ {
Vec3 err = a - b; Vector3 err = a - b;
// if nonuniform, select weights and weigh away // if nonuniform, select weights and weigh away
if (AVPCL::flag_nonuniform || AVPCL::flag_nonuniform_ati) if (AVPCL::flag_nonuniform || AVPCL::flag_nonuniform_ati)
{ {
double rwt, gwt, bwt; float rwt, gwt, bwt;
if (AVPCL::flag_nonuniform) if (AVPCL::flag_nonuniform)
{ {
rwt = 0.299; gwt = 0.587; bwt = 0.114; rwt = 0.299f; gwt = 0.587f; bwt = 0.114f;
} }
else if (AVPCL::flag_nonuniform_ati) else if (AVPCL::flag_nonuniform_ati)
{ {
rwt = 0.3086; gwt = 0.6094; bwt = 0.0820; rwt = 0.3086f; gwt = 0.6094f; bwt = 0.0820f;
} }
// adjust weights based on rotatemode // adjust weights based on rotatemode
switch(rotatemode) switch(rotatemode)
{ {
case ROTATEMODE_RGBA_RGBA: break; case ROTATEMODE_RGBA_RGBA: break;
case ROTATEMODE_RGBA_AGBR: rwt = 1.0; break; case ROTATEMODE_RGBA_AGBR: rwt = 1.0f; break;
case ROTATEMODE_RGBA_RABG: gwt = 1.0; break; case ROTATEMODE_RGBA_RABG: gwt = 1.0f; break;
case ROTATEMODE_RGBA_RGAB: bwt = 1.0; break; case ROTATEMODE_RGBA_RGAB: bwt = 1.0f; break;
default: assert(0); default: nvUnreachable();
} }
// weigh the components // weigh the components
err.X() *= rwt; err.x *= rwt;
err.Y() *= gwt; err.y *= gwt;
err.Z() *= bwt; err.z *= bwt;
} }
return err * err; return lengthSquared(err);
} }
double Utils::metric1(const float a, const float b, int rotatemode) float Utils::metric1(const float a, const float b, int rotatemode)
{ {
float err = a - b; float err = a - b;
// if nonuniform, select weights and weigh away // if nonuniform, select weights and weigh away
if (AVPCL::flag_nonuniform || AVPCL::flag_nonuniform_ati) if (AVPCL::flag_nonuniform || AVPCL::flag_nonuniform_ati)
{ {
double rwt, gwt, bwt, awt; float rwt, gwt, bwt, awt;
if (AVPCL::flag_nonuniform) if (AVPCL::flag_nonuniform)
{ {
rwt = 0.299; gwt = 0.587; bwt = 0.114; rwt = 0.299f; gwt = 0.587f; bwt = 0.114f;
} }
else if (AVPCL::flag_nonuniform_ati) else if (AVPCL::flag_nonuniform_ati)
{ {
rwt = 0.3086; gwt = 0.6094; bwt = 0.0820; rwt = 0.3086f; gwt = 0.6094f; bwt = 0.0820f;
} }
// adjust weights based on rotatemode // adjust weights based on rotatemode
switch(rotatemode) switch(rotatemode)
{ {
case ROTATEMODE_RGBA_RGBA: awt = 1.0; break; case ROTATEMODE_RGBA_RGBA: awt = 1.0f; break;
case ROTATEMODE_RGBA_AGBR: awt = rwt; break; case ROTATEMODE_RGBA_AGBR: awt = rwt; break;
case ROTATEMODE_RGBA_RABG: awt = gwt; break; case ROTATEMODE_RGBA_RABG: awt = gwt; break;
case ROTATEMODE_RGBA_RGAB: awt = bwt; break; case ROTATEMODE_RGBA_RGAB: awt = bwt; break;
default: assert(0); default: nvUnreachable();
} }
// weigh the components // weigh the components
@ -218,169 +217,169 @@ double Utils::metric1(const float a, const float b, int rotatemode)
float Utils::premult(float r, float a) float Utils::premult(float r, float a)
{ {
// note that the args are really integers stored in floats // note that the args are really integers stored in floats
int R = r, A = a; int R = int(r), A = int(a);
assert ((R==r) && (A==a)); nvAssert ((R==r) && (A==a));
return float((R*A + RGBA_MAX/2)/RGBA_MAX); return float((R*A + 127)/255);
} }
static void premult4(Vec4& rgba) static void premult4(Vector4& rgba)
{ {
rgba.X() = Utils::premult(rgba.X(), rgba.W()); rgba.x = Utils::premult(rgba.x, rgba.w);
rgba.Y() = Utils::premult(rgba.Y(), rgba.W()); rgba.y = Utils::premult(rgba.y, rgba.w);
rgba.Z() = Utils::premult(rgba.Z(), rgba.W()); rgba.z = Utils::premult(rgba.z, rgba.w);
} }
static void premult3(Vec3& rgb, float a) static void premult3(Vector3& rgb, float a)
{ {
rgb.X() = Utils::premult(rgb.X(), a); rgb.x = Utils::premult(rgb.x, a);
rgb.Y() = Utils::premult(rgb.Y(), a); rgb.y = Utils::premult(rgb.y, a);
rgb.Z() = Utils::premult(rgb.Z(), a); rgb.z = Utils::premult(rgb.z, a);
} }
double Utils::metric4premult(const Vec4& a, const Vec4& b) float Utils::metric4premult(Vector4::Arg a, Vector4::Arg b)
{ {
Vec4 pma = a, pmb = b; Vector4 pma = a, pmb = b;
premult4(pma); premult4(pma);
premult4(pmb); premult4(pmb);
Vec4 err = pma - pmb; Vector4 err = pma - pmb;
// if nonuniform, select weights and weigh away // if nonuniform, select weights and weigh away
if (AVPCL::flag_nonuniform || AVPCL::flag_nonuniform_ati) if (AVPCL::flag_nonuniform || AVPCL::flag_nonuniform_ati)
{ {
double rwt, gwt, bwt; float rwt, gwt, bwt;
if (AVPCL::flag_nonuniform) if (AVPCL::flag_nonuniform)
{ {
rwt = 0.299; gwt = 0.587; bwt = 0.114; rwt = 0.299f; gwt = 0.587f; bwt = 0.114f;
} }
else if (AVPCL::flag_nonuniform_ati) else if (AVPCL::flag_nonuniform_ati)
{ {
rwt = 0.3086; gwt = 0.6094; bwt = 0.0820; rwt = 0.3086f; gwt = 0.6094f; bwt = 0.0820f;
} }
// weigh the components // weigh the components
err.X() *= rwt; err.x *= rwt;
err.Y() *= gwt; err.y *= gwt;
err.Z() *= bwt; err.z *= bwt;
} }
return err * err; return lengthSquared(err);
} }
double Utils::metric3premult_alphaout(const Vec3& rgb0, float a0, const Vec3& rgb1, float a1) float Utils::metric3premult_alphaout(Vector3::Arg rgb0, float a0, Vector3::Arg rgb1, float a1)
{ {
Vec3 pma = rgb0, pmb = rgb1; Vector3 pma = rgb0, pmb = rgb1;
premult3(pma, a0); premult3(pma, a0);
premult3(pmb, a1); premult3(pmb, a1);
Vec3 err = pma - pmb; Vector3 err = pma - pmb;
// if nonuniform, select weights and weigh away // if nonuniform, select weights and weigh away
if (AVPCL::flag_nonuniform || AVPCL::flag_nonuniform_ati) if (AVPCL::flag_nonuniform || AVPCL::flag_nonuniform_ati)
{ {
double rwt, gwt, bwt; float rwt, gwt, bwt;
if (AVPCL::flag_nonuniform) if (AVPCL::flag_nonuniform)
{ {
rwt = 0.299; gwt = 0.587; bwt = 0.114; rwt = 0.299f; gwt = 0.587f; bwt = 0.114f;
} }
else if (AVPCL::flag_nonuniform_ati) else if (AVPCL::flag_nonuniform_ati)
{ {
rwt = 0.3086; gwt = 0.6094; bwt = 0.0820; rwt = 0.3086f; gwt = 0.6094f; bwt = 0.0820f;
} }
// weigh the components // weigh the components
err.X() *= rwt; err.x *= rwt;
err.Y() *= gwt; err.y *= gwt;
err.Z() *= bwt; err.z *= bwt;
} }
return err * err; return lengthSquared(err);
} }
double Utils::metric3premult_alphain(const Vec3& rgb0, const Vec3& rgb1, int rotatemode) float Utils::metric3premult_alphain(Vector3::Arg rgb0, Vector3::Arg rgb1, int rotatemode)
{ {
Vec3 pma = rgb0, pmb = rgb1; Vector3 pma = rgb0, pmb = rgb1;
switch(rotatemode) switch(rotatemode)
{ {
case ROTATEMODE_RGBA_RGBA: case ROTATEMODE_RGBA_RGBA:
// this function isn't supposed to be called for this rotatemode // this function isn't supposed to be called for this rotatemode
assert(0); nvUnreachable();
break; break;
case ROTATEMODE_RGBA_AGBR: case ROTATEMODE_RGBA_AGBR:
pma.Y() = premult(pma.Y(), pma.X()); pma.y = premult(pma.y, pma.x);
pma.Z() = premult(pma.Z(), pma.X()); pma.z = premult(pma.z, pma.x);
pmb.Y() = premult(pmb.Y(), pmb.X()); pmb.y = premult(pmb.y, pmb.x);
pmb.Z() = premult(pmb.Z(), pmb.X()); pmb.z = premult(pmb.z, pmb.x);
break; break;
case ROTATEMODE_RGBA_RABG: case ROTATEMODE_RGBA_RABG:
pma.X() = premult(pma.X(), pma.Y()); pma.x = premult(pma.x, pma.y);
pma.Z() = premult(pma.Z(), pma.Y()); pma.z = premult(pma.z, pma.y);
pmb.X() = premult(pmb.X(), pmb.Y()); pmb.x = premult(pmb.x, pmb.y);
pmb.Z() = premult(pmb.Z(), pmb.Y()); pmb.z = premult(pmb.z, pmb.y);
break; break;
case ROTATEMODE_RGBA_RGAB: case ROTATEMODE_RGBA_RGAB:
pma.X() = premult(pma.X(), pma.Z()); pma.x = premult(pma.x, pma.z);
pma.Y() = premult(pma.Y(), pma.Z()); pma.y = premult(pma.y, pma.z);
pmb.X() = premult(pmb.X(), pmb.Z()); pmb.x = premult(pmb.x, pmb.z);
pmb.Y() = premult(pmb.Y(), pmb.Z()); pmb.y = premult(pmb.y, pmb.z);
break; break;
default: assert(0); default: nvUnreachable();
} }
Vec3 err = pma - pmb; Vector3 err = pma - pmb;
// if nonuniform, select weights and weigh away // if nonuniform, select weights and weigh away
if (AVPCL::flag_nonuniform || AVPCL::flag_nonuniform_ati) if (AVPCL::flag_nonuniform || AVPCL::flag_nonuniform_ati)
{ {
double rwt, gwt, bwt; float rwt, gwt, bwt;
if (AVPCL::flag_nonuniform) if (AVPCL::flag_nonuniform)
{ {
rwt = 0.299; gwt = 0.587; bwt = 0.114; rwt = 0.299f; gwt = 0.587f; bwt = 0.114f;
} }
else if (AVPCL::flag_nonuniform_ati) else if (AVPCL::flag_nonuniform_ati)
{ {
rwt = 0.3086; gwt = 0.6094; bwt = 0.0820; rwt = 0.3086f; gwt = 0.6094f; bwt = 0.0820f;
} }
// weigh the components // weigh the components
err.X() *= rwt; err.x *= rwt;
err.Y() *= gwt; err.y *= gwt;
err.Z() *= bwt; err.z *= bwt;
} }
return err * err; return lengthSquared(err);
} }
double Utils::metric1premult(float rgb0, float a0, float rgb1, float a1, int rotatemode) float Utils::metric1premult(float rgb0, float a0, float rgb1, float a1, int rotatemode)
{ {
float err = premult(rgb0, a0) - premult(rgb1, a1); float err = premult(rgb0, a0) - premult(rgb1, a1);
// if nonuniform, select weights and weigh away // if nonuniform, select weights and weigh away
if (AVPCL::flag_nonuniform || AVPCL::flag_nonuniform_ati) if (AVPCL::flag_nonuniform || AVPCL::flag_nonuniform_ati)
{ {
double rwt, gwt, bwt, awt; float rwt, gwt, bwt, awt;
if (AVPCL::flag_nonuniform) if (AVPCL::flag_nonuniform)
{ {
rwt = 0.299; gwt = 0.587; bwt = 0.114; rwt = 0.299f; gwt = 0.587f; bwt = 0.114f;
} }
else if (AVPCL::flag_nonuniform_ati) else if (AVPCL::flag_nonuniform_ati)
{ {
rwt = 0.3086; gwt = 0.6094; bwt = 0.0820; rwt = 0.3086f; gwt = 0.6094f; bwt = 0.0820f;
} }
// adjust weights based on rotatemode // adjust weights based on rotatemode
switch(rotatemode) switch(rotatemode)
{ {
case ROTATEMODE_RGBA_RGBA: awt = 1.0; break; case ROTATEMODE_RGBA_RGBA: awt = 1.0f; break;
case ROTATEMODE_RGBA_AGBR: awt = rwt; break; case ROTATEMODE_RGBA_AGBR: awt = rwt; break;
case ROTATEMODE_RGBA_RABG: awt = gwt; break; case ROTATEMODE_RGBA_RABG: awt = gwt; break;
case ROTATEMODE_RGBA_RGAB: awt = bwt; break; case ROTATEMODE_RGBA_RGAB: awt = bwt; break;
default: assert(0); default: nvUnreachable();
} }
// weigh the components // weigh the components

View File

@ -11,49 +11,39 @@ See the License for the specific language governing permissions and limitations
*/ */
// utility class holding common routines // utility class holding common routines
#ifndef _UTILS_H #ifndef _AVPCL_UTILS_H
#define _UTILS_H #define _AVPCL_UTILS_H
#include "arvo/Vec4.h" #include "nvmath/Vector.h"
using namespace ArvoMath; namespace AVPCL {
#ifndef MIN inline int SIGN_EXTEND(int x, int nb) { return ((((x)&(1<<((nb)-1)))?((~0)<<(nb)):0)|(x)); }
#define MIN(x,y) ((x)<(y)?(x):(y))
#endif
#ifndef MAX static const int INDEXMODE_BITS = 1; // 2 different index modes
#define MAX(x,y) ((x)>(y)?(x):(y)) static const int NINDEXMODES = (1<<(INDEXMODE_BITS));
#endif static const int INDEXMODE_ALPHA_IS_3BITS = 0;
static const int INDEXMODE_ALPHA_IS_2BITS = 1;
#define PALETTE_LERP(a, b, i, bias, denom) Utils::lerp(a, b, i, bias, denom) static const int ROTATEMODE_BITS = 2; // 4 different rotate modes
static const int NROTATEMODES = (1<<(ROTATEMODE_BITS));
#define SIGN_EXTEND(x,nb) ((((x)&(1<<((nb)-1)))?((~0)<<(nb)):0)|(x)) static const int ROTATEMODE_RGBA_RGBA = 0;
static const int ROTATEMODE_RGBA_AGBR = 1;
#define INDEXMODE_BITS 1 // 2 different index modes static const int ROTATEMODE_RGBA_RABG = 2;
#define NINDEXMODES (1<<(INDEXMODE_BITS)) static const int ROTATEMODE_RGBA_RGAB = 3;
#define INDEXMODE_ALPHA_IS_3BITS 0
#define INDEXMODE_ALPHA_IS_2BITS 1
#define ROTATEMODE_BITS 2 // 4 different rotate modes
#define NROTATEMODES (1<<(ROTATEMODE_BITS))
#define ROTATEMODE_RGBA_RGBA 0
#define ROTATEMODE_RGBA_AGBR 1
#define ROTATEMODE_RGBA_RABG 2
#define ROTATEMODE_RGBA_RGAB 3
class Utils class Utils
{ {
public: public:
// error metrics // error metrics
static double metric4(const Vec4& a, const Vec4& b); static float metric4(nv::Vector4::Arg a, nv::Vector4::Arg b);
static double metric3(const Vec3& a, const Vec3& b, int rotatemode); static float metric3(nv::Vector3::Arg a, nv::Vector3::Arg b, int rotatemode);
static double metric1(float a, float b, int rotatemode); static float metric1(float a, float b, int rotatemode);
static double metric4premult(const Vec4& rgba0, const Vec4& rgba1); static float metric4premult(nv::Vector4::Arg rgba0, nv::Vector4::Arg rgba1);
static double metric3premult_alphaout(const Vec3& rgb0, float a0, const Vec3& rgb1, float a1); static float metric3premult_alphaout(nv::Vector3::Arg rgb0, float a0, nv::Vector3::Arg rgb1, float a1);
static double metric3premult_alphain(const Vec3& rgb0, const Vec3& rgb1, int rotatemode); static float metric3premult_alphain(nv::Vector3::Arg rgb0, nv::Vector3::Arg rgb1, int rotatemode);
static double metric1premult(float rgb0, float a0, float rgb1, float a1, int rotatemode); static float metric1premult(float rgb0, float a0, float rgb1, float a1, int rotatemode);
static float Utils::premult(float r, float a); static float Utils::premult(float r, float a);
@ -63,7 +53,9 @@ public:
// lerping // lerping
static int lerp(int a, int b, int i, int bias, int denom); static int lerp(int a, int b, int i, int bias, int denom);
static Vec4 lerp(const Vec4& a, const Vec4 &b, int i, int bias, int denom); static nv::Vector4 lerp(nv::Vector4::Arg a, nv::Vector4::Arg b, int i, int bias, int denom);
}; };
}
#endif #endif

View File

@ -102,7 +102,7 @@ namespace nvtt
Format_CTX1, // Not supported on CPU yet. Format_CTX1, // Not supported on CPU yet.
Format_BC6, Format_BC6,
Format_BC7, // Not supported yet. Format_BC7,
Format_DXT1_Luma, Format_DXT1_Luma,
}; };

View File

@ -188,6 +188,7 @@ enum Mode {
Mode_BC5_Normal_Quartic, Mode_BC5_Normal_Quartic,
//Mode_BC5_Normal_DualParaboloid, //Mode_BC5_Normal_DualParaboloid,
Mode_BC6, Mode_BC6,
Mode_BC7,
Mode_Count Mode_Count
}; };
static const char * s_modeNames[] = { static const char * s_modeNames[] = {
@ -207,6 +208,7 @@ static const char * s_modeNames[] = {
"BC5-Normal-Quartic", // Mode_BC5_Normal_Quartic, "BC5-Normal-Quartic", // Mode_BC5_Normal_Quartic,
//"BC5-Normal-DualParaboloid", // Mode_BC5_Normal_DualParaboloid, //"BC5-Normal-DualParaboloid", // Mode_BC5_Normal_DualParaboloid,
"BC6", // Mode_BC6, "BC6", // Mode_BC6,
"BC7", // Mode_BC7,
}; };
nvStaticCheck(NV_ARRAY_SIZE(s_modeNames) == Mode_Count); nvStaticCheck(NV_ARRAY_SIZE(s_modeNames) == Mode_Count);
@ -216,14 +218,14 @@ struct Test {
Mode modes[6]; Mode modes[6];
}; };
static Test s_imageTests[] = { static Test s_imageTests[] = {
{"Color", 4, {Mode_BC1, Mode_BC3_YCoCg, Mode_BC3_RGBM, Mode_BC6, /*Mode_BC3_LUVW*/}}, {"Color", 3, {Mode_BC1, Mode_BC3_YCoCg, Mode_BC3_RGBM, /*Mode_BC3_LUVW*/}},
{"Alpha", 3, {Mode_BC1_Alpha, Mode_BC2_Alpha, Mode_BC3_Alpha}}, {"Alpha", 3, {Mode_BC1_Alpha, Mode_BC2_Alpha, Mode_BC3_Alpha}},
//{"Normal", 3, {Mode_BC1_Normal, Mode_BC3_Normal, Mode_BC5_Normal}}, //{"Normal", 3, {Mode_BC1_Normal, Mode_BC3_Normal, Mode_BC5_Normal}},
{"Normal", 4, {Mode_BC5_Normal, Mode_BC5_Normal_Stereographic, Mode_BC5_Normal_Paraboloid, Mode_BC5_Normal_Quartic}}, {"Normal", 4, {Mode_BC5_Normal, Mode_BC5_Normal_Stereographic, Mode_BC5_Normal_Paraboloid, Mode_BC5_Normal_Quartic}},
{"Lightmap", 4, {Mode_BC1, Mode_BC3_YCoCg, Mode_BC3_RGBM, Mode_BC3_RGBS}}, {"Lightmap", 4, {Mode_BC1, Mode_BC3_YCoCg, Mode_BC3_RGBM, Mode_BC3_RGBS}},
{"HDR", 2, {Mode_BC3_RGBM, Mode_BC6}}, {"HDR", 2, {Mode_BC3_RGBM, Mode_BC6}},
//{"BC6", 1, {Mode_BC6}}, // temporary mode for testing {"BC6", 1, {Mode_BC6}},
//{"BC7", 1, {Mode_BC7}}, // temporary mode for testing {"BC7", 1, {Mode_BC7}},
}; };
const int s_imageTestCount = ARRAY_SIZE(s_imageTests); const int s_imageTestCount = ARRAY_SIZE(s_imageTests);
@ -616,6 +618,10 @@ int main(int argc, char *argv[])
{ {
format = nvtt::Format_BC6; format = nvtt::Format_BC6;
} }
else if (mode == Mode_BC7)
{
format = nvtt::Format_BC7;
}
else else
{ {
nvDebugCheck(false); nvDebugCheck(false);

View File

@ -265,11 +265,10 @@ int main(int argc, char *argv[])
{ {
format = nvtt::Format_BC6; format = nvtt::Format_BC6;
} }
// !!!UNDONE: add BC7 support else if (strcmp("-bc7", argv[i]) == 0)
/*else if (strcmp("-bc7", argv[i]) == 0)
{ {
format = nvtt::Format_BC7; format = nvtt::Format_BC7;
}*/ }
// Undocumented option. Mainly used for testing. // Undocumented option. Mainly used for testing.
else if (strcmp("-ext", argv[i]) == 0) else if (strcmp("-ext", argv[i]) == 0)
@ -355,7 +354,7 @@ int main(int argc, char *argv[])
printf(" -bc4 \tBC4 format (ATI1)\n"); printf(" -bc4 \tBC4 format (ATI1)\n");
printf(" -bc5 \tBC5 format (3Dc/ATI2)\n"); printf(" -bc5 \tBC5 format (3Dc/ATI2)\n");
printf(" -bc6 \tBC6 format\n"); printf(" -bc6 \tBC6 format\n");
//printf(" -bc7 \tBC7 format\n\n"); printf(" -bc7 \tBC7 format\n\n");
printf("Output options:\n"); printf("Output options:\n");
printf(" -silent \tDo not output progress messages\n"); printf(" -silent \tDo not output progress messages\n");