以120+ fps渲染数百万个精灵

图片

如果您常逛DOTS论坛,您可能会看到类似这样的帖子:作者编写了一个能够渲染一百万个动画精灵的库,并且仍然能保持60fps。我自己也写过一个DOTS精灵渲染器,对我们的游戏来说足够好了,但它应付不了一百万个精灵。我很好奇。

因此,我fork了该代码仓库,并决定检查它是否可以在Academia(我们的游戏)中使用。我用它做了一点实验,看它如何渲染一个精灵,一百个,然后数千个。事实证明,它还没有准备好在我们的游戏中使用。它缺少某些功能,例如,从后到前对精灵进行排序。我试着为这个功能写了一个hack。在阅读代码的过程中,我意识到可能值得编写一个我们可以使用的全新的库。我只需要弄清楚它是如何渲染精灵的,而其中的原理我已经理解了。

基础


如果我想重新创建这种渲染技术,那么我需要先做最简单的事情:渲染一个单独的精灵。该库使用ComputeBuffers。它们通常与计算着色器配合使用,用来把计算转移到GPU。我之前不知道它们也可以用在把内容渲染到屏幕上的常规着色器中。您可以把它们看作可以分配给材质的数字数组,然后着色器再访问这些数组。因此,您可以根据需要传输数据,例如位置,旋转,比例,uv坐标,颜色。下面是基于这个很棒的库修改的着色器:

  Shader "Instanced/ComputeBufferSprite" {
    // Draws instanced sprite quads. Per-instance transform, UV rectangle and tint
    // are read from StructuredBuffers indexed by SV_InstanceID.
    Properties {
        _MainTex ("Albedo (RGB)", 2D) = "white" {}
    }
    
    SubShader {
        Tags{
            "Queue"="Transparent"
            "IgnoreProjector"="True"
            "RenderType"="Transparent"
        }
        Cull Back
        Lighting Off
        ZWrite On
        // Premultiplied-alpha blending; frag() multiplies rgb by alpha to match.
        Blend One OneMinusSrcAlpha
        Pass {
            CGPROGRAM
            // Upgrade NOTE: excluded shader from OpenGL ES 2.0 because it uses non-square matrices
            #pragma exclude_renderers gles

            #pragma vertex vert
            #pragma fragment frag
            #pragma target 4.5

            #include "UnityCG.cginc"

            sampler2D _MainTex;

            // xy for position, z for rotation, and w for scale
            StructuredBuffer<float4> transformBuffer;

            // xy is the uv size, zw is the uv offset/coordinate
            StructuredBuffer<float4> uvBuffer;

            // rgba tint multiplied with the sampled texel
            StructuredBuffer<float4> colorsBuffer;

            struct v2f{
                float4 pos : SV_POSITION;
                float2 uv: TEXCOORD0;
                fixed4 color : COLOR0;
            };

            // Builds a 4x4 rotation matrix around the Z axis for an angle in radians.
            float4x4 rotationZMatrix(float zRotRadians) {
                float c = cos(zRotRadians);
                float s = sin(zRotRadians);
                float4x4 ZMatrix  = 
                    float4x4( 
                       c,  -s, 0,  0,
                       s,  c,  0,  0,
                       0,  0,  1,  0,
                       0,  0,  0,  1);
                return ZMatrix;
            }

            v2f vert (appdata_full v, uint instanceID : SV_InstanceID) {
                float4 transform = transformBuffer[instanceID];
                float4 uv = uvBuffer[instanceID];
                
                // Shift the quad by (-0.5, -0.5) before rotating, so a unit quad
                // spanning (0,0)-(1,1) rotates around its center
                v.vertex = mul(v.vertex - float4(0.5, 0.5, 0,0), rotationZMatrix(transform.z));
                
                // Scale the rotated vertex and translate it to the instance position.
                // The depth (z) is derived from the y position: -y / 10
                float3 worldPosition = float3(transform.x, transform.y, -transform.y/10) + (v.vertex.xyz * transform.w);
                
                v2f o;
                o.pos = UnityObjectToClipPos(float4(worldPosition, 1.0f));
                
                // XY here is the dimension (width, height). 
                // ZW is the offset in the texture (the actual UV coordinates)
                o.uv =  v.texcoord * uv.xy + uv.zw;
                
                o.color = colorsBuffer[instanceID];
                return o;
            }

            fixed4 frag (v2f i) : SV_Target{
                fixed4 col = tex2D(_MainTex, i.uv) * i.color;
                // Discard fragments whose alpha is below 1/255 so they do not write depth
                clip(col.a - 1.0 / 255.0);
                // Premultiply rgb by alpha for the One / OneMinusSrcAlpha blend mode
                col.rgb *= col.a;

                return col;
            }

            ENDCG
        }
    }
}

变量transformBuffer,uvBuffer和colorsBuffer是我们在代码中使用ComputeBuffers定义的“数组”。这就是我们(目前)渲染精灵所需的全部。这是用于渲染单个精灵的MonoBehaviour脚本:

/// <summary>
/// Renders a single sprite quad with Graphics.DrawMeshInstancedIndirect(), feeding the
/// per-instance transform, UV rectangle and color to the shader through ComputeBuffers.
/// </summary>
public class ComputeBufferBasic : MonoBehaviour {
    // Size in bytes of one float4 element (4 floats of 4 bytes each)
    private const int FLOAT4_STRIDE = sizeof(float) * 4;

    [SerializeField]
    private Material material;

    private Mesh mesh;
    
    // Transform here is a compressed transform information
    // xy is the position, z is rotation, w is the scale
    private ComputeBuffer transformBuffer;
    
    // uvBuffer contains float4 values in which xy is the uv dimension and zw is the texture offset
    private ComputeBuffer uvBuffer;
    private ComputeBuffer colorBuffer;

    // Indirect draw arguments. Only the first two matter here:
    // index count per instance (6 for a quad) and instance count (1)
    private readonly uint[] args = {
        6, 1, 0, 0, 0
    };
    
    private ComputeBuffer argsBuffer;

    private void Awake() {
        if (this.material == null) {
            // Without a material there is nothing to bind the buffers to or to draw with
            Debug.LogError("ComputeBufferBasic requires a material.", this);
            this.enabled = false;
            return;
        }

        this.mesh = CreateQuad();
        
        // One sprite at (0, 0), no rotation, scaled to 0.2
        float scale = 0.2f;
        this.transformBuffer = CreateFloat4Buffer("transformBuffer", new float4(0, 0, 0, scale));
        
        // A quarter of the texture in each direction, starting at the lower left corner
        this.uvBuffer = CreateFloat4Buffer("uvBuffer", new float4(0.25f, 0.25f, 0, 0));
        
        // White tint
        this.colorBuffer = CreateFloat4Buffer("colorsBuffer", new float4(1, 1, 1, 1));

        this.argsBuffer = new ComputeBuffer(1, this.args.Length * sizeof(uint), ComputeBufferType.IndirectArguments);
        this.argsBuffer.SetData(this.args);
    }

    private static readonly Bounds BOUNDS = new Bounds(Vector2.zero, Vector3.one);

    private void Update() {   
        // Draw
        Graphics.DrawMeshInstancedIndirect(this.mesh, 0, this.material, BOUNDS, this.argsBuffer);
    }

    // ComputeBuffers and meshes created from script are not cleaned up automatically,
    // so they are released explicitly here
    private void OnDestroy() {
        ReleaseBuffer(ref this.transformBuffer);
        ReleaseBuffer(ref this.uvBuffer);
        ReleaseBuffer(ref this.colorBuffer);
        ReleaseBuffer(ref this.argsBuffer);

        if (this.mesh != null) {
            Destroy(this.mesh);
            this.mesh = null;
        }
    }

    // Creates a one-element float4 buffer and binds it to the material under the given shader property name
    private ComputeBuffer CreateFloat4Buffer(string shaderProperty, float4 value) {
        ComputeBuffer buffer = new ComputeBuffer(1, FLOAT4_STRIDE);
        buffer.SetData(new float4[]{ value });
        this.material.SetBuffer(Shader.PropertyToID(shaderProperty), buffer);
        return buffer;
    }

    // Releases the buffer if it was created and clears the reference
    private static void ReleaseBuffer(ref ComputeBuffer buffer) {
        if (buffer != null) {
            buffer.Release();
            buffer = null;
        }
    }
    
    // This can be refactored to a utility class
    // Just added it here for the article
    // Builds a unit quad spanning (0,0) to (1,1), made of two triangles
    private static Mesh CreateQuad() {
        Mesh mesh = new Mesh();
        Vector3[] vertices = new Vector3[4];
        vertices[0] = new Vector3(0, 0, 0);
        vertices[1] = new Vector3(1, 0, 0);
        vertices[2] = new Vector3(0, 1, 0);
        vertices[3] = new Vector3(1, 1, 0);
        mesh.vertices = vertices;

        int[] tri = new int[6];
        tri[0] = 0;
        tri[1] = 2;
        tri[2] = 1;
        tri[3] = 2;
        tri[4] = 3;
        tri[5] = 1;
        mesh.triangles = tri;

        Vector3[] normals = new Vector3[4];
        normals[0] = -Vector3.forward;
        normals[1] = -Vector3.forward;
        normals[2] = -Vector3.forward;
        normals[3] = -Vector3.forward;
        mesh.normals = normals;

        Vector2[] uv = new Vector2[4];
        uv[0] = new Vector2(0, 0);
        uv[1] = new Vector2(1, 0);
        uv[2] = new Vector2(0, 1);
        uv[3] = new Vector2(1, 1);
        mesh.uv = uv;

        return mesh;
    }
}

让我们按顺序处理此代码。对于材质,我们需要创建一种新材质,然后为其设置上述着色器。给它分配一个纹理/精灵表。我使用库中的Sprite工作表,这是一个4x4 Sprite表情符号图标。


这里的网格是由CreateQuad()创建的网格。它只是由两个三角形组成的四边形。接下来是三个ComputeBuffer变量,稍后我们会把它们设置到材质上。我用与着色器中的StructuredBuffer变量相同的方式命名它们。这不是必需的,但是更方便。

args和argsBuffer变量将用于调用Graphics.DrawMeshInstancedIndirect()。文档在这里。该函数需要一个具有五个uint值的缓冲区。在我们的情况下,只有前两个很重要。第一个是索引的数量,对于我们的四边形,它是6。第二个是四边形将被渲染的次数,这里是1。我也把它理解为着色器用来索引StructuredBuffer的索引上限。就像这样:

// Conceptual pseudo-code, not a real API: the index i runs from 0 to count - 1
// and stands for the instance index the shader uses to read its StructuredBuffers.
for(int i = 0; i < count; ++i) {
    CallShaderUsingThisIndexForBuffers(i);
}

Awake()方法只是准备ComputeBuffers并把它们分配给材质。我们以0.2f的比例且不旋转的方式在点(0,0)上渲染精灵。对于UV,我们使用左下角的精灵(亲吻表情符号)。然后我们分配白色。最后把args数组设置到argsBuffer中。

在Update()中,我们仅调用Graphics.DrawMeshInstancedIndirect()。(我还不太清楚这里的BOUNDS是怎么用的,只是把它从库中照搬了过来。)

最后的步骤将是使用正交相机准备一个场景。创建另一个GameObject并添加ComputeBufferBasic组件。让我们使用刚刚显示的着色器为其设置材质。在启动时,我们得到以下信息:


哦耶!使用ComputeBuffer渲染的精灵。

如果可以做一个,就可以做很多


既然我们已经学会了如何使用ComputeBuffers渲染一个精灵,我们可以画很多东西。这是我创建的另一个脚本,该脚本具有数量参数,并以随机的位置,比例,旋转和颜色渲染指定数量的精灵:

/// <summary>
/// Renders <c>count</c> sprites with random position, rotation, scale, sprite cell and color
/// in a single Graphics.DrawMeshInstancedIndirect() call. Per-instance data is passed to the
/// shader through ComputeBuffers.
/// </summary>
public class ComputeBufferMultipleSprites : MonoBehaviour {
    // Size in bytes of one float4 element (4 floats of 4 bytes each)
    private const int FLOAT4_STRIDE = sizeof(float) * 4;

    // Sprites are spawned with x in [-SPAWN_HALF_WIDTH, SPAWN_HALF_WIDTH]
    // and y in [-SPAWN_HALF_HEIGHT, SPAWN_HALF_HEIGHT]
    private const float SPAWN_HALF_WIDTH = 8f;
    private const float SPAWN_HALF_HEIGHT = 4f;

    [SerializeField]
    private Material material;
    
    [SerializeField]
    private float minScale = 0.15f;
    
    [SerializeField]
    private float maxScale = 0.2f;  

    [SerializeField]
    private int count;

    private Mesh mesh;
    
    // Matrix here is a compressed transform information
    // xy is the position, z is rotation, w is the scale
    private ComputeBuffer transformBuffer;
    
    // uvBuffer contains float4 values in which xy is the uv dimension and zw is the texture offset
    private ComputeBuffer uvBuffer;
    private ComputeBuffer colorBuffer;

    // Indirect draw arguments: index count per instance, instance count, then three unused values
    private uint[] args;
    
    private ComputeBuffer argsBuffer;

    // Volume that encloses every instance; passed to DrawMeshInstancedIndirect(),
    // which uses it to cull the draw call as a whole
    private Bounds bounds;

    private void Awake() {
        QualitySettings.vSyncCount = 0;
        Application.targetFrameRate = -1;

        if (this.material == null) {
            // Without a material there is nothing to bind the buffers to or to draw with
            Debug.LogError("ComputeBufferMultipleSprites requires a material.", this);
            this.enabled = false;
            return;
        }

        if (this.count <= 0) {
            // A ComputeBuffer cannot be created with a non-positive element count
            Debug.LogWarning("ComputeBufferMultipleSprites: count must be greater than zero. Nothing will be drawn.", this);
            this.enabled = false;
            return;
        }
        
        this.mesh = CreateQuad();
        
        // Prepare values
        float4[] transforms = new float4[this.count];
        float4[] uvs = new float4[this.count];
        float4[] colors = new float4[this.count];

        const float maxRotation = Mathf.PI * 2;
        for (int i = 0; i < this.count; ++i) {
            // transform
            float x = UnityEngine.Random.Range(-SPAWN_HALF_WIDTH, SPAWN_HALF_WIDTH);
            float y = UnityEngine.Random.Range(-SPAWN_HALF_HEIGHT, SPAWN_HALF_HEIGHT);
            float rotation = UnityEngine.Random.Range(0f, maxRotation);
            float scale = UnityEngine.Random.Range(this.minScale, this.maxScale);
            transforms[i] = new float4(x, y, rotation, scale);
            
            // UV: pick one cell of the 4x4 sprite sheet (integer Range excludes the upper bound)
            float u = UnityEngine.Random.Range(0, 4) * 0.25f;
            float v = UnityEngine.Random.Range(0, 4) * 0.25f;
            uvs[i] = new float4(0.25f, 0.25f, u, v);
            
            // color
            float r = UnityEngine.Random.Range(0f, 1.0f);
            float g = UnityEngine.Random.Range(0f, 1.0f);
            float b = UnityEngine.Random.Range(0f, 1.0f);
            colors[i] = new float4(r, g, b, 1.0f);
        }
        
        this.transformBuffer = CreateFloat4Buffer("transformBuffer", transforms);
        this.uvBuffer = CreateFloat4Buffer("uvBuffer", uvs);
        this.colorBuffer = CreateFloat4Buffer("colorsBuffer", colors);

        this.args = new uint[] {
            6, (uint)this.count, 0, 0, 0
        };
        this.argsBuffer = new ComputeBuffer(1, this.args.Length * sizeof(uint), ComputeBufferType.IndirectArguments);
        this.argsBuffer.SetData(this.args);

        // Cover the whole spawn area plus a margin for the largest sprite, so the draw call
        // is not culled while sprites are still visible. The shader places sprites at
        // z = -y / 10, which stays well within a depth of 1 for the spawn range used here.
        float margin = Mathf.Max(Mathf.Abs(this.minScale), Mathf.Abs(this.maxScale));
        this.bounds = new Bounds(
            Vector3.zero,
            new Vector3(2f * (SPAWN_HALF_WIDTH + margin), 2f * (SPAWN_HALF_HEIGHT + margin), 1f));
    }

    private void Update() {   
        // Draw
        Graphics.DrawMeshInstancedIndirect(this.mesh, 0, this.material, this.bounds, this.argsBuffer);
    }

    // ComputeBuffers and meshes created from script are not cleaned up automatically,
    // so they are released explicitly here
    private void OnDestroy() {
        ReleaseBuffer(ref this.transformBuffer);
        ReleaseBuffer(ref this.uvBuffer);
        ReleaseBuffer(ref this.colorBuffer);
        ReleaseBuffer(ref this.argsBuffer);

        if (this.mesh != null) {
            Destroy(this.mesh);
            this.mesh = null;
        }
    }

    // Creates a float4 buffer holding the given values and binds it to the material
    // under the given shader property name
    private ComputeBuffer CreateFloat4Buffer(string shaderProperty, float4[] values) {
        ComputeBuffer buffer = new ComputeBuffer(values.Length, FLOAT4_STRIDE);
        buffer.SetData(values);
        this.material.SetBuffer(Shader.PropertyToID(shaderProperty), buffer);
        return buffer;
    }

    // Releases the buffer if it was created and clears the reference
    private static void ReleaseBuffer(ref ComputeBuffer buffer) {
        if (buffer != null) {
            buffer.Release();
            buffer = null;
        }
    }

    // Builds a unit quad spanning (0,0) to (1,1), made of two triangles.
    // Same quad as in the single-sprite example; this can be refactored to a utility class.
    private static Mesh CreateQuad() {
        Mesh quad = new Mesh();

        quad.vertices = new Vector3[] {
            new Vector3(0, 0, 0),
            new Vector3(1, 0, 0),
            new Vector3(0, 1, 0),
            new Vector3(1, 1, 0)
        };

        quad.triangles = new int[] {
            0, 2, 1,
            2, 3, 1
        };

        quad.normals = new Vector3[] {
            -Vector3.forward,
            -Vector3.forward,
            -Vector3.forward,
            -Vector3.forward
        };

        quad.uv = new Vector2[] {
            new Vector2(0, 0),
            new Vector2(1, 0),
            new Vector2(0, 1),
            new Vector2(1, 1)
        };

        return quad;
    }
}

与渲染单个精灵相比,几乎没有任何变化。不同之处在于,现在我们准备的数组包含X个元素,X由序列化变量count指定。我们还把args数组中的第二个数字设置为count。

使用此脚本,我们可以将count设置为任何值,它将生成指定数量的sprite,但是它将仅在一个draw调用中呈现它们。


这是10,000个随机精灵。

为什么将minScale和maxScale序列化变量?当我用600,000个精灵测试代码时,我注意到速度降至60fps以下。如果源库具有一百万的能力,那么此代码为什么会失败?


这是600,000个精灵。它工作缓慢。

我猜测这可能是过度绘制(overdraw)造成的。因此,我把minScale和maxScale做成了序列化参数,并设置了较小的数字,如0.01和0.02。直到那时,我才能够以超过60fps的速度(由编辑器的分析器判断)渲染一百万个精灵。也许这段代码还能承受更多,但谁需要一百万个精灵?在我们的游戏中,连这个数字的四分之一都用不到。


一百万个小精灵。

探查器


因此,我想看看这段代码在测试构建版本中的表现。我的机器配置:3.7 GHz(4核),16 GB RAM,Radeon RX460。这是我得到的:


如您所见,一切都很快。对Graphics.DrawMeshInstancedIndirect()的调用显示为0毫秒。尽管我不太确定是否要担心Gfx.PresentFrame。


没那么快


尽管结果令人印象深刻,但在实际游戏中,代码将以不同的方式使用。最重要的缺失功能是精灵的排序,而它将占用大部分CPU资源。此外,对于移动的精灵,需要在每帧中更新ComputeBuffers。还有很多工作要做。我并不期望在一个实际可用的框架中达到一百万,但是如果我能在不到2毫秒的时间内处理300,000个,那么这对我来说已经足够了。DOTS肯定会对此有所帮助,但这是另一篇文章的主题。

All Articles