OpenGL 使用合批优化渲染 Draw Call 过程~~

音视频开发进阶 2021-11-22 08:32

收录于合集

#opengl4个

#OpenGL 学习专题36个

什么是Draw Call

在渲染物体之前，物体模型的顶点数据保存在内存中；CPU向GPU发送渲染指令后，数据会被复制到显存中，然后进行渲染。

在这个过程中，CPU向GPU发送渲染指令的过程，名为Draw Call。

OpenGL中的渲染指令是指 glDrawArrays、glDrawElements 这类绘制函数，例如：glDrawArrays(GL_TRIANGLES, 0, amount_of_vertices);

什么是批处理

当我们在渲染一个场景时，该场景中可能包含非常多的简单模型，比如星星、草等。这些模型的顶点数据极少，GPU渲染其中任何一个的速度都很快，而每单独渲染一个模型都要发起一次Draw Call。

但是发送指令的过程是很慢的（CPU要告诉GPU该从哪个缓冲读取数据、从哪里寻找顶点属性，而且这些都是在相对缓慢的CPU到GPU总线(CPU to GPU Bus)上进行的），当Draw Call很多时，这就会使渲染整个场景的速度变得非常慢。传统的逐个绘制的写法如下：

for(unsigned int i = 0; i < amount_of_models_to_draw; i++)
{
    DoSomePreparations(); // 绑定VAO，绑定纹理，设置uniform等
    glDrawArrays(GL_TRIANGLES, 0, amount_of_vertices);
}

但如果我们能将这些星星或者草一次性全部发给GPU去渲染，速度就会非常快，这就是批处理。

批处理能节省Draw Call的数量，极大提升渲染速度，因此在Unity中，批处理的作用非常大

使用批处理

使用批处理的前提是多个物体使用同一个shader做渲染，在Unity中，就是指使用相同的材质球。

在OpenGL中使用批处理，只需要将glDrawArrays和glDrawElements的渲染调用分别改为glDrawArraysInstanced和glDrawElementsInstanced就可以了。

这些渲染函数的实例化版本需要一个额外的参数，叫做实例数量(Instance Count)，用来指定需要渲染的实例个数。

只调用这个函数本身并没有什么用：渲染同一个物体一千次对我们并没有什么用处，每个物体都是完全相同的，而且还在同一个位置，我们只能看见一个物体！

出于这个原因，GLSL在顶点着色器中嵌入了另一个内建变量，gl_InstanceID。

在使用实例化渲染调用时，gl_InstanceID会从0开始，在每个实例被渲染时递增1。

比如说，我们正在渲染第43个实例，那么顶点着色器中它的gl_InstanceID将会是42。

因为每个实例都有唯一的ID，我们可以建立一个数组，将ID与位置值对应起来，将每个实例放置在世界的不同位置。

使用 gl_InstanceID 对多个物体做偏移处理

物体顶点数据

// Two triangles forming one small quad.
// Each vertex is 5 floats: x, y (position) followed by r, g, b (color).
float quadVertices[] = {
    // first triangle
    -0.05f,  0.05f,   1.0f, 0.0f, 0.0f,   // top-left,     red
     0.05f, -0.05f,   0.0f, 1.0f, 0.0f,   // bottom-right, green
    -0.05f, -0.05f,   0.0f, 0.0f, 1.0f,   // bottom-left,  blue

    // second triangle
    -0.05f,  0.05f,   1.0f, 0.0f, 0.0f,   // top-left,     red
     0.05f, -0.05f,   0.0f, 1.0f, 0.0f,   // bottom-right, green
     0.05f,  0.05f,   0.0f, 1.0f, 1.0f    // top-right,    cyan
};

顶点着色器：

#version 330 core
// Per-vertex inputs: 2D position and RGB color.
layout (location = 0) in vec2 aPos;
layout (location = 1) in vec3 aColor;

// Color handed on to the fragment stage.
out vec3 fColor;

// One translation per instance, looked up with gl_InstanceID.
uniform vec2 offsets[100];

void main()
{
    fColor = aColor;
    // Shift the vertex by the offset that belongs to the current instance.
    gl_Position = vec4(aPos + offsets[gl_InstanceID], 0.0, 1.0);
}

片元着色器：

#version 330 core
// Color received from the vertex stage.
in vec3 fColor;

// Color written for this fragment.
out vec4 FragColor;

void main()
{
    // Extend the RGB color to RGBA with alpha fixed at 1.0.
    FragColor = vec4(fColor.rgb, 1.0);
}

绘制（绘制之前需要先用 glUniform2f/glUniform2fv 等函数把100个偏移量上传到着色器的 offsets 数组）：

// Bind the quad's vertex array object before drawing.
glBindVertexArray(quadVAO);
// Draw 100 instances of the same shape (6 vertices each) with a single call.
glDrawArraysInstanced(GL_TRIANGLES, 0, 6, 100);

实例测试：使用实例化数组(Instanced Array)传递偏移量

顶点着色器：

#version 330 core
layout (location = 0) in vec2 aPos;     // vertex position
layout (location = 1) in vec3 aColor;   // vertex color
layout (location = 2) in vec2 aOffset;  // translation added to the position

// Color handed on to the fragment stage.
out vec3 fColor;

void main()
{
    // Translate the vertex, then place it at z = 0 with w = 1.
    vec2 shifted = aPos + aOffset;
    gl_Position = vec4(shifted, 0.0, 1.0);
    fColor = aColor;
}

片元着色器：

#version 330 core
// Color received from the vertex stage.
in vec3 fColor;

// Color written for this fragment.
out vec4 FragColor;

void main()
{
    // Extend the RGB color to RGBA with alpha fixed at 1.0.
    FragColor = vec4(fColor.rgb, 1.0);
}

运行源码：

#include <glad/glad.h>
#include <GLFW/glfw3.h>

#include "shader.h"

#include <iostream>

// Resize handler; defined after main(), where it resets the GL viewport
// to the given width and height.
void framebuffer_size_callback(GLFWwindow* window, int width, int height);

// settings
// Window width and height passed to glfwCreateWindow() in main().
const unsigned int SCR_WIDTH = 1280;
const unsigned int SCR_HEIGHT = 720;

int main()
{
 // glfw: initialize and configure
 // ------------------------------
 glfwInit();
 glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 3);
 glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 3);
 glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE);


 // glfw window creation
 // --------------------
 GLFWwindow* window = glfwCreateWindow(SCR_WIDTH, SCR_HEIGHT, "LearnOpenGL", NULL, NULL);
 if (window == NULL)
 {
  std::cout << "Failed to create GLFW window" << std::endl;
  glfwTerminate();
  return -1;
 }
 glfwMakeContextCurrent(window);

 // glad: load all OpenGL function pointers
 // ---------------------------------------
 if (!gladLoadGLLoader((GLADloadproc)glfwGetProcAddress))
 {
  std::cout << "Failed to initialize GLAD" << std::endl;
  return -1;
 }

 // configure global opengl state
 // -----------------------------
 glEnable(GL_DEPTH_TEST);

 // build and compile shaders
 // -------------------------
 Shader shader("batch.vs", "batch.fs");

 // generate a list of 100 quad locations/translation-vectors
 // ---------------------------------------------------------
 glm::vec2 translations[100];
 int index = 0;
 float offset = 0.1f;
 for (int y = -10; y < 10; y += 2)
 {
  for (int x = -10; x < 10; x += 2)
  {
   glm::vec2 translation;
   translation.x = (float)x / 10.0f + offset;
   translation.y = (float)y / 10.0f + offset;
   translations[index++] = translation;
  }
 }

 // store instance data in an array buffer
 // --------------------------------------
 unsigned int instanceVBO;
 glGenBuffers(1, &instanceVBO);
 glBindBuffer(GL_ARRAY_BUFFER, instanceVBO);
 glBufferData(GL_ARRAY_BUFFER, sizeof(glm::vec2) * 100, &translations[0], GL_STATIC_DRAW);
 glBindBuffer(GL_ARRAY_BUFFER, 0);

 // set up vertex data (and buffer(s)) and configure vertex attributes
 // ------------------------------------------------------------------
 float quadVertices[] = {
  // positions     // colors
  -0.05f,  0.05f,  1.0f, 1.0f, 0.0f,
   0.05f, -0.05f,  1.0f, 1.0f, 0.0f,
  -0.05f, -0.05f,  1.0f, 1.0f, 0.0f,

  -0.05f,  0.05f,  1.0f, 1.0f, 0.0f,
   0.05f, -0.05f,  1.0f, 1.0f, 0.0f,
   0.05f,  0.05f,  1.0f, 1.0f, 0.0f
 };
 unsigned int quadVAO, quadVBO;
 glGenVertexArrays(1, &quadVAO);
 glGenBuffers(1, &quadVBO);
 glBindVertexArray(quadVAO);
 glBindBuffer(GL_ARRAY_BUFFER, quadVBO);
 glBufferData(GL_ARRAY_BUFFER, sizeof(quadVertices), quadVertices, GL_STATIC_DRAW);
 glEnableVertexAttribArray(0);
 glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, 5 * sizeof(float), (void*)0);
 glEnableVertexAttribArray(1);
 glVertexAttribPointer(1, 3, GL_FLOAT, GL_FALSE, 5 * sizeof(float), (void*)(2 * sizeof(float)));
 // also set instance data
 glEnableVertexAttribArray(2);
 glBindBuffer(GL_ARRAY_BUFFER, instanceVBO); // this attribute comes from a different vertex buffer
 glVertexAttribPointer(2, 2, GL_FLOAT, GL_FALSE, 2 * sizeof(float), (void*)0);
 glBindBuffer(GL_ARRAY_BUFFER, 0);
 //我们调用了glVertexAttribDivisor。这个函数告诉了OpenGL该什么时候更新顶点属性的内容至新一组数据。它的第一个参数是需要的顶点属性，第二个参数是属性除数(Attribute Divisor)。默认情况下，属性除数是0，告诉OpenGL我们需要在顶点着色器的每次迭代时更新顶点属性。将它设置为1时，我们告诉OpenGL我们希望在渲染一个新实例的时候更新顶点属性。而设置为2时，我们希望每2个实例更新一次属性，以此类推。我们将属性除数设置为1，是在告诉OpenGL，处于位置值2的顶点属性是一个实例化数组。
 glVertexAttribDivisor(2, 1); // tell OpenGL this is an instanced vertex attribute.


 // render loop
 // -----------
 while (!glfwWindowShouldClose(window))
 {
  // render
  // ------
  glClearColor(0.1f, 0.1f, 0.1f, 1.0f);
  glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);

  // draw 100 instanced quads
  shader.use();
  glBindVertexArray(quadVAO);
  glDrawArraysInstanced(GL_TRIANGLES, 0, 6, 100); // 100 triangles of 6 vertices each
  glBindVertexArray(0);

  // glfw: swap buffers and poll IO events (keys pressed/released, mouse moved etc.)
  // -------------------------------------------------------------------------------
  glfwSwapBuffers(window);
  glfwPollEvents();
 }

 // optional: de-allocate all resources once they've outlived their purpose:
 // ------------------------------------------------------------------------
 glDeleteVertexArrays(1, &quadVAO);
 glDeleteBuffers(1, &quadVBO);

 glfwTerminate();
 return 0;
}

// glfw: whenever the window size changed (by OS or user resize) this callback function executes
// ---------------------------------------------------------------------------------------------
void framebuffer_size_callback(GLFWwindow* window, int width, int height)
{
 // make sure the viewport matches the new window dimensions; note that width and 
 // height will be significantly larger than specified on retina displays.
 glViewport(0, 0, width, height);
}