先了解一些Compute Shader的基本概念
https://www.cg.tuwien.ac.at/courses/Realtime/repetitorium/VU.WS.2014/rtr_rep_2014_ComputeShader.pdf
最小执行单元为thread,也称为invocation,一个thread执行一遍shader程序
网格化的thread构成了work group,可以是1维、2维或者3维的。shader程序中用layout语句指定的local_size_x,local_size_y,local_size_z决定了work group的大小。
网格化的work group构成了dispatch,同样也可以是1维、2维或者3维的。glDispatchCompute中的三个参数指定了x, y, z三维的大小
ps:
https://www.khronos.org/assets/uploads/developers/library/2014-siggraph-bof/KITE-BOF_Aug14.pdf
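根据资料,work group的尺寸也可以完全由C程序决定,不过这需要ARB_compute_variable_group_size扩展的支持(shader中要用#extension GL_ARB_compute_variable_group_size : enable启用)。GLSL中的声明改为
layout( local_size_variable ) in;
同时C程序中要改用glDispatchComputeGroupSizeARB来发起计算,在调用时同时传入work group的数量和每个work group的尺寸。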
对比cuda的概念,compute shader中的work group就是cuda的block,dispatch就是cuda的grid。
OpenGL 4.2中Image介绍
https://blog.csdn.net/u010462297/article/details/50469950
image类型数据与sampler类型数据的区别:sampler取样本的时候,可以是非整数坐标,而且需要对周边像素进行插值才能获取。而image类型只能取整数坐标,直接取出原始数据样本
ps: gl_GlobalInvocationID.x的类型是uint,而imageLoad需要的坐标参数为int。可以用int(表达式)进行强制类型转换
要开始跑计算,首先要解决的是数据的输入和输出问题。Compute Shader中参数的声明非常类似原来的Fragment Shader,还是使用uniform作为输入。不过数据类型可以使用image类型了,不再是sampler类型,解决精准取值的问题。另外,作为输入的image,还需要在layout指令中指明格式,例如r32f或者rgba32f(这里只有1、2、4通道的格式,没有rgb三通道的格式。作为输出的image可以不指明格式,原因是GLSL规范只要求没有writeonly限定符的image变量必须带格式限定符;也就是说,严格来讲输出image要声明为writeonly才可以省略格式,部分驱动可能比较宽松,不写writeonly也能编译通过)。而且,image对象需要调用glBindImageTexture跟纹理对象进行绑定。另外还有一点需要非常注意:
“A very important restriction for using shader images is that the underlying texture must have been allocated using "immutable" storage, i.e. via glTexStorage*()-like functions, and not glTexImage2D().”
cpu与gpu之间传数据,往gpu传入使用 glTexSubImage,从gpu读出使用glGetTexImage(glReadPixels需要跟FBO一起使用才可以,而glGetTexImage可以直接从texture读取)
至于OpenGL的初始化工作,还是采用freeglut+glew的经典组合就可以完成。
一个最简单的例子,传输一个一维数组,然后给数组里面每个元素加1。
#include <GL/glew.h>
#include <GL/freeglut.h>
#include <stdio.h>
#include <stdlib.h>
/* Forward declarations of the set-up routines defined further down. */
void InitWindow(int argc, char* argv[]);
void Initialize(int argc, char* argv[]);
void CreateShaders(void);
void CreateTexture(void);

/* Handle of the linked program object and of the compute shader attached to it. */
GLuint programId;
GLuint computeShaderId;

/* Handles of the 1D textures that are bound as the shader's input and output images. */
GLuint inputTexId;
GLuint outputTexId;

/* Number of float elements in the input array and in the output array. */
const int kArraySize = 32;
/*
 * GLSL source of the compute shader.
 *
 * Each invocation handles one element: it loads the texel at its global x
 * index from in_array (image unit 0), adds 1.0 to the red channel and stores
 * the result at the same index in out_array (image unit 1).
 *
 * The images are declared readonly / writeonly so that the shader matches the
 * GL_READ_ONLY / GL_WRITE_ONLY access modes the host passes to
 * glBindImageTexture, and so that the compiler rejects an accidental access
 * in the wrong direction.
 *
 * Every GLSL line ends in '\n' so that line numbers in the compile log map
 * onto the lines below. local_size_x (16) is the work-group width the host
 * code divides the array size by when it dispatches.
 */
const GLchar* Program =
    "#version 430\n"
    "layout (local_size_x = 16, local_size_y = 1) in;\n"
    "layout (r32f, binding = 0) uniform readonly image1D in_array;\n"
    "layout (r32f, binding = 1) uniform writeonly image1D out_array;\n"
    "\n"
    "void main()\n"
    "{\n"
    "    int pos = int(gl_GlobalInvocationID.x);\n"
    "    vec4 value = imageLoad(in_array, pos);\n"
    "    value.x += 1.0f;\n"
    "    imageStore(out_array, pos, value);\n"
    "}\n";
void CheckGLErrors()
{
GLenum e = glGetError();
if (e != GL_NO_ERROR) {
fprintf(stderr, "OpenGL error: %s (%d)\n", gluErrorString(e), e);
exit(20);
}
}
/*
 * Create the GLUT window, and with it the OpenGL context.
 *
 * argc/argv are forwarded to glutInit(). The process exits with EXIT_FAILURE
 * when no window could be created.
 */
void InitWindow(int argc, char* argv[])
{
    glutInit(&argc, argv);

    /* Compute shaders (and the "#version 430" shader in this file) require an
     * OpenGL 4.3 context; a core profile is not defined for versions below
     * 3.2, so asking for 2.0 together with GLUT_CORE_PROFILE was inconsistent. */
    glutInitContextVersion(4, 3);
    glutInitContextFlags(GLUT_FORWARD_COMPATIBLE);
    glutInitContextProfile(GLUT_CORE_PROFILE);

    int WindowHandle = glutCreateWindow("test");
    if (WindowHandle < 1) {
        fprintf(
            stderr,
            "ERROR: Could not create a new rendering window.\n"
        );
        exit(EXIT_FAILURE);
    }
}
void Initialize(int argc, char* argv[])
{
GLenum GlewInitResult;
InitWindow(argc, argv);
//glewExperimental = GL_TRUE;
GlewInitResult = glewInit();
if (GLEW_OK != GlewInitResult) {
fprintf(
stderr,
"ERROR: %s\n",
glewGetErrorString(GlewInitResult)
);
exit(EXIT_FAILURE);
}
fprintf(
stdout,
"INFO: OpenGL Version: %s\n",
glGetString(GL_VERSION)
);
CreateShaders();
CreateTexture();
}
void CreateShaders(void)
{
GLchar messages[256];
GLenum ErrorCheckValue = glGetError();
/* Compile the shader. */
computeShaderId = glCreateShader(GL_COMPUTE_SHADER);
glShaderSource(computeShaderId, 1, &Program, NULL);
glCompileShader(computeShaderId);
/* Print the compilation log. */
glGetShaderInfoLog(computeShaderId, sizeof(messages), NULL, messages);
printf("Compile Log: %s\n", messages);
/* Set up program objects. */
programId = glCreateProgram();
/* Create a complete program object. */
glAttachShader(programId, computeShaderId);
glLinkProgram(programId);
/* And print the link log. */
glGetProgramInfoLog(programId, sizeof(messages), NULL, messages);
printf("Link Log: %s\n", messages);
CheckGLErrors();
}
void CreateTexture(void)
{
// Create the input texture
glGenTextures(1, &inputTexId);
// And bind it to texture unit 0
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_1D, inputTexId);
// Set texture size and format
glTexStorage1D(GL_TEXTURE_1D, 1, GL_R32F, kArraySize);
// Create the output texture
glGenTextures(1, &outputTexId);
// And bind it to texture unit 1
glActiveTexture(GL_TEXTURE1);
glBindTexture(GL_TEXTURE_1D, outputTexId);
// Set texture size and format
glTexStorage1D(GL_TEXTURE_1D, 1, GL_R32F, kArraySize);
glBindImageTexture(0, inputTexId, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R32F);
glBindImageTexture(1, outputTexId, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_R32F);
CheckGLErrors();
}
void DoCompute()
{
float *inputData = new float[kArraySize];
float *outputData = new float[kArraySize];
int i;
for (i = 0; i < kArraySize; i++)
{
inputData[i] = i;
}
glBindTexture(GL_TEXTURE_1D, inputTexId);
glTexSubImage1D(GL_TEXTURE_1D, 0, 0, kArraySize, GL_RED, GL_FLOAT, inputData);
{ // launch compute shaders!
glUseProgram(programId);
glDispatchCompute((GLuint)kArraySize/16, 1, 1);
}
// make sure writing to image has finished before read
glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
glBindTexture(GL_TEXTURE_1D, outputTexId);
glGetTexImage(GL_TEXTURE_1D, 0, GL_RED, GL_FLOAT, outputData);
glBindTexture(GL_TEXTURE_2D, 0);
CheckGLErrors();
for (i = 0; i < kArraySize; i++)
{
printf("%f ", outputData[i]);
}
delete []outputData;
delete []inputData;
}
/*
 * Entry point: set up OpenGL, run the computation once and return 0.
 *
 * The GLUT main loop is never entered.
 */
int main(int argc, char *argv[])
{
    Initialize(argc, argv);
    DoCompute();
    return 0;
}
参考
http://wili.cc/blog/opengl-cs.html
https://antongerdelan.net/opengl/compute.html
https://www.cnblogs.com/chen9510/p/12000320.html
https://blog.csdn.net/koibiki/article/details/80590885
https://arm-software.github.io/opengl-es-sdk-for-android/compute_intro.html