.. DO NOT EDIT.
.. THIS FILE WAS AUTOMATICALLY GENERATED BY SPHINX-GALLERY.
.. TO MAKE CHANGES, EDIT THE SOURCE PYTHON FILE:
.. "gallery/compute_timestamps.py"
.. LINE NUMBERS ARE GIVEN BELOW.

.. only:: html

    .. note::
        :class: sphx-glr-download-link-note

        :ref:`Go to the end <sphx_glr_download_gallery_compute_timestamps.py>`
        to download the full example code.

.. rst-class:: sphx-glr-example-title

.. _sphx_glr_gallery_compute_timestamps.py:


Compute Timestamps
------------------

A simple example to profile a compute pass using ComputePassTimestampWrites.

.. GENERATED FROM PYTHON SOURCE LINES 7-169

.. code-block:: Python


    import wgpu

    """
    Define the number of elements, global and local sizes.
    Change these and see how it affects performance.
    """
    n = 512 * 512
    local_size = [32, 1, 1]
    global_size = [n // local_size[0], 1, 1]

    shader_source = f"""
    @group(0) @binding(0)
    var<storage,read> data1: array<i32>;

    @group(0) @binding(1)
    var<storage,read> data2: array<i32>;

    @group(0) @binding(2)
    var<storage,read_write> data3: array<i32>;

    @compute
    @workgroup_size({",".join(map(str, local_size))})
    fn main(@builtin(global_invocation_id) index: vec3<u32>) {{
        let i: u32 = index.x;
        data3[i] = data1[i] + data2[i];
    }}
    """

    # Define two arrays
    data1 = memoryview(bytearray(n * 4)).cast("i")
    data2 = memoryview(bytearray(n * 4)).cast("i")

    # Initialize the arrays
    for i in range(n):
        data1[i] = i

    for i in range(n):
        data2[i] = i * 2

    adapter = wgpu.gpu.request_adapter_sync(power_preference="high-performance")

    # Request a device with the timestamp_query feature, so we can profile our computation
    device = adapter.request_device_sync(
        required_features=[wgpu.FeatureName.timestamp_query]
    )
    cshader = device.create_shader_module(code=shader_source)

    # Create buffer objects, input buffer is mapped.
    buffer1 = device.create_buffer_with_data(data=data1, usage=wgpu.BufferUsage.STORAGE)
    buffer2 = device.create_buffer_with_data(data=data2, usage=wgpu.BufferUsage.STORAGE)
    buffer3 = device.create_buffer(
        size=data1.nbytes, usage=wgpu.BufferUsage.STORAGE | wgpu.BufferUsage.COPY_SRC
    )

    # Setup layout and bindings
    binding_layouts = [
        {
            "binding": 0,
            "visibility": wgpu.ShaderStage.COMPUTE,
            "buffer": {
                "type": wgpu.BufferBindingType.read_only_storage,
            },
        },
        {
            "binding": 1,
            "visibility": wgpu.ShaderStage.COMPUTE,
            "buffer": {
                "type": wgpu.BufferBindingType.read_only_storage,
            },
        },
        {
            "binding": 2,
            "visibility": wgpu.ShaderStage.COMPUTE,
            "buffer": {
                "type": wgpu.BufferBindingType.storage,
            },
        },
    ]
    bindings = [
        {
            "binding": 0,
            "resource": {"buffer": buffer1, "offset": 0, "size": buffer1.size},
        },
        {
            "binding": 1,
            "resource": {"buffer": buffer2, "offset": 0, "size": buffer2.size},
        },
        {
            "binding": 2,
            "resource": {"buffer": buffer3, "offset": 0, "size": buffer3.size},
        },
    ]

    # Put everything together
    bind_group_layout = device.create_bind_group_layout(entries=binding_layouts)
    pipeline_layout = device.create_pipeline_layout(bind_group_layouts=[bind_group_layout])
    bind_group = device.create_bind_group(layout=bind_group_layout, entries=bindings)

    # Create and run the pipeline
    compute_pipeline = device.create_compute_pipeline(
        layout=pipeline_layout,
        compute={"module": cshader, "entry_point": "main"},
    )

    """
    Create a QuerySet to store the 'beginning_of_pass' and 'end_of_pass' timestamps.
    Set the 'count' parameter to 2, as this set will contain 2 timestamps.
    """
    query_set = device.create_query_set(type=wgpu.QueryType.timestamp, count=2)
    command_encoder = device.create_command_encoder()

    # Pass our QuerySet and the indices into it, where the timestamps will be written.
    compute_pass = command_encoder.begin_compute_pass(
        timestamp_writes={
            "query_set": query_set,
            "beginning_of_pass_write_index": 0,
            "end_of_pass_write_index": 1,
        }
    )

    """
    Create the buffer to store our query results.
    Each timestamp is 8 bytes. We mark the buffer usage to be QUERY_RESOLVE,
    as we will use this buffer in a resolve_query_set call later.
    """
    query_buf = device.create_buffer(
        size=8 * query_set.count,
        usage=wgpu.BufferUsage.QUERY_RESOLVE | wgpu.BufferUsage.COPY_SRC,
    )
    compute_pass.set_pipeline(compute_pipeline)
    compute_pass.set_bind_group(0, bind_group)
    compute_pass.dispatch_workgroups(*global_size)  # x y z
    compute_pass.end()

    # Resolve our queries, and store the results in the destination buffer we created above.
    command_encoder.resolve_query_set(
        query_set=query_set,
        first_query=0,
        query_count=2,
        destination=query_buf,
        destination_offset=0,
    )
    device.queue.submit([command_encoder.finish()])

    """
    Read the query buffer to get the timestamps.
    Index 0: beginning timestamp
    Index 1: end timestamp
    """
    timestamps = device.queue.read_buffer(query_buf).cast("Q").tolist()
    print(f"Adding two {n} sized arrays took {(timestamps[1] - timestamps[0]) / 1000} us")

    # Read result
    out = device.queue.read_buffer(buffer3).cast("i")
    result = out.tolist()

    # Calculate the result on the CPU for comparison
    result_cpu = [a + b for a, b in zip(data1, data2, strict=False)]

    # Ensure results are the same
    assert result == result_cpu


.. _sphx_glr_download_gallery_compute_timestamps.py:

.. only:: html

  .. container:: sphx-glr-footer sphx-glr-footer-example

    .. container:: sphx-glr-download sphx-glr-download-jupyter

      :download:`Download Jupyter notebook: compute_timestamps.ipynb <compute_timestamps.ipynb>`

    .. container:: sphx-glr-download sphx-glr-download-python

      :download:`Download Python source code: compute_timestamps.py <compute_timestamps.py>`

    .. container:: sphx-glr-download sphx-glr-download-zip

      :download:`Download zipped: compute_timestamps.zip <compute_timestamps.zip>`


.. only:: html

 .. rst-class:: sphx-glr-signature

    `Gallery generated by Sphinx-Gallery <https://sphinx-gallery.github.io>`_