<?xml version="1.0" encoding="utf-8" standalone="yes"?>
<!--
  Sitemap for https://mateuszpieniak.com/ (sitemaps.org 0.9 protocol).
  Entries are ordered by lastmod, newest first; the three entries at the end
  (categories, posts, search) carry no lastmod.
  Keep every tag and every text value on a single line: a line break inside a
  tag name is not well-formed XML, and a line break inside changefreq means the
  value no longer matches the protocol's enumerated values.
-->
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
        xmlns:xhtml="http://www.w3.org/1999/xhtml">
  <url>
    <loc>https://mateuszpieniak.com/courses/</loc>
    <lastmod>2026-07-02T00:00:00+00:00</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.5</priority>
  </url>
  <url>
    <loc>https://mateuszpieniak.com/tags/deep-q-network/</loc>
    <lastmod>2026-07-02T00:00:00+00:00</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.5</priority>
  </url>
  <url>
    <loc>https://mateuszpieniak.com/tags/deep-reinforcement-learning/</loc>
    <lastmod>2026-07-02T00:00:00+00:00</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.5</priority>
  </url>
  <url>
    <loc>https://mateuszpieniak.com/tags/double-dqn/</loc>
    <lastmod>2026-07-02T00:00:00+00:00</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.5</priority>
  </url>
  <url>
    <loc>https://mateuszpieniak.com/tags/dqn/</loc>
    <lastmod>2026-07-02T00:00:00+00:00</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.5</priority>
  </url>
  <url>
    <loc>https://mateuszpieniak.com/tags/dueling-dqn/</loc>
    <lastmod>2026-07-02T00:00:00+00:00</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.5</priority>
  </url>
  <url>
    <loc>https://mateuszpieniak.com/tags/experience-replay/</loc>
    <lastmod>2026-07-02T00:00:00+00:00</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.5</priority>
  </url>
  <url>
    <loc>https://mateuszpieniak.com/</loc>
    <lastmod>2026-07-02T00:00:00+00:00</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.5</priority>
  </url>
  <url>
    <loc>https://mateuszpieniak.com/tags/overestimation-bias/</loc>
    <lastmod>2026-07-02T00:00:00+00:00</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.5</priority>
  </url>
  <url>
    <loc>https://mateuszpieniak.com/tags/prioritized-experience-replay/</loc>
    <lastmod>2026-07-02T00:00:00+00:00</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.5</priority>
  </url>
  <url>
    <loc>https://mateuszpieniak.com/tags/reinforcement-learning/</loc>
    <lastmod>2026-07-02T00:00:00+00:00</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.5</priority>
  </url>
  <url>
    <loc>https://mateuszpieniak.com/courses/reinforcement-learning/104-deep-q-networks/</loc>
    <lastmod>2026-07-02T00:00:00+00:00</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.5</priority>
  </url>
  <url>
    <loc>https://mateuszpieniak.com/courses/reinforcement-learning/</loc>
    <lastmod>2026-07-02T00:00:00+00:00</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.5</priority>
  </url>
  <url>
    <loc>https://mateuszpieniak.com/tags/reward-clipping/</loc>
    <lastmod>2026-07-02T00:00:00+00:00</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.5</priority>
  </url>
  <url>
    <loc>https://mateuszpieniak.com/tags/</loc>
    <lastmod>2026-07-02T00:00:00+00:00</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.5</priority>
  </url>
  <url>
    <loc>https://mateuszpieniak.com/tags/target-network/</loc>
    <lastmod>2026-07-02T00:00:00+00:00</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.5</priority>
  </url>
  <url>
    <loc>https://mateuszpieniak.com/tags/approximate-q-learning/</loc>
    <lastmod>2026-06-23T00:00:00+00:00</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.5</priority>
  </url>
  <url>
    <loc>https://mateuszpieniak.com/tags/deadly-triad/</loc>
    <lastmod>2026-06-23T00:00:00+00:00</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.5</priority>
  </url>
  <url>
    <loc>https://mateuszpieniak.com/tags/expected-sarsa/</loc>
    <lastmod>2026-06-23T00:00:00+00:00</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.5</priority>
  </url>
  <url>
    <loc>https://mateuszpieniak.com/tags/function-approximation/</loc>
    <lastmod>2026-06-23T00:00:00+00:00</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.5</priority>
  </url>
  <url>
    <loc>https://mateuszpieniak.com/tags/model-free-reinforcement-learning/</loc>
    <lastmod>2026-06-23T00:00:00+00:00</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.5</priority>
  </url>
  <url>
    <loc>https://mateuszpieniak.com/tags/q-learning/</loc>
    <lastmod>2026-06-23T00:00:00+00:00</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.5</priority>
  </url>
  <url>
    <loc>https://mateuszpieniak.com/courses/reinforcement-learning/103-approximate-methods/</loc>
    <lastmod>2026-06-23T00:00:00+00:00</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.5</priority>
  </url>
  <url>
    <loc>https://mateuszpieniak.com/tags/sarsa/</loc>
    <lastmod>2026-06-23T00:00:00+00:00</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.5</priority>
  </url>
  <url>
    <loc>https://mateuszpieniak.com/tags/semi-gradient-methods/</loc>
    <lastmod>2026-06-23T00:00:00+00:00</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.5</priority>
  </url>
  <url>
    <loc>https://mateuszpieniak.com/tags/exploration/</loc>
    <lastmod>2026-06-21T00:00:00+00:00</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.5</priority>
  </url>
  <url>
    <loc>https://mateuszpieniak.com/tags/monte-carlo/</loc>
    <lastmod>2026-06-21T00:00:00+00:00</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.5</priority>
  </url>
  <url>
    <loc>https://mateuszpieniak.com/courses/reinforcement-learning/102-q-learning-sarsa/</loc>
    <lastmod>2026-06-21T00:00:00+00:00</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.5</priority>
  </url>
  <url>
    <loc>https://mateuszpieniak.com/tags/temporal-difference/</loc>
    <lastmod>2026-06-21T00:00:00+00:00</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.5</priority>
  </url>
  <url>
    <loc>https://mateuszpieniak.com/tags/banach-fixed-point-theorem/</loc>
    <lastmod>2026-06-17T00:00:00+00:00</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.5</priority>
  </url>
  <url>
    <loc>https://mateuszpieniak.com/tags/bellman-expectation-equation/</loc>
    <lastmod>2026-06-17T00:00:00+00:00</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.5</priority>
  </url>
  <url>
    <loc>https://mateuszpieniak.com/tags/bellman-optimality-equation/</loc>
    <lastmod>2026-06-17T00:00:00+00:00</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.5</priority>
  </url>
  <url>
    <loc>https://mateuszpieniak.com/tags/contraction-mapping/</loc>
    <lastmod>2026-06-17T00:00:00+00:00</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.5</priority>
  </url>
  <url>
    <loc>https://mateuszpieniak.com/tags/generalized-policy-iteration/</loc>
    <lastmod>2026-06-17T00:00:00+00:00</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.5</priority>
  </url>
  <url>
    <loc>https://mateuszpieniak.com/tags/model-based-reinforcement-learning/</loc>
    <lastmod>2026-06-17T00:00:00+00:00</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.5</priority>
  </url>
  <url>
    <loc>https://mateuszpieniak.com/tags/policy-iteration/</loc>
    <lastmod>2026-06-17T00:00:00+00:00</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.5</priority>
  </url>
  <url>
    <loc>https://mateuszpieniak.com/courses/reinforcement-learning/101-policy-iteration-value-iteration/</loc>
    <lastmod>2026-06-17T00:00:00+00:00</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.5</priority>
  </url>
  <url>
    <loc>https://mateuszpieniak.com/tags/value-iteration/</loc>
    <lastmod>2026-06-17T00:00:00+00:00</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.5</priority>
  </url>
  <url>
    <loc>https://mateuszpieniak.com/categories/</loc>
    <changefreq>weekly</changefreq>
    <priority>0.5</priority>
  </url>
  <url>
    <loc>https://mateuszpieniak.com/posts/</loc>
    <changefreq>weekly</changefreq>
    <priority>0.5</priority>
  </url>
  <url>
    <loc>https://mateuszpieniak.com/search/</loc>
    <changefreq>weekly</changefreq>
    <priority>0.5</priority>
  </url>
</urlset>