<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="reviewer-report"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIRx Med</journal-id><journal-id journal-id-type="publisher-id">xmed</journal-id><journal-id journal-id-type="index">34</journal-id><journal-title>JMIRx Med</journal-title><abbrev-journal-title>JMIRx Med</abbrev-journal-title><issn pub-type="epub">2563-6316</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v7i1e96225</article-id><article-id pub-id-type="doi">10.2196/96225</article-id><article-categories><subj-group subj-group-type="heading"><subject>Peer-Review Report</subject></subj-group></article-categories><title-group><article-title>Peer Review of &#x201C;The Performance of DeepSeek R1 and Gemini 3 in Complex Medical Scenarios: Comparative Study&#x201D;</article-title></title-group><contrib-group><contrib contrib-type="author"><name name-style="western"><surname>You</surname><given-names>Jacqueline Guan-Ting</given-names></name><xref ref-type="aff" rid="aff1"/></contrib></contrib-group><aff id="aff1"><institution>Mass General Brigham</institution><addr-line>Boston</addr-line><addr-line>MA</addr-line><country>United States</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Schwartz</surname><given-names>Amy</given-names></name></contrib></contrib-group><pub-date pub-type="collection"><year>2026</year></pub-date><pub-date pub-type="epub"><day>27</day><month>4</month><year>2026</year></pub-date><volume>7</volume><elocation-id>e96225</elocation-id><history><date 
date-type="received"><day>26</day><month>03</month><year>2026</year></date><date date-type="rev-recd"><day>26</day><month>03</month><year>2026</year></date><date date-type="accepted"><day>26</day><month>03</month><year>2026</year></date></history><copyright-statement>&#x00A9; Jacqueline Guan-Ting You. Originally published in JMIRx Med (<ext-link ext-link-type="uri" xlink:href="https://med.jmirx.org">https://med.jmirx.org</ext-link>), 27.4.2026. </copyright-statement><copyright-year>2026</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIRx Med, is properly cited. 
The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://med.jmirx.org/">https://med.jmirx.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://xmed.jmir.org/2026/1/e96225"/><related-article related-article-type="companion" ext-link-type="doi" xlink:href="10.1101/2025.04.29.25326666" xlink:title="Preprint (medRxiv):" xlink:type="simple">https://www.medrxiv.org/content/10.1101/2025.04.29.25326666v1</related-article><related-article related-article-type="companion" ext-link-type="doi" xlink:href="10.2196/96220" xlink:title="Authors' Response to Peer-Review Reports" xlink:type="simple">https://med.jmirx.org/2026/1/e96220</related-article><related-article related-article-type="companion" ext-link-type="doi" xlink:href="10.2196/76822" xlink:title="Published Article" xlink:type="simple">https://med.jmirx.org/2026/1/e76822</related-article><kwd-group><kwd>large reasoning model</kwd><kwd>LRM</kwd><kwd>large language model</kwd><kwd>LLM</kwd><kwd>accuracy</kwd><kwd>medical scenario</kwd><kwd>DeepSeek R1</kwd><kwd>Gemini 3</kwd></kwd-group></article-meta></front><body><p><italic>This is a peer-review report for &#x201C;The Performance of DeepSeek R1 and Gemini 3 in Complex Medical Scenarios: Comparative Study.&#x201D;</italic></p><sec id="s2"><title>Round 1 Review</title><sec id="s1-1"><title>General Comments</title><p>This paper [<xref ref-type="bibr" rid="ref1">1</xref>] seeks to evaluate the accuracy of DeepSeek R1 in correctly identifying the primary medical diagnosis in the medical scenarios dataset portion of Massive Multitask Language Understanding Pro (MMLU-Pro) using an open-ended format. 
Some clarifications on the methods and results (especially around the roles of subject matter experts vs core team members in the publication) would be helpful in understanding how these results were derived.</p></sec><sec id="s1-2"><title>Specific Comments</title><sec id="s1-2-1"><title>Minor Comments</title><list list-type="order"><list-item><p>Introduction: consider citing DeepSeek AI&#x2019;s DeepSeek R1 paper [<xref ref-type="bibr" rid="ref2">2</xref>].</p></list-item><list-item><p>Methods: please clarify who your subject matter experts were (eg, physicians, researchers) in terms of rank, specialty, and role and how they were used to grade answers (eg, selected based on specialty, 2-reviewer process, etc).</p></list-item><list-item><p>Methods: please indicate when the analyses were run.</p></list-item><list-item><p>Results: who determines whether references are related or unrelated?</p></list-item><list-item><p>Results and Discussion: it is unclear to me from reading the discussion portion of the paper whether we have any sense of whether DeepSeek R1 has correct reasoning for questions with correct diagnoses (eg, it may get the right diagnosis but may have incorrect reasoning). Similarly, did you determine the &#x201C;correct answer&#x201D; based on string matching (for example, if the answer was &#x201C;septic arthritis&#x201D; and the DeepSeek output stated &#x201C;septic shock,&#x201D; would this be incorrect)?</p></list-item><list-item><p>Discussion: consider acknowledging the sample size of questions as a limitation.</p></list-item></list></sec></sec></sec><sec id="s3"><title>Round 2 Review</title><sec id="s2-1"><title>General Comments</title><p>The paper has been revised to address the Transparent Reporting of a Multivariable Prediction Model for Individual Prognosis or Diagnosis&#x2013;Large Language Model (TRIPOD-LLM) guidelines. 
Overall, it appears most concerns from both reviewers have been addressed.</p></sec></sec></body><back><fn-group><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">MMLU-Pro</term><def><p>Massive Multitask Language Understanding Pro</p></def></def-item><def-item><term id="abb2">TRIPOD-LLM</term><def><p>Transparent Reporting of a Multivariable Prediction Model for Individual Prognosis or Diagnosis&#x2013;Large Language Model</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bajwa</surname><given-names>M</given-names> </name><name name-style="western"><surname>Hoyt</surname><given-names>R</given-names> </name><name name-style="western"><surname>Knight</surname><given-names>D</given-names> </name><name name-style="western"><surname>Haider</surname><given-names>M</given-names> </name></person-group><article-title>The performance of DeepSeek R1 and Gemini 3 in complex medical scenarios: comparative study</article-title><source>JMIRx Med</source><year>2026</year><volume>7</volume><fpage>e76822</fpage><pub-id pub-id-type="doi">10.2196/76822</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Guo</surname><given-names>D</given-names> </name><name name-style="western"><surname>Yang</surname><given-names>D</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>H</given-names> </name><etal/></person-group><article-title>DeepSeek-R1: incentivizing reasoning capability in LLMs via reinforcement learning</article-title><comment>Preprint posted online on Jan 22, 2025</comment><pub-id 
pub-id-type="doi">10.48550/arXiv.2501.12948</pub-id></nlm-citation></ref></ref-list></back></article>