{"$schema":"https://policywindow.org/critique/api/schema","name":"Critical AI — critique calibration","description":"How close Critical AI's per-paper critiques are to the published human-expert standard. Each critique is compared, on a shared dimension vocabulary, against the benchmark corpus of real Comments/replications/reanalyses: dimensional alignment (does it attend to the lenses experts emphasise), breadth vs the human range, and the credibility gates the benchmarks embody (sourced, severity-disciplined, claims-not-motives). All re-derived in-app from the corpus + the critiques.","docs":"https://policywindow.org/critique/benchmarks","alignment_threshold":0.8,"reference":{"n":39,"frequency":{"methods":0.9487179487179487,"identification":0.4358974358974359,"statistics":0.8461538461538461,"data_code":0.28205128205128205,"claims":0.8974358974358975,"reproducibility":0.6666666666666666,"overclaiming":0.358974358974359,"generalisation":0.38461538461538464,"theory":0.1794871794871795,"novelty":0.07692307692307693},"breadth":{"min":3,"q1":5,"median":5,"q3":5.5,"max":6,"mean":5.08}},"domain_profiles":[{"domain":"political_science","label":"Political science","reference":{"n":8,"frequency":{"methods":0.875,"identification":0.375,"statistics":1,"data_code":0.5,"claims":0.75,"reproducibility":0.875,"overclaiming":0.5,"generalisation":0.25,"theory":0.125,"novelty":0.125},"breadth":{"min":5,"q1":5,"median":5,"q3":6,"max":6,"mean":5.38}},"topDimensions":[{"dimension":"statistics","frequency":1},{"dimension":"methods","frequency":0.875},{"dimension":"reproducibility","frequency":0.875},{"dimension":"claims","frequency":0.75}]},{"domain":"economics","label":"Economics & finance","reference":{"n":6,"frequency":{"methods":1,"identification":0.6666666666666666,"statistics":0.8333333333333334,"data_code":0.6666666666666666,"claims":1,"reproducibility":0.8333333333333334,"overclaiming":0.16666666666666666,"generalisation":0.16666666666666666,"theory":0,"novelty":0},"breadth":{"min":4,"q1":5,"median":5.5,"q3":6,"max":6,"mean":5.33}},"topDimensions":[{"dimension":"methods","frequency":1},{"dimension":"claims","frequency":1},{"dimension":"statistics","frequency":0.8333333333333334},{"dimension":"reproducibility","frequency":0.8333333333333334}]},{"domain":"sociology","label":"Sociology","reference":{"n":6,"frequency":{"methods":1,"identification":0.3333333333333333,"statistics":0.6666666666666666,"data_code":0.3333333333333333,"claims":1,"reproducibility":0.6666666666666666,"overclaiming":0.5,"generalisation":0.3333333333333333,"theory":0.3333333333333333,"novelty":0},"breadth":{"min":5,"q1":5,"median":5,"q3":5,"max":6,"mean":5.17}},"topDimensions":[{"dimension":"methods","frequency":1},{"dimension":"claims","frequency":1},{"dimension":"statistics","frequency":0.6666666666666666},{"dimension":"reproducibility","frequency":0.6666666666666666}]},{"domain":"psychology","label":"Psychology","reference":{"n":5,"frequency":{"methods":1,"identification":0.2,"statistics":1,"data_code":0,"claims":1,"reproducibility":1,"overclaiming":0.2,"generalisation":0.6,"theory":0.2,"novelty":0},"breadth":{"min":5,"q1":5,"median":5,"q3":5,"max":6,"mean":5.2}},"topDimensions":[{"dimension":"methods","frequency":1},{"dimension":"statistics","frequency":1},{"dimension":"claims","frequency":1},{"dimension":"reproducibility","frequency":1}]},{"domain":"public_policy","label":"Public policy & criminology","reference":{"n":5,"frequency":{"methods":1,"identification":0.2,"statistics":0.8,"data_code":0.2,"claims":1,"reproducibility":0.2,"overclaiming":0.4,"generalisation":0.4,"theory":0.2,"novelty":0.2},"breadth":{"min":3,"q1":5,"median":5,"q3":5,"max":5,"mean":4.6}},"topDimensions":[{"dimension":"methods","frequency":1},{"dimension":"claims","frequency":1},{"dimension":"statistics","frequency":0.8},{"dimension":"overclaiming","frequency":0.4}]},{"domain":"management","label":"Management, IS & marketing","reference":{"n":3,"frequency":{"methods":1,"identification":1,"statistics":1,"data_code":0,"claims":0.3333333333333333,"reproducibility":0.6666666666666666,"overclaiming":0,"generalisation":0.3333333333333333,"theory":0,"novelty":0},"breadth":{"min":4,"q1":4,"median":4,"q3":4.5,"max":5,"mean":4.33}},"topDimensions":[{"dimension":"methods","frequency":1},{"dimension":"identification","frequency":1},{"dimension":"statistics","frequency":1},{"dimension":"reproducibility","frequency":0.6666666666666666}]},{"domain":"communication","label":"Communication & media","reference":{"n":3,"frequency":{"methods":1,"identification":0.3333333333333333,"statistics":0.6666666666666666,"data_code":0,"claims":1,"reproducibility":0.6666666666666666,"overclaiming":0.3333333333333333,"generalisation":0.6666666666666666,"theory":0,"novelty":0},"breadth":{"min":4,"q1":4,"median":4,"q3":5,"max":6,"mean":4.67}},"topDimensions":[{"dimension":"methods","frequency":1},{"dimension":"claims","frequency":1},{"dimension":"statistics","frequency":0.6666666666666666},{"dimension":"reproducibility","frequency":0.6666666666666666}]},{"domain":"education","label":"Education","reference":{"n":3,"frequency":{"methods":0.6666666666666666,"identification":0.6666666666666666,"statistics":0.6666666666666666,"data_code":0,"claims":1,"reproducibility":0,"overclaiming":0.6666666666666666,"generalisation":0.6666666666666666,"theory":0.6666666666666666,"novelty":0.3333333333333333},"breadth":{"min":5,"q1":5,"median":5,"q3":5.5,"max":6,"mean":5.33}},"topDimensions":[{"dimension":"claims","frequency":1},{"dimension":"methods","frequency":0.6666666666666666},{"dimension":"identification","frequency":0.6666666666666666},{"dimension":"statistics","frequency":0.6666666666666666}]}],"scored":13,"calibrated":4,"mean_alignment":0.694,"results":[{"critiqueId":"CRIT-000013","slug":"peng-copilot-developer-productivity","targetTitle":"The Impact of AI on Developer Productivity: Evidence from GitHub Copilot","accessBasis":"open_access","dimensions":["identification","statistics","generalisation","claims","reproducibility","overclaiming","methods"],"alignment":0.923,"aligned":true,"breadth":7,"breadthBand":"comprehensive","domain":"other","domainAlignment":null,"disciplined":true,"disciplineErrors":[],"grounding":1,"grounded":true,"verdict":"calibrated","note":"attends to the expert-emphasised lenses (alignment 0.92); broader than a typical Comment (7 dimensions vs human median 5)."},{"critiqueId":"CRIT-000014","slug":"farach-scaffolding-human-ai-collaboration","targetTitle":"Scaffolding Human–AI Collaboration: A Field Experiment on Behavioral Protocols and Cognitive Reframing","accessBasis":"open_access","dimensions":["identification","statistics","methods","claims","generalisation","reproducibility"],"alignment":0.919,"aligned":true,"breadth":6,"breadthBand":"comprehensive","domain":"other","domainAlignment":null,"disciplined":true,"disciplineErrors":[],"grounding":1,"grounded":true,"verdict":"calibrated","note":"attends to the expert-emphasised lenses (alignment 0.92); broader than a typical Comment (6 dimensions vs human median 5)."},{"critiqueId":"CRIT-000002","slug":"brynjolfsson-li-raymond-generative-ai-at-work-qje-2025","targetTitle":"Generative AI at Work","accessBasis":"open_access","dimensions":["identification","methods","statistics","claims","reproducibility","data_code","overclaiming","generalisation"],"alignment":0.918,"aligned":true,"breadth":8,"breadthBand":"comprehensive","domain":"economics","domainAlignment":0.906,"disciplined":true,"disciplineErrors":[],"grounding":1,"grounded":true,"verdict":"calibrated","note":"attends to the expert-emphasised lenses (alignment 0.92); broader than a typical Comment (8 dimensions vs human median 5)."},{"critiqueId":"CRIT-000009","slug":"the-politics-of-artificial-intelligence-alignment","targetTitle":"The politics of artificial intelligence alignment: Public reactions to AI moderation in the case of Google’s Gemini","accessBasis":"abstract_only","dimensions":["generalisation","statistics","claims","methods"],"alignment":0.828,"aligned":true,"breadth":4,"breadthBand":"focused","domain":"communication","domainAlignment":0.884,"disciplined":true,"disciplineErrors":[],"grounding":1,"grounded":true,"verdict":"calibrated","note":"attends to the expert-emphasised lenses (alignment 0.83); more focused than a typical critique (4 dimensions vs human median 5)."},{"critiqueId":"CRIT-000012","slug":"generative-ai-propaganda-and-digital-authoritarian","targetTitle":"Generative AI, propaganda, and digital authoritarianism: Comparative insights from six democratically weakened countries","accessBasis":"abstract_only","dimensions":["generalisation","claims","methods","reproducibility"],"alignment":0.78,"aligned":false,"breadth":4,"breadthBand":"focused","domain":"economics","domainAlignment":0.721,"disciplined":true,"disciplineErrors":[],"grounding":1,"grounded":true,"verdict":"needs_review","note":"scope-limited by abstract-only access — cannot reach the methods/statistics/identification lenses experts emphasise (alignment 0.78 < 0.8); full-text review needed to reach the calibrated standard; more focused than a typical critique (4 dimensions vs human median 5)."},{"critiqueId":"CRIT-000010","slug":"refusal-as-silence-gendered-disparities-in-vision","targetTitle":"Refusal as silence: Gendered disparities in Vision-Language Model responses","accessBasis":"abstract_only","dimensions":["generalisation","reproducibility","claims","methods"],"alignment":0.78,"aligned":false,"breadth":4,"breadthBand":"focused","domain":"communication","domainAlignment":0.884,"disciplined":true,"disciplineErrors":[],"grounding":1,"grounded":true,"verdict":"needs_review","note":"scope-limited by abstract-only access — cannot reach the methods/statistics/identification lenses experts emphasise (alignment 0.78 < 0.8); full-text review needed to reach the calibrated standard; more focused than a typical critique (4 dimensions vs human median 5)."},{"critiqueId":"CRIT-000008","slug":"ai-meets-politics-examining-the-effects-of-differe","targetTitle":"AI meets politics: Examining the effects of different targeting strategies across 15 countries","accessBasis":"abstract_only","dimensions":["generalisation","claims","methods","identification"],"alignment":0.718,"aligned":false,"breadth":4,"breadthBand":"focused","domain":"economics","domainAlignment":0.681,"disciplined":true,"disciplineErrors":[],"grounding":1,"grounded":true,"verdict":"needs_review","note":"scope-limited by abstract-only access — cannot reach the methods/statistics/identification lenses experts emphasise (alignment 0.72 < 0.8); full-text review needed to reach the calibrated standard; more focused than a typical critique (4 dimensions vs human median 5)."},{"critiqueId":"CRIT-000003","slug":"the-cybernetic-teammate-a-field-experiment-on-gene","targetTitle":"The Cybernetic Teammate: A Field Experiment on Generative AI and Teamwork","accessBasis":"abstract_only","dimensions":["generalisation","claims","methods","overclaiming"],"alignment":0.697,"aligned":false,"breadth":4,"breadthBand":"focused","domain":"management","domainAlignment":0.435,"disciplined":true,"disciplineErrors":[],"grounding":1,"grounded":true,"verdict":"needs_review","note":"scope-limited by abstract-only access — cannot reach the methods/statistics/identification lenses experts emphasise (alignment 0.70 < 0.8); full-text review needed to reach the calibrated standard; more focused than a typical critique (4 dimensions vs human median 5)."},{"critiqueId":"CRIT-000006","slug":"can-chatgpt-kill-user-generated-qa-platforms","targetTitle":"Can ChatGPT Kill User-Generated Q&A Platforms?","accessBasis":"abstract_only","dimensions":["generalisation","identification","claims"],"alignment":0.534,"aligned":false,"breadth":3,"breadthBand":"focused","domain":"management","domainAlignment":0.503,"disciplined":true,"disciplineErrors":[],"grounding":1,"grounded":true,"verdict":"needs_review","note":"scope-limited by abstract-only access — cannot reach the methods/statistics/identification lenses experts emphasise (alignment 0.53 < 0.8); full-text review needed to reach the calibrated standard; more focused than a typical critique (3 dimensions vs human median 5)."},{"critiqueId":"CRIT-000004","slug":"artificial-collusion-examining-supracompetitive-pr","targetTitle":"Artificial Collusion: Examining Supracompetitive Pricing by Q-Learning Algorithms","accessBasis":"abstract_only","dimensions":["generalisation","overclaiming","claims","theory"],"alignment":0.49,"aligned":false,"breadth":4,"breadthBand":"focused","domain":"management","domainAlignment":0.174,"disciplined":true,"disciplineErrors":[],"grounding":1,"grounded":true,"verdict":"needs_review","note":"scope-limited by abstract-only access — cannot reach the methods/statistics/identification lenses experts emphasise (alignment 0.49 < 0.8); full-text review needed to reach the calibrated standard; more focused than a typical critique (4 dimensions vs human median 5)."},{"critiqueId":"CRIT-000011","slug":"from-rule-of-law-to-rule-of-algorithm-generative-a","targetTitle":"From rule of law to rule of algorithm: Generative Artificial Intelligence's threat to democracy","accessBasis":"abstract_only","dimensions":["claims","overclaiming","theory","generalisation"],"alignment":0.49,"aligned":false,"breadth":4,"breadthBand":"focused","domain":"economics","domainAlignment":0.32,"disciplined":true,"disciplineErrors":[],"grounding":1,"grounded":true,"verdict":"needs_review","note":"scope-limited by abstract-only access — cannot reach the methods/statistics/identification lenses experts emphasise (alignment 0.49 < 0.8); full-text review needed to reach the calibrated standard; more focused than a typical critique (4 dimensions vs human median 5)."},{"critiqueId":"CRIT-000005","slug":"unraveling-generative-ai-from-a-human-intelligence","targetTitle":"Unraveling Generative AI from a Human Intelligence Perspective: A Battery of Experiments","accessBasis":"abstract_only","dimensions":["theory","overclaiming","generalisation","claims"],"alignment":0.49,"aligned":false,"breadth":4,"breadthBand":"focused","domain":"management","domainAlignment":0.174,"disciplined":true,"disciplineErrors":[],"grounding":1,"grounded":true,"verdict":"needs_review","note":"scope-limited by abstract-only access — cannot reach the methods/statistics/identification lenses experts emphasise (alignment 0.49 < 0.8); full-text review needed to reach the calibrated standard; more focused than a typical critique (4 dimensions vs human median 5)."},{"critiqueId":"CRIT-000007","slug":"made-with-ai-consumer-engagement-with-social-media","targetTitle":"Made With AI: Consumer Engagement with Social Media Containing AI Disclosures","accessBasis":"abstract_only","dimensions":["generalisation","claims","theory"],"alignment":0.454,"aligned":false,"breadth":3,"breadthBand":"focused","domain":"management","domainAlignment":0.201,"disciplined":true,"disciplineErrors":[],"grounding":1,"grounded":true,"verdict":"needs_review","note":"scope-limited by abstract-only access — cannot reach the methods/statistics/identification lenses experts emphasise (alignment 0.45 < 0.8); full-text review needed to reach the calibrated standard; more focused than a typical critique (3 dimensions vs human median 5)."}]}