train_rick/generate_rick_science_dataset/generate_questions.py (250 lines of code) (raw):

"""Generate science questions for a fixed list of themes with the OpenAI API.

Usage:
    python generate_questions.py > questions.txt

One request is sent per entry in ``themes``. The text of every response is
written to stdout unchanged; the prompt asks the model for a JSON array of
five questions, but the reply is not parsed or validated here. Themes whose
request fails are reported on stderr and skipped, and the process then exits
with a non-zero status.
"""

import sys

from openai import OpenAI, OpenAIError

client = OpenAI()

# "<field>/<topic>" labels; each one is substituted into ``template``.
themes = [
    "Physics/Kinematics",
    "Physics/Dynamics",
    "Physics/Energy",
    "Physics/Momentum",
    "Physics/Rotational Motion",
    "Physics/Gravitation",
    "Physics/Fluid Mechanics",
    "Physics/Thermodynamics",
    "Physics/Waves",
    "Physics/Sound",
    "Physics/Optics",
    "Physics/Electricity",
    "Physics/Magnetism",
    "Physics/Electromagnetic Waves",
    "Physics/Quantum Mechanics",
    "Physics/Special Relativity",
    "Physics/General Relativity",
    "Physics/Nuclear Physics",
    "Physics/Particle Physics",
    "Physics/Astrophysics",
    "Physics/Experimental Techniques",
    "Physics/Error Analysis",
    "Physics/Units & Measurement",
    "Chemistry/Atomic Structure",
    "Chemistry/Periodic Table",
    "Chemistry/Ionic Bonding",
    "Chemistry/Covalent Bonding",
    "Chemistry/Metallic Bonding",
    "Chemistry/Intermolecular Forces",
    "Chemistry/Molecular Geometry",
    "Chemistry/States of Matter",
    "Chemistry/Gas Laws",
    "Chemistry/Solutions",
    "Chemistry/Acids and Bases",
    "Chemistry/Redox Reactions",
    "Chemistry/Chemical Reactions",
    "Chemistry/Thermochemistry",
    "Chemistry/Reaction Kinetics",
    "Chemistry/Chemical Equilibrium",
    "Chemistry/Nuclear Chemistry",
    "Chemistry/Organic Chemistry",
    "Chemistry/Hydrocarbons",
    "Chemistry/Functional Groups",
    "Chemistry/Biochemistry",
    "Chemistry/Polymers",
    "Chemistry/Analytical Techniques",
    "Chemistry/Spectroscopy",
    "Biology/Cell Structure",
    "Biology/Cell Membrane",
    "Biology/Cell Division",
    "Biology/Cellular Respiration",
    "Biology/Photosynthesis",
    "Biology/DNA Structure",
    "Biology/Protein Synthesis",
    "Biology/Genetics",
    "Biology/Inheritance",
    "Biology/Epigenetics",
    "Biology/Evolution",
    "Biology/Taxonomy",
    "Biology/Plants",
    "Biology/Animal Physiology",
    "Biology/Nervous System",
    "Biology/Endocrine System",
    "Biology/Immune System",
    "Biology/Cardiovascular System",
    "Biology/Respiratory System",
    "Biology/Digestive System",
    "Biology/Reproductive System",
    "Biology/Population Biology",
    "Biology/Ecology",
    "Biology/Microbiology",
    "Biology/Biotechnology",
    "Earth Science/Geology",
    "Earth Science/Plate Tectonics",
    "Earth Science/Volcanoes",
    "Earth Science/Earthquakes",
    "Earth Science/Minerals",
    "Earth Science/Rocks",
    "Earth Science/Fossils",
    "Earth Science/Geologic Time",
    "Earth Science/Weathering & Erosion",
    "Earth Science/Oceanography",
    "Earth Science/Weather",
    "Earth Science/Climate",
    "Earth Science/Atmosphere",
    "Earth Science/Hydrosphere",
    "Earth Science/Natural Disasters",
    "Earth Science/Soil Science",
    "Earth Science/Earth’s Interior",
    "Earth Science/Magnetic Field",
    "Astronomy/Solar System",
    "Astronomy/Planets",
    "Astronomy/Moons",
    "Astronomy/Sun",
    "Astronomy/Stars",
    "Astronomy/Galaxies",
    "Astronomy/Black Holes",
    "Astronomy/Dark Matter",
    "Astronomy/Cosmology",
    "Astronomy/Exoplanets",
    "Astronomy/Space Missions",
    "Astronomy/Space-Time",
    "Astronomy/Telescopes",
    "Astronomy/Observational Techniques",
    "Environmental Science/Ecosystems",
    "Environmental Science/Biodiversity",
    "Environmental Science/Pollution",
    "Environmental Science/Water Resources",
    "Environmental Science/Air Quality",
    "Environmental Science/Climate Change",
    "Environmental Science/Sustainable Energy",
    "Environmental Science/Deforestation",
    "Environmental Science/Conservation",
    "Environmental Science/Waste Management",
    "Environmental Science/Carbon Cycle",
    "Environmental Science/Nitrogen Cycle",
    "Environmental Science/Food Chains",
    "Environmental Science/Population Growth",
    "Environmental Science/Land Use",
    "Math/Calculus - Derivatives",
    "Math/Calculus - Integrals",
    "Math/Calculus - Applications",
    "Math/Differential Equations",
    "Math/Probability",
    "Math/Statistics",
    "Math/Linear Algebra",
    "Math/Matrices",
    "Math/Trigonometry",
    "Math/Geometry",
    "Math/Algebra",
    "Math/Number Theory",
    "Math/Set Theory",
    "Math/Graph Theory",
    "Math/Mathematical Logic",
    "Math/Complex Numbers",
    "Computer Science/Algorithms",
    "Computer Science/Data Structures",
    "Computer Science/Boolean Logic",
    "Computer Science/Computational Complexity",
    "Computer Science/Artificial Intelligence",
    "Computer Science/Machine Learning",
    "Computer Science/Quantum Computing",
    "Computer Science/Theoretical CS",
    "Computer Science/Cryptography",
    "Computer Science/Networks",
    "Computer Science/Software Engineering",
    "Computer Science/Programming Languages",
    "Computer Science/Operating Systems",
    "Engineering/Mechanical Engineering",
    "Engineering/Thermodynamics",
    "Engineering/Electrical Engineering",
    "Engineering/Control Systems",
    "Engineering/Signal Processing",
    "Engineering/Materials Science",
    "Engineering/Structural Engineering",
    "Engineering/Robotics",
    "Engineering/Civil Engineering",
    "Engineering/Computer Engineering",
    "Engineering/Aerospace Engineering",
    "Medicine/Anatomy",
    "Medicine/Physiology",
    "Medicine/Pharmacology",
    "Medicine/Pathology",
    "Medicine/Neuroscience",
    "Medicine/Medical Imaging",
    "Medicine/Public Health",
    "Medicine/Epidemiology",
    "Medicine/Genetics in Medicine",
    "Medicine/Infectious Diseases",
    "Medicine/Cardiology",
    "Medicine/Oncology",
    "Medicine/Endocrinology",
    "Medicine/Immunology",
    "Medicine/Gastroenterology",
    "Medicine/Dermatology",
    "Psychology/Cognitive Psychology",
    "Psychology/Behavioral Psychology",
    "Psychology/Developmental Psychology",
    "Psychology/Biopsychology",
    "Psychology/Neuropsychology",
    "Psychology/Sensation and Perception",
    "Psychology/Social Psychology",
    "Psychology/Personality Theory",
    "Psychology/Psychological Disorders",
    "Psychology/Therapeutic Approaches",
    "Science/Scientific Method",
    "Science/Experimental Design",
    "Science/Measurement & Units",
    "Science/Error & Uncertainty",
    "Science/Models & Simulations",
    "Science/Ethics in Science",
    "Science/History of Science",
    "Math/Topology",
    "Math/Real Analysis",
    "Math/Abstract Algebra",
    "Math/Vector Calculus",
    "Math/Fractions and Decimals",
    "Math/Math History",
    "Math/Math in Nature",
    "Computer Science/Natural Language Processing",
    "Computer Science/Computer Vision",
    "Computer Science/Human-Computer Interaction",
    "Computer Science/Cybersecurity",
    "Computer Science/Cloud Computing",
    "Engineering/Environmental Engineering",
    "Engineering/Biomedical Engineering",
    "Engineering/Nanotechnology",
    "Engineering/Transportation Engineering",
    "Engineering/Mechatronics",
    "Physics/Computational Physics",
    "Physics/Biophysics",
    "Chemistry/Coordination Compounds",
    "Chemistry/Environmental Chemistry",
    "Chemistry/Green Chemistry",
    "Chemistry/Industrial Chemistry",
    "Biology/Developmental Biology",
    "Biology/Neurobiology",
    "Biology/Synthetic Biology",
    "Biology/Systems Biology",
    "Biology/Marine Biology",
    "Earth Science/Remote Sensing",
    "Earth Science/Glaciology",
    "Earth Science/Meteorology",
    "Astronomy/Astrobiology",
    "Astronomy/Radio Astronomy",
    "Astronomy/Planetary Geology",
    "Astronomy/Space Weather",
    "Environmental Science/Environmental Policy",
    "Environmental Science/Energy Resources",
    "Environmental Science/Ecological Footprint",
    "Environmental Science/Environmental Toxicology",
    "Medicine/Genomics",
    "Medicine/Surgical Techniques",
    "Medicine/Preventive Medicine",
    "Medicine/Health Informatics",
    "Psychology/Forensic Psychology",
    "Psychology/Educational Psychology",
    "Psychology/Industrial-Organizational Psychology",
    "Psychology/Research Methods",
    "Science/Philosophy of Science",
    "Science/Science Communication",
    "Science/Interdisciplinary Science",
]

# Prompt sent for every theme; ``{theme}`` is the only ``str.format`` field.
template = """Generate 5 unique science questions on the theme: "{theme}".

Guidelines:
- Questions must be original and non-repetitive.
- Vary the difficulty level: include a mix of basic, intermediate, and advanced questions.
- Each question should require reasoning, explanation, or calculation — not trivia.
- Use clear, precise phrasing that reads like a natural question, not an exam command.
- Avoid directive phrasing like "Calculate...", "Derive...", or "Using calculus, determine...".
- Prefer natural, curiosity-driven forms like "How does...", "What is the velocity of...", "Why does...", "What happens if...", etc.
- At least 3 of the 5 questions should lead to a numerical answer or require numerical computation (e.g. velocity, force, energy).
- Do not include meta-instructions like “Explain your reasoning” in the question text.

Output format:
Return the 5 questions in a valid JSON array, like this:

[
  "Question 1 text",
  "Question 2 text",
  "Question 3 text",
  "Question 4 text",
  "Question 5 text"
]
"""


def main() -> int:
    """Request questions for every theme and print each response to stdout.

    Returns:
        0 when every theme produced a response, 1 when at least one request
        failed (the failed themes are listed on stderr).
    """
    failed_themes = []
    for theme in themes:
        try:
            response = client.responses.create(
                model="gpt-4o",
                input=template.format(theme=theme),
            )
        except OpenAIError as exc:
            # One failed request must not discard the rest of the batch;
            # diagnostics go to stderr so the redirected stdout stays clean.
            print(f"Skipping theme {theme!r}: {exc}", file=sys.stderr)
            failed_themes.append(theme)
            continue
        # stdout is usually redirected to a file and therefore block-buffered;
        # flush so an interrupted run keeps the responses already received.
        print(response.output_text, flush=True)

    if failed_themes:
        print(
            f"{len(failed_themes)} of {len(themes)} themes failed: "
            + ", ".join(failed_themes),
            file=sys.stderr,
        )
        return 1
    return 0


if __name__ == "__main__":
    sys.exit(main())