# train_rick/generate_rick_science_dataset/generate_questions.py (250 lines of code) (raw):
"""
python generate_questions.py > questions.txt
"""
import sys

from openai import OpenAI, OpenAIError
# Single API client shared by every request issued by this script.
# NOTE(review): presumably picks up credentials from the environment
# (e.g. OPENAI_API_KEY) since no arguments are passed — confirm.
client = OpenAI()
# Topics grouped by field. Each (field, topics) pair expands to
# "<field>/<topic>" strings, in the order listed. Several fields appear twice:
# the second group of each field holds the later additions and is kept
# separate so that the flattened list preserves the established ordering.
_THEME_GROUPS = (
    ("Physics", (
        "Kinematics",
        "Dynamics",
        "Energy",
        "Momentum",
        "Rotational Motion",
        "Gravitation",
        "Fluid Mechanics",
        "Thermodynamics",
        "Waves",
        "Sound",
        "Optics",
        "Electricity",
        "Magnetism",
        "Electromagnetic Waves",
        "Quantum Mechanics",
        "Special Relativity",
        "General Relativity",
        "Nuclear Physics",
        "Particle Physics",
        "Astrophysics",
        "Experimental Techniques",
        "Error Analysis",
        "Units & Measurement",
    )),
    ("Chemistry", (
        "Atomic Structure",
        "Periodic Table",
        "Ionic Bonding",
        "Covalent Bonding",
        "Metallic Bonding",
        "Intermolecular Forces",
        "Molecular Geometry",
        "States of Matter",
        "Gas Laws",
        "Solutions",
        "Acids and Bases",
        "Redox Reactions",
        "Chemical Reactions",
        "Thermochemistry",
        "Reaction Kinetics",
        "Chemical Equilibrium",
        "Nuclear Chemistry",
        "Organic Chemistry",
        "Hydrocarbons",
        "Functional Groups",
        "Biochemistry",
        "Polymers",
        "Analytical Techniques",
        "Spectroscopy",
    )),
    ("Biology", (
        "Cell Structure",
        "Cell Membrane",
        "Cell Division",
        "Cellular Respiration",
        "Photosynthesis",
        "DNA Structure",
        "Protein Synthesis",
        "Genetics",
        "Inheritance",
        "Epigenetics",
        "Evolution",
        "Taxonomy",
        "Plants",
        "Animal Physiology",
        "Nervous System",
        "Endocrine System",
        "Immune System",
        "Cardiovascular System",
        "Respiratory System",
        "Digestive System",
        "Reproductive System",
        "Population Biology",
        "Ecology",
        "Microbiology",
        "Biotechnology",
    )),
    ("Earth Science", (
        "Geology",
        "Plate Tectonics",
        "Volcanoes",
        "Earthquakes",
        "Minerals",
        "Rocks",
        "Fossils",
        "Geologic Time",
        "Weathering & Erosion",
        "Oceanography",
        "Weather",
        "Climate",
        "Atmosphere",
        "Hydrosphere",
        "Natural Disasters",
        "Soil Science",
        "Earth’s Interior",
        "Magnetic Field",
    )),
    ("Astronomy", (
        "Solar System",
        "Planets",
        "Moons",
        "Sun",
        "Stars",
        "Galaxies",
        "Black Holes",
        "Dark Matter",
        "Cosmology",
        "Exoplanets",
        "Space Missions",
        "Space-Time",
        "Telescopes",
        "Observational Techniques",
    )),
    ("Environmental Science", (
        "Ecosystems",
        "Biodiversity",
        "Pollution",
        "Water Resources",
        "Air Quality",
        "Climate Change",
        "Sustainable Energy",
        "Deforestation",
        "Conservation",
        "Waste Management",
        "Carbon Cycle",
        "Nitrogen Cycle",
        "Food Chains",
        "Population Growth",
        "Land Use",
    )),
    ("Math", (
        "Calculus - Derivatives",
        "Calculus - Integrals",
        "Calculus - Applications",
        "Differential Equations",
        "Probability",
        "Statistics",
        "Linear Algebra",
        "Matrices",
        "Trigonometry",
        "Geometry",
        "Algebra",
        "Number Theory",
        "Set Theory",
        "Graph Theory",
        "Mathematical Logic",
        "Complex Numbers",
    )),
    ("Computer Science", (
        "Algorithms",
        "Data Structures",
        "Boolean Logic",
        "Computational Complexity",
        "Artificial Intelligence",
        "Machine Learning",
        "Quantum Computing",
        "Theoretical CS",
        "Cryptography",
        "Networks",
        "Software Engineering",
        "Programming Languages",
        "Operating Systems",
    )),
    ("Engineering", (
        "Mechanical Engineering",
        "Thermodynamics",
        "Electrical Engineering",
        "Control Systems",
        "Signal Processing",
        "Materials Science",
        "Structural Engineering",
        "Robotics",
        "Civil Engineering",
        "Computer Engineering",
        "Aerospace Engineering",
    )),
    ("Medicine", (
        "Anatomy",
        "Physiology",
        "Pharmacology",
        "Pathology",
        "Neuroscience",
        "Medical Imaging",
        "Public Health",
        "Epidemiology",
        "Genetics in Medicine",
        "Infectious Diseases",
        "Cardiology",
        "Oncology",
        "Endocrinology",
        "Immunology",
        "Gastroenterology",
        "Dermatology",
    )),
    ("Psychology", (
        "Cognitive Psychology",
        "Behavioral Psychology",
        "Developmental Psychology",
        "Biopsychology",
        "Neuropsychology",
        "Sensation and Perception",
        "Social Psychology",
        "Personality Theory",
        "Psychological Disorders",
        "Therapeutic Approaches",
    )),
    ("Science", (
        "Scientific Method",
        "Experimental Design",
        "Measurement & Units",
        "Error & Uncertainty",
        "Models & Simulations",
        "Ethics in Science",
        "History of Science",
    )),
    # ---- later additions, appended after the first pass over every field ----
    ("Math", (
        "Topology",
        "Real Analysis",
        "Abstract Algebra",
        "Vector Calculus",
        "Fractions and Decimals",
        "Math History",
        "Math in Nature",
    )),
    ("Computer Science", (
        "Natural Language Processing",
        "Computer Vision",
        "Human-Computer Interaction",
        "Cybersecurity",
        "Cloud Computing",
    )),
    ("Engineering", (
        "Environmental Engineering",
        "Biomedical Engineering",
        "Nanotechnology",
        "Transportation Engineering",
        "Mechatronics",
    )),
    ("Physics", (
        "Computational Physics",
        "Biophysics",
    )),
    ("Chemistry", (
        "Coordination Compounds",
        "Environmental Chemistry",
        "Green Chemistry",
        "Industrial Chemistry",
    )),
    ("Biology", (
        "Developmental Biology",
        "Neurobiology",
        "Synthetic Biology",
        "Systems Biology",
        "Marine Biology",
    )),
    ("Earth Science", (
        "Remote Sensing",
        "Glaciology",
        "Meteorology",
    )),
    ("Astronomy", (
        "Astrobiology",
        "Radio Astronomy",
        "Planetary Geology",
        "Space Weather",
    )),
    ("Environmental Science", (
        "Environmental Policy",
        "Energy Resources",
        "Ecological Footprint",
        "Environmental Toxicology",
    )),
    ("Medicine", (
        "Genomics",
        "Surgical Techniques",
        "Preventive Medicine",
        "Health Informatics",
    )),
    ("Psychology", (
        "Forensic Psychology",
        "Educational Psychology",
        "Industrial-Organizational Psychology",
        "Research Methods",
    )),
    ("Science", (
        "Philosophy of Science",
        "Science Communication",
        "Interdisciplinary Science",
    )),
)

# Flat list of "<field>/<topic>" theme labels; one prompt is built per entry.
themes = [
    field + "/" + topic
    for field, topics in _THEME_GROUPS
    for topic in topics
]
template = """Generate 5 unique science questions on the theme: "{theme}".
Guidelines:
- Questions must be original and non-repetitive.
- Vary the difficulty level: include a mix of basic, intermediate, and advanced questions.
- Each question should require reasoning, explanation, or calculation — not trivia.
- Use clear, precise phrasing that reads like a natural question, not an exam command.
- Avoid directive phrasing like "Calculate...", "Derive...", or "Using calculus, determine...".
- Prefer natural, curiosity-driven forms like "How does...", "What is the velocity of...", "Why does...", "What happens if...", etc.
- At least 3 of the 5 questions should lead to a numerical answer or require numerical computation (e.g. velocity, force, energy).
- Do not include meta-instructions like “Explain your reasoning” in the question text.
Output format:
Return the 5 questions in a valid JSON array, like this:
[
"Question 1 text",
"Question 2 text",
"Question 3 text",
"Question 4 text",
"Question 5 text"
]
"""
# Request one batch of questions per theme and write the raw model output to
# stdout (the documented usage redirects stdout into questions.txt).
# Diagnostics go to stderr so they never get mixed into that output.
failed_themes = []
for theme in themes:
    try:
        response = client.responses.create(
            model="gpt-4o",
            input=template.format(theme=theme),
        )
    except OpenAIError as exc:
        # A single failed request must not throw away the rest of a long
        # batch run: record the theme, report it, and carry on.
        print(f"Request failed for theme {theme!r}: {exc}", file=sys.stderr)
        failed_themes.append(theme)
        continue
    # Flush per result so a redirected output file grows as answers arrive.
    print(response.output_text, flush=True)

if failed_themes:
    # Exit non-zero (message goes to stderr) so the caller can tell that the
    # generated output is incomplete and which themes need to be re-run.
    sys.exit(
        f"{len(failed_themes)} of {len(themes)} themes failed: {failed_themes}"
    )