scripts/database/migration-from-tm2-postgres.sql (207 lines of code) (raw):

-- SQL queries for migrating data from TM2 table structure to TM3 table structure -- This script copies from "tm2" to "taskingmanager". Continue reading to learn how to prepare for this. -- -- We assume the original operational TM2 db is named "tasking-manager" and the user is "tm". -- Also, we assume you have created the TM3 database by following the README or migration guide, -- meaning you will have a database named "taskingmanager" already. Make sure you have run the alembic -- database upgrades to load the schema of "taskingmanager". As a refresher, this is done in the base -- TM3 directory via: venv/bin/python manage.py db upgrade -- If you run into errors, make sure your environmental variable is set for TM_DB. -- -- Now to the migration... -- To be extra cautious, we first backup the old TM2 database and load it into a temporary -- "tm2" database that is used for the migration: -- -- pg_dump -U tm -W tasking-manager > tm2.sql -- -- Next we load this into our temporary "tm2" database: -- -- psql -U tm -d tm2 -f tm2.sql -- -- Now, you are able to run this script to copy and transform the data from the temporary "tm2" -- to the new "taskingmanager" database. -- -- You should now be done with the migration. -- USERS Initial Load -- make sure new tables emptied of any test data first truncate taskingmanager.users cascade; -- truncate taskingmanager.areas_of_interest cascade; -- Populate users with ids and default stats - sets users to beginner mapper level -- previous roles were 8: experienced mapper, 4: experienced validator, 2: project manager, 1: admin, 0: mapper -- new roles are 4: experienced validator, 2: project manager, 1: admin, 0: mapper, -1: read only insert into taskingmanager.users (id,username,role,mapping_level, tasks_mapped, tasks_validated, tasks_invalidated, is_email_verified, date_registered, last_validation_date) (select id,username, case when role is null then 0 when role = 8 then 0 when role = 4 then 4 when role = 2 then 2 when role = 1 then 1 else 0 end, 1,0,0,0, FALSE, current_timestamp, current_timestamp from tm2.users); -- update sequence (commented out as not needed. ID comes from OSM not from the sequence.) -- select setval('taskingmanager.users_id_seq',(select max(id) from taskingmanager.users)); -- LICENCES INSERT INTO taskingmanager.licenses( id, name, description, plain_text) (SELECT id, name, description, plain_text from tm2.licenses); -- update sequence select setval('taskingmanager.licenses_id_seq',(select max(id) from taskingmanager.licenses)); -- USERS_LICENSES INSERT INTO taskingmanager.users_licenses ("user", license) (select "user", license from tm2.users_licenses); -- AREAS OF INTEREST --populate areas of interest with details from old --insert into taskingmanager.areas_of_interest (id, geometry, centroid) -- (select id, geometry, centroid from tm2.areas); --select setval('taskingmanager.areas_of_interest_id_seq',(select max(id) from taskingmanager.areas_of_interest)); -- PROJECTS -- Transfer project data, all projects set to mapper level beginner -- Skipped projects with null author_id INSERT INTO taskingmanager.projects( id, status, created, priority, default_locale, author_id, mapper_level, enforce_mapper_level, enforce_validator_role, private, entities_to_map, changeset_comment, due_date, imagery, josm_preset, last_updated, mapping_types, organisation_tag, campaign_tag, total_tasks, tasks_mapped, tasks_validated, tasks_bad_imagery, centroid, geometry) (select p.id, p.status, p.created, p.priority, 'en', p.author_id, 1, false, false, p.private, p.entities_to_map, p.changeset_comment, p.due_date, p.imagery, p.josm_preset, p.last_update, null, '', '', 1, 0, 0, 0, a.centroid, a.geometry from tm2.project p, tm2.areas a where p.area_id = a.id and p.author_id is not null ); select setval('taskingmanager.projects_id_seq',(select max(id) from taskingmanager.projects)); -- Set the task_creation_mode to 'arbitrary' when project's zoom was None in -- TM2 or 'grid' when it was not None Update taskingmanager.projects set task_creation_mode = 1 from tm2.projects as p where p.id = taskingmanager.projects.id and p.zoom is NULL; Update taskingmanager.projects set task_creation_mode = 0 from tm2.projects as p where p.id = taskingmanager.projects.id and p.zoom is not NULL; -- Project info & translations -- Skip any records relating to projects that have not been imported INSERT INTO taskingmanager.project_info( project_id, locale, name, short_description, description, instructions) (select id, locale, name, short_description, description, instructions from tm2.project_translation pt where exists(select p.id from taskingmanager.projects p where p.id = pt.id)); -- Delete empty languages delete from taskingmanager.project_info where name = '' and short_description = '' and description = '' and instructions = ''; -- Create trigger for text search CREATE TRIGGER tsvectorupdate BEFORE INSERT OR UPDATE ON taskingmanager.project_info FOR EACH ROW EXECUTE PROCEDURE tsvector_update_trigger(text_searchable, 'pg_catalog.english', project_id_str, short_description, description); -- set project-id which will update text search index update taskingmanager.project_info set project_id_str = project_id::text; CREATE INDEX textsearch_idx ON taskingmanager.project_info USING GIN (text_searchable); -- TASKS -- Get all tasks that don't have a state of -1 (removed) and where they relate to a project that has been migrated above -- default any null x, y values to -1 -- default any null zoom levels to 13 INSERT INTO taskingmanager.tasks( id, project_id, x, y, zoom, geometry, task_status) (SELECT t.id, t.project_id, t.x, t.y, t.zoom, t.geometry, 0 from tm2.task t where not exists(select id from tm2.task_state ts where ts.task_id = t.id and ts.project_id = t.project_id and ts.state = -1) and exists(select id from taskingmanager.projects p where p.id = t.project_id) ); -- Copy across per-task-instructions update taskingmanager.project_info p set per_task_instructions = old.per_task_instructions from (select id, locale, per_task_instructions from tm2.project_translation where length(per_task_instructions) > 5) old where project_id = old.id and p.locale = old.locale; -- Update tasks with "Done" task_status and mapped_by info update taskingmanager.tasks nt set task_status = 2, mapped_by = val.user_id from (select * from tm2.task_state where state = 2) as val where val.task_id = nt.id and val.project_id = nt.project_id; -- Update tasks with validated task_status and validated_by info from old task_state tables -- Note, old task_status validated = 3; new task status validated = 4 update taskingmanager.tasks nt set task_status = 4, validated_by = val.user_id from (select * from tm2.task_state where state = 3) as val where val.task_id = nt.id and val.project_id = nt.project_id; -- Update PROJECT with task stats. Don't have info on bad-imagery update taskingmanager.projects p set total_tasks = (select count(id) from taskingmanager.tasks t where t.project_id = p.id); -- tasks_mapped = (select count(id) from taskingmanager.tasks t where t.project_id = p.id and task_status in (2,4)), -- tasks_validated = (select count(id) from taskingmanager.tasks t where t.project_id = p.id and task_status = 4); -- update tasks mapped count UPDATE taskingmanager.projects SET tasks_mapped=subquery.count FROM ( select project_id, count(project_id) from taskingmanager.tasks where taskingmanager.tasks.task_status in (2, 4) group by tasks.project_id) AS subquery WHERE taskingmanager.projects.id=subquery.project_id ; UPDATE taskingmanager.projects SET tasks_validated=subquery.count FROM ( select project_id, count(project_id) from taskingmanager.tasks where tasks.task_status = 4 group by tasks.project_id) AS subquery WHERE taskingmanager.projects.id=subquery.project_id ; -- TASK HISTORY -- State Changes -- only insert state changes where user_id exists, and only for tasks that have been migrated INSERT INTO taskingmanager.task_history( project_id, task_id, action, action_text, action_date, user_id) (SELECT project_id, task_id, 'STATE_CHANGE', CASE state when 0 then 'READY' when 1 then 'INVALIDATED' when 2 then 'MAPPED' when 3 then 'VALIDATED' end, date, user_id from tm2.task_state ts where user_id is not null and exists(select id from taskingmanager.tasks t where t.project_id = ts.project_id and t.id = ts.task_id )); -- Locking -- assuming all the lock events in the old system are locked_for_mapping events not for validation -- not attempting to calculate the length of time task locked -- only insert lock events where user_id exists, and only for tasks that have been migrated INSERT INTO taskingmanager.task_history( project_id, task_id, action, action_text, action_date, user_id) (SELECT project_id, task_id, 'LOCKED_FOR_MAPPING', '', date, user_id from tm2.task_lock ts where user_id is not null and lock = true and exists(select id from taskingmanager.tasks t where t.project_id = ts.project_id and t.id = ts.task_id )); -- Comments -- only insert comments where author_id exists, and only for tasks that have been migrated INSERT INTO taskingmanager.task_history( project_id, task_id, action, action_text, action_date, user_id) (SELECT project_id, task_id, 'COMMENT', comment, date, author_id from tm2.task_comment tc where author_id is not null and exists(select id from taskingmanager.tasks t where t.project_id = tc.project_id and t.id = tc.task_id )); -- Update date registered based on first contribution in task_history, should cover 90% of users update taskingmanager.users set date_registered = action_date from (select t.user_id, min(action_date) action_date from taskingmanager.users u, taskingmanager.task_history t where u.id = t.user_id group by user_id) old where id = old.user_id; -- Update USER STATISTICS -- User Task stats with m as (select user_id, count(id) as mapped from taskingmanager.task_history where action = 'STATE_CHANGE' and action_text = 'MAPPED' group by user_id), v as (select user_id, count(id) as validated from taskingmanager.task_history where action = 'STATE_CHANGE' and action_text = 'VALIDATED' group by user_id), i as (select user_id, count(id) as invalidated from taskingmanager.task_history where action = 'STATE_CHANGE' and action_text = 'INVALIDATED' group by user_id) update taskingmanager.users us set tasks_mapped = coalesce(m.mapped,0), tasks_validated = coalesce(v.validated,0), tasks_invalidated = coalesce(i.invalidated,0) from taskingmanager.users u left join m on m.user_id = u.id left join v on v.user_id = u.id left join i on i.user_id = u.id where us.id = u.id; -- User Project List with p as (select user_id, array_agg(distinct project_id) as projects from taskingmanager.task_history where action = 'STATE_CHANGE' group by user_id) update taskingmanager.users u set projects_mapped = p.projects from p where u.id = p.user_id; -- MESSAGES -- only migrating messages that have not yet been read INSERT INTO taskingmanager.messages( message, subject, from_user_id, to_user_id, date, read) (select message, subject, from_user_id, to_user_id, date, read from tm2.message where read = false); -- PRIORITY_AREAS -- migrate all areas INSERT INTO taskingmanager.priority_areas( id, geometry) (SELECT id, geometry from tm2.priority_area); -- Update sequence select setval('taskingmanager.priority_areas_id_seq',(select max(id) from taskingmanager.priority_areas)); -- Migrate project_priority areas link but only where a matching project exists. -- Remove duplicate records INSERT INTO taskingmanager.project_priority_areas( project_id, priority_area_id) (SELECT distinct pa.project_id, pa.priority_area_id from tm2.project_priority_areas pa where exists(select null from taskingmanager.projects p where p.id = pa.project_id) ); -- PROJECT ALLOWED USERS -- Remove duplicate records INSERT INTO taskingmanager.project_allowed_users( project_id, user_id) (select distinct project_id, user_id from tm2.project_allowed_users); -- TASK ISSQUARE FLAG -- Ensure the is_sqaure flag is consistent with the x,y,zoom values UPDATE taskingmanager.tasks SET is_square = (x IS NOT NULL AND y IS NOT NULL AND zoom IS NOT NULL); -------------------------------------------------- -- Migration Results 28/04/2017 -- -- Projects: Old = 2500 New = 2426 (97%) -- Users : Old = 65323 New = 65323 (100%) -- Tasks : Old = 814106 New = 750281 (92%) -- Areas : Old = 2500 New = 2500 (100%) -- Licences: Old = 6 New = 6 (100%) --------------------------------------------------