## ---------------------------------------------------------------------------
## Licensed to the Apache Software Foundation (ASF) under one or more
## contributor license agreements.  See the NOTICE file distributed with
## this work for additional information regarding copyright ownership.
## The ASF licenses this file to You under the Apache License, Version 2.0
## (the "License"); you may not use this file except in compliance with
## the License.  You may obtain a copy of the License at
##
##      http://www.apache.org/licenses/LICENSE-2.0
##
## Unless required by applicable law or agreed to in writing, software
## distributed under the License is distributed on an "AS IS" BASIS,
## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
## See the License for the specific language governing permissions and
## limitations under the License.
## ---------------------------------------------------------------------------

# Put the ApacheConAsia讲师信息收集表.xlsx、ApacheConSessions.xlsx、drawing1.xml file in the project root directory
# will be generated in the root directory mapping.csv file

from xml.dom.minidom import parse
from pypinyin import lazy_pinyin
import xml.dom.minidom
import openpyxl
import csv

 
 # First, define a CSV file
head =["collect_row","pic","zh_name","en_name","position","track","title","mail","sessions_row"]
with open("./mapping.csv","a+",encoding="utf-8",newline="") as f:
   csvf=csv.writer(f)
   csvf.writerow(head)

# Using minidom parser to open XML document
DOMTree = xml.dom.minidom.parse("./drawing1.xml")

# Open the EXECL file
collect=openpyxl.load_workbook("./ApacheConAsia讲师信息收集表.xlsx")
session=openpyxl.load_workbook("./ApacheConSessions.xlsx")

# Getting data in a collection
collection = DOMTree.documentElement
twoCellAnchors = collection.getElementsByTagName("xdr"+":"+"twoCellAnchor")

# Take the string between two fixed symbols
def get_str_btw(str,begin,end):
   par=str.partition(begin)
   return (par[2].partition(end))[0][:]

# Get row and pic
for twoCellAnchor in twoCellAnchors:
 
   # Get row
   from1 = twoCellAnchor.getElementsByTagName('xdr'+':'+'to')[0]
   row1=from1.getElementsByTagName('xdr'+':'+'row')[0]
   row=int(row1.childNodes[0].data)

   # Get pic
   pic = twoCellAnchor.getElementsByTagName('xdr'+':'+'pic')[0]
   blipFill = pic.getElementsByTagName('xdr'+':'+'blipFill')[0]
   blip = blipFill.getElementsByTagName('a'+':'+'blip')[0]
   if blip.hasAttribute("r"+":"+"embed"):
      pic="picture"+(blip.getAttribute("r"+":"+"embed"))[3:]

   # Gets the active table object
   collect_active=collect.active
   session_active=session.active

   # Get the name, position, track and email address of the lecturer
   zh_name=collect_active.cell(row,3).value
   position=collect_active.cell(row,6).value
   track=collect_active.cell(row,9).value
   mail=collect_active.cell(row,10).value

   # Judge whether it is a Chinese name
   if '\u4e00' <=zh_name <='\u9fff':
      # Translate Chinese name into English name
      name_list=lazy_pinyin(zh_name)
      xin=name_list[0]
      ming_list=name_list[1:]
      ming=""
      for i in ming_list:
         ming=ming+i
      en_name=ming.capitalize()+"_"+xin.capitalize()
   else:
      en_name=zh_name

   # According to the mail from the collection table, find the corresponding row from the session table. 
   # If there is no corresponding row, return 0
   flag=False
   for cell1 in session_active['J']:
      str=cell1.value
      mail_list=str.split(",")
      for i in mail_list:
         session_mail=get_str_btw(i,'<','>')
         if session_mail==mail :
            flag=True
            session_row=cell1.row
            title=session_active.cell(cell1.row,7).value
            break
   if flag==False:
      session_row=0
      title=collect_active.cell(row,4).value       

   # Write data to a CSV file
   data=[
      (row,pic,zh_name,en_name,position,track,title,mail,session_row)
   ]
   with open("./mapping.csv","a+",encoding="utf-8",newline="") as f:
      csvf=csv.writer(f)
      csvf.writerows(data)
