in bleach/_vendor/html5lib/_tokenizer.py [0:0]
def attributeNameState(self):
data = self.stream.char()
leavingThisState = True
emitToken = False
if data == "=":
self.state = self.beforeAttributeValueState
elif data in asciiLetters:
self.currentToken["data"][-1][0] += data +\
self.stream.charsUntil(asciiLetters, True)
leavingThisState = False
elif data == ">":
# XXX If we emit here the attributes are converted to a dict
# without being checked and when the code below runs we error
# because data is a dict not a list
emitToken = True
elif data in spaceCharacters:
self.state = self.afterAttributeNameState
elif data == "/":
self.state = self.selfClosingStartTagState
elif data == "\u0000":
self.tokenQueue.append({"type": tokenTypes["ParseError"],
"data": "invalid-codepoint"})
self.currentToken["data"][-1][0] += "\uFFFD"
leavingThisState = False
elif data in ("'", '"', "<"):
self.tokenQueue.append({"type": tokenTypes["ParseError"],
"data":
"invalid-character-in-attribute-name"})
self.currentToken["data"][-1][0] += data
leavingThisState = False
elif data is EOF:
self.tokenQueue.append({"type": tokenTypes["ParseError"],
"data": "eof-in-attribute-name"})
self.state = self.dataState
else:
self.currentToken["data"][-1][0] += data
leavingThisState = False
if leavingThisState:
# Attributes are not dropped at this stage. That happens when the
# start tag token is emitted so values can still be safely appended
# to attributes, but we do want to report the parse error in time.
self.currentToken["data"][-1][0] = (
self.currentToken["data"][-1][0].translate(asciiUpper2Lower))
for name, _ in self.currentToken["data"][:-1]:
if self.currentToken["data"][-1][0] == name:
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
"duplicate-attribute"})
break
# XXX Fix for above XXX
if emitToken:
self.emitCurrentToken()
return True