import re
from typing import Any, Optional
from wikibaseintegrator.datatypes.basedatatype import BaseDataType
class Sense(BaseDataType):
    """
    Implements the Wikibase data type 'wikibase-sense'

    The value is a sense ID of the form "L<Lexeme ID>-S<Sense ID>"; it is stored in
    the main snak's datavalue as a 'wikibase-entityid' of entity type 'sense'.
    """
    DTYPE = 'wikibase-sense'
    # Query template with {wb_url}, {pid} and {value} placeholders. The doubled braces
    # look like str.format escapes for literal SPARQL braces -- presumably the template
    # is filled in by the base class; confirm against BaseDataType.
    sparql_query = '''
SELECT * WHERE {{
?item_id <{wb_url}/prop/{pid}> ?s .
?s <{wb_url}/prop/statement/{pid}> <{wb_url}/entity/{value}> .
}}
'''

    def __init__(self, value: Optional[str] = None, **kwargs: Any):
        """
        Constructor, calls the superclass BaseDataType

        :param value: Value using the format "L<Lexeme ID>-S<Sense ID>" (example: L252248-S123)
        :param kwargs: Extra keyword arguments, forwarded unchanged to the superclass constructor
        """
        super().__init__(**kwargs)
        self.set_value(value=value)

    def set_value(self, value: Optional[str] = None):
        """
        Validate a sense ID and store it as the datavalue of the main snak

        None or an empty string leaves the current datavalue untouched.

        :param value: Value using the format "L<Lexeme ID>-S<Sense ID>" (example: L252248-S123)
        :raises AssertionError: if value is neither a string nor None
        :raises ValueError: if value does not have the expected format
        """
        # Kept as an assert so callers keep seeing AssertionError for a wrong type.
        assert isinstance(value, str) or value is None, f"Expected str, found {type(value)} ({value})"

        if not value:
            return

        # fullmatch instead of match + '$': '$' also matches just before a trailing
        # newline, which let values such as "L1-S1\n" through and into the datavalue.
        if re.fullmatch(r'L[0-9]+-S[0-9]+', value) is None:
            raise ValueError(f"Invalid sense ID ({value}), format must be 'L[0-9]+-S[0-9]+'")

        self.mainsnak.datavalue = {
            'value': {
                'entity-type': 'sense',
                'id': value
            },
            'type': 'wikibase-entityid'
        }

    def get_sparql_value(self) -> str:
        """
        Return the stored sense ID, i.e. the 'id' field of the main snak's datavalue

        Assumes a value has been set; the lookup fails otherwise.
        """
        return self.mainsnak.datavalue['value']['id']

    def get_lexeme_id(self) -> str:
        """
        Return the lexeme ID of the Sense

        This is the part of the stored sense ID before the first '-'
        (for example "L252248" for "L252248-S123").
        Assumes a value has been set; the lookup fails otherwise.
        """
        return self.mainsnak.datavalue['value']['id'].split('-')[0]