Transcription Models

Data models for musical transcription data.

Core Models

Piece

class idtap.Piece(options: dict | None = None)[source]

Bases: object

__init__(options: dict | None = None) → None[source]

property phrases: List[Phrase]

property dur_array: List[float]

property section_starts_grid: List[List[int]]

Compute section starts from phrase-level is_section_start flags.

property section_starts: List[int]

property section_categorization: List[Dict[str, Dict[str, bool] | str]]

property assemblages: List['Assemblage']

update_start_times() → None[source]

dur_tot_from_phrases() → None[source]

Set durTot from contained phrases and pad shorter tracks.

dur_array_from_phrases() → None[source]

Recompute durArrayGrid removing NaN trajectories.

set_dur_tot(dur_tot: float) → None[source]

fill_remaining_duration(target_duration: float, track: int = 0) → None[source]

Add a silent trajectory to fill the remaining duration to reach target_duration.

Parameters:

target_duration – The desired total duration for the piece
track – Which instrument track to add the silence to (default: 0)

add_trajectory(trajectory_data: Trajectory | Dict[str, Any], inst_track: int, start_time: float) → bool[source]

Add a trajectory to the piece by replacing part of a silent trajectory.

Parameters:

trajectory_data – Either a Trajectory object or dict with trajectory parameters
inst_track – Index of the instrument track to add the trajectory to
start_time – Start time in the piece (in seconds)

Returns:

True if trajectory was successfully added, False otherwise

Return type:

bool

realign_pitches() → None[source]

update_fundamental(fundamental: float) → None[source]

put_raga_in_phrase() → None[source]

property sections_grid: List[List['Section']]

property sections: List['Section']

add_meter(meter: Meter) → None[source]

remove_meter(meter: Meter) → None[source]

all_trajectories(inst: int = 0, string_idx: int = 0) → List[Trajectory][source]

Get all trajectories for a given instrument track and string index.

Parameters:

inst – Instrument track index (default 0).
string_idx – String index within the instrument (default 0). For Sitar/Sarangi, string 0 is main, string 1 is jor/second.

track_from_traj(traj: Trajectory) → int[source]

track_from_traj_uid(traj_uid: str) → int[source]

phrase_from_uid(uid: str) → Phrase[source]

track_from_phrase_uid(uid: str) → int[source]

string_from_traj(traj: Trajectory) → int[source]

Determine which string index contains a given trajectory.

Searches all phrases across all strings by unique_id. Returns the string index (0 or 1). Raises ValueError if not found.

ensure_string_synchronization() → None[source]

For Sitar/Sarangi, ensure trajectory_grid[1] exists and is synchronized.

If string 1 is empty or contains only silent trajectories (id=12), fill it with a single silent trajectory matching the phrase duration.

traj_from_uid(uid: str, track: int = 0) → Trajectory[source]

traj_from_time(time: float, track: int = 0) → Trajectory | None[source]

phrase_from_time(time: float, track: int = 0) → Phrase[source]

phrase_idx_from_time(time: float, track: int = 0) → int[source]

all_groups(instrument_idx: int = 0) → List[Group][source]

p_idx_from_group(g: Group) → int[source]

s_idx_from_p_idx(p_idx: int, inst: int = 0) → int[source]

durations_of_fixed_pitches(inst: int = 0, output_type: str = 'pitchNumber') → Dict[source]

proportions_of_fixed_pitches(inst: int = 0, output_type: str = 'pitchNumber') → Dict[source]

chikari_freqs(inst_idx: int = 0) → List[float][source]

Return 4 chikari frequencies derived from the raga.

Returns 0.0 for strings that are silent (None pitch).

dur_starts(track: int = 0) → List[float][source]

traj_start_times(inst: int = 0, string_idx: int = 0) → List[float][source]

Get start times for all trajectories in a given string.

For string 0: cumulative duration (standard sequential timing). For string > 0: phrase-boundary based (phrase.start_time + traj.start_time).

all_pitches(repetition: bool = True, pitch_number: bool = False, track: int = 0) → List[Any][source]

property highest_pitch_number: float

property lowest_pitch_number: float

most_recent_traj(time: float, inst: int = 0) → Trajectory[source]

chunked_trajs(inst: int = 0, duration: float = 30) → List[List[Trajectory]][source]

all_display_bols(inst: int = 0) → List[Dict[str, Any]][source]

all_display_sargam(inst: int = 0) → List[Dict[str, Any]][source]

all_phrase_divs(inst: int = 0) → List[Dict[str, Any]][source]

all_display_vowels(inst: int = 0) → List[Dict[str, Any]][source]

all_display_ending_consonants(inst: int = 0) → List[Dict[str, Any]][source]

all_display_chikaris(inst: int = 0) → List[Dict[str, Any]][source]

chunked_display_chikaris(inst: int = 0, duration: float = 30) → List[List[Dict[str, Any]]][source]

chunked_display_consonants(inst: int = 0, duration: float = 30) → List[List[Dict[str, Any]]][source]

chunked_display_vowels(inst: int = 0, duration: float = 30) → List[List[Dict[str, Any]]][source]

chunked_display_sargam(inst: int = 0, duration: float = 30) → List[List[Dict[str, Any]]][source]

chunked_display_bols(inst: int = 0, duration: float = 30) → List[List[Dict[str, Any]]][source]

chunked_phrase_divs(inst: int = 0, duration: float = 30) → List[List[Dict[str, Any]]][source]

chunked_meters(duration: float = 30) → List[List[Meter]][source]

pulse_from_id(id: str)[source]

clean_up_section_categorization(c: Dict[str, Dict[str, bool] | str]) → None[source]

to_json() → Dict[str, Any][source]

static from_json(obj: Dict[str, Any]) → Piece[source]

Phrase

class idtap.Phrase(options: Dict[str, Any] | None = None)[source]

Bases: object

__init__(options: Dict[str, Any] | None = None) → None[source]

update_fundamental(fundamental: float) → None[source]

get_groups(idx: int = 0) → List[Group][source]

get_group_from_id(gid: str) → Group | None[source]

assign_phrase_idx() → None[source]

assign_traj_nums() → None[source]

dur_tot_from_trajectories() → None[source]

dur_array_from_trajectories() → None[source]

compute(x: float, log_scale: bool = False)[source]

realign_pitches() → None[source]

assign_start_times() → None[source]

get_range() → Dict[str, Dict[str, Any]][source]

consolidate_silent_trajs() → None[source]

chikaris_during_traj(traj: Trajectory, track: int)[source]

property trajectories: List[Trajectory]

property chikaris: Dict[str, Chikari]

property swara: List[Dict[str, Any]]

all_pitches(repetition: bool = True) → List[Pitch][source]

first_traj_idxs() → List[int][source]

traj_idx_from_time(time: float) → int[source]

to_json() → Dict[str, Any][source]

static from_json(obj: Dict[str, Any], ratios=None, fundamental=None) → Phrase[source]

to_note_view_phrase() → NoteViewPhrase[source]

reset() → None[source]

Trajectory

class idtap.Trajectory(options: dict | None = None)[source]

Bases: object

__init__(options: dict | None = None) → None[source]

property freqs: List[float]

property log_freqs: List[float]

property sloped: bool

property min_freq: float

property max_freq: float

property min_log_freq: float

property max_log_freq: float

property end_time: float | None

property name_: str

update_fundamental(fundamental: float) → None[source]

compute(x: float, log_scale: bool = False) → float[source]

id0(x: float, lf: List[float] | None = None) → float[source]

id1(x: float, lf: List[float] | None = None) → float[source]

id2(x: float, lf: List[float] | None = None, sl: float | None = None) → float[source]

id3(x: float, lf: List[float] | None = None, sl: float | None = None) → float[source]

id4(x: float, lf: List[float] | None = None, sl: float | None = None, da: List[float] | None = None) → float[source]

id5(x: float, lf: List[float] | None = None, sl: float | None = None, da: List[float] | None = None) → float[source]

id6(x: float, lf: List[float] | None = None, da: List[float] | None = None) → float[source]

id7(x: float, lf: List[float] | None = None, da: List[float] | None = None) → float[source]

id8(x: float, lf: List[float] | None = None, da: List[float] | None = None) → float[source]

id9(x: float, lf: List[float] | None = None, da: List[float] | None = None) → float[source]

id10(x: float, lf: List[float] | None = None, da: List[float] | None = None) → float[source]

id12(x: float) → float[source]

id13(x: float) → float[source]

remove_consonant(start: bool = True) → None[source]

add_consonant(consonant: str, start: bool = True) → None[source]

change_consonant(consonant: str, start: bool = True) → None[source]

durations_of_fixed_pitches(opts: Dict | None = None) → Dict[source]

convert_c_iso_to_hindi_and_ipa() → None[source]

update_vowel(v_iso: str) → None[source]

to_json() → Dict[source]

static from_json(obj: Dict, ratios=None, fundamental=None) → Trajectory[source]

static names() → List[str][source]

Pitch

class idtap.Pitch(options: PitchOptionsType | None = None)[source]

Bases: object

__init__(options: PitchOptionsType | None = None)[source]

property frequency

property non_offset_frequency

property non_offset_log_freq

property log_freq

property sargam_letter

property numbered_pitch

property chroma

to_json()[source]

set_oct(new_oct)[source]

static pitch_number_to_chroma(pitch_number: int) → int[source]

static chroma_to_scale_degree(chroma: int) → tuple[int, bool][source]

static from_pitch_number(pitch_number: int, fundamental: float = 261.63) → Pitch[source]

property solfege_letter: str

property scale_degree: int

property octaved_scale_degree: str

property octaved_sargam_letter: str

property octaved_sargam_letter_with_cents: str

property octaved_solfege_letter: str

property octaved_solfege_letter_with_cents: str

property octaved_chroma: str

property octaved_chroma_with_cents: str

property cents_string: str

property latex_sargam_letter: str

LaTeX-compatible base sargam letter.

property latex_octaved_sargam_letter: str

LaTeX math mode sargam letter with properly positioned diacritics.

property a440_cents_deviation: str

property western_pitch: str

property movable_c_cents_deviation: str

same_as(other: Pitch) → bool[source]

classmethod from_json(obj: dict, ratios=None, fundamental=None) → Pitch[source]

Musical Elements

Raga

class idtap.Raga(options: RagaOptionsType | None = None, preserve_ratios: bool = False, client=None)[source]

Bases: object

__init__(options: RagaOptionsType | None = None, preserve_ratios: bool = False, client=None) → None[source]

property sargam_letters: List[str]

property solfege_strings: List[str]

property pc_strings: List[str]

property western_pitch_strings: List[str]

property rule_set_num_pitches: int

pitch_number_to_sargam_letter(pitch_number: int) → str | None[source]

get_pitch_numbers(low: int, high: int) → List[int][source]

pitch_number_to_scale_number(pitch_number: int) → int[source]

scale_number_to_pitch_number(scale_number: int) → int[source]

scale_number_to_sargam_letter(scale_number: int) → str | None[source]

set_ratios(rule_set: Dict[str, bool | Dict[str, bool]]) → List[float][source]

get_pitches(low: float = 100, high: float = 800) → List[Pitch][source]

Get all pitches in the given frequency range.

When ratios have been preserved from transcription data, we generate pitches based on those actual ratios rather than the rule_set.

property stratified_ratios: List[float | List[float]]

Get stratified ratios matching the structure of the rule_set.

When ratios were preserved from transcription data (preserve_ratios=True), they may not match the rule_set structure. In this case, we use the tuning values directly since the ratios represent the actual transcribed pitches, not the theoretical rule_set structure.

property chikari_pitches: List[Pitch | None]

Derive 4 chikari pitches from the raga rule set.

Returns a list of 4 pitches (or None for silent strings):

[0] Sa oct 2 (always present)
[1] Sa oct 1 (always present)
[2] Pa oct 1 (present if Pa is in the raga, else None)
[3] Ga oct 1 (present if exactly one Ga variant, else None)

get_frequencies(low: float = 100, high: float = 800) → List[float][source]

property sargam_names: List[str]

property swara_objects: List[Dict[str, int | bool]]

pitch_from_log_freq(log_freq: float) → Pitch[source]

ratio_idx_to_tuning_tuple(idx: int) → Tuple[str, str | None][source]

to_json() → Dict[str, str | float | List[float] | Dict[str, float | Dict[str, float]]][source]

static from_json(obj: Dict, client=None) → Raga[source]

Section

class idtap.Section(options: dict | None = None)[source]

Bases: object

__init__(options: dict | None = None) → None[source]

all_pitches(repetition: bool = True) → List[Pitch][source]

property trajectories: List[Trajectory]

Meter

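class idtap.Meter(hierarchy: List[int | List[int]] | None = None, start_time: float = 0.0, tempo: float = 60.0, unique_id: str | None = None, repetitions: int = 1, tala_name: TalaName | None = None, vibhaga: List[str | int] | None = None)[source]

Bases: object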

tala_presets: Dict[TalaName, TalaDefinition] = {TalaName.AdaChautal: {'hierarchy': [[2, 2, 2, 2, 2, 2, 2], 4], 'vibhaga': ['X', 'O', 2, 'O', 3, 4, 'O']}, TalaName.Dadra: {'hierarchy': [[3, 3], 4], 'vibhaga': ['X', 'O']}, TalaName.DeepchandiDhrupad: {'hierarchy': [[4, 2, 4, 2], 4], 'vibhaga': ['X', 2, 'O', 3]}, TalaName.DeepchandiThumri: {'hierarchy': [[3, 4, 3, 4], 4], 'vibhaga': ['X', 'O', 2, 3]}, TalaName.Dhamar: {'hierarchy': [[5, 2, 3, 4], 4], 'vibhaga': ['X', 2, 'O', 3]}, TalaName.Ektal: {'hierarchy': [[2, 2, 2, 2, 2, 2], 4], 'vibhaga': ['X', 'O', 2, 'O', 3, 4]}, TalaName.Jhaptal: {'hierarchy': [[2, 3, 2, 3], 4], 'vibhaga': ['X', 2, 'O', 3]}, TalaName.Jhoomra: {'hierarchy': [[3, 4, 3, 4], 4], 'vibhaga': ['X', 2, 'O', 3]}, TalaName.Keherwa: {'hierarchy': [4, 4], 'vibhaga': ['X', 'O']}, TalaName.Rupak: {'hierarchy': [3, 2, 2], 'vibhaga': ['X', 2, 3]}, TalaName.SoolTaal: {'hierarchy': [[2, 2, 2, 2, 2], 4], 'vibhaga': ['X', 2, 'O', 3, 4]}, TalaName.Tilwada: {'hierarchy': [[4, 4, 4, 4], 4], 'vibhaga': ['X', 2, 'O', 3]}, TalaName.Tintal: {'hierarchy': [[4, 4, 4, 4], 4], 'vibhaga': ['X', 2, 'O', 3]}, TalaName.Tivra: {'hierarchy': [[3, 2, 2], 4], 'vibhaga': ['X', 2, 3]}}

__init__(hierarchy: List[int | List[int]] | None = None, start_time: float = 0.0, tempo: float = 60.0, unique_id: str | None = None, repetitions: int = 1, tala_name: TalaName | None = None, vibhaga: List[str | int] | None = None) → None[source]

classmethod from_tala(name: TalaName, start_time: float, tempo: float, repetitions: int) → Meter[source]

Create a Meter from a predefined tala preset.

Parameters:

name – The tala name (e.g., TalaName.Tintal)
start_time – Start time in seconds
tempo – Tempo in BPM (at matra level)
repetitions – Number of tala cycles

Returns:

A Meter configured with the tala preset

property cycle_dur: float

get_tempo_at_layer(layer: int) → float[source]

Get the tempo at a specific hierarchical layer.

Parameters:

layer – The hierarchy layer (0 = coarsest/vibhag, higher = finer subdivisions)

Returns:

The tempo (BPM) at that layer

Raises:

ValueError – If layer is out of bounds

property all_pulses: List[Pulse]

Get all pulses from the finest layer (lowest level) of the hierarchy.

This concatenates pulses from all pulse structures in the last layer, matching the TypeScript implementation: lastLayer.map(ps => ps.pulses).flat()

property real_times: List[float]

offset_pulse(pulse: Pulse, offset: float) → None[source]

reset_tempo() → None[source]

grow_cycle() → None[source]

add_time_points(time_points: List[float], layer: int = 1) → None[source]

static from_time_points(time_points: List[float], hierarchy: List[int | List[int]], repetitions: int = 1, layer: int = 0) → Meter[source]

Create a Meter from actual pulse time points, handling timing variations.

This method creates a meter that accurately represents actual pulse timing (including rubato and tempo variations) rather than theoretical even spacing. Uses a timing regularization algorithm to handle extreme deviations.

Parameters:

time_points – List of actual pulse times in seconds
hierarchy – Meter hierarchy (e.g., [4, 4, 2])
repetitions – Number of cycle repetitions
layer – Which hierarchical layer the time points represent (0 or 1)

Returns:

Meter object with pulses positioned at the provided time points

static from_json(obj: Dict) → Meter[source]

to_json() → Dict[source]

get_segment_boundary_indices() → List[int][source]

Get the indices of matra pulses that are at segment (vibhag) boundaries.

For Tintal [[4,4,4,4], 4]: returns [0, 4, 8, 12] per cycle. For Jhoomra [[3,4,3,4], 4]: returns [0, 3, 7, 10] per cycle. Returns an empty list if hierarchy[0] is not compound.

get_matra_pulses() → List[Pulse][source]

Get only the matra-level pulses (pulses that correspond to matras/beats).

For a hierarchy like [[4,4,4,4], 4] (Tintal):

- Total pulses = 64 (16 matras × 4 subdivisions)
- Returns only the 16 matra pulses (every 4th pulse)

These are the pulses that correspond to beats in the tala structure.

is_segment_boundary(pulse: Pulse) → bool[source]

Check if a pulse is at a segment (vibhag) boundary.

get_segment_for_matra_index(matra_idx: int) → Dict[str, int] | None[source]

Get the segment range (start and end matra indices) for a given matra index.

Returns None if the hierarchy doesn’t have a compound first layer.

Parameters:

matra_idx – The matra index to find the segment for

Returns:

Dict with ‘start’ and ‘end’ keys, or None

offset_segment_boundary(pulse: Pulse, offset: float) → bool[source]

Offset a segment boundary pulse and proportionally adjust all matra pulses within that segment.

This makes nudging a vibhag boundary move all the matras within that vibhag proportionally.

Parameters:

pulse – The pulse to offset (must be at a segment boundary)
offset – The time offset in seconds

Returns:

True if segment-aware offset was applied, False if regular offset should be used

get_musical_time(real_time: float, reference_level: int | None = None) → 'MusicalTime' | Literal[False][source]

Convert real time to musical time within this meter.

Parameters:

real_time – Time in seconds
reference_level – Hierarchical level for fractional calculation (0=beat, 1=subdivision, etc.). Defaults to finest level.

Returns:

MusicalTime object if time falls within meter boundaries, False otherwise

Advanced Models

Articulation

class idtap.Articulation(options: ArticulationOptions | None = None)[source]

Bases: object

__init__(options: ArticulationOptions | None = None) → None[source]

static from_json(obj: Dict) → Articulation[source]

to_json() → Dict[source]

Assemblage

class idtap.Assemblage(instrument: Instrument, name: str, id: str | None = None)[source]

Bases: object

__init__(instrument: Instrument, name: str, id: str | None = None) → None[source]

add_strand(label: str, id: str | None = None) → None[source]

add_phrase(phrase: Phrase, strand_id: str | None = None) → None[source]

remove_strand(id: str) → None[source]

move_phrase_to_strand(phrase: Phrase, target_strand_id: str | None = None) → None[source]

remove_phrase(phrase: Phrase) → None[source]

static from_descriptor(descriptor: AssemblageDescriptorType, phrases: List[Phrase]) → Assemblage[source]

property loose_phrases: List[Phrase]

property descriptor: AssemblageDescriptorType

Automation

class idtap.Automation(options: AutomationOptionsType | None = None)[source]

Bases: object

__init__(options: AutomationOptionsType | None = None) → None[source]

add_value(norm_time: float, value: float) → None[source]

remove_value(idx: int) → None[source]

value_at_x(x: float) → float[source]

generate_value_curve(value_dur: float, duration: float, max_val: float = 1.0) → List[float][source]

partition(dur_array: List[float]) → List[Automation][source]

static compress(automations: List[Automation], dur_array: List[float]) → Automation[source]

static from_json(obj: Dict) → Automation[source]

to_json() → Dict[source]

Chikari

class idtap.Chikari(options: ChikariOptionsType | None = None)[source]

Bases: object

__init__(options: ChikariOptionsType | None = None) → None[source]

to_json() → Dict[source]

static from_json(obj: Dict) → Chikari[source]

Group

class idtap.Group(options: Dict | None = None)[source]

Bases: object

__init__(options: Dict | None = None) → None[source]

property min_freq: float

property max_freq: float

all_pitches(repetition: bool = True) → List[Pitch][source]

test_for_adjacency() → bool[source]

add_traj(traj: Trajectory) → None[source]

to_json() → Dict[source]

static from_json(obj: Dict) → Group[source]

NoteViewPhrase

class idtap.NoteViewPhrase(options: Dict[str, Any] | None = None)[source]

Bases: object

__init__(options: Dict[str, Any] | None = None) → None[source]