Transcription Models

Data models for musical transcription data.

Core Models

Piece

class idtap.Piece(options: dict | None = None)[source]

Bases: object

__init__(options: dict | None = None) None[source]
property phrases: List[Phrase]
property dur_array: List[float]
property section_starts_grid: List[List[int]]

Compute section starts from phrase-level is_section_start flags.

property section_starts: List[int]
property section_categorization: List[Dict[str, Dict[str, bool] | str]]
property assemblages: List['Assemblage']
update_start_times() None[source]
dur_tot_from_phrases() None[source]

Set durTot from contained phrases and pad shorter tracks.

dur_array_from_phrases() None[source]

Recompute durArrayGrid removing NaN trajectories.

set_dur_tot(dur_tot: float) None[source]
fill_remaining_duration(target_duration: float, track: int = 0) None[source]

Add a silent trajectory to fill the remaining duration to reach target_duration.

Parameters:
  • target_duration – The desired total duration for the piece

  • track – Which instrument track to add the silence to (default: 0)

add_trajectory(trajectory_data: Trajectory | Dict[str, Any], inst_track: int, start_time: float) bool[source]

Add a trajectory to the piece by replacing part of a silent trajectory.

Parameters:
  • trajectory_data – Either a Trajectory object or dict with trajectory parameters

  • inst_track – Index of the instrument track to add the trajectory to

  • start_time – Start time in the piece (in seconds)

Returns:

True if trajectory was successfully added, False otherwise

Return type:

bool

realign_pitches() None[source]
update_fundamental(fundamental: float) None[source]
put_raga_in_phrase() None[source]
property sections_grid: List[List['Section']]
property sections: List['Section']
add_meter(meter: Meter) None[source]
remove_meter(meter: Meter) None[source]
all_trajectories(inst: int = 0, string_idx: int = 0) List[Trajectory][source]

Get all trajectories for a given instrument track and string index.

Parameters:
  • inst – Instrument track index (default 0).

  • string_idx – String index within the instrument (default 0). For Sitar/Sarangi, string 0 is main, string 1 is jor/second.

track_from_traj(traj: Trajectory) int[source]
track_from_traj_uid(traj_uid: str) int[source]
phrase_from_uid(uid: str) Phrase[source]
track_from_phrase_uid(uid: str) int[source]
string_from_traj(traj: Trajectory) int[source]

Determine which string index contains a given trajectory.

Searches all phrases across all strings by unique_id. Returns the string index (0 or 1). Raises ValueError if not found.

ensure_string_synchronization() None[source]

For Sitar/Sarangi, ensure trajectory_grid[1] exists and is synchronized.

If string 1 is empty or contains only silent trajectories (id=12), fill it with a single silent trajectory matching the phrase duration.

traj_from_uid(uid: str, track: int = 0) Trajectory[source]
traj_from_time(time: float, track: int = 0) Trajectory | None[source]
phrase_from_time(time: float, track: int = 0) Phrase[source]
phrase_idx_from_time(time: float, track: int = 0) int[source]
all_groups(instrument_idx: int = 0) List[Group][source]
p_idx_from_group(g: Group) int[source]
s_idx_from_p_idx(p_idx: int, inst: int = 0) int[source]
durations_of_fixed_pitches(inst: int = 0, output_type: str = 'pitchNumber') Dict[source]
proportions_of_fixed_pitches(inst: int = 0, output_type: str = 'pitchNumber') Dict[source]
chikari_freqs(inst_idx: int = 0) List[float][source]

Return 4 chikari frequencies derived from the raga.

Returns 0.0 for strings that are silent (None pitch).

dur_starts(track: int = 0) List[float][source]
traj_start_times(inst: int = 0, string_idx: int = 0) List[float][source]

Get start times for all trajectories in a given string.

For string 0: cumulative duration (standard sequential timing). For string > 0: phrase-boundary based (phrase.start_time + traj.start_time).

all_pitches(repetition: bool = True, pitch_number: bool = False, track: int = 0) List[Any][source]
property highest_pitch_number: float
property lowest_pitch_number: float
most_recent_traj(time: float, inst: int = 0) Trajectory[source]
chunked_trajs(inst: int = 0, duration: float = 30) List[List[Trajectory]][source]
all_display_bols(inst: int = 0) List[Dict[str, Any]][source]
all_display_sargam(inst: int = 0) List[Dict[str, Any]][source]
all_phrase_divs(inst: int = 0) List[Dict[str, Any]][source]
all_display_vowels(inst: int = 0) List[Dict[str, Any]][source]
all_display_ending_consonants(inst: int = 0) List[Dict[str, Any]][source]
all_display_chikaris(inst: int = 0) List[Dict[str, Any]][source]
chunked_display_chikaris(inst: int = 0, duration: float = 30) List[List[Dict[str, Any]]][source]
chunked_display_consonants(inst: int = 0, duration: float = 30) List[List[Dict[str, Any]]][source]
chunked_display_vowels(inst: int = 0, duration: float = 30) List[List[Dict[str, Any]]][source]
chunked_display_sargam(inst: int = 0, duration: float = 30) List[List[Dict[str, Any]]][source]
chunked_display_bols(inst: int = 0, duration: float = 30) List[List[Dict[str, Any]]][source]
chunked_phrase_divs(inst: int = 0, duration: float = 30) List[List[Dict[str, Any]]][source]
chunked_meters(duration: float = 30) List[List[Meter]][source]
pulse_from_id(id: str)[source]
clean_up_section_categorization(c: Dict[str, Dict[str, bool] | str]) None[source]
to_json() Dict[str, Any][source]
static from_json(obj: Dict[str, Any]) Piece[source]

Phrase

class idtap.Phrase(options: Dict[str, Any] | None = None)[source]

Bases: object

__init__(options: Dict[str, Any] | None = None) None[source]
update_fundamental(fundamental: float) None[source]
get_groups(idx: int = 0) List[Group][source]
get_group_from_id(gid: str) Group | None[source]
assign_phrase_idx() None[source]
assign_traj_nums() None[source]
dur_tot_from_trajectories() None[source]
dur_array_from_trajectories() None[source]
compute(x: float, log_scale: bool = False)[source]
realign_pitches() None[source]
assign_start_times() None[source]
get_range() Dict[str, Dict[str, Any]][source]
consolidate_silent_trajs() None[source]
chikaris_during_traj(traj: Trajectory, track: int)[source]
property trajectories: List[Trajectory]
property chikaris: Dict[str, Chikari]
property swara: List[Dict[str, Any]]
all_pitches(repetition: bool = True) List[Pitch][source]
first_traj_idxs() List[int][source]
traj_idx_from_time(time: float) int[source]
to_json() Dict[str, Any][source]
static from_json(obj: Dict[str, Any], ratios=None, fundamental=None) Phrase[source]
to_note_view_phrase() NoteViewPhrase[source]
reset() None[source]

Trajectory

class idtap.Trajectory(options: dict | None = None)[source]

Bases: object

__init__(options: dict | None = None) None[source]
property freqs: List[float]
property log_freqs: List[float]
property sloped: bool
property min_freq: float
property max_freq: float
property min_log_freq: float
property max_log_freq: float
property end_time: float | None
property name_: str
update_fundamental(fundamental: float) None[source]
compute(x: float, log_scale: bool = False) float[source]
id0(x: float, lf: List[float] | None = None) float[source]
id1(x: float, lf: List[float] | None = None) float[source]
id2(x: float, lf: List[float] | None = None, sl: float | None = None) float[source]
id3(x: float, lf: List[float] | None = None, sl: float | None = None) float[source]
id4(x: float, lf: List[float] | None = None, sl: float | None = None, da: List[float] | None = None) float[source]
id5(x: float, lf: List[float] | None = None, sl: float | None = None, da: List[float] | None = None) float[source]
id6(x: float, lf: List[float] | None = None, da: List[float] | None = None) float[source]
id7(x: float, lf: List[float] | None = None, da: List[float] | None = None) float[source]
id8(x: float, lf: List[float] | None = None, da: List[float] | None = None) float[source]
id9(x: float, lf: List[float] | None = None, da: List[float] | None = None) float[source]
id10(x: float, lf: List[float] | None = None, da: List[float] | None = None) float[source]
id12(x: float) float[source]
id13(x: float) float[source]
remove_consonant(start: bool = True) None[source]
add_consonant(consonant: str, start: bool = True) None[source]
change_consonant(consonant: str, start: bool = True) None[source]
durations_of_fixed_pitches(opts: Dict | None = None) Dict[source]
convert_c_iso_to_hindi_and_ipa() None[source]
update_vowel(v_iso: str) None[source]
to_json() Dict[source]
static from_json(obj: Dict, ratios=None, fundamental=None) Trajectory[source]
static names() List[str][source]

Pitch

class idtap.Pitch(options: PitchOptionsType | None = None)[source]

Bases: object

__init__(options: PitchOptionsType | None = None)[source]
property frequency
property non_offset_frequency
property non_offset_log_freq
property log_freq
property sargam_letter
property numbered_pitch
property chroma
to_json()[source]
set_oct(new_oct)[source]
static pitch_number_to_chroma(pitch_number: int) int[source]
static chroma_to_scale_degree(chroma: int) tuple[int, bool][source]
static from_pitch_number(pitch_number: int, fundamental: float = 261.63) Pitch[source]
property solfege_letter: str
property scale_degree: int
property octaved_scale_degree: str
property octaved_sargam_letter: str
property octaved_sargam_letter_with_cents: str
property octaved_solfege_letter: str
property octaved_solfege_letter_with_cents: str
property octaved_chroma: str
property octaved_chroma_with_cents: str
property cents_string: str
property latex_sargam_letter: str

LaTeX-compatible base sargam letter.

property latex_octaved_sargam_letter: str

LaTeX math mode sargam letter with properly positioned diacritics.

property a440_cents_deviation: str
property western_pitch: str
property movable_c_cents_deviation: str
same_as(other: Pitch) bool[source]
classmethod from_json(obj: dict, ratios=None, fundamental=None) Pitch[source]

Musical Elements

Raga

class idtap.Raga(options: RagaOptionsType | None = None, preserve_ratios: bool = False, client=None)[source]

Bases: object

__init__(options: RagaOptionsType | None = None, preserve_ratios: bool = False, client=None) None[source]
property sargam_letters: List[str]
property solfege_strings: List[str]
property pc_strings: List[str]
property western_pitch_strings: List[str]
property rule_set_num_pitches: int
pitch_number_to_sargam_letter(pitch_number: int) str | None[source]
get_pitch_numbers(low: int, high: int) List[int][source]
pitch_number_to_scale_number(pitch_number: int) int[source]
scale_number_to_pitch_number(scale_number: int) int[source]
scale_number_to_sargam_letter(scale_number: int) str | None[source]
set_ratios(rule_set: Dict[str, bool | Dict[str, bool]]) List[float][source]
get_pitches(low: float = 100, high: float = 800) List[Pitch][source]

Get all pitches in the given frequency range.

When ratios have been preserved from transcription data, we generate pitches based on those actual ratios rather than the rule_set.

property stratified_ratios: List[float | List[float]]

Get stratified ratios matching the structure of the rule_set.

When ratios were preserved from transcription data (preserve_ratios=True), they may not match the rule_set structure. In this case, we use the tuning values directly since the ratios represent the actual transcribed pitches, not the theoretical rule_set structure.

property chikari_pitches: List[Pitch | None]

Derive 4 chikari pitches from the raga rule set.

Returns a list of 4 pitches (or None for silent strings): [0] Sa oct 2 (always present); [1] Sa oct 1 (always present); [2] Pa oct 1 (present if Pa is in the raga, else None); [3] Ga oct 1 (present if exactly one Ga variant, else None).

get_frequencies(low: float = 100, high: float = 800) List[float][source]
property sargam_names: List[str]
property swara_objects: List[Dict[str, int | bool]]
pitch_from_log_freq(log_freq: float) Pitch[source]
ratio_idx_to_tuning_tuple(idx: int) Tuple[str, str | None][source]
to_json() Dict[str, str | float | List[float] | Dict[str, float | Dict[str, float]]][source]
static from_json(obj: Dict, client=None) Raga[source]

Section

class idtap.Section(options: dict | None = None)[source]

Bases: object

__init__(options: dict | None = None) None[source]
all_pitches(repetition: bool = True) List[Pitch][source]
property trajectories: List[Trajectory]

Meter

class idtap.Meter(hierarchy: List[int | List[int]] | None = None, start_time: float = 0.0, tempo: float = 60.0, unique_id: str | None = None, repetitions: int = 1, tala_name: TalaName | None = None, vibhaga: List[str | int] | None = None)[source]

Bases: object

tala_presets: Dict[TalaName, TalaDefinition] = {TalaName.AdaChautal: {'hierarchy': [[2, 2, 2, 2, 2, 2, 2], 4], 'vibhaga': ['X', 'O', 2, 'O', 3, 4, 'O']}, TalaName.Dadra: {'hierarchy': [[3, 3], 4], 'vibhaga': ['X', 'O']}, TalaName.DeepchandiDhrupad: {'hierarchy': [[4, 2, 4, 2], 4], 'vibhaga': ['X', 2, 'O', 3]}, TalaName.DeepchandiThumri: {'hierarchy': [[3, 4, 3, 4], 4], 'vibhaga': ['X', 'O', 2, 3]}, TalaName.Dhamar: {'hierarchy': [[5, 2, 3, 4], 4], 'vibhaga': ['X', 2, 'O', 3]}, TalaName.Ektal: {'hierarchy': [[2, 2, 2, 2, 2, 2], 4], 'vibhaga': ['X', 'O', 2, 'O', 3, 4]}, TalaName.Jhaptal: {'hierarchy': [[2, 3, 2, 3], 4], 'vibhaga': ['X', 2, 'O', 3]}, TalaName.Jhoomra: {'hierarchy': [[3, 4, 3, 4], 4], 'vibhaga': ['X', 2, 'O', 3]}, TalaName.Keherwa: {'hierarchy': [4, 4], 'vibhaga': ['X', 'O']}, TalaName.Rupak: {'hierarchy': [3, 2, 2], 'vibhaga': ['X', 2, 3]}, TalaName.SoolTaal: {'hierarchy': [[2, 2, 2, 2, 2], 4], 'vibhaga': ['X', 2, 'O', 3, 4]}, TalaName.Tilwada: {'hierarchy': [[4, 4, 4, 4], 4], 'vibhaga': ['X', 2, 'O', 3]}, TalaName.Tintal: {'hierarchy': [[4, 4, 4, 4], 4], 'vibhaga': ['X', 2, 'O', 3]}, TalaName.Tivra: {'hierarchy': [[3, 2, 2], 4], 'vibhaga': ['X', 2, 3]}}
__init__(hierarchy: List[int | List[int]] | None = None, start_time: float = 0.0, tempo: float = 60.0, unique_id: str | None = None, repetitions: int = 1, tala_name: TalaName | None = None, vibhaga: List[str | int] | None = None) None[source]
classmethod from_tala(name: TalaName, start_time: float, tempo: float, repetitions: int) Meter[source]

Create a Meter from a predefined tala preset.

Parameters:
  • name – The tala name (e.g., TalaName.Tintal)

  • start_time – Start time in seconds

  • tempo – Tempo in BPM (at matra level)

  • repetitions – Number of tala cycles

Returns:

A Meter configured with the tala preset

property cycle_dur: float
get_tempo_at_layer(layer: int) float[source]

Get the tempo at a specific hierarchical layer.

Parameters:

layer – The hierarchy layer (0 = coarsest/vibhag, higher = finer subdivisions)

Returns:

The tempo (BPM) at that layer

Raises:

ValueError – If layer is out of bounds

property all_pulses: List[Pulse]

Get all pulses from the finest layer (lowest level) of the hierarchy.

This concatenates pulses from all pulse structures in the last layer, matching the TypeScript implementation: lastLayer.map(ps => ps.pulses).flat()

property real_times: List[float]
offset_pulse(pulse: Pulse, offset: float) None[source]
reset_tempo() None[source]
grow_cycle() None[source]
add_time_points(time_points: List[float], layer: int = 1) None[source]
static from_time_points(time_points: List[float], hierarchy: List[int | List[int]], repetitions: int = 1, layer: int = 0) Meter[source]

Create a Meter from actual pulse time points, handling timing variations.

This method creates a meter that accurately represents actual pulse timing (including rubato and tempo variations) rather than theoretical even spacing. Uses a timing regularization algorithm to handle extreme deviations.

Parameters:
  • time_points – List of actual pulse times in seconds

  • hierarchy – Meter hierarchy (e.g., [4, 4, 2])

  • repetitions – Number of cycle repetitions

  • layer – Which hierarchical layer the time points represent (0 or 1)

Returns:

Meter object with pulses positioned at the provided time points

static from_json(obj: Dict) Meter[source]
to_json() Dict[source]
get_segment_boundary_indices() List[int][source]

Get the indices of matra pulses that are at segment (vibhag) boundaries.

For Tintal [[4,4,4,4], 4], returns [0, 4, 8, 12] per cycle. For Jhoomra [[3,4,3,4], 4], returns [0, 3, 7, 10] per cycle. Returns an empty list if hierarchy[0] is not compound.

get_matra_pulses() List[Pulse][source]

Get only the matra-level pulses (pulses that correspond to matras/beats).

For a hierarchy like [[4,4,4,4], 4] (Tintal), the total number of pulses is 64 (16 matras × 4 subdivisions), and this method returns only the 16 matra pulses (every 4th pulse).

These are the pulses that correspond to beats in the tala structure.

is_segment_boundary(pulse: Pulse) bool[source]

Check if a pulse is at a segment (vibhag) boundary.

get_segment_for_matra_index(matra_idx: int) Dict[str, int] | None[source]

Get the segment range (start and end matra indices) for a given matra index.

Returns None if the hierarchy doesn’t have a compound first layer.

Parameters:

matra_idx – The matra index to find the segment for

Returns:

Dict with ‘start’ and ‘end’ keys, or None

offset_segment_boundary(pulse: Pulse, offset: float) bool[source]

Offset a segment boundary pulse and proportionally adjust all matra pulses within that segment.

This makes nudging a vibhag boundary move all the matras within that vibhag proportionally.

Parameters:
  • pulse – The pulse to offset (must be at a segment boundary)

  • offset – The time offset in seconds

Returns:

True if segment-aware offset was applied, False if regular offset should be used

get_musical_time(real_time: float, reference_level: int | None = None) 'MusicalTime' | Literal[False][source]

Convert real time to musical time within this meter.

Parameters:
  • real_time – Time in seconds

  • reference_level – Hierarchical level for fractional calculation (0=beat, 1=subdivision, etc.). Defaults to finest level.

Returns:

MusicalTime object if time falls within meter boundaries, False otherwise

Advanced Models

Articulation

class idtap.Articulation(options: ArticulationOptions | None = None)[source]

Bases: object

__init__(options: ArticulationOptions | None = None) None[source]
static from_json(obj: Dict) Articulation[source]
to_json() Dict[source]

Assemblage

class idtap.Assemblage(instrument: Instrument, name: str, id: str | None = None)[source]

Bases: object

__init__(instrument: Instrument, name: str, id: str | None = None) None[source]
add_strand(label: str, id: str | None = None) None[source]
add_phrase(phrase: Phrase, strand_id: str | None = None) None[source]
remove_strand(id: str) None[source]
move_phrase_to_strand(phrase: Phrase, target_strand_id: str | None = None) None[source]
remove_phrase(phrase: Phrase) None[source]
static from_descriptor(descriptor: AssemblageDescriptorType, phrases: List[Phrase]) Assemblage[source]
property loose_phrases: List[Phrase]
property descriptor: AssemblageDescriptorType

Automation

class idtap.Automation(options: AutomationOptionsType | None = None)[source]

Bases: object

__init__(options: AutomationOptionsType | None = None) None[source]
add_value(norm_time: float, value: float) None[source]
remove_value(idx: int) None[source]
value_at_x(x: float) float[source]
generate_value_curve(value_dur: float, duration: float, max_val: float = 1.0) List[float][source]
partition(dur_array: List[float]) List[Automation][source]
static compress(automations: List[Automation], dur_array: List[float]) Automation[source]
static from_json(obj: Dict) Automation[source]
to_json() Dict[source]

Chikari

class idtap.Chikari(options: ChikariOptionsType | None = None)[source]

Bases: object

__init__(options: ChikariOptionsType | None = None) None[source]
to_json() Dict[source]
static from_json(obj: Dict) Chikari[source]

Group

class idtap.Group(options: Dict | None = None)[source]

Bases: object

__init__(options: Dict | None = None) None[source]
property min_freq: float
property max_freq: float
all_pitches(repetition: bool = True) List[Pitch][source]
test_for_adjacency() bool[source]
add_traj(traj: Trajectory) None[source]
to_json() Dict[source]
static from_json(obj: Dict) Group[source]

NoteViewPhrase

class idtap.NoteViewPhrase(options: Dict[str, Any] | None = None)[source]

Bases: object

__init__(options: Dict[str, Any] | None = None) None[source]